xref: /titanic_50/usr/src/uts/common/io/scsi/targets/sd.c (revision 3163a50a40d6c94d3ab61f24084e7048d1c759e8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * SCSI disk target driver.
31  */
32 
33 #include <sys/scsi/scsi.h>
34 #include <sys/dkbad.h>
35 #include <sys/dklabel.h>
36 #include <sys/dkio.h>
37 #include <sys/fdio.h>
38 #include <sys/cdio.h>
39 #include <sys/mhd.h>
40 #include <sys/vtoc.h>
41 #include <sys/dktp/fdisk.h>
42 #include <sys/file.h>
43 #include <sys/stat.h>
44 #include <sys/kstat.h>
45 #include <sys/vtrace.h>
46 #include <sys/note.h>
47 #include <sys/thread.h>
48 #include <sys/proc.h>
49 #include <sys/efi_partition.h>
50 #include <sys/var.h>
51 #include <sys/aio_req.h>
52 #if (defined(__fibre))
53 /* Note: is there a leadville version of the following? */
54 #include <sys/fc4/fcal_linkapp.h>
55 #endif
56 #include <sys/taskq.h>
57 #include <sys/uuid.h>
58 #include <sys/byteorder.h>
59 #include <sys/sdt.h>
60 
61 #include "sd_xbuf.h"
62 
63 #include <sys/scsi/targets/sddef.h>
64 
65 
66 /*
67  * Loadable module info.
68  */
69 #if (defined(__fibre))
70 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"	/* %I% = SCCS revision keyword */
71 char _depends_on[]	= "misc/scsi drv/fcp";	/* ssd build also needs the FC transport (fcp) */
72 #else
73 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"	/* %I% = SCCS revision keyword */
74 char _depends_on[]	= "misc/scsi";
75 #endif
76 
77 /*
78  * Define the interconnect type, to allow the driver to distinguish
79  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
80  *
81  * This is really for backward compatibility. In the future, the driver
82  * should actually check the "interconnect-type" property as reported by
83  * the HBA; however at present this property is not defined by all HBAs,
84  * so we will use this #define (1) to permit the driver to run in
85  * backward-compatibility mode; and (2) to print a notification message
86  * if an FC HBA does not support the "interconnect-type" property.  The
87  * behavior of the driver will be to assume parallel SCSI behaviors unless
88  * the "interconnect-type" property is defined by the HBA **AND** has a
89  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
90  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
91  * Channel behaviors (as per the old ssd).  (Note that the
92  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
93  * will result in the driver assuming parallel SCSI behaviors.)
94  *
95  * (see common/sys/scsi/impl/services.h)
96  *
97  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
98  * since some FC HBAs may already support that, and there is some code in
99  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
100  * default would confuse that code, and besides things should work fine
101  * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
102  * "interconnect-type" property.
103  */
104 #if (defined(__fibre))
105 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE	/* ssd: assume FC behaviors */
106 #else
107 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL	/* sd: assume parallel SCSI */
108 #endif
109 
110 /*
111  * The name of the driver, established from the module name in _init.
112  */
113 static	char *sd_label			= NULL;
114 
115 /*
116  * Driver name is unfortunately prefixed on some driver.conf properties.
 * The ssd build therefore uses "ssd"-prefixed property-name strings; the
 * #defines below also rename the symbols themselves so the sd and ssd
 * modules (built from this same source) do not collide in the kernel
 * namespace -- see the larger namespace-collision note further down.
117  */
118 #if (defined(__fibre))
119 #define	sd_max_xfer_size		ssd_max_xfer_size
120 #define	sd_config_list			ssd_config_list
121 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
122 static	char *sd_config_list		= "ssd-config-list";
123 #else
124 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
125 static	char *sd_config_list		= "sd-config-list";
126 #endif
127 
128 /*
129  * Driver global variables
130  */
131 
132 #if (defined(__fibre))
133 /*
134  * These #defines are to avoid namespace collisions that occur because this
135  * code is currently used to compile two separate driver modules: sd and ssd.
136  * All global variables need to be treated this way (even if declared static)
137  * in order to allow the debugger to resolve the names properly.
138  * It is anticipated that in the near future the ssd module will be obsoleted,
139  * at which time this namespace issue should go away.
140  */
141 #define	sd_state			ssd_state
142 #define	sd_io_time			ssd_io_time
143 #define	sd_failfast_enable		ssd_failfast_enable
144 #define	sd_ua_retry_count		ssd_ua_retry_count
145 #define	sd_report_pfa			ssd_report_pfa
146 #define	sd_max_throttle			ssd_max_throttle
147 #define	sd_min_throttle			ssd_min_throttle
148 #define	sd_rot_delay			ssd_rot_delay
149 
150 #define	sd_retry_on_reservation_conflict	\
151 					ssd_retry_on_reservation_conflict
152 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
153 #define	sd_resv_conflict_name		ssd_resv_conflict_name
154 
155 #define	sd_component_mask		ssd_component_mask
156 #define	sd_level_mask			ssd_level_mask
157 #define	sd_debug_un			ssd_debug_un
158 #define	sd_error_level			ssd_error_level
159 
160 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
161 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
162 
163 #define	sd_tr				ssd_tr
164 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
165 #define	sd_check_media_time		ssd_check_media_time
166 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
167 #define	sd_label_mutex			ssd_label_mutex
168 #define	sd_detach_mutex			ssd_detach_mutex
169 #define	sd_log_buf			ssd_log_buf
170 #define	sd_log_mutex			ssd_log_mutex
171 
172 #define	sd_disk_table			ssd_disk_table
173 #define	sd_disk_table_size		ssd_disk_table_size
174 #define	sd_sense_mutex			ssd_sense_mutex
175 #define	sd_cdbtab			ssd_cdbtab
176 
177 #define	sd_cb_ops			ssd_cb_ops
178 #define	sd_ops				ssd_ops
179 #define	sd_additional_codes		ssd_additional_codes
180 
181 #define	sd_minor_data			ssd_minor_data
182 #define	sd_minor_data_efi		ssd_minor_data_efi
183 
184 #define	sd_tq				ssd_tq
185 #define	sd_wmr_tq			ssd_wmr_tq
186 #define	sd_taskq_name			ssd_taskq_name
187 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
188 #define	sd_taskq_minalloc		ssd_taskq_minalloc
189 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
190 
191 #define	sd_dump_format_string		ssd_dump_format_string
192 
193 #define	sd_iostart_chain		ssd_iostart_chain
194 #define	sd_iodone_chain			ssd_iodone_chain
195 
196 #define	sd_pm_idletime			ssd_pm_idletime
197 
198 #define	sd_force_pm_supported		ssd_force_pm_supported
199 
200 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
201 #endif
202 
203 
204 #ifdef	SDDEBUG
205 int	sd_force_pm_supported		= 0;	/* debug-only override; see also the ssd rename above */
206 #endif	/* SDDEBUG */
207 
208 void *sd_state				= NULL;	/* NOTE(review): presumably the ddi_soft_state anchor -- confirm in _init */
209 int sd_io_time				= SD_IO_TIME;
210 int sd_failfast_enable			= 1;
211 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
212 int sd_report_pfa			= 1;
213 int sd_max_throttle			= SD_MAX_THROTTLE;
214 int sd_min_throttle			= SD_MIN_THROTTLE;
215 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
216 
217 int sd_retry_on_reservation_conflict	= 1;
218 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
219 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
220 
221 static int sd_dtype_optical_bind	= -1;	/* NOTE(review): -1 appears to mean "unset" -- confirm at use site */
222 
223 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
224 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
225 
226 /*
227  * Global data for debug logging. To enable debug printing, sd_component_mask
228  * and sd_level_mask should be set to the desired bit patterns as outlined in
229  * sddef.h.
230  */
231 uint_t	sd_component_mask		= 0x0;
232 uint_t	sd_level_mask			= 0x0;
233 struct	sd_lun *sd_debug_un		= NULL;	/* non-NULL restricts debug output to one unit */
234 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
235 
236 /* Note: these may go away in the future... */
237 static uint32_t	sd_xbuf_active_limit	= 512;
238 static uint32_t sd_xbuf_reserve_limit	= 16;
239 
240 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
241 
242 /*
243  * Timer value used to reset the throttle after it has been reduced
244  * (typically in response to TRAN_BUSY)
245  */
246 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
247 
248 /*
249  * Interval value associated with the media change scsi watch.
250  */
251 static int sd_check_media_time		= 3000000;	/* NOTE(review): presumably usec (3 sec) -- confirm against scsi_watch units */
252 
253 /*
254  * Wait value used for in progress operations during a DDI_SUSPEND
255  */
256 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
257 
258 /*
259  * sd_label_mutex protects a static buffer used in the disk label
260  * component of the driver
261  */
262 static kmutex_t sd_label_mutex;
263 
264 /*
265  * sd_detach_mutex protects un_layer_count, un_detach_count, and
266  * un_opens_in_progress in the sd_lun structure.
267  */
268 static kmutex_t sd_detach_mutex;
269 
 /* lint/lock-lint annotation recording the mutex -> data relationship above */
270 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
271 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
272 
273 /*
274  * Global buffer and mutex for debug logging
275  */
276 static char	sd_log_buf[1024];
277 static kmutex_t	sd_log_mutex;
278 
279 
280 /*
281  * "Smart" Probe Caching structs, globals, #defines, etc.
282  * For parallel scsi and non-self-identify device only.
283  */
284 
285 /*
286  * The following resources and routines are implemented to support
287  * "smart" probing, which caches the scsi_probe() results in an array,
288  * in order to help avoid long probe times.
289  */
290 struct sd_scsi_probe_cache {
291 	struct	sd_scsi_probe_cache	*next;		/* singly-linked list link */
292 	dev_info_t	*pdip;		/* parent (HBA) devinfo this cache belongs to */
293 	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() result, indexed by target */
294 };
295 
296 static kmutex_t	sd_scsi_probe_cache_mutex;
297 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
298 
299 /*
300  * Really we only need protection on the head of the linked list, but
301  * better safe than sorry.
302  */
303 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
304     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
305 
306 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
307     sd_scsi_probe_cache_head))
308 
309 
310 /*
311  * Vendor specific data name property declarations
 *
 * NOTE(review): the sd_tunables initializers below are positional.  Judging
 * by the named values used in sve_properties, maserati_properties,
 * pirus_properties and tst_properties, the field order appears to be:
 *	throttle, ctype, not-ready retries, busy retries, reset retries,
 *	reserve/release time, min throttle, disksort-disabled flag,
 *	lun-reset-enabled flag
 * -- confirm against the sd_tunables definition in sddef.h.  A zero is a
 * "not set" placeholder; the SD_CONF_BSET_* flags in sd_disk_table select
 * which fields are actually applied.
312  */
313 
314 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
315 
316 static sd_tunables seagate_properties = {
317 	SEAGATE_THROTTLE_VALUE,
318 	0,
319 	0,
320 	0,
321 	0,
322 	0,
323 	0,
324 	0,
325 	0
326 };
327 
328 
329 static sd_tunables fujitsu_properties = {
330 	FUJITSU_THROTTLE_VALUE,
331 	0,
332 	0,
333 	0,
334 	0,
335 	0,
336 	0,
337 	0,
338 	0
339 };
340 
341 static sd_tunables ibm_properties = {
342 	IBM_THROTTLE_VALUE,
343 	0,
344 	0,
345 	0,
346 	0,
347 	0,
348 	0,
349 	0,
350 	0
351 };
352 
353 static sd_tunables purple_properties = {
354 	PURPLE_THROTTLE_VALUE,
355 	0,
356 	0,
357 	PURPLE_BUSY_RETRIES,
358 	PURPLE_RESET_RETRY_COUNT,
359 	PURPLE_RESERVE_RELEASE_TIME,
360 	0,
361 	0,
362 	0
363 };
364 
365 static sd_tunables sve_properties = {
366 	SVE_THROTTLE_VALUE,
367 	0,
368 	0,
369 	SVE_BUSY_RETRIES,
370 	SVE_RESET_RETRY_COUNT,
371 	SVE_RESERVE_RELEASE_TIME,
372 	SVE_MIN_THROTTLE_VALUE,
373 	SVE_DISKSORT_DISABLED_FLAG,
374 	0
375 };
376 
377 static sd_tunables maserati_properties = {
378 	0,
379 	0,
380 	0,
381 	0,
382 	0,
383 	0,
384 	0,
385 	MASERATI_DISKSORT_DISABLED_FLAG,
386 	MASERATI_LUN_RESET_ENABLED_FLAG
387 };
388 
389 static sd_tunables pirus_properties = {
390 	PIRUS_THROTTLE_VALUE,
391 	0,
392 	PIRUS_NRR_COUNT,
393 	PIRUS_BUSY_RETRIES,
394 	PIRUS_RESET_RETRY_COUNT,
395 	0,
396 	PIRUS_MIN_THROTTLE_VALUE,
397 	PIRUS_DISKSORT_DISABLED_FLAG,
398 	PIRUS_LUN_RESET_ENABLED_FLAG
399 };
400 
401 #endif
402 
403 #if (defined(__sparc) && !defined(__fibre)) || \
404 	(defined(__i386) || defined(__amd64))
405 
406 
 /* Throttle overrides for the Seagate Elite (ST42400N) and ST31200N entries */
407 static sd_tunables elite_properties = {
408 	ELITE_THROTTLE_VALUE,
409 	0,
410 	0,
411 	0,
412 	0,
413 	0,
414 	0,
415 	0,
416 	0
417 };
418 
419 static sd_tunables st31200n_properties = {
420 	ST31200N_THROTTLE_VALUE,
421 	0,
422 	0,
423 	0,
424 	0,
425 	0,
426 	0,
427 	0,
428 	0
429 };
430 
431 #endif /* Fibre or not */
432 
 /* LSI/Symbios variants: throttle and/or NOT READY retry-count overrides */
433 static sd_tunables lsi_properties_scsi = {
434 	LSI_THROTTLE_VALUE,
435 	0,
436 	LSI_NOTREADY_RETRIES,
437 	0,
438 	0,
439 	0,
440 	0,
441 	0,
442 	0
443 };
444 
445 static sd_tunables symbios_properties = {
446 	SYMBIOS_THROTTLE_VALUE,
447 	0,
448 	SYMBIOS_NOTREADY_RETRIES,
449 	0,
450 	0,
451 	0,
452 	0,
453 	0,
454 	0
455 };
456 
457 static sd_tunables lsi_properties = {
458 	0,
459 	0,
460 	LSI_NOTREADY_RETRIES,
461 	0,
462 	0,
463 	0,
464 	0,
465 	0,
466 	0
467 };
468 
469 static sd_tunables lsi_oem_properties = {
470 	0,
471 	0,
472 	LSI_OEM_NOTREADY_RETRIES,
473 	0,
474 	0,
475 	0,
476 	0,
477 	0,
478 	0
479 };
480 
481 
482 
483 #if (defined(SD_PROP_TST))
484 
 /*
  * Test hook: values applied by the "VENDOR  PRODUCT " sd_disk_table entry
  * when the driver is built with -DSD_PROP_TST.
  */
485 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
486 #define	SD_TST_THROTTLE_VAL	16
487 #define	SD_TST_NOTREADY_VAL	12
488 #define	SD_TST_BUSY_VAL		60
489 #define	SD_TST_RST_RETRY_VAL	36
490 #define	SD_TST_RSV_REL_TIME	60
491 
492 static sd_tunables tst_properties = {
493 	SD_TST_THROTTLE_VAL,
494 	SD_TST_CTYPE_VAL,
495 	SD_TST_NOTREADY_VAL,
496 	SD_TST_BUSY_VAL,
497 	SD_TST_RST_RETRY_VAL,
498 	SD_TST_RSV_REL_TIME,
499 	0,
500 	0,
501 	0
502 };
503 #endif
504 
505 /* This is similar to the ANSI toupper implementation */
 /* NOTE: macro evaluates C more than once -- do not pass an argument with side effects */
506 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
507 
508 /*
509  * Static Driver Configuration Table
510  *
511  * This is the table of disks which need throttle adjustment (or, perhaps
512  * something else as defined by the flags at a future time.)  device_id
513  * is a string consisting of concatenated vid (vendor), pid (product/model)
514  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
515  * the parts of the string are as defined by the sizes in the scsi_inquiry
516  * structure.  Device type is searched as far as the device_id string is
517  * defined.  Flags defines which values are to be set in the driver from the
518  * properties list.
519  *
520  * Entries below which begin and end with a "*" are a special case.
521  * These do not have a specific vendor, and the string which follows
522  * can appear anywhere in the 16 byte PID portion of the inquiry data.
523  *
524  * Entries below which begin and end with a " " (blank) are a special
525  * case. The comparison function will treat multiple consecutive blanks
526  * as equivalent to a single blank. For example, this causes a
527  * sd_disk_table entry of " NEC CDROM " to match a device's id string
528  * of  "NEC       CDROM".
529  *
530  * Note: The MD21 controller type has been obsoleted.
531  *	 ST318202F is a Legacy device
532  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
533  *	 made with an FC connection. The entries here are a legacy.
534  */
535 static sd_disk_config_t sd_disk_table[] = {	/* entries: { device_id, SD_CONF_BSET_* flags, &properties } */
536 #if defined(__fibre) || defined(__i386) || defined(__amd64)
537 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
538 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
539 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
540 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
541 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
542 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
543 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
544 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
545 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
546 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
547 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
548 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
549 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
550 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
551 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
552 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
553 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
554 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
555 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
556 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
557 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
558 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
559 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
560 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
561 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
562 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
563 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
564 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
565 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
566 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
567 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
568 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
569 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
570 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
571 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
572 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
573 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
574 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
575 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
576 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
577 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
578 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
579 			SD_CONF_BSET_BSY_RETRY_COUNT|
580 			SD_CONF_BSET_RST_RETRIES|
581 			SD_CONF_BSET_RSV_REL_TIME,
582 		&purple_properties },
583 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
584 		SD_CONF_BSET_BSY_RETRY_COUNT|
585 		SD_CONF_BSET_RST_RETRIES|
586 		SD_CONF_BSET_RSV_REL_TIME|
587 		SD_CONF_BSET_MIN_THROTTLE|
588 		SD_CONF_BSET_DISKSORT_DISABLED,
589 		&sve_properties },
590 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
591 			SD_CONF_BSET_BSY_RETRY_COUNT|
592 			SD_CONF_BSET_RST_RETRIES|
593 			SD_CONF_BSET_RSV_REL_TIME,
594 		&purple_properties },
595 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
596 		SD_CONF_BSET_LUN_RESET_ENABLED,
597 		&maserati_properties },
598 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
599 		SD_CONF_BSET_NRR_COUNT|
600 		SD_CONF_BSET_BSY_RETRY_COUNT|
601 		SD_CONF_BSET_RST_RETRIES|
602 		SD_CONF_BSET_MIN_THROTTLE|
603 		SD_CONF_BSET_DISKSORT_DISABLED|
604 		SD_CONF_BSET_LUN_RESET_ENABLED,
605 		&pirus_properties },
606 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
607 		SD_CONF_BSET_NRR_COUNT|
608 		SD_CONF_BSET_BSY_RETRY_COUNT|
609 		SD_CONF_BSET_RST_RETRIES|
610 		SD_CONF_BSET_MIN_THROTTLE|
611 		SD_CONF_BSET_DISKSORT_DISABLED|
612 		SD_CONF_BSET_LUN_RESET_ENABLED,
613 		&pirus_properties },
614 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
615 		SD_CONF_BSET_NRR_COUNT|
616 		SD_CONF_BSET_BSY_RETRY_COUNT|
617 		SD_CONF_BSET_RST_RETRIES|
618 		SD_CONF_BSET_MIN_THROTTLE|
619 		SD_CONF_BSET_DISKSORT_DISABLED|
620 		SD_CONF_BSET_LUN_RESET_ENABLED,
621 		&pirus_properties },
622 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
623 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
624 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
625 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
626 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
627 #endif /* fibre or NON-sparc platforms */
628 #if ((defined(__sparc) && !defined(__fibre)) ||\
629 	(defined(__i386) || defined(__amd64)))
630 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
631 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
632 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
633 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
634 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
635 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
636 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
637 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
638 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
639 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
640 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
641 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
642 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
643 	    &symbios_properties },
644 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
645 	    &lsi_properties_scsi },
646 #if defined(__i386) || defined(__amd64)
647 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
648 				    | SD_CONF_BSET_READSUB_BCD
649 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
650 				    | SD_CONF_BSET_NO_READ_HEADER
651 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
652 
653 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
654 				    | SD_CONF_BSET_READSUB_BCD
655 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
656 				    | SD_CONF_BSET_NO_READ_HEADER
657 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
658 #endif /* __i386 || __amd64 */
659 #endif /* sparc NON-fibre or NON-sparc platforms */
660 
661 #if (defined(SD_PROP_TST))
662 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
663 				| SD_CONF_BSET_CTYPE
664 				| SD_CONF_BSET_NRR_COUNT
665 				| SD_CONF_BSET_FAB_DEVID
666 				| SD_CONF_BSET_NOCACHE
667 				| SD_CONF_BSET_BSY_RETRY_COUNT
668 				| SD_CONF_BSET_PLAYMSF_BCD
669 				| SD_CONF_BSET_READSUB_BCD
670 				| SD_CONF_BSET_READ_TOC_TRK_BCD
671 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
672 				| SD_CONF_BSET_NO_READ_HEADER
673 				| SD_CONF_BSET_READ_CD_XD4
674 				| SD_CONF_BSET_RST_RETRIES
675 				| SD_CONF_BSET_RSV_REL_TIME
676 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
677 #endif
678 };
679 
680 static const int sd_disk_table_size =
681 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
682 
683 
684 /*
685  * Return codes of sd_uselabel().
686  */
687 #define	SD_LABEL_IS_VALID		0
688 #define	SD_LABEL_IS_INVALID		1
689 
 /* un_interconnect_type values (see SD_DEFAULT_INTERCONNECT_TYPE above) */
690 #define	SD_INTERCONNECT_PARALLEL	0
691 #define	SD_INTERCONNECT_FABRIC		1
692 #define	SD_INTERCONNECT_FIBRE		2
693 #define	SD_INTERCONNECT_SSA		3
694 #define	SD_IS_PARALLEL_SCSI(un)		\
695 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
696 
697 /*
698  * Definitions used by device id registration routines
699  */
700 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
701 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
702 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
703 #define	WD_NODE			7	/* the whole disk minor */
704 
705 static kmutex_t sd_sense_mutex = {0};
706 
707 /*
708  * Macros for updates of the driver state
 *
 * New_state(un, s): save the current un_state into un_last_state, then set
 * un_state = s.  Written as a comma expression so it can be used wherever a
 * single expression is expected.
 * Restore_state(un): return to the previous state.  Side effect: via
 * New_state(), un_last_state is overwritten with the state that was current
 * just before the restore.
709  */
710 #define	New_state(un, s)        \
711 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
712 #define	Restore_state(un)	\
713 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
714 
715 static struct sd_cdbinfo sd_cdbtab[] = {
 /*
  * NOTE(review): entries appear to be { CDB group size, group opcode,
  * max addressable block, max transfer length } -- confirm field names
  * against struct sd_cdbinfo in sddef.h.  Group 0 carries 21-bit LBAs
  * (0x1FFFFF) with 8-bit counts; Groups 1/5 carry 32-bit LBAs; Group 4
  * carries 64-bit LBAs.
  */
716 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
717 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
718 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
719 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
720 };
721 
722 /*
723  * Specifies the number of seconds that must have elapsed since the last
724  * cmd. has completed for a device to be declared idle to the PM framework.
725  */
726 static int sd_pm_idletime = 1;	/* seconds */
727 
728 /*
729  * Internal function prototypes
730  */
731 
732 #if (defined(__fibre))
733 /*
734  * These #defines are to avoid namespace collisions that occur because this
735  * code is currently used to compile two separate driver modules: sd and ssd.
736  * All function names need to be treated this way (even if declared static)
737  * in order to allow the debugger to resolve the names properly.
738  * It is anticipated that in the near future the ssd module will be obsoleted,
739  * at which time this ugliness should go away.
740  */
741 #define	sd_log_trace			ssd_log_trace
742 #define	sd_log_info			ssd_log_info
743 #define	sd_log_err			ssd_log_err
744 #define	sdprobe				ssdprobe
745 #define	sdinfo				ssdinfo
746 #define	sd_prop_op			ssd_prop_op
747 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
748 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
749 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
750 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
751 #define	sd_spin_up_unit			ssd_spin_up_unit
752 #define	sd_enable_descr_sense		ssd_enable_descr_sense
753 #define	sd_set_mmc_caps			ssd_set_mmc_caps
754 #define	sd_read_unit_properties		ssd_read_unit_properties
755 #define	sd_process_sdconf_file		ssd_process_sdconf_file
756 #define	sd_process_sdconf_table		ssd_process_sdconf_table
757 #define	sd_sdconf_id_match		ssd_sdconf_id_match
758 #define	sd_blank_cmp			ssd_blank_cmp
759 #define	sd_chk_vers1_data		ssd_chk_vers1_data
760 #define	sd_set_vers1_properties		ssd_set_vers1_properties
761 #define	sd_validate_geometry		ssd_validate_geometry
762 
763 #if defined(_SUNOS_VTOC_16)
764 #define	sd_convert_geometry		ssd_convert_geometry
765 #endif
766 
767 #define	sd_resync_geom_caches		ssd_resync_geom_caches
768 #define	sd_read_fdisk			ssd_read_fdisk
769 #define	sd_get_physical_geometry	ssd_get_physical_geometry
770 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
771 #define	sd_update_block_info		ssd_update_block_info
772 #define	sd_swap_efi_gpt			ssd_swap_efi_gpt
773 #define	sd_swap_efi_gpe			ssd_swap_efi_gpe
774 #define	sd_validate_efi			ssd_validate_efi
775 #define	sd_use_efi			ssd_use_efi
776 #define	sd_uselabel			ssd_uselabel
777 #define	sd_build_default_label		ssd_build_default_label
778 #define	sd_has_max_chs_vals		ssd_has_max_chs_vals
779 #define	sd_inq_fill			ssd_inq_fill
780 #define	sd_register_devid		ssd_register_devid
781 #define	sd_get_devid_block		ssd_get_devid_block
782 #define	sd_get_devid			ssd_get_devid
783 #define	sd_create_devid			ssd_create_devid
784 #define	sd_write_deviceid		ssd_write_deviceid
785 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
786 #define	sd_setup_pm			ssd_setup_pm
787 #define	sd_create_pm_components		ssd_create_pm_components
788 #define	sd_ddi_suspend			ssd_ddi_suspend
789 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
790 #define	sd_ddi_resume			ssd_ddi_resume
791 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
792 #define	sdpower				ssdpower
793 #define	sdattach			ssdattach
794 #define	sddetach			ssddetach
795 #define	sd_unit_attach			ssd_unit_attach
796 #define	sd_unit_detach			ssd_unit_detach
797 #define	sd_create_minor_nodes		ssd_create_minor_nodes
798 #define	sd_create_errstats		ssd_create_errstats
799 #define	sd_set_errstats			ssd_set_errstats
800 #define	sd_set_pstats			ssd_set_pstats
801 #define	sddump				ssddump
802 #define	sd_scsi_poll			ssd_scsi_poll
803 #define	sd_send_polled_RQS		ssd_send_polled_RQS
804 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
805 #define	sd_init_event_callbacks		ssd_init_event_callbacks
806 #define	sd_event_callback		ssd_event_callback
807 #define	sd_disable_caching		ssd_disable_caching
808 #define	sd_make_device			ssd_make_device
809 #define	sdopen				ssdopen
810 #define	sdclose				ssdclose
811 #define	sd_ready_and_valid		ssd_ready_and_valid
812 #define	sdmin				ssdmin
813 #define	sdread				ssdread
814 #define	sdwrite				ssdwrite
815 #define	sdaread				ssdaread
816 #define	sdawrite			ssdawrite
817 #define	sdstrategy			ssdstrategy
818 #define	sdioctl				ssdioctl
819 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
820 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
821 #define	sd_checksum_iostart		ssd_checksum_iostart
822 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
823 #define	sd_pm_iostart			ssd_pm_iostart
824 #define	sd_core_iostart			ssd_core_iostart
825 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
826 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
827 #define	sd_checksum_iodone		ssd_checksum_iodone
828 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
829 #define	sd_pm_iodone			ssd_pm_iodone
830 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
831 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
832 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
833 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
834 #define	sd_buf_iodone			ssd_buf_iodone
835 #define	sd_uscsi_strategy		ssd_uscsi_strategy
836 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
837 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
838 #define	sd_uscsi_iodone			ssd_uscsi_iodone
839 #define	sd_xbuf_strategy		ssd_xbuf_strategy
840 #define	sd_xbuf_init			ssd_xbuf_init
841 #define	sd_pm_entry			ssd_pm_entry
842 #define	sd_pm_exit			ssd_pm_exit
843 
844 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
845 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
846 
847 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
848 #define	sdintr				ssdintr
849 #define	sd_start_cmds			ssd_start_cmds
850 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
851 #define	sd_bioclone_alloc		ssd_bioclone_alloc
852 #define	sd_bioclone_free		ssd_bioclone_free
853 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
854 #define	sd_shadow_buf_free		ssd_shadow_buf_free
855 #define	sd_print_transport_rejected_message	\
856 					ssd_print_transport_rejected_message
857 #define	sd_retry_command		ssd_retry_command
858 #define	sd_set_retry_bp			ssd_set_retry_bp
859 #define	sd_send_request_sense_command	ssd_send_request_sense_command
860 #define	sd_start_retry_command		ssd_start_retry_command
861 #define	sd_start_direct_priority_command	\
862 					ssd_start_direct_priority_command
863 #define	sd_return_failed_command	ssd_return_failed_command
864 #define	sd_return_failed_command_no_restart	\
865 					ssd_return_failed_command_no_restart
866 #define	sd_return_command		ssd_return_command
867 #define	sd_sync_with_callback		ssd_sync_with_callback
868 #define	sdrunout			ssdrunout
869 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
870 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
871 #define	sd_reduce_throttle		ssd_reduce_throttle
872 #define	sd_restore_throttle		ssd_restore_throttle
873 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
874 #define	sd_init_cdb_limits		ssd_init_cdb_limits
875 #define	sd_pkt_status_good		ssd_pkt_status_good
876 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
877 #define	sd_pkt_status_busy		ssd_pkt_status_busy
878 #define	sd_pkt_status_reservation_conflict	\
879 					ssd_pkt_status_reservation_conflict
880 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
881 #define	sd_handle_request_sense		ssd_handle_request_sense
882 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
883 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
884 #define	sd_validate_sense_data		ssd_validate_sense_data
885 #define	sd_decode_sense			ssd_decode_sense
886 #define	sd_print_sense_msg		ssd_print_sense_msg
887 #define	sd_extract_sense_info_descr	ssd_extract_sense_info_descr
888 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
889 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
890 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
891 #define	sd_sense_key_medium_or_hardware_error	\
892 					ssd_sense_key_medium_or_hardware_error
893 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
894 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
895 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
896 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
897 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
898 #define	sd_sense_key_default		ssd_sense_key_default
899 #define	sd_print_retry_msg		ssd_print_retry_msg
900 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
901 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
902 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
903 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
904 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
905 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
906 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
907 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
908 #define	sd_pkt_reason_default		ssd_pkt_reason_default
909 #define	sd_reset_target			ssd_reset_target
910 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
911 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
912 #define	sd_taskq_create			ssd_taskq_create
913 #define	sd_taskq_delete			ssd_taskq_delete
914 #define	sd_media_change_task		ssd_media_change_task
915 #define	sd_handle_mchange		ssd_handle_mchange
916 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
917 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
918 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
919 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
922 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
923 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
924 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
925 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
926 					ssd_send_scsi_PERSISTENT_RESERVE_IN
927 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
928 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
929 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
930 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
931 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
932 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
933 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
934 #define	sd_alloc_rqs			ssd_alloc_rqs
935 #define	sd_free_rqs			ssd_free_rqs
936 #define	sd_dump_memory			ssd_dump_memory
937 #define	sd_uscsi_ioctl			ssd_uscsi_ioctl
938 #define	sd_get_media_info		ssd_get_media_info
939 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
940 #define	sd_dkio_get_geometry		ssd_dkio_get_geometry
941 #define	sd_dkio_set_geometry		ssd_dkio_set_geometry
942 #define	sd_dkio_get_partition		ssd_dkio_get_partition
943 #define	sd_dkio_set_partition		ssd_dkio_set_partition
944 #define	sd_dkio_partition		ssd_dkio_partition
945 #define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
946 #define	sd_dkio_get_efi			ssd_dkio_get_efi
947 #define	sd_build_user_vtoc		ssd_build_user_vtoc
948 #define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
949 #define	sd_dkio_set_efi			ssd_dkio_set_efi
950 #define	sd_build_label_vtoc		ssd_build_label_vtoc
951 #define	sd_write_label			ssd_write_label
952 #define	sd_clear_vtoc			ssd_clear_vtoc
953 #define	sd_clear_efi			ssd_clear_efi
954 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
955 #define	sd_setup_next_xfer		ssd_setup_next_xfer
956 #define	sd_dkio_get_temp		ssd_dkio_get_temp
957 #define	sd_dkio_get_mboot		ssd_dkio_get_mboot
958 #define	sd_dkio_set_mboot		ssd_dkio_set_mboot
959 #define	sd_setup_default_geometry	ssd_setup_default_geometry
960 #define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
961 #define	sd_check_mhd			ssd_check_mhd
962 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
963 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
964 #define	sd_sname			ssd_sname
965 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
966 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
967 #define	sd_take_ownership		ssd_take_ownership
968 #define	sd_reserve_release		ssd_reserve_release
969 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
970 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
971 #define	sd_persistent_reservation_in_read_keys	\
972 					ssd_persistent_reservation_in_read_keys
973 #define	sd_persistent_reservation_in_read_resv	\
974 					ssd_persistent_reservation_in_read_resv
975 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
976 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
977 #define	sd_mhdioc_release		ssd_mhdioc_release
978 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
979 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
980 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
981 #define	sr_change_blkmode		ssr_change_blkmode
982 #define	sr_change_speed			ssr_change_speed
983 #define	sr_atapi_change_speed		ssr_atapi_change_speed
984 #define	sr_pause_resume			ssr_pause_resume
985 #define	sr_play_msf			ssr_play_msf
986 #define	sr_play_trkind			ssr_play_trkind
987 #define	sr_read_all_subcodes		ssr_read_all_subcodes
988 #define	sr_read_subchannel		ssr_read_subchannel
989 #define	sr_read_tocentry		ssr_read_tocentry
990 #define	sr_read_tochdr			ssr_read_tochdr
991 #define	sr_read_cdda			ssr_read_cdda
992 #define	sr_read_cdxa			ssr_read_cdxa
993 #define	sr_read_mode1			ssr_read_mode1
994 #define	sr_read_mode2			ssr_read_mode2
995 #define	sr_read_cd_mode2		ssr_read_cd_mode2
996 #define	sr_sector_mode			ssr_sector_mode
997 #define	sr_eject			ssr_eject
998 #define	sr_ejected			ssr_ejected
999 #define	sr_check_wp			ssr_check_wp
1000 #define	sd_check_media			ssd_check_media
1001 #define	sd_media_watch_cb		ssd_media_watch_cb
1002 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1003 #define	sr_volume_ctrl			ssr_volume_ctrl
1004 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1005 #define	sd_log_page_supported		ssd_log_page_supported
1006 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1007 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1008 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1009 #define	sd_range_lock			ssd_range_lock
1010 #define	sd_get_range			ssd_get_range
1011 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1012 #define	sd_range_unlock			ssd_range_unlock
1013 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1014 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1015 
1016 #define	sd_iostart_chain		ssd_iostart_chain
1017 #define	sd_iodone_chain			ssd_iodone_chain
1018 #define	sd_initpkt_map			ssd_initpkt_map
1019 #define	sd_destroypkt_map		ssd_destroypkt_map
1020 #define	sd_chain_type_map		ssd_chain_type_map
1021 #define	sd_chain_index_map		ssd_chain_index_map
1022 
1023 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1024 #define	sd_failfast_flushq		ssd_failfast_flushq
1025 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1026 
1027 #define	sd_is_lsi			ssd_is_lsi
1028 
1029 #endif	/* #if (defined(__fibre)) */
1030 
1031 
1032 int _init(void);
1033 int _fini(void);
1034 int _info(struct modinfo *modinfop);
1035 
1036 /*PRINTFLIKE3*/
1037 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1038 /*PRINTFLIKE3*/
1039 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1040 /*PRINTFLIKE3*/
1041 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1042 
1043 static int sdprobe(dev_info_t *devi);
1044 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1045     void **result);
1046 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1047     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1048 
1049 /*
1050  * Smart probe for parallel scsi
1051  */
1052 static void sd_scsi_probe_cache_init(void);
1053 static void sd_scsi_probe_cache_fini(void);
1054 static void sd_scsi_clear_probe_cache(void);
1055 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1056 
1057 static int	sd_spin_up_unit(struct sd_lun *un);
1058 #ifdef _LP64
1059 static void	sd_enable_descr_sense(struct sd_lun *un);
1060 #endif /* _LP64 */
1061 static void	sd_set_mmc_caps(struct sd_lun *un);
1062 
1063 static void sd_read_unit_properties(struct sd_lun *un);
1064 static int  sd_process_sdconf_file(struct sd_lun *un);
1065 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1066     int *data_list, sd_tunables *values);
1067 static void sd_process_sdconf_table(struct sd_lun *un);
1068 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1069 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1070 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1071 	int list_len, char *dataname_ptr);
1072 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1073     sd_tunables *prop_list);
1074 static int  sd_validate_geometry(struct sd_lun *un, int path_flag);
1075 
1076 #if defined(_SUNOS_VTOC_16)
1077 static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
1078 #endif
1079 
1080 static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
1081 	int path_flag);
1082 static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
1083 	int path_flag);
1084 static void sd_get_physical_geometry(struct sd_lun *un,
1085 	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
1086 static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
1087 	int lbasize);
1088 static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
1089 static void sd_swap_efi_gpt(efi_gpt_t *);
1090 static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
1091 static int sd_validate_efi(efi_gpt_t *);
1092 static int sd_use_efi(struct sd_lun *, int);
1093 static void sd_build_default_label(struct sd_lun *un);
1094 
1095 #if defined(_FIRMWARE_NEEDS_FDISK)
1096 static int  sd_has_max_chs_vals(struct ipart *fdp);
1097 #endif
1098 static void sd_inq_fill(char *p, int l, char *s);
1099 
1100 
1101 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1102     int reservation_flag);
1103 static daddr_t  sd_get_devid_block(struct sd_lun *un);
1104 static int  sd_get_devid(struct sd_lun *un);
1105 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1106 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1107 static int  sd_write_deviceid(struct sd_lun *un);
1108 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1109 static int  sd_check_vpd_page_support(struct sd_lun *un);
1110 
1111 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1112 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1113 
1114 static int  sd_ddi_suspend(dev_info_t *devi);
1115 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1116 static int  sd_ddi_resume(dev_info_t *devi);
1117 static int  sd_ddi_pm_resume(struct sd_lun *un);
1118 static int  sdpower(dev_info_t *devi, int component, int level);
1119 
1120 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1121 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1122 static int  sd_unit_attach(dev_info_t *devi);
1123 static int  sd_unit_detach(dev_info_t *devi);
1124 
1125 static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
1126 static void sd_create_errstats(struct sd_lun *un, int instance);
1127 static void sd_set_errstats(struct sd_lun *un);
1128 static void sd_set_pstats(struct sd_lun *un);
1129 
1130 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1131 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1132 static int  sd_send_polled_RQS(struct sd_lun *un);
1133 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1134 
1135 #if (defined(__fibre))
1136 /*
1137  * Event callbacks (photon)
1138  */
1139 static void sd_init_event_callbacks(struct sd_lun *un);
1140 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1141 #endif
1142 
1143 
1144 static int   sd_disable_caching(struct sd_lun *un);
1145 static dev_t sd_make_device(dev_info_t *devi);
1146 
1147 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1148 	uint64_t capacity);
1149 
1150 /*
1151  * Driver entry point functions.
1152  */
1153 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1154 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1155 static int  sd_ready_and_valid(struct sd_lun *un);
1156 
1157 static void sdmin(struct buf *bp);
1158 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1159 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1160 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1161 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1162 
1163 static int sdstrategy(struct buf *bp);
1164 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1165 
1166 /*
1167  * Function prototypes for layering functions in the iostart chain.
1168  */
1169 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1170 	struct buf *bp);
1171 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1172 	struct buf *bp);
1173 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1174 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1175 	struct buf *bp);
1176 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1177 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1178 
1179 /*
1180  * Function prototypes for layering functions in the iodone chain.
1181  */
1182 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1183 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1184 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1185 	struct buf *bp);
1186 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1187 	struct buf *bp);
1188 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1189 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1190 	struct buf *bp);
1191 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1192 
1193 /*
1194  * Prototypes for functions to support buf(9S) based IO.
1195  */
1196 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1197 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1198 static void sd_destroypkt_for_buf(struct buf *);
1199 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1200 	struct buf *bp, int flags,
1201 	int (*callback)(caddr_t), caddr_t callback_arg,
1202 	diskaddr_t lba, uint32_t blockcount);
1203 #if defined(__i386) || defined(__amd64)
1204 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1205 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1206 #endif /* defined(__i386) || defined(__amd64) */
1207 
1208 /*
1209  * Prototypes for functions to support USCSI IO.
1210  */
1211 static int sd_uscsi_strategy(struct buf *bp);
1212 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1213 static void sd_destroypkt_for_uscsi(struct buf *);
1214 
1215 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1216 	uchar_t chain_type, void *pktinfop);
1217 
1218 static int  sd_pm_entry(struct sd_lun *un);
1219 static void sd_pm_exit(struct sd_lun *un);
1220 
1221 static void sd_pm_idletimeout_handler(void *arg);
1222 
1223 /*
1224  * sd_core internal functions (used at the sd_core_io layer).
1225  */
1226 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1227 static void sdintr(struct scsi_pkt *pktp);
1228 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1229 
1230 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
1231 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
1232 	int path_flag);
1233 
1234 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1235 	daddr_t blkno, int (*func)(struct buf *));
1236 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1237 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1238 static void sd_bioclone_free(struct buf *bp);
1239 static void sd_shadow_buf_free(struct buf *bp);
1240 
1241 static void sd_print_transport_rejected_message(struct sd_lun *un,
1242 	struct sd_xbuf *xp, int code);
1243 
1244 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1245 	int retry_check_flag,
1246 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1247 		int c),
1248 	void *user_arg, int failure_code,  clock_t retry_delay,
1249 	void (*statp)(kstat_io_t *));
1250 
1251 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1252 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1253 
1254 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1255 	struct scsi_pkt *pktp);
1256 static void sd_start_retry_command(void *arg);
1257 static void sd_start_direct_priority_command(void *arg);
1258 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1259 	int errcode);
1260 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1261 	struct buf *bp, int errcode);
1262 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1263 static void sd_sync_with_callback(struct sd_lun *un);
1264 static int sdrunout(caddr_t arg);
1265 
1266 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1267 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1268 
1269 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1270 static void sd_restore_throttle(void *arg);
1271 
1272 static void sd_init_cdb_limits(struct sd_lun *un);
1273 
1274 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1275 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1276 
1277 /*
1278  * Error handling functions
1279  */
1280 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1281 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1282 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1283 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1284 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1285 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1286 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1287 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1288 
1289 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1290 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1291 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1292 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1293 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1294 	struct sd_xbuf *xp);
1295 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1296 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1297 
1298 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1299 	void *arg, int code);
1300 static diskaddr_t sd_extract_sense_info_descr(
1301 	struct scsi_descr_sense_hdr *sdsp);
1302 
1303 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1304 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1305 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1306 	uint8_t asc,
1307 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1308 static void sd_sense_key_not_ready(struct sd_lun *un,
1309 	uint8_t asc, uint8_t ascq,
1310 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1311 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1312 	int sense_key, uint8_t asc,
1313 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1314 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1315 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1316 static void sd_sense_key_unit_attention(struct sd_lun *un,
1317 	uint8_t asc,
1318 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1319 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1320 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1321 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1322 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1323 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1324 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1325 static void sd_sense_key_default(struct sd_lun *un,
1326 	int sense_key,
1327 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1328 
1329 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1330 	void *arg, int flag);
1331 
1332 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1333 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1334 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1335 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1336 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1337 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1338 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1339 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1340 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1341 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1342 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1343 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1344 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1345 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1346 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1347 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1348 
1349 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1350 
1351 static void sd_start_stop_unit_callback(void *arg);
1352 static void sd_start_stop_unit_task(void *arg);
1353 
1354 static void sd_taskq_create(void);
1355 static void sd_taskq_delete(void);
1356 static void sd_media_change_task(void *arg);
1357 
1358 static int sd_handle_mchange(struct sd_lun *un);
1359 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1360 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1361 	uint32_t *lbap, int path_flag);
1362 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1363 	uint32_t *lbap, int path_flag);
1364 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1365 	int path_flag);
1366 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1367 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1368 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1369 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1370 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1371 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1372 	uchar_t usr_cmd, uchar_t *usr_bufp);
1373 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un);
1374 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1375 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1376 	uchar_t *bufaddr, uint_t buflen);
1377 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1378 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1379 	uchar_t *bufaddr, uint_t buflen, char feature);
1380 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1381 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1382 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1383 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1384 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1385 	size_t buflen, daddr_t start_block, int path_flag);
1386 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1387 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1388 	path_flag)
1389 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1390 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1391 	path_flag)
1392 
1393 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1394 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1395 	uint16_t param_ptr, int path_flag);
1396 
1397 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1398 static void sd_free_rqs(struct sd_lun *un);
1399 
1400 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1401 	uchar_t *data, int len, int fmt);
1402 
1403 /*
1404  * Disk Ioctl Function Prototypes
1405  */
1406 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1407 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1408 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1409 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1410 	int geom_validated);
1411 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1412 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1413 	int geom_validated);
1414 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1415 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1416 	int geom_validated);
1417 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1418 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1419 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1420 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1421 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1422 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1423 static int sd_write_label(dev_t dev);
1424 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1425 static void sd_clear_vtoc(struct sd_lun *un);
1426 static void sd_clear_efi(struct sd_lun *un);
1427 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1428 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1429 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1430 static void sd_setup_default_geometry(struct sd_lun *un);
1431 #if defined(__i386) || defined(__amd64)
1432 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1433 #endif
1434 
1435 /*
1436  * Multi-host Ioctl Prototypes
1437  */
1438 static int sd_check_mhd(dev_t dev, int interval);
1439 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1440 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1441 static char *sd_sname(uchar_t status);
1442 static void sd_mhd_resvd_recover(void *arg);
1443 static void sd_resv_reclaim_thread();
1444 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1445 static int sd_reserve_release(dev_t dev, int cmd);
1446 static void sd_rmv_resv_reclaim_req(dev_t dev);
1447 static void sd_mhd_reset_notify_cb(caddr_t arg);
1448 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1449 	mhioc_inkeys_t *usrp, int flag);
1450 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1451 	mhioc_inresvs_t *usrp, int flag);
1452 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1453 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1454 static int sd_mhdioc_release(dev_t dev);
1455 static int sd_mhdioc_register_devid(dev_t dev);
1456 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1457 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1458 
1459 /*
1460  * SCSI removable prototypes
1461  */
1462 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1463 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1464 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1465 static int sr_pause_resume(dev_t dev, int mode);
1466 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1467 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1468 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1469 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1470 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1471 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1472 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1473 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1474 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1475 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1476 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1477 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1478 static int sr_eject(dev_t dev);
1479 static void sr_ejected(register struct sd_lun *un);
1480 static int sr_check_wp(dev_t dev);
1481 static int sd_check_media(dev_t dev, enum dkio_state state);
1482 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1483 static void sd_delayed_cv_broadcast(void *arg);
1484 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1486 
1487 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1488 
1489 /*
1490  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1491  */
1492 static void sd_check_for_writable_cd(struct sd_lun *un);
1493 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1494 static void sd_wm_cache_destructor(void *wm, void *un);
1495 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1496 	daddr_t endb, ushort_t typ);
1497 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1498 	daddr_t endb);
1499 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1500 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1501 static void sd_read_modify_write_task(void * arg);
1502 static int
1503 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1504 	struct buf **bpp);
1505 
1506 
1507 /*
1508  * Function prototypes for failfast support.
1509  */
1510 static void sd_failfast_flushq(struct sd_lun *un);
1511 static int sd_failfast_flushq_callback(struct buf *bp);
1512 
1513 /*
1514  * Function prototypes to check for lsi devices
1515  */
1516 static void sd_is_lsi(struct sd_lun *un);
1517 
1518 /*
1519  * Function prototypes for x86 support
1520  */
1521 #if defined(__i386) || defined(__amd64)
1522 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1523 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1524 #endif
1525 
1526 /*
1527  * Constants for failfast support:
1528  *
1529  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1530  * failfast processing being performed.
1531  *
1532  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1533  * failfast processing on all bufs with B_FAILFAST set.
1534  */
1535 
1536 #define	SD_FAILFAST_INACTIVE		0
1537 #define	SD_FAILFAST_ACTIVE		1
1538 
1539 /*
1540  * Bitmask to control behavior of buf(9S) flushes when a transition to
1541  * the failfast state occurs. Optional bits include:
1542  *
1543  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1544  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1545  * be flushed.
1546  *
1547  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1548  * driver, in addition to the regular wait queue. This includes the xbuf
1549  * queues. When clear, only the driver's wait queue will be flushed.
1550  */
1551 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1552 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1553 
1554 /*
1555  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1556  * to flush all queues within the driver.
1557  */
/* Tunable (presumably patchable via /etc/system -- confirm) flush policy. */
static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1559 
1560 
1561 /*
1562  * SD Testing Fault Injection
1563  */
1564 #ifdef SD_FAULT_INJECTION
1565 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1566 static void sd_faultinjection(struct scsi_pkt *pktp);
1567 static void sd_injection_log(char *buf, struct sd_lun *un);
1568 #endif
1569 
1570 /*
1571  * Device driver ops vector
1572  */
/*
 * Character/block driver entry points (cb_ops(9S)).  Unsupported entry
 * points are set to nodev(9F); poll is nochpoll(9F) and streamtab is 0.
 */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread, 		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1593 
/*
 * Device operations vector (dev_ops(9S)) referencing the cb_ops above.
 */
static struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	sdpower			/* power */
};
1607 
1608 
1609 /*
1610  * This is the loadable module wrapper.
1611  */
1612 #include <sys/modctl.h>
1613 
/* Module-type linkage: identifies this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops */
};
1619 
1620 
/* Module linkage: a single driver linkage structure, NULL-terminated. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
1626 
1627 
1628 static struct scsi_asq_key_strings sd_additional_codes[] = {
1629 	0x81, 0, "Logical Unit is Reserved",
1630 	0x85, 0, "Audio Address Not Valid",
1631 	0xb6, 0, "Media Load Mechanism Failed",
1632 	0xB9, 0, "Audio Play Operation Aborted",
1633 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1634 	0x53, 2, "Medium removal prevented",
1635 	0x6f, 0, "Authentication failed during key exchange",
1636 	0x6f, 1, "Key not present",
1637 	0x6f, 2, "Key not established",
1638 	0x6f, 3, "Read without proper authentication",
1639 	0x6f, 4, "Mismatched region to this logical unit",
1640 	0x6f, 5, "Region reset count error",
1641 	0xffff, 0x0, NULL
1642 };
1643 
1644 
1645 /*
1646  * Struct for passing printing information for sense data messages
1647  */
struct sd_sense_info {
	int	ssi_severity;	/* message severity level */
	int	ssi_pfa_flag;	/* predictive-failure-analysis indicator */
};
1652 
1653 /*
1654  * Table of function pointers for iostart-side routines. Seperate "chains"
1655  * of layered function calls are formed by placing the function pointers
1656  * sequentially in the desired order. Functions are called according to an
1657  * incrementing table index ordering. The last function in each chain must
1658  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1659  * in the sd_iodone_chain[] array.
1660  *
1661  * Note: It may seem more natural to organize both the iostart and iodone
1662  * functions together, into an array of structures (or some similar
1663  * organization) with a common index, rather than two seperate arrays which
1664  * must be maintained in synchronization. The purpose of this division is
1665  * to achiece improved performance: individual arrays allows for more
1666  * effective cache line utilization on certain platforms.
1667  */
1668 
1669 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1670 
1671 
/*
 * Note: the index comments below are load-bearing -- they are the values
 * referenced by the SD_CHAIN_*_IOSTART macros that follow this table.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */
};
1720 
1721 /*
1722  * Macros to locate the first function of each iostart chain in the
1723  * sd_iostart_chain[] array. These are located by the index in the array.
1724  */
1725 #define	SD_CHAIN_DISK_IOSTART			0
1726 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1727 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1728 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1729 #define	SD_CHAIN_CHKSUM_IOSTART			12
1730 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1731 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1732 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1733 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1734 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1735 
1736 
1737 /*
1738  * Table of function pointers for the iodone-side routines for the driver-
1739  * internal layering mechanism.  The calling sequence for iodone routines
1740  * uses a decrementing table index, so the last routine called in a chain
1741  * must be at the lowest array index location for that chain.  The last
1742  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1743  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1744  * of the functions in an iodone side chain must correspond to the ordering
1745  * of the iostart routines for that chain.  Note that there is no iodone
1746  * side routine that corresponds to sd_core_iostart(), so there is no
1747  * entry in the table for this.
1748  */
1749 
/*
 * Note: iodone chains are walked with a DEcrementing index; the index
 * comments below are the values referenced by the SD_CHAIN_*_IODONE macros.
 */
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */
};
1798 
1799 
1800 /*
1801  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1802  * each iodone-side chain. These are located by the array index, but as the
1803  * iodone side functions are called in a decrementing-index order, the
1804  * highest index number in each chain must be specified (as these correspond
1805  * to the first function in the iodone chain that will be called by the core
1806  * at IO completion time).
1807  */
1808 
1809 #define	SD_CHAIN_DISK_IODONE			2
1810 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1811 #define	SD_CHAIN_RMMEDIA_IODONE			8
1812 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1813 #define	SD_CHAIN_CHKSUM_IODONE			15
1814 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1815 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1816 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1817 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1818 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1819 
1820 
1821 
1822 
1823 /*
1824  * Array to map a layering chain index to the appropriate initpkt routine.
1825  * The redundant entries are present so that the index used for accessing
1826  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1827  * with this table as well.
1828  */
1829 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1830 
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

};
1880 
1881 
1882 /*
 * Array to map a layering chain index to the appropriate destroypkt routine.
1884  * The redundant entries are present so that the index used for accessing
1885  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1886  * with this table as well.
1887  */
1888 typedef void (*sd_destroypkt_t)(struct buf *);
1889 
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

};
1939 
1940 
1941 
1942 /*
1943  * Array to map a layering chain index to the appropriate chain "type".
1944  * The chain type indicates a specific property/usage of the chain.
1945  * The redundant entries are present so that the index used for accessing
1946  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1947  * with this table as well.
1948  */
1949 
1950 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1951 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1952 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1953 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1954 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1955 						/* (for error recovery) */
1956 
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
};
2005 
2006 
2007 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2008 #define	SD_IS_BUFIO(xp)			\
2009 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2010 
2011 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2012 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2013 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2014 
2015 
2016 
2017 /*
2018  * Struct, array, and macros to map a specific chain to the appropriate
2019  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2020  *
2021  * The sd_chain_index_map[] array is used at attach time to set the various
2022  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2023  * chain to be used with the instance. This allows different instances to use
2024  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2025  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2026  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2027  * dynamically & without the use of locking; and (2) a layer may update the
2028  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2029  * to allow for deferred processing of an IO within the same chain from a
2030  * different execution context.
2031  */
2032 
2033 struct sd_chain_index {
2034 	int	sci_iostart_index;
2035 	int	sci_iodone_index;
2036 };
2037 
/* Rows are selected by the SD_CHAIN_INFO_* indices defined below. */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
};
2050 
2051 
2052 /*
2053  * The following are indexes into the sd_chain_index_map[] array.
2054  */
2055 
2056 /* un->un_buf_chain_type must be set to one of these */
2057 #define	SD_CHAIN_INFO_DISK		0
2058 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2059 #define	SD_CHAIN_INFO_RMMEDIA		2
2060 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2061 #define	SD_CHAIN_INFO_CHKSUM		4
2062 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2063 
2064 /* un->un_uscsi_chain_type must be set to one of these */
2065 #define	SD_CHAIN_INFO_USCSI_CMD		6
2066 /* USCSI with PM disabled is the same as DIRECT */
2067 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2068 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2069 
2070 /* un->un_direct_chain_type must be set to one of these */
2071 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2072 
2073 /* un->un_priority_chain_type must be set to one of these */
2074 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2075 
2076 /* size for devid inquiries */
2077 #define	MAX_INQUIRY_SIZE		0xF0
2078 
2079 /*
2080  * Macros used by functions to pass a given buf(9S) struct along to the
2081  * next function in the layering chain for further processing.
2082  *
2083  * In the following macros, passing more than three arguments to the called
2084  * routines causes the optimizer for the SPARC compiler to stop doing tail
2085  * call elimination which results in significant performance degradation.
2086  */
2087 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2088 	((*(sd_iostart_chain[index]))(index, un, bp))
2089 
2090 #define	SD_BEGIN_IODONE(index, un, bp)	\
2091 	((*(sd_iodone_chain[index]))(index, un, bp))
2092 
2093 #define	SD_NEXT_IOSTART(index, un, bp)				\
2094 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2095 
2096 #define	SD_NEXT_IODONE(index, un, bp)				\
2097 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2098 
2099 
2100 /*
2101  *    Function: _init
2102  *
2103  * Description: This is the driver _init(9E) entry point.
2104  *
2105  * Return Code: Returns the value from mod_install(9F) or
2106  *		ddi_soft_state_init(9F) as appropriate.
2107  *
2108  *     Context: Called when driver module loaded.
2109  */
2110 
2111 int
2112 _init(void)
2113 {
2114 	int	err;
2115 
2116 	/* establish driver name from module name */
2117 	sd_label = mod_modname(&modlinkage);
2118 
2119 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2120 		SD_MAXUNIT);
2121 
2122 	if (err != 0) {
2123 		return (err);
2124 	}
2125 
2126 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2127 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2128 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2129 
2130 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2131 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2132 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2133 
2134 	/*
2135 	 * it's ok to init here even for fibre device
2136 	 */
2137 	sd_scsi_probe_cache_init();
2138 
2139 	/*
2140 	 * Creating taskq before mod_install ensures that all callers (threads)
2141 	 * that enter the module after a successfull mod_install encounter
2142 	 * a valid taskq.
2143 	 */
2144 	sd_taskq_create();
2145 
2146 	err = mod_install(&modlinkage);
2147 	if (err != 0) {
2148 		/* delete taskq if install fails */
2149 		sd_taskq_delete();
2150 
2151 		mutex_destroy(&sd_detach_mutex);
2152 		mutex_destroy(&sd_log_mutex);
2153 		mutex_destroy(&sd_label_mutex);
2154 
2155 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2156 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2157 		cv_destroy(&sd_tr.srq_inprocess_cv);
2158 
2159 		sd_scsi_probe_cache_fini();
2160 
2161 		ddi_soft_state_fini(&sd_state);
2162 		return (err);
2163 	}
2164 
2165 	return (err);
2166 }
2167 
2168 
2169 /*
2170  *    Function: _fini
2171  *
2172  * Description: This is the driver _fini(9E) entry point.
2173  *
2174  * Return Code: Returns the value from mod_remove(9F)
2175  *
2176  *     Context: Called when driver module is unloaded.
2177  */
2178 
2179 int
2180 _fini(void)
2181 {
2182 	int err;
2183 
2184 	if ((err = mod_remove(&modlinkage)) != 0) {
2185 		return (err);
2186 	}
2187 
2188 	sd_taskq_delete();
2189 
2190 	mutex_destroy(&sd_detach_mutex);
2191 	mutex_destroy(&sd_log_mutex);
2192 	mutex_destroy(&sd_label_mutex);
2193 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2194 
2195 	sd_scsi_probe_cache_fini();
2196 
2197 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2198 	cv_destroy(&sd_tr.srq_inprocess_cv);
2199 
2200 	ddi_soft_state_fini(&sd_state);
2201 
2202 	return (err);
2203 }
2204 
2205 
2206 /*
2207  *    Function: _info
2208  *
2209  * Description: This is the driver _info(9E) entry point.
2210  *
2211  *   Arguments: modinfop - pointer to the driver modinfo structure
2212  *
2213  * Return Code: Returns the value from mod_info(9F).
2214  *
2215  *     Context: Kernel thread context
2216  */
2217 
2218 int
2219 _info(struct modinfo *modinfop)
2220 {
2221 	return (mod_info(&modlinkage, modinfop));
2222 }
2223 
2224 
2225 /*
2226  * The following routines implement the driver message logging facility.
2227  * They provide component- and level- based debug output filtering.
2228  * Output may also be restricted to messages for a single instance by
2229  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2230  * to NULL, then messages for all instances are printed.
2231  *
2232  * These routines have been cloned from each other due to the language
2233  * constraints of macros and variable argument list processing.
2234  */
2235 
2236 
2237 /*
2238  *    Function: sd_log_err
2239  *
2240  * Description: This routine is called by the SD_ERROR macro for debug
2241  *		logging of error conditions.
2242  *
2243  *   Arguments: comp - driver component being logged
2244  *		dev  - pointer to driver info structure
2245  *		fmt  - error string and format to be logged
2246  */
2247 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 * sd_log_mutex serializes access to the shared sd_log_buf.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; messages are assumed
		 * to fit in sd_log_buf -- consider vsnprintf().
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & comp) {
		/* Also record the message in the fault-injection log. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2284 
2285 
2286 /*
2287  *    Function: sd_log_info
2288  *
2289  * Description: This routine is called by the SD_INFO macro for debug
2290  *		logging of general purpose informational conditions.
2291  *
2292  *   Arguments: comp - driver component being logged
2293  *		dev  - pointer to driver info structure
2294  *		fmt  - info string and format to be logged
2295  */
2296 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 * Cloned from sd_log_err(); differs only in the level bit tested.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/* NOTE(review): vsprintf() is unbounded; see sd_log_buf. */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & component) {
		/* Also record the message in the fault-injection log. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2334 
2335 
2336 /*
2337  *    Function: sd_log_trace
2338  *
2339  * Description: This routine is called by the SD_TRACE macro for debug
2340  *		logging of trace conditions (i.e. function entry/exit).
2341  *
2342  *   Arguments: comp - driver component being logged
2343  *		dev  - pointer to driver info structure
2344  *		fmt  - trace string and format to be logged
2345  */
2346 
static void
sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 * Cloned from sd_log_err(); differs only in the level bit tested.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_TRACE) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/* NOTE(review): vsprintf() is unbounded; see sd_log_buf. */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	if (un->sd_injection_mask & component) {
		/* Also record the message in the fault-injection log. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2384 
2385 
2386 /*
2387  *    Function: sdprobe
2388  *
2389  * Description: This is the driver probe(9e) entry point function.
2390  *
2391  *   Arguments: devi - opaque device info handle
2392  *
2393  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2394  *              DDI_PROBE_FAILURE: If the probe failed.
2395  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2396  *				   but may be present in the future.
2397  */
2398 
2399 static int
2400 sdprobe(dev_info_t *devi)
2401 {
2402 	struct scsi_device	*devp;
2403 	int			rval;
2404 	int			instance;
2405 
2406 	/*
2407 	 * if it wasn't for pln, sdprobe could actually be nulldev
2408 	 * in the "__fibre" case.
2409 	 */
2410 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2411 		return (DDI_PROBE_DONTCARE);
2412 	}
2413 
2414 	devp = ddi_get_driver_private(devi);
2415 
2416 	if (devp == NULL) {
2417 		/* Ooops... nexus driver is mis-configured... */
2418 		return (DDI_PROBE_FAILURE);
2419 	}
2420 
2421 	instance = ddi_get_instance(devi);
2422 
2423 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2424 		return (DDI_PROBE_PARTIAL);
2425 	}
2426 
2427 	/*
2428 	 * Call the SCSA utility probe routine to see if we actually
2429 	 * have a target at this SCSI nexus.
2430 	 */
2431 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2432 	case SCSIPROBE_EXISTS:
2433 		switch (devp->sd_inq->inq_dtype) {
2434 		case DTYPE_DIRECT:
2435 			rval = DDI_PROBE_SUCCESS;
2436 			break;
2437 		case DTYPE_RODIRECT:
2438 			/* CDs etc. Can be removable media */
2439 			rval = DDI_PROBE_SUCCESS;
2440 			break;
2441 		case DTYPE_OPTICAL:
2442 			/*
2443 			 * Rewritable optical driver HP115AA
2444 			 * Can also be removable media
2445 			 */
2446 
2447 			/*
2448 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2449 			 * pre solaris 9 sparc sd behavior is required
2450 			 *
2451 			 * If first time through and sd_dtype_optical_bind
2452 			 * has not been set in /etc/system check properties
2453 			 */
2454 
2455 			if (sd_dtype_optical_bind  < 0) {
2456 			    sd_dtype_optical_bind = ddi_prop_get_int
2457 				(DDI_DEV_T_ANY,	devi,	0,
2458 				"optical-device-bind",	1);
2459 			}
2460 
2461 			if (sd_dtype_optical_bind == 0) {
2462 				rval = DDI_PROBE_FAILURE;
2463 			} else {
2464 				rval = DDI_PROBE_SUCCESS;
2465 			}
2466 			break;
2467 
2468 		case DTYPE_NOTPRESENT:
2469 		default:
2470 			rval = DDI_PROBE_FAILURE;
2471 			break;
2472 		}
2473 		break;
2474 	default:
2475 		rval = DDI_PROBE_PARTIAL;
2476 		break;
2477 	}
2478 
2479 	/*
2480 	 * This routine checks for resource allocation prior to freeing,
2481 	 * so it will take care of the "smart probing" case where a
2482 	 * scsi_probe() may or may not have been issued and will *not*
2483 	 * free previously-freed resources.
2484 	 */
2485 	scsi_unprobe(devp);
2486 	return (rval);
2487 }
2488 
2489 
2490 /*
2491  *    Function: sdinfo
2492  *
2493  * Description: This is the driver getinfo(9e) entry point function.
2494  * 		Given the device number, return the devinfo pointer from
2495  *		the scsi_device structure or the instance number
2496  *		associated with the dev_t.
2497  *
2498  *   Arguments: dip     - pointer to device info structure
2499  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2500  *			  DDI_INFO_DEVT2INSTANCE)
2501  *		arg     - driver dev_t
2502  *		resultp - user buffer for request response
2503  *
2504  * Return Code: DDI_SUCCESS
2505  *              DDI_FAILURE
2506  */
2507 /* ARGSUSED */
2508 static int
2509 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2510 {
2511 	struct sd_lun	*un;
2512 	dev_t		dev;
2513 	int		instance;
2514 	int		error;
2515 
2516 	switch (infocmd) {
2517 	case DDI_INFO_DEVT2DEVINFO:
2518 		dev = (dev_t)arg;
2519 		instance = SDUNIT(dev);
2520 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2521 			return (DDI_FAILURE);
2522 		}
2523 		*result = (void *) SD_DEVINFO(un);
2524 		error = DDI_SUCCESS;
2525 		break;
2526 	case DDI_INFO_DEVT2INSTANCE:
2527 		dev = (dev_t)arg;
2528 		instance = SDUNIT(dev);
2529 		*result = (void *)(uintptr_t)instance;
2530 		error = DDI_SUCCESS;
2531 		break;
2532 	default:
2533 		error = DDI_FAILURE;
2534 	}
2535 	return (error);
2536 }
2537 
2538 /*
2539  *    Function: sd_prop_op
2540  *
2541  * Description: This is the driver prop_op(9e) entry point function.
2542  *		Return the number of blocks for the partition in question
2543  *		or forward the request to the property facilities.
2544  *
2545  *   Arguments: dev       - device number
2546  *		dip       - pointer to device info structure
2547  *		prop_op   - property operator
2548  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2549  *		name      - pointer to property name
2550  *		valuep    - pointer or address of the user buffer
2551  *		lengthp   - property length
2552  *
2553  * Return Code: DDI_PROP_SUCCESS
2554  *              DDI_PROP_NOT_FOUND
2555  *              DDI_PROP_UNDEFINED
2556  *              DDI_PROP_NO_MEMORY
2557  *              DDI_PROP_BUF_TOO_SMALL
2558  */
2559 
2560 static int
2561 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2562 	char *name, caddr_t valuep, int *lengthp)
2563 {
2564 	int		instance = ddi_get_instance(dip);
2565 	struct sd_lun	*un;
2566 	uint64_t	nblocks64;
2567 
2568 	/*
2569 	 * Our dynamic properties are all device specific and size oriented.
2570 	 * Requests issued under conditions where size is valid are passed
2571 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2572 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2573 	 */
2574 	un = ddi_get_soft_state(sd_state, instance);
2575 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2576 	    (un->un_f_geometry_is_valid == FALSE)) {
2577 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2578 		    name, valuep, lengthp));
2579 	} else {
2580 		/* get nblocks value */
2581 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2582 		mutex_enter(SD_MUTEX(un));
2583 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2584 		mutex_exit(SD_MUTEX(un));
2585 
2586 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2587 		    name, valuep, lengthp, nblocks64));
2588 	}
2589 }
2590 
2591 /*
2592  * The following functions are for smart probing:
2593  * sd_scsi_probe_cache_init()
2594  * sd_scsi_probe_cache_fini()
2595  * sd_scsi_clear_probe_cache()
2596  * sd_scsi_probe_with_cache()
2597  */
2598 
2599 /*
2600  *    Function: sd_scsi_probe_cache_init
2601  *
2602  * Description: Initializes the probe response cache mutex and head pointer.
2603  *
2604  *     Context: Kernel thread context
2605  */
2606 
2607 static void
2608 sd_scsi_probe_cache_init(void)
2609 {
2610 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2611 	sd_scsi_probe_cache_head = NULL;
2612 }
2613 
2614 
2615 /*
2616  *    Function: sd_scsi_probe_cache_fini
2617  *
2618  * Description: Frees all resources associated with the probe response cache.
2619  *
2620  *     Context: Kernel thread context
2621  */
2622 
2623 static void
2624 sd_scsi_probe_cache_fini(void)
2625 {
2626 	struct sd_scsi_probe_cache *cp;
2627 	struct sd_scsi_probe_cache *ncp;
2628 
2629 	/* Clean up our smart probing linked list */
2630 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2631 		ncp = cp->next;
2632 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2633 	}
2634 	sd_scsi_probe_cache_head = NULL;
2635 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2636 }
2637 
2638 
2639 /*
2640  *    Function: sd_scsi_clear_probe_cache
2641  *
2642  * Description: This routine clears the probe response cache. This is
2643  *		done when open() returns ENXIO so that when deferred
2644  *		attach is attempted (possibly after a device has been
2645  *		turned on) we will retry the probe. Since we don't know
2646  *		which target we failed to open, we just clear the
2647  *		entire cache.
2648  *
2649  *     Context: Kernel thread context
2650  */
2651 
2652 static void
2653 sd_scsi_clear_probe_cache(void)
2654 {
2655 	struct sd_scsi_probe_cache	*cp;
2656 	int				i;
2657 
2658 	mutex_enter(&sd_scsi_probe_cache_mutex);
2659 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2660 		/*
2661 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2662 		 * force probing to be performed the next time
2663 		 * sd_scsi_probe_with_cache is called.
2664 		 */
2665 		for (i = 0; i < NTARGETS_WIDE; i++) {
2666 			cp->cache[i] = SCSIPROBE_EXISTS;
2667 		}
2668 	}
2669 	mutex_exit(&sd_scsi_probe_cache_mutex);
2670 }
2671 
2672 
2673 /*
2674  *    Function: sd_scsi_probe_with_cache
2675  *
2676  * Description: This routine implements support for a scsi device probe
2677  *		with cache. The driver maintains a cache of the target
2678  *		responses to scsi probes. If we get no response from a
2679  *		target during a probe inquiry, we remember that, and we
2680  *		avoid additional calls to scsi_probe on non-zero LUNs
2681  *		on the same target until the cache is cleared. By doing
2682  *		so we avoid the 1/4 sec selection timeout for nonzero
2683  *		LUNs. lun0 of a target is always probed.
2684  *
2685  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2686  *              waitfunc - indicates what the allocator routines should
2687  *			   do when resources are not available. This value
2688  *			   is passed on to scsi_probe() when that routine
2689  *			   is called.
2690  *
2691  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2692  *		otherwise the value returned by scsi_probe(9F).
2693  *
2694  *     Context: Kernel thread context
2695  */
2696 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	/*
	 * The target/LUN address of this device comes from properties on
	 * its devinfo node (defaults: lun 0, tgt -1 == unknown).
	 */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	/* The mutex protects only the cache list structure itself. */
	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance */
	/* Caches are keyed on the parent (HBA nexus) devinfo node. */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		/* Prepend to the list; entries are freed only at _fini. */
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		/* SCSIPROBE_EXISTS means "not yet probed; go probe". */
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	/*
	 * Drop the mutex before the (potentially blocking) scsi_probe().
	 * NOTE(review): cp->cache[tgt] is read and written below without
	 * the cache mutex held — presumably per-target probes are
	 * serialized by the framework; confirm before relying on this.
	 */
	mutex_exit(&sd_scsi_probe_cache_mutex);

	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
2753 
2754 
2755 /*
2756  *    Function: sd_spin_up_unit
2757  *
2758  * Description: Issues the following commands to spin-up the device:
2759  *		START STOP UNIT, and INQUIRY.
2760  *
2761  *   Arguments: un - driver soft state (unit) structure
2762  *
2763  * Return Code: 0 - success
2764  *		EIO - failure
2765  *		EACCES - reservation conflict
2766  *
2767  *     Context: Kernel thread context
2768  */
2769 
2770 static int
2771 sd_spin_up_unit(struct sd_lun *un)
2772 {
2773 	size_t	resid		= 0;
2774 	int	has_conflict	= FALSE;
2775 	uchar_t *bufaddr;
2776 
2777 	ASSERT(un != NULL);
2778 
2779 	/*
2780 	 * Send a throwaway START UNIT command.
2781 	 *
2782 	 * If we fail on this, we don't care presently what precisely
2783 	 * is wrong.  EMC's arrays will also fail this with a check
2784 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2785 	 * we don't want to fail the attach because it may become
2786 	 * "active" later.
2787 	 */
2788 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2789 	    == EACCES)
2790 		has_conflict = TRUE;
2791 
2792 	/*
2793 	 * Send another INQUIRY command to the target. This is necessary for
2794 	 * non-removable media direct access devices because their INQUIRY data
2795 	 * may not be fully qualified until they are spun up (perhaps via the
2796 	 * START command above).  Note: This seems to be needed for some
2797 	 * legacy devices only.) The INQUIRY command should succeed even if a
2798 	 * Reservation Conflict is present.
2799 	 */
2800 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2801 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2802 		kmem_free(bufaddr, SUN_INQSIZE);
2803 		return (EIO);
2804 	}
2805 
2806 	/*
2807 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2808 	 * Note that this routine does not return a failure here even if the
2809 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2810 	 */
2811 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2812 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2813 	}
2814 
2815 	kmem_free(bufaddr, SUN_INQSIZE);
2816 
2817 	/* If we hit a reservation conflict above, tell the caller. */
2818 	if (has_conflict == TRUE) {
2819 		return (EACCES);
2820 	}
2821 
2822 	return (0);
2823 }
2824 
2825 #ifdef _LP64
2826 /*
2827  *    Function: sd_enable_descr_sense
2828  *
2829  * Description: This routine attempts to select descriptor sense format
2830  *		using the Control mode page.  Devices that support 64 bit
2831  *		LBAs (for >2TB luns) should also implement descriptor
2832  *		sense data so we will call this function whenever we see
2833  *		a lun larger than 2TB.  If for some reason the device
2834  *		supports 64 bit LBAs but doesn't support descriptor sense
2835  *		presumably the mode select will fail.  Everything will
2836  *		continue to work normally except that we will not get
2837  *		complete sense data for commands that fail with an LBA
2838  *		larger than 32 bits.
2839  *
2840  *   Arguments: un - driver soft state (unit) structure
2841  *
2842  *     Context: Kernel thread context only
2843  */
2844 
static void
sd_enable_descr_sense(struct sd_lun *un)
{
	uchar_t			*header;
	struct mode_control_scsi3 *ctrl_bufp;
	size_t			buflen;
	size_t			bd_len;

	/*
	 * Read MODE SENSE page 0xA, Control Mode Page
	 */
	/* Buffer: mode header + (optional) block descriptor + page data. */
	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_control_scsi3);
	header = kmem_zalloc(buflen, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
		goto eds_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	bd_len  = ((struct mode_header *)header)->bdesc_length;

	/* The control mode page starts right after the block descriptors. */
	ctrl_bufp = (struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len);

	/*
	 * Clear PS bit for MODE SELECT
	 */
	ctrl_bufp->mode_page.ps = 0;

	/*
	 * Set D_SENSE to enable descriptor sense format.
	 */
	ctrl_bufp->d_sense = 1;

	/*
	 * Use MODE SELECT to commit the change to the D_SENSE bit
	 */
	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode select ctrl page failed\n");
		goto eds_exit;
	}

	/* Common exit: failures above are logged but otherwise ignored. */
eds_exit:
	kmem_free(header, buflen);
}
2899 #endif /* _LP64 */
2900 
2901 
2902 /*
2903  *    Function: sd_set_mmc_caps
2904  *
2905  * Description: This routine determines if the device is MMC compliant and if
2906  *		the device supports CDDA via a mode sense of the CDVD
2907  *		capabilities mode page. Also checks if the device is a
2908  *		dvdram writable device.
2909  *
2910  *   Arguments: un - driver soft state (unit) structure
2911  *
2912  *     Context: Kernel thread context only
2913  */
2914 
static void
sd_set_mmc_caps(struct sd_lun *un)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;

	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Issue a mode sense for the CDROM capabilities page (0x2A). */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	/* Page data starts after the group-2 mode header + descriptors. */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		/*
		 * NOTE(review): byte 9 of the feature response is tested
		 * against the feature-code macros; presumably this matches
		 * the GET CONFIGURATION response layout — confirm against
		 * the MMC specification.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3036 
3037 /*
3038  *    Function: sd_check_for_writable_cd
3039  *
3040  * Description: This routine determines if the media in the device is
3041  *		writable or not. It uses the get configuration command (0x46)
3042  *		to determine if the media is writable
3043  *
3044  *   Arguments: un - driver soft state (unit) structure
3045  *
3046  *     Context: Never called at interrupt context.
3047  */
3048 
static void
sd_check_for_writable_cd(struct sd_lun *un)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;

	/* Called with SD_MUTEX held; it is dropped around each command. */
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Ask the drive for its current profile (GET CONFIGURATION, 0x46). */
	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 */
		/*
		 * NOTE(review): bytes 6-7 of the header hold the current
		 * profile; 0x12 presumably identifies DVD-RAM — confirm
		 * against the MMC specification.
		 */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	/* Mode sense the CDROM capabilities page (0x2A); mutex dropped. */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		/* Reacquire the mutex before returning, per entry contract. */
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3181 
3182 /*
3183  *    Function: sd_read_unit_properties
3184  *
3185  * Description: The following implements a property lookup mechanism.
3186  *		Properties for particular disks (keyed on vendor, model
3187  *		and rev numbers) are sought in the sd.conf file via
3188  *		sd_process_sdconf_file(), and if not found there, are
3189  *		looked for in a list hardcoded in this driver via
3190  *		sd_process_sdconf_table() Once located the properties
3191  *		are used to update the driver unit structure.
3192  *
3193  *   Arguments: un - driver soft state (unit) structure
3194  */
3195 
3196 static void
3197 sd_read_unit_properties(struct sd_lun *un)
3198 {
3199 	/*
3200 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3201 	 * the "sd-config-list" property (from the sd.conf file) or if
3202 	 * there was not a match for the inquiry vid/pid. If this event
3203 	 * occurs the static driver configuration table is searched for
3204 	 * a match.
3205 	 */
3206 	ASSERT(un != NULL);
3207 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3208 		sd_process_sdconf_table(un);
3209 	}
3210 
3211 	/* check for LSI device */
3212 	sd_is_lsi(un);
3213 
3214 	/*
3215 	 * Set this in sd.conf to 0 in order to disable kstats.  The default
3216 	 * is 1, so they are enabled by default.
3217 	 */
3218 	un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
3219 	    SD_DEVINFO(un), DDI_PROP_DONTPASS, "enable-partition-kstats", 1));
3220 }
3221 
3222 
3223 /*
3224  *    Function: sd_process_sdconf_file
3225  *
3226  * Description: Use ddi_getlongprop to obtain the properties from the
3227  *		driver's config file (ie, sd.conf) and update the driver
3228  *		soft state structure accordingly.
3229  *
3230  *   Arguments: un - driver soft state (unit) structure
3231  *
3232  * Return Code: SD_SUCCESS - The properties were successfully set according
3233  *			     to the driver configuration file.
3234  *		SD_FAILURE - The driver config list was not obtained or
3235  *			     there was no vid/pid match. This indicates that
3236  *			     the static config table should be used.
3237  *
3238  * The config file has a property, "sd-config-list", which consists of
3239  * one or more duplets as follows:
3240  *
3241  *  sd-config-list=
3242  *	<duplet>,
3243  *	[<duplet>,]
3244  *	[<duplet>];
3245  *
3246  * The structure of each duplet is as follows:
3247  *
3248  *  <duplet>:= <vid+pid>,<data-property-name_list>
3249  *
3250  * The first entry of the duplet is the device ID string (the concatenated
3251  * vid & pid; not to be confused with a device_id).  This is defined in
3252  * the same way as in the sd_disk_table.
3253  *
3254  * The second part of the duplet is a string that identifies a
3255  * data-property-name-list. The data-property-name-list is defined as
3256  * follows:
3257  *
3258  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3259  *
3260  * The syntax of <data-property-name> depends on the <version> field.
3261  *
3262  * If version = SD_CONF_VERSION_1 we have the following syntax:
3263  *
3264  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3265  *
3266  * where the prop0 value will be used to set prop0 if bit0 set in the
3267  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3268  *
3269  */
3270 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	*config_list = NULL;
	int	config_list_len;
	int	len;
	int	dupletlen = 0;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	int	dnlist_len;
	int	dataname_len;
	int	*data_list;
	int	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
	    sd_config_list, (caddr_t)&config_list, &config_list_len)
	    != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Note: This algorithm is complex and difficult to maintain. It should
	 * be replaced with a more robust implementation.
	 */
	/*
	 * The property value is a sequence of NUL-separated strings:
	 * vid+pid, data-property-name-list, vid+pid, ...  Each iteration
	 * advances vidptr by dupletlen, the total byte length (including
	 * NULs) of the duplet just examined.
	 */
	for (len = config_list_len, vidptr = config_list; len > 0;
	    vidptr += dupletlen, len -= dupletlen) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidlen = dupletlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			/* No match: skip just the vid string and its NUL. */
			dupletlen++;
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = vidptr + vidlen + 1;
		dnlist_len = (int)strlen(dnlist_ptr);
		/* +2 accounts for the NULs ending the vid and the dnlist. */
		dupletlen += dnlist_len + 2;

		/*
		 * Set a pointer for the first data-property-name
		 * entry in the list
		 */
		dataname_ptr = dnlist_ptr;
		dataname_len = 0;

		/*
		 * Loop through all data-property-name entries in the
		 * data-property-name-list setting the properties for each.
		 */
		while (dataname_len < dnlist_len) {
			int version;

			/*
			 * Determine the length of the current
			 * data-property-name entry by indexing until a
			 * blank or NULL is encountered. When the space is
			 * encountered reset it to a NULL for compliance
			 * with ddi_getlongprop().
			 */
			for (i = 0; ((dataname_ptr[i] != ' ') &&
			    (dataname_ptr[i] != '\0')); i++) {
				;
			}

			dataname_len += i;
			/* If not null terminated, Make it so */
			if (dataname_ptr[i] == ' ') {
				dataname_ptr[i] = '\0';
			}
			/* Account for the separator/terminator byte. */
			dataname_len++;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_process_sdconf_file: disk:%s, data:%s\n",
			    vidptr, dataname_ptr);

			/* Get the data list */
			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
			    != DDI_PROP_SUCCESS) {
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: data property (%s)"
				    " has no value\n", dataname_ptr);
				dataname_ptr = dnlist_ptr + dataname_len;
				continue;
			}

			/* data_list[0] is the layout version of the entry. */
			version = data_list[0];

			if (version == SD_CONF_VERSION_1) {
				sd_tunables values;

				/* Set the properties */
				if (sd_chk_vers1_data(un, data_list[1],
				    &data_list[2], data_list_len, dataname_ptr)
				    == SD_SUCCESS) {
					/*
					 * data_list[1] is the flags word;
					 * data_list[2..] are the values
					 * selected by those flag bits.
					 */
					sd_get_tunables_from_conf(un,
					    data_list[1], &data_list[2],
					    &values);
					sd_set_vers1_properties(un,
					    data_list[1], &values);
					rval = SD_SUCCESS;
				} else {
					rval = SD_FAILURE;
				}
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "data property %s version 0x%x is invalid.",
				    dataname_ptr, version);
				rval = SD_FAILURE;
			}
			kmem_free(data_list, data_list_len);
			dataname_ptr = dnlist_ptr + dataname_len;
		}
	}

	/* free up the memory allocated by ddi_getlongprop */
	if (config_list) {
		kmem_free(config_list, config_list_len);
	}

	return (rval);
}
3410 
3411 /*
3412  *    Function: sd_get_tunables_from_conf()
3413  *
3414  *
 *    This function reads the data list from the sd.conf file and places
 *    the values that can have numeric arguments into the appropriate
 *    sd_tunables members.
 *    Since the order of the data list members varies across platforms,
 *    this function reads them from the data list in a platform-specific
 *    order and places them into the correct sd_tunables member, which is
 *    consistent across all platforms.
3422  */
3423 static void
3424 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3425     sd_tunables *values)
3426 {
3427 	int i;
3428 	int mask;
3429 
3430 	bzero(values, sizeof (sd_tunables));
3431 
3432 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3433 
3434 		mask = 1 << i;
3435 		if (mask > flags) {
3436 			break;
3437 		}
3438 
3439 		switch (mask & flags) {
3440 		case 0:	/* This mask bit not set in flags */
3441 			continue;
3442 		case SD_CONF_BSET_THROTTLE:
3443 			values->sdt_throttle = data_list[i];
3444 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3445 			    "sd_get_tunables_from_conf: throttle = %d\n",
3446 			    values->sdt_throttle);
3447 			break;
3448 		case SD_CONF_BSET_CTYPE:
3449 			values->sdt_ctype = data_list[i];
3450 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3451 			    "sd_get_tunables_from_conf: ctype = %d\n",
3452 			    values->sdt_ctype);
3453 			break;
3454 		case SD_CONF_BSET_NRR_COUNT:
3455 			values->sdt_not_rdy_retries = data_list[i];
3456 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3457 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3458 			    values->sdt_not_rdy_retries);
3459 			break;
3460 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3461 			values->sdt_busy_retries = data_list[i];
3462 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3463 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3464 			    values->sdt_busy_retries);
3465 			break;
3466 		case SD_CONF_BSET_RST_RETRIES:
3467 			values->sdt_reset_retries = data_list[i];
3468 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3469 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3470 			    values->sdt_reset_retries);
3471 			break;
3472 		case SD_CONF_BSET_RSV_REL_TIME:
3473 			values->sdt_reserv_rel_time = data_list[i];
3474 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3475 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3476 			    values->sdt_reserv_rel_time);
3477 			break;
3478 		case SD_CONF_BSET_MIN_THROTTLE:
3479 			values->sdt_min_throttle = data_list[i];
3480 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3481 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3482 			    values->sdt_min_throttle);
3483 			break;
3484 		case SD_CONF_BSET_DISKSORT_DISABLED:
3485 			values->sdt_disk_sort_dis = data_list[i];
3486 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3487 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3488 			    values->sdt_disk_sort_dis);
3489 			break;
3490 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3491 			values->sdt_lun_reset_enable = data_list[i];
3492 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3493 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3494 			    "\n", values->sdt_lun_reset_enable);
3495 			break;
3496 		}
3497 	}
3498 }
3499 
3500 /*
3501  *    Function: sd_process_sdconf_table
3502  *
3503  * Description: Search the static configuration table for a match on the
3504  *		inquiry vid/pid and update the driver soft state structure
3505  *		according to the table property values for the device.
3506  *
3507  *		The form of a configuration table entry is:
3508  *		  <vid+pid>,<flags>,<property-data>
3509  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3510  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3511  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3512  *
3513  *   Arguments: un - driver soft state (unit) structure
3514  */
3515 
3516 static void
3517 sd_process_sdconf_table(struct sd_lun *un)
3518 {
3519 	char	*id = NULL;
3520 	int	table_index;
3521 	int	idlen;
3522 
3523 	ASSERT(un != NULL);
3524 	for (table_index = 0; table_index < sd_disk_table_size;
3525 	    table_index++) {
3526 		id = sd_disk_table[table_index].device_id;
3527 		idlen = strlen(id);
3528 		if (idlen == 0) {
3529 			continue;
3530 		}
3531 
3532 		/*
3533 		 * The static configuration table currently does not
3534 		 * implement version 10 properties. Additionally,
3535 		 * multiple data-property-name entries are not
3536 		 * implemented in the static configuration table.
3537 		 */
3538 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3539 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3540 			    "sd_process_sdconf_table: disk %s\n", id);
3541 			sd_set_vers1_properties(un,
3542 			    sd_disk_table[table_index].flags,
3543 			    sd_disk_table[table_index].properties);
3544 			break;
3545 		}
3546 	}
3547 }
3548 
3549 
3550 /*
3551  *    Function: sd_sdconf_id_match
3552  *
3553  * Description: This local function implements a case sensitive vid/pid
3554  *		comparison as well as the boundary cases of wild card and
3555  *		multiple blanks.
3556  *
3557  *		Note: An implicit assumption made here is that the scsi
3558  *		inquiry structure will always keep the vid, pid and
3559  *		revision strings in consecutive sequence, so they can be
3560  *		read as a single string. If this assumption is not the
3561  *		case, a separate string, to be used for the check, needs
3562  *		to be built with these strings concatenated.
3563  *
3564  *   Arguments: un - driver soft state (unit) structure
3565  *		id - table or config file vid/pid
3566  *		idlen  - length of the vid/pid (bytes)
3567  *
3568  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3569  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3570  */
3571 
3572 static int
3573 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3574 {
3575 	struct scsi_inquiry	*sd_inq;
3576 	int 			rval = SD_SUCCESS;
3577 
3578 	ASSERT(un != NULL);
3579 	sd_inq = un->un_sd->sd_inq;
3580 	ASSERT(id != NULL);
3581 
3582 	/*
3583 	 * We use the inq_vid as a pointer to a buffer containing the
3584 	 * vid and pid and use the entire vid/pid length of the table
3585 	 * entry for the comparison. This works because the inq_pid
3586 	 * data member follows inq_vid in the scsi_inquiry structure.
3587 	 */
3588 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3589 		/*
3590 		 * The user id string is compared to the inquiry vid/pid
3591 		 * using a case insensitive comparison and ignoring
3592 		 * multiple spaces.
3593 		 */
3594 		rval = sd_blank_cmp(un, id, idlen);
3595 		if (rval != SD_SUCCESS) {
3596 			/*
3597 			 * User id strings that start and end with a "*"
3598 			 * are a special case. These do not have a
3599 			 * specific vendor, and the product string can
3600 			 * appear anywhere in the 16 byte PID portion of
3601 			 * the inquiry data. This is a simple strstr()
3602 			 * type search for the user id in the inquiry data.
3603 			 */
3604 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3605 				char	*pidptr = &id[1];
3606 				int	i;
3607 				int	j;
3608 				int	pidstrlen = idlen - 2;
3609 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3610 				    pidstrlen;
3611 
3612 				if (j < 0) {
3613 					return (SD_FAILURE);
3614 				}
3615 				for (i = 0; i < j; i++) {
3616 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3617 					    pidptr, pidstrlen) == 0) {
3618 						rval = SD_SUCCESS;
3619 						break;
3620 					}
3621 				}
3622 			}
3623 		}
3624 	}
3625 	return (rval);
3626 }
3627 
3628 
3629 /*
3630  *    Function: sd_blank_cmp
3631  *
3632  * Description: If the id string starts and ends with a space, treat
3633  *		multiple consecutive spaces as equivalent to a single
3634  *		space. For example, this causes a sd_disk_table entry
3635  *		of " NEC CDROM " to match a device's id string of
3636  *		"NEC       CDROM".
3637  *
3638  *		Note: The success exit condition for this routine is if
3639  *		the pointer to the table entry is '\0' and the cnt of
3640  *		the inquiry length is zero. This will happen if the inquiry
3641  *		string returned by the device is padded with spaces to be
3642  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3643  *		SCSI spec states that the inquiry string is to be padded with
3644  *		spaces.
3645  *
3646  *   Arguments: un - driver soft state (unit) structure
3647  *		id - table or config file vid/pid
3648  *		idlen  - length of the vid/pid (bytes)
3649  *
3650  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3651  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3652  */
3653 
static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	/*
	 * cnt bounds the un-terminated inquiry string (vid immediately
	 * followed by pid) that p2 walks below.  Note: sizeof does not
	 * evaluate its operand, so "un" is not dereferenced here ahead
	 * of the ASSERT below.
	 */
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	p1 = id;

	/* Only ids bracketed by blanks get the blank-collapsing compare. */
	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/* compare the two strings */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			/*
			 * Advance both cursors past the run of matching
			 * (case-insensitive) characters; the outer loop
			 * then collapses any following blank runs again.
			 */
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/* return SD_SUCCESS if both strings match */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}
3700 
3701 
3702 /*
3703  *    Function: sd_chk_vers1_data
3704  *
3705  * Description: Verify the version 1 device properties provided by the
3706  *		user via the configuration file
3707  *
3708  *   Arguments: un	     - driver soft state (unit) structure
3709  *		flags	     - integer mask indicating properties to be set
3710  *		prop_list    - integer list of property values
3711  *		list_len     - length of user provided data
3712  *
3713  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3714  *		SD_FAILURE - Indicates the user provided data is invalid
3715  */
3716 
3717 static int
3718 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3719     int list_len, char *dataname_ptr)
3720 {
3721 	int i;
3722 	int mask = 1;
3723 	int index = 0;
3724 
3725 	ASSERT(un != NULL);
3726 
3727 	/* Check for a NULL property name and list */
3728 	if (dataname_ptr == NULL) {
3729 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3730 		    "sd_chk_vers1_data: NULL data property name.");
3731 		return (SD_FAILURE);
3732 	}
3733 	if (prop_list == NULL) {
3734 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3735 		    "sd_chk_vers1_data: %s NULL data property list.",
3736 		    dataname_ptr);
3737 		return (SD_FAILURE);
3738 	}
3739 
3740 	/* Display a warning if undefined bits are set in the flags */
3741 	if (flags & ~SD_CONF_BIT_MASK) {
3742 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3743 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3744 		    "Properties not set.",
3745 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3746 		return (SD_FAILURE);
3747 	}
3748 
3749 	/*
3750 	 * Verify the length of the list by identifying the highest bit set
3751 	 * in the flags and validating that the property list has a length
3752 	 * up to the index of this bit.
3753 	 */
3754 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3755 		if (flags & mask) {
3756 			index++;
3757 		}
3758 		mask = 1 << i;
3759 	}
3760 	if ((list_len / sizeof (int)) < (index + 2)) {
3761 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3762 		    "sd_chk_vers1_data: "
3763 		    "Data property list %s size is incorrect. "
3764 		    "Properties not set.", dataname_ptr);
3765 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3766 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3767 		return (SD_FAILURE);
3768 	}
3769 	return (SD_SUCCESS);
3770 }
3771 
3772 
3773 /*
3774  *    Function: sd_set_vers1_properties
3775  *
3776  * Description: Set version 1 device properties based on a property list
3777  *		retrieved from the driver configuration file or static
3778  *		configuration table. Version 1 properties have the format:
3779  *
3780  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3781  *
3782  *		where the prop0 value will be used to set prop0 if bit0
3783  *		is set in the flags
3784  *
3785  *   Arguments: un	     - driver soft state (unit) structure
3786  *		flags	     - integer mask indicating properties to be set
3787  *		prop_list    - integer list of property values
3788  */
3789 
3790 static void
3791 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3792 {
3793 	ASSERT(un != NULL);
3794 
3795 	/*
3796 	 * Set the flag to indicate cache is to be disabled. An attempt
3797 	 * to disable the cache via sd_disable_caching() will be made
3798 	 * later during attach once the basic initialization is complete.
3799 	 */
3800 	if (flags & SD_CONF_BSET_NOCACHE) {
3801 		un->un_f_opt_disable_cache = TRUE;
3802 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3803 		    "sd_set_vers1_properties: caching disabled flag set\n");
3804 	}
3805 
3806 	/* CD-specific configuration parameters */
3807 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3808 		un->un_f_cfg_playmsf_bcd = TRUE;
3809 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3810 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3811 	}
3812 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3813 		un->un_f_cfg_readsub_bcd = TRUE;
3814 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3815 		    "sd_set_vers1_properties: readsub_bcd set\n");
3816 	}
3817 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3818 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3819 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3820 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3821 	}
3822 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3823 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3824 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3825 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3826 	}
3827 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3828 		un->un_f_cfg_no_read_header = TRUE;
3829 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3830 			    "sd_set_vers1_properties: no_read_header set\n");
3831 	}
3832 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3833 		un->un_f_cfg_read_cd_xd4 = TRUE;
3834 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3835 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3836 	}
3837 
3838 	/* Support for devices which do not have valid/unique serial numbers */
3839 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3840 		un->un_f_opt_fab_devid = TRUE;
3841 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3842 		    "sd_set_vers1_properties: fab_devid bit set\n");
3843 	}
3844 
3845 	/* Support for user throttle configuration */
3846 	if (flags & SD_CONF_BSET_THROTTLE) {
3847 		ASSERT(prop_list != NULL);
3848 		un->un_saved_throttle = un->un_throttle =
3849 		    prop_list->sdt_throttle;
3850 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3851 		    "sd_set_vers1_properties: throttle set to %d\n",
3852 		    prop_list->sdt_throttle);
3853 	}
3854 
3855 	/* Set the per disk retry count according to the conf file or table. */
3856 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3857 		ASSERT(prop_list != NULL);
3858 		if (prop_list->sdt_not_rdy_retries) {
3859 			un->un_notready_retry_count =
3860 				prop_list->sdt_not_rdy_retries;
3861 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3862 			    "sd_set_vers1_properties: not ready retry count"
3863 			    " set to %d\n", un->un_notready_retry_count);
3864 		}
3865 	}
3866 
3867 	/* The controller type is reported for generic disk driver ioctls */
3868 	if (flags & SD_CONF_BSET_CTYPE) {
3869 		ASSERT(prop_list != NULL);
3870 		switch (prop_list->sdt_ctype) {
3871 		case CTYPE_CDROM:
3872 			un->un_ctype = prop_list->sdt_ctype;
3873 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3874 			    "sd_set_vers1_properties: ctype set to "
3875 			    "CTYPE_CDROM\n");
3876 			break;
3877 		case CTYPE_CCS:
3878 			un->un_ctype = prop_list->sdt_ctype;
3879 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3880 				"sd_set_vers1_properties: ctype set to "
3881 				"CTYPE_CCS\n");
3882 			break;
3883 		case CTYPE_ROD:		/* RW optical */
3884 			un->un_ctype = prop_list->sdt_ctype;
3885 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3886 			    "sd_set_vers1_properties: ctype set to "
3887 			    "CTYPE_ROD\n");
3888 			break;
3889 		default:
3890 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3891 			    "sd_set_vers1_properties: Could not set "
3892 			    "invalid ctype value (%d)",
3893 			    prop_list->sdt_ctype);
3894 		}
3895 	}
3896 
3897 	/* Purple failover timeout */
3898 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
3899 		ASSERT(prop_list != NULL);
3900 		un->un_busy_retry_count =
3901 			prop_list->sdt_busy_retries;
3902 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3903 		    "sd_set_vers1_properties: "
3904 		    "busy retry count set to %d\n",
3905 		    un->un_busy_retry_count);
3906 	}
3907 
3908 	/* Purple reset retry count */
3909 	if (flags & SD_CONF_BSET_RST_RETRIES) {
3910 		ASSERT(prop_list != NULL);
3911 		un->un_reset_retry_count =
3912 			prop_list->sdt_reset_retries;
3913 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3914 		    "sd_set_vers1_properties: "
3915 		    "reset retry count set to %d\n",
3916 		    un->un_reset_retry_count);
3917 	}
3918 
3919 	/* Purple reservation release timeout */
3920 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
3921 		ASSERT(prop_list != NULL);
3922 		un->un_reserve_release_time =
3923 			prop_list->sdt_reserv_rel_time;
3924 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3925 		    "sd_set_vers1_properties: "
3926 		    "reservation release timeout set to %d\n",
3927 		    un->un_reserve_release_time);
3928 	}
3929 
3930 	/*
3931 	 * Driver flag telling the driver to verify that no commands are pending
3932 	 * for a device before issuing a Test Unit Ready. This is a workaround
3933 	 * for a firmware bug in some Seagate eliteI drives.
3934 	 */
3935 	if (flags & SD_CONF_BSET_TUR_CHECK) {
3936 		un->un_f_cfg_tur_check = TRUE;
3937 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3938 		    "sd_set_vers1_properties: tur queue check set\n");
3939 	}
3940 
3941 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
3942 		un->un_min_throttle = prop_list->sdt_min_throttle;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: min throttle set to %d\n",
3945 		    un->un_min_throttle);
3946 	}
3947 
3948 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
3949 		un->un_f_disksort_disabled =
3950 		    (prop_list->sdt_disk_sort_dis != 0) ?
3951 		    TRUE : FALSE;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: disksort disabled "
3954 		    "flag set to %d\n",
3955 		    prop_list->sdt_disk_sort_dis);
3956 	}
3957 
3958 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
3959 		un->un_f_lun_reset_enabled =
3960 		    (prop_list->sdt_lun_reset_enable != 0) ?
3961 		    TRUE : FALSE;
3962 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3963 		    "sd_set_vers1_properties: lun reset enabled "
3964 		    "flag set to %d\n",
3965 		    prop_list->sdt_lun_reset_enable);
3966 	}
3967 
3968 	/*
3969 	 * Validate the throttle values.
3970 	 * If any of the numbers are invalid, set everything to defaults.
3971 	 */
3972 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
3973 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
3974 	    (un->un_min_throttle > un->un_throttle)) {
3975 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
3976 		un->un_min_throttle = sd_min_throttle;
3977 	}
3978 }
3979 
3980 /*
3981  *   Function: sd_is_lsi()
3982  *
 *   Description: Check for LSI devices; step through the static device
 *	table to match vid/pid.
3985  *
3986  *   Args: un - ptr to sd_lun
3987  *
3988  *   Notes:  When creating new LSI property, need to add the new LSI property
3989  *		to this function.
3990  */
3991 static void
3992 sd_is_lsi(struct sd_lun *un)
3993 {
3994 	char	*id = NULL;
3995 	int	table_index;
3996 	int	idlen;
3997 	void	*prop;
3998 
3999 	ASSERT(un != NULL);
4000 	for (table_index = 0; table_index < sd_disk_table_size;
4001 	    table_index++) {
4002 		id = sd_disk_table[table_index].device_id;
4003 		idlen = strlen(id);
4004 		if (idlen == 0) {
4005 			continue;
4006 		}
4007 
4008 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4009 			prop = sd_disk_table[table_index].properties;
4010 			if (prop == &lsi_properties ||
4011 			    prop == &lsi_oem_properties ||
4012 			    prop == &lsi_properties_scsi ||
4013 			    prop == &symbios_properties) {
4014 				un->un_f_cfg_is_lsi = TRUE;
4015 			}
4016 			break;
4017 		}
4018 	}
4019 }
4020 
4021 
4022 /*
4023  * The following routines support reading and interpretation of disk labels,
4024  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4025  * fdisk tables.
4026  */
4027 
4028 /*
4029  *    Function: sd_validate_geometry
4030  *
4031  * Description: Read the label from the disk (if present). Update the unit's
4032  *		geometry and vtoc information from the data in the label.
4033  *		Verify that the label is valid.
4034  *
4035  *   Arguments: un - driver soft state (unit) structure
4036  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4037  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4038  *			to use the USCSI "direct" chain and bypass the normal
4039  *			command waitq.
4040  *
4041  * Return Code: 0 - Successful completion
4042  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4043  *			  un->un_blockcount; or label on disk is corrupted
4044  *			  or unreadable.
4045  *		EACCES  - Reservation conflict at the device.
4046  *		ENOMEM  - Resource allocation error
4047  *		ENOTSUP - geometry not applicable
4048  *
4049  *     Context: Kernel thread only (can sleep).
4050  */
4051 
static int
sd_validate_geometry(struct sd_lun *un, int path_flag)
{
	static	char		labelstring[128];
	static	char		buf[256];
	char	*label		= NULL;
	int	label_error	= 0;
	/*
	 * NOTE(review): "un" is dereferenced in this initializer before the
	 * ASSERT(un != NULL) below; callers are presumed to always pass a
	 * valid un -- confirm.
	 */
	int	gvalid		= un->un_f_geometry_is_valid;
	int	lbasize;
	uint_t	capacity;
	int	count;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * If the required values are not valid, then try getting them
	 * once via read capacity. If that fails, then fail this call.
	 * This is necessary with the new mpxio failover behavior in
	 * the T300 where we can get an attach for the inactive path
	 * before the active path. The inactive path fails commands with
	 * sense data of 02,04,88 which happens to the read capacity
	 * before mpxio has had sufficient knowledge to know if it should
	 * force a fail over or not. (Which it won't do at attach anyhow).
	 * If the read capacity at attach time fails, un_tgt_blocksize and
	 * un_blockcount won't be valid.
	 */
	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
	    (un->un_f_blockcount_is_valid != TRUE)) {
		uint64_t	cap;
		uint32_t	lbasz;
		int		rval;

		/* Drop the mutex across the SCSI command; state may change. */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
		    &lbasz, SD_PATH_DIRECT);
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The following relies on
			 * sd_send_scsi_READ_CAPACITY never
			 * returning 0 for capacity and/or lbasize.
			 */
			sd_update_block_info(un, lbasz, cap);
		}

		/* Re-check: the READ CAPACITY above may not have helped. */
		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
		    (un->un_f_blockcount_is_valid != TRUE)) {
			return (EINVAL);
		}
	}

	/*
	 * Copy the lbasize and capacity so that if they're reset while we're
	 * not holding the SD_MUTEX, we will continue to use valid values
	 * after the SD_MUTEX is reacquired. (4119659)
	 */
	lbasize  = un->un_tgt_blocksize;
	capacity = un->un_blockcount;

#if defined(_SUNOS_VTOC_16)
	/*
	 * Set up the "whole disk" fdisk partition; this should always
	 * exist, regardless of whether the disk contains an fdisk table
	 * or vtoc.
	 */
	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
#endif

	/*
	 * Refresh the logical and physical geometry caches.
	 * (data from MODE SENSE format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry").
	 */
	sd_resync_geom_caches(un, capacity, lbasize, path_flag);

	/* Try EFI first; 0 from sd_use_efi() means a valid EFI label. */
	label_error = sd_use_efi(un, path_flag);
	if (label_error == 0) {
		/* found a valid EFI label */
		SD_TRACE(SD_LOG_IO_PARTITION, un,
			"sd_validate_geometry: found EFI label\n");
		un->un_solaris_offset = 0;
		un->un_solaris_size = capacity;
		/* ENOTSUP signals the caller that VTOC geometry is N/A. */
		return (ENOTSUP);
	}
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		if (label_error == ESRCH) {
			/*
			 * they've configured a LUN over 1TB, but used
			 * format.dat to restrict format's view of the
			 * capacity to be under 1TB
			 */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
"is >1TB and has a VTOC label: use format(1M) to either decrease the");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
"size to be < 1TB or relabel the disk with an EFI label");
		} else {
			/* unlabeled disk over 1TB */
			return (ENOTSUP);
		}
	}
	label_error = 0;

	/*
	 * at this point it is either labeled with a VTOC or it is
	 * under 1TB
	 */

	/*
	 * Only DIRECT ACCESS devices will have Sun labels.
	 * CD's supposedly have a Sun label, too
	 */
	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT || ISREMOVABLE(un)) {
		struct	dk_label *dkl;
		offset_t dkl1;
		offset_t label_addr, real_addr;
		int	rval;
		size_t	buffer_size;

		/*
		 * Note: This will set up un->un_solaris_size and
		 * un->un_solaris_offset.
		 */
		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
		case SD_CMD_RESERVATION_CONFLICT:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EACCES);
		case SD_CMD_FAILURE:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (ENOMEM);
		}

		if (un->un_solaris_size <= DK_LABEL_LOC) {
			/*
			 * Found fdisk table but no Solaris partition entry,
			 * so don't call sd_uselabel() and don't create
			 * a default label.
			 */
			label_error = 0;
			un->un_f_geometry_is_valid = TRUE;
			goto no_solaris_partition;
		}
		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);

		/*
		 * sys_blocksize != tgt_blocksize, need to re-adjust
		 * blkno and save the index to beginning of dk_label
		 */
		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
		buffer_size = SD_REQBYTES2TGTBYTES(un,
		    sizeof (struct dk_label));

		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
		    "label_addr: 0x%x allocation size: 0x%x\n",
		    label_addr, buffer_size);
		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
		if (dkl == NULL) {
			return (ENOMEM);
		}

		/* Drop the mutex around the blocking label read. */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
		    path_flag);
		mutex_enter(SD_MUTEX(un));

		switch (rval) {
		case 0:
			/*
			 * sd_uselabel will establish that the geometry
			 * is valid.
			 * For sys_blocksize != tgt_blocksize, need
			 * to index into the beginning of dk_label
			 */
			dkl1 = (daddr_t)dkl
				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
			    path_flag) != SD_LABEL_IS_VALID) {
				label_error = EINVAL;
			}
			break;
		case EACCES:
			/* Reservation conflict while reading the label. */
			label_error = EACCES;
			break;
		default:
			label_error = EINVAL;
			break;
		}

		kmem_free(dkl, buffer_size);

#if defined(_SUNOS_VTOC_8)
		label = (char *)un->un_asciilabel;
#elif defined(_SUNOS_VTOC_16)
		label = (char *)un->un_vtoc.v_asciilabel;
#else
#error "No VTOC format defined."
#endif
	}

	/*
	 * If a valid label was not found, AND if no reservation conflict
	 * was detected, then go ahead and create a default label (4069506).
	 *
	 * Note: currently, for VTOC_8 devices, the default label is created
	 * for removables only.  For VTOC_16 devices, the default label will
	 * be created for both removables and non-removables alike.
	 * (see sd_build_default_label)
	 */
	/*
	 * Note: the "if (" below is deliberately opened inside whichever
	 * preprocessor branch applies and closed by the shared brace that
	 * follows -- exactly one of the two conditions is compiled in.
	 */
#if defined(_SUNOS_VTOC_8)
	if (ISREMOVABLE(un) && (label_error != EACCES)) {
#elif defined(_SUNOS_VTOC_16)
	if (label_error != EACCES) {
#endif
		if (un->un_f_geometry_is_valid == FALSE) {
			sd_build_default_label(un);
		}
		label_error = 0;
	}

no_solaris_partition:
	if ((!ISREMOVABLE(un) ||
	    (ISREMOVABLE(un) && un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && gvalid == FALSE)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we happen to really validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times, e.g., ioctl()'s like Get VTOC in which case we
		 * don't want to print the label.
		 * If the geometry is valid, print the label string,
		 * else print vendor and product info, if available
		 */
		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
		} else {
			/* sd_label_mutex serializes use of the static bufs */
			mutex_enter(&sd_label_mutex);
			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
			    labelstring);
			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
			    &labelstring[64]);
			(void) sprintf(buf, "?Vendor '%s', product '%s'",
			    labelstring, &labelstring[64]);
			if (un->un_f_blockcount_is_valid == TRUE) {
				(void) sprintf(&buf[strlen(buf)],
				    ", %llu %u byte blocks\n",
				    (longlong_t)un->un_blockcount,
				    un->un_tgt_blocksize);
			} else {
				(void) sprintf(&buf[strlen(buf)],
				    ", (unknown capacity)\n");
			}
			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
			mutex_exit(&sd_label_mutex);
		}
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * If we have valid geometry, set up the remaining fdisk partitions.
	 * Note that dkl_cylno is not used for the fdisk map entries, so
	 * we set it to an entirely bogus value.
	 */
	for (count = 0; count < FD_NUMPART; count++) {
		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
		un->un_map[FDISK_P1 + count].dkl_nblk =
		    un->un_fmap[count].fmap_nblk;

		un->un_offset[FDISK_P1 + count] =
		    un->un_fmap[count].fmap_start;
	}
#endif

	/* Compute the byte/block offset of each slice in the unit's map. */
	for (count = 0; count < NDKMAP; count++) {
#if defined(_SUNOS_VTOC_8)
		struct dk_map *lp  = &un->un_map[count];
		un->un_offset[count] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#elif defined(_SUNOS_VTOC_16)
		struct dkl_partition *vp = &un->un_vtoc.v_part[count];

		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
#else
#error "No VTOC format defined."
#endif
	}

	return (label_error);
}
4341 
4342 
#if defined(_SUNOS_VTOC_16)
/*
 * Macro: MAX_BLKS
 *
 *	Builds a table entry holding the largest capacity representable
 *	with 16-bit cylinder values for the given head count and SPT
 *	(sectors per track).  Expands to the three initializers of a
 *	chs_values entry: max_cap, nhead, nsect.  Entries for some
 *	smaller disk sizes are instead set by convention to match
 *	common X86 BIOS usage.
 */
#define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt

/*
 *    Function: sd_convert_geometry
 *
 * Description: Derive 16-bit-safe head and sector counts for a disk of
 *		the given capacity and store them in *un_g (i.e. convert
 *		from geom_cache to dk_geom without wrapping 16-bit values).
 *		The caller computes the cylinder count from these; the
 *		table below is chosen so cylinders never exceed 65535,
 *		which keeps X86 geometry-reporting ioctls happy.  SPT
 *		values are multiples of 63 for compatibility with other
 *		OSes that stop at 63 SPT.
 *
 *     Context: Kernel thread only
 */
static void
sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
{
	static const struct chs_values {
		uint_t max_cap;		/* Max Capacity for this HS. */
		uint_t nhead;		/* Heads to use. */
		uint_t nsect;		/* SPT to use. */
	} CHS_values[] = {
		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
	};
	int idx;

	/*
	 * Tiny devices (unlabeled SCSI floppies) get a fixed 2-head,
	 * 80-cylinder layout; the sector count is derived from capacity.
	 */
	if (capacity <= 0x1000) {
		un_g->dkg_nhead = 2;
		un_g->dkg_ncyl = 80;
		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
		return;
	}

	/*
	 * Scan the table for the first entry whose max capacity covers
	 * this device.  The DK_MAX_BLOCKS entry terminates the scan, so
	 * capacities of 1TB and up simply get the largest geometry in
	 * the table; disks that large shouldn't be using CHS anyway.
	 */
	idx = 0;
	while (CHS_values[idx].max_cap < capacity &&
	    CHS_values[idx].max_cap != DK_MAX_BLOCKS)
		idx++;

	un_g->dkg_nhead = CHS_values[idx].nhead;
	un_g->dkg_nsect = CHS_values[idx].nsect;
}
#endif
4410 
4411 
4412 /*
4413  *    Function: sd_resync_geom_caches
4414  *
4415  * Description: (Re)initialize both geometry caches: the virtual geometry
4416  *		information is extracted from the HBA (the "geometry"
4417  *		capability), and the physical geometry cache data is
4418  *		generated by issuing MODE SENSE commands.
4419  *
4420  *   Arguments: un - driver soft state (unit) structure
4421  *		capacity - disk capacity in #blocks
4422  *		lbasize - disk block size in bytes
4423  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4424  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4425  *			to use the USCSI "direct" chain and bypass the normal
4426  *			command waitq.
4427  *
4428  *     Context: Kernel thread only (can sleep).
4429  */
4430 
static void
sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag)
{
	struct 	geom_cache 	pgeom;
	struct 	geom_cache	*pgeom_p = &pgeom;
	int 	spc;
	unsigned short nhead;
	unsigned short nsect;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Ask the controller for its logical geometry.
	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
	 * then the lgeom cache will be invalid.
	 */
	sd_get_virtual_geometry(un, capacity, lbasize);

	/*
	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
	 */
	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
		/*
		 * Note: Perhaps this needs to be more adaptive? The rationale
		 * is that, if there's no HBA geometry from the HBA driver, any
		 * guess is good, since this is the physical geometry. If MODE
		 * SENSE fails this gives a max cylinder size for non-LBA access
		 */
		nhead = 255;
		nsect = 63;
	} else {
		nhead = un->un_lgeom.g_nhead;
		nsect = un->un_lgeom.g_nsect;
	}

	/*
	 * CD-ROMs are presented as a single-head geometry; all sectors
	 * go on that one "track".
	 */
	if (ISCD(un)) {
		pgeom_p->g_nhead = 1;
		pgeom_p->g_nsect = nsect * nhead;
	} else {
		pgeom_p->g_nhead = nhead;
		pgeom_p->g_nsect = nsect;
	}

	/*
	 * nhead and nsect are guaranteed non-zero here (either the
	 * 255/63 defaults or validated lgeom values), so spc cannot
	 * be zero and the division below is safe.
	 */
	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
	pgeom_p->g_capacity = capacity;
	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
	pgeom_p->g_acyl = 0;

	/*
	 * Retrieve fresh geometry data from the hardware, stash it
	 * here temporarily before we rebuild the incore label.
	 *
	 * We want to use the MODE SENSE commands to derive the
	 * physical geometry of the device, but if either command
	 * fails, the logical geometry is used as the fallback for
	 * disk label geometry.
	 */
	mutex_exit(SD_MUTEX(un));
	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
	mutex_enter(SD_MUTEX(un));

	/*
	 * Now update the real copy while holding the mutex. This
	 * way the global copy is never in an inconsistent state.
	 */
	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));

	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
	    "(cached from lgeom)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
	    un->un_pgeom.g_rpm);
}
4512 
4513 
4514 /*
4515  *    Function: sd_read_fdisk
4516  *
4517  * Description: utility routine to read the fdisk table.
4518  *
4519  *   Arguments: un - driver soft state (unit) structure
4520  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4521  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4522  *			to use the USCSI "direct" chain and bypass the normal
4523  *			command waitq.
4524  *
4525  * Return Code: SD_CMD_SUCCESS
4526  *		SD_CMD_FAILURE
4527  *
4528  *     Context: Kernel thread only (can sleep).
4529  */
4530 /* ARGSUSED */
static int
sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
{
#if defined(_NO_FDISK_PRESENT)

	/*
	 * No fdisk table on this platform: the Solaris "partition" is
	 * simply the entire disk, and the fmap entries are cleared.
	 */
	un->un_solaris_offset = 0;
	un->un_solaris_size = capacity;
	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
	return (SD_CMD_SUCCESS);

#elif defined(_FIRMWARE_NEEDS_FDISK)

	struct ipart	*fdp;
	struct mboot	*mbp;
	struct ipart	fdisk[FD_NUMPART];
	int		i;
	char		sigbuf[2];
	caddr_t		bufp;
	int		uidx;
	int		rval;
	int		lba = 0;
	uint_t		solaris_offset;	/* offset to solaris part. */
	daddr_t		solaris_size;	/* size of solaris partition */
	uint32_t	blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);

	blocksize = un->un_tgt_blocksize;

	/*
	 * Start off assuming no fdisk table
	 */
	solaris_offset = 0;
	solaris_size   = capacity;

	/* Read the master boot record (block 0); drop the mutex for I/O */
	mutex_exit(SD_MUTEX(un));
	bufp = kmem_zalloc(blocksize, KM_SLEEP);
	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
	mutex_enter(SD_MUTEX(un));

	if (rval != 0) {
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: fdisk read err\n");
		kmem_free(bufp, blocksize);
		return (SD_CMD_FAILURE);
	}

	mbp = (struct mboot *)bufp;

	/*
	 * The fdisk table does not begin on a 4-byte boundary within the
	 * master boot record, so we copy it to an aligned structure to avoid
	 * alignment exceptions on some processors.
	 */
	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));

	/*
	 * Check for lba support before verifying sig; sig might not be
	 * there, say on a blank disk, but the max_chs mark may still
	 * be present.
	 *
	 * Note: LBA support and BEFs are an x86-only concept but this
	 * code should work OK on SPARC as well.
	 */

	/*
	 * First, check for lba-access-ok on root node (or prom root node)
	 * if present there, don't need to search fdisk table.
	 */
	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
	    "lba-access-ok", 0) != 0) {
		/* All drives do LBA; don't search fdisk table */
		lba = 1;
	} else {
		/* Okay, look for mark in fdisk table */
		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
			/* accumulate "lba" value from all partitions */
			lba = (lba || sd_has_max_chs_vals(fdp));
		}
	}

	/*
	 * Next, look for 'no-bef-lba-access' prop on parent.
	 * Its presence means the realmode driver doesn't support
	 * LBA, so the target driver shouldn't advertise it as ok.
	 * This should be a temporary condition; one day all
	 * BEFs should support the LBA access functions.
	 */
	if ((lba != 0) && (ddi_getprop(DDI_DEV_T_ANY,
	    ddi_get_parent(SD_DEVINFO(un)), DDI_PROP_DONTPASS,
	    "no-bef-lba-access", 0) != 0)) {
		/* BEF doesn't support LBA; don't advertise it as ok */
		lba = 0;
	}

	/*
	 * If LBA access is supported, advertise it by creating the
	 * "lba-access-ok" property on this device node (if not present).
	 */
	if (lba != 0) {
		dev_t dev = sd_make_device(SD_DEVINFO(un));

		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "lba-access-ok", 0) == 0) {
			/* not found; create it */
			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
			    "lba-access-ok", (caddr_t)NULL, 0) !=
			    DDI_PROP_SUCCESS) {
				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
				    "sd_read_fdisk: Can't create lba property "
				    "for instance %d\n",
				    ddi_get_instance(SD_DEVINFO(un)));
			}
		}
	}

	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));

	/*
	 * Endian-independent signature check
	 */
	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: no fdisk\n");
		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
		rval = SD_CMD_SUCCESS;
		goto done;
	}

#ifdef SDDEBUG
	if (sd_level_mask & SD_LOGMASK_INFO) {
		fdp = fdisk;
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
		    "numsect         sysid       bootid\n");
		for (i = 0; i < FD_NUMPART; i++, fdp++) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
			    i, fdp->relsect, fdp->numsect,
			    fdp->systid, fdp->bootid);
		}
	}
#endif

	/*
	 * Try to find the unix partition
	 */
	uidx = -1;
	solaris_offset = 0;
	solaris_size   = 0;

	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
		int	relsect;
		int	numsect;

		/* Empty slot: clear the corresponding fmap entry */
		if (fdp->numsect == 0) {
			un->un_fmap[i].fmap_start = 0;
			un->un_fmap[i].fmap_nblk  = 0;
			continue;
		}

		/*
		 * Data in the fdisk table is little-endian.
		 */
		relsect = LE_32(fdp->relsect);
		numsect = LE_32(fdp->numsect);

		un->un_fmap[i].fmap_start = relsect;
		un->un_fmap[i].fmap_nblk  = numsect;

		/* Only Solaris (or EFI-protective) partitions are candidates */
		if (fdp->systid != SUNIXOS &&
		    fdp->systid != SUNIXOS2 &&
		    fdp->systid != EFI_PMBR) {
			continue;
		}

		/*
		 * use the last active solaris partition id found
		 * (there should only be 1 active partition id)
		 *
		 * if there are no active solaris partition id
		 * then use the first inactive solaris partition id
		 */
		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
			uidx = i;
			solaris_offset = relsect;
			solaris_size   = numsect;
		}
	}

	/*
	 * NOTE(review): this logs the PREVIOUS offset/size, not the newly
	 * discovered values; un_solaris_offset/un_solaris_size are only
	 * updated after the "done" label below.  Confirm this is intended.
	 */
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
	    un->un_solaris_offset, un->un_solaris_size);

	rval = SD_CMD_SUCCESS;

done:

	/*
	 * Clear the VTOC info, only if the Solaris partition entry
	 * has moved, changed size, been deleted, or if the size of
	 * the partition is too small to even fit the label sector.
	 */
	if ((un->un_solaris_offset != solaris_offset) ||
	    (un->un_solaris_size != solaris_size) ||
	    solaris_size <= DK_LABEL_LOC) {
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
			solaris_offset, solaris_size);
		bzero(&un->un_g, sizeof (struct dk_geom));
		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
		un->un_f_geometry_is_valid = FALSE;
	}
	un->un_solaris_offset = solaris_offset;
	un->un_solaris_size = solaris_size;
	kmem_free(bufp, blocksize);
	return (rval);

#else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
#error "fdisk table presence undetermined for this platform."
#endif	/* #if defined(_NO_FDISK_PRESENT) */
}
4751 
4752 
4753 /*
4754  *    Function: sd_get_physical_geometry
4755  *
4756  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4757  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4758  *		target, and use this information to initialize the physical
4759  *		geometry cache specified by pgeom_p.
4760  *
4761  *		MODE SENSE is an optional command, so failure in this case
4762  *		does not necessarily denote an error. We want to use the
4763  *		MODE SENSE commands to derive the physical geometry of the
4764  *		device, but if either command fails, the logical geometry is
4765  *		used as the fallback for disk label geometry.
4766  *
4767  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4768  *		have already been initialized for the current target and
4769  *		that the current values be passed as args so that we don't
4770  *		end up ever trying to use -1 as a valid value. This could
4771  *		happen if either value is reset while we're not holding
4772  *		the mutex.
4773  *
4774  *   Arguments: un - driver soft state (unit) structure
4775  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4776  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4777  *			to use the USCSI "direct" chain and bypass the normal
4778  *			command waitq.
4779  *
4780  *     Context: Kernel thread only (can sleep).
4781  */
4782 
static void
sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
	int capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;
	int	modesense_capacity;
	int	rpm;
	int	bd_len;
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;

	ASSERT(un != NULL);
	ASSERT(!(mutex_owned(SD_MUTEX(un))));

	/* Nothing to do unless both blockcount and blocksize are known */
	if (un->un_f_blockcount_is_valid != TRUE) {
		return;
	}

	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return;
	}

	/* Default the sector size: 2KB for CDs, system blocksize otherwise */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/* ATAPI devices require the 10-byte (group 2) MODE SENSE CDB */
	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page3\n", bd_len);
		goto page3_exit;
	}

	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	/* Verify the device actually returned the page we asked for */
	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg3 code mismatch %d\n",
		    page3p->mode_page.code);
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
	} else {
		/* Round down to a multiple of the system blocksize */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));


	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 *
	 * Note: mode_header_length computed during the page 3 processing
	 * above is reused here; it depends only on the ATAPI flag, which
	 * has not changed.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page4\n", bd_len);
		goto page4_exit;
	}

	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg4 code mismatch %d\n",
		    page4p->mode_page.code);
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */

	mutex_enter(SD_MUTEX(un));

	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 *
	 * (If modesense_capacity < capacity, the pgeom head/sect/cyl
	 * values are deliberately left as initialized by the caller,
	 * i.e. the logical-geometry fallback.)
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);

	mutex_exit(SD_MUTEX(un));

page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
}
5016 
5017 
5018 /*
5019  *    Function: sd_get_virtual_geometry
5020  *
5021  * Description: Ask the controller to tell us about the target device.
5022  *
5023  *   Arguments: un - pointer to softstate
5024  *		capacity - disk capacity in #blocks
5025  *		lbasize - disk block size in bytes
5026  *
5027  *     Context: Kernel thread only
5028  */
5029 
static void
sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
{
	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
	uint_t	geombuf;
	int	spc;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Drop the mutex across the (potentially blocking) HBA calls */
	mutex_exit(SD_MUTEX(un));

	/* Set sector size, and total number of sectors */
	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);

	/* Let the HBA tell us its geometry */
	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);

	mutex_enter(SD_MUTEX(un));

	/* A value of -1 indicates an undefined "geometry" property */
	if (geombuf == (-1)) {
		return;
	}

	/*
	 * Initialize the logical geometry cache.  The "geometry"
	 * capability packs nhead in the upper 16 bits and nsect in
	 * the lower 16 bits of the returned value.
	 */
	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
	lgeom_p->g_nsect   = geombuf & 0xffff;
	lgeom_p->g_secsize = un->un_sys_blocksize;

	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;

	/*
	 * Note: The driver originally converted the capacity value from
	 * target blocks to system blocks. However, the capacity value passed
	 * to this routine is already in terms of system blocks (this scaling
	 * is done when the READ CAPACITY command is issued and processed).
	 * This 'error' may have gone undetected because the usage of g_ncyl
	 * (which is based upon g_capacity) is very limited within the driver
	 */
	lgeom_p->g_capacity = capacity;

	/*
	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
	 * hba may return zero values if the device has been removed.
	 */
	if (spc == 0) {
		lgeom_p->g_ncyl = 0;
	} else {
		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
	}
	lgeom_p->g_acyl = 0;

	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
}
5093 
5094 
5095 /*
5096  *    Function: sd_update_block_info
5097  *
5098  * Description: Calculate a byte count to sector count bitshift value
5099  *		from sector size.
5100  *
5101  *   Arguments: un: unit struct.
5102  *		lbasize: new target sector size
5103  *		capacity: new target capacity, ie. block count
5104  *
5105  *     Context: Kernel thread context
5106  */
5107 
5108 static void
5109 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5110 {
5111 	if (lbasize != 0) {
5112 		un->un_tgt_blocksize = lbasize;
5113 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5114 	}
5115 
5116 	if (capacity != 0) {
5117 		un->un_blockcount		= capacity;
5118 		un->un_f_blockcount_is_valid	= TRUE;
5119 	}
5120 }
5121 
5122 
/*
 * Byte-swap an EFI GPT header in place, converting each field from
 * on-disk (little-endian) byte order to host order via the LE_* and
 * UUID_LE_CONVERT macros.  (These are no-ops on little-endian hosts.)
 */
static void
sd_swap_efi_gpt(efi_gpt_t *e)
{
	_NOTE(ASSUMING_PROTECTED(*e))
	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
	e->efi_gpt_NumberOfPartitionEntries =
	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
	e->efi_gpt_SizeOfPartitionEntry =
	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
	e->efi_gpt_PartitionEntryArrayCRC32 =
	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
}
5144 
5145 static void
5146 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5147 {
5148 	int i;
5149 
5150 	_NOTE(ASSUMING_PROTECTED(*p))
5151 	for (i = 0; i < nparts; i++) {
5152 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5153 		    p[i].efi_gpe_PartitionTypeGUID);
5154 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5155 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5156 		/* PartitionAttrs */
5157 	}
5158 }
5159 
5160 static int
5161 sd_validate_efi(efi_gpt_t *labp)
5162 {
5163 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5164 		return (EINVAL);
5165 	/* at least 96 bytes in this version of the spec. */
5166 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5167 	    labp->efi_gpt_HeaderSize)
5168 		return (EINVAL);
5169 	/* this should be 128 bytes */
5170 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5171 		return (EINVAL);
5172 	return (0);
5173 }
5174 
5175 static int
5176 sd_use_efi(struct sd_lun *un, int path_flag)
5177 {
5178 	int		i;
5179 	int		rval = 0;
5180 	efi_gpe_t	*partitions;
5181 	uchar_t		*buf;
5182 	uint_t		lbasize;
5183 	uint64_t	cap;
5184 	uint_t		nparts;
5185 	diskaddr_t	gpe_lba;
5186 
5187 	ASSERT(mutex_owned(SD_MUTEX(un)));
5188 	lbasize = un->un_tgt_blocksize;
5189 
5190 	mutex_exit(SD_MUTEX(un));
5191 
5192 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5193 
5194 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5195 		rval = EINVAL;
5196 		goto done_err;
5197 	}
5198 
5199 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5200 	if (rval) {
5201 		goto done_err;
5202 	}
5203 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5204 		/* not ours */
5205 		rval = ESRCH;
5206 		goto done_err;
5207 	}
5208 
5209 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5210 	if (rval) {
5211 		goto done_err;
5212 	}
5213 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5214 
5215 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5216 		/*
5217 		 * Couldn't read the primary, try the backup.  Our
5218 		 * capacity at this point could be based on CHS, so
5219 		 * check what the device reports.
5220 		 */
5221 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5222 		    path_flag);
5223 		if (rval) {
5224 			goto done_err;
5225 		}
5226 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5227 		    cap - 1, path_flag)) != 0) {
5228 			goto done_err;
5229 		}
5230 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5231 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5232 			goto done_err;
5233 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5234 		    "primary label corrupt; using backup\n");
5235 	}
5236 
5237 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5238 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5239 
5240 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5241 	    path_flag);
5242 	if (rval) {
5243 		goto done_err;
5244 	}
5245 	partitions = (efi_gpe_t *)buf;
5246 
5247 	if (nparts > MAXPART) {
5248 		nparts = MAXPART;
5249 	}
5250 	sd_swap_efi_gpe(nparts, partitions);
5251 
5252 	mutex_enter(SD_MUTEX(un));
5253 
5254 	/* Fill in partition table. */
5255 	for (i = 0; i < nparts; i++) {
5256 		if (partitions->efi_gpe_StartingLBA != 0 ||
5257 		    partitions->efi_gpe_EndingLBA != 0) {
5258 			un->un_map[i].dkl_cylno =
5259 			    partitions->efi_gpe_StartingLBA;
5260 			un->un_map[i].dkl_nblk =
5261 			    partitions->efi_gpe_EndingLBA -
5262 			    partitions->efi_gpe_StartingLBA + 1;
5263 			un->un_offset[i] =
5264 			    partitions->efi_gpe_StartingLBA;
5265 		}
5266 		if (i == WD_NODE) {
5267 			/*
5268 			 * minor number 7 corresponds to the whole disk
5269 			 */
5270 			un->un_map[i].dkl_cylno = 0;
5271 			un->un_map[i].dkl_nblk = un->un_blockcount;
5272 			un->un_offset[i] = 0;
5273 		}
5274 		partitions++;
5275 	}
5276 	un->un_solaris_offset = 0;
5277 	un->un_solaris_size = cap;
5278 	un->un_f_geometry_is_valid = TRUE;
5279 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5280 	return (0);
5281 
5282 done_err:
5283 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5284 	mutex_enter(SD_MUTEX(un));
5285 	/*
5286 	 * if we didn't find something that could look like a VTOC
5287 	 * and the disk is over 1TB, we know there isn't a valid label.
5288 	 * Otherwise let sd_uselabel decide what to do.  We only
5289 	 * want to invalidate this if we're certain the label isn't
5290 	 * valid because sd_prop_op will now fail, which in turn
5291 	 * causes things like opens and stats on the partition to fail.
5292 	 */
5293 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5294 		un->un_f_geometry_is_valid = FALSE;
5295 	}
5296 	return (rval);
5297 }
5298 
5299 
5300 /*
5301  *    Function: sd_uselabel
5302  *
5303  * Description: Validate the disk label and update the relevant data (geometry,
5304  *		partition, vtoc, and capacity data) in the sd_lun struct.
5305  *		Marks the geometry of the unit as being valid.
5306  *
5307  *   Arguments: un: unit struct.
5308  *		dk_label: disk label
5309  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5310  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5311  *			to use the USCSI "direct" chain and bypass the normal
5312  *			command waitq.
5313  *
5314  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5315  *		partition, vtoc, and capacity data are good.
5316  *
5317  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5318  *		label; or computed capacity does not jibe with capacity
5319  *		reported from the READ CAPACITY command.
5320  *
5321  *     Context: Kernel thread only (can sleep).
5322  */
5323 
static int
sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
{
	short	*sp;
	short	sum;
	short	count;
	int	label_error = SD_LABEL_IS_VALID;
	int	i;
	int	capacity;
	int	part_end;
	int	track_capacity;
	int	err;
#if defined(_SUNOS_VTOC_16)
	struct	dkl_partition	*vpartp;
#endif
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Validate the magic number of the label. */
	if (labp->dkl_magic != DKL_MAGIC) {
#if defined(__sparc)
		if ((un->un_state == SD_STATE_NORMAL) &&
		    !ISREMOVABLE(un)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label; wrong magic number\n");
		}
#endif
		return (SD_LABEL_IS_INVALID);
	}

	/*
	 * Validate the checksum of the label: the XOR of all shorts in a
	 * valid label (including the stored checksum) is zero.
	 */
	sp  = (short *)labp;
	sum = 0;
	count = sizeof (struct dk_label) / sizeof (short);
	while (count--)	 {
		sum ^= *sp++;
	}

	if (sum != 0) {
		/*
		 * Note: exactly one of the two #if arms below opens the
		 * conditional that guards the warning message.
		 */
#if defined(_SUNOS_VTOC_16)
		if (un->un_state == SD_STATE_NORMAL && !ISCD(un)) {
#elif defined(_SUNOS_VTOC_8)
		if (un->un_state == SD_STATE_NORMAL && !ISREMOVABLE(un)) {
#endif
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - label checksum failed\n");
		}
		return (SD_LABEL_IS_INVALID);
	}


	/*
	 * Fill in geometry structure with data from label.
	 */
	bzero(&un->un_g, sizeof (struct dk_geom));
	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
	un->un_g.dkg_acyl   = labp->dkl_acyl;
	un->un_g.dkg_bcyl   = 0;
	un->un_g.dkg_nhead  = labp->dkl_nhead;
	un->un_g.dkg_nsect  = labp->dkl_nsect;
	un->un_g.dkg_intrlv = labp->dkl_intrlv;

#if defined(_SUNOS_VTOC_8)
	un->un_g.dkg_gap1   = labp->dkl_gap1;
	un->un_g.dkg_gap2   = labp->dkl_gap2;
	un->un_g.dkg_bhead  = labp->dkl_bhead;
#endif
#if defined(_SUNOS_VTOC_16)
	un->un_dkg_skew = labp->dkl_skew;
#endif

#if defined(__i386) || defined(__amd64)
	un->un_g.dkg_apc = labp->dkl_apc;
#endif

	/*
	 * Currently we rely on the values in the label being accurate. If
	 * dlk_rpm or dlk_pcly are zero in the label, use a default value.
	 *
	 * Note: In the future a MODE SENSE may be used to retrieve this data,
	 * although this command is optional in SCSI-2.
	 */
	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);

	/*
	 * The Read and Write reinstruct values may not be valid
	 * for older disks.
	 */
	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;

	/* Fill in partition table. */
#if defined(_SUNOS_VTOC_8)
	for (i = 0; i < NDKMAP; i++) {
		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
	}
#endif
#if  defined(_SUNOS_VTOC_16)
	vpartp		= labp->dkl_vtoc.v_part;
	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;

	/* VTOC_16 partitions carry LBAs; convert start to a cylinder no. */
	for (i = 0; i < NDKMAP; i++, vpartp++) {
		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
		un->un_map[i].dkl_nblk  = vpartp->p_size;
	}
#endif

	/* Fill in VTOC Structure. */
	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
#if defined(_SUNOS_VTOC_8)
	/*
	 * The 8-slice vtoc does not include the ascii label; save it into
	 * the device's soft state structure here.
	 */
	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#endif

	/* Mark the geometry as valid. */
	un->un_f_geometry_is_valid = TRUE;

	/* Now look for a valid capacity. */
	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
	capacity	= (un->un_g.dkg_ncyl  * track_capacity);

	if (un->un_g.dkg_acyl) {
#if defined(__i386) || defined(__amd64)
		/* we may have > 1 alts cylinder */
		capacity += (track_capacity * un->un_g.dkg_acyl);
#else
		capacity += track_capacity;
#endif
	}

	/*
	 * At this point, un->un_blockcount should contain valid data from
	 * the READ CAPACITY command.
	 */
	if (un->un_f_blockcount_is_valid != TRUE) {
		/*
		 * We have a situation where the target didn't give us a good
		 * READ CAPACITY value, yet there appears to be a valid label.
		 * In this case, we'll fake the capacity.
		 */
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
		goto done;
	}


	/*
	 * A label-derived capacity that fits within the device capacity (or
	 * a non-NORMAL device state) is accepted as-is.
	 */
	if ((capacity <= un->un_blockcount) ||
	    (un->un_state != SD_STATE_NORMAL)) {
#if defined(_SUNOS_VTOC_8)
		/*
		 * We can't let this happen on drives that are subdivided
		 * into logical disks (i.e., that have an fdisk table).
		 * The un_blockcount field should always hold the full media
		 * size in sectors, period.  This code would overwrite
		 * un_blockcount with the size of the Solaris fdisk partition.
		 */
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
		    capacity, un->un_blockcount);
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
#endif	/* defined(_SUNOS_VTOC_8) */
		goto done;
	}

	if (ISCD(un)) {
		/* For CDROMs, we trust that the data in the label is OK. */
#if defined(_SUNOS_VTOC_8)
		for (i = 0; i < NDKMAP; i++) {
			part_end = labp->dkl_nhead * labp->dkl_nsect *
			    labp->dkl_map[i].dkl_cylno +
			    labp->dkl_map[i].dkl_nblk  - 1;

			if ((labp->dkl_map[i].dkl_nblk) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
#if defined(_SUNOS_VTOC_16)
		vpartp = &(labp->dkl_vtoc.v_part[0]);
		for (i = 0; i < NDKMAP; i++, vpartp++) {
			part_end = vpartp->p_start + vpartp->p_size;
			if ((vpartp->p_size > 0) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
	} else {
		uint64_t t_capacity;
		uint32_t t_lbasize;

		/*
		 * Re-issue READ CAPACITY (dropping the mutex around the
		 * command) to double-check the device size before declaring
		 * the label's geometry bad.
		 */
		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
		    path_flag);
		ASSERT(t_capacity <= DK_MAX_BLOCKS);
		mutex_enter(SD_MUTEX(un));

		if (err == 0) {
			sd_update_block_info(un, t_lbasize, t_capacity);
		}

		if (capacity > un->un_blockcount) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - bad geometry\n");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Label says %u blocks; Drive says %llu blocks\n",
			    capacity, (unsigned long long)un->un_blockcount);
			un->un_f_geometry_is_valid = FALSE;
			label_error = SD_LABEL_IS_INVALID;
		}
	}

done:

	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
	    un->un_tgt_blocksize, un->un_blockcount,
	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);

	ASSERT(mutex_owned(SD_MUTEX(un)));

	return (label_error);
}
5564 
5565 
5566 /*
5567  *    Function: sd_build_default_label
5568  *
5569  * Description: Generate a default label for those devices that do not have
5570  *		one, e.g., new media, removable cartridges, etc..
5571  *
5572  *     Context: Kernel thread only
5573  */
5574 
static void
sd_build_default_label(struct sd_lun *un)
{
#if defined(_SUNOS_VTOC_16)
	uint_t	phys_spc;
	uint_t	disksize;
	struct	dk_geom un_g;
#endif

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

#if defined(_SUNOS_VTOC_8)
	/*
	 * Note: This is a legacy check for non-removable devices on VTOC_8
	 * only. This may be a valid check for VTOC_16 as well.
	 */
	if (!ISREMOVABLE(un)) {
		return;
	}
#endif

	/* Start from a clean geometry, vtoc, and partition map. */
	bzero(&un->un_g, sizeof (struct dk_geom));
	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));

#if defined(_SUNOS_VTOC_8)

	/*
	 * It's a REMOVABLE media, therefore no label (on sparc, anyway).
	 * But it is still necessary to set up various geometry information,
	 * and we are doing this here.
	 */

	/*
	 * For the rpm, we use the minimum for the disk.  For the head, cyl,
	 * and number of sector per track, if the capacity <= 1GB, head = 64,
	 * sect = 32.  else head = 255, sect 63 Note: the capacity should be
	 * equal to C*H*S values.  This will cause some truncation of size due
	 * to round off errors. For CD-ROMs, this truncation can have adverse
	 * side effects, so returning ncyl and nhead as 1. The nsect will
	 * overflow for most of CD-ROMs as nsect is of type ushort. (4190569)
	 */
	if (ISCD(un)) {
		/*
		 * Preserve the old behavior for non-writable
		 * medias. Since dkg_nsect is a ushort, it
		 * will lose bits as cdroms have more than
		 * 65536 sectors. So if we recalculate
		 * capacity, it will become much shorter.
		 * But the dkg_* information is not
		 * used for CDROMs so it is OK. But for
		 * Writable CDs we need this information
		 * to be valid (for newfs say). So we
		 * make nsect and nhead > 1 that way
		 * nsect can still stay within ushort limit
		 * without losing any bits.
		 */
		if (un->un_f_mmc_writable_media == TRUE) {
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
			un->un_blockcount = un->un_g.dkg_ncyl *
			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
		} else {
			un->un_g.dkg_ncyl  = 1;
			un->un_g.dkg_nhead = 1;
			un->un_g.dkg_nsect = un->un_blockcount;
		}
	} else {
		if (un->un_blockcount <= 0x1000) {
			/* unlabeled SCSI floppy device */
			un->un_g.dkg_nhead = 2;
			un->un_g.dkg_ncyl = 80;
			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
		} else if (un->un_blockcount <= 0x200000) {
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
		} else {
			un->un_g.dkg_nhead = 255;
			un->un_g.dkg_nsect = 63;
			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
		}
		/* Truncate capacity to an exact C*H*S multiple. */
		un->un_blockcount =
		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	un->un_g.dkg_acyl	= 0;
	un->un_g.dkg_bcyl	= 0;
	un->un_g.dkg_rpm	= 200;
	un->un_asciilabel[0]	= '\0';
	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;

	/* Slice 0 and the backup slice (2) both cover the whole capacity. */
	un->un_map[0].dkl_cylno = 0;
	un->un_map[0].dkl_nblk  = un->un_blockcount;
	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = un->un_blockcount;

#elif defined(_SUNOS_VTOC_16)

	if (un->un_solaris_size == 0) {
		/*
		 * Got fdisk table but no solaris entry therefore
		 * don't create a default label
		 */
		un->un_f_geometry_is_valid = TRUE;
		return;
	}

	/*
	 * For CDs we continue to use the physical geometry to calculate
	 * number of cylinders. All other devices must convert the
	 * physical geometry (geom_cache) to values that will fit
	 * in a dk_geom structure.
	 */
	if (ISCD(un)) {
		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
	} else {
		/* Convert physical geometry to disk geometry */
		bzero(&un_g, sizeof (struct dk_geom));
		sd_convert_geometry(un->un_blockcount, &un_g);
		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	/* Derive cylinder counts from the Solaris partition size. */
	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
	un->un_g.dkg_acyl = DK_ACYL;
	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
	disksize = un->un_g.dkg_ncyl * phys_spc;

	if (ISCD(un)) {
		/*
		 * CD's don't use the "heads * sectors * cyls"-type of
		 * geometry, but instead use the entire capacity of the media.
		 */
		disksize = un->un_solaris_size;
		un->un_g.dkg_nhead = 1;
		un->un_g.dkg_nsect = 1;
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;

		un->un_vtoc.v_part[0].p_start = 0;
		un->un_vtoc.v_part[0].p_size  = disksize;
		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;

		un->un_map[0].dkl_cylno = 0;
		un->un_map[0].dkl_nblk  = disksize;
		un->un_offset[0] = 0;

	} else {
		/*
		 * Hard disks and removable media cartridges
		 */
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;

		/* Add boot slice */
		un->un_vtoc.v_part[8].p_start = 0;
		un->un_vtoc.v_part[8].p_size  = phys_spc;
		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;

		un->un_map[8].dkl_cylno = 0;
		un->un_map[8].dkl_nblk  = phys_spc;
		un->un_offset[8] = 0;
	}

	un->un_g.dkg_apc = 0;
	un->un_vtoc.v_nparts = V_NUMPAR;
	un->un_vtoc.v_version = V_VERSION;

	/* Add backup slice */
	un->un_vtoc.v_part[2].p_start = 0;
	un->un_vtoc.v_part[2].p_size  = disksize;
	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;

	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = disksize;
	un->un_offset[2] = 0;

	/* ASCII label records the synthesized geometry. */
	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);

#else
#error "No VTOC format defined."
#endif

	un->un_g.dkg_read_reinstruct  = 0;
	un->un_g.dkg_write_reinstruct = 0;

	un->un_g.dkg_intrlv = 1;

	un->un_vtoc.v_sanity  = VTOC_SANE;

	un->un_f_geometry_is_valid = TRUE;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_build_default_label: Default label created: "
	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
	    un->un_g.dkg_nsect, un->un_blockcount);
}
5782 
5783 
5784 #if defined(_FIRMWARE_NEEDS_FDISK)
5785 /*
5786  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5787  */
5788 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5789 #define	LBA_MAX_CYL	(1022 & 0xFF)
5790 #define	LBA_MAX_HEAD	(254)
5791 
5792 
5793 /*
5794  *    Function: sd_has_max_chs_vals
5795  *
5796  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5797  *
5798  *   Arguments: fdp - ptr to CHS info
5799  *
5800  * Return Code: True or false
5801  *
5802  *     Context: Any.
5803  */
5804 
5805 static int
5806 sd_has_max_chs_vals(struct ipart *fdp)
5807 {
5808 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
5809 	    (fdp->beghead == LBA_MAX_HEAD)	&&
5810 	    (fdp->begsect == LBA_MAX_SECT)	&&
5811 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
5812 	    (fdp->endhead == LBA_MAX_HEAD)	&&
5813 	    (fdp->endsect == LBA_MAX_SECT));
5814 }
5815 #endif
5816 
5817 
5818 /*
5819  *    Function: sd_inq_fill
5820  *
5821  * Description: Print a piece of inquiry data, cleaned up for non-printable
5822  *		characters and stopping at the first space character after
5823  *		the beginning of the passed string;
5824  *
5825  *   Arguments: p - source string
5826  *		l - maximum length to copy
5827  *		s - destination string
5828  *
5829  *     Context: Any.
5830  */
5831 
static void
sd_inq_fill(char *p, int l, char *s)
{
	unsigned	n;
	char		ch;

	for (n = 0; n < (unsigned)l; n++) {
		ch = *p++;
		if ((ch < ' ') || (ch >= 0x7F)) {
			/* Replace non-printable characters with '*'. */
			ch = '*';
		} else if ((n != 0) && (ch == ' ')) {
			/* Stop at the first space after the leading char. */
			break;
		}
		*s++ = ch;
	}
	*s = '\0';
}
5848 
5849 
5850 /*
5851  *    Function: sd_register_devid
5852  *
5853  * Description: This routine will obtain the device id information from the
5854  *		target, obtain the serial number, and register the device
5855  *		id with the ddi framework.
5856  *
5857  *   Arguments: devi - the system's dev_info_t for the device.
5858  *		un - driver soft state (unit) structure
5859  *		reservation_flag - indicates if a reservation conflict
5860  *		occurred during attach
5861  *
5862  *     Context: Kernel Thread
5863  */
static void
sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
{
	int		rval		= 0;
	uchar_t		*inq80		= NULL;
	size_t		inq80_len	= MAX_INQUIRY_SIZE;
	size_t		inq80_resid	= 0;
	uchar_t		*inq83		= NULL;
	size_t		inq83_len	= MAX_INQUIRY_SIZE;
	size_t		inq83_resid	= 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT((SD_DEVINFO(un)) == devi);

	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage the devid's by storing them in last 2 available sectors
	 * on the drive and have them fabricated by the ddi layer by calling
	 * ddi_devid_init and passing the DEVID_FAB flag.
	 */
	if (un->un_f_opt_fab_devid == TRUE) {
		/*
		 * Depending on EINVAL isn't reliable, since a reserved disk
		 * may result in invalid geometry, so check to make sure a
		 * reservation conflict did not occur during attach.
		 */
		if ((sd_get_devid(un) == EINVAL) &&
		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
			/*
			 * The devid is invalid AND there is no reservation
			 * conflict.  Fabricate a new devid.
			 */
			(void) sd_create_devid(un);
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: Devid Fabricated\n");
		}
		return;
	}

	/*
	 * We check the availability of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
	 * 0x83 is available, that is the best choice.  Our next choice is
	 * 0x80.  If neither are available, we munge the devid from the device
	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
	 * to fabricate a devid for non-Sun qualified disks.
	 */
	if (sd_check_vpd_page_support(un) == 0) {
		/* collect page 80 data if available */
		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {

			/* Drop the mutex around the blocking INQUIRY. */
			mutex_exit(SD_MUTEX(un));
			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
			    0x01, 0x80, &inq80_resid);

			if (rval != 0) {
				kmem_free(inq80, inq80_len);
				inq80 = NULL;
				inq80_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}

		/* collect page 83 data if available */
		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {

			mutex_exit(SD_MUTEX(un));
			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
			    0x01, 0x83, &inq83_resid);

			if (rval != 0) {
				kmem_free(inq83, inq83_len);
				inq83 = NULL;
				inq83_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * encode best devid possible based on data available
	 *
	 * NOTE(review): if a VPD page was never read (inq80/inq83 == NULL
	 * because the mask bit was clear or VPD is unsupported), its length
	 * variable still holds MAX_INQUIRY_SIZE rather than 0; presumably
	 * ddi_devid_scsi_encode() ignores the length when the buffer pointer
	 * is NULL -- confirm.
	 */
	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
	    (char *)ddi_driver_name(SD_DEVINFO(un)),
	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {

		/* devid successfully encoded, register devid */
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);

	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage the devid's by storing them in
		 * last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricate devid only if there's no
		 * fabricate devid existed.
		 */
		if (sd_get_devid(un) == EINVAL) {
			(void) sd_create_devid(un);
			un->un_f_opt_fab_devid = TRUE;
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: devid fabricated using "
			    "ddi framework\n");
		}
	}

	/* clean up resources */
	if (inq80 != NULL) {
		kmem_free(inq80, inq80_len);
	}
	if (inq83 != NULL) {
		kmem_free(inq83, inq83_len);
	}
}
6002 
6003 static daddr_t
6004 sd_get_devid_block(struct sd_lun *un)
6005 {
6006 	daddr_t			spc, blk, head, cyl;
6007 
6008 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6009 		/* this geometry doesn't allow us to write a devid */
6010 		if (un->un_g.dkg_acyl < 2) {
6011 			return (-1);
6012 		}
6013 
6014 		/*
6015 		 * Subtract 2 guarantees that the next to last cylinder
6016 		 * is used
6017 		 */
6018 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6019 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6020 		head = un->un_g.dkg_nhead - 1;
6021 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6022 		    (head * un->un_g.dkg_nsect) + 1;
6023 	} else {
6024 		if (un->un_reserved != -1) {
6025 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6026 		} else {
6027 			return (-1);
6028 		}
6029 	}
6030 	return (blk);
6031 }
6032 
6033 /*
6034  *    Function: sd_get_devid
6035  *
6036  * Description: This routine will return 0 if a valid device id has been
6037  *		obtained from the target and stored in the soft state. If a
6038  *		valid device id has not been previously read and stored, a
6039  *		read attempt will be made.
6040  *
6041  *   Arguments: un - driver soft state (unit) structure
6042  *
6043  * Return Code: 0 if we successfully get the device id
6044  *
6045  *     Context: Kernel Thread
6046  */
6047 
static int
sd_get_devid(struct sd_lun *un)
{
	struct dk_devid		*dkdevid;
	ddi_devid_t		tmpid;
	uint_t			*ip;
	size_t			sz;
	daddr_t			blk;
	int			status;
	int			chksum;
	int			i;
	size_t			buffer_size;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
	    un);

	/* Already cached in the soft state; nothing to read. */
	if (un->un_devid != NULL) {
		return (0);
	}

	/* No storable location for a devid under this geometry. */
	blk = sd_get_devid_block(un);
	if (blk < 0)
		return (EINVAL);

	/*
	 * Read and verify device id, stored in the reserved cylinders at the
	 * end of the disk. Backup label is on the odd sectors of the last
	 * track of the last cylinder. Device id will be on track of the next
	 * to last cylinder.
	 */
	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
	mutex_exit(SD_MUTEX(un));
	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
	    SD_PATH_DIRECT);
	if (status != 0) {
		goto error;
	}

	/* Validate the revision */
	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Calculate the checksum: XOR of every word except the trailing
	 * checksum slot itself.
	 *
	 * NOTE(review): this walks un_sys_blocksize bytes while the buffer
	 * was sized via SD_REQBYTES2TGTBYTES(); assumes the target-sized
	 * buffer is at least one system block -- confirm.
	 */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Compare the checksums */
	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		status = EINVAL;
		goto error;
	}

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Store the device id in the driver soft state
	 */
	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
	tmpid = kmem_alloc(sz, KM_SLEEP);

	mutex_enter(SD_MUTEX(un));

	un->un_devid = tmpid;
	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);

	kmem_free(dkdevid, buffer_size);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);

	return (status);
error:
	/* Re-acquire the mutex; callers expect it held on return. */
	mutex_enter(SD_MUTEX(un));
	kmem_free(dkdevid, buffer_size);
	return (status);
}
6138 
6139 
6140 /*
6141  *    Function: sd_create_devid
6142  *
6143  * Description: This routine will fabricate the device id and write it
6144  *		to the disk.
6145  *
6146  *   Arguments: un - driver soft state (unit) structure
6147  *
6148  * Return Code: value of the fabricated device id
6149  *
6150  *     Context: Kernel Thread
6151  */
6152 
6153 static ddi_devid_t
6154 sd_create_devid(struct sd_lun *un)
6155 {
6156 	ASSERT(un != NULL);
6157 
6158 	/* Fabricate the devid */
6159 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6160 	    == DDI_FAILURE) {
6161 		return (NULL);
6162 	}
6163 
6164 	/* Write the devid to disk */
6165 	if (sd_write_deviceid(un) != 0) {
6166 		ddi_devid_free(un->un_devid);
6167 		un->un_devid = NULL;
6168 	}
6169 
6170 	return (un->un_devid);
6171 }
6172 
6173 
6174 /*
6175  *    Function: sd_write_deviceid
6176  *
6177  * Description: This routine will write the device id to the disk
6178  *		reserved sector.
6179  *
6180  *   Arguments: un - driver soft state (unit) structure
6181  *
6182  * Return Code: EINVAL
6183  *		value returned by sd_send_scsi_cmd
6184  *
6185  *     Context: Kernel Thread
6186  */
6187 
6188 static int
6189 sd_write_deviceid(struct sd_lun *un)
6190 {
6191 	struct dk_devid		*dkdevid;
6192 	daddr_t			blk;
6193 	uint_t			*ip, chksum;
6194 	int			status;
6195 	int			i;
6196 
6197 	ASSERT(mutex_owned(SD_MUTEX(un)));
6198 
6199 	blk = sd_get_devid_block(un);
6200 	if (blk < 0)
6201 		return (-1);
6202 	mutex_exit(SD_MUTEX(un));
6203 
6204 	/* Allocate the buffer */
6205 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6206 
6207 	/* Fill in the revision */
6208 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6209 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6210 
6211 	/* Copy in the device id */
6212 	mutex_enter(SD_MUTEX(un));
6213 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6214 	    ddi_devid_sizeof(un->un_devid));
6215 	mutex_exit(SD_MUTEX(un));
6216 
6217 	/* Calculate the checksum */
6218 	chksum = 0;
6219 	ip = (uint_t *)dkdevid;
6220 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6221 	    i++) {
6222 		chksum ^= ip[i];
6223 	}
6224 
6225 	/* Fill-in checksum */
6226 	DKD_FORMCHKSUM(chksum, dkdevid);
6227 
6228 	/* Write the reserved sector */
6229 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6230 	    SD_PATH_DIRECT);
6231 
6232 	kmem_free(dkdevid, un->un_sys_blocksize);
6233 
6234 	mutex_enter(SD_MUTEX(un));
6235 	return (status);
6236 }
6237 
6238 
6239 /*
6240  *    Function: sd_check_vpd_page_support
6241  *
6242  * Description: This routine sends an inquiry command with the EVPD bit set and
6243  *		a page code of 0x00 to the device. It is used to determine which
 *		vital product pages are available to find the devid. We are
6245  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
6246  *		device does not support that command.
6247  *
6248  *   Arguments: un  - driver soft state (unit) structure
6249  *
6250  * Return Code: 0 - success
 *		-1 - the command was rejected or the device reports no
 *		supported VPD pages
6252  *
6253  *     Context: This routine can sleep.
6254  */
6255 
6256 static int
6257 sd_check_vpd_page_support(struct sd_lun *un)
6258 {
6259 	uchar_t	*page_list	= NULL;
6260 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6261 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6262 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6263 	int    	rval		= 0;
6264 	int	counter;
6265 
6266 	ASSERT(un != NULL);
6267 	ASSERT(mutex_owned(SD_MUTEX(un)));
6268 
6269 	mutex_exit(SD_MUTEX(un));
6270 
6271 	/*
6272 	 * We'll set the page length to the maximum to save figuring it out
6273 	 * with an additional call.
6274 	 */
6275 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6276 
6277 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6278 	    page_code, NULL);
6279 
6280 	mutex_enter(SD_MUTEX(un));
6281 
6282 	/*
6283 	 * Now we must validate that the device accepted the command, as some
6284 	 * drives do not support it.  If the drive does support it, we will
6285 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6286 	 * not, we return -1.
6287 	 */
6288 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6289 		/* Loop to find one of the 2 pages we need */
6290 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6291 
6292 		/*
6293 		 * Pages are returned in ascending order, and 0x83 is what we
6294 		 * are hoping for.
6295 		 */
6296 		while ((page_list[counter] <= 0x83) &&
6297 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6298 		    VPD_HEAD_OFFSET))) {
6299 			/*
6300 			 * Add 3 because page_list[3] is the number of
6301 			 * pages minus 3
6302 			 */
6303 
6304 			switch (page_list[counter]) {
6305 			case 0x00:
6306 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6307 				break;
6308 			case 0x80:
6309 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6310 				break;
6311 			case 0x81:
6312 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6313 				break;
6314 			case 0x82:
6315 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6316 				break;
6317 			case 0x83:
6318 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6319 				break;
6320 			}
6321 			counter++;
6322 		}
6323 
6324 	} else {
6325 		rval = -1;
6326 
6327 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6328 		    "sd_check_vpd_page_support: This drive does not implement "
6329 		    "VPD pages.\n");
6330 	}
6331 
6332 	kmem_free(page_list, page_length);
6333 
6334 	return (rval);
6335 }
6336 
6337 
6338 /*
6339  *    Function: sd_setup_pm
6340  *
6341  * Description: Initialize Power Management on the device
6342  *
6343  *     Context: Kernel Thread
6344  */
6345 
6346 static void
6347 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6348 {
6349 	uint_t	log_page_size;
6350 	uchar_t	*log_page_data;
6351 	int	rval;
6352 
6353 	/*
6354 	 * Since we are called from attach, holding a mutex for
6355 	 * un is unnecessary. Because some of the routines called
6356 	 * from here require SD_MUTEX to not be held, assert this
6357 	 * right up front.
6358 	 */
6359 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6360 	/*
6361 	 * Since the sd device does not have the 'reg' property,
6362 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6363 	 * The following code is to tell cpr that this device
6364 	 * DOES need to be suspended and resumed.
6365 	 */
6366 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6367 	    "pm-hardware-state", "needs-suspend-resume");
6368 
6369 	/*
6370 	 * Check if HBA has set the "pm-capable" property.
6371 	 * If "pm-capable" exists and is non-zero then we can
6372 	 * power manage the device without checking the start/stop
6373 	 * cycle count log sense page.
6374 	 *
6375 	 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
6376 	 * then we should not power manage the device.
6377 	 *
6378 	 * If "pm-capable" doesn't exist then un->un_pm_capable_prop will
6379 	 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case, sd will
6380 	 * check the start/stop cycle count log sense page and power manage
6381 	 * the device if the cycle count limit has not been exceeded.
6382 	 */
6383 	un->un_pm_capable_prop =
6384 	    ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6385 		"pm-capable", SD_PM_CAPABLE_UNDEFINED);
6386 	if (un->un_pm_capable_prop != SD_PM_CAPABLE_UNDEFINED) {
6387 		/*
6388 		 * pm-capable property exists.
6389 		 *
6390 		 * Convert "TRUE" values for un_pm_capable_prop to
6391 		 * SD_PM_CAPABLE_TRUE (1) to make it easier to check later.
6392 		 * "TRUE" values are any values except SD_PM_CAPABLE_FALSE (0)
6393 		 *  and SD_PM_CAPABLE_UNDEFINED (-1)
6394 		 */
6395 		if (un->un_pm_capable_prop != SD_PM_CAPABLE_FALSE) {
6396 			un->un_pm_capable_prop = SD_PM_CAPABLE_TRUE;
6397 		}
6398 
6399 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6400 		    "sd_unit_attach: un:0x%p pm-capable "
6401 		    "property set to %d.\n", un, un->un_pm_capable_prop);
6402 	}
6403 
6404 	/*
6405 	 * This complies with the new power management framework
6406 	 * for certain desktop machines. Create the pm_components
6407 	 * property as a string array property.
6408 	 *
6409 	 * If this is a removable device or if the pm-capable property
6410 	 * is SD_PM_CAPABLE_TRUE (1) then we should create the
6411 	 * pm_components property without checking for the existance of
6412 	 * the start-stop cycle counter log page
6413 	 */
6414 	if (ISREMOVABLE(un) ||
6415 	    un->un_pm_capable_prop == SD_PM_CAPABLE_TRUE) {
6416 		/*
6417 		 * not all devices have a motor, try it first.
6418 		 * some devices may return ILLEGAL REQUEST, some
6419 		 * will hang
6420 		 */
6421 		un->un_f_start_stop_supported = TRUE;
6422 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6423 		    SD_PATH_DIRECT) != 0) {
6424 			un->un_f_start_stop_supported = FALSE;
6425 		}
6426 
6427 		/*
6428 		 * create pm properties anyways otherwise the parent can't
6429 		 * go to sleep
6430 		 */
6431 		(void) sd_create_pm_components(devi, un);
6432 		un->un_f_pm_is_enabled = TRUE;
6433 
6434 		/*
6435 		 * Need to create a zero length (Boolean) property
6436 		 * removable-media for the removable media devices.
6437 		 * Note that the return value of the property is not being
6438 		 * checked, since if unable to create the property
6439 		 * then do not want the attach to fail altogether. Consistent
6440 		 * with other property creation in attach.
6441 		 */
6442 		if (ISREMOVABLE(un)) {
6443 			(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
6444 			    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
6445 		}
6446 		return;
6447 	}
6448 
6449 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6450 
6451 #ifdef	SDDEBUG
6452 	if (sd_force_pm_supported) {
6453 		/* Force a successful result */
6454 		rval = 1;
6455 	}
6456 #endif
6457 
6458 	/*
6459 	 * If the start-stop cycle counter log page is not supported
6460 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6461 	 * then we should not create the pm_components property.
6462 	 */
6463 	if (rval == -1 || un->un_pm_capable_prop == SD_PM_CAPABLE_FALSE) {
6464 		/*
6465 		 * Error.
6466 		 * Reading log sense failed, most likely this is
6467 		 * an older drive that does not support log sense.
6468 		 * If this fails auto-pm is not supported.
6469 		 */
6470 		un->un_power_level = SD_SPINDLE_ON;
6471 		un->un_f_pm_is_enabled = FALSE;
6472 
6473 	} else if (rval == 0) {
6474 		/*
6475 		 * Page not found.
6476 		 * The start stop cycle counter is implemented as page
6477 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
6478 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6479 		 */
6480 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6481 			/*
6482 			 * Page found, use this one.
6483 			 */
6484 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6485 			un->un_f_pm_is_enabled = TRUE;
6486 		} else {
6487 			/*
6488 			 * Error or page not found.
6489 			 * auto-pm is not supported for this device.
6490 			 */
6491 			un->un_power_level = SD_SPINDLE_ON;
6492 			un->un_f_pm_is_enabled = FALSE;
6493 		}
6494 	} else {
6495 		/*
6496 		 * Page found, use it.
6497 		 */
6498 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6499 		un->un_f_pm_is_enabled = TRUE;
6500 	}
6501 
6502 
6503 	if (un->un_f_pm_is_enabled == TRUE) {
6504 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6505 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6506 
6507 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6508 		    log_page_size, un->un_start_stop_cycle_page,
6509 		    0x01, 0, SD_PATH_DIRECT);
6510 #ifdef	SDDEBUG
6511 		if (sd_force_pm_supported) {
6512 			/* Force a successful result */
6513 			rval = 0;
6514 		}
6515 #endif
6516 
6517 		/*
6518 		 * If the Log sense for Page( Start/stop cycle counter page)
6519 		 * succeeds, then power managment is supported and we can
6520 		 * enable auto-pm.
6521 		 */
6522 		if (rval == 0)  {
6523 			(void) sd_create_pm_components(devi, un);
6524 		} else {
6525 			un->un_power_level = SD_SPINDLE_ON;
6526 			un->un_f_pm_is_enabled = FALSE;
6527 		}
6528 
6529 		kmem_free(log_page_data, log_page_size);
6530 	}
6531 }
6532 
6533 
6534 /*
6535  *    Function: sd_create_pm_components
6536  *
6537  * Description: Initialize PM property.
6538  *
6539  *     Context: Kernel thread context
6540  */
6541 
6542 static void
6543 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6544 {
6545 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6546 
6547 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6548 
6549 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6550 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6551 		/*
6552 		 * When components are initially created they are idle,
6553 		 * power up any non-removables.
6554 		 * Note: the return value of pm_raise_power can't be used
6555 		 * for determining if PM should be enabled for this device.
6556 		 * Even if you check the return values and remove this
6557 		 * property created above, the PM framework will not honor the
6558 		 * change after the first call to pm_raise_power. Hence,
6559 		 * removal of that property does not help if pm_raise_power
6560 		 * fails. In the case of removable media, the start/stop
6561 		 * will fail if the media is not present.
6562 		 */
6563 		if ((!ISREMOVABLE(un)) && (pm_raise_power(SD_DEVINFO(un), 0,
6564 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6565 			mutex_enter(SD_MUTEX(un));
6566 			un->un_power_level = SD_SPINDLE_ON;
6567 			mutex_enter(&un->un_pm_mutex);
6568 			/* Set to on and not busy. */
6569 			un->un_pm_count = 0;
6570 		} else {
6571 			mutex_enter(SD_MUTEX(un));
6572 			un->un_power_level = SD_SPINDLE_OFF;
6573 			mutex_enter(&un->un_pm_mutex);
6574 			/* Set to off. */
6575 			un->un_pm_count = -1;
6576 		}
6577 		mutex_exit(&un->un_pm_mutex);
6578 		mutex_exit(SD_MUTEX(un));
6579 	} else {
6580 		un->un_power_level = SD_SPINDLE_ON;
6581 		un->un_f_pm_is_enabled = FALSE;
6582 	}
6583 }
6584 
6585 
6586 /*
6587  *    Function: sd_ddi_suspend
6588  *
6589  * Description: Performs system power-down operations. This includes
6590  *		setting the drive state to indicate its suspended so
6591  *		that no new commands will be accepted. Also, wait for
6592  *		all commands that are in transport or queued to a timer
6593  *		for retry to complete. All timeout threads are cancelled.
6594  *
6595  * Return Code: DDI_FAILURE or DDI_SUCCESS
6596  *
6597  *     Context: Kernel thread context
6598  */
6599 
6600 static int
6601 sd_ddi_suspend(dev_info_t *devi)
6602 {
6603 	struct	sd_lun	*un;
6604 	clock_t		wait_cmds_complete;
6605 
6606 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6607 	if (un == NULL) {
6608 		return (DDI_FAILURE);
6609 	}
6610 
6611 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6612 
6613 	mutex_enter(SD_MUTEX(un));
6614 
6615 	/* Return success if the device is already suspended. */
6616 	if (un->un_state == SD_STATE_SUSPENDED) {
6617 		mutex_exit(SD_MUTEX(un));
6618 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6619 		    "device already suspended, exiting\n");
6620 		return (DDI_SUCCESS);
6621 	}
6622 
6623 	/* Return failure if the device is being used by HA */
6624 	if (un->un_resvd_status &
6625 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6626 		mutex_exit(SD_MUTEX(un));
6627 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6628 		    "device in use by HA, exiting\n");
6629 		return (DDI_FAILURE);
6630 	}
6631 
6632 	/*
6633 	 * Return failure if the device is in a resource wait
6634 	 * or power changing state.
6635 	 */
6636 	if ((un->un_state == SD_STATE_RWAIT) ||
6637 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6638 		mutex_exit(SD_MUTEX(un));
6639 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6640 		    "device in resource wait state, exiting\n");
6641 		return (DDI_FAILURE);
6642 	}
6643 
6644 
6645 	un->un_save_state = un->un_last_state;
6646 	New_state(un, SD_STATE_SUSPENDED);
6647 
6648 	/*
6649 	 * Wait for all commands that are in transport or queued to a timer
6650 	 * for retry to complete.
6651 	 *
6652 	 * While waiting, no new commands will be accepted or sent because of
6653 	 * the new state we set above.
6654 	 *
6655 	 * Wait till current operation has completed. If we are in the resource
6656 	 * wait state (with an intr outstanding) then we need to wait till the
6657 	 * intr completes and starts the next cmd. We want to wait for
6658 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6659 	 */
6660 	wait_cmds_complete = ddi_get_lbolt() +
6661 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6662 
6663 	while (un->un_ncmds_in_transport != 0) {
6664 		/*
6665 		 * Fail if commands do not finish in the specified time.
6666 		 */
6667 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6668 		    wait_cmds_complete) == -1) {
6669 			/*
6670 			 * Undo the state changes made above. Everything
6671 			 * must go back to it's original value.
6672 			 */
6673 			Restore_state(un);
6674 			un->un_last_state = un->un_save_state;
6675 			/* Wake up any threads that might be waiting. */
6676 			cv_broadcast(&un->un_suspend_cv);
6677 			mutex_exit(SD_MUTEX(un));
6678 			SD_ERROR(SD_LOG_IO_PM, un,
6679 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6680 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6681 			return (DDI_FAILURE);
6682 		}
6683 	}
6684 
6685 	/*
6686 	 * Cancel SCSI watch thread and timeouts, if any are active
6687 	 */
6688 
6689 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6690 		opaque_t temp_token = un->un_swr_token;
6691 		mutex_exit(SD_MUTEX(un));
6692 		scsi_watch_suspend(temp_token);
6693 		mutex_enter(SD_MUTEX(un));
6694 	}
6695 
6696 	if (un->un_reset_throttle_timeid != NULL) {
6697 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6698 		un->un_reset_throttle_timeid = NULL;
6699 		mutex_exit(SD_MUTEX(un));
6700 		(void) untimeout(temp_id);
6701 		mutex_enter(SD_MUTEX(un));
6702 	}
6703 
6704 	if (un->un_dcvb_timeid != NULL) {
6705 		timeout_id_t temp_id = un->un_dcvb_timeid;
6706 		un->un_dcvb_timeid = NULL;
6707 		mutex_exit(SD_MUTEX(un));
6708 		(void) untimeout(temp_id);
6709 		mutex_enter(SD_MUTEX(un));
6710 	}
6711 
6712 	mutex_enter(&un->un_pm_mutex);
6713 	if (un->un_pm_timeid != NULL) {
6714 		timeout_id_t temp_id = un->un_pm_timeid;
6715 		un->un_pm_timeid = NULL;
6716 		mutex_exit(&un->un_pm_mutex);
6717 		mutex_exit(SD_MUTEX(un));
6718 		(void) untimeout(temp_id);
6719 		mutex_enter(SD_MUTEX(un));
6720 	} else {
6721 		mutex_exit(&un->un_pm_mutex);
6722 	}
6723 
6724 	if (un->un_retry_timeid != NULL) {
6725 		timeout_id_t temp_id = un->un_retry_timeid;
6726 		un->un_retry_timeid = NULL;
6727 		mutex_exit(SD_MUTEX(un));
6728 		(void) untimeout(temp_id);
6729 		mutex_enter(SD_MUTEX(un));
6730 	}
6731 
6732 	if (un->un_direct_priority_timeid != NULL) {
6733 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6734 		un->un_direct_priority_timeid = NULL;
6735 		mutex_exit(SD_MUTEX(un));
6736 		(void) untimeout(temp_id);
6737 		mutex_enter(SD_MUTEX(un));
6738 	}
6739 
6740 	if (un->un_f_is_fibre == TRUE) {
6741 		/*
6742 		 * Remove callbacks for insert and remove events
6743 		 */
6744 		if (un->un_insert_event != NULL) {
6745 			mutex_exit(SD_MUTEX(un));
6746 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6747 			mutex_enter(SD_MUTEX(un));
6748 			un->un_insert_event = NULL;
6749 		}
6750 
6751 		if (un->un_remove_event != NULL) {
6752 			mutex_exit(SD_MUTEX(un));
6753 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6754 			mutex_enter(SD_MUTEX(un));
6755 			un->un_remove_event = NULL;
6756 		}
6757 	}
6758 
6759 	mutex_exit(SD_MUTEX(un));
6760 
6761 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6762 
6763 	return (DDI_SUCCESS);
6764 }
6765 
6766 
6767 /*
6768  *    Function: sd_ddi_pm_suspend
6769  *
6770  * Description: Set the drive state to low power.
6771  *		Someone else is required to actually change the drive
6772  *		power level.
6773  *
6774  *   Arguments: un - driver soft state (unit) structure
6775  *
6776  * Return Code: DDI_FAILURE or DDI_SUCCESS
6777  *
6778  *     Context: Kernel thread context
6779  */
6780 
6781 static int
6782 sd_ddi_pm_suspend(struct sd_lun *un)
6783 {
6784 	ASSERT(un != NULL);
6785 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6786 
6787 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6788 	mutex_enter(SD_MUTEX(un));
6789 
6790 	/*
6791 	 * Exit if power management is not enabled for this device, or if
6792 	 * the device is being used by HA.
6793 	 */
6794 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6795 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6796 		mutex_exit(SD_MUTEX(un));
6797 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6798 		return (DDI_SUCCESS);
6799 	}
6800 
6801 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6802 	    un->un_ncmds_in_driver);
6803 
6804 	/*
6805 	 * See if the device is not busy, ie.:
6806 	 *    - we have no commands in the driver for this device
6807 	 *    - not waiting for resources
6808 	 */
6809 	if ((un->un_ncmds_in_driver == 0) &&
6810 	    (un->un_state != SD_STATE_RWAIT)) {
6811 		/*
6812 		 * The device is not busy, so it is OK to go to low power state.
6813 		 * Indicate low power, but rely on someone else to actually
6814 		 * change it.
6815 		 */
6816 		mutex_enter(&un->un_pm_mutex);
6817 		un->un_pm_count = -1;
6818 		mutex_exit(&un->un_pm_mutex);
6819 		un->un_power_level = SD_SPINDLE_OFF;
6820 	}
6821 
6822 	mutex_exit(SD_MUTEX(un));
6823 
6824 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6825 
6826 	return (DDI_SUCCESS);
6827 }
6828 
6829 
6830 /*
6831  *    Function: sd_ddi_resume
6832  *
6833  * Description: Performs system power-up operations..
6834  *
6835  * Return Code: DDI_SUCCESS
6836  *		DDI_FAILURE
6837  *
6838  *     Context: Kernel thread context
6839  */
6840 
6841 static int
6842 sd_ddi_resume(dev_info_t *devi)
6843 {
6844 	struct	sd_lun	*un;
6845 
6846 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6847 	if (un == NULL) {
6848 		return (DDI_FAILURE);
6849 	}
6850 
6851 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6852 
6853 	mutex_enter(SD_MUTEX(un));
6854 	Restore_state(un);
6855 
6856 	/*
6857 	 * Restore the state which was saved to give the
6858 	 * the right state in un_last_state
6859 	 */
6860 	un->un_last_state = un->un_save_state;
6861 	/*
6862 	 * Note: throttle comes back at full.
6863 	 * Also note: this MUST be done before calling pm_raise_power
6864 	 * otherwise the system can get hung in biowait. The scenario where
6865 	 * this'll happen is under cpr suspend. Writing of the system
6866 	 * state goes through sddump, which writes 0 to un_throttle. If
6867 	 * writing the system state then fails, example if the partition is
6868 	 * too small, then cpr attempts a resume. If throttle isn't restored
6869 	 * from the saved value until after calling pm_raise_power then
6870 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6871 	 * in biowait.
6872 	 */
6873 	un->un_throttle = un->un_saved_throttle;
6874 
6875 	/*
6876 	 * The chance of failure is very rare as the only command done in power
6877 	 * entry point is START command when you transition from 0->1 or
6878 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
6879 	 * which suspend was done. Ignore the return value as the resume should
6880 	 * not be failed. In the case of removable media the media need not be
6881 	 * inserted and hence there is a chance that raise power will fail with
6882 	 * media not present.
6883 	 */
6884 	if (!ISREMOVABLE(un)) {
6885 		mutex_exit(SD_MUTEX(un));
6886 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
6887 		mutex_enter(SD_MUTEX(un));
6888 	}
6889 
6890 	/*
6891 	 * Don't broadcast to the suspend cv and therefore possibly
6892 	 * start I/O until after power has been restored.
6893 	 */
6894 	cv_broadcast(&un->un_suspend_cv);
6895 	cv_broadcast(&un->un_state_cv);
6896 
6897 	/* restart thread */
6898 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6899 		scsi_watch_resume(un->un_swr_token);
6900 	}
6901 
6902 #if (defined(__fibre))
6903 	if (un->un_f_is_fibre == TRUE) {
6904 		/*
6905 		 * Add callbacks for insert and remove events
6906 		 */
6907 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6908 			sd_init_event_callbacks(un);
6909 		}
6910 	}
6911 #endif
6912 
6913 	/*
6914 	 * Transport any pending commands to the target.
6915 	 *
6916 	 * If this is a low-activity device commands in queue will have to wait
6917 	 * until new commands come in, which may take awhile. Also, we
6918 	 * specifically don't check un_ncmds_in_transport because we know that
6919 	 * there really are no commands in progress after the unit was
6920 	 * suspended and we could have reached the throttle level, been
6921 	 * suspended, and have no new commands coming in for awhile. Highly
6922 	 * unlikely, but so is the low-activity disk scenario.
6923 	 */
6924 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6925 
6926 	sd_start_cmds(un, NULL);
6927 	mutex_exit(SD_MUTEX(un));
6928 
6929 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6930 
6931 	return (DDI_SUCCESS);
6932 }
6933 
6934 
6935 /*
6936  *    Function: sd_ddi_pm_resume
6937  *
6938  * Description: Set the drive state to powered on.
6939  *		Someone else is required to actually change the drive
6940  *		power level.
6941  *
6942  *   Arguments: un - driver soft state (unit) structure
6943  *
6944  * Return Code: DDI_SUCCESS
6945  *
6946  *     Context: Kernel thread context
6947  */
6948 
6949 static int
6950 sd_ddi_pm_resume(struct sd_lun *un)
6951 {
6952 	ASSERT(un != NULL);
6953 
6954 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6955 	mutex_enter(SD_MUTEX(un));
6956 	un->un_power_level = SD_SPINDLE_ON;
6957 
6958 	ASSERT(!mutex_owned(&un->un_pm_mutex));
6959 	mutex_enter(&un->un_pm_mutex);
6960 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6961 		un->un_pm_count++;
6962 		ASSERT(un->un_pm_count == 0);
6963 		/*
6964 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6965 		 * un_suspend_cv is for a system resume, not a power management
6966 		 * device resume. (4297749)
6967 		 *	 cv_broadcast(&un->un_suspend_cv);
6968 		 */
6969 	}
6970 	mutex_exit(&un->un_pm_mutex);
6971 	mutex_exit(SD_MUTEX(un));
6972 
6973 	return (DDI_SUCCESS);
6974 }
6975 
6976 
6977 /*
6978  *    Function: sd_pm_idletimeout_handler
6979  *
6980  * Description: A timer routine that's active only while a device is busy.
6981  *		The purpose is to extend slightly the pm framework's busy
6982  *		view of the device to prevent busy/idle thrashing for
6983  *		back-to-back commands. Do this by comparing the current time
6984  *		to the time at which the last command completed and when the
6985  *		difference is greater than sd_pm_idletime, call
6986  *		pm_idle_component. In addition to indicating idle to the pm
6987  *		framework, update the chain type to again use the internal pm
6988  *		layers of the driver.
6989  *
6990  *   Arguments: arg - driver soft state (unit) structure
6991  *
6992  *     Context: Executes in a timeout(9F) thread context
6993  */
6994 
6995 static void
6996 sd_pm_idletimeout_handler(void *arg)
6997 {
6998 	struct sd_lun *un = arg;
6999 
7000 	time_t	now;
7001 
7002 	mutex_enter(&sd_detach_mutex);
7003 	if (un->un_detach_count != 0) {
7004 		/* Abort if the instance is detaching */
7005 		mutex_exit(&sd_detach_mutex);
7006 		return;
7007 	}
7008 	mutex_exit(&sd_detach_mutex);
7009 
7010 	now = ddi_get_time();
7011 	/*
7012 	 * Grab both mutexes, in the proper order, since we're accessing
7013 	 * both PM and softstate variables.
7014 	 */
7015 	mutex_enter(SD_MUTEX(un));
7016 	mutex_enter(&un->un_pm_mutex);
7017 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7018 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7019 		/*
7020 		 * Update the chain types.
7021 		 * This takes affect on the next new command received.
7022 		 */
7023 		if (ISREMOVABLE(un)) {
7024 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7025 		} else {
7026 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7027 		}
7028 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7029 
7030 		SD_TRACE(SD_LOG_IO_PM, un,
7031 		    "sd_pm_idletimeout_handler: idling device\n");
7032 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7033 		un->un_pm_idle_timeid = NULL;
7034 	} else {
7035 		un->un_pm_idle_timeid =
7036 			timeout(sd_pm_idletimeout_handler, un,
7037 			(drv_usectohz((clock_t)300000))); /* 300 ms. */
7038 	}
7039 	mutex_exit(&un->un_pm_mutex);
7040 	mutex_exit(SD_MUTEX(un));
7041 }
7042 
7043 
7044 /*
7045  *    Function: sd_pm_timeout_handler
7046  *
7047  * Description: Callback to tell framework we are idle.
7048  *
7049  *     Context: timeout(9f) thread context.
7050  */
7051 
7052 static void
7053 sd_pm_timeout_handler(void *arg)
7054 {
7055 	struct sd_lun *un = arg;
7056 
7057 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7058 	mutex_enter(&un->un_pm_mutex);
7059 	un->un_pm_timeid = NULL;
7060 	mutex_exit(&un->un_pm_mutex);
7061 }
7062 
7063 
7064 /*
7065  *    Function: sdpower
7066  *
7067  * Description: PM entry point.
7068  *
7069  * Return Code: DDI_SUCCESS
7070  *		DDI_FAILURE
7071  *
7072  *     Context: Kernel thread context
7073  */
7074 
7075 static int
7076 sdpower(dev_info_t *devi, int component, int level)
7077 {
7078 	struct sd_lun	*un;
7079 	int		instance;
7080 	int		rval = DDI_SUCCESS;
7081 	uint_t		i, log_page_size, maxcycles, ncycles;
7082 	uchar_t		*log_page_data;
7083 	int		log_sense_page;
7084 	int		medium_present;
7085 	time_t		intvlp;
7086 	dev_t		dev;
7087 	struct pm_trans_data	sd_pm_tran_data;
7088 	uchar_t		save_state;
7089 	int		sval;
7090 	uchar_t		state_before_pm;
7091 	int		got_semaphore_here;
7092 
7093 	instance = ddi_get_instance(devi);
7094 
7095 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7096 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7097 	    component != 0) {
7098 		return (DDI_FAILURE);
7099 	}
7100 
7101 	dev = sd_make_device(SD_DEVINFO(un));
7102 
7103 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7104 
7105 	/*
7106 	 * Must synchronize power down with close.
7107 	 * Attempt to decrement/acquire the open/close semaphore,
7108 	 * but do NOT wait on it. If it's not greater than zero,
7109 	 * ie. it can't be decremented without waiting, then
7110 	 * someone else, either open or close, already has it
7111 	 * and the try returns 0. Use that knowledge here to determine
7112 	 * if it's OK to change the device power level.
7113 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7114 	 * here.
7115 	 */
7116 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7117 
7118 	mutex_enter(SD_MUTEX(un));
7119 
7120 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7121 	    un->un_ncmds_in_driver);
7122 
7123 	/*
7124 	 * If un_ncmds_in_driver is non-zero it indicates commands are
7125 	 * already being processed in the driver, or if the semaphore was
7126 	 * not gotten here it indicates an open or close is being processed.
7127 	 * At the same time somebody is requesting to go low power which
7128 	 * can't happen, therefore we need to return failure.
7129 	 */
7130 	if ((level == SD_SPINDLE_OFF) &&
7131 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7132 		mutex_exit(SD_MUTEX(un));
7133 
7134 		if (got_semaphore_here != 0) {
7135 			sema_v(&un->un_semoclose);
7136 		}
7137 		SD_TRACE(SD_LOG_IO_PM, un,
7138 		    "sdpower: exit, device has queued cmds.\n");
7139 		return (DDI_FAILURE);
7140 	}
7141 
7142 	/*
7143 	 * if it is OFFLINE that means the disk is completely dead
7144 	 * in our case we have to put the disk in on or off by sending commands
7145 	 * Of course that will fail anyway so return back here.
7146 	 *
7147 	 * Power changes to a device that's OFFLINE or SUSPENDED
7148 	 * are not allowed.
7149 	 */
7150 	if ((un->un_state == SD_STATE_OFFLINE) ||
7151 	    (un->un_state == SD_STATE_SUSPENDED)) {
7152 		mutex_exit(SD_MUTEX(un));
7153 
7154 		if (got_semaphore_here != 0) {
7155 			sema_v(&un->un_semoclose);
7156 		}
7157 		SD_TRACE(SD_LOG_IO_PM, un,
7158 		    "sdpower: exit, device is off-line.\n");
7159 		return (DDI_FAILURE);
7160 	}
7161 
7162 	/*
7163 	 * Change the device's state to indicate it's power level
7164 	 * is being changed. Do this to prevent a power off in the
7165 	 * middle of commands, which is especially bad on devices
7166 	 * that are really powered off instead of just spun down.
7167 	 */
7168 	state_before_pm = un->un_state;
7169 	un->un_state = SD_STATE_PM_CHANGING;
7170 
7171 	mutex_exit(SD_MUTEX(un));
7172 
7173 	/*
7174 	 * Bypass checking the log sense information for removables
7175 	 * and devices for which the HBA set the pm-capable property.
7176 	 * If un->un_pm_capable_prop is SD_PM_CAPABLE_UNDEFINED (-1)
7177 	 * then the HBA did not create the property.
7178 	 */
7179 	if ((level == SD_SPINDLE_OFF) && (!ISREMOVABLE(un)) &&
7180 	    un->un_pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
7181 		/*
7182 		 * Get the log sense information to understand whether the
7183 		 * the powercycle counts have gone beyond the threshhold.
7184 		 */
7185 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7186 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7187 
7188 		mutex_enter(SD_MUTEX(un));
7189 		log_sense_page = un->un_start_stop_cycle_page;
7190 		mutex_exit(SD_MUTEX(un));
7191 
7192 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7193 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7194 #ifdef	SDDEBUG
7195 		if (sd_force_pm_supported) {
7196 			/* Force a successful result */
7197 			rval = 0;
7198 		}
7199 #endif
7200 		if (rval != 0) {
7201 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7202 			    "Log Sense Failed\n");
7203 			kmem_free(log_page_data, log_page_size);
7204 			/* Cannot support power management on those drives */
7205 
7206 			if (got_semaphore_here != 0) {
7207 				sema_v(&un->un_semoclose);
7208 			}
7209 			/*
7210 			 * On exit put the state back to it's original value
7211 			 * and broadcast to anyone waiting for the power
7212 			 * change completion.
7213 			 */
7214 			mutex_enter(SD_MUTEX(un));
7215 			un->un_state = state_before_pm;
7216 			cv_broadcast(&un->un_suspend_cv);
7217 			mutex_exit(SD_MUTEX(un));
7218 			SD_TRACE(SD_LOG_IO_PM, un,
7219 			    "sdpower: exit, Log Sense Failed.\n");
7220 			return (DDI_FAILURE);
7221 		}
7222 
7223 		/*
7224 		 * From the page data - Convert the essential information to
7225 		 * pm_trans_data
7226 		 */
7227 		maxcycles =
7228 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7229 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7230 
7231 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7232 
7233 		ncycles =
7234 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7235 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7236 
7237 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7238 
7239 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7240 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7241 			    log_page_data[8+i];
7242 		}
7243 
7244 		kmem_free(log_page_data, log_page_size);
7245 
7246 		/*
7247 		 * Call pm_trans_check routine to get the Ok from
7248 		 * the global policy
7249 		 */
7250 
7251 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7252 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7253 
7254 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7255 #ifdef	SDDEBUG
7256 		if (sd_force_pm_supported) {
7257 			/* Force a successful result */
7258 			rval = 1;
7259 		}
7260 #endif
7261 		switch (rval) {
7262 		case 0:
7263 			/*
7264 			 * Not Ok to Power cycle or error in parameters passed
7265 			 * Would have given the advised time to consider power
7266 			 * cycle. Based on the new intvlp parameter we are
7267 			 * supposed to pretend we are busy so that pm framework
7268 			 * will never call our power entry point. Because of
7269 			 * that install a timeout handler and wait for the
7270 			 * recommended time to elapse so that power management
7271 			 * can be effective again.
7272 			 *
7273 			 * To effect this behavior, call pm_busy_component to
7274 			 * indicate to the framework this device is busy.
7275 			 * By not adjusting un_pm_count the rest of PM in
7276 			 * the driver will function normally, and independant
7277 			 * of this but because the framework is told the device
7278 			 * is busy it won't attempt powering down until it gets
7279 			 * a matching idle. The timeout handler sends this.
7280 			 * Note: sd_pm_entry can't be called here to do this
7281 			 * because sdpower may have been called as a result
7282 			 * of a call to pm_raise_power from within sd_pm_entry.
7283 			 *
7284 			 * If a timeout handler is already active then
7285 			 * don't install another.
7286 			 */
7287 			mutex_enter(&un->un_pm_mutex);
7288 			if (un->un_pm_timeid == NULL) {
7289 				un->un_pm_timeid =
7290 				    timeout(sd_pm_timeout_handler,
7291 				    un, intvlp * drv_usectohz(1000000));
7292 				mutex_exit(&un->un_pm_mutex);
7293 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7294 			} else {
7295 				mutex_exit(&un->un_pm_mutex);
7296 			}
7297 			if (got_semaphore_here != 0) {
7298 				sema_v(&un->un_semoclose);
7299 			}
7300 			/*
7301 			 * On exit put the state back to it's original value
7302 			 * and broadcast to anyone waiting for the power
7303 			 * change completion.
7304 			 */
7305 			mutex_enter(SD_MUTEX(un));
7306 			un->un_state = state_before_pm;
7307 			cv_broadcast(&un->un_suspend_cv);
7308 			mutex_exit(SD_MUTEX(un));
7309 
7310 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7311 			    "trans check Failed, not ok to power cycle.\n");
7312 			return (DDI_FAILURE);
7313 
7314 		case -1:
7315 			if (got_semaphore_here != 0) {
7316 				sema_v(&un->un_semoclose);
7317 			}
7318 			/*
7319 			 * On exit put the state back to it's original value
7320 			 * and broadcast to anyone waiting for the power
7321 			 * change completion.
7322 			 */
7323 			mutex_enter(SD_MUTEX(un));
7324 			un->un_state = state_before_pm;
7325 			cv_broadcast(&un->un_suspend_cv);
7326 			mutex_exit(SD_MUTEX(un));
7327 			SD_TRACE(SD_LOG_IO_PM, un,
7328 			    "sdpower: exit, trans check command Failed.\n");
7329 			return (DDI_FAILURE);
7330 		}
7331 	}
7332 
7333 	if (level == SD_SPINDLE_OFF) {
7334 		/*
7335 		 * Save the last state... if the STOP FAILS we need it
7336 		 * for restoring
7337 		 */
7338 		mutex_enter(SD_MUTEX(un));
7339 		save_state = un->un_last_state;
7340 		/*
7341 		 * There must not be any cmds. getting processed
7342 		 * in the driver when we get here. Power to the
7343 		 * device is potentially going off.
7344 		 */
7345 		ASSERT(un->un_ncmds_in_driver == 0);
7346 		mutex_exit(SD_MUTEX(un));
7347 
7348 		/*
7349 		 * For now suspend the device completely before spindle is
7350 		 * turned off
7351 		 */
7352 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7353 			if (got_semaphore_here != 0) {
7354 				sema_v(&un->un_semoclose);
7355 			}
7356 			/*
7357 			 * On exit put the state back to it's original value
7358 			 * and broadcast to anyone waiting for the power
7359 			 * change completion.
7360 			 */
7361 			mutex_enter(SD_MUTEX(un));
7362 			un->un_state = state_before_pm;
7363 			cv_broadcast(&un->un_suspend_cv);
7364 			mutex_exit(SD_MUTEX(un));
7365 			SD_TRACE(SD_LOG_IO_PM, un,
7366 			    "sdpower: exit, PM suspend Failed.\n");
7367 			return (DDI_FAILURE);
7368 		}
7369 	}
7370 
7371 	/*
7372 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7373 	 * close, or strategy. Dump no long uses this routine, it uses it's
7374 	 * own code so it can be done in polled mode.
7375 	 */
7376 
7377 	medium_present = TRUE;
7378 
7379 	/*
7380 	 * When powering up, issue a TUR in case the device is at unit
7381 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7382 	 * a deadlock on un_pm_busy_cv will occur.
7383 	 */
7384 	if (level == SD_SPINDLE_ON) {
7385 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7386 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7387 	}
7388 
7389 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7390 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7391 
7392 	sval = sd_send_scsi_START_STOP_UNIT(un,
7393 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7394 	    SD_PATH_DIRECT);
7395 	/* Command failed, check for media present. */
7396 	if ((sval == ENXIO) && ISREMOVABLE(un)) {
7397 		medium_present = FALSE;
7398 	}
7399 
7400 	/*
7401 	 * The conditions of interest here are:
7402 	 *   if a spindle off with media present fails,
7403 	 *	then restore the state and return an error.
7404 	 *   else if a spindle on fails,
7405 	 *	then return an error (there's no state to restore).
7406 	 * In all other cases we setup for the new state
7407 	 * and return success.
7408 	 */
7409 	switch (level) {
7410 	case SD_SPINDLE_OFF:
7411 		if ((medium_present == TRUE) && (sval != 0)) {
7412 			/* The stop command from above failed */
7413 			rval = DDI_FAILURE;
7414 			/*
7415 			 * The stop command failed, and we have media
7416 			 * present. Put the level back by calling the
7417 			 * sd_pm_resume() and set the state back to
7418 			 * it's previous value.
7419 			 */
7420 			(void) sd_ddi_pm_resume(un);
7421 			mutex_enter(SD_MUTEX(un));
7422 			un->un_last_state = save_state;
7423 			mutex_exit(SD_MUTEX(un));
7424 			break;
7425 		}
7426 		/*
7427 		 * The stop command from above succeeded.
7428 		 */
7429 		if (ISREMOVABLE(un)) {
7430 			/*
7431 			 * Terminate watch thread in case of removable media
7432 			 * devices going into low power state. This is as per
7433 			 * the requirements of pm framework, otherwise commands
7434 			 * will be generated for the device (through watch
7435 			 * thread), even when the device is in low power state.
7436 			 */
7437 			mutex_enter(SD_MUTEX(un));
7438 			un->un_f_watcht_stopped = FALSE;
7439 			if (un->un_swr_token != NULL) {
7440 				opaque_t temp_token = un->un_swr_token;
7441 				un->un_f_watcht_stopped = TRUE;
7442 				un->un_swr_token = NULL;
7443 				mutex_exit(SD_MUTEX(un));
7444 				(void) scsi_watch_request_terminate(temp_token,
7445 				    SCSI_WATCH_TERMINATE_WAIT);
7446 			} else {
7447 				mutex_exit(SD_MUTEX(un));
7448 			}
7449 		}
7450 		break;
7451 
7452 	default:	/* The level requested is spindle on... */
7453 		/*
7454 		 * Legacy behavior: return success on a failed spinup
7455 		 * if there is no media in the drive.
7456 		 * Do this by looking at medium_present here.
7457 		 */
7458 		if ((sval != 0) && medium_present) {
7459 			/* The start command from above failed */
7460 			rval = DDI_FAILURE;
7461 			break;
7462 		}
7463 		/*
7464 		 * The start command from above succeeded
7465 		 * Resume the devices now that we have
7466 		 * started the disks
7467 		 */
7468 		(void) sd_ddi_pm_resume(un);
7469 
7470 		/*
7471 		 * Resume the watch thread since it was suspended
7472 		 * when the device went into low power mode.
7473 		 */
7474 		if (ISREMOVABLE(un)) {
7475 			mutex_enter(SD_MUTEX(un));
7476 			if (un->un_f_watcht_stopped == TRUE) {
7477 				opaque_t temp_token;
7478 
7479 				un->un_f_watcht_stopped = FALSE;
7480 				mutex_exit(SD_MUTEX(un));
7481 				temp_token = scsi_watch_request_submit(
7482 				    SD_SCSI_DEVP(un),
7483 				    sd_check_media_time,
7484 				    SENSE_LENGTH, sd_media_watch_cb,
7485 				    (caddr_t)dev);
7486 				mutex_enter(SD_MUTEX(un));
7487 				un->un_swr_token = temp_token;
7488 			}
7489 			mutex_exit(SD_MUTEX(un));
7490 		}
7491 	}
7492 	if (got_semaphore_here != 0) {
7493 		sema_v(&un->un_semoclose);
7494 	}
7495 	/*
7496 	 * On exit put the state back to it's original value
7497 	 * and broadcast to anyone waiting for the power
7498 	 * change completion.
7499 	 */
7500 	mutex_enter(SD_MUTEX(un));
7501 	un->un_state = state_before_pm;
7502 	cv_broadcast(&un->un_suspend_cv);
7503 	mutex_exit(SD_MUTEX(un));
7504 
7505 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7506 
7507 	return (rval);
7508 }
7509 
7510 
7511 
7512 /*
7513  *    Function: sdattach
7514  *
7515  * Description: Driver's attach(9e) entry point function.
7516  *
7517  *   Arguments: devi - opaque device info handle
7518  *		cmd  - attach  type
7519  *
7520  * Return Code: DDI_SUCCESS
7521  *		DDI_FAILURE
7522  *
7523  *     Context: Kernel thread context
7524  */
7525 
7526 static int
7527 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7528 {
7529 	switch (cmd) {
7530 	case DDI_ATTACH:
7531 		return (sd_unit_attach(devi));
7532 	case DDI_RESUME:
7533 		return (sd_ddi_resume(devi));
7534 	default:
7535 		break;
7536 	}
7537 	return (DDI_FAILURE);
7538 }
7539 
7540 
7541 /*
7542  *    Function: sddetach
7543  *
7544  * Description: Driver's detach(9E) entry point function.
7545  *
7546  *   Arguments: devi - opaque device info handle
7547  *		cmd  - detach  type
7548  *
7549  * Return Code: DDI_SUCCESS
7550  *		DDI_FAILURE
7551  *
7552  *     Context: Kernel thread context
7553  */
7554 
7555 static int
7556 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7557 {
7558 	switch (cmd) {
7559 	case DDI_DETACH:
7560 		return (sd_unit_detach(devi));
7561 	case DDI_SUSPEND:
7562 		return (sd_ddi_suspend(devi));
7563 	default:
7564 		break;
7565 	}
7566 	return (DDI_FAILURE);
7567 }
7568 
7569 
7570 /*
7571  *     Function: sd_sync_with_callback
7572  *
7573  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7574  *		 state while the callback routine is active.
7575  *
7576  *    Arguments: un: softstate structure for the instance
7577  *
7578  *	Context: Kernel thread context
7579  */
7580 
7581 static void
7582 sd_sync_with_callback(struct sd_lun *un)
7583 {
7584 	ASSERT(un != NULL);
7585 
7586 	mutex_enter(SD_MUTEX(un));
7587 
7588 	ASSERT(un->un_in_callback >= 0);
7589 
7590 	while (un->un_in_callback > 0) {
7591 		mutex_exit(SD_MUTEX(un));
7592 		delay(2);
7593 		mutex_enter(SD_MUTEX(un));
7594 	}
7595 
7596 	mutex_exit(SD_MUTEX(un));
7597 }
7598 
7599 /*
7600  *    Function: sd_unit_attach
7601  *
7602  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7603  *		the soft state structure for the device and performs
7604  *		all necessary structure and device initializations.
7605  *
7606  *   Arguments: devi: the system's dev_info_t for the device.
7607  *
7608  * Return Code: DDI_SUCCESS if attach is successful.
7609  *		DDI_FAILURE if any part of the attach fails.
7610  *
7611  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7612  *		Kernel thread context only.  Can sleep.
7613  */
7614 
7615 static int
7616 sd_unit_attach(dev_info_t *devi)
7617 {
7618 	struct	scsi_device	*devp;
7619 	struct	sd_lun		*un;
7620 	char			*variantp;
7621 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7622 	int	instance;
7623 	int	rval;
7624 	uint64_t	capacity;
7625 	uint_t		lbasize;
7626 
7627 	/*
7628 	 * Retrieve the target driver's private data area. This was set
7629 	 * up by the HBA.
7630 	 */
7631 	devp = ddi_get_driver_private(devi);
7632 
7633 	/*
7634 	 * Since we have no idea what state things were left in by the last
7635 	 * user of the device, set up some 'default' settings, ie. turn 'em
7636 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7637 	 * Do this before the scsi_probe, which sends an inquiry.
7638 	 * This is a fix for bug (4430280).
7639 	 * Of special importance is wide-xfer. The drive could have been left
7640 	 * in wide transfer mode by the last driver to communicate with it,
7641 	 * this includes us. If that's the case, and if the following is not
7642 	 * setup properly or we don't re-negotiate with the drive prior to
7643 	 * transferring data to/from the drive, it causes bus parity errors,
7644 	 * data overruns, and unexpected interrupts. This first occurred when
7645 	 * the fix for bug (4378686) was made.
7646 	 */
7647 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7648 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7649 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7650 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7651 
7652 	/*
7653 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7654 	 * This call will allocate and fill in the scsi_inquiry structure
7655 	 * and point the sd_inq member of the scsi_device structure to it.
7656 	 * If the attach succeeds, then this memory will not be de-allocated
7657 	 * (via scsi_unprobe()) until the instance is detached.
7658 	 */
7659 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7660 		goto probe_failed;
7661 	}
7662 
7663 	/*
7664 	 * Check the device type as specified in the inquiry data and
7665 	 * claim it if it is of a type that we support.
7666 	 */
7667 	switch (devp->sd_inq->inq_dtype) {
7668 	case DTYPE_DIRECT:
7669 		break;
7670 	case DTYPE_RODIRECT:
7671 		break;
7672 	case DTYPE_OPTICAL:
7673 		break;
7674 	case DTYPE_NOTPRESENT:
7675 	default:
7676 		/* Unsupported device type; fail the attach. */
7677 		goto probe_failed;
7678 	}
7679 
7680 	/*
7681 	 * Allocate the soft state structure for this unit.
7682 	 *
7683 	 * We rely upon this memory being set to all zeroes by
7684 	 * ddi_soft_state_zalloc().  We assume that any member of the
7685 	 * soft state structure that is not explicitly initialized by
7686 	 * this routine will have a value of zero.
7687 	 */
7688 	instance = ddi_get_instance(devp->sd_dev);
7689 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7690 		goto probe_failed;
7691 	}
7692 
7693 	/*
7694 	 * Retrieve a pointer to the newly-allocated soft state.
7695 	 *
7696 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7697 	 * was successful, unless something has gone horribly wrong and the
7698 	 * ddi's soft state internals are corrupt (in which case it is
7699 	 * probably better to halt here than just fail the attach....)
7700 	 */
7701 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7702 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7703 		    instance);
7704 		/*NOTREACHED*/
7705 	}
7706 
7707 	/*
7708 	 * Link the back ptr of the driver soft state to the scsi_device
7709 	 * struct for this lun.
7710 	 * Save a pointer to the softstate in the driver-private area of
7711 	 * the scsi_device struct.
7712 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7713 	 * we first set un->un_sd below.
7714 	 */
7715 	un->un_sd = devp;
7716 	devp->sd_private = (opaque_t)un;
7717 
7718 	/*
7719 	 * The following must be after devp is stored in the soft state struct.
7720 	 */
7721 #ifdef SDDEBUG
7722 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7723 	    "%s_unit_attach: un:0x%p instance:%d\n",
7724 	    ddi_driver_name(devi), un, instance);
7725 #endif
7726 
7727 	/*
7728 	 * Set up the device type and node type (for the minor nodes).
7729 	 * By default we assume that the device can at least support the
7730 	 * Common Command Set. Call it a CD-ROM if it reports itself
7731 	 * as a RODIRECT device.
7732 	 */
7733 	switch (devp->sd_inq->inq_dtype) {
7734 	case DTYPE_RODIRECT:
7735 		un->un_node_type = DDI_NT_CD_CHAN;
7736 		un->un_ctype	 = CTYPE_CDROM;
7737 		break;
7738 	case DTYPE_OPTICAL:
7739 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7740 		un->un_ctype	 = CTYPE_ROD;
7741 		break;
7742 	default:
7743 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7744 		un->un_ctype	 = CTYPE_CCS;
7745 		break;
7746 	}
7747 
7748 	/*
7749 	 * Try to read the interconnect type from the HBA.
7750 	 *
7751 	 * Note: This driver is currently compiled as two binaries, a parallel
7752 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7753 	 * differences are determined at compile time. In the future a single
7754 	 * binary will be provided and the inteconnect type will be used to
7755 	 * differentiate between fibre and parallel scsi behaviors. At that time
7756 	 * it will be necessary for all fibre channel HBAs to support this
7757 	 * property.
7758 	 *
7759 	 * set un_f_is_fiber to TRUE ( default fiber )
7760 	 */
7761 	un->un_f_is_fibre = TRUE;
7762 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7763 	case INTERCONNECT_SSA:
7764 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7765 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7766 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7767 		break;
7768 	case INTERCONNECT_PARALLEL:
7769 		un->un_f_is_fibre = FALSE;
7770 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7771 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7772 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7773 		break;
7774 	case INTERCONNECT_FIBRE:
7775 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7776 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7777 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7778 		break;
7779 	case INTERCONNECT_FABRIC:
7780 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7781 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7782 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7783 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7784 		break;
7785 	default:
7786 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7787 		/*
7788 		 * The HBA does not support the "interconnect-type" property
7789 		 * (or did not provide a recognized type).
7790 		 *
7791 		 * Note: This will be obsoleted when a single fibre channel
7792 		 * and parallel scsi driver is delivered. In the meantime the
7793 		 * interconnect type will be set to the platform default.If that
7794 		 * type is not parallel SCSI, it means that we should be
7795 		 * assuming "ssd" semantics. However, here this also means that
7796 		 * the FC HBA is not supporting the "interconnect-type" property
7797 		 * like we expect it to, so log this occurrence.
7798 		 */
7799 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7800 		if (!SD_IS_PARALLEL_SCSI(un)) {
7801 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7802 			    "sd_unit_attach: un:0x%p Assuming "
7803 			    "INTERCONNECT_FIBRE\n", un);
7804 		} else {
7805 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7806 			    "sd_unit_attach: un:0x%p Assuming "
7807 			    "INTERCONNECT_PARALLEL\n", un);
7808 			un->un_f_is_fibre = FALSE;
7809 		}
7810 #else
7811 		/*
7812 		 * Note: This source will be implemented when a single fibre
7813 		 * channel and parallel scsi driver is delivered. The default
7814 		 * will be to assume that if a device does not support the
7815 		 * "interconnect-type" property it is a parallel SCSI HBA and
7816 		 * we will set the interconnect type for parallel scsi.
7817 		 */
7818 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7819 		un->un_f_is_fibre = FALSE;
7820 #endif
7821 		break;
7822 	}
7823 
7824 	if (un->un_f_is_fibre == TRUE) {
7825 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7826 			SCSI_VERSION_3) {
7827 			switch (un->un_interconnect_type) {
7828 			case SD_INTERCONNECT_FIBRE:
7829 			case SD_INTERCONNECT_SSA:
7830 				un->un_node_type = DDI_NT_BLOCK_WWN;
7831 				break;
7832 			default:
7833 				break;
7834 			}
7835 		}
7836 	}
7837 
7838 	/*
7839 	 * Initialize the Request Sense command for the target
7840 	 */
7841 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7842 		goto alloc_rqs_failed;
7843 	}
7844 
7845 	/*
7846 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
7847 	 * with seperate binary for sd and ssd.
7848 	 *
7849 	 * x86 has 1 binary, un_retry_count is set base on connection type.
7850 	 * The hardcoded values will go away when Sparc uses 1 binary
7851 	 * for sd and ssd.  This hardcoded values need to match
7852 	 * SD_RETRY_COUNT in sddef.h
7853 	 * The value used is base on interconnect type.
7854 	 * fibre = 3, parallel = 5
7855 	 */
7856 #if defined(__i386) || defined(__amd64)
7857 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7858 #else
7859 	un->un_retry_count = SD_RETRY_COUNT;
7860 #endif
7861 
7862 	/*
7863 	 * Set the per disk retry count to the default number of retries
7864 	 * for disks and CDROMs. This value can be overridden by the
7865 	 * disk property list or an entry in sd.conf.
7866 	 */
7867 	un->un_notready_retry_count =
7868 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7869 			: DISK_NOT_READY_RETRY_COUNT(un);
7870 
7871 	/*
7872 	 * Set the busy retry count to the default value of un_retry_count.
7873 	 * This can be overridden by entries in sd.conf or the device
7874 	 * config table.
7875 	 */
7876 	un->un_busy_retry_count = un->un_retry_count;
7877 
7878 	/*
7879 	 * Init the reset threshold for retries.  This number determines
7880 	 * how many retries must be performed before a reset can be issued
7881 	 * (for certain error conditions). This can be overridden by entries
7882 	 * in sd.conf or the device config table.
7883 	 */
7884 	un->un_reset_retry_count = (un->un_retry_count / 2);
7885 
7886 	/*
7887 	 * Set the victim_retry_count to the default un_retry_count
7888 	 */
7889 	un->un_victim_retry_count = (2 * un->un_retry_count);
7890 
7891 	/*
7892 	 * Set the reservation release timeout to the default value of
7893 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7894 	 * device config table.
7895 	 */
7896 	un->un_reserve_release_time = 5;
7897 
7898 	/*
7899 	 * Set up the default maximum transfer size. Note that this may
7900 	 * get updated later in the attach, when setting up default wide
7901 	 * operations for disks.
7902 	 */
7903 #if defined(__i386) || defined(__amd64)
7904 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7905 #else
7906 	un->un_max_xfer_size = (uint_t)maxphys;
7907 #endif
7908 
7909 	/*
7910 	 * Get "allow bus device reset" property (defaults to "enabled" if
7911 	 * the property was not defined). This is to disable bus resets for
7912 	 * certain kinds of error recovery. Note: In the future when a run-time
7913 	 * fibre check is available the soft state flag should default to
7914 	 * enabled.
7915 	 */
7916 	if (un->un_f_is_fibre == TRUE) {
7917 		un->un_f_allow_bus_device_reset = TRUE;
7918 	} else {
7919 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7920 			"allow-bus-device-reset", 1) != 0) {
7921 			un->un_f_allow_bus_device_reset = TRUE;
7922 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7923 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
7924 				un);
7925 		} else {
7926 			un->un_f_allow_bus_device_reset = FALSE;
7927 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7928 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
7929 				un);
7930 		}
7931 	}
7932 
7933 	/*
7934 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7935 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7936 	 *
7937 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7938 	 * property. The new "variant" property with a value of "atapi" has been
7939 	 * introduced so that future 'variants' of standard SCSI behavior (like
7940 	 * atapi) could be specified by the underlying HBA drivers by supplying
7941 	 * a new value for the "variant" property, instead of having to define a
7942 	 * new property.
7943 	 */
7944 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7945 		un->un_f_cfg_is_atapi = TRUE;
7946 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7947 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7948 	}
7949 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7950 	    &variantp) == DDI_PROP_SUCCESS) {
7951 		if (strcmp(variantp, "atapi") == 0) {
7952 			un->un_f_cfg_is_atapi = TRUE;
7953 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7954 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7955 		}
7956 		ddi_prop_free(variantp);
7957 	}
7958 
7959 	/*
7960 	 * Assume doorlock commands are supported. If not, the first
7961 	 * call to sd_send_scsi_DOORLOCK() will set to FALSE
7962 	 */
7963 	un->un_f_doorlock_supported = TRUE;
7964 
7965 	un->un_cmd_timeout	= SD_IO_TIME;
7966 
7967 	/* Info on current states, statuses, etc. (Updated frequently) */
7968 	un->un_state		= SD_STATE_NORMAL;
7969 	un->un_last_state	= SD_STATE_NORMAL;
7970 
7971 	/* Control & status info for command throttling */
7972 	un->un_throttle		= sd_max_throttle;
7973 	un->un_saved_throttle	= sd_max_throttle;
7974 	un->un_min_throttle	= sd_min_throttle;
7975 
7976 	if (un->un_f_is_fibre == TRUE) {
7977 		un->un_f_use_adaptive_throttle = TRUE;
7978 	} else {
7979 		un->un_f_use_adaptive_throttle = FALSE;
7980 	}
7981 
7982 	/* Removable media support. */
7983 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7984 	un->un_mediastate		= DKIO_NONE;
7985 	un->un_specified_mediastate	= DKIO_NONE;
7986 
7987 	/* CVs for suspend/resume (PM or DR) */
7988 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7989 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7990 
7991 	/* Power management support. */
7992 	un->un_power_level = SD_SPINDLE_UNINIT;
7993 
7994 	/*
7995 	 * The open/close semaphore is used to serialize threads executing
7996 	 * in the driver's open & close entry point routines for a given
7997 	 * instance.
7998 	 */
7999 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8000 
8001 	/*
8002 	 * The conf file entry and softstate variable is a forceful override,
8003 	 * meaning a non-zero value must be entered to change the default.
8004 	 */
8005 	un->un_f_disksort_disabled = FALSE;
8006 
8007 	/*
8008 	 * Retrieve the properties from the static driver table or the driver
8009 	 * configuration file (.conf) for this unit and update the soft state
8010 	 * for the device as needed for the indicated properties.
8011 	 * Note: the property configuration needs to occur here as some of the
8012 	 * following routines may have dependancies on soft state flags set
8013 	 * as part of the driver property configuration.
8014 	 */
8015 	sd_read_unit_properties(un);
8016 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8017 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8018 
8019 	/*
8020 	 * By default, we mark the capacity, lbazize, and geometry
8021 	 * as invalid. Only if we successfully read a valid capacity
8022 	 * will we update the un_blockcount and un_tgt_blocksize with the
8023 	 * valid values (the geometry will be validated later).
8024 	 */
8025 	un->un_f_blockcount_is_valid	= FALSE;
8026 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8027 	un->un_f_geometry_is_valid	= FALSE;
8028 
8029 	/*
8030 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8031 	 * otherwise.
8032 	 */
8033 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8034 	un->un_blockcount = 0;
8035 
8036 	/*
8037 	 * Set up the per-instance info needed to determine the correct
8038 	 * CDBs and other info for issuing commands to the target.
8039 	 */
8040 	sd_init_cdb_limits(un);
8041 
8042 	/*
8043 	 * Set up the IO chains to use, based upon the target type.
8044 	 */
8045 	if (ISREMOVABLE(un)) {
8046 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8047 	} else {
8048 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8049 	}
8050 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8051 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8052 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8053 
8054 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8055 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8056 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8057 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8058 
8059 
8060 	if (ISCD(un)) {
8061 		un->un_additional_codes = sd_additional_codes;
8062 	} else {
8063 		un->un_additional_codes = NULL;
8064 	}
8065 
8066 	/*
8067 	 * Create the kstats here so they can be available for attach-time
8068 	 * routines that send commands to the unit (either polled or via
8069 	 * sd_send_scsi_cmd).
8070 	 *
8071 	 * Note: This is a critical sequence that needs to be maintained:
8072 	 *	1) Instantiate the kstats here, before any routines using the
8073 	 *	   iopath (i.e. sd_send_scsi_cmd).
8074 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8075 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8076 	 *	   sd_register_devid(), and sd_disable_caching().
8077 	 */
8078 
8079 	un->un_stats = kstat_create(sd_label, instance,
8080 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8081 	if (un->un_stats != NULL) {
8082 		un->un_stats->ks_lock = SD_MUTEX(un);
8083 		kstat_install(un->un_stats);
8084 	}
8085 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8086 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8087 
8088 	sd_create_errstats(un, instance);
8089 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8090 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8091 
8092 	/*
8093 	 * The following if/else code was relocated here from below as part
8094 	 * of the fix for bug (4430280). However with the default setup added
8095 	 * on entry to this routine, it's no longer absolutely necessary for
8096 	 * this to be before the call to sd_spin_up_unit.
8097 	 */
8098 	if (SD_IS_PARALLEL_SCSI(un)) {
8099 		/*
8100 		 * If SCSI-2 tagged queueing is supported by the target
8101 		 * and by the host adapter then we will enable it.
8102 		 */
8103 		un->un_tagflags = 0;
8104 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8105 		    (devp->sd_inq->inq_cmdque) &&
8106 		    (un->un_f_arq_enabled == TRUE)) {
8107 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8108 			    1, 1) == 1) {
8109 				un->un_tagflags = FLAG_STAG;
8110 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8111 				    "sd_unit_attach: un:0x%p tag queueing "
8112 				    "enabled\n", un);
8113 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8114 			    "untagged-qing", 0) == 1) {
8115 				un->un_f_opt_queueing = TRUE;
8116 				un->un_saved_throttle = un->un_throttle =
8117 				    min(un->un_throttle, 3);
8118 			} else {
8119 				un->un_f_opt_queueing = FALSE;
8120 				un->un_saved_throttle = un->un_throttle = 1;
8121 			}
8122 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8123 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8124 			/* The Host Adapter supports internal queueing. */
8125 			un->un_f_opt_queueing = TRUE;
8126 			un->un_saved_throttle = un->un_throttle =
8127 			    min(un->un_throttle, 3);
8128 		} else {
8129 			un->un_f_opt_queueing = FALSE;
8130 			un->un_saved_throttle = un->un_throttle = 1;
8131 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8132 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8133 		}
8134 
8135 
8136 		/* Setup or tear down default wide operations for disks */
8137 
8138 		/*
8139 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8140 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8141 		 * system and be set to different values. In the future this
8142 		 * code may need to be updated when the ssd module is
8143 		 * obsoleted and removed from the system. (4299588)
8144 		 */
8145 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8146 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8147 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8148 			    1, 1) == 1) {
8149 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8150 				    "sd_unit_attach: un:0x%p Wide Transfer "
8151 				    "enabled\n", un);
8152 			}
8153 
8154 			/*
8155 			 * If tagged queuing has also been enabled, then
8156 			 * enable large xfers
8157 			 */
8158 			if (un->un_saved_throttle == sd_max_throttle) {
8159 				un->un_max_xfer_size =
8160 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8161 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8162 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8163 				    "sd_unit_attach: un:0x%p max transfer "
8164 				    "size=0x%x\n", un, un->un_max_xfer_size);
8165 			}
8166 		} else {
8167 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8168 			    0, 1) == 1) {
8169 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8170 				    "sd_unit_attach: un:0x%p "
8171 				    "Wide Transfer disabled\n", un);
8172 			}
8173 		}
8174 	} else {
8175 		un->un_tagflags = FLAG_STAG;
8176 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8177 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8178 	}
8179 
8180 	/*
8181 	 * If this target supports LUN reset, try to enable it.
8182 	 */
8183 	if (un->un_f_lun_reset_enabled) {
8184 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8185 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8186 			    "un:0x%p lun_reset capability set\n", un);
8187 		} else {
8188 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8189 			    "un:0x%p lun-reset capability not set\n", un);
8190 		}
8191 	}
8192 
8193 	/*
8194 	 * At this point in the attach, we have enough info in the
8195 	 * soft state to be able to issue commands to the target.
8196 	 *
8197 	 * All command paths used below MUST issue their commands as
8198 	 * SD_PATH_DIRECT. This is important as intermediate layers
8199 	 * are not all initialized yet (such as PM).
8200 	 */
8201 
8202 	/*
8203 	 * Send a TEST UNIT READY command to the device. This should clear
8204 	 * any outstanding UNIT ATTENTION that may be present.
8205 	 *
8206 	 * Note: Don't check for success, just track if there is a reservation,
8207 	 * this is a throw away command to clear any unit attentions.
8208 	 *
8209 	 * Note: This MUST be the first command issued to the target during
8210 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8211 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8212 	 * with attempts at spinning up a device with no media.
8213 	 */
8214 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8215 		reservation_flag = SD_TARGET_IS_RESERVED;
8216 	}
8217 
8218 	/*
8219 	 * If the device is NOT a removable media device, attempt to spin
8220 	 * it up (using the START_STOP_UNIT command) and read its capacity
8221 	 * (using the READ CAPACITY command).  Note, however, that either
8222 	 * of these could fail and in some cases we would continue with
8223 	 * the attach despite the failure (see below).
8224 	 */
8225 	if (devp->sd_inq->inq_dtype == DTYPE_DIRECT && !ISREMOVABLE(un)) {
8226 		switch (sd_spin_up_unit(un)) {
8227 		case 0:
8228 			/*
8229 			 * Spin-up was successful; now try to read the
8230 			 * capacity.  If successful then save the results
8231 			 * and mark the capacity & lbasize as valid.
8232 			 */
8233 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8234 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8235 
8236 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8237 			    &lbasize, SD_PATH_DIRECT)) {
8238 			case 0: {
8239 				if (capacity > DK_MAX_BLOCKS) {
8240 #ifdef _LP64
8241 					/*
8242 					 * Enable descriptor format sense data
8243 					 * so that we can get 64 bit sense
8244 					 * data fields.
8245 					 */
8246 					sd_enable_descr_sense(un);
8247 #else
8248 					/* 32-bit kernels can't handle this */
8249 					scsi_log(SD_DEVINFO(un),
8250 					    sd_label, CE_WARN,
8251 					    "disk has %llu blocks, which "
8252 					    "is too large for a 32-bit "
8253 					    "kernel", capacity);
8254 					goto spinup_failed;
8255 #endif
8256 				}
8257 				/*
8258 				 * The following relies on
8259 				 * sd_send_scsi_READ_CAPACITY never
8260 				 * returning 0 for capacity and/or lbasize.
8261 				 */
8262 				sd_update_block_info(un, lbasize, capacity);
8263 
8264 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8265 				    "sd_unit_attach: un:0x%p capacity = %ld "
8266 				    "blocks; lbasize= %ld.\n", un,
8267 				    un->un_blockcount, un->un_tgt_blocksize);
8268 
8269 				break;
8270 			}
8271 			case EACCES:
8272 				/*
8273 				 * Should never get here if the spin-up
8274 				 * succeeded, but code it in anyway.
8275 				 * From here, just continue with the attach...
8276 				 */
8277 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8278 				    "sd_unit_attach: un:0x%p "
8279 				    "sd_send_scsi_READ_CAPACITY "
8280 				    "returned reservation conflict\n", un);
8281 				reservation_flag = SD_TARGET_IS_RESERVED;
8282 				break;
8283 			default:
8284 				/*
8285 				 * Likewise, should never get here if the
8286 				 * spin-up succeeded. Just continue with
8287 				 * the attach...
8288 				 */
8289 				break;
8290 			}
8291 			break;
8292 		case EACCES:
8293 			/*
8294 			 * Device is reserved by another host.  In this case
8295 			 * we could not spin it up or read the capacity, but
8296 			 * we continue with the attach anyway.
8297 			 */
8298 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8299 			    "sd_unit_attach: un:0x%p spin-up reservation "
8300 			    "conflict.\n", un);
8301 			reservation_flag = SD_TARGET_IS_RESERVED;
8302 			break;
8303 		default:
8304 			/* Fail the attach if the spin-up failed. */
8305 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8306 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8307 			goto spinup_failed;
8308 		}
8309 	}
8310 
8311 	/*
8312 	 * Check to see if this is a MMC drive
8313 	 */
8314 	if (ISCD(un)) {
8315 		sd_set_mmc_caps(un);
8316 	}
8317 
8318 	/*
8319 	 * Create the minor nodes for the device.
8320 	 * Note: If we want to support fdisk on both sparc and intel, this will
8321 	 * have to separate out the notion that VTOC8 is always sparc, and
8322 	 * VTOC16 is always intel (tho these can be the defaults).  The vtoc
8323 	 * type will have to be determined at run-time, and the fdisk
8324 	 * partitioning will have to have been read & set up before we
8325 	 * create the minor nodes. (any other inits (such as kstats) that
8326 	 * also ought to be done before creating the minor nodes?) (Doesn't
8327 	 * setting up the minor nodes kind of imply that we're ready to
8328 	 * handle an open from userland?)
8329 	 */
8330 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8331 		goto create_minor_nodes_failed;
8332 	}
8333 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8334 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8335 
8336 	/*
8337 	 * Add a zero-length attribute to tell the world we support
8338 	 * kernel ioctls (for layered drivers)
8339 	 */
8340 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8341 	    DDI_KERNEL_IOCTL, NULL, 0);
8342 
8343 	/*
8344 	 * Add a boolean property to tell the world we support
8345 	 * the B_FAILFAST flag (for layered drivers)
8346 	 */
8347 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8348 	    "ddi-failfast-supported", NULL, 0);
8349 
8350 	/*
8351 	 * Initialize power management
8352 	 */
8353 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8354 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8355 	sd_setup_pm(un, devi);
8356 	if (un->un_f_pm_is_enabled == FALSE) {
8357 		/*
8358 		 * For performance, point to a jump table that does
8359 		 * not include pm.
8360 		 * The direct and priority chains don't change with PM.
8361 		 *
8362 		 * Note: this is currently done based on individual device
8363 		 * capabilities. When an interface for determining system
8364 		 * power enabled state becomes available, or when additional
8365 		 * layers are added to the command chain, these values will
8366 		 * have to be re-evaluated for correctness.
8367 		 */
8368 		if (ISREMOVABLE(un)) {
8369 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8370 		} else {
8371 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8372 		}
8373 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8374 	}
8375 
8376 	/*
8377 	 * This property is set to 0 by HA software to avoid retries
8378 	 * on a reserved disk. (The preferred property name is
8379 	 * "retry-on-reservation-conflict") (1189689)
8380 	 *
8381 	 * Note: The use of a global here can have unintended consequences. A
	 * per instance variable is preferable to match the capabilities of
8383 	 * different underlying hba's (4402600)
8384 	 */
8385 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8386 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8387 	    sd_retry_on_reservation_conflict);
8388 	if (sd_retry_on_reservation_conflict != 0) {
8389 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8390 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8391 		    sd_retry_on_reservation_conflict);
8392 	}
8393 
8394 	/* Set up options for QFULL handling. */
8395 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8396 	    "qfull-retries", -1)) != -1) {
8397 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8398 		    rval, 1);
8399 	}
8400 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8401 	    "qfull-retry-interval", -1)) != -1) {
8402 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8403 		    rval, 1);
8404 	}
8405 
8406 	/*
8407 	 * This just prints a message that announces the existence of the
8408 	 * device. The message is always printed in the system logfile, but
8409 	 * only appears on the console if the system is booted with the
8410 	 * -v (verbose) argument.
8411 	 */
8412 	ddi_report_dev(devi);
8413 
8414 	/*
8415 	 * The framework calls driver attach routines single-threaded
8416 	 * for a given instance.  However we still acquire SD_MUTEX here
	 * because this is required for calling the sd_validate_geometry()
8418 	 * and sd_register_devid() functions.
8419 	 */
8420 	mutex_enter(SD_MUTEX(un));
8421 	un->un_f_geometry_is_valid = FALSE;
8422 	un->un_mediastate = DKIO_NONE;
8423 	un->un_reserved = -1;
8424 	if (!ISREMOVABLE(un)) {
8425 		/*
8426 		 * Read and validate the device's geometry (ie, disk label)
8427 		 * A new unformatted drive will not have a valid geometry, but
8428 		 * the driver needs to successfully attach to this device so
8429 		 * the drive can be formatted via ioctls.
8430 		 */
8431 		if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8432 		    ENOTSUP)) &&
8433 		    (un->un_blockcount < DK_MAX_BLOCKS)) {
8434 			/*
8435 			 * We found a small disk with an EFI label on it;
8436 			 * we need to fix up the minor nodes accordingly.
8437 			 */
8438 			ddi_remove_minor_node(devi, "h");
8439 			ddi_remove_minor_node(devi, "h,raw");
8440 			(void) ddi_create_minor_node(devi, "wd",
8441 			    S_IFBLK,
8442 			    (instance << SDUNIT_SHIFT) | WD_NODE,
8443 			    un->un_node_type, NULL);
8444 			(void) ddi_create_minor_node(devi, "wd,raw",
8445 			    S_IFCHR,
8446 			    (instance << SDUNIT_SHIFT) | WD_NODE,
8447 			    un->un_node_type, NULL);
8448 		}
8449 	}
8450 
8451 	/*
8452 	 * Read and initialize the devid for the unit.
8453 	 */
8454 	ASSERT(un->un_errstats != NULL);
8455 	if (!ISREMOVABLE(un)) {
8456 		sd_register_devid(un, devi, reservation_flag);
8457 	}
8458 	mutex_exit(SD_MUTEX(un));
8459 
8460 #if (defined(__fibre))
8461 	/*
	 * Register callbacks for fibre only.  You can't do this solely
8463 	 * on the basis of the devid_type because this is hba specific.
8464 	 * We need to query our hba capabilities to find out whether to
8465 	 * register or not.
8466 	 */
8467 	if (un->un_f_is_fibre) {
8468 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8469 		sd_init_event_callbacks(un);
8470 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8471 		    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8472 	    }
8473 	}
8474 #endif
8475 
8476 	if (un->un_f_opt_disable_cache == TRUE) {
8477 		if (sd_disable_caching(un) != 0) {
8478 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8479 			    "sd_unit_attach: un:0x%p Could not disable "
8480 			    "caching", un);
8481 			goto devid_failed;
8482 		}
8483 	}
8484 
8485 	/*
8486 	 * Set the pstat and error stat values here, so data obtained during the
8487 	 * previous attach-time routines is available.
8488 	 *
8489 	 * Note: This is a critical sequence that needs to be maintained:
8490 	 *	1) Instantiate the kstats before any routines using the iopath
8491 	 *	   (i.e. sd_send_scsi_cmd).
8492 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8493 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8494 	 *	   sd_register_devid(), and sd_disable_caching().
8495 	 */
8496 	if (!ISREMOVABLE(un) && (un->un_f_pkstats_enabled == TRUE)) {
8497 		sd_set_pstats(un);
8498 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8499 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8500 	}
8501 
8502 	sd_set_errstats(un);
8503 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8504 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8505 
8506 	/*
8507 	 * Find out what type of reservation this disk supports.
8508 	 */
8509 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8510 	case 0:
8511 		/*
8512 		 * SCSI-3 reservations are supported.
8513 		 */
8514 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8515 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8516 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8517 		break;
8518 	case ENOTSUP:
8519 		/*
8520 		 * The PERSISTENT RESERVE IN command would not be recognized by
8521 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8522 		 */
8523 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8524 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8525 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8526 		break;
8527 	default:
8528 		/*
8529 		 * default to SCSI-3 reservations
8530 		 */
8531 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8532 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8533 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8534 		break;
8535 	}
8536 
8537 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8538 	    "sd_unit_attach: un:0x%p exit success\n", un);
8539 
8540 	return (DDI_SUCCESS);
8541 
8542 	/*
8543 	 * An error occurred during the attach; clean up & return failure.
8544 	 */
8545 
8546 devid_failed:
8547 
8548 setup_pm_failed:
8549 	ddi_remove_minor_node(devi, NULL);
8550 
8551 create_minor_nodes_failed:
8552 	/*
8553 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8554 	 */
8555 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8556 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8557 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8558 
8559 	if (un->un_f_is_fibre == FALSE) {
8560 	    (void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8561 	}
8562 
8563 spinup_failed:
8564 
8565 	mutex_enter(SD_MUTEX(un));
8566 
8567 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8568 	if (un->un_direct_priority_timeid != NULL) {
8569 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8570 		un->un_direct_priority_timeid = NULL;
8571 		mutex_exit(SD_MUTEX(un));
8572 		(void) untimeout(temp_id);
8573 		mutex_enter(SD_MUTEX(un));
8574 	}
8575 
8576 	/* Cancel any pending start/stop timeouts */
8577 	if (un->un_startstop_timeid != NULL) {
8578 		timeout_id_t temp_id = un->un_startstop_timeid;
8579 		un->un_startstop_timeid = NULL;
8580 		mutex_exit(SD_MUTEX(un));
8581 		(void) untimeout(temp_id);
8582 		mutex_enter(SD_MUTEX(un));
8583 	}
8584 
8585 	/* Cancel any pending reset-throttle timeouts */
8586 	if (un->un_reset_throttle_timeid != NULL) {
8587 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8588 		un->un_reset_throttle_timeid = NULL;
8589 		mutex_exit(SD_MUTEX(un));
8590 		(void) untimeout(temp_id);
8591 		mutex_enter(SD_MUTEX(un));
8592 	}
8593 
8594 	/* Cancel any pending retry timeouts */
8595 	if (un->un_retry_timeid != NULL) {
8596 		timeout_id_t temp_id = un->un_retry_timeid;
8597 		un->un_retry_timeid = NULL;
8598 		mutex_exit(SD_MUTEX(un));
8599 		(void) untimeout(temp_id);
8600 		mutex_enter(SD_MUTEX(un));
8601 	}
8602 
8603 	/* Cancel any pending delayed cv broadcast timeouts */
8604 	if (un->un_dcvb_timeid != NULL) {
8605 		timeout_id_t temp_id = un->un_dcvb_timeid;
8606 		un->un_dcvb_timeid = NULL;
8607 		mutex_exit(SD_MUTEX(un));
8608 		(void) untimeout(temp_id);
8609 		mutex_enter(SD_MUTEX(un));
8610 	}
8611 
8612 	mutex_exit(SD_MUTEX(un));
8613 
8614 	/* There should not be any in-progress I/O so ASSERT this check */
8615 	ASSERT(un->un_ncmds_in_transport == 0);
8616 	ASSERT(un->un_ncmds_in_driver == 0);
8617 
8618 	/* Do not free the softstate if the callback routine is active */
8619 	sd_sync_with_callback(un);
8620 
8621 	/*
8622 	 * Partition stats apparently are not used with removables. These would
8623 	 * not have been created during attach, so no need to clean them up...
8624 	 */
8625 	if (un->un_stats != NULL) {
8626 		kstat_delete(un->un_stats);
8627 		un->un_stats = NULL;
8628 	}
8629 	if (un->un_errstats != NULL) {
8630 		kstat_delete(un->un_errstats);
8631 		un->un_errstats = NULL;
8632 	}
8633 
8634 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8635 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8636 
8637 	ddi_prop_remove_all(devi);
8638 	sema_destroy(&un->un_semoclose);
8639 	cv_destroy(&un->un_state_cv);
8640 
8641 getrbuf_failed:
8642 
8643 	sd_free_rqs(un);
8644 
8645 alloc_rqs_failed:
8646 
8647 	devp->sd_private = NULL;
8648 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8649 
8650 get_softstate_failed:
8651 	/*
8652 	 * Note: the man pages are unclear as to whether or not doing a
8653 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8654 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8655 	 * ddi_get_soft_state() fails.  The implication seems to be
8656 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8657 	 */
8658 	ddi_soft_state_free(sd_state, instance);
8659 
8660 probe_failed:
8661 	scsi_unprobe(devp);
8662 #ifdef SDDEBUG
8663 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8664 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8665 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8666 		    (void *)un);
8667 	}
8668 #endif
8669 	return (DDI_FAILURE);
8670 }
8671 
8672 
8673 /*
8674  *    Function: sd_unit_detach
8675  *
8676  * Description: Performs DDI_DETACH processing for sddetach().
8677  *
8678  * Return Code: DDI_SUCCESS
8679  *		DDI_FAILURE
8680  *
8681  *     Context: Kernel thread context
8682  */
8683 
static int
sd_unit_detach(dev_info_t *devi)
{
	struct scsi_device	*devp;
	struct sd_lun		*un;
	int			i;
	dev_t			dev;
#if !(defined(__i386) || defined(__amd64)) && !defined(__fibre)
	int			reset_retval;
#endif
	int			instance = ddi_get_instance(devi);

	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the detach for any of the following:
	 *  - Unable to get the sd_lun struct for the instance
	 *  - A layered driver has an outstanding open on the instance
	 *  - Another thread is already detaching this instance
	 *  - Another thread is currently performing an open
	 */
	devp = ddi_get_driver_private(devi);
	if ((devp == NULL) ||
	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
		mutex_exit(&sd_detach_mutex);
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);

	/*
	 * Mark this instance as currently in a detach, to inhibit any
	 * opens from a layered driver.
	 */
	un->un_detach_count++;
	mutex_exit(&sd_detach_mutex);

	/*
	 * The dev_t is needed below for the reservation release
	 * (sd_reserve_release) and for removing any pending reservation
	 * reclaim requests (sd_rmv_resv_reclaim_req).
	 */
	dev = sd_make_device(SD_DEVINFO(un));

	/* Lock-lint annotation: other threads may be active from here on. */
	_NOTE(COMPETING_THREADS_NOW);

	mutex_enter(SD_MUTEX(un));

	/*
	 * Fail the detach if there are any outstanding layered
	 * opens on this device.
	 */
	for (i = 0; i < NDKMAP; i++) {
		if (un->un_ocmap.lyropen[i] != 0) {
			goto err_notclosed;
		}
	}

	/*
	 * Verify there are NO outstanding commands issued to this device.
	 * ie, un_ncmds_in_transport == 0.
	 * It's possible to have outstanding commands through the physio
	 * code path, even though everything's closed.
	 */
	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
	    (un->un_direct_priority_timeid != NULL) ||
	    (un->un_state == SD_STATE_RWAIT)) {
		mutex_exit(SD_MUTEX(un));
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
		goto err_stillbusy;
	}

	/*
	 * If we have the device reserved, release the reservation.
	 */
	if ((un->un_resvd_status & SD_RESERVE) &&
	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		/*
		 * Note: sd_reserve_release sends a command to the device
		 * via the sd_ioctlcmd() path, and can sleep.
		 */
		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot release reservation \n");
		}
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Untimeout any reserve recover, throttle reset, restart unit
	 * and delayed broadcast timeout threads. Protect the timeout pointer
	 * from getting nulled by their callback functions.
	 *
	 * Pattern used throughout: snapshot the id and NULL the field under
	 * SD_MUTEX, then drop the mutex across untimeout() — presumably
	 * because the timeout handlers acquire SD_MUTEX themselves, so
	 * holding it here could deadlock (standard untimeout(9F) caveat).
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_resvd_timeid != NULL) {
		timeout_id_t temp_id = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_startstop_timeid != NULL) {
		timeout_id_t temp_id = un->un_startstop_timeid;
		un->un_startstop_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/* Remove any pending reservation reclaim requests for this device */
	sd_rmv_resv_reclaim_req(dev);

	mutex_enter(SD_MUTEX(un));

	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any active multi-host disk watch thread requests */
	if (un->un_mhd_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
		if (scsi_watch_request_terminate(un->un_mhd_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel mhd watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_mhd_token = NULL;
	}

	/* Likewise cancel any active "status watch" (swr) request. */
	if (un->un_swr_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
		if (scsi_watch_request_terminate(un->un_swr_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel swr watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_swr_token = NULL;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Clear any scsi_reset_notifies. We clear the reset notifies
	 * if we have not registered one.
	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
	 */
	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
	    sd_mhd_reset_notify_cb, (caddr_t)un);



#if defined(__i386) || defined(__amd64)
	/*
	 * Gratuitous bus resets sometimes cause an otherwise
	 * okay ATA/ATAPI bus to hang. This is due to the lack of
	 * a clear spec of how resets should be implemented by ATA
	 * disk drives.
	 */
#elif !defined(__fibre)		/* "#else if" does NOT work! */
	/*
	 * Reset target/bus.
	 *
	 * Note: This is a legacy workaround for Elite III dual-port drives that
	 * will not come online after an aborted detach and subsequent
	 * re-attach. It should be removed when the Elite III FW is fixed, or
	 * the drives are no longer supported.
	 */
	if (un->un_f_cfg_is_atapi == FALSE) {
		reset_retval = 0;

		/* If the device is in low power mode don't reset it */

		mutex_enter(&un->un_pm_mutex);
		if (!SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * First try a LUN reset if we can, then move on to a
			 * target reset if needed; swat the bus as a last
			 * resort.
			 */
			mutex_exit(&un->un_pm_mutex);
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
		} else {
			mutex_exit(&un->un_pm_mutex);
		}
	}
#endif

	/*
	 * protect the timeout pointers from getting nulled by
	 * their callback functions during the cancellation process.
	 * In such a scenario untimeout can be invoked with a null value.
	 */
	_NOTE(NO_COMPETING_THREADS_NOW);

	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_idle_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_idle_timeid;
		un->un_pm_idle_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);

		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		mutex_enter(&un->un_pm_mutex);
	}

	/*
	 * Check whether there is already a timeout scheduled for power
	 * management. If yes then don't lower the power here; that's
	 * the timeout handler's job.
	 */
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);

	} else {
		mutex_exit(&un->un_pm_mutex);
		if ((un->un_f_pm_is_enabled == TRUE) &&
		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
		    DDI_SUCCESS)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Lower power request failed, ignoring.\n");
			/*
			 * Fix for bug: 4297749, item # 13
			 * The above test now includes a check to see if PM is
			 * supported by this device before call
			 * pm_lower_power().
			 * Note, the following is not dead code. The call to
			 * pm_lower_power above will generate a call back into
			 * our sdpower routine which might result in a timeout
			 * handler getting activated. Therefore the following
			 * code is valid and necessary.
			 */
			mutex_enter(&un->un_pm_mutex);
			if (un->un_pm_timeid != NULL) {
				timeout_id_t temp_id = un->un_pm_timeid;
				un->un_pm_timeid = NULL;
				mutex_exit(&un->un_pm_mutex);
				(void) untimeout(temp_id);
				(void) pm_idle_component(SD_DEVINFO(un), 0);
			} else {
				mutex_exit(&un->un_pm_mutex);
			}
		}
	}

	/*
	 * Cleanup from the scsi_ifsetcap() calls (437868)
	 * Relocated here from above to be after the call to
	 * pm_lower_power, which was getting errors.
	 */
	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);

	if (un->un_f_is_fibre == FALSE) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
	}

	/*
	 * Remove any event callbacks, fibre only
	 */
	if (un->un_f_is_fibre == TRUE) {
		if ((un->un_insert_event != NULL) &&
			(ddi_remove_event_handler(un->un_insert_cb_id) !=
				DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
				"sd_dr_detach: Cannot cancel insert event\n");
			goto err_remove_event;
		}
		un->un_insert_event = NULL;

		if ((un->un_remove_event != NULL) &&
			(ddi_remove_event_handler(un->un_remove_cb_id) !=
				DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
				"sd_dr_detach: Cannot cancel remove event\n");
			goto err_remove_event;
		}
		un->un_remove_event = NULL;
	}

	/* Do not free the softstate if the callback routine is active */
	sd_sync_with_callback(un);

	/*
	 * Hold the detach mutex here, to make sure that no other threads ever
	 * can access a (partially) freed soft state structure.
	 */
	mutex_enter(&sd_detach_mutex);

	/*
	 * Clean up the soft state struct.
	 * Cleanup is done in reverse order of allocs/inits.
	 * At this point there should be no competing threads anymore.
	 */

	/* Unregister and free device id. */
	ddi_devid_unregister(devi);
	if (un->un_devid) {
		ddi_devid_free(un->un_devid);
		un->un_devid = NULL;
	}

	/*
	 * Destroy wmap cache if it exists.
	 */
	if (un->un_wm_cache != NULL) {
		kmem_cache_destroy(un->un_wm_cache);
		un->un_wm_cache = NULL;
	}

	/* Remove minor nodes */
	ddi_remove_minor_node(devi, NULL);

	/*
	 * kstat cleanup is done in detach for all device types (4363169).
	 * We do not want to fail detach if the device kstats are not deleted
	 * since there is a confusion about the devo_refcnt for the device.
	 * We just delete the kstats and let detach complete successfully.
	 */
	if (un->un_stats != NULL) {
		kstat_delete(un->un_stats);
		un->un_stats = NULL;
	}
	if (un->un_errstats != NULL) {
		kstat_delete(un->un_errstats);
		un->un_errstats = NULL;
	}

	/* Remove partition stats (not created for removables) */
	if (!ISREMOVABLE(un)) {
		for (i = 0; i < NSDMAP; i++) {
			if (un->un_pstats[i] != NULL) {
				kstat_delete(un->un_pstats[i]);
				un->un_pstats[i] = NULL;
			}
		}
	}

	/* Remove xbuf registration */
	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
	ddi_xbuf_attr_destroy(un->un_xbuf_attr);

	/* Remove driver properties */
	ddi_prop_remove_all(devi);

	mutex_destroy(&un->un_pm_mutex);
	cv_destroy(&un->un_pm_busy_cv);

	/* Open/close semaphore */
	sema_destroy(&un->un_semoclose);

	/* Removable media condvar. */
	cv_destroy(&un->un_state_cv);

	/* Suspend/resume condvar. */
	cv_destroy(&un->un_suspend_cv);
	cv_destroy(&un->un_disk_busy_cv);

	sd_free_rqs(un);

	/* Free up soft state */
	devp->sd_private = NULL;
	bzero(un, sizeof (struct sd_lun));
	ddi_soft_state_free(sd_state, instance);

	mutex_exit(&sd_detach_mutex);

	/* This frees up the INQUIRY data associated with the device. */
	scsi_unprobe(devp);

	return (DDI_SUCCESS);

	/*
	 * Error exits. Each label falls through to the next; all paths
	 * restore un_detach_count so a later detach attempt can proceed.
	 */
err_notclosed:
	mutex_exit(SD_MUTEX(un));

err_stillbusy:
	_NOTE(NO_COMPETING_THREADS_NOW);

err_remove_event:
	mutex_enter(&sd_detach_mutex);
	un->un_detach_count--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
	return (DDI_FAILURE);
}
9153 
9154 
9155 /*
9156  * Driver minor node structure and data table
9157  */
struct driver_minor_data {
	char	*name;		/* minor node name, e.g. "a" or "a,raw" */
	minor_t	minor;		/* minor number (slice/partition index) */
	int	type;		/* node type: S_IFBLK (block) or S_IFCHR (raw) */
};
9163 
/*
 * Minor node table for VTOC-labeled devices: one block and one raw (char)
 * node per slice.  Slices 8-15 exist only on VTOC16 platforms, and slices
 * 16-20 (q-u) are the fdisk partition nodes used where firmware requires
 * an fdisk table.  The table is terminated by a NULL name entry.
 */
static struct driver_minor_data sd_minor_data[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"h", 7, S_IFBLK},
#if defined(_SUNOS_VTOC_16)
	{"i", 8, S_IFBLK},
	{"j", 9, S_IFBLK},
	{"k", 10, S_IFBLK},
	{"l", 11, S_IFBLK},
	{"m", 12, S_IFBLK},
	{"n", 13, S_IFBLK},
	{"o", 14, S_IFBLK},
	{"p", 15, S_IFBLK},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"h,raw", 7, S_IFCHR},
#if defined(_SUNOS_VTOC_16)
	{"i,raw", 8, S_IFCHR},
	{"j,raw", 9, S_IFCHR},
	{"k,raw", 10, S_IFCHR},
	{"l,raw", 11, S_IFCHR},
	{"m,raw", 12, S_IFCHR},
	{"n,raw", 13, S_IFCHR},
	{"o,raw", 14, S_IFCHR},
	{"p,raw", 15, S_IFCHR},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9217 
/*
 * Minor node table for EFI-labeled devices (capacity > DK_MAX_BLOCKS).
 * EFI labels have fewer slices than VTOC; slice 7 is the reserved "wd"
 * node.  Terminated by a NULL name entry.
 */
static struct driver_minor_data sd_minor_data_efi[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"wd", 7, S_IFBLK},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"wd,raw", 7, S_IFCHR},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9251 
9252 
9253 /*
9254  *    Function: sd_create_minor_nodes
9255  *
9256  * Description: Create the minor device nodes for the instance.
9257  *
9258  *   Arguments: un - driver soft state (unit) structure
9259  *		devi - pointer to device info structure
9260  *
9261  * Return Code: DDI_SUCCESS
9262  *		DDI_FAILURE
9263  *
9264  *     Context: Kernel thread context
9265  */
9266 
9267 static int
9268 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9269 {
9270 	struct driver_minor_data	*dmdp;
9271 	struct scsi_device		*devp;
9272 	int				instance;
9273 	char				name[48];
9274 
9275 	ASSERT(un != NULL);
9276 	devp = ddi_get_driver_private(devi);
9277 	instance = ddi_get_instance(devp->sd_dev);
9278 
9279 	/*
9280 	 * Create all the minor nodes for this target.
9281 	 */
9282 	if (un->un_blockcount > DK_MAX_BLOCKS)
9283 		dmdp = sd_minor_data_efi;
9284 	else
9285 		dmdp = sd_minor_data;
9286 	while (dmdp->name != NULL) {
9287 
9288 		(void) sprintf(name, "%s", dmdp->name);
9289 
9290 		if (ddi_create_minor_node(devi, name, dmdp->type,
9291 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9292 		    un->un_node_type, NULL) == DDI_FAILURE) {
9293 			/*
9294 			 * Clean up any nodes that may have been created, in
9295 			 * case this fails in the middle of the loop.
9296 			 */
9297 			ddi_remove_minor_node(devi, NULL);
9298 			return (DDI_FAILURE);
9299 		}
9300 		dmdp++;
9301 	}
9302 
9303 	return (DDI_SUCCESS);
9304 }
9305 
9306 
9307 /*
9308  *    Function: sd_create_errstats
9309  *
9310  * Description: This routine instantiates the device error stats.
9311  *
9312  *		Note: During attach the stats are instantiated first so they are
9313  *		available for attach-time routines that utilize the driver
9314  *		iopath to send commands to the device. The stats are initialized
9315  *		separately so data obtained during some attach-time routines is
9316  *		available. (4362483)
9317  *
9318  *   Arguments: un - driver soft state (unit) structure
9319  *		instance - driver instance
9320  *
9321  *     Context: Kernel thread context
9322  */
9323 
9324 static void
9325 sd_create_errstats(struct sd_lun *un, int instance)
9326 {
9327 	struct	sd_errstats	*stp;
9328 	char	kstatmodule_err[KSTAT_STRLEN];
9329 	char	kstatname[KSTAT_STRLEN];
9330 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9331 
9332 	ASSERT(un != NULL);
9333 
9334 	if (un->un_errstats != NULL) {
9335 		return;
9336 	}
9337 
9338 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9339 	    "%serr", sd_label);
9340 	(void) snprintf(kstatname, sizeof (kstatname),
9341 	    "%s%d,err", sd_label, instance);
9342 
9343 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9344 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9345 
9346 	if (un->un_errstats == NULL) {
9347 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9348 		    "sd_create_errstats: Failed kstat_create\n");
9349 		return;
9350 	}
9351 
9352 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9353 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9354 	    KSTAT_DATA_UINT32);
9355 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9356 	    KSTAT_DATA_UINT32);
9357 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9358 	    KSTAT_DATA_UINT32);
9359 	kstat_named_init(&stp->sd_vid,		"Vendor",
9360 	    KSTAT_DATA_CHAR);
9361 	kstat_named_init(&stp->sd_pid,		"Product",
9362 	    KSTAT_DATA_CHAR);
9363 	kstat_named_init(&stp->sd_revision,	"Revision",
9364 	    KSTAT_DATA_CHAR);
9365 	kstat_named_init(&stp->sd_serial,	"Serial No",
9366 	    KSTAT_DATA_CHAR);
9367 	kstat_named_init(&stp->sd_capacity,	"Size",
9368 	    KSTAT_DATA_ULONGLONG);
9369 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9370 	    KSTAT_DATA_UINT32);
9371 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9372 	    KSTAT_DATA_UINT32);
9373 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9374 	    KSTAT_DATA_UINT32);
9375 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9376 	    KSTAT_DATA_UINT32);
9377 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9378 	    KSTAT_DATA_UINT32);
9379 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9380 	    KSTAT_DATA_UINT32);
9381 
9382 	un->un_errstats->ks_private = un;
9383 	un->un_errstats->ks_update  = nulldev;
9384 
9385 	kstat_install(un->un_errstats);
9386 }
9387 
9388 
9389 /*
9390  *    Function: sd_set_errstats
9391  *
9392  * Description: This routine sets the value of the vendor id, product id,
9393  *		revision, serial number, and capacity device error stats.
9394  *
9395  *		Note: During attach the stats are instantiated first so they are
9396  *		available for attach-time routines that utilize the driver
9397  *		iopath to send commands to the device. The stats are initialized
9398  *		separately so data obtained during some attach-time routines is
9399  *		available. (4362483)
9400  *
9401  *   Arguments: un - driver soft state (unit) structure
9402  *
9403  *     Context: Kernel thread context
9404  */
9405 
9406 static void
9407 sd_set_errstats(struct sd_lun *un)
9408 {
9409 	struct	sd_errstats	*stp;
9410 
9411 	ASSERT(un != NULL);
9412 	ASSERT(un->un_errstats != NULL);
9413 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9414 	ASSERT(stp != NULL);
9415 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9416 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9417 	(void) strncpy(stp->sd_revision.value.c,
9418 	    un->un_sd->sd_inq->inq_revision, 4);
9419 
9420 	/*
9421 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9422 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9423 	 * (4376302))
9424 	 */
9425 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9426 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9427 		    sizeof (SD_INQUIRY(un)->inq_serial));
9428 	}
9429 
9430 	if (un->un_f_blockcount_is_valid != TRUE) {
9431 		/*
9432 		 * Set capacity error stat to 0 for no media. This ensures
9433 		 * a valid capacity is displayed in response to 'iostat -E'
9434 		 * when no media is present in the device.
9435 		 */
9436 		stp->sd_capacity.value.ui64 = 0;
9437 	} else {
9438 		/*
9439 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9440 		 * capacity.
9441 		 *
9442 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9443 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9444 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9445 		 */
9446 		stp->sd_capacity.value.ui64 = (uint64_t)
9447 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9448 	}
9449 }
9450 
9451 
9452 /*
9453  *    Function: sd_set_pstats
9454  *
9455  * Description: This routine instantiates and initializes the partition
9456  *              stats for each partition with more than zero blocks.
9457  *		(4363169)
9458  *
9459  *   Arguments: un - driver soft state (unit) structure
9460  *
9461  *     Context: Kernel thread context
9462  */
9463 
9464 static void
9465 sd_set_pstats(struct sd_lun *un)
9466 {
9467 	char	kstatname[KSTAT_STRLEN];
9468 	int	instance;
9469 	int	i;
9470 
9471 	ASSERT(un != NULL);
9472 
9473 	instance = ddi_get_instance(SD_DEVINFO(un));
9474 
9475 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9476 	for (i = 0; i < NSDMAP; i++) {
9477 		if ((un->un_pstats[i] == NULL) &&
9478 		    (un->un_map[i].dkl_nblk != 0)) {
9479 			(void) snprintf(kstatname, sizeof (kstatname),
9480 			    "%s%d,%s", sd_label, instance,
9481 			    sd_minor_data[i].name);
9482 			un->un_pstats[i] = kstat_create(sd_label,
9483 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9484 			    1, KSTAT_FLAG_PERSISTENT);
9485 			if (un->un_pstats[i] != NULL) {
9486 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9487 				kstat_install(un->un_pstats[i]);
9488 			}
9489 		}
9490 	}
9491 }
9492 
9493 
9494 #if (defined(__fibre))
9495 /*
9496  *    Function: sd_init_event_callbacks
9497  *
9498  * Description: This routine initializes the insertion and removal event
9499  *		callbacks. (fibre only)
9500  *
9501  *   Arguments: un - driver soft state (unit) structure
9502  *
9503  *     Context: Kernel thread context
9504  */
9505 
9506 static void
9507 sd_init_event_callbacks(struct sd_lun *un)
9508 {
9509 	ASSERT(un != NULL);
9510 
9511 	if ((un->un_insert_event == NULL) &&
9512 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9513 	    &un->un_insert_event) == DDI_SUCCESS)) {
9514 		/*
9515 		 * Add the callback for an insertion event
9516 		 */
9517 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9518 		    un->un_insert_event, sd_event_callback, (void *)un,
9519 		    &(un->un_insert_cb_id));
9520 	}
9521 
9522 	if ((un->un_remove_event == NULL) &&
9523 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9524 	    &un->un_remove_event) == DDI_SUCCESS)) {
9525 		/*
9526 		 * Add the callback for a removal event
9527 		 */
9528 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9529 		    un->un_remove_event, sd_event_callback, (void *)un,
9530 		    &(un->un_remove_cb_id));
9531 	}
9532 }
9533 
9534 
9535 /*
9536  *    Function: sd_event_callback
9537  *
9538  * Description: This routine handles insert/remove events (photon). The
9539  *		state is changed to OFFLINE which can be used to supress
9540  *		error msgs. (fibre only)
9541  *
9542  *   Arguments: un - driver soft state (unit) structure
9543  *
9544  *     Context: Callout thread context
9545  */
9546 /* ARGSUSED */
static void
sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
    void *bus_impldata)
{
	struct sd_lun *un = (struct sd_lun *)arg;

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
	if (event == un->un_insert_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
		mutex_enter(SD_MUTEX(un));
		/*
		 * Device re-inserted: if we had marked it OFFLINE, restore
		 * the state it was in before going offline.
		 */
		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_last_state != SD_STATE_SUSPENDED) {
				un->un_state = un->un_last_state;
			} else {
				/*
				 * We have gone through SUSPEND/RESUME while
				 * we were offline. Restore the last state
				 */
				un->un_state = un->un_save_state;
			}
		}
		mutex_exit(SD_MUTEX(un));

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
	} else if (event == un->un_remove_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
		mutex_enter(SD_MUTEX(un));
		/*
		 * We need to handle an event callback that occurs during
		 * the suspend operation, since we don't prevent it.
		 */
		if (un->un_state != SD_STATE_OFFLINE) {
			if (un->un_state != SD_STATE_SUSPENDED) {
				/* Normal case: transition to OFFLINE. */
				New_state(un, SD_STATE_OFFLINE);
			} else {
				/*
				 * Suspended: record OFFLINE as the state to
				 * restore to after resume.
				 */
				un->un_last_state = SD_STATE_OFFLINE;
			}
		}
		mutex_exit(SD_MUTEX(un));
	} else {
		/* Cookie matches neither registered event; log and ignore. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "!Unknown event\n");
	}

}
9592 #endif
9593 
9594 
9595 /*
9596  *    Function: sd_disable_caching()
9597  *
9598  * Description: This routine is the driver entry point for disabling
9599  *		read and write caching by modifying the WCE (write cache
9600  *		enable) and RCD (read cache disable) bits of mode
9601  *		page 8 (MODEPAGE_CACHING).
9602  *
9603  *   Arguments: un - driver soft state (unit) structure
9604  *
9605  * Return Code: EIO
9606  *		code returned by sd_send_scsi_MODE_SENSE and
9607  *		sd_send_scsi_MODE_SELECT
9608  *
9609  *     Context: Kernel Thread
9610  */
9611 
static int
sd_disable_caching(struct sd_lun *un)
{
	struct mode_caching	*mode_caching_page;
	uchar_t			*header;
	size_t			buflen;
	int			hdrlen;
	int			bd_len;
	int			rval = 0;

	ASSERT(un != NULL);

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 */
	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);

	/* ATAPI devices use the 8-byte group-2 mode header; SCSI use 4. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		hdrlen = MODE_HEADER_LENGTH_GRP2;
	} else {
		hdrlen = MODE_HEADER_LENGTH;
	}

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Space is reserved for the maximum
	 * block descriptor length even though ATAPI may return none.
	 */
	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
	header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the current caching mode page (page 8) from the device. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	} else {
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	}
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sd_disable_caching: Mode Sense Failed\n");
		kmem_free(header, buflen);
		return (rval);
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2	*mhp;
		mhp	= (struct mode_header_grp2 *)header;
		/* Group-2 header carries the descriptor length in two bytes */
		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len  = ((struct mode_header *)header)->bdesc_length;
	}

	/* A larger descriptor would put the page beyond our buffer. */
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_disable_caching: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(header, buflen);
		return (EIO);
	}

	/* Mode page follows the header and any block descriptor(s). */
	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);

	/* Check the relevant bits on successful mode sense. */
	if ((mode_caching_page->wce) || !(mode_caching_page->rcd)) {
		/*
		 * Read or write caching is enabled.  Disable both of them.
		 */
		mode_caching_page->wce = 0;
		mode_caching_page->rcd = 1;

		/* Clear reserved bits before mode select. */
		mode_caching_page->ps = 0;

		/*
		 * Clear out mode header for mode select.
		 * The rest of the retrieved page will be reused.
		 */
		bzero(header, hdrlen);

		/* Change the cache page to disable all caching. */
		if (un->un_f_cfg_is_atapi == TRUE) {
			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
			    buflen, SD_SAVE_PAGE, SD_PATH_DIRECT);
		} else {
			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
			    buflen, SD_SAVE_PAGE, SD_PATH_DIRECT);
		}
	}

	kmem_free(header, buflen);
	return (rval);
}
9711 
9712 
9713 /*
9714  *    Function: sd_make_device
9715  *
9716  * Description: Utility routine to return the Solaris device number from
9717  *		the data in the device's dev_info structure.
9718  *
9719  * Return Code: The Solaris device number
9720  *
9721  *     Context: Any
9722  */
9723 
9724 static dev_t
9725 sd_make_device(dev_info_t *devi)
9726 {
9727 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
9728 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9729 }
9730 
9731 
9732 /*
9733  *    Function: sd_pm_entry
9734  *
9735  * Description: Called at the start of a new command to manage power
9736  *		and busy status of a device. This includes determining whether
9737  *		the current power state of the device is sufficient for
9738  *		performing the command or whether it must be changed.
9739  *		The PM framework is notified appropriately.
9740  *		Only with a return status of DDI_SUCCESS will the
9741  *		component be busy to the framework.
9742  *
9743  *		All callers of sd_pm_entry must check the return status
9744  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9745  *		of DDI_FAILURE indicates the device failed to power up.
9746  *		In this case un_pm_count has been adjusted so the result
9747  *		on exit is still powered down, ie. count is less than 0.
9748  *		Calling sd_pm_exit with this count value hits an ASSERT.
9749  *
9750  * Return Code: DDI_SUCCESS or DDI_FAILURE
9751  *
9752  *     Context: Kernel thread context.
9753  */
9754 
static int
sd_pm_entry(struct sd_lun *un)
{
	int return_status = DDI_SUCCESS;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");

	/* Nothing to do when PM is disabled for this unit. */
	if (un->un_f_pm_is_enabled == FALSE) {
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_entry: exiting, PM not enabled\n");
		return (return_status);
	}

	/*
	 * Just increment a counter if PM is enabled. On the transition from
	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
	 * the count with each IO and mark the device as idle when the count
	 * hits 0.
	 *
	 * If the count is less than 0 the device is powered down. If a powered
	 * down device is successfully powered up then the count must be
	 * incremented to reflect the power up. Note that it'll get incremented
	 * a second time to become busy.
	 *
	 * Because the following has the potential to change the device state
	 * and must release the un_pm_mutex to do so, only one thread can be
	 * allowed through at a time.
	 */

	mutex_enter(&un->un_pm_mutex);
	/* Serialize: wait until no other thread is in the section below. */
	while (un->un_pm_busy == TRUE) {
		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
	}
	un->un_pm_busy = TRUE;

	if (un->un_pm_count < 1) {

		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");

		/*
		 * Indicate we are now busy so the framework won't attempt to
		 * power down the device. This call will only fail if either
		 * we passed a bad component number or the device has no
		 * components. Neither of these should ever happen.
		 */
		mutex_exit(&un->un_pm_mutex);
		return_status = pm_busy_component(SD_DEVINFO(un), 0);
		ASSERT(return_status == DDI_SUCCESS);

		mutex_enter(&un->un_pm_mutex);

		/* Negative count means the device is currently powered down. */
		if (un->un_pm_count < 0) {
			mutex_exit(&un->un_pm_mutex);

			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: power up component\n");

			/*
			 * pm_raise_power will cause sdpower to be called
			 * which brings the device power level to the
			 * desired state, ON in this case. If successful,
			 * un_pm_count and un_power_level will be updated
			 * appropriately.
			 */
			return_status = pm_raise_power(SD_DEVINFO(un), 0,
			    SD_SPINDLE_ON);

			mutex_enter(&un->un_pm_mutex);

			if (return_status != DDI_SUCCESS) {
				/*
				 * Power up failed.
				 * Idle the device and adjust the count
				 * so the result on exit is that we're
				 * still powered down, ie. count is less than 0.
				 */
				SD_TRACE(SD_LOG_IO_PM, un,
				    "sd_pm_entry: power up failed,"
				    " idle the component\n");

				(void) pm_idle_component(SD_DEVINFO(un), 0);
				un->un_pm_count--;
			} else {
				/*
				 * Device is powered up, verify the
				 * count is non-negative.
				 * This is debug only.
				 */
				ASSERT(un->un_pm_count == 0);
			}
		}

		if (return_status == DDI_SUCCESS) {
			/*
			 * For performance, now that the device has been tagged
			 * as busy, and it's known to be powered up, update the
			 * chain types to use jump tables that do not include
			 * pm. This significantly lowers the overhead and
			 * therefore improves performance.
			 */

			mutex_exit(&un->un_pm_mutex);
			mutex_enter(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
			    un->un_uscsi_chain_type);

			if (ISREMOVABLE(un)) {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
			} else {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_DISK_NO_PM;
			}
			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;

			SD_TRACE(SD_LOG_IO_PM, un,
			    "             changed  uscsi_chain_type to   %d\n",
			    un->un_uscsi_chain_type);
			mutex_exit(SD_MUTEX(un));
			mutex_enter(&un->un_pm_mutex);

			if (un->un_pm_idle_timeid == NULL) {
				/* 300 ms. */
				un->un_pm_idle_timeid =
				    timeout(sd_pm_idletimeout_handler, un,
				    (drv_usectohz((clock_t)300000)));
				/*
				 * Include an extra call to busy which keeps the
				 * device busy with-respect-to the PM layer
				 * until the timer fires, at which time it'll
				 * get the extra idle call.
				 */
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			}
		}
	}
	/* Release the serialization gate and wake the next waiter. */
	un->un_pm_busy = FALSE;
	/* Next... */
	cv_signal(&un->un_pm_busy_cv);

	/* One more command in flight against this unit. */
	un->un_pm_count++;

	SD_TRACE(SD_LOG_IO_PM, un,
	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);

	mutex_exit(&un->un_pm_mutex);

	return (return_status);
}
9908 
9909 
9910 /*
9911  *    Function: sd_pm_exit
9912  *
9913  * Description: Called at the completion of a command to manage busy
9914  *		status for the device. If the device becomes idle the
9915  *		PM framework is notified.
9916  *
9917  *     Context: Kernel thread context
9918  */
9919 
9920 static void
9921 sd_pm_exit(struct sd_lun *un)
9922 {
9923 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9924 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9925 
9926 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9927 
9928 	/*
9929 	 * After attach the following flag is only read, so don't
9930 	 * take the penalty of acquiring a mutex for it.
9931 	 */
9932 	if (un->un_f_pm_is_enabled == TRUE) {
9933 
9934 		mutex_enter(&un->un_pm_mutex);
9935 		un->un_pm_count--;
9936 
9937 		SD_TRACE(SD_LOG_IO_PM, un,
9938 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9939 
9940 		ASSERT(un->un_pm_count >= 0);
9941 		if (un->un_pm_count == 0) {
9942 			mutex_exit(&un->un_pm_mutex);
9943 
9944 			SD_TRACE(SD_LOG_IO_PM, un,
9945 			    "sd_pm_exit: idle component\n");
9946 
9947 			(void) pm_idle_component(SD_DEVINFO(un), 0);
9948 
9949 		} else {
9950 			mutex_exit(&un->un_pm_mutex);
9951 		}
9952 	}
9953 
9954 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
9955 }
9956 
9957 
9958 /*
9959  *    Function: sdopen
9960  *
9961  * Description: Driver's open(9e) entry point function.
9962  *
9963  *   Arguments: dev_i   - pointer to device number
9964  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
9965  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9966  *		cred_p  - user credential pointer
9967  *
9968  * Return Code: EINVAL
9969  *		ENXIO
9970  *		EIO
9971  *		EROFS
9972  *		EBUSY
9973  *
9974  *     Context: Kernel thread context
9975  */
9976 /* ARGSUSED */
9977 static int
9978 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
9979 {
9980 	struct sd_lun	*un;
9981 	int		nodelay;
9982 	int		part;
9983 	uint64_t	partmask;
9984 	int		instance;
9985 	dev_t		dev;
9986 	int		rval = EIO;
9987 
9988 	/* Validate the open type */
9989 	if (otyp >= OTYPCNT) {
9990 		return (EINVAL);
9991 	}
9992 
9993 	dev = *dev_p;
9994 	instance = SDUNIT(dev);
9995 	mutex_enter(&sd_detach_mutex);
9996 
9997 	/*
9998 	 * Fail the open if there is no softstate for the instance, or
9999 	 * if another thread somewhere is trying to detach the instance.
10000 	 */
10001 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10002 	    (un->un_detach_count != 0)) {
10003 		mutex_exit(&sd_detach_mutex);
10004 		/*
10005 		 * The probe cache only needs to be cleared when open (9e) fails
10006 		 * with ENXIO (4238046).
10007 		 */
10008 		/*
10009 		 * un-conditionally clearing probe cache is ok with
10010 		 * separate sd/ssd binaries
10011 		 * x86 platform can be an issue with both parallel
10012 		 * and fibre in 1 binary
10013 		 */
10014 		sd_scsi_clear_probe_cache();
10015 		return (ENXIO);
10016 	}
10017 
10018 	/*
10019 	 * The un_layer_count is to prevent another thread in specfs from
10020 	 * trying to detach the instance, which can happen when we are
10021 	 * called from a higher-layer driver instead of thru specfs.
10022 	 * This will not be needed when DDI provides a layered driver
10023 	 * interface that allows specfs to know that an instance is in
10024 	 * use by a layered driver & should not be detached.
10025 	 *
10026 	 * Note: the semantics for layered driver opens are exactly one
10027 	 * close for every open.
10028 	 */
10029 	if (otyp == OTYP_LYR) {
10030 		un->un_layer_count++;
10031 	}
10032 
10033 	/*
10034 	 * Keep a count of the current # of opens in progress. This is because
10035 	 * some layered drivers try to call us as a regular open. This can
10036 	 * cause problems that we cannot prevent, however by keeping this count
10037 	 * we can at least keep our open and detach routines from racing against
10038 	 * each other under such conditions.
10039 	 */
10040 	un->un_opens_in_progress++;
10041 	mutex_exit(&sd_detach_mutex);
10042 
10043 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10044 	part	 = SDPART(dev);
10045 	partmask = 1 << part;
10046 
10047 	/*
10048 	 * We use a semaphore here in order to serialize
10049 	 * open and close requests on the device.
10050 	 */
10051 	sema_p(&un->un_semoclose);
10052 
10053 	mutex_enter(SD_MUTEX(un));
10054 
10055 	/*
10056 	 * All device accesses go thru sdstrategy() where we check
10057 	 * on suspend status but there could be a scsi_poll command,
10058 	 * which bypasses sdstrategy(), so we need to check pm
10059 	 * status.
10060 	 */
10061 
10062 	if (!nodelay) {
10063 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10064 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10065 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10066 		}
10067 
10068 		mutex_exit(SD_MUTEX(un));
10069 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10070 			rval = EIO;
10071 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10072 			    "sdopen: sd_pm_entry failed\n");
10073 			goto open_failed_with_pm;
10074 		}
10075 		mutex_enter(SD_MUTEX(un));
10076 	}
10077 
10078 	/* check for previous exclusive open */
10079 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10080 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10081 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10082 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10083 
10084 	if (un->un_exclopen & (partmask)) {
10085 		goto excl_open_fail;
10086 	}
10087 
10088 	if (flag & FEXCL) {
10089 		int i;
10090 		if (un->un_ocmap.lyropen[part]) {
10091 			goto excl_open_fail;
10092 		}
10093 		for (i = 0; i < (OTYPCNT - 1); i++) {
10094 			if (un->un_ocmap.regopen[i] & (partmask)) {
10095 				goto excl_open_fail;
10096 			}
10097 		}
10098 	}
10099 
10100 	/*
10101 	 * Check the write permission if this is a removable media device,
10102 	 * NDELAY has not been set, and writable permission is requested.
10103 	 *
10104 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10105 	 * attempt will fail with EIO as part of the I/O processing. This is a
10106 	 * more permissive implementation that allows the open to succeed and
10107 	 * WRITE attempts to fail when appropriate.
10108 	 */
10109 	if (ISREMOVABLE(un)) {
10110 		if ((flag & FWRITE) && (!nodelay)) {
10111 			mutex_exit(SD_MUTEX(un));
10112 			/*
10113 			 * Defer the check for write permission on writable
10114 			 * DVD drive till sdstrategy and will not fail open even
10115 			 * if FWRITE is set as the device can be writable
10116 			 * depending upon the media and the media can change
10117 			 * after the call to open().
10118 			 */
10119 			if (un->un_f_dvdram_writable_device == FALSE) {
10120 				if (ISCD(un) || sr_check_wp(dev)) {
10121 				rval = EROFS;
10122 				mutex_enter(SD_MUTEX(un));
10123 				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10124 				    "write to cd or write protected media\n");
10125 				goto open_fail;
10126 				}
10127 			}
10128 			mutex_enter(SD_MUTEX(un));
10129 		}
10130 	}
10131 
10132 	/*
10133 	 * If opening in NDELAY/NONBLOCK mode, just return.
10134 	 * Check if disk is ready and has a valid geometry later.
10135 	 */
10136 	if (!nodelay) {
10137 		mutex_exit(SD_MUTEX(un));
10138 		rval = sd_ready_and_valid(un);
10139 		mutex_enter(SD_MUTEX(un));
10140 		/*
10141 		 * Fail if device is not ready or if the number of disk
10142 		 * blocks is zero or negative for non CD devices.
10143 		 */
10144 		if ((rval != SD_READY_VALID) ||
10145 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10146 			if (ISREMOVABLE(un)) {
10147 				rval = ENXIO;
10148 			} else {
10149 				rval = EIO;
10150 			}
10151 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10152 			    "device not ready or invalid disk block value\n");
10153 			goto open_fail;
10154 		}
10155 #if defined(__i386) || defined(__amd64)
10156 	} else {
10157 		uchar_t *cp;
10158 		/*
10159 		 * x86 requires special nodelay handling, so that p0 is
10160 		 * always defined and accessible.
10161 		 * Invalidate geometry only if device is not already open.
10162 		 */
10163 		cp = &un->un_ocmap.chkd[0];
10164 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10165 			if (*cp != (uchar_t)0) {
10166 			    break;
10167 			}
10168 			cp++;
10169 		}
10170 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10171 			un->un_f_geometry_is_valid = FALSE;
10172 		}
10173 
10174 #endif
10175 	}
10176 
10177 	if (otyp == OTYP_LYR) {
10178 		un->un_ocmap.lyropen[part]++;
10179 	} else {
10180 		un->un_ocmap.regopen[otyp] |= partmask;
10181 	}
10182 
10183 	/* Set up open and exclusive open flags */
10184 	if (flag & FEXCL) {
10185 		un->un_exclopen |= (partmask);
10186 	}
10187 
10188 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10189 	    "open of part %d type %d\n", part, otyp);
10190 
10191 	mutex_exit(SD_MUTEX(un));
10192 	if (!nodelay) {
10193 		sd_pm_exit(un);
10194 	}
10195 
10196 	sema_v(&un->un_semoclose);
10197 
10198 	mutex_enter(&sd_detach_mutex);
10199 	un->un_opens_in_progress--;
10200 	mutex_exit(&sd_detach_mutex);
10201 
10202 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10203 	return (DDI_SUCCESS);
10204 
10205 excl_open_fail:
10206 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10207 	rval = EBUSY;
10208 
10209 open_fail:
10210 	mutex_exit(SD_MUTEX(un));
10211 
10212 	/*
10213 	 * On a failed open we must exit the pm management.
10214 	 */
10215 	if (!nodelay) {
10216 		sd_pm_exit(un);
10217 	}
10218 open_failed_with_pm:
10219 	sema_v(&un->un_semoclose);
10220 
10221 	mutex_enter(&sd_detach_mutex);
10222 	un->un_opens_in_progress--;
10223 	if (otyp == OTYP_LYR) {
10224 		un->un_layer_count--;
10225 	}
10226 	mutex_exit(&sd_detach_mutex);
10227 
10228 	return (rval);
10229 }
10230 
10231 
10232 /*
10233  *    Function: sdclose
10234  *
10235  * Description: Driver's close(9e) entry point function.
10236  *
10237  *   Arguments: dev    - device number
10238  *		flag   - file status flag, informational only
10239  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10240  *		cred_p - user credential pointer
10241  *
10242  * Return Code: ENXIO
10243  *
10244  *     Context: Kernel thread context
10245  */
10246 /* ARGSUSED */
10247 static int
10248 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10249 {
10250 	struct sd_lun	*un;
10251 	uchar_t		*cp;
10252 	int		part;
10253 	int		nodelay;
10254 	int		rval = 0;
10255 
10256 	/* Validate the open type */
10257 	if (otyp >= OTYPCNT) {
10258 		return (ENXIO);
10259 	}
10260 
10261 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10262 		return (ENXIO);
10263 	}
10264 
10265 	part = SDPART(dev);
10266 	nodelay = flag & (FNDELAY | FNONBLOCK);
10267 
10268 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10269 	    "sdclose: close of part %d type %d\n", part, otyp);
10270 
10271 	/*
10272 	 * We use a semaphore here in order to serialize
10273 	 * open and close requests on the device.
10274 	 */
10275 	sema_p(&un->un_semoclose);
10276 
10277 	mutex_enter(SD_MUTEX(un));
10278 
10279 	/* Don't proceed if power is being changed. */
10280 	while (un->un_state == SD_STATE_PM_CHANGING) {
10281 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10282 	}
10283 
10284 	if (un->un_exclopen & (1 << part)) {
10285 		un->un_exclopen &= ~(1 << part);
10286 	}
10287 
10288 	/* Update the open partition map */
10289 	if (otyp == OTYP_LYR) {
10290 		un->un_ocmap.lyropen[part] -= 1;
10291 	} else {
10292 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10293 	}
10294 
10295 	cp = &un->un_ocmap.chkd[0];
10296 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10297 		if (*cp != NULL) {
10298 			break;
10299 		}
10300 		cp++;
10301 	}
10302 
10303 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10304 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10305 
10306 		/*
10307 		 * We avoid persistance upon the last close, and set
10308 		 * the throttle back to the maximum.
10309 		 */
10310 		un->un_throttle = un->un_saved_throttle;
10311 
10312 		if (un->un_state == SD_STATE_OFFLINE) {
10313 			if (un->un_f_is_fibre == FALSE) {
10314 				scsi_log(SD_DEVINFO(un), sd_label,
10315 					CE_WARN, "offline\n");
10316 			}
10317 			un->un_f_geometry_is_valid = FALSE;
10318 
10319 		} else {
10320 			/*
10321 			 * Flush any outstanding writes in NVRAM cache.
10322 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10323 			 * cmd, it may not work for non-Pluto devices.
10324 			 * SYNCHRONIZE CACHE is not required for removables,
10325 			 * except DVD-RAM drives.
10326 			 *
10327 			 * Also note: because SYNCHRONIZE CACHE is currently
10328 			 * the only command issued here that requires the
10329 			 * drive be powered up, only do the power up before
10330 			 * sending the Sync Cache command. If additional
10331 			 * commands are added which require a powered up
10332 			 * drive, the following sequence may have to change.
10333 			 *
10334 			 * And finally, note that parallel SCSI on SPARC
10335 			 * only issues a Sync Cache to DVD-RAM, a newly
10336 			 * supported device.
10337 			 */
10338 #if defined(__i386) || defined(__amd64)
10339 			if (!ISREMOVABLE(un) ||
10340 			    un->un_f_dvdram_writable_device == TRUE) {
10341 #else
10342 			if (un->un_f_dvdram_writable_device == TRUE) {
10343 #endif
10344 				mutex_exit(SD_MUTEX(un));
10345 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10346 					if (sd_send_scsi_SYNCHRONIZE_CACHE(un)
10347 					    != 0) {
10348 						rval = EIO;
10349 					}
10350 					sd_pm_exit(un);
10351 				} else {
10352 					rval = EIO;
10353 				}
10354 				mutex_enter(SD_MUTEX(un));
10355 			}
10356 
10357 			/*
10358 			 * For removable media devices, send an ALLOW MEDIA
10359 			 * REMOVAL command, but don't get upset if it fails.
10360 			 * Also invalidate the geometry. We need to raise
10361 			 * the power of the drive before we can call
10362 			 * sd_send_scsi_DOORLOCK()
10363 			 */
10364 			if (ISREMOVABLE(un)) {
10365 				mutex_exit(SD_MUTEX(un));
10366 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10367 					rval = sd_send_scsi_DOORLOCK(un,
10368 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10369 
10370 					sd_pm_exit(un);
10371 					if (ISCD(un) && (rval != 0) &&
10372 					    (nodelay != 0)) {
10373 						rval = ENXIO;
10374 					}
10375 				} else {
10376 					rval = EIO;
10377 				}
10378 				mutex_enter(SD_MUTEX(un));
10379 
10380 				sr_ejected(un);
10381 				/*
10382 				 * Destroy the cache (if it exists) which was
10383 				 * allocated for the write maps since this is
10384 				 * the last close for this media.
10385 				 */
10386 				if (un->un_wm_cache) {
10387 					/*
10388 					 * Check if there are pending commands.
10389 					 * and if there are give a warning and
10390 					 * do not destroy the cache.
10391 					 */
10392 					if (un->un_ncmds_in_driver > 0) {
10393 						scsi_log(SD_DEVINFO(un),
10394 						    sd_label, CE_WARN,
10395 						    "Unable to clean up memory "
10396 						    "because of pending I/O\n");
10397 					} else {
10398 						kmem_cache_destroy(
10399 						    un->un_wm_cache);
10400 						un->un_wm_cache = NULL;
10401 					}
10402 				}
10403 			}
10404 		}
10405 	}
10406 
10407 	mutex_exit(SD_MUTEX(un));
10408 	sema_v(&un->un_semoclose);
10409 
10410 	if (otyp == OTYP_LYR) {
10411 		mutex_enter(&sd_detach_mutex);
10412 		/*
10413 		 * The detach routine may run when the layer count
10414 		 * drops to zero.
10415 		 */
10416 		un->un_layer_count--;
10417 		mutex_exit(&sd_detach_mutex);
10418 	}
10419 
10420 	return (rval);
10421 }
10422 
10423 
10424 /*
10425  *    Function: sd_ready_and_valid
10426  *
10427  * Description: Test if device is ready and has a valid geometry.
10428  *
10429  *   Arguments: dev - device number
10430  *		un  - driver soft state (unit) structure
10431  *
10432  * Return Code: SD_READY_VALID		ready and valid label
10433  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10434  *		SD_NOT_READY_VALID	not ready, no label
10435  *
10436  *     Context: Never called at interrupt context.
10437  */
10438 
10439 static int
10440 sd_ready_and_valid(struct sd_lun *un)
10441 {
10442 	struct sd_errstats	*stp;
10443 	uint64_t		capacity;
10444 	uint_t			lbasize;
10445 	int			rval = SD_READY_VALID;
10446 	char			name_str[48];
10447 
10448 	ASSERT(un != NULL);
10449 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10450 
10451 	mutex_enter(SD_MUTEX(un));
10452 	if (ISREMOVABLE(un)) {
10453 		mutex_exit(SD_MUTEX(un));
10454 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10455 			rval = SD_NOT_READY_VALID;
10456 			mutex_enter(SD_MUTEX(un));
10457 			goto done;
10458 		}
10459 
10460 		mutex_enter(SD_MUTEX(un));
10461 		if ((un->un_f_geometry_is_valid == FALSE) ||
10462 		    (un->un_f_blockcount_is_valid == FALSE) ||
10463 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10464 
10465 			/* capacity has to be read every open. */
10466 			mutex_exit(SD_MUTEX(un));
10467 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10468 			    &lbasize, SD_PATH_DIRECT) != 0) {
10469 				mutex_enter(SD_MUTEX(un));
10470 				un->un_f_geometry_is_valid = FALSE;
10471 				rval = SD_NOT_READY_VALID;
10472 				goto done;
10473 			} else {
10474 				mutex_enter(SD_MUTEX(un));
10475 				sd_update_block_info(un, lbasize, capacity);
10476 			}
10477 		}
10478 
10479 		/*
10480 		 * If this is a non 512 block device, allocate space for
10481 		 * the wmap cache. This is being done here since every time
10482 		 * a media is changed this routine will be called and the
10483 		 * block size is a function of media rather than device.
10484 		 */
10485 		if (NOT_DEVBSIZE(un)) {
10486 			if (!(un->un_wm_cache)) {
10487 				(void) snprintf(name_str, sizeof (name_str),
10488 				    "%s%d_cache",
10489 				    ddi_driver_name(SD_DEVINFO(un)),
10490 				    ddi_get_instance(SD_DEVINFO(un)));
10491 				un->un_wm_cache = kmem_cache_create(
10492 				    name_str, sizeof (struct sd_w_map),
10493 				    8, sd_wm_cache_constructor,
10494 				    sd_wm_cache_destructor, NULL,
10495 				    (void *)un, NULL, 0);
10496 				if (!(un->un_wm_cache)) {
10497 					rval = ENOMEM;
10498 					goto done;
10499 				}
10500 			}
10501 		}
10502 
10503 		/*
10504 		 * Check if the media in the device is writable or not.
10505 		 */
10506 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10507 			sd_check_for_writable_cd(un);
10508 		}
10509 
10510 	} else {
10511 		/*
10512 		 * Do a test unit ready to clear any unit attention from non-cd
10513 		 * devices.
10514 		 */
10515 		mutex_exit(SD_MUTEX(un));
10516 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10517 		mutex_enter(SD_MUTEX(un));
10518 	}
10519 
10520 
10521 	if (un->un_state == SD_STATE_NORMAL) {
10522 		/*
10523 		 * If the target is not yet ready here (defined by a TUR
10524 		 * failure), invalidate the geometry and print an 'offline'
10525 		 * message. This is a legacy message, as the state of the
10526 		 * target is not actually changed to SD_STATE_OFFLINE.
10527 		 *
10528 		 * If the TUR fails for EACCES (Reservation Conflict), it
10529 		 * means there actually is nothing wrong with the target that
10530 		 * would require invalidating the geometry, so continue in
10531 		 * that case as if the TUR was successful.
10532 		 */
10533 		int err;
10534 
10535 		mutex_exit(SD_MUTEX(un));
10536 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10537 		mutex_enter(SD_MUTEX(un));
10538 
10539 		if ((err != 0) && (err != EACCES)) {
10540 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10541 			    "offline\n");
10542 			un->un_f_geometry_is_valid = FALSE;
10543 			rval = SD_NOT_READY_VALID;
10544 			goto done;
10545 		}
10546 	}
10547 
10548 	if (un->un_f_format_in_progress == FALSE) {
10549 		/*
10550 		 * Note: sd_validate_geometry may return TRUE, but that does
10551 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10552 		 */
10553 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10554 		if (rval == ENOTSUP) {
10555 			if (un->un_f_geometry_is_valid == TRUE)
10556 				rval = 0;
10557 			else {
10558 				rval = SD_READY_NOT_VALID;
10559 				goto done;
10560 			}
10561 		}
10562 		if (rval != 0) {
10563 			/*
10564 			 * We don't check the validity of geometry for
10565 			 * CDROMs. Also we assume we have a good label
10566 			 * even if sd_validate_geometry returned ENOMEM.
10567 			 */
10568 			if (!ISCD(un) && rval != ENOMEM) {
10569 				rval = SD_NOT_READY_VALID;
10570 				goto done;
10571 			}
10572 		}
10573 	}
10574 
10575 #ifdef DOESNTWORK /* on eliteII, see 1118607 */
10576 	/*
10577 	 * check to see if this disk is write protected, if it is and we have
10578 	 * not set read-only, then fail
10579 	 */
10580 	if ((flag & FWRITE) && (sr_check_wp(dev))) {
10581 		New_state(un, SD_STATE_CLOSED);
10582 		goto done;
10583 	}
10584 #endif
10585 
10586 	/*
10587 	 * If this is a removable media device, try and send
10588 	 * a PREVENT MEDIA REMOVAL command, but don't get upset
10589 	 * if it fails. For a CD, however, it is an error
10590 	 */
10591 	if (ISREMOVABLE(un)) {
10592 		mutex_exit(SD_MUTEX(un));
10593 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10594 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10595 			rval = SD_NOT_READY_VALID;
10596 			mutex_enter(SD_MUTEX(un));
10597 			goto done;
10598 		}
10599 		mutex_enter(SD_MUTEX(un));
10600 	}
10601 
10602 	/* The state has changed, inform the media watch routines */
10603 	un->un_mediastate = DKIO_INSERTED;
10604 	cv_broadcast(&un->un_state_cv);
10605 	rval = SD_READY_VALID;
10606 
10607 done:
10608 
10609 	/*
10610 	 * Initialize the capacity kstat value, if no media previously
10611 	 * (capacity kstat is 0) and a media has been inserted
10612 	 * (un_blockcount > 0).
10613 	 * This is a more generic way then checking for ISREMOVABLE.
10614 	 */
10615 	if (un->un_errstats != NULL) {
10616 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10617 		if ((stp->sd_capacity.value.ui64 == 0) &&
10618 		    (un->un_f_blockcount_is_valid == TRUE)) {
10619 			stp->sd_capacity.value.ui64 =
10620 			    (uint64_t)((uint64_t)un->un_blockcount *
10621 			    un->un_sys_blocksize);
10622 		}
10623 	}
10624 
10625 	mutex_exit(SD_MUTEX(un));
10626 	return (rval);
10627 }
10628 
10629 
10630 /*
10631  *    Function: sdmin
10632  *
10633  * Description: Routine to limit the size of a data transfer. Used in
10634  *		conjunction with physio(9F).
10635  *
10636  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10637  *
10638  *     Context: Kernel thread context.
10639  */
10640 
10641 static void
10642 sdmin(struct buf *bp)
10643 {
10644 	struct sd_lun	*un;
10645 	int		instance;
10646 
10647 	instance = SDUNIT(bp->b_edev);
10648 
10649 	un = ddi_get_soft_state(sd_state, instance);
10650 	ASSERT(un != NULL);
10651 
10652 	if (bp->b_bcount > un->un_max_xfer_size) {
10653 		bp->b_bcount = un->un_max_xfer_size;
10654 	}
10655 }
10656 
10657 
10658 /*
10659  *    Function: sdread
10660  *
10661  * Description: Driver's read(9e) entry point function.
10662  *
10663  *   Arguments: dev   - device number
10664  *		uio   - structure pointer describing where data is to be stored
10665  *			in user's space
10666  *		cred_p  - user credential pointer
10667  *
10668  * Return Code: ENXIO
10669  *		EIO
10670  *		EINVAL
10671  *		value returned by physio
10672  *
10673  *     Context: Kernel thread context.
10674  */
10675 /* ARGSUSED */
10676 static int
10677 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10678 {
10679 	struct sd_lun	*un = NULL;
10680 	int		secmask;
10681 	int		err;
10682 
10683 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10684 		return (ENXIO);
10685 	}
10686 
10687 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10688 
10689 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10690 		mutex_enter(SD_MUTEX(un));
10691 		/*
10692 		 * Because the call to sd_ready_and_valid will issue I/O we
10693 		 * must wait here if either the device is suspended or
10694 		 * if it's power level is changing.
10695 		 */
10696 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10697 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10698 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10699 		}
10700 		un->un_ncmds_in_driver++;
10701 		mutex_exit(SD_MUTEX(un));
10702 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10703 			mutex_enter(SD_MUTEX(un));
10704 			un->un_ncmds_in_driver--;
10705 			ASSERT(un->un_ncmds_in_driver >= 0);
10706 			mutex_exit(SD_MUTEX(un));
10707 			return (EIO);
10708 		}
10709 		mutex_enter(SD_MUTEX(un));
10710 		un->un_ncmds_in_driver--;
10711 		ASSERT(un->un_ncmds_in_driver >= 0);
10712 		mutex_exit(SD_MUTEX(un));
10713 	}
10714 
10715 	/*
10716 	 * Read requests are restricted to multiples of the system block size.
10717 	 */
10718 	secmask = un->un_sys_blocksize - 1;
10719 
10720 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10721 		SD_ERROR(SD_LOG_READ_WRITE, un,
10722 		    "sdread: file offset not modulo %d\n",
10723 		    un->un_sys_blocksize);
10724 		err = EINVAL;
10725 	} else if (uio->uio_iov->iov_len & (secmask)) {
10726 		SD_ERROR(SD_LOG_READ_WRITE, un,
10727 		    "sdread: transfer length not modulo %d\n",
10728 		    un->un_sys_blocksize);
10729 		err = EINVAL;
10730 	} else {
10731 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10732 	}
10733 	return (err);
10734 }
10735 
10736 
10737 /*
10738  *    Function: sdwrite
10739  *
10740  * Description: Driver's write(9e) entry point function.
10741  *
10742  *   Arguments: dev   - device number
10743  *		uio   - structure pointer describing where data is stored in
10744  *			user's space
10745  *		cred_p  - user credential pointer
10746  *
10747  * Return Code: ENXIO
10748  *		EIO
10749  *		EINVAL
10750  *		value returned by physio
10751  *
10752  *     Context: Kernel thread context.
10753  */
10754 /* ARGSUSED */
10755 static int
10756 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10757 {
10758 	struct sd_lun	*un = NULL;
10759 	int		secmask;
10760 	int		err;
10761 
10762 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10763 		return (ENXIO);
10764 	}
10765 
10766 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10767 
10768 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10769 		mutex_enter(SD_MUTEX(un));
10770 		/*
10771 		 * Because the call to sd_ready_and_valid will issue I/O we
10772 		 * must wait here if either the device is suspended or
10773 		 * if it's power level is changing.
10774 		 */
10775 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10776 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10777 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10778 		}
10779 		un->un_ncmds_in_driver++;
10780 		mutex_exit(SD_MUTEX(un));
10781 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10782 			mutex_enter(SD_MUTEX(un));
10783 			un->un_ncmds_in_driver--;
10784 			ASSERT(un->un_ncmds_in_driver >= 0);
10785 			mutex_exit(SD_MUTEX(un));
10786 			return (EIO);
10787 		}
10788 		mutex_enter(SD_MUTEX(un));
10789 		un->un_ncmds_in_driver--;
10790 		ASSERT(un->un_ncmds_in_driver >= 0);
10791 		mutex_exit(SD_MUTEX(un));
10792 	}
10793 
10794 	/*
10795 	 * Write requests are restricted to multiples of the system block size.
10796 	 */
10797 	secmask = un->un_sys_blocksize - 1;
10798 
10799 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10800 		SD_ERROR(SD_LOG_READ_WRITE, un,
10801 		    "sdwrite: file offset not modulo %d\n",
10802 		    un->un_sys_blocksize);
10803 		err = EINVAL;
10804 	} else if (uio->uio_iov->iov_len & (secmask)) {
10805 		SD_ERROR(SD_LOG_READ_WRITE, un,
10806 		    "sdwrite: transfer length not modulo %d\n",
10807 		    un->un_sys_blocksize);
10808 		err = EINVAL;
10809 	} else {
10810 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10811 	}
10812 	return (err);
10813 }
10814 
10815 
10816 /*
10817  *    Function: sdaread
10818  *
10819  * Description: Driver's aread(9e) entry point function.
10820  *
10821  *   Arguments: dev   - device number
10822  *		aio   - structure pointer describing where data is to be stored
10823  *		cred_p  - user credential pointer
10824  *
10825  * Return Code: ENXIO
10826  *		EIO
10827  *		EINVAL
10828  *		value returned by aphysio
10829  *
10830  *     Context: Kernel thread context.
10831  */
10832 /* ARGSUSED */
10833 static int
10834 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10835 {
10836 	struct sd_lun	*un = NULL;
10837 	struct uio	*uio = aio->aio_uio;
10838 	int		secmask;
10839 	int		err;
10840 
10841 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10842 		return (ENXIO);
10843 	}
10844 
10845 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10846 
10847 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10848 		mutex_enter(SD_MUTEX(un));
10849 		/*
10850 		 * Because the call to sd_ready_and_valid will issue I/O we
10851 		 * must wait here if either the device is suspended or
10852 		 * if it's power level is changing.
10853 		 */
10854 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10855 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10856 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10857 		}
10858 		un->un_ncmds_in_driver++;
10859 		mutex_exit(SD_MUTEX(un));
10860 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10861 			mutex_enter(SD_MUTEX(un));
10862 			un->un_ncmds_in_driver--;
10863 			ASSERT(un->un_ncmds_in_driver >= 0);
10864 			mutex_exit(SD_MUTEX(un));
10865 			return (EIO);
10866 		}
10867 		mutex_enter(SD_MUTEX(un));
10868 		un->un_ncmds_in_driver--;
10869 		ASSERT(un->un_ncmds_in_driver >= 0);
10870 		mutex_exit(SD_MUTEX(un));
10871 	}
10872 
10873 	/*
10874 	 * Read requests are restricted to multiples of the system block size.
10875 	 */
10876 	secmask = un->un_sys_blocksize - 1;
10877 
10878 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10879 		SD_ERROR(SD_LOG_READ_WRITE, un,
10880 		    "sdaread: file offset not modulo %d\n",
10881 		    un->un_sys_blocksize);
10882 		err = EINVAL;
10883 	} else if (uio->uio_iov->iov_len & (secmask)) {
10884 		SD_ERROR(SD_LOG_READ_WRITE, un,
10885 		    "sdaread: transfer length not modulo %d\n",
10886 		    un->un_sys_blocksize);
10887 		err = EINVAL;
10888 	} else {
10889 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
10890 	}
10891 	return (err);
10892 }
10893 
10894 
10895 /*
10896  *    Function: sdawrite
10897  *
10898  * Description: Driver's awrite(9e) entry point function.
10899  *
10900  *   Arguments: dev   - device number
10901  *		aio   - structure pointer describing where data is stored
10902  *		cred_p  - user credential pointer
10903  *
10904  * Return Code: ENXIO
10905  *		EIO
10906  *		EINVAL
10907  *		value returned by aphysio
10908  *
10909  *     Context: Kernel thread context.
10910  */
10911 /* ARGSUSED */
10912 static int
10913 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10914 {
10915 	struct sd_lun	*un = NULL;
10916 	struct uio	*uio = aio->aio_uio;
10917 	int		secmask;
10918 	int		err;
10919 
10920 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10921 		return (ENXIO);
10922 	}
10923 
10924 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10925 
10926 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10927 		mutex_enter(SD_MUTEX(un));
10928 		/*
10929 		 * Because the call to sd_ready_and_valid will issue I/O we
10930 		 * must wait here if either the device is suspended or
10931 		 * if it's power level is changing.
10932 		 */
10933 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10934 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10935 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10936 		}
10937 		un->un_ncmds_in_driver++;
10938 		mutex_exit(SD_MUTEX(un));
10939 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10940 			mutex_enter(SD_MUTEX(un));
10941 			un->un_ncmds_in_driver--;
10942 			ASSERT(un->un_ncmds_in_driver >= 0);
10943 			mutex_exit(SD_MUTEX(un));
10944 			return (EIO);
10945 		}
10946 		mutex_enter(SD_MUTEX(un));
10947 		un->un_ncmds_in_driver--;
10948 		ASSERT(un->un_ncmds_in_driver >= 0);
10949 		mutex_exit(SD_MUTEX(un));
10950 	}
10951 
10952 	/*
10953 	 * Write requests are restricted to multiples of the system block size.
10954 	 */
10955 	secmask = un->un_sys_blocksize - 1;
10956 
10957 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10958 		SD_ERROR(SD_LOG_READ_WRITE, un,
10959 		    "sdawrite: file offset not modulo %d\n",
10960 		    un->un_sys_blocksize);
10961 		err = EINVAL;
10962 	} else if (uio->uio_iov->iov_len & (secmask)) {
10963 		SD_ERROR(SD_LOG_READ_WRITE, un,
10964 		    "sdawrite: transfer length not modulo %d\n",
10965 		    un->un_sys_blocksize);
10966 		err = EINVAL;
10967 	} else {
10968 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10969 	}
10970 	return (err);
10971 }
10972 
10973 
10974 
10975 
10976 
10977 /*
10978  * Driver IO processing follows the following sequence:
10979  *
10980  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10981  *         |                |                     ^
10982  *         v                v                     |
10983  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10984  *         |                |                     |                   |
10985  *         v                |                     |                   |
10986  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10987  *         |                |                     ^                   ^
10988  *         v                v                     |                   |
10989  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10990  *         |                |                     |                   |
10991  *     +---+                |                     +------------+      +-------+
10992  *     |                    |                                  |              |
10993  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10994  *     |                    v                                  |              |
10995  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10996  *     |                    |                                  ^              |
10997  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10998  *     |                    v                                  |              |
10999  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11000  *     |                    |                                  ^              |
11001  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11002  *     |                    v                                  |              |
11003  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11004  *     |                    |                                  ^              |
11005  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11006  *     |                    v                                  |              |
11007  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11008  *     |                    |                                  ^              |
11009  *     |                    |                                  |              |
11010  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11011  *                          |                           ^
11012  *                          v                           |
11013  *                   sd_core_iostart()                  |
11014  *                          |                           |
11015  *                          |                           +------>(*destroypkt)()
11016  *                          +-> sd_start_cmds() <-+     |           |
11017  *                          |                     |     |           v
11018  *                          |                     |     |  scsi_destroy_pkt(9F)
11019  *                          |                     |     |
11020  *                          +->(*initpkt)()       +- sdintr()
11021  *                          |  |                        |  |
11022  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11023  *                          |  +-> scsi_setup_cdb(9F)   |
11024  *                          |                           |
11025  *                          +--> scsi_transport(9F)     |
11026  *                                     |                |
11027  *                                     +----> SCSA ---->+
11028  *
11029  *
 * This code is based upon the following presumptions:
11031  *
11032  *   - iostart and iodone functions operate on buf(9S) structures. These
11033  *     functions perform the necessary operations on the buf(9S) and pass
11034  *     them along to the next function in the chain by using the macros
11035  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11036  *     (for iodone side functions).
11037  *
11038  *   - The iostart side functions may sleep. The iodone side functions
11039  *     are called under interrupt context and may NOT sleep. Therefore
11040  *     iodone side functions also may not call iostart side functions.
11041  *     (NOTE: iostart side functions should NOT sleep for memory, as
11042  *     this could result in deadlock.)
11043  *
11044  *   - An iostart side function may call its corresponding iodone side
11045  *     function directly (if necessary).
11046  *
11047  *   - In the event of an error, an iostart side function can return a buf(9S)
11048  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11049  *     b_error in the usual way of course).
11050  *
11051  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11052  *     requests to the iostart side functions.  The iostart side functions in
11053  *     this case would be called under the context of a taskq thread, so it's
11054  *     OK for them to block/sleep/spin in this case.
11055  *
11056  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11057  *     pass them along to the next function in the chain.  The corresponding
11058  *     iodone side functions must coalesce the "shadow" bufs and return
11059  *     the "original" buf to the next higher layer.
11060  *
11061  *   - The b_private field of the buf(9S) struct holds a pointer to
11062  *     an sd_xbuf struct, which contains information needed to
11063  *     construct the scsi_pkt for the command.
11064  *
11065  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11066  *     layer must acquire & release the SD_MUTEX(un) as needed.
11067  */
11068 
11069 
11070 /*
11071  * Create taskq for all targets in the system. This is created at
11072  * _init(9E) and destroyed at _fini(9E).
11073  *
11074  * Note: here we set the minalloc to a reasonably high number to ensure that
11075  * we will have an adequate supply of task entries available at interrupt time.
11076  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11077  * sd_create_taskq().  Since we do not want to sleep for allocations at
11078  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11079  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11080  * requests any one instant in time.
11081  */
#define	SD_TASKQ_NUMTHREADS	8	/* threads in the general sd taskq */
#define	SD_TASKQ_MINALLOC	256	/* task entries prepopulated at create */
#define	SD_TASKQ_MAXALLOC	256	/* == minalloc: fail, don't sleep, at cap */

static taskq_t	*sd_tq = NULL;		/* general driver taskq */
static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;

/*
 * The following task queue is created for the write part of the
 * read-modify-write handling of devices with a non-512 byte block size.
 * Limit the number of threads to 1 for now. This number was chosen
 * considering the fact that it applies only to DVD-RAM/MO drives
 * currently, for which performance is not the main criterion at this
 * stage.
 * Note: it needs to be explored whether a single taskq could be used
 * for both purposes in the future.
 */
#define	SD_WMR_TASKQ_NUMTHREADS	1
static taskq_t	*sd_wmr_tq = NULL;	/* read-modify-write taskq */
11100 
11101 /*
11102  *    Function: sd_taskq_create
11103  *
11104  * Description: Create taskq thread(s) and preallocate task entries
11105  *
11106  * Return Code: Returns a pointer to the allocated taskq_t.
11107  *
11108  *     Context: Can sleep. Requires blockable context.
11109  *
11110  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11111  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11112  *		- taskq_create() will block for memory, also it will panic
11113  *		  if it cannot create the requested number of threads.
11114  *		- Currently taskq_create() creates threads that cannot be
11115  *		  swapped.
11116  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11117  *		  supply of taskq entries at interrupt time (ie, so that we
11118  *		  do not have to sleep for memory)
11119  */
11120 
11121 static void
11122 sd_taskq_create(void)
11123 {
11124 	char	taskq_name[TASKQ_NAMELEN];
11125 
11126 	ASSERT(sd_tq == NULL);
11127 	ASSERT(sd_wmr_tq == NULL);
11128 
11129 	(void) snprintf(taskq_name, sizeof (taskq_name),
11130 	    "%s_drv_taskq", sd_label);
11131 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11132 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11133 	    TASKQ_PREPOPULATE));
11134 
11135 	(void) snprintf(taskq_name, sizeof (taskq_name),
11136 	    "%s_rmw_taskq", sd_label);
11137 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11138 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11139 	    TASKQ_PREPOPULATE));
11140 }
11141 
11142 
11143 /*
11144  *    Function: sd_taskq_delete
11145  *
11146  * Description: Complementary cleanup routine for sd_taskq_create().
11147  *
11148  *     Context: Kernel thread context.
11149  */
11150 
11151 static void
11152 sd_taskq_delete(void)
11153 {
11154 	ASSERT(sd_tq != NULL);
11155 	ASSERT(sd_wmr_tq != NULL);
11156 	taskq_destroy(sd_tq);
11157 	taskq_destroy(sd_wmr_tq);
11158 	sd_tq = NULL;
11159 	sd_wmr_tq = NULL;
11160 }
11161 
11162 
11163 /*
11164  *    Function: sdstrategy
11165  *
11166  * Description: Driver's strategy (9E) entry point function.
11167  *
11168  *   Arguments: bp - pointer to buf(9S)
11169  *
11170  * Return Code: Always returns zero
11171  *
11172  *     Context: Kernel thread context.
11173  */
11174 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	/* No soft state means the instance was never attached; fail w/EIO. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}
	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if its power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Count this command; sd_buf_iodone() decrements at completion. */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 * (The mutex is dropped around bp_mapin() because it may sleep.)
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
11238 
11239 
11240 /*
11241  *    Function: sd_xbuf_strategy
11242  *
11243  * Description: Function for initiating IO operations via the
11244  *		ddi_xbuf_qstrategy() mechanism.
11245  *
11246  *     Context: Kernel thread context.
11247  */
11248 
11249 static void
11250 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11251 {
11252 	struct sd_lun *un = arg;
11253 
11254 	ASSERT(bp != NULL);
11255 	ASSERT(xp != NULL);
11256 	ASSERT(un != NULL);
11257 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11258 
11259 	/*
11260 	 * Initialize the fields in the xbuf and save a pointer to the
11261 	 * xbuf in bp->b_private.
11262 	 */
11263 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11264 
11265 	/* Send the buf down the iostart chain */
11266 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11267 }
11268 
11269 
11270 /*
11271  *    Function: sd_xbuf_init
11272  *
11273  * Description: Prepare the given sd_xbuf struct for use.
11274  *
11275  *   Arguments: un - ptr to softstate
11276  *		bp - ptr to associated buf(9S)
11277  *		xp - ptr to associated sd_xbuf
11278  *		chain_type - IO chain type to use:
11279  *			SD_CHAIN_NULL
11280  *			SD_CHAIN_BUFIO
11281  *			SD_CHAIN_USCSI
11282  *			SD_CHAIN_DIRECT
11283  *			SD_CHAIN_DIRECT_PRIORITY
11284  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11285  *			initialization; may be NULL if none.
11286  *
11287  *     Context: Kernel thread context
11288  */
11289 
static void
sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop)
{
	int index;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
	    bp, chain_type);

	xp->xb_un	= un;
	xp->xb_pktp	= NULL;
	xp->xb_pktinfo	= pktinfop;
	/* Save the caller's b_private; the xbuf replaces it below. */
	xp->xb_private	= bp->b_private;
	xp->xb_blkno	= (daddr_t)bp->b_blkno;

	/*
	 * Set up the iostart and iodone chain indexes in the xbuf, based
	 * upon the specified chain type to use.
	 */
	switch (chain_type) {
	case SD_CHAIN_NULL:
		/*
		 * Fall thru to just use the values for the buf type, even
		 * tho for the NULL chain these values will never be used.
		 */
		/* FALLTHRU */
	case SD_CHAIN_BUFIO:
		index = un->un_buf_chain_type;
		break;
	case SD_CHAIN_USCSI:
		index = un->un_uscsi_chain_type;
		break;
	case SD_CHAIN_DIRECT:
		index = un->un_direct_chain_type;
		break;
	case SD_CHAIN_DIRECT_PRIORITY:
		index = un->un_priority_chain_type;
		break;
	default:
		/* We're really broken if we ever get here... */
		panic("sd_xbuf_init: illegal chain type!");
		/*NOTREACHED*/
	}

	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;

	/*
	 * It might be a bit easier to simply bzero the entire xbuf above,
	 * but it turns out that since we init a fair number of members anyway,
	 * we save a fair number cycles by doing explicit assignment of zero.
	 */
	xp->xb_pkt_flags	= 0;
	xp->xb_dma_resid	= 0;
	xp->xb_retry_count	= 0;
	xp->xb_victim_retry_count = 0;
	xp->xb_ua_retry_count	= 0;
	xp->xb_sense_bp		= NULL;
	xp->xb_sense_status	= 0;
	xp->xb_sense_state	= 0;
	xp->xb_sense_resid	= 0;

	/* Attach the xbuf to the buf and reset the buf's IO state. */
	bp->b_private	= xp;
	bp->b_flags	&= ~(B_DONE | B_ERROR);
	bp->b_resid	= 0;
	bp->av_forw	= NULL;
	bp->av_back	= NULL;
	bioerror(bp, 0);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
}
11365 
11366 
11367 /*
11368  *    Function: sd_uscsi_strategy
11369  *
11370  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11371  *
11372  *   Arguments: bp - buf struct ptr
11373  *
11374  * Return Code: Always returns 0
11375  *
11376  *     Context: Kernel thread context
11377  */
11378 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;

	ASSERT(bp != NULL);

	/* No soft state means the instance was never attached; fail w/EIO. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 * (The mutex is dropped around bp_mapin() because it may sleep.)
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	/* Count this command; sd_uscsi_iodone() decrements at completion. */
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);
	mutex_exit(SD_MUTEX(un));

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;

	/* Select the IO chain from the path flags the caller supplied. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/* Allocate the xbuf; sd_uscsi_iodone() frees it at completion. */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11445 
11446 
11447 /*
11448  * These routines perform raw i/o operations.
11449  */
/*ARGSUSED*/
static void
sduscsimin(struct buf *bp)
{
	/*
	 * minphys(9F) routine for raw (USCSI) I/O issued via physio(9F).
	 * Deliberately a no-op: the transfer must not be broken up,
	 * because the CDB transfer count would then be incorrect and
	 * data underruns would result (incomplete read/writes, which
	 * would be retried and then failed; see sdintr()).
	 */
}
11461 
11462 
11463 
11464 /*
11465  *    Function: sd_send_scsi_cmd
11466  *
11467  * Description: Runs a USCSI command for user (when called thru sdioctl),
11468  *		or for the driver
11469  *
11470  *   Arguments: dev - the dev_t for the device
11471  *		incmd - ptr to a valid uscsi_cmd struct
11472  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11473  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11474  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11475  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11476  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11477  *			to use the USCSI "direct" chain and bypass the normal
11478  *			command waitq.
11479  *
11480  * Return Code: 0 -  successful completion of the given command
11481  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11482  *		ENXIO  - soft state not found for specified dev
11483  *		EINVAL
11484  *		EFAULT - copyin/copyout error
11485  *		return code of biowait(9F) or physio(9F):
11486  *			EIO - IO error, caller may check incmd->uscsi_status
11487  *			ENXIO
11488  *			EACCES - reservation conflict
11489  *
11490  *     Context: Waits for command to complete. Can sleep.
11491  */
11492 
static int
sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag)
{
	struct sd_uscsi_info	*uip;
	struct uscsi_cmd	*uscmd;
	struct sd_lun	*un;
	struct buf	*bp;
	int	rval;
	int	flags;	/* reused: reset flags, then ddi_copyin flags */

	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
	if (un == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SDDEBUG
	switch (dataspace) {
	case UIO_USERSPACE:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
		break;
	case UIO_SYSSPACE:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
		break;
	default:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
		break;
	}
#endif

	/*
	 * Perform resets directly; no need to generate a command to do it.
	 */
	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
		    RESET_ALL : RESET_TARGET;
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
			/* Reset attempt was unsuccessful */
			SD_TRACE(SD_LOG_IO, un,
			    "sd_send_scsi_cmd: reset: failure\n");
			return (EIO);
		}
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
		return (0);
	}

	/* Perfunctory sanity check... */
	if (incmd->uscsi_cdblen <= 0) {
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "invalid uscsi_cdblen, returning EINVAL\n");
		return (EINVAL);
	}

	/*
	 * In order to not worry about where the uscsi structure came from
	 * (or where the cdb it points to came from) we're going to make
	 * kmem_alloc'd copies of them here. This will also allow reference
	 * to the data they contain long after this process has gone to
	 * sleep and its kernel stack has been unmapped, etc.
	 *
	 * First get some memory for the uscsi_cmd struct and copy the
	 * contents of the given uscsi_cmd struct into it.
	 */
	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));

	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);

	/*
	 * Now get some space for the CDB, and copy the given CDB into
	 * it. Use ddi_copyin() in case the data is in user space.
	 */
	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
	/* FKIOCTL tells ddi_copyin the source is already in kernel space. */
	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
		kmem_free(uscmd, sizeof (struct uscsi_cmd));
		return (EFAULT);
	}

	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);

	bp = getrbuf(KM_SLEEP);

	/*
	 * Allocate an sd_uscsi_info struct and fill it with the info
	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
	 * since we allocate the buf here in this function, we do not
	 * need to preserve the prior contents of b_private.
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 */
	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = path_flag;
	uip->ui_cmdp  = uscmd;
	bp->b_private = uip;

	/*
	 * Initialize Request Sense buffering, if requested.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
		/*
		 * Here uscmd->uscsi_rqbuf currently points to the caller's
		 * buffer, but we replace this with a kernel buffer that
		 * we allocate to use with the sense data. The sense data
		 * (if present) gets copied into this new buffer before the
		 * command is completed.  Then we copy the sense data from
		 * our allocated buf into the caller's buffer below. Note
		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
		 * below to perform the copy back to the caller's buf.
		 */
		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
		if (rqbufspace == UIO_USERSPACE) {
			uscmd->uscsi_rqlen   = SENSE_LENGTH;
			uscmd->uscsi_rqresid = SENSE_LENGTH;
		} else {
			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
			uscmd->uscsi_rqlen   = rlen;
			uscmd->uscsi_rqresid = rlen;
		}
	} else {
		uscmd->uscsi_rqbuf = NULL;
		uscmd->uscsi_rqlen   = 0;
		uscmd->uscsi_rqresid = 0;
	}

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);

	if (un->un_f_is_fibre == FALSE) {
		/*
		 * Force asynchronous mode, if necessary.  Doing this here
		 * has the unfortunate effect of running other queued
		 * commands async also, but since the main purpose of this
		 * capability is downloading new drive firmware, we can
		 * probably live with it.
		 */
		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
				== 1) {
				if (scsi_ifsetcap(SD_ADDRESS(un),
					    "synchronous", 0, 1) == 1) {
					SD_TRACE(SD_LOG_IO, un,
					"sd_send_scsi_cmd: forced async ok\n");
				} else {
					SD_TRACE(SD_LOG_IO, un,
					"sd_send_scsi_cmd:\
					forced async failed\n");
					rval = EINVAL;
					goto done;
				}
			}
		}

		/*
		 * Re-enable synchronous mode, if requested
		 */
		if (uscmd->uscsi_flags & USCSI_SYNC) {
			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
				== 0) {
				int i = scsi_ifsetcap(SD_ADDRESS(un),
						"synchronous", 1, 1);
				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
					"re-enabled sync %s\n",
					(i == 1) ? "ok" : "failed");
			}
		}
	}

	/*
	 * Commands sent with priority are intended for error recovery
	 * situations, and do not have retries performed.
	 */
	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
	}

	/*
	 * If we're going to do actual I/O, let physio do all the right things
	 */
	if (uscmd->uscsi_buflen != 0) {
		struct iovec	aiov;
		struct uio	auio;
		struct uio	*uio = &auio;

		bzero(&auio, sizeof (struct uio));
		bzero(&aiov, sizeof (struct iovec));
		aiov.iov_base = uscmd->uscsi_bufaddr;
		aiov.iov_len  = uscmd->uscsi_buflen;
		uio->uio_iov  = &aiov;

		uio->uio_iovcnt  = 1;
		uio->uio_resid   = uscmd->uscsi_buflen;
		uio->uio_segflg  = dataspace;

		/*
		 * physio() will block here until the command completes....
		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");

		rval = physio(sd_uscsi_strategy, bp, dev,
		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
		    sduscsimin, uio);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "returned from physio with 0x%x\n", rval);

	} else {
		/*
		 * We have to mimic what physio would do here! Argh!
		 */
		bp->b_flags  = B_BUSY |
		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
		bp->b_edev   = dev;
		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
		bp->b_bcount = 0;
		bp->b_blkno  = 0;

		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");

		(void) sd_uscsi_strategy(bp);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");

		rval = biowait(bp);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "returned from  biowait with 0x%x\n", rval);
	}

	/*
	 * Common completion/cleanup path (also the error target for the
	 * forced-async failure above).  All resources allocated after the
	 * CDB copyin are released below.
	 */
done:

#ifdef SDDEBUG
	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
	    uscmd->uscsi_status, uscmd->uscsi_resid);
	if (uscmd->uscsi_bufaddr != NULL) {
		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
		if (dataspace == UIO_SYSSPACE) {
			SD_DUMP_MEMORY(un, SD_LOG_IO,
			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
			    uscmd->uscsi_buflen, SD_LOG_HEX);
		}
	}
#endif

	/*
	 * Get the status and residual to return to the caller.
	 */
	incmd->uscsi_status = uscmd->uscsi_status;
	incmd->uscsi_resid  = uscmd->uscsi_resid;

	/*
	 * If the caller wants sense data, copy back whatever sense data
	 * we may have gotten, and update the relevant rqsense info.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {

		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);

		/* Update the Request Sense status and resid */
		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;

		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);

		/* Copy out the sense data for user processes */
		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
			/* Note: this "flags" shadows the outer local. */
			int flags =
			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
			    rqlen, flags) != 0) {
				rval = EFAULT;
			}
			/*
			 * Note: Can't touch incmd->uscsi_rqbuf so use
			 * uscmd->uscsi_rqbuf instead. They're the same.
			 */
			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
			    incmd->uscsi_rqbuf, rqlen);
			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
		}
	}

	/*
	 * Free allocated resources and return; mapout the buf in case it was
	 * mapped in by a lower layer.
	 */
	bp_mapout(bp);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	if (uscmd->uscsi_rqbuf != NULL) {
		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	}
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");

	return (rval);
}
11814 
11815 
11816 /*
11817  *    Function: sd_buf_iodone
11818  *
11819  * Description: Frees the sd_xbuf & returns the buf to its originator.
11820  *
11821  *     Context: May be called from interrupt context.
11822  */
11823 /* ARGSUSED */
11824 static void
11825 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11826 {
11827 	struct sd_xbuf *xp;
11828 
11829 	ASSERT(un != NULL);
11830 	ASSERT(bp != NULL);
11831 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11832 
11833 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11834 
11835 	xp = SD_GET_XBUF(bp);
11836 	ASSERT(xp != NULL);
11837 
11838 	mutex_enter(SD_MUTEX(un));
11839 
11840 	/*
11841 	 * Grab time when the cmd completed.
11842 	 * This is used for determining if the system has been
11843 	 * idle long enough to make it idle to the PM framework.
11844 	 * This is for lowering the overhead, and therefore improving
11845 	 * performance per I/O operation.
11846 	 */
11847 	un->un_pm_idle_time = ddi_get_time();
11848 
11849 	un->un_ncmds_in_driver--;
11850 	ASSERT(un->un_ncmds_in_driver >= 0);
11851 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
11852 	    un->un_ncmds_in_driver);
11853 
11854 	mutex_exit(SD_MUTEX(un));
11855 
11856 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
11857 	biodone(bp);				/* bp is gone after this */
11858 
11859 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
11860 }
11861 
11862 
11863 /*
11864  *    Function: sd_uscsi_iodone
11865  *
11866  * Description: Frees the sd_xbuf & returns the buf to its originator.
11867  *
11868  *     Context: May be called from interrupt context.
11869  */
11870 /* ARGSUSED */
11871 static void
11872 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11873 {
11874 	struct sd_xbuf *xp;
11875 
11876 	ASSERT(un != NULL);
11877 	ASSERT(bp != NULL);
11878 
11879 	xp = SD_GET_XBUF(bp);
11880 	ASSERT(xp != NULL);
11881 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11882 
11883 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
11884 
11885 	mutex_enter(SD_MUTEX(un));
11886 
11887 	/*
11888 	 * Grab time when the cmd completed.
11889 	 * This is used for determining if the system has been
11890 	 * idle long enough to make it idle to the PM framework.
11891 	 * This is for lowering the overhead, and therefore improving
11892 	 * performance per I/O operation.
11893 	 */
11894 	un->un_pm_idle_time = ddi_get_time();
11895 
11896 	un->un_ncmds_in_driver--;
11897 	ASSERT(un->un_ncmds_in_driver >= 0);
11898 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
11899 	    un->un_ncmds_in_driver);
11900 
11901 	mutex_exit(SD_MUTEX(un));
11902 
11903 	kmem_free(xp, sizeof (struct sd_xbuf));
11904 	biodone(bp);
11905 
11906 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
11907 }
11908 
11909 
11910 /*
11911  *    Function: sd_mapblockaddr_iostart
11912  *
11913  * Description: Verify request lies within the partition limits for
11914  *		the indicated minor device.  Issue "overrun" buf if
11915  *		request would exceed partition range.  Converts
11916  *		partition-relative block address to absolute.
11917  *
11918  *     Context: Can sleep
11919  *
11920  *      Issues: This follows what the old code did, in terms of accessing
11921  *		some of the partition info in the unit struct without holding
11922  *		the mutex.  This is a general issue, if the partition info
11923  *		can be altered while IO is in progress... as soon as we send
11924  *		a buf, its partitioning can be invalid before it gets to the
11925  *		device.  Probably the right fix is to move partitioning out
11926  *		of the driver entirely.
11927  */
11928 
11929 static void
11930 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
11931 {
11932 	daddr_t	nblocks;	/* #blocks in the given partition */
11933 	daddr_t	blocknum;	/* Block number specified by the buf */
11934 	size_t	requested_nblocks;
11935 	size_t	available_nblocks;
11936 	int	partition;
11937 	diskaddr_t	partition_offset;
11938 	struct sd_xbuf *xp;
11939 
11940 
11941 	ASSERT(un != NULL);
11942 	ASSERT(bp != NULL);
11943 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11944 
11945 	SD_TRACE(SD_LOG_IO_PARTITION, un,
11946 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
11947 
11948 	xp = SD_GET_XBUF(bp);
11949 	ASSERT(xp != NULL);
11950 
11951 	/*
11952 	 * If the geometry is not indicated as valid, attempt to access
11953 	 * the unit & verify the geometry/label. This can be the case for
11954 	 * removable-media devices, of if the device was opened in
11955 	 * NDELAY/NONBLOCK mode.
11956 	 */
11957 	if ((un->un_f_geometry_is_valid != TRUE) &&
11958 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
11959 		/*
11960 		 * For removable devices it is possible to start an I/O
11961 		 * without a media by opening the device in nodelay mode.
11962 		 * Also for writable CDs there can be many scenarios where
11963 		 * there is no geometry yet but volume manager is trying to
11964 		 * issue a read() just because it can see TOC on the CD. So
11965 		 * do not print a message for removables.
11966 		 */
11967 		if (!ISREMOVABLE(un)) {
11968 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11969 			    "i/o to invalid geometry\n");
11970 		}
11971 		bioerror(bp, EIO);
11972 		bp->b_resid = bp->b_bcount;
11973 		SD_BEGIN_IODONE(index, un, bp);
11974 		return;
11975 	}
11976 
11977 	partition = SDPART(bp->b_edev);
11978 
11979 	/* #blocks in partition */
11980 	nblocks = un->un_map[partition].dkl_nblk;    /* #blocks in partition */
11981 
11982 	/* Use of a local variable potentially improves performance slightly */
11983 	partition_offset = un->un_offset[partition];
11984 
11985 	/*
11986 	 * blocknum is the starting block number of the request. At this
11987 	 * point it is still relative to the start of the minor device.
11988 	 */
11989 	blocknum = xp->xb_blkno;
11990 
11991 	/*
11992 	 * Legacy: If the starting block number is one past the last block
11993 	 * in the partition, do not set B_ERROR in the buf.
11994 	 */
11995 	if (blocknum == nblocks)  {
11996 		goto error_exit;
11997 	}
11998 
11999 	/*
12000 	 * Confirm that the first block of the request lies within the
12001 	 * partition limits. Also the requested number of bytes must be
12002 	 * a multiple of the system block size.
12003 	 */
12004 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12005 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12006 		bp->b_flags |= B_ERROR;
12007 		goto error_exit;
12008 	}
12009 
12010 	/*
	 * If the requested # blocks exceeds the available # blocks, that
12012 	 * is an overrun of the partition.
12013 	 */
12014 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12015 	available_nblocks = (size_t)(nblocks - blocknum);
12016 	ASSERT(nblocks >= blocknum);
12017 
12018 	if (requested_nblocks > available_nblocks) {
12019 		/*
12020 		 * Allocate an "overrun" buf to allow the request to proceed
12021 		 * for the amount of space available in the partition. The
12022 		 * amount not transferred will be added into the b_resid
12023 		 * when the operation is complete. The overrun buf
12024 		 * replaces the original buf here, and the original buf
12025 		 * is saved inside the overrun buf, for later use.
12026 		 */
12027 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12028 		    (offset_t)(requested_nblocks - available_nblocks));
12029 		size_t count = bp->b_bcount - resid;
12030 		/*
12031 		 * Note: count is an unsigned entity thus it'll NEVER
12032 		 * be less than 0 so ASSERT the original values are
12033 		 * correct.
12034 		 */
12035 		ASSERT(bp->b_bcount >= resid);
12036 
12037 		bp = sd_bioclone_alloc(bp, count, blocknum,
12038 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12039 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12040 		ASSERT(xp != NULL);
12041 	}
12042 
12043 	/* At this point there should be no residual for this buf. */
12044 	ASSERT(bp->b_resid == 0);
12045 
12046 	/* Convert the block number to an absolute address. */
12047 	xp->xb_blkno += partition_offset;
12048 
12049 	SD_NEXT_IOSTART(index, un, bp);
12050 
12051 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12052 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12053 
12054 	return;
12055 
12056 error_exit:
12057 	bp->b_resid = bp->b_bcount;
12058 	SD_BEGIN_IODONE(index, un, bp);
12059 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12060 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12061 }
12062 
12063 
12064 /*
12065  *    Function: sd_mapblockaddr_iodone
12066  *
12067  * Description: Completion-side processing for partition management.
12068  *
12069  *     Context: May be called under interrupt context
12070  */
12071 
12072 static void
12073 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12074 {
12075 	/* int	partition; */	/* Not used, see below. */
12076 	ASSERT(un != NULL);
12077 	ASSERT(bp != NULL);
12078 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12079 
12080 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12081 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12082 
12083 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12084 		/*
12085 		 * We have an "overrun" buf to deal with...
12086 		 */
12087 		struct sd_xbuf	*xp;
12088 		struct buf	*obp;	/* ptr to the original buf */
12089 
12090 		xp = SD_GET_XBUF(bp);
12091 		ASSERT(xp != NULL);
12092 
12093 		/* Retrieve the pointer to the original buf */
12094 		obp = (struct buf *)xp->xb_private;
12095 		ASSERT(obp != NULL);
12096 
12097 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12098 		bioerror(obp, bp->b_error);
12099 
12100 		sd_bioclone_free(bp);
12101 
12102 		/*
12103 		 * Get back the original buf.
12104 		 * Note that since the restoration of xb_blkno below
12105 		 * was removed, the sd_xbuf is not needed.
12106 		 */
12107 		bp = obp;
12108 		/*
12109 		 * xp = SD_GET_XBUF(bp);
12110 		 * ASSERT(xp != NULL);
12111 		 */
12112 	}
12113 
12114 	/*
12115 	 * Convert sd->xb_blkno back to a minor-device relative value.
12116 	 * Note: this has been commented out, as it is not needed in the
12117 	 * current implementation of the driver (ie, since this function
12118 	 * is at the top of the layering chains, so the info will be
12119 	 * discarded) and it is in the "hot" IO path.
12120 	 *
12121 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12122 	 * xp->xb_blkno -= un->un_offset[partition];
12123 	 */
12124 
12125 	SD_NEXT_IODONE(index, un, bp);
12126 
12127 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12128 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12129 }
12130 
12131 
12132 /*
12133  *    Function: sd_mapblocksize_iostart
12134  *
12135  * Description: Convert between system block size (un->un_sys_blocksize)
12136  *		and target block size (un->un_tgt_blocksize).
12137  *
12138  *     Context: Can sleep to allocate resources.
12139  *
12140  * Assumptions: A higher layer has already performed any partition validation,
12141  *		and converted the xp->xb_blkno to an absolute value relative
12142  *		to the start of the device.
12143  *
12144  *		It is also assumed that the higher layer has implemented
12145  *		an "overrun" mechanism for the case where the request would
12146  *		read/write beyond the end of a partition.  In this case we
12147  *		assume (and ASSERT) that bp->b_resid == 0.
12148  *
12149  *		Note: The implementation for this routine assumes the target
12150  *		block size remains constant between allocation and transport.
12151  */
12152 
static void
sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf			*xp;
	offset_t first_byte;	/* byte offset of request from device start */
	daddr_t	start_block, end_block;	/* in un_tgt_blocksize units */
	daddr_t	request_bytes;	/* rounded-up transfer size, in bytes */
	ushort_t is_aligned = FALSE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/* Any partition overrun must have been handled by a higher layer. */
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);

	/*
	 * For a non-writable CD, a write request is an error
	 */
	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
	    (un->un_f_mmc_writable_media == FALSE)) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	/*
	 * We do not need a shadow buf if the device is using
	 * un->un_sys_blocksize as its block size or if bcount == 0.
	 * In this case there is no layer-private data block allocated.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto done;
	}

#if defined(__i386) || defined(__amd64)
	/* We do not support non-block-aligned transfers for ROD devices */
	ASSERT(!ISROD(un));
#endif

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
	    un->un_tgt_blocksize, un->un_sys_blocksize);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request len:0x%x\n", bp->b_bcount);

	/*
	 * Allocate the layer-private data area for the mapblocksize layer.
	 * Layers are allowed to use the xp_private member of the sd_xbuf
	 * struct to store the pointer to their layer-private data block, but
	 * each layer also has the responsibility of restoring the prior
	 * contents of xb_private before returning the buf/xbuf to the
	 * higher layer that sent it.
	 *
	 * Here we save the prior contents of xp->xb_private into the
	 * bsp->mbs_oprivate field of our layer-private data area. This value
	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
	 * the layer-private area and returning the buf/xbuf to the layer
	 * that sent it.
	 *
	 * Note that here we use kmem_zalloc for the allocation as there are
	 * parts of the mapblocksize code that expect certain fields to be
	 * zero unless explicitly set to a required value.
	 */
	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
	bsp->mbs_oprivate = xp->xb_private;
	xp->xb_private = bsp;

	/*
	 * This treats the data on the disk (target) as an array of bytes.
	 * first_byte is the byte offset, from the beginning of the device,
	 * to the location of the request. This is converted from a
	 * un->un_sys_blocksize block address to a byte offset, and then back
	 * to a block address based upon a un->un_tgt_blocksize block size.
	 *
	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but it is based upon partitions that use the "system"
	 * block size. It must be adjusted to reflect the block size of
	 * the target.
	 *
	 * Note that end_block is actually the block that follows the last
	 * block of the request, but that's what is needed for the computation.
	 */
	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
	    un->un_tgt_blocksize;

	/* request_bytes is rounded up to a multiple of the target block size */
	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;

	/*
	 * See if the starting address of the request and the request
	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
	 * then we do not need to allocate a shadow buf to handle the request.
	 */
	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
		is_aligned = TRUE;
	}

	if ((bp->b_flags & B_READ) == 0) {
		/*
		 * Lock the range for a write operation. An aligned request is
		 * considered a simple write; otherwise the request must be a
		 * read-modify-write.
		 */
		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
	}

	/*
	 * Alloc a shadow buf if the request is not aligned. Also, this is
	 * where the READ command is generated for a read-modify-write. (The
	 * write phase is deferred until after the read completes.)
	 */
	if (is_aligned == FALSE) {

		struct sd_mapblocksize_info	*shadow_bsp;
		struct sd_xbuf	*shadow_xp;
		struct buf	*shadow_bp;

		/*
		 * Allocate the shadow buf and its associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute relative to the start of the device, and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp however.
		 *
		 * Note also that the shadow buf will always need to be a
		 * READ command, regardless of whether the incoming command
		 * is a READ or a WRITE.
		 */
		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
		    xp->xb_blkno,
		    (int (*)(struct buf *)) sd_mapblocksize_iodone);

		shadow_xp = SD_GET_XBUF(shadow_bp);

		/*
		 * Allocate the layer-private data for the shadow buf.
		 * (No need to preserve xb_private in the shadow xbuf.)
		 */
		shadow_xp->xb_private = shadow_bsp =
		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);

		/*
		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
		 * to figure out where the start of the user data is (based upon
		 * the system block size) in the data returned by the READ
		 * command (which will be based upon the target blocksize). Note
		 * that this is only really used if the request is unaligned.
		 */
		bsp->mbs_copy_offset = (ssize_t)(first_byte -
		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
		ASSERT((bsp->mbs_copy_offset >= 0) &&
		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));

		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;

		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;

		/* Transfer the wmap (if any) to the shadow buf */
		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
		bsp->mbs_wmp = NULL;

		/*
		 * The shadow buf goes on from here in place of the
		 * original buf.
		 */
		shadow_bsp->mbs_orig_bp = bp;
		bp = shadow_bp;
	}

	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
	    request_bytes);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);

done:
	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
}
12352 
12353 
12354 /*
12355  *    Function: sd_mapblocksize_iodone
12356  *
12357  * Description: Completion side processing for block-size mapping.
12358  *
12359  *     Context: May be called under interrupt context
12360  */
12361 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	/*
	 * bp is the shadow buf; recover the original buf and its xbuf from
	 * the layer-private data saved by sd_mapblocksize_iostart().
	 */
	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_end delimit the location and extent of
	 * the data returned with the shadow IO request.
	 */
	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from the shadow buf's layer-private data, where it was
	 * saved by sd_mapblocksize_iostart(). copy_length gives the amount of
	 * data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != 0) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
12608 
12609 
12610 /*
12611  *    Function: sd_checksum_iostart
12612  *
12613  * Description: A stub function for a layer that's currently not used.
12614  *		For now just a placeholder.
12615  *
12616  *     Context: Kernel thread context
12617  */
12618 
12619 static void
12620 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12621 {
12622 	ASSERT(un != NULL);
12623 	ASSERT(bp != NULL);
12624 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12625 	SD_NEXT_IOSTART(index, un, bp);
12626 }
12627 
12628 
12629 /*
12630  *    Function: sd_checksum_iodone
12631  *
12632  * Description: A stub function for a layer that's currently not used.
12633  *		For now just a placeholder.
12634  *
12635  *     Context: May be called under interrupt context
12636  */
12637 
12638 static void
12639 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12640 {
12641 	ASSERT(un != NULL);
12642 	ASSERT(bp != NULL);
12643 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12644 	SD_NEXT_IODONE(index, un, bp);
12645 }
12646 
12647 
12648 /*
12649  *    Function: sd_checksum_uscsi_iostart
12650  *
12651  * Description: A stub function for a layer that's currently not used.
12652  *		For now just a placeholder.
12653  *
12654  *     Context: Kernel thread context
12655  */
12656 
12657 static void
12658 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12659 {
12660 	ASSERT(un != NULL);
12661 	ASSERT(bp != NULL);
12662 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12663 	SD_NEXT_IOSTART(index, un, bp);
12664 }
12665 
12666 
12667 /*
12668  *    Function: sd_checksum_uscsi_iodone
12669  *
12670  * Description: A stub function for a layer that's currently not used.
12671  *		For now just a placeholder.
12672  *
12673  *     Context: May be called under interrupt context
12674  */
12675 
12676 static void
12677 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12678 {
12679 	ASSERT(un != NULL);
12680 	ASSERT(bp != NULL);
12681 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12682 	SD_NEXT_IODONE(index, un, bp);
12683 }
12684 
12685 
12686 /*
12687  *    Function: sd_pm_iostart
12688  *
12689  * Description: iostart-side routine for Power mangement.
12690  *
12691  *     Context: Kernel thread context
12692  */
12693 
12694 static void
12695 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12696 {
12697 	ASSERT(un != NULL);
12698 	ASSERT(bp != NULL);
12699 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12700 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12701 
12702 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12703 
12704 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12705 		/*
12706 		 * Set up to return the failed buf back up the 'iodone'
12707 		 * side of the calling chain.
12708 		 */
12709 		bioerror(bp, EIO);
12710 		bp->b_resid = bp->b_bcount;
12711 
12712 		SD_BEGIN_IODONE(index, un, bp);
12713 
12714 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12715 		return;
12716 	}
12717 
12718 	SD_NEXT_IOSTART(index, un, bp);
12719 
12720 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12721 }
12722 
12723 
12724 /*
12725  *    Function: sd_pm_iodone
12726  *
12727  * Description: iodone-side routine for power mangement.
12728  *
12729  *     Context: may be called from interrupt context
12730  */
12731 
12732 static void
12733 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12734 {
12735 	ASSERT(un != NULL);
12736 	ASSERT(bp != NULL);
12737 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12738 
12739 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12740 
12741 	/*
12742 	 * After attach the following flag is only read, so don't
12743 	 * take the penalty of acquiring a mutex for it.
12744 	 */
12745 	if (un->un_f_pm_is_enabled == TRUE) {
12746 		sd_pm_exit(un);
12747 	}
12748 
12749 	SD_NEXT_IODONE(index, un, bp);
12750 
12751 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12752 }
12753 
12754 
12755 /*
12756  *    Function: sd_core_iostart
12757  *
12758  * Description: Primary driver function for enqueuing buf(9S) structs from
12759  *		the system and initiating IO to the target device
12760  *
12761  *     Context: Kernel thread context. Can sleep.
12762  *
12763  * Assumptions:  - The given xp->xb_blkno is absolute
12764  *		   (ie, relative to the start of the device).
12765  *		 - The IO is to be done using the native blocksize of
12766  *		   the device, as specified in un->un_tgt_blocksize.
12767  */
/* ARGSUSED */
static void
sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/* Any partition overrun was already resolved by a higher layer. */
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* un_failfast_state and the wait queue are protected by SD_MUTEX. */
	mutex_enter(SD_MUTEX(un));

	/*
	 * If we are currently in the failfast state, fail any new IO
	 * that has B_FAILFAST set, then return.
	 */
	if ((bp->b_flags & B_FAILFAST) &&
	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
		/* Drop the mutex before completing the buf back up the chain. */
		mutex_exit(SD_MUTEX(un));
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	if (SD_IS_DIRECT_PRIORITY(xp)) {
		/*
		 * Priority command -- transport it immediately.
		 *
		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
		 * because all direct priority commands should be associated
		 * with error recovery actions which we don't want to retry.
		 */
		sd_start_cmds(un, bp);
	} else {
		/*
		 * Normal command -- add it to the wait queue, then start
		 * transporting commands from the wait queue.
		 * The kstat call records the buf's entry into the wait queue
		 * for IO statistics.
		 */
		sd_add_buf_to_waitq(un, bp);
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		sd_start_cmds(un, NULL);
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
}
12822 
12823 
12824 /*
12825  *    Function: sd_init_cdb_limits
12826  *
12827  * Description: This is to handle scsi_pkt initialization differences
12828  *		between the driver platforms.
12829  *
12830  *		Legacy behaviors:
12831  *
12832  *		If the block number or the sector count exceeds the
12833  *		capabilities of a Group 0 command, shift over to a
12834  *		Group 1 command. We don't blindly use Group 1
12835  *		commands because a) some drives (CDC Wren IVs) get a
12836  *		bit confused, and b) there is probably a fair amount
12837  *		of speed difference for a target to receive and decode
12838  *		a 10 byte command instead of a 6 byte command.
12839  *
12840  *		The xfer time difference of 6 vs 10 byte CDBs is
12841  *		still significant so this code is still worthwhile.
12842  *		10 byte CDBs are very inefficient with the fas HBA driver
12843  *		and older disks. Each CDB byte took 1 usec with some
12844  *		popular disks.
12845  *
12846  *     Context: Must be called at attach time
12847  */
12848 
12849 static void
12850 sd_init_cdb_limits(struct sd_lun *un)
12851 {
12852 	/*
12853 	 * Use CDB_GROUP1 commands for most devices except for
12854 	 * parallel SCSI fixed drives in which case we get better
12855 	 * performance using CDB_GROUP0 commands (where applicable).
12856 	 */
12857 	un->un_mincdb = SD_CDB_GROUP1;
12858 #if !defined(__fibre)
12859 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12860 	    !ISREMOVABLE(un)) {
12861 		un->un_mincdb = SD_CDB_GROUP0;
12862 	}
12863 #endif
12864 
12865 	/*
12866 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
12867 	 * commands for fixed disks unless we are building for a 32 bit
12868 	 * kernel.
12869 	 */
12870 #ifdef _LP64
12871 	un->un_maxcdb = (ISREMOVABLE(un)) ? SD_CDB_GROUP5 : SD_CDB_GROUP4;
12872 #else
12873 	un->un_maxcdb = (ISREMOVABLE(un)) ? SD_CDB_GROUP5 : SD_CDB_GROUP1;
12874 #endif
12875 
12876 	/*
12877 	 * x86 systems require the PKT_DMA_PARTIAL flag
12878 	 */
12879 #if defined(__x86)
12880 	un->un_pkt_flags = PKT_DMA_PARTIAL;
12881 #else
12882 	un->un_pkt_flags = 0;
12883 #endif
12884 
12885 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
12886 	    ? sizeof (struct scsi_arq_status) : 1);
12887 	un->un_cmd_timeout = (ushort_t)sd_io_time;
12888 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
12889 }
12890 
12891 
/*
 *    Function: sd_initpkt_for_buf
 *
 * Description: Allocate and initialize for transport a scsi_pkt struct,
 *		based upon the info specified in the given buf struct.
 *
 *		Assumes the xb_blkno in the request is absolute (ie,
 *		relative to the start of the device (NOT partition!).
 *		Also assumes that the request is using the native block
 *		size of the device (as returned by the READ CAPACITY
 *		command).
 *
 * Return Code: SD_PKT_ALLOC_SUCCESS
 *		SD_PKT_ALLOC_FAILURE
 *		SD_PKT_ALLOC_FAILURE_NO_DMA
 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
 *
 *     Context: Kernel thread and may be called from software interrupt context
 *		as part of a sdrunout callback. This function may not block or
 *		call routines that block
 */

static int
sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp = NULL;
	struct sd_lun	*un;
	size_t		blockcount;
	daddr_t		startblock;
	int		rval;
	int		cmd_flags;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
		/*
		 * Already have a scsi_pkt -- just need DMA resources.
		 * We must recompute the CDB in case the mapping returns
		 * a nonzero pkt_resid.
		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
		 * that is being retried, the unmap/remap of the DMA resources
		 * will result in the entire transfer starting over again
		 * from the very first block.
		 */
		ASSERT(xp->xb_pktp != NULL);
		pktp = xp->xb_pktp;
	} else {
		pktp = NULL;
	}
#endif /* __i386 || __amd64 */

	startblock = xp->xb_blkno;	/* Absolute block num. */
	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */

	/*
	 * On x86, restrict the per-xbuf flags passed to scsi_init_pkt to
	 * the init-pkt related subset (SD_XB_INITPKT_MASK).
	 */
	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);

#else

	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;

#endif

	/*
	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
	 * call scsi_init_pkt, and build the CDB.
	 */
	rval = sd_setup_rw_pkt(un, &pktp, bp,
	    cmd_flags, sdrunout, (caddr_t)un,
	    startblock, blockcount);

	if (rval == 0) {
		/*
		 * Success.
		 *
		 * If partial DMA is being used and required for this transfer.
		 * set it up here.
		 */
		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
		    (pktp->pkt_resid != 0)) {

			/*
			 * Save the CDB length and pkt_resid for the
			 * next xfer
			 */
			xp->xb_dma_resid = pktp->pkt_resid;

			/* rezero resid */
			pktp->pkt_resid = 0;

		} else {
			xp->xb_dma_resid = 0;
		}

		/* Use the unit's tag flags and default command timeout. */
		pktp->pkt_flags = un->un_tagflags;
		pktp->pkt_time  = un->un_cmd_timeout;
		pktp->pkt_comp  = sdintr;

		pktp->pkt_private = bp;
		*pktpp = pktp;

		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		/* DMA resources are (re)attached now; clear the freed flag. */
		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
#endif

		return (SD_PKT_ALLOC_SUCCESS);

	}

	/*
	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
	 * from sd_setup_rw_pkt.
	 */
	ASSERT(rval == SD_PKT_ALLOC_FAILURE);

	if (rval == SD_PKT_ALLOC_FAILURE) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);

		/*
		 * NOTE(review): B_ERROR here appears to distinguish a hard
		 * DMA allocation failure from a transient resource shortage
		 * -- confirm against the HBA framework's callback behavior.
		 */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	} else {
		/*
		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
		 *
		 * This should never happen.  Maybe someone messed with the
		 * kernel's minphys?
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Request rejected: too large for CDB: "
		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);

	}
}
13055 
13056 
13057 /*
13058  *    Function: sd_destroypkt_for_buf
13059  *
13060  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13061  *
13062  *     Context: Kernel thread or interrupt context
13063  */
13064 
13065 static void
13066 sd_destroypkt_for_buf(struct buf *bp)
13067 {
13068 	ASSERT(bp != NULL);
13069 	ASSERT(SD_GET_UN(bp) != NULL);
13070 
13071 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13072 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13073 
13074 	ASSERT(SD_GET_PKTP(bp) != NULL);
13075 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13076 
13077 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13078 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13079 }
13080 
/*
 *    Function: sd_setup_rw_pkt
 *
 * Description: Determines appropriate CDB group for the requested LBA
 *		and transfer length, calls scsi_init_pkt, and builds
 *		the CDB.  Do not use for partial DMA transfers except
 *		for the initial transfer since the CDB size must
 *		remain constant.
 *
 *     Context: Kernel thread and may be called from software interrupt
 *		context as part of a sdrunout callback. This function may not
 *		block or call routines that block
 */


int
sd_setup_rw_pkt(struct sd_lun *un,
    struct scsi_pkt **pktpp, struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount)
{
	struct scsi_pkt *return_pktp;
	union scsi_cdb *cdbp;
	struct sd_cdbinfo *cp = NULL;
	int i;

	/*
	 * See which size CDB to use, based upon the request.
	 * Scan from the smallest allowed group (un_mincdb) up to the
	 * largest (un_maxcdb) and take the first one that fits.
	 */
	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {

		/*
		 * Check lba and block count against sd_cdbtab limits.
		 * In the partial DMA case, we have to use the same size
		 * CDB for all the transfers.  Check lba + blockcount
		 * against the max LBA so we know that segment of the
		 * transfer can use the CDB we select.
		 */
		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {

			/*
			 * The command will fit into the CDB type
			 * specified by sd_cdbtab[i].
			 */
			cp = sd_cdbtab + i;

			/*
			 * Call scsi_init_pkt so we can fill in the
			 * CDB.
			 */
			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
			    bp, cp->sc_grpcode, un->un_status_len, 0,
			    flags, callback, callback_arg);

			if (return_pktp != NULL) {

				/*
				 * Return new value of pkt
				 */
				*pktpp = return_pktp;

				/*
				 * To be safe, zero the CDB ensuring there is
				 * no leftover data from a previous command.
				 */
				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);

				/*
				 * Handle partial DMA mapping
				 */
				if (return_pktp->pkt_resid != 0) {

					/*
					 * Not going to xfer as many blocks as
					 * originally expected
					 */
					blockcount -=
					    SD_BYTES2TGTBLOCKS(un,
						return_pktp->pkt_resid);
				}

				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;

				/*
				 * Set command byte based on the CDB
				 * type we matched.
				 */
				cdbp->scc_cmd = cp->sc_grpmask |
				    ((bp->b_flags & B_READ) ?
					SCMD_READ : SCMD_WRITE);

				SD_FILL_SCSI1_LUN(un, return_pktp);

				/*
				 * Fill in LBA and length
				 */
				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
				    (cp->sc_grpcode == CDB_GROUP4) ||
				    (cp->sc_grpcode == CDB_GROUP0) ||
				    (cp->sc_grpcode == CDB_GROUP5));

				if (cp->sc_grpcode == CDB_GROUP1) {
					FORMG1ADDR(cdbp, lba);
					FORMG1COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP4) {
					FORMG4LONGADDR(cdbp, lba);
					FORMG4COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP0) {
					FORMG0ADDR(cdbp, lba);
					FORMG0COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP5) {
					FORMG5ADDR(cdbp, lba);
					FORMG5COUNT(cdbp, blockcount);
					return (0);
				}

				/*
				 * It should be impossible to not match one
				 * of the CDB types above, so we should never
				 * reach this point.  Set the CDB command byte
				 * to test-unit-ready to avoid writing
				 * to somewhere we don't intend.
				 */
				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
			} else {
				/*
				 * Couldn't get scsi_pkt
				 */
				return (SD_PKT_ALLOC_FAILURE);
			}
		}
	}

	/*
	 * None of the available CDB types were suitable.  This really
	 * should never happen:  on a 64 bit system we support
	 * READ16/WRITE16 which will hold an entire 64 bit disk address
	 * and on a 32 bit system we will refuse to bind to a device
	 * larger than 2TB so addresses will never be larger than 32 bits.
	 */
	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
}
13228 
#if defined(__i386) || defined(__amd64)
/*
 *    Function: sd_setup_next_rw_pkt
 *
 * Description: Setup packet for partial DMA transfers, except for the
 * 		initial transfer.  sd_setup_rw_pkt should be used for
 *		the initial transfer.
 *
 *		The CDB group is fixed by the initial transfer (it is
 *		recovered from the existing command byte); only the LBA
 *		and transfer count fields are rewritten here.
 *
 *     Context: Kernel thread and may be called from interrupt context.
 */

int
sd_setup_next_rw_pkt(struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp,
    diskaddr_t lba, uint32_t blockcount)
{
	uchar_t com;
	union scsi_cdb *cdbp;
	uchar_t cdb_group_id;

	ASSERT(pktp != NULL);
	ASSERT(pktp->pkt_cdbp != NULL);

	/* Recover the CDB group from the command byte built previously. */
	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
	com = cdbp->scc_cmd;
	cdb_group_id = CDB_GROUPID(com);

	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
	    (cdb_group_id == CDB_GROUPID_1) ||
	    (cdb_group_id == CDB_GROUPID_4) ||
	    (cdb_group_id == CDB_GROUPID_5));

	/*
	 * Move pkt to the next portion of the xfer.
	 * func is NULL_FUNC so we do not have to release
	 * the disk mutex here.
	 */
	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
	    NULL_FUNC, NULL) == pktp) {
		/* Success.  Handle partial DMA */
		if (pktp->pkt_resid != 0) {
			/* This segment maps fewer blocks than requested. */
			blockcount -=
			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
		}

		/* Restore the command byte, then refill LBA and count. */
		cdbp->scc_cmd = com;
		SD_FILL_SCSI1_LUN(un, pktp);
		if (cdb_group_id == CDB_GROUPID_1) {
			FORMG1ADDR(cdbp, lba);
			FORMG1COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_4) {
			FORMG4LONGADDR(cdbp, lba);
			FORMG4COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_0) {
			FORMG0ADDR(cdbp, lba);
			FORMG0COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_5) {
			FORMG5ADDR(cdbp, lba);
			FORMG5COUNT(cdbp, blockcount);
			return (0);
		}

		/* Unreachable (the ASSERT above covers all four groups). */
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
	}

	/*
	 * Error setting up next portion of cmd transfer.
	 * Something is definitely very wrong and this
	 * should not happen.
	 */
	return (SD_PKT_ALLOC_FAILURE);
}
#endif /* defined(__i386) || defined(__amd64) */
13306 
/*
 *    Function: sd_initpkt_for_uscsi
 *
 * Description: Allocate and initialize for transport a scsi_pkt struct,
 *		based upon the info specified in the given uscsi_cmd struct.
 *
 * Return Code: SD_PKT_ALLOC_SUCCESS
 *		SD_PKT_ALLOC_FAILURE
 *		SD_PKT_ALLOC_FAILURE_NO_DMA
 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
 *
 *     Context: Kernel thread and may be called from software interrupt context
 *		as part of a sdrunout callback. This function may not block or
 *		call routines that block
 */

static int
sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	uint32_t	flags = 0;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);

	/*
	 * Allocate the scsi_pkt for the command.  A NULL bp is passed when
	 * the command carries no data (b_bcount == 0).
	 */
	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
	    sizeof (struct scsi_arq_status), 0, un->un_pkt_flags,
	    sdrunout, (caddr_t)un);

	if (pktp == NULL) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);

		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	}

	/*
	 * We do not do DMA breakup for USCSI commands, so return failure
	 * here if all the needed DMA resources were not allocated.
	 */
	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
		scsi_destroy_pkt(pktp);
		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
	}

	/* Init the cdb from the given uscsi struct */
	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
	    uscmd->uscsi_cdb[0], 0, 0, 0);

	SD_FILL_SCSI1_LUN(un, pktp);

	/*
	 * Set up the optional USCSI flags. See the uscsi (7I) man page
	 * for listing of the supported flags.
	 */

	if (uscmd->uscsi_flags & USCSI_SILENT) {
		flags |= FLAG_SILENT;
	}

	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
		flags |= FLAG_DIAGNOSE;
	}

	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
		flags |= FLAG_ISOLATE;
	}

	/* Wide/sync renegotiation only applies to parallel SCSI targets. */
	if (un->un_f_is_fibre == FALSE) {
		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
		}
	}

	/*
	 * Set the pkt flags here so we save time later.
	 * Note: These flags are NOT in the uscsi man page!!!
	 */
	if (uscmd->uscsi_flags & USCSI_HEAD) {
		flags |= FLAG_HEAD;
	}

	if (uscmd->uscsi_flags & USCSI_NOINTR) {
		flags |= FLAG_NOINTR;
	}

	/*
	 * For tagged queueing, things get a bit complicated.
	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor order, use the default driver tag flags.
	 */
	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
		if (uscmd->uscsi_flags & USCSI_HTAG) {
			flags |= FLAG_HTAG;
		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
			flags |= FLAG_OTAG;
		} else {
			flags |= un->un_tagflags & FLAG_TAGMASK;
		}
	}

	/* NODISCON overrides any tag flags set above. */
	if (uscmd->uscsi_flags & USCSI_NODISCON) {
		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
	}

	pktp->pkt_flags = flags;

	/* Copy the caller's CDB into the pkt... */
	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);

	/* A zero uscsi_timeout means "use the driver default". */
	if (uscmd->uscsi_timeout == 0) {
		pktp->pkt_time = un->un_uscsi_timeout;
	} else {
		pktp->pkt_time = uscmd->uscsi_timeout;
	}

	/* need it later to identify USCSI request in sdintr */
	xp->xb_pkt_flags |= SD_XB_USCSICMD;

	xp->xb_sense_resid = uscmd->uscsi_rqresid;

	pktp->pkt_private = bp;
	pktp->pkt_comp = sdintr;
	*pktpp = pktp;

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);

	return (SD_PKT_ALLOC_SUCCESS);
}
13468 
13469 
13470 /*
13471  *    Function: sd_destroypkt_for_uscsi
13472  *
13473  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13474  *		IOs.. Also saves relevant info into the associated uscsi_cmd
13475  *		struct.
13476  *
13477  *     Context: May be called under interrupt context
13478  */
13479 
13480 static void
13481 sd_destroypkt_for_uscsi(struct buf *bp)
13482 {
13483 	struct uscsi_cmd *uscmd;
13484 	struct sd_xbuf	*xp;
13485 	struct scsi_pkt	*pktp;
13486 	struct sd_lun	*un;
13487 
13488 	ASSERT(bp != NULL);
13489 	xp = SD_GET_XBUF(bp);
13490 	ASSERT(xp != NULL);
13491 	un = SD_GET_UN(bp);
13492 	ASSERT(un != NULL);
13493 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13494 	pktp = SD_GET_PKTP(bp);
13495 	ASSERT(pktp != NULL);
13496 
13497 	SD_TRACE(SD_LOG_IO_CORE, un,
13498 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13499 
13500 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13501 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13502 	ASSERT(uscmd != NULL);
13503 
13504 	/* Save the status and the residual into the uscsi_cmd struct */
13505 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13506 	uscmd->uscsi_resid  = bp->b_resid;
13507 
13508 	/*
13509 	 * If enabled, copy any saved sense data into the area specified
13510 	 * by the uscsi command.
13511 	 */
13512 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13513 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13514 		/*
13515 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13516 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13517 		 */
13518 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13519 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13520 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13521 	}
13522 
13523 	/* We are done with the scsi_pkt; free it now */
13524 	ASSERT(SD_GET_PKTP(bp) != NULL);
13525 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13526 
13527 	SD_TRACE(SD_LOG_IO_CORE, un,
13528 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13529 }
13530 
13531 
13532 /*
13533  *    Function: sd_bioclone_alloc
13534  *
13535  * Description: Allocate a buf(9S) and init it as per the given buf
13536  *		and the various arguments.  The associated sd_xbuf
13537  *		struct is (nearly) duplicated.  The struct buf *bp
13538  *		argument is saved in new_xp->xb_private.
13539  *
13540  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13541  *		datalen - size of data area for the shadow bp
13542  *		blkno - starting LBA
13543  *		func - function pointer for b_iodone in the shadow buf. (May
13544  *			be NULL if none.)
13545  *
13546  * Return Code: Pointer to allocates buf(9S) struct
13547  *
13548  *     Context: Can sleep.
13549  */
13550 
13551 static struct buf *
13552 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13553 	daddr_t blkno, int (*func)(struct buf *))
13554 {
13555 	struct	sd_lun	*un;
13556 	struct	sd_xbuf	*xp;
13557 	struct	sd_xbuf	*new_xp;
13558 	struct	buf	*new_bp;
13559 
13560 	ASSERT(bp != NULL);
13561 	xp = SD_GET_XBUF(bp);
13562 	ASSERT(xp != NULL);
13563 	un = SD_GET_UN(bp);
13564 	ASSERT(un != NULL);
13565 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13566 
13567 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13568 	    NULL, KM_SLEEP);
13569 
13570 	new_bp->b_lblkno	= blkno;
13571 
13572 	/*
13573 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13574 	 * original xbuf into it.
13575 	 */
13576 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13577 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13578 
13579 	/*
13580 	 * The given bp is automatically saved in the xb_private member
13581 	 * of the new xbuf.  Callers are allowed to depend on this.
13582 	 */
13583 	new_xp->xb_private = bp;
13584 
13585 	new_bp->b_private  = new_xp;
13586 
13587 	return (new_bp);
13588 }
13589 
/*
 *    Function: sd_shadow_buf_alloc
 *
 * Description: Allocate a buf(9S) and init it as per the given buf
 *		and the various arguments.  The associated sd_xbuf
 *		struct is (nearly) duplicated.  The struct buf *bp
 *		argument is saved in new_xp->xb_private.
 *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
 *		datalen - size of data area for the shadow bp
 *		bflags - B_READ or B_WRITE (pseudo flag)
 *		blkno - starting LBA
 *		func - function pointer for b_iodone in the shadow buf. (May
 *			be NULL if none.)
 *
 * Return Code: Pointer to the allocated buf(9S) struct
 *
 *     Context: Can sleep.
 */

static struct buf *
sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
	daddr_t blkno, int (*func)(struct buf *))
{
	struct	sd_lun	*un;
	struct	sd_xbuf	*xp;
	struct	sd_xbuf	*new_xp;
	struct	buf	*new_bp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Map in the original buf's data so it can later be copied
	 * to/from the shadow buf's kernel-allocated data area.
	 */
	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
		bp_mapin(bp);
	}

	/* Only the transfer-direction bits are honored from bflags. */
	bflags &= (B_READ | B_WRITE);
#if defined(__i386) || defined(__amd64)
	/*
	 * On x86 a plain raw buf with a zeroed kernel data area is used;
	 * sd_shadow_buf_free() must mirror this (freerbuf + kmem_free).
	 */
	new_bp = getrbuf(KM_SLEEP);
	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
	new_bp->b_bcount = datalen;
	new_bp->b_flags	= bp->b_flags | bflags;
#else
	/* On SPARC, use a DMA-consistent buffer from the SCSA framework. */
	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
	    datalen, bflags, SLEEP_FUNC, NULL);
#endif
	new_bp->av_forw	= NULL;
	new_bp->av_back	= NULL;
	new_bp->b_dev	= bp->b_dev;
	new_bp->b_blkno	= blkno;
	new_bp->b_iodone = func;
	new_bp->b_edev	= bp->b_edev;
	new_bp->b_resid	= 0;

	/* We need to preserve the B_FAILFAST flag */
	if (bp->b_flags & B_FAILFAST) {
		new_bp->b_flags |= B_FAILFAST;
	}

	/*
	 * Allocate an xbuf for the shadow bp and copy the contents of the
	 * original xbuf into it.
	 */
	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	bcopy(xp, new_xp, sizeof (struct sd_xbuf));

	/* Need later to copy data between the shadow buf & original buf! */
	new_xp->xb_pkt_flags |= PKT_CONSISTENT;

	/*
	 * The given bp is automatically saved in the xb_private member
	 * of the new xbuf.  Callers are allowed to depend on this.
	 */
	new_xp->xb_private = bp;

	new_bp->b_private  = new_xp;

	return (new_bp);
}
13673 
13674 /*
13675  *    Function: sd_bioclone_free
13676  *
13677  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13678  *		in the larger than partition operation.
13679  *
13680  *     Context: May be called under interrupt context
13681  */
13682 
13683 static void
13684 sd_bioclone_free(struct buf *bp)
13685 {
13686 	struct sd_xbuf	*xp;
13687 
13688 	ASSERT(bp != NULL);
13689 	xp = SD_GET_XBUF(bp);
13690 	ASSERT(xp != NULL);
13691 
13692 	/*
13693 	 * Call bp_mapout() before freeing the buf,  in case a lower
13694 	 * layer or HBA  had done a bp_mapin().  we must do this here
13695 	 * as we are the "originator" of the shadow buf.
13696 	 */
13697 	bp_mapout(bp);
13698 
13699 	/*
13700 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13701 	 * never gets confused by a stale value in this field. (Just a little
13702 	 * extra defensiveness here.)
13703 	 */
13704 	bp->b_iodone = NULL;
13705 
13706 	freerbuf(bp);
13707 
13708 	kmem_free(xp, sizeof (struct sd_xbuf));
13709 }
13710 
/*
 *    Function: sd_shadow_buf_free
 *
 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
 *		The platform-specific paths here mirror the allocation
 *		performed by sd_shadow_buf_alloc().
 *
 *     Context: May be called under interrupt context
 */

static void
sd_shadow_buf_free(struct buf *bp)
{
	struct sd_xbuf	*xp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__sparc)
	/*
	 * Call bp_mapout() before freeing the buf,  in case a lower
	 * layer or HBA  had done a bp_mapin().  we must do this here
	 * as we are the "originator" of the shadow buf.
	 */
	bp_mapout(bp);
#endif

	/*
	 * Null out b_iodone before freeing the bp, to ensure that the driver
	 * never gets confused by a stale value in this field. (Just a little
	 * extra defensiveness here.)
	 */
	bp->b_iodone = NULL;

#if defined(__i386) || defined(__amd64)
	/* x86: release the kmem data area and raw buf from getrbuf(). */
	kmem_free(bp->b_un.b_addr, bp->b_bcount);
	freerbuf(bp);
#else
	/* SPARC: release the DMA-consistent buffer from the framework. */
	scsi_free_consistent_buf(bp);
#endif

	kmem_free(xp, sizeof (struct sd_xbuf));
}
13753 
13754 
13755 /*
13756  *    Function: sd_print_transport_rejected_message
13757  *
13758  * Description: This implements the ludicrously complex rules for printing
13759  *		a "transport rejected" message.  This is to address the
13760  *		specific problem of having a flood of this error message
13761  *		produced when a failover occurs.
13762  *
13763  *     Context: Any.
13764  */
13765 
13766 static void
13767 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13768 	int code)
13769 {
13770 	ASSERT(un != NULL);
13771 	ASSERT(mutex_owned(SD_MUTEX(un)));
13772 	ASSERT(xp != NULL);
13773 
13774 	/*
13775 	 * Print the "transport rejected" message under the following
13776 	 * conditions:
13777 	 *
13778 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13779 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13780 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13781 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13782 	 *   scsi_transport(9F) (which indicates that the target might have
13783 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13784 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13785 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
13786 	 *   from scsi_transport().
13787 	 *
13788 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13789 	 * the preceeding cases in order for the message to be printed.
13790 	 */
13791 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
13792 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13793 		    (code != TRAN_FATAL_ERROR) ||
13794 		    (un->un_tran_fatal_count == 1)) {
13795 			switch (code) {
13796 			case TRAN_BADPKT:
13797 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13798 				    "transport rejected bad packet\n");
13799 				break;
13800 			case TRAN_FATAL_ERROR:
13801 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13802 				    "transport rejected fatal error\n");
13803 				break;
13804 			default:
13805 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13806 				    "transport rejected (%d)\n", code);
13807 				break;
13808 			}
13809 		}
13810 	}
13811 }
13812 
13813 
/*
 *    Function: sd_add_buf_to_waitq
 *
 * Description: Add the given buf(9S) struct to the wait queue for the
 *		instance.  If sorting is enabled, then the buf is added
 *		to the queue via an elevator sort algorithm (a la
 *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
 *		If sorting is not enabled, then the buf is just added
 *		to the end of the wait queue.
 *
 * Return Code: void
 *
 *     Context: Does not sleep/block, therefore technically can be called
 *		from any context.  However if sorting is enabled then the
 *		execution time is indeterminate, and may take long if
 *		the wait queue grows large.
 */

static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return.
	 */
	if (un->un_f_disksort_disabled) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 */
	ap = un->un_waitq_headp;
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		/* New request sorts before the head: belongs in list 2. */
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Link bp in immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
13949 
13950 
13951 /*
13952  *    Function: sd_start_cmds
13953  *
13954  * Description: Remove and transport cmds from the driver queues.
13955  *
13956  *   Arguments: un - pointer to the unit (soft state) struct for the target.
13957  *
13958  *		immed_bp - ptr to a buf to be transported immediately. Only
13959  *		the immed_bp is transported; bufs on the waitq are not
13960  *		processed and the un_retry_bp is not checked.  If immed_bp is
13961  *		NULL, then normal queue processing is performed.
13962  *
13963  *     Context: May be called from kernel thread context, interrupt context,
13964  *		or runout callback context. This function may not block or
13965  *		call routines that block.
13966  */
13967 
static void
sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
{
	struct	sd_xbuf	*xp;
	struct	buf	*bp;
	void	(*statp)(kstat_io_t *);
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	void	(*saved_statp)(kstat_io_t *);
#endif
	int	rval;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_ncmds_in_transport >= 0);
	ASSERT(un->un_throttle >= 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");

	/*
	 * Issue commands until the queues drain, an error or throttle
	 * condition forces an early exit, or (when immed_bp != NULL) the
	 * single immediate command has been transported.
	 */
	do {
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		saved_statp = NULL;
#endif

		/*
		 * If we are syncing or dumping, fail the command to
		 * avoid recursively calling back into scsi_transport().
		 * See panic.c for more information about the states
		 * the system can be in during panic.
		 */
		if ((un->un_state == SD_STATE_DUMPING) ||
		    (un->un_in_callback > 1)) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_start_cmds: panicking\n");
			goto exit;
		}

		if ((bp = immed_bp) != NULL) {
			/*
			 * We have a bp that must be transported immediately.
			 * It's OK to transport the immed_bp here without doing
			 * the throttle limit check because the immed_bp is
			 * always used in a retry/recovery case. This means
			 * that we know we are not at the throttle limit by
			 * virtue of the fact that to get here we must have
			 * already gotten a command back via sdintr(). This also
			 * relies on (1) the command on un_retry_bp preventing
			 * further commands from the waitq from being issued;
			 * and (2) the code in sd_retry_command checking the
			 * throttle limit before issuing a delayed or immediate
			 * retry. This holds even if the throttle limit is
			 * currently ratcheted down from its maximum value.
			 */
			statp = kstat_runq_enter;
			if (bp == un->un_retry_bp) {
				ASSERT((un->un_retry_statp == NULL) ||
				    (un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq));
				/*
				 * If the waitq kstat was incremented when
				 * sd_set_retry_bp() queued this bp for a retry,
				 * then we must set up statp so that the waitq
				 * count will get decremented correctly below.
				 * Also we must clear un->un_retry_statp to
				 * ensure that we do not act on a stale value
				 * in this field.
				 */
				if ((un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq)) {
					statp = kstat_waitq_to_runq;
				}
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
				saved_statp = un->un_retry_statp;
#endif
				un->un_retry_statp = NULL;

				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
				    "un_throttle:%d un_ncmds_in_transport:%d\n",
				    un, un->un_retry_bp, un->un_throttle,
				    un->un_ncmds_in_transport);
			} else {
				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
				    "processing priority bp:0x%p\n", bp);
			}

		} else if ((bp = un->un_waitq_headp) != NULL) {
			/*
			 * A command on the waitq is ready to go, but do not
			 * send it if:
			 *
			 * (1) the throttle limit has been reached, or
			 * (2) a retry is pending, or
			 * (3) a START_STOP_UNIT callback pending, or
			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
			 *	command is pending.
			 *
			 * For all of these conditions, IO processing will
			 * restart after the condition is cleared.
			 */
			if (un->un_ncmds_in_transport >= un->un_throttle) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "throttle limit reached!\n");
				goto exit;
			}
			if (un->un_retry_bp != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, retry pending!\n");
				goto exit;
			}
			if (un->un_startstop_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "START_STOP pending!\n");
				goto exit;
			}
			if (un->un_direct_priority_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
				goto exit;
			}

			/* Dequeue the command */
			un->un_waitq_headp = bp->av_forw;
			if (un->un_waitq_headp == NULL) {
				un->un_waitq_tailp = NULL;
			}
			bp->av_forw = NULL;
			statp = kstat_waitq_to_runq;
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);

		} else {
			/* No work to do so bail out now */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: no more work, exiting!\n");
			goto exit;
		}

		/*
		 * Reset the state to normal. This is the mechanism by which
		 * the state transitions from either SD_STATE_RWAIT or
		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
		 * allow new commands to proceed when they shouldn't,
		 * the device may be going off.
		 */
		if ((un->un_state != SD_STATE_SUSPENDED) &&
		    (un->un_state != SD_STATE_PM_CHANGING)) {
			New_state(un, SD_STATE_NORMAL);
		    }

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		/*
		 * Allocate the scsi_pkt if we need one, or attach DMA
		 * resources if we have a scsi_pkt that needs them. The
		 * latter should only occur for commands that are being
		 * retried.
		 */
		if ((xp->xb_pktp == NULL) ||
		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
#else
		if (xp->xb_pktp == NULL) {
#endif
			/*
			 * There is no scsi_pkt allocated for this buf. Call
			 * the initpkt function to allocate & init one.
			 *
			 * The scsi_init_pkt runout callback functionality is
			 * implemented as follows:
			 *
			 * 1) The initpkt function always calls
			 *    scsi_init_pkt(9F) with sdrunout specified as the
			 *    callback routine.
			 * 2) A successful packet allocation is initialized and
			 *    the I/O is transported.
			 * 3) The I/O associated with an allocation resource
			 *    failure is left on its queue to be retried via
			 *    runout or the next I/O.
			 * 4) The I/O associated with a DMA error is removed
			 *    from the queue and failed with EIO. Processing of
			 *    the transport queues is also halted to be
			 *    restarted via runout or the next I/O.
			 * 5) The I/O associated with a CDB size or packet
			 *    size error is removed from the queue and failed
			 *    with EIO. Processing of the transport queues is
			 *    continued.
			 *
			 * Note: there is no interface for canceling a runout
			 * callback. To prevent the driver from detaching or
			 * suspending while a runout is pending the driver
			 * state is set to SD_STATE_RWAIT
			 *
			 * Note: using the scsi_init_pkt callback facility can
			 * result in an I/O request persisting at the head of
			 * the list which cannot be satisfied even after
			 * multiple retries. In the future the driver may
			 * implement some kind of maximum runout count before
			 * failing an I/O.
			 *
			 * Note: the use of funcp below may seem superfluous,
			 * but it helps warlock figure out the correct
			 * initpkt function calls (see [s]sd.wlcmd).
			 */
			struct scsi_pkt	*pktp;
			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);

			ASSERT(bp != un->un_rqs_bp);

			funcp = sd_initpkt_map[xp->xb_chain_iostart];
			switch ((*funcp)(bp, &pktp)) {
			case  SD_PKT_ALLOC_SUCCESS:
				xp->xb_pktp = pktp;
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
				    pktp);
				goto got_pkt;

			case SD_PKT_ALLOC_FAILURE:
				/*
				 * Temporary (hopefully) resource depletion.
				 * Since retries and RQS commands always have a
				 * scsi_pkt allocated, these cases should never
				 * get here. So the only cases this needs to
				 * handle is a bp from the waitq (which we put
				 * back onto the waitq for sdrunout), or a bp
				 * sent as an immed_bp (which we just fail).
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */

				if (bp == immed_bp) {
					/*
					 * If SD_XB_DMA_FREED is clear, then
					 * this is a failure to allocate a
					 * scsi_pkt, and we must fail the
					 * command.
					 */
					if ((xp->xb_pkt_flags &
					    SD_XB_DMA_FREED) == 0) {
						break;
					}

					/*
					 * If this immediate command is NOT our
					 * un_retry_bp, then we must fail it.
					 */
					if (bp != un->un_retry_bp) {
						break;
					}

					/*
					 * We get here if this cmd is our
					 * un_retry_bp that was DMAFREED, but
					 * scsi_init_pkt() failed to reallocate
					 * DMA resources when we attempted to
					 * retry it. This can happen when an
					 * mpxio failover is in progress, but
					 * we don't want to just fail the
					 * command in this case.
					 *
					 * Use timeout(9F) to restart it after
					 * a 100ms delay.  We don't want to
					 * let sdrunout() restart it, because
					 * sdrunout() is just supposed to start
					 * commands that are sitting on the
					 * wait queue.  The un_retry_bp stays
					 * set until the command completes, but
					 * sdrunout can be called many times
					 * before that happens.  Since sdrunout
					 * cannot tell if the un_retry_bp is
					 * already in the transport, it could
					 * end up calling scsi_transport() for
					 * the un_retry_bp multiple times.
					 *
					 * Also: don't schedule the callback
					 * if some other callback is already
					 * pending.
					 */
					if (un->un_retry_statp == NULL) {
						/*
						 * restore the kstat pointer to
						 * keep kstat counts coherent
						 * when we do retry the command.
						 */
						un->un_retry_statp =
						    saved_statp;
					}

					if ((un->un_startstop_timeid == NULL) &&
					    (un->un_retry_timeid == NULL) &&
					    (un->un_direct_priority_timeid ==
					    NULL)) {

						un->un_retry_timeid =
						    timeout(
						    sd_start_retry_command,
						    un, SD_RESTART_TIMEOUT);
					}
					goto exit;
				}

#else
				if (bp == immed_bp) {
					break;	/* Just fail the command */
				}
#endif

				/* Add the buf back to the head of the waitq */
				bp->av_forw = un->un_waitq_headp;
				un->un_waitq_headp = bp;
				if (un->un_waitq_tailp == NULL) {
					un->un_waitq_tailp = bp;
				}
				goto exit;

			case SD_PKT_ALLOC_FAILURE_NO_DMA:
				/*
				 * HBA DMA resource failure. Fail the command
				 * and continue processing of the queues.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
				break;

			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
				/*
				 * Note:x86: Partial DMA mapping not supported
				 * for USCSI commands, and all the needed DMA
				 * resources were not allocated.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
				break;

			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
				/*
				 * Note:x86: Request cannot fit into CDB based
				 * on lba and len.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
				break;

			default:
				/* Should NEVER get here! */
				panic("scsi_initpkt error");
				/*NOTREACHED*/
			}

			/*
			 * Fatal error in allocating a scsi_pkt for this buf.
			 * Update kstats & return the buf with an error code.
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			if (statp == kstat_waitq_to_runq) {
				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
			}
			sd_return_failed_command_no_restart(un, bp, EIO);
			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			continue;
		}
got_pkt:
		if (bp == immed_bp) {
			/* goto the head of the class.... */
			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
		}

		un->un_ncmds_in_transport++;
		SD_UPDATE_KSTATS(un, statp, bp);

		/*
		 * Call scsi_transport() to send the command to the target.
		 * According to SCSA architecture, we must drop the mutex here
		 * before calling scsi_transport() in order to avoid deadlock.
		 * Note that the scsi_pkt's completion routine can be executed
		 * (from interrupt context) even before the call to
		 * scsi_transport() returns.
		 */
		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_start_cmds: calling scsi_transport()\n");
		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);

		mutex_exit(SD_MUTEX(un));
		rval = scsi_transport(xp->xb_pktp);
		mutex_enter(SD_MUTEX(un));

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_start_cmds: scsi_transport() returned %d\n", rval);

		switch (rval) {
		case TRAN_ACCEPT:
			/* Clear this with every pkt accepted by the HBA */
			un->un_tran_fatal_count = 0;
			break;	/* Success; try the next cmd (if any) */

		case TRAN_BUSY:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * Don't retry request sense, the sense data
			 * is lost when another request is sent.
			 * Free up the rqs buf and retry
			 * the original failed cmd.  Update kstat.
			 */
			if (bp == un->un_rqs_bp) {
				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				bp = sd_mark_rqs_idle(un, xp);
				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
					kstat_waitq_enter);
				goto exit;
			}

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
			/*
			 * Free the DMA resources for the  scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationalization behind this.
			 */
			if ((un->un_f_is_fibre == TRUE) &&
			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
				scsi_dmafree(xp->xb_pktp);
				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
			}
#endif

			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
				/*
				 * Commands that are SD_PATH_DIRECT_PRIORITY
				 * are for error recovery situations. These do
				 * not use the normal command waitq, so if they
				 * get a TRAN_BUSY we cannot put them back onto
				 * the waitq for later retry. One possible
				 * problem is that there could already be some
				 * other command on un_retry_bp that is waiting
				 * for this one to complete, so we would be
				 * deadlocked if we put this command back onto
				 * the waitq for later retry (since un_retry_bp
				 * must complete before the driver gets back to
				 * commands on the waitq).
				 *
				 * To avoid deadlock we must schedule a callback
				 * that will restart this command after a set
				 * interval.  This should keep retrying for as
				 * long as the underlying transport keeps
				 * returning TRAN_BUSY (just like for other
				 * commands).  Use the same timeout interval as
				 * for the ordinary TRAN_BUSY retry.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: scsi_transport() returned "
				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");

				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				un->un_direct_priority_timeid =
				    timeout(sd_start_direct_priority_command,
				    bp, SD_BSY_TIMEOUT / 500);

				goto exit;
			}

			/*
			 * For TRAN_BUSY, we want to reduce the throttle value,
			 * unless we are retrying a command.
			 */
			if (bp != un->un_retry_bp) {
				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
			}

			/*
			 * Set up the bp to be tried again 10 ms later.
			 * Note:x86: Is there a timeout value in the sd_lun
			 * for this condition?
			 */
			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
				kstat_runq_back_to_waitq);
			goto exit;

		case TRAN_FATAL_ERROR:
			un->un_tran_fatal_count++;
			/* FALLTHRU */

		case TRAN_BADPKT:
		default:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * If this is our REQUEST SENSE command with a
			 * transport error, we must get back the pointers
			 * to the original buf, and mark the REQUEST
			 * SENSE command as "available".
			 */
			if (bp == un->un_rqs_bp) {
				bp = sd_mark_rqs_idle(un, xp);
				xp = SD_GET_XBUF(bp);
			} else {
				/*
				 * Legacy behavior: do not update transport
				 * error count for request sense commands.
				 */
				SD_UPDATE_ERRSTATS(un, sd_transerrs);
			}

			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
			sd_print_transport_rejected_message(un, xp, rval);

			/*
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			sd_return_failed_command_no_restart(un, bp, EIO);

			/*
			 * Notify any threads waiting in sd_ddi_suspend() that
			 * a command completion has occurred.
			 */
			if (un->un_state == SD_STATE_SUSPENDED) {
				cv_broadcast(&un->un_disk_busy_cv);
			}

			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			break;
		}

		/*
		 * Loop only while draining the waitq; an immed_bp is
		 * transported (or failed) exactly once.
		 */
	} while (immed_bp == NULL);

exit:
	ASSERT(mutex_owned(SD_MUTEX(un)));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
}
14527 
14528 
14529 /*
14530  *    Function: sd_return_command
14531  *
14532  * Description: Returns a command to its originator (with or without an
14533  *		error).  Also starts commands waiting to be transported
14534  *		to the target.
14535  *
14536  *     Context: May be called from interrupt, kernel, or timeout context
14537  */
14538 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;
#if defined(__i386) || defined(__amd64)
	struct scsi_pkt *pktp;
#endif

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	/* REQUEST SENSE bufs are returned via sd_mark_rqs_idle(), not here */
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)
	pktp = SD_GET_PKTP(bp);
#endif

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

#if defined(__i386) || defined(__amd64)
	/*
	 * Note:x86: check for the "sdrestart failed" case.
	 */
	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
		(xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}
#endif

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		/* Restart any queued commands now that this one is done */
		sd_start_cmds(un, NULL);
		break;
	}

	/*
	 * Return this command up the iodone chain to its originator.
	 * The scsi_pkt is destroyed and cleared first; the mutex must be
	 * dropped across both the destroypkt and iodone processing.
	 */
	mutex_exit(SD_MUTEX(un));

	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/* Re-acquire the mutex; callers expect it held on return */
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
14643 
14644 
14645 /*
14646  *    Function: sd_return_failed_command
14647  *
14648  * Description: Command completion when an error occurred.
14649  *
14650  *     Context: May be called from interrupt context
14651  */
14652 
14653 static void
14654 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14655 {
14656 	ASSERT(bp != NULL);
14657 	ASSERT(un != NULL);
14658 	ASSERT(mutex_owned(SD_MUTEX(un)));
14659 
14660 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14661 	    "sd_return_failed_command: entry\n");
14662 
14663 	/*
14664 	 * b_resid could already be nonzero due to a partial data
14665 	 * transfer, so do not change it here.
14666 	 */
14667 	SD_BIOERROR(bp, errcode);
14668 
14669 	sd_return_command(un, bp);
14670 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14671 	    "sd_return_failed_command: exit\n");
14672 }
14673 
14674 
14675 /*
14676  *    Function: sd_return_failed_command_no_restart
14677  *
14678  * Description: Same as sd_return_failed_command, but ensures that no
14679  *		call back into sd_start_cmds will be issued.
14680  *
14681  *     Context: May be called from interrupt context
14682  */
14683 
14684 static void
14685 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14686 	int errcode)
14687 {
14688 	struct sd_xbuf *xp;
14689 
14690 	ASSERT(bp != NULL);
14691 	ASSERT(un != NULL);
14692 	ASSERT(mutex_owned(SD_MUTEX(un)));
14693 	xp = SD_GET_XBUF(bp);
14694 	ASSERT(xp != NULL);
14695 	ASSERT(errcode != 0);
14696 
14697 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14698 	    "sd_return_failed_command_no_restart: entry\n");
14699 
14700 	/*
14701 	 * b_resid could already be nonzero due to a partial data
14702 	 * transfer, so do not change it here.
14703 	 */
14704 	SD_BIOERROR(bp, errcode);
14705 
14706 	/*
14707 	 * If this is the failfast bp, clear it. This can happen if the
14708 	 * failfast bp encounterd a fatal error when we attempted to
14709 	 * re-try it (such as a scsi_transport(9F) failure).  However
14710 	 * we should NOT be in an active failfast state if the failfast
14711 	 * bp is not NULL.
14712 	 */
14713 	if (bp == un->un_failfast_bp) {
14714 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14715 		un->un_failfast_bp = NULL;
14716 	}
14717 
14718 	if (bp == un->un_retry_bp) {
14719 		/*
14720 		 * This command was retried one or more times. Show that we are
14721 		 * done with it, and allow processing of the waitq to resume.
14722 		 */
14723 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14724 		    "sd_return_failed_command_no_restart: "
14725 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14726 		un->un_retry_bp = NULL;
14727 		un->un_retry_statp = NULL;
14728 	}
14729 
14730 	SD_UPDATE_RDWR_STATS(un, bp);
14731 	SD_UPDATE_PARTITION_STATS(un, bp);
14732 
14733 	mutex_exit(SD_MUTEX(un));
14734 
14735 	if (xp->xb_pktp != NULL) {
14736 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14737 		xp->xb_pktp = NULL;
14738 	}
14739 
14740 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14741 
14742 	mutex_enter(SD_MUTEX(un));
14743 
14744 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14745 	    "sd_return_failed_command_no_restart: exit\n");
14746 }
14747 
14748 
14749 /*
14750  *    Function: sd_retry_command
14751  *
14752  * Description: queue up a command for retry, or (optionally) fail it
14753  *		if retry counts are exhausted.
14754  *
14755  *   Arguments: un - Pointer to the sd_lun struct for the target.
14756  *
14757  *		bp - Pointer to the buf for the command to be retried.
14758  *
14759  *		retry_check_flag - Flag to see which (if any) of the retry
14760  *		   counts should be decremented/checked. If the indicated
14761  *		   retry count is exhausted, then the command will not be
14762  *		   retried; it will be failed instead. This should use a
14763  *		   value equal to one of the following:
14764  *
14765  *			SD_RETRIES_NOCHECK
 *			SD_RETRIES_STANDARD
14767  *			SD_RETRIES_VICTIM
14768  *
14769  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
 *		   if the check should be made to see if FLAG_ISOLATE is set
14771  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14772  *		   not retried, it is simply failed.
14773  *
14774  *		user_funcp - Ptr to function to call before dispatching the
14775  *		   command. May be NULL if no action needs to be performed.
14776  *		   (Primarily intended for printing messages.)
14777  *
14778  *		user_arg - Optional argument to be passed along to
14779  *		   the user_funcp call.
14780  *
14781  *		failure_code - errno return code to set in the bp if the
14782  *		   command is going to be failed.
14783  *
14784  *		retry_delay - Retry delay interval in (clock_t) units. May
14785  *		   be zero which indicates that the retry should be retried
14786  *		   immediately (ie, without an intervening delay).
14787  *
14788  *		statp - Ptr to kstat function to be updated if the command
14789  *		   is queued for a delayed retry. May be NULL if no kstat
14790  *		   update is desired.
14791  *
 *     Context: May be called from interrupt context.
14793  */
14794 
static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
	code), void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport().
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error (dumping the CDB and sense data for diagnosis) and
	 * fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended, then put the command onto head of the
	 * wait queue since we don't want to start more commands.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			/* Queue was empty; bp is now both head and tail. */
			un->un_waitq_tailp = bp;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even though this
			 * will take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 *
				 * (This empty else arm exists only to hold
				 * this comment; it is compiled out under
				 * lint.)
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				/* Mask off the group bits of the opcode. */
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
			xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/* Ask the transport to run the retry ahead of other queued cmds. */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry.
			 */
			retry_delay = SD_BSY_TIMEOUT;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:
	/* Notify the caller's callback (if any) that no retry was issued. */

	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
15113 
15114 
15115 /*
15116  *    Function: sd_set_retry_bp
15117  *
15118  * Description: Set up the given bp for retry.
15119  *
15120  *   Arguments: un - ptr to associated softstate
15121  *		bp - ptr to buf(9S) for the command
15122  *		retry_delay - time interval before issuing retry (may be 0)
15123  *		statp - optional pointer to kstat function
15124  *
15125  *     Context: May be called under interrupt context
15126  */
15127 
static void
sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);

	/*
	 * Indicate that the command is being retried. This will not allow any
	 * other commands on the wait queue to be transported to the target
	 * until this command has been completed (success or failure). The
	 * "retry command" is not transported to the target until the given
	 * time delay expires, unless the user specified a 0 retry_delay.
	 *
	 * Note: the timeout(9F) callback routine is what actually calls
	 * sd_start_cmds() to transport the command, with the exception of a
	 * zero retry_delay. The only current implementor of a zero retry delay
	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
	 */
	if (un->un_retry_bp == NULL) {
		ASSERT(un->un_retry_statp == NULL);
		/* No retry currently pending; this bp becomes the retry cmd. */
		un->un_retry_bp = bp;

		/*
		 * If the user has not specified a delay the command should
		 * be queued and no timeout should be scheduled.
		 */
		if (retry_delay == 0) {
			/*
			 * Save the kstat pointer that will be used in the
			 * call to SD_UPDATE_KSTATS() below, so that
			 * sd_start_cmds() can correctly decrement the waitq
			 * count when it is time to transport this command.
			 */
			un->un_retry_statp = statp;
			goto done;
		}
	}

	if (un->un_retry_bp == bp) {
		/*
		 * Save the kstat pointer that will be used in the call to
		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
		 * correctly decrement the waitq count when it is time to
		 * transport this command.
		 */
		un->un_retry_statp = statp;

		/*
		 * Schedule a timeout if:
		 *   1) The user has specified a delay.
		 *   2) There is not a START_STOP_UNIT callback pending.
		 *
		 * If no delay has been specified, then it is up to the caller
		 * to ensure that IO processing continues without stalling.
		 * Effectively, this means that the caller will issue the
		 * required call to sd_start_cmds(). The START_STOP_UNIT
		 * callback does this after the START STOP UNIT command has
		 * completed. In either of these cases we should not schedule
		 * a timeout callback here.  Also don't schedule the timeout if
		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
		 */
		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
		    (un->un_direct_priority_timeid == NULL)) {
			un->un_retry_timeid =
			    timeout(sd_start_retry_command, un, retry_delay);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_set_retry_bp: setting timeout: un: 0x%p"
			    " bp:0x%p un_retry_timeid:0x%p\n",
			    un, bp, un->un_retry_timeid);
		}
	} else {
		/*
		 * We only get in here if there is already another command
		 * waiting to be retried.  In this case, we just put the
		 * given command onto the wait queue, so it can be transported
		 * after the current retry command has completed.
		 *
		 * Also we have to make sure that if the command at the head
		 * of the wait queue is the un_failfast_bp, that we do not
		 * put ahead of it any other commands that are to be retried.
		 */
		if ((un->un_failfast_bp != NULL) &&
		    (un->un_failfast_bp == un->un_waitq_headp)) {
			/*
			 * Enqueue this command AFTER the first command on
			 * the wait queue (which is also un_failfast_bp).
			 */
			bp->av_forw = un->un_waitq_headp->av_forw;
			un->un_waitq_headp->av_forw = bp;
			if (un->un_waitq_headp == un->un_waitq_tailp) {
				/* Queue had one entry; bp is the new tail. */
				un->un_waitq_tailp = bp;
			}
		} else {
			/* Enqueue this command at the head of the waitq. */
			bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = bp;
			if (un->un_waitq_tailp == NULL) {
				/* Queue was empty; bp is head and tail. */
				un->un_waitq_tailp = bp;
			}
		}

		if (statp == NULL) {
			/* Command went onto the waitq; account for it there. */
			statp = kstat_waitq_enter;
		}
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
	}

done:
	if (statp != NULL) {
		SD_UPDATE_KSTATS(un, statp, bp);
	}

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: exit un:0x%p\n", un);
}
15249 
15250 
15251 /*
15252  *    Function: sd_start_retry_command
15253  *
15254  * Description: Start the command that has been waiting on the target's
15255  *		retry queue.  Called from timeout(9F) context after the
15256  *		retry delay interval has expired.
15257  *
15258  *   Arguments: arg - pointer to associated softstate for the device.
15259  *
15260  *     Context: timeout(9F) thread context.  May not sleep.
15261  */
15262 
15263 static void
15264 sd_start_retry_command(void *arg)
15265 {
15266 	struct sd_lun *un = arg;
15267 
15268 	ASSERT(un != NULL);
15269 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15270 
15271 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15272 	    "sd_start_retry_command: entry\n");
15273 
15274 	mutex_enter(SD_MUTEX(un));
15275 
15276 	un->un_retry_timeid = NULL;
15277 
15278 	if (un->un_retry_bp != NULL) {
15279 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15280 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15281 		    un, un->un_retry_bp);
15282 		sd_start_cmds(un, un->un_retry_bp);
15283 	}
15284 
15285 	mutex_exit(SD_MUTEX(un));
15286 
15287 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15288 	    "sd_start_retry_command: exit\n");
15289 }
15290 
15291 
15292 /*
15293  *    Function: sd_start_direct_priority_command
15294  *
15295  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15296  *		received TRAN_BUSY when we called scsi_transport() to send it
15297  *		to the underlying HBA. This function is called from timeout(9F)
15298  *		context after the delay interval has expired.
15299  *
15300  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15301  *
15302  *     Context: timeout(9F) thread context.  May not sleep.
15303  */
15304 
15305 static void
15306 sd_start_direct_priority_command(void *arg)
15307 {
15308 	struct buf	*priority_bp = arg;
15309 	struct sd_lun	*un;
15310 
15311 	ASSERT(priority_bp != NULL);
15312 	un = SD_GET_UN(priority_bp);
15313 	ASSERT(un != NULL);
15314 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15315 
15316 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15317 	    "sd_start_direct_priority_command: entry\n");
15318 
15319 	mutex_enter(SD_MUTEX(un));
15320 	un->un_direct_priority_timeid = NULL;
15321 	sd_start_cmds(un, priority_bp);
15322 	mutex_exit(SD_MUTEX(un));
15323 
15324 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15325 	    "sd_start_direct_priority_command: exit\n");
15326 }
15327 
15328 
15329 /*
15330  *    Function: sd_send_request_sense_command
15331  *
15332  * Description: Sends a REQUEST SENSE command to the target
15333  *
15334  *     Context: May be called from interrupt context.
15335  */
15336 
15337 static void
15338 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15339 	struct scsi_pkt *pktp)
15340 {
15341 	ASSERT(bp != NULL);
15342 	ASSERT(un != NULL);
15343 	ASSERT(mutex_owned(SD_MUTEX(un)));
15344 
15345 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15346 	    "entry: buf:0x%p\n", bp);
15347 
15348 	/*
15349 	 * If we are syncing or dumping, then fail the command to avoid a
15350 	 * recursive callback into scsi_transport(). Also fail the command
15351 	 * if we are suspended (legacy behavior).
15352 	 */
15353 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15354 	    (un->un_state == SD_STATE_DUMPING)) {
15355 		sd_return_failed_command(un, bp, EIO);
15356 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15357 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15358 		return;
15359 	}
15360 
15361 	/*
15362 	 * Retry the failed command and don't issue the request sense if:
15363 	 *    1) the sense buf is busy
15364 	 *    2) we have 1 or more outstanding commands on the target
15365 	 *    (the sense data will be cleared or invalidated any way)
15366 	 *
15367 	 * Note: There could be an issue with not checking a retry limit here,
15368 	 * the problem is determining which retry limit to check.
15369 	 */
15370 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15371 		/* Don't retry if the command is flagged as non-retryable */
15372 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15373 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15374 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15375 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15376 			    "sd_send_request_sense_command: "
15377 			    "at full throttle, retrying exit\n");
15378 		} else {
15379 			sd_return_failed_command(un, bp, EIO);
15380 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15381 			    "sd_send_request_sense_command: "
15382 			    "at full throttle, non-retryable exit\n");
15383 		}
15384 		return;
15385 	}
15386 
15387 	sd_mark_rqs_busy(un, bp);
15388 	sd_start_cmds(un, un->un_rqs_bp);
15389 
15390 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15391 	    "sd_send_request_sense_command: exit\n");
15392 }
15393 
15394 
15395 /*
15396  *    Function: sd_mark_rqs_busy
15397  *
15398  * Description: Indicate that the request sense bp for this instance is
15399  *		in use.
15400  *
15401  *     Context: May be called under interrupt context
15402  */
15403 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Claim the RQS resources & clear any residue from a prior use. */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	/* Clear out any stale sense bytes in the RQS data buffer. */
	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Start the RQS command with fresh retry counts. */
	sense_xp->xb_retry_count	= 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count	= 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state  = 0;
	sense_xp->xb_sense_resid  = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
15456 
15457 
15458 /*
15459  *    Function: sd_mark_rqs_idle
15460  *
15461  * Description: SD_MUTEX must be held continuously through this routine
15462  *		to prevent reuse of the rqs struct before the caller can
 *		complete its processing.
15464  *
15465  * Return Code: Pointer to the RQS buf
15466  *
15467  *     Context: May be called under interrupt context
15468  */
15469 
15470 static struct buf *
15471 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15472 {
15473 	struct buf *bp;
15474 	ASSERT(un != NULL);
15475 	ASSERT(sense_xp != NULL);
15476 	ASSERT(mutex_owned(SD_MUTEX(un)));
15477 	ASSERT(un->un_sense_isbusy != 0);
15478 
15479 	un->un_sense_isbusy = 0;
15480 	bp = sense_xp->xb_sense_bp;
15481 	sense_xp->xb_sense_bp = NULL;
15482 
15483 	/* This pkt is no longer interested in getting sense data */
15484 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15485 
15486 	return (bp);
15487 }
15488 
15489 
15490 
15491 /*
15492  *    Function: sd_alloc_rqs
15493  *
15494  * Description: Set up the unit to receive auto request sense data
15495  *
15496  * Return Code: DDI_SUCCESS or DDI_FAILURE
15497  *
15498  *     Context: Called under attach(9E) context
15499  */
15500 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		/* Partial allocation: release the buf before failing. */
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (bug id 1222170) */

	/*
	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
	 * provide any initpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is the HBA (adp) always returning enabled on
	 * scsi_ifgetcap even when it is not enabled; the best approach
	 * is to issue a scsi_ifsetcap then a scsi_ifgetcap
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs always provide ARQ. */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__i386) || defined(__amd64)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
				== 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
					"sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
15632 
15633 
15634 /*
15635  *    Function: sd_free_rqs
15636  *
15637  * Description: Cleanup for the pre-instance RQS command.
15638  *
15639  *     Context: Kernel thread context
15640  */
15641 
15642 static void
15643 sd_free_rqs(struct sd_lun *un)
15644 {
15645 	ASSERT(un != NULL);
15646 
15647 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15648 
15649 	/*
15650 	 * If consistent memory is bound to a scsi_pkt, the pkt
15651 	 * has to be destroyed *before* freeing the consistent memory.
15652 	 * Don't change the sequence of this operations.
15653 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15654 	 * after it was freed in scsi_free_consistent_buf().
15655 	 */
15656 	if (un->un_rqs_pktp != NULL) {
15657 		scsi_destroy_pkt(un->un_rqs_pktp);
15658 		un->un_rqs_pktp = NULL;
15659 	}
15660 
15661 	if (un->un_rqs_bp != NULL) {
15662 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
15663 		scsi_free_consistent_buf(un->un_rqs_bp);
15664 		un->un_rqs_bp = NULL;
15665 	}
15666 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15667 }
15668 
15669 
15670 
15671 /*
15672  *    Function: sd_reduce_throttle
15673  *
 * Description: Reduces the maximum # of outstanding commands on a
15675  *		target to the current number of outstanding commands.
 *		Queues a timeout(9F) callback to restore the limit
15677  *		after a specified interval has elapsed.
15678  *		Typically used when we get a TRAN_BUSY return code
15679  *		back from scsi_transport().
15680  *
15681  *   Arguments: un - ptr to the sd_lun softstate struct
15682  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15683  *
15684  *     Context: May be called from interrupt context
15685  */
15686 
15687 static void
15688 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15689 {
15690 	ASSERT(un != NULL);
15691 	ASSERT(mutex_owned(SD_MUTEX(un)));
15692 	ASSERT(un->un_ncmds_in_transport >= 0);
15693 
15694 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15695 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15696 	    un, un->un_throttle, un->un_ncmds_in_transport);
15697 
15698 	if (un->un_throttle > 1) {
15699 		if (un->un_f_use_adaptive_throttle == TRUE) {
15700 			switch (throttle_type) {
15701 			case SD_THROTTLE_TRAN_BUSY:
15702 				if (un->un_busy_throttle == 0) {
15703 					un->un_busy_throttle = un->un_throttle;
15704 				}
15705 				break;
15706 			case SD_THROTTLE_QFULL:
15707 				un->un_busy_throttle = 0;
15708 				break;
15709 			default:
15710 				ASSERT(FALSE);
15711 			}
15712 
15713 			if (un->un_ncmds_in_transport > 0) {
15714 				un->un_throttle = un->un_ncmds_in_transport;
15715 			}
15716 		} else {
15717 			if (un->un_ncmds_in_transport == 0) {
15718 				un->un_throttle = 1;
15719 			} else {
15720 				un->un_throttle = un->un_ncmds_in_transport;
15721 			}
15722 		}
15723 	}
15724 
15725 	/* Reschedule the timeout if none is currently active */
15726 	if (un->un_reset_throttle_timeid == NULL) {
15727 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15728 		    un, sd_reset_throttle_timeout);
15729 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15730 		    "sd_reduce_throttle: timeout scheduled!\n");
15731 	}
15732 
15733 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15734 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15735 }
15736 
15737 
15738 
15739 /*
15740  *    Function: sd_restore_throttle
15741  *
15742  * Description: Callback function for timeout(9F).  Resets the current
15743  *		value of un->un_throttle to its default.
15744  *
15745  *   Arguments: arg - pointer to associated softstate for the device.
15746  *
15747  *     Context: May be called from interrupt context
15748  */
15749 
15750 static void
15751 sd_restore_throttle(void *arg)
15752 {
15753 	struct sd_lun	*un = arg;
15754 
15755 	ASSERT(un != NULL);
15756 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15757 
15758 	mutex_enter(SD_MUTEX(un));
15759 
15760 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15761 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15762 
15763 	un->un_reset_throttle_timeid = NULL;
15764 
15765 	if (un->un_f_use_adaptive_throttle == TRUE) {
15766 		/*
15767 		 * If un_busy_throttle is nonzero, then it contains the
15768 		 * value that un_throttle was when we got a TRAN_BUSY back
15769 		 * from scsi_transport(). We want to revert back to this
15770 		 * value.
15771 		 */
15772 		if (un->un_busy_throttle > 0) {
15773 			un->un_throttle = un->un_busy_throttle;
15774 			un->un_busy_throttle = 0;
15775 		}
15776 
15777 		/*
15778 		 * If un_throttle has fallen below the low-water mark, we
15779 		 * restore the maximum value here (and allow it to ratchet
15780 		 * down again if necessary).
15781 		 */
15782 		if (un->un_throttle < un->un_min_throttle) {
15783 			un->un_throttle = un->un_saved_throttle;
15784 		}
15785 	} else {
15786 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15787 		    "restoring limit from 0x%x to 0x%x\n",
15788 		    un->un_throttle, un->un_saved_throttle);
15789 		un->un_throttle = un->un_saved_throttle;
15790 	}
15791 
15792 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15793 	    "sd_restore_throttle: calling sd_start_cmds!\n");
15794 
15795 	sd_start_cmds(un, NULL);
15796 
15797 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15798 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
15799 	    un, un->un_throttle);
15800 
15801 	mutex_exit(SD_MUTEX(un));
15802 
15803 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
15804 }
15805 
15806 /*
15807  *    Function: sdrunout
15808  *
15809  * Description: Callback routine for scsi_init_pkt when a resource allocation
15810  *		fails.
15811  *
15812  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
15813  *		soft state instance.
15814  *
15815  * Return Code: The scsi_init_pkt routine allows for the callback function to
15816  *		return a 0 indicating the callback should be rescheduled or a 1
15817  *		indicating not to reschedule. This routine always returns 1
15818  *		because the driver always provides a callback function to
15819  *		scsi_init_pkt. This results in a callback always being scheduled
15820  *		(via the scsi_init_pkt callback implementation) if a resource
15821  *		failure occurs.
15822  *
15823  *     Context: This callback function may not block or call routines that block
15824  *
15825  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
15826  *		request persisting at the head of the list which cannot be
15827  *		satisfied even after multiple retries. In the future the driver
 *		may implement some type of maximum runout count before failing
15829  *		an I/O.
15830  */
15831 
15832 static int
15833 sdrunout(caddr_t arg)
15834 {
15835 	struct sd_lun	*un = (struct sd_lun *)arg;
15836 
15837 	ASSERT(un != NULL);
15838 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15839 
15840 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
15841 
15842 	mutex_enter(SD_MUTEX(un));
15843 	sd_start_cmds(un, NULL);
15844 	mutex_exit(SD_MUTEX(un));
15845 	/*
15846 	 * This callback routine always returns 1 (i.e. do not reschedule)
15847 	 * because we always specify sdrunout as the callback handler for
15848 	 * scsi_init_pkt inside the call to sd_start_cmds.
15849 	 */
15850 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
15851 	return (1);
15852 }
15853 
15854 
15855 /*
15856  *    Function: sdintr
15857  *
15858  * Description: Completion callback routine for scsi_pkt(9S) structs
15859  *		sent to the HBA driver via scsi_transport(9F).
15860  *
15861  *     Context: Interrupt context
15862  */
15863 
static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;

	/*
	 * Recover the buf, xbuf, and softstate from the completed packet;
	 * pkt_private was set to the bp when the packet was constructed.
	 */
	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	/* All per-target accounting below is protected by SD_MUTEX. */
	mutex_enter(SD_MUTEX(un));

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, just fail the command
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
			    min(sizeof (struct scsi_extended_sense),
			    SENSE_LENGTH));

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Since this command is returned with a good status, we
		 * can reset the count for Sonoma failover.
		 */
		un->un_sonoma_failure_count = 0;

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				"sdintr: returning uscsi command\n");
		} else {
			/* read/write with nonzero resid: treat as failure */
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
16221 
16222 
16223 /*
16224  *    Function: sd_print_incomplete_msg
16225  *
16226  * Description: Prints the error message for a CMD_INCOMPLETE error.
16227  *
16228  *   Arguments: un - ptr to associated softstate for the device.
16229  *		bp - ptr to the buf(9S) for the command.
16230  *		arg - message string ptr
16231  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16232  *			or SD_NO_RETRY_ISSUED.
16233  *
16234  *     Context: May be called under interrupt context
16235  */
16236 
16237 static void
16238 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16239 {
16240 	struct scsi_pkt	*pktp;
16241 	char	*msgp;
16242 	char	*cmdp = arg;
16243 
16244 	ASSERT(un != NULL);
16245 	ASSERT(mutex_owned(SD_MUTEX(un)));
16246 	ASSERT(bp != NULL);
16247 	ASSERT(arg != NULL);
16248 	pktp = SD_GET_PKTP(bp);
16249 	ASSERT(pktp != NULL);
16250 
16251 	switch (code) {
16252 	case SD_DELAYED_RETRY_ISSUED:
16253 	case SD_IMMEDIATE_RETRY_ISSUED:
16254 		msgp = "retrying";
16255 		break;
16256 	case SD_NO_RETRY_ISSUED:
16257 	default:
16258 		msgp = "giving up";
16259 		break;
16260 	}
16261 
16262 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16263 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16264 		    "incomplete %s- %s\n", cmdp, msgp);
16265 	}
16266 }
16267 
16268 
16269 
16270 /*
16271  *    Function: sd_pkt_status_good
16272  *
16273  * Description: Processing for a STATUS_GOOD code in pkt_status.
16274  *
16275  *     Context: May be called under interrupt context
16276  */
16277 
16278 static void
16279 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16280 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16281 {
16282 	char	*cmdp;
16283 
16284 	ASSERT(un != NULL);
16285 	ASSERT(mutex_owned(SD_MUTEX(un)));
16286 	ASSERT(bp != NULL);
16287 	ASSERT(xp != NULL);
16288 	ASSERT(pktp != NULL);
16289 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16290 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16291 	ASSERT(pktp->pkt_resid != 0);
16292 
16293 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16294 
16295 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16296 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16297 	case SCMD_READ:
16298 		cmdp = "read";
16299 		break;
16300 	case SCMD_WRITE:
16301 		cmdp = "write";
16302 		break;
16303 	default:
16304 		SD_UPDATE_B_RESID(bp, pktp);
16305 		sd_return_command(un, bp);
16306 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16307 		return;
16308 	}
16309 
16310 	/*
16311 	 * See if we can retry the read/write, preferrably immediately.
16312 	 * If retries are exhaused, then sd_retry_command() will update
16313 	 * the b_resid count.
16314 	 */
16315 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16316 	    cmdp, EIO, (clock_t)0, NULL);
16317 
16318 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16319 }
16320 
16321 
16322 
16323 
16324 
16325 /*
16326  *    Function: sd_handle_request_sense
16327  *
16328  * Description: Processing for non-auto Request Sense command.
16329  *
16330  *   Arguments: un - ptr to associated softstate
16331  *		sense_bp - ptr to buf(9S) for the RQS command
16332  *		sense_xp - ptr to the sd_xbuf for the RQS command
16333  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16334  *
16335  *     Context: May be called under interrupt context
16336  */
16337 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.  They must match the
	 * unit's single dedicated RQS packet/buf pair.
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	/* The original command's pkt must still be marked as "sensing". */
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/*
	 * These are the bp, xp, and pktp for the original command;
	 * xb_sense_bp in the RQS xbuf links back to the original bp
	 * (set up when the RQS command was issued).
	 */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 *
		 * If FLAG_DIAGNOSE is set we fall through: the code below
		 * frees the RQS command and fails the original command
		 * (the sense state was zeroed by sd_mark_rqs_busy()).
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		/* FLAG_DIAGNOSE: dump the CDB and sense bytes, then fail. */
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
16426 
16427 
16428 
16429 
16430 /*
16431  *    Function: sd_handle_auto_request_sense
16432  *
16433  * Description: Processing for auto-request sense information.
16434  *
16435  *   Arguments: un - ptr to associated softstate
16436  *		bp - ptr to buf(9S) for the command
16437  *		xp - ptr to the sd_xbuf for the command
16438  *		pktp - ptr to the scsi_pkt(9S) for the command
16439  *
16440  *     Context: May be called under interrupt context
16441  */
16442 
16443 static void
16444 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16445 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16446 {
16447 	struct scsi_arq_status *asp;
16448 
16449 	ASSERT(un != NULL);
16450 	ASSERT(mutex_owned(SD_MUTEX(un)));
16451 	ASSERT(bp != NULL);
16452 	ASSERT(xp != NULL);
16453 	ASSERT(pktp != NULL);
16454 	ASSERT(pktp != un->un_rqs_pktp);
16455 	ASSERT(bp   != un->un_rqs_bp);
16456 
16457 	/*
16458 	 * For auto-request sense, we get a scsi_arq_status back from
16459 	 * the HBA, with the sense data in the sts_sensedata member.
16460 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16461 	 */
16462 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16463 
16464 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16465 		/*
16466 		 * The auto REQUEST SENSE failed; see if we can re-try
16467 		 * the original command.
16468 		 */
16469 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16470 		    "auto request sense failed (reason=%s)\n",
16471 		    scsi_rname(asp->sts_rqpkt_reason));
16472 
16473 		sd_reset_target(un, pktp);
16474 
16475 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16476 		    NULL, NULL, EIO, (clock_t)0, NULL);
16477 		return;
16478 	}
16479 
16480 	/* Save the relevant sense info into the xp for the original cmd. */
16481 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16482 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16483 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16484 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16485 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16486 
16487 	/*
16488 	 * See if we have valid sense data, if so then turn it over to
16489 	 * sd_decode_sense() to figure out the right course of action.
16490 	 */
16491 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16492 		sd_decode_sense(un, bp, xp, pktp);
16493 	}
16494 }
16495 
16496 
16497 /*
16498  *    Function: sd_print_sense_failed_msg
16499  *
16500  * Description: Print log message when RQS has failed.
16501  *
16502  *   Arguments: un - ptr to associated softstate
16503  *		bp - ptr to buf(9S) for the command
16504  *		arg - generic message string ptr
16505  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16506  *			or SD_NO_RETRY_ISSUED
16507  *
16508  *     Context: May be called from interrupt context
16509  */
16510 
16511 static void
16512 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16513 	int code)
16514 {
16515 	char	*msgp = arg;
16516 
16517 	ASSERT(un != NULL);
16518 	ASSERT(mutex_owned(SD_MUTEX(un)));
16519 	ASSERT(bp != NULL);
16520 
16521 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16522 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16523 	}
16524 }
16525 
16526 
16527 /*
16528  *    Function: sd_validate_sense_data
16529  *
16530  * Description: Check the given sense data for validity.
16531  *		If the sense data is not valid, the command will
16532  *		be either failed or retried!
16533  *
16534  * Return Code: SD_SENSE_DATA_IS_INVALID
16535  *		SD_SENSE_DATA_IS_VALID
16536  *
16537  *     Context: May be called from interrupt context
16538  */
16539 
16540 static int
16541 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16542 {
16543 	struct scsi_extended_sense *esp;
16544 	struct	scsi_pkt *pktp;
16545 	size_t	actual_len;
16546 	char	*msgp = NULL;
16547 
16548 	ASSERT(un != NULL);
16549 	ASSERT(mutex_owned(SD_MUTEX(un)));
16550 	ASSERT(bp != NULL);
16551 	ASSERT(bp != un->un_rqs_bp);
16552 	ASSERT(xp != NULL);
16553 
16554 	pktp = SD_GET_PKTP(bp);
16555 	ASSERT(pktp != NULL);
16556 
16557 	/*
16558 	 * Check the status of the RQS command (auto or manual).
16559 	 */
16560 	switch (xp->xb_sense_status & STATUS_MASK) {
16561 	case STATUS_GOOD:
16562 		break;
16563 
16564 	case STATUS_RESERVATION_CONFLICT:
16565 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16566 		return (SD_SENSE_DATA_IS_INVALID);
16567 
16568 	case STATUS_BUSY:
16569 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16570 		    "Busy Status on REQUEST SENSE\n");
16571 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16572 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16573 		return (SD_SENSE_DATA_IS_INVALID);
16574 
16575 	case STATUS_QFULL:
16576 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16577 		    "QFULL Status on REQUEST SENSE\n");
16578 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16579 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16580 		return (SD_SENSE_DATA_IS_INVALID);
16581 
16582 	case STATUS_CHECK:
16583 	case STATUS_TERMINATED:
16584 		msgp = "Check Condition on REQUEST SENSE\n";
16585 		goto sense_failed;
16586 
16587 	default:
16588 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
16589 		goto sense_failed;
16590 	}
16591 
16592 	/*
16593 	 * See if we got the minimum required amount of sense data.
16594 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16595 	 * or less.
16596 	 */
16597 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
16598 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16599 	    (actual_len == 0)) {
16600 		msgp = "Request Sense couldn't get sense data\n";
16601 		goto sense_failed;
16602 	}
16603 
16604 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16605 		msgp = "Not enough sense information\n";
16606 		goto sense_failed;
16607 	}
16608 
16609 	/*
16610 	 * We require the extended sense data
16611 	 */
16612 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16613 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16614 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16615 			static char tmp[8];
16616 			static char buf[148];
16617 			char *p = (char *)(xp->xb_sense_data);
16618 			int i;
16619 
16620 			mutex_enter(&sd_sense_mutex);
16621 			(void) strcpy(buf, "undecodable sense information:");
16622 			for (i = 0; i < actual_len; i++) {
16623 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
16624 				(void) strcpy(&buf[strlen(buf)], tmp);
16625 			}
16626 			i = strlen(buf);
16627 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
16628 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
16629 			mutex_exit(&sd_sense_mutex);
16630 		}
16631 		/* Note: Legacy behavior, fail the command with no retry */
16632 		sd_return_failed_command(un, bp, EIO);
16633 		return (SD_SENSE_DATA_IS_INVALID);
16634 	}
16635 
16636 	/*
16637 	 * Check that es_code is valid (es_class concatenated with es_code
16638 	 * make up the "response code" field.  es_class will always be 7, so
16639 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
16640 	 * format.
16641 	 */
16642 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
16643 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
16644 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
16645 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
16646 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
16647 		goto sense_failed;
16648 	}
16649 
16650 	return (SD_SENSE_DATA_IS_VALID);
16651 
16652 sense_failed:
16653 	/*
16654 	 * If the request sense failed (for whatever reason), attempt
16655 	 * to retry the original command.
16656 	 */
16657 #if defined(__i386) || defined(__amd64)
16658 	/*
16659 	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
16660 	 * sddef.h for Sparc platform, and x86 uses 1 binary
16661 	 * for both SCSI/FC.
16662 	 * The SD_RETRY_DELAY value need to be adjusted here
16663 	 * when SD_RETRY_DELAY change in sddef.h
16664 	 */
16665 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16666 	    sd_print_sense_failed_msg, msgp, EIO,
16667 		un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
16668 #else
16669 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16670 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
16671 #endif
16672 
16673 	return (SD_SENSE_DATA_IS_INVALID);
16674 }
16675 
16676 
16677 
16678 /*
16679  *    Function: sd_decode_sense
16680  *
16681  * Description: Take recovery action(s) when SCSI Sense Data is received.
16682  *
16683  *     Context: Interrupt context.
16684  */
16685 
16686 static void
16687 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16688 	struct scsi_pkt *pktp)
16689 {
16690 	struct scsi_extended_sense *esp;
16691 	struct scsi_descr_sense_hdr *sdsp;
16692 	uint8_t asc, ascq, sense_key;
16693 
16694 	ASSERT(un != NULL);
16695 	ASSERT(mutex_owned(SD_MUTEX(un)));
16696 	ASSERT(bp != NULL);
16697 	ASSERT(bp != un->un_rqs_bp);
16698 	ASSERT(xp != NULL);
16699 	ASSERT(pktp != NULL);
16700 
16701 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16702 
16703 	switch (esp->es_code) {
16704 	case CODE_FMT_DESCR_CURRENT:
16705 	case CODE_FMT_DESCR_DEFERRED:
16706 		sdsp = (struct scsi_descr_sense_hdr *)xp->xb_sense_data;
16707 		sense_key = sdsp->ds_key;
16708 		asc = sdsp->ds_add_code;
16709 		ascq = sdsp->ds_qual_code;
16710 		break;
16711 	case CODE_FMT_VENDOR_SPECIFIC:
16712 	case CODE_FMT_FIXED_CURRENT:
16713 	case CODE_FMT_FIXED_DEFERRED:
16714 	default:
16715 		sense_key = esp->es_key;
16716 		asc = esp->es_add_code;
16717 		ascq = esp->es_qual_code;
16718 		break;
16719 	}
16720 
16721 	switch (sense_key) {
16722 	case KEY_NO_SENSE:
16723 		sd_sense_key_no_sense(un, bp, xp, pktp);
16724 		break;
16725 	case KEY_RECOVERABLE_ERROR:
16726 		sd_sense_key_recoverable_error(un, asc, bp, xp, pktp);
16727 		break;
16728 	case KEY_NOT_READY:
16729 		sd_sense_key_not_ready(un, asc, ascq, bp, xp, pktp);
16730 		break;
16731 	case KEY_MEDIUM_ERROR:
16732 	case KEY_HARDWARE_ERROR:
16733 		sd_sense_key_medium_or_hardware_error(un,
16734 		    sense_key, asc, bp, xp, pktp);
16735 		break;
16736 	case KEY_ILLEGAL_REQUEST:
16737 		sd_sense_key_illegal_request(un, bp, xp, pktp);
16738 		break;
16739 	case KEY_UNIT_ATTENTION:
16740 		sd_sense_key_unit_attention(un, asc, bp, xp, pktp);
16741 		break;
16742 	case KEY_WRITE_PROTECT:
16743 	case KEY_VOLUME_OVERFLOW:
16744 	case KEY_MISCOMPARE:
16745 		sd_sense_key_fail_command(un, bp, xp, pktp);
16746 		break;
16747 	case KEY_BLANK_CHECK:
16748 		sd_sense_key_blank_check(un, bp, xp, pktp);
16749 		break;
16750 	case KEY_ABORTED_COMMAND:
16751 		sd_sense_key_aborted_command(un, bp, xp, pktp);
16752 		break;
16753 	case KEY_VENDOR_UNIQUE:
16754 	case KEY_COPY_ABORTED:
16755 	case KEY_EQUAL:
16756 	case KEY_RESERVED:
16757 	default:
16758 		sd_sense_key_default(un, sense_key, bp, xp, pktp);
16759 		break;
16760 	}
16761 }
16762 
16763 
16764 /*
16765  *    Function: sd_dump_memory
16766  *
16767  * Description: Debug logging routine to print the contents of a user provided
16768  *		buffer. The output of the buffer is broken up into 256 byte
16769  *		segments due to a size constraint of the scsi_log.
16770  *		implementation.
16771  *
16772  *   Arguments: un - ptr to softstate
16773  *		comp - component mask
16774  *		title - "title" string to preceed data when printed
16775  *		data - ptr to data block to be printed
16776  *		len - size of data block to be printed
16777  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
16778  *
16779  *     Context: May be called from interrupt context
16780  */
16781 
16782 #define	SD_DUMP_MEMORY_BUF_SIZE	256
16783 
16784 static char *sd_dump_format_string[] = {
16785 		" 0x%02x",
16786 		" %c"
16787 };
16788 
16789 static void
16790 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
16791     int len, int fmt)
16792 {
16793 	int	i, j;
16794 	int	avail_count;
16795 	int	start_offset;
16796 	int	end_offset;
16797 	size_t	entry_len;
16798 	char	*bufp;
16799 	char	*local_buf;
16800 	char	*format_string;
16801 
16802 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
16803 
16804 	/*
16805 	 * In the debug version of the driver, this function is called from a
16806 	 * number of places which are NOPs in the release driver.
16807 	 * The debug driver therefore has additional methods of filtering
16808 	 * debug output.
16809 	 */
16810 #ifdef SDDEBUG
16811 	/*
16812 	 * In the debug version of the driver we can reduce the amount of debug
16813 	 * messages by setting sd_error_level to something other than
16814 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
16815 	 * sd_component_mask.
16816 	 */
16817 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
16818 	    (sd_error_level != SCSI_ERR_ALL)) {
16819 		return;
16820 	}
16821 	if (((sd_component_mask & comp) == 0) ||
16822 	    (sd_error_level != SCSI_ERR_ALL)) {
16823 		return;
16824 	}
16825 #else
16826 	if (sd_error_level != SCSI_ERR_ALL) {
16827 		return;
16828 	}
16829 #endif
16830 
16831 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
16832 	bufp = local_buf;
16833 	/*
16834 	 * Available length is the length of local_buf[], minus the
16835 	 * length of the title string, minus one for the ":", minus
16836 	 * one for the newline, minus one for the NULL terminator.
16837 	 * This gives the #bytes available for holding the printed
16838 	 * values from the given data buffer.
16839 	 */
16840 	if (fmt == SD_LOG_HEX) {
16841 		format_string = sd_dump_format_string[0];
16842 	} else /* SD_LOG_CHAR */ {
16843 		format_string = sd_dump_format_string[1];
16844 	}
16845 	/*
16846 	 * Available count is the number of elements from the given
16847 	 * data buffer that we can fit into the available length.
16848 	 * This is based upon the size of the format string used.
16849 	 * Make one entry and find it's size.
16850 	 */
16851 	(void) sprintf(bufp, format_string, data[0]);
16852 	entry_len = strlen(bufp);
16853 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
16854 
16855 	j = 0;
16856 	while (j < len) {
16857 		bufp = local_buf;
16858 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
16859 		start_offset = j;
16860 
16861 		end_offset = start_offset + avail_count;
16862 
16863 		(void) sprintf(bufp, "%s:", title);
16864 		bufp += strlen(bufp);
16865 		for (i = start_offset; ((i < end_offset) && (j < len));
16866 		    i++, j++) {
16867 			(void) sprintf(bufp, format_string, data[i]);
16868 			bufp += entry_len;
16869 		}
16870 		(void) sprintf(bufp, "\n");
16871 
16872 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
16873 	}
16874 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
16875 }
16876 
16877 /*
16878  *    Function: sd_print_sense_msg
16879  *
16880  * Description: Log a message based upon the given sense data.
16881  *
16882  *   Arguments: un - ptr to associated softstate
16883  *		bp - ptr to buf(9S) for the command
16884  *		arg - ptr to associate sd_sense_info struct
16885  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16886  *			or SD_NO_RETRY_ISSUED
16887  *
16888  *     Context: May be called from interrupt context
16889  */
16890 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct scsi_extended_sense *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	/* Assume fixed-format sense until the es_code says otherwise */
	int fixed_format = TRUE;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	/*
	 * 'arg' supplies the base severity and the predictive failure
	 * analysis (PFA) flag chosen by the sense-key handler.
	 */
	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A pending retry downgrades the severity to "retryable". */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = (struct scsi_extended_sense *)xp->xb_sense_data;
	switch (sensep->es_code) {
	case CODE_FMT_DESCR_CURRENT:
	case CODE_FMT_DESCR_DEFERRED:
		/*
		 * Descriptor format: walk the descriptors for the
		 * information field (returns -1 if none present).
		 */
		err_blkno =
		    sd_extract_sense_info_descr(
			(struct scsi_descr_sense_hdr *)sensep);
		fixed_format = FALSE;
		break;
	case CODE_FMT_FIXED_CURRENT:
	case CODE_FMT_FIXED_DEFERRED:
	case CODE_FMT_VENDOR_SPECIFIC:
	default:
		/*
		 * With the es_valid bit set, we assume that the error
		 * blkno is in the sense data.  Also, if xp->xb_blkno is
		 * greater than 0xffffffff then the target *should* have used
		 * a descriptor sense format (or it shouldn't have set
		 * the es_valid bit), and we may as well ignore the
		 * 32-bit value.
		 */
		if ((sensep->es_valid != 0) && (xp->xb_blkno <= 0xffffffff)) {
			err_blkno = (diskaddr_t)
			    ((sensep->es_info_1 << 24) |
			    (sensep->es_info_2 << 16) |
			    (sensep->es_info_3 << 8)  |
			    (sensep->es_info_4));
		} else {
			err_blkno = (diskaddr_t)-1;
		}
		break;
	}

	if (err_blkno == (diskaddr_t)-1) {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	} else {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		/* Suppress low-severity buf(9S) I/O messages unless the
		 * diagnostic log mask requests them. */
		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}

	/*
	 * If the data is fixed format then check for Sonoma Failover,
	 * and keep a count of how many failed I/O's.  We should not have
	 * to worry about Sonoma returning descriptor format sense data,
	 * and asc/ascq are in a different location in descriptor format.
	 */
	if (fixed_format &&
	    (SD_IS_LSI(un)) && (sensep->es_key == KEY_ILLEGAL_REQUEST) &&
	    (sensep->es_add_code == 0x94) && (sensep->es_qual_code == 0x01)) {
		un->un_sonoma_failure_count++;
		/* Only log the first Sonoma failover occurrence. */
		if (un->un_sonoma_failure_count > 1) {
			return;
		}
	}

	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
	    request_blkno, err_blkno, scsi_cmds, sensep,
	    un->un_additional_codes, NULL);
}
17025 
17026 /*
17027  *    Function: sd_extract_sense_info_descr
17028  *
17029  * Description: Retrieve "information" field from descriptor format
17030  *              sense data.  Iterates through each sense descriptor
17031  *              looking for the information descriptor and returns
17032  *              the information field from that descriptor.
17033  *
17034  *     Context: May be called from interrupt context
17035  */
17036 
17037 static diskaddr_t
17038 sd_extract_sense_info_descr(struct scsi_descr_sense_hdr *sdsp)
17039 {
17040 	diskaddr_t result;
17041 	uint8_t *descr_offset;
17042 	int valid_sense_length;
17043 	struct scsi_information_sense_descr *isd;
17044 
17045 	/*
17046 	 * Initialize result to -1 indicating there is no information
17047 	 * descriptor
17048 	 */
17049 	result = (diskaddr_t)-1;
17050 
17051 	/*
17052 	 * The first descriptor will immediately follow the header
17053 	 */
17054 	descr_offset = (uint8_t *)(sdsp+1); /* Pointer arithmetic */
17055 
17056 	/*
17057 	 * Calculate the amount of valid sense data
17058 	 */
17059 	valid_sense_length =
17060 	    min((sizeof (struct scsi_descr_sense_hdr) +
17061 	    sdsp->ds_addl_sense_length),
17062 	    SENSE_LENGTH);
17063 
17064 	/*
17065 	 * Iterate through the list of descriptors, stopping when we
17066 	 * run out of sense data
17067 	 */
17068 	while ((descr_offset + sizeof (struct scsi_information_sense_descr)) <=
17069 	    (uint8_t *)sdsp + valid_sense_length) {
17070 		/*
17071 		 * Check if this is an information descriptor.  We can
17072 		 * use the scsi_information_sense_descr structure as a
17073 		 * template sense the first two fields are always the
17074 		 * same
17075 		 */
17076 		isd = (struct scsi_information_sense_descr *)descr_offset;
17077 		if (isd->isd_descr_type == DESCR_INFORMATION) {
17078 			/*
17079 			 * Found an information descriptor.  Copy the
17080 			 * information field.  There will only be one
17081 			 * information descriptor so we can stop looking.
17082 			 */
17083 			result =
17084 			    (((diskaddr_t)isd->isd_information[0] << 56) |
17085 				((diskaddr_t)isd->isd_information[1] << 48) |
17086 				((diskaddr_t)isd->isd_information[2] << 40) |
17087 				((diskaddr_t)isd->isd_information[3] << 32) |
17088 				((diskaddr_t)isd->isd_information[4] << 24) |
17089 				((diskaddr_t)isd->isd_information[5] << 16) |
17090 				((diskaddr_t)isd->isd_information[6] << 8)  |
17091 				((diskaddr_t)isd->isd_information[7]));
17092 			break;
17093 		}
17094 
17095 		/*
17096 		 * Get pointer to the next descriptor.  The "additional
17097 		 * length" field holds the length of the descriptor except
17098 		 * for the "type" and "additional length" fields, so
17099 		 * we need to add 2 to get the total length.
17100 		 */
17101 		descr_offset += (isd->isd_addl_length + 2);
17102 	}
17103 
17104 	return (result);
17105 }
17106 
17107 /*
17108  *    Function: sd_sense_key_no_sense
17109  *
17110  * Description: Recovery action when sense data was not received.
17111  *
17112  *     Context: May be called from interrupt context
17113  */
17114 
17115 static void
17116 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17117 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17118 {
17119 	struct sd_sense_info	si;
17120 
17121 	ASSERT(un != NULL);
17122 	ASSERT(mutex_owned(SD_MUTEX(un)));
17123 	ASSERT(bp != NULL);
17124 	ASSERT(xp != NULL);
17125 	ASSERT(pktp != NULL);
17126 
17127 	si.ssi_severity = SCSI_ERR_FATAL;
17128 	si.ssi_pfa_flag = FALSE;
17129 
17130 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17131 
17132 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17133 		&si, EIO, (clock_t)0, NULL);
17134 }
17135 
17136 
17137 /*
17138  *    Function: sd_sense_key_recoverable_error
17139  *
17140  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17141  *
17142  *     Context: May be called from interrupt context
17143  */
17144 
17145 static void
17146 sd_sense_key_recoverable_error(struct sd_lun *un,
17147 	uint8_t asc,
17148 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17149 {
17150 	struct sd_sense_info	si;
17151 
17152 	ASSERT(un != NULL);
17153 	ASSERT(mutex_owned(SD_MUTEX(un)));
17154 	ASSERT(bp != NULL);
17155 	ASSERT(xp != NULL);
17156 	ASSERT(pktp != NULL);
17157 
17158 	/*
17159 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17160 	 */
17161 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17162 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17163 		si.ssi_severity = SCSI_ERR_INFO;
17164 		si.ssi_pfa_flag = TRUE;
17165 	} else {
17166 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17167 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17168 		si.ssi_severity = SCSI_ERR_RECOVERED;
17169 		si.ssi_pfa_flag = FALSE;
17170 	}
17171 
17172 	if (pktp->pkt_resid == 0) {
17173 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17174 		sd_return_command(un, bp);
17175 		return;
17176 	}
17177 
17178 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17179 	    &si, EIO, (clock_t)0, NULL);
17180 }
17181 
17182 
17183 
17184 
17185 /*
17186  *    Function: sd_sense_key_not_ready
17187  *
17188  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17189  *
17190  *     Context: May be called from interrupt context
17191  */
17192 
static void
sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t asc, uint8_t ascq,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Default disposition if we end up failing the command. */
	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/*
	 * Update error stats after first NOT READY error. Disks may have
	 * been powered down and may need to be restarted.  For CDROMs,
	 * report NOT READY errors only if media is present.
	 */
	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
	    (xp->xb_retry_count > 0)) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
	}

	/*
	 * Just fail if the "not ready" retry limit has been reached.
	 */
	if (xp->xb_retry_count >= un->un_notready_retry_count) {
		/* Special check for error message printing for removables. */
		if ((ISREMOVABLE(un)) && (asc == 0x04) &&
		    (ascq >= 0x04)) {
			si.ssi_severity = SCSI_ERR_ALL;
		}
		goto fail_command;
	}

	/*
	 * Check the ASC and ASCQ in the sense data as needed, to determine
	 * what to do.
	 */
	switch (asc) {
	case 0x04:	/* LOGICAL UNIT NOT READY */
		/*
		 * disk drives that don't spin up result in a very long delay
		 * in format without warning messages. We will log a message
		 * if the error level is set to verbose.
		 */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "logical unit not ready, resetting disk\n");
		}

		/*
		 * There are different requirements for CDROMs and disks for
		 * the number of retries.  If a CD-ROM is giving this, it is
		 * probably reading TOC and is in the process of getting
		 * ready, so we should keep on trying for a long time to make
		 * sure that all types of media are taken in account (for
		 * some media the drive takes a long time to read TOC).  For
		 * disks we do not want to retry this too many times as this
		 * can cause a long hang in format when the drive refuses to
		 * spin up (a very common failure).
		 */
		switch (ascq) {
		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
			/*
			 * Disk drives frequently refuse to spin up which
			 * results in a very long hang in format without
			 * warning messages.
			 *
			 * Note: This code preserves the legacy behavior of
			 * comparing xb_retry_count against zero for fibre
			 * channel targets instead of comparing against the
			 * un_reset_retry_count value.  The reason for this
			 * discrepancy has been so utterly lost beneath the
			 * Sands of Time that even Indiana Jones could not
			 * find it.
			 */
			if (un->un_f_is_fibre == TRUE) {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
					(xp->xb_retry_count > 0)) &&
					(un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					CE_WARN, "logical unit not ready, "
					"resetting disk\n");
					sd_reset_target(un, pktp);
				}
			} else {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
					(xp->xb_retry_count >
					un->un_reset_retry_count)) &&
					(un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					CE_WARN, "logical unit not ready, "
					"resetting disk\n");
					sd_reset_target(un, pktp);
				}
			}
			break;

		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
			/*
			 * If the target is in the process of becoming
			 * ready, just proceed with the retry. This can
			 * happen with CD-ROMs that take a long time to
			 * read TOC after a power cycle or reset.
			 */
			goto do_retry;

		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
			/* Fall out of the switch to issue START STOP UNIT. */
			break;

		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
			/*
			 * Retries cannot help here so just fail right away.
			 */
			goto fail_command;

		case 0x88:
			/*
			 * Vendor-unique code for T3/T4: it indicates a
			 * path problem in a mutipathed config, but as far as
			 * the target driver is concerned it equates to a fatal
			 * error, so we should just fail the command right away
			 * (without printing anything to the console). If this
			 * is not a T3/T4, fall thru to the default recovery
			 * action.
			 * T3/T4 is FC only, don't need to check is_fibre
			 */
			if (SD_IS_T3(un) || SD_IS_T4(un)) {
				sd_return_failed_command(un, bp, EIO);
				return;
			}
			/* FALLTHRU */

		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
		default:    /* Possible future codes in SCSI spec? */
			/*
			 * For removable-media devices, do not retry if
			 * ASCQ > 2 as these result mostly from USCSI commands
			 * on MMC devices issued to check status of an
			 * operation initiated in immediate mode.  Also for
			 * ASCQ >= 4 do not print console messages as these
			 * mainly represent a user-initiated operation
			 * instead of a system failure.
			 */
			if (ISREMOVABLE(un)) {
				si.ssi_severity = SCSI_ERR_ALL;
				goto fail_command;
			}
			break;
		}

		/*
		 * As part of our recovery attempt for the NOT READY
		 * condition, we issue a START STOP UNIT command. However
		 * we want to wait for a short delay before attempting this
		 * as there may still be more commands coming back from the
		 * target with the check condition. To do this we use
		 * timeout(9F) to call sd_start_stop_unit_callback() after
		 * the delay interval expires. (sd_start_stop_unit_callback()
		 * dispatches sd_start_stop_unit_task(), which will issue
		 * the actual START STOP UNIT command. The delay interval
		 * is one-half of the delay that we will use to retry the
		 * command that generated the NOT READY condition.
		 *
		 * Note that we could just dispatch sd_start_stop_unit_task()
		 * from here and allow it to sleep for the delay interval,
		 * but then we would be tying up the taskq thread
		 * uncesessarily for the duration of the delay.
		 *
		 * Do not issue the START STOP UNIT if the current command
		 * is already a START STOP UNIT.
		 */
		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
			break;
		}

		/*
		 * Do not schedule the timeout if one is already pending.
		 */
		if (un->un_startstop_timeid != NULL) {
			SD_INFO(SD_LOG_ERROR, un,
			    "sd_sense_key_not_ready: restart already issued to"
			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			break;
		}

		/*
		 * Schedule the START STOP UNIT command, then queue the command
		 * for a retry.
		 *
		 * Note: A timeout is not scheduled for this retry because we
		 * want the retry to be serial with the START_STOP_UNIT. The
		 * retry will be started when the START_STOP_UNIT is completed
		 * in sd_start_stop_unit_task.
		 */
		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
		    un, SD_BSY_TIMEOUT / 2);
		xp->xb_retry_count++;
		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
		return;

	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "unit does not respond to selection\n");
		}
		break;

	case 0x3A:	/* MEDIUM NOT PRESENT */
		if (sd_error_level >= SCSI_ERR_FATAL) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Caddy not inserted in drive\n");
		}

		sr_ejected(un);
		un->un_mediastate = DKIO_EJECTED;
		/* The state has changed, inform the media watch routines */
		cv_broadcast(&un->un_state_cv);
		/* Just fail if no media is present in the drive. */
		goto fail_command;

	default:
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
			    "Unit not Ready. Additional sense code 0x%x\n",
			    asc);
		}
		break;
	}

do_retry:

	/*
	 * Retry the command, as some targets may report NOT READY for
	 * several seconds after being reset.
	 */
	xp->xb_retry_count++;
	si.ssi_severity = SCSI_ERR_RETRYABLE;
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
	    &si, EIO, SD_BSY_TIMEOUT, NULL);

	return;

fail_command:
	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
	sd_return_failed_command(un, bp, EIO);
}
17450 
17451 
17452 
17453 /*
17454  *    Function: sd_sense_key_medium_or_hardware_error
17455  *
17456  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17457  *		sense key.
17458  *
17459  *     Context: May be called from interrupt context
17460  */
17461 
17462 static void
17463 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17464 	int sense_key, uint8_t asc,
17465 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17466 {
17467 	struct sd_sense_info	si;
17468 
17469 	ASSERT(un != NULL);
17470 	ASSERT(mutex_owned(SD_MUTEX(un)));
17471 	ASSERT(bp != NULL);
17472 	ASSERT(xp != NULL);
17473 	ASSERT(pktp != NULL);
17474 
17475 	si.ssi_severity = SCSI_ERR_FATAL;
17476 	si.ssi_pfa_flag = FALSE;
17477 
17478 	if (sense_key == KEY_MEDIUM_ERROR) {
17479 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17480 	}
17481 
17482 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17483 
17484 	if ((un->un_reset_retry_count != 0) &&
17485 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17486 		mutex_exit(SD_MUTEX(un));
17487 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17488 		if (un->un_f_allow_bus_device_reset == TRUE) {
17489 
17490 			boolean_t try_resetting_target = B_TRUE;
17491 
17492 			/*
17493 			 * We need to be able to handle specific ASC when we are
17494 			 * handling a KEY_HARDWARE_ERROR. In particular
17495 			 * taking the default action of resetting the target may
17496 			 * not be the appropriate way to attempt recovery.
17497 			 * Resetting a target because of a single LUN failure
17498 			 * victimizes all LUNs on that target.
17499 			 *
17500 			 * This is true for the LSI arrays, if an LSI
17501 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17502 			 * should trust it.
17503 			 */
17504 
17505 			if (sense_key == KEY_HARDWARE_ERROR) {
17506 				switch (asc) {
17507 				case 0x84:
17508 					if (SD_IS_LSI(un)) {
17509 						try_resetting_target = B_FALSE;
17510 					}
17511 					break;
17512 				default:
17513 					break;
17514 				}
17515 			}
17516 
17517 			if (try_resetting_target == B_TRUE) {
17518 				int reset_retval = 0;
17519 				if (un->un_f_lun_reset_enabled == TRUE) {
17520 					SD_TRACE(SD_LOG_IO_CORE, un,
17521 					    "sd_sense_key_medium_or_hardware_"
17522 					    "error: issuing RESET_LUN\n");
17523 					reset_retval =
17524 					    scsi_reset(SD_ADDRESS(un),
17525 					    RESET_LUN);
17526 				}
17527 				if (reset_retval == 0) {
17528 					SD_TRACE(SD_LOG_IO_CORE, un,
17529 					    "sd_sense_key_medium_or_hardware_"
17530 					    "error: issuing RESET_TARGET\n");
17531 					(void) scsi_reset(SD_ADDRESS(un),
17532 					    RESET_TARGET);
17533 				}
17534 			}
17535 		}
17536 		mutex_enter(SD_MUTEX(un));
17537 	}
17538 
17539 	/*
17540 	 * This really ought to be a fatal error, but we will retry anyway
17541 	 * as some drives report this as a spurious error.
17542 	 */
17543 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17544 	    &si, EIO, (clock_t)0, NULL);
17545 }
17546 
17547 
17548 
17549 /*
17550  *    Function: sd_sense_key_illegal_request
17551  *
17552  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17553  *
17554  *     Context: May be called from interrupt context
17555  */
17556 
17557 static void
17558 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17559 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17560 {
17561 	struct sd_sense_info	si;
17562 
17563 	ASSERT(un != NULL);
17564 	ASSERT(mutex_owned(SD_MUTEX(un)));
17565 	ASSERT(bp != NULL);
17566 	ASSERT(xp != NULL);
17567 	ASSERT(pktp != NULL);
17568 
17569 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17570 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17571 
17572 	si.ssi_severity = SCSI_ERR_INFO;
17573 	si.ssi_pfa_flag = FALSE;
17574 
17575 	/* Pointless to retry if the target thinks it's an illegal request */
17576 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17577 	sd_return_failed_command(un, bp, EIO);
17578 }
17579 
17580 
17581 
17582 
17583 /*
17584  *    Function: sd_sense_key_unit_attention
17585  *
17586  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17587  *
17588  *     Context: May be called from interrupt context
17589  */
17590 
static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	struct	sd_sense_info		si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	/* Select the recovery action based on the additional sense code. */
	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure analysis event: if PFA reporting is
		 * enabled (sd_report_pfa tunable), count it, flag the sense
		 * info, and retry under the standard (non-UA) policy.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}
		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset clears any reservation this host held; record
		 * that the reservation was lost and that we want it back.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		/* Only removable-media devices get media-change handling. */
		if (!ISREMOVABLE(un)) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == 0) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}
		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	if (!ISREMOVABLE(un)) {
		/*
		 * Do not update these here for removables. For removables
		 * these stats are updated (1) above if we failed to dispatch
		 * sd_media_change_task(), or (2) sd_media_change_task() may
		 * update these later if it encounters an error.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	/* Retry with the delay/policy selected above (UA or standard). */
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
17686 
17687 
17688 
17689 /*
17690  *    Function: sd_sense_key_fail_command
17691  *
17692  * Description: Use to fail a command when we don't like the sense key that
17693  *		was returned.
17694  *
17695  *     Context: May be called from interrupt context
17696  */
17697 
17698 static void
17699 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
17700 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17701 {
17702 	struct sd_sense_info	si;
17703 
17704 	ASSERT(un != NULL);
17705 	ASSERT(mutex_owned(SD_MUTEX(un)));
17706 	ASSERT(bp != NULL);
17707 	ASSERT(xp != NULL);
17708 	ASSERT(pktp != NULL);
17709 
17710 	si.ssi_severity = SCSI_ERR_FATAL;
17711 	si.ssi_pfa_flag = FALSE;
17712 
17713 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17714 	sd_return_failed_command(un, bp, EIO);
17715 }
17716 
17717 
17718 
17719 /*
17720  *    Function: sd_sense_key_blank_check
17721  *
17722  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17723  *		Has no monetary connotation.
17724  *
17725  *     Context: May be called from interrupt context
17726  */
17727 
17728 static void
17729 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
17730 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17731 {
17732 	struct sd_sense_info	si;
17733 
17734 	ASSERT(un != NULL);
17735 	ASSERT(mutex_owned(SD_MUTEX(un)));
17736 	ASSERT(bp != NULL);
17737 	ASSERT(xp != NULL);
17738 	ASSERT(pktp != NULL);
17739 
17740 	/*
17741 	 * Blank check is not fatal for removable devices, therefore
17742 	 * it does not require a console message.
17743 	 */
17744 	si.ssi_severity = (ISREMOVABLE(un)) ? SCSI_ERR_ALL : SCSI_ERR_FATAL;
17745 	si.ssi_pfa_flag = FALSE;
17746 
17747 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17748 	sd_return_failed_command(un, bp, EIO);
17749 }
17750 
17751 
17752 
17753 
17754 /*
17755  *    Function: sd_sense_key_aborted_command
17756  *
17757  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17758  *
17759  *     Context: May be called from interrupt context
17760  */
17761 
17762 static void
17763 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17764 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17765 {
17766 	struct sd_sense_info	si;
17767 
17768 	ASSERT(un != NULL);
17769 	ASSERT(mutex_owned(SD_MUTEX(un)));
17770 	ASSERT(bp != NULL);
17771 	ASSERT(xp != NULL);
17772 	ASSERT(pktp != NULL);
17773 
17774 	si.ssi_severity = SCSI_ERR_FATAL;
17775 	si.ssi_pfa_flag = FALSE;
17776 
17777 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17778 
17779 	/*
17780 	 * This really ought to be a fatal error, but we will retry anyway
17781 	 * as some drives report this as a spurious error.
17782 	 */
17783 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17784 	    &si, EIO, (clock_t)0, NULL);
17785 }
17786 
17787 
17788 
17789 /*
17790  *    Function: sd_sense_key_default
17791  *
17792  * Description: Default recovery action for several SCSI sense keys (basically
17793  *		attempts a retry).
17794  *
17795  *     Context: May be called from interrupt context
17796  */
17797 
17798 static void
17799 sd_sense_key_default(struct sd_lun *un,
17800 	int sense_key,
17801 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17802 {
17803 	struct sd_sense_info	si;
17804 
17805 	ASSERT(un != NULL);
17806 	ASSERT(mutex_owned(SD_MUTEX(un)));
17807 	ASSERT(bp != NULL);
17808 	ASSERT(xp != NULL);
17809 	ASSERT(pktp != NULL);
17810 
17811 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17812 
17813 	/*
17814 	 * Undecoded sense key.	Attempt retries and hope that will fix
17815 	 * the problem.  Otherwise, we're dead.
17816 	 */
17817 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17818 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17819 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
17820 	}
17821 
17822 	si.ssi_severity = SCSI_ERR_FATAL;
17823 	si.ssi_pfa_flag = FALSE;
17824 
17825 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17826 	    &si, EIO, (clock_t)0, NULL);
17827 }
17828 
17829 
17830 
17831 /*
17832  *    Function: sd_print_retry_msg
17833  *
17834  * Description: Print a message indicating the retry action being taken.
17835  *
17836  *   Arguments: un - ptr to associated softstate
17837  *		bp - ptr to buf(9S) for the command
17838  *		arg - not used.
17839  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17840  *			or SD_NO_RETRY_ISSUED
17841  *
17842  *     Context: May be called from interrupt context
17843  */
/* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * Suppress the message entirely if the device is suspended or in
	 * low power, or the command was marked FLAG_SILENT; still fall
	 * through to record the pkt_reason for duplicate suppression.
	 */
	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		/* Unknown flag value: log nothing, just record pkt_reason. */
		goto update_pkt_reason;
	}

	/* Report a parity error specially; otherwise decode pkt_reason. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
17911 
17912 
17913 /*
17914  *    Function: sd_print_cmd_incomplete_msg
17915  *
17916  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
17917  *
17918  *   Arguments: un - ptr to associated softstate
17919  *		bp - ptr to buf(9S) for the command
17920  *		arg - passed to sd_print_retry_msg()
17921  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17922  *			or SD_NO_RETRY_ISSUED
17923  *
17924  *     Context: May be called from interrupt context
17925  */
17926 
17927 static void
17928 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
17929 	int code)
17930 {
17931 	dev_info_t	*dip;
17932 
17933 	ASSERT(un != NULL);
17934 	ASSERT(mutex_owned(SD_MUTEX(un)));
17935 	ASSERT(bp != NULL);
17936 
17937 	switch (code) {
17938 	case SD_NO_RETRY_ISSUED:
17939 		/* Command was failed. Someone turned off this target? */
17940 		if (un->un_state != SD_STATE_OFFLINE) {
17941 			/*
17942 			 * Suppress message if we are detaching and
17943 			 * device has been disconnected
17944 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
17945 			 * private interface and not part of the DDI
17946 			 */
17947 			dip = un->un_sd->sd_dev;
17948 			if (!(DEVI_IS_DETACHING(dip) &&
17949 			    DEVI_IS_DEVICE_REMOVED(dip))) {
17950 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17951 				"disk not responding to selection\n");
17952 			}
17953 			New_state(un, SD_STATE_OFFLINE);
17954 		}
17955 		break;
17956 
17957 	case SD_DELAYED_RETRY_ISSUED:
17958 	case SD_IMMEDIATE_RETRY_ISSUED:
17959 	default:
17960 		/* Command was successfully queued for retry */
17961 		sd_print_retry_msg(un, bp, arg, code);
17962 		break;
17963 	}
17964 }
17965 
17966 
17967 /*
17968  *    Function: sd_pkt_reason_cmd_incomplete
17969  *
17970  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
17971  *
17972  *     Context: May be called from interrupt context
17973  */
17974 
17975 static void
17976 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
17977 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17978 {
17979 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
17980 
17981 	ASSERT(un != NULL);
17982 	ASSERT(mutex_owned(SD_MUTEX(un)));
17983 	ASSERT(bp != NULL);
17984 	ASSERT(xp != NULL);
17985 	ASSERT(pktp != NULL);
17986 
17987 	/* Do not do a reset if selection did not complete */
17988 	/* Note: Should this not just check the bit? */
17989 	if (pktp->pkt_state != STATE_GOT_BUS) {
17990 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
17991 		sd_reset_target(un, pktp);
17992 	}
17993 
17994 	/*
17995 	 * If the target was not successfully selected, then set
17996 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
17997 	 * with the target, and further retries and/or commands are
17998 	 * likely to take a long time.
17999 	 */
18000 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18001 		flag |= SD_RETRIES_FAILFAST;
18002 	}
18003 
18004 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18005 
18006 	sd_retry_command(un, bp, flag,
18007 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18008 }
18009 
18010 
18011 
18012 /*
18013  *    Function: sd_pkt_reason_cmd_tran_err
18014  *
18015  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18016  *
18017  *     Context: May be called from interrupt context
18018  */
18019 
18020 static void
18021 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18022 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18023 {
18024 	ASSERT(un != NULL);
18025 	ASSERT(mutex_owned(SD_MUTEX(un)));
18026 	ASSERT(bp != NULL);
18027 	ASSERT(xp != NULL);
18028 	ASSERT(pktp != NULL);
18029 
18030 	/*
18031 	 * Do not reset if we got a parity error, or if
18032 	 * selection did not complete.
18033 	 */
18034 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18035 	/* Note: Should this not just check the bit for pkt_state? */
18036 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18037 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18038 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18039 		sd_reset_target(un, pktp);
18040 	}
18041 
18042 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18043 
18044 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18045 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18046 }
18047 
18048 
18049 
18050 /*
18051  *    Function: sd_pkt_reason_cmd_reset
18052  *
18053  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18054  *
18055  *     Context: May be called from interrupt context
18056  */
18057 
static void
sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* The target may still be running the command, so try to reset. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/*
	 * If pkt_reason is CMD_RESET chances are that this pkt got
	 * reset because another target on this bus caused it. The target
	 * that caused it should get CMD_TIMEOUT with pkt_statistics
	 * of STAT_TIMEOUT/STAT_DEV_RESET.
	 */

	/* Retry as an innocent victim of another target's reset. */
	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18084 
18085 
18086 
18087 
18088 /*
18089  *    Function: sd_pkt_reason_cmd_aborted
18090  *
18091  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18092  *
18093  *     Context: May be called from interrupt context
18094  */
18095 
static void
sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* The target may still be running the command, so try to reset. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/*
	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
	 * aborted because another target on this bus caused it. The target
	 * that caused it should get CMD_TIMEOUT with pkt_statistics
	 * of STAT_TIMEOUT/STAT_DEV_RESET.
	 */

	/* Retry as an innocent victim of another target's abort. */
	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18122 
18123 
18124 
18125 /*
18126  *    Function: sd_pkt_reason_cmd_timeout
18127  *
18128  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18129  *
18130  *     Context: May be called from interrupt context
18131  */
18132 
static void
sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);


	/* The command timed out; count it and attempt a reset. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/*
	 * A command timeout indicates that we could not establish
	 * communication with the target, so set SD_RETRIES_FAILFAST
	 * as further retries/commands are likely to take a long time.
	 */
	sd_retry_command(un, bp,
	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18158 
18159 
18160 
18161 /*
18162  *    Function: sd_pkt_reason_cmd_unx_bus_free
18163  *
18164  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18165  *
18166  *     Context: May be called from interrupt context
18167  */
18168 
18169 static void
18170 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18171 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18172 {
18173 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18174 
18175 	ASSERT(un != NULL);
18176 	ASSERT(mutex_owned(SD_MUTEX(un)));
18177 	ASSERT(bp != NULL);
18178 	ASSERT(xp != NULL);
18179 	ASSERT(pktp != NULL);
18180 
18181 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18182 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18183 
18184 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18185 	    sd_print_retry_msg : NULL;
18186 
18187 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18188 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18189 }
18190 
18191 
18192 /*
18193  *    Function: sd_pkt_reason_cmd_tag_reject
18194  *
18195  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18196  *
18197  *     Context: May be called from interrupt context
18198  */
18199 
static void
sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	/*
	 * The target rejected the tagged command: clear the tag flags
	 * and lower the throttle to match untagged operation.
	 */
	pktp->pkt_flags = 0;
	un->un_tagflags = 0;
	if (un->un_f_opt_queueing == TRUE) {
		un->un_throttle = min(un->un_throttle, 3);
	} else {
		un->un_throttle = 1;
	}
	/* Drop the softstate mutex across the HBA capability update. */
	mutex_exit(SD_MUTEX(un));
	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	mutex_enter(SD_MUTEX(un));

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Legacy behavior not to check retry counts here. */
	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18228 
18229 
18230 /*
18231  *    Function: sd_pkt_reason_default
18232  *
18233  * Description: Default recovery actions for SCSA pkt_reason values that
18234  *		do not have more explicit recovery actions.
18235  *
18236  *     Context: May be called from interrupt context
18237  */
18238 
static void
sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* For an unrecognized pkt_reason: count it, reset, and retry. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
18257 
18258 
18259 
18260 /*
18261  *    Function: sd_pkt_status_check_condition
18262  *
18263  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18264  *
18265  *     Context: May be called from interrupt context
18266  */
18267 
static void
sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
	    "entry: buf:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
	 * command will be retried after the request sense). Otherwise, retry
	 * the command. Note: we are issuing the request sense even though the
	 * retry limit may have been reached for the failed command.
	 */
	if (un->un_f_arq_enabled == FALSE) {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "no ARQ, sending request sense command\n");
		sd_send_request_sense_command(un, bp, pktp);
	} else {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ,retrying request sense command\n");
#if defined(__i386) || defined(__amd64)
		/*
		 * The SD_RETRY_DELAY value need to be adjusted here
		 * when SD_RETRY_DELAY change in sddef.h
		 */
		/* Fibre devices get a 100ms delay; others retry immediately. */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, 0,
			un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
			NULL);
#else
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
		    0, SD_RETRY_DELAY, NULL);
#endif
	}

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
}
18310 
18311 
18312 /*
18313  *    Function: sd_pkt_status_busy
18314  *
18315  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18316  *
18317  *     Context: May be called from interrupt context
18318  */
18319 
static void
sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: entry\n");

	/* If retries are exhausted, just fail the command. */
	if (xp->xb_retry_count >= un->un_busy_retry_count) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "device busy too long\n");
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_pkt_status_busy: exit\n");
		return;
	}
	xp->xb_retry_count++;

	/*
	 * Try to reset the target. However, we do not want to perform
	 * more than one reset if the device continues to fail. The reset
	 * will be performed when the retry count reaches the reset
	 * threshold.  This threshold should be set such that at least
	 * one retry is issued before the reset is performed.
	 */
	if (xp->xb_retry_count ==
	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
		int rval = 0;
		/* Drop the softstate mutex across the scsi_reset() calls. */
		mutex_exit(SD_MUTEX(un));
		if (un->un_f_allow_bus_device_reset == TRUE) {
			/*
			 * First try to reset the LUN; if we cannot then
			 * try to reset the target.
			 */
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_LUN\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
			}
			if (rval == 0) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_TARGET\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
		}
		if (rval == 0) {
			/*
			 * If the RESET_LUN and/or RESET_TARGET failed,
			 * try RESET_ALL
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: RESET_ALL\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
		}
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
			 * At this point we give up & fail the command.
			 */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: exit (failed cmd)\n");
			return;
		}
	}

	/*
	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
	 * we have already checked the retry counts above.
	 */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
	    EIO, SD_BSY_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: exit\n");
}
18403 
18404 
18405 /*
18406  *    Function: sd_pkt_status_reservation_conflict
18407  *
18408  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18409  *		command status.
18410  *
18411  *     Context: May be called from interrupt context
18412  */
18413 
static void
sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
	 * conflict could be due to various reasons like incorrect keys, not
	 * registered or not reserved etc. So, we return EACCES to the caller.
	 */
	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
		int cmd = SD_GET_PKT_OPCODE(pktp);
		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
			sd_return_failed_command(un, bp, EACCES);
			return;
		}
	}

	un->un_resvd_status |= SD_RESERVATION_CONFLICT;

	/*
	 * When this device is under failfast and sd_failfast_enable is set,
	 * a reservation conflict means we have lost the disk and must panic.
	 */
	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
		if (sd_failfast_enable != 0) {
			/* By definition, we must panic here.... */
			panic("Reservation Conflict");
			/*NOTREACHED*/
		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Disk Reserved\n");
		sd_return_failed_command(un, bp, EACCES);
		return;
	}

	/*
	 * 1147670: retry only if sd_retry_on_reservation_conflict
	 * property is set (default is 1). Retries will not succeed
	 * on a disk reserved by another initiator. HA systems
	 * may reset this via sd.conf to avoid these retries.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Device Reserved\n");
		sd_return_failed_command(un, bp, EIO);
		return;
	}

	/*
	 * Retry the command if we can.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
	    (clock_t)2, NULL);
}
18477 
18478 
18479 
18480 /*
18481  *    Function: sd_pkt_status_qfull
18482  *
18483  * Description: Handle a QUEUE FULL condition from the target.  This can
18484  *		occur if the HBA does not handle the queue full condition.
18485  *		(Basically this means third-party HBAs as Sun HBAs will
18486  *		handle the queue full condition.)  Note that if there are
18487  *		some commands already in the transport, then the queue full
18488  *		has occurred because the queue for this nexus is actually
18489  *		full. If there are no commands in the transport, then the
18490  *		queue full is resulting from some other initiator or lun
18491  *		consuming all the resources at the target.
18492  *
18493  *     Context: May be called from interrupt context
18494  */
18495 
static void
sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: entry\n");

	/*
	 * Just lower the QFULL throttle and retry the command.  Note that
	 * we do not limit the number of retries here.
	 */
	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
	    SD_RESTART_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: exit\n");
}
18520 
18521 
18522 /*
18523  *    Function: sd_reset_target
18524  *
18525  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18526  *		RESET_TARGET, or RESET_ALL.
18527  *
18528  *     Context: May be called under interrupt context.
18529  */
18530 
static void
sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
{
	int rval = 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");

	/*
	 * No need to reset if the transport layer has already done so.
	 */
	if ((pktp->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: no reset\n");
		return;
	}

	/* Drop the softstate mutex across the scsi_reset() calls. */
	mutex_exit(SD_MUTEX(un));

	/*
	 * Escalate: RESET_LUN first (if enabled), then RESET_TARGET,
	 * and finally RESET_ALL if the narrower resets failed.
	 */
	if (un->un_f_allow_bus_device_reset == TRUE) {
		if (un->un_f_lun_reset_enabled == TRUE) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_LUN\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (rval == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_TARGET\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
	}

	if (rval == 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: RESET_ALL\n");
		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
	}

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
}
18577 
18578 
18579 /*
18580  *    Function: sd_media_change_task
18581  *
18582  * Description: Recovery action for CDROM to become available.
18583  *
18584  *     Context: Executes in a taskq() thread context
18585  */
18586 
static void
sd_media_change_task(void *arg)
{
	struct	scsi_pkt	*pktp = arg;
	struct	sd_lun		*un;
	struct	buf		*bp;
	struct	sd_xbuf		*xp;
	int	err		= 0;
	int	retry_count	= 0;
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
	struct	sd_sense_info	si;

	/* Recover un/bp/xp from the packet that saw the media change. */
	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(ISREMOVABLE(un));

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. First few attempts to read capacity and other things
	 * related to handling unit attention fail (with a ASC 0x4 and
	 * ASCQ 0x1). In that case we want to do enough retries and we want
	 * to limit the retries in other cases of genuine failures like
	 * no media in drive.
	 */
	while (retry_count++ < retry_limit) {
		if ((err = sd_handle_mchange(un)) == 0) {
			break;
		}
		if (err == EAGAIN) {
			/*
			 * Unit is becoming ready; raise the limit so we
			 * keep retrying through the slow recovery.
			 */
			retry_limit = SD_UNIT_ATTENTION_RETRY;
		}
		/* Sleep for 0.5 sec. & try again */
		delay(drv_usectohz(500000));
	}

	/*
	 * Dispatch (retry or fail) the original command here,
	 * along with appropriate console messages....
	 *
	 * Must grab the mutex before calling sd_retry_command,
	 * sd_print_sense_msg and sd_return_failed_command.
	 */
	mutex_enter(SD_MUTEX(un));
	if (err != SD_CMD_SUCCESS) {
		/*
		 * NOTE(review): err holds an errno from sd_handle_mchange()
		 * (the loop exits with err == 0 on success); this comparison
		 * assumes SD_CMD_SUCCESS == 0 — confirm.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
		si.ssi_severity = SCSI_ERR_FATAL;
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_failed_command(un, bp, EIO);
	} else {
		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
		    &si, EIO, (clock_t)0, NULL);
	}
	mutex_exit(SD_MUTEX(un));
}
18651 
18652 
18653 
18654 /*
18655  *    Function: sd_handle_mchange
18656  *
18657  * Description: Perform geometry validation & other recovery when CDROM
18658  *		has been removed from drive.
18659  *
18660  * Return Code: 0 for success
18661  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18662  *		sd_send_scsi_READ_CAPACITY()
18663  *
18664  *     Context: Executes in a taskq() thread context
18665  */
18666 
static int
sd_handle_mchange(struct sd_lun *un)
{
	uint64_t	capacity;
	uint32_t	lbasize;
	int		rval;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(ISREMOVABLE(un));

	/*
	 * Re-read the capacity and block size first; a failure (including
	 * EAGAIN while the unit is becoming ready) is returned to the
	 * caller so it can decide whether to retry.
	 */
	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
	    SD_PATH_DIRECT_PRIORITY)) != 0) {
		return (rval);
	}

	mutex_enter(SD_MUTEX(un));
	sd_update_block_info(un, lbasize, capacity);

	if (un->un_errstats != NULL) {
		/* Refresh the exported capacity kstat with the new size. */
		struct	sd_errstats *stp =
		    (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount *
		    (uint64_t)un->un_tgt_blocksize);
	}

	/*
	 * Note: Maybe let the strategy/partitioning chain worry about getting
	 * valid geometry.
	 */
	un->un_f_geometry_is_valid = FALSE;
	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
	if (un->un_f_geometry_is_valid == FALSE) {
		mutex_exit(SD_MUTEX(un));
		return (EIO);
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Try to lock the door
	 */
	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
	    SD_PATH_DIRECT_PRIORITY));
}
18712 
18713 
18714 /*
18715  *    Function: sd_send_scsi_DOORLOCK
18716  *
18717  * Description: Issue the scsi DOOR LOCK command
18718  *
18719  *   Arguments: un    - pointer to driver soft state (unit) structure for
18720  *			this target.
18721  *		flag  - SD_REMOVAL_ALLOW
18722  *			SD_REMOVAL_PREVENT
18723  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18724  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18725  *			to use the USCSI "direct" chain and bypass the normal
18726  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18727  *			command is issued as part of an error recovery action.
18728  *
18729  * Return Code: 0   - Success
18730  *		errno return code from sd_send_scsi_cmd()
18731  *
18732  *     Context: Can sleep.
18733  */
18734 
18735 static int
18736 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
18737 {
18738 	union scsi_cdb		cdb;
18739 	struct uscsi_cmd	ucmd_buf;
18740 	struct scsi_extended_sense	sense_buf;
18741 	int			status;
18742 
18743 	ASSERT(un != NULL);
18744 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18745 
18746 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
18747 
18748 	/* already determined doorlock is not supported, fake success */
18749 	if (un->un_f_doorlock_supported == FALSE) {
18750 		return (0);
18751 	}
18752 
18753 	bzero(&cdb, sizeof (cdb));
18754 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18755 
18756 	cdb.scc_cmd = SCMD_DOORLOCK;
18757 	cdb.cdb_opaque[4] = (uchar_t)flag;
18758 
18759 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18760 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18761 	ucmd_buf.uscsi_bufaddr	= NULL;
18762 	ucmd_buf.uscsi_buflen	= 0;
18763 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18764 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
18765 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18766 	ucmd_buf.uscsi_timeout	= 15;
18767 
18768 	SD_TRACE(SD_LOG_IO, un,
18769 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
18770 
18771 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
18772 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
18773 
18774 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
18775 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18776 	    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
18777 		/* fake success and skip subsequent doorlock commands */
18778 		un->un_f_doorlock_supported = FALSE;
18779 		return (0);
18780 	}
18781 
18782 	return (status);
18783 }
18784 
18785 
18786 /*
18787  *    Function: sd_send_scsi_READ_CAPACITY
18788  *
18789  * Description: This routine uses the scsi READ CAPACITY command to determine
18790  *		the device capacity in number of blocks and the device native
18791  *		block size. If this function returns a failure, then the
18792  *		values in *capp and *lbap are undefined.  If the capacity
18793  *		returned is 0xffffffff then the lun is too large for a
18794  *		normal READ CAPACITY command and the results of a
18795  *		READ CAPACITY 16 will be used instead.
18796  *
18797  *   Arguments: un   - ptr to soft state struct for the target
18798  *		capp - ptr to unsigned 64-bit variable to receive the
18799  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
18801  *			block size value from the command
18802  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18803  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18804  *			to use the USCSI "direct" chain and bypass the normal
18805  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18806  *			command is issued as part of an error recovery action.
18807  *
18808  * Return Code: 0   - Success
18809  *		EIO - IO error
18810  *		EACCES - Reservation conflict detected
18811  *		EAGAIN - Device is becoming ready
18812  *		errno return code from sd_send_scsi_cmd()
18813  *
18814  *     Context: Can sleep.  Blocks until command completes.
18815  */
18816 
#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)

static int
sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
	int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint32_t		*capacity_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY command to the target.
	 * (This command is mandatory under SCSI-2.)
	 *
	 * Set up the CDB for the READ_CAPACITY command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	/* capacity_buf must be freed on every exit path below. */
	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);

	cdb.scc_cmd = SCMD_READ_CAPACITY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/* Return failure if we did not get valid capacity data. */
		if (ucmd_buf.uscsi_resid != 0) {
			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 10 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 10
		 * command returns the following:
		 *
		 *  bytes 0-3: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:3)
		 *
		 *  bytes 4-7: Block length in bytes
		 *		(MSB in byte:4 & LSB in byte:7)
		 *
		 */
		capacity = BE_32(capacity_buf[0]);
		lbasize = BE_32(capacity_buf[1]);

		/*
		 * Done with capacity_buf
		 */
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large and requires SBC-2 commands.
		 * Reissue the request using READ CAPACITY 16.
		 */
		if (capacity == 0xffffffff) {
			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
			    &lbasize, path_flag);
			if (status != 0) {
				return (status);
			}
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_add_code  == 0x04) &&
			    (sense_buf.es_qual_code == 0x01)) {
				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
		return (status);
	}

	/*
	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
	 * (2352 and 0 are common) so for these devices always force the value
	 * to 2048 as required by the ATAPI specs.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
		lbasize = 2048;
	}

	/*
	 * Get the maximum LBA value from the READ CAPACITY data.
	 * Here we assume that the Partial Medium Indicator (PMI) bit
	 * was cleared when issuing the command. This means that the LBA
	 * returned from the device is the LBA of the last logical block
	 * on the logical unit.  The actual logical block count will be
	 * this value plus one.
	 *
	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
	 * so scale the capacity value to reflect this.
	 *
	 * NOTE(review): the integer division below assumes lbasize is a
	 * multiple of un->un_sys_blocksize; a smaller nonzero lbasize would
	 * scale capacity to 0 (and then fail the check below) — confirm
	 * that is the intended handling.
	 */
	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);

#if defined(__i386) || defined(__amd64)
	/*
	 * On x86, compensate for off-by-1 error (number of sectors on
	 * media)  (1175930)
	 */
	if (!ISREMOVABLE(un) && (lbasize == un->un_sys_blocksize)) {
		capacity -= 1;
	}
#endif

	/*
	 * Copy the values from the READ CAPACITY command into the space
	 * provided by the caller.
	 */
	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	/*
	 * Both the lbasize and capacity from the device must be nonzero,
	 * otherwise we assume that the values are not valid and return
	 * failure to the caller. (4203735)
	 */
	if ((capacity == 0) || (lbasize == 0)) {
		return (EIO);
	}

	return (0);
}
18989 
18990 /*
18991  *    Function: sd_send_scsi_READ_CAPACITY_16
18992  *
18993  * Description: This routine uses the scsi READ CAPACITY 16 command to
18994  *		determine the device capacity in number of blocks and the
18995  *		device native block size.  If this function returns a failure,
18996  *		then the values in *capp and *lbap are undefined.
18997  *		This routine should always be called by
 *		sd_send_scsi_READ_CAPACITY which will apply any device
18999  *		specific adjustments to capacity and lbasize.
19000  *
19001  *   Arguments: un   - ptr to soft state struct for the target
19002  *		capp - ptr to unsigned 64-bit variable to receive the
19003  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19005  *			block size value from the command
19006  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19007  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19008  *			to use the USCSI "direct" chain and bypass the normal
19009  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19010  *			this command is issued as part of an error recovery
19011  *			action.
19012  *
19013  * Return Code: 0   - Success
19014  *		EIO - IO error
19015  *		EACCES - Reservation conflict detected
19016  *		EAGAIN - Device is becoming ready
19017  *		errno return code from sd_send_scsi_cmd()
19018  *
19019  *     Context: Can sleep.  Blocks until command completes.
19020  */
19021 
#define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)

static int
sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint64_t		*capacity16_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY_16 command to the target.
	 *
	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	/* capacity16_buf must be freed on every exit path below. */
	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Read Capacity (16) is a Service Action In command.  One
	 * command byte (0x9E) is overloaded for multiple operations,
	 * with the second CDB byte specifying the desired operation
	 */
	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;

	/*
	 * Fill in allocation length field
	 */
	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/*
		 * Return failure if we did not get valid capacity data.
		 * NOTE(review): resid > 20 appears intended to ensure that
		 * at least the first 12 bytes (capacity + block length) of
		 * the scsi_capacity_16 structure were returned — confirm
		 * the threshold against sizeof (struct scsi_capacity_16).
		 */
		if (ucmd_buf.uscsi_resid > 20) {
			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 16 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 16
		 * command returns the following:
		 *
		 *  bytes 0-7: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:7)
		 *
		 *  bytes 8-11: Block length in bytes
		 *		(MSB in byte:8 & LSB in byte:11)
		 *
		 */
		capacity = BE_64(capacity16_buf[0]);
		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);

		/*
		 * Done with capacity16_buf
		 */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits which
		 * are not defined by any current T10 standards.
		 */
		if (capacity == 0xffffffffffffffff) {
			return (EIO);
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_add_code  == 0x04) &&
			    (sense_buf.es_qual_code == 0x01)) {
				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
		return (status);
	}

	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	return (0);
}
19156 
19157 
19158 /*
19159  *    Function: sd_send_scsi_START_STOP_UNIT
19160  *
19161  * Description: Issue a scsi START STOP UNIT command to the target.
19162  *
19163  *   Arguments: un    - pointer to driver soft state (unit) structure for
19164  *			this target.
19165  *		flag  - SD_TARGET_START
19166  *			SD_TARGET_STOP
19167  *			SD_TARGET_EJECT
19168  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19169  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19170  *			to use the USCSI "direct" chain and bypass the normal
19171  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19172  *			command is issued as part of an error recovery action.
19173  *
19174  * Return Code: 0   - Success
19175  *		EIO - IO error
19176  *		EACCES - Reservation conflict detected
19177  *		ENXIO  - Not Ready, medium not present
19178  *		errno return code from sd_send_scsi_cmd()
19179  *
19180  *     Context: Can sleep.
19181  */
19182 
static int
sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);

	/*
	 * Removable devices known not to support START/STOP: fake
	 * success for plain start/stop requests (EJECT is still sent).
	 */
	if (ISREMOVABLE(un) &&
	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
	    (un->un_f_start_stop_supported != TRUE)) {
		return (0);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* Group 0 START STOP UNIT; byte 4 carries the start/stop/eject flag. */
	cdb.scc_cmd = SCMD_START_STOP;
	cdb.cdb_opaque[4] = (uchar_t)flag;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 200;	/* spin-up can be slow */

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	/* Map interesting failure modes to specific errnos for callers. */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (sense_buf.es_key) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					/* ASC 0x3A: medium not present */
					if (sense_buf.es_add_code == 0x3A) {
						status = ENXIO;
					}
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");

	return (status);
}
19258 
19259 
19260 /*
19261  *    Function: sd_start_stop_unit_callback
19262  *
19263  * Description: timeout(9F) callback to begin recovery process for a
19264  *		device that has spun down.
19265  *
19266  *   Arguments: arg - pointer to associated softstate struct.
19267  *
19268  *     Context: Executes in a timeout(9F) thread context
19269  */
19270 
static void
sd_start_stop_unit_callback(void *arg)
{
	struct sd_lun	*un = arg;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");

	/*
	 * Hand the actual recovery work off to a taskq thread; a
	 * timeout(9F) callback must not block.  NOTE(review): the
	 * KM_NOSLEEP dispatch can fail and the failure is discarded
	 * here — confirm the recovery is re-attempted elsewhere.
	 */
	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
}
19282 
19283 
19284 /*
19285  *    Function: sd_start_stop_unit_task
19286  *
19287  * Description: Recovery procedure when a drive is spun down.
19288  *
19289  *   Arguments: arg - pointer to associated softstate struct.
19290  *
19291  *     Context: Executes in a taskq() thread context
19292  */
19293 
static void
sd_start_stop_unit_task(void *arg)
{
	struct sd_lun	*un = arg;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");

	/*
	 * Some unformatted drives report not ready error, no need to
	 * restart if format has been initiated.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_format_in_progress == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * When a START STOP command is issued from here, it is part of a
	 * failure recovery operation and must be issued before any other
	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
	 * succeeds or not, we will start I/O after the attempt.
	 */
	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
	    SD_PATH_DIRECT_PRIORITY);

	/*
	 * The above call blocks until the START_STOP_UNIT command completes.
	 * Now that it has completed, we must re-try the original IO that
	 * received the NOT READY condition in the first place. There are
	 * three possible conditions here:
	 *
	 *  (1) The original IO is on un_retry_bp.
	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
	 *	is NULL.
	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
	 *	points to some other, unrelated bp.
	 *
	 * For each case, we must call sd_start_cmds() with un_retry_bp
	 * as the argument. If un_retry_bp is NULL, this will initiate
	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
	 * then this will process the bp on un_retry_bp. That may or may not
	 * be the original IO, but that does not matter: the important thing
	 * is to keep the IO processing going at this point.
	 *
	 * Note: This is a very specific error recovery sequence associated
	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
	 * serialize the I/O with completion of the spin-up.
	 */
	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
	    un, un->un_retry_bp);
	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
	sd_start_cmds(un, un->un_retry_bp);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
}
19358 
19359 
19360 /*
19361  *    Function: sd_send_scsi_INQUIRY
19362  *
19363  * Description: Issue the scsi INQUIRY command.
19364  *
 *   Arguments: un        - driver soft state (unit) structure for the target
 *		bufaddr   - buffer to receive the INQUIRY data
 *		buflen    - length of bufaddr in bytes
 *		evpd      - EVPD bit value for CDB byte 1
 *		page_code - vital product data page code for CDB byte 2
 *		residp    - if non-NULL, receives uscsi_resid on success
19371  *
19372  * Return Code: 0   - Success
19373  *		errno return code from sd_send_scsi_cmd()
19374  *
19375  *     Context: Can sleep. Does not return until command is completed.
19376  */
19377 
19378 static int
19379 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19380 	uchar_t evpd, uchar_t page_code, size_t *residp)
19381 {
19382 	union scsi_cdb		cdb;
19383 	struct uscsi_cmd	ucmd_buf;
19384 	int			status;
19385 
19386 	ASSERT(un != NULL);
19387 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19388 	ASSERT(bufaddr != NULL);
19389 
19390 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19391 
19392 	bzero(&cdb, sizeof (cdb));
19393 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19394 	bzero(bufaddr, buflen);
19395 
19396 	cdb.scc_cmd = SCMD_INQUIRY;
19397 	cdb.cdb_opaque[1] = evpd;
19398 	cdb.cdb_opaque[2] = page_code;
19399 	FORMG0COUNT(&cdb, buflen);
19400 
19401 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19402 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19403 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19404 	ucmd_buf.uscsi_buflen	= buflen;
19405 	ucmd_buf.uscsi_rqbuf	= NULL;
19406 	ucmd_buf.uscsi_rqlen	= 0;
19407 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19408 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19409 
19410 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19411 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19412 
19413 	if ((status == 0) && (residp != NULL)) {
19414 		*residp = ucmd_buf.uscsi_resid;
19415 	}
19416 
19417 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19418 
19419 	return (status);
19420 }
19421 
19422 
19423 /*
19424  *    Function: sd_send_scsi_TEST_UNIT_READY
19425  *
19426  * Description: Issue the scsi TEST UNIT READY command.
19427  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19428  *		prevent retrying failed commands. Use this when the intent
19429  *		is either to check for device readiness, to clear a Unit
19430  *		Attention, or to clear any outstanding sense data.
19431  *		However under specific conditions the expected behavior
19432  *		is for retries to bring a device ready, so use the flag
19433  *		with caution.
19434  *
19435  *   Arguments: un
19436  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19437  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19438  *			0: dont check for media present, do retries on cmd.
19439  *
19440  * Return Code: 0   - Success
19441  *		EIO - IO error
19442  *		EACCES - Reservation conflict detected
19443  *		ENXIO  - Not Ready, medium not present
19444  *		errno return code from sd_send_scsi_cmd()
19445  *
19446  *     Context: Can sleep. Does not return until command is completed.
19447  */
19448 
static int
sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);

	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are
	 * pending commands return success, this is a bit arbitrary but is ok
	 * for non-removables (i.e. the eliteI disks) and non-clustering
	 * configurations.
	 */
	if (un->un_f_cfg_tur_check == TRUE) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_ncmds_in_transport != 0) {
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;

	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
	if ((flag & SD_DONT_RETRY_TUR) != 0) {
		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
	}
	ucmd_buf.uscsi_timeout	= 60;

	/* SD_BYPASS_PM sends the command down the direct path. */
	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE,
	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));

	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* Only map to ENXIO if the caller asked us to. */
			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
				break;
			}
			/* NOT READY with ASC 0x3A: medium not present. */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_key == KEY_NOT_READY) &&
			    (sense_buf.es_add_code == 0x3A)) {
				status = ENXIO;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");

	return (status);
}
19536 
19537 
19538 /*
19539  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19540  *
19541  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19542  *
19543  *   Arguments: un
19544  *
19545  * Return Code: 0   - Success
19546  *		EACCES
19547  *		ENOTSUP
19548  *		errno return code from sd_send_scsi_cmd()
19549  *
19550  *     Context: Can sleep. Does not return until command is completed.
19551  */
19552 
19553 static int
19554 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19555 	uint16_t data_len, uchar_t *data_bufp)
19556 {
19557 	struct scsi_extended_sense	sense_buf;
19558 	union scsi_cdb		cdb;
19559 	struct uscsi_cmd	ucmd_buf;
19560 	int			status;
19561 	int			no_caller_buf = FALSE;
19562 
19563 	ASSERT(un != NULL);
19564 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19565 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19566 
19567 	SD_TRACE(SD_LOG_IO, un,
19568 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19569 
19570 	bzero(&cdb, sizeof (cdb));
19571 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19572 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19573 	if (data_bufp == NULL) {
19574 		/* Allocate a default buf if the caller did not give one */
19575 		ASSERT(data_len == 0);
19576 		data_len  = MHIOC_RESV_KEY_SIZE;
19577 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19578 		no_caller_buf = TRUE;
19579 	}
19580 
19581 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19582 	cdb.cdb_opaque[1] = usr_cmd;
19583 	FORMG1COUNT(&cdb, data_len);
19584 
19585 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19586 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19587 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19588 	ucmd_buf.uscsi_buflen	= data_len;
19589 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19590 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19591 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19592 	ucmd_buf.uscsi_timeout	= 60;
19593 
19594 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19595 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19596 
19597 	switch (status) {
19598 	case 0:
19599 		break;	/* Success! */
19600 	case EIO:
19601 		switch (ucmd_buf.uscsi_status) {
19602 		case STATUS_RESERVATION_CONFLICT:
19603 			status = EACCES;
19604 			break;
19605 		case STATUS_CHECK:
19606 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19607 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19608 				status = ENOTSUP;
19609 			}
19610 			break;
19611 		default:
19612 			break;
19613 		}
19614 		break;
19615 	default:
19616 		break;
19617 	}
19618 
19619 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19620 
19621 	if (no_caller_buf == TRUE) {
19622 		kmem_free(data_bufp, data_len);
19623 	}
19624 
19625 	return (status);
19626 }
19627 
19628 
19629 /*
19630  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19631  *
19632  * Description: This routine is the driver entry point for handling CD-ROM
19633  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
19634  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
19635  *		device.
19636  *
19637  *   Arguments: un  -   Pointer to soft state struct for the target.
19638  *		usr_cmd SCSI-3 reservation facility command (one of
19639  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
19640  *			SD_SCSI3_PREEMPTANDABORT)
19641  *		usr_bufp - user provided pointer register, reserve descriptor or
19642  *			preempt and abort structure (mhioc_register_t,
19643  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
19644  *
19645  * Return Code: 0   - Success
19646  *		EACCES
19647  *		ENOTSUP
19648  *		errno return code from sd_send_scsi_cmd()
19649  *
19650  *     Context: Can sleep. Does not return until command is completed.
19651  */
19652 
static int
sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
	uchar_t	*usr_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	/* The PROUT parameter list is a fixed 24-byte structure. */
	uchar_t			data_len = sizeof (sd_prout_t);
	sd_prout_t		*prp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(data_len == 24);	/* required by scsi spec */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);

	if (usr_bufp == NULL) {
		return (EINVAL);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	prp = kmem_zalloc(data_len, KM_SLEEP);

	/* PERSISTENT RESERVE OUT: service action goes in CDB byte 1. */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
	cdb.cdb_opaque[1] = usr_cmd;
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Translate the caller's mhioc_* request structure into the PROUT
	 * parameter list (prp) and, where applicable, the reservation type
	 * in CDB byte 2.
	 */
	switch (usr_cmd) {
	case SD_SCSI3_REGISTER: {
		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;

		/* Register: old key -> reservation key, new key -> service */
		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->newkey.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	case SD_SCSI3_RESERVE:
	case SD_SCSI3_RELEASE: {
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		/* scope_address is big-endian on the wire */
		prp->scope_address = BE_32(ptr->scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->type;
		break;
	}
	case SD_SCSI3_PREEMPTANDABORT: {
		mhioc_preemptandabort_t *ptr =
		    (mhioc_preemptandabort_t *)usr_bufp;

		/* Our key is the reservation key; victim's goes in service */
		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->victim_key.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->resvdesc.type;
		/* Jump the wait queue so the preempt goes out first */
		ucmd_buf.uscsi_flags |= USCSI_HEAD;
		break;
	}
	case SD_SCSI3_REGISTERANDIGNOREKEY:
	{
		mhioc_registerandignorekey_t *ptr;
		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
		/* Existing key is ignored; only the new key is sent */
		bcopy(ptr->newkey.key,
		    prp->service_key, MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	default:
		/* Callers only pass the commands handled above */
		ASSERT(FALSE);
		break;
	}

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * Refine EIO: reservation conflict -> EACCES; check condition with
	 * KEY_ILLEGAL_REQUEST -> ENOTSUP (target lacks PR support).
	 */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	kmem_free(prp, data_len);
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
	return (status);
}
19767 
19768 
19769 /*
19770  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
19771  *
19772  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
19773  *
19774  *   Arguments: un - pointer to the target's soft state struct
19775  *
19776  * Return Code: 0 - success
19777  *		errno-type error code
19778  *
19779  *     Context: kernel thread context only.
19780  */
19781 
static int
sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_SYNCHRONIZE_CACHE;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	/* Flushing the cache can take a while; allow up to 4 minutes. */
	ucmd_buf.uscsi_timeout	= 240;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);

	/*
	 * Several failure modes are deliberately tolerated here: flushes
	 * are best-effort, so we only warn (and propagate the error) when
	 * the flush genuinely failed on a present, reachable medium.
	 */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Ignore reservation conflict */
			status = 0;
			goto done;

		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
				/* Ignore Illegal Request error */
				/* (device presumably lacks a cache) */
				status = 0;
				goto done;
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/* Ignore error if the media is not present. */
		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
			status = 0;
			goto done;
		}
		/* If we reach this, we had an error */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
		break;
	}

done:
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: exit\n");

	return (status);
}
19853 
19854 
19855 /*
19856  *    Function: sd_send_scsi_GET_CONFIGURATION
19857  *
19858  * Description: Issues the get configuration command to the device.
19859  *		Called from sd_check_for_writable_cd & sd_get_media_info
19860  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
19861  *   Arguments: un
19862  *		ucmdbuf
19863  *		rqbuf
19864  *		rqbuflen
19865  *		bufaddr
19866  *		buflen
19867  *
19868  * Return Code: 0   - Success
19869  *		errno return code from sd_send_scsi_cmd()
19870  *
19871  *     Context: Can sleep. Does not return until command is completed.
19872  *
19873  */
19874 
19875 static int
19876 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
19877 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
19878 {
19879 	char	cdb[CDB_GROUP1];
19880 	int	status;
19881 
19882 	ASSERT(un != NULL);
19883 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19884 	ASSERT(bufaddr != NULL);
19885 	ASSERT(ucmdbuf != NULL);
19886 	ASSERT(rqbuf != NULL);
19887 
19888 	SD_TRACE(SD_LOG_IO, un,
19889 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
19890 
19891 	bzero(cdb, sizeof (cdb));
19892 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
19893 	bzero(rqbuf, rqbuflen);
19894 	bzero(bufaddr, buflen);
19895 
19896 	/*
19897 	 * Set up cdb field for the get configuration command.
19898 	 */
19899 	cdb[0] = SCMD_GET_CONFIGURATION;
19900 	cdb[1] = 0x02;  /* Requested Type */
19901 	cdb[8] = SD_PROFILE_HEADER_LEN;
19902 	ucmdbuf->uscsi_cdb = cdb;
19903 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
19904 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
19905 	ucmdbuf->uscsi_buflen = buflen;
19906 	ucmdbuf->uscsi_timeout = sd_io_time;
19907 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
19908 	ucmdbuf->uscsi_rqlen = rqbuflen;
19909 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
19910 
19911 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
19912 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19913 
19914 	switch (status) {
19915 	case 0:
19916 		break;  /* Success! */
19917 	case EIO:
19918 		switch (ucmdbuf->uscsi_status) {
19919 		case STATUS_RESERVATION_CONFLICT:
19920 			status = EACCES;
19921 			break;
19922 		default:
19923 			break;
19924 		}
19925 		break;
19926 	default:
19927 		break;
19928 	}
19929 
19930 	if (status == 0) {
19931 		SD_DUMP_MEMORY(un, SD_LOG_IO,
19932 		    "sd_send_scsi_GET_CONFIGURATION: data",
19933 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
19934 	}
19935 
19936 	SD_TRACE(SD_LOG_IO, un,
19937 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
19938 
19939 	return (status);
19940 }
19941 
19942 /*
19943  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
19944  *
19945  * Description: Issues the get configuration command to the device to
19946  *              retrieve a specific feature. Called from
19947  *		sd_check_for_writable_cd & sd_set_mmc_caps.
19948  *   Arguments: un
19949  *              ucmdbuf
19950  *              rqbuf
19951  *              rqbuflen
19952  *              bufaddr
19953  *              buflen
19954  *		feature
19955  *
19956  * Return Code: 0   - Success
19957  *              errno return code from sd_send_scsi_cmd()
19958  *
19959  *     Context: Can sleep. Does not return until command is completed.
19960  *
19961  */
19962 static int
19963 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
19964 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
19965 	uchar_t *bufaddr, uint_t buflen, char feature)
19966 {
19967 	char    cdb[CDB_GROUP1];
19968 	int	status;
19969 
19970 	ASSERT(un != NULL);
19971 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19972 	ASSERT(bufaddr != NULL);
19973 	ASSERT(ucmdbuf != NULL);
19974 	ASSERT(rqbuf != NULL);
19975 
19976 	SD_TRACE(SD_LOG_IO, un,
19977 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
19978 
19979 	bzero(cdb, sizeof (cdb));
19980 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
19981 	bzero(rqbuf, rqbuflen);
19982 	bzero(bufaddr, buflen);
19983 
19984 	/*
19985 	 * Set up cdb field for the get configuration command.
19986 	 */
19987 	cdb[0] = SCMD_GET_CONFIGURATION;
19988 	cdb[1] = 0x02;  /* Requested Type */
19989 	cdb[3] = feature;
19990 	cdb[8] = buflen;
19991 	ucmdbuf->uscsi_cdb = cdb;
19992 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
19993 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
19994 	ucmdbuf->uscsi_buflen = buflen;
19995 	ucmdbuf->uscsi_timeout = sd_io_time;
19996 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
19997 	ucmdbuf->uscsi_rqlen = rqbuflen;
19998 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
19999 
20000 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20001 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20002 
20003 	switch (status) {
20004 	case 0:
20005 		break;  /* Success! */
20006 	case EIO:
20007 		switch (ucmdbuf->uscsi_status) {
20008 		case STATUS_RESERVATION_CONFLICT:
20009 			status = EACCES;
20010 			break;
20011 		default:
20012 			break;
20013 		}
20014 		break;
20015 	default:
20016 		break;
20017 	}
20018 
20019 	if (status == 0) {
20020 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20021 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20022 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20023 	}
20024 
20025 	SD_TRACE(SD_LOG_IO, un,
20026 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20027 
20028 	return (status);
20029 }
20030 
20031 
20032 /*
20033  *    Function: sd_send_scsi_MODE_SENSE
20034  *
20035  * Description: Utility function for issuing a scsi MODE SENSE command.
20036  *		Note: This routine uses a consistent implementation for Group0,
20037  *		Group1, and Group2 commands across all platforms. ATAPI devices
20038  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20039  *
20040  *   Arguments: un - pointer to the softstate struct for the target.
20041  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20042  *			  CDB_GROUP[1|2] (10 byte).
20043  *		bufaddr - buffer for page data retrieved from the target.
20044  *		buflen - size of page to be retrieved.
20045  *		page_code - page code of data to be retrieved from the target.
20046  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20047  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20048  *			to use the USCSI "direct" chain and bypass the normal
20049  *			command waitq.
20050  *
20051  * Return Code: 0   - Success
20052  *		errno return code from sd_send_scsi_cmd()
20053  *
20054  *     Context: Can sleep. Does not return until command is completed.
20055  */
20056 
20057 static int
20058 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20059 	size_t buflen,  uchar_t page_code, int path_flag)
20060 {
20061 	struct	scsi_extended_sense	sense_buf;
20062 	union scsi_cdb		cdb;
20063 	struct uscsi_cmd	ucmd_buf;
20064 	int			status;
20065 
20066 	ASSERT(un != NULL);
20067 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20068 	ASSERT(bufaddr != NULL);
20069 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20070 	    (cdbsize == CDB_GROUP2));
20071 
20072 	SD_TRACE(SD_LOG_IO, un,
20073 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20074 
20075 	bzero(&cdb, sizeof (cdb));
20076 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20077 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20078 	bzero(bufaddr, buflen);
20079 
20080 	if (cdbsize == CDB_GROUP0) {
20081 		cdb.scc_cmd = SCMD_MODE_SENSE;
20082 		cdb.cdb_opaque[2] = page_code;
20083 		FORMG0COUNT(&cdb, buflen);
20084 	} else {
20085 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20086 		cdb.cdb_opaque[2] = page_code;
20087 		FORMG1COUNT(&cdb, buflen);
20088 	}
20089 
20090 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20091 
20092 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20093 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20094 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20095 	ucmd_buf.uscsi_buflen	= buflen;
20096 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20097 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20098 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20099 	ucmd_buf.uscsi_timeout	= 60;
20100 
20101 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20102 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20103 
20104 	switch (status) {
20105 	case 0:
20106 		break;	/* Success! */
20107 	case EIO:
20108 		switch (ucmd_buf.uscsi_status) {
20109 		case STATUS_RESERVATION_CONFLICT:
20110 			status = EACCES;
20111 			break;
20112 		default:
20113 			break;
20114 		}
20115 		break;
20116 	default:
20117 		break;
20118 	}
20119 
20120 	if (status == 0) {
20121 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20122 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20123 	}
20124 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20125 
20126 	return (status);
20127 }
20128 
20129 
20130 /*
20131  *    Function: sd_send_scsi_MODE_SELECT
20132  *
20133  * Description: Utility function for issuing a scsi MODE SELECT command.
20134  *		Note: This routine uses a consistent implementation for Group0,
20135  *		Group1, and Group2 commands across all platforms. ATAPI devices
20136  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20137  *
20138  *   Arguments: un - pointer to the softstate struct for the target.
20139  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20140  *			  CDB_GROUP[1|2] (10 byte).
20141  *		bufaddr - buffer for page data retrieved from the target.
20142  *		buflen - size of page to be retrieved.
20143  *		save_page - boolean to determine if SP bit should be set.
20144  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20145  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20146  *			to use the USCSI "direct" chain and bypass the normal
20147  *			command waitq.
20148  *
20149  * Return Code: 0   - Success
20150  *		errno return code from sd_send_scsi_cmd()
20151  *
20152  *     Context: Can sleep. Does not return until command is completed.
20153  */
20154 
20155 static int
20156 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20157 	size_t buflen,  uchar_t save_page, int path_flag)
20158 {
20159 	struct	scsi_extended_sense	sense_buf;
20160 	union scsi_cdb		cdb;
20161 	struct uscsi_cmd	ucmd_buf;
20162 	int			status;
20163 
20164 	ASSERT(un != NULL);
20165 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20166 	ASSERT(bufaddr != NULL);
20167 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20168 	    (cdbsize == CDB_GROUP2));
20169 
20170 	SD_TRACE(SD_LOG_IO, un,
20171 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20172 
20173 	bzero(&cdb, sizeof (cdb));
20174 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20175 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20176 
20177 	/* Set the PF bit for many third party drives */
20178 	cdb.cdb_opaque[1] = 0x10;
20179 
20180 	/* Set the savepage(SP) bit if given */
20181 	if (save_page == SD_SAVE_PAGE) {
20182 		cdb.cdb_opaque[1] |= 0x01;
20183 	}
20184 
20185 	if (cdbsize == CDB_GROUP0) {
20186 		cdb.scc_cmd = SCMD_MODE_SELECT;
20187 		FORMG0COUNT(&cdb, buflen);
20188 	} else {
20189 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20190 		FORMG1COUNT(&cdb, buflen);
20191 	}
20192 
20193 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20194 
20195 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20196 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20197 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20198 	ucmd_buf.uscsi_buflen	= buflen;
20199 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20200 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20201 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20202 	ucmd_buf.uscsi_timeout	= 60;
20203 
20204 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20205 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20206 
20207 	switch (status) {
20208 	case 0:
20209 		break;	/* Success! */
20210 	case EIO:
20211 		switch (ucmd_buf.uscsi_status) {
20212 		case STATUS_RESERVATION_CONFLICT:
20213 			status = EACCES;
20214 			break;
20215 		default:
20216 			break;
20217 		}
20218 		break;
20219 	default:
20220 		break;
20221 	}
20222 
20223 	if (status == 0) {
20224 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20225 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20226 	}
20227 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20228 
20229 	return (status);
20230 }
20231 
20232 
20233 /*
20234  *    Function: sd_send_scsi_RDWR
20235  *
20236  * Description: Issue a scsi READ or WRITE command with the given parameters.
20237  *
20238  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20239  *		cmd:	 SCMD_READ or SCMD_WRITE
20240  *		bufaddr: Address of caller's buffer to receive the RDWR data
20241  *		buflen:  Length of caller's buffer receive the RDWR data.
20242  *		start_block: Block number for the start of the RDWR operation.
20243  *			 (Assumes target-native block size.)
20244  *		residp:  Pointer to variable to receive the residual of the
20245  *			 RDWR operation (may be NULL if no residual requested).
20246  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20247  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20248  *			to use the USCSI "direct" chain and bypass the normal
20249  *			command waitq.
20250  *
20251  * Return Code: 0   - Success
20252  *		errno return code from sd_send_scsi_cmd()
20253  *
20254  *     Context: Can sleep. Does not return until command is completed.
20255  */
20256 
static int
sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	uint32_t		block_count;
	int			status;
	int			cdbsize;
	uchar_t			flag;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);

	/* The byte-to-block conversion below requires a valid blocksize. */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return (EINVAL);
	}

	/*
	 * Convert the buffer length to target blocks under the soft-state
	 * mutex (SD_BYTES2TGTBLOCKS reads per-target state).
	 */
	mutex_enter(SD_MUTEX(un));
	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
	mutex_exit(SD_MUTEX(un));

	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
	    bufaddr, buflen, start_block, block_count);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* Compute CDB size to use */
	/*
	 * Group 4 (16-byte) when the LBA exceeds 32 bits; Group 1
	 * (10-byte) when the LBA exceeds the 21 bits a Group 0 CDB can
	 * address, or for ATAPI devices; Group 0 (6-byte) otherwise.
	 */
	if (start_block > 0xffffffff)
		cdbsize = CDB_GROUP4;
	else if ((start_block & 0xFFE00000) ||
	    (un->un_f_cfg_is_atapi == TRUE))
		cdbsize = CDB_GROUP1;
	else
		cdbsize = CDB_GROUP0;

	switch (cdbsize) {
	case CDB_GROUP0:	/* 6-byte CDBs */
		cdb.scc_cmd = cmd;
		FORMG0ADDR(&cdb, start_block);
		FORMG0COUNT(&cdb, block_count);
		break;
	case CDB_GROUP1:	/* 10-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP1;
		FORMG1ADDR(&cdb, start_block);
		FORMG1COUNT(&cdb, block_count);
		break;
	case CDB_GROUP4:	/* 16-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP4;
		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
		FORMG4COUNT(&cdb, block_count);
		break;
	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
	default:
		/* All others reserved */
		return (EINVAL);
	}

	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;
	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
	/* A reservation conflict is reported to the caller as EACCES. */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");

	return (status);
}
20363 
20364 
20365 /*
20366  *    Function: sd_send_scsi_LOG_SENSE
20367  *
20368  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20369  *
20370  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20371  *
20372  * Return Code: 0   - Success
20373  *		errno return code from sd_send_scsi_cmd()
20374  *
20375  *     Context: Can sleep. Does not return until command is completed.
20376  */
20377 
static int
sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)

{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	/* Byte 2: page control in the top two bits, page code below. */
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	/* Bytes 5-6: parameter pointer, big-endian. */
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST) &&
			    (sense_buf.es_add_code == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					/* Rebuild byte 2 with the VU page */
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					/* Reissue with the same ucmd_buf */
					status = sd_send_scsi_cmd(
					    SD_GET_DEV(un), &ucmd_buf,
					    UIO_SYSSPACE, UIO_SYSSPACE,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* No retry page; report unsupported */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
20488 
20489 
20490 /*
20491  *    Function: sdioctl
20492  *
20493  * Description: Driver's ioctl(9e) entry point function.
20494  *
20495  *   Arguments: dev     - device number
20496  *		cmd     - ioctl operation to be performed
20497  *		arg     - user argument, contains data to be set or reference
20498  *			  parameter for get
20499  *		flag    - bit flag, indicating open settings, 32/64 bit type
20500  *		cred_p  - user credential pointer
20501  *		rval_p  - calling process return value (OPT)
20502  *
20503  * Return Code: EINVAL
20504  *		ENOTTY
20505  *		ENXIO
20506  *		EIO
20507  *		EFAULT
20508  *		ENOTSUP
20509  *		EPERM
20510  *
20511  *     Context: Called from the device switch at normal priority.
20512  */
20513 
20514 static int
20515 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20516 {
20517 	struct sd_lun	*un = NULL;
20518 	int		geom_validated = FALSE;
20519 	int		err = 0;
20520 	int		i = 0;
20521 	cred_t		*cr;
20522 
20523 	/*
20524 	 * All device accesses go thru sdstrategy where we check on suspend
20525 	 * status
20526 	 */
20527 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20528 		return (ENXIO);
20529 	}
20530 
20531 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20532 
20533 	/*
20534 	 * Moved this wait from sd_uscsi_strategy to here for
20535 	 * reasons of deadlock prevention. Internal driver commands,
20536 	 * specifically those to change a devices power level, result
20537 	 * in a call to sd_uscsi_strategy.
20538 	 */
20539 	mutex_enter(SD_MUTEX(un));
20540 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20541 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20542 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20543 	}
20544 	/*
20545 	 * Twiddling the counter here protects commands from now
20546 	 * through to the top of sd_uscsi_strategy. Without the
20547 	 * counter inc. a power down, for example, could get in
20548 	 * after the above check for state is made and before
20549 	 * execution gets to the top of sd_uscsi_strategy.
20550 	 * That would cause problems.
20551 	 */
20552 	un->un_ncmds_in_driver++;
20553 
20554 	if ((un->un_f_geometry_is_valid == FALSE) &&
20555 	    (flag & (FNDELAY | FNONBLOCK))) {
20556 		switch (cmd) {
20557 		case CDROMPAUSE:
20558 		case CDROMRESUME:
20559 		case CDROMPLAYMSF:
20560 		case CDROMPLAYTRKIND:
20561 		case CDROMREADTOCHDR:
20562 		case CDROMREADTOCENTRY:
20563 		case CDROMSTOP:
20564 		case CDROMSTART:
20565 		case CDROMVOLCTRL:
20566 		case CDROMSUBCHNL:
20567 		case CDROMREADMODE2:
20568 		case CDROMREADMODE1:
20569 		case CDROMREADOFFSET:
20570 		case CDROMSBLKMODE:
20571 		case CDROMGBLKMODE:
20572 		case CDROMGDRVSPEED:
20573 		case CDROMSDRVSPEED:
20574 		case CDROMCDDA:
20575 		case CDROMCDXA:
20576 		case CDROMSUBCODE:
20577 			if (!ISCD(un)) {
20578 				un->un_ncmds_in_driver--;
20579 				ASSERT(un->un_ncmds_in_driver >= 0);
20580 				mutex_exit(SD_MUTEX(un));
20581 				return (ENOTTY);
20582 			}
20583 			break;
20584 		case FDEJECT:
20585 		case DKIOCEJECT:
20586 		case CDROMEJECT:
20587 			if (!ISREMOVABLE(un)) {
20588 				un->un_ncmds_in_driver--;
20589 				ASSERT(un->un_ncmds_in_driver >= 0);
20590 				mutex_exit(SD_MUTEX(un));
20591 				return (ENOTTY);
20592 			}
20593 			break;
20594 		case DKIOCSVTOC:
20595 		case DKIOCSETEFI:
20596 		case DKIOCSMBOOT:
20597 			mutex_exit(SD_MUTEX(un));
20598 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
20599 			if (err != 0) {
20600 				mutex_enter(SD_MUTEX(un));
20601 				un->un_ncmds_in_driver--;
20602 				ASSERT(un->un_ncmds_in_driver >= 0);
20603 				mutex_exit(SD_MUTEX(un));
20604 				return (EIO);
20605 			}
20606 			mutex_enter(SD_MUTEX(un));
20607 			/* FALLTHROUGH */
20608 		case DKIOCREMOVABLE:
20609 		case DKIOCINFO:
20610 		case DKIOCGMEDIAINFO:
20611 		case MHIOCENFAILFAST:
20612 		case MHIOCSTATUS:
20613 		case MHIOCTKOWN:
20614 		case MHIOCRELEASE:
20615 		case MHIOCGRP_INKEYS:
20616 		case MHIOCGRP_INRESV:
20617 		case MHIOCGRP_REGISTER:
20618 		case MHIOCGRP_RESERVE:
20619 		case MHIOCGRP_PREEMPTANDABORT:
20620 		case MHIOCGRP_REGISTERANDIGNOREKEY:
20621 		case CDROMCLOSETRAY:
20622 		case USCSICMD:
20623 			goto skip_ready_valid;
20624 		default:
20625 			break;
20626 		}
20627 
20628 		mutex_exit(SD_MUTEX(un));
20629 		err = sd_ready_and_valid(un);
20630 		mutex_enter(SD_MUTEX(un));
20631 		if (err == SD_READY_NOT_VALID) {
20632 			switch (cmd) {
20633 			case DKIOCGAPART:
20634 			case DKIOCGGEOM:
20635 			case DKIOCSGEOM:
20636 			case DKIOCGVTOC:
20637 			case DKIOCSVTOC:
20638 			case DKIOCSAPART:
20639 			case DKIOCG_PHYGEOM:
20640 			case DKIOCG_VIRTGEOM:
20641 				err = ENOTSUP;
20642 				un->un_ncmds_in_driver--;
20643 				ASSERT(un->un_ncmds_in_driver >= 0);
20644 				mutex_exit(SD_MUTEX(un));
20645 				return (err);
20646 			}
20647 		}
20648 		if (err != SD_READY_VALID) {
20649 			switch (cmd) {
20650 			case DKIOCSTATE:
20651 			case CDROMGDRVSPEED:
20652 			case CDROMSDRVSPEED:
20653 			case FDEJECT:	/* for eject command */
20654 			case DKIOCEJECT:
20655 			case CDROMEJECT:
20656 			case DKIOCGETEFI:
20657 			case DKIOCSGEOM:
20658 			case DKIOCREMOVABLE:
20659 			case DKIOCSAPART:
20660 			case DKIOCSETEFI:
20661 				break;
20662 			default:
20663 				if (ISREMOVABLE(un)) {
20664 					err = ENXIO;
20665 				} else {
20666 					/* Do not map EACCES to EIO */
20667 					if (err != EACCES)
20668 						err = EIO;
20669 				}
20670 				un->un_ncmds_in_driver--;
20671 				ASSERT(un->un_ncmds_in_driver >= 0);
20672 				mutex_exit(SD_MUTEX(un));
20673 				return (err);
20674 			}
20675 		}
20676 		geom_validated = TRUE;
20677 	}
20678 	if ((un->un_f_geometry_is_valid == TRUE) &&
20679 	    (un->un_solaris_size > 0)) {
20680 		/*
20681 		 * the "geometry_is_valid" flag could be true if we
20682 		 * have an fdisk table but no Solaris partition
20683 		 */
20684 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
20685 			/* it is EFI, so return ENOTSUP for these */
20686 			switch (cmd) {
20687 			case DKIOCGAPART:
20688 			case DKIOCGGEOM:
20689 			case DKIOCGVTOC:
20690 			case DKIOCSVTOC:
20691 			case DKIOCSAPART:
20692 				err = ENOTSUP;
20693 				un->un_ncmds_in_driver--;
20694 				ASSERT(un->un_ncmds_in_driver >= 0);
20695 				mutex_exit(SD_MUTEX(un));
20696 				return (err);
20697 			}
20698 		}
20699 	}
20700 
20701 skip_ready_valid:
20702 	mutex_exit(SD_MUTEX(un));
20703 
20704 	switch (cmd) {
20705 	case DKIOCINFO:
20706 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
20707 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
20708 		break;
20709 
20710 	case DKIOCGMEDIAINFO:
20711 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
20712 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
20713 		break;
20714 
20715 	case DKIOCGGEOM:
20716 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
20717 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
20718 		    geom_validated);
20719 		break;
20720 
20721 	case DKIOCSGEOM:
20722 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
20723 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
20724 		break;
20725 
20726 	case DKIOCGAPART:
20727 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
20728 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
20729 		    geom_validated);
20730 		break;
20731 
20732 	case DKIOCSAPART:
20733 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
20734 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
20735 		break;
20736 
20737 	case DKIOCGVTOC:
20738 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
20739 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
20740 		    geom_validated);
20741 		break;
20742 
20743 	case DKIOCGETEFI:
20744 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
20745 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
20746 		break;
20747 
20748 	case DKIOCPARTITION:
20749 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
20750 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
20751 		break;
20752 
20753 	case DKIOCSVTOC:
20754 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
20755 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
20756 		break;
20757 
20758 	case DKIOCSETEFI:
20759 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
20760 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
20761 		break;
20762 
20763 	case DKIOCGMBOOT:
20764 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
20765 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
20766 		break;
20767 
20768 	case DKIOCSMBOOT:
20769 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
20770 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
20771 		break;
20772 
20773 	case DKIOCLOCK:
20774 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
20775 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20776 		    SD_PATH_STANDARD);
20777 		break;
20778 
20779 	case DKIOCUNLOCK:
20780 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
20781 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
20782 		    SD_PATH_STANDARD);
20783 		break;
20784 
20785 	case DKIOCSTATE: {
20786 		enum dkio_state		state;
20787 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
20788 
20789 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
20790 			err = EFAULT;
20791 		} else {
20792 			err = sd_check_media(dev, state);
20793 			if (err == 0) {
20794 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
20795 				    sizeof (int), flag) != 0)
20796 					err = EFAULT;
20797 			}
20798 		}
20799 		break;
20800 	}
20801 
20802 	case DKIOCREMOVABLE:
20803 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
20804 		if (ISREMOVABLE(un)) {
20805 			i = 1;
20806 		} else {
20807 			i = 0;
20808 		}
20809 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
20810 			err = EFAULT;
20811 		} else {
20812 			err = 0;
20813 		}
20814 		break;
20815 
20816 	case DKIOCGTEMPERATURE:
20817 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
20818 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
20819 		break;
20820 
20821 	case MHIOCENFAILFAST:
20822 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
20823 		if ((err = drv_priv(cred_p)) == 0) {
20824 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
20825 		}
20826 		break;
20827 
20828 	case MHIOCTKOWN:
20829 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
20830 		if ((err = drv_priv(cred_p)) == 0) {
20831 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
20832 		}
20833 		break;
20834 
20835 	case MHIOCRELEASE:
20836 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
20837 		if ((err = drv_priv(cred_p)) == 0) {
20838 			err = sd_mhdioc_release(dev);
20839 		}
20840 		break;
20841 
20842 	case MHIOCSTATUS:
20843 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
20844 		if ((err = drv_priv(cred_p)) == 0) {
20845 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
20846 			case 0:
20847 				err = 0;
20848 				break;
20849 			case EACCES:
20850 				*rval_p = 1;
20851 				err = 0;
20852 				break;
20853 			default:
20854 				err = EIO;
20855 				break;
20856 			}
20857 		}
20858 		break;
20859 
20860 	case MHIOCQRESERVE:
20861 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
20862 		if ((err = drv_priv(cred_p)) == 0) {
20863 			err = sd_reserve_release(dev, SD_RESERVE);
20864 		}
20865 		break;
20866 
20867 	case MHIOCREREGISTERDEVID:
20868 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
20869 		if (drv_priv(cred_p) == EPERM) {
20870 			err = EPERM;
20871 		} else if (ISREMOVABLE(un) || ISCD(un)) {
20872 			err = ENOTTY;
20873 		} else {
20874 			err = sd_mhdioc_register_devid(dev);
20875 		}
20876 		break;
20877 
20878 	case MHIOCGRP_INKEYS:
20879 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
20880 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
20881 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20882 				err = ENOTSUP;
20883 			} else {
20884 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
20885 				    flag);
20886 			}
20887 		}
20888 		break;
20889 
20890 	case MHIOCGRP_INRESV:
20891 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
20892 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
20893 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20894 				err = ENOTSUP;
20895 			} else {
20896 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
20897 			}
20898 		}
20899 		break;
20900 
20901 	case MHIOCGRP_REGISTER:
20902 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
20903 		if ((err = drv_priv(cred_p)) != EPERM) {
20904 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20905 				err = ENOTSUP;
20906 			} else if (arg != NULL) {
20907 				mhioc_register_t reg;
20908 				if (ddi_copyin((void *)arg, &reg,
20909 				    sizeof (mhioc_register_t), flag) != 0) {
20910 					err = EFAULT;
20911 				} else {
20912 					err =
20913 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
20914 					    un, SD_SCSI3_REGISTER,
20915 					    (uchar_t *)&reg);
20916 				}
20917 			}
20918 		}
20919 		break;
20920 
20921 	case MHIOCGRP_RESERVE:
20922 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
20923 		if ((err = drv_priv(cred_p)) != EPERM) {
20924 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20925 				err = ENOTSUP;
20926 			} else if (arg != NULL) {
20927 				mhioc_resv_desc_t resv_desc;
20928 				if (ddi_copyin((void *)arg, &resv_desc,
20929 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
20930 					err = EFAULT;
20931 				} else {
20932 					err =
20933 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
20934 					    un, SD_SCSI3_RESERVE,
20935 					    (uchar_t *)&resv_desc);
20936 				}
20937 			}
20938 		}
20939 		break;
20940 
20941 	case MHIOCGRP_PREEMPTANDABORT:
20942 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
20943 		if ((err = drv_priv(cred_p)) != EPERM) {
20944 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20945 				err = ENOTSUP;
20946 			} else if (arg != NULL) {
20947 				mhioc_preemptandabort_t preempt_abort;
20948 				if (ddi_copyin((void *)arg, &preempt_abort,
20949 				    sizeof (mhioc_preemptandabort_t),
20950 				    flag) != 0) {
20951 					err = EFAULT;
20952 				} else {
20953 					err =
20954 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
20955 					    un, SD_SCSI3_PREEMPTANDABORT,
20956 					    (uchar_t *)&preempt_abort);
20957 				}
20958 			}
20959 		}
20960 		break;
20961 
20962 	case MHIOCGRP_REGISTERANDIGNOREKEY:
20963 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
20964 		if ((err = drv_priv(cred_p)) != EPERM) {
20965 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
20966 				err = ENOTSUP;
20967 			} else if (arg != NULL) {
20968 				mhioc_registerandignorekey_t r_and_i;
20969 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
20970 				    sizeof (mhioc_registerandignorekey_t),
20971 				    flag) != 0) {
20972 					err = EFAULT;
20973 				} else {
20974 					err =
20975 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
20976 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
20977 					    (uchar_t *)&r_and_i);
20978 				}
20979 			}
20980 		}
20981 		break;
20982 
20983 	case USCSICMD:
20984 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
20985 		cr = ddi_get_cred();
20986 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
20987 			err = EPERM;
20988 		} else {
20989 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
20990 		}
20991 		break;
20992 
20993 	case CDROMPAUSE:
20994 	case CDROMRESUME:
20995 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
20996 		if (!ISCD(un)) {
20997 			err = ENOTTY;
20998 		} else {
20999 			err = sr_pause_resume(dev, cmd);
21000 		}
21001 		break;
21002 
21003 	case CDROMPLAYMSF:
21004 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21005 		if (!ISCD(un)) {
21006 			err = ENOTTY;
21007 		} else {
21008 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21009 		}
21010 		break;
21011 
21012 	case CDROMPLAYTRKIND:
21013 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21014 #if defined(__i386) || defined(__amd64)
21015 		/*
21016 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21017 		 */
21018 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21019 #else
21020 		if (!ISCD(un)) {
21021 #endif
21022 			err = ENOTTY;
21023 		} else {
21024 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21025 		}
21026 		break;
21027 
21028 	case CDROMREADTOCHDR:
21029 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21030 		if (!ISCD(un)) {
21031 			err = ENOTTY;
21032 		} else {
21033 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21034 		}
21035 		break;
21036 
21037 	case CDROMREADTOCENTRY:
21038 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21039 		if (!ISCD(un)) {
21040 			err = ENOTTY;
21041 		} else {
21042 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21043 		}
21044 		break;
21045 
21046 	case CDROMSTOP:
21047 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21048 		if (!ISCD(un)) {
21049 			err = ENOTTY;
21050 		} else {
21051 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21052 			    SD_PATH_STANDARD);
21053 		}
21054 		break;
21055 
21056 	case CDROMSTART:
21057 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21058 		if (!ISCD(un)) {
21059 			err = ENOTTY;
21060 		} else {
21061 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21062 			    SD_PATH_STANDARD);
21063 		}
21064 		break;
21065 
21066 	case CDROMCLOSETRAY:
21067 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21068 		if (!ISCD(un)) {
21069 			err = ENOTTY;
21070 		} else {
21071 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21072 			    SD_PATH_STANDARD);
21073 		}
21074 		break;
21075 
21076 	case FDEJECT:	/* for eject command */
21077 	case DKIOCEJECT:
21078 	case CDROMEJECT:
21079 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21080 		if (!ISREMOVABLE(un)) {
21081 			err = ENOTTY;
21082 		} else {
21083 			err = sr_eject(dev);
21084 		}
21085 		break;
21086 
21087 	case CDROMVOLCTRL:
21088 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21089 		if (!ISCD(un)) {
21090 			err = ENOTTY;
21091 		} else {
21092 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21093 		}
21094 		break;
21095 
21096 	case CDROMSUBCHNL:
21097 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21098 		if (!ISCD(un)) {
21099 			err = ENOTTY;
21100 		} else {
21101 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21102 		}
21103 		break;
21104 
21105 	case CDROMREADMODE2:
21106 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21107 		if (!ISCD(un)) {
21108 			err = ENOTTY;
21109 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21110 			/*
21111 			 * If the drive supports READ CD, use that instead of
21112 			 * switching the LBA size via a MODE SELECT
21113 			 * Block Descriptor
21114 			 */
21115 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21116 		} else {
21117 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21118 		}
21119 		break;
21120 
21121 	case CDROMREADMODE1:
21122 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21123 		if (!ISCD(un)) {
21124 			err = ENOTTY;
21125 		} else {
21126 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21127 		}
21128 		break;
21129 
21130 	case CDROMREADOFFSET:
21131 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21132 		if (!ISCD(un)) {
21133 			err = ENOTTY;
21134 		} else {
21135 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21136 			    flag);
21137 		}
21138 		break;
21139 
21140 	case CDROMSBLKMODE:
21141 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21142 		/*
21143 		 * There is no means of changing block size in case of atapi
21144 		 * drives, thus return ENOTTY if drive type is atapi
21145 		 */
21146 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21147 			err = ENOTTY;
21148 		} else if (un->un_f_mmc_cap == TRUE) {
21149 
21150 			/*
21151 			 * MMC Devices do not support changing the
21152 			 * logical block size
21153 			 *
21154 			 * Note: EINVAL is being returned instead of ENOTTY to
21155 			 * maintain consistancy with the original mmc
21156 			 * driver update.
21157 			 */
21158 			err = EINVAL;
21159 		} else {
21160 			mutex_enter(SD_MUTEX(un));
21161 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21162 			    (un->un_ncmds_in_transport > 0)) {
21163 				mutex_exit(SD_MUTEX(un));
21164 				err = EINVAL;
21165 			} else {
21166 				mutex_exit(SD_MUTEX(un));
21167 				err = sr_change_blkmode(dev, cmd, arg, flag);
21168 			}
21169 		}
21170 		break;
21171 
21172 	case CDROMGBLKMODE:
21173 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21174 		if (!ISCD(un)) {
21175 			err = ENOTTY;
21176 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21177 		    (un->un_f_blockcount_is_valid != FALSE)) {
21178 			/*
21179 			 * Drive is an ATAPI drive so return target block
21180 			 * size for ATAPI drives since we cannot change the
21181 			 * blocksize on ATAPI drives. Used primarily to detect
21182 			 * if an ATAPI cdrom is present.
21183 			 */
21184 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21185 			    sizeof (int), flag) != 0) {
21186 				err = EFAULT;
21187 			} else {
21188 				err = 0;
21189 			}
21190 
21191 		} else {
21192 			/*
21193 			 * Drive supports changing block sizes via a Mode
21194 			 * Select.
21195 			 */
21196 			err = sr_change_blkmode(dev, cmd, arg, flag);
21197 		}
21198 		break;
21199 
21200 	case CDROMGDRVSPEED:
21201 	case CDROMSDRVSPEED:
21202 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21203 		if (!ISCD(un)) {
21204 			err = ENOTTY;
21205 		} else if (un->un_f_mmc_cap == TRUE) {
21206 			/*
21207 			 * Note: In the future the driver implementation
21208 			 * for getting and
21209 			 * setting cd speed should entail:
21210 			 * 1) If non-mmc try the Toshiba mode page
21211 			 *    (sr_change_speed)
21212 			 * 2) If mmc but no support for Real Time Streaming try
21213 			 *    the SET CD SPEED (0xBB) command
21214 			 *   (sr_atapi_change_speed)
21215 			 * 3) If mmc and support for Real Time Streaming
21216 			 *    try the GET PERFORMANCE and SET STREAMING
21217 			 *    commands (not yet implemented, 4380808)
21218 			 */
21219 			/*
21220 			 * As per recent MMC spec, CD-ROM speed is variable
21221 			 * and changes with LBA. Since there is no such
21222 			 * things as drive speed now, fail this ioctl.
21223 			 *
21224 			 * Note: EINVAL is returned for consistancy of original
21225 			 * implementation which included support for getting
21226 			 * the drive speed of mmc devices but not setting
21227 			 * the drive speed. Thus EINVAL would be returned
21228 			 * if a set request was made for an mmc device.
21229 			 * We no longer support get or set speed for
21230 			 * mmc but need to remain consistant with regard
21231 			 * to the error code returned.
21232 			 */
21233 			err = EINVAL;
21234 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21235 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21236 		} else {
21237 			err = sr_change_speed(dev, cmd, arg, flag);
21238 		}
21239 		break;
21240 
21241 	case CDROMCDDA:
21242 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21243 		if (!ISCD(un)) {
21244 			err = ENOTTY;
21245 		} else {
21246 			err = sr_read_cdda(dev, (void *)arg, flag);
21247 		}
21248 		break;
21249 
21250 	case CDROMCDXA:
21251 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21252 		if (!ISCD(un)) {
21253 			err = ENOTTY;
21254 		} else {
21255 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21256 		}
21257 		break;
21258 
21259 	case CDROMSUBCODE:
21260 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21261 		if (!ISCD(un)) {
21262 			err = ENOTTY;
21263 		} else {
21264 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21265 		}
21266 		break;
21267 
21268 	case DKIOCPARTINFO: {
21269 		/*
21270 		 * Return parameters describing the selected disk slice.
21271 		 * Note: this ioctl is for the intel platform only
21272 		 */
21273 #if defined(__i386) || defined(__amd64)
21274 		int part;
21275 
21276 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21277 		part = SDPART(dev);
21278 
21279 		/* don't check un_solaris_size for pN */
21280 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21281 			err = EIO;
21282 		} else {
21283 			struct part_info p;
21284 
21285 			p.p_start = (daddr_t)un->un_offset[part];
21286 			p.p_length = (int)un->un_map[part].dkl_nblk;
21287 #ifdef _MULTI_DATAMODEL
21288 			switch (ddi_model_convert_from(flag & FMODELS)) {
21289 			case DDI_MODEL_ILP32:
21290 			{
21291 				struct part_info32 p32;
21292 
21293 				p32.p_start = (daddr32_t)p.p_start;
21294 				p32.p_length = p.p_length;
21295 				if (ddi_copyout(&p32, (void *)arg,
21296 				    sizeof (p32), flag))
21297 					err = EFAULT;
21298 				break;
21299 			}
21300 
21301 			case DDI_MODEL_NONE:
21302 			{
21303 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21304 				    flag))
21305 					err = EFAULT;
21306 				break;
21307 			}
21308 			}
21309 #else /* ! _MULTI_DATAMODEL */
21310 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21311 				err = EFAULT;
21312 #endif /* _MULTI_DATAMODEL */
21313 		}
21314 #else
21315 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21316 		err = ENOTTY;
21317 #endif
21318 		break;
21319 	}
21320 
21321 	case DKIOCG_PHYGEOM: {
21322 		/* Return the driver's notion of the media physical geometry */
21323 #if defined(__i386) || defined(__amd64)
21324 		struct dk_geom	disk_geom;
21325 		struct dk_geom	*dkgp = &disk_geom;
21326 
21327 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21328 		mutex_enter(SD_MUTEX(un));
21329 
21330 		if (un->un_g.dkg_nhead != 0 &&
21331 		    un->un_g.dkg_nsect != 0) {
21332 			/*
21333 			 * We succeeded in getting a geometry, but
21334 			 * right now it is being reported as just the
21335 			 * Solaris fdisk partition, just like for
21336 			 * DKIOCGGEOM. We need to change that to be
21337 			 * correct for the entire disk now.
21338 			 */
21339 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21340 			dkgp->dkg_acyl = 0;
21341 			dkgp->dkg_ncyl = un->un_blockcount /
21342 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21343 		} else {
21344 			bzero(dkgp, sizeof (struct dk_geom));
21345 			/*
21346 			 * This disk does not have a Solaris VTOC
21347 			 * so we must present a physical geometry
21348 			 * that will remain consistent regardless
21349 			 * of how the disk is used. This will ensure
21350 			 * that the geometry does not change regardless
21351 			 * of the fdisk partition type (ie. EFI, FAT32,
21352 			 * Solaris, etc).
21353 			 */
21354 			if (ISCD(un)) {
21355 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21356 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21357 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21358 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21359 			} else {
21360 				sd_convert_geometry(un->un_blockcount, dkgp);
21361 				dkgp->dkg_acyl = 0;
21362 				dkgp->dkg_ncyl = un->un_blockcount /
21363 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21364 			}
21365 		}
21366 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21367 
21368 		if (ddi_copyout(dkgp, (void *)arg,
21369 		    sizeof (struct dk_geom), flag)) {
21370 			mutex_exit(SD_MUTEX(un));
21371 			err = EFAULT;
21372 		} else {
21373 			mutex_exit(SD_MUTEX(un));
21374 			err = 0;
21375 		}
21376 #else
21377 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21378 		err = ENOTTY;
21379 #endif
21380 		break;
21381 	}
21382 
21383 	case DKIOCG_VIRTGEOM: {
21384 		/* Return the driver's notion of the media's logical geometry */
21385 #if defined(__i386) || defined(__amd64)
21386 		struct dk_geom	disk_geom;
21387 		struct dk_geom	*dkgp = &disk_geom;
21388 
21389 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21390 		mutex_enter(SD_MUTEX(un));
21391 		/*
21392 		 * If there is no HBA geometry available, or
21393 		 * if the HBA returned us something that doesn't
21394 		 * really fit into an Int 13/function 8 geometry
21395 		 * result, just fail the ioctl.  See PSARC 1998/313.
21396 		 */
21397 		if (un->un_lgeom.g_nhead == 0 ||
21398 		    un->un_lgeom.g_nsect == 0 ||
21399 		    un->un_lgeom.g_ncyl > 1024) {
21400 			mutex_exit(SD_MUTEX(un));
21401 			err = EINVAL;
21402 		} else {
21403 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21404 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21405 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21406 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21407 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21408 
21409 			if (ddi_copyout(dkgp, (void *)arg,
21410 			    sizeof (struct dk_geom), flag)) {
21411 				mutex_exit(SD_MUTEX(un));
21412 				err = EFAULT;
21413 			} else {
21414 				mutex_exit(SD_MUTEX(un));
21415 				err = 0;
21416 			}
21417 		}
21418 #else
21419 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21420 		err = ENOTTY;
21421 #endif
21422 		break;
21423 	}
21424 #ifdef SDDEBUG
21425 /* RESET/ABORTS testing ioctls */
21426 	case DKIOCRESET: {
21427 		int	reset_level;
21428 
21429 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21430 			err = EFAULT;
21431 		} else {
21432 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21433 			    "reset_level = 0x%lx\n", reset_level);
21434 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21435 				err = 0;
21436 			} else {
21437 				err = EIO;
21438 			}
21439 		}
21440 		break;
21441 	}
21442 
21443 	case DKIOCABORT:
21444 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21445 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21446 			err = 0;
21447 		} else {
21448 			err = EIO;
21449 		}
21450 		break;
21451 #endif
21452 
21453 #ifdef SD_FAULT_INJECTION
21454 /* SDIOC FaultInjection testing ioctls */
21455 	case SDIOCSTART:
21456 	case SDIOCSTOP:
21457 	case SDIOCINSERTPKT:
21458 	case SDIOCINSERTXB:
21459 	case SDIOCINSERTUN:
21460 	case SDIOCINSERTARQ:
21461 	case SDIOCPUSH:
21462 	case SDIOCRETRIEVE:
21463 	case SDIOCRUN:
21464 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
21465 		    "SDIOC detected cmd:0x%X:\n", cmd);
21466 		/* call error generator */
21467 		sd_faultinjection_ioctl(cmd, arg, un);
21468 		err = 0;
21469 		break;
21470 
21471 #endif /* SD_FAULT_INJECTION */
21472 
21473 	default:
21474 		err = ENOTTY;
21475 		break;
21476 	}
21477 	mutex_enter(SD_MUTEX(un));
21478 	un->un_ncmds_in_driver--;
21479 	ASSERT(un->un_ncmds_in_driver >= 0);
21480 	mutex_exit(SD_MUTEX(un));
21481 
21482 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
21483 	return (err);
21484 }
21485 
21486 
21487 /*
21488  *    Function: sd_uscsi_ioctl
21489  *
21490  * Description: This routine is the driver entry point for handling USCSI ioctl
21491  *		requests (USCSICMD).
21492  *
21493  *   Arguments: dev	- the device number
21494  *		arg	- user provided scsi command
21495  *		flag	- this argument is a pass through to ddi_copyxxx()
21496  *			  directly from the mode argument of ioctl().
21497  *
21498  * Return Code: code returned by sd_send_scsi_cmd
21499  *		ENXIO
21500  *		EFAULT
21501  *		EAGAIN
21502  */
21503 
21504 static int
21505 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
21506 {
21507 #ifdef _MULTI_DATAMODEL
21508 	/*
21509 	 * For use when a 32 bit app makes a call into a
21510 	 * 64 bit ioctl
21511 	 */
21512 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
21513 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
21514 	model_t			model;
21515 #endif /* _MULTI_DATAMODEL */
21516 	struct uscsi_cmd	*scmd = NULL;
21517 	struct sd_lun		*un = NULL;
21518 	enum uio_seg		uioseg;
21519 	char			cdb[CDB_GROUP0];
21520 	int			rval = 0;
21521 
21522 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21523 		return (ENXIO);
21524 	}
21525 
21526 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
21527 
21528 	scmd = (struct uscsi_cmd *)
21529 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
21530 
21531 #ifdef _MULTI_DATAMODEL
21532 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
21533 	case DDI_MODEL_ILP32:
21534 	{
21535 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
21536 			rval = EFAULT;
21537 			goto done;
21538 		}
21539 		/*
21540 		 * Convert the ILP32 uscsi data from the
21541 		 * application to LP64 for internal use.
21542 		 */
21543 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
21544 		break;
21545 	}
21546 	case DDI_MODEL_NONE:
21547 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
21548 			rval = EFAULT;
21549 			goto done;
21550 		}
21551 		break;
21552 	}
21553 #else /* ! _MULTI_DATAMODEL */
21554 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
21555 		rval = EFAULT;
21556 		goto done;
21557 	}
21558 #endif /* _MULTI_DATAMODEL */
21559 
21560 	scmd->uscsi_flags &= ~USCSI_NOINTR;
21561 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
21562 	if (un->un_f_format_in_progress == TRUE) {
21563 		rval = EAGAIN;
21564 		goto done;
21565 	}
21566 
21567 	/*
21568 	 * Gotta do the ddi_copyin() here on the uscsi_cdb so that
21569 	 * we will have a valid cdb[0] to test.
21570 	 */
21571 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
21572 	    (cdb[0] == SCMD_FORMAT)) {
21573 		SD_TRACE(SD_LOG_IOCTL, un,
21574 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
21575 		mutex_enter(SD_MUTEX(un));
21576 		un->un_f_format_in_progress = TRUE;
21577 		mutex_exit(SD_MUTEX(un));
21578 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
21579 		    SD_PATH_STANDARD);
21580 		mutex_enter(SD_MUTEX(un));
21581 		un->un_f_format_in_progress = FALSE;
21582 		mutex_exit(SD_MUTEX(un));
21583 	} else {
21584 		SD_TRACE(SD_LOG_IOCTL, un,
21585 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
21586 		/*
21587 		 * It's OK to fall into here even if the ddi_copyin()
21588 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
21589 		 * does this same copyin and will return the EFAULT
21590 		 * if it fails.
21591 		 */
21592 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
21593 		    SD_PATH_STANDARD);
21594 	}
21595 #ifdef _MULTI_DATAMODEL
21596 	switch (model) {
21597 	case DDI_MODEL_ILP32:
21598 		/*
21599 		 * Convert back to ILP32 before copyout to the
21600 		 * application
21601 		 */
21602 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
21603 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
21604 			if (rval != 0) {
21605 				rval = EFAULT;
21606 			}
21607 		}
21608 		break;
21609 	case DDI_MODEL_NONE:
21610 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
21611 			if (rval != 0) {
21612 				rval = EFAULT;
21613 			}
21614 		}
21615 		break;
21616 	}
21617 #else /* ! _MULTI_DATAMODE */
21618 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
21619 		if (rval != 0) {
21620 			rval = EFAULT;
21621 		}
21622 	}
21623 #endif /* _MULTI_DATAMODE */
21624 done:
21625 	kmem_free(scmd, sizeof (struct uscsi_cmd));
21626 
21627 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
21628 
21629 	return (rval);
21630 }
21631 
21632 
21633 /*
21634  *    Function: sd_dkio_ctrl_info
21635  *
21636  * Description: This routine is the driver entry point for handling controller
21637  *		information ioctl requests (DKIOCINFO).
21638  *
21639  *   Arguments: dev  - the device number
21640  *		arg  - pointer to user provided dk_cinfo structure
21641  *		       specifying the controller type and attributes.
21642  *		flag - this argument is a pass through to ddi_copyxxx()
21643  *		       directly from the mode argument of ioctl().
21644  *
21645  * Return Code: 0
21646  *		EFAULT
21647  *		ENXIO
21648  */
21649 
21650 static int
21651 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
21652 {
21653 	struct sd_lun	*un = NULL;
21654 	struct dk_cinfo	*info;
21655 	dev_info_t	*pdip;
21656 	int		lun, tgt;
21657 
21658 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21659 		return (ENXIO);
21660 	}
21661 
21662 	info = (struct dk_cinfo *)
21663 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
21664 
21665 	switch (un->un_ctype) {
21666 	case CTYPE_CDROM:
21667 		info->dki_ctype = DKC_CDROM;
21668 		break;
21669 	default:
21670 		info->dki_ctype = DKC_SCSI_CCS;
21671 		break;
21672 	}
21673 	pdip = ddi_get_parent(SD_DEVINFO(un));
21674 	info->dki_cnum = ddi_get_instance(pdip);
21675 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
21676 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
21677 	} else {
21678 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
21679 		    DK_DEVLEN - 1);
21680 	}
21681 
21682 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
21683 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
21684 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
21685 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
21686 
21687 	/* Unit Information */
21688 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
21689 	info->dki_slave = ((tgt << 3) | lun);
21690 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
21691 	    DK_DEVLEN - 1);
21692 	info->dki_flags = DKI_FMTVOL;
21693 	info->dki_partition = SDPART(dev);
21694 
21695 	/* Max Transfer size of this device in blocks */
21696 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
21697 	info->dki_addr = 0;
21698 	info->dki_space = 0;
21699 	info->dki_prio = 0;
21700 	info->dki_vec = 0;
21701 
21702 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
21703 		kmem_free(info, sizeof (struct dk_cinfo));
21704 		return (EFAULT);
21705 	} else {
21706 		kmem_free(info, sizeof (struct dk_cinfo));
21707 		return (0);
21708 	}
21709 }
21710 
21711 
21712 /*
21713  *    Function: sd_get_media_info
21714  *
21715  * Description: This routine is the driver entry point for handling ioctl
21716  *		requests for the media type or command set profile used by the
21717  *		drive to operate on the media (DKIOCGMEDIAINFO).
21718  *
21719  *   Arguments: dev	- the device number
21720  *		arg	- pointer to user provided dk_minfo structure
21721  *			  specifying the media type, logical block size and
21722  *			  drive capacity.
21723  *		flag	- this argument is a pass through to ddi_copyxxx()
21724  *			  directly from the mode argument of ioctl().
21725  *
21726  * Return Code: 0
21727  *		EACCESS
21728  *		EFAULT
21729  *		ENXIO
21730  *		EIO
21731  */
21732 
21733 static int
21734 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
21735 {
21736 	struct sd_lun		*un = NULL;
21737 	struct uscsi_cmd	com;
21738 	struct scsi_inquiry	*sinq;
21739 	struct dk_minfo		media_info;
21740 	u_longlong_t		media_capacity;
21741 	uint64_t		capacity;
21742 	uint_t			lbasize;
21743 	uchar_t			*out_data;
21744 	uchar_t			*rqbuf;
21745 	int			rval = 0;
21746 	int			rtn;
21747 
21748 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
21749 	    (un->un_state == SD_STATE_OFFLINE)) {
21750 		return (ENXIO);
21751 	}
21752 
21753 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
21754 
21755 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
21756 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
21757 
21758 	/* Issue a TUR to determine if the drive is ready with media present */
21759 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
21760 	if (rval == ENXIO) {
21761 		goto done;
21762 	}
21763 
21764 	/* Now get configuration data */
21765 	if (ISCD(un)) {
21766 		media_info.dki_media_type = DK_CDROM;
21767 
21768 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
21769 		if (un->un_f_mmc_cap == TRUE) {
21770 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
21771 				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
21772 
21773 			if (rtn) {
21774 				/*
21775 				 * Failed for other than an illegal request
21776 				 * or command not supported
21777 				 */
21778 				if ((com.uscsi_status == STATUS_CHECK) &&
21779 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
21780 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
21781 					    (rqbuf[12] != 0x20)) {
21782 						rval = EIO;
21783 						goto done;
21784 					}
21785 				}
21786 			} else {
21787 				/*
21788 				 * The GET CONFIGURATION command succeeded
21789 				 * so set the media type according to the
21790 				 * returned data
21791 				 */
21792 				media_info.dki_media_type = out_data[6];
21793 				media_info.dki_media_type <<= 8;
21794 				media_info.dki_media_type |= out_data[7];
21795 			}
21796 		}
21797 	} else {
21798 		/*
21799 		 * The profile list is not available, so we attempt to identify
21800 		 * the media type based on the inquiry data
21801 		 */
21802 		sinq = un->un_sd->sd_inq;
21803 		if (sinq->inq_qual == 0) {
21804 			/* This is a direct access device */
21805 			media_info.dki_media_type = DK_FIXED_DISK;
21806 
21807 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
21808 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
21809 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
21810 					media_info.dki_media_type = DK_ZIP;
21811 				} else if (
21812 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
21813 					media_info.dki_media_type = DK_JAZ;
21814 				}
21815 			}
21816 		} else {
21817 			/* Not a CD or direct access so return unknown media */
21818 			media_info.dki_media_type = DK_UNKNOWN;
21819 		}
21820 	}
21821 
21822 	/* Now read the capacity so we can provide the lbasize and capacity */
21823 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
21824 	    SD_PATH_DIRECT)) {
21825 	case 0:
21826 		break;
21827 	case EACCES:
21828 		rval = EACCES;
21829 		goto done;
21830 	default:
21831 		rval = EIO;
21832 		goto done;
21833 	}
21834 
21835 	media_info.dki_lbsize = lbasize;
21836 	media_capacity = capacity;
21837 
21838 	/*
21839 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
21840 	 * un->un_sys_blocksize chunks. So we need to convert it into
21841 	 * cap.lbasize chunks.
21842 	 */
21843 	media_capacity *= un->un_sys_blocksize;
21844 	media_capacity /= lbasize;
21845 	media_info.dki_capacity = media_capacity;
21846 
21847 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
21848 		rval = EFAULT;
21849 		/* Put goto. Anybody might add some code below in future */
21850 		goto done;
21851 	}
21852 done:
21853 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
21854 	kmem_free(rqbuf, SENSE_LENGTH);
21855 	return (rval);
21856 }
21857 
21858 
21859 /*
21860  *    Function: sd_dkio_get_geometry
21861  *
21862  * Description: This routine is the driver entry point for handling user
21863  *		requests to get the device geometry (DKIOCGGEOM).
21864  *
21865  *   Arguments: dev  - the device number
21866  *		arg  - pointer to user provided dk_geom structure specifying
21867  *			the controller's notion of the current geometry.
21868  *		flag - this argument is a pass through to ddi_copyxxx()
21869  *		       directly from the mode argument of ioctl().
21870  *		geom_validated - flag indicating if the device geometry has been
21871  *				 previously validated in the sdioctl routine.
21872  *
21873  * Return Code: 0
21874  *		EFAULT
21875  *		ENXIO
21876  *		EIO
21877  */
21878 
21879 static int
21880 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
21881 {
21882 	struct sd_lun	*un = NULL;
21883 	struct dk_geom	*tmp_geom = NULL;
21884 	int		rval = 0;
21885 
21886 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21887 		return (ENXIO);
21888 	}
21889 
21890 #if defined(__i386) || defined(__amd64)
21891 	if (un->un_solaris_size == 0) {
21892 		return (EIO);
21893 	}
21894 #endif
21895 	if (geom_validated == FALSE) {
21896 		/*
21897 		 * sd_validate_geometry does not spin a disk up
21898 		 * if it was spun down. We need to make sure it
21899 		 * is ready.
21900 		 */
21901 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
21902 			return (rval);
21903 		}
21904 		mutex_enter(SD_MUTEX(un));
21905 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
21906 		mutex_exit(SD_MUTEX(un));
21907 	}
21908 	if (rval)
21909 		return (rval);
21910 
21911 	/*
21912 	 * Make a local copy of the soft state geometry to avoid some potential
21913 	 * race conditions associated with holding the mutex and updating the
21914 	 * write_reinstruct value
21915 	 */
21916 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
21917 	mutex_enter(SD_MUTEX(un));
21918 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
21919 	mutex_exit(SD_MUTEX(un));
21920 
21921 	if (tmp_geom->dkg_write_reinstruct == 0) {
21922 		tmp_geom->dkg_write_reinstruct =
21923 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
21924 		    sd_rot_delay) / (int)60000);
21925 	}
21926 
21927 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
21928 	    flag);
21929 	if (rval != 0) {
21930 		rval = EFAULT;
21931 	}
21932 
21933 	kmem_free(tmp_geom, sizeof (struct dk_geom));
21934 	return (rval);
21935 
21936 }
21937 
21938 
21939 /*
21940  *    Function: sd_dkio_set_geometry
21941  *
21942  * Description: This routine is the driver entry point for handling user
21943  *		requests to set the device geometry (DKIOCSGEOM). The actual
21944  *		device geometry is not updated, just the driver "notion" of it.
21945  *
21946  *   Arguments: dev  - the device number
21947  *		arg  - pointer to user provided dk_geom structure used to set
21948  *			the controller's notion of the current geometry.
21949  *		flag - this argument is a pass through to ddi_copyxxx()
21950  *		       directly from the mode argument of ioctl().
21951  *
21952  * Return Code: 0
21953  *		EFAULT
21954  *		ENXIO
21955  *		EIO
21956  */
21957 
21958 static int
21959 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
21960 {
21961 	struct sd_lun	*un = NULL;
21962 	struct dk_geom	*tmp_geom;
21963 	struct dk_map	*lp;
21964 	int		rval = 0;
21965 	int		i;
21966 
21967 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21968 		return (ENXIO);
21969 	}
21970 
21971 #if defined(__i386) || defined(__amd64)
21972 	if (un->un_solaris_size == 0) {
21973 		return (EIO);
21974 	}
21975 #endif
21976 	/*
21977 	 * We need to copy the user specified geometry into local
21978 	 * storage and then update the softstate. We don't want to hold
21979 	 * the mutex and copyin directly from the user to the soft state
21980 	 */
21981 	tmp_geom = (struct dk_geom *)
21982 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
21983 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
21984 	if (rval != 0) {
21985 		kmem_free(tmp_geom, sizeof (struct dk_geom));
21986 		return (EFAULT);
21987 	}
21988 
21989 	mutex_enter(SD_MUTEX(un));
21990 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
21991 	for (i = 0; i < NDKMAP; i++) {
21992 		lp  = &un->un_map[i];
21993 		un->un_offset[i] =
21994 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
21995 #if defined(__i386) || defined(__amd64)
21996 		un->un_offset[i] += un->un_solaris_offset;
21997 #endif
21998 	}
21999 	un->un_f_geometry_is_valid = FALSE;
22000 	mutex_exit(SD_MUTEX(un));
22001 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22002 
22003 	return (rval);
22004 }
22005 
22006 
22007 /*
22008  *    Function: sd_dkio_get_partition
22009  *
22010  * Description: This routine is the driver entry point for handling user
22011  *		requests to get the partition table (DKIOCGAPART).
22012  *
22013  *   Arguments: dev  - the device number
22014  *		arg  - pointer to user provided dk_allmap structure specifying
22015  *			the controller's notion of the current partition table.
22016  *		flag - this argument is a pass through to ddi_copyxxx()
22017  *		       directly from the mode argument of ioctl().
22018  *		geom_validated - flag indicating if the device geometry has been
22019  *				 previously validated in the sdioctl routine.
22020  *
22021  * Return Code: 0
22022  *		EFAULT
22023  *		ENXIO
22024  *		EIO
22025  */
22026 
22027 static int
22028 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22029 {
22030 	struct sd_lun	*un = NULL;
22031 	int		rval = 0;
22032 	int		size;
22033 
22034 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22035 		return (ENXIO);
22036 	}
22037 
22038 #if defined(__i386) || defined(__amd64)
22039 	if (un->un_solaris_size == 0) {
22040 		return (EIO);
22041 	}
22042 #endif
22043 	/*
22044 	 * Make sure the geometry is valid before getting the partition
22045 	 * information.
22046 	 */
22047 	mutex_enter(SD_MUTEX(un));
22048 	if (geom_validated == FALSE) {
22049 		/*
22050 		 * sd_validate_geometry does not spin a disk up
22051 		 * if it was spun down. We need to make sure it
22052 		 * is ready before validating the geometry.
22053 		 */
22054 		mutex_exit(SD_MUTEX(un));
22055 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22056 			return (rval);
22057 		}
22058 		mutex_enter(SD_MUTEX(un));
22059 
22060 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22061 			mutex_exit(SD_MUTEX(un));
22062 			return (rval);
22063 		}
22064 	}
22065 	mutex_exit(SD_MUTEX(un));
22066 
22067 #ifdef _MULTI_DATAMODEL
22068 	switch (ddi_model_convert_from(flag & FMODELS)) {
22069 	case DDI_MODEL_ILP32: {
22070 		struct dk_map32 dk_map32[NDKMAP];
22071 		int		i;
22072 
22073 		for (i = 0; i < NDKMAP; i++) {
22074 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22075 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22076 		}
22077 		size = NDKMAP * sizeof (struct dk_map32);
22078 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22079 		if (rval != 0) {
22080 			rval = EFAULT;
22081 		}
22082 		break;
22083 	}
22084 	case DDI_MODEL_NONE:
22085 		size = NDKMAP * sizeof (struct dk_map);
22086 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22087 		if (rval != 0) {
22088 			rval = EFAULT;
22089 		}
22090 		break;
22091 	}
22092 #else /* ! _MULTI_DATAMODEL */
22093 	size = NDKMAP * sizeof (struct dk_map);
22094 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22095 	if (rval != 0) {
22096 		rval = EFAULT;
22097 	}
22098 #endif /* _MULTI_DATAMODEL */
22099 	return (rval);
22100 }
22101 
22102 
22103 /*
22104  *    Function: sd_dkio_set_partition
22105  *
22106  * Description: This routine is the driver entry point for handling user
22107  *		requests to set the partition table (DKIOCSAPART). The actual
22108  *		device partition is not updated.
22109  *
22110  *   Arguments: dev  - the device number
22111  *		arg  - pointer to user provided dk_allmap structure used to set
22112  *			the controller's notion of the partition table.
22113  *		flag - this argument is a pass through to ddi_copyxxx()
22114  *		       directly from the mode argument of ioctl().
22115  *
22116  * Return Code: 0
22117  *		EINVAL
22118  *		EFAULT
22119  *		ENXIO
22120  *		EIO
22121  */
22122 
22123 static int
22124 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22125 {
22126 	struct sd_lun	*un = NULL;
22127 	struct dk_map	dk_map[NDKMAP];
22128 	struct dk_map	*lp;
22129 	int		rval = 0;
22130 	int		size;
22131 	int		i;
22132 #if defined(_SUNOS_VTOC_16)
22133 	struct dkl_partition	*vp;
22134 #endif
22135 
22136 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22137 		return (ENXIO);
22138 	}
22139 
22140 	/*
22141 	 * Set the map for all logical partitions.  We lock
22142 	 * the priority just to make sure an interrupt doesn't
22143 	 * come in while the map is half updated.
22144 	 */
22145 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22146 	mutex_enter(SD_MUTEX(un));
22147 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22148 		mutex_exit(SD_MUTEX(un));
22149 		return (ENOTSUP);
22150 	}
22151 	mutex_exit(SD_MUTEX(un));
22152 	if (un->un_solaris_size == 0) {
22153 		return (EIO);
22154 	}
22155 
22156 #ifdef _MULTI_DATAMODEL
22157 	switch (ddi_model_convert_from(flag & FMODELS)) {
22158 	case DDI_MODEL_ILP32: {
22159 		struct dk_map32 dk_map32[NDKMAP];
22160 
22161 		size = NDKMAP * sizeof (struct dk_map32);
22162 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22163 		if (rval != 0) {
22164 			return (EFAULT);
22165 		}
22166 		for (i = 0; i < NDKMAP; i++) {
22167 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22168 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22169 		}
22170 		break;
22171 	}
22172 	case DDI_MODEL_NONE:
22173 		size = NDKMAP * sizeof (struct dk_map);
22174 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22175 		if (rval != 0) {
22176 			return (EFAULT);
22177 		}
22178 		break;
22179 	}
22180 #else /* ! _MULTI_DATAMODEL */
22181 	size = NDKMAP * sizeof (struct dk_map);
22182 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22183 	if (rval != 0) {
22184 		return (EFAULT);
22185 	}
22186 #endif /* _MULTI_DATAMODEL */
22187 
22188 	mutex_enter(SD_MUTEX(un));
22189 	/* Note: The size used in this bcopy is set based upon the data model */
22190 	bcopy(dk_map, un->un_map, size);
22191 #if defined(_SUNOS_VTOC_16)
22192 	vp = (struct dkl_partition *)&(un->un_vtoc);
22193 #endif	/* defined(_SUNOS_VTOC_16) */
22194 	for (i = 0; i < NDKMAP; i++) {
22195 		lp  = &un->un_map[i];
22196 		un->un_offset[i] =
22197 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22198 #if defined(_SUNOS_VTOC_16)
22199 		vp->p_start = un->un_offset[i];
22200 		vp->p_size = lp->dkl_nblk;
22201 		vp++;
22202 #endif	/* defined(_SUNOS_VTOC_16) */
22203 #if defined(__i386) || defined(__amd64)
22204 		un->un_offset[i] += un->un_solaris_offset;
22205 #endif
22206 	}
22207 	mutex_exit(SD_MUTEX(un));
22208 	return (rval);
22209 }
22210 
22211 
22212 /*
22213  *    Function: sd_dkio_get_vtoc
22214  *
22215  * Description: This routine is the driver entry point for handling user
22216  *		requests to get the current volume table of contents
22217  *		(DKIOCGVTOC).
22218  *
22219  *   Arguments: dev  - the device number
22220  *		arg  - pointer to user provided vtoc structure specifying
22221  *			the current vtoc.
22222  *		flag - this argument is a pass through to ddi_copyxxx()
22223  *		       directly from the mode argument of ioctl().
22224  *		geom_validated - flag indicating if the device geometry has been
22225  *				 previously validated in the sdioctl routine.
22226  *
22227  * Return Code: 0
22228  *		EFAULT
22229  *		ENXIO
22230  *		EIO
22231  */
22232 
22233 static int
22234 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22235 {
22236 	struct sd_lun	*un = NULL;
22237 #if defined(_SUNOS_VTOC_8)
22238 	struct vtoc	user_vtoc;
22239 #endif	/* defined(_SUNOS_VTOC_8) */
22240 	int		rval = 0;
22241 
22242 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22243 		return (ENXIO);
22244 	}
22245 
22246 	mutex_enter(SD_MUTEX(un));
22247 	if (geom_validated == FALSE) {
22248 		/*
22249 		 * sd_validate_geometry does not spin a disk up
22250 		 * if it was spun down. We need to make sure it
22251 		 * is ready.
22252 		 */
22253 		mutex_exit(SD_MUTEX(un));
22254 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22255 			return (rval);
22256 		}
22257 		mutex_enter(SD_MUTEX(un));
22258 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22259 			mutex_exit(SD_MUTEX(un));
22260 			return (rval);
22261 		}
22262 	}
22263 
22264 #if defined(_SUNOS_VTOC_8)
22265 	sd_build_user_vtoc(un, &user_vtoc);
22266 	mutex_exit(SD_MUTEX(un));
22267 
22268 #ifdef _MULTI_DATAMODEL
22269 	switch (ddi_model_convert_from(flag & FMODELS)) {
22270 	case DDI_MODEL_ILP32: {
22271 		struct vtoc32 user_vtoc32;
22272 
22273 		vtoctovtoc32(user_vtoc, user_vtoc32);
22274 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22275 		    sizeof (struct vtoc32), flag)) {
22276 			return (EFAULT);
22277 		}
22278 		break;
22279 	}
22280 
22281 	case DDI_MODEL_NONE:
22282 		if (ddi_copyout(&user_vtoc, (void *)arg,
22283 		    sizeof (struct vtoc), flag)) {
22284 			return (EFAULT);
22285 		}
22286 		break;
22287 	}
22288 #else /* ! _MULTI_DATAMODEL */
22289 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22290 		return (EFAULT);
22291 	}
22292 #endif /* _MULTI_DATAMODEL */
22293 
22294 #elif defined(_SUNOS_VTOC_16)
22295 	mutex_exit(SD_MUTEX(un));
22296 
22297 #ifdef _MULTI_DATAMODEL
22298 	/*
22299 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22300 	 * 32-bit to maintain compatibility with existing on-disk
22301 	 * structures.  Thus, we need to convert the structure when copying
22302 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22303 	 * program.  If the target is a 32-bit program, then no conversion
22304 	 * is necessary.
22305 	 */
22306 	/* LINTED: logical expression always true: op "||" */
22307 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22308 	switch (ddi_model_convert_from(flag & FMODELS)) {
22309 	case DDI_MODEL_ILP32:
22310 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22311 		    sizeof (un->un_vtoc), flag)) {
22312 			return (EFAULT);
22313 		}
22314 		break;
22315 
22316 	case DDI_MODEL_NONE: {
22317 		struct vtoc user_vtoc;
22318 
22319 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22320 		if (ddi_copyout(&user_vtoc, (void *)arg,
22321 		    sizeof (struct vtoc), flag)) {
22322 			return (EFAULT);
22323 		}
22324 		break;
22325 	}
22326 	}
22327 #else /* ! _MULTI_DATAMODEL */
22328 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
22329 	    flag)) {
22330 		return (EFAULT);
22331 	}
22332 #endif /* _MULTI_DATAMODEL */
22333 #else
22334 #error "No VTOC format defined."
22335 #endif
22336 
22337 	return (rval);
22338 }
22339 
22340 static int
22341 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22342 {
22343 	struct sd_lun	*un = NULL;
22344 	dk_efi_t	user_efi;
22345 	int		rval = 0;
22346 	void		*buffer;
22347 
22348 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
22349 		return (ENXIO);
22350 
22351 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
22352 		return (EFAULT);
22353 
22354 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
22355 
22356 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
22357 	    (user_efi.dki_length > un->un_max_xfer_size))
22358 		return (EINVAL);
22359 
22360 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
22361 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
22362 	    user_efi.dki_lba, SD_PATH_DIRECT);
22363 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
22364 	    user_efi.dki_length, flag) != 0)
22365 		rval = EFAULT;
22366 
22367 	kmem_free(buffer, user_efi.dki_length);
22368 	return (rval);
22369 }
22370 
22371 /*
22372  *    Function: sd_build_user_vtoc
22373  *
22374  * Description: This routine populates a pass by reference variable with the
22375  *		current volume table of contents.
22376  *
22377  *   Arguments: un - driver soft state (unit) structure
22378  *		user_vtoc - pointer to vtoc structure to be populated
22379  */
22380 
22381 static void
22382 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22383 {
22384 	struct dk_map2		*lpart;
22385 	struct dk_map		*lmap;
22386 	struct partition	*vpart;
22387 	int			nblks;
22388 	int			i;
22389 
22390 	ASSERT(mutex_owned(SD_MUTEX(un)));
22391 
22392 	/*
22393 	 * Return vtoc structure fields in the provided VTOC area, addressed
22394 	 * by *vtoc.
22395 	 */
22396 	bzero(user_vtoc, sizeof (struct vtoc));
22397 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
22398 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
22399 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
22400 	user_vtoc->v_sanity	= VTOC_SANE;
22401 	user_vtoc->v_version	= un->un_vtoc.v_version;
22402 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
22403 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
22404 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
22405 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
22406 	    sizeof (un->un_vtoc.v_reserved));
22407 	/*
22408 	 * Convert partitioning information.
22409 	 *
22410 	 * Note the conversion from starting cylinder number
22411 	 * to starting sector number.
22412 	 */
22413 	lmap = un->un_map;
22414 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
22415 	vpart = user_vtoc->v_part;
22416 
22417 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22418 
22419 	for (i = 0; i < V_NUMPAR; i++) {
22420 		vpart->p_tag	= lpart->p_tag;
22421 		vpart->p_flag	= lpart->p_flag;
22422 		vpart->p_start	= lmap->dkl_cylno * nblks;
22423 		vpart->p_size	= lmap->dkl_nblk;
22424 		lmap++;
22425 		lpart++;
22426 		vpart++;
22427 
22428 		/* (4364927) */
22429 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
22430 	}
22431 
22432 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
22433 }
22434 
22435 static int
22436 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
22437 {
22438 	struct sd_lun		*un = NULL;
22439 	struct partition64	p64;
22440 	int			rval = 0;
22441 	uint_t			nparts;
22442 	efi_gpe_t		*partitions;
22443 	efi_gpt_t		*buffer;
22444 	diskaddr_t		gpe_lba;
22445 
22446 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22447 		return (ENXIO);
22448 	}
22449 
22450 	if (ddi_copyin((const void *)arg, &p64,
22451 	    sizeof (struct partition64), flag)) {
22452 		return (EFAULT);
22453 	}
22454 
22455 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
22456 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
22457 		1, SD_PATH_DIRECT);
22458 	if (rval != 0)
22459 		goto done_error;
22460 
22461 	sd_swap_efi_gpt(buffer);
22462 
22463 	if ((rval = sd_validate_efi(buffer)) != 0)
22464 		goto done_error;
22465 
22466 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
22467 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
22468 	if (p64.p_partno > nparts) {
22469 		/* couldn't find it */
22470 		rval = ESRCH;
22471 		goto done_error;
22472 	}
22473 	/*
22474 	 * if we're dealing with a partition that's out of the normal
22475 	 * 16K block, adjust accordingly
22476 	 */
22477 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
22478 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
22479 			gpe_lba, SD_PATH_DIRECT);
22480 	if (rval) {
22481 		goto done_error;
22482 	}
22483 	partitions = (efi_gpe_t *)buffer;
22484 
22485 	sd_swap_efi_gpe(nparts, partitions);
22486 
22487 	partitions += p64.p_partno;
22488 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
22489 	    sizeof (struct uuid));
22490 	p64.p_start = partitions->efi_gpe_StartingLBA;
22491 	p64.p_size = partitions->efi_gpe_EndingLBA -
22492 			p64.p_start + 1;
22493 
22494 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
22495 		rval = EFAULT;
22496 
22497 done_error:
22498 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
22499 	return (rval);
22500 }
22501 
22502 
22503 /*
22504  *    Function: sd_dkio_set_vtoc
22505  *
22506  * Description: This routine is the driver entry point for handling user
22507  *		requests to set the current volume table of contents
22508  *		(DKIOCSVTOC).
22509  *
22510  *   Arguments: dev  - the device number
22511  *		arg  - pointer to user provided vtoc structure used to set the
22512  *			current vtoc.
22513  *		flag - this argument is a pass through to ddi_copyxxx()
22514  *		       directly from the mode argument of ioctl().
22515  *
22516  * Return Code: 0
22517  *		EFAULT
22518  *		ENXIO
22519  *		EINVAL
22520  *		ENOTSUP
22521  */
22522 
static int
sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct vtoc	user_vtoc;
	int		rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#if defined(__i386) || defined(__amd64)
	/*
	 * The x86 VTOC code assumes the target block size matches the
	 * system block size; reject the request otherwise.
	 */
	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
		return (EINVAL);
	}
#endif

#ifdef _MULTI_DATAMODEL
	/* Copy in the user's vtoc, converting from the ILP32 layout if so. */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct vtoc32 user_vtoc32;

		if (ddi_copyin((const void *)arg, &user_vtoc32,
		    sizeof (struct vtoc32), flag)) {
			return (EFAULT);
		}
		vtoc32tovtoc(user_vtoc32, user_vtoc);
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyin((const void *)arg, &user_vtoc,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin((const void *)arg, &user_vtoc,
	    sizeof (struct vtoc), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	mutex_enter(SD_MUTEX(un));
	/* Disks beyond DK_MAX_BLOCKS require an EFI label, not a VTOC. */
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		mutex_exit(SD_MUTEX(un));
		return (ENOTSUP);
	}
	/* Zero cylinders means there is no usable geometry to label. */
	if (un->un_g.dkg_ncyl == 0) {
		mutex_exit(SD_MUTEX(un));
		return (EINVAL);
	}

	/*
	 * Writing a VTOC supersedes any EFI label: wipe the on-disk EFI
	 * labels and swap the EFI "wd" minor nodes for the VTOC "h" nodes.
	 * The mutex must be dropped here (sd_clear_efi asserts it is not
	 * held, and the DDI minor-node routines may block).
	 */
	mutex_exit(SD_MUTEX(un));
	sd_clear_efi(un);
	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	mutex_enter(SD_MUTEX(un));

	/* Build the in-core label from the user vtoc, then write it out. */
	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
		if ((rval = sd_write_label(dev)) == 0) {
			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
			    != 0) {
				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
				    "sd_dkio_set_vtoc: "
				    "Failed validate geometry\n");
			}
		}
	}

	/*
	 * Even if sd_build_label_vtoc or sd_write_label failed above, write
	 * the devid anyway; what can it hurt? Also preserve the device id by
	 * writing to the disk acyl for the case where a devid has been
	 * fabricated.
	 */
	if (!ISREMOVABLE(un) && !ISCD(un) &&
	    (un->un_f_opt_fab_devid == TRUE)) {
		if (un->un_devid == NULL) {
			sd_register_devid(un, SD_DEVINFO(un),
			    SD_TARGET_IS_UNRESERVED);
		} else {
			/*
			 * The device id for this disk has been
			 * fabricated. Fabricated device id's are
			 * managed by storing them in the last 2
			 * available sectors on the drive. The device
			 * id must be preserved by writing it back out
			 * to this location.
			 */
			if (sd_write_deviceid(un) != 0) {
				ddi_devid_free(un->un_devid);
				un->un_devid = NULL;
			}
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
22629 
22630 
22631 /*
22632  *    Function: sd_build_label_vtoc
22633  *
22634  * Description: This routine updates the driver soft state current volume table
22635  *		of contents based on a user specified vtoc.
22636  *
22637  *   Arguments: un - driver soft state (unit) structure
22638  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
22639  *			    to update the driver soft state.
22640  *
22641  * Return Code: 0
22642  *		EINVAL
22643  */
22644 
static int
sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
{
	struct dk_map		*lmap;
	struct partition	*vpart;
	int			nblks;
#if defined(_SUNOS_VTOC_8)
	int			ncyl;
	struct dk_map2		*lpart;
#endif	/* defined(_SUNOS_VTOC_8) */
	int			i;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Sanity-check the vtoc */
	if (user_vtoc->v_sanity != VTOC_SANE ||
	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
	    user_vtoc->v_nparts != V_NUMPAR) {
		return (EINVAL);
	}

	/* Blocks per cylinder; zero means the geometry is unusable. */
	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
	if (nblks == 0) {
		return (EINVAL);
	}

#if defined(_SUNOS_VTOC_8)
	/*
	 * On VTOC_8 platforms, every partition must start on a cylinder
	 * boundary and must not extend beyond the last cylinder.
	 */
	vpart = user_vtoc->v_part;
	for (i = 0; i < V_NUMPAR; i++) {
		if ((vpart->p_start % nblks) != 0) {
			return (EINVAL);
		}
		ncyl = vpart->p_start / nblks;
		/* Round a partial trailing cylinder up. */
		ncyl += vpart->p_size / nblks;
		if ((vpart->p_size % nblks) != 0) {
			ncyl++;
		}
		if (ncyl > (int)un->un_g.dkg_ncyl) {
			return (EINVAL);
		}
		vpart++;
	}
#endif	/* defined(_SUNOS_VTOC_8) */

	/* Put appropriate vtoc structure fields into the disk label */
#if defined(_SUNOS_VTOC_16)
	/*
	 * The vtoc is always a 32bit data structure to maintain the
	 * on-disk format. Convert "in place" instead of bcopying it.
	 */
	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));

	/*
	 * in the 16-slice vtoc, starting sectors are expressed in
	 * numbers *relative* to the start of the Solaris fdisk partition.
	 */
	lmap = un->un_map;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;
	}

#elif defined(_SUNOS_VTOC_8)

	/* Copy the user vtoc fields into the 8-slice on-disk vtoc. */
	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];

	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;

	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);

	un->un_vtoc.v_nparts = user_vtoc->v_nparts;

	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
	    sizeof (un->un_vtoc.v_reserved));

	/*
	 * Note the conversion from starting sector number
	 * to starting cylinder number.
	 * Return error if division results in a remainder.
	 */
	lmap = un->un_map;
	lpart = un->un_vtoc.v_part;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
		lpart->p_tag  = vpart->p_tag;
		lpart->p_flag = vpart->p_flag;
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;

		lmap++;
		lpart++;
		vpart++;

		/* (4387723) */
#ifdef _LP64
		/* Clamp 64-bit timestamps to fit the 32-bit on-disk field. */
		if (user_vtoc->timestamp[i] > TIME32_MAX) {
			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
		} else {
			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
		}
#else
		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
#endif
	}

	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#else
#error "No VTOC format defined."
#endif
	return (0);
}
22762 
22763 /*
22764  *    Function: sd_clear_efi
22765  *
22766  * Description: This routine clears all EFI labels.
22767  *
22768  *   Arguments: un - driver soft state (unit) structure
22769  *
22770  * Return Code: void
22771  */
22772 
22773 static void
22774 sd_clear_efi(struct sd_lun *un)
22775 {
22776 	efi_gpt_t	*gpt;
22777 	uint_t		lbasize;
22778 	uint64_t	cap;
22779 	int rval;
22780 
22781 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22782 
22783 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
22784 
22785 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
22786 		goto done;
22787 	}
22788 
22789 	sd_swap_efi_gpt(gpt);
22790 	rval = sd_validate_efi(gpt);
22791 	if (rval == 0) {
22792 		/* clear primary */
22793 		bzero(gpt, sizeof (efi_gpt_t));
22794 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
22795 			SD_PATH_DIRECT))) {
22796 			SD_INFO(SD_LOG_IO_PARTITION, un,
22797 				"sd_clear_efi: clear primary label failed\n");
22798 		}
22799 	}
22800 	/* the backup */
22801 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
22802 	    SD_PATH_DIRECT);
22803 	if (rval) {
22804 		goto done;
22805 	}
22806 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
22807 	    cap - 1, SD_PATH_DIRECT)) != 0) {
22808 		goto done;
22809 	}
22810 	sd_swap_efi_gpt(gpt);
22811 	rval = sd_validate_efi(gpt);
22812 	if (rval == 0) {
22813 		/* clear backup */
22814 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
22815 			cap-1);
22816 		bzero(gpt, sizeof (efi_gpt_t));
22817 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
22818 		    cap-1, SD_PATH_DIRECT))) {
22819 			SD_INFO(SD_LOG_IO_PARTITION, un,
22820 				"sd_clear_efi: clear backup label failed\n");
22821 		}
22822 	}
22823 
22824 done:
22825 	kmem_free(gpt, sizeof (efi_gpt_t));
22826 }
22827 
22828 /*
22829  *    Function: sd_set_vtoc
22830  *
22831  * Description: This routine writes data to the appropriate positions
22832  *
22833  *   Arguments: un - driver soft state (unit) structure
22834  *              dkl  - the data to be written
22835  *
22836  * Return: void
22837  */
22838 
22839 static int
22840 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
22841 {
22842 	void			*shadow_buf;
22843 	uint_t			label_addr;
22844 	int			sec;
22845 	int			blk;
22846 	int			head;
22847 	int			cyl;
22848 	int			rval;
22849 
22850 #if defined(__i386) || defined(__amd64)
22851 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
22852 #else
22853 	/* Write the primary label at block 0 of the solaris partition. */
22854 	label_addr = 0;
22855 #endif
22856 
22857 	if (NOT_DEVBSIZE(un)) {
22858 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
22859 		/*
22860 		 * Read the target's first block.
22861 		 */
22862 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
22863 		    un->un_tgt_blocksize, label_addr,
22864 		    SD_PATH_STANDARD)) != 0) {
22865 			goto exit;
22866 		}
22867 		/*
22868 		 * Copy the contents of the label into the shadow buffer
22869 		 * which is of the size of target block size.
22870 		 */
22871 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
22872 	}
22873 
22874 	/* Write the primary label */
22875 	if (NOT_DEVBSIZE(un)) {
22876 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
22877 		    label_addr, SD_PATH_STANDARD);
22878 	} else {
22879 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
22880 		    label_addr, SD_PATH_STANDARD);
22881 	}
22882 	if (rval != 0) {
22883 		return (rval);
22884 	}
22885 
22886 	/*
22887 	 * Calculate where the backup labels go.  They are always on
22888 	 * the last alternate cylinder, but some older drives put them
22889 	 * on head 2 instead of the last head.	They are always on the
22890 	 * first 5 odd sectors of the appropriate track.
22891 	 *
22892 	 * We have no choice at this point, but to believe that the
22893 	 * disk label is valid.	 Use the geometry of the disk
22894 	 * as described in the label.
22895 	 */
22896 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
22897 	head = dkl->dkl_nhead - 1;
22898 
22899 	/*
22900 	 * Write and verify the backup labels. Make sure we don't try to
22901 	 * write past the last cylinder.
22902 	 */
22903 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
22904 		blk = (daddr_t)(
22905 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
22906 		    (head * dkl->dkl_nsect) + sec);
22907 #if defined(__i386) || defined(__amd64)
22908 		blk += un->un_solaris_offset;
22909 #endif
22910 		if (NOT_DEVBSIZE(un)) {
22911 			uint64_t	tblk;
22912 			/*
22913 			 * Need to read the block first for read modify write.
22914 			 */
22915 			tblk = (uint64_t)blk;
22916 			blk = (int)((tblk * un->un_sys_blocksize) /
22917 			    un->un_tgt_blocksize);
22918 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
22919 			    un->un_tgt_blocksize, blk,
22920 			    SD_PATH_STANDARD)) != 0) {
22921 				goto exit;
22922 			}
22923 			/*
22924 			 * Modify the shadow buffer with the label.
22925 			 */
22926 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
22927 			rval = sd_send_scsi_WRITE(un, shadow_buf,
22928 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
22929 		} else {
22930 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
22931 			    blk, SD_PATH_STANDARD);
22932 			SD_INFO(SD_LOG_IO_PARTITION, un,
22933 			"sd_set_vtoc: wrote backup label %d\n", blk);
22934 		}
22935 		if (rval != 0) {
22936 			goto exit;
22937 		}
22938 	}
22939 exit:
22940 	if (NOT_DEVBSIZE(un)) {
22941 		kmem_free(shadow_buf, un->un_tgt_blocksize);
22942 	}
22943 	return (rval);
22944 }
22945 
22946 /*
22947  *    Function: sd_clear_vtoc
22948  *
22949  * Description: This routine clears out the VTOC labels.
22950  *
22951  *   Arguments: un - driver soft state (unit) structure
22952  *
22953  * Return: void
22954  */
22955 
22956 static void
22957 sd_clear_vtoc(struct sd_lun *un)
22958 {
22959 	struct dk_label		*dkl;
22960 
22961 	mutex_exit(SD_MUTEX(un));
22962 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
22963 	mutex_enter(SD_MUTEX(un));
22964 	/*
22965 	 * sd_set_vtoc uses these fields in order to figure out
22966 	 * where to overwrite the backup labels
22967 	 */
22968 	dkl->dkl_apc    = un->un_g.dkg_apc;
22969 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
22970 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
22971 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
22972 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
22973 	mutex_exit(SD_MUTEX(un));
22974 	(void) sd_set_vtoc(un, dkl);
22975 	kmem_free(dkl, sizeof (struct dk_label));
22976 
22977 	mutex_enter(SD_MUTEX(un));
22978 }
22979 
22980 /*
22981  *    Function: sd_write_label
22982  *
22983  * Description: This routine will validate and write the driver soft state vtoc
22984  *		contents to the device.
22985  *
22986  *   Arguments: dev - the device number
22987  *
22988  * Return Code: the code returned by sd_send_scsi_cmd()
22989  *		0
22990  *		EINVAL
22991  *		ENXIO
22992  *		ENOMEM
22993  */
22994 
22995 static int
22996 sd_write_label(dev_t dev)
22997 {
22998 	struct sd_lun		*un;
22999 	struct dk_label		*dkl;
23000 	short			sum;
23001 	short			*sp;
23002 	int			i;
23003 	int			rval;
23004 
23005 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23006 	    (un->un_state == SD_STATE_OFFLINE)) {
23007 		return (ENXIO);
23008 	}
23009 	ASSERT(mutex_owned(SD_MUTEX(un)));
23010 	mutex_exit(SD_MUTEX(un));
23011 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23012 	mutex_enter(SD_MUTEX(un));
23013 
23014 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23015 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23016 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23017 	dkl->dkl_apc	= un->un_g.dkg_apc;
23018 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23019 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23020 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23021 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23022 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23023 
23024 #if defined(_SUNOS_VTOC_8)
23025 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23026 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23027 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23028 	for (i = 0; i < NDKMAP; i++) {
23029 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23030 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23031 	}
23032 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23033 #elif defined(_SUNOS_VTOC_16)
23034 	dkl->dkl_skew	= un->un_dkg_skew;
23035 #else
23036 #error "No VTOC format defined."
23037 #endif
23038 
23039 	dkl->dkl_magic			= DKL_MAGIC;
23040 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23041 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23042 
23043 	/* Construct checksum for the new disk label */
23044 	sum = 0;
23045 	sp = (short *)dkl;
23046 	i = sizeof (struct dk_label) / sizeof (short);
23047 	while (i--) {
23048 		sum ^= *sp++;
23049 	}
23050 	dkl->dkl_cksum = sum;
23051 
23052 	mutex_exit(SD_MUTEX(un));
23053 
23054 	rval = sd_set_vtoc(un, dkl);
23055 exit:
23056 	kmem_free(dkl, sizeof (struct dk_label));
23057 	mutex_enter(SD_MUTEX(un));
23058 	return (rval);
23059 }
23060 
23061 static int
23062 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23063 {
23064 	struct sd_lun	*un = NULL;
23065 	dk_efi_t	user_efi;
23066 	int		rval = 0;
23067 	void		*buffer;
23068 
23069 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23070 		return (ENXIO);
23071 
23072 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23073 		return (EFAULT);
23074 
23075 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23076 
23077 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23078 	    (user_efi.dki_length > un->un_max_xfer_size))
23079 		return (EINVAL);
23080 
23081 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23082 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23083 		rval = EFAULT;
23084 	} else {
23085 		/*
23086 		 * let's clear the vtoc labels and clear the softstate
23087 		 * vtoc.
23088 		 */
23089 		mutex_enter(SD_MUTEX(un));
23090 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23091 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23092 				"sd_dkio_set_efi: CLEAR VTOC\n");
23093 			sd_clear_vtoc(un);
23094 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23095 			mutex_exit(SD_MUTEX(un));
23096 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23097 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23098 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23099 			    S_IFBLK,
23100 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23101 			    un->un_node_type, NULL);
23102 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23103 			    S_IFCHR,
23104 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23105 			    un->un_node_type, NULL);
23106 		} else
23107 			mutex_exit(SD_MUTEX(un));
23108 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23109 		    user_efi.dki_lba, SD_PATH_DIRECT);
23110 		if (rval == 0) {
23111 			mutex_enter(SD_MUTEX(un));
23112 			un->un_f_geometry_is_valid = FALSE;
23113 			mutex_exit(SD_MUTEX(un));
23114 		}
23115 	}
23116 	kmem_free(buffer, user_efi.dki_length);
23117 	return (rval);
23118 }
23119 
23120 /*
23121  *    Function: sd_dkio_get_mboot
23122  *
23123  * Description: This routine is the driver entry point for handling user
23124  *		requests to get the current device mboot (DKIOCGMBOOT)
23125  *
23126  *   Arguments: dev  - the device number
23127  *		arg  - pointer to user provided mboot structure specifying
23128  *			the current mboot.
23129  *		flag - this argument is a pass through to ddi_copyxxx()
23130  *		       directly from the mode argument of ioctl().
23131  *
23132  * Return Code: 0
23133  *		EINVAL
23134  *		EFAULT
23135  *		ENXIO
23136  */
23137 
static int
sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un;
	struct mboot	*mboot;
	int		rval;
	size_t		buffer_size;

	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Note: the "if" condition is selected by the VTOC flavor; on
	 * VTOC_8 (sparc) platforms mboot is only supported on removable
	 * media.  Exactly one of the two macros is defined per platform,
	 * so the brace pairing below is always well-formed.
	 */
#if defined(_SUNOS_VTOC_8)
	if ((!ISREMOVABLE(un)) || (arg == NULL)) {
#elif defined(_SUNOS_VTOC_16)
	if (arg == NULL) {
#endif
		return (EINVAL);
	}

	/*
	 * Read the mboot block, located at absolute block 0 on the target.
	 */
	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);

	/* The read is a full target block; copy out only the mboot part. */
	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
	    SD_PATH_STANDARD)) == 0) {
		if (ddi_copyout(mboot, (void *)arg,
		    sizeof (struct mboot), flag) != 0) {
			rval = EFAULT;
		}
	}
	kmem_free(mboot, buffer_size);
	return (rval);
}
23178 
23179 
23180 /*
23181  *    Function: sd_dkio_set_mboot
23182  *
23183  * Description: This routine is the driver entry point for handling user
23184  *		requests to validate and set the device master boot
23185  *		(DKIOCSMBOOT).
23186  *
23187  *   Arguments: dev  - the device number
23188  *		arg  - pointer to user provided mboot structure used to set the
23189  *			master boot.
23190  *		flag - this argument is a pass through to ddi_copyxxx()
23191  *		       directly from the mode argument of ioctl().
23192  *
23193  * Return Code: 0
23194  *		EINVAL
23195  *		EFAULT
23196  *		ENXIO
23197  */
23198 
static int
sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct mboot	*mboot = NULL;
	int		rval;
	ushort_t	magic;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

#if defined(_SUNOS_VTOC_8)
	/* On VTOC_8 (sparc) platforms mboot is only supported on removables. */
	if (!ISREMOVABLE(un)) {
		return (EINVAL);
	}
#endif

	if (arg == NULL) {
		return (EINVAL);
	}

	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);

	if (ddi_copyin((const void *)arg, mboot,
	    sizeof (struct mboot), flag) != 0) {
		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
		return (EFAULT);
	}

	/* Is this really a master boot record? */
	magic = LE_16(mboot->signature);
	if (magic != MBB_MAGIC) {
		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
		return (EINVAL);
	}

	/* Write the new mboot at absolute block 0 of the target. */
	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
	    SD_PATH_STANDARD);

	mutex_enter(SD_MUTEX(un));
#if defined(__i386) || defined(__amd64)
	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * update the fdisk and vtoc tables in memory
		 */
		rval = sd_update_fdisk_and_vtoc(un);
		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
			mutex_exit(SD_MUTEX(un));
			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
			return (rval);
		}
	}

	/*
	 * If the mboot write fails, write the devid anyway, what can it hurt?
	 * Also preserve the device id by writing to the disk acyl for the case
	 * where a devid has been fabricated.
	 */
	if (!ISREMOVABLE(un) && !ISCD(un) &&
	    (un->un_f_opt_fab_devid == TRUE)) {
		if (un->un_devid == NULL) {
			sd_register_devid(un, SD_DEVINFO(un),
			    SD_TARGET_IS_UNRESERVED);
		} else {
			/*
			 * The device id for this disk has been
			 * fabricated. Fabricated device id's are
			 * managed by storing them in the last 2
			 * available sectors on the drive. The device
			 * id must be preserved by writing it back out
			 * to this location.
			 */
			if (sd_write_deviceid(un) != 0) {
				ddi_devid_free(un->un_devid);
				un->un_devid = NULL;
			}
		}
	}
#else
	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * set up the default geometry and VTOC
		 */
		if (un->un_blockcount <= DK_MAX_BLOCKS)
			sd_setup_default_geometry(un);
	}
#endif
	mutex_exit(SD_MUTEX(un));
	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
	return (rval);
}
23295 
23296 
23297 /*
23298  *    Function: sd_setup_default_geometry
23299  *
23300  * Description: This local utility routine sets the default geometry as part of
23301  *		setting the device mboot.
23302  *
23303  *   Arguments: un - driver soft state (unit) structure
23304  *
23305  * Note: This may be redundant with sd_build_default_label.
23306  */
23307 
23308 static void
23309 sd_setup_default_geometry(struct sd_lun *un)
23310 {
23311 	/* zero out the soft state geometry and partition table. */
23312 	bzero(&un->un_g, sizeof (struct dk_geom));
23313 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23314 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
23315 	un->un_asciilabel[0] = '\0';
23316 
23317 	/*
23318 	 * For the rpm, we use the minimum for the disk.
23319 	 * For the head, cyl and number of sector per track,
23320 	 * if the capacity <= 1GB, head = 64, sect = 32.
23321 	 * else head = 255, sect 63
23322 	 * Note: the capacity should be equal to C*H*S values.
23323 	 * This will cause some truncation of size due to
23324 	 * round off errors. For CD-ROMs, this truncation can
23325 	 * have adverse side effects, so returning ncyl and
23326 	 * nhead as 1. The nsect will overflow for most of
23327 	 * CD-ROMs as nsect is of type ushort.
23328 	 */
23329 	if (ISCD(un)) {
23330 		un->un_g.dkg_ncyl = 1;
23331 		un->un_g.dkg_nhead = 1;
23332 		un->un_g.dkg_nsect = un->un_blockcount;
23333 	} else {
23334 		if (un->un_blockcount <= 0x1000) {
23335 			/* Needed for unlabeled SCSI floppies. */
23336 			un->un_g.dkg_nhead = 2;
23337 			un->un_g.dkg_ncyl = 80;
23338 			un->un_g.dkg_pcyl = 80;
23339 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
23340 		} else if (un->un_blockcount <= 0x200000) {
23341 			un->un_g.dkg_nhead = 64;
23342 			un->un_g.dkg_nsect = 32;
23343 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
23344 		} else {
23345 			un->un_g.dkg_nhead = 255;
23346 			un->un_g.dkg_nsect = 63;
23347 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
23348 		}
23349 		un->un_blockcount = un->un_g.dkg_ncyl *
23350 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
23351 	}
23352 	un->un_g.dkg_acyl = 0;
23353 	un->un_g.dkg_bcyl = 0;
23354 	un->un_g.dkg_intrlv = 1;
23355 	un->un_g.dkg_rpm = 200;
23356 	un->un_g.dkg_read_reinstruct = 0;
23357 	un->un_g.dkg_write_reinstruct = 0;
23358 	if (un->un_g.dkg_pcyl == 0) {
23359 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
23360 	}
23361 
23362 	un->un_map['a'-'a'].dkl_cylno = 0;
23363 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
23364 	un->un_map['c'-'a'].dkl_cylno = 0;
23365 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
23366 	un->un_f_geometry_is_valid = FALSE;
23367 }
23368 
23369 
23370 #if defined(__i386) || defined(__amd64)
23371 /*
23372  *    Function: sd_update_fdisk_and_vtoc
23373  *
23374  * Description: This local utility routine updates the device fdisk and vtoc
23375  *		as part of setting the device mboot.
23376  *
23377  *   Arguments: un - driver soft state (unit) structure
23378  *
23379  * Return Code: 0 for success or errno-type return code.
23380  *
23381  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
23382  *		these did exist seperately in x86 sd.c!!!
23383  */
23384 
static int
sd_update_fdisk_and_vtoc(struct sd_lun *un)
{
	/* Function-static scratch buffers; serialized by sd_label_mutex. */
	static char	labelstring[128];
	static char	buf[256];
	char		*label = 0;
	int		count;
	int		label_rc = 0;
	int		gvalid = un->un_f_geometry_is_valid;
	int		fdisk_rval;
	int		lbasize;
	int		capacity;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
		return (EINVAL);
	}

	if (un->un_f_blockcount_is_valid == FALSE) {
		return (EINVAL);
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * Set up the "whole disk" fdisk partition; this should always
	 * exist, regardless of whether the disk contains an fdisk table
	 * or vtoc.
	 */
	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
#endif	/* defined(_SUNOS_VTOC_16) */

	/*
	 * copy the lbasize and capacity so that if they're
	 * reset while we're not holding the SD_MUTEX(un), we will
	 * continue to use valid values after the SD_MUTEX(un) is
	 * reacquired.
	 */
	lbasize  = un->un_tgt_blocksize;
	capacity = un->un_blockcount;

	/*
	 * refresh the logical and physical geometry caches.
	 * (data from mode sense format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry").
	 */
	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);

	/*
	 * Only DIRECT ACCESS devices will have Sun labels.
	 * CD's supposedly have a Sun label, too
	 */
	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT || ISREMOVABLE(un)) {
		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
		    SD_PATH_DIRECT);
		if (fdisk_rval == SD_CMD_FAILURE) {
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EIO);
		}

		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EACCES);
		}

		if (un->un_solaris_size <= DK_LABEL_LOC) {
			/*
			 * Found fdisk table but no Solaris partition entry,
			 * so don't call sd_uselabel() and don't create
			 * a default label.
			 */
			label_rc = 0;
			un->un_f_geometry_is_valid = TRUE;
			goto no_solaris_partition;
		}

		/* Point at the platform's ascii label for logging below. */
#if defined(_SUNOS_VTOC_8)
		label = (char *)un->un_asciilabel;
#elif defined(_SUNOS_VTOC_16)
		label = (char *)un->un_vtoc.v_asciilabel;
#else
#error "No VTOC format defined."
#endif
	} else if (capacity < 0) {
		ASSERT(mutex_owned(SD_MUTEX(un)));
		return (EINVAL);
	}

	/*
	 * For Removable media We reach here if we have found a
	 * SOLARIS PARTITION.
	 * If un_f_geometry_is_valid is FALSE it indicates that the SOLARIS
	 * PARTITION has changed from the previous one, hence we will setup a
	 * default VTOC in this case.
	 */
	if (un->un_f_geometry_is_valid == FALSE) {
		sd_build_default_label(un);
		label_rc = 0;
	}

no_solaris_partition:
	if ((!ISREMOVABLE(un) ||
	    (ISREMOVABLE(un) && un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && gvalid == FALSE)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we happen to really validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times, ioctl()'s like Get VTOC in which case we
		 * don't want to print the label.
		 * If the geometry is valid, print the label string,
		 * else print vendor and product info, if available
		 */
		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
		} else {
			/* sd_label_mutex guards the static scratch buffers. */
			mutex_enter(&sd_label_mutex);
			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
			    labelstring);
			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
			    &labelstring[64]);
			(void) sprintf(buf, "?Vendor '%s', product '%s'",
			    labelstring, &labelstring[64]);
			if (un->un_f_blockcount_is_valid == TRUE) {
				(void) sprintf(&buf[strlen(buf)],
				    ", %" PRIu64 " %u byte blocks\n",
				    un->un_blockcount,
				    un->un_tgt_blocksize);
			} else {
				(void) sprintf(&buf[strlen(buf)],
				    ", (unknown capacity)\n");
			}
			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
			mutex_exit(&sd_label_mutex);
		}
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * If we have valid geometry, set up the remaining fdisk partitions.
	 * Note that dkl_cylno is not used for the fdisk map entries, so
	 * we set it to an entirely bogus value.
	 */
	for (count = 0; count < FD_NUMPART; count++) {
		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
		un->un_map[FDISK_P1 + count].dkl_nblk =
		    un->un_fmap[count].fmap_nblk;
		un->un_offset[FDISK_P1 + count] =
		    un->un_fmap[count].fmap_start;
	}
#endif

	/* Recompute each slice's absolute starting offset on the device. */
	for (count = 0; count < NDKMAP; count++) {
#if defined(_SUNOS_VTOC_8)
		struct dk_map *lp  = &un->un_map[count];
		un->un_offset[count] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#elif defined(_SUNOS_VTOC_16)
		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
#else
#error "No VTOC format defined."
#endif
	}

	ASSERT(mutex_owned(SD_MUTEX(un)));
	return (label_rc);
}
23554 #endif
23555 
23556 
23557 /*
23558  *    Function: sd_check_media
23559  *
23560  * Description: This utility routine implements the functionality for the
23561  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23562  *		driver state changes from that specified by the user
23563  *		(inserted or ejected). For example, if the user specifies
23564  *		DKIO_EJECTED and the current media state is inserted this
23565  *		routine will immediately return DKIO_INSERTED. However, if the
23566  *		current media state is not inserted the user thread will be
23567  *		blocked until the drive state changes. If DKIO_NONE is specified
23568  *		the user thread will block until a drive state change occurs.
23569  *
23570  *   Arguments: dev  - the device number
23571  *		state  - user pointer to a dkio_state, updated with the current
23572  *			drive state at return.
23573  *
23574  * Return Code: ENXIO
23575  *		EIO
23576  *		EAGAIN
23577  *		EINTR
23578  */
23579 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	/* Remember the state on entry so transitions can be detected below. */
	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
		    (caddr_t)dev);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				/*
				 * Restore the active-command count that was
				 * dropped above before bailing out on signal.
				 */
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(un,
			    &capacity,
			    &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		sd_update_block_info(un, lbasize, capacity);

		/* Force re-validation against the newly inserted media. */
		un->un_f_geometry_is_valid	= FALSE;
		(void) sd_validate_geometry(un, SD_PATH_DIRECT);

		mutex_exit(SD_MUTEX(un));
		/*
		 * NOTE(review): rval now reflects the doorlock status, so a
		 * doorlock failure here is reported to the caller even though
		 * the media check itself succeeded.
		 */
		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);
		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
	}
done:
	/*
	 * NOTE(review): cleared unconditionally on every exit path;
	 * presumably pairs with the watch handling in sdpower — confirm.
	 */
	un->un_f_watcht_stopped = FALSE;
	if (un->un_swr_token) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		un->un_swr_token = (opaque_t)NULL;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 * This is a more generic way than checking for ISREMOVABLE.
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
23762 
23763 
23764 /*
23765  *    Function: sd_delayed_cv_broadcast
23766  *
23767  * Description: Delayed cv_broadcast to allow for target to recover from media
23768  *		insertion.
23769  *
23770  *   Arguments: arg - driver soft state (unit) structure
23771  */
23772 
23773 static void
23774 sd_delayed_cv_broadcast(void *arg)
23775 {
23776 	struct sd_lun *un = arg;
23777 
23778 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
23779 
23780 	mutex_enter(SD_MUTEX(un));
23781 	un->un_dcvb_timeid = NULL;
23782 	cv_broadcast(&un->un_state_cv);
23783 	mutex_exit(SD_MUTEX(un));
23784 }
23785 
23786 
23787 /*
23788  *    Function: sd_media_watch_cb
23789  *
23790  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
23791  *		routine processes the TUR sense data and updates the driver
23792  *		state if a transition has occurred. The user thread
23793  *		(sd_check_media) is then signalled.
23794  *
23795  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23796  *			among multiple watches that share this callback function
23797  *		resultp - scsi watch facility result packet containing scsi
23798  *			  packet, status byte and sense data
23799  *
23800  * Return Code: 0 for success, -1 for failure
23801  */
23802 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	struct scsi_extended_sense	*sensep = resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/*
	 * Device has disappeared: record DKIO_DEV_GONE and wake the waiter
	 * immediately; there is no media state left to track.
	 */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		printf("sd_media_watch_cb: dev gone\n");
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	/*
	 * If there was a check condition then sensep points to valid sense data
	 * If status was not a check condition but a reservation or busy status
	 * then the new state is DKIO_NONE
	 */
	if (sensep != NULL) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    sensep->es_key, sensep->es_add_code, sensep->es_qual_code);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			if (sensep->es_key == KEY_UNIT_ATTENTION) {
				/*
				 * ASC 0x28 ("not ready to ready change,
				 * medium may have changed" per SPC) is
				 * treated as a media insertion.
				 */
				if (sensep->es_add_code == 0x28) {
					state = DKIO_INSERTED;
				}
			} else {
				/*
				 * if 02/04/02  means that the host
				 * should send start command. Explicitly
				 * leave the media state as is
				 * (inserted) as the media is inserted
				 * and host has stopped device for PM
				 * reasons. Upon next true read/write
				 * to this media will bring the
				 * device to the right state good for
				 * media access.
				 */
				/*
				 * KEY_NOT_READY / ASC 0x3a ("medium not
				 * present" per SPC): media was ejected.
				 */
				if ((sensep->es_key == KEY_NOT_READY) &&
				    (sensep->es_add_code == 0x3a)) {
					state = DKIO_EJECTED;
				}

				/*
				 * If the drive is busy with an operation
				 * or long write, keep the media in an
				 * inserted state.
				 */

				if ((sensep->es_key == KEY_NOT_READY) &&
				    (sensep->es_add_code == 0x04) &&
				    ((sensep->es_qual_code == 0x02) ||
				    (sensep->es_qual_code == 0x07) ||
				    (sensep->es_qual_code == 0x08))) {
					state = DKIO_INSERTED;
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* TUR completed cleanly: media is present and ready. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			/* Only one delayed broadcast may be pending at once. */
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
23916 
23917 
23918 /*
23919  *    Function: sd_dkio_get_temp
23920  *
23921  * Description: This routine is the driver entry point for handling ioctl
23922  *		requests to get the disk temperature.
23923  *
23924  *   Arguments: dev  - the device number
23925  *		arg  - pointer to user provided dk_temperature structure.
23926  *		flag - this argument is a pass through to ddi_copyxxx()
23927  *		       directly from the mode argument of ioctl().
23928  *
23929  * Return Code: 0
23930  *		EFAULT
23931  *		ENXIO
23932  *		EAGAIN
23933  */
23934 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	int			path_flag = SD_PATH_STANDARD;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
		goto done2;
	}

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 * (Offsets follow the SCSI temperature log page layout: a 4-byte page
	 * header followed by 4-byte parameter headers; a value byte of 0xFF
	 * means "temperature not available".)
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
	}

done2:
	/*
	 * path_flag == SD_PATH_DIRECT here implies sd_pm_entry() succeeded
	 * above (the failure paths jump straight to "done"), so the PM
	 * busy count must be dropped.
	 */
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
24046 
24047 
24048 /*
24049  *    Function: sd_log_page_supported
24050  *
24051  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24052  *		supported log pages.
24053  *
24054  *   Arguments: un -
24055  *		log_page -
24056  *
24057  * Return Code: -1 - on error (log sense is optional and may not be supported).
24058  *		0  - log page not found.
24059  *  		1  - log page found.
24060  */
24061 
24062 static int
24063 sd_log_page_supported(struct sd_lun *un, int log_page)
24064 {
24065 	uchar_t *log_page_data;
24066 	int	i;
24067 	int	match = 0;
24068 	int	log_size;
24069 
24070 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24071 
24072 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24073 	    SD_PATH_DIRECT) != 0) {
24074 		SD_ERROR(SD_LOG_COMMON, un,
24075 		    "sd_log_page_supported: failed log page retrieval\n");
24076 		kmem_free(log_page_data, 0xFF);
24077 		return (-1);
24078 	}
24079 	log_size = log_page_data[3];
24080 
24081 	/*
24082 	 * The list of supported log pages start from the fourth byte. Check
24083 	 * until we run out of log pages or a match is found.
24084 	 */
24085 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24086 		if (log_page_data[i] == log_page) {
24087 			match++;
24088 		}
24089 	}
24090 	kmem_free(log_page_data, 0xFF);
24091 	return (match);
24092 }
24093 
24094 
24095 /*
24096  *    Function: sd_mhdioc_failfast
24097  *
24098  * Description: This routine is the driver entry point for handling ioctl
24099  *		requests to enable/disable the multihost failfast option.
24100  *		(MHIOCENFAILFAST)
24101  *
24102  *   Arguments: dev	- the device number
24103  *		arg	- user specified probing interval.
24104  *		flag	- this argument is a pass through to ddi_copyxxx()
24105  *			  directly from the mode argument of ioctl().
24106  *
24107  * Return Code: 0
24108  *		EFAULT
24109  *		ENXIO
24110  */
24111 
24112 static int
24113 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24114 {
24115 	struct sd_lun	*un = NULL;
24116 	int		mh_time;
24117 	int		rval = 0;
24118 
24119 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24120 		return (ENXIO);
24121 	}
24122 
24123 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24124 		return (EFAULT);
24125 
24126 	if (mh_time) {
24127 		mutex_enter(SD_MUTEX(un));
24128 		un->un_resvd_status |= SD_FAILFAST;
24129 		mutex_exit(SD_MUTEX(un));
24130 		/*
24131 		 * If mh_time is INT_MAX, then this ioctl is being used for
24132 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24133 		 */
24134 		if (mh_time != INT_MAX) {
24135 			rval = sd_check_mhd(dev, mh_time);
24136 		}
24137 	} else {
24138 		(void) sd_check_mhd(dev, 0);
24139 		mutex_enter(SD_MUTEX(un));
24140 		un->un_resvd_status &= ~SD_FAILFAST;
24141 		mutex_exit(SD_MUTEX(un));
24142 	}
24143 	return (rval);
24144 }
24145 
24146 
24147 /*
24148  *    Function: sd_mhdioc_takeown
24149  *
24150  * Description: This routine is the driver entry point for handling ioctl
24151  *		requests to forcefully acquire exclusive access rights to the
24152  *		multihost disk (MHIOCTKOWN).
24153  *
24154  *   Arguments: dev	- the device number
24155  *		arg	- user provided structure specifying the delay
24156  *			  parameters in milliseconds
24157  *		flag	- this argument is a pass through to ddi_copyxxx()
24158  *			  directly from the mode argument of ioctl().
24159  *
24160  * Return Code: 0
24161  *		EFAULT
24162  *		ENXIO
24163  */
24164 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The user argument is optional; a NULL arg means "take ownership
	 * with the default delay parameters" (tkown stays NULL below).
	 */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		un->un_resvd_status |= SD_RESERVE;
		/*
		 * reinstate_resv_delay arrives in milliseconds; the global
		 * sd_reinstate_resv_delay is stored scaled by 1000
		 * (sd_check_mhd() below divides it back out).
		 */
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/*
		 * Register for bus reset notification so a lost reservation
		 * can be detected and reclaimed (sd_mhd_reset_notify_cb).
		 */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/* Ownership was not obtained; make sure SD_RESERVE is clear. */
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
24222 
24223 
24224 /*
24225  *    Function: sd_mhdioc_release
24226  *
24227  * Description: This routine is the driver entry point for handling ioctl
24228  *		requests to release exclusive access rights to the multihost
24229  *		disk (MHIOCRELEASE).
24230  *
24231  *   Arguments: dev	- the device number
24232  *
24233  * Return Code: 0
24234  *		ENXIO
24235  */
24236 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Save the current reservation status so it can be restored if the
	 * SCSI release below fails, then clear all reservation flags.  Any
	 * pending reservation-reclaim timeout is cancelled via a local copy
	 * of the id, with the mutex dropped around untimeout() (untimeout
	 * may wait for a running handler that could need SD_MUTEX).
	 */
	mutex_enter(SD_MUTEX(un));
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Stop the MHD watch thread only if failfast is not enabled;
		 * the same watch thread also services the failfast interval
		 * set via MHIOCENFAILFAST (see sd_mhdioc_failfast()).
		 */
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* No longer interested in bus reset notifications. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
24289 
24290 
24291 /*
24292  *    Function: sd_mhdioc_register_devid
24293  *
24294  * Description: This routine is the driver entry point for handling ioctl
24295  *		requests to register the device id (MHIOCREREGISTERDEVID).
24296  *
24297  *		Note: The implementation for this ioctl has been updated to
24298  *		be consistent with the original PSARC case (1999/357)
24299  *		(4375899, 4241671, 4220005)
24300  *
24301  *   Arguments: dev	- the device number
24302  *
24303  * Return Code: 0
24304  *		ENXIO
24305  */
24306 
24307 static int
24308 sd_mhdioc_register_devid(dev_t dev)
24309 {
24310 	struct sd_lun	*un = NULL;
24311 	int		rval = 0;
24312 
24313 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24314 		return (ENXIO);
24315 	}
24316 
24317 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24318 
24319 	mutex_enter(SD_MUTEX(un));
24320 
24321 	/* If a devid already exists, de-register it */
24322 	if (un->un_devid != NULL) {
24323 		ddi_devid_unregister(SD_DEVINFO(un));
24324 		/*
24325 		 * After unregister devid, needs to free devid memory
24326 		 */
24327 		ddi_devid_free(un->un_devid);
24328 		un->un_devid = NULL;
24329 	}
24330 
24331 	/* Check for reservation conflict */
24332 	mutex_exit(SD_MUTEX(un));
24333 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
24334 	mutex_enter(SD_MUTEX(un));
24335 
24336 	switch (rval) {
24337 	case 0:
24338 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24339 		break;
24340 	case EACCES:
24341 		break;
24342 	default:
24343 		rval = EIO;
24344 	}
24345 
24346 	mutex_exit(SD_MUTEX(un));
24347 	return (rval);
24348 }
24349 
24350 
24351 /*
24352  *    Function: sd_mhdioc_inkeys
24353  *
24354  * Description: This routine is the driver entry point for handling ioctl
24355  *		requests to issue the SCSI-3 Persistent In Read Keys command
24356  *		to the device (MHIOCGRP_INKEYS).
24357  *
24358  *   Arguments: dev	- the device number
24359  *		arg	- user provided in_keys structure
24360  *		flag	- this argument is a pass through to ddi_copyxxx()
24361  *			  directly from the mode argument of ioctl().
24362  *
24363  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24364  *		ENXIO
24365  *		EFAULT
24366  */
24367 
static int
sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inkeys_t		inkeys;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct mhioc_inkeys32	inkeys32;

		/*
		 * 32-bit caller on a 64-bit kernel: copy in the ILP32
		 * layout, widen the embedded key-list pointer into the
		 * native struct, and copy only the generation count back
		 * out (the key list is presumably copied to the user
		 * buffer by sd_persistent_reservation_in_read_keys(),
		 * which receives the user list pointer and the copy flag).
		 */
		if (ddi_copyin(arg, &inkeys32,
		    sizeof (struct mhioc_inkeys32), flag) != 0) {
			return (EFAULT);
		}
		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		inkeys32.generation = inkeys.generation;
		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		/* Native-sized caller: straight copyin/copyout. */
		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}

#endif /* _MULTI_DATAMODEL */

	return (rval);
}
24433 
24434 
24435 /*
24436  *    Function: sd_mhdioc_inresv
24437  *
24438  * Description: This routine is the driver entry point for handling ioctl
24439  *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
24441  *
24442  *   Arguments: dev	- the device number
24443  *		arg	- user provided in_resv structure
24444  *		flag	- this argument is a pass through to ddi_copyxxx()
24445  *			  directly from the mode argument of ioctl().
24446  *
24447  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24448  *		ENXIO
24449  *		EFAULT
24450  */
24451 
24452 static int
24453 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24454 {
24455 	struct sd_lun		*un;
24456 	mhioc_inresvs_t		inresvs;
24457 	int			rval = 0;
24458 
24459 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24460 		return (ENXIO);
24461 	}
24462 
24463 #ifdef _MULTI_DATAMODEL
24464 
24465 	switch (ddi_model_convert_from(flag & FMODELS)) {
24466 	case DDI_MODEL_ILP32: {
24467 		struct mhioc_inresvs32	inresvs32;
24468 
24469 		if (ddi_copyin(arg, &inresvs32,
24470 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24471 			return (EFAULT);
24472 		}
24473 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24474 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24475 		    &inresvs, flag)) != 0) {
24476 			return (rval);
24477 		}
24478 		inresvs32.generation = inresvs.generation;
24479 		if (ddi_copyout(&inresvs32, arg,
24480 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24481 			return (EFAULT);
24482 		}
24483 		break;
24484 	}
24485 	case DDI_MODEL_NONE:
24486 		if (ddi_copyin(arg, &inresvs,
24487 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24488 			return (EFAULT);
24489 		}
24490 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24491 		    &inresvs, flag)) != 0) {
24492 			return (rval);
24493 		}
24494 		if (ddi_copyout(&inresvs, arg,
24495 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24496 			return (EFAULT);
24497 		}
24498 		break;
24499 	}
24500 
24501 #else /* ! _MULTI_DATAMODEL */
24502 
24503 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
24504 		return (EFAULT);
24505 	}
24506 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
24507 	if (rval != 0) {
24508 		return (rval);
24509 	}
24510 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
24511 		return (EFAULT);
24512 	}
24513 
24514 #endif /* ! _MULTI_DATAMODEL */
24515 
24516 	return (rval);
24517 }
24518 
24519 
24520 /*
24521  * The following routines support the clustering functionality described below
24522  * and implement lost reservation reclaim functionality.
24523  *
24524  * Clustering
24525  * ----------
24526  * The clustering code uses two different, independent forms of SCSI
24527  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24528  * Persistent Group Reservations. For any particular disk, it will use either
24529  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24530  *
24531  * SCSI-2
24532  * The cluster software takes ownership of a multi-hosted disk by issuing the
24533  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
 * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a
 * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
24535  * just after taking ownership of the disk with the MHIOCTKOWN ioctl then issues
24536  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
24537  * meaning of failfast is that if the driver (on this host) ever encounters the
24538  * scsi error return code RESERVATION_CONFLICT from the device, it should
24539  * immediately panic the host. The motivation for this ioctl is that if this
24540  * host does encounter reservation conflict, the underlying cause is that some
24541  * other host of the cluster has decided that this host is no longer in the
24542  * cluster and has seized control of the disks for itself. Since this host is no
24543  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
24544  * does two things:
24545  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24546  *      error to panic the host
24547  *      (b) it sets up a periodic timer to test whether this host still has
24548  *      "access" (in that no other host has reserved the device):  if the
24549  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24550  *      purpose of that periodic timer is to handle scenarios where the host is
24551  *      otherwise temporarily quiescent, temporarily doing no real i/o.
24552  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24553  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24554  * the device itself.
24555  *
24556  * SCSI-3 PGR
24557  * A direct semantic implementation of the SCSI-3 Persistent Reservation
24558  * facility is supported through the shared multihost disk ioctls
24559  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24560  * MHIOCGRP_PREEMPTANDABORT)
24561  *
24562  * Reservation Reclaim:
24563  * --------------------
24564  * To support the lost reservation reclaim operations this driver creates a
24565  * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb,
24568  * and the reservation reclaim thread loops through the requests to regain the
24569  * lost reservations.
24570  */
24571 
24572 /*
24573  *    Function: sd_check_mhd()
24574  *
24575  * Description: This function sets up and submits a scsi watch request or
24576  *		terminates an existing watch request. This routine is used in
24577  *		support of reservation reclaim.
24578  *
24579  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
24580  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
 *			   interval for issuing TEST UNIT READY commands
 *			   (converted to microseconds internally). If
24583  *			   set to 0 the watch should be terminated. If the
24584  *			   interval is set to 0 and if the device is required
24585  *			   to hold reservation while disabling failfast, the
24586  *			   watch is restarted with an interval of
24587  *			   reinstate_resv_delay.
24588  *
24589  * Return Code: 0	   - Successful submit/terminate of scsi watch request
24590  *		ENXIO      - Indicates an invalid device was specified
24591  *		EAGAIN     - Unable to submit the scsi watch request
24592  */
24593 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			/*
			 * Drop the mutex across the terminate call; the
			 * SCSI_WATCH_TERMINATE_WAIT flag indicates it may
			 * block until the watch request has drained.
			 */
			mutex_exit(SD_MUTEX(un));
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			/*
			 * sd_reinstate_resv_delay is in usecs; the interval
			 * below is treated as msecs (converted further down).
			 */
			interval = sd_reinstate_resv_delay/1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24666 
24667 
24668 /*
24669  *    Function: sd_mhd_watch_cb()
24670  *
24671  * Description: This function is the call back function used by the scsi watch
24672  *		facility. The scsi watch facility sends the "Test Unit Ready"
24673  *		and processes the status. If applicable (i.e. a "Unit Attention"
24674  *		status and automatic "Request Sense" not used) the scsi watch
24675  *		facility will send a "Request Sense" and retrieve the sense data
24676  *		to be passed to this callback function. In either case the
24677  *		automatic "Request Sense" or the facility submitting one, this
24678  *		callback is passed the status and sense data.
24679  *
24680  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24681  *			among multiple watches that share this callback function
24682  *		resultp - scsi watch facility result packet containing scsi
24683  *			  packet, status byte and sense data
24684  *
24685  * Return Code: 0 - continue the watch task
24686  *		non-zero - terminate the watch task
24687  */
24688 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	struct scsi_extended_sense	*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t  				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	/*
	 * A non-zero return value terminates the watch task, so stop
	 * watching if the soft state for this unit has gone away.
	 */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				panic("Reservation Conflict");
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((sensep->es_add_code == SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* sense data too short to interpret; keep watching */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	/* Note: SD_MUTEX is held on every path that reaches this point */
	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				/* drop the mutex; untimeout may block */
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24787 
24788 
24789 /*
24790  *    Function: sd_mhd_watch_incomplete()
24791  *
24792  * Description: This function is used to find out why a scsi pkt sent by the
24793  *		scsi watch facility was not completed. Under some scenarios this
24794  *		routine will return. Otherwise it will send a bus reset to see
24795  *		if the drive is still online.
24796  *
24797  *   Arguments: un  - driver soft state (unit) structure
24798  *		pkt - incomplete scsi pkt
24799  */
24800 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;
	int	perr;

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	/* do not issue resets or log noise while a crash dump is in progress */
	if (un->un_state == SD_STATE_DUMPING) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		/* throttle down since commands are no longer queued */
		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled are read after dropping
			 * SD_MUTEX below -- presumably these flags are
			 * stable after attach; confirm before relying on it.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
24935 
24936 
24937 /*
24938  *    Function: sd_sname()
24939  *
24940  * Description: This is a simple little routine to return a string containing
24941  *		a printable description of command status byte for use in
24942  *		logging.
24943  *
24944  *   Arguments: status - pointer to a status byte
24945  *
24946  * Return Code: char * - string containing status description.
24947  */
24948 
24949 static char *
24950 sd_sname(uchar_t status)
24951 {
24952 	switch (status & STATUS_MASK) {
24953 	case STATUS_GOOD:
24954 		return ("good status");
24955 	case STATUS_CHECK:
24956 		return ("check condition");
24957 	case STATUS_MET:
24958 		return ("condition met");
24959 	case STATUS_BUSY:
24960 		return ("busy");
24961 	case STATUS_INTERMEDIATE:
24962 		return ("intermediate");
24963 	case STATUS_INTERMEDIATE_MET:
24964 		return ("intermediate - condition met");
24965 	case STATUS_RESERVATION_CONFLICT:
24966 		return ("reservation_conflict");
24967 	case STATUS_TERMINATED:
24968 		return ("command terminated");
24969 	case STATUS_QFULL:
24970 		return ("queue full");
24971 	default:
24972 		return ("<unknown status>");
24973 	}
24974 }
24975 
24976 
24977 /*
24978  *    Function: sd_mhd_resvd_recover()
24979  *
24980  * Description: This function adds a reservation entry to the
24981  *		sd_resv_reclaim_request list and signals the reservation
24982  *		reclaim thread that there is work pending. If the reservation
24983  *		reclaim thread has not been previously created this function
24984  *		will kick it off.
24985  *
24986  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24987  *			among multiple watches that share this callback function
24988  *
24989  *     Context: This routine is called by timeout() and is run in interrupt
24990  *		context. It must not sleep or call other functions which may
24991  *		sleep.
24992  */
24993 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	mutex_enter(SD_MUTEX(un));
	/* this timeout has now fired; clear the pending timeout id */
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		/* empty queue: the new request becomes the head */
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/* scan for an existing request for this device */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			/* append the new request at the tail of the queue */
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
25076 
25077 /*
25078  *    Function: sd_resv_reclaim_thread()
25079  *
25080  * Description: This function implements the reservation reclaim operations
25081  *
 *   Arguments: none -- work items are taken from the shared sd_tr request
 *		queue (sd_tr.srq_thr_req_head)
25084  */
25085 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/* Wait for work */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	/*
	 * NOTE(review): 'if' rather than 'while' around cv_wait; a spurious
	 * wakeup with an empty queue would skip the loop below and exit the
	 * thread, which appears benign since sd_mhd_resvd_recover()
	 * re-creates the thread on demand -- confirm before changing.
	 */
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		/* drop the queue lock while the reserve command is issued */
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* reclaim failed; re-mark as lost for retry */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
25178 
25179 
25180 /*
25181  *    Function: sd_rmv_resv_reclaim_req()
25182  *
25183  * Description: This function removes any pending reservation reclaim requests
25184  *		for the specified device.
25185  *
25186  *   Arguments: dev - the device 'dev_t'
25187  */
25188 
25189 static void
25190 sd_rmv_resv_reclaim_req(dev_t dev)
25191 {
25192 	struct sd_thr_request *sd_mhreq;
25193 	struct sd_thr_request *sd_prev;
25194 
25195 	/* Remove a reservation reclaim request from the list */
25196 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25197 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25198 		/*
25199 		 * We are attempting to reinstate reservation for
25200 		 * this device. We wait for sd_reserve_release()
25201 		 * to return before we return.
25202 		 */
25203 		cv_wait(&sd_tr.srq_inprocess_cv,
25204 		    &sd_tr.srq_resv_reclaim_mutex);
25205 	} else {
25206 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25207 		if (sd_mhreq && sd_mhreq->dev == dev) {
25208 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25209 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25210 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25211 			return;
25212 		}
25213 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25214 			if (sd_mhreq && sd_mhreq->dev == dev) {
25215 				break;
25216 			}
25217 			sd_prev = sd_mhreq;
25218 		}
25219 		if (sd_mhreq != NULL) {
25220 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25221 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25222 		}
25223 	}
25224 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25225 }
25226 
25227 
25228 /*
25229  *    Function: sd_mhd_reset_notify_cb()
25230  *
25231  * Description: This is a call back function for scsi_reset_notify. This
25232  *		function updates the softstate reserved status and logs the
25233  *		reset. The driver scsi watch facility callback function
25234  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25235  *		will reclaim the reservation.
25236  *
25237  *   Arguments: arg  - driver soft state (unit) structure
25238  */
25239 
25240 static void
25241 sd_mhd_reset_notify_cb(caddr_t arg)
25242 {
25243 	struct sd_lun *un = (struct sd_lun *)arg;
25244 
25245 	mutex_enter(SD_MUTEX(un));
25246 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25247 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25248 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25249 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25250 	}
25251 	mutex_exit(SD_MUTEX(un));
25252 }
25253 
25254 
25255 /*
25256  *    Function: sd_take_ownership()
25257  *
25258  * Description: This routine implements an algorithm to achieve a stable
25259  *		reservation on disks which don't implement priority reserve,
 *		and makes sure that other hosts lose re-reservation attempts.
 *		This algorithm consists of a loop that keeps issuing the RESERVE
25262  *		for some period of time (min_ownership_delay, default 6 seconds)
25263  *		During that loop, it looks to see if there has been a bus device
25264  *		reset or bus reset (both of which cause an existing reservation
25265  *		to be lost). If the reservation is lost issue RESERVE until a
25266  *		period of min_ownership_delay with no resets has gone by, or
25267  *		until max_ownership_delay has expired. This loop ensures that
25268  *		the host really did manage to reserve the device, in spite of
25269  *		resets. The looping for min_ownership_delay (default six
25270  *		seconds) is important to early generation clustering products,
25271  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25272  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25273  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25274  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25275  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25276  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25277  *		no longer "owns" the disk and will have panicked itself.  Thus,
25278  *		the host issuing the MHIOCTKOWN is assured (with timing
25279  *		dependencies) that by the time it actually starts to use the
25280  *		disk for real work, the old owner is no longer accessing it.
25281  *
25282  *		min_ownership_delay is the minimum amount of time for which the
25283  *		disk must be reserved continuously devoid of resets before the
25284  *		MHIOCTKOWN ioctl will return success.
25285  *
25286  *		max_ownership_delay indicates the amount of time by which the
25287  *		take ownership should succeed or timeout with an error.
25288  *
25289  *   Arguments: dev - the device 'dev_t'
25290  *		*p  - struct containing timing info.
25291  *
25292  * Return Code: 0 for success or error code
25293  */
25294 
static int
sd_take_ownership(dev_t dev, struct mhioctkown *p)
{
	struct sd_lun	*un;
	int		rval;
	int		err;
	int		reservation_count   = 0;
	int		min_ownership_delay =  6000000; /* in usec */
	int		max_ownership_delay = 30000000; /* in usec */
	clock_t		start_time;	/* starting time of this algorithm */
	clock_t		end_time;	/* time limit for giving up */
	clock_t		ownership_time;	/* time limit for stable ownership */
	clock_t		current_time;
	clock_t		previous_current_time;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Attempt a device reservation. A priority reservation is requested.
	 */
	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
	    != SD_SUCCESS) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: return(1)=%d\n", rval);
		return (rval);
	}

	/* Update the softstate reserved status to indicate the reservation */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_status |= SD_RESERVE;
	un->un_resvd_status &=
	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
	mutex_exit(SD_MUTEX(un));

	/* Caller-supplied delays (in msecs) override the defaults */
	if (p != NULL) {
		if (p->min_ownership_delay != 0) {
			min_ownership_delay = p->min_ownership_delay * 1000;
		}
		if (p->max_ownership_delay != 0) {
			max_ownership_delay = p->max_ownership_delay * 1000;
		}
	}
	SD_INFO(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: min, max delays: %d, %d\n",
	    min_ownership_delay, max_ownership_delay);

	start_time = ddi_get_lbolt();
	current_time	= start_time;
	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
	end_time	= start_time + drv_usectohz(max_ownership_delay);

	/*
	 * Re-issue RESERVE every half second until it has been held
	 * undisturbed for min_ownership_delay, or until max_ownership_delay
	 * expires.  Time comparisons are done on signed differences so that
	 * lbolt wraparound is tolerated.
	 */
	while (current_time - end_time < 0) {
		delay(drv_usectohz(500000));

		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
			/* one immediate retry before giving up entirely */
			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
				mutex_enter(SD_MUTEX(un));
				rval = (un->un_resvd_status &
				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
				mutex_exit(SD_MUTEX(un));
				break;
			}
		}
		previous_current_time = current_time;
		current_time = ddi_get_lbolt();
		mutex_enter(SD_MUTEX(un));
		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
			/*
			 * The reservation was disturbed (reserve failed or a
			 * reset was seen); restart the stable-ownership
			 * window from now.
			 */
			ownership_time = ddi_get_lbolt() +
			    drv_usectohz(min_ownership_delay);
			reservation_count = 0;
		} else {
			reservation_count++;
		}
		un->un_resvd_status |= SD_RESERVE;
		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
		mutex_exit(SD_MUTEX(un));

		SD_INFO(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: ticks for loop iteration=%ld, "
		    "reservation=%s\n", (current_time - previous_current_time),
		    reservation_count ? "ok" : "reclaimed");

		if (current_time - ownership_time >= 0 &&
		    reservation_count >= 4) {
			rval = 0; /* Achieved a stable ownership */
			break;
		}
		if (current_time - end_time >= 0) {
			rval = EACCES; /* No ownership in max possible time */
			break;
		}
	}
	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: return(2)=%d\n", rval);
	return (rval);
}
25393 
25394 
25395 /*
25396  *    Function: sd_reserve_release()
25397  *
25398  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25399  *		PRIORITY RESERVE commands based on a user specified command type
25400  *
25401  *   Arguments: dev - the device 'dev_t'
25402  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25403  *		      SD_RESERVE, SD_RELEASE
25404  *
25405  * Return Code: 0 or Error Code
25406  */
25407 
25408 static int
25409 sd_reserve_release(dev_t dev, int cmd)
25410 {
25411 	struct uscsi_cmd	*com = NULL;
25412 	struct sd_lun		*un = NULL;
25413 	char			cdb[CDB_GROUP0];
25414 	int			rval;
25415 
25416 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25417 	    (cmd == SD_PRIORITY_RESERVE));
25418 
25419 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25420 		return (ENXIO);
25421 	}
25422 
25423 	/* instantiate and initialize the command and cdb */
25424 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25425 	bzero(cdb, CDB_GROUP0);
25426 	com->uscsi_flags   = USCSI_SILENT;
25427 	com->uscsi_timeout = un->un_reserve_release_time;
25428 	com->uscsi_cdblen  = CDB_GROUP0;
25429 	com->uscsi_cdb	   = cdb;
25430 	if (cmd == SD_RELEASE) {
25431 		cdb[0] = SCMD_RELEASE;
25432 	} else {
25433 		cdb[0] = SCMD_RESERVE;
25434 	}
25435 
25436 	/* Send the command. */
25437 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25438 	    UIO_SYSSPACE, SD_PATH_STANDARD);
25439 
25440 	/*
25441 	 * "break" a reservation that is held by another host, by issuing a
25442 	 * reset if priority reserve is desired, and we could not get the
25443 	 * device.
25444 	 */
25445 	if ((cmd == SD_PRIORITY_RESERVE) &&
25446 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25447 		/*
25448 		 * First try to reset the LUN. If we cannot, then try a target
25449 		 * reset, followed by a bus reset if the target reset fails.
25450 		 */
25451 		int reset_retval = 0;
25452 		if (un->un_f_lun_reset_enabled == TRUE) {
25453 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25454 		}
25455 		if (reset_retval == 0) {
25456 			/* The LUN reset either failed or was not issued */
25457 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25458 		}
25459 		if ((reset_retval == 0) &&
25460 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25461 			rval = EIO;
25462 			kmem_free(com, sizeof (*com));
25463 			return (rval);
25464 		}
25465 
25466 		bzero(com, sizeof (struct uscsi_cmd));
25467 		com->uscsi_flags   = USCSI_SILENT;
25468 		com->uscsi_cdb	   = cdb;
25469 		com->uscsi_cdblen  = CDB_GROUP0;
25470 		com->uscsi_timeout = 5;
25471 
25472 		/*
25473 		 * Reissue the last reserve command, this time without request
25474 		 * sense.  Assume that it is just a regular reserve command.
25475 		 */
25476 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25477 		    UIO_SYSSPACE, SD_PATH_STANDARD);
25478 	}
25479 
25480 	/* Return an error if still getting a reservation conflict. */
25481 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25482 		rval = EACCES;
25483 	}
25484 
25485 	kmem_free(com, sizeof (*com));
25486 	return (rval);
25487 }
25488 
25489 
25490 #define	SD_NDUMP_RETRIES	12
25491 /*
25492  *	System Crash Dump routine
25493  */
25494 
25495 static int
25496 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
25497 {
25498 	int		instance;
25499 	int		partition;
25500 	int		i;
25501 	int		err;
25502 	struct sd_lun	*un;
25503 	struct dk_map	*lp;
25504 	struct scsi_pkt *wr_pktp;
25505 	struct buf	*wr_bp;
25506 	struct buf	wr_buf;
25507 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
25508 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
25509 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
25510 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
25511 	size_t		io_start_offset;
25512 	int		doing_rmw = FALSE;
25513 	int		rval;
25514 #if defined(__i386) || defined(__amd64)
25515 	ssize_t dma_resid;
25516 	daddr_t oblkno;
25517 #endif
25518 
25519 	instance = SDUNIT(dev);
25520 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
25521 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
25522 		return (ENXIO);
25523 	}
25524 
25525 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
25526 
25527 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
25528 
25529 	partition = SDPART(dev);
25530 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
25531 
25532 	/* Validate blocks to dump at against partition size. */
25533 	lp = &un->un_map[partition];
25534 	if ((blkno + nblk) > lp->dkl_nblk) {
25535 		SD_TRACE(SD_LOG_DUMP, un,
25536 		    "sddump: dump range larger than partition: "
25537 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25538 		    blkno, nblk, lp->dkl_nblk);
25539 		return (EINVAL);
25540 	}
25541 
25542 	mutex_enter(&un->un_pm_mutex);
25543 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
25544 		struct scsi_pkt *start_pktp;
25545 
25546 		mutex_exit(&un->un_pm_mutex);
25547 
25548 		/*
25549 		 * use pm framework to power on HBA 1st
25550 		 */
25551 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
25552 
25553 		/*
25554 		 * Dump no long uses sdpower to power on a device, it's
25555 		 * in-line here so it can be done in polled mode.
25556 		 */
25557 
25558 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
25559 
25560 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
25561 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
25562 
25563 		if (start_pktp == NULL) {
25564 			/* We were not given a SCSI packet, fail. */
25565 			return (EIO);
25566 		}
25567 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
25568 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
25569 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
25570 		start_pktp->pkt_flags = FLAG_NOINTR;
25571 
25572 		mutex_enter(SD_MUTEX(un));
25573 		SD_FILL_SCSI1_LUN(un, start_pktp);
25574 		mutex_exit(SD_MUTEX(un));
25575 		/*
25576 		 * Scsi_poll returns 0 (success) if the command completes and
25577 		 * the status block is STATUS_GOOD.
25578 		 */
25579 		if (sd_scsi_poll(un, start_pktp) != 0) {
25580 			scsi_destroy_pkt(start_pktp);
25581 			return (EIO);
25582 		}
25583 		scsi_destroy_pkt(start_pktp);
25584 		(void) sd_ddi_pm_resume(un);
25585 	} else {
25586 		mutex_exit(&un->un_pm_mutex);
25587 	}
25588 
25589 	mutex_enter(SD_MUTEX(un));
25590 	un->un_throttle = 0;
25591 
25592 	/*
25593 	 * The first time through, reset the specific target device.
25594 	 * However, when cpr calls sddump we know that sd is in a
25595 	 * a good state so no bus reset is required.
25596 	 * Clear sense data via Request Sense cmd.
25597 	 * In sddump we don't care about allow_bus_device_reset anymore
25598 	 */
25599 
25600 	if ((un->un_state != SD_STATE_SUSPENDED) &&
25601 	    (un->un_state != SD_STATE_DUMPING)) {
25602 
25603 		New_state(un, SD_STATE_DUMPING);
25604 
25605 		if (un->un_f_is_fibre == FALSE) {
25606 			mutex_exit(SD_MUTEX(un));
25607 			/*
25608 			 * Attempt a bus reset for parallel scsi.
25609 			 *
25610 			 * Note: A bus reset is required because on some host
25611 			 * systems (i.e. E420R) a bus device reset is
25612 			 * insufficient to reset the state of the target.
25613 			 *
25614 			 * Note: Don't issue the reset for fibre-channel,
25615 			 * because this tends to hang the bus (loop) for
25616 			 * too long while everyone is logging out and in
25617 			 * and the deadman timer for dumping will fire
25618 			 * before the dump is complete.
25619 			 */
25620 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
25621 				mutex_enter(SD_MUTEX(un));
25622 				Restore_state(un);
25623 				mutex_exit(SD_MUTEX(un));
25624 				return (EIO);
25625 			}
25626 
25627 			/* Delay to give the device some recovery time. */
25628 			drv_usecwait(10000);
25629 
25630 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
25631 				SD_INFO(SD_LOG_DUMP, un,
25632 					"sddump: sd_send_polled_RQS failed\n");
25633 			}
25634 			mutex_enter(SD_MUTEX(un));
25635 		}
25636 	}
25637 
25638 	/*
25639 	 * Convert the partition-relative block number to a
25640 	 * disk physical block number.
25641 	 */
25642 	blkno += un->un_offset[partition];
25643 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
25644 
25645 
25646 	/*
25647 	 * Check if the device has a non-512 block size.
25648 	 */
25649 	wr_bp = NULL;
25650 	if (NOT_DEVBSIZE(un)) {
25651 		tgt_byte_offset = blkno * un->un_sys_blocksize;
25652 		tgt_byte_count = nblk * un->un_sys_blocksize;
25653 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
25654 		    (tgt_byte_count % un->un_tgt_blocksize)) {
25655 			doing_rmw = TRUE;
25656 			/*
25657 			 * Calculate the block number and number of block
25658 			 * in terms of the media block size.
25659 			 */
25660 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25661 			tgt_nblk =
25662 			    ((tgt_byte_offset + tgt_byte_count +
25663 				(un->un_tgt_blocksize - 1)) /
25664 				un->un_tgt_blocksize) - tgt_blkno;
25665 
25666 			/*
25667 			 * Invoke the routine which is going to do read part
25668 			 * of read-modify-write.
25669 			 * Note that this routine returns a pointer to
25670 			 * a valid bp in wr_bp.
25671 			 */
25672 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
25673 			    &wr_bp);
25674 			if (err) {
25675 				mutex_exit(SD_MUTEX(un));
25676 				return (err);
25677 			}
25678 			/*
25679 			 * Offset is being calculated as -
25680 			 * (original block # * system block size) -
25681 			 * (new block # * target block size)
25682 			 */
25683 			io_start_offset =
25684 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
25685 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
25686 
25687 			ASSERT((io_start_offset >= 0) &&
25688 			    (io_start_offset < un->un_tgt_blocksize));
25689 			/*
25690 			 * Do the modify portion of read modify write.
25691 			 */
25692 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
25693 			    (size_t)nblk * un->un_sys_blocksize);
25694 		} else {
25695 			doing_rmw = FALSE;
25696 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25697 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
25698 		}
25699 
25700 		/* Convert blkno and nblk to target blocks */
25701 		blkno = tgt_blkno;
25702 		nblk = tgt_nblk;
25703 	} else {
25704 		wr_bp = &wr_buf;
25705 		bzero(wr_bp, sizeof (struct buf));
25706 		wr_bp->b_flags		= B_BUSY;
25707 		wr_bp->b_un.b_addr	= addr;
25708 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
25709 		wr_bp->b_resid		= 0;
25710 	}
25711 
25712 	mutex_exit(SD_MUTEX(un));
25713 
25714 	/*
25715 	 * Obtain a SCSI packet for the write command.
25716 	 * It should be safe to call the allocator here without
25717 	 * worrying about being locked for DVMA mapping because
25718 	 * the address we're passed is already a DVMA mapping
25719 	 *
25720 	 * We are also not going to worry about semaphore ownership
25721 	 * in the dump buffer. Dumping is single threaded at present.
25722 	 */
25723 
25724 	wr_pktp = NULL;
25725 
25726 #if defined(__i386) || defined(__amd64)
25727 	dma_resid = wr_bp->b_bcount;
25728 	oblkno = blkno;
25729 	while (dma_resid != 0) {
25730 #endif
25731 
25732 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
25733 		wr_bp->b_flags &= ~B_ERROR;
25734 
25735 #if defined(__i386) || defined(__amd64)
25736 		blkno = oblkno +
25737 			((wr_bp->b_bcount - dma_resid) /
25738 			    un->un_tgt_blocksize);
25739 		nblk = dma_resid / un->un_tgt_blocksize;
25740 
25741 		if (wr_pktp) {
25742 			/* Partial DMA transfers after initial transfer */
25743 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
25744 			    blkno, nblk);
25745 		} else {
25746 			/* Initial transfer */
25747 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25748 			    un->un_pkt_flags, NULL_FUNC, NULL,
25749 			    blkno, nblk);
25750 		}
25751 #else
25752 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25753 		    0, NULL_FUNC, NULL, blkno, nblk);
25754 #endif
25755 
25756 		if (rval == 0) {
25757 			/* We were given a SCSI packet, continue. */
25758 			break;
25759 		}
25760 
25761 		if (i == 0) {
25762 			if (wr_bp->b_flags & B_ERROR) {
25763 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25764 				    "no resources for dumping; "
25765 				    "error code: 0x%x, retrying",
25766 				    geterror(wr_bp));
25767 			} else {
25768 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25769 				    "no resources for dumping; retrying");
25770 			}
25771 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
25772 			if (wr_bp->b_flags & B_ERROR) {
25773 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25774 				    "no resources for dumping; error code: "
25775 				    "0x%x, retrying\n", geterror(wr_bp));
25776 			}
25777 		} else {
25778 			if (wr_bp->b_flags & B_ERROR) {
25779 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25780 				    "no resources for dumping; "
25781 				    "error code: 0x%x, retries failed, "
25782 				    "giving up.\n", geterror(wr_bp));
25783 			} else {
25784 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25785 				    "no resources for dumping; "
25786 				    "retries failed, giving up.\n");
25787 			}
25788 			mutex_enter(SD_MUTEX(un));
25789 			Restore_state(un);
25790 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
25791 				mutex_exit(SD_MUTEX(un));
25792 				scsi_free_consistent_buf(wr_bp);
25793 			} else {
25794 				mutex_exit(SD_MUTEX(un));
25795 			}
25796 			return (EIO);
25797 		}
25798 		drv_usecwait(10000);
25799 	}
25800 
25801 #if defined(__i386) || defined(__amd64)
25802 	/*
25803 	 * save the resid from PARTIAL_DMA
25804 	 */
25805 	dma_resid = wr_pktp->pkt_resid;
25806 	if (dma_resid != 0)
25807 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
25808 	wr_pktp->pkt_resid = 0;
25809 #endif
25810 
25811 	/* SunBug 1222170 */
25812 	wr_pktp->pkt_flags = FLAG_NOINTR;
25813 
25814 	err = EIO;
25815 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
25816 
25817 		/*
25818 		 * Scsi_poll returns 0 (success) if the command completes and
25819 		 * the status block is STATUS_GOOD.  We should only check
25820 		 * errors if this condition is not true.  Even then we should
25821 		 * send our own request sense packet only if we have a check
25822 		 * condition and auto request sense has not been performed by
25823 		 * the hba.
25824 		 */
25825 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
25826 
25827 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
25828 		    (wr_pktp->pkt_resid == 0)) {
25829 			err = SD_SUCCESS;
25830 			break;
25831 		}
25832 
25833 		/*
25834 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
25835 		 */
25836 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
25837 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25838 			    "Device is gone\n");
25839 			break;
25840 		}
25841 
25842 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
25843 			SD_INFO(SD_LOG_DUMP, un,
25844 			    "sddump: write failed with CHECK, try # %d\n", i);
25845 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
25846 				(void) sd_send_polled_RQS(un);
25847 			}
25848 
25849 			continue;
25850 		}
25851 
25852 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
25853 			int reset_retval = 0;
25854 
25855 			SD_INFO(SD_LOG_DUMP, un,
25856 			    "sddump: write failed with BUSY, try # %d\n", i);
25857 
25858 			if (un->un_f_lun_reset_enabled == TRUE) {
25859 				reset_retval = scsi_reset(SD_ADDRESS(un),
25860 				    RESET_LUN);
25861 			}
25862 			if (reset_retval == 0) {
25863 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25864 			}
25865 			(void) sd_send_polled_RQS(un);
25866 
25867 		} else {
25868 			SD_INFO(SD_LOG_DUMP, un,
25869 			    "sddump: write failed with 0x%x, try # %d\n",
25870 			    SD_GET_PKT_STATUS(wr_pktp), i);
25871 			mutex_enter(SD_MUTEX(un));
25872 			sd_reset_target(un, wr_pktp);
25873 			mutex_exit(SD_MUTEX(un));
25874 		}
25875 
25876 		/*
25877 		 * If we are not getting anywhere with lun/target resets,
25878 		 * let's reset the bus.
25879 		 */
25880 		if (i == SD_NDUMP_RETRIES/2) {
25881 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25882 			(void) sd_send_polled_RQS(un);
25883 		}
25884 
25885 	}
25886 #if defined(__i386) || defined(__amd64)
25887 	}	/* dma_resid */
25888 #endif
25889 
25890 	scsi_destroy_pkt(wr_pktp);
25891 	mutex_enter(SD_MUTEX(un));
25892 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
25893 		mutex_exit(SD_MUTEX(un));
25894 		scsi_free_consistent_buf(wr_bp);
25895 	} else {
25896 		mutex_exit(SD_MUTEX(un));
25897 	}
25898 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
25899 	return (err);
25900 }
25901 
25902 /*
25903  *    Function: sd_scsi_poll()
25904  *
25905  * Description: This is a wrapper for the scsi_poll call.
25906  *
25907  *   Arguments: sd_lun - The unit structure
25908  *              scsi_pkt - The scsi packet being sent to the device.
25909  *
25910  * Return Code: 0 - Command completed successfully with good status
25911  *             -1 - Command failed.  This could indicate a check condition
25912  *                  or other status value requiring recovery action.
25913  *
25914  */
25915 
25916 static int
25917 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
25918 {
25919 	int status;
25920 
25921 	ASSERT(un != NULL);
25922 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25923 	ASSERT(pktp != NULL);
25924 
25925 	status = SD_SUCCESS;
25926 
25927 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
25928 		pktp->pkt_flags |= un->un_tagflags;
25929 		pktp->pkt_flags &= ~FLAG_NODISCON;
25930 	}
25931 
25932 	status = sd_ddi_scsi_poll(pktp);
25933 	/*
25934 	 * Scsi_poll returns 0 (success) if the command completes and the
25935 	 * status block is STATUS_GOOD.  We should only check errors if this
25936 	 * condition is not true.  Even then we should send our own request
25937 	 * sense packet only if we have a check condition and auto
25938 	 * request sense has not been performed by the hba.
25939 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
25940 	 */
25941 	if ((status != SD_SUCCESS) &&
25942 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
25943 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
25944 	    (pktp->pkt_reason != CMD_DEV_GONE))
25945 		(void) sd_send_polled_RQS(un);
25946 
25947 	return (status);
25948 }
25949 
25950 /*
25951  *    Function: sd_send_polled_RQS()
25952  *
25953  * Description: This sends the request sense command to a device.
25954  *
25955  *   Arguments: sd_lun - The unit structure
25956  *
25957  * Return Code: 0 - Command completed successfully with good status
25958  *             -1 - Command failed.
25959  *
25960  */
25961 
25962 static int
25963 sd_send_polled_RQS(struct sd_lun *un)
25964 {
25965 	int	ret_val;
25966 	struct	scsi_pkt	*rqs_pktp;
25967 	struct	buf		*rqs_bp;
25968 
25969 	ASSERT(un != NULL);
25970 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25971 
25972 	ret_val = SD_SUCCESS;
25973 
25974 	rqs_pktp = un->un_rqs_pktp;
25975 	rqs_bp	 = un->un_rqs_bp;
25976 
25977 	mutex_enter(SD_MUTEX(un));
25978 
25979 	if (un->un_sense_isbusy) {
25980 		ret_val = SD_FAILURE;
25981 		mutex_exit(SD_MUTEX(un));
25982 		return (ret_val);
25983 	}
25984 
25985 	/*
25986 	 * If the request sense buffer (and packet) is not in use,
25987 	 * let's set the un_sense_isbusy and send our packet
25988 	 */
25989 	un->un_sense_isbusy 	= 1;
25990 	rqs_pktp->pkt_resid  	= 0;
25991 	rqs_pktp->pkt_reason 	= 0;
25992 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
25993 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
25994 
25995 	mutex_exit(SD_MUTEX(un));
25996 
25997 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
25998 	    " 0x%p\n", rqs_bp->b_un.b_addr);
25999 
26000 	/*
26001 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26002 	 * axle - it has a call into us!
26003 	 */
26004 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26005 		SD_INFO(SD_LOG_COMMON, un,
26006 		    "sd_send_polled_RQS: RQS failed\n");
26007 	}
26008 
26009 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26010 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26011 
26012 	mutex_enter(SD_MUTEX(un));
26013 	un->un_sense_isbusy = 0;
26014 	mutex_exit(SD_MUTEX(un));
26015 
26016 	return (ret_val);
26017 }
26018 
26019 /*
26020  * Defines needed for localized version of the scsi_poll routine.
26021  */
26022 #define	SD_CSEC		10000			/* usecs */
26023 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
26024 
26025 
26026 /*
26027  *    Function: sd_ddi_scsi_poll()
26028  *
26029  * Description: Localized version of the scsi_poll routine.  The purpose is to
26030  *		send a scsi_pkt to a device as a polled command.  This version
26031  *		is to ensure more robust handling of transport errors.
 *		Specifically this routine cures the "not ready, becoming
 *		ready" transition seen during power-up and reset of Sonoma
 *		devices.  This can take up to 45 seconds for power-on and
 *		20 seconds for reset of a Sonoma LUN.
26036  *
26037  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26038  *
26039  * Return Code: 0 - Command completed successfully with good status
26040  *             -1 - Command failed.
26041  *
26042  */
26043 
static int
sd_ddi_scsi_poll(struct scsi_pkt *pkt)
{
	int busy_count;
	int timeout;
	int rval = SD_FAILURE;
	int savef;
	struct scsi_extended_sense *sensep;
	long savet;
	void (*savec)();
	/*
	 * The following is defined in machdep.c and is used in determining if
	 * the scsi transport system will do polled I/O instead of interrupt
	 * I/O when called from xx_dump().
	 */
	extern int do_polled_io;

	/*
	 * save old flags in pkt, to restore at end
	 */
	savef = pkt->pkt_flags;
	savec = pkt->pkt_comp;
	savet = pkt->pkt_time;

	pkt->pkt_flags |= FLAG_NOINTR;

	/*
	 * XXX there is nothing in the SCSA spec that states that we should not
	 * do a callback for polled cmds; however, removing this will break sd
	 * and probably other target drivers
	 */
	pkt->pkt_comp = NULL;

	/*
	 * we don't like a polled command without timeout.
	 * 60 seconds seems long enough.
	 */
	if (pkt->pkt_time == 0) {
		pkt->pkt_time = SCSI_POLL_TIMEOUT;
	}

	/*
	 * Send polled cmd.
	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec.
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second as this status takes a while to
	 * change.  Unit attention is retried for pkt_time (60) times
	 * with no delay.
	 */
	/* busy_count ticks in SD_CSEC (10 msec) units. */
	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;

	for (busy_count = 0; busy_count < timeout; busy_count++) {
		int rc;
		int poll_delay;

		/*
		 * Initialize pkt status variables.
		 */
		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;

		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
			if (rc != TRAN_BUSY) {
				/* Transport failed - give up. */
				break;
			} else {
				/* Transport busy - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec */
			}
		} else {
			/*
			 * Transport accepted - check pkt status.
			 */
			rc = (*pkt->pkt_scbp) & STATUS_MASK;
			if (pkt->pkt_reason == CMD_CMPLT &&
			    rc == STATUS_CHECK &&
			    pkt->pkt_state & STATE_ARQ_DONE) {
				/* HBA auto-sensed; use the embedded data. */
				struct scsi_arq_status *arqstat =
				    (struct scsi_arq_status *)(pkt->pkt_scbp);

				sensep = &arqstat->sts_sensedata;
			} else {
				sensep = NULL;
			}

			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_GOOD)) {
				/* No error - we're done */
				rval = SD_SUCCESS;
				break;

			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
				/* Lost connection - give up */
				break;

			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
			    (pkt->pkt_state == 0)) {
				/* Pkt not dispatched - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_QFULL)) {
				/* Queue full - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_BUSY)) {
				/* Busy - try again. */
				poll_delay = 100 * SD_CSEC; /* 1 sec. */
				/* this pass counts as a full second */
				busy_count += (SD_SEC_TO_CSEC - 1);

			} else if ((sensep != NULL) &&
			    (sensep->es_key == KEY_UNIT_ATTENTION)) {
				/* Unit Attention - try again */
				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
				continue;

			} else if ((sensep != NULL) &&
			    (sensep->es_key == KEY_NOT_READY) &&
			    (sensep->es_add_code == 0x04) &&
			    (sensep->es_qual_code == 0x01)) {
				/* Not ready -> ready - try again. */
				poll_delay = 100 * SD_CSEC; /* 1 sec. */
				busy_count += (SD_SEC_TO_CSEC - 1);

			} else {
				/* BAD status - give up. */
				break;
			}
		}

		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
		    !do_polled_io) {
			delay(drv_usectohz(poll_delay));
		} else {
			/* we busy wait during cpr_dump or interrupt threads */
			drv_usecwait(poll_delay);
		}
	}

	/* Restore the caller's flags, completion routine, and timeout. */
	pkt->pkt_flags = savef;
	pkt->pkt_comp = savec;
	pkt->pkt_time = savet;
	return (rval);
}
26190 
26191 
26192 /*
26193  *    Function: sd_persistent_reservation_in_read_keys
26194  *
26195  * Description: This routine is the driver entry point for handling CD-ROM
26196  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26197  *		by sending the SCSI-3 PRIN commands to the device.
26198  *		Processes the read keys command response by copying the
26199  *		reservation key information into the user provided buffer.
26200  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26201  *
26202  *   Arguments: un   -  Pointer to soft state struct for the target.
26203  *		usrp -	user provided pointer to multihost Persistent In Read
26204  *			Keys structure (mhioc_inkeys_t)
26205  *		flag -	this argument is a pass through to ddi_copyxxx()
26206  *			directly from the mode argument of ioctl().
26207  *
26208  * Return Code: 0   - Success
26209  *		EACCES
26210  *		ENOTSUP
26211  *		errno return code from sd_send_scsi_cmd()
26212  *
26213  *     Context: Can sleep. Does not return until command is completed.
26214  */
26215 
26216 static int
26217 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26218     mhioc_inkeys_t *usrp, int flag)
26219 {
26220 #ifdef _MULTI_DATAMODEL
26221 	struct mhioc_key_list32	li32;
26222 #endif
26223 	sd_prin_readkeys_t	*in;
26224 	mhioc_inkeys_t		*ptr;
26225 	mhioc_key_list_t	li;
26226 	uchar_t			*data_bufp;
26227 	int 			data_len;
26228 	int			rval;
26229 	size_t			copysz;
26230 
26231 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26232 		return (EINVAL);
26233 	}
26234 	bzero(&li, sizeof (mhioc_key_list_t));
26235 
26236 	/*
26237 	 * Get the listsize from user
26238 	 */
26239 #ifdef _MULTI_DATAMODEL
26240 
26241 	switch (ddi_model_convert_from(flag & FMODELS)) {
26242 	case DDI_MODEL_ILP32:
26243 		copysz = sizeof (struct mhioc_key_list32);
26244 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26245 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26246 			    "sd_persistent_reservation_in_read_keys: "
26247 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26248 			rval = EFAULT;
26249 			goto done;
26250 		}
26251 		li.listsize = li32.listsize;
26252 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26253 		break;
26254 
26255 	case DDI_MODEL_NONE:
26256 		copysz = sizeof (mhioc_key_list_t);
26257 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26258 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26259 			    "sd_persistent_reservation_in_read_keys: "
26260 			    "failed ddi_copyin: mhioc_key_list_t\n");
26261 			rval = EFAULT;
26262 			goto done;
26263 		}
26264 		break;
26265 	}
26266 
26267 #else /* ! _MULTI_DATAMODEL */
26268 	copysz = sizeof (mhioc_key_list_t);
26269 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26270 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26271 		    "sd_persistent_reservation_in_read_keys: "
26272 		    "failed ddi_copyin: mhioc_key_list_t\n");
26273 		rval = EFAULT;
26274 		goto done;
26275 	}
26276 #endif
26277 
26278 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26279 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26280 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26281 
26282 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26283 	    data_len, data_bufp)) != 0) {
26284 		goto done;
26285 	}
26286 	in = (sd_prin_readkeys_t *)data_bufp;
26287 	ptr->generation = BE_32(in->generation);
26288 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26289 
26290 	/*
26291 	 * Return the min(listsize, listlen) keys
26292 	 */
26293 #ifdef _MULTI_DATAMODEL
26294 
26295 	switch (ddi_model_convert_from(flag & FMODELS)) {
26296 	case DDI_MODEL_ILP32:
26297 		li32.listlen = li.listlen;
26298 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26299 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26300 			    "sd_persistent_reservation_in_read_keys: "
26301 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26302 			rval = EFAULT;
26303 			goto done;
26304 		}
26305 		break;
26306 
26307 	case DDI_MODEL_NONE:
26308 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26309 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26310 			    "sd_persistent_reservation_in_read_keys: "
26311 			    "failed ddi_copyout: mhioc_key_list_t\n");
26312 			rval = EFAULT;
26313 			goto done;
26314 		}
26315 		break;
26316 	}
26317 
26318 #else /* ! _MULTI_DATAMODEL */
26319 
26320 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26321 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26322 		    "sd_persistent_reservation_in_read_keys: "
26323 		    "failed ddi_copyout: mhioc_key_list_t\n");
26324 		rval = EFAULT;
26325 		goto done;
26326 	}
26327 
26328 #endif /* _MULTI_DATAMODEL */
26329 
26330 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26331 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26332 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26333 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26334 		    "sd_persistent_reservation_in_read_keys: "
26335 		    "failed ddi_copyout: keylist\n");
26336 		rval = EFAULT;
26337 	}
26338 done:
26339 	kmem_free(data_bufp, data_len);
26340 	return (rval);
26341 }
26342 
26343 
26344 /*
26345  *    Function: sd_persistent_reservation_in_read_resv
26346  *
26347  * Description: This routine is the driver entry point for handling CD-ROM
26348  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26349  *		by sending the SCSI-3 PRIN commands to the device.
26350  *		Process the read persistent reservations command response by
26351  *		copying the reservation information into the user provided
26352  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26353  *
26354  *   Arguments: un   -  Pointer to soft state struct for the target.
26355  *		usrp -	user provided pointer to multihost Persistent In Read
26356  *			Keys structure (mhioc_inkeys_t)
26357  *		flag -	this argument is a pass through to ddi_copyxxx()
26358  *			directly from the mode argument of ioctl().
26359  *
26360  * Return Code: 0   - Success
26361  *		EACCES
26362  *		ENOTSUP
26363  *		errno return code from sd_send_scsi_cmd()
26364  *
26365  *     Context: Can sleep. Does not return until command is completed.
26366  */
26367 
26368 static int
26369 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26370     mhioc_inresvs_t *usrp, int flag)
26371 {
26372 #ifdef _MULTI_DATAMODEL
26373 	struct mhioc_resv_desc_list32 resvlist32;
26374 #endif
26375 	sd_prin_readresv_t	*in;
26376 	mhioc_inresvs_t		*ptr;
26377 	sd_readresv_desc_t	*readresv_ptr;
26378 	mhioc_resv_desc_list_t	resvlist;
26379 	mhioc_resv_desc_t 	resvdesc;
26380 	uchar_t			*data_bufp;
26381 	int 			data_len;
26382 	int			rval;
26383 	int			i;
26384 	size_t			copysz;
26385 	mhioc_resv_desc_t	*bufp;
26386 
26387 	if ((ptr = usrp) == NULL) {
26388 		return (EINVAL);
26389 	}
26390 
26391 	/*
26392 	 * Get the listsize from user
26393 	 */
26394 #ifdef _MULTI_DATAMODEL
26395 	switch (ddi_model_convert_from(flag & FMODELS)) {
26396 	case DDI_MODEL_ILP32:
26397 		copysz = sizeof (struct mhioc_resv_desc_list32);
26398 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26399 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26400 			    "sd_persistent_reservation_in_read_resv: "
26401 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26402 			rval = EFAULT;
26403 			goto done;
26404 		}
26405 		resvlist.listsize = resvlist32.listsize;
26406 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26407 		break;
26408 
26409 	case DDI_MODEL_NONE:
26410 		copysz = sizeof (mhioc_resv_desc_list_t);
26411 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26412 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26413 			    "sd_persistent_reservation_in_read_resv: "
26414 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26415 			rval = EFAULT;
26416 			goto done;
26417 		}
26418 		break;
26419 	}
26420 #else /* ! _MULTI_DATAMODEL */
26421 	copysz = sizeof (mhioc_resv_desc_list_t);
26422 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26423 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26424 		    "sd_persistent_reservation_in_read_resv: "
26425 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26426 		rval = EFAULT;
26427 		goto done;
26428 	}
26429 #endif /* ! _MULTI_DATAMODEL */
26430 
26431 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26432 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26433 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26434 
26435 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
26436 	    data_len, data_bufp)) != 0) {
26437 		goto done;
26438 	}
26439 	in = (sd_prin_readresv_t *)data_bufp;
26440 	ptr->generation = BE_32(in->generation);
26441 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26442 
26443 	/*
26444 	 * Return the min(listsize, listlen( keys
26445 	 */
26446 #ifdef _MULTI_DATAMODEL
26447 
26448 	switch (ddi_model_convert_from(flag & FMODELS)) {
26449 	case DDI_MODEL_ILP32:
26450 		resvlist32.listlen = resvlist.listlen;
26451 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26452 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26453 			    "sd_persistent_reservation_in_read_resv: "
26454 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26455 			rval = EFAULT;
26456 			goto done;
26457 		}
26458 		break;
26459 
26460 	case DDI_MODEL_NONE:
26461 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26462 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26463 			    "sd_persistent_reservation_in_read_resv: "
26464 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26465 			rval = EFAULT;
26466 			goto done;
26467 		}
26468 		break;
26469 	}
26470 
26471 #else /* ! _MULTI_DATAMODEL */
26472 
26473 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26474 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26475 		    "sd_persistent_reservation_in_read_resv: "
26476 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26477 		rval = EFAULT;
26478 		goto done;
26479 	}
26480 
26481 #endif /* ! _MULTI_DATAMODEL */
26482 
26483 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
26484 	bufp = resvlist.list;
26485 	copysz = sizeof (mhioc_resv_desc_t);
26486 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
26487 	    i++, readresv_ptr++, bufp++) {
26488 
26489 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
26490 		    MHIOC_RESV_KEY_SIZE);
26491 		resvdesc.type  = readresv_ptr->type;
26492 		resvdesc.scope = readresv_ptr->scope;
26493 		resvdesc.scope_specific_addr =
26494 		    BE_32(readresv_ptr->scope_specific_addr);
26495 
26496 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
26497 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26498 			    "sd_persistent_reservation_in_read_resv: "
26499 			    "failed ddi_copyout: resvlist\n");
26500 			rval = EFAULT;
26501 			goto done;
26502 		}
26503 	}
26504 done:
26505 	kmem_free(data_bufp, data_len);
26506 	return (rval);
26507 }
26508 
26509 
26510 /*
26511  *    Function: sr_change_blkmode()
26512  *
26513  * Description: This routine is the driver entry point for handling CD-ROM
26514  *		block mode ioctl requests. Support for returning and changing
26515  *		the current block size in use by the device is implemented. The
26516  *		LBA size is changed via a MODE SELECT Block Descriptor.
26517  *
26518  *		This routine issues a mode sense with an allocation length of
26519  *		12 bytes for the mode page header and a single block descriptor.
26520  *
26521  *   Arguments: dev - the device 'dev_t'
26522  *		cmd - the request type; one of CDROMGBLKMODE (get) or
26523  *		      CDROMSBLKMODE (set)
26524  *		data - current block size or requested block size
26525  *		flag - this argument is a pass through to ddi_copyxxx() directly
26526  *		       from the mode argument of ioctl().
26527  *
26528  * Return Code: the code returned by sd_send_scsi_cmd()
26529  *		EINVAL if invalid arguments are provided
26530  *		EFAULT if ddi_copyxxx() fails
26531  *		ENXIO if fail ddi_get_soft_state
26532  *		EIO if invalid mode sense block descriptor length
26533  *
26534  */
26535 
26536 static int
26537 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
26538 {
26539 	struct sd_lun			*un = NULL;
26540 	struct mode_header		*sense_mhp, *select_mhp;
26541 	struct block_descriptor		*sense_desc, *select_desc;
26542 	int				current_bsize;
26543 	int				rval = EINVAL;
26544 	uchar_t				*sense = NULL;
26545 	uchar_t				*select = NULL;
26546 
26547 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
26548 
26549 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26550 		return (ENXIO);
26551 	}
26552 
26553 	/*
26554 	 * The block length is changed via the Mode Select block descriptor, the
26555 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
26556 	 * required as part of this routine. Therefore the mode sense allocation
26557 	 * length is specified to be the length of a mode page header and a
26558 	 * block descriptor.
26559 	 */
26560 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26561 
26562 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
26563 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
26564 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26565 		    "sr_change_blkmode: Mode Sense Failed\n");
26566 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26567 		return (rval);
26568 	}
26569 
26570 	/* Check the block descriptor len to handle only 1 block descriptor */
26571 	sense_mhp = (struct mode_header *)sense;
26572 	if ((sense_mhp->bdesc_length == 0) ||
26573 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
26574 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26575 		    "sr_change_blkmode: Mode Sense returned invalid block"
26576 		    " descriptor length\n");
26577 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26578 		return (EIO);
26579 	}
26580 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
26581 	current_bsize = ((sense_desc->blksize_hi << 16) |
26582 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
26583 
26584 	/* Process command */
26585 	switch (cmd) {
26586 	case CDROMGBLKMODE:
26587 		/* Return the block size obtained during the mode sense */
26588 		if (ddi_copyout(&current_bsize, (void *)data,
26589 		    sizeof (int), flag) != 0)
26590 			rval = EFAULT;
26591 		break;
26592 	case CDROMSBLKMODE:
26593 		/* Validate the requested block size */
26594 		switch (data) {
26595 		case CDROM_BLK_512:
26596 		case CDROM_BLK_1024:
26597 		case CDROM_BLK_2048:
26598 		case CDROM_BLK_2056:
26599 		case CDROM_BLK_2336:
26600 		case CDROM_BLK_2340:
26601 		case CDROM_BLK_2352:
26602 		case CDROM_BLK_2368:
26603 		case CDROM_BLK_2448:
26604 		case CDROM_BLK_2646:
26605 		case CDROM_BLK_2647:
26606 			break;
26607 		default:
26608 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26609 			    "sr_change_blkmode: "
26610 			    "Block Size '%ld' Not Supported\n", data);
26611 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26612 			return (EINVAL);
26613 		}
26614 
26615 		/*
26616 		 * The current block size matches the requested block size so
26617 		 * there is no need to send the mode select to change the size
26618 		 */
26619 		if (current_bsize == data) {
26620 			break;
26621 		}
26622 
26623 		/* Build the select data for the requested block size */
26624 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26625 		select_mhp = (struct mode_header *)select;
26626 		select_desc =
26627 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
26628 		/*
26629 		 * The LBA size is changed via the block descriptor, so the
26630 		 * descriptor is built according to the user data
26631 		 */
26632 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
26633 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
26634 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
26635 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
26636 
26637 		/* Send the mode select for the requested block size */
26638 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
26639 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26640 		    SD_PATH_STANDARD)) != 0) {
26641 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26642 			    "sr_change_blkmode: Mode Select Failed\n");
26643 			/*
26644 			 * The mode select failed for the requested block size,
26645 			 * so reset the data for the original block size and
26646 			 * send it to the target. The error is indicated by the
26647 			 * return value for the failed mode select.
26648 			 */
26649 			select_desc->blksize_hi  = sense_desc->blksize_hi;
26650 			select_desc->blksize_mid = sense_desc->blksize_mid;
26651 			select_desc->blksize_lo  = sense_desc->blksize_lo;
26652 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
26653 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26654 			    SD_PATH_STANDARD);
26655 		} else {
26656 			ASSERT(!mutex_owned(SD_MUTEX(un)));
26657 			mutex_enter(SD_MUTEX(un));
26658 			sd_update_block_info(un, (uint32_t)data, 0);
26659 
26660 			mutex_exit(SD_MUTEX(un));
26661 		}
26662 		break;
26663 	default:
26664 		/* should not reach here, but check anyway */
26665 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26666 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
26667 		rval = EINVAL;
26668 		break;
26669 	}
26670 
26671 	if (select) {
26672 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
26673 	}
26674 	if (sense) {
26675 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26676 	}
26677 	return (rval);
26678 }
26679 
26680 
26681 /*
26682  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26683  * implement driver support for getting and setting the CD speed. The command
26684  * set used will be based on the device type. If the device has not been
26685  * identified as MMC the Toshiba vendor specific mode page will be used. If
26686  * the device is MMC but does not support the Real Time Streaming feature
26687  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26688  * be used to read the speed.
26689  */
26690 
26691 /*
26692  *    Function: sr_change_speed()
26693  *
26694  * Description: This routine is the driver entry point for handling CD-ROM
26695  *		drive speed ioctl requests for devices supporting the Toshiba
26696  *		vendor specific drive speed mode page. Support for returning
26697  *		and changing the current drive speed in use by the device is
26698  *		implemented.
26699  *
26700  *   Arguments: dev - the device 'dev_t'
26701  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26702  *		      CDROMSDRVSPEED (set)
26703  *		data - current drive speed or requested drive speed
26704  *		flag - this argument is a pass through to ddi_copyxxx() directly
26705  *		       from the mode argument of ioctl().
26706  *
26707  * Return Code: the code returned by sd_send_scsi_cmd()
26708  *		EINVAL if invalid arguments are provided
26709  *		EFAULT if ddi_copyxxx() fails
26710  *		ENXIO if fail ddi_get_soft_state
26711  *		EIO if invalid mode sense block descriptor length
26712  */
26713 
26714 static int
26715 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26716 {
26717 	struct sd_lun			*un = NULL;
26718 	struct mode_header		*sense_mhp, *select_mhp;
26719 	struct mode_speed		*sense_page, *select_page;
26720 	int				current_speed;
26721 	int				rval = EINVAL;
26722 	int				bd_len;
26723 	uchar_t				*sense = NULL;
26724 	uchar_t				*select = NULL;
26725 
26726 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26727 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26728 		return (ENXIO);
26729 	}
26730 
26731 	/*
26732 	 * Note: The drive speed is being modified here according to a Toshiba
26733 	 * vendor specific mode page (0x31).
26734 	 */
26735 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26736 
26737 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
26738 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
26739 	    SD_PATH_STANDARD)) != 0) {
26740 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26741 		    "sr_change_speed: Mode Sense Failed\n");
26742 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26743 		return (rval);
26744 	}
26745 	sense_mhp  = (struct mode_header *)sense;
26746 
26747 	/* Check the block descriptor len to handle only 1 block descriptor */
26748 	bd_len = sense_mhp->bdesc_length;
26749 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26750 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26751 		    "sr_change_speed: Mode Sense returned invalid block "
26752 		    "descriptor length\n");
26753 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26754 		return (EIO);
26755 	}
26756 
26757 	sense_page = (struct mode_speed *)
26758 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
26759 	current_speed = sense_page->speed;
26760 
26761 	/* Process command */
26762 	switch (cmd) {
26763 	case CDROMGDRVSPEED:
26764 		/* Return the drive speed obtained during the mode sense */
26765 		if (current_speed == 0x2) {
26766 			current_speed = CDROM_TWELVE_SPEED;
26767 		}
26768 		if (ddi_copyout(&current_speed, (void *)data,
26769 		    sizeof (int), flag) != 0) {
26770 			rval = EFAULT;
26771 		}
26772 		break;
26773 	case CDROMSDRVSPEED:
26774 		/* Validate the requested drive speed */
26775 		switch ((uchar_t)data) {
26776 		case CDROM_TWELVE_SPEED:
26777 			data = 0x2;
26778 			/*FALLTHROUGH*/
26779 		case CDROM_NORMAL_SPEED:
26780 		case CDROM_DOUBLE_SPEED:
26781 		case CDROM_QUAD_SPEED:
26782 		case CDROM_MAXIMUM_SPEED:
26783 			break;
26784 		default:
26785 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26786 			    "sr_change_speed: "
26787 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
26788 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26789 			return (EINVAL);
26790 		}
26791 
26792 		/*
26793 		 * The current drive speed matches the requested drive speed so
26794 		 * there is no need to send the mode select to change the speed
26795 		 */
26796 		if (current_speed == data) {
26797 			break;
26798 		}
26799 
26800 		/* Build the select data for the requested drive speed */
26801 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26802 		select_mhp = (struct mode_header *)select;
26803 		select_mhp->bdesc_length = 0;
26804 		select_page =
26805 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26806 		select_page =
26807 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26808 		select_page->mode_page.code = CDROM_MODE_SPEED;
26809 		select_page->mode_page.length = 2;
26810 		select_page->speed = (uchar_t)data;
26811 
26812 		/* Send the mode select for the requested block size */
26813 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
26814 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26815 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
26816 			/*
26817 			 * The mode select failed for the requested drive speed,
26818 			 * so reset the data for the original drive speed and
26819 			 * send it to the target. The error is indicated by the
26820 			 * return value for the failed mode select.
26821 			 */
26822 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26823 			    "sr_drive_speed: Mode Select Failed\n");
26824 			select_page->speed = sense_page->speed;
26825 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
26826 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26827 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26828 		}
26829 		break;
26830 	default:
26831 		/* should not reach here, but check anyway */
26832 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26833 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
26834 		rval = EINVAL;
26835 		break;
26836 	}
26837 
26838 	if (select) {
26839 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
26840 	}
26841 	if (sense) {
26842 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26843 	}
26844 
26845 	return (rval);
26846 }
26847 
26848 
26849 /*
26850  *    Function: sr_atapi_change_speed()
26851  *
26852  * Description: This routine is the driver entry point for handling CD-ROM
26853  *		drive speed ioctl requests for MMC devices that do not support
26854  *		the Real Time Streaming feature (0x107).
26855  *
26856  *		Note: This routine will use the SET SPEED command which may not
26857  *		be supported by all devices.
26858  *
26859  *   Arguments: dev- the device 'dev_t'
26860  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
26861  *		     CDROMSDRVSPEED (set)
26862  *		data- current drive speed or requested drive speed
26863  *		flag- this argument is a pass through to ddi_copyxxx() directly
26864  *		      from the mode argument of ioctl().
26865  *
26866  * Return Code: the code returned by sd_send_scsi_cmd()
26867  *		EINVAL if invalid arguments are provided
26868  *		EFAULT if ddi_copyxxx() fails
26869  *		ENXIO if fail ddi_get_soft_state
26870  *		EIO if invalid mode sense block descriptor length
26871  */
26872 
26873 static int
26874 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26875 {
26876 	struct sd_lun			*un;
26877 	struct uscsi_cmd		*com = NULL;
26878 	struct mode_header_grp2		*sense_mhp;
26879 	uchar_t				*sense_page;
26880 	uchar_t				*sense = NULL;
26881 	char				cdb[CDB_GROUP5];
26882 	int				bd_len;
26883 	int				current_speed = 0;
26884 	int				max_speed = 0;
26885 	int				rval;
26886 
26887 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26888 
26889 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26890 		return (ENXIO);
26891 	}
26892 
26893 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
26894 
26895 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
26896 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
26897 	    SD_PATH_STANDARD)) != 0) {
26898 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26899 		    "sr_atapi_change_speed: Mode Sense Failed\n");
26900 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26901 		return (rval);
26902 	}
26903 
26904 	/* Check the block descriptor len to handle only 1 block descriptor */
26905 	sense_mhp = (struct mode_header_grp2 *)sense;
26906 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
26907 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26908 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26909 		    "sr_atapi_change_speed: Mode Sense returned invalid "
26910 		    "block descriptor length\n");
26911 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26912 		return (EIO);
26913 	}
26914 
26915 	/* Calculate the current and maximum drive speeds */
26916 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
26917 	current_speed = (sense_page[14] << 8) | sense_page[15];
26918 	max_speed = (sense_page[8] << 8) | sense_page[9];
26919 
26920 	/* Process the command */
26921 	switch (cmd) {
26922 	case CDROMGDRVSPEED:
26923 		current_speed /= SD_SPEED_1X;
26924 		if (ddi_copyout(&current_speed, (void *)data,
26925 		    sizeof (int), flag) != 0)
26926 			rval = EFAULT;
26927 		break;
26928 	case CDROMSDRVSPEED:
26929 		/* Convert the speed code to KB/sec */
26930 		switch ((uchar_t)data) {
26931 		case CDROM_NORMAL_SPEED:
26932 			current_speed = SD_SPEED_1X;
26933 			break;
26934 		case CDROM_DOUBLE_SPEED:
26935 			current_speed = 2 * SD_SPEED_1X;
26936 			break;
26937 		case CDROM_QUAD_SPEED:
26938 			current_speed = 4 * SD_SPEED_1X;
26939 			break;
26940 		case CDROM_TWELVE_SPEED:
26941 			current_speed = 12 * SD_SPEED_1X;
26942 			break;
26943 		case CDROM_MAXIMUM_SPEED:
26944 			current_speed = 0xffff;
26945 			break;
26946 		default:
26947 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26948 			    "sr_atapi_change_speed: invalid drive speed %d\n",
26949 			    (uchar_t)data);
26950 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26951 			return (EINVAL);
26952 		}
26953 
26954 		/* Check the request against the drive's max speed. */
26955 		if (current_speed != 0xffff) {
26956 			if (current_speed > max_speed) {
26957 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26958 				return (EINVAL);
26959 			}
26960 		}
26961 
26962 		/*
26963 		 * Build and send the SET SPEED command
26964 		 *
26965 		 * Note: The SET SPEED (0xBB) command used in this routine is
26966 		 * obsolete per the SCSI MMC spec but still supported in the
26967 		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
26968 		 * therefore the command is still implemented in this routine.
26969 		 */
26970 		bzero(cdb, sizeof (cdb));
26971 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
26972 		cdb[2] = (uchar_t)(current_speed >> 8);
26973 		cdb[3] = (uchar_t)current_speed;
26974 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26975 		com->uscsi_cdb	   = (caddr_t)cdb;
26976 		com->uscsi_cdblen  = CDB_GROUP5;
26977 		com->uscsi_bufaddr = NULL;
26978 		com->uscsi_buflen  = 0;
26979 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
26980 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
26981 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26982 		break;
26983 	default:
26984 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26985 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
26986 		rval = EINVAL;
26987 	}
26988 
26989 	if (sense) {
26990 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26991 	}
26992 	if (com) {
26993 		kmem_free(com, sizeof (*com));
26994 	}
26995 	return (rval);
26996 }
26997 
26998 
26999 /*
27000  *    Function: sr_pause_resume()
27001  *
27002  * Description: This routine is the driver entry point for handling CD-ROM
27003  *		pause/resume ioctl requests. This only affects the audio play
27004  *		operation.
27005  *
27006  *   Arguments: dev - the device 'dev_t'
27007  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27008  *		      for setting the resume bit of the cdb.
27009  *
27010  * Return Code: the code returned by sd_send_scsi_cmd()
27011  *		EINVAL if invalid mode specified
27012  *
27013  */
27014 
27015 static int
27016 sr_pause_resume(dev_t dev, int cmd)
27017 {
27018 	struct sd_lun		*un;
27019 	struct uscsi_cmd	*com;
27020 	char			cdb[CDB_GROUP1];
27021 	int			rval;
27022 
27023 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27024 		return (ENXIO);
27025 	}
27026 
27027 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27028 	bzero(cdb, CDB_GROUP1);
27029 	cdb[0] = SCMD_PAUSE_RESUME;
27030 	switch (cmd) {
27031 	case CDROMRESUME:
27032 		cdb[8] = 1;
27033 		break;
27034 	case CDROMPAUSE:
27035 		cdb[8] = 0;
27036 		break;
27037 	default:
27038 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27039 		    " Command '%x' Not Supported\n", cmd);
27040 		rval = EINVAL;
27041 		goto done;
27042 	}
27043 
27044 	com->uscsi_cdb    = cdb;
27045 	com->uscsi_cdblen = CDB_GROUP1;
27046 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27047 
27048 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27049 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27050 
27051 done:
27052 	kmem_free(com, sizeof (*com));
27053 	return (rval);
27054 }
27055 
27056 
27057 /*
27058  *    Function: sr_play_msf()
27059  *
27060  * Description: This routine is the driver entry point for handling CD-ROM
27061  *		ioctl requests to output the audio signals at the specified
27062  *		starting address and continue the audio play until the specified
27063  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27064  *		Frame (MSF) format.
27065  *
27066  *   Arguments: dev	- the device 'dev_t'
27067  *		data	- pointer to user provided audio msf structure,
27068  *		          specifying start/end addresses.
27069  *		flag	- this argument is a pass through to ddi_copyxxx()
27070  *		          directly from the mode argument of ioctl().
27071  *
27072  * Return Code: the code returned by sd_send_scsi_cmd()
27073  *		EFAULT if ddi_copyxxx() fails
27074  *		ENXIO if fail ddi_get_soft_state
27075  *		EINVAL if data pointer is NULL
27076  */
27077 
27078 static int
27079 sr_play_msf(dev_t dev, caddr_t data, int flag)
27080 {
27081 	struct sd_lun		*un;
27082 	struct uscsi_cmd	*com;
27083 	struct cdrom_msf	msf_struct;
27084 	struct cdrom_msf	*msf = &msf_struct;
27085 	char			cdb[CDB_GROUP1];
27086 	int			rval;
27087 
27088 	if (data == NULL) {
27089 		return (EINVAL);
27090 	}
27091 
27092 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27093 		return (ENXIO);
27094 	}
27095 
27096 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27097 		return (EFAULT);
27098 	}
27099 
27100 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27101 	bzero(cdb, CDB_GROUP1);
27102 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27103 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27104 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27105 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27106 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27107 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27108 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27109 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27110 	} else {
27111 		cdb[3] = msf->cdmsf_min0;
27112 		cdb[4] = msf->cdmsf_sec0;
27113 		cdb[5] = msf->cdmsf_frame0;
27114 		cdb[6] = msf->cdmsf_min1;
27115 		cdb[7] = msf->cdmsf_sec1;
27116 		cdb[8] = msf->cdmsf_frame1;
27117 	}
27118 	com->uscsi_cdb    = cdb;
27119 	com->uscsi_cdblen = CDB_GROUP1;
27120 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27121 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27122 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27123 	kmem_free(com, sizeof (*com));
27124 	return (rval);
27125 }
27126 
27127 
27128 /*
27129  *    Function: sr_play_trkind()
27130  *
27131  * Description: This routine is the driver entry point for handling CD-ROM
27132  *		ioctl requests to output the audio signals at the specified
27133  *		starting address and continue the audio play until the specified
27134  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27135  *		format.
27136  *
27137  *   Arguments: dev	- the device 'dev_t'
27138  *		data	- pointer to user provided audio track/index structure,
27139  *		          specifying start/end addresses.
27140  *		flag	- this argument is a pass through to ddi_copyxxx()
27141  *		          directly from the mode argument of ioctl().
27142  *
27143  * Return Code: the code returned by sd_send_scsi_cmd()
27144  *		EFAULT if ddi_copyxxx() fails
27145  *		ENXIO if fail ddi_get_soft_state
27146  *		EINVAL if data pointer is NULL
27147  */
27148 
27149 static int
27150 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27151 {
27152 	struct cdrom_ti		ti_struct;
27153 	struct cdrom_ti		*ti = &ti_struct;
27154 	struct uscsi_cmd	*com = NULL;
27155 	char			cdb[CDB_GROUP1];
27156 	int			rval;
27157 
27158 	if (data == NULL) {
27159 		return (EINVAL);
27160 	}
27161 
27162 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27163 		return (EFAULT);
27164 	}
27165 
27166 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27167 	bzero(cdb, CDB_GROUP1);
27168 	cdb[0] = SCMD_PLAYAUDIO_TI;
27169 	cdb[4] = ti->cdti_trk0;
27170 	cdb[5] = ti->cdti_ind0;
27171 	cdb[7] = ti->cdti_trk1;
27172 	cdb[8] = ti->cdti_ind1;
27173 	com->uscsi_cdb    = cdb;
27174 	com->uscsi_cdblen = CDB_GROUP1;
27175 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27176 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27177 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27178 	kmem_free(com, sizeof (*com));
27179 	return (rval);
27180 }
27181 
27182 
27183 /*
27184  *    Function: sr_read_all_subcodes()
27185  *
27186  * Description: This routine is the driver entry point for handling CD-ROM
27187  *		ioctl requests to return raw subcode data while the target is
27188  *		playing audio (CDROMSUBCODE).
27189  *
27190  *   Arguments: dev	- the device 'dev_t'
27191  *		data	- pointer to user provided cdrom subcode structure,
27192  *		          specifying the transfer length and address.
27193  *		flag	- this argument is a pass through to ddi_copyxxx()
27194  *		          directly from the mode argument of ioctl().
27195  *
27196  * Return Code: the code returned by sd_send_scsi_cmd()
27197  *		EFAULT if ddi_copyxxx() fails
27198  *		ENXIO if fail ddi_get_soft_state
27199  *		EINVAL if data pointer is NULL
27200  */
27201 
27202 static int
27203 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27204 {
27205 	struct sd_lun		*un = NULL;
27206 	struct uscsi_cmd	*com = NULL;
27207 	struct cdrom_subcode	*subcode = NULL;
27208 	int			rval;
27209 	size_t			buflen;
27210 	char			cdb[CDB_GROUP5];
27211 
27212 #ifdef _MULTI_DATAMODEL
27213 	/* To support ILP32 applications in an LP64 world */
27214 	struct cdrom_subcode32		cdrom_subcode32;
27215 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27216 #endif
27217 	if (data == NULL) {
27218 		return (EINVAL);
27219 	}
27220 
27221 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27222 		return (ENXIO);
27223 	}
27224 
27225 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27226 
27227 #ifdef _MULTI_DATAMODEL
27228 	switch (ddi_model_convert_from(flag & FMODELS)) {
27229 	case DDI_MODEL_ILP32:
27230 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27231 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27232 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27233 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27234 			return (EFAULT);
27235 		}
27236 		/* Convert the ILP32 uscsi data from the application to LP64 */
27237 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27238 		break;
27239 	case DDI_MODEL_NONE:
27240 		if (ddi_copyin(data, subcode,
27241 		    sizeof (struct cdrom_subcode), flag)) {
27242 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27243 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27244 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27245 			return (EFAULT);
27246 		}
27247 		break;
27248 	}
27249 #else /* ! _MULTI_DATAMODEL */
27250 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27251 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27252 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27253 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27254 		return (EFAULT);
27255 	}
27256 #endif /* _MULTI_DATAMODEL */
27257 
27258 	/*
27259 	 * Since MMC-2 expects max 3 bytes for length, check if the
27260 	 * length input is greater than 3 bytes
27261 	 */
27262 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27263 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27264 		    "sr_read_all_subcodes: "
27265 		    "cdrom transfer length too large: %d (limit %d)\n",
27266 		    subcode->cdsc_length, 0xFFFFFF);
27267 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27268 		return (EINVAL);
27269 	}
27270 
27271 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27272 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27273 	bzero(cdb, CDB_GROUP5);
27274 
27275 	if (un->un_f_mmc_cap == TRUE) {
27276 		cdb[0] = (char)SCMD_READ_CD;
27277 		cdb[2] = (char)0xff;
27278 		cdb[3] = (char)0xff;
27279 		cdb[4] = (char)0xff;
27280 		cdb[5] = (char)0xff;
27281 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27282 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27283 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27284 		cdb[10] = 1;
27285 	} else {
27286 		/*
27287 		 * Note: A vendor specific command (0xDF) is being used her to
27288 		 * request a read of all subcodes.
27289 		 */
27290 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27291 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27292 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27293 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27294 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27295 	}
27296 	com->uscsi_cdb	   = cdb;
27297 	com->uscsi_cdblen  = CDB_GROUP5;
27298 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27299 	com->uscsi_buflen  = buflen;
27300 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27301 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
27302 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27303 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27304 	kmem_free(com, sizeof (*com));
27305 	return (rval);
27306 }
27307 
27308 
27309 /*
27310  *    Function: sr_read_subchannel()
27311  *
27312  * Description: This routine is the driver entry point for handling CD-ROM
27313  *		ioctl requests to return the Q sub-channel data of the CD
27314  *		current position block. (CDROMSUBCHNL) The data includes the
27315  *		track number, index number, absolute CD-ROM address (LBA or MSF
27316  *		format per the user) , track relative CD-ROM address (LBA or MSF
27317  *		format per the user), control data and audio status.
27318  *
27319  *   Arguments: dev	- the device 'dev_t'
27320  *		data	- pointer to user provided cdrom sub-channel structure
27321  *		flag	- this argument is a pass through to ddi_copyxxx()
27322  *		          directly from the mode argument of ioctl().
27323  *
27324  * Return Code: the code returned by sd_send_scsi_cmd()
27325  *		EFAULT if ddi_copyxxx() fails
27326  *		ENXIO if fail ddi_get_soft_state
27327  *		EINVAL if data pointer is NULL
27328  */
27329 
static int
sr_read_subchannel(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchanel;
	struct cdrom_subchnl	*subchnl = &subchanel;
	char			cdb[CDB_GROUP1];
	caddr_t			buffer;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* cdsc_format in the user's request selects LBA vs. MSF addressing */
	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
		return (EFAULT);
	}

	/* 16 byte response: 4 byte header plus the Q sub-channel data block */
	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_SUBCHANNEL;
	/* Set the MSF bit based on the user requested address format */
	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
	/*
	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
	 * returned
	 */
	cdb[2] = 0x40;
	/*
	 * Set byte 3 to specify the return data format. A value of 0x01
	 * indicates that the CD-ROM current position should be returned.
	 */
	cdb[3] = 0x01;
	/* Allocation length (byte 8) matches the 16 byte response buffer */
	cdb[8] = 0x10;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen  = 16;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	if (rval != 0) {
		/* Command failed; nothing valid to copy out */
		kmem_free(buffer, 16);
		kmem_free(com, sizeof (*com));
		return (rval);
	}

	/*
	 * Process the returned Q sub-channel data.  The (uchar_t) casts on
	 * the address bytes below prevent sign extension of the (signed)
	 * char buffer when assembling the 32-bit LBA values.
	 *
	 * NOTE(review): unlike sr_read_tocentry(), the ADR nibble here is
	 * masked but not shifted down by 4 -- confirm this is intended.
	 */
	subchnl->cdsc_audiostatus = buffer[1];
	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
	subchnl->cdsc_trk	= buffer[6];
	subchnl->cdsc_ind	= buffer[7];
	if (subchnl->cdsc_format & CDROM_LBA) {
		subchnl->cdsc_absaddr.lba =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		subchnl->cdsc_reladdr.lba =
		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
		/* Drive reports MSF values in BCD; convert to binary */
		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
	} else {
		subchnl->cdsc_absaddr.msf.minute = buffer[9];
		subchnl->cdsc_absaddr.msf.second = buffer[10];
		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
		subchnl->cdsc_reladdr.msf.minute = buffer[13];
		subchnl->cdsc_reladdr.msf.second = buffer[14];
		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
	}
	kmem_free(buffer, 16);
	kmem_free(com, sizeof (*com));
	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
	    != 0) {
		return (EFAULT);
	}
	return (rval);
}
27420 
27421 
27422 /*
27423  *    Function: sr_read_tocentry()
27424  *
27425  * Description: This routine is the driver entry point for handling CD-ROM
27426  *		ioctl requests to read from the Table of Contents (TOC)
27427  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27428  *		fields, the starting address (LBA or MSF format per the user)
27429  *		and the data mode if the user specified track is a data track.
27430  *
27431  *		Note: The READ HEADER (0x44) command used in this routine is
27432  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
27434  *		therefore the command is still implemented in this routine.
27435  *
27436  *   Arguments: dev	- the device 'dev_t'
27437  *		data	- pointer to user provided toc entry structure,
27438  *			  specifying the track # and the address format
27439  *			  (LBA or MSF).
27440  *		flag	- this argument is a pass through to ddi_copyxxx()
27441  *		          directly from the mode argument of ioctl().
27442  *
27443  * Return Code: the code returned by sd_send_scsi_cmd()
27444  *		EFAULT if ddi_copyxxx() fails
27445  *		ENXIO if fail ddi_get_soft_state
27446  *		EINVAL if data pointer is NULL
27447  */
27448 
27449 static int
27450 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27451 {
27452 	struct sd_lun		*un = NULL;
27453 	struct uscsi_cmd	*com;
27454 	struct cdrom_tocentry	toc_entry;
27455 	struct cdrom_tocentry	*entry = &toc_entry;
27456 	caddr_t			buffer;
27457 	int			rval;
27458 	char			cdb[CDB_GROUP1];
27459 
27460 	if (data == NULL) {
27461 		return (EINVAL);
27462 	}
27463 
27464 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27465 	    (un->un_state == SD_STATE_OFFLINE)) {
27466 		return (ENXIO);
27467 	}
27468 
27469 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27470 		return (EFAULT);
27471 	}
27472 
27473 	/* Validate the requested track and address format */
27474 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27475 		return (EINVAL);
27476 	}
27477 
27478 	if (entry->cdte_track == 0) {
27479 		return (EINVAL);
27480 	}
27481 
27482 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27483 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27484 	bzero(cdb, CDB_GROUP1);
27485 
27486 	cdb[0] = SCMD_READ_TOC;
27487 	/* Set the MSF bit based on the user requested address format  */
27488 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27489 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27490 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27491 	} else {
27492 		cdb[6] = entry->cdte_track;
27493 	}
27494 
27495 	/*
27496 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27497 	 * (4 byte TOC response header + 8 byte track descriptor)
27498 	 */
27499 	cdb[8] = 12;
27500 	com->uscsi_cdb	   = cdb;
27501 	com->uscsi_cdblen  = CDB_GROUP1;
27502 	com->uscsi_bufaddr = buffer;
27503 	com->uscsi_buflen  = 0x0C;
27504 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27505 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27506 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27507 	if (rval != 0) {
27508 		kmem_free(buffer, 12);
27509 		kmem_free(com, sizeof (*com));
27510 		return (rval);
27511 	}
27512 
27513 	/* Process the toc entry */
27514 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27515 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27516 	if (entry->cdte_format & CDROM_LBA) {
27517 		entry->cdte_addr.lba =
27518 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27519 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27520 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27521 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27522 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27523 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27524 		/*
27525 		 * Send a READ TOC command using the LBA address format to get
27526 		 * the LBA for the track requested so it can be used in the
27527 		 * READ HEADER request
27528 		 *
27529 		 * Note: The MSF bit of the READ HEADER command specifies the
27530 		 * output format. The block address specified in that command
27531 		 * must be in LBA format.
27532 		 */
27533 		cdb[1] = 0;
27534 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27535 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27536 		if (rval != 0) {
27537 			kmem_free(buffer, 12);
27538 			kmem_free(com, sizeof (*com));
27539 			return (rval);
27540 		}
27541 	} else {
27542 		entry->cdte_addr.msf.minute	= buffer[9];
27543 		entry->cdte_addr.msf.second	= buffer[10];
27544 		entry->cdte_addr.msf.frame	= buffer[11];
27545 		/*
27546 		 * Send a READ TOC command using the LBA address format to get
27547 		 * the LBA for the track requested so it can be used in the
27548 		 * READ HEADER request
27549 		 *
27550 		 * Note: The MSF bit of the READ HEADER command specifies the
27551 		 * output format. The block address specified in that command
27552 		 * must be in LBA format.
27553 		 */
27554 		cdb[1] = 0;
27555 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27556 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27557 		if (rval != 0) {
27558 			kmem_free(buffer, 12);
27559 			kmem_free(com, sizeof (*com));
27560 			return (rval);
27561 		}
27562 	}
27563 
27564 	/*
27565 	 * Build and send the READ HEADER command to determine the data mode of
27566 	 * the user specified track.
27567 	 */
27568 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27569 	    (entry->cdte_track != CDROM_LEADOUT)) {
27570 		bzero(cdb, CDB_GROUP1);
27571 		cdb[0] = SCMD_READ_HEADER;
27572 		cdb[2] = buffer[8];
27573 		cdb[3] = buffer[9];
27574 		cdb[4] = buffer[10];
27575 		cdb[5] = buffer[11];
27576 		cdb[8] = 0x08;
27577 		com->uscsi_buflen = 0x08;
27578 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27579 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27580 		if (rval == 0) {
27581 			entry->cdte_datamode = buffer[0];
27582 		} else {
27583 			/*
27584 			 * READ HEADER command failed, since this is
27585 			 * obsoleted in one spec, its better to return
27586 			 * -1 for an invlid track so that we can still
27587 			 * recieve the rest of the TOC data.
27588 			 */
27589 			entry->cdte_datamode = (uchar_t)-1;
27590 		}
27591 	} else {
27592 		entry->cdte_datamode = (uchar_t)-1;
27593 	}
27594 
27595 	kmem_free(buffer, 12);
27596 	kmem_free(com, sizeof (*com));
27597 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27598 		return (EFAULT);
27599 
27600 	return (rval);
27601 }
27602 
27603 
27604 /*
27605  *    Function: sr_read_tochdr()
27606  *
27607  * Description: This routine is the driver entry point for handling CD-ROM
27608  * 		ioctl requests to read the Table of Contents (TOC) header
27609  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
27611  *
27612  *   Arguments: dev	- the device 'dev_t'
27613  *		data	- pointer to user provided toc header structure,
27614  *			  specifying the starting and ending track numbers.
27615  *		flag	- this argument is a pass through to ddi_copyxxx()
27616  *			  directly from the mode argument of ioctl().
27617  *
27618  * Return Code: the code returned by sd_send_scsi_cmd()
27619  *		EFAULT if ddi_copyxxx() fails
27620  *		ENXIO if fail ddi_get_soft_state
27621  *		EINVAL if data pointer is NULL
27622  */
27623 
27624 static int
27625 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27626 {
27627 	struct sd_lun		*un;
27628 	struct uscsi_cmd	*com;
27629 	struct cdrom_tochdr	toc_header;
27630 	struct cdrom_tochdr	*hdr = &toc_header;
27631 	char			cdb[CDB_GROUP1];
27632 	int			rval;
27633 	caddr_t			buffer;
27634 
27635 	if (data == NULL) {
27636 		return (EINVAL);
27637 	}
27638 
27639 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27640 	    (un->un_state == SD_STATE_OFFLINE)) {
27641 		return (ENXIO);
27642 	}
27643 
27644 	buffer = kmem_zalloc(4, KM_SLEEP);
27645 	bzero(cdb, CDB_GROUP1);
27646 	cdb[0] = SCMD_READ_TOC;
27647 	/*
27648 	 * Specifying a track number of 0x00 in the READ TOC command indicates
27649 	 * that the TOC header should be returned
27650 	 */
27651 	cdb[6] = 0x00;
27652 	/*
27653 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27654 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27655 	 */
27656 	cdb[8] = 0x04;
27657 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27658 	com->uscsi_cdb	   = cdb;
27659 	com->uscsi_cdblen  = CDB_GROUP1;
27660 	com->uscsi_bufaddr = buffer;
27661 	com->uscsi_buflen  = 0x04;
27662 	com->uscsi_timeout = 300;
27663 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27664 
27665 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27666 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27667 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27668 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27669 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27670 	} else {
27671 		hdr->cdth_trk0 = buffer[2];
27672 		hdr->cdth_trk1 = buffer[3];
27673 	}
27674 	kmem_free(buffer, 4);
27675 	kmem_free(com, sizeof (*com));
27676 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27677 		return (EFAULT);
27678 	}
27679 	return (rval);
27680 }
27681 
27682 
27683 /*
27684  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
27685  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
27686  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27687  * digital audio and extended architecture digital audio. These modes are
27688  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27689  * MMC specs.
27690  *
27691  * In addition to support for the various data formats these routines also
27692  * include support for devices that implement only the direct access READ
27693  * commands (0x08, 0x28), devices that implement the READ_CD commands
27694  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27695  * READ CDXA commands (0xD8, 0xDB)
27696  */
27697 
27698 /*
27699  *    Function: sr_read_mode1()
27700  *
27701  * Description: This routine is the driver entry point for handling CD-ROM
27702  *		ioctl read mode1 requests (CDROMREADMODE1).
27703  *
27704  *   Arguments: dev	- the device 'dev_t'
27705  *		data	- pointer to user provided cd read structure specifying
27706  *			  the lba buffer address and length.
27707  *		flag	- this argument is a pass through to ddi_copyxxx()
27708  *			  directly from the mode argument of ioctl().
27709  *
27710  * Return Code: the code returned by sd_send_scsi_cmd()
27711  *		EFAULT if ddi_copyxxx() fails
27712  *		ENXIO if fail ddi_get_soft_state
27713  *		EINVAL if data pointer is NULL
27714  */
27715 
27716 static int
27717 sr_read_mode1(dev_t dev, caddr_t data, int flag)
27718 {
27719 	struct sd_lun		*un;
27720 	struct cdrom_read	mode1_struct;
27721 	struct cdrom_read	*mode1 = &mode1_struct;
27722 	int			rval;
27723 #ifdef _MULTI_DATAMODEL
27724 	/* To support ILP32 applications in an LP64 world */
27725 	struct cdrom_read32	cdrom_read32;
27726 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27727 #endif /* _MULTI_DATAMODEL */
27728 
27729 	if (data == NULL) {
27730 		return (EINVAL);
27731 	}
27732 
27733 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27734 	    (un->un_state == SD_STATE_OFFLINE)) {
27735 		return (ENXIO);
27736 	}
27737 
27738 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27739 	    "sd_read_mode1: entry: un:0x%p\n", un);
27740 
27741 #ifdef _MULTI_DATAMODEL
27742 	switch (ddi_model_convert_from(flag & FMODELS)) {
27743 	case DDI_MODEL_ILP32:
27744 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27745 			return (EFAULT);
27746 		}
27747 		/* Convert the ILP32 uscsi data from the application to LP64 */
27748 		cdrom_read32tocdrom_read(cdrd32, mode1);
27749 		break;
27750 	case DDI_MODEL_NONE:
27751 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27752 			return (EFAULT);
27753 		}
27754 	}
27755 #else /* ! _MULTI_DATAMODEL */
27756 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
27757 		return (EFAULT);
27758 	}
27759 #endif /* _MULTI_DATAMODEL */
27760 
27761 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
27762 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
27763 
27764 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27765 	    "sd_read_mode1: exit: un:0x%p\n", un);
27766 
27767 	return (rval);
27768 }
27769 
27770 
27771 /*
27772  *    Function: sr_read_cd_mode2()
27773  *
27774  * Description: This routine is the driver entry point for handling CD-ROM
27775  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27776  *		support the READ CD (0xBE) command or the 1st generation
27777  *		READ CD (0xD4) command.
27778  *
27779  *   Arguments: dev	- the device 'dev_t'
27780  *		data	- pointer to user provided cd read structure specifying
27781  *			  the lba buffer address and length.
27782  *		flag	- this argument is a pass through to ddi_copyxxx()
27783  *			  directly from the mode argument of ioctl().
27784  *
27785  * Return Code: the code returned by sd_send_scsi_cmd()
27786  *		EFAULT if ddi_copyxxx() fails
27787  *		ENXIO if fail ddi_get_soft_state
27788  *		EINVAL if data pointer is NULL
27789  */
27790 
static int
sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	uchar_t			cdb[CDB_GROUP5];
	int			nblocks;
	int			rval;
#ifdef _MULTI_DATAMODEL
	/*  To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Select the read opcode based on the drive's capabilities */
	bzero(cdb, sizeof (cdb));
	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
		/* Read command supported by 1st generation atapi drives */
		cdb[0] = SCMD_READ_CDD4;
	} else {
		/* Universal CD Access Command */
		cdb[0] = SCMD_READ_CD;
	}

	/*
	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
	 */
	cdb[1] = CDROM_SECTOR_TYPE_MODE2;

	/* set the start address */
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/*
	 * set the transfer length (whole 2336-byte sectors only; any
	 * partial trailing sector of cdread_buflen is not transferred)
	 */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[6] = (uchar_t)(nblocks >> 16);
	cdb[7] = (uchar_t)(nblocks >> 8);
	cdb[8] = (uchar_t)nblocks;

	/* set the filter bits */
	cdb[9] = CDROM_READ_CD_USERDATA;

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	/* cdread_bufaddr is a user-space buffer; note UIO_USERSPACE below */
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	kmem_free(com, sizeof (*com));
	return (rval);
}
27879 
27880 
27881 /*
27882  *    Function: sr_read_mode2()
27883  *
27884  * Description: This routine is the driver entry point for handling CD-ROM
27885  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27886  *		do not support the READ CD (0xBE) command.
27887  *
27888  *   Arguments: dev	- the device 'dev_t'
27889  *		data	- pointer to user provided cd read structure specifying
27890  *			  the lba buffer address and length.
27891  *		flag	- this argument is a pass through to ddi_copyxxx()
27892  *			  directly from the mode argument of ioctl().
27893  *
27894  * Return Code: the code returned by sd_send_scsi_cmd()
27895  *		EFAULT if ddi_copyxxx() fails
27896  *		ENXIO if fail ddi_get_soft_state
27897  *		EINVAL if data pointer is NULL
27898  *		EIO if fail to reset block size
27899  *		EAGAIN if commands are in progress in the driver
27900  */
27901 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/* Change the device and soft state target block size to 2336 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/* set the start address */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (whole 2336 byte sectors only) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If sd_send_scsi_READ succeeded we still need to report
		 * an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
28037 
28038 
28039 /*
28040  *    Function: sr_sector_mode()
28041  *
28042  * Description: This utility function is used by sr_read_mode2 to set the target
28043  *		block size based on the user specified size. This is a legacy
28044  *		implementation based upon a vendor specific mode page
28045  *
 *   Arguments: dev	- the device 'dev_t'
 *		blksize	- the block size being set, either 2336
 *			  (SD_MODE2_BLKSIZE) or 512.
 *
 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
 *		sd_send_scsi_MODE_SELECT()
 *		ENXIO if fail ddi_get_soft_state
28054  */
28055 
static int
sr_sector_mode(dev_t dev, uint32_t blksize)
{
	struct sd_lun	*un;
	uchar_t		*sense;		/* MODE SENSE data, vendor page 0x81 */
	uchar_t		*select;	/* MODE SELECT parameter list */
	int		rval;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	sense = kmem_zalloc(20, KM_SLEEP);

	/* Note: This is a vendor specific mode page (0x81) */
	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
	    SD_PATH_STANDARD)) != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sr_sector_mode: Mode Sense failed\n");
		kmem_free(sense, 20);
		return (rval);
	}
	/*
	 * Build the MODE SELECT parameter list.  Byte 3 is the block
	 * descriptor length; select[10]/select[11] carry the low two bytes
	 * of the new block length within that descriptor.  Bytes 12-15
	 * appear to be vendor page 0x81 fields carried over from the sensed
	 * data; their exact semantics are drive specific -- see the vendor
	 * spec.
	 */
	select = kmem_zalloc(20, KM_SLEEP);
	select[3] = 0x08;			/* block descriptor length */
	select[10] = ((blksize >> 8) & 0xff);	/* block length, middle byte */
	select[11] = (blksize & 0xff);		/* block length, LSB */
	select[12] = 0x01;
	select[13] = 0x06;
	select[14] = sense[14];
	select[15] = sense[15];
	if (blksize == SD_MODE2_BLKSIZE) {
		/* presumably enables 2336-byte mode 2 format -- vendor bit */
		select[14] |= 0x01;
	}

	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sr_sector_mode: Mode Select failed\n");
	} else {
		/*
		 * Only update the softstate block size if we successfully
		 * changed the device block mode.
		 */
		mutex_enter(SD_MUTEX(un));
		sd_update_block_info(un, blksize, 0);
		mutex_exit(SD_MUTEX(un));
	}
	kmem_free(sense, 20);
	kmem_free(select, 20);
	return (rval);
}
28108 
28109 
28110 /*
28111  *    Function: sr_read_cdda()
28112  *
28113  * Description: This routine is the driver entry point for handling CD-ROM
28114  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
28115  *		the target supports CDDA these requests are handled via a vendor
28116  *		specific command (0xD8) If the target does not support CDDA
28117  *		these requests are handled via the READ CD command (0xBE).
28118  *
28119  *   Arguments: dev	- the device 'dev_t'
28120  *		data	- pointer to user provided CD-DA structure specifying
28121  *			  the track starting address, transfer length, and
28122  *			  subcode options.
28123  *		flag	- this argument is a pass through to ddi_copyxxx()
28124  *			  directly from the mode argument of ioctl().
28125  *
28126  * Return Code: the code returned by sd_send_scsi_cmd()
28127  *		EFAULT if ddi_copyxxx() fails
28128  *		ENXIO if fail ddi_get_soft_state
28129  *		EINVAL if invalid arguments are provided
28130  *		ENOTTY
28131  */
28132 
static int
sr_read_cdda(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com;
	struct cdrom_cdda		*cdda;
	int				rval;
	size_t				buflen;
	char				cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdda32	cdrom_cdda32;
	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Copy in the user's CD-DA request; layout depends on data model. */
	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: ddi_copyin Failed\n");
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdda->cdda_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdda->cdda_length, 0xFFFFFF);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/*
	 * Size the transfer buffer: the per-block byte count grows with
	 * the amount of subcode data requested alongside the audio data.
	 */
	switch (cdda->cdda_subcode) {
	case CDROM_DA_NO_SUBCODE:
		buflen = CDROM_BLK_2352 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBQ:
		buflen = CDROM_BLK_2368 * cdda->cdda_length;
		break;
	case CDROM_DA_ALL_SUBCODE:
		buflen = CDROM_BLK_2448 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBCODE_ONLY:
		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
		    cdda->cdda_subcode);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/* Build and send the command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_cfg_cdda == TRUE) {
		/*
		 * READ CD form of the command: bytes 2-5 hold the starting
		 * address (MSB first) and bytes 6-8 the 24-bit block count.
		 * NOTE(review): bytes 1/9 appear to select the sector type
		 * and returned fields -- confirm against the MMC READ CD
		 * command definition.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[1] = 0x04;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
		cdb[9] = 0x10;
		/*
		 * Map the ioctl subcode option onto the READ CD subchannel
		 * selection byte; subcode-only reads are not supported by
		 * this command form, so they fail with ENOTTY.
		 */
		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE :
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ :
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE :
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY :
			/* FALLTHROUGH */
		default :
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			kmem_free(com, sizeof (*com));
			return (ENOTTY);
		}
	} else {
		/*
		 * Vendor-specific READ CDDA form: the full 32-bit length
		 * occupies bytes 6-9 and the subcode option is passed
		 * through to the target unchanged in byte 10.
		 */
		cdb[0] = (char)SCMD_READ_CDDA;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
		cdb[10] = cdda->cdda_subcode;
	}

	/* The data phase transfers directly to/from the user's buffer. */
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
	com->uscsi_buflen = buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	kmem_free(cdda, sizeof (struct cdrom_cdda));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28280 
28281 
28282 /*
28283  *    Function: sr_read_cdxa()
28284  *
28285  * Description: This routine is the driver entry point for handling CD-ROM
28286  *		ioctl requests to return CD-XA (Extended Architecture) data.
28287  *		(CDROMCDXA).
28288  *
28289  *   Arguments: dev	- the device 'dev_t'
28290  *		data	- pointer to user provided CD-XA structure specifying
28291  *			  the data starting address, transfer length, and format
28292  *		flag	- this argument is a pass through to ddi_copyxxx()
28293  *			  directly from the mode argument of ioctl().
28294  *
28295  * Return Code: the code returned by sd_send_scsi_cmd()
28296  *		EFAULT if ddi_copyxxx() fails
28297  *		ENXIO if fail ddi_get_soft_state
28298  *		EINVAL if data pointer is NULL
28299  */
28300 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Copy in the user's CD-XA request; layout depends on data model. */
	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * The requested format determines both the per-block transfer size
	 * and the field-selection byte used with the READ CD command below.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable target: use READ CD with bytes 2-5 as the
		 * starting address (MSB first), bytes 6-8 as the 24-bit
		 * block count, and byte 9 as the field-selection flags.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here
		 * to request a read of all subcodes.  The full 32-bit length
		 * occupies bytes 6-9 and the format goes in byte 10.
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	/* The data phase transfers directly to/from the user's buffer. */
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28427 
28428 
28429 /*
28430  *    Function: sr_eject()
28431  *
28432  * Description: This routine is the driver entry point for handling CD-ROM
28433  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28434  *
28435  *   Arguments: dev	- the device 'dev_t'
28436  *
28437  * Return Code: the code returned by sd_send_scsi_cmd()
28438  */
28439 
28440 static int
28441 sr_eject(dev_t dev)
28442 {
28443 	struct sd_lun	*un;
28444 	int		rval;
28445 
28446 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28447 	    (un->un_state == SD_STATE_OFFLINE)) {
28448 		return (ENXIO);
28449 	}
28450 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
28451 	    SD_PATH_STANDARD)) != 0) {
28452 		return (rval);
28453 	}
28454 
28455 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
28456 	    SD_PATH_STANDARD);
28457 
28458 	if (rval == 0) {
28459 		mutex_enter(SD_MUTEX(un));
28460 		sr_ejected(un);
28461 		un->un_mediastate = DKIO_EJECTED;
28462 		cv_broadcast(&un->un_state_cv);
28463 		mutex_exit(SD_MUTEX(un));
28464 	}
28465 	return (rval);
28466 }
28467 
28468 
28469 /*
28470  *    Function: sr_ejected()
28471  *
28472  * Description: This routine updates the soft state structure to invalidate the
28473  *		geometry information after the media has been ejected or a
28474  *		media eject has been detected.
28475  *
28476  *   Arguments: un - driver soft state (unit) structure
28477  */
28478 
28479 static void
28480 sr_ejected(struct sd_lun *un)
28481 {
28482 	struct sd_errstats *stp;
28483 
28484 	ASSERT(un != NULL);
28485 	ASSERT(mutex_owned(SD_MUTEX(un)));
28486 
28487 	un->un_f_blockcount_is_valid	= FALSE;
28488 	un->un_f_tgt_blocksize_is_valid	= FALSE;
28489 	un->un_f_geometry_is_valid	= FALSE;
28490 
28491 	if (un->un_errstats != NULL) {
28492 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28493 		stp->sd_capacity.value.ui64 = 0;
28494 	}
28495 }
28496 
28497 
28498 /*
28499  *    Function: sr_check_wp()
28500  *
28501  * Description: This routine checks the write protection of a removable media
28502  *		disk via the write protect bit of the Mode Page Header device
28503  *		specific field.  This routine has been implemented to use the
28504  *		error recovery mode page for all device types.
28505  *		Note: In the future use a sd_send_scsi_MODE_SENSE() routine
28506  *
28507  *   Arguments: dev		- the device 'dev_t'
28508  *
28509  * Return Code: int indicating if the device is write protected (1) or not (0)
28510  *
28511  *     Context: Kernel thread.
28512  *
28513  */
28514 
static int
sr_check_wp(dev_t dev)
{
	struct sd_lun	*un;
	uchar_t		device_specific;
	uchar_t		*sense;
	int		hdrlen;
	int		rval;
	int		retry_flag = FALSE;

	/*
	 * Note: The return codes for this routine should be reworked to
	 * properly handle the case of a NULL softstate.
	 */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (FALSE);
	}

	/* ATAPI devices get one retry on a failed mode sense; see below. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		retry_flag = TRUE;
	}

retry:
	if (un->un_f_cfg_is_atapi == TRUE) {
		/*
		 * The mode page contents are not required; set the allocation
		 * length for the mode page header only
		 */
		hdrlen = MODE_HEADER_LENGTH_GRP2;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
		/*
		 * Note: device_specific is read even if the command failed;
		 * the buffer is zero-filled, and the value is only consulted
		 * below when rval == 0.
		 */
		device_specific =
		    ((struct mode_header_grp2 *)sense)->device_specific;
	} else {
		/* Non-ATAPI: 6-byte MODE SENSE with the short header. */
		hdrlen = MODE_HEADER_LENGTH;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
		device_specific =
		    ((struct mode_header *)sense)->device_specific;
	}

	if (rval != 0) {
		if ((un->un_f_cfg_is_atapi == TRUE) && (retry_flag)) {
			/*
			 * For an Atapi Zip drive, observed the drive
			 * reporting check condition for the first attempt.
			 * Sense data indicating power on or bus device/reset.
			 * Hence in case of failure need to try at least once
			 * for Atapi devices.
			 */
			retry_flag = FALSE;
			kmem_free(sense, hdrlen);
			goto retry;
		} else {
			/*
			 * Write protect mode sense failed; not all disks
			 * understand this query. Return FALSE assuming that
			 * these devices are not writable.
			 */
			rval = FALSE;
		}
	} else {
		/* The WP bit lives in the device-specific header byte. */
		if (device_specific & WRITE_PROTECT) {
			rval = TRUE;
		} else {
			rval = FALSE;
		}
	}
	kmem_free(sense, hdrlen);
	return (rval);
}
28588 
28589 
28590 /*
28591  *    Function: sr_volume_ctrl()
28592  *
28593  * Description: This routine is the driver entry point for handling CD-ROM
28594  *		audio output volume ioctl requests. (CDROMVOLCTRL)
28595  *
28596  *   Arguments: dev	- the device 'dev_t'
28597  *		data	- pointer to user audio volume control structure
28598  *		flag	- this argument is a pass through to ddi_copyxxx()
28599  *			  directly from the mode argument of ioctl().
28600  *
28601  * Return Code: the code returned by sd_send_scsi_cmd()
28602  *		EFAULT if ddi_copyxxx() fails
28603  *		ENXIO if fail ddi_get_soft_state
28604  *		EINVAL if data pointer is NULL
28605  *
28606  */
28607 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the user's requested channel volume levels. */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	/*
	 * Read the current audio-control mode page so that fields the user
	 * does not control can be echoed back unchanged in the mode select.
	 * ATAPI/MMC devices use the 10-byte (group 1) mode commands with the
	 * long header; legacy devices use the 6-byte (group 0) forms.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* The page follows the header plus any block descriptors. */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* Mode select data carries no length or block descriptors. */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* Mode select data carries no length or block descriptors. */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the updated page back with the matching command group. */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
28745 
28746 
28747 /*
28748  *    Function: sr_read_sony_session_offset()
28749  *
28750  * Description: This routine is the driver entry point for handling CD-ROM
28751  *		ioctl requests for session offset information. (CDROMREADOFFSET)
28752  *		The address of the first track in the last session of a
28753  *		multi-session CD-ROM is returned
28754  *
28755  *		Note: This routine uses a vendor specific key value in the
28756  *		command control field without implementing any vendor check here
28757  *		or in the ioctl routine.
28758  *
28759  *   Arguments: dev	- the device 'dev_t'
28760  *		data	- pointer to an int to hold the requested address
28761  *		flag	- this argument is a pass through to ddi_copyxxx()
28762  *			  directly from the mode argument of ioctl().
28763  *
28764  * Return Code: the code returned by sd_send_scsi_cmd()
28765  *		EFAULT if ddi_copyxxx() fails
28766  *		ENXIO if fail ddi_get_soft_state
28767  *		EINVAL if data pointer is NULL
28768  */
28769 
static int
sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	caddr_t			buffer;
	char			cdb[CDB_GROUP1];
	int			session_offset = 0;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Issue a READ TOC with a vendor-specific control byte (Sony). */
	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_TOC;
	/*
	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
	 * (4 byte TOC response header + 8 byte response data)
	 */
	cdb[8] = SONY_SESSION_OFFSET_LEN;
	/* Byte 9 is the control byte. A vendor specific value is used */
	cdb[9] = SONY_SESSION_OFFSET_KEY;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
		kmem_free(com, sizeof (*com));
		return (rval);
	}
	/*
	 * A validity marker in byte 1 gates the response; bytes 8-11 hold
	 * the big-endian session offset.  If the marker is absent, the
	 * initialized value of 0 is returned to the caller.
	 */
	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
		session_offset =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * Offset returned offset in current lbasize block's. Convert to
		 * 2k block's to return to the user
		 */
		if (un->un_tgt_blocksize == CDROM_BLK_512) {
			session_offset >>= 2;
		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
			session_offset >>= 1;
		}
	}

	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
		rval = EFAULT;
	}

	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
	kmem_free(com, sizeof (*com));
	return (rval);
}
28836 
28837 
28838 /*
28839  *    Function: sd_wm_cache_constructor()
28840  *
28841  * Description: Cache Constructor for the wmap cache for the read/modify/write
28842  * 		devices.
28843  *
28844  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28845  *		un	- sd_lun structure for the device.
28846  *		flag	- the km flags passed to constructor
28847  *
28848  * Return Code: 0 on success.
28849  *		-1 on failure.
28850  */
28851 
28852 /*ARGSUSED*/
28853 static int
28854 sd_wm_cache_constructor(void *wm, void *un, int flags)
28855 {
28856 	bzero(wm, sizeof (struct sd_w_map));
28857 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
28858 	return (0);
28859 }
28860 
28861 
28862 /*
28863  *    Function: sd_wm_cache_destructor()
28864  *
28865  * Description: Cache destructor for the wmap cache for the read/modify/write
28866  * 		devices.
28867  *
28868  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28869  *		un	- sd_lun structure for the device.
28870  */
28871 /*ARGSUSED*/
28872 static void
28873 sd_wm_cache_destructor(void *wm, void *un)
28874 {
28875 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
28876 }
28877 
28878 
28879 /*
28880  *    Function: sd_range_lock()
28881  *
28882  * Description: Lock the range of blocks specified as parameter to ensure
28883  *		that read, modify write is atomic and no other i/o writes
28884  *		to the same location. The range is specified in terms
28885  *		of start and end blocks. Block numbers are the actual
28886  *		media block numbers and not system.
28887  *
28888  *   Arguments: un	- sd_lun structure for the device.
28889  *		startb - The starting block number
28890  *		endb - The end block number
28891  *		typ - type of i/o - simple/read_modify_write
28892  *
28893  * Return Code: wm  - pointer to the wmap structure.
28894  *
28895  *     Context: This routine can sleep.
28896  */
28897 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;	/* the map we will eventually own */
	struct sd_w_map *sl_wmp = NULL;	/* map we sleep on while it is busy */
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Simple state machine, run entirely under SD_MUTEX except where
	 * noted.  Terminates only once a wmap covering [startb, endb] has
	 * been marked busy and linked onto un_wm.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			if (!(sl_wmp->wm_flags & SD_WM_BUSY)) {
				/*
				 * The map was released while we slept; if we
				 * were carrying a spare off-list wmp, free it
				 * and adopt sl_wmp instead.
				 */
				if (wmp != NULL)
					CHK_N_FREEWMP(un, wmp);
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
29039 
29040 
29041 /*
29042  *    Function: sd_get_range()
29043  *
29044  * Description: Find if there any overlapping I/O to this one
29045  *		Returns the write-map of 1st such I/O, NULL otherwise.
29046  *
29047  *   Arguments: un	- sd_lun structure for the device.
29048  *		startb - The starting block number
29049  *		endb - The end block number
29050  *
29051  * Return Code: wm  - pointer to the wmap structure.
29052  */
29053 
29054 static struct sd_w_map *
29055 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29056 {
29057 	struct sd_w_map *wmp;
29058 
29059 	ASSERT(un != NULL);
29060 
29061 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29062 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29063 			continue;
29064 		}
29065 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29066 			break;
29067 		}
29068 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29069 			break;
29070 		}
29071 	}
29072 
29073 	return (wmp);
29074 }
29075 
29076 
29077 /*
29078  *    Function: sd_free_inlist_wmap()
29079  *
29080  * Description: Unlink and free a write map struct.
29081  *
29082  *   Arguments: un      - sd_lun structure for the device.
29083  *		wmp	- sd_w_map which needs to be unlinked.
29084  */
29085 
29086 static void
29087 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29088 {
29089 	ASSERT(un != NULL);
29090 
29091 	if (un->un_wm == wmp) {
29092 		un->un_wm = wmp->wm_next;
29093 	} else {
29094 		wmp->wm_prev->wm_next = wmp->wm_next;
29095 	}
29096 
29097 	if (wmp->wm_next) {
29098 		wmp->wm_next->wm_prev = wmp->wm_prev;
29099 	}
29100 
29101 	wmp->wm_next = wmp->wm_prev = NULL;
29102 
29103 	kmem_cache_free(un->un_wm_cache, wmp);
29104 }
29105 
29106 
29107 /*
29108  *    Function: sd_range_unlock()
29109  *
29110  * Description: Unlock the range locked by wm.
29111  *		Free write map if nobody else is waiting on it.
29112  *
29113  *   Arguments: un      - sd_lun structure for the device.
29114  *              wmp     - sd_w_map which needs to be unlinked.
29115  */
29116 
29117 static void
29118 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29119 {
29120 	ASSERT(un != NULL);
29121 	ASSERT(wm != NULL);
29122 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29123 
29124 	mutex_enter(SD_MUTEX(un));
29125 
29126 	if (wm->wm_flags & SD_WTYPE_RMW) {
29127 		un->un_rmw_count--;
29128 	}
29129 
29130 	if (wm->wm_wanted_count) {
29131 		wm->wm_flags = 0;
29132 		/*
29133 		 * Broadcast that the wmap is available now.
29134 		 */
29135 		cv_broadcast(&wm->wm_avail);
29136 	} else {
29137 		/*
29138 		 * If no one is waiting on the map, it should be free'ed.
29139 		 */
29140 		sd_free_inlist_wmap(un, wm);
29141 	}
29142 
29143 	mutex_exit(SD_MUTEX(un));
29144 }
29145 
29146 
29147 /*
29148  *    Function: sd_read_modify_write_task
29149  *
29150  * Description: Called from a taskq thread to initiate the write phase of
29151  *		a read-modify-write request.  This is used for targets where
29152  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29153  *
29154  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29155  *
29156  *     Context: Called under taskq thread context.
29157  */
29158 
29159 static void
29160 sd_read_modify_write_task(void *arg)
29161 {
29162 	struct sd_mapblocksize_info	*bsp;
29163 	struct buf	*bp;
29164 	struct sd_xbuf	*xp;
29165 	struct sd_lun	*un;
29166 
29167 	bp = arg;	/* The bp is given in arg */
29168 	ASSERT(bp != NULL);
29169 
29170 	/* Get the pointer to the layer-private data struct */
29171 	xp = SD_GET_XBUF(bp);
29172 	ASSERT(xp != NULL);
29173 	bsp = xp->xb_private;
29174 	ASSERT(bsp != NULL);
29175 
29176 	un = SD_GET_UN(bp);
29177 	ASSERT(un != NULL);
29178 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29179 
29180 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29181 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29182 
29183 	/*
29184 	 * This is the write phase of a read-modify-write request, called
29185 	 * under the context of a taskq thread in response to the completion
29186 	 * of the read portion of the rmw request completing under interrupt
29187 	 * context. The write request must be sent from here down the iostart
29188 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29189 	 * we use the layer index saved in the layer-private data area.
29190 	 */
29191 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29192 
29193 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29194 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29195 }
29196 
29197 
29198 /*
29199  *    Function: sddump_do_read_of_rmw()
29200  *
29201  * Description: This routine will be called from sddump, If sddump is called
29202  *		with an I/O which not aligned on device blocksize boundary
29203  *		then the write has to be converted to read-modify-write.
29204  *		Do the read part here in order to keep sddump simple.
29205  *		Note - That the sd_mutex is held across the call to this
29206  *		routine.
29207  *
29208  *   Arguments: un	- sd_lun
29209  *		blkno	- block number in terms of media block size.
29210  *		nblk	- number of blocks.
29211  *		bpp	- pointer to pointer to the buf structure. On return
29212  *			from this function, *bpp points to the valid buffer
29213  *			to which the write has to be done.
29214  *
29215  * Return Code: 0 for success or errno-type return code
29216  */
29217 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
	struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	target_blocksize = un->un_tgt_blocksize;

	/*
	 * Drop sd_mutex across the allocations and the polled I/O below.
	 * It is re-acquired at "done" so that the caller's lock-held entry
	 * contract (see the block comment above) is preserved on return.
	 */
	mutex_exit(SD_MUTEX(un));

	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context: the command must run polled, without interrupts. */
	pkt->pkt_flags |= FLAG_NOINTR;

	/* Assume failure until a retry attempt succeeds. */
	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			/* Fetch sense ourselves only if the HBA did not. */
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			/* sd_reset_target() requires sd_mutex held. */
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/*
	 * On failure the buf is freed and *bpp is cleared; on success the
	 * caller takes ownership of the buf (and must free it).
	 */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	/* Re-acquire sd_mutex to match the lock state at entry. */
	mutex_enter(SD_MUTEX(un));
	return (err);
}
29342 
29343 
29344 /*
29345  *    Function: sd_failfast_flushq
29346  *
29347  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29348  *		in b_flags and move them onto the failfast queue, then kick
29349  *		off a thread to return all bp's on the failfast queue to
29350  *		their owners with an error set.
29351  *
29352  *   Arguments: un - pointer to the soft state struct for the instance.
29353  *
29354  *     Context: may execute in interrupt context.
29355  */
29356 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 * This is done by splicing the entire wait queue onto the
		 * tail of the failfast queue in one step.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* The wait queue is now empty. */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			/* Sever bp's forward link before re-queueing it. */
			bp->av_forw = NULL;

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 * Each buf is completed with EIO.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
29489 
29490 
29491 /*
29492  *    Function: sd_failfast_flushq_callback
29493  *
29494  * Description: Return TRUE if the given bp meets the criteria for failfast
29495  *		flushing. Used with ddi_xbuf_flushq(9F).
29496  *
29497  *   Arguments: bp - ptr to buf struct to be examined.
29498  *
29499  *     Context: Any
29500  */
29501 
29502 static int
29503 sd_failfast_flushq_callback(struct buf *bp)
29504 {
29505 	/*
29506 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29507 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29508 	 */
29509 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29510 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29511 }
29512 
29513 
29514 
29515 #if defined(__i386) || defined(__amd64)
29516 /*
29517  * Function: sd_setup_next_xfer
29518  *
29519  * Description: Prepare next I/O operation using DMA_PARTIAL
29520  *
29521  */
29522 
29523 static int
29524 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29525     struct scsi_pkt *pkt, struct sd_xbuf *xp)
29526 {
29527 	ssize_t	num_blks_not_xfered;
29528 	daddr_t	strt_blk_num;
29529 	ssize_t	bytes_not_xfered;
29530 	int	rval;
29531 
29532 	ASSERT(pkt->pkt_resid == 0);
29533 
29534 	/*
29535 	 * Calculate next block number and amount to be transferred.
29536 	 *
29537 	 * How much data NOT transfered to the HBA yet.
29538 	 */
29539 	bytes_not_xfered = xp->xb_dma_resid;
29540 
29541 	/*
29542 	 * figure how many blocks NOT transfered to the HBA yet.
29543 	 */
29544 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29545 
29546 	/*
29547 	 * set starting block number to the end of what WAS transfered.
29548 	 */
29549 	strt_blk_num = xp->xb_blkno +
29550 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29551 
29552 	/*
29553 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
29554 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
29555 	 * the disk mutex here.
29556 	 */
29557 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29558 	    strt_blk_num, num_blks_not_xfered);
29559 
29560 	if (rval == 0) {
29561 
29562 		/*
29563 		 * Success.
29564 		 *
29565 		 * Adjust things if there are still more blocks to be
29566 		 * transfered.
29567 		 */
29568 		xp->xb_dma_resid = pkt->pkt_resid;
29569 		pkt->pkt_resid = 0;
29570 
29571 		return (1);
29572 	}
29573 
29574 	/*
29575 	 * There's really only one possible return value from
29576 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
29577 	 * returns NULL.
29578 	 */
29579 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29580 
29581 	bp->b_resid = bp->b_bcount;
29582 	bp->b_flags |= B_ERROR;
29583 
29584 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29585 	    "Error setting up next portion of DMA transfer\n");
29586 
29587 	return (0);
29588 }
29589 #endif
29590 
29591 /*
29592  * Note: The following sd_faultinjection_ioctl( ) routines implement
29593  * driver support for handling fault injection for error analysis
29594  * causing faults in multiple layers of the driver.
29595  *
29596  */
29597 
29598 #ifdef SD_FAULT_INJECTION
29599 static uint_t   sd_fault_injection_on = 0;
29600 
29601 /*
29602  *    Function: sd_faultinjection_ioctl()
29603  *
29604  * Description: This routine is the driver entry point for handling
29605  *              faultinjection ioctls to inject errors into the
29606  *              layer model
29607  *
29608  *   Arguments: cmd	- the ioctl cmd recieved
29609  *		arg	- the arguments from user and returns
29610  */
29611 
29612 static void
29613 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
29614 
29615 	uint_t i;
29616 	uint_t rval;
29617 
29618 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29619 
29620 	mutex_enter(SD_MUTEX(un));
29621 
29622 	switch (cmd) {
29623 	case SDIOCRUN:
29624 		/* Allow pushed faults to be injected */
29625 		SD_INFO(SD_LOG_SDTEST, un,
29626 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29627 
29628 		sd_fault_injection_on = 1;
29629 
29630 		SD_INFO(SD_LOG_IOERR, un,
29631 		    "sd_faultinjection_ioctl: run finished\n");
29632 		break;
29633 
29634 	case SDIOCSTART:
29635 		/* Start Injection Session */
29636 		SD_INFO(SD_LOG_SDTEST, un,
29637 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29638 
29639 		sd_fault_injection_on = 0;
29640 		un->sd_injection_mask = 0xFFFFFFFF;
29641 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29642 			un->sd_fi_fifo_pkt[i] = NULL;
29643 			un->sd_fi_fifo_xb[i] = NULL;
29644 			un->sd_fi_fifo_un[i] = NULL;
29645 			un->sd_fi_fifo_arq[i] = NULL;
29646 		}
29647 		un->sd_fi_fifo_start = 0;
29648 		un->sd_fi_fifo_end = 0;
29649 
29650 		mutex_enter(&(un->un_fi_mutex));
29651 		un->sd_fi_log[0] = '\0';
29652 		un->sd_fi_buf_len = 0;
29653 		mutex_exit(&(un->un_fi_mutex));
29654 
29655 		SD_INFO(SD_LOG_IOERR, un,
29656 		    "sd_faultinjection_ioctl: start finished\n");
29657 		break;
29658 
29659 	case SDIOCSTOP:
29660 		/* Stop Injection Session */
29661 		SD_INFO(SD_LOG_SDTEST, un,
29662 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
29663 		sd_fault_injection_on = 0;
29664 		un->sd_injection_mask = 0x0;
29665 
29666 		/* Empty stray or unuseds structs from fifo */
29667 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29668 			if (un->sd_fi_fifo_pkt[i] != NULL) {
29669 				kmem_free(un->sd_fi_fifo_pkt[i],
29670 				    sizeof (struct sd_fi_pkt));
29671 			}
29672 			if (un->sd_fi_fifo_xb[i] != NULL) {
29673 				kmem_free(un->sd_fi_fifo_xb[i],
29674 				    sizeof (struct sd_fi_xb));
29675 			}
29676 			if (un->sd_fi_fifo_un[i] != NULL) {
29677 				kmem_free(un->sd_fi_fifo_un[i],
29678 				    sizeof (struct sd_fi_un));
29679 			}
29680 			if (un->sd_fi_fifo_arq[i] != NULL) {
29681 				kmem_free(un->sd_fi_fifo_arq[i],
29682 				    sizeof (struct sd_fi_arq));
29683 			}
29684 			un->sd_fi_fifo_pkt[i] = NULL;
29685 			un->sd_fi_fifo_un[i] = NULL;
29686 			un->sd_fi_fifo_xb[i] = NULL;
29687 			un->sd_fi_fifo_arq[i] = NULL;
29688 		}
29689 		un->sd_fi_fifo_start = 0;
29690 		un->sd_fi_fifo_end = 0;
29691 
29692 		SD_INFO(SD_LOG_IOERR, un,
29693 		    "sd_faultinjection_ioctl: stop finished\n");
29694 		break;
29695 
29696 	case SDIOCINSERTPKT:
29697 		/* Store a packet struct to be pushed onto fifo */
29698 		SD_INFO(SD_LOG_SDTEST, un,
29699 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
29700 
29701 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29702 
29703 		sd_fault_injection_on = 0;
29704 
29705 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
29706 		if (un->sd_fi_fifo_pkt[i] != NULL) {
29707 			kmem_free(un->sd_fi_fifo_pkt[i],
29708 			    sizeof (struct sd_fi_pkt));
29709 		}
29710 		if (arg != NULL) {
29711 			un->sd_fi_fifo_pkt[i] =
29712 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
29713 			if (un->sd_fi_fifo_pkt[i] == NULL) {
29714 				/* Alloc failed don't store anything */
29715 				break;
29716 			}
29717 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
29718 			    sizeof (struct sd_fi_pkt), 0);
29719 			if (rval == -1) {
29720 				kmem_free(un->sd_fi_fifo_pkt[i],
29721 				    sizeof (struct sd_fi_pkt));
29722 				un->sd_fi_fifo_pkt[i] = NULL;
29723 			}
29724 		} else {
29725 			SD_INFO(SD_LOG_IOERR, un,
29726 			    "sd_faultinjection_ioctl: pkt null\n");
29727 		}
29728 		break;
29729 
29730 	case SDIOCINSERTXB:
29731 		/* Store a xb struct to be pushed onto fifo */
29732 		SD_INFO(SD_LOG_SDTEST, un,
29733 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
29734 
29735 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29736 
29737 		sd_fault_injection_on = 0;
29738 
29739 		if (un->sd_fi_fifo_xb[i] != NULL) {
29740 			kmem_free(un->sd_fi_fifo_xb[i],
29741 			    sizeof (struct sd_fi_xb));
29742 			un->sd_fi_fifo_xb[i] = NULL;
29743 		}
29744 		if (arg != NULL) {
29745 			un->sd_fi_fifo_xb[i] =
29746 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
29747 			if (un->sd_fi_fifo_xb[i] == NULL) {
29748 				/* Alloc failed don't store anything */
29749 				break;
29750 			}
29751 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
29752 			    sizeof (struct sd_fi_xb), 0);
29753 
29754 			if (rval == -1) {
29755 				kmem_free(un->sd_fi_fifo_xb[i],
29756 				    sizeof (struct sd_fi_xb));
29757 				un->sd_fi_fifo_xb[i] = NULL;
29758 			}
29759 		} else {
29760 			SD_INFO(SD_LOG_IOERR, un,
29761 			    "sd_faultinjection_ioctl: xb null\n");
29762 		}
29763 		break;
29764 
29765 	case SDIOCINSERTUN:
29766 		/* Store a un struct to be pushed onto fifo */
29767 		SD_INFO(SD_LOG_SDTEST, un,
29768 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
29769 
29770 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29771 
29772 		sd_fault_injection_on = 0;
29773 
29774 		if (un->sd_fi_fifo_un[i] != NULL) {
29775 			kmem_free(un->sd_fi_fifo_un[i],
29776 			    sizeof (struct sd_fi_un));
29777 			un->sd_fi_fifo_un[i] = NULL;
29778 		}
29779 		if (arg != NULL) {
29780 			un->sd_fi_fifo_un[i] =
29781 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
29782 			if (un->sd_fi_fifo_un[i] == NULL) {
29783 				/* Alloc failed don't store anything */
29784 				break;
29785 			}
29786 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
29787 			    sizeof (struct sd_fi_un), 0);
29788 			if (rval == -1) {
29789 				kmem_free(un->sd_fi_fifo_un[i],
29790 				    sizeof (struct sd_fi_un));
29791 				un->sd_fi_fifo_un[i] = NULL;
29792 			}
29793 
29794 		} else {
29795 			SD_INFO(SD_LOG_IOERR, un,
29796 			    "sd_faultinjection_ioctl: un null\n");
29797 		}
29798 
29799 		break;
29800 
29801 	case SDIOCINSERTARQ:
29802 		/* Store a arq struct to be pushed onto fifo */
29803 		SD_INFO(SD_LOG_SDTEST, un,
29804 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
29805 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29806 
29807 		sd_fault_injection_on = 0;
29808 
29809 		if (un->sd_fi_fifo_arq[i] != NULL) {
29810 			kmem_free(un->sd_fi_fifo_arq[i],
29811 			    sizeof (struct sd_fi_arq));
29812 			un->sd_fi_fifo_arq[i] = NULL;
29813 		}
29814 		if (arg != NULL) {
29815 			un->sd_fi_fifo_arq[i] =
29816 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
29817 			if (un->sd_fi_fifo_arq[i] == NULL) {
29818 				/* Alloc failed don't store anything */
29819 				break;
29820 			}
29821 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
29822 			    sizeof (struct sd_fi_arq), 0);
29823 			if (rval == -1) {
29824 				kmem_free(un->sd_fi_fifo_arq[i],
29825 				    sizeof (struct sd_fi_arq));
29826 				un->sd_fi_fifo_arq[i] = NULL;
29827 			}
29828 
29829 		} else {
29830 			SD_INFO(SD_LOG_IOERR, un,
29831 			    "sd_faultinjection_ioctl: arq null\n");
29832 		}
29833 
29834 		break;
29835 
29836 	case SDIOCPUSH:
29837 		/* Push stored xb, pkt, un, and arq onto fifo */
29838 		sd_fault_injection_on = 0;
29839 
29840 		if (arg != NULL) {
29841 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
29842 			if (rval != -1 &&
29843 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29844 				un->sd_fi_fifo_end += i;
29845 			}
29846 		} else {
29847 			SD_INFO(SD_LOG_IOERR, un,
29848 			    "sd_faultinjection_ioctl: push arg null\n");
29849 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29850 				un->sd_fi_fifo_end++;
29851 			}
29852 		}
29853 		SD_INFO(SD_LOG_IOERR, un,
29854 		    "sd_faultinjection_ioctl: push to end=%d\n",
29855 		    un->sd_fi_fifo_end);
29856 		break;
29857 
29858 	case SDIOCRETRIEVE:
29859 		/* Return buffer of log from Injection session */
29860 		SD_INFO(SD_LOG_SDTEST, un,
29861 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
29862 
29863 		sd_fault_injection_on = 0;
29864 
29865 		mutex_enter(&(un->un_fi_mutex));
29866 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
29867 		    un->sd_fi_buf_len+1, 0);
29868 		mutex_exit(&(un->un_fi_mutex));
29869 
29870 		if (rval == -1) {
29871 			/*
29872 			 * arg is possibly invalid setting
29873 			 * it to NULL for return
29874 			 */
29875 			arg = NULL;
29876 		}
29877 		break;
29878 	}
29879 
29880 	mutex_exit(SD_MUTEX(un));
29881 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
29882 			    " exit\n");
29883 }
29884 
29885 
29886 /*
29887  *    Function: sd_injection_log()
29888  *
29889  * Description: This routine adds buff to the already existing injection log
29890  *              for retrieval via faultinjection_ioctl for use in fault
29891  *              detection and recovery
29892  *
29893  *   Arguments: buf - the string to add to the log
29894  */
29895 
29896 static void
29897 sd_injection_log(char *buf, struct sd_lun *un)
29898 {
29899 	uint_t len;
29900 
29901 	ASSERT(un != NULL);
29902 	ASSERT(buf != NULL);
29903 
29904 	mutex_enter(&(un->un_fi_mutex));
29905 
29906 	len = min(strlen(buf), 255);
29907 	/* Add logged value to Injection log to be returned later */
29908 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
29909 		uint_t	offset = strlen((char *)un->sd_fi_log);
29910 		char *destp = (char *)un->sd_fi_log + offset;
29911 		int i;
29912 		for (i = 0; i < len; i++) {
29913 			*destp++ = *buf++;
29914 		}
29915 		un->sd_fi_buf_len += len;
29916 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
29917 	}
29918 
29919 	mutex_exit(&(un->un_fi_mutex));
29920 }
29921 
29922 
29923 /*
29924  *    Function: sd_faultinjection()
29925  *
29926  * Description: This routine takes the pkt and changes its
29927  *		content based on error injection scenerio.
29928  *
29929  *   Arguments: pktp	- packet to be changed
29930  */
29931 
static void
sd_faultinjection(struct scsi_pkt *pktp)
{
	uint_t i;
	struct sd_fi_pkt *fi_pkt;
	struct sd_fi_xb *fi_xb;
	struct sd_fi_un *fi_un;
	struct sd_fi_arq *fi_arq;
	struct buf *bp;
	struct sd_xbuf *xb;
	struct sd_lun *un;

	ASSERT(pktp != NULL);

	/* pull bp xb and un from pktp */
	bp = (struct buf *)pktp->pkt_private;
	xb = SD_GET_XBUF(bp);
	un = SD_GET_UN(bp);

	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un,
	    "sd_faultinjection: entry Injection from sdintr\n");

	/* if injection is off, or the fifo is empty, return untouched */
	if (sd_fault_injection_on == 0 ||
		un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
		mutex_exit(SD_MUTEX(un));
		return;
	}


	/* take next set off fifo */
	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;

	fi_pkt = un->sd_fi_fifo_pkt[i];
	fi_xb = un->sd_fi_fifo_xb[i];
	fi_un = un->sd_fi_fifo_un[i];
	fi_arq = un->sd_fi_fifo_arq[i];


	/* set variables accordingly */
	/* set pkt if it was on fifo */
	if (fi_pkt != NULL) {
		/*
		 * SD_CONDSET overwrites each pkt field from the fi_pkt
		 * template and logs the change via sd_injection_log.
		 */
		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");

	}

	/* set xb if it was on fifo */
	if (fi_xb != NULL) {
		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
		SD_CONDSET(xb, xb, xb_victim_retry_count,
		    "xb_victim_retry_count");
		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");

		/*
		 * copy in block data from sense; a leading -1 byte marks
		 * "no sense data supplied" in the injected template
		 */
		if (fi_xb->xb_sense_data[0] != -1) {
			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
			    SENSE_LENGTH);
		}

		/*
		 * copy in extended sense codes
		 *
		 * NOTE(review): these SD_CONDSET calls cast the sd_xbuf
		 * pointer to scsi_extended_sense; presumably SD_CONDSET
		 * reads the es_* members out of fi_xb and stores them
		 * through that cast -- confirm against the SD_CONDSET
		 * macro definition.
		 */
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
		    "es_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
		    "es_key");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
		    "es_add_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
		    es_qual_code, "es_qual_code");
	}

	/* set un if it was on fifo */
	if (fi_un != NULL) {
		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
		SD_CONDSET(un, un, un_ctype, "un_ctype");
		SD_CONDSET(un, un, un_reset_retry_count,
		    "un_reset_retry_count");
		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
		SD_CONDSET(un, un, un_f_geometry_is_valid,
		    "un_f_geometry_is_valid");
		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
		    "un_f_allow_bus_device_reset");
		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");

	}

	/* copy in auto request sense if it was on fifo */
	if (fi_arq != NULL) {
		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
	}

	/* free structs */
	if (un->sd_fi_fifo_pkt[i] != NULL) {
		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
	}
	if (un->sd_fi_fifo_xb[i] != NULL) {
		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
	}
	if (un->sd_fi_fifo_un[i] != NULL) {
		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
	}
	if (un->sd_fi_fifo_arq[i] != NULL) {
		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
	}

	/*
	 * kmem_free does not gurantee to set to NULL
	 * since we uses these to determine if we set
	 * values or not lets confirm they are always
	 * NULL after free
	 */
	un->sd_fi_fifo_pkt[i] = NULL;
	un->sd_fi_fifo_un[i] = NULL;
	un->sd_fi_fifo_xb[i] = NULL;
	un->sd_fi_fifo_arq[i] = NULL;

	/* consume this fifo slot */
	un->sd_fi_fifo_start++;

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
}
30068 
30069 #endif /* SD_FAULT_INJECTION */
30070