1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/kstat.h>
41 #include <sys/vtrace.h>
42 #include <sys/note.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/efi_partition.h>
46 #include <sys/var.h>
47 #include <sys/aio_req.h>
48 
49 #ifdef __lock_lint
50 #define	_LP64
51 #define	__amd64
52 #endif
53 
54 #if (defined(__fibre))
55 /* Note: is there a leadville version of the following? */
56 #include <sys/fc4/fcal_linkapp.h>
57 #endif
58 #include <sys/taskq.h>
59 #include <sys/uuid.h>
60 #include <sys/byteorder.h>
61 #include <sys/sdt.h>
62 
63 #include "sd_xbuf.h"
64 
65 #include <sys/scsi/targets/sddef.h>
66 #include <sys/cmlb.h>
67 
68 
69 /*
70  * Loadable module info.
71  */
72 #if (defined(__fibre))
73 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
74 char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
75 #else
76 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
77 char _depends_on[]	= "misc/scsi misc/cmlb";
78 #endif
79 
80 /*
81  * Define the interconnect type, to allow the driver to distinguish
82  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
83  *
84  * This is really for backward compatibility. In the future, the driver
85  * should actually check the "interconnect-type" property as reported by
86  * the HBA; however at present this property is not defined by all HBAs,
87  * so we will use this #define (1) to permit the driver to run in
88  * backward-compatibility mode; and (2) to print a notification message
89  * if an FC HBA does not support the "interconnect-type" property.  The
90  * behavior of the driver will be to assume parallel SCSI behaviors unless
91  * the "interconnect-type" property is defined by the HBA **AND** has a
92  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
93  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
94  * Channel behaviors (as per the old ssd).  (Note that the
95  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
96  * will result in the driver assuming parallel SCSI behaviors.)
97  *
98  * (see common/sys/scsi/impl/services.h)
99  *
100  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
101  * since some FC HBAs may already support that, and there is some code in
102  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
103  * default would confuse that code, and besides things should work fine
104  * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
105  * "interconnect-type" property.
106  *
107  */
108 #if (defined(__fibre))
109 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
110 #else
111 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
112 #endif
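
/*
 * Illustrative sketch only (the driver performs the real check during
 * attach, in sd_unit_attach()): query the HBA for the "interconnect-type"
 * capability and fall back to the compile-time default when the HBA does
 * not define it.  The helper name below is hypothetical.
 */
#if 0
static void
sd_example_resolve_interconnect(struct sd_lun *un)
{
	int itype = scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", 1);

	if (itype == -1) {
		/* HBA does not report the capability: assume the default. */
		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
	} else {
		un->un_interconnect_type = itype;
	}
}
#endif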
113 
114 /*
115  * The name of the driver, established from the module name in _init.
116  */
117 static	char *sd_label			= NULL;
118 
119 /*
120  * Driver name is unfortunately prefixed on some driver.conf properties.
121  */
122 #if (defined(__fibre))
123 #define	sd_max_xfer_size		ssd_max_xfer_size
124 #define	sd_config_list			ssd_config_list
125 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
126 static	char *sd_config_list		= "ssd-config-list";
127 #else
128 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
129 static	char *sd_config_list		= "sd-config-list";
130 #endif
131 
132 /*
133  * Driver global variables
134  */
135 
136 #if (defined(__fibre))
137 /*
138  * These #defines are to avoid namespace collisions that occur because this
139  * code is currently used to compile two separate driver modules: sd and ssd.
140  * All global variables need to be treated this way (even if declared static)
141  * in order to allow the debugger to resolve the names properly.
142  * It is anticipated that in the near future the ssd module will be obsoleted,
143  * at which time this namespace issue should go away.
144  */
145 #define	sd_state			ssd_state
146 #define	sd_io_time			ssd_io_time
147 #define	sd_failfast_enable		ssd_failfast_enable
148 #define	sd_ua_retry_count		ssd_ua_retry_count
149 #define	sd_report_pfa			ssd_report_pfa
150 #define	sd_max_throttle			ssd_max_throttle
151 #define	sd_min_throttle			ssd_min_throttle
152 #define	sd_rot_delay			ssd_rot_delay
153 
154 #define	sd_retry_on_reservation_conflict	\
155 					ssd_retry_on_reservation_conflict
156 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
157 #define	sd_resv_conflict_name		ssd_resv_conflict_name
158 
159 #define	sd_component_mask		ssd_component_mask
160 #define	sd_level_mask			ssd_level_mask
161 #define	sd_debug_un			ssd_debug_un
162 #define	sd_error_level			ssd_error_level
163 
164 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
165 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
166 
167 #define	sd_tr				ssd_tr
168 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
169 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
170 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
171 #define	sd_check_media_time		ssd_check_media_time
172 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
173 #define	sd_label_mutex			ssd_label_mutex
174 #define	sd_detach_mutex			ssd_detach_mutex
175 #define	sd_log_buf			ssd_log_buf
176 #define	sd_log_mutex			ssd_log_mutex
177 
178 #define	sd_disk_table			ssd_disk_table
179 #define	sd_disk_table_size		ssd_disk_table_size
180 #define	sd_sense_mutex			ssd_sense_mutex
181 #define	sd_cdbtab			ssd_cdbtab
182 
183 #define	sd_cb_ops			ssd_cb_ops
184 #define	sd_ops				ssd_ops
185 #define	sd_additional_codes		ssd_additional_codes
186 #define	sd_tgops			ssd_tgops
187 
188 #define	sd_minor_data			ssd_minor_data
189 #define	sd_minor_data_efi		ssd_minor_data_efi
190 
191 #define	sd_tq				ssd_tq
192 #define	sd_wmr_tq			ssd_wmr_tq
193 #define	sd_taskq_name			ssd_taskq_name
194 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
195 #define	sd_taskq_minalloc		ssd_taskq_minalloc
196 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
197 
198 #define	sd_dump_format_string		ssd_dump_format_string
199 
200 #define	sd_iostart_chain		ssd_iostart_chain
201 #define	sd_iodone_chain			ssd_iodone_chain
202 
203 #define	sd_pm_idletime			ssd_pm_idletime
204 
205 #define	sd_force_pm_supported		ssd_force_pm_supported
206 
207 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
208 
209 #endif
210 
211 
212 #ifdef	SDDEBUG
213 int	sd_force_pm_supported		= 0;
214 #endif	/* SDDEBUG */
215 
216 void *sd_state				= NULL;
217 int sd_io_time				= SD_IO_TIME;
218 int sd_failfast_enable			= 1;
219 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
220 int sd_report_pfa			= 1;
221 int sd_max_throttle			= SD_MAX_THROTTLE;
222 int sd_min_throttle			= SD_MIN_THROTTLE;
223 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
224 int sd_qfull_throttle_enable		= TRUE;
225 
226 int sd_retry_on_reservation_conflict	= 1;
227 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
228 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
229 
230 static int sd_dtype_optical_bind	= -1;
231 
232 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
233 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
234 
235 /*
236  * Global data for debug logging. To enable debug printing, sd_component_mask
237  * and sd_level_mask should be set to the desired bit patterns as outlined in
238  * sddef.h.
239  */
240 uint_t	sd_component_mask		= 0x0;
241 uint_t	sd_level_mask			= 0x0;
242 struct	sd_lun *sd_debug_un		= NULL;
243 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
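
/*
 * For example, a kernel tunable sketch via /etc/system (use "ssd:" for
 * the fibre channel module):
 *	set sd:sd_component_mask=0xffffffff
 *	set sd:sd_level_mask=0xffffffff
 * The individual component and level bit definitions live in sddef.h.
 */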
244 
245 /* Note: these may go away in the future... */
246 static uint32_t	sd_xbuf_active_limit	= 512;
247 static uint32_t sd_xbuf_reserve_limit	= 16;
248 
249 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
250 
251 /*
252  * Timer value used to reset the throttle after it has been reduced
253  * (typically in response to TRAN_BUSY or STATUS_QFULL)
254  */
255 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
256 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
257 
258 /*
259  * Interval value associated with the media change scsi watch.
260  */
261 static int sd_check_media_time		= 3000000;
262 
263 /*
264  * Wait value used for in progress operations during a DDI_SUSPEND
265  */
266 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
267 
268 /*
269  * sd_label_mutex protects a static buffer used in the disk label
270  * component of the driver
271  */
272 static kmutex_t sd_label_mutex;
273 
274 /*
275  * sd_detach_mutex protects un_layer_count, un_detach_count, and
276  * un_opens_in_progress in the sd_lun structure.
277  */
278 static kmutex_t sd_detach_mutex;
279 
280 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
281 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
282 
283 /*
284  * Global buffer and mutex for debug logging
285  */
286 static char	sd_log_buf[1024];
287 static kmutex_t	sd_log_mutex;
288 
289 /*
290  * Structs and globals for recording attached lun information.
291  * This maintains a chain in which each node represents a SCSI controller;
292  * each node records the number of luns attached to each target connected
293  * to that controller.
294  * For parallel SCSI devices only.
295  */
296 struct sd_scsi_hba_tgt_lun {
297 	struct sd_scsi_hba_tgt_lun	*next;
298 	dev_info_t			*pdip;
299 	int				nlun[NTARGETS_WIDE];
300 };
301 
302 /*
303  * Flag to indicate the lun is attached or detached
304  */
305 #define	SD_SCSI_LUN_ATTACH	0
306 #define	SD_SCSI_LUN_DETACH	1
307 
308 static kmutex_t	sd_scsi_target_lun_mutex;
309 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;
310 
311 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
312     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))
313 
314 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
315     sd_scsi_target_lun_head))
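
/*
 * Illustrative sketch only (the driver's real logic lives in
 * sd_scsi_update_lun_on_target()): adjust the per-target lun count on the
 * chain node belonging to a given HBA dip.  The helper name is
 * hypothetical, and the real routine also allocates a new node when the
 * HBA is not yet on the chain.
 */
#if 0
static void
sd_example_update_lun_count(dev_info_t *pdip, int target, int flag)
{
	struct sd_scsi_hba_tgt_lun *cp;

	mutex_enter(&sd_scsi_target_lun_mutex);
	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip)
			break;
	}
	if ((cp != NULL) && (target >= 0) && (target < NTARGETS_WIDE)) {
		cp->nlun[target] +=
		    (flag == SD_SCSI_LUN_ATTACH) ? 1 : -1;
	}
	mutex_exit(&sd_scsi_target_lun_mutex);
}
#endif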
316 
317 /*
318  * "Smart" Probe Caching structs, globals, #defines, etc.
319  * For parallel SCSI and non-self-identifying devices only.
320  */
321 
322 /*
323  * The following resources and routines are implemented to support
324  * "smart" probing, which caches the scsi_probe() results in an array,
325  * in order to help avoid long probe times.
326  */
327 struct sd_scsi_probe_cache {
328 	struct	sd_scsi_probe_cache	*next;
329 	dev_info_t	*pdip;
330 	int		cache[NTARGETS_WIDE];
331 };
332 
333 static kmutex_t	sd_scsi_probe_cache_mutex;
334 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
335 
336 /*
337  * Really we only need protection on the head of the linked list, but
338  * better safe than sorry.
339  */
340 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
341     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
342 
343 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
344     sd_scsi_probe_cache_head))
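
/*
 * Illustrative sketch only (the driver's real logic lives in
 * sd_scsi_probe_with_cache()): consult the per-HBA cache and skip the
 * expensive scsi_probe() when an earlier probe of the same target got no
 * response.  The helper name is hypothetical, and the real routine also
 * records each new probe result in the cache.
 */
#if 0
static int
sd_example_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	dev_info_t *pdip = ddi_get_parent(devp->sd_dev);
	int tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev,
	    DDI_PROP_DONTPASS, "target", -1);
	struct sd_scsi_probe_cache *cp;

	if ((tgt < 0) || (tgt >= NTARGETS_WIDE))
		return (scsi_probe(devp, waitfn));

	mutex_enter(&sd_scsi_probe_cache_mutex);
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip)
			break;
	}
	if ((cp != NULL) && (cp->cache[tgt] == SCSIPROBE_NORESP)) {
		/* A prior probe saw no response; don't probe again. */
		mutex_exit(&sd_scsi_probe_cache_mutex);
		return (SCSIPROBE_NORESP);
	}
	mutex_exit(&sd_scsi_probe_cache_mutex);

	return (scsi_probe(devp, waitfn));
}
#endif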
345 
346 
347 /*
348  * Vendor specific data name property declarations
349  */
350 
351 #if defined(__fibre) || defined(__i386) || defined(__amd64)
352 
353 static sd_tunables seagate_properties = {
354 	SEAGATE_THROTTLE_VALUE,
355 	0,
356 	0,
357 	0,
358 	0,
359 	0,
360 	0,
361 	0,
362 	0
363 };
364 
365 
366 static sd_tunables fujitsu_properties = {
367 	FUJITSU_THROTTLE_VALUE,
368 	0,
369 	0,
370 	0,
371 	0,
372 	0,
373 	0,
374 	0,
375 	0
376 };
377 
378 static sd_tunables ibm_properties = {
379 	IBM_THROTTLE_VALUE,
380 	0,
381 	0,
382 	0,
383 	0,
384 	0,
385 	0,
386 	0,
387 	0
388 };
389 
390 static sd_tunables purple_properties = {
391 	PURPLE_THROTTLE_VALUE,
392 	0,
393 	0,
394 	PURPLE_BUSY_RETRIES,
395 	PURPLE_RESET_RETRY_COUNT,
396 	PURPLE_RESERVE_RELEASE_TIME,
397 	0,
398 	0,
399 	0
400 };
401 
402 static sd_tunables sve_properties = {
403 	SVE_THROTTLE_VALUE,
404 	0,
405 	0,
406 	SVE_BUSY_RETRIES,
407 	SVE_RESET_RETRY_COUNT,
408 	SVE_RESERVE_RELEASE_TIME,
409 	SVE_MIN_THROTTLE_VALUE,
410 	SVE_DISKSORT_DISABLED_FLAG,
411 	0
412 };
413 
414 static sd_tunables maserati_properties = {
415 	0,
416 	0,
417 	0,
418 	0,
419 	0,
420 	0,
421 	0,
422 	MASERATI_DISKSORT_DISABLED_FLAG,
423 	MASERATI_LUN_RESET_ENABLED_FLAG
424 };
425 
426 static sd_tunables pirus_properties = {
427 	PIRUS_THROTTLE_VALUE,
428 	0,
429 	PIRUS_NRR_COUNT,
430 	PIRUS_BUSY_RETRIES,
431 	PIRUS_RESET_RETRY_COUNT,
432 	0,
433 	PIRUS_MIN_THROTTLE_VALUE,
434 	PIRUS_DISKSORT_DISABLED_FLAG,
435 	PIRUS_LUN_RESET_ENABLED_FLAG
436 };
437 
438 #endif
439 
440 #if (defined(__sparc) && !defined(__fibre)) || \
441 	(defined(__i386) || defined(__amd64))
442 
443 
444 static sd_tunables elite_properties = {
445 	ELITE_THROTTLE_VALUE,
446 	0,
447 	0,
448 	0,
449 	0,
450 	0,
451 	0,
452 	0,
453 	0
454 };
455 
456 static sd_tunables st31200n_properties = {
457 	ST31200N_THROTTLE_VALUE,
458 	0,
459 	0,
460 	0,
461 	0,
462 	0,
463 	0,
464 	0,
465 	0
466 };
467 
468 #endif /* Fibre or not */
469 
470 static sd_tunables lsi_properties_scsi = {
471 	LSI_THROTTLE_VALUE,
472 	0,
473 	LSI_NOTREADY_RETRIES,
474 	0,
475 	0,
476 	0,
477 	0,
478 	0,
479 	0
480 };
481 
482 static sd_tunables symbios_properties = {
483 	SYMBIOS_THROTTLE_VALUE,
484 	0,
485 	SYMBIOS_NOTREADY_RETRIES,
486 	0,
487 	0,
488 	0,
489 	0,
490 	0,
491 	0
492 };
493 
494 static sd_tunables lsi_properties = {
495 	0,
496 	0,
497 	LSI_NOTREADY_RETRIES,
498 	0,
499 	0,
500 	0,
501 	0,
502 	0,
503 	0
504 };
505 
506 static sd_tunables lsi_oem_properties = {
507 	0,
508 	0,
509 	LSI_OEM_NOTREADY_RETRIES,
510 	0,
511 	0,
512 	0,
513 	0,
514 	0,
515 	0
516 };
517 
518 
519 
520 #if (defined(SD_PROP_TST))
521 
522 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
523 #define	SD_TST_THROTTLE_VAL	16
524 #define	SD_TST_NOTREADY_VAL	12
525 #define	SD_TST_BUSY_VAL		60
526 #define	SD_TST_RST_RETRY_VAL	36
527 #define	SD_TST_RSV_REL_TIME	60
528 
529 static sd_tunables tst_properties = {
530 	SD_TST_THROTTLE_VAL,
531 	SD_TST_CTYPE_VAL,
532 	SD_TST_NOTREADY_VAL,
533 	SD_TST_BUSY_VAL,
534 	SD_TST_RST_RETRY_VAL,
535 	SD_TST_RSV_REL_TIME,
536 	0,
537 	0,
538 	0
539 };
540 #endif
541 
542 /* This is similar to the ANSI toupper implementation */
543 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
544 
545 /*
546  * Static Driver Configuration Table
547  *
548  * This is the table of disks which need throttle adjustment (or, perhaps,
549  * something else as defined by the flags at a future time).  device_id
550  * is a string consisting of concatenated vid (vendor), pid (product/model)
551  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
552  * the parts of the string are as defined by the sizes in the scsi_inquiry
553  * structure.  Device type is searched as far as the device_id string is
554  * defined.  Flags defines which values are to be set in the driver from the
555  * properties list.
556  *
557  * Entries below which begin and end with a "*" are a special case.
558  * These do not have a specific vendor, and the string which follows
559  * can appear anywhere in the 16 byte PID portion of the inquiry data.
560  *
561  * Entries below which begin and end with a " " (blank) are a special
562  * case. The comparison function will treat multiple consecutive blanks
563  * as equivalent to a single blank. For example, this causes a
564  * sd_disk_table entry of " NEC CDROM " to match a device's id string
565  * of "NEC       CDROM".  (See the illustrative sketch after the table.)
566  *
567  * Note: The MD21 controller type has been obsoleted.
568  *	 ST318202F is a Legacy device
569  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
570  *	 made with an FC connection. The entries here are a legacy.
571  */
572 static sd_disk_config_t sd_disk_table[] = {
573 #if defined(__fibre) || defined(__i386) || defined(__amd64)
574 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
575 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
576 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
577 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
578 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
579 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
580 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
581 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
582 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
583 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
584 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
585 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
586 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
589 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
590 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
591 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
592 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
593 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
594 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
595 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
596 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
597 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
598 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
599 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
600 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
601 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
602 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
603 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
604 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
605 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
606 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
607 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
608 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
609 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
610 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
611 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
612 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
613 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
620 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
621 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
622 			SD_CONF_BSET_BSY_RETRY_COUNT|
623 			SD_CONF_BSET_RST_RETRIES|
624 			SD_CONF_BSET_RSV_REL_TIME,
625 		&purple_properties },
626 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
627 		SD_CONF_BSET_BSY_RETRY_COUNT|
628 		SD_CONF_BSET_RST_RETRIES|
629 		SD_CONF_BSET_RSV_REL_TIME|
630 		SD_CONF_BSET_MIN_THROTTLE|
631 		SD_CONF_BSET_DISKSORT_DISABLED,
632 		&sve_properties },
633 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
634 			SD_CONF_BSET_BSY_RETRY_COUNT|
635 			SD_CONF_BSET_RST_RETRIES|
636 			SD_CONF_BSET_RSV_REL_TIME,
637 		&purple_properties },
638 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
639 		SD_CONF_BSET_LUN_RESET_ENABLED,
640 		&maserati_properties },
641 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
642 		SD_CONF_BSET_NRR_COUNT|
643 		SD_CONF_BSET_BSY_RETRY_COUNT|
644 		SD_CONF_BSET_RST_RETRIES|
645 		SD_CONF_BSET_MIN_THROTTLE|
646 		SD_CONF_BSET_DISKSORT_DISABLED|
647 		SD_CONF_BSET_LUN_RESET_ENABLED,
648 		&pirus_properties },
649 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
650 		SD_CONF_BSET_NRR_COUNT|
651 		SD_CONF_BSET_BSY_RETRY_COUNT|
652 		SD_CONF_BSET_RST_RETRIES|
653 		SD_CONF_BSET_MIN_THROTTLE|
654 		SD_CONF_BSET_DISKSORT_DISABLED|
655 		SD_CONF_BSET_LUN_RESET_ENABLED,
656 		&pirus_properties },
657 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
658 		SD_CONF_BSET_NRR_COUNT|
659 		SD_CONF_BSET_BSY_RETRY_COUNT|
660 		SD_CONF_BSET_RST_RETRIES|
661 		SD_CONF_BSET_MIN_THROTTLE|
662 		SD_CONF_BSET_DISKSORT_DISABLED|
663 		SD_CONF_BSET_LUN_RESET_ENABLED,
664 		&pirus_properties },
665 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
666 		SD_CONF_BSET_NRR_COUNT|
667 		SD_CONF_BSET_BSY_RETRY_COUNT|
668 		SD_CONF_BSET_RST_RETRIES|
669 		SD_CONF_BSET_MIN_THROTTLE|
670 		SD_CONF_BSET_DISKSORT_DISABLED|
671 		SD_CONF_BSET_LUN_RESET_ENABLED,
672 		&pirus_properties },
673 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
674 		SD_CONF_BSET_NRR_COUNT|
675 		SD_CONF_BSET_BSY_RETRY_COUNT|
676 		SD_CONF_BSET_RST_RETRIES|
677 		SD_CONF_BSET_MIN_THROTTLE|
678 		SD_CONF_BSET_DISKSORT_DISABLED|
679 		SD_CONF_BSET_LUN_RESET_ENABLED,
680 		&pirus_properties },
681 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
682 		SD_CONF_BSET_NRR_COUNT|
683 		SD_CONF_BSET_BSY_RETRY_COUNT|
684 		SD_CONF_BSET_RST_RETRIES|
685 		SD_CONF_BSET_MIN_THROTTLE|
686 		SD_CONF_BSET_DISKSORT_DISABLED|
687 		SD_CONF_BSET_LUN_RESET_ENABLED,
688 		&pirus_properties },
689 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
690 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
691 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
692 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
693 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
694 #endif /* fibre or NON-sparc platforms */
695 #if ((defined(__sparc) && !defined(__fibre)) ||\
696 	(defined(__i386) || defined(__amd64)))
697 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
698 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
699 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
700 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
701 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
702 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
703 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
704 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
705 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
706 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
707 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
708 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
709 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
710 	    &symbios_properties },
711 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
712 	    &lsi_properties_scsi },
713 #if defined(__i386) || defined(__amd64)
714 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
715 				    | SD_CONF_BSET_READSUB_BCD
716 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
717 				    | SD_CONF_BSET_NO_READ_HEADER
718 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
719 
720 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
721 				    | SD_CONF_BSET_READSUB_BCD
722 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
723 				    | SD_CONF_BSET_NO_READ_HEADER
724 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
725 #endif /* __i386 || __amd64 */
726 #endif /* sparc NON-fibre or NON-sparc platforms */
727 
728 #if (defined(SD_PROP_TST))
729 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
730 				| SD_CONF_BSET_CTYPE
731 				| SD_CONF_BSET_NRR_COUNT
732 				| SD_CONF_BSET_FAB_DEVID
733 				| SD_CONF_BSET_NOCACHE
734 				| SD_CONF_BSET_BSY_RETRY_COUNT
735 				| SD_CONF_BSET_PLAYMSF_BCD
736 				| SD_CONF_BSET_READSUB_BCD
737 				| SD_CONF_BSET_READ_TOC_TRK_BCD
738 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
739 				| SD_CONF_BSET_NO_READ_HEADER
740 				| SD_CONF_BSET_READ_CD_XD4
741 				| SD_CONF_BSET_RST_RETRIES
742 				| SD_CONF_BSET_RSV_REL_TIME
743 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
744 #endif
745 };
746 
747 static const int sd_disk_table_size =
748 	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
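
/*
 * Illustrative sketch only of the blank-equivalence rule described above
 * the table (the driver's real comparison is sd_blank_cmp(), which also
 * bounds the comparison by the inquiry field sizes): a blank in the table
 * entry matches any run of blanks, including none, in the device's id
 * string.  The helper name is hypothetical.
 */
#if 0
static int
sd_example_blank_match(const char *pat, const char *id)
{
	while (*pat != '\0') {
		if (*pat == ' ') {
			/* One pattern blank eats any run of blanks. */
			while (*pat == ' ')
				pat++;
			while (*id == ' ')
				id++;
		} else if (*pat++ != *id++) {
			return (0);
		}
	}
	return (*id == '\0');	/* matched the entire id string */
}
#endif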
749 
750 
751 
752 #define	SD_INTERCONNECT_PARALLEL	0
753 #define	SD_INTERCONNECT_FABRIC		1
754 #define	SD_INTERCONNECT_FIBRE		2
755 #define	SD_INTERCONNECT_SSA		3
756 #define	SD_INTERCONNECT_SATA		4
757 #define	SD_IS_PARALLEL_SCSI(un)		\
758 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
759 #define	SD_IS_SERIAL(un)		\
760 	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)
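
/*
 * Note: by the above definitions a fibre, SSA, or fabric interconnect is
 * neither "parallel SCSI" nor "serial"; SD_IS_SERIAL() is true only for
 * SATA.
 */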
761 
762 /*
763  * Definitions used by device id registration routines
764  */
765 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
766 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
767 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
768 
769 static kmutex_t sd_sense_mutex = {0};
770 
771 /*
772  * Macros for updates of the driver state
773  */
774 #define	New_state(un, s)        \
775 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
776 #define	Restore_state(un)	\
777 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
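
/*
 * Note: Restore_state() must copy un_last_state into a temporary before
 * invoking New_state(): New_state() overwrites un_last_state with the
 * current state first, so passing un_last_state directly would read the
 * freshly overwritten value and leave un_state unchanged.
 */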
778 
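/*
 * Limits for each CDB group used when building transfer commands: each
 * row gives the CDB group/size, the group's opcode bits, the largest LBA
 * the address field can hold (21-bit for Group 0, 32-bit for Groups 1
 * and 5, 64-bit for Group 4), and the largest transfer length the count
 * field can encode (see struct sd_cdbinfo in sddef.h).
 */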
779 static struct sd_cdbinfo sd_cdbtab[] = {
780 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
781 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
782 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
783 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
784 };
785 
786 /*
787  * Specifies the number of seconds that must have elapsed since the last
788  * command completed for a device to be declared idle to the PM framework.
789  */
790 static int sd_pm_idletime = 1;
791 
792 /*
793  * Internal function prototypes
794  */
795 
796 #if (defined(__fibre))
797 /*
798  * These #defines are to avoid namespace collisions that occur because this
799  * code is currently used to compile two separate driver modules: sd and ssd.
800  * All function names need to be treated this way (even if declared static)
801  * in order to allow the debugger to resolve the names properly.
802  * It is anticipated that in the near future the ssd module will be obsoleted,
803  * at which time this ugliness should go away.
804  */
805 #define	sd_log_trace			ssd_log_trace
806 #define	sd_log_info			ssd_log_info
807 #define	sd_log_err			ssd_log_err
808 #define	sdprobe				ssdprobe
809 #define	sdinfo				ssdinfo
810 #define	sd_prop_op			ssd_prop_op
811 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
812 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
813 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
814 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
815 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
816 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
817 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
818 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
819 #define	sd_spin_up_unit			ssd_spin_up_unit
820 #define	sd_enable_descr_sense		ssd_enable_descr_sense
821 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
822 #define	sd_set_mmc_caps			ssd_set_mmc_caps
823 #define	sd_read_unit_properties		ssd_read_unit_properties
824 #define	sd_process_sdconf_file		ssd_process_sdconf_file
825 #define	sd_process_sdconf_table		ssd_process_sdconf_table
826 #define	sd_sdconf_id_match		ssd_sdconf_id_match
827 #define	sd_blank_cmp			ssd_blank_cmp
828 #define	sd_chk_vers1_data		ssd_chk_vers1_data
829 #define	sd_set_vers1_properties		ssd_set_vers1_properties
830 
831 #define	sd_get_physical_geometry	ssd_get_physical_geometry
832 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
833 #define	sd_update_block_info		ssd_update_block_info
834 #define	sd_register_devid		ssd_register_devid
835 #define	sd_get_devid			ssd_get_devid
836 #define	sd_create_devid			ssd_create_devid
837 #define	sd_write_deviceid		ssd_write_deviceid
838 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
839 #define	sd_setup_pm			ssd_setup_pm
840 #define	sd_create_pm_components		ssd_create_pm_components
841 #define	sd_ddi_suspend			ssd_ddi_suspend
842 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
843 #define	sd_ddi_resume			ssd_ddi_resume
844 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
845 #define	sdpower				ssdpower
846 #define	sdattach			ssdattach
847 #define	sddetach			ssddetach
848 #define	sd_unit_attach			ssd_unit_attach
849 #define	sd_unit_detach			ssd_unit_detach
850 #define	sd_set_unit_attributes		ssd_set_unit_attributes
851 #define	sd_create_errstats		ssd_create_errstats
852 #define	sd_set_errstats			ssd_set_errstats
853 #define	sd_set_pstats			ssd_set_pstats
854 #define	sddump				ssddump
855 #define	sd_scsi_poll			ssd_scsi_poll
856 #define	sd_send_polled_RQS		ssd_send_polled_RQS
857 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
858 #define	sd_init_event_callbacks		ssd_init_event_callbacks
859 #define	sd_event_callback		ssd_event_callback
860 #define	sd_cache_control		ssd_cache_control
861 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
862 #define	sd_make_device			ssd_make_device
863 #define	sdopen				ssdopen
864 #define	sdclose				ssdclose
865 #define	sd_ready_and_valid		ssd_ready_and_valid
866 #define	sdmin				ssdmin
867 #define	sdread				ssdread
868 #define	sdwrite				ssdwrite
869 #define	sdaread				ssdaread
870 #define	sdawrite			ssdawrite
871 #define	sdstrategy			ssdstrategy
872 #define	sdioctl				ssdioctl
873 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
874 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
875 #define	sd_checksum_iostart		ssd_checksum_iostart
876 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
877 #define	sd_pm_iostart			ssd_pm_iostart
878 #define	sd_core_iostart			ssd_core_iostart
879 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
880 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
881 #define	sd_checksum_iodone		ssd_checksum_iodone
882 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
883 #define	sd_pm_iodone			ssd_pm_iodone
884 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
885 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
886 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
887 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
888 #define	sd_buf_iodone			ssd_buf_iodone
889 #define	sd_uscsi_strategy		ssd_uscsi_strategy
890 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
891 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
892 #define	sd_uscsi_iodone			ssd_uscsi_iodone
893 #define	sd_xbuf_strategy		ssd_xbuf_strategy
894 #define	sd_xbuf_init			ssd_xbuf_init
895 #define	sd_pm_entry			ssd_pm_entry
896 #define	sd_pm_exit			ssd_pm_exit
897 
898 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
899 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
900 
901 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
902 #define	sdintr				ssdintr
903 #define	sd_start_cmds			ssd_start_cmds
904 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
905 #define	sd_bioclone_alloc		ssd_bioclone_alloc
906 #define	sd_bioclone_free		ssd_bioclone_free
907 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
908 #define	sd_shadow_buf_free		ssd_shadow_buf_free
909 #define	sd_print_transport_rejected_message	\
910 					ssd_print_transport_rejected_message
911 #define	sd_retry_command		ssd_retry_command
912 #define	sd_set_retry_bp			ssd_set_retry_bp
913 #define	sd_send_request_sense_command	ssd_send_request_sense_command
914 #define	sd_start_retry_command		ssd_start_retry_command
915 #define	sd_start_direct_priority_command	\
916 					ssd_start_direct_priority_command
917 #define	sd_return_failed_command	ssd_return_failed_command
918 #define	sd_return_failed_command_no_restart	\
919 					ssd_return_failed_command_no_restart
920 #define	sd_return_command		ssd_return_command
921 #define	sd_sync_with_callback		ssd_sync_with_callback
922 #define	sdrunout			ssdrunout
923 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
924 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
925 #define	sd_reduce_throttle		ssd_reduce_throttle
926 #define	sd_restore_throttle		ssd_restore_throttle
927 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
928 #define	sd_init_cdb_limits		ssd_init_cdb_limits
929 #define	sd_pkt_status_good		ssd_pkt_status_good
930 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
931 #define	sd_pkt_status_busy		ssd_pkt_status_busy
932 #define	sd_pkt_status_reservation_conflict	\
933 					ssd_pkt_status_reservation_conflict
934 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
935 #define	sd_handle_request_sense		ssd_handle_request_sense
936 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
937 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
938 #define	sd_validate_sense_data		ssd_validate_sense_data
939 #define	sd_decode_sense			ssd_decode_sense
940 #define	sd_print_sense_msg		ssd_print_sense_msg
941 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
942 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
943 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
944 #define	sd_sense_key_medium_or_hardware_error	\
945 					ssd_sense_key_medium_or_hardware_error
946 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
947 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
948 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
949 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
950 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
951 #define	sd_sense_key_default		ssd_sense_key_default
952 #define	sd_print_retry_msg		ssd_print_retry_msg
953 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
954 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
955 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
956 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
957 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
958 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
959 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
960 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
961 #define	sd_pkt_reason_default		ssd_pkt_reason_default
962 #define	sd_reset_target			ssd_reset_target
963 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
964 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
965 #define	sd_taskq_create			ssd_taskq_create
966 #define	sd_taskq_delete			ssd_taskq_delete
967 #define	sd_media_change_task		ssd_media_change_task
968 #define	sd_handle_mchange		ssd_handle_mchange
969 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
970 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
971 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
972 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
973 #define	sd_send_scsi_feature_GET_CONFIGURATION	\
974 					ssd_send_scsi_feature_GET_CONFIGURATION
975 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
976 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
977 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
978 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
979 					ssd_send_scsi_PERSISTENT_RESERVE_IN
980 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
981 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
982 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
983 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
984 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
985 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
986 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
987 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
988 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
989 #define	sd_alloc_rqs			ssd_alloc_rqs
990 #define	sd_free_rqs			ssd_free_rqs
991 #define	sd_dump_memory			ssd_dump_memory
992 #define	sd_get_media_info		ssd_get_media_info
993 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
994 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
995 #define	sd_setup_next_xfer		ssd_setup_next_xfer
996 #define	sd_dkio_get_temp		ssd_dkio_get_temp
997 #define	sd_check_mhd			ssd_check_mhd
998 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
999 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1000 #define	sd_sname			ssd_sname
1001 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1002 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1003 #define	sd_take_ownership		ssd_take_ownership
1004 #define	sd_reserve_release		ssd_reserve_release
1005 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1006 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1007 #define	sd_persistent_reservation_in_read_keys	\
1008 					ssd_persistent_reservation_in_read_keys
1009 #define	sd_persistent_reservation_in_read_resv	\
1010 					ssd_persistent_reservation_in_read_resv
1011 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1012 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1013 #define	sd_mhdioc_release		ssd_mhdioc_release
1014 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1015 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1016 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1017 #define	sr_change_blkmode		ssr_change_blkmode
1018 #define	sr_change_speed			ssr_change_speed
1019 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1020 #define	sr_pause_resume			ssr_pause_resume
1021 #define	sr_play_msf			ssr_play_msf
1022 #define	sr_play_trkind			ssr_play_trkind
1023 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1024 #define	sr_read_subchannel		ssr_read_subchannel
1025 #define	sr_read_tocentry		ssr_read_tocentry
1026 #define	sr_read_tochdr			ssr_read_tochdr
1027 #define	sr_read_cdda			ssr_read_cdda
1028 #define	sr_read_cdxa			ssr_read_cdxa
1029 #define	sr_read_mode1			ssr_read_mode1
1030 #define	sr_read_mode2			ssr_read_mode2
1031 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1032 #define	sr_sector_mode			ssr_sector_mode
1033 #define	sr_eject			ssr_eject
1034 #define	sr_ejected			ssr_ejected
1035 #define	sr_check_wp			ssr_check_wp
1036 #define	sd_check_media			ssd_check_media
1037 #define	sd_media_watch_cb		ssd_media_watch_cb
1038 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1039 #define	sr_volume_ctrl			ssr_volume_ctrl
1040 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1041 #define	sd_log_page_supported		ssd_log_page_supported
1042 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1043 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1044 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1045 #define	sd_range_lock			ssd_range_lock
1046 #define	sd_get_range			ssd_get_range
1047 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1048 #define	sd_range_unlock			ssd_range_unlock
1049 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1050 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1051 
1052 #define	sd_iostart_chain		ssd_iostart_chain
1053 #define	sd_iodone_chain			ssd_iodone_chain
1054 #define	sd_initpkt_map			ssd_initpkt_map
1055 #define	sd_destroypkt_map		ssd_destroypkt_map
1056 #define	sd_chain_type_map		ssd_chain_type_map
1057 #define	sd_chain_index_map		ssd_chain_index_map
1058 
1059 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1060 #define	sd_failfast_flushq		ssd_failfast_flushq
1061 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1062 
1063 #define	sd_is_lsi			ssd_is_lsi
1064 #define	sd_tg_rdwr			ssd_tg_rdwr
1065 #define	sd_tg_getinfo			ssd_tg_getinfo
1066 
1067 #endif	/* #if (defined(__fibre)) */
1068 
1069 
1070 int _init(void);
1071 int _fini(void);
1072 int _info(struct modinfo *modinfop);
1073 
1074 /*PRINTFLIKE3*/
1075 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1076 /*PRINTFLIKE3*/
1077 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1078 /*PRINTFLIKE3*/
1079 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1080 
1081 static int sdprobe(dev_info_t *devi);
1082 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1083     void **result);
1084 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1085     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1086 
1087 /*
1088  * Smart probe for parallel scsi
1089  */
1090 static void sd_scsi_probe_cache_init(void);
1091 static void sd_scsi_probe_cache_fini(void);
1092 static void sd_scsi_clear_probe_cache(void);
1093 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1094 
1095 /*
1096  * Attached luns on target for parallel scsi
1097  */
1098 static void sd_scsi_target_lun_init(void);
1099 static void sd_scsi_target_lun_fini(void);
1100 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
1101 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1102 
1103 static int	sd_spin_up_unit(struct sd_lun *un);
1104 #ifdef _LP64
1105 static void	sd_enable_descr_sense(struct sd_lun *un);
1106 static void	sd_reenable_dsense_task(void *arg);
1107 #endif /* _LP64 */
1108 
1109 static void	sd_set_mmc_caps(struct sd_lun *un);
1110 
1111 static void sd_read_unit_properties(struct sd_lun *un);
1112 static int  sd_process_sdconf_file(struct sd_lun *un);
1113 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1114     int *data_list, sd_tunables *values);
1115 static void sd_process_sdconf_table(struct sd_lun *un);
1116 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1117 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1118 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1119 	int list_len, char *dataname_ptr);
1120 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1121     sd_tunables *prop_list);
1122 
1123 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1124     int reservation_flag);
1125 static int  sd_get_devid(struct sd_lun *un);
1126 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1127 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1128 static int  sd_write_deviceid(struct sd_lun *un);
1129 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1130 static int  sd_check_vpd_page_support(struct sd_lun *un);
1131 
1132 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1133 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1134 
1135 static int  sd_ddi_suspend(dev_info_t *devi);
1136 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1137 static int  sd_ddi_resume(dev_info_t *devi);
1138 static int  sd_ddi_pm_resume(struct sd_lun *un);
1139 static int  sdpower(dev_info_t *devi, int component, int level);
1140 
1141 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1142 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1143 static int  sd_unit_attach(dev_info_t *devi);
1144 static int  sd_unit_detach(dev_info_t *devi);
1145 
1146 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1147 static void sd_create_errstats(struct sd_lun *un, int instance);
1148 static void sd_set_errstats(struct sd_lun *un);
1149 static void sd_set_pstats(struct sd_lun *un);
1150 
1151 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1152 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1153 static int  sd_send_polled_RQS(struct sd_lun *un);
1154 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1155 
1156 #if (defined(__fibre))
1157 /*
1158  * Event callbacks (photon)
1159  */
1160 static void sd_init_event_callbacks(struct sd_lun *un);
1161 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1162 #endif
1163 
1164 /*
1165  * Defines for sd_cache_control
1166  */
1167 
1168 #define	SD_CACHE_ENABLE		1
1169 #define	SD_CACHE_DISABLE	0
1170 #define	SD_CACHE_NOCHANGE	-1
1171 
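/*
 * sd_cache_control() applies one of the above values to each of the
 * caching mode page bits: rcd_flag for the read cache and wce_flag for
 * the write cache.  For example, a sketch of enabling the write cache
 * while leaving the read cache untouched:
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */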
1172 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1173 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1174 static dev_t sd_make_device(dev_info_t *devi);
1175 
1176 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1177 	uint64_t capacity);
1178 
1179 /*
1180  * Driver entry point functions.
1181  */
1182 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1183 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1184 static int  sd_ready_and_valid(struct sd_lun *un);
1185 
1186 static void sdmin(struct buf *bp);
1187 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1188 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1189 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1190 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1191 
1192 static int sdstrategy(struct buf *bp);
1193 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1194 
1195 /*
1196  * Function prototypes for layering functions in the iostart chain.
1197  */
1198 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1199 	struct buf *bp);
1200 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1201 	struct buf *bp);
1202 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1203 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1204 	struct buf *bp);
1205 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1206 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1207 
1208 /*
1209  * Function prototypes for layering functions in the iodone chain.
1210  */
1211 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1212 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1213 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1214 	struct buf *bp);
1215 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1216 	struct buf *bp);
1217 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1218 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1219 	struct buf *bp);
1220 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1221 
1222 /*
1223  * Prototypes for functions to support buf(9S) based IO.
1224  */
1225 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1226 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1227 static void sd_destroypkt_for_buf(struct buf *);
1228 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1229 	struct buf *bp, int flags,
1230 	int (*callback)(caddr_t), caddr_t callback_arg,
1231 	diskaddr_t lba, uint32_t blockcount);
1232 #if defined(__i386) || defined(__amd64)
1233 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1234 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1235 #endif /* defined(__i386) || defined(__amd64) */
1236 
1237 /*
1238  * Prototypes for functions to support USCSI IO.
1239  */
1240 static int sd_uscsi_strategy(struct buf *bp);
1241 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1242 static void sd_destroypkt_for_uscsi(struct buf *);
1243 
1244 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1245 	uchar_t chain_type, void *pktinfop);
1246 
1247 static int  sd_pm_entry(struct sd_lun *un);
1248 static void sd_pm_exit(struct sd_lun *un);
1249 
1250 static void sd_pm_idletimeout_handler(void *arg);
1251 
1252 /*
1253  * sd_core internal functions (used at the sd_core_io layer).
1254  */
1255 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1256 static void sdintr(struct scsi_pkt *pktp);
1257 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1258 
1259 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1260 	enum uio_seg dataspace, int path_flag);
1261 
1262 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1263 	daddr_t blkno, int (*func)(struct buf *));
1264 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1265 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1266 static void sd_bioclone_free(struct buf *bp);
1267 static void sd_shadow_buf_free(struct buf *bp);
1268 
1269 static void sd_print_transport_rejected_message(struct sd_lun *un,
1270 	struct sd_xbuf *xp, int code);
1271 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1272     void *arg, int code);
1273 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1274     void *arg, int code);
1275 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1276     void *arg, int code);
1277 
1278 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1279 	int retry_check_flag,
1280 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1281 		int c),
1282 	void *user_arg, int failure_code,  clock_t retry_delay,
1283 	void (*statp)(kstat_io_t *));
1284 
1285 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1286 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1287 
1288 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1289 	struct scsi_pkt *pktp);
1290 static void sd_start_retry_command(void *arg);
1291 static void sd_start_direct_priority_command(void *arg);
1292 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1293 	int errcode);
1294 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1295 	struct buf *bp, int errcode);
1296 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1297 static void sd_sync_with_callback(struct sd_lun *un);
1298 static int sdrunout(caddr_t arg);
1299 
1300 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1301 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1302 
1303 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1304 static void sd_restore_throttle(void *arg);
1305 
1306 static void sd_init_cdb_limits(struct sd_lun *un);
1307 
1308 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1309 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1310 
1311 /*
1312  * Error handling functions
1313  */
1314 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1315 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1316 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1317 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1318 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1319 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1320 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1321 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1322 
1323 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1324 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1325 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1326 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1327 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1328 	struct sd_xbuf *xp);
1329 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1330 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1331 
1332 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1333 	void *arg, int code);
1334 
1335 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1336 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1337 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1338 	uint8_t *sense_datap,
1339 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1340 static void sd_sense_key_not_ready(struct sd_lun *un,
1341 	uint8_t *sense_datap,
1342 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1343 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1344 	uint8_t *sense_datap,
1345 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1346 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1347 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1348 static void sd_sense_key_unit_attention(struct sd_lun *un,
1349 	uint8_t *sense_datap,
1350 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1351 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1352 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1353 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1354 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1355 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1356 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1357 static void sd_sense_key_default(struct sd_lun *un,
1358 	uint8_t *sense_datap,
1359 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1360 
1361 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1362 	void *arg, int flag);
1363 
1364 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1365 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1366 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1369 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1370 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1371 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1372 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1377 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1382 
1383 static void sd_start_stop_unit_callback(void *arg);
1384 static void sd_start_stop_unit_task(void *arg);
1385 
1386 static void sd_taskq_create(void);
1387 static void sd_taskq_delete(void);
1388 static void sd_media_change_task(void *arg);
1389 
1390 static int sd_handle_mchange(struct sd_lun *un);
1391 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1392 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1393 	uint32_t *lbap, int path_flag);
1394 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1395 	uint32_t *lbap, int path_flag);
1396 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1397 	int path_flag);
1398 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1399 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1400 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1401 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1402 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1403 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1404 	uchar_t usr_cmd, uchar_t *usr_bufp);
1405 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1406 	struct dk_callback *dkc);
1407 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1408 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1409 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1410 	uchar_t *bufaddr, uint_t buflen, int path_flag);
1411 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1412 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1413 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1414 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1415 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1416 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1417 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1418 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1419 	size_t buflen, daddr_t start_block, int path_flag);
1420 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1421 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1422 	path_flag)
1423 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1424 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1425 	path_flag)
1426 
1427 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1428 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1429 	uint16_t param_ptr, int path_flag);
1430 
1431 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1432 static void sd_free_rqs(struct sd_lun *un);
1433 
1434 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1435 	uchar_t *data, int len, int fmt);
1436 static void sd_panic_for_res_conflict(struct sd_lun *un);
1437 
1438 /*
1439  * Disk Ioctl Function Prototypes
1440  */
1441 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1442 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1443 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1444 
1445 /*
1446  * Multi-host Ioctl Prototypes
1447  */
1448 static int sd_check_mhd(dev_t dev, int interval);
1449 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1450 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1451 static char *sd_sname(uchar_t status);
1452 static void sd_mhd_resvd_recover(void *arg);
1453 static void sd_resv_reclaim_thread();
1454 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1455 static int sd_reserve_release(dev_t dev, int cmd);
1456 static void sd_rmv_resv_reclaim_req(dev_t dev);
1457 static void sd_mhd_reset_notify_cb(caddr_t arg);
1458 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1459 	mhioc_inkeys_t *usrp, int flag);
1460 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1461 	mhioc_inresvs_t *usrp, int flag);
1462 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1463 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_release(dev_t dev);
1465 static int sd_mhdioc_register_devid(dev_t dev);
1466 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1467 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1468 
1469 /*
1470  * SCSI removable prototypes
1471  */
1472 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1473 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_pause_resume(dev_t dev, int mode);
1476 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1477 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1478 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1488 static int sr_eject(dev_t dev);
1489 static void sr_ejected(register struct sd_lun *un);
1490 static int sr_check_wp(dev_t dev);
1491 static int sd_check_media(dev_t dev, enum dkio_state state);
1492 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1493 static void sd_delayed_cv_broadcast(void *arg);
1494 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1495 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1496 
1497 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1498 
1499 /*
1500  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1501  */
1502 static void sd_check_for_writable_cd(struct sd_lun *un, int path_flag);
1503 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1504 static void sd_wm_cache_destructor(void *wm, void *un);
1505 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1506 	daddr_t endb, ushort_t typ);
1507 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1508 	daddr_t endb);
1509 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1510 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1511 static void sd_read_modify_write_task(void *arg);
1512 static int
1513 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1514 	struct buf **bpp);
1515 
1516 
1517 /*
1518  * Function prototypes for failfast support.
1519  */
1520 static void sd_failfast_flushq(struct sd_lun *un);
1521 static int sd_failfast_flushq_callback(struct buf *bp);
1522 
1523 /*
1524  * Function prototypes to check for lsi devices
1525  */
1526 static void sd_is_lsi(struct sd_lun *un);
1527 
1528 /*
1529  * Function prototypes for x86 support
1530  */
1531 #if defined(__i386) || defined(__amd64)
1532 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1533 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1534 #endif
1535 
1536 
1537 /* Function prototypes for cmlb */
1538 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1539     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1540 
1541 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1542 
1543 /*
1544  * Constants for failfast support:
1545  *
1546  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1547  * failfast processing being performed.
1548  *
1549  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1550  * failfast processing on all bufs with B_FAILFAST set.
1551  */
1552 
1553 #define	SD_FAILFAST_INACTIVE		0
1554 #define	SD_FAILFAST_ACTIVE		1
1555 
1556 /*
1557  * Bitmask to control behavior of buf(9S) flushes when a transition to
1558  * the failfast state occurs. Optional bits include:
1559  *
1560  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1561  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1562  * be flushed.
1563  *
1564  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1565  * driver, in addition to the regular wait queue. This includes the xbuf
1566  * queues. When clear, only the driver's wait queue will be flushed.
1567  */
1568 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1569 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1570 
1571 /*
1572  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1573  * to flush all queues within the driver.
1574  */
1575 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
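
/*
 * Illustrative example (not part of the driver): both optional flush
 * behaviors could be enabled by setting this tunable in /etc/system,
 * e.g.:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * where 0x3 corresponds to
 * (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */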
1576 
1577 
1578 /*
1579  * SD Testing Fault Injection
1580  */
1581 #ifdef SD_FAULT_INJECTION
1582 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1583 static void sd_faultinjection(struct scsi_pkt *pktp);
1584 static void sd_injection_log(char *buf, struct sd_lun *un);
1585 #endif
1586 
1587 /*
1588  * Device driver ops vector
1589  */
1590 static struct cb_ops sd_cb_ops = {
1591 	sdopen,			/* open */
1592 	sdclose,		/* close */
1593 	sdstrategy,		/* strategy */
1594 	nodev,			/* print */
1595 	sddump,			/* dump */
1596 	sdread,			/* read */
1597 	sdwrite,		/* write */
1598 	sdioctl,		/* ioctl */
1599 	nodev,			/* devmap */
1600 	nodev,			/* mmap */
1601 	nodev,			/* segmap */
1602 	nochpoll,		/* poll */
1603 	sd_prop_op,		/* cb_prop_op */
1604 	0,			/* streamtab  */
1605 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1606 	CB_REV,			/* cb_rev */
1607 	sdaread, 		/* async I/O read entry point */
1608 	sdawrite		/* async I/O write entry point */
1609 };
1610 
1611 static struct dev_ops sd_ops = {
1612 	DEVO_REV,		/* devo_rev, */
1613 	0,			/* refcnt  */
1614 	sdinfo,			/* info */
1615 	nulldev,		/* identify */
1616 	sdprobe,		/* probe */
1617 	sdattach,		/* attach */
1618 	sddetach,		/* detach */
1619 	nodev,			/* reset */
1620 	&sd_cb_ops,		/* driver operations */
1621 	NULL,			/* bus operations */
1622 	sdpower			/* power */
1623 };
1624 
1625 
1626 /*
1627  * This is the loadable module wrapper.
1628  */
1629 #include <sys/modctl.h>
1630 
1631 static struct modldrv modldrv = {
1632 	&mod_driverops,		/* Type of module. This one is a driver */
1633 	SD_MODULE_NAME,		/* Module name. */
1634 	&sd_ops			/* driver ops */
1635 };
1636 
1637 
1638 static struct modlinkage modlinkage = {
1639 	MODREV_1,
1640 	&modldrv,
1641 	NULL
1642 };
1643 
1644 static cmlb_tg_ops_t sd_tgops = {
1645 	TG_DK_OPS_VERSION_1,
1646 	sd_tg_rdwr,
1647 	sd_tg_getinfo
1648 };
1649 
1650 static struct scsi_asq_key_strings sd_additional_codes[] = {
1651 	0x81, 0, "Logical Unit is Reserved",
1652 	0x85, 0, "Audio Address Not Valid",
1653 	0xb6, 0, "Media Load Mechanism Failed",
1654 	0xB9, 0, "Audio Play Operation Aborted",
1655 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1656 	0x53, 2, "Medium removal prevented",
1657 	0x6f, 0, "Authentication failed during key exchange",
1658 	0x6f, 1, "Key not present",
1659 	0x6f, 2, "Key not established",
1660 	0x6f, 3, "Read without proper authentication",
1661 	0x6f, 4, "Mismatched region to this logical unit",
1662 	0x6f, 5, "Region reset count error",
1663 	0xffff, 0x0, NULL
1664 };
1665 
1666 
1667 /*
1668  * Struct for passing printing information for sense data messages
1669  */
1670 struct sd_sense_info {
1671 	int	ssi_severity;
1672 	int	ssi_pfa_flag;
1673 };
1674 
1675 /*
1676  * Table of function pointers for iostart-side routines. Separate "chains"
1677  * of layered function calls are formed by placing the function pointers
1678  * sequentially in the desired order. Functions are called according to an
1679  * incrementing table index ordering. The last function in each chain must
1680  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1681  * in the sd_iodone_chain[] array.
1682  *
1683  * Note: It may seem more natural to organize both the iostart and iodone
1684  * functions together, into an array of structures (or some similar
1685  * organization) with a common index, rather than two separate arrays which
1686  * must be maintained in synchronization. The purpose of this division is
1687  * to achieve improved performance: individual arrays allow for more
1688  * effective cache line utilization on certain platforms.
1689  */
1690 
1691 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1692 
1693 
1694 static sd_chain_t sd_iostart_chain[] = {
1695 
1696 	/* Chain for buf IO for disk drive targets (PM enabled) */
1697 	sd_mapblockaddr_iostart,	/* Index: 0 */
1698 	sd_pm_iostart,			/* Index: 1 */
1699 	sd_core_iostart,		/* Index: 2 */
1700 
1701 	/* Chain for buf IO for disk drive targets (PM disabled) */
1702 	sd_mapblockaddr_iostart,	/* Index: 3 */
1703 	sd_core_iostart,		/* Index: 4 */
1704 
1705 	/* Chain for buf IO for removable-media targets (PM enabled) */
1706 	sd_mapblockaddr_iostart,	/* Index: 5 */
1707 	sd_mapblocksize_iostart,	/* Index: 6 */
1708 	sd_pm_iostart,			/* Index: 7 */
1709 	sd_core_iostart,		/* Index: 8 */
1710 
1711 	/* Chain for buf IO for removable-media targets (PM disabled) */
1712 	sd_mapblockaddr_iostart,	/* Index: 9 */
1713 	sd_mapblocksize_iostart,	/* Index: 10 */
1714 	sd_core_iostart,		/* Index: 11 */
1715 
1716 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1717 	sd_mapblockaddr_iostart,	/* Index: 12 */
1718 	sd_checksum_iostart,		/* Index: 13 */
1719 	sd_pm_iostart,			/* Index: 14 */
1720 	sd_core_iostart,		/* Index: 15 */
1721 
1722 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1723 	sd_mapblockaddr_iostart,	/* Index: 16 */
1724 	sd_checksum_iostart,		/* Index: 17 */
1725 	sd_core_iostart,		/* Index: 18 */
1726 
1727 	/* Chain for USCSI commands (all targets) */
1728 	sd_pm_iostart,			/* Index: 19 */
1729 	sd_core_iostart,		/* Index: 20 */
1730 
1731 	/* Chain for checksumming USCSI commands (all targets) */
1732 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1733 	sd_pm_iostart,			/* Index: 22 */
1734 	sd_core_iostart,		/* Index: 23 */
1735 
1736 	/* Chain for "direct" USCSI commands (all targets) */
1737 	sd_core_iostart,		/* Index: 24 */
1738 
1739 	/* Chain for "direct priority" USCSI commands (all targets) */
1740 	sd_core_iostart,		/* Index: 25 */
1741 };
1742 
1743 /*
1744  * Macros to locate the first function of each iostart chain in the
1745  * sd_iostart_chain[] array. These are located by the index in the array.
1746  */
1747 #define	SD_CHAIN_DISK_IOSTART			0
1748 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1749 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1750 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1751 #define	SD_CHAIN_CHKSUM_IOSTART			12
1752 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1753 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1754 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1755 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1756 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
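
/*
 * For example, a buf entering at SD_CHAIN_DISK_IOSTART (index 0)
 * traverses sd_mapblockaddr_iostart -> sd_pm_iostart -> sd_core_iostart
 * as the table index increments.
 */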
1757 
1758 
1759 /*
1760  * Table of function pointers for the iodone-side routines for the driver-
1761  * internal layering mechanism.  The calling sequence for iodone routines
1762  * uses a decrementing table index, so the last routine called in a chain
1763  * must be at the lowest array index location for that chain.  The last
1764  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1765  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1766  * of the functions in an iodone side chain must correspond to the ordering
1767  * of the iostart routines for that chain.  Note that there is no iodone
1768  * side routine that corresponds to sd_core_iostart(), so there is no
1769  * entry in the table for this.
1770  */
1771 
1772 static sd_chain_t sd_iodone_chain[] = {
1773 
1774 	/* Chain for buf IO for disk drive targets (PM enabled) */
1775 	sd_buf_iodone,			/* Index: 0 */
1776 	sd_mapblockaddr_iodone,		/* Index: 1 */
1777 	sd_pm_iodone,			/* Index: 2 */
1778 
1779 	/* Chain for buf IO for disk drive targets (PM disabled) */
1780 	sd_buf_iodone,			/* Index: 3 */
1781 	sd_mapblockaddr_iodone,		/* Index: 4 */
1782 
1783 	/* Chain for buf IO for removable-media targets (PM enabled) */
1784 	sd_buf_iodone,			/* Index: 5 */
1785 	sd_mapblockaddr_iodone,		/* Index: 6 */
1786 	sd_mapblocksize_iodone,		/* Index: 7 */
1787 	sd_pm_iodone,			/* Index: 8 */
1788 
1789 	/* Chain for buf IO for removable-media targets (PM disabled) */
1790 	sd_buf_iodone,			/* Index: 9 */
1791 	sd_mapblockaddr_iodone,		/* Index: 10 */
1792 	sd_mapblocksize_iodone,		/* Index: 11 */
1793 
1794 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1795 	sd_buf_iodone,			/* Index: 12 */
1796 	sd_mapblockaddr_iodone,		/* Index: 13 */
1797 	sd_checksum_iodone,		/* Index: 14 */
1798 	sd_pm_iodone,			/* Index: 15 */
1799 
1800 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1801 	sd_buf_iodone,			/* Index: 16 */
1802 	sd_mapblockaddr_iodone,		/* Index: 17 */
1803 	sd_checksum_iodone,		/* Index: 18 */
1804 
1805 	/* Chain for USCSI commands (non-checksum targets) */
1806 	sd_uscsi_iodone,		/* Index: 19 */
1807 	sd_pm_iodone,			/* Index: 20 */
1808 
1809 	/* Chain for USCSI commands (checksum targets) */
1810 	sd_uscsi_iodone,		/* Index: 21 */
1811 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1812 	sd_pm_iodone,			/* Index: 23 */
1813 
1814 	/* Chain for "direct" USCSI commands (all targets) */
1815 	sd_uscsi_iodone,		/* Index: 24 */
1816 
1817 	/* Chain for "direct priority" USCSI commands (all targets) */
1818 	sd_uscsi_iodone,		/* Index: 25 */
1819 };
1820 
1821 
1822 /*
1823  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1824  * each iodone-side chain. These are located by the array index, but as the
1825  * iodone side functions are called in a decrementing-index order, the
1826  * highest index number in each chain must be specified (as these correspond
1827  * to the first function in the iodone chain that will be called by the core
1828  * at IO completion time).
1829  */
1830 
1831 #define	SD_CHAIN_DISK_IODONE			2
1832 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1833 #define	SD_CHAIN_RMMEDIA_IODONE			8
1834 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1835 #define	SD_CHAIN_CHKSUM_IODONE			15
1836 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1837 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1838 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1839 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1840 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
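
/*
 * For example, an IO completing through SD_CHAIN_DISK_IODONE (index 2)
 * traverses sd_pm_iodone -> sd_mapblockaddr_iodone -> sd_buf_iodone as
 * the table index decrements.
 */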
1841 
1842 
1843 
1844 
1845 /*
1846  * Array to map a layering chain index to the appropriate initpkt routine.
1847  * The redundant entries are present so that the index used for accessing
1848  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1849  * with this table as well.
1850  */
1851 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1852 
1853 static sd_initpkt_t	sd_initpkt_map[] = {
1854 
1855 	/* Chain for buf IO for disk drive targets (PM enabled) */
1856 	sd_initpkt_for_buf,		/* Index: 0 */
1857 	sd_initpkt_for_buf,		/* Index: 1 */
1858 	sd_initpkt_for_buf,		/* Index: 2 */
1859 
1860 	/* Chain for buf IO for disk drive targets (PM disabled) */
1861 	sd_initpkt_for_buf,		/* Index: 3 */
1862 	sd_initpkt_for_buf,		/* Index: 4 */
1863 
1864 	/* Chain for buf IO for removable-media targets (PM enabled) */
1865 	sd_initpkt_for_buf,		/* Index: 5 */
1866 	sd_initpkt_for_buf,		/* Index: 6 */
1867 	sd_initpkt_for_buf,		/* Index: 7 */
1868 	sd_initpkt_for_buf,		/* Index: 8 */
1869 
1870 	/* Chain for buf IO for removable-media targets (PM disabled) */
1871 	sd_initpkt_for_buf,		/* Index: 9 */
1872 	sd_initpkt_for_buf,		/* Index: 10 */
1873 	sd_initpkt_for_buf,		/* Index: 11 */
1874 
1875 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1876 	sd_initpkt_for_buf,		/* Index: 12 */
1877 	sd_initpkt_for_buf,		/* Index: 13 */
1878 	sd_initpkt_for_buf,		/* Index: 14 */
1879 	sd_initpkt_for_buf,		/* Index: 15 */
1880 
1881 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1882 	sd_initpkt_for_buf,		/* Index: 16 */
1883 	sd_initpkt_for_buf,		/* Index: 17 */
1884 	sd_initpkt_for_buf,		/* Index: 18 */
1885 
1886 	/* Chain for USCSI commands (non-checksum targets) */
1887 	sd_initpkt_for_uscsi,		/* Index: 19 */
1888 	sd_initpkt_for_uscsi,		/* Index: 20 */
1889 
1890 	/* Chain for USCSI commands (checksum targets) */
1891 	sd_initpkt_for_uscsi,		/* Index: 21 */
1892 	sd_initpkt_for_uscsi,		/* Index: 22 */
1893 	sd_initpkt_for_uscsi,		/* Index: 23 */
1894 
1895 	/* Chain for "direct" USCSI commands (all targets) */
1896 	sd_initpkt_for_uscsi,		/* Index: 24 */
1897 
1898 	/* Chain for "direct priority" USCSI commands (all targets) */
1899 	sd_initpkt_for_uscsi,		/* Index: 25 */
1900 
1901 };
1902 
1903 
1904 /*
1905  * Array to map a layering chain index to the appropriate destroypkt routine.
1906  * The redundant entries are present so that the index used for accessing
1907  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1908  * with this table as well.
1909  */
1910 typedef void (*sd_destroypkt_t)(struct buf *);
1911 
1912 static sd_destroypkt_t	sd_destroypkt_map[] = {
1913 
1914 	/* Chain for buf IO for disk drive targets (PM enabled) */
1915 	sd_destroypkt_for_buf,		/* Index: 0 */
1916 	sd_destroypkt_for_buf,		/* Index: 1 */
1917 	sd_destroypkt_for_buf,		/* Index: 2 */
1918 
1919 	/* Chain for buf IO for disk drive targets (PM disabled) */
1920 	sd_destroypkt_for_buf,		/* Index: 3 */
1921 	sd_destroypkt_for_buf,		/* Index: 4 */
1922 
1923 	/* Chain for buf IO for removable-media targets (PM enabled) */
1924 	sd_destroypkt_for_buf,		/* Index: 5 */
1925 	sd_destroypkt_for_buf,		/* Index: 6 */
1926 	sd_destroypkt_for_buf,		/* Index: 7 */
1927 	sd_destroypkt_for_buf,		/* Index: 8 */
1928 
1929 	/* Chain for buf IO for removable-media targets (PM disabled) */
1930 	sd_destroypkt_for_buf,		/* Index: 9 */
1931 	sd_destroypkt_for_buf,		/* Index: 10 */
1932 	sd_destroypkt_for_buf,		/* Index: 11 */
1933 
1934 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1935 	sd_destroypkt_for_buf,		/* Index: 12 */
1936 	sd_destroypkt_for_buf,		/* Index: 13 */
1937 	sd_destroypkt_for_buf,		/* Index: 14 */
1938 	sd_destroypkt_for_buf,		/* Index: 15 */
1939 
1940 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1941 	sd_destroypkt_for_buf,		/* Index: 16 */
1942 	sd_destroypkt_for_buf,		/* Index: 17 */
1943 	sd_destroypkt_for_buf,		/* Index: 18 */
1944 
1945 	/* Chain for USCSI commands (non-checksum targets) */
1946 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1947 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1948 
1949 	/* Chain for USCSI commands (checksum targets) */
1950 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1951 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1952 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1953 
1954 	/* Chain for "direct" USCSI commands (all targets) */
1955 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1956 
1957 	/* Chain for "direct priority" USCSI commands (all targets) */
1958 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1959 
1960 };
1961 
1962 
1963 
1964 /*
1965  * Array to map a layering chain index to the appropriate chain "type".
1966  * The chain type indicates a specific property/usage of the chain.
1967  * The redundant entries are present so that the index used for accessing
1968  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1969  * with this table as well.
1970  */
1971 
1972 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1973 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1974 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1975 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1976 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1977 						/* (for error recovery) */
1978 
1979 static int sd_chain_type_map[] = {
1980 
1981 	/* Chain for buf IO for disk drive targets (PM enabled) */
1982 	SD_CHAIN_BUFIO,			/* Index: 0 */
1983 	SD_CHAIN_BUFIO,			/* Index: 1 */
1984 	SD_CHAIN_BUFIO,			/* Index: 2 */
1985 
1986 	/* Chain for buf IO for disk drive targets (PM disabled) */
1987 	SD_CHAIN_BUFIO,			/* Index: 3 */
1988 	SD_CHAIN_BUFIO,			/* Index: 4 */
1989 
1990 	/* Chain for buf IO for removable-media targets (PM enabled) */
1991 	SD_CHAIN_BUFIO,			/* Index: 5 */
1992 	SD_CHAIN_BUFIO,			/* Index: 6 */
1993 	SD_CHAIN_BUFIO,			/* Index: 7 */
1994 	SD_CHAIN_BUFIO,			/* Index: 8 */
1995 
1996 	/* Chain for buf IO for removable-media targets (PM disabled) */
1997 	SD_CHAIN_BUFIO,			/* Index: 9 */
1998 	SD_CHAIN_BUFIO,			/* Index: 10 */
1999 	SD_CHAIN_BUFIO,			/* Index: 11 */
2000 
2001 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2002 	SD_CHAIN_BUFIO,			/* Index: 12 */
2003 	SD_CHAIN_BUFIO,			/* Index: 13 */
2004 	SD_CHAIN_BUFIO,			/* Index: 14 */
2005 	SD_CHAIN_BUFIO,			/* Index: 15 */
2006 
2007 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2008 	SD_CHAIN_BUFIO,			/* Index: 16 */
2009 	SD_CHAIN_BUFIO,			/* Index: 17 */
2010 	SD_CHAIN_BUFIO,			/* Index: 18 */
2011 
2012 	/* Chain for USCSI commands (non-checksum targets) */
2013 	SD_CHAIN_USCSI,			/* Index: 19 */
2014 	SD_CHAIN_USCSI,			/* Index: 20 */
2015 
2016 	/* Chain for USCSI commands (checksum targets) */
2017 	SD_CHAIN_USCSI,			/* Index: 21 */
2018 	SD_CHAIN_USCSI,			/* Index: 22 */
2019 	SD_CHAIN_USCSI,			/* Index: 23 */
2020 
2021 	/* Chain for "direct" USCSI commands (all targets) */
2022 	SD_CHAIN_DIRECT,		/* Index: 24 */
2023 
2024 	/* Chain for "direct priority" USCSI commands (all targets) */
2025 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2026 };
2027 
2028 
2029 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2030 #define	SD_IS_BUFIO(xp)			\
2031 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2032 
2033 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2034 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2035 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
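
/*
 * Illustrative use (hypothetical, not actual driver code): completion
 * or error-handling logic can branch on the origin of an IO, e.g.:
 *
 *	if (SD_IS_BUFIO(xp)) {
 *		... treat as buf(9S) IO ...
 *	}
 */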
2036 
2037 
2038 
2039 /*
2040  * Struct, array, and macros to map a specific chain to the appropriate
2041  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2042  *
2043  * The sd_chain_index_map[] array is used at attach time to set the various
2044  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2045  * chain to be used with the instance. This allows different instances to use
2046  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2047  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2048  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2049  * dynamically and without the use of locking; and (2) a layer may update the
2050  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2051  * to allow for deferred processing of an IO within the same chain from a
2052  * different execution context.
2053  */
2054 
2055 struct sd_chain_index {
2056 	int	sci_iostart_index;
2057 	int	sci_iodone_index;
2058 };
2059 
2060 static struct sd_chain_index	sd_chain_index_map[] = {
2061 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2062 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2063 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2064 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2065 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2066 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2067 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2068 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2069 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2070 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2071 };
2072 
2073 
2074 /*
2075  * The following are indexes into the sd_chain_index_map[] array.
2076  */
2077 
2078 /* un->un_buf_chain_type must be set to one of these */
2079 #define	SD_CHAIN_INFO_DISK		0
2080 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2081 #define	SD_CHAIN_INFO_RMMEDIA		2
2082 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2083 #define	SD_CHAIN_INFO_CHKSUM		4
2084 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2085 
2086 /* un->un_uscsi_chain_type must be set to one of these */
2087 #define	SD_CHAIN_INFO_USCSI_CMD		6
2088 /* USCSI with PM disabled is the same as DIRECT */
2089 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2090 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2091 
2092 /* un->un_direct_chain_type must be set to one of these */
2093 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2094 
2095 /* un->un_priority_chain_type must be set to one of these */
2096 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
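
/*
 * Illustrative example (not part of the driver): for a removable-media
 * target with PM disabled, the chain selection resolves as follows:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;	(== 3)
 *	sd_chain_index_map[3].sci_iostart_index
 *	    == SD_CHAIN_RMMEDIA_IOSTART_NO_PM			(== 9)
 *	sd_chain_index_map[3].sci_iodone_index
 *	    == SD_CHAIN_RMMEDIA_IODONE_NO_PM			(== 11)
 *
 * so bufs for that instance start at sd_iostart_chain[9] and complete
 * beginning at sd_iodone_chain[11].
 */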
2097 
2098 /* size for devid inquiries */
2099 #define	MAX_INQUIRY_SIZE		0xF0
2100 
2101 /*
2102  * Macros used by functions to pass a given buf(9S) struct along to the
2103  * next function in the layering chain for further processing.
2104  *
2105  * In the following macros, passing more than three arguments to the called
2106  * routines causes the optimizer for the SPARC compiler to stop doing tail
2107  * call elimination, which results in significant performance degradation.
2108  */
2109 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2110 	((*(sd_iostart_chain[index]))(index, un, bp))
2111 
2112 #define	SD_BEGIN_IODONE(index, un, bp)	\
2113 	((*(sd_iodone_chain[index]))(index, un, bp))
2114 
2115 #define	SD_NEXT_IOSTART(index, un, bp)				\
2116 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2117 
2118 #define	SD_NEXT_IODONE(index, un, bp)				\
2119 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
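
/*
 * Illustrative sketch (hypothetical layer, not part of the driver): a
 * typical iostart-side routine performs its own processing and passes
 * the buf along the chain; its iodone-side counterpart unwinds in the
 * opposite direction.
 */
#if 0
static void
sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	/* ... this layer's start-side processing of bp ... */
	SD_NEXT_IOSTART(index, un, bp);
}

static void
sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* ... this layer's completion-side processing of bp ... */
	SD_NEXT_IODONE(index, un, bp);
}
#endif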
2120 
2121 /*
2122  *    Function: _init
2123  *
2124  * Description: This is the driver _init(9E) entry point.
2125  *
2126  * Return Code: Returns the value from mod_install(9F) or
2127  *		ddi_soft_state_init(9F) as appropriate.
2128  *
2129  *     Context: Called when driver module loaded.
2130  */
2131 
2132 int
2133 _init(void)
2134 {
2135 	int	err;
2136 
2137 	/* establish driver name from module name */
2138 	sd_label = mod_modname(&modlinkage);
2139 
2140 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2141 	    SD_MAXUNIT);
2142 
2143 	if (err != 0) {
2144 		return (err);
2145 	}
2146 
2147 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2148 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2149 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2150 
2151 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2152 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2153 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2154 
2155 	/*
2156 	 * it's ok to init here even for fibre devices
2157 	 */
2158 	sd_scsi_probe_cache_init();
2159 
2160 	sd_scsi_target_lun_init();
2161 
2162 	/*
2163 	 * Creating taskq before mod_install ensures that all callers (threads)
2164 	 * that enter the module after a successful mod_install encounter
2165 	 * a valid taskq.
2166 	 */
2167 	sd_taskq_create();
2168 
2169 	err = mod_install(&modlinkage);
2170 	if (err != 0) {
2171 		/* delete taskq if install fails */
2172 		sd_taskq_delete();
2173 
2174 		mutex_destroy(&sd_detach_mutex);
2175 		mutex_destroy(&sd_log_mutex);
2176 		mutex_destroy(&sd_label_mutex);
2177 
2178 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2179 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2180 		cv_destroy(&sd_tr.srq_inprocess_cv);
2181 
2182 		sd_scsi_probe_cache_fini();
2183 
2184 		sd_scsi_target_lun_fini();
2185 
2186 		ddi_soft_state_fini(&sd_state);
2187 		return (err);
2188 	}
2189 
2190 	return (err);
2191 }
2192 
2193 
2194 /*
2195  *    Function: _fini
2196  *
2197  * Description: This is the driver _fini(9E) entry point.
2198  *
2199  * Return Code: Returns the value from mod_remove(9F)
2200  *
2201  *     Context: Called when driver module is unloaded.
2202  */
2203 
2204 int
2205 _fini(void)
2206 {
2207 	int err;
2208 
2209 	if ((err = mod_remove(&modlinkage)) != 0) {
2210 		return (err);
2211 	}
2212 
2213 	sd_taskq_delete();
2214 
2215 	mutex_destroy(&sd_detach_mutex);
2216 	mutex_destroy(&sd_log_mutex);
2217 	mutex_destroy(&sd_label_mutex);
2218 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2219 
2220 	sd_scsi_probe_cache_fini();
2221 
2222 	sd_scsi_target_lun_fini();
2223 
2224 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2225 	cv_destroy(&sd_tr.srq_inprocess_cv);
2226 
2227 	ddi_soft_state_fini(&sd_state);
2228 
2229 	return (err);
2230 }
2231 
2232 
2233 /*
2234  *    Function: _info
2235  *
2236  * Description: This is the driver _info(9E) entry point.
2237  *
2238  *   Arguments: modinfop - pointer to the driver modinfo structure
2239  *
2240  * Return Code: Returns the value from mod_info(9F).
2241  *
2242  *     Context: Kernel thread context
2243  */
2244 
2245 int
2246 _info(struct modinfo *modinfop)
2247 {
2248 	return (mod_info(&modlinkage, modinfop));
2249 }
2250 
2251 
2252 /*
2253  * The following routines implement the driver message logging facility.
2254  * They provide component- and level- based debug output filtering.
2255  * Output may also be restricted to messages for a single instance by
2256  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2257  * to NULL, then messages for all instances are printed.
2258  *
2259  * These routines have been cloned from each other due to the language
2260  * constraints of macros and variable argument list processing.
2261  */
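
/*
 * Illustrative example (debug use only; the mask values below are
 * assumptions, not authoritative): the filtering globals can be tuned
 * without recompiling, e.g. via /etc/system:
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0xffffffff
 *
 * See the SD_LOGMASK_* and component definitions in sddef.h for the
 * actual bit values.
 */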
2262 
2263 
2264 /*
2265  *    Function: sd_log_err
2266  *
2267  * Description: This routine is called by the SD_ERROR macro for debug
2268  *		logging of error conditions.
2269  *
2270  *   Arguments: comp - driver component being logged
2271  *		un   - pointer to driver soft state (unit) structure
2272  *		fmt  - error string and format to be logged
2273  */
2274 
2275 static void
2276 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2277 {
2278 	va_list		ap;
2279 	dev_info_t	*dev;
2280 
2281 	ASSERT(un != NULL);
2282 	dev = SD_DEVINFO(un);
2283 	ASSERT(dev != NULL);
2284 
2285 	/*
2286 	 * Filter messages based on the global component and level masks.
2287 	 * Also print if un matches the value of sd_debug_un, or if
2288 	 * sd_debug_un is set to NULL.
2289 	 */
2290 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2291 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2292 		mutex_enter(&sd_log_mutex);
2293 		va_start(ap, fmt);
2294 		(void) vsprintf(sd_log_buf, fmt, ap);
2295 		va_end(ap);
2296 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2297 		mutex_exit(&sd_log_mutex);
2298 	}
2299 #ifdef SD_FAULT_INJECTION
2300 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2301 	if (un->sd_injection_mask & comp) {
2302 		mutex_enter(&sd_log_mutex);
2303 		va_start(ap, fmt);
2304 		(void) vsprintf(sd_log_buf, fmt, ap);
2305 		va_end(ap);
2306 		sd_injection_log(sd_log_buf, un);
2307 		mutex_exit(&sd_log_mutex);
2308 	}
2309 #endif
2310 }
2311 
2312 
2313 /*
2314  *    Function: sd_log_info
2315  *
2316  * Description: This routine is called by the SD_INFO macro for debug
2317  *		logging of general purpose informational conditions.
2318  *
2319  *   Arguments: component - driver component being logged
2320  *		un   - pointer to driver soft state (unit) structure
2321  *		fmt  - info string and format to be logged
2322  */
2323 
2324 static void
2325 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2326 {
2327 	va_list		ap;
2328 	dev_info_t	*dev;
2329 
2330 	ASSERT(un != NULL);
2331 	dev = SD_DEVINFO(un);
2332 	ASSERT(dev != NULL);
2333 
2334 	/*
2335 	 * Filter messages based on the global component and level masks.
2336 	 * Also print if un matches the value of sd_debug_un, or if
2337 	 * sd_debug_un is set to NULL.
2338 	 */
2339 	if ((sd_component_mask & component) &&
2340 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2341 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2342 		mutex_enter(&sd_log_mutex);
2343 		va_start(ap, fmt);
2344 		(void) vsprintf(sd_log_buf, fmt, ap);
2345 		va_end(ap);
2346 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2347 		mutex_exit(&sd_log_mutex);
2348 	}
2349 #ifdef SD_FAULT_INJECTION
2350 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2351 	if (un->sd_injection_mask & component) {
2352 		mutex_enter(&sd_log_mutex);
2353 		va_start(ap, fmt);
2354 		(void) vsprintf(sd_log_buf, fmt, ap);
2355 		va_end(ap);
2356 		sd_injection_log(sd_log_buf, un);
2357 		mutex_exit(&sd_log_mutex);
2358 	}
2359 #endif
2360 }
2361 
2362 
2363 /*
2364  *    Function: sd_log_trace
2365  *
2366  * Description: This routine is called by the SD_TRACE macro for debug
2367  *		logging of trace conditions (i.e. function entry/exit).
2368  *
2369  *   Arguments: component - driver component being logged
2370  *		un   - pointer to driver soft state (unit) structure
2371  *		fmt  - trace string and format to be logged
2372  */
2373 
2374 static void
2375 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2376 {
2377 	va_list		ap;
2378 	dev_info_t	*dev;
2379 
2380 	ASSERT(un != NULL);
2381 	dev = SD_DEVINFO(un);
2382 	ASSERT(dev != NULL);
2383 
2384 	/*
2385 	 * Filter messages based on the global component and level masks.
2386 	 * Also print if un matches the value of sd_debug_un, or if
2387 	 * sd_debug_un is set to NULL.
2388 	 */
2389 	if ((sd_component_mask & component) &&
2390 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2391 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2392 		mutex_enter(&sd_log_mutex);
2393 		va_start(ap, fmt);
2394 		(void) vsprintf(sd_log_buf, fmt, ap);
2395 		va_end(ap);
2396 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2397 		mutex_exit(&sd_log_mutex);
2398 	}
2399 #ifdef SD_FAULT_INJECTION
2400 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2401 	if (un->sd_injection_mask & component) {
2402 		mutex_enter(&sd_log_mutex);
2403 		va_start(ap, fmt);
2404 		(void) vsprintf(sd_log_buf, fmt, ap);
2405 		va_end(ap);
2406 		sd_injection_log(sd_log_buf, un);
2407 		mutex_exit(&sd_log_mutex);
2408 	}
2409 #endif
2410 }
2411 
2412 
2413 /*
2414  *    Function: sdprobe
2415  *
2416  * Description: This is the driver probe(9e) entry point function.
2417  *
2418  *   Arguments: devi - opaque device info handle
2419  *
2420  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2421  *              DDI_PROBE_FAILURE: If the probe failed.
2422  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2423  *				   but may be present in the future.
2424  */
2425 
2426 static int
2427 sdprobe(dev_info_t *devi)
2428 {
2429 	struct scsi_device	*devp;
2430 	int			rval;
2431 	int			instance;
2432 
2433 	/*
2434 	 * if it wasn't for pln, sdprobe could actually be nulldev
2435 	 * in the "__fibre" case.
2436 	 */
2437 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2438 		return (DDI_PROBE_DONTCARE);
2439 	}
2440 
2441 	devp = ddi_get_driver_private(devi);
2442 
2443 	if (devp == NULL) {
2444 		/* Oops... nexus driver is misconfigured... */
2445 		return (DDI_PROBE_FAILURE);
2446 	}
2447 
2448 	instance = ddi_get_instance(devi);
2449 
2450 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2451 		return (DDI_PROBE_PARTIAL);
2452 	}
2453 
2454 	/*
2455 	 * Call the SCSA utility probe routine to see if we actually
2456 	 * have a target at this SCSI nexus.
2457 	 */
2458 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2459 	case SCSIPROBE_EXISTS:
2460 		switch (devp->sd_inq->inq_dtype) {
2461 		case DTYPE_DIRECT:
2462 			rval = DDI_PROBE_SUCCESS;
2463 			break;
2464 		case DTYPE_RODIRECT:
2465 			/* CDs etc. Can be removable media */
2466 			rval = DDI_PROBE_SUCCESS;
2467 			break;
2468 		case DTYPE_OPTICAL:
2469 			/*
2470 			 * Rewritable optical drive, e.g. HP115AA.
2471 			 * Can also be removable media.
2472 			 */
2473 
2474 			/*
2475 			 * Do not attempt to bind to DTYPE_OPTICAL if
2476 			 * pre-Solaris 9 SPARC sd behavior is required.
2477 			 *
2478 			 * If first time through and sd_dtype_optical_bind
2479 			 * has not been set in /etc/system, check properties.
2480 			 */
2481 
2482 			if (sd_dtype_optical_bind < 0) {
2483 				sd_dtype_optical_bind = ddi_prop_get_int(
2484 				    DDI_DEV_T_ANY, devi, 0,
2485 				    "optical-device-bind", 1);
2486 			}
2487 
2488 			if (sd_dtype_optical_bind == 0) {
2489 				rval = DDI_PROBE_FAILURE;
2490 			} else {
2491 				rval = DDI_PROBE_SUCCESS;
2492 			}
2493 			break;
2494 
2495 		case DTYPE_NOTPRESENT:
2496 		default:
2497 			rval = DDI_PROBE_FAILURE;
2498 			break;
2499 		}
2500 		break;
2501 	default:
2502 		rval = DDI_PROBE_PARTIAL;
2503 		break;
2504 	}
2505 
2506 	/*
2507 	 * This routine checks for resource allocation prior to freeing,
2508 	 * so it will take care of the "smart probing" case where a
2509 	 * scsi_probe() may or may not have been issued and will *not*
2510 	 * free previously-freed resources.
2511 	 */
2512 	scsi_unprobe(devp);
2513 	return (rval);
2514 }
2515 
2516 
2517 /*
2518  *    Function: sdinfo
2519  *
2520  * Description: This is the driver getinfo(9e) entry point function.
2521  * 		Given the device number, return the devinfo pointer from
2522  *		the scsi_device structure or the instance number
2523  *		associated with the dev_t.
2524  *
2525  *   Arguments: dip     - pointer to device info structure
2526  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2527  *			  DDI_INFO_DEVT2INSTANCE)
2528  *		arg     - driver dev_t
2529  *		resultp - user buffer for request response
2530  *
2531  * Return Code: DDI_SUCCESS
2532  *              DDI_FAILURE
2533  */
2534 /* ARGSUSED */
2535 static int
2536 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2537 {
2538 	struct sd_lun	*un;
2539 	dev_t		dev;
2540 	int		instance;
2541 	int		error;
2542 
2543 	switch (infocmd) {
2544 	case DDI_INFO_DEVT2DEVINFO:
2545 		dev = (dev_t)arg;
2546 		instance = SDUNIT(dev);
2547 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2548 			return (DDI_FAILURE);
2549 		}
2550 		*result = (void *) SD_DEVINFO(un);
2551 		error = DDI_SUCCESS;
2552 		break;
2553 	case DDI_INFO_DEVT2INSTANCE:
2554 		dev = (dev_t)arg;
2555 		instance = SDUNIT(dev);
2556 		*result = (void *)(uintptr_t)instance;
2557 		error = DDI_SUCCESS;
2558 		break;
2559 	default:
2560 		error = DDI_FAILURE;
2561 	}
2562 	return (error);
2563 }
2564 
2565 /*
2566  *    Function: sd_prop_op
2567  *
2568  * Description: This is the driver prop_op(9e) entry point function.
2569  *		Return the number of blocks for the partition in question
2570  *		or forward the request to the property facilities.
2571  *
2572  *   Arguments: dev       - device number
2573  *		dip       - pointer to device info structure
2574  *		prop_op   - property operator
2575  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2576  *		name      - pointer to property name
2577  *		valuep    - pointer or address of the user buffer
2578  *		lengthp   - property length
2579  *
2580  * Return Code: DDI_PROP_SUCCESS
2581  *              DDI_PROP_NOT_FOUND
2582  *              DDI_PROP_UNDEFINED
2583  *              DDI_PROP_NO_MEMORY
2584  *              DDI_PROP_BUF_TOO_SMALL
2585  */
2586 
2587 static int
2588 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2589 	char *name, caddr_t valuep, int *lengthp)
2590 {
2591 	int		instance = ddi_get_instance(dip);
2592 	struct sd_lun	*un;
2593 	uint64_t	nblocks64;
2594 
2595 	/*
2596 	 * Our dynamic properties are all device specific and size oriented.
2597 	 * Requests issued under conditions where size is valid are passed
2598 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2599 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2600 	 */
2601 	un = ddi_get_soft_state(sd_state, instance);
2602 	if ((dev == DDI_DEV_T_ANY) || (un == NULL)) {
2603 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2604 		    name, valuep, lengthp));
2605 	} else if (!SD_IS_VALID_LABEL(un)) {
2606 		return (ddi_prop_op(dev, dip, prop_op, mod_flags, name,
2607 		    valuep, lengthp));
2608 	}
2609 
2610 	/* get nblocks value */
2611 	ASSERT(!mutex_owned(SD_MUTEX(un)));
2612 
2613 	(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2614 	    (diskaddr_t *)&nblocks64, NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
2615 
2616 	return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2617 	    name, valuep, lengthp, nblocks64));
2618 }
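
/*
 * Illustrative note (an assumption about typical usage, not part of
 * the driver): the dynamic "Nblocks"/"Size" properties surfaced via
 * ddi_prop_op_nblocks() above can be inspected from userland, e.g.:
 *
 *	# prtconf -v /dev/rdsk/c0t0d0s2
 */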
2619 
2620 /*
2621  * The following functions are for smart probing:
2622  * sd_scsi_probe_cache_init()
2623  * sd_scsi_probe_cache_fini()
2624  * sd_scsi_clear_probe_cache()
2625  * sd_scsi_probe_with_cache()
2626  */
2627 
2628 /*
2629  *    Function: sd_scsi_probe_cache_init
2630  *
2631  * Description: Initializes the probe response cache mutex and head pointer.
2632  *
2633  *     Context: Kernel thread context
2634  */
2635 
2636 static void
2637 sd_scsi_probe_cache_init(void)
2638 {
2639 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2640 	sd_scsi_probe_cache_head = NULL;
2641 }
2642 
2643 
2644 /*
2645  *    Function: sd_scsi_probe_cache_fini
2646  *
2647  * Description: Frees all resources associated with the probe response cache.
2648  *
2649  *     Context: Kernel thread context
2650  */
2651 
2652 static void
2653 sd_scsi_probe_cache_fini(void)
2654 {
2655 	struct sd_scsi_probe_cache *cp;
2656 	struct sd_scsi_probe_cache *ncp;
2657 
2658 	/* Clean up our smart probing linked list */
2659 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2660 		ncp = cp->next;
2661 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2662 	}
2663 	sd_scsi_probe_cache_head = NULL;
2664 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2665 }
2666 
2667 
2668 /*
2669  *    Function: sd_scsi_clear_probe_cache
2670  *
2671  * Description: This routine clears the probe response cache. This is
2672  *		done when open() returns ENXIO so that when deferred
2673  *		attach is attempted (possibly after a device has been
2674  *		turned on) we will retry the probe. Since we don't know
2675  *		which target we failed to open, we just clear the
2676  *		entire cache.
2677  *
2678  *     Context: Kernel thread context
2679  */
2680 
2681 static void
2682 sd_scsi_clear_probe_cache(void)
2683 {
2684 	struct sd_scsi_probe_cache	*cp;
2685 	int				i;
2686 
2687 	mutex_enter(&sd_scsi_probe_cache_mutex);
2688 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2689 		/*
2690 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2691 		 * force probing to be performed the next time
2692 		 * sd_scsi_probe_with_cache is called.
2693 		 */
2694 		for (i = 0; i < NTARGETS_WIDE; i++) {
2695 			cp->cache[i] = SCSIPROBE_EXISTS;
2696 		}
2697 	}
2698 	mutex_exit(&sd_scsi_probe_cache_mutex);
2699 }
2700 
2701 
2702 /*
2703  *    Function: sd_scsi_probe_with_cache
2704  *
2705  * Description: This routine implements support for a scsi device probe
2706  *		with cache. The driver maintains a cache of the target
2707  *		responses to scsi probes. If we get no response from a
2708  *		target during a probe inquiry, we remember that, and we
2709  *		avoid additional calls to scsi_probe on non-zero LUNs
2710  *		on the same target until the cache is cleared. By doing
2711  *		so we avoid the 1/4 sec selection timeout for nonzero
2712  *		LUNs. lun0 of a target is always probed.
2713  *
2714  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2715  *              waitfn   - indicates what the allocator routines should
2716  *			   do when resources are not available. This value
2717  *			   is passed on to scsi_probe() when that routine
2718  *			   is called.
2719  *
2720  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2721  *		otherwise the value returned by scsi_probe(9F).
2722  *
2723  *     Context: Kernel thread context
2724  */
2725 
2726 static int
2727 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2728 {
2729 	struct sd_scsi_probe_cache	*cp;
2730 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2731 	int		lun, tgt;
2732 
2733 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2734 	    SCSI_ADDR_PROP_LUN, 0);
2735 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2736 	    SCSI_ADDR_PROP_TARGET, -1);
2737 
2738 	/* Make sure caching enabled and target in range */
2739 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2740 		/* do it the old way (no cache) */
2741 		return (scsi_probe(devp, waitfn));
2742 	}
2743 
2744 	mutex_enter(&sd_scsi_probe_cache_mutex);
2745 
2746 	/* Find the cache for this scsi bus instance */
2747 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2748 		if (cp->pdip == pdip) {
2749 			break;
2750 		}
2751 	}
2752 
2753 	/* If we can't find a cache for this pdip, create one */
2754 	if (cp == NULL) {
2755 		int i;
2756 
2757 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2758 		    KM_SLEEP);
2759 		cp->pdip = pdip;
2760 		cp->next = sd_scsi_probe_cache_head;
2761 		sd_scsi_probe_cache_head = cp;
2762 		for (i = 0; i < NTARGETS_WIDE; i++) {
2763 			cp->cache[i] = SCSIPROBE_EXISTS;
2764 		}
2765 	}
2766 
2767 	mutex_exit(&sd_scsi_probe_cache_mutex);
2768 
2769 	/* Recompute the cache for this target if LUN zero */
2770 	if (lun == 0) {
2771 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2772 	}
2773 
2774 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2775 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2776 		return (SCSIPROBE_NORESP);
2777 	}
2778 
2779 	/* Do the actual probe; save & return the result */
2780 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2781 }
2782 
2783 
2784 /*
2785  *    Function: sd_scsi_target_lun_init
2786  *
2787  * Description: Initializes the attached lun chain mutex and head pointer.
2788  *
2789  *     Context: Kernel thread context
2790  */
2791 
2792 static void
2793 sd_scsi_target_lun_init(void)
2794 {
2795 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2796 	sd_scsi_target_lun_head = NULL;
2797 }
2798 
2799 
2800 /*
2801  *    Function: sd_scsi_target_lun_fini
2802  *
2803  * Description: Frees all resources associated with the attached lun
2804  *              chain.
2805  *
2806  *     Context: Kernel thread context
2807  */
2808 
2809 static void
2810 sd_scsi_target_lun_fini(void)
2811 {
2812 	struct sd_scsi_hba_tgt_lun	*cp;
2813 	struct sd_scsi_hba_tgt_lun	*ncp;
2814 
2815 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2816 		ncp = cp->next;
2817 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2818 	}
2819 	sd_scsi_target_lun_head = NULL;
2820 	mutex_destroy(&sd_scsi_target_lun_mutex);
2821 }
2822 
2823 
2824 /*
2825  *    Function: sd_scsi_get_target_lun_count
2826  *
2827  * Description: This routine will check in the attached lun chain to see
2828  * 		how many luns are attached on the required SCSI controller
2829  * 		and target. Currently, some capabilities such as tagged
2830  *		queueing are supported per target by the HBA, so all luns
2831  *		on a target have the same capabilities. Based on this
2832  * 		assumption, sd should set these capabilities only once per
2833  *		target. This function is called when sd needs to determine
2834  *		how many luns are already attached on a target.
2835  *
2836  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2837  *			  controller device.
2838  *              target	- The target ID on the controller's SCSI bus.
2839  *
2840  * Return Code: The number of luns attached on the required target and
2841  *		controller.
2842  *		-1 if target ID is not in parallel SCSI scope or the given
2843  * 		dip is not in the chain.
2844  *
2845  *     Context: Kernel thread context
2846  */
2847 
2848 static int
2849 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2850 {
2851 	struct sd_scsi_hba_tgt_lun	*cp;
2852 
2853 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2854 		return (-1);
2855 	}
2856 
2857 	mutex_enter(&sd_scsi_target_lun_mutex);
2858 
2859 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2860 		if (cp->pdip == dip) {
2861 			break;
2862 		}
2863 	}
2864 
2865 	mutex_exit(&sd_scsi_target_lun_mutex);
2866 
2867 	if (cp == NULL) {
2868 		return (-1);
2869 	}
2870 
2871 	return (cp->nlun[target]);
2872 }
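
/*
 * Illustrative use (a sketch, not the actual attach code): a caller
 * deciding whether per-target capabilities still need to be set might
 * check whether this is the first lun seen on the target, e.g.:
 *
 *	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
 *		... no luns attached yet: set per-target capabilities ...
 *	}
 */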
2873 
2874 
2875 /*
2876  *    Function: sd_scsi_update_lun_on_target
2877  *
2878  * Description: This routine is used to update the attached lun chain when a
2879  *		lun is attached or detached on a target.
2880  *
2881  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2882  *                        controller device.
2883  *              target  - The target ID on the controller's SCSI bus.
2884  *		flag	- Indicate the lun is attached or detached.
2885  *
2886  *     Context: Kernel thread context
2887  */
2888 
2889 static void
2890 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2891 {
2892 	struct sd_scsi_hba_tgt_lun	*cp;
2893 
2894 	mutex_enter(&sd_scsi_target_lun_mutex);
2895 
2896 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2897 		if (cp->pdip == dip) {
2898 			break;
2899 		}
2900 	}
2901 
2902 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2903 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2904 		    KM_SLEEP);
2905 		cp->pdip = dip;
2906 		cp->next = sd_scsi_target_lun_head;
2907 		sd_scsi_target_lun_head = cp;
2908 	}
2909 
2910 	mutex_exit(&sd_scsi_target_lun_mutex);
2911 
2912 	if (cp != NULL) {
2913 		if (flag == SD_SCSI_LUN_ATTACH) {
2914 			cp->nlun[target]++;
2915 		} else {
2916 			cp->nlun[target]--;
2917 		}
2918 	}
2919 }
2920 
2921 
2922 /*
2923  *    Function: sd_spin_up_unit
2924  *
2925  * Description: Issues the following commands to spin up the device:
2926  *		START STOP UNIT and INQUIRY.
2927  *
2928  *   Arguments: un - driver soft state (unit) structure
2929  *
2930  * Return Code: 0 - success
2931  *		EIO - failure
2932  *		EACCES - reservation conflict
2933  *
2934  *     Context: Kernel thread context
2935  */
2936 
2937 static int
2938 sd_spin_up_unit(struct sd_lun *un)
2939 {
2940 	size_t	resid		= 0;
2941 	int	has_conflict	= FALSE;
2942 	uchar_t *bufaddr;
2943 
2944 	ASSERT(un != NULL);
2945 
2946 	/*
2947 	 * Send a throwaway START UNIT command.
2948 	 *
2949 	 * If we fail on this, we don't care presently what precisely
2950 	 * is wrong.  EMC's arrays will also fail this with a check
2951 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2952 	 * we don't want to fail the attach because it may become
2953 	 * "active" later.
2954 	 */
2955 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2956 	    == EACCES)
2957 		has_conflict = TRUE;
2958 
2959 	/*
2960 	 * Send another INQUIRY command to the target. This is necessary for
2961 	 * non-removable media direct access devices because their INQUIRY data
2962 	 * may not be fully qualified until they are spun up (perhaps via the
2963 	 * START command above).  (Note: This seems to be needed for some
2964 	 * legacy devices only.)  The INQUIRY command should succeed even if a
2965 	 * Reservation Conflict is present.
2966 	 */
2967 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2968 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2969 		kmem_free(bufaddr, SUN_INQSIZE);
2970 		return (EIO);
2971 	}
2972 
2973 	/*
2974 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2975 	 * Note that this routine does not return a failure here even if the
2976 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2977 	 */
2978 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2979 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2980 	}
2981 
2982 	kmem_free(bufaddr, SUN_INQSIZE);
2983 
2984 	/* If we hit a reservation conflict above, tell the caller. */
2985 	if (has_conflict == TRUE) {
2986 		return (EACCES);
2987 	}
2988 
2989 	return (0);
2990 }
2991 
2992 #ifdef _LP64
2993 /*
2994  *    Function: sd_enable_descr_sense
2995  *
2996  * Description: This routine attempts to select descriptor sense format
2997  *		using the Control mode page.  Devices that support 64 bit
2998  *		LBAs (for >2TB luns) should also implement descriptor
2999  *		sense data so we will call this function whenever we see
3000  *		a lun larger than 2TB.  If for some reason the device
3001  *		supports 64 bit LBAs but doesn't support descriptor sense,
3002  *		presumably the mode select will fail.  Everything will
3003  *		continue to work normally except that we will not get
3004  *		complete sense data for commands that fail with an LBA
3005  *		larger than 32 bits.
3006  *
3007  *   Arguments: un - driver soft state (unit) structure
3008  *
3009  *     Context: Kernel thread context only
3010  */
3011 
3012 static void
3013 sd_enable_descr_sense(struct sd_lun *un)
3014 {
3015 	uchar_t			*header;
3016 	struct mode_control_scsi3 *ctrl_bufp;
3017 	size_t			buflen;
3018 	size_t			bd_len;
3019 
3020 	/*
3021 	 * Read MODE SENSE page 0xA, Control Mode Page
3022 	 */
3023 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3024 	    sizeof (struct mode_control_scsi3);
3025 	header = kmem_zalloc(buflen, KM_SLEEP);
3026 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3027 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3028 		SD_ERROR(SD_LOG_COMMON, un,
3029 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3030 		goto eds_exit;
3031 	}
3032 
3033 	/*
3034 	 * Determine size of Block Descriptors in order to locate
3035 	 * the mode page data. ATAPI devices return 0, SCSI devices
3036 	 * should return MODE_BLK_DESC_LENGTH.
3037 	 */
3038 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3039 
3040 	/* Clear the mode data length field for MODE SELECT */
3041 	((struct mode_header *)header)->length = 0;
3042 
3043 	ctrl_bufp = (struct mode_control_scsi3 *)
3044 	    (header + MODE_HEADER_LENGTH + bd_len);
3045 
3046 	/*
3047 	 * If the page length is smaller than the expected value,
3048 	 * the target device doesn't support D_SENSE. Bail out here.
3049 	 */
3050 	if (ctrl_bufp->mode_page.length <
3051 	    sizeof (struct mode_control_scsi3) - 2) {
3052 		SD_ERROR(SD_LOG_COMMON, un,
3053 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3054 		goto eds_exit;
3055 	}
3056 
3057 	/*
3058 	 * Clear PS bit for MODE SELECT
3059 	 */
3060 	ctrl_bufp->mode_page.ps = 0;
3061 
3062 	/*
3063 	 * Set D_SENSE to enable descriptor sense format.
3064 	 */
3065 	ctrl_bufp->d_sense = 1;
3066 
3067 	/*
3068 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3069 	 */
3070 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3071 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3072 		SD_INFO(SD_LOG_COMMON, un,
3073 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3074 		goto eds_exit;
3075 	}
3076 
3077 eds_exit:
3078 	kmem_free(header, buflen);
3079 }
3080 
3081 /*
3082  *    Function: sd_reenable_dsense_task
3083  *
3084  * Description: Re-enable descriptor sense after device or bus reset
3085  *
3086  *     Context: Executes in a taskq() thread context
3087  */
3088 static void
3089 sd_reenable_dsense_task(void *arg)
3090 {
3091 	struct	sd_lun	*un = arg;
3092 
3093 	ASSERT(un != NULL);
3094 	sd_enable_descr_sense(un);
3095 }
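
/*
 * Note: a hedged sketch of how sd_reenable_dsense_task() is expected to
 * be dispatched from the reset/unit-attention handling elsewhere in this
 * file, assuming sd_tq is the taskq this driver creates for such work:
 *
 *	(void) taskq_dispatch(sd_tq, sd_reenable_dsense_task,
 *	    un, KM_NOSLEEP);
 */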
3096 #endif /* _LP64 */
3097 
3098 /*
3099  *    Function: sd_set_mmc_caps
3100  *
3101  * Description: This routine determines if the device is MMC compliant and if
3102  *		the device supports CDDA via a mode sense of the CDVD
3103  *		capabilities mode page. Also checks if the device is a
3104  *		dvdram writable device.
3105  *
3106  *   Arguments: un - driver soft state (unit) structure
3107  *
3108  *     Context: Kernel thread context only
3109  */
3110 
3111 static void
3112 sd_set_mmc_caps(struct sd_lun *un)
3113 {
3114 	struct mode_header_grp2		*sense_mhp;
3115 	uchar_t				*sense_page;
3116 	caddr_t				buf;
3117 	int				bd_len;
3118 	int				status;
3119 	struct uscsi_cmd		com;
3120 	int				rtn;
3121 	uchar_t				*out_data_rw, *out_data_hd;
3122 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3123 
3124 	ASSERT(un != NULL);
3125 
3126 	/*
3127 	 * The flags set in this function are: mmc compliant, dvdram
3128 	 * writable device, and cdda support. Initialize them to FALSE;
3129 	 * each is set to TRUE only if the capability is detected.
3130 	 */
3131 	un->un_f_mmc_cap = FALSE;
3132 	un->un_f_dvdram_writable_device = FALSE;
3133 	un->un_f_cfg_cdda = FALSE;
3134 
3135 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3136 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3137 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3138 
3139 	if (status != 0) {
3140 		/* command failed; just return */
3141 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3142 		return;
3143 	}
3144 	/*
3145 	 * If the mode sense request for the CDROM CAPABILITIES
3146 	 * page (0x2A) succeeds the device is assumed to be MMC.
3147 	 */
3148 	un->un_f_mmc_cap = TRUE;
3149 
3150 	/* Get to the page data */
3151 	sense_mhp = (struct mode_header_grp2 *)buf;
3152 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3153 	    sense_mhp->bdesc_length_lo;
3154 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3155 		/*
3156 		 * We did not get back the expected block descriptor
3157 		 * length so we cannot determine if the device supports
3158 		 * CDDA. However, we still indicate the device is MMC
3159 		 * according to the successful response to the page
3160 		 * 0x2A mode sense request.
3161 		 */
3162 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3163 		    "sd_set_mmc_caps: Mode Sense returned "
3164 		    "invalid block descriptor length\n");
3165 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3166 		return;
3167 	}
3168 
3169 	/* See if read CDDA is supported */
3170 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3171 	    bd_len);
3172 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3173 
3174 	/* See if writing DVD RAM is supported. */
3175 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3176 	if (un->un_f_dvdram_writable_device == TRUE) {
3177 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3178 		return;
3179 	}
3180 
3181 	/*
3182 	 * If the device presents DVD or CD capabilities in the mode
3183 	 * page, we can return here since an RRD will not have
3184 	 * these capabilities.
3185 	 */
3186 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3187 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3188 		return;
3189 	}
3190 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3191 
3192 	/*
3193 	 * If un->un_f_dvdram_writable_device is still FALSE,
3194 	 * check for a Removable Rigid Disk (RRD).  An RRD
3195 	 * device is identified by the features RANDOM_WRITABLE and
3196 	 * HARDWARE_DEFECT_MANAGEMENT.
3197 	 */
3198 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3199 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3200 
3201 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3202 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3203 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3204 	if (rtn != 0) {
3205 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3206 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3207 		return;
3208 	}
3209 
3210 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3211 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3212 
3213 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3214 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3215 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3216 	if (rtn == 0) {
3217 		/*
3218 		 * We have good information, check for random writable
3219 		 * and hardware defect features.
3220 		 */
3221 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3222 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3223 			un->un_f_dvdram_writable_device = TRUE;
3224 		}
3225 	}
3226 
3227 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3228 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3229 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3230 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3231 }
3232 
3233 /*
3234  *    Function: sd_check_for_writable_cd
3235  *
3236  * Description: This routine determines if the media in the device is
3237  *		writable or not. It uses the get configuration command (0x46)
3238  *		to determine if the media is writable.
3239  *
3240  *   Arguments: un - driver soft state (unit) structure
3241  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3242  *                           chain and the normal command waitq, or
3243  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3244  *                           "direct" chain and bypass the normal command
3245  *                           waitq.
3246  *
3247  *     Context: Never called at interrupt context.
3248  */
3249 
3250 static void
3251 sd_check_for_writable_cd(struct sd_lun *un, int path_flag)
3252 {
3253 	struct uscsi_cmd		com;
3254 	uchar_t				*out_data;
3255 	uchar_t				*rqbuf;
3256 	int				rtn;
3257 	uchar_t				*out_data_rw, *out_data_hd;
3258 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3259 	struct mode_header_grp2		*sense_mhp;
3260 	uchar_t				*sense_page;
3261 	caddr_t				buf;
3262 	int				bd_len;
3263 	int				status;
3264 
3265 	ASSERT(un != NULL);
3266 	ASSERT(mutex_owned(SD_MUTEX(un)));
3267 
3268 	/*
3269 	 * Initialize writable media to FALSE; it is set to TRUE below only
3270 	 * if the configuration info indicates that the media is writable.
3271 	 */
3272 	un->un_f_mmc_writable_media = FALSE;
3273 	mutex_exit(SD_MUTEX(un));
3274 
3275 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3276 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3277 
3278 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3279 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3280 
3281 	mutex_enter(SD_MUTEX(un));
3282 	if (rtn == 0) {
3283 		/*
3284 		 * We have good information, check for writable DVD.
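		 * Bytes 6-7 of the GET CONFIGURATION header hold the
		 * current profile; a value of 0x0012 identifies DVD-RAM,
		 * a writable profile.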
3285 		 */
3286 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3287 			un->un_f_mmc_writable_media = TRUE;
3288 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3289 			kmem_free(rqbuf, SENSE_LENGTH);
3290 			return;
3291 		}
3292 	}
3293 
3294 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3295 	kmem_free(rqbuf, SENSE_LENGTH);
3296 
3297 	/*
3298 	 * Determine if this is an RRD type device.
3299 	 */
3300 	mutex_exit(SD_MUTEX(un));
3301 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3302 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3303 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3304 	mutex_enter(SD_MUTEX(un));
3305 	if (status != 0) {
3306 		/* command failed; just return */
3307 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3308 		return;
3309 	}
3310 
3311 	/* Get to the page data */
3312 	sense_mhp = (struct mode_header_grp2 *)buf;
3313 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3314 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3315 		/*
3316 		 * We did not get back the expected block descriptor length so
3317 		 * we cannot check the mode page.
3318 		 */
3319 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3320 		    "sd_check_for_writable_cd: Mode Sense returned "
3321 		    "invalid block descriptor length\n");
3322 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3323 		return;
3324 	}
3325 
3326 	/*
3327 	 * If the device presents DVD or CD capabilities in the mode
3328 	 * page, we can return here since an RRD device will not have
3329 	 * these capabilities.
3330 	 */
3331 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3332 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3333 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3334 		return;
3335 	}
3336 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3337 
3338 	/*
3339 	 * If un->un_f_mmc_writable_media is still FALSE,
3340 	 * check for RRD type media.  An RRD device is identified
3341 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3342 	 */
3343 	mutex_exit(SD_MUTEX(un));
3344 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3345 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3346 
3347 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3348 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3349 	    RANDOM_WRITABLE, path_flag);
3350 	if (rtn != 0) {
3351 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3352 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3353 		mutex_enter(SD_MUTEX(un));
3354 		return;
3355 	}
3356 
3357 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3358 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3359 
3360 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3361 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3362 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3363 	mutex_enter(SD_MUTEX(un));
3364 	if (rtn == 0) {
3365 		/*
3366 		 * We have good information, check for random writable
3367 		 * and hardware defect features being reported as current.
3368 		 */
3369 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3370 		    (out_data_rw[10] & 0x1) &&
3371 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3372 		    (out_data_hd[10] & 0x1)) {
3373 			un->un_f_mmc_writable_media = TRUE;
3374 		}
3375 	}
3376 
3377 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3378 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3379 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3380 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3381 }
3382 
3383 /*
3384  *    Function: sd_read_unit_properties
3385  *
3386  * Description: The following implements a property lookup mechanism.
3387  *		Properties for particular disks (keyed on vendor, model
3388  *		and rev numbers) are sought in the sd.conf file via
3389  *		sd_process_sdconf_file(), and if not found there, are
3390  *		looked for in a list hardcoded in this driver via
3391  *		sd_process_sdconf_table().  Once located, the properties
3392  *		are used to update the driver unit structure.
3393  *
3394  *   Arguments: un - driver soft state (unit) structure
3395  */
3396 
3397 static void
3398 sd_read_unit_properties(struct sd_lun *un)
3399 {
3400 	/*
3401 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3402 	 * the "sd-config-list" property (from the sd.conf file) or if
3403 	 * there was not a match for the inquiry vid/pid. If this event
3404 	 * occurs the static driver configuration table is searched for
3405 	 * a match.
3406 	 */
3407 	ASSERT(un != NULL);
3408 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3409 		sd_process_sdconf_table(un);
3410 	}
3411 
3412 	/* check for LSI device */
3413 	sd_is_lsi(un);
3416 }
3417 
3418 
3419 /*
3420  *    Function: sd_process_sdconf_file
3421  *
3422  * Description: Use ddi_getlongprop to obtain the properties from the
3423  *		driver's config file (ie, sd.conf) and update the driver
3424  *		soft state structure accordingly.
3425  *
3426  *   Arguments: un - driver soft state (unit) structure
3427  *
3428  * Return Code: SD_SUCCESS - The properties were successfully set according
3429  *			     to the driver configuration file.
3430  *		SD_FAILURE - The driver config list was not obtained or
3431  *			     there was no vid/pid match. This indicates that
3432  *			     the static config table should be used.
3433  *
3434  * The config file has a property, "sd-config-list", which consists of
3435  * one or more duplets as follows:
3436  *
3437  *  sd-config-list=
3438  *	<duplet>,
3439  *	[<duplet>,]
3440  *	[<duplet>];
3441  *
3442  * The structure of each duplet is as follows:
3443  *
3444  *  <duplet>:= <vid+pid>,<data-property-name_list>
3445  *
3446  * The first entry of the duplet is the device ID string (the concatenated
3447  * vid & pid; not to be confused with a device_id).  This is defined in
3448  * the same way as in the sd_disk_table.
3449  *
3450  * The second part of the duplet is a string that identifies a
3451  * data-property-name-list. The data-property-name-list is defined as
3452  * follows:
3453  *
3454  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3455  *
3456  * The syntax of <data-property-name> depends on the <version> field.
3457  *
3458  * If version = SD_CONF_VERSION_1 we have the following syntax:
3459  *
3460  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3461  *
3462  * where the prop0 value will be used to set prop0 if bit0 is set in
3463  * the flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3464  *
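 * For illustration only, a hypothetical version 1 duplet (the vid/pid
 * and property name are invented; flags of 0x1 sets only the throttle
 * bit, assuming SD_CONF_BSET_THROTTLE occupies bit 0, so prop0 carries
 * the throttle value and the list needs only three integers):
 *
 *  sd-config-list= "ACME    EXAMPLEDISK", "acme-data";
 *  acme-data= 1,0x1,32;
 *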
3465  */
3466 
3467 static int
3468 sd_process_sdconf_file(struct sd_lun *un)
3469 {
3470 	char	*config_list = NULL;
3471 	int	config_list_len;
3472 	int	len;
3473 	int	dupletlen = 0;
3474 	char	*vidptr;
3475 	int	vidlen;
3476 	char	*dnlist_ptr;
3477 	char	*dataname_ptr;
3478 	int	dnlist_len;
3479 	int	dataname_len;
3480 	int	*data_list;
3481 	int	data_list_len;
3482 	int	rval = SD_FAILURE;
3483 	int	i;
3484 
3485 	ASSERT(un != NULL);
3486 
3487 	/* Obtain the configuration list associated with the .conf file */
3488 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3489 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3490 	    != DDI_PROP_SUCCESS) {
3491 		return (SD_FAILURE);
3492 	}
3493 
3494 	/*
3495 	 * Compare vids in each duplet to the inquiry vid - if a match is
3496 	 * made, get the data value and update the soft state structure
3497 	 * accordingly.
3498 	 *
3499 	 * Note: This algorithm is complex and difficult to maintain. It should
3500 	 * be replaced with a more robust implementation.
3501 	 */
3502 	for (len = config_list_len, vidptr = config_list; len > 0;
3503 	    vidptr += dupletlen, len -= dupletlen) {
3504 		/*
3505 		 * Note: The assumption here is that each vid entry is on
3506 		 * a unique line from its associated duplet.
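		 * In memory the property value is a sequence of NUL
		 * terminated strings, so a matched duplet spans
		 * vidlen + dnlist_len + 2 bytes; that is exactly how
		 * dupletlen advances the scan below.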
3507 		 */
3508 		vidlen = dupletlen = (int)strlen(vidptr);
3509 		if ((vidlen == 0) ||
3510 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3511 			dupletlen++;
3512 			continue;
3513 		}
3514 
3515 		/*
3516 		 * dnlist contains 1 or more blank separated
3517 		 * data-property-name entries
3518 		 */
3519 		dnlist_ptr = vidptr + vidlen + 1;
3520 		dnlist_len = (int)strlen(dnlist_ptr);
3521 		dupletlen += dnlist_len + 2;
3522 
3523 		/*
3524 		 * Set a pointer for the first data-property-name
3525 		 * entry in the list
3526 		 */
3527 		dataname_ptr = dnlist_ptr;
3528 		dataname_len = 0;
3529 
3530 		/*
3531 		 * Loop through all data-property-name entries in the
3532 		 * data-property-name-list setting the properties for each.
3533 		 */
3534 		while (dataname_len < dnlist_len) {
3535 			int version;
3536 
3537 			/*
3538 			 * Determine the length of the current
3539 			 * data-property-name entry by indexing until a
3540 			 * blank or NUL is encountered. When a space is
3541 			 * encountered, reset it to a NUL for compliance
3542 			 * with ddi_getlongprop().
3543 			 */
3544 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3545 			    (dataname_ptr[i] != '\0')); i++) {
3546 				;
3547 			}
3548 
3549 			dataname_len += i;
3550 			/* If not NUL terminated, make it so */
3551 			if (dataname_ptr[i] == ' ') {
3552 				dataname_ptr[i] = '\0';
3553 			}
3554 			dataname_len++;
3555 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3556 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3557 			    vidptr, dataname_ptr);
3558 
3559 			/* Get the data list */
3560 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3561 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3562 			    != DDI_PROP_SUCCESS) {
3563 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3564 				    "sd_process_sdconf_file: data property (%s)"
3565 				    " has no value\n", dataname_ptr);
3566 				dataname_ptr = dnlist_ptr + dataname_len;
3567 				continue;
3568 			}
3569 
3570 			version = data_list[0];
3571 
3572 			if (version == SD_CONF_VERSION_1) {
3573 				sd_tunables values;
3574 
3575 				/* Set the properties */
3576 				if (sd_chk_vers1_data(un, data_list[1],
3577 				    &data_list[2], data_list_len, dataname_ptr)
3578 				    == SD_SUCCESS) {
3579 					sd_get_tunables_from_conf(un,
3580 					    data_list[1], &data_list[2],
3581 					    &values);
3582 					sd_set_vers1_properties(un,
3583 					    data_list[1], &values);
3584 					rval = SD_SUCCESS;
3585 				} else {
3586 					rval = SD_FAILURE;
3587 				}
3588 			} else {
3589 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3590 				    "data property %s version 0x%x is invalid.",
3591 				    dataname_ptr, version);
3592 				rval = SD_FAILURE;
3593 			}
3594 			kmem_free(data_list, data_list_len);
3595 			dataname_ptr = dnlist_ptr + dataname_len;
3596 		}
3597 	}
3598 
3599 	/* free up the memory allocated by ddi_getlongprop */
3600 	if (config_list) {
3601 		kmem_free(config_list, config_list_len);
3602 	}
3603 
3604 	return (rval);
3605 }
3606 
3607 /*
3608  *    Function: sd_get_tunables_from_conf()
3609  *
3610  *    This function reads the data list from the sd.conf file and pulls
3611  *    the values that can have numeric arguments, placing each value in
3612  *    the appropriate sd_tunables member.
3613  *    Since the order of the data list members varies across platforms,
3614  *    this function reads them from the data list in a platform-specific
3615  *    order and places them into the correct sd_tunables member, which
3616  *    is consistent across all platforms.
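 *
 *    For example (hypothetical values): with a flags word of 0x1, only
 *    SD_CONF_BSET_THROTTLE is set (assuming it occupies bit 0), so the
 *    loop below copies data_list[0] into values->sdt_throttle; the next
 *    iteration then breaks out because mask (0x2) exceeds flags.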
3618  */
3619 static void
3620 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3621     sd_tunables *values)
3622 {
3623 	int i;
3624 	int mask;
3625 
3626 	bzero(values, sizeof (sd_tunables));
3627 
3628 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3629 
3630 		mask = 1 << i;
3631 		if (mask > flags) {
3632 			break;
3633 		}
3634 
3635 		switch (mask & flags) {
3636 		case 0:	/* This mask bit not set in flags */
3637 			continue;
3638 		case SD_CONF_BSET_THROTTLE:
3639 			values->sdt_throttle = data_list[i];
3640 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3641 			    "sd_get_tunables_from_conf: throttle = %d\n",
3642 			    values->sdt_throttle);
3643 			break;
3644 		case SD_CONF_BSET_CTYPE:
3645 			values->sdt_ctype = data_list[i];
3646 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3647 			    "sd_get_tunables_from_conf: ctype = %d\n",
3648 			    values->sdt_ctype);
3649 			break;
3650 		case SD_CONF_BSET_NRR_COUNT:
3651 			values->sdt_not_rdy_retries = data_list[i];
3652 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3653 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3654 			    values->sdt_not_rdy_retries);
3655 			break;
3656 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3657 			values->sdt_busy_retries = data_list[i];
3658 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3659 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3660 			    values->sdt_busy_retries);
3661 			break;
3662 		case SD_CONF_BSET_RST_RETRIES:
3663 			values->sdt_reset_retries = data_list[i];
3664 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3665 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3666 			    values->sdt_reset_retries);
3667 			break;
3668 		case SD_CONF_BSET_RSV_REL_TIME:
3669 			values->sdt_reserv_rel_time = data_list[i];
3670 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3671 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3672 			    values->sdt_reserv_rel_time);
3673 			break;
3674 		case SD_CONF_BSET_MIN_THROTTLE:
3675 			values->sdt_min_throttle = data_list[i];
3676 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3677 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3678 			    values->sdt_min_throttle);
3679 			break;
3680 		case SD_CONF_BSET_DISKSORT_DISABLED:
3681 			values->sdt_disk_sort_dis = data_list[i];
3682 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3683 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3684 			    values->sdt_disk_sort_dis);
3685 			break;
3686 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3687 			values->sdt_lun_reset_enable = data_list[i];
3688 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3689 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3690 			    "\n", values->sdt_lun_reset_enable);
3691 			break;
3692 		}
3693 	}
3694 }
3695 
3696 /*
3697  *    Function: sd_process_sdconf_table
3698  *
3699  * Description: Search the static configuration table for a match on the
3700  *		inquiry vid/pid and update the driver soft state structure
3701  *		according to the table property values for the device.
3702  *
3703  *		The form of a configuration table entry is:
3704  *		  <vid+pid>,<flags>,<property-data>
3705  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3706  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3707  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3708  *
3709  *   Arguments: un - driver soft state (unit) structure
3710  */
3711 
3712 static void
3713 sd_process_sdconf_table(struct sd_lun *un)
3714 {
3715 	char	*id = NULL;
3716 	int	table_index;
3717 	int	idlen;
3718 
3719 	ASSERT(un != NULL);
3720 	for (table_index = 0; table_index < sd_disk_table_size;
3721 	    table_index++) {
3722 		id = sd_disk_table[table_index].device_id;
3723 		idlen = strlen(id);
3724 		if (idlen == 0) {
3725 			continue;
3726 		}
3727 
3728 		/*
3729 		 * The static configuration table currently does not
3730 		 * implement version 10 properties. Additionally,
3731 		 * multiple data-property-name entries are not
3732 		 * implemented in the static configuration table.
3733 		 */
3734 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3735 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3736 			    "sd_process_sdconf_table: disk %s\n", id);
3737 			sd_set_vers1_properties(un,
3738 			    sd_disk_table[table_index].flags,
3739 			    sd_disk_table[table_index].properties);
3740 			break;
3741 		}
3742 	}
3743 }
3744 
3745 
3746 /*
3747  *    Function: sd_sdconf_id_match
3748  *
3749  * Description: This local function implements a case sensitive vid/pid
3750  *		comparison as well as the boundary cases of wild card and
3751  *		multiple blanks.
3752  *
3753  *		Note: An implicit assumption made here is that the scsi
3754  *		inquiry structure will always keep the vid, pid and
3755  *		revision strings in consecutive sequence, so they can be
3756  *		read as a single string. If this assumption is not the
3757  *		case, a separate string, to be used for the check, needs
3758  *		to be built with these strings concatenated.
3759  *
3760  *   Arguments: un - driver soft state (unit) structure
3761  *		id - table or config file vid/pid
3762  *		idlen  - length of the vid/pid (bytes)
3763  *
3764  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3765  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
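 *
 *		For example, a hypothetical table entry of "*ST31200N*"
 *		has no vendor portion; it matches any device whose 16 byte
 *		inq_pid contains the substring "ST31200N".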
3766  */
3767 
3768 static int
3769 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3770 {
3771 	struct scsi_inquiry	*sd_inq;
3772 	int 			rval = SD_SUCCESS;
3773 
3774 	ASSERT(un != NULL);
3775 	sd_inq = un->un_sd->sd_inq;
3776 	ASSERT(id != NULL);
3777 
3778 	/*
3779 	 * We use the inq_vid as a pointer to a buffer containing the
3780 	 * vid and pid and use the entire vid/pid length of the table
3781 	 * entry for the comparison. This works because the inq_pid
3782 	 * data member follows inq_vid in the scsi_inquiry structure.
3783 	 */
3784 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3785 		/*
3786 		 * The user id string is compared to the inquiry vid/pid
3787 		 * using a case insensitive comparison and ignoring
3788 		 * multiple spaces.
3789 		 */
3790 		rval = sd_blank_cmp(un, id, idlen);
3791 		if (rval != SD_SUCCESS) {
3792 			/*
3793 			 * User id strings that start and end with a "*"
3794 			 * are a special case. These do not have a
3795 			 * specific vendor, and the product string can
3796 			 * appear anywhere in the 16 byte PID portion of
3797 			 * the inquiry data. This is a simple strstr()
3798 			 * type search for the user id in the inquiry data.
3799 			 */
3800 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3801 				char	*pidptr = &id[1];
3802 				int	i;
3803 				int	j;
3804 				int	pidstrlen = idlen - 2;
3805 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3806 				    pidstrlen;
3807 
3808 				if (j < 0) {
3809 					return (SD_FAILURE);
3810 				}
3811 				for (i = 0; i < j; i++) {
3812 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3813 					    pidptr, pidstrlen) == 0) {
3814 						rval = SD_SUCCESS;
3815 						break;
3816 					}
3817 				}
3818 			}
3819 		}
3820 	}
3821 	return (rval);
3822 }
3823 
3824 
3825 /*
3826  *    Function: sd_blank_cmp
3827  *
3828  * Description: If the id string starts and ends with a space, treat
3829  *		multiple consecutive spaces as equivalent to a single
3830  *		space. For example, this causes a sd_disk_table entry
3831  *		of " NEC CDROM " to match a device's id string of
3832  *		"NEC       CDROM".
3833  *
3834  *		Note: The success exit condition for this routine is when
3835  *		the table entry pointer has reached its terminating '\0'
3836  *		and the count (cnt) of remaining inquiry bytes is zero.
3837  *		This happens when the inquiry string returned by the device
3838  *		is padded with spaces to exactly 24 bytes in length (8 byte
3839  *		vid + 16 byte pid); the SCSI spec states that the inquiry
3840  *		string is to be padded with spaces.
3841  *
3842  *   Arguments: un - driver soft state (unit) structure
3843  *		id - table or config file vid/pid
3844  *		idlen  - length of the vid/pid (bytes)
3845  *
3846  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3847  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3848  */
3849 
3850 static int
3851 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3852 {
3853 	char		*p1;
3854 	char		*p2;
3855 	int		cnt;
3856 
3857 	ASSERT(un != NULL);
3858 	ASSERT(id != NULL);
3859 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3860 	    sizeof (SD_INQUIRY(un)->inq_pid);
3861 	p2 = un->un_sd->sd_inq->inq_vid;
3862 	p1 = id;
3863 
3864 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3865 		/*
3866 		 * Note: string p1 is terminated by a NUL but string p2
3867 		 * isn't.  The end of p2 is determined by cnt.
3868 		 */
3869 		for (;;) {
3870 			/* skip over any extra blanks in both strings */
3871 			while ((*p1 != '\0') && (*p1 == ' ')) {
3872 				p1++;
3873 			}
3874 			while ((cnt != 0) && (*p2 == ' ')) {
3875 				p2++;
3876 				cnt--;
3877 			}
3878 
3879 			/* compare the two strings */
3880 			if ((cnt == 0) ||
3881 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3882 				break;
3883 			}
3884 			while ((cnt > 0) &&
3885 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3886 				p1++;
3887 				p2++;
3888 				cnt--;
3889 			}
3890 		}
3891 	}
3892 
3893 	/* return SD_SUCCESS if both strings match */
3894 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3895 }
3896 
3897 
3898 /*
3899  *    Function: sd_chk_vers1_data
3900  *
3901  * Description: Verify the version 1 device properties provided by the
3902  *		user via the configuration file
3903  *
3904  *   Arguments: un	     - driver soft state (unit) structure
3905  *		flags	     - integer mask indicating properties to be set
3906  *		prop_list    - integer list of property values
3907  *		list_len     - length of user provided data
3908  *
3909  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3910  *		SD_FAILURE - Indicates the user provided data is invalid
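 *
 *		For example, a hypothetical version 1 list with a flags
 *		word of 0x1 (one property bit set) must supply at least
 *		three integers: the version word, the flags word, and one
 *		property value.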
3911  */
3912 
3913 static int
3914 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3915     int list_len, char *dataname_ptr)
3916 {
3917 	int i;
3918 	int mask = 1;
3919 	int index = 0;
3920 
3921 	ASSERT(un != NULL);
3922 
3923 	/* Check for a NULL property name and list */
3924 	if (dataname_ptr == NULL) {
3925 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3926 		    "sd_chk_vers1_data: NULL data property name.");
3927 		return (SD_FAILURE);
3928 	}
3929 	if (prop_list == NULL) {
3930 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3931 		    "sd_chk_vers1_data: %s NULL data property list.",
3932 		    dataname_ptr);
3933 		return (SD_FAILURE);
3934 	}
3935 
3936 	/* Display a warning if undefined bits are set in the flags */
3937 	if (flags & ~SD_CONF_BIT_MASK) {
3938 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3939 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3940 		    "Properties not set.",
3941 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3942 		return (SD_FAILURE);
3943 	}
3944 
3945 	/*
3946 	 * Verify the length of the list by identifying the highest bit set
3947 	 * in the flags and validating that the property list has a length
3948 	 * up to the index of this bit.
3949 	 */
3950 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3951 		if (flags & mask) {
3952 			index++;
3953 		}
3954 		mask <<= 1;	/* advance to the next flag bit */
3955 	}
3956 	if ((list_len / sizeof (int)) < (index + 2)) {
3957 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3958 		    "sd_chk_vers1_data: "
3959 		    "Data property list %s size is incorrect. "
3960 		    "Properties not set.", dataname_ptr);
3961 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3962 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3963 		return (SD_FAILURE);
3964 	}
3965 	return (SD_SUCCESS);
3966 }
3967 
3968 
3969 /*
3970  *    Function: sd_set_vers1_properties
3971  *
3972  * Description: Set version 1 device properties based on a property list
3973  *		retrieved from the driver configuration file or static
3974  *		configuration table. Version 1 properties have the format:
3975  *
3976  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3977  *
3978  *		where the prop0 value will be used to set prop0 if bit0
3979  *		is set in the flags
3980  *
3981  *   Arguments: un	     - driver soft state (unit) structure
3982  *		flags	     - integer mask indicating properties to be set
3983  *		prop_list    - integer list of property values
3984  */
3985 
3986 static void
3987 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3988 {
3989 	ASSERT(un != NULL);
3990 
3991 	/*
3992 	 * Set the flag to indicate cache is to be disabled. An attempt
3993 	 * to disable the cache via sd_cache_control() will be made
3994 	 * later during attach once the basic initialization is complete.
3995 	 */
3996 	if (flags & SD_CONF_BSET_NOCACHE) {
3997 		un->un_f_opt_disable_cache = TRUE;
3998 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3999 		    "sd_set_vers1_properties: caching disabled flag set\n");
4000 	}
4001 
4002 	/* CD-specific configuration parameters */
4003 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4004 		un->un_f_cfg_playmsf_bcd = TRUE;
4005 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4006 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4007 	}
4008 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4009 		un->un_f_cfg_readsub_bcd = TRUE;
4010 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4011 		    "sd_set_vers1_properties: readsub_bcd set\n");
4012 	}
4013 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4014 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4017 	}
4018 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4019 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4020 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4021 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4022 	}
4023 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4024 		un->un_f_cfg_no_read_header = TRUE;
4025 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4026 		    "sd_set_vers1_properties: no_read_header set\n");
4027 	}
4028 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4029 		un->un_f_cfg_read_cd_xd4 = TRUE;
4030 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4031 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4032 	}
4033 
4034 	/* Support for devices which do not have valid/unique serial numbers */
4035 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4036 		un->un_f_opt_fab_devid = TRUE;
4037 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4038 		    "sd_set_vers1_properties: fab_devid bit set\n");
4039 	}
4040 
4041 	/* Support for user throttle configuration */
4042 	if (flags & SD_CONF_BSET_THROTTLE) {
4043 		ASSERT(prop_list != NULL);
4044 		un->un_saved_throttle = un->un_throttle =
4045 		    prop_list->sdt_throttle;
4046 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4047 		    "sd_set_vers1_properties: throttle set to %d\n",
4048 		    prop_list->sdt_throttle);
4049 	}
4050 
4051 	/* Set the per disk retry count according to the conf file or table. */
4052 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4053 		ASSERT(prop_list != NULL);
4054 		if (prop_list->sdt_not_rdy_retries) {
4055 			un->un_notready_retry_count =
4056 			    prop_list->sdt_not_rdy_retries;
4057 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4058 			    "sd_set_vers1_properties: not ready retry count"
4059 			    " set to %d\n", un->un_notready_retry_count);
4060 		}
4061 	}
4062 
4063 	/* The controller type is reported for generic disk driver ioctls */
4064 	if (flags & SD_CONF_BSET_CTYPE) {
4065 		ASSERT(prop_list != NULL);
4066 		switch (prop_list->sdt_ctype) {
4067 		case CTYPE_CDROM:
4068 			un->un_ctype = prop_list->sdt_ctype;
4069 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4070 			    "sd_set_vers1_properties: ctype set to "
4071 			    "CTYPE_CDROM\n");
4072 			break;
4073 		case CTYPE_CCS:
4074 			un->un_ctype = prop_list->sdt_ctype;
4075 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4076 			    "sd_set_vers1_properties: ctype set to "
4077 			    "CTYPE_CCS\n");
4078 			break;
4079 		case CTYPE_ROD:		/* RW optical */
4080 			un->un_ctype = prop_list->sdt_ctype;
4081 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4082 			    "sd_set_vers1_properties: ctype set to "
4083 			    "CTYPE_ROD\n");
4084 			break;
4085 		default:
4086 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4087 			    "sd_set_vers1_properties: Could not set "
4088 			    "invalid ctype value (%d)",
4089 			    prop_list->sdt_ctype);
4090 		}
4091 	}
4092 
4093 	/* Purple failover timeout */
4094 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4095 		ASSERT(prop_list != NULL);
4096 		un->un_busy_retry_count =
4097 		    prop_list->sdt_busy_retries;
4098 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4099 		    "sd_set_vers1_properties: "
4100 		    "busy retry count set to %d\n",
4101 		    un->un_busy_retry_count);
4102 	}
4103 
4104 	/* Purple reset retry count */
4105 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4106 		ASSERT(prop_list != NULL);
4107 		un->un_reset_retry_count =
4108 		    prop_list->sdt_reset_retries;
4109 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4110 		    "sd_set_vers1_properties: "
4111 		    "reset retry count set to %d\n",
4112 		    un->un_reset_retry_count);
4113 	}
4114 
4115 	/* Purple reservation release timeout */
4116 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4117 		ASSERT(prop_list != NULL);
4118 		un->un_reserve_release_time =
4119 		    prop_list->sdt_reserv_rel_time;
4120 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4121 		    "sd_set_vers1_properties: "
4122 		    "reservation release timeout set to %d\n",
4123 		    un->un_reserve_release_time);
4124 	}
4125 
4126 	/*
4127 	 * Flag telling the driver to verify that no commands are pending
4128 	 * for a device before issuing a Test Unit Ready. This is a workaround
4129 	 * for a firmware bug in some Seagate eliteI drives.
4130 	 */
4131 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4132 		un->un_f_cfg_tur_check = TRUE;
4133 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4134 		    "sd_set_vers1_properties: tur queue check set\n");
4135 	}
4136 
4137 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4138 		un->un_min_throttle = prop_list->sdt_min_throttle;
4139 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4140 		    "sd_set_vers1_properties: min throttle set to %d\n",
4141 		    un->un_min_throttle);
4142 	}
4143 
4144 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4145 		un->un_f_disksort_disabled =
4146 		    (prop_list->sdt_disk_sort_dis != 0) ?
4147 		    TRUE : FALSE;
4148 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4149 		    "sd_set_vers1_properties: disksort disabled "
4150 		    "flag set to %d\n",
4151 		    prop_list->sdt_disk_sort_dis);
4152 	}
4153 
4154 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4155 		un->un_f_lun_reset_enabled =
4156 		    (prop_list->sdt_lun_reset_enable != 0) ?
4157 		    TRUE : FALSE;
4158 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4159 		    "sd_set_vers1_properties: lun reset enabled "
4160 		    "flag set to %d\n",
4161 		    prop_list->sdt_lun_reset_enable);
4162 	}
4163 
4164 	/*
4165 	 * Validate the throttle values.
4166 	 * If any of the numbers are invalid, set everything to defaults.
4167 	 */
4168 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4169 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4170 	    (un->un_min_throttle > un->un_throttle)) {
4171 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4172 		un->un_min_throttle = sd_min_throttle;
4173 	}
4174 }
4175 
4176 /*
4177  *   Function: sd_is_lsi()
4178  *
4179  *   Description: Check for LSI devices; step through the static device
4180  *	table to match the vid/pid.
4181  *
4182  *   Args: un - ptr to sd_lun
4183  *
4184  *   Notes:  When creating a new LSI property, it must also be added
4185  *		to the checks in this function.
4186  */
4187 static void
4188 sd_is_lsi(struct sd_lun *un)
4189 {
4190 	char	*id = NULL;
4191 	int	table_index;
4192 	int	idlen;
4193 	void	*prop;
4194 
4195 	ASSERT(un != NULL);
4196 	for (table_index = 0; table_index < sd_disk_table_size;
4197 	    table_index++) {
4198 		id = sd_disk_table[table_index].device_id;
4199 		idlen = strlen(id);
4200 		if (idlen == 0) {
4201 			continue;
4202 		}
4203 
4204 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4205 			prop = sd_disk_table[table_index].properties;
4206 			if (prop == &lsi_properties ||
4207 			    prop == &lsi_oem_properties ||
4208 			    prop == &lsi_properties_scsi ||
4209 			    prop == &symbios_properties) {
4210 				un->un_f_cfg_is_lsi = TRUE;
4211 			}
4212 			break;
4213 		}
4214 	}
4215 }
4216 
4217 /*
4218  *    Function: sd_get_physical_geometry
4219  *
4220  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4221  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4222  *		target, and use this information to initialize the physical
4223  *		geometry cache specified by pgeom_p.
4224  *
4225  *		MODE SENSE is an optional command, so failure in this case
4226  *		does not necessarily denote an error. We want to use the
4227  *		MODE SENSE commands to derive the physical geometry of the
4228  *		device, but if either command fails, the logical geometry is
4229  *		used as the fallback for disk label geometry in cmlb.
4230  *
4231  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4232  *		have already been initialized for the current target and
4233  *		that the current values be passed as args so that we don't
4234  *		end up ever trying to use -1 as a valid value. This could
4235  *		happen if either value is reset while we're not holding
4236  *		the mutex.
4237  *
4238  *   Arguments: un - driver soft state (unit) structure
4239  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4240  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4241  *			to use the USCSI "direct" chain and bypass the normal
4242  *			command waitq.
4243  *
4244  *     Context: Kernel thread only (can sleep).
4245  */
4246 
4247 static int
4248 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4249 	diskaddr_t capacity, int lbasize, int path_flag)
4250 {
4251 	struct	mode_format	*page3p;
4252 	struct	mode_geometry	*page4p;
4253 	struct	mode_header	*headerp;
4254 	int	sector_size;
4255 	int	nsect;
4256 	int	nhead;
4257 	int	ncyl;
4258 	int	intrlv;
4259 	int	spc;
4260 	diskaddr_t	modesense_capacity;
4261 	int	rpm;
4262 	int	bd_len;
4263 	int	mode_header_length;
4264 	uchar_t	*p3bufp;
4265 	uchar_t	*p4bufp;
4266 	int	cdbsize;
4267 	int 	ret = EIO;
4268 
4269 	ASSERT(un != NULL);
4270 
4271 	if (lbasize == 0) {
4272 		if (ISCD(un)) {
4273 			lbasize = 2048;
4274 		} else {
4275 			lbasize = un->un_sys_blocksize;
4276 		}
4277 	}
4278 	pgeom_p->g_secsize = (unsigned short)lbasize;
4279 
4280 	/*
4281 	 * If the unit is a cd/dvd drive MODE SENSE page three
4282 	 * and MODE SENSE page four are reserved (see SBC spec
4283 	 * and MMC spec). To prevent soft errors, just return
4284 	 * using the default LBA size.
4285 	 */
4286 	if (ISCD(un))
4287 		return (ret);
4288 
4289 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4290 
4291 	/*
4292 	 * Retrieve MODE SENSE page 3 - Format Device Page
4293 	 */
4294 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4295 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4296 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4297 	    != 0) {
4298 		SD_ERROR(SD_LOG_COMMON, un,
4299 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4300 		goto page3_exit;
4301 	}
4302 
4303 	/*
4304 	 * Determine size of Block Descriptors in order to locate the mode
4305 	 * page data.  ATAPI devices return 0, SCSI devices should return
4306 	 * MODE_BLK_DESC_LENGTH.
4307 	 */
4308 	headerp = (struct mode_header *)p3bufp;
4309 	if (un->un_f_cfg_is_atapi == TRUE) {
4310 		struct mode_header_grp2 *mhp =
4311 		    (struct mode_header_grp2 *)headerp;
4312 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4313 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4314 	} else {
4315 		mode_header_length = MODE_HEADER_LENGTH;
4316 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4317 	}
4318 
4319 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4320 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4321 		    "received unexpected bd_len of %d, page3\n", bd_len);
4322 		goto page3_exit;
4323 	}
4324 
4325 	page3p = (struct mode_format *)
4326 	    ((caddr_t)headerp + mode_header_length + bd_len);
4327 
4328 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4329 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4330 		    "mode sense pg3 code mismatch %d\n",
4331 		    page3p->mode_page.code);
4332 		goto page3_exit;
4333 	}
4334 
4335 	/*
4336 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4337 	 * complete successfully; otherwise, revert to the logical geometry.
4338 	 * So, we need to save everything in temporary variables.
4339 	 */
4340 	sector_size = BE_16(page3p->data_bytes_sect);
4341 
4342 	/*
4343 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4344 	 */
4345 	if (sector_size == 0) {
4346 		sector_size = un->un_sys_blocksize;
4347 	} else {
4348 		sector_size &= ~(un->un_sys_blocksize - 1);
4349 	}
4350 
4351 	nsect  = BE_16(page3p->sect_track);
4352 	intrlv = BE_16(page3p->interleave);
4353 
4354 	SD_INFO(SD_LOG_COMMON, un,
4355 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4356 	SD_INFO(SD_LOG_COMMON, un,
4357 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4358 	    page3p->mode_page.code, nsect, sector_size);
4359 	SD_INFO(SD_LOG_COMMON, un,
4360 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4361 	    BE_16(page3p->track_skew),
4362 	    BE_16(page3p->cylinder_skew));
4363 
4364 
4365 	/*
4366 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4367 	 */
4368 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4369 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4370 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4371 	    != 0) {
4372 		SD_ERROR(SD_LOG_COMMON, un,
4373 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4374 		goto page4_exit;
4375 	}
4376 
4377 	/*
4378 	 * Determine size of Block Descriptors in order to locate the mode
4379 	 * page data.  ATAPI devices return 0, SCSI devices should return
4380 	 * MODE_BLK_DESC_LENGTH.
4381 	 */
4382 	headerp = (struct mode_header *)p4bufp;
4383 	if (un->un_f_cfg_is_atapi == TRUE) {
4384 		struct mode_header_grp2 *mhp =
4385 		    (struct mode_header_grp2 *)headerp;
4386 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4387 	} else {
4388 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4389 	}
4390 
4391 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4392 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4393 		    "received unexpected bd_len of %d, page4\n", bd_len);
4394 		goto page4_exit;
4395 	}
4396 
4397 	page4p = (struct mode_geometry *)
4398 	    ((caddr_t)headerp + mode_header_length + bd_len);
4399 
4400 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4401 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4402 		    "mode sense pg4 code mismatch %d\n",
4403 		    page4p->mode_page.code);
4404 		goto page4_exit;
4405 	}
4406 
4407 	/*
4408 	 * Stash the data now, after we know that both commands completed.
4409 	 */
4410 
4411 
4412 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4413 	spc   = nhead * nsect;
4414 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4415 	rpm   = BE_16(page4p->rpm);
4416 
4417 	modesense_capacity = spc * ncyl;
4418 
4419 	SD_INFO(SD_LOG_COMMON, un,
4420 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4421 	SD_INFO(SD_LOG_COMMON, un,
4422 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4423 	SD_INFO(SD_LOG_COMMON, un,
4424 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4425 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4426 	    (void *)pgeom_p, capacity);
4427 
4428 	/*
4429 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4430 	 * the product of C * H * S returned by MODE SENSE >= that returned
4431 	 * by read capacity. This is an idiosyncrasy of the original x86
4432 	 * disk subsystem.
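	 *
	 * Worked example with hypothetical numbers: nhead = 16 and
	 * nsect = 63 give spc = 1008; if MODE SENSE reports ncyl = 1024,
	 * modesense_capacity = 1032192.  Against a READ CAPACITY value of
	 * 1000000 blocks, g_acyl = (1032192 - 1000000 + 1007) / 1008 = 32
	 * and g_ncyl = 1024 - 32 = 992.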
4433 	 */
4434 	if (modesense_capacity >= capacity) {
4435 		SD_INFO(SD_LOG_COMMON, un,
4436 		    "sd_get_physical_geometry: adjusting acyl; "
4437 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4438 		    (modesense_capacity - capacity + spc - 1) / spc);
4439 		if (sector_size != 0) {
4440 			/* 1243403: NEC D38x7 drives don't support sec size */
4441 			pgeom_p->g_secsize = (unsigned short)sector_size;
4442 		}
4443 		pgeom_p->g_nsect    = (unsigned short)nsect;
4444 		pgeom_p->g_nhead    = (unsigned short)nhead;
4445 		pgeom_p->g_capacity = capacity;
4446 		pgeom_p->g_acyl	    =
4447 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4448 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4449 	}
4450 
4451 	pgeom_p->g_rpm    = (unsigned short)rpm;
4452 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4453 	ret = 0;
4454 
4455 	SD_INFO(SD_LOG_COMMON, un,
4456 	    "sd_get_physical_geometry: mode sense geometry:\n");
4457 	SD_INFO(SD_LOG_COMMON, un,
4458 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4459 	    nsect, sector_size, intrlv);
4460 	SD_INFO(SD_LOG_COMMON, un,
4461 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4462 	    nhead, ncyl, rpm, modesense_capacity);
4463 	SD_INFO(SD_LOG_COMMON, un,
4464 	    "sd_get_physical_geometry: (cached)\n");
4465 	SD_INFO(SD_LOG_COMMON, un,
4466 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4467 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4468 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4469 	SD_INFO(SD_LOG_COMMON, un,
4470 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4471 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4472 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4473 
4474 page4_exit:
4475 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4476 page3_exit:
4477 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4478 
4479 	return (ret);
4480 }
4481 
4482 /*
4483  *    Function: sd_get_virtual_geometry
4484  *
4485  * Description: Ask the controller to tell us about the target device.
4486  *
4487  *   Arguments: un - pointer to softstate
4488  *		capacity - disk capacity in #blocks
4489  *		lbasize - disk block size in bytes
4490  *
4491  *     Context: Kernel thread only
4492  */
4493 
4494 static int
4495 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4496     diskaddr_t capacity, int lbasize)
4497 {
4498 	uint_t	geombuf;
4499 	int	spc;
4500 
4501 	ASSERT(un != NULL);
4502 
4503 	/* Set sector size, and total number of sectors */
4504 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4505 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4506 
4507 	/* Let the HBA tell us its geometry */
4508 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4509 
4510 	/* A value of -1 indicates an undefined "geometry" property */
4511 	if (geombuf == (-1)) {
4512 		return (EINVAL);
4513 	}
4514 
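	/*
	 * The "geometry" capability packs nhead into the upper 16 bits of
	 * the result and nsect into the lower 16 bits; e.g. a hypothetical
	 * geombuf of 0x00100020 decodes to 16 heads and 32 sectors/track.
	 */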
4515 	/* Initialize the logical geometry cache. */
4516 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4517 	lgeom_p->g_nsect   = geombuf & 0xffff;
4518 	lgeom_p->g_secsize = un->un_sys_blocksize;
4519 
4520 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4521 
4522 	/*
4523 	 * Note: The driver originally converted the capacity value from
4524 	 * target blocks to system blocks. However, the capacity value passed
4525 	 * to this routine is already in terms of system blocks (this scaling
4526 	 * is done when the READ CAPACITY command is issued and processed).
4527 	 * This 'error' may have gone undetected because the usage of g_ncyl
4528 	 * (which is based upon g_capacity) is very limited within the driver.
4529 	 */
4530 	lgeom_p->g_capacity = capacity;
4531 
4532 	/*
4533 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
4534 	 * hba may return zero values if the device has been removed.
4535 	 */
4536 	if (spc == 0) {
4537 		lgeom_p->g_ncyl = 0;
4538 	} else {
4539 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4540 	}
4541 	lgeom_p->g_acyl = 0;
4542 
4543 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4544 	return (0);
4545 }
4546 
4547 /*
4548  *    Function: sd_update_block_info
4549  *
4550  * Description: Record the current target sector size and capacity in
4551  *		the soft state, marking each value valid when nonzero.
4552  *
4553  *   Arguments: un: unit struct.
4554  *		lbasize: new target sector size
4555  *		capacity: new target capacity, ie. block count
4556  *
4557  *     Context: Kernel thread context
4558  */
4559 
4560 static void
4561 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4562 {
4563 	if (lbasize != 0) {
4564 		un->un_tgt_blocksize = lbasize;
4565 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4566 	}
4567 
4568 	if (capacity != 0) {
4569 		un->un_blockcount		= capacity;
4570 		un->un_f_blockcount_is_valid	= TRUE;
4571 	}
4572 }
4573 
4574 
4575 /*
4576  *    Function: sd_register_devid
4577  *
4578  * Description: This routine will obtain the device id information from the
4579  *		target, obtain the serial number, and register the device
4580  *		id with the ddi framework.
4581  *
4582  *   Arguments: devi - the system's dev_info_t for the device.
4583  *		un - driver soft state (unit) structure
4584  *		reservation_flag - indicates if a reservation conflict
4585  *		occurred during attach
4586  *
4587  *     Context: Kernel Thread
4588  */
4589 static void
4590 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4591 {
4592 	int		rval		= 0;
4593 	uchar_t		*inq80		= NULL;
4594 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4595 	size_t		inq80_resid	= 0;
4596 	uchar_t		*inq83		= NULL;
4597 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4598 	size_t		inq83_resid	= 0;
4599 
4600 	ASSERT(un != NULL);
4601 	ASSERT(mutex_owned(SD_MUTEX(un)));
4602 	ASSERT((SD_DEVINFO(un)) == devi);
4603 
4604 	/*
4605 	 * This is the case of antiquated Sun disk drives that have the
4606 	 * FAB_DEVID property set in the disk_table.  These drives
4607 	 * manage their devids by storing them in the last 2 available sectors
4608 	 * on the drive and have them fabricated by the ddi layer by calling
4609 	 * ddi_devid_init and passing the DEVID_FAB flag.
4610 	 */
4611 	if (un->un_f_opt_fab_devid == TRUE) {
4612 		/*
4613 		 * Depending on EINVAL isn't reliable, since a reserved disk
4614 		 * may result in invalid geometry, so check to make sure a
4615 		 * reservation conflict did not occur during attach.
4616 		 */
4617 		if ((sd_get_devid(un) == EINVAL) &&
4618 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4619 			/*
4620 			 * The devid is invalid AND there is no reservation
4621 			 * conflict.  Fabricate a new devid.
4622 			 */
4623 			(void) sd_create_devid(un);
4624 		}
4625 
4626 		/* Register the devid if it exists */
4627 		if (un->un_devid != NULL) {
4628 			(void) ddi_devid_register(SD_DEVINFO(un),
4629 			    un->un_devid);
4630 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4631 			    "sd_register_devid: Devid Fabricated\n");
4632 		}
4633 		return;
4634 	}
4635 
4636 	/*
4637 	 * We check the availability of the World Wide Name (0x83) and Unit
4638 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
4639 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
4640 	 * 0x83 is available, that is the best choice.  Our next choice is
4641 	 * 0x80.  If neither is available, we munge the devid from the device
4642 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
4643 	 * to fabricate a devid for non-Sun qualified disks.
4644 	 */
4645 	if (sd_check_vpd_page_support(un) == 0) {
4646 		/* collect page 80 data if available */
4647 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4648 
4649 			mutex_exit(SD_MUTEX(un));
4650 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4651 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4652 			    0x01, 0x80, &inq80_resid);
4653 
4654 			if (rval != 0) {
4655 				kmem_free(inq80, inq80_len);
4656 				inq80 = NULL;
4657 				inq80_len = 0;
4658 			}
4659 			mutex_enter(SD_MUTEX(un));
4660 		}
4661 
4662 		/* collect page 83 data if available */
4663 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4664 			mutex_exit(SD_MUTEX(un));
4665 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4666 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4667 			    0x01, 0x83, &inq83_resid);
4668 
4669 			if (rval != 0) {
4670 				kmem_free(inq83, inq83_len);
4671 				inq83 = NULL;
4672 				inq83_len = 0;
4673 			}
4674 			mutex_enter(SD_MUTEX(un));
4675 		}
4676 	}
4677 
4678 	/* encode best devid possible based on data available */
4679 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4680 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4681 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4682 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4683 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4684 
4685 		/* devid successfully encoded, register devid */
4686 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4687 
4688 	} else {
4689 		/*
4690 		 * Unable to encode a devid based on data available.
4691 		 * This is not a Sun qualified disk.  Older Sun disk
4692 		 * drives that have the SD_FAB_DEVID property
4693 		 * set in the disk_table and non-Sun qualified
4694 		 * disks are treated in the same manner.  These
4695 		 * drives manage their devids by storing them in
4696 		 * the last two available sectors on the drive and
4697 		 * have them fabricated by the ddi layer by
4698 		 * calling ddi_devid_init and passing the
4699 		 * DEVID_FAB flag.
4700 		 * Create a fabricated devid only if no fabricated
4701 		 * devid already exists.
4702 		 */
4703 		if (sd_get_devid(un) == EINVAL) {
4704 			(void) sd_create_devid(un);
4705 		}
4706 		un->un_f_opt_fab_devid = TRUE;
4707 
4708 		/* Register the devid if it exists */
4709 		if (un->un_devid != NULL) {
4710 			(void) ddi_devid_register(SD_DEVINFO(un),
4711 			    un->un_devid);
4712 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4713 			    "sd_register_devid: devid fabricated using "
4714 			    "ddi framework\n");
4715 		}
4716 	}
4717 
4718 	/* clean up resources */
4719 	if (inq80 != NULL) {
4720 		kmem_free(inq80, inq80_len);
4721 	}
4722 	if (inq83 != NULL) {
4723 		kmem_free(inq83, inq83_len);
4724 	}
4725 }
4726 
4727 
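/*
 * Illustrative sketch (not compiled into the driver): the devid source
 * selection described in the comment above boils down to the following
 * precedence.  Only un_vpd_page_mask and the SD_VPD_* flags come from
 * the driver; the prose on the right is informal.
 *
 *	if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG)
 *		use INQUIRY VPD page 0x83 (device identification/WWN);
 *	else if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG)
 *		use INQUIRY VPD page 0x80 (unit serial number);
 *	else if (the disk is Sun qualified)
 *		encode a devid from vid/pid/serial via
 *		ddi_devid_scsi_encode();
 *	else
 *		fabricate one with ddi_devid_init(..., DEVID_FAB, ...)
 *		and persist it in the reserved sectors (sd_create_devid).
 */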
4728 
4729 /*
4730  *    Function: sd_get_devid
4731  *
4732  * Description: This routine will return 0 if a valid device id has been
4733  *		obtained from the target and stored in the soft state. If a
4734  *		valid device id has not been previously read and stored, a
4735  *		read attempt will be made.
4736  *
4737  *   Arguments: un - driver soft state (unit) structure
4738  *
4739  * Return Code: 0 on success; EINVAL (or other errno) on failure
4740  *
4741  *     Context: Kernel Thread
4742  */
4743 
4744 static int
4745 sd_get_devid(struct sd_lun *un)
4746 {
4747 	struct dk_devid		*dkdevid;
4748 	ddi_devid_t		tmpid;
4749 	uint_t			*ip;
4750 	size_t			sz;
4751 	diskaddr_t		blk;
4752 	int			status;
4753 	int			chksum;
4754 	int			i;
4755 	size_t			buffer_size;
4756 
4757 	ASSERT(un != NULL);
4758 	ASSERT(mutex_owned(SD_MUTEX(un)));
4759 
4760 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4761 	    un);
4762 
4763 	if (un->un_devid != NULL) {
4764 		return (0);
4765 	}
4766 
4767 	mutex_exit(SD_MUTEX(un));
4768 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4769 	    (void *)SD_PATH_DIRECT) != 0) {
4770 		mutex_enter(SD_MUTEX(un));
4771 		return (EINVAL);
4772 	}
4773 
4774 	/*
4775 	 * Read and verify device id, stored in the reserved cylinders at the
4776 	 * end of the disk. The backup label is on the odd sectors of the last
4777 	 * track of the last cylinder. The device id is stored on a track of
4778 	 * the next-to-last cylinder.
4779 	 */
4780 	mutex_enter(SD_MUTEX(un));
4781 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4782 	mutex_exit(SD_MUTEX(un));
4783 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4784 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4785 	    SD_PATH_DIRECT);
4786 	if (status != 0) {
4787 		goto error;
4788 	}
4789 
4790 	/* Validate the revision */
4791 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4792 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4793 		status = EINVAL;
4794 		goto error;
4795 	}
4796 
4797 	/* Calculate the checksum */
4798 	chksum = 0;
4799 	ip = (uint_t *)dkdevid;
4800 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4801 	    i++) {
4802 		chksum ^= ip[i];
4803 	}
4804 
4805 	/* Compare the checksums */
4806 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4807 		status = EINVAL;
4808 		goto error;
4809 	}
4810 
4811 	/* Validate the device id */
4812 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4813 		status = EINVAL;
4814 		goto error;
4815 	}
4816 
4817 	/*
4818 	 * Store the device id in the driver soft state
4819 	 */
4820 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4821 	tmpid = kmem_alloc(sz, KM_SLEEP);
4822 
4823 	mutex_enter(SD_MUTEX(un));
4824 
4825 	un->un_devid = tmpid;
4826 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4827 
4828 	kmem_free(dkdevid, buffer_size);
4829 
4830 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4831 
4832 	return (status);
4833 error:
4834 	mutex_enter(SD_MUTEX(un));
4835 	kmem_free(dkdevid, buffer_size);
4836 	return (status);
4837 }
4838 
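/*
 * Worked sketch of the checksum scheme used above (illustrative only):
 * the dk_devid sector is treated as an array of 32-bit words, and every
 * word except the last is XORed together; the last word holds the
 * stored checksum (retrieved via DKD_GETCHKSUM in the driver).  For a
 * 512-byte block that is 127 data words plus one checksum word.  A
 * standalone version, assuming a 512-byte buffer "sector":
 *
 *	uint_t	*ip = (uint_t *)sector;
 *	uint_t	chksum = 0;
 *	int	i;
 *
 *	for (i = 0; i < (512 - sizeof (int)) / sizeof (int); i++)
 *		chksum ^= ip[i];
 *	valid = (chksum == ip[127]);
 *
 * The same loop is used when writing the devid in sd_write_deviceid().
 */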
4839 
4840 /*
4841  *    Function: sd_create_devid
4842  *
4843  * Description: This routine will fabricate the device id and write it
4844  *		to the disk.
4845  *
4846  *   Arguments: un - driver soft state (unit) structure
4847  *
4848  * Return Code: the fabricated device id, or NULL on failure
4849  *
4850  *     Context: Kernel Thread
4851  */
4852 
4853 static ddi_devid_t
4854 sd_create_devid(struct sd_lun *un)
4855 {
4856 	ASSERT(un != NULL);
4857 
4858 	/* Fabricate the devid */
4859 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4860 	    == DDI_FAILURE) {
4861 		return (NULL);
4862 	}
4863 
4864 	/* Write the devid to disk */
4865 	if (sd_write_deviceid(un) != 0) {
4866 		ddi_devid_free(un->un_devid);
4867 		un->un_devid = NULL;
4868 	}
4869 
4870 	return (un->un_devid);
4871 }
4872 
4873 
4874 /*
4875  *    Function: sd_write_deviceid
4876  *
4877  * Description: This routine will write the device id to the disk
4878  *		reserved sector.
4879  *
4880  *   Arguments: un - driver soft state (unit) structure
4881  *
4882  * Return Code: -1 if the devid block cannot be determined
4883  *		value returned by sd_send_scsi_WRITE otherwise
4884  *
4885  *     Context: Kernel Thread
4886  */
4887 
4888 static int
4889 sd_write_deviceid(struct sd_lun *un)
4890 {
4891 	struct dk_devid		*dkdevid;
4892 	diskaddr_t		blk;
4893 	uint_t			*ip, chksum;
4894 	int			status;
4895 	int			i;
4896 
4897 	ASSERT(mutex_owned(SD_MUTEX(un)));
4898 
4899 	mutex_exit(SD_MUTEX(un));
4900 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4901 	    (void *)SD_PATH_DIRECT) != 0) {
4902 		mutex_enter(SD_MUTEX(un));
4903 		return (-1);
4904 	}
4905 
4906 
4907 	/* Allocate the buffer */
4908 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
4909 
4910 	/* Fill in the revision */
4911 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
4912 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
4913 
4914 	/* Copy in the device id */
4915 	mutex_enter(SD_MUTEX(un));
4916 	bcopy(un->un_devid, &dkdevid->dkd_devid,
4917 	    ddi_devid_sizeof(un->un_devid));
4918 	mutex_exit(SD_MUTEX(un));
4919 
4920 	/* Calculate the checksum */
4921 	chksum = 0;
4922 	ip = (uint_t *)dkdevid;
4923 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4924 	    i++) {
4925 		chksum ^= ip[i];
4926 	}
4927 
4928 	/* Fill-in checksum */
4929 	DKD_FORMCHKSUM(chksum, dkdevid);
4930 
4931 	/* Write the reserved sector */
4932 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
4933 	    SD_PATH_DIRECT);
4934 
4935 	kmem_free(dkdevid, un->un_sys_blocksize);
4936 
4937 	mutex_enter(SD_MUTEX(un));
4938 	return (status);
4939 }
4940 
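/*
 * Illustrative layout of the reserved devid sector written above
 * (informal, derived from the code rather than a format spec):
 *
 *	+------------+------------+--------------------+-- ... --+----------+
 *	| dkd_rev_hi | dkd_rev_lo | dkd_devid payload  |  (zero) | checksum |
 *	+------------+------------+--------------------+-- ... --+----------+
 *
 * The revision bytes are DK_DEVID_REV_MSB/DK_DEVID_REV_LSB, the payload
 * is the ddi_devid_sizeof() bytes copied from un->un_devid, and the XOR
 * checksum fills the final 32-bit word of the block via DKD_FORMCHKSUM.
 */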
4941 
4942 /*
4943  *    Function: sd_check_vpd_page_support
4944  *
4945  * Description: This routine sends an inquiry command with the EVPD bit set and
4946  *		a page code of 0x00 to the device. It is used to determine which
4947  *		vital product pages are available to find the devid. We are
4948  *		looking for pages 0x83 or 0x80.  If we return -1, the
4949  *		device does not support that command.
4950  *
4951  *   Arguments: un  - driver soft state (unit) structure
4952  *
4953  * Return Code: 0 - success
4954  *		-1 - the device does not support VPD pages
4955  *
4956  *     Context: This routine can sleep.
4957  */
4958 
4959 static int
4960 sd_check_vpd_page_support(struct sd_lun *un)
4961 {
4962 	uchar_t	*page_list	= NULL;
4963 	uchar_t	page_length	= 0xff;	/* Use max possible length */
4964 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
4965 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
4966 	int    	rval		= 0;
4967 	int	counter;
4968 
4969 	ASSERT(un != NULL);
4970 	ASSERT(mutex_owned(SD_MUTEX(un)));
4971 
4972 	mutex_exit(SD_MUTEX(un));
4973 
4974 	/*
4975 	 * We'll set the page length to the maximum to save figuring it out
4976 	 * with an additional call.
4977 	 */
4978 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
4979 
4980 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
4981 	    page_code, NULL);
4982 
4983 	mutex_enter(SD_MUTEX(un));
4984 
4985 	/*
4986 	 * Now we must validate that the device accepted the command, as some
4987 	 * drives do not support it.  If the drive does support it, we will
4988 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
4989 	 * not, we return -1.
4990 	 */
4991 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
4992 		/* Loop to find one of the 2 pages we need */
4993 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
4994 
4995 		/*
4996 		 * Pages are returned in ascending order, and 0x83 is what we
4997 		 * are hoping for.
4998 		 */
4999 		while ((page_list[counter] <= 0x83) &&
5000 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5001 		    VPD_HEAD_OFFSET))) {
5002 			/*
5003 			 * page_list[VPD_PAGE_LENGTH] (byte 3) counts only the
5004 			 * pages after the header, hence VPD_HEAD_OFFSET above.
5005 			 */
5006 
5007 			switch (page_list[counter]) {
5008 			case 0x00:
5009 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5010 				break;
5011 			case 0x80:
5012 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5013 				break;
5014 			case 0x81:
5015 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5016 				break;
5017 			case 0x82:
5018 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5019 				break;
5020 			case 0x83:
5021 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5022 				break;
5023 			}
5024 			counter++;
5025 		}
5026 
5027 	} else {
5028 		rval = -1;
5029 
5030 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5031 		    "sd_check_vpd_page_support: This drive does not implement "
5032 		    "VPD pages.\n");
5033 	}
5034 
5035 	kmem_free(page_list, page_length);
5036 
5037 	return (rval);
5038 }
5039 
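/*
 * Illustrative sketch of the VPD page 0x00 response parsed above (not
 * compiled; offsets follow the SCSI "Supported VPD pages" format):
 *
 *	byte 0		peripheral qualifier/device type
 *	byte 1		page code (0x00)
 *	byte 2		reserved
 *	byte 3		page length n (number of page codes that follow)
 *	bytes 4..4+n-1	supported page codes, in ascending order
 *			(e.g. 0x00, 0x80, 0x83)
 *
 * So a loop equivalent to the one above could be written as:
 *
 *	for (i = 4; i < 4 + page_list[3]; i++) {
 *		if (page_list[i] == 0x83)
 *			mask |= SD_VPD_DEVID_WWN_PG;
 *	}
 */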
5040 
5041 /*
5042  *    Function: sd_setup_pm
5043  *
5044  * Description: Initialize Power Management on the device
5045  *
5046  *     Context: Kernel Thread
5047  */
5048 
5049 static void
5050 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5051 {
5052 	uint_t	log_page_size;
5053 	uchar_t	*log_page_data;
5054 	int	rval;
5055 
5056 	/*
5057 	 * Since we are called from attach, holding a mutex for
5058 	 * un is unnecessary. Because some of the routines called
5059 	 * from here require SD_MUTEX to not be held, assert this
5060 	 * right up front.
5061 	 */
5062 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5063 	/*
5064 	 * Since the sd device does not have the 'reg' property,
5065 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5066 	 * The following code is to tell cpr that this device
5067 	 * DOES need to be suspended and resumed.
5068 	 */
5069 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5070 	    "pm-hardware-state", "needs-suspend-resume");
5071 
5072 	/*
5073 	 * This complies with the new power management framework
5074 	 * for certain desktop machines. Create the pm_components
5075 	 * property as a string array property.
5076 	 */
5077 	if (un->un_f_pm_supported) {
5078 		/*
5079 		 * Not all devices have a motor, so try it first.
5080 		 * Some devices may return ILLEGAL REQUEST, and some
5081 		 * will hang.
5082 		 * The following START_STOP_UNIT is used to check if the
5083 		 * target device has a motor.
5084 		 */
5085 		un->un_f_start_stop_supported = TRUE;
5086 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5087 		    SD_PATH_DIRECT) != 0) {
5088 			un->un_f_start_stop_supported = FALSE;
5089 		}
5090 
5091 		/*
5092 		 * Create the pm properties anyway; otherwise the parent
5093 		 * can't go to sleep.
5094 		 */
5095 		(void) sd_create_pm_components(devi, un);
5096 		un->un_f_pm_is_enabled = TRUE;
5097 		return;
5098 	}
5099 
5100 	if (!un->un_f_log_sense_supported) {
5101 		un->un_power_level = SD_SPINDLE_ON;
5102 		un->un_f_pm_is_enabled = FALSE;
5103 		return;
5104 	}
5105 
5106 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5107 
5108 #ifdef	SDDEBUG
5109 	if (sd_force_pm_supported) {
5110 		/* Force a successful result */
5111 		rval = 1;
5112 	}
5113 #endif
5114 
5115 	/*
5116 	 * If the start-stop cycle counter log page is not supported
5117 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5118 	 * then we should not create the pm_components property.
5119 	 */
5120 	if (rval == -1) {
5121 		/*
5122 		 * Error.
5123 		 * Reading log sense failed, most likely this is
5124 		 * an older drive that does not support log sense.
5125 		 * If this fails auto-pm is not supported.
5126 		 */
5127 		un->un_power_level = SD_SPINDLE_ON;
5128 		un->un_f_pm_is_enabled = FALSE;
5129 
5130 	} else if (rval == 0) {
5131 		/*
5132 		 * Page not found.
5133 		 * The start stop cycle counter is implemented as page
5134 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
5135 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5136 		 */
5137 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5138 			/*
5139 			 * Page found, use this one.
5140 			 */
5141 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5142 			un->un_f_pm_is_enabled = TRUE;
5143 		} else {
5144 			/*
5145 			 * Error or page not found.
5146 			 * auto-pm is not supported for this device.
5147 			 */
5148 			un->un_power_level = SD_SPINDLE_ON;
5149 			un->un_f_pm_is_enabled = FALSE;
5150 		}
5151 	} else {
5152 		/*
5153 		 * Page found, use it.
5154 		 */
5155 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5156 		un->un_f_pm_is_enabled = TRUE;
5157 	}
5158 
5159 
5160 	if (un->un_f_pm_is_enabled == TRUE) {
5161 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5162 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5163 
5164 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5165 		    log_page_size, un->un_start_stop_cycle_page,
5166 		    0x01, 0, SD_PATH_DIRECT);
5167 #ifdef	SDDEBUG
5168 		if (sd_force_pm_supported) {
5169 			/* Force a successful result */
5170 			rval = 0;
5171 		}
5172 #endif
5173 
5174 		/*
5175 		 * If the Log Sense for the start/stop cycle counter page
5176 		 * succeeds, then power management is supported and we can
5177 		 * enable auto-pm.
5178 		 */
5179 		if (rval == 0)  {
5180 			(void) sd_create_pm_components(devi, un);
5181 		} else {
5182 			un->un_power_level = SD_SPINDLE_ON;
5183 			un->un_f_pm_is_enabled = FALSE;
5184 		}
5185 
5186 		kmem_free(log_page_data, log_page_size);
5187 	}
5188 }
5189 
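/*
 * Minimal sketch of the start/stop cycle page probing done above
 * (illustrative only; the rval semantics are those of
 * sd_log_page_supported as used here: 1 = page found, 0 = page not
 * found, -1 = error):
 *
 *	if (sd_log_page_supported(un, START_STOP_CYCLE_PAGE) == 1)
 *		use page 0xE (newer disks);
 *	else if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1)
 *		use page 0x31 (older, vendor-unique page);
 *	else
 *		disable auto-pm (un_f_pm_is_enabled = FALSE).
 */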
5190 
5191 /*
5192  *    Function: sd_create_pm_components
5193  *
5194  * Description: Initialize PM property.
5195  *
5196  *     Context: Kernel thread context
5197  */
5198 
5199 static void
5200 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5201 {
5202 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5203 
5204 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5205 
5206 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5207 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5208 		/*
5209 		 * When components are initially created they are idle,
5210 		 * power up any non-removables.
5211 		 * Note: the return value of pm_raise_power can't be used
5212 		 * for determining if PM should be enabled for this device.
5213 		 * Even if you check the return values and remove this
5214 		 * property created above, the PM framework will not honor the
5215 		 * change after the first call to pm_raise_power. Hence,
5216 		 * removal of that property does not help if pm_raise_power
5217 		 * fails. In the case of removable media, the start/stop
5218 		 * will fail if the media is not present.
5219 		 */
5220 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5221 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5222 			mutex_enter(SD_MUTEX(un));
5223 			un->un_power_level = SD_SPINDLE_ON;
5224 			mutex_enter(&un->un_pm_mutex);
5225 			/* Set to on and not busy. */
5226 			un->un_pm_count = 0;
5227 		} else {
5228 			mutex_enter(SD_MUTEX(un));
5229 			un->un_power_level = SD_SPINDLE_OFF;
5230 			mutex_enter(&un->un_pm_mutex);
5231 			/* Set to off. */
5232 			un->un_pm_count = -1;
5233 		}
5234 		mutex_exit(&un->un_pm_mutex);
5235 		mutex_exit(SD_MUTEX(un));
5236 	} else {
5237 		un->un_power_level = SD_SPINDLE_ON;
5238 		un->un_f_pm_is_enabled = FALSE;
5239 	}
5240 }
5241 
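/*
 * For reference, the "pm-components" property created above describes a
 * single component with two power levels; the pm framework parses the
 * strings as follows (a sketch, not an authoritative grammar):
 *
 *	"NAME=spindle-motor"	component 0 name
 *	"0=off"			level 0: spindle stopped (SD_SPINDLE_OFF)
 *	"1=on"			level 1: spindle running (SD_SPINDLE_ON)
 *
 * pm_raise_power(dip, 0, SD_SPINDLE_ON) therefore asks the framework to
 * bring component 0 to level 1.
 */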
5242 
5243 /*
5244  *    Function: sd_ddi_suspend
5245  *
5246  * Description: Performs system power-down operations. This includes
5247  *		setting the drive state to indicate its suspended so
5248  *		that no new commands will be accepted. Also, wait for
5249  *		all commands that are in transport or queued to a timer
5250  *		for retry to complete. All timeout threads are cancelled.
5251  *
5252  * Return Code: DDI_FAILURE or DDI_SUCCESS
5253  *
5254  *     Context: Kernel thread context
5255  */
5256 
5257 static int
5258 sd_ddi_suspend(dev_info_t *devi)
5259 {
5260 	struct	sd_lun	*un;
5261 	clock_t		wait_cmds_complete;
5262 
5263 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5264 	if (un == NULL) {
5265 		return (DDI_FAILURE);
5266 	}
5267 
5268 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5269 
5270 	mutex_enter(SD_MUTEX(un));
5271 
5272 	/* Return success if the device is already suspended. */
5273 	if (un->un_state == SD_STATE_SUSPENDED) {
5274 		mutex_exit(SD_MUTEX(un));
5275 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5276 		    "device already suspended, exiting\n");
5277 		return (DDI_SUCCESS);
5278 	}
5279 
5280 	/* Return failure if the device is being used by HA */
5281 	if (un->un_resvd_status &
5282 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5283 		mutex_exit(SD_MUTEX(un));
5284 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5285 		    "device in use by HA, exiting\n");
5286 		return (DDI_FAILURE);
5287 	}
5288 
5289 	/*
5290 	 * Return failure if the device is in a resource wait
5291 	 * or power changing state.
5292 	 */
5293 	if ((un->un_state == SD_STATE_RWAIT) ||
5294 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5295 		mutex_exit(SD_MUTEX(un));
5296 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5297 		    "device in resource wait state, exiting\n");
5298 		return (DDI_FAILURE);
5299 	}
5300 
5301 
5302 	un->un_save_state = un->un_last_state;
5303 	New_state(un, SD_STATE_SUSPENDED);
5304 
5305 	/*
5306 	 * Wait for all commands that are in transport or queued to a timer
5307 	 * for retry to complete.
5308 	 *
5309 	 * While waiting, no new commands will be accepted or sent because of
5310 	 * the new state we set above.
5311 	 *
5312 	 * Wait till current operation has completed. If we are in the resource
5313 	 * wait state (with an intr outstanding) then we need to wait till the
5314 	 * intr completes and starts the next cmd. We want to wait for
5315 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5316 	 */
5317 	wait_cmds_complete = ddi_get_lbolt() +
5318 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5319 
5320 	while (un->un_ncmds_in_transport != 0) {
5321 		/*
5322 		 * Fail if commands do not finish in the specified time.
5323 		 */
5324 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5325 		    wait_cmds_complete) == -1) {
5326 			/*
5327 			 * Undo the state changes made above. Everything
5328 			 * must go back to its original value.
5329 			 */
5330 			Restore_state(un);
5331 			un->un_last_state = un->un_save_state;
5332 			/* Wake up any threads that might be waiting. */
5333 			cv_broadcast(&un->un_suspend_cv);
5334 			mutex_exit(SD_MUTEX(un));
5335 			SD_ERROR(SD_LOG_IO_PM, un,
5336 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5337 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5338 			return (DDI_FAILURE);
5339 		}
5340 	}
5341 
5342 	/*
5343 	 * Cancel SCSI watch thread and timeouts, if any are active
5344 	 */
5345 
5346 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5347 		opaque_t temp_token = un->un_swr_token;
5348 		mutex_exit(SD_MUTEX(un));
5349 		scsi_watch_suspend(temp_token);
5350 		mutex_enter(SD_MUTEX(un));
5351 	}
5352 
5353 	if (un->un_reset_throttle_timeid != NULL) {
5354 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5355 		un->un_reset_throttle_timeid = NULL;
5356 		mutex_exit(SD_MUTEX(un));
5357 		(void) untimeout(temp_id);
5358 		mutex_enter(SD_MUTEX(un));
5359 	}
5360 
5361 	if (un->un_dcvb_timeid != NULL) {
5362 		timeout_id_t temp_id = un->un_dcvb_timeid;
5363 		un->un_dcvb_timeid = NULL;
5364 		mutex_exit(SD_MUTEX(un));
5365 		(void) untimeout(temp_id);
5366 		mutex_enter(SD_MUTEX(un));
5367 	}
5368 
5369 	mutex_enter(&un->un_pm_mutex);
5370 	if (un->un_pm_timeid != NULL) {
5371 		timeout_id_t temp_id = un->un_pm_timeid;
5372 		un->un_pm_timeid = NULL;
5373 		mutex_exit(&un->un_pm_mutex);
5374 		mutex_exit(SD_MUTEX(un));
5375 		(void) untimeout(temp_id);
5376 		mutex_enter(SD_MUTEX(un));
5377 	} else {
5378 		mutex_exit(&un->un_pm_mutex);
5379 	}
5380 
5381 	if (un->un_retry_timeid != NULL) {
5382 		timeout_id_t temp_id = un->un_retry_timeid;
5383 		un->un_retry_timeid = NULL;
5384 		mutex_exit(SD_MUTEX(un));
5385 		(void) untimeout(temp_id);
5386 		mutex_enter(SD_MUTEX(un));
5387 	}
5388 
5389 	if (un->un_direct_priority_timeid != NULL) {
5390 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5391 		un->un_direct_priority_timeid = NULL;
5392 		mutex_exit(SD_MUTEX(un));
5393 		(void) untimeout(temp_id);
5394 		mutex_enter(SD_MUTEX(un));
5395 	}
5396 
5397 	if (un->un_f_is_fibre == TRUE) {
5398 		/*
5399 		 * Remove callbacks for insert and remove events
5400 		 */
5401 		if (un->un_insert_event != NULL) {
5402 			mutex_exit(SD_MUTEX(un));
5403 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5404 			mutex_enter(SD_MUTEX(un));
5405 			un->un_insert_event = NULL;
5406 		}
5407 
5408 		if (un->un_remove_event != NULL) {
5409 			mutex_exit(SD_MUTEX(un));
5410 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5411 			mutex_enter(SD_MUTEX(un));
5412 			un->un_remove_event = NULL;
5413 		}
5414 	}
5415 
5416 	mutex_exit(SD_MUTEX(un));
5417 
5418 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5419 
5420 	return (DDI_SUCCESS);
5421 }
5422 
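/*
 * The command-drain loop above follows a common cv_timedwait(9F) idiom;
 * a minimal standalone sketch, with a hypothetical "pending" counter,
 * condvar and lock, looks like:
 *
 *	clock_t deadline = ddi_get_lbolt() + drv_usectohz(timeout_us);
 *
 *	mutex_enter(&lock);
 *	while (pending != 0) {
 *		if (cv_timedwait(&cv, &lock, deadline) == -1) {
 *			mutex_exit(&lock);
 *			return (DDI_FAILURE);	(timed out)
 *		}
 *	}
 *	mutex_exit(&lock);
 *
 * Note that the deadline is absolute (in lbolt ticks), so retries after
 * spurious wakeups do not extend the overall wait.
 */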
5423 
5424 /*
5425  *    Function: sd_ddi_pm_suspend
5426  *
5427  * Description: Set the drive state to low power.
5428  *		Someone else is required to actually change the drive
5429  *		power level.
5430  *
5431  *   Arguments: un - driver soft state (unit) structure
5432  *
5433  * Return Code: DDI_FAILURE or DDI_SUCCESS
5434  *
5435  *     Context: Kernel thread context
5436  */
5437 
5438 static int
5439 sd_ddi_pm_suspend(struct sd_lun *un)
5440 {
5441 	ASSERT(un != NULL);
5442 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5443 
5444 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5445 	mutex_enter(SD_MUTEX(un));
5446 
5447 	/*
5448 	 * Exit if power management is not enabled for this device, or if
5449 	 * the device is being used by HA.
5450 	 */
5451 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5452 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5453 		mutex_exit(SD_MUTEX(un));
5454 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5455 		return (DDI_SUCCESS);
5456 	}
5457 
5458 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5459 	    un->un_ncmds_in_driver);
5460 
5461 	/*
5462 	 * See if the device is not busy, i.e.:
5463 	 *    - we have no commands in the driver for this device
5464 	 *    - not waiting for resources
5465 	 */
5466 	if ((un->un_ncmds_in_driver == 0) &&
5467 	    (un->un_state != SD_STATE_RWAIT)) {
5468 		/*
5469 		 * The device is not busy, so it is OK to go to low power state.
5470 		 * Indicate low power, but rely on someone else to actually
5471 		 * change it.
5472 		 */
5473 		mutex_enter(&un->un_pm_mutex);
5474 		un->un_pm_count = -1;
5475 		mutex_exit(&un->un_pm_mutex);
5476 		un->un_power_level = SD_SPINDLE_OFF;
5477 	}
5478 
5479 	mutex_exit(SD_MUTEX(un));
5480 
5481 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5482 
5483 	return (DDI_SUCCESS);
5484 }
5485 
5486 
5487 /*
5488  *    Function: sd_ddi_resume
5489  *
5490  * Description: Performs system power-up operations.
5491  *
5492  * Return Code: DDI_SUCCESS
5493  *		DDI_FAILURE
5494  *
5495  *     Context: Kernel thread context
5496  */
5497 
5498 static int
5499 sd_ddi_resume(dev_info_t *devi)
5500 {
5501 	struct	sd_lun	*un;
5502 
5503 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5504 	if (un == NULL) {
5505 		return (DDI_FAILURE);
5506 	}
5507 
5508 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5509 
5510 	mutex_enter(SD_MUTEX(un));
5511 	Restore_state(un);
5512 
5513 	/*
5514 	 * Restore the state which was saved to give
5515 	 * the right state in un_last_state.
5516 	 */
5517 	un->un_last_state = un->un_save_state;
5518 	/*
5519 	 * Note: throttle comes back at full.
5520 	 * Also note: this MUST be done before calling pm_raise_power
5521 	 * otherwise the system can get hung in biowait. The scenario where
5522 	 * this'll happen is under cpr suspend. Writing of the system
5523 	 * state goes through sddump, which writes 0 to un_throttle. If
5524 	 * writing the system state then fails, for example if the partition is
5525 	 * too small, then cpr attempts a resume. If throttle isn't restored
5526 	 * from the saved value until after calling pm_raise_power then
5527 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5528 	 * in biowait.
5529 	 */
5530 	un->un_throttle = un->un_saved_throttle;
5531 
5532 	/*
5533 	 * The chance of failure is very rare, as the only command issued in
5534 	 * the power entry point is START, when transitioning from 0->1 or
5535 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
5536 	 * the state in which suspend was done. Ignore the return value, as the
5537 	 * resume should not be failed. In the case of removable media the
5538 	 * media need not be inserted, so there is a chance that raise power
5539 	 * will fail with media not present.
5540 	 */
5541 	if (un->un_f_attach_spinup) {
5542 		mutex_exit(SD_MUTEX(un));
5543 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5544 		mutex_enter(SD_MUTEX(un));
5545 	}
5546 
5547 	/*
5548 	 * Don't broadcast to the suspend cv and therefore possibly
5549 	 * start I/O until after power has been restored.
5550 	 */
5551 	cv_broadcast(&un->un_suspend_cv);
5552 	cv_broadcast(&un->un_state_cv);
5553 
5554 	/* restart thread */
5555 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5556 		scsi_watch_resume(un->un_swr_token);
5557 	}
5558 
5559 #if (defined(__fibre))
5560 	if (un->un_f_is_fibre == TRUE) {
5561 		/*
5562 		 * Add callbacks for insert and remove events
5563 		 */
5564 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5565 			sd_init_event_callbacks(un);
5566 		}
5567 	}
5568 #endif
5569 
5570 	/*
5571 	 * Transport any pending commands to the target.
5572 	 *
5573 	 * If this is a low-activity device commands in queue will have to wait
5574 	 * If this is a low-activity device, commands in the queue will have to
5575 	 * wait until new commands come in, which may take a while. Also, we
5576 	 * there really are no commands in progress after the unit was
5577 	 * suspended and we could have reached the throttle level, been
5578 	 * suspended, and have no new commands coming in for awhile. Highly
5579 	 * suspended, and have no new commands coming in for a while. Highly
5580 	 */
5581 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5582 
5583 	sd_start_cmds(un, NULL);
5584 	mutex_exit(SD_MUTEX(un));
5585 
5586 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5587 
5588 	return (DDI_SUCCESS);
5589 }
5590 
5591 
5592 /*
5593  *    Function: sd_ddi_pm_resume
5594  *
5595  * Description: Set the drive state to powered on.
5596  *		Someone else is required to actually change the drive
5597  *		power level.
5598  *
5599  *   Arguments: un - driver soft state (unit) structure
5600  *
5601  * Return Code: DDI_SUCCESS
5602  *
5603  *     Context: Kernel thread context
5604  */
5605 
5606 static int
5607 sd_ddi_pm_resume(struct sd_lun *un)
5608 {
5609 	ASSERT(un != NULL);
5610 
5611 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5612 	mutex_enter(SD_MUTEX(un));
5613 	un->un_power_level = SD_SPINDLE_ON;
5614 
5615 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5616 	mutex_enter(&un->un_pm_mutex);
5617 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5618 		un->un_pm_count++;
5619 		ASSERT(un->un_pm_count == 0);
5620 		/*
5621 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5622 		 * un_suspend_cv is for a system resume, not a power management
5623 		 * device resume. (4297749)
5624 		 *	 cv_broadcast(&un->un_suspend_cv);
5625 		 */
5626 	}
5627 	mutex_exit(&un->un_pm_mutex);
5628 	mutex_exit(SD_MUTEX(un));
5629 
5630 	return (DDI_SUCCESS);
5631 }
5632 
5633 
5634 /*
5635  *    Function: sd_pm_idletimeout_handler
5636  *
5637  * Description: A timer routine that's active only while a device is busy.
5638  *		The purpose is to extend slightly the pm framework's busy
5639  *		view of the device to prevent busy/idle thrashing for
5640  *		back-to-back commands. Do this by comparing the current time
5641  *		to the time at which the last command completed and when the
5642  *		difference is greater than sd_pm_idletime, call
5643  *		pm_idle_component. In addition to indicating idle to the pm
5644  *		framework, update the chain type to again use the internal pm
5645  *		layers of the driver.
5646  *
5647  *   Arguments: arg - driver soft state (unit) structure
5648  *
5649  *     Context: Executes in a timeout(9F) thread context
5650  */
5651 
5652 static void
5653 sd_pm_idletimeout_handler(void *arg)
5654 {
5655 	struct sd_lun *un = arg;
5656 
5657 	time_t	now;
5658 
5659 	mutex_enter(&sd_detach_mutex);
5660 	if (un->un_detach_count != 0) {
5661 		/* Abort if the instance is detaching */
5662 		mutex_exit(&sd_detach_mutex);
5663 		return;
5664 	}
5665 	mutex_exit(&sd_detach_mutex);
5666 
5667 	now = ddi_get_time();
5668 	/*
5669 	 * Grab both mutexes, in the proper order, since we're accessing
5670 	 * both PM and softstate variables.
5671 	 */
5672 	mutex_enter(SD_MUTEX(un));
5673 	mutex_enter(&un->un_pm_mutex);
5674 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5675 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5676 		/*
5677 		 * Update the chain types.
5678 		 * This takes affect on the next new command received.
5679 		 * This takes effect on the next new command received.
5680 		if (un->un_f_non_devbsize_supported) {
5681 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5682 		} else {
5683 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5684 		}
5685 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5686 
5687 		SD_TRACE(SD_LOG_IO_PM, un,
5688 		    "sd_pm_idletimeout_handler: idling device\n");
5689 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5690 		un->un_pm_idle_timeid = NULL;
5691 	} else {
5692 		un->un_pm_idle_timeid =
5693 		    timeout(sd_pm_idletimeout_handler, un,
5694 		    (drv_usectohz((clock_t)300000)));	/* 300 ms. */
5695 	}
5696 	mutex_exit(&un->un_pm_mutex);
5697 	mutex_exit(SD_MUTEX(un));
5698 }
5699 
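/*
 * The handler above re-arms itself while the device remains busy; this
 * is the standard self-rescheduling timeout(9F) pattern, sketched below
 * with a hypothetical poll_fn and a 300 ms period:
 *
 *	static void
 *	poll_fn(void *arg)
 *	{
 *		if (!condition_met(arg)) {
 *			saved_id = timeout(poll_fn, arg,
 *			    drv_usectohz(300000));
 *			return;
 *		}
 *		... condition met: do the real work, do not re-arm ...
 *	}
 *
 * Saving the returned timeout_id_t (as un_pm_idle_timeid is saved here)
 * is what allows a later untimeout() during suspend or detach.
 */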
5700 
5701 /*
5702  *    Function: sd_pm_timeout_handler
5703  *
5704  * Description: Callback to tell framework we are idle.
5705  *
5706  *     Context: timeout(9F) thread context.
5707  */
5708 
5709 static void
5710 sd_pm_timeout_handler(void *arg)
5711 {
5712 	struct sd_lun *un = arg;
5713 
5714 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5715 	mutex_enter(&un->un_pm_mutex);
5716 	un->un_pm_timeid = NULL;
5717 	mutex_exit(&un->un_pm_mutex);
5718 }
5719 
5720 
5721 /*
5722  *    Function: sdpower
5723  *
5724  * Description: PM entry point.
5725  *
5726  * Return Code: DDI_SUCCESS
5727  *		DDI_FAILURE
5728  *
5729  *     Context: Kernel thread context
5730  */
5731 
5732 static int
5733 sdpower(dev_info_t *devi, int component, int level)
5734 {
5735 	struct sd_lun	*un;
5736 	int		instance;
5737 	int		rval = DDI_SUCCESS;
5738 	uint_t		i, log_page_size, maxcycles, ncycles;
5739 	uchar_t		*log_page_data;
5740 	int		log_sense_page;
5741 	int		medium_present;
5742 	time_t		intvlp;
5743 	dev_t		dev;
5744 	struct pm_trans_data	sd_pm_tran_data;
5745 	uchar_t		save_state;
5746 	int		sval;
5747 	uchar_t		state_before_pm;
5748 	int		got_semaphore_here;
5749 
5750 	instance = ddi_get_instance(devi);
5751 
5752 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5753 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5754 	    component != 0) {
5755 		return (DDI_FAILURE);
5756 	}
5757 
5758 	dev = sd_make_device(SD_DEVINFO(un));
5759 
5760 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5761 
5762 	/*
5763 	 * Must synchronize power down with close.
5764 	 * Attempt to decrement/acquire the open/close semaphore,
5765 	 * but do NOT wait on it. If it's not greater than zero,
5766 	 * i.e. it can't be decremented without waiting, then
5767 	 * someone else, either open or close, already has it
5768 	 * and the try returns 0. Use that knowledge here to determine
5769 	 * if it's OK to change the device power level.
5770 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
5771 	 * here.
5772 	 */
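	/*
	 * (For reference, a sketch of the try-semaphore idiom described
	 * above, with generic illustrative names:
	 *
	 *	got = sema_tryp(&sem);		nonzero if acquired
	 *	... work that must exclude open/close ...
	 *	if (got != 0)
	 *		sema_v(&sem);		release only if acquired
	 *
	 * sema_tryp(9F) never blocks, which is what lets sdpower back
	 * off with DDI_FAILURE instead of deadlocking against an open
	 * or close in progress.)
	 */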
5773 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5774 
5775 	mutex_enter(SD_MUTEX(un));
5776 
5777 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5778 	    un->un_ncmds_in_driver);
5779 
5780 	/*
5781 	 * If un_ncmds_in_driver is non-zero, commands are already being
5782 	 * processed in the driver; if the semaphore was not acquired
5783 	 * here, an open or close is being processed. In either case a
5784 	 * concurrent request to go to low power cannot be honored, so
5785 	 * we need to return failure.
5786 	 */
5787 	if ((level == SD_SPINDLE_OFF) &&
5788 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5789 		mutex_exit(SD_MUTEX(un));
5790 
5791 		if (got_semaphore_here != 0) {
5792 			sema_v(&un->un_semoclose);
5793 		}
5794 		SD_TRACE(SD_LOG_IO_PM, un,
5795 		    "sdpower: exit, device has queued cmds.\n");
5796 		return (DDI_FAILURE);
5797 	}
5798 
5799 	/*
5800 	 * If the state is OFFLINE, the disk is completely dead; in our
5801 	 * case powering it on or off requires sending commands, which
5802 	 * would fail anyway, so return failure here.
5803 	 *
5804 	 * Power changes to a device that's OFFLINE or SUSPENDED
5805 	 * are not allowed.
5806 	 */
5807 	if ((un->un_state == SD_STATE_OFFLINE) ||
5808 	    (un->un_state == SD_STATE_SUSPENDED)) {
5809 		mutex_exit(SD_MUTEX(un));
5810 
5811 		if (got_semaphore_here != 0) {
5812 			sema_v(&un->un_semoclose);
5813 		}
5814 		SD_TRACE(SD_LOG_IO_PM, un,
5815 		    "sdpower: exit, device is off-line.\n");
5816 		return (DDI_FAILURE);
5817 	}
5818 
5819 	/*
5820 	 * Change the device's state to indicate its power level
5821 	 * is being changed. Do this to prevent a power off in the
5822 	 * middle of commands, which is especially bad on devices
5823 	 * that are really powered off instead of just spun down.
5824 	 */
5825 	state_before_pm = un->un_state;
5826 	un->un_state = SD_STATE_PM_CHANGING;
5827 
5828 	mutex_exit(SD_MUTEX(un));
5829 
5830 	/*
5831 	 * If the "pm-capable" property is set to TRUE by the HBA driver,
5832 	 * bypass the following check; otherwise, check the log sense
5833 	 * information for this device.
5834 	 */
5835 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5836 		/*
5837 		 * Get the log sense information to understand whether the
5838 		 * power cycle counts have gone beyond the threshold.
5839 		 */
5840 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5841 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5842 
5843 		mutex_enter(SD_MUTEX(un));
5844 		log_sense_page = un->un_start_stop_cycle_page;
5845 		mutex_exit(SD_MUTEX(un));
5846 
5847 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5848 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5849 #ifdef	SDDEBUG
5850 		if (sd_force_pm_supported) {
5851 			/* Force a successful result */
5852 			rval = 0;
5853 		}
5854 #endif
5855 		if (rval != 0) {
5856 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5857 			    "Log Sense Failed\n");
5858 			kmem_free(log_page_data, log_page_size);
5859 			/* Cannot support power management on those drives */
5860 
5861 			if (got_semaphore_here != 0) {
5862 				sema_v(&un->un_semoclose);
5863 			}
5864 			/*
5865 			 * On exit put the state back to its original value
5866 			 * and broadcast to anyone waiting for the power
5867 			 * change completion.
5868 			 */
5869 			mutex_enter(SD_MUTEX(un));
5870 			un->un_state = state_before_pm;
5871 			cv_broadcast(&un->un_suspend_cv);
5872 			mutex_exit(SD_MUTEX(un));
5873 			SD_TRACE(SD_LOG_IO_PM, un,
5874 			    "sdpower: exit, Log Sense Failed.\n");
5875 			return (DDI_FAILURE);
5876 		}
5877 
5878 		/*
5879 		 * From the page data - Convert the essential information to
5880 		 * pm_trans_data
5881 		 */
5882 		maxcycles =
5883 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5884 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5885 
5886 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5887 
5888 		ncycles =
5889 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5890 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5891 
5892 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5893 
5894 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5895 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5896 			    log_page_data[8+i];
5897 		}
5898 
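		/*
		 * Layout note (illustrative, per the SCSI start-stop
		 * cycle counter log page): after the 4-byte page header,
		 * parameter 0001h (date of manufacture) supplies the
		 * svc_date bytes at offset 8, parameter 0003h (specified
		 * cycle count over device lifetime) supplies maxcycles
		 * at offset 0x1c, and parameter 0004h (accumulated
		 * start-stop cycles) supplies ncycles at offset 0x24,
		 * each assembled above as a big-endian 32-bit value.
		 */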
5899 		kmem_free(log_page_data, log_page_size);
5900 
5901 		/*
5902 		 * Call pm_trans_check routine to get the Ok from
5903 		 * the global policy
5904 		 */
5905 
5906 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
5907 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
5908 
5909 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
5910 #ifdef	SDDEBUG
5911 		if (sd_force_pm_supported) {
5912 			/* Force a successful result */
5913 			rval = 1;
5914 		}
5915 #endif
5916 		switch (rval) {
5917 		case 0:
5918 			/*
5919 			 * It is not OK to power cycle, or there was an error
5920 			 * in the parameters passed; pm_trans_check has given
5921 			 * the advised time (intvlp) to wait before the next
5922 			 * power cycle. We are supposed to pretend we are busy
5923 			 * so that the pm framework will not call our power
5924 			 * entry point. To do that, install a timeout handler
5925 			 * and wait for the recommended time to elapse so that
5926 			 * power management can be effective again.
5927 			 *
5928 			 * To effect this behavior, call pm_busy_component to
5929 			 * indicate to the framework this device is busy.
5930 			 * By not adjusting un_pm_count the rest of PM in
5931 			 * By not adjusting un_pm_count, the rest of PM in
5932 			 * the driver will function normally, independent of
5933 			 * this; but because the framework is told the device
5934 			 * a matching idle. The timeout handler sends this.
5935 			 * Note: sd_pm_entry can't be called here to do this
5936 			 * because sdpower may have been called as a result
5937 			 * of a call to pm_raise_power from within sd_pm_entry.
5938 			 *
5939 			 * If a timeout handler is already active then
5940 			 * don't install another.
5941 			 */
5942 			mutex_enter(&un->un_pm_mutex);
5943 			if (un->un_pm_timeid == NULL) {
5944 				un->un_pm_timeid =
5945 				    timeout(sd_pm_timeout_handler,
5946 				    un, intvlp * drv_usectohz(1000000));
5947 				mutex_exit(&un->un_pm_mutex);
5948 				(void) pm_busy_component(SD_DEVINFO(un), 0);
5949 			} else {
5950 				mutex_exit(&un->un_pm_mutex);
5951 			}
5952 			if (got_semaphore_here != 0) {
5953 				sema_v(&un->un_semoclose);
5954 			}
5955 			/*
5956 			 * On exit put the state back to its original value
5957 			 * and broadcast to anyone waiting for the power
5958 			 * change completion.
5959 			 */
5960 			mutex_enter(SD_MUTEX(un));
5961 			un->un_state = state_before_pm;
5962 			cv_broadcast(&un->un_suspend_cv);
5963 			mutex_exit(SD_MUTEX(un));
5964 
5965 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
5966 			    "trans check Failed, not ok to power cycle.\n");
5967 			return (DDI_FAILURE);
5968 
5969 		case -1:
5970 			if (got_semaphore_here != 0) {
5971 				sema_v(&un->un_semoclose);
5972 			}
5973 			/*
5974 			 * On exit put the state back to its original value
5975 			 * and broadcast to anyone waiting for the power
5976 			 * change completion.
5977 			 */
5978 			mutex_enter(SD_MUTEX(un));
5979 			un->un_state = state_before_pm;
5980 			cv_broadcast(&un->un_suspend_cv);
5981 			mutex_exit(SD_MUTEX(un));
5982 			SD_TRACE(SD_LOG_IO_PM, un,
5983 			    "sdpower: exit, trans check command Failed.\n");
5984 			return (DDI_FAILURE);
5985 		}
5986 	}
5987 
5988 	if (level == SD_SPINDLE_OFF) {
5989 		/*
5990 		 * Save the last state... if the STOP FAILS we need it
5991 		 * for restoring
5992 		 */
5993 		mutex_enter(SD_MUTEX(un));
5994 		save_state = un->un_last_state;
5995 		/*
5996 		 * There must not be any commands being processed
5997 		 * in the driver when we get here. Power to the
5998 		 * device is potentially going off.
5999 		 */
6000 		ASSERT(un->un_ncmds_in_driver == 0);
6001 		mutex_exit(SD_MUTEX(un));
6002 
6003 		/*
6004 		 * For now suspend the device completely before spindle is
6005 		 * turned off
6006 		 */
6007 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6008 			if (got_semaphore_here != 0) {
6009 				sema_v(&un->un_semoclose);
6010 			}
6011 			/*
6012 			 * On exit put the state back to its original value
6013 			 * and broadcast to anyone waiting for the power
6014 			 * change completion.
6015 			 */
6016 			mutex_enter(SD_MUTEX(un));
6017 			un->un_state = state_before_pm;
6018 			cv_broadcast(&un->un_suspend_cv);
6019 			mutex_exit(SD_MUTEX(un));
6020 			SD_TRACE(SD_LOG_IO_PM, un,
6021 			    "sdpower: exit, PM suspend Failed.\n");
6022 			return (DDI_FAILURE);
6023 		}
6024 	}
6025 
6026 	/*
6027 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6028 	 * close, or strategy. Dump no longer uses this routine; it uses its
6029 	 * own code so it can be done in polled mode.
6030 	 */
6031 
6032 	medium_present = TRUE;
6033 
6034 	/*
6035 	 * When powering up, issue a TUR in case the device is at unit
6036 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6037 	 * a deadlock on un_pm_busy_cv will occur.
6038 	 */
6039 	if (level == SD_SPINDLE_ON) {
6040 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6041 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6042 	}
6043 
6044 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6045 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6046 
6047 	sval = sd_send_scsi_START_STOP_UNIT(un,
6048 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6049 	    SD_PATH_DIRECT);
6050 	/* Command failed, check for media present. */
6051 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6052 		medium_present = FALSE;
6053 	}
6054 
6055 	/*
6056 	 * The conditions of interest here are:
6057 	 *   if a spindle off with media present fails,
6058 	 *	then restore the state and return an error.
6059 	 *   else if a spindle on fails,
6060 	 *	then return an error (there's no state to restore).
6061 	 * In all other cases we setup for the new state
6062 	 * In all other cases we set up for the new state
6063 	 */
6064 	switch (level) {
6065 	case SD_SPINDLE_OFF:
6066 		if ((medium_present == TRUE) && (sval != 0)) {
6067 			/* The stop command from above failed */
6068 			rval = DDI_FAILURE;
6069 			/*
6070 			 * The stop command failed, and we have media
6071 			 * present. Put the level back by calling
6072 			 * sd_ddi_pm_resume() and set the state back to
6073 			 * its previous value.
6074 			 */
6075 			(void) sd_ddi_pm_resume(un);
6076 			mutex_enter(SD_MUTEX(un));
6077 			un->un_last_state = save_state;
6078 			mutex_exit(SD_MUTEX(un));
6079 			break;
6080 		}
6081 		/*
6082 		 * The stop command from above succeeded.
6083 		 */
6084 		if (un->un_f_monitor_media_state) {
6085 			/*
6086 			 * Terminate watch thread in case of removable media
6087 			 * devices going into low power state. This is as per
6088 			 * the requirements of pm framework, otherwise commands
6089 			 * will be generated for the device (through watch
6090 			 * thread), even when the device is in low power state.
6091 			 */
6092 			mutex_enter(SD_MUTEX(un));
6093 			un->un_f_watcht_stopped = FALSE;
6094 			if (un->un_swr_token != NULL) {
6095 				opaque_t temp_token = un->un_swr_token;
6096 				un->un_f_watcht_stopped = TRUE;
6097 				un->un_swr_token = NULL;
6098 				mutex_exit(SD_MUTEX(un));
6099 				(void) scsi_watch_request_terminate(temp_token,
6100 				    SCSI_WATCH_TERMINATE_WAIT);
6101 			} else {
6102 				mutex_exit(SD_MUTEX(un));
6103 			}
6104 		}
6105 		break;
6106 
6107 	default:	/* The level requested is spindle on... */
6108 		/*
6109 		 * Legacy behavior: return success on a failed spinup
6110 		 * if there is no media in the drive.
6111 		 * Do this by looking at medium_present here.
6112 		 */
6113 		if ((sval != 0) && medium_present) {
6114 			/* The start command from above failed */
6115 			rval = DDI_FAILURE;
6116 			break;
6117 		}
6118 		/*
6119 		 * The start command from above succeeded.
6120 		 * Resume the devices now that we have
6121 		 * started the disks
6122 		 */
6123 		(void) sd_ddi_pm_resume(un);
6124 
6125 		/*
6126 		 * Resume the watch thread since it was suspended
6127 		 * when the device went into low power mode.
6128 		 */
6129 		if (un->un_f_monitor_media_state) {
6130 			mutex_enter(SD_MUTEX(un));
6131 			if (un->un_f_watcht_stopped == TRUE) {
6132 				opaque_t temp_token;
6133 
6134 				un->un_f_watcht_stopped = FALSE;
6135 				mutex_exit(SD_MUTEX(un));
6136 				temp_token = scsi_watch_request_submit(
6137 				    SD_SCSI_DEVP(un),
6138 				    sd_check_media_time,
6139 				    SENSE_LENGTH, sd_media_watch_cb,
6140 				    (caddr_t)dev);
6141 				mutex_enter(SD_MUTEX(un));
6142 				un->un_swr_token = temp_token;
6143 			}
6144 			mutex_exit(SD_MUTEX(un));
6145 		}
6146 	}
6147 	if (got_semaphore_here != 0) {
6148 		sema_v(&un->un_semoclose);
6149 	}
6150 	/*
6151 	 * On exit put the state back to its original value
6152 	 * and broadcast to anyone waiting for the power
6153 	 * change completion.
6154 	 */
6155 	mutex_enter(SD_MUTEX(un));
6156 	un->un_state = state_before_pm;
6157 	cv_broadcast(&un->un_suspend_cv);
6158 	mutex_exit(SD_MUTEX(un));
6159 
6160 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6161 
6162 	return (rval);
6163 }
6164 
6165 
6166 
6167 /*
6168  *    Function: sdattach
6169  *
6170  * Description: Driver's attach(9e) entry point function.
6171  *
6172  *   Arguments: devi - opaque device info handle
6173  *		cmd  - attach  type
6174  *
6175  * Return Code: DDI_SUCCESS
6176  *		DDI_FAILURE
6177  *
6178  *     Context: Kernel thread context
6179  */
6180 
6181 static int
6182 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6183 {
6184 	switch (cmd) {
6185 	case DDI_ATTACH:
6186 		return (sd_unit_attach(devi));
6187 	case DDI_RESUME:
6188 		return (sd_ddi_resume(devi));
6189 	default:
6190 		break;
6191 	}
6192 	return (DDI_FAILURE);
6193 }
6194 
6195 
6196 /*
6197  *    Function: sddetach
6198  *
6199  * Description: Driver's detach(9E) entry point function.
6200  *
6201  *   Arguments: devi - opaque device info handle
6202  *		cmd  - detach  type
6203  *
6204  * Return Code: DDI_SUCCESS
6205  *		DDI_FAILURE
6206  *
6207  *     Context: Kernel thread context
6208  */
6209 
6210 static int
6211 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6212 {
6213 	switch (cmd) {
6214 	case DDI_DETACH:
6215 		return (sd_unit_detach(devi));
6216 	case DDI_SUSPEND:
6217 		return (sd_ddi_suspend(devi));
6218 	default:
6219 		break;
6220 	}
6221 	return (DDI_FAILURE);
6222 }
6223 
6224 
6225 /*
6226  *     Function: sd_sync_with_callback
6227  *
6228  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6229  *		 state while the callback routine is active.
6230  *
6231  *    Arguments: un: softstate structure for the instance
6232  *
6233  *	Context: Kernel thread context
6234  */
6235 
6236 static void
6237 sd_sync_with_callback(struct sd_lun *un)
6238 {
6239 	ASSERT(un != NULL);
6240 
6241 	mutex_enter(SD_MUTEX(un));
6242 
6243 	ASSERT(un->un_in_callback >= 0);
6244 
6245 	while (un->un_in_callback > 0) {
6246 		mutex_exit(SD_MUTEX(un));
6247 		delay(2);
6248 		mutex_enter(SD_MUTEX(un));
6249 	}
6250 
6251 	mutex_exit(SD_MUTEX(un));
6252 }
6253 
6254 /*
6255  *    Function: sd_unit_attach
6256  *
6257  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6258  *		the soft state structure for the device and performs
6259  *		all necessary structure and device initializations.
6260  *
6261  *   Arguments: devi: the system's dev_info_t for the device.
6262  *
6263  * Return Code: DDI_SUCCESS if attach is successful.
6264  *		DDI_FAILURE if any part of the attach fails.
6265  *
6266  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6267  *		Kernel thread context only.  Can sleep.
6268  */
6269 
6270 static int
6271 sd_unit_attach(dev_info_t *devi)
6272 {
6273 	struct	scsi_device	*devp;
6274 	struct	sd_lun		*un;
6275 	char			*variantp;
6276 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6277 	int	instance;
6278 	int	rval;
6279 	int	wc_enabled;
6280 	int	tgt;
6281 	uint64_t	capacity;
6282 	uint_t		lbasize = 0;
6283 	dev_info_t	*pdip = ddi_get_parent(devi);
6284 	int		offbyone = 0;
6285 	int		geom_label_valid = 0;
6286 
6287 	/*
6288 	 * Retrieve the target driver's private data area. This was set
6289 	 * up by the HBA.
6290 	 */
6291 	devp = ddi_get_driver_private(devi);
6292 
6293 	/*
6294 	 * Retrieve the target ID of the device.
6295 	 */
6296 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6297 	    SCSI_ADDR_PROP_TARGET, -1);
6298 
6299 	/*
6300 	 * Since we have no idea what state things were left in by the last
6301 	 * user of the device, set up some 'default' settings, i.e. turn 'em
6302 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6303 	 * Do this before the scsi_probe, which sends an inquiry.
6304 	 * This is a fix for bug (4430280).
6305 	 * Of special importance is wide-xfer. The drive could have been left
6306 	 * in wide transfer mode by the last driver to communicate with it,
6307 	 * this includes us. If that's the case, and if the following is not
6308 	 * setup properly or we don't re-negotiate with the drive prior to
6309 	 * transferring data to/from the drive, it causes bus parity errors,
6310 	 * data overruns, and unexpected interrupts. This first occurred when
6311 	 * the fix for bug (4378686) was made.
6312 	 */
6313 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6314 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6315 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6316 
6317 	/*
6318 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6319 	 * on a target. Setting it per lun instance actually sets the
6320 	 * capability of this target, which affects those luns already
6321 	 * attached on the same target. So during attach, we can disable
6322 	 * this capability only when no other lun has been attached on this
6323 	 * target. By doing this, we assume a target has the same tagged-qing
6324 	 * capability for every lun. The condition can be removed when HBA
6325 	 * is changed to support per lun based tagged-qing capability.
6326 	 */
6327 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6328 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6329 	}
6330 
6331 	/*
6332 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6333 	 * This call will allocate and fill in the scsi_inquiry structure
6334 	 * and point the sd_inq member of the scsi_device structure to it.
6335 	 * If the attach succeeds, then this memory will not be de-allocated
6336 	 * (via scsi_unprobe()) until the instance is detached.
6337 	 */
6338 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6339 		goto probe_failed;
6340 	}
6341 
6342 	/*
6343 	 * Check the device type as specified in the inquiry data and
6344 	 * claim it if it is of a type that we support.
6345 	 */
6346 	switch (devp->sd_inq->inq_dtype) {
6347 	case DTYPE_DIRECT:
6348 		break;
6349 	case DTYPE_RODIRECT:
6350 		break;
6351 	case DTYPE_OPTICAL:
6352 		break;
6353 	case DTYPE_NOTPRESENT:
6354 	default:
6355 		/* Unsupported device type; fail the attach. */
6356 		goto probe_failed;
6357 	}
6358 
6359 	/*
6360 	 * Allocate the soft state structure for this unit.
6361 	 *
6362 	 * We rely upon this memory being set to all zeroes by
6363 	 * ddi_soft_state_zalloc().  We assume that any member of the
6364 	 * soft state structure that is not explicitly initialized by
6365 	 * this routine will have a value of zero.
6366 	 */
6367 	instance = ddi_get_instance(devp->sd_dev);
6368 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6369 		goto probe_failed;
6370 	}
6371 
6372 	/*
6373 	 * Retrieve a pointer to the newly-allocated soft state.
6374 	 *
6375 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6376 	 * was successful, unless something has gone horribly wrong and the
6377 	 * ddi's soft state internals are corrupt (in which case it is
6378 	 * probably better to halt here than just fail the attach....)
6379 	 */
6380 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6381 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6382 		    instance);
6383 		/*NOTREACHED*/
6384 	}
6385 
6386 	/*
6387 	 * Link the back ptr of the driver soft state to the scsi_device
6388 	 * struct for this lun.
6389 	 * Save a pointer to the softstate in the driver-private area of
6390 	 * the scsi_device struct.
6391 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6392 	 * we first set un->un_sd below.
6393 	 */
6394 	un->un_sd = devp;
6395 	devp->sd_private = (opaque_t)un;
6396 
6397 	/*
6398 	 * The following must be after devp is stored in the soft state struct.
6399 	 */
6400 #ifdef SDDEBUG
6401 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6402 	    "%s_unit_attach: un:0x%p instance:%d\n",
6403 	    ddi_driver_name(devi), un, instance);
6404 #endif
6405 
6406 	/*
6407 	 * Set up the device type and node type (for the minor nodes).
6408 	 * By default we assume that the device can at least support the
6409 	 * Common Command Set. Call it a CD-ROM if it reports itself
6410 	 * as a RODIRECT device.
6411 	 */
6412 	switch (devp->sd_inq->inq_dtype) {
6413 	case DTYPE_RODIRECT:
6414 		un->un_node_type = DDI_NT_CD_CHAN;
6415 		un->un_ctype	 = CTYPE_CDROM;
6416 		break;
6417 	case DTYPE_OPTICAL:
6418 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6419 		un->un_ctype	 = CTYPE_ROD;
6420 		break;
6421 	default:
6422 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6423 		un->un_ctype	 = CTYPE_CCS;
6424 		break;
6425 	}
6426 
6427 	/*
6428 	 * Try to read the interconnect type from the HBA.
6429 	 *
6430 	 * Note: This driver is currently compiled as two binaries, a parallel
6431 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6432 	 * differences are determined at compile time. In the future a single
6433 	 * binary will be provided and the interconnect type will be used to
6434 	 * differentiate between fibre and parallel scsi behaviors. At that time
6435 	 * it will be necessary for all fibre channel HBAs to support this
6436 	 * property.
6437 	 *
6438 	 * Set un_f_is_fibre to TRUE (default fibre).
6439 	 */
6440 	un->un_f_is_fibre = TRUE;
6441 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6442 	case INTERCONNECT_SSA:
6443 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6444 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6445 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6446 		break;
6447 	case INTERCONNECT_PARALLEL:
6448 		un->un_f_is_fibre = FALSE;
6449 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6450 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6451 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6452 		break;
6453 	case INTERCONNECT_SATA:
6454 		un->un_f_is_fibre = FALSE;
6455 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6456 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6457 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6458 		break;
6459 	case INTERCONNECT_FIBRE:
6460 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6461 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6462 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6463 		break;
6464 	case INTERCONNECT_FABRIC:
6465 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6466 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6467 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6468 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6469 		break;
6470 	default:
6471 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6472 		/*
6473 		 * The HBA does not support the "interconnect-type" property
6474 		 * (or did not provide a recognized type).
6475 		 *
6476 		 * Note: This will be obsoleted when a single fibre channel
6477 		 * and parallel scsi driver is delivered. In the meantime the
6478 	 * interconnect type will be set to the platform default. If that
6479 		 * type is not parallel SCSI, it means that we should be
6480 		 * assuming "ssd" semantics. However, here this also means that
6481 		 * the FC HBA is not supporting the "interconnect-type" property
6482 		 * like we expect it to, so log this occurrence.
6483 		 */
6484 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6485 		if (!SD_IS_PARALLEL_SCSI(un)) {
6486 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6487 			    "sd_unit_attach: un:0x%p Assuming "
6488 			    "INTERCONNECT_FIBRE\n", un);
6489 		} else {
6490 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6491 			    "sd_unit_attach: un:0x%p Assuming "
6492 			    "INTERCONNECT_PARALLEL\n", un);
6493 			un->un_f_is_fibre = FALSE;
6494 		}
6495 #else
6496 		/*
6497 		 * Note: This source will be implemented when a single fibre
6498 		 * channel and parallel scsi driver is delivered. The default
6499 		 * will be to assume that if a device does not support the
6500 		 * "interconnect-type" property it is a parallel SCSI HBA and
6501 		 * we will set the interconnect type for parallel scsi.
6502 		 */
6503 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6504 		un->un_f_is_fibre = FALSE;
6505 #endif
6506 		break;
6507 	}
6508 
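	/*
	 * For fibre/SSA interconnects that report SCSI-3, publish the
	 * minor nodes under the WWN-based node type.
	 */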
6509 	if (un->un_f_is_fibre == TRUE) {
6510 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6511 		    SCSI_VERSION_3) {
6512 			switch (un->un_interconnect_type) {
6513 			case SD_INTERCONNECT_FIBRE:
6514 			case SD_INTERCONNECT_SSA:
6515 				un->un_node_type = DDI_NT_BLOCK_WWN;
6516 				break;
6517 			default:
6518 				break;
6519 			}
6520 		}
6521 	}
6522 
6523 	/*
6524 	 * Initialize the Request Sense command for the target
6525 	 */
6526 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6527 		goto alloc_rqs_failed;
6528 	}
6529 
6530 	/*
6531 	 * Set un_retry_count to SD_RETRY_COUNT; this is ok for SPARC
6532 	 * with separate binaries for sd and ssd.
6533 	 *
6534 	 * x86 has one binary, and un_retry_count is set based on the
6535 	 * interconnect type. These hardcoded values will go away when
6536 	 * SPARC uses one binary for sd and ssd; they need to match
6537 	 * SD_RETRY_COUNT in sddef.h.
6538 	 * The value used is based on the interconnect type:
6539 	 * fibre = 3, parallel = 5.
6540 	 */
6541 #if defined(__i386) || defined(__amd64)
6542 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6543 #else
6544 	un->un_retry_count = SD_RETRY_COUNT;
6545 #endif
6546 
6547 	/*
6548 	 * Set the per disk retry count to the default number of retries
6549 	 * for disks and CDROMs. This value can be overridden by the
6550 	 * disk property list or an entry in sd.conf.
6551 	 */
6552 	un->un_notready_retry_count =
6553 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un) :
6554 	    DISK_NOT_READY_RETRY_COUNT(un);
6555 
6556 	/*
6557 	 * Set the busy retry count to the default value of un_retry_count.
6558 	 * This can be overridden by entries in sd.conf or the device
6559 	 * config table.
6560 	 */
6561 	un->un_busy_retry_count = un->un_retry_count;
6562 
6563 	/*
6564 	 * Init the reset threshold for retries.  This number determines
6565 	 * how many retries must be performed before a reset can be issued
6566 	 * (for certain error conditions). This can be overridden by entries
6567 	 * in sd.conf or the device config table.
6568 	 */
6569 	un->un_reset_retry_count = (un->un_retry_count / 2);
6570 
6571 	/*
6572 	 * Set the victim_retry_count to twice the default un_retry_count.
6573 	 */
6574 	un->un_victim_retry_count = (2 * un->un_retry_count);
6575 
6576 	/*
6577 	 * Set the reservation release timeout to the default value of
6578 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6579 	 * device config table.
6580 	 */
6581 	un->un_reserve_release_time = 5;
6582 
6583 	/*
6584 	 * Set up the default maximum transfer size. Note that this may
6585 	 * get updated later in the attach, when setting up default wide
6586 	 * operations for disks.
6587 	 */
6588 #if defined(__i386) || defined(__amd64)
6589 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6590 #else
6591 	un->un_max_xfer_size = (uint_t)maxphys;
6592 #endif
6593 
6594 	/*
6595 	 * Get "allow bus device reset" property (defaults to "enabled" if
6596 	 * the property was not defined). This is to disable bus resets for
6597 	 * certain kinds of error recovery. Note: In the future when a run-time
6598 	 * fibre check is available the soft state flag should default to
6599 	 * enabled.
6600 	 */
6601 	if (un->un_f_is_fibre == TRUE) {
6602 		un->un_f_allow_bus_device_reset = TRUE;
6603 	} else {
6604 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6605 		    "allow-bus-device-reset", 1) != 0) {
6606 			un->un_f_allow_bus_device_reset = TRUE;
6607 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6608 			    "sd_unit_attach: un:0x%p Bus device reset "
6609 			    "enabled\n", un);
6610 		} else {
6611 			un->un_f_allow_bus_device_reset = FALSE;
6612 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6613 			    "sd_unit_attach: un:0x%p Bus device reset "
6614 			    "disabled\n", un);
6615 		}
6616 	}
6617 
6618 	/*
6619 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6620 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6621 	 *
6622 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6623 	 * property. The new "variant" property with a value of "atapi" has been
6624 	 * introduced so that future 'variants' of standard SCSI behavior (like
6625 	 * atapi) could be specified by the underlying HBA drivers by supplying
6626 	 * a new value for the "variant" property, instead of having to define a
6627 	 * new property.
6628 	 */
6629 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6630 		un->un_f_cfg_is_atapi = TRUE;
6631 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6632 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6633 	}
6634 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6635 	    &variantp) == DDI_PROP_SUCCESS) {
6636 		if (strcmp(variantp, "atapi") == 0) {
6637 			un->un_f_cfg_is_atapi = TRUE;
6638 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6639 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6640 		}
6641 		ddi_prop_free(variantp);
6642 	}
6643 
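	/* Default per-command timeout for ordinary I/O. */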
6644 	un->un_cmd_timeout	= SD_IO_TIME;
6645 
6646 	/* Info on current states, statuses, etc. (Updated frequently) */
6647 	un->un_state		= SD_STATE_NORMAL;
6648 	un->un_last_state	= SD_STATE_NORMAL;
6649 
6650 	/* Control & status info for command throttling */
6651 	un->un_throttle		= sd_max_throttle;
6652 	un->un_saved_throttle	= sd_max_throttle;
6653 	un->un_min_throttle	= sd_min_throttle;
6654 
6655 	if (un->un_f_is_fibre == TRUE) {
6656 		un->un_f_use_adaptive_throttle = TRUE;
6657 	} else {
6658 		un->un_f_use_adaptive_throttle = FALSE;
6659 	}
6660 
6661 	/* Removable media support. */
6662 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6663 	un->un_mediastate		= DKIO_NONE;
6664 	un->un_specified_mediastate	= DKIO_NONE;
6665 
6666 	/* CVs for suspend/resume (PM or DR) */
6667 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6668 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6669 
6670 	/* Power management support. */
6671 	un->un_power_level = SD_SPINDLE_UNINIT;
6672 
6673 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6674 	un->un_f_wcc_inprog = 0;
6675 
6676 	/*
6677 	 * The open/close semaphore is used to serialize threads executing
6678 	 * in the driver's open & close entry point routines for a given
6679 	 * instance.
6680 	 */
6681 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
6682 
6683 	/*
6684 	 * The conf file entry and softstate variable are a forceful override,
6685 	 * meaning a non-zero value must be entered to change the default.
6686 	 */
6687 	un->un_f_disksort_disabled = FALSE;
6688 
6689 	/*
6690 	 * Retrieve the properties from the static driver table or the driver
6691 	 * configuration file (.conf) for this unit and update the soft state
6692 	 * for the device as needed for the indicated properties.
6693 	 * Note: the property configuration needs to occur here as some of the
6694 	 * following routines may have dependencies on soft state flags set
6695 	 * as part of the driver property configuration.
6696 	 */
6697 	sd_read_unit_properties(un);
6698 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6699 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6700 
6701 	/*
6702 	 * A device is treated as hotpluggable only if it has the
6703 	 * "hotpluggable" property; otherwise it is regarded as
6704 	 * non-hotpluggable.
6705 	 */
6706 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6707 	    -1) != -1) {
6708 		un->un_f_is_hotpluggable = TRUE;
6709 	}
6710 
6711 	/*
6712 	 * Set the unit's attributes (flags) according to "hotpluggable" and
6713 	 * the RMB bit in the INQUIRY data.
6714 	 */
6715 	sd_set_unit_attributes(un, devi);
6716 
6717 	/*
6718 	 * By default, we mark the capacity, lbasize, and geometry
6719 	 * as invalid. Only if we successfully read a valid capacity
6720 	 * will we update the un_blockcount and un_tgt_blocksize with the
6721 	 * valid values (the geometry will be validated later).
6722 	 */
6723 	un->un_f_blockcount_is_valid	= FALSE;
6724 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6725 
6726 	/*
6727 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
6728 	 * otherwise.
6729 	 */
6730 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6731 	un->un_blockcount = 0;
6732 
6733 	/*
6734 	 * Set up the per-instance info needed to determine the correct
6735 	 * CDBs and other info for issuing commands to the target.
6736 	 */
6737 	sd_init_cdb_limits(un);
6738 
6739 	/*
6740 	 * Set up the IO chains to use, based upon the target type.
6741 	 */
6742 	if (un->un_f_non_devbsize_supported) {
6743 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6744 	} else {
6745 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6746 	}
6747 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6748 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6749 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6750 
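	/*
	 * Create the xbuf attribute context used to allocate the per-command
	 * sd_xbuf structures, with the configured active and reserve limits
	 * and a driver-owned queueing thread, and register it with this
	 * devinfo node.
	 */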
6751 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6752 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6753 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6754 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
6755 
6756 
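	/*
	 * CD-ROM units get the driver's table of additional sense codes;
	 * other device types use none.
	 */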
6757 	if (ISCD(un)) {
6758 		un->un_additional_codes = sd_additional_codes;
6759 	} else {
6760 		un->un_additional_codes = NULL;
6761 	}
6762 
6763 	/*
6764 	 * Create the kstats here so they can be available for attach-time
6765 	 * routines that send commands to the unit (either polled or via
6766 	 * sd_send_scsi_cmd).
6767 	 *
6768 	 * Note: This is a critical sequence that needs to be maintained:
6769 	 *	1) Instantiate the kstats here, before any routines using the
6770 	 *	   iopath (i.e. sd_send_scsi_cmd).
6771 	 *	2) Instantiate and initialize the partition stats
6772 	 *	   (sd_set_pstats).
6773 	 *	3) Initialize the error stats (sd_set_errstats), following
6774 	 *	   sd_validate_geometry(),sd_register_devid(),
6775 	 *	   and sd_cache_control().
6776 	 */
6777 
6778 	un->un_stats = kstat_create(sd_label, instance,
6779 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6780 	if (un->un_stats != NULL) {
6781 		un->un_stats->ks_lock = SD_MUTEX(un);
6782 		kstat_install(un->un_stats);
6783 	}
6784 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6785 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6786 
6787 	sd_create_errstats(un, instance);
6788 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6789 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6790 
6791 	/*
6792 	 * The following if/else code was relocated here from below as part
6793 	 * of the fix for bug (4430280). However with the default setup added
6794 	 * on entry to this routine, it's no longer absolutely necessary for
6795 	 * this to be before the call to sd_spin_up_unit.
6796 	 */
6797 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6798 		/*
6799 		 * If SCSI-2 tagged queueing is supported by the target
6800 		 * and by the host adapter then we will enable it.
6801 		 */
6802 		un->un_tagflags = 0;
6803 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6804 		    (devp->sd_inq->inq_cmdque) &&
6805 		    (un->un_f_arq_enabled == TRUE)) {
6806 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6807 			    1, 1) == 1) {
6808 				un->un_tagflags = FLAG_STAG;
6809 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6810 				    "sd_unit_attach: un:0x%p tag queueing "
6811 				    "enabled\n", un);
6812 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6813 			    "untagged-qing", 0) == 1) {
6814 				un->un_f_opt_queueing = TRUE;
6815 				un->un_saved_throttle = un->un_throttle =
6816 				    min(un->un_throttle, 3);
6817 			} else {
6818 				un->un_f_opt_queueing = FALSE;
6819 				un->un_saved_throttle = un->un_throttle = 1;
6820 			}
6821 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6822 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6823 			/* The Host Adapter supports internal queueing. */
6824 			un->un_f_opt_queueing = TRUE;
6825 			un->un_saved_throttle = un->un_throttle =
6826 			    min(un->un_throttle, 3);
6827 		} else {
6828 			un->un_f_opt_queueing = FALSE;
6829 			un->un_saved_throttle = un->un_throttle = 1;
6830 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6831 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6832 		}
6833 
6834 		/*
6835 		 * Enable large transfers for SATA/SAS drives
6836 		 */
6837 		if (SD_IS_SERIAL(un)) {
6838 			un->un_max_xfer_size =
6839 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6840 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6841 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6842 			    "sd_unit_attach: un:0x%p max transfer "
6843 			    "size=0x%x\n", un, un->un_max_xfer_size);
6844 
6845 		}
6846 
6847 		/* Setup or tear down default wide operations for disks */
6848 
6849 		/*
6850 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6851 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6852 		 * system and be set to different values. In the future this
6853 		 * code may need to be updated when the ssd module is
6854 		 * obsoleted and removed from the system. (4299588)
6855 		 */
6856 		if (SD_IS_PARALLEL_SCSI(un) &&
6857 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6858 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6859 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6860 			    1, 1) == 1) {
6861 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6862 				    "sd_unit_attach: un:0x%p Wide Transfer "
6863 				    "enabled\n", un);
6864 			}
6865 
6866 			/*
6867 			 * If tagged queuing has also been enabled, then
6868 			 * enable large xfers
6869 			 */
6870 			if (un->un_saved_throttle == sd_max_throttle) {
6871 				un->un_max_xfer_size =
6872 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6873 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6874 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6875 				    "sd_unit_attach: un:0x%p max transfer "
6876 				    "size=0x%x\n", un, un->un_max_xfer_size);
6877 			}
6878 		} else {
6879 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6880 			    0, 1) == 1) {
6881 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6882 				    "sd_unit_attach: un:0x%p "
6883 				    "Wide Transfer disabled\n", un);
6884 			}
6885 		}
6886 	} else {
6887 		un->un_tagflags = FLAG_STAG;
6888 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6889 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6890 	}
6891 
6892 	/*
6893 	 * If this target supports LUN reset, try to enable it.
6894 	 */
6895 	if (un->un_f_lun_reset_enabled) {
6896 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6897 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6898 			    "un:0x%p lun_reset capability set\n", un);
6899 		} else {
6900 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6901 			    "un:0x%p lun-reset capability not set\n", un);
6902 		}
6903 	}
6904 
6905 	/*
6906 	 * At this point in the attach, we have enough info in the
6907 	 * soft state to be able to issue commands to the target.
6908 	 *
6909 	 * All command paths used below MUST issue their commands as
6910 	 * SD_PATH_DIRECT. This is important as intermediate layers
6911 	 * are not all initialized yet (such as PM).
6912 	 */
6913 
6914 	/*
6915 	 * Send a TEST UNIT READY command to the device. This should clear
6916 	 * any outstanding UNIT ATTENTION that may be present.
6917 	 *
6918 	 * Note: Don't check for success, just track if there is a reservation,
6919 	 * this is a throw away command to clear any unit attentions.
6920 	 *
6921 	 * Note: This MUST be the first command issued to the target during
6922 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
6923 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
6924 	 * with attempts at spinning up a device with no media.
6925 	 */
6926 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
6927 		reservation_flag = SD_TARGET_IS_RESERVED;
6928 	}
6929 
6930 	/*
6931 	 * If the device is NOT a removable media device, attempt to spin
6932 	 * it up (using the START_STOP_UNIT command) and read its capacity
6933 	 * (using the READ CAPACITY command).  Note, however, that either
6934 	 * of these could fail and in some cases we would continue with
6935 	 * the attach despite the failure (see below).
6936 	 */
6937 	if (un->un_f_descr_format_supported) {
6938 		switch (sd_spin_up_unit(un)) {
6939 		case 0:
6940 			/*
6941 			 * Spin-up was successful; now try to read the
6942 			 * capacity.  If successful then save the results
6943 			 * and mark the capacity & lbasize as valid.
6944 			 */
6945 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6946 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
6947 
6948 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
6949 			    &lbasize, SD_PATH_DIRECT)) {
6950 			case 0: {
6951 				if (capacity > DK_MAX_BLOCKS) {
6952 #ifdef _LP64
6953 					if (capacity + 1 >
6954 					    SD_GROUP1_MAX_ADDRESS) {
6955 						/*
6956 						 * Enable descriptor format
6957 						 * sense data so that we can
6958 						 * get 64 bit sense data
6959 						 * fields.
6960 						 */
6961 						sd_enable_descr_sense(un);
6962 					}
6963 #else
6964 					/* 32-bit kernels can't handle this */
6965 					scsi_log(SD_DEVINFO(un),
6966 					    sd_label, CE_WARN,
6967 					    "disk has %llu blocks, which "
6968 					    "is too large for a 32-bit "
6969 					    "kernel", capacity);
6970 
6971 #if defined(__i386) || defined(__amd64)
6972 					/*
6973 					 * A 1TB disk was treated as
6974 					 * (1TB - 512B) in the past, so it
6975 					 * might have a valid VTOC and
6976 					 * Solaris partitions; allow it to
6977 					 * continue to work.
6978 					 */
6979 					if (capacity - 1 > DK_MAX_BLOCKS)
6980 #endif
6981 					goto spinup_failed;
6982 #endif
6983 				}
6984 
6985 				/*
6986 				 * It is not necessary to check here whether
6987 				 * the device capacity is bigger than what the
6988 				 * max HBA CDB can support, because
6989 				 * sd_send_scsi_READ_CAPACITY retrieves the
6990 				 * capacity via a USCSI command, which is
6991 				 * itself constrained by the max HBA CDB. In
6992 				 * fact, sd_send_scsi_READ_CAPACITY returns
6993 				 * EINVAL when the required CDB length exceeds
6994 				 * what the HBA supports; that is handled in
6995 				 * "case EINVAL" below.
6996 				 */
6997 
6998 				/*
6999 				 * The following relies on
7000 				 * sd_send_scsi_READ_CAPACITY never
7001 				 * returning 0 for capacity and/or lbasize.
7002 				 */
7003 				sd_update_block_info(un, lbasize, capacity);
7004 
7005 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7006 				    "sd_unit_attach: un:0x%p capacity = %ld "
7007 				    "blocks; lbasize= %ld.\n", un,
7008 				    un->un_blockcount, un->un_tgt_blocksize);
7009 
7010 				break;
7011 			}
7012 			case EINVAL:
7013 				/*
7014 				 * In the case where the max-cdb-length property
7015 				 * is smaller than the required CDB length for
7016 				 * a SCSI device, a target driver can fail to
7017 				 * attach to that device.
7018 				 */
7019 				scsi_log(SD_DEVINFO(un),
7020 				    sd_label, CE_WARN,
7021 				    "disk capacity is too large "
7022 				    "for current cdb length");
7023 				goto spinup_failed;
7024 			case EACCES:
7025 				/*
7026 				 * Should never get here if the spin-up
7027 				 * succeeded, but code it in anyway.
7028 				 * From here, just continue with the attach...
7029 				 */
7030 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7031 				    "sd_unit_attach: un:0x%p "
7032 				    "sd_send_scsi_READ_CAPACITY "
7033 				    "returned reservation conflict\n", un);
7034 				reservation_flag = SD_TARGET_IS_RESERVED;
7035 				break;
7036 			default:
7037 				/*
7038 				 * Likewise, should never get here if the
7039 				 * spin-up succeeded. Just continue with
7040 				 * the attach...
7041 				 */
7042 				break;
7043 			}
7044 			break;
7045 		case EACCES:
7046 			/*
7047 			 * Device is reserved by another host.  In this case
7048 			 * we could not spin it up or read the capacity, but
7049 			 * we continue with the attach anyway.
7050 			 */
7051 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7052 			    "sd_unit_attach: un:0x%p spin-up reservation "
7053 			    "conflict.\n", un);
7054 			reservation_flag = SD_TARGET_IS_RESERVED;
7055 			break;
7056 		default:
7057 			/* Fail the attach if the spin-up failed. */
7058 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7059 	    "sd_unit_attach: un:0x%p spin-up failed.\n", un);
7060 			goto spinup_failed;
7061 		}
7062 	}
7063 
7064 	/*
7065 	 * Check to see if this is a MMC drive
7066 	 */
7067 	if (ISCD(un)) {
7068 		sd_set_mmc_caps(un);
7069 	}
7070 
7071 
7072 	/*
7073 	 * Add a zero-length attribute to tell the world we support
7074 	 * kernel ioctls (for layered drivers)
7075 	 */
7076 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7077 	    DDI_KERNEL_IOCTL, NULL, 0);
7078 
7079 	/*
7080 	 * Add a boolean property to tell the world we support
7081 	 * the B_FAILFAST flag (for layered drivers)
7082 	 */
7083 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7084 	    "ddi-failfast-supported", NULL, 0);
7085 
7086 	/*
7087 	 * Initialize power management
7088 	 */
7089 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7090 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7091 	sd_setup_pm(un, devi);
7092 	if (un->un_f_pm_is_enabled == FALSE) {
7093 		/*
7094 		 * For performance, point to a jump table that does
7095 		 * not include pm.
7096 		 * The direct and priority chains don't change with PM.
7097 		 *
7098 		 * Note: this is currently done based on individual device
7099 		 * capabilities. When an interface for determining system
7100 		 * power enabled state becomes available, or when additional
7101 		 * layers are added to the command chain, these values will
7102 		 * have to be re-evaluated for correctness.
7103 		 */
7104 		if (un->un_f_non_devbsize_supported) {
7105 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7106 		} else {
7107 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7108 		}
7109 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7110 	}
7111 
7112 	/*
7113 	 * This property is set to 0 by HA software to avoid retries
7114 	 * on a reserved disk. (The preferred property name is
7115 	 * "retry-on-reservation-conflict") (1189689)
7116 	 *
7117 	 * Note: The use of a global here can have unintended consequences. A
7118 	 * per-instance variable is preferable to match the capabilities of
7119 	 * different underlying HBAs. (4402600)
7120 	 */
7121 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7122 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7123 	    sd_retry_on_reservation_conflict);
7124 	if (sd_retry_on_reservation_conflict != 0) {
7125 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7126 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7127 		    sd_retry_on_reservation_conflict);
7128 	}
7129 
7130 	/* Set up options for QFULL handling. */
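	/*
	 * If "qfull-retries" or "qfull-retry-interval" are defined in the
	 * conf file, pass the values through to the HBA via scsi_ifsetcap().
	 */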
7131 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7132 	    "qfull-retries", -1)) != -1) {
7133 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7134 		    rval, 1);
7135 	}
7136 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7137 	    "qfull-retry-interval", -1)) != -1) {
7138 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7139 		    rval, 1);
7140 	}
7141 
7142 	/*
7143 	 * This just prints a message that announces the existence of the
7144 	 * device. The message is always printed in the system logfile, but
7145 	 * only appears on the console if the system is booted with the
7146 	 * -v (verbose) argument.
7147 	 */
7148 	ddi_report_dev(devi);
7149 
7150 	un->un_mediastate = DKIO_NONE;
7151 
7152 	cmlb_alloc_handle(&un->un_cmlbhandle);
7153 
7154 #if defined(__i386) || defined(__amd64)
7155 	/*
7156 	 * On x86, compensate for off-by-1 legacy error
7157 	 */
7158 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7159 	    (lbasize == un->un_sys_blocksize))
7160 		offbyone = CMLB_OFF_BY_ONE;
7161 #endif
7162 
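	/*
	 * Register this unit with the common disk-label module (cmlb), which
	 * creates the disk minor nodes and manages label/geometry access.
	 */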
7163 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7164 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7165 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7166 	    (void *)SD_PATH_DIRECT) != 0) {
7167 		goto cmlb_attach_failed;
7168 	}
7169 
7170 
7171 	/*
7172 	 * Read and validate the device's geometry (ie, disk label)
7173 	 * A new unformatted drive will not have a valid geometry, but
7174 	 * the driver needs to successfully attach to this device so
7175 	 * the drive can be formatted via ioctls.
7176 	 */
7177 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7178 	    (void *)SD_PATH_DIRECT) == 0) ? 1 : 0;
7179 
7180 	mutex_enter(SD_MUTEX(un));
7181 
7182 	/*
7183 	 * Read and initialize the devid for the unit.
7184 	 */
7185 	ASSERT(un->un_errstats != NULL);
7186 	if (un->un_f_devid_supported) {
7187 		sd_register_devid(un, devi, reservation_flag);
7188 	}
7189 	mutex_exit(SD_MUTEX(un));
7190 
7191 #if (defined(__fibre))
7192 	/*
7193 	 * Register callbacks for fibre only.  You can't do this solely
7194 	 * on the basis of the devid_type because this is HBA specific.
7195 	 * We need to query our hba capabilities to find out whether to
7196 	 * register or not.
7197 	 */
7198 	if (un->un_f_is_fibre) {
7199 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7200 			sd_init_event_callbacks(un);
7201 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7202 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
7203 		}
7204 	}
7205 #endif
7206 
7207 	if (un->un_f_opt_disable_cache == TRUE) {
7208 		/*
7209 		 * Disable both read cache and write cache.  This is
7210 		 * the historic behavior of the keywords in the config file.
7211 		 */
7212 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7213 		    0) {
7214 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7215 			    "sd_unit_attach: un:0x%p Could not disable "
7216 			    "caching", un);
7217 			goto devid_failed;
7218 		}
7219 	}
7220 
7221 	/*
7222 	 * Check the value of the WCE bit now and
7223 	 * set un_f_write_cache_enabled accordingly.
7224 	 */
7225 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7226 	mutex_enter(SD_MUTEX(un));
7227 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7228 	mutex_exit(SD_MUTEX(un));
7229 
7230 	/*
7231 	 * Find out what type of reservation this disk supports.
7232 	 */
7233 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7234 	case 0:
7235 		/*
7236 		 * SCSI-3 reservations are supported.
7237 		 */
7238 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7239 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7240 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7241 		break;
7242 	case ENOTSUP:
7243 		/*
7244 		 * The PERSISTENT RESERVE IN command would not be recognized by
7245 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7246 		 */
7247 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7248 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7249 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7250 		break;
7251 	default:
7252 		/*
7253 		 * default to SCSI-3 reservations
7254 		 */
7255 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7256 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7257 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7258 		break;
7259 	}
7260 
7261 	/*
7262 	 * Set the pstat and error stat values here, so data obtained during the
7263 	 * previous attach-time routines is available.
7264 	 *
7265 	 * Note: This is a critical sequence that needs to be maintained:
7266 	 *	1) Instantiate the kstats before any routines using the iopath
7267 	 *	   (i.e. sd_send_scsi_cmd).
7268 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7269 	 *	   stats (sd_set_pstats) here, following
7270 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7271 	 *	   sd_cache_control().
7272 	 */
7273 
7274 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7275 		sd_set_pstats(un);
7276 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7277 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7278 	}
7279 
7280 	sd_set_errstats(un);
7281 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7282 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7283 
7284 
7285 	/*
7286 	 * After successfully attaching an instance, we record the information
7287 	 * of how many luns have been attached on the corresponding target
7288 	 * and controller for parallel SCSI. This information is used when
7289 	 * sd tries to set the tagged queuing capability in the HBA.
7290 	 */
7291 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7292 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7293 	}
7294 
7295 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7296 	    "sd_unit_attach: un:0x%p exit success\n", un);
7297 
7298 	return (DDI_SUCCESS);
7299 
7300 	/*
7301 	 * An error occurred during the attach; clean up & return failure.
7302 	 */
7303 
7304 devid_failed:
7305 
7306 setup_pm_failed:
7307 	ddi_remove_minor_node(devi, NULL);
7308 
7309 cmlb_attach_failed:
7310 	/*
7311 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7312 	 */
7313 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7314 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7315 
7316 	/*
7317 	 * Refer to the comments of setting tagged-qing in the beginning of
7318 	 * sd_unit_attach. We can only disable tagged queuing when there is
7319 	 * no lun attached on the target.
7320 	 */
7321 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7322 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7323 	}
7324 
7325 	if (un->un_f_is_fibre == FALSE) {
7326 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7327 	}
7328 
7329 spinup_failed:
7330 
7331 	mutex_enter(SD_MUTEX(un));
7332 
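	/*
	 * Note: each timeout below is cancelled with SD_MUTEX dropped; the
	 * timeout id is NULLed first, then untimeout() is called without the
	 * mutex held, since untimeout() may wait for a callback that itself
	 * acquires SD_MUTEX.
	 */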
7333 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
7334 	if (un->un_direct_priority_timeid != NULL) {
7335 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7336 		un->un_direct_priority_timeid = NULL;
7337 		mutex_exit(SD_MUTEX(un));
7338 		(void) untimeout(temp_id);
7339 		mutex_enter(SD_MUTEX(un));
7340 	}
7341 
7342 	/* Cancel any pending start/stop timeouts */
7343 	if (un->un_startstop_timeid != NULL) {
7344 		timeout_id_t temp_id = un->un_startstop_timeid;
7345 		un->un_startstop_timeid = NULL;
7346 		mutex_exit(SD_MUTEX(un));
7347 		(void) untimeout(temp_id);
7348 		mutex_enter(SD_MUTEX(un));
7349 	}
7350 
7351 	/* Cancel any pending reset-throttle timeouts */
7352 	if (un->un_reset_throttle_timeid != NULL) {
7353 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7354 		un->un_reset_throttle_timeid = NULL;
7355 		mutex_exit(SD_MUTEX(un));
7356 		(void) untimeout(temp_id);
7357 		mutex_enter(SD_MUTEX(un));
7358 	}
7359 
7360 	/* Cancel any pending retry timeouts */
7361 	if (un->un_retry_timeid != NULL) {
7362 		timeout_id_t temp_id = un->un_retry_timeid;
7363 		un->un_retry_timeid = NULL;
7364 		mutex_exit(SD_MUTEX(un));
7365 		(void) untimeout(temp_id);
7366 		mutex_enter(SD_MUTEX(un));
7367 	}
7368 
7369 	/* Cancel any pending delayed cv broadcast timeouts */
7370 	if (un->un_dcvb_timeid != NULL) {
7371 		timeout_id_t temp_id = un->un_dcvb_timeid;
7372 		un->un_dcvb_timeid = NULL;
7373 		mutex_exit(SD_MUTEX(un));
7374 		(void) untimeout(temp_id);
7375 		mutex_enter(SD_MUTEX(un));
7376 	}
7377 
7378 	mutex_exit(SD_MUTEX(un));
7379 
7380 	/* There should not be any in-progress I/O so ASSERT this check */
7381 	ASSERT(un->un_ncmds_in_transport == 0);
7382 	ASSERT(un->un_ncmds_in_driver == 0);
7383 
7384 	/* Do not free the softstate if the callback routine is active */
7385 	sd_sync_with_callback(un);
7386 
7387 	/*
7388 	 * Partition stats apparently are not used with removables. These would
7389 	 * not have been created during attach, so no need to clean them up...
7390 	 */
7391 	if (un->un_stats != NULL) {
7392 		kstat_delete(un->un_stats);
7393 		un->un_stats = NULL;
7394 	}
7395 	if (un->un_errstats != NULL) {
7396 		kstat_delete(un->un_errstats);
7397 		un->un_errstats = NULL;
7398 	}
7399 
7400 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7401 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7402 
7403 	ddi_prop_remove_all(devi);
7404 	sema_destroy(&un->un_semoclose);
7405 	cv_destroy(&un->un_state_cv);
7406 
7407 getrbuf_failed:
7408 
7409 	sd_free_rqs(un);
7410 
7411 alloc_rqs_failed:
7412 
7413 	devp->sd_private = NULL;
7414 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7415 
7416 get_softstate_failed:
7417 	/*
7418 	 * Note: the man pages are unclear as to whether or not doing a
7419 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7420 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7421 	 * ddi_get_soft_state() fails.  The implication seems to be
7422 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7423 	 */
7424 	ddi_soft_state_free(sd_state, instance);
7425 
7426 probe_failed:
7427 	scsi_unprobe(devp);
7428 #ifdef SDDEBUG
7429 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
7430 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
7431 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
7432 		    (void *)un);
7433 	}
7434 #endif
7435 	return (DDI_FAILURE);
7436 }
7437 
7438 
7439 /*
7440  *    Function: sd_unit_detach
7441  *
7442  * Description: Performs DDI_DETACH processing for sddetach().
7443  *
7444  * Return Code: DDI_SUCCESS
7445  *		DDI_FAILURE
7446  *
7447  *     Context: Kernel thread context
7448  */
7449 
7450 static int
7451 sd_unit_detach(dev_info_t *devi)
7452 {
7453 	struct scsi_device	*devp;
7454 	struct sd_lun		*un;
7455 	int			i;
7456 	int			tgt;
7457 	dev_t			dev;
7458 	dev_info_t		*pdip = ddi_get_parent(devi);
7459 	int			instance = ddi_get_instance(devi);
7460 
7461 	mutex_enter(&sd_detach_mutex);
7462 
7463 	/*
7464 	 * Fail the detach for any of the following:
7465 	 *  - Unable to get the sd_lun struct for the instance
7466 	 *  - A layered driver has an outstanding open on the instance
7467 	 *  - Another thread is already detaching this instance
7468 	 *  - Another thread is currently performing an open
7469 	 */
7470 	devp = ddi_get_driver_private(devi);
7471 	if ((devp == NULL) ||
7472 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7473 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7474 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7475 		mutex_exit(&sd_detach_mutex);
7476 		return (DDI_FAILURE);
7477 	}
7478 
7479 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7480 
7481 	/*
7482 	 * Mark this instance as currently in a detach, to inhibit any
7483 	 * opens from a layered driver.
7484 	 */
7485 	un->un_detach_count++;
7486 	mutex_exit(&sd_detach_mutex);
7487 
7488 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7489 	    SCSI_ADDR_PROP_TARGET, -1);
7490 
7491 	dev = sd_make_device(SD_DEVINFO(un));
7492 
7493 #ifndef lint
7494 	_NOTE(COMPETING_THREADS_NOW);
7495 #endif
7496 
7497 	mutex_enter(SD_MUTEX(un));
7498 
7499 	/*
7500 	 * Fail the detach if there are any outstanding layered
7501 	 * opens on this device.
7502 	 */
7503 	for (i = 0; i < NDKMAP; i++) {
7504 		if (un->un_ocmap.lyropen[i] != 0) {
7505 			goto err_notclosed;
7506 		}
7507 	}
7508 
7509 	/*
7510 	 * Verify there are NO outstanding commands issued to this device.
7511 	 * ie, un_ncmds_in_transport == 0.
7512 	 * It's possible to have outstanding commands through the physio
7513 	 * code path, even though everything's closed.
7514 	 */
7515 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7516 	    (un->un_direct_priority_timeid != NULL) ||
7517 	    (un->un_state == SD_STATE_RWAIT)) {
7518 		mutex_exit(SD_MUTEX(un));
7519 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7520 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7521 		goto err_stillbusy;
7522 	}
7523 
7524 	/*
7525 	 * If we have the device reserved, release the reservation.
7526 	 */
7527 	if ((un->un_resvd_status & SD_RESERVE) &&
7528 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7529 		mutex_exit(SD_MUTEX(un));
7530 		/*
7531 		 * Note: sd_reserve_release sends a command to the device
7532 		 * via the sd_ioctlcmd() path, and can sleep.
7533 		 */
7534 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7535 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7536 			    "sd_dr_detach: Cannot release reservation\n");
7537 		}
7538 	} else {
7539 		mutex_exit(SD_MUTEX(un));
7540 	}
7541 
7542 	/*
7543 	 * Untimeout any reserve recover, throttle reset, restart unit
7544 	 * and delayed broadcast timeout threads. Protect the timeout pointer
7545 	 * from getting nulled by their callback functions.
7546 	 */
7547 	mutex_enter(SD_MUTEX(un));
7548 	if (un->un_resvd_timeid != NULL) {
7549 		timeout_id_t temp_id = un->un_resvd_timeid;
7550 		un->un_resvd_timeid = NULL;
7551 		mutex_exit(SD_MUTEX(un));
7552 		(void) untimeout(temp_id);
7553 		mutex_enter(SD_MUTEX(un));
7554 	}
7555 
7556 	if (un->un_reset_throttle_timeid != NULL) {
7557 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7558 		un->un_reset_throttle_timeid = NULL;
7559 		mutex_exit(SD_MUTEX(un));
7560 		(void) untimeout(temp_id);
7561 		mutex_enter(SD_MUTEX(un));
7562 	}
7563 
7564 	if (un->un_startstop_timeid != NULL) {
7565 		timeout_id_t temp_id = un->un_startstop_timeid;
7566 		un->un_startstop_timeid = NULL;
7567 		mutex_exit(SD_MUTEX(un));
7568 		(void) untimeout(temp_id);
7569 		mutex_enter(SD_MUTEX(un));
7570 	}
7571 
7572 	if (un->un_dcvb_timeid != NULL) {
7573 		timeout_id_t temp_id = un->un_dcvb_timeid;
7574 		un->un_dcvb_timeid = NULL;
7575 		mutex_exit(SD_MUTEX(un));
7576 		(void) untimeout(temp_id);
7577 	} else {
7578 		mutex_exit(SD_MUTEX(un));
7579 	}
7580 
7581 	/* Remove any pending reservation reclaim requests for this device */
7582 	sd_rmv_resv_reclaim_req(dev);
7583 
7584 	mutex_enter(SD_MUTEX(un));
7585 
7586 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7587 	if (un->un_direct_priority_timeid != NULL) {
7588 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7589 		un->un_direct_priority_timeid = NULL;
7590 		mutex_exit(SD_MUTEX(un));
7591 		(void) untimeout(temp_id);
7592 		mutex_enter(SD_MUTEX(un));
7593 	}
7594 
7595 	/* Cancel any active multi-host disk watch thread requests */
7596 	if (un->un_mhd_token != NULL) {
7597 		mutex_exit(SD_MUTEX(un));
7598 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7599 		if (scsi_watch_request_terminate(un->un_mhd_token,
7600 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7601 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7602 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7603 			/*
7604 			 * Note: We are returning here after having removed
7605 			 * some driver timeouts above. This is consistent with
7606 			 * the legacy implementation but perhaps the watch
7607 			 * terminate call should be made with the wait flag set.
7608 			 */
7609 			goto err_stillbusy;
7610 		}
7611 		mutex_enter(SD_MUTEX(un));
7612 		un->un_mhd_token = NULL;
7613 	}
7614 
7615 	if (un->un_swr_token != NULL) {
7616 		mutex_exit(SD_MUTEX(un));
7617 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7618 		if (scsi_watch_request_terminate(un->un_swr_token,
7619 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7620 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7621 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7622 			/*
7623 			 * Note: We are returning here after having removed
7624 			 * some driver timeouts above. This is consistent with
7625 			 * the legacy implementation but perhaps the watch
7626 			 * terminate call should be made with the wait flag set.
7627 			 */
7628 			goto err_stillbusy;
7629 		}
7630 		mutex_enter(SD_MUTEX(un));
7631 		un->un_swr_token = NULL;
7632 	}
7633 
7634 	mutex_exit(SD_MUTEX(un));
7635 
7636 	/*
7637 	 * Clear any scsi_reset_notifies. We clear the reset notifies
7638 	 * if we have not registered one.
7639 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7640 	 */
7641 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7642 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7643 
7644 	/*
7645 	 * protect the timeout pointers from getting nulled by
7646 	 * their callback functions during the cancellation process.
7647 	 * In such a scenario untimeout can be invoked with a null value.
7648 	 */
7649 	_NOTE(NO_COMPETING_THREADS_NOW);
7650 
7651 	mutex_enter(&un->un_pm_mutex);
7652 	if (un->un_pm_idle_timeid != NULL) {
7653 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7654 		un->un_pm_idle_timeid = NULL;
7655 		mutex_exit(&un->un_pm_mutex);
7656 
7657 		/*
7658 		 * Timeout is active; cancel it.
7659 		 * Note that it'll never be active on a device
7660 		 * that does not support PM therefore we don't
7661 		 * have to check before calling pm_idle_component.
7662 		 */
7663 		(void) untimeout(temp_id);
7664 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7665 		mutex_enter(&un->un_pm_mutex);
7666 	}
7667 
7668 	/*
7669 	 * Check whether there is already a timeout scheduled for power
7670 	 * management. If so, don't lower the power here; that's the
7671 	 * timeout handler's job.
7672 	 */
7673 	if (un->un_pm_timeid != NULL) {
7674 		timeout_id_t temp_id = un->un_pm_timeid;
7675 		un->un_pm_timeid = NULL;
7676 		mutex_exit(&un->un_pm_mutex);
7677 		/*
7678 		 * Timeout is active; cancel it.
7679 		 * Note that it'll never be active on a device
7680 		 * that does not support PM therefore we don't
7681 		 * have to check before calling pm_idle_component.
7682 		 */
7683 		(void) untimeout(temp_id);
7684 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7685 
7686 	} else {
7687 		mutex_exit(&un->un_pm_mutex);
7688 		if ((un->un_f_pm_is_enabled == TRUE) &&
7689 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7690 		    DDI_SUCCESS)) {
7691 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7692 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
7693 			/*
7694 			 * Fix for bug: 4297749, item # 13
7695 			 * The above test now includes a check to see if PM is
7696 			 * supported by this device before calling
7697 			 * pm_lower_power().
7698 			 * Note, the following is not dead code. The call to
7699 			 * pm_lower_power above will generate a call back into
7700 			 * our sdpower routine which might result in a timeout
7701 			 * handler getting activated. Therefore the following
7702 			 * code is valid and necessary.
7703 			 */
7704 			mutex_enter(&un->un_pm_mutex);
7705 			if (un->un_pm_timeid != NULL) {
7706 				timeout_id_t temp_id = un->un_pm_timeid;
7707 				un->un_pm_timeid = NULL;
7708 				mutex_exit(&un->un_pm_mutex);
7709 				(void) untimeout(temp_id);
7710 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7711 			} else {
7712 				mutex_exit(&un->un_pm_mutex);
7713 			}
7714 		}
7715 	}
7716 
7717 	/*
7718 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7719 	 * Relocated here from above to be after the call to
7720 	 * pm_lower_power, which was getting errors.
7721 	 */
7722 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7723 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7724 
7725 	/*
7726 	 * Currently, tagged queuing is supported per target by the HBA.
7727 	 * Setting this per lun instance actually sets the capability of the
7728 	 * whole target in the HBA, which affects the luns already attached
7729 	 * on the same target. So during detach, we can disable this
7730 	 * capability only when this is the last lun left on the target. By
7731 	 * doing this, we assume a target has the same tagged queuing
7732 	 * capability for every lun. The condition can be removed when HBAs
7733 	 * are changed to support per-lun tagged queuing.
7734 	 */
7735 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7736 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7737 	}
7738 
7739 	if (un->un_f_is_fibre == FALSE) {
7740 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7741 	}
7742 
7743 	/*
7744 	 * Remove any event callbacks, fibre only
7745 	 */
7746 	if (un->un_f_is_fibre == TRUE) {
7747 		if ((un->un_insert_event != NULL) &&
7748 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
7749 		    DDI_SUCCESS)) {
7750 			/*
7751 			 * Note: We are returning here after having done
7752 			 * substantial cleanup above. This is consistent
7753 			 * with the legacy implementation but this may not
7754 			 * be the right thing to do.
7755 			 */
7756 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7757 			    "sd_dr_detach: Cannot cancel insert event\n");
7758 			goto err_remove_event;
7759 		}
7760 		un->un_insert_event = NULL;
7761 
7762 		if ((un->un_remove_event != NULL) &&
7763 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
7764 		    DDI_SUCCESS)) {
7765 			/*
7766 			 * Note: We are returning here after having done
7767 			 * substantial cleanup above. This is consistent
7768 			 * with the legacy implementation but this may not
7769 			 * be the right thing to do.
7770 			 */
7771 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7772 			    "sd_dr_detach: Cannot cancel remove event\n");
7773 			goto err_remove_event;
7774 		}
7775 		un->un_remove_event = NULL;
7776 	}
7777 
7778 	/* Do not free the softstate if the callback routine is active */
7779 	sd_sync_with_callback(un);
7780 
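	/* Detach from cmlb and release the label handle allocated at attach. */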
7781 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7782 	cmlb_free_handle(&un->un_cmlbhandle);
7783 
7784 	/*
7785 	 * Hold the detach mutex here, to make sure that no other threads ever
7786 	 * can access a (partially) freed soft state structure.
7787 	 */
7788 	mutex_enter(&sd_detach_mutex);
7789 
7790 	/*
7791 	 * Clean up the soft state struct.
7792 	 * Cleanup is done in reverse order of allocs/inits.
7793 	 * At this point there should be no competing threads anymore.
7794 	 */
7795 
7796 	/* Unregister and free device id. */
7797 	ddi_devid_unregister(devi);
7798 	if (un->un_devid) {
7799 		ddi_devid_free(un->un_devid);
7800 		un->un_devid = NULL;
7801 	}
7802 
7803 	/*
7804 	 * Destroy wmap cache if it exists.
7805 	 */
7806 	if (un->un_wm_cache != NULL) {
7807 		kmem_cache_destroy(un->un_wm_cache);
7808 		un->un_wm_cache = NULL;
7809 	}
7810 
7811 	/*
7812 	 * kstat cleanup is done in detach for all device types (4363169).
7813 	 * We do not want to fail detach if the device kstats are not deleted
7814 	 * since there is a confusion about the devo_refcnt for the device.
7815 	 * We just delete the kstats and let detach complete successfully.
7816 	 */
7817 	if (un->un_stats != NULL) {
7818 		kstat_delete(un->un_stats);
7819 		un->un_stats = NULL;
7820 	}
7821 	if (un->un_errstats != NULL) {
7822 		kstat_delete(un->un_errstats);
7823 		un->un_errstats = NULL;
7824 	}
7825 
7826 	/* Remove partition stats */
7827 	if (un->un_f_pkstats_enabled) {
7828 		for (i = 0; i < NSDMAP; i++) {
7829 			if (un->un_pstats[i] != NULL) {
7830 				kstat_delete(un->un_pstats[i]);
7831 				un->un_pstats[i] = NULL;
7832 			}
7833 		}
7834 	}
7835 
7836 	/* Remove xbuf registration */
7837 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7838 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7839 
7840 	/* Remove driver properties */
7841 	ddi_prop_remove_all(devi);
7842 
7843 	mutex_destroy(&un->un_pm_mutex);
7844 	cv_destroy(&un->un_pm_busy_cv);
7845 
7846 	cv_destroy(&un->un_wcc_cv);
7847 
7848 	/* Open/close semaphore */
7849 	sema_destroy(&un->un_semoclose);
7850 
7851 	/* Removable media condvar. */
7852 	cv_destroy(&un->un_state_cv);
7853 
7854 	/* Suspend/resume condvar. */
7855 	cv_destroy(&un->un_suspend_cv);
7856 	cv_destroy(&un->un_disk_busy_cv);
7857 
7858 	sd_free_rqs(un);
7859 
7860 	/* Free up soft state */
7861 	devp->sd_private = NULL;
7862 
7863 	bzero(un, sizeof (struct sd_lun));
7864 	ddi_soft_state_free(sd_state, instance);
7865 
7866 	mutex_exit(&sd_detach_mutex);
7867 
7868 	/* This frees up the INQUIRY data associated with the device. */
7869 	scsi_unprobe(devp);
7870 
7871 	/*
7872 	 * After successfully detaching an instance, we update the information
7873 	 * of how many luns remain attached on the corresponding target and
7874 	 * controller for parallel SCSI. This information is used when sd
7875 	 * tries to set the tagged queuing capability in the HBA.
7876 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7877 	 * check if the device is parallel SCSI. However, we don't need to
7878 	 * check here because we've already checked during attach. No device
7879 	 * that is not parallel SCSI is in the chain.
7880 	 */
7881 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7882 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7883 	}
7884 
7885 	return (DDI_SUCCESS);
7886 
7887 err_notclosed:
7888 	mutex_exit(SD_MUTEX(un));
7889 
7890 err_stillbusy:
7891 	_NOTE(NO_COMPETING_THREADS_NOW);
7892 
7893 err_remove_event:
7894 	mutex_enter(&sd_detach_mutex);
7895 	un->un_detach_count--;
7896 	mutex_exit(&sd_detach_mutex);
7897 
7898 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
7899 	return (DDI_FAILURE);
7900 }
7901 
7902 
7903 /*
7904  *    Function: sd_create_errstats
7905  *
7906  * Description: This routine instantiates the device error stats.
7907  *
7908  *		Note: During attach the stats are instantiated first so they are
7909  *		available for attach-time routines that utilize the driver
7910  *		iopath to send commands to the device. The stats are initialized
7911  *		separately so data obtained during some attach-time routines is
7912  *		available. (4362483)
7913  *
7914  *   Arguments: un - driver soft state (unit) structure
7915  *		instance - driver instance
7916  *
7917  *     Context: Kernel thread context
7918  */
7919 
7920 static void
7921 sd_create_errstats(struct sd_lun *un, int instance)
7922 {
7923 	struct	sd_errstats	*stp;
7924 	char	kstatmodule_err[KSTAT_STRLEN];
7925 	char	kstatname[KSTAT_STRLEN];
7926 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
7927 
7928 	ASSERT(un != NULL);
7929 
7930 	if (un->un_errstats != NULL) {
7931 		return;
7932 	}
7933 
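	/* Build the kstat module and instance names, e.g. "sderr"/"sd0,err". */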
7934 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
7935 	    "%serr", sd_label);
7936 	(void) snprintf(kstatname, sizeof (kstatname),
7937 	    "%s%d,err", sd_label, instance);
7938 
7939 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
7940 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
7941 
7942 	if (un->un_errstats == NULL) {
7943 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7944 		    "sd_create_errstats: Failed kstat_create\n");
7945 		return;
7946 	}
7947 
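	/*
	 * Name each statistic; these values back the output of 'iostat -E'.
	 */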
7948 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7949 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
7950 	    KSTAT_DATA_UINT32);
7951 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
7952 	    KSTAT_DATA_UINT32);
7953 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
7954 	    KSTAT_DATA_UINT32);
7955 	kstat_named_init(&stp->sd_vid,		"Vendor",
7956 	    KSTAT_DATA_CHAR);
7957 	kstat_named_init(&stp->sd_pid,		"Product",
7958 	    KSTAT_DATA_CHAR);
7959 	kstat_named_init(&stp->sd_revision,	"Revision",
7960 	    KSTAT_DATA_CHAR);
7961 	kstat_named_init(&stp->sd_serial,	"Serial No",
7962 	    KSTAT_DATA_CHAR);
7963 	kstat_named_init(&stp->sd_capacity,	"Size",
7964 	    KSTAT_DATA_ULONGLONG);
7965 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
7966 	    KSTAT_DATA_UINT32);
7967 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
7968 	    KSTAT_DATA_UINT32);
7969 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
7970 	    KSTAT_DATA_UINT32);
7971 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
7972 	    KSTAT_DATA_UINT32);
7973 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
7974 	    KSTAT_DATA_UINT32);
7975 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
7976 	    KSTAT_DATA_UINT32);
7977 
7978 	un->un_errstats->ks_private = un;
7979 	un->un_errstats->ks_update  = nulldev;
7980 
7981 	kstat_install(un->un_errstats);
7982 }
7983 
7984 
7985 /*
7986  *    Function: sd_set_errstats
7987  *
7988  * Description: This routine sets the value of the vendor id, product id,
7989  *		revision, serial number, and capacity device error stats.
7990  *
7991  *		Note: During attach the stats are instantiated first so they are
7992  *		available for attach-time routines that utilize the driver
7993  *		iopath to send commands to the device. The stats are initialized
7994  *		separately so data obtained during some attach-time routines is
7995  *		available. (4362483)
7996  *
7997  *   Arguments: un - driver soft state (unit) structure
7998  *
7999  *     Context: Kernel thread context
8000  */
8001 
8002 static void
8003 sd_set_errstats(struct sd_lun *un)
8004 {
8005 	struct	sd_errstats	*stp;
8006 
8007 	ASSERT(un != NULL);
8008 	ASSERT(un->un_errstats != NULL);
8009 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8010 	ASSERT(stp != NULL);
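	/*
	 * Copy the fixed-width INQUIRY vendor (8 bytes), product (16 bytes),
	 * and revision (4 bytes) strings into the named stats.
	 */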
8011 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8012 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8013 	(void) strncpy(stp->sd_revision.value.c,
8014 	    un->un_sd->sd_inq->inq_revision, 4);
8015 
8016 	/*
8017 	 * All the errstats are persistent across detach/attach, so
8018 	 * reset them all here in case a disk drive has been
8019 	 * hot-replaced, except for an unchanged Sun-qualified
8020 	 * drive.
8021 	 */
8022 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8023 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8024 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8025 		stp->sd_softerrs.value.ui32 = 0;
8026 		stp->sd_harderrs.value.ui32 = 0;
8027 		stp->sd_transerrs.value.ui32 = 0;
8028 		stp->sd_rq_media_err.value.ui32 = 0;
8029 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8030 		stp->sd_rq_nodev_err.value.ui32 = 0;
8031 		stp->sd_rq_recov_err.value.ui32 = 0;
8032 		stp->sd_rq_illrq_err.value.ui32 = 0;
8033 		stp->sd_rq_pfa_err.value.ui32 = 0;
8034 	}
8035 
8036 	/*
8037 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8038 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8039 	 * (4376302))
8040 	 */
8041 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8042 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8043 		    sizeof (SD_INQUIRY(un)->inq_serial));
8044 	}
8045 
8046 	if (un->un_f_blockcount_is_valid != TRUE) {
8047 		/*
8048 		 * Set capacity error stat to 0 for no media. This ensures
8049 		 * a valid capacity is displayed in response to 'iostat -E'
8050 		 * when no media is present in the device.
8051 		 */
8052 		stp->sd_capacity.value.ui64 = 0;
8053 	} else {
8054 		/*
8055 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8056 		 * capacity.
8057 		 *
8058 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8059 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8060 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8061 		 */
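		/*
		 * Worked example (illustration only, assuming the usual
		 * 512-byte un_sys_blocksize): a device with a 4096-byte
		 * target blocksize and 1000000 target blocks has
		 * un_blockcount scaled to 1000000 * (4096 / 512) = 8000000,
		 * so the capacity set below is 8000000 * 512 = 4096000000
		 * bytes.
		 */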
8062 		stp->sd_capacity.value.ui64 = (uint64_t)
8063 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8064 	}
8065 }
8066 
8067 
8068 /*
8069  *    Function: sd_set_pstats
8070  *
8071  * Description: This routine instantiates and initializes the partition
8072  *              stats for each partition with more than zero blocks.
8073  *		(4363169)
8074  *
8075  *   Arguments: un - driver soft state (unit) structure
8076  *
8077  *     Context: Kernel thread context
8078  */
8079 
8080 static void
8081 sd_set_pstats(struct sd_lun *un)
8082 {
8083 	char	kstatname[KSTAT_STRLEN];
8084 	int	instance;
8085 	int	i;
8086 	diskaddr_t	nblks = 0;
8087 	char	*partname = NULL;
8088 
8089 	ASSERT(un != NULL);
8090 
8091 	instance = ddi_get_instance(SD_DEVINFO(un));
8092 
8093 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8094 	for (i = 0; i < NSDMAP; i++) {
8095 
8096 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8097 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8098 			continue;
8099 		mutex_enter(SD_MUTEX(un));
8100 
8101 		if ((un->un_pstats[i] == NULL) &&
8102 		    (nblks != 0)) {
8103 
8104 			(void) snprintf(kstatname, sizeof (kstatname),
8105 			    "%s%d,%s", sd_label, instance,
8106 			    partname);
8107 
8108 			un->un_pstats[i] = kstat_create(sd_label,
8109 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8110 			    1, KSTAT_FLAG_PERSISTENT);
8111 			if (un->un_pstats[i] != NULL) {
8112 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8113 				kstat_install(un->un_pstats[i]);
8114 			}
8115 		}
8116 		mutex_exit(SD_MUTEX(un));
8117 	}
8118 }
8119 
8120 
8121 #if (defined(__fibre))
8122 /*
8123  *    Function: sd_init_event_callbacks
8124  *
8125  * Description: This routine initializes the insertion and removal event
8126  *		callbacks. (fibre only)
8127  *
8128  *   Arguments: un - driver soft state (unit) structure
8129  *
8130  *     Context: Kernel thread context
8131  */
8132 
8133 static void
8134 sd_init_event_callbacks(struct sd_lun *un)
8135 {
8136 	ASSERT(un != NULL);
8137 
8138 	if ((un->un_insert_event == NULL) &&
8139 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8140 	    &un->un_insert_event) == DDI_SUCCESS)) {
8141 		/*
8142 		 * Add the callback for an insertion event
8143 		 */
8144 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8145 		    un->un_insert_event, sd_event_callback, (void *)un,
8146 		    &(un->un_insert_cb_id));
8147 	}
8148 
8149 	if ((un->un_remove_event == NULL) &&
8150 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8151 	    &un->un_remove_event) == DDI_SUCCESS)) {
8152 		/*
8153 		 * Add the callback for a removal event
8154 		 */
8155 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8156 		    un->un_remove_event, sd_event_callback, (void *)un,
8157 		    &(un->un_remove_cb_id));
8158 	}
8159 }
8160 
8161 
8162 /*
8163  *    Function: sd_event_callback
8164  *
8165  * Description: This routine handles insert/remove events (photon). The
8166  *		state is changed to OFFLINE, which can be used to suppress
8167  *		error messages. (fibre only)
8168  *
8169  *   Arguments: un - driver soft state (unit) structure
8170  *
8171  *     Context: Callout thread context
8172  */
8173 /* ARGSUSED */
8174 static void
8175 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8176     void *bus_impldata)
8177 {
8178 	struct sd_lun *un = (struct sd_lun *)arg;
8179 
8180 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8181 	if (event == un->un_insert_event) {
8182 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8183 		mutex_enter(SD_MUTEX(un));
8184 		if (un->un_state == SD_STATE_OFFLINE) {
8185 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8186 				un->un_state = un->un_last_state;
8187 			} else {
8188 				/*
8189 				 * We have gone through SUSPEND/RESUME while
8190 				 * we were offline. Restore the last state
8191 				 */
8192 				un->un_state = un->un_save_state;
8193 			}
8194 		}
8195 		mutex_exit(SD_MUTEX(un));
8196 
8197 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8198 	} else if (event == un->un_remove_event) {
8199 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8200 		mutex_enter(SD_MUTEX(un));
8201 		/*
8202 		 * We need to handle an event callback that occurs during
8203 		 * the suspend operation, since we don't prevent it.
8204 		 */
8205 		if (un->un_state != SD_STATE_OFFLINE) {
8206 			if (un->un_state != SD_STATE_SUSPENDED) {
8207 				New_state(un, SD_STATE_OFFLINE);
8208 			} else {
8209 				un->un_last_state = SD_STATE_OFFLINE;
8210 			}
8211 		}
8212 		mutex_exit(SD_MUTEX(un));
8213 	} else {
8214 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8215 		    "!Unknown event\n");
8216 	}
8217 
8218 }
8219 #endif
8220 
8221 /*
8222  *    Function: sd_cache_control()
8223  *
8224  * Description: This routine is the driver entry point for setting
8225  *		read and write caching by modifying the WCE (write cache
8226  *		enable) and RCD (read cache disable) bits of mode
8227  *		page 8 (MODEPAGE_CACHING).
8228  *
8229  *   Arguments: un - driver soft state (unit) structure
8230  *		rcd_flag - flag for controlling the read cache
8231  *		wce_flag - flag for controlling the write cache
8232  *
8233  * Return Code: EIO
8234  *		code returned by sd_send_scsi_MODE_SENSE and
8235  *		sd_send_scsi_MODE_SELECT
8236  *
8237  *     Context: Kernel Thread
8238  */
8239 
8240 static int
8241 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8242 {
8243 	struct mode_caching	*mode_caching_page;
8244 	uchar_t			*header;
8245 	size_t			buflen;
8246 	int			hdrlen;
8247 	int			bd_len;
8248 	int			rval = 0;
8249 	struct mode_header_grp2	*mhp;
8250 
8251 	ASSERT(un != NULL);
8252 
8253 	/*
8254 	 * Do a test unit ready, otherwise a mode sense may not work if this
8255 	 * is the first command sent to the device after boot.
8256 	 */
8257 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8258 
8259 	if (un->un_f_cfg_is_atapi == TRUE) {
8260 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8261 	} else {
8262 		hdrlen = MODE_HEADER_LENGTH;
8263 	}
8264 
8265 	/*
8266 	 * Allocate memory for the retrieved mode page and its headers.  Set
8267 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
8268 	 * we get all of the mode sense data; otherwise, the mode select
8269 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8270 	 */
8271 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8272 	    sizeof (struct mode_cache_scsi3);
8273 
8274 	header = kmem_zalloc(buflen, KM_SLEEP);
8275 
8276 	/* Get the information from the device. */
8277 	if (un->un_f_cfg_is_atapi == TRUE) {
8278 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8279 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8280 	} else {
8281 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8282 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8283 	}
8284 	if (rval != 0) {
8285 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8286 		    "sd_cache_control: Mode Sense Failed\n");
8287 		kmem_free(header, buflen);
8288 		return (rval);
8289 	}
8290 
8291 	/*
8292 	 * Determine size of Block Descriptors in order to locate
8293 	 * the mode page data. ATAPI devices return 0, SCSI devices
8294 	 * should return MODE_BLK_DESC_LENGTH.
8295 	 */
8296 	if (un->un_f_cfg_is_atapi == TRUE) {
8297 		mhp	= (struct mode_header_grp2 *)header;
8298 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8299 	} else {
8300 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8301 	}
8302 
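	/*
	 * For example (illustration only): a group 2 header with
	 * bdesc_length_hi == 0 and bdesc_length_lo == 8 yields
	 * bd_len = (0 << 8) | 8 = 8, i.e. MODE_BLK_DESC_LENGTH.
	 */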
8303 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8304 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8305 		    "sd_cache_control: Mode Sense returned invalid "
8306 		    "block descriptor length\n");
8307 		kmem_free(header, buflen);
8308 		return (EIO);
8309 	}
8310 
8311 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8312 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8313 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8314 		    " caching page code mismatch %d\n",
8315 		    mode_caching_page->mode_page.code);
8316 		kmem_free(header, buflen);
8317 		return (EIO);
8318 	}
8319 
8320 	/* Check the relevant bits on successful mode sense. */
8321 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8322 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8323 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8324 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8325 
8326 		size_t sbuflen;
8327 		uchar_t save_pg;
8328 
8329 		/*
8330 		 * Construct select buffer length based on the
8331 		 * length of the sense data returned.
8332 		 */
8333 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
8334 		    sizeof (struct mode_page) +
8335 		    (int)mode_caching_page->mode_page.length;
8336 
8337 		/*
8338 		 * Set the caching bits as requested.
8339 		 */
8340 		if (rcd_flag == SD_CACHE_ENABLE)
8341 			mode_caching_page->rcd = 0;
8342 		else if (rcd_flag == SD_CACHE_DISABLE)
8343 			mode_caching_page->rcd = 1;
8344 
8345 		if (wce_flag == SD_CACHE_ENABLE)
8346 			mode_caching_page->wce = 1;
8347 		else if (wce_flag == SD_CACHE_DISABLE)
8348 			mode_caching_page->wce = 0;
8349 
8350 		/*
8351 		 * Save the page if the mode sense says the
8352 		 * drive supports it.
8353 		 */
8354 		save_pg = mode_caching_page->mode_page.ps ?
8355 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8356 
8357 		/* Clear reserved bits before mode select. */
8358 		mode_caching_page->mode_page.ps = 0;
8359 
8360 		/*
8361 		 * Clear out mode header for mode select.
8362 		 * The rest of the retrieved page will be reused.
8363 		 */
8364 		bzero(header, hdrlen);
8365 
8366 		if (un->un_f_cfg_is_atapi == TRUE) {
8367 			mhp = (struct mode_header_grp2 *)header;
8368 			mhp->bdesc_length_hi = bd_len >> 8;
8369 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8370 		} else {
8371 			((struct mode_header *)header)->bdesc_length = bd_len;
8372 		}
8373 
8374 		/* Issue mode select to change the cache settings */
8375 		if (un->un_f_cfg_is_atapi == TRUE) {
8376 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8377 			    sbuflen, save_pg, SD_PATH_DIRECT);
8378 		} else {
8379 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8380 			    sbuflen, save_pg, SD_PATH_DIRECT);
8381 		}
8382 	}
8383 
8384 	kmem_free(header, buflen);
8385 	return (rval);
8386 }
8387 
8388 
8389 /*
8390  *    Function: sd_get_write_cache_enabled()
8391  *
8392  * Description: This routine is the driver entry point for determining if
8393  *		write caching is enabled.  It examines the WCE (write cache
8394  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8395  *
8396  *   Arguments: un - driver soft state (unit) structure
8397  *		is_enabled - pointer to int where write cache enabled state
8398  *		is returned (non-zero -> write cache enabled)
8399  *
8400  *
8401  * Return Code: EIO
8402  *		code returned by sd_send_scsi_MODE_SENSE
8403  *
8404  *     Context: Kernel Thread
8405  *
8406  * NOTE: If ioctl is added to disable write cache, this sequence should
8407  * be followed so that no locking is required for accesses to
8408  * un->un_f_write_cache_enabled:
8409  * 	do mode select to clear wce
8410  * 	do synchronize cache to flush cache
8411  * 	set un->un_f_write_cache_enabled = FALSE
8412  *
8413  * Conversely, an ioctl to enable the write cache should be done
8414  * in this order:
8415  * 	set un->un_f_write_cache_enabled = TRUE
8416  * 	do mode select to set wce
8417  */
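
/*
 * A hedged sketch of the cache-disable sequence described in the note
 * above, using routines defined in this file (SD_CACHE_NOCHANGE is
 * assumed to be the "leave this cache alone" flag value; error handling
 * is elided):
 *
 *	rval = sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_DISABLE);
 *	if (rval == 0 &&
 *	    sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL) == 0) {
 *		un->un_f_write_cache_enabled = FALSE;
 *	}
 */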
8418 
8419 static int
8420 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8421 {
8422 	struct mode_caching	*mode_caching_page;
8423 	uchar_t			*header;
8424 	size_t			buflen;
8425 	int			hdrlen;
8426 	int			bd_len;
8427 	int			rval = 0;
8428 
8429 	ASSERT(un != NULL);
8430 	ASSERT(is_enabled != NULL);
8431 
8432 	/* in case of error, flag as enabled */
8433 	*is_enabled = TRUE;
8434 
8435 	/*
8436 	 * Do a test unit ready, otherwise a mode sense may not work if this
8437 	 * is the first command sent to the device after boot.
8438 	 */
8439 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8440 
8441 	if (un->un_f_cfg_is_atapi == TRUE) {
8442 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8443 	} else {
8444 		hdrlen = MODE_HEADER_LENGTH;
8445 	}
8446 
8447 	/*
8448 	 * Allocate memory for the retrieved mode page and its headers.  Set
8449 	 * a pointer to the page itself.
8450 	 */
8451 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8452 	header = kmem_zalloc(buflen, KM_SLEEP);
8453 
8454 	/* Get the information from the device. */
8455 	if (un->un_f_cfg_is_atapi == TRUE) {
8456 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8457 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8458 	} else {
8459 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8460 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8461 	}
8462 	if (rval != 0) {
8463 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8464 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8465 		kmem_free(header, buflen);
8466 		return (rval);
8467 	}
8468 
8469 	/*
8470 	 * Determine size of Block Descriptors in order to locate
8471 	 * the mode page data. ATAPI devices return 0, SCSI devices
8472 	 * should return MODE_BLK_DESC_LENGTH.
8473 	 */
8474 	if (un->un_f_cfg_is_atapi == TRUE) {
8475 		struct mode_header_grp2	*mhp;
8476 		mhp	= (struct mode_header_grp2 *)header;
8477 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8478 	} else {
8479 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8480 	}
8481 
8482 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8483 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8484 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8485 		    "block descriptor length\n");
8486 		kmem_free(header, buflen);
8487 		return (EIO);
8488 	}
8489 
8490 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8491 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8492 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8493 		    " caching page code mismatch %d\n",
8494 		    mode_caching_page->mode_page.code);
8495 		kmem_free(header, buflen);
8496 		return (EIO);
8497 	}
8498 	*is_enabled = mode_caching_page->wce;
8499 
8500 	kmem_free(header, buflen);
8501 	return (0);
8502 }
8503 
8504 
8505 /*
8506  *    Function: sd_make_device
8507  *
8508  * Description: Utility routine to return the Solaris device number from
8509  *		the data in the device's dev_info structure.
8510  *
8511  * Return Code: The Solaris device number
8512  *
8513  *     Context: Any
8514  */
8515 
8516 static dev_t
8517 sd_make_device(dev_info_t *devi)
8518 {
8519 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8520 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8521 }
8522 
8523 
8524 /*
8525  *    Function: sd_pm_entry
8526  *
8527  * Description: Called at the start of a new command to manage power
8528  *		and busy status of a device. This includes determining whether
8529  *		the current power state of the device is sufficient for
8530  *		performing the command or whether it must be changed.
8531  *		The PM framework is notified appropriately.
8532  *		Only with a return status of DDI_SUCCESS will the
8533  *		component be marked busy to the framework.
8534  *
8535  *		All callers of sd_pm_entry must check the return status
8536  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
8537  *		of DDI_FAILURE indicates the device failed to power up.
8538  *		In this case un_pm_count has been adjusted so the result
8539  *		on exit is still powered down, i.e. the count is less than 0.
8540  *		Calling sd_pm_exit with this count value hits an ASSERT.
8541  *
8542  * Return Code: DDI_SUCCESS or DDI_FAILURE
8543  *
8544  *     Context: Kernel thread context.
8545  */
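
/*
 * Illustrative caller pattern (a sketch only; compare the actual
 * callers in sdopen() and sdclose() below):
 *
 *	if (sd_pm_entry(un) != DDI_SUCCESS) {
 *		return (EIO);		<- device failed to power up
 *	}
 *	... issue commands to the device ...
 *	sd_pm_exit(un);			<- only after a DDI_SUCCESS entry
 */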
8546 
8547 static int
8548 sd_pm_entry(struct sd_lun *un)
8549 {
8550 	int return_status = DDI_SUCCESS;
8551 
8552 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8553 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8554 
8555 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8556 
8557 	if (un->un_f_pm_is_enabled == FALSE) {
8558 		SD_TRACE(SD_LOG_IO_PM, un,
8559 		    "sd_pm_entry: exiting, PM not enabled\n");
8560 		return (return_status);
8561 	}
8562 
8563 	/*
8564 	 * Just increment a counter if PM is enabled. On the transition from
8565 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8566 	 * the count with each IO and mark the device as idle when the count
8567 	 * hits 0.
8568 	 *
8569 	 * If the count is less than 0 the device is powered down. If a powered
8570 	 * down device is successfully powered up then the count must be
8571 	 * incremented to reflect the power up. Note that it'll get incremented
8572 	 * a second time to become busy.
8573 	 *
8574 	 * Because the following has the potential to change the device state
8575 	 * and must release the un_pm_mutex to do so, only one thread can be
8576 	 * allowed through at a time.
8577 	 */
8578 
8579 	mutex_enter(&un->un_pm_mutex);
8580 	while (un->un_pm_busy == TRUE) {
8581 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8582 	}
8583 	un->un_pm_busy = TRUE;
8584 
8585 	if (un->un_pm_count < 1) {
8586 
8587 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8588 
8589 		/*
8590 		 * Indicate we are now busy so the framework won't attempt to
8591 		 * power down the device. This call will only fail if either
8592 		 * we passed a bad component number or the device has no
8593 		 * components. Neither of these should ever happen.
8594 		 */
8595 		mutex_exit(&un->un_pm_mutex);
8596 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8597 		ASSERT(return_status == DDI_SUCCESS);
8598 
8599 		mutex_enter(&un->un_pm_mutex);
8600 
8601 		if (un->un_pm_count < 0) {
8602 			mutex_exit(&un->un_pm_mutex);
8603 
8604 			SD_TRACE(SD_LOG_IO_PM, un,
8605 			    "sd_pm_entry: power up component\n");
8606 
8607 			/*
8608 			 * pm_raise_power will cause sdpower to be called
8609 			 * which brings the device power level to the
8610 			 * desired state, ON in this case. If successful,
8611 			 * un_pm_count and un_power_level will be updated
8612 			 * appropriately.
8613 			 */
8614 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8615 			    SD_SPINDLE_ON);
8616 
8617 			mutex_enter(&un->un_pm_mutex);
8618 
8619 			if (return_status != DDI_SUCCESS) {
8620 				/*
8621 				 * Power up failed.
8622 				 * Idle the device and adjust the count
8623 				 * so the result on exit is that we're
8624 				 * still powered down, i.e. the count is less than 0.
8625 				 */
8626 				SD_TRACE(SD_LOG_IO_PM, un,
8627 				    "sd_pm_entry: power up failed,"
8628 				    " idle the component\n");
8629 
8630 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8631 				un->un_pm_count--;
8632 			} else {
8633 				/*
8634 				 * Device is powered up, verify the
8635 				 * count is non-negative.
8636 				 * This is debug only.
8637 				 */
8638 				ASSERT(un->un_pm_count == 0);
8639 			}
8640 		}
8641 
8642 		if (return_status == DDI_SUCCESS) {
8643 			/*
8644 			 * For performance, now that the device has been tagged
8645 			 * as busy, and it's known to be powered up, update the
8646 			 * chain types to use jump tables that do not include
8647 			 * pm. This significantly lowers the overhead and
8648 			 * therefore improves performance.
8649 			 */
8650 
8651 			mutex_exit(&un->un_pm_mutex);
8652 			mutex_enter(SD_MUTEX(un));
8653 			SD_TRACE(SD_LOG_IO_PM, un,
8654 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8655 			    un->un_uscsi_chain_type);
8656 
8657 			if (un->un_f_non_devbsize_supported) {
8658 				un->un_buf_chain_type =
8659 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8660 			} else {
8661 				un->un_buf_chain_type =
8662 				    SD_CHAIN_INFO_DISK_NO_PM;
8663 			}
8664 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8665 
8666 			SD_TRACE(SD_LOG_IO_PM, un,
8667 			    "             changed  uscsi_chain_type to   %d\n",
8668 			    un->un_uscsi_chain_type);
8669 			mutex_exit(SD_MUTEX(un));
8670 			mutex_enter(&un->un_pm_mutex);
8671 
8672 			if (un->un_pm_idle_timeid == NULL) {
8673 				/* 300 ms. */
8674 				un->un_pm_idle_timeid =
8675 				    timeout(sd_pm_idletimeout_handler, un,
8676 				    (drv_usectohz((clock_t)300000)));
8677 				/*
8678 				 * Include an extra call to busy which keeps the
8679 				 * device busy with respect to the PM layer
8680 				 * until the timer fires, at which time it'll
8681 				 * get the extra idle call.
8682 				 */
8683 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8684 			}
8685 		}
8686 	}
8687 	un->un_pm_busy = FALSE;
8688 	/* Next... */
8689 	cv_signal(&un->un_pm_busy_cv);
8690 
8691 	un->un_pm_count++;
8692 
8693 	SD_TRACE(SD_LOG_IO_PM, un,
8694 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8695 
8696 	mutex_exit(&un->un_pm_mutex);
8697 
8698 	return (return_status);
8699 }
8700 
8701 
8702 /*
8703  *    Function: sd_pm_exit
8704  *
8705  * Description: Called at the completion of a command to manage busy
8706  *		status for the device. If the device becomes idle the
8707  *		PM framework is notified.
8708  *
8709  *     Context: Kernel thread context
8710  */
8711 
8712 static void
8713 sd_pm_exit(struct sd_lun *un)
8714 {
8715 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8716 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8717 
8718 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8719 
8720 	/*
8721 	 * After attach the following flag is only read, so don't
8722 	 * take the penalty of acquiring a mutex for it.
8723 	 */
8724 	if (un->un_f_pm_is_enabled == TRUE) {
8725 
8726 		mutex_enter(&un->un_pm_mutex);
8727 		un->un_pm_count--;
8728 
8729 		SD_TRACE(SD_LOG_IO_PM, un,
8730 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8731 
8732 		ASSERT(un->un_pm_count >= 0);
8733 		if (un->un_pm_count == 0) {
8734 			mutex_exit(&un->un_pm_mutex);
8735 
8736 			SD_TRACE(SD_LOG_IO_PM, un,
8737 			    "sd_pm_exit: idle component\n");
8738 
8739 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8740 
8741 		} else {
8742 			mutex_exit(&un->un_pm_mutex);
8743 		}
8744 	}
8745 
8746 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8747 }
8748 
8749 
8750 /*
8751  *    Function: sdopen
8752  *
8753  * Description: Driver's open(9e) entry point function.
8754  *
8755  *   Arguments: dev_p   - pointer to device number
8756  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8757  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8758  *		cred_p  - user credential pointer
8759  *
8760  * Return Code: EINVAL
8761  *		ENXIO
8762  *		EIO
8763  *		EROFS
8764  *		EBUSY
8765  *
8766  *     Context: Kernel thread context
8767  */
8768 /* ARGSUSED */
8769 static int
8770 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8771 {
8772 	struct sd_lun	*un;
8773 	int		nodelay;
8774 	int		part;
8775 	uint64_t	partmask;
8776 	int		instance;
8777 	dev_t		dev;
8778 	int		rval = EIO;
8779 	diskaddr_t	nblks = 0;
8780 
8781 	/* Validate the open type */
8782 	if (otyp >= OTYPCNT) {
8783 		return (EINVAL);
8784 	}
8785 
8786 	dev = *dev_p;
8787 	instance = SDUNIT(dev);
8788 	mutex_enter(&sd_detach_mutex);
8789 
8790 	/*
8791 	 * Fail the open if there is no softstate for the instance, or
8792 	 * if another thread somewhere is trying to detach the instance.
8793 	 */
8794 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
8795 	    (un->un_detach_count != 0)) {
8796 		mutex_exit(&sd_detach_mutex);
8797 		/*
8798 		 * The probe cache only needs to be cleared when open (9e) fails
8799 		 * with ENXIO (4238046).
8800 		 */
8801 		/*
8802 		 * Unconditionally clearing the probe cache is ok with
8803 		 * separate sd/ssd binaries; on the x86 platform, where
8804 		 * parallel and fibre are handled in one binary, it can
8805 		 * be an issue.
8806 		 */
8807 		sd_scsi_clear_probe_cache();
8808 		return (ENXIO);
8809 	}
8810 
8811 	/*
8812 	 * The un_layer_count is to prevent another thread in specfs from
8813 	 * trying to detach the instance, which can happen when we are
8814 	 * called from a higher-layer driver instead of through specfs.
8815 	 * This will not be needed when DDI provides a layered driver
8816 	 * interface that allows specfs to know that an instance is in
8817 	 * use by a layered driver & should not be detached.
8818 	 *
8819 	 * Note: the semantics for layered driver opens are exactly one
8820 	 * close for every open.
8821 	 */
8822 	if (otyp == OTYP_LYR) {
8823 		un->un_layer_count++;
8824 	}
8825 
8826 	/*
8827 	 * Keep a count of the current # of opens in progress. This is because
8828 	 * some layered drivers try to call us as a regular open. This can
8829 	 * cause problems that we cannot prevent; however, by keeping this count
8830 	 * we can at least keep our open and detach routines from racing against
8831 	 * each other under such conditions.
8832 	 */
8833 	un->un_opens_in_progress++;
8834 	mutex_exit(&sd_detach_mutex);
8835 
8836 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
8837 	part	 = SDPART(dev);
8838 	partmask = 1 << part;
8839 
8840 	/*
8841 	 * We use a semaphore here in order to serialize
8842 	 * open and close requests on the device.
8843 	 */
8844 	sema_p(&un->un_semoclose);
8845 
8846 	mutex_enter(SD_MUTEX(un));
8847 
8848 	/*
8849 	 * All device accesses go through sdstrategy(), where we check
8850 	 * the suspend status, but a scsi_poll command can bypass
8851 	 * sdstrategy(), so we also need to check the pm status
8852 	 * here.
8853 	 */
8854 
8855 	if (!nodelay) {
8856 		while ((un->un_state == SD_STATE_SUSPENDED) ||
8857 		    (un->un_state == SD_STATE_PM_CHANGING)) {
8858 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
8859 		}
8860 
8861 		mutex_exit(SD_MUTEX(un));
8862 		if (sd_pm_entry(un) != DDI_SUCCESS) {
8863 			rval = EIO;
8864 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
8865 			    "sdopen: sd_pm_entry failed\n");
8866 			goto open_failed_with_pm;
8867 		}
8868 		mutex_enter(SD_MUTEX(un));
8869 	}
8870 
8871 	/* check for previous exclusive open */
8872 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
8873 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
8874 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
8875 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
8876 
8877 	if (un->un_exclopen & (partmask)) {
8878 		goto excl_open_fail;
8879 	}
8880 
8881 	if (flag & FEXCL) {
8882 		int i;
8883 		if (un->un_ocmap.lyropen[part]) {
8884 			goto excl_open_fail;
8885 		}
8886 		for (i = 0; i < (OTYPCNT - 1); i++) {
8887 			if (un->un_ocmap.regopen[i] & (partmask)) {
8888 				goto excl_open_fail;
8889 			}
8890 		}
8891 	}
8892 
8893 	/*
8894 	 * Check the write permission if this is a removable media device,
8895 	 * NDELAY has not been set, and writable permission is requested.
8896 	 *
8897 	 * Note: If NDELAY was set and this is write-protected media, the WRITE
8898 	 * attempt will fail with EIO as part of the I/O processing. This is a
8899 	 * more permissive implementation that allows the open to succeed and
8900 	 * WRITE attempts to fail when appropriate.
8901 	 */
8902 	if (un->un_f_chk_wp_open) {
8903 		if ((flag & FWRITE) && (!nodelay)) {
8904 			mutex_exit(SD_MUTEX(un));
8905 			/*
8906 			 * Defer the check for write permission on writable
8907 			 * DVD drives until sdstrategy, and do not fail the
8908 			 * open even if FWRITE is set, since the device can
8909 			 * be writable depending upon the media, and the
8910 			 * media can change after the call to open().
8911 			 */
8912 			if (un->un_f_dvdram_writable_device == FALSE) {
8913 				if (ISCD(un) || sr_check_wp(dev)) {
8914 					rval = EROFS;
8915 					mutex_enter(SD_MUTEX(un));
8916 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen:"
8917 					    " write to cd or write protected media\n");
8918 					goto open_fail;
8919 				}
8920 			}
8921 			mutex_enter(SD_MUTEX(un));
8922 		}
8923 	}
8924 
8925 	/*
8926 	 * If opening in NDELAY/NONBLOCK mode, just return.
8927 	 * Check if disk is ready and has a valid geometry later.
8928 	 */
8929 	if (!nodelay) {
8930 		mutex_exit(SD_MUTEX(un));
8931 		rval = sd_ready_and_valid(un);
8932 		mutex_enter(SD_MUTEX(un));
8933 		/*
8934 		 * Fail if device is not ready or if the number of disk
8935 		 * blocks is zero or negative for non CD devices.
8936 		 */
8937 
8938 		nblks = 0;
8939 
8940 		if (rval == SD_READY_VALID && (!ISCD(un))) {
8941 			/* if cmlb_partinfo fails, nblks remains 0 */
8942 			mutex_exit(SD_MUTEX(un));
8943 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
8944 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
8945 			mutex_enter(SD_MUTEX(un));
8946 		}
8947 
8948 		if ((rval != SD_READY_VALID) ||
8949 		    (!ISCD(un) && nblks <= 0)) {
8950 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
8951 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8952 			    "device not ready or invalid disk block value\n");
8953 			goto open_fail;
8954 		}
8955 #if defined(__i386) || defined(__amd64)
8956 	} else {
8957 		uchar_t *cp;
8958 		/*
8959 		 * x86 requires special nodelay handling, so that p0 is
8960 		 * always defined and accessible.
8961 		 * Invalidate geometry only if device is not already open.
8962 		 */
8963 		cp = &un->un_ocmap.chkd[0];
8964 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
8965 			if (*cp != (uchar_t)0) {
8966 				break;
8967 			}
8968 			cp++;
8969 		}
8970 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
8971 			mutex_exit(SD_MUTEX(un));
8972 			cmlb_invalidate(un->un_cmlbhandle,
8973 			    (void *)SD_PATH_DIRECT);
8974 			mutex_enter(SD_MUTEX(un));
8975 		}
8976 
8977 #endif
8978 	}
8979 
8980 	if (otyp == OTYP_LYR) {
8981 		un->un_ocmap.lyropen[part]++;
8982 	} else {
8983 		un->un_ocmap.regopen[otyp] |= partmask;
8984 	}
8985 
8986 	/* Set up open and exclusive open flags */
8987 	if (flag & FEXCL) {
8988 		un->un_exclopen |= (partmask);
8989 	}
8990 
8991 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8992 	    "open of part %d type %d\n", part, otyp);
8993 
8994 	mutex_exit(SD_MUTEX(un));
8995 	if (!nodelay) {
8996 		sd_pm_exit(un);
8997 	}
8998 
8999 	sema_v(&un->un_semoclose);
9000 
9001 	mutex_enter(&sd_detach_mutex);
9002 	un->un_opens_in_progress--;
9003 	mutex_exit(&sd_detach_mutex);
9004 
9005 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9006 	return (DDI_SUCCESS);
9007 
9008 excl_open_fail:
9009 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9010 	rval = EBUSY;
9011 
9012 open_fail:
9013 	mutex_exit(SD_MUTEX(un));
9014 
9015 	/*
9016 	 * On a failed open we must exit the pm management.
9017 	 */
9018 	if (!nodelay) {
9019 		sd_pm_exit(un);
9020 	}
9021 open_failed_with_pm:
9022 	sema_v(&un->un_semoclose);
9023 
9024 	mutex_enter(&sd_detach_mutex);
9025 	un->un_opens_in_progress--;
9026 	if (otyp == OTYP_LYR) {
9027 		un->un_layer_count--;
9028 	}
9029 	mutex_exit(&sd_detach_mutex);
9030 
9031 	return (rval);
9032 }
9033 
9034 
9035 /*
9036  *    Function: sdclose
9037  *
9038  * Description: Driver's close(9e) entry point function.
9039  *
9040  *   Arguments: dev    - device number
9041  *		flag   - file status flag, informational only
9042  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9043  *		cred_p - user credential pointer
9044  *
9045  * Return Code: ENXIO
9046  *
9047  *     Context: Kernel thread context
9048  */
9049 /* ARGSUSED */
9050 static int
9051 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9052 {
9053 	struct sd_lun	*un;
9054 	uchar_t		*cp;
9055 	int		part;
9056 	int		nodelay;
9057 	int		rval = 0;
9058 
9059 	/* Validate the open type */
9060 	if (otyp >= OTYPCNT) {
9061 		return (ENXIO);
9062 	}
9063 
9064 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9065 		return (ENXIO);
9066 	}
9067 
9068 	part = SDPART(dev);
9069 	nodelay = flag & (FNDELAY | FNONBLOCK);
9070 
9071 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9072 	    "sdclose: close of part %d type %d\n", part, otyp);
9073 
9074 	/*
9075 	 * We use a semaphore here in order to serialize
9076 	 * open and close requests on the device.
9077 	 */
9078 	sema_p(&un->un_semoclose);
9079 
9080 	mutex_enter(SD_MUTEX(un));
9081 
9082 	/* Don't proceed if power is being changed. */
9083 	while (un->un_state == SD_STATE_PM_CHANGING) {
9084 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9085 	}
9086 
9087 	if (un->un_exclopen & (1 << part)) {
9088 		un->un_exclopen &= ~(1 << part);
9089 	}
9090 
9091 	/* Update the open partition map */
9092 	if (otyp == OTYP_LYR) {
9093 		un->un_ocmap.lyropen[part] -= 1;
9094 	} else {
9095 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9096 	}
9097 
9098 	cp = &un->un_ocmap.chkd[0];
9099 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9100 		if (*cp != (uchar_t)0) {
9101 			break;
9102 		}
9103 		cp++;
9104 	}
9105 
9106 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9107 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9108 
9109 		/*
9110 		 * We avoid persistance upon the last close, and set
9111 		 * the throttle back to the maximum.
9112 		 */
9113 		un->un_throttle = un->un_saved_throttle;
9114 
9115 		if (un->un_state == SD_STATE_OFFLINE) {
9116 			if (un->un_f_is_fibre == FALSE) {
9117 				scsi_log(SD_DEVINFO(un), sd_label,
9118 					CE_WARN, "offline\n");
9119 			}
9120 			mutex_exit(SD_MUTEX(un));
9121 			cmlb_invalidate(un->un_cmlbhandle,
9122 			    (void *)SD_PATH_DIRECT);
9123 			mutex_enter(SD_MUTEX(un));
9124 
9125 		} else {
9126 			/*
9127 			 * Flush any outstanding writes in NVRAM cache.
9128 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9129 			 * command; it may not work for non-Pluto devices.
9130 			 * SYNCHRONIZE CACHE is not required for removables,
9131 			 * except DVD-RAM drives.
9132 			 *
9133 			 * Also note: because SYNCHRONIZE CACHE is currently
9134 			 * the only command issued here that requires the
9135 			 * drive be powered up, only do the power up before
9136 			 * sending the Sync Cache command. If additional
9137 			 * commands are added which require a powered up
9138 			 * drive, the following sequence may have to change.
9139 			 *
9140 			 * And finally, note that parallel SCSI on SPARC
9141 			 * only issues a Sync Cache to DVD-RAM, a newly
9142 			 * supported device.
9143 			 */
9144 #if defined(__i386) || defined(__amd64)
9145 			if (un->un_f_sync_cache_supported ||
9146 			    un->un_f_dvdram_writable_device == TRUE) {
9147 #else
9148 			if (un->un_f_dvdram_writable_device == TRUE) {
9149 #endif
9150 				mutex_exit(SD_MUTEX(un));
9151 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9152 					rval =
9153 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9154 					    NULL);
9155 					/* ignore error if not supported */
9156 					if (rval == ENOTSUP) {
9157 						rval = 0;
9158 					} else if (rval != 0) {
9159 						rval = EIO;
9160 					}
9161 					sd_pm_exit(un);
9162 				} else {
9163 					rval = EIO;
9164 				}
9165 				mutex_enter(SD_MUTEX(un));
9166 			}
9167 
9168 			/*
9169 			 * For devices which support DOOR_LOCK, send an ALLOW
9170 			 * MEDIA REMOVAL command, but don't get upset if it
9171 			 * fails. We need to raise the power of the drive before
9172 			 * we can call sd_send_scsi_DOORLOCK()
9173 			 */
9174 			if (un->un_f_doorlock_supported) {
9175 				mutex_exit(SD_MUTEX(un));
9176 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9177 					rval = sd_send_scsi_DOORLOCK(un,
9178 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9179 
9180 					sd_pm_exit(un);
9181 					if (ISCD(un) && (rval != 0) &&
9182 					    (nodelay != 0)) {
9183 						rval = ENXIO;
9184 					}
9185 				} else {
9186 					rval = EIO;
9187 				}
9188 				mutex_enter(SD_MUTEX(un));
9189 			}
9190 
9191 			/*
9192 			 * If a device has removable media, invalidate all
9193 			 * parameters related to media, such as geometry,
9194 			 * blocksize, and blockcount.
9195 			 */
9196 			if (un->un_f_has_removable_media) {
9197 				sr_ejected(un);
9198 			}
9199 
9200 			/*
9201 			 * Destroy the cache (if it exists) which was
9202 			 * allocated for the write maps since this is
9203 			 * the last close for this media.
9204 			 */
9205 			if (un->un_wm_cache) {
9206 				/*
9207 				 * Check if there are pending commands;
9208 				 * if there are, give a warning and
9209 				 * do not destroy the cache.
9210 				 */
9211 				if (un->un_ncmds_in_driver > 0) {
9212 					scsi_log(SD_DEVINFO(un),
9213 					    sd_label, CE_WARN,
9214 					    "Unable to clean up memory "
9215 					    "because of pending I/O\n");
9216 				} else {
9217 					kmem_cache_destroy(
9218 					    un->un_wm_cache);
9219 					un->un_wm_cache = NULL;
9220 				}
9221 			}
9222 		}
9223 	}
9224 
9225 	mutex_exit(SD_MUTEX(un));
9226 	sema_v(&un->un_semoclose);
9227 
9228 	if (otyp == OTYP_LYR) {
9229 		mutex_enter(&sd_detach_mutex);
9230 		/*
9231 		 * The detach routine may run when the layer count
9232 		 * drops to zero.
9233 		 */
9234 		un->un_layer_count--;
9235 		mutex_exit(&sd_detach_mutex);
9236 	}
9237 
9238 	return (rval);
9239 }
9240 
9241 
9242 /*
9243  *    Function: sd_ready_and_valid
9244  *
9245  * Description: Test if device is ready and has a valid geometry.
9246  *
9247  *   Arguments: un - driver soft state (unit) structure
9249  *
9250  * Return Code: SD_READY_VALID		ready and valid label
9251  *		SD_NOT_READY_VALID	not ready, no label
9252  *		SD_RESERVED_BY_OTHERS	reservation conflict
9253  *
9254  *     Context: Never called at interrupt context.
9255  */
9256 
9257 static int
9258 sd_ready_and_valid(struct sd_lun *un)
9259 {
9260 	struct sd_errstats	*stp;
9261 	uint64_t		capacity;
9262 	uint_t			lbasize;
9263 	int			rval = SD_READY_VALID;
9264 	char			name_str[48];
9265 	int			is_valid;
9266 
9267 	ASSERT(un != NULL);
9268 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9269 
9270 	mutex_enter(SD_MUTEX(un));
9271 	/*
9272 	 * If a device has removable media, we must check if media is
9273 	 * ready when checking if this device is ready and valid.
9274 	 */
9275 	if (un->un_f_has_removable_media) {
9276 		mutex_exit(SD_MUTEX(un));
9277 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9278 			rval = SD_NOT_READY_VALID;
9279 			mutex_enter(SD_MUTEX(un));
9280 			goto done;
9281 		}
9282 
9283 		is_valid = SD_IS_VALID_LABEL(un);
9284 		mutex_enter(SD_MUTEX(un));
9285 		if (!is_valid ||
9286 		    (un->un_f_blockcount_is_valid == FALSE) ||
9287 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9288 
9289 			/* capacity has to be read on every open. */
9290 			mutex_exit(SD_MUTEX(un));
9291 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9292 			    &lbasize, SD_PATH_DIRECT) != 0) {
9293 				cmlb_invalidate(un->un_cmlbhandle,
9294 				    (void *)SD_PATH_DIRECT);
9295 				mutex_enter(SD_MUTEX(un));
9296 				rval = SD_NOT_READY_VALID;
9297 				goto done;
9298 			} else {
9299 				mutex_enter(SD_MUTEX(un));
9300 				sd_update_block_info(un, lbasize, capacity);
9301 			}
9302 		}
9303 
9304 		/*
9305 		 * Check if the media in the device is writable or not.
9306 		 */
9307 		if (!is_valid && ISCD(un)) {
9308 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
9309 		}
9310 
9311 	} else {
9312 		/*
9313 		 * Do a test unit ready to clear any unit attention from non-cd
9314 		 * devices.
9315 		 */
9316 		mutex_exit(SD_MUTEX(un));
9317 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9318 		mutex_enter(SD_MUTEX(un));
9319 	}
9320 
9321 
9322 	/*
9323 	 * If this is a non 512 block device, allocate space for
9324 	 * the wmap cache. This is being done here since every time
9325 	 * a media is changed this routine will be called and the
9326 	 * block size is a function of media rather than device.
9327 	 */
9328 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9329 		if (!(un->un_wm_cache)) {
9330 			(void) snprintf(name_str, sizeof (name_str),
9331 			    "%s%d_cache",
9332 			    ddi_driver_name(SD_DEVINFO(un)),
9333 			    ddi_get_instance(SD_DEVINFO(un)));
9334 			un->un_wm_cache = kmem_cache_create(
9335 			    name_str, sizeof (struct sd_w_map),
9336 			    8, sd_wm_cache_constructor,
9337 			    sd_wm_cache_destructor, NULL,
9338 			    (void *)un, NULL, 0);
9339 			if (!(un->un_wm_cache)) {
9340 				rval = ENOMEM;
9341 				goto done;
9342 			}
9343 		}
9344 	}
9345 
9346 	if (un->un_state == SD_STATE_NORMAL) {
9347 		/*
9348 		 * If the target is not yet ready here (defined by a TUR
9349 		 * failure), invalidate the geometry and print an 'offline'
9350 		 * message. This is a legacy message, as the state of the
9351 		 * target is not actually changed to SD_STATE_OFFLINE.
9352 		 *
9353 		 * If the TUR fails for EACCES (Reservation Conflict),
9354 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9355 		 * reservation conflict. If the TUR fails for other
9356 		 * reasons, SD_NOT_READY_VALID will be returned.
9357 		 */
9358 		int err;
9359 
9360 		mutex_exit(SD_MUTEX(un));
9361 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9362 		mutex_enter(SD_MUTEX(un));
9363 
9364 		if (err != 0) {
9365 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9366 			    "offline or reservation conflict\n");
9367 			mutex_exit(SD_MUTEX(un));
9368 			cmlb_invalidate(un->un_cmlbhandle,
9369 			    (void *)SD_PATH_DIRECT);
9370 			mutex_enter(SD_MUTEX(un));
9371 			if (err == EACCES) {
9372 				rval = SD_RESERVED_BY_OTHERS;
9373 			} else {
9374 				rval = SD_NOT_READY_VALID;
9375 			}
9376 			goto done;
9377 		}
9378 	}
9379 
9380 	if (un->un_f_format_in_progress == FALSE) {
9381 		mutex_exit(SD_MUTEX(un));
9382 		if (cmlb_validate(un->un_cmlbhandle, 0,
9383 		    (void *)SD_PATH_DIRECT) != 0) {
9384 			rval = SD_NOT_READY_VALID;
9385 			mutex_enter(SD_MUTEX(un));
9386 			goto done;
9387 		}
9388 		if (un->un_f_pkstats_enabled) {
9389 			sd_set_pstats(un);
9390 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9391 			    "sd_ready_and_valid: un:0x%p pstats created and "
9392 			    "set\n", un);
9393 		}
9394 		mutex_enter(SD_MUTEX(un));
9395 	}
9396 
9397 	/*
9398 	 * If this device supports the DOOR_LOCK command, try to send
9399 	 * it to PREVENT MEDIA REMOVAL, but don't get upset if the
9400 	 * command fails. For a CD, however, a failure is an error.
9401 	 */
9402 	if (un->un_f_doorlock_supported) {
9403 		mutex_exit(SD_MUTEX(un));
9404 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9405 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9406 			rval = SD_NOT_READY_VALID;
9407 			mutex_enter(SD_MUTEX(un));
9408 			goto done;
9409 		}
9410 		mutex_enter(SD_MUTEX(un));
9411 	}
9412 
9413 	/* The state has changed, inform the media watch routines */
9414 	un->un_mediastate = DKIO_INSERTED;
9415 	cv_broadcast(&un->un_state_cv);
9416 	rval = SD_READY_VALID;
9417 
9418 done:
9419 
9420 	/*
9421 	 * Initialize the capacity kstat value if there was previously
9422 	 * no media (capacity kstat is 0) and media has now been
9423 	 * inserted (un_blockcount > 0).
9424 	 */
9425 	if (un->un_errstats != NULL) {
9426 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9427 		if ((stp->sd_capacity.value.ui64 == 0) &&
9428 		    (un->un_f_blockcount_is_valid == TRUE)) {
9429 			stp->sd_capacity.value.ui64 =
9430 			    (uint64_t)((uint64_t)un->un_blockcount *
9431 			    un->un_sys_blocksize);
9432 		}
9433 	}
9434 
9435 	mutex_exit(SD_MUTEX(un));
9436 	return (rval);
9437 }
9438 
9439 
9440 /*
9441  *    Function: sdmin
9442  *
9443  * Description: Routine to limit the size of a data transfer. Used in
9444  *		conjunction with physio(9F).
9445  *
9446  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9447  *
9448  *     Context: Kernel thread context.
9449  */
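
/*
 * For example, sdread() below hands this routine to physio(9F):
 *
 *	err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
 */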
9450 
9451 static void
9452 sdmin(struct buf *bp)
9453 {
9454 	struct sd_lun	*un;
9455 	int		instance;
9456 
9457 	instance = SDUNIT(bp->b_edev);
9458 
9459 	un = ddi_get_soft_state(sd_state, instance);
9460 	ASSERT(un != NULL);
9461 
9462 	if (bp->b_bcount > un->un_max_xfer_size) {
9463 		bp->b_bcount = un->un_max_xfer_size;
9464 	}
9465 }
9466 
9467 
9468 /*
9469  *    Function: sdread
9470  *
9471  * Description: Driver's read(9e) entry point function.
9472  *
9473  *   Arguments: dev   - device number
9474  *		uio   - structure pointer describing where data is to be stored
9475  *			in user's space
9476  *		cred_p  - user credential pointer
9477  *
9478  * Return Code: ENXIO
9479  *		EIO
9480  *		EINVAL
9481  *		value returned by physio
9482  *
9483  *     Context: Kernel thread context.
9484  */
9485 /* ARGSUSED */
9486 static int
9487 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9488 {
9489 	struct sd_lun	*un = NULL;
9490 	int		secmask;
9491 	int		err;
9492 
9493 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9494 		return (ENXIO);
9495 	}
9496 
9497 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9498 
9499 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9500 		mutex_enter(SD_MUTEX(un));
9501 		/*
9502 		 * Because the call to sd_ready_and_valid will issue I/O, we
9503 		 * must wait here if either the device is suspended or
9504 		 * its power level is changing.
9505 		 */
9506 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9507 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9508 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9509 		}
9510 		un->un_ncmds_in_driver++;
9511 		mutex_exit(SD_MUTEX(un));
9512 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9513 			mutex_enter(SD_MUTEX(un));
9514 			un->un_ncmds_in_driver--;
9515 			ASSERT(un->un_ncmds_in_driver >= 0);
9516 			mutex_exit(SD_MUTEX(un));
9517 			return (EIO);
9518 		}
9519 		mutex_enter(SD_MUTEX(un));
9520 		un->un_ncmds_in_driver--;
9521 		ASSERT(un->un_ncmds_in_driver >= 0);
9522 		mutex_exit(SD_MUTEX(un));
9523 	}
9524 
9525 	/*
9526 	 * Read requests are restricted to multiples of the system block size.
9527 	 */
9528 	secmask = un->un_sys_blocksize - 1;
9529 
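	/*
	 * Worked example (illustration only, assuming a 512-byte system
	 * blocksize): secmask = 0x1ff, so an offset of 1024 passes
	 * (1024 & 0x1ff == 0) while an offset of 1000 fails with EINVAL
	 * (1000 & 0x1ff == 488).
	 */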
9530 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9531 		SD_ERROR(SD_LOG_READ_WRITE, un,
9532 		    "sdread: file offset not modulo %d\n",
9533 		    un->un_sys_blocksize);
9534 		err = EINVAL;
9535 	} else if (uio->uio_iov->iov_len & (secmask)) {
9536 		SD_ERROR(SD_LOG_READ_WRITE, un,
9537 		    "sdread: transfer length not modulo %d\n",
9538 		    un->un_sys_blocksize);
9539 		err = EINVAL;
9540 	} else {
9541 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9542 	}
9543 	return (err);
9544 }
9545 
9546 
9547 /*
9548  *    Function: sdwrite
9549  *
9550  * Description: Driver's write(9e) entry point function.
9551  *
9552  *   Arguments: dev   - device number
9553  *		uio   - structure pointer describing where data is stored in
9554  *			user's space
9555  *		cred_p  - user credential pointer
9556  *
9557  * Return Code: ENXIO
9558  *		EIO
9559  *		EINVAL
9560  *		value returned by physio
9561  *
9562  *     Context: Kernel thread context.
9563  */
9564 /* ARGSUSED */
9565 static int
9566 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9567 {
9568 	struct sd_lun	*un = NULL;
9569 	int		secmask;
9570 	int		err;
9571 
9572 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9573 		return (ENXIO);
9574 	}
9575 
9576 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9577 
9578 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9579 		mutex_enter(SD_MUTEX(un));
9580 		/*
9581 		 * Because the call to sd_ready_and_valid will issue I/O, we
9582 		 * must wait here if either the device is suspended or
9583 		 * its power level is changing.
9584 		 */
9585 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9586 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9587 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9588 		}
9589 		un->un_ncmds_in_driver++;
9590 		mutex_exit(SD_MUTEX(un));
9591 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9592 			mutex_enter(SD_MUTEX(un));
9593 			un->un_ncmds_in_driver--;
9594 			ASSERT(un->un_ncmds_in_driver >= 0);
9595 			mutex_exit(SD_MUTEX(un));
9596 			return (EIO);
9597 		}
9598 		mutex_enter(SD_MUTEX(un));
9599 		un->un_ncmds_in_driver--;
9600 		ASSERT(un->un_ncmds_in_driver >= 0);
9601 		mutex_exit(SD_MUTEX(un));
9602 	}
9603 
9604 	/*
9605 	 * Write requests are restricted to multiples of the system block size.
9606 	 */
9607 	secmask = un->un_sys_blocksize - 1;
9608 
9609 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9610 		SD_ERROR(SD_LOG_READ_WRITE, un,
9611 		    "sdwrite: file offset not modulo %d\n",
9612 		    un->un_sys_blocksize);
9613 		err = EINVAL;
9614 	} else if (uio->uio_iov->iov_len & (secmask)) {
9615 		SD_ERROR(SD_LOG_READ_WRITE, un,
9616 		    "sdwrite: transfer length not modulo %d\n",
9617 		    un->un_sys_blocksize);
9618 		err = EINVAL;
9619 	} else {
9620 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9621 	}
9622 	return (err);
9623 }
9624 
9625 
9626 /*
9627  *    Function: sdaread
9628  *
9629  * Description: Driver's aread(9e) entry point function.
9630  *
9631  *   Arguments: dev   - device number
9632  *		aio   - structure pointer describing where data is to be stored
9633  *		cred_p  - user credential pointer
9634  *
9635  * Return Code: ENXIO
9636  *		EIO
9637  *		EINVAL
9638  *		value returned by aphysio
9639  *
9640  *     Context: Kernel thread context.
9641  */
9642 /* ARGSUSED */
9643 static int
9644 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9645 {
9646 	struct sd_lun	*un = NULL;
9647 	struct uio	*uio = aio->aio_uio;
9648 	int		secmask;
9649 	int		err;
9650 
9651 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9652 		return (ENXIO);
9653 	}
9654 
9655 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9656 
9657 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9658 		mutex_enter(SD_MUTEX(un));
9659 		/*
9660 		 * Because the call to sd_ready_and_valid will issue I/O, we
9661 		 * must wait here if either the device is suspended or
9662 		 * its power level is changing.
9663 		 */
9664 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9665 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9666 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9667 		}
9668 		un->un_ncmds_in_driver++;
9669 		mutex_exit(SD_MUTEX(un));
9670 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9671 			mutex_enter(SD_MUTEX(un));
9672 			un->un_ncmds_in_driver--;
9673 			ASSERT(un->un_ncmds_in_driver >= 0);
9674 			mutex_exit(SD_MUTEX(un));
9675 			return (EIO);
9676 		}
9677 		mutex_enter(SD_MUTEX(un));
9678 		un->un_ncmds_in_driver--;
9679 		ASSERT(un->un_ncmds_in_driver >= 0);
9680 		mutex_exit(SD_MUTEX(un));
9681 	}
9682 
9683 	/*
9684 	 * Read requests are restricted to multiples of the system block size.
9685 	 */
9686 	secmask = un->un_sys_blocksize - 1;
9687 
9688 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9689 		SD_ERROR(SD_LOG_READ_WRITE, un,
9690 		    "sdaread: file offset not modulo %d\n",
9691 		    un->un_sys_blocksize);
9692 		err = EINVAL;
9693 	} else if (uio->uio_iov->iov_len & (secmask)) {
9694 		SD_ERROR(SD_LOG_READ_WRITE, un,
9695 		    "sdaread: transfer length not modulo %d\n",
9696 		    un->un_sys_blocksize);
9697 		err = EINVAL;
9698 	} else {
9699 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9700 	}
9701 	return (err);
9702 }
9703 
9704 
9705 /*
9706  *    Function: sdawrite
9707  *
9708  * Description: Driver's awrite(9e) entry point function.
9709  *
9710  *   Arguments: dev   - device number
9711  *		aio   - structure pointer describing where data is stored
9712  *		cred_p  - user credential pointer
9713  *
9714  * Return Code: ENXIO
9715  *		EIO
9716  *		EINVAL
9717  *		value returned by aphysio
9718  *
9719  *     Context: Kernel thread context.
9720  */
9721 /* ARGSUSED */
9722 static int
9723 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9724 {
9725 	struct sd_lun	*un = NULL;
9726 	struct uio	*uio = aio->aio_uio;
9727 	int		secmask;
9728 	int		err;
9729 
9730 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9731 		return (ENXIO);
9732 	}
9733 
9734 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9735 
9736 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9737 		mutex_enter(SD_MUTEX(un));
9738 		/*
9739 		 * Because the call to sd_ready_and_valid will issue I/O, we
9740 		 * must wait here if either the device is suspended or
9741 		 * its power level is changing.
9742 		 */
9743 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9744 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9745 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9746 		}
9747 		un->un_ncmds_in_driver++;
9748 		mutex_exit(SD_MUTEX(un));
9749 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9750 			mutex_enter(SD_MUTEX(un));
9751 			un->un_ncmds_in_driver--;
9752 			ASSERT(un->un_ncmds_in_driver >= 0);
9753 			mutex_exit(SD_MUTEX(un));
9754 			return (EIO);
9755 		}
9756 		mutex_enter(SD_MUTEX(un));
9757 		un->un_ncmds_in_driver--;
9758 		ASSERT(un->un_ncmds_in_driver >= 0);
9759 		mutex_exit(SD_MUTEX(un));
9760 	}
9761 
9762 	/*
9763 	 * Write requests are restricted to multiples of the system block size.
9764 	 */
9765 	secmask = un->un_sys_blocksize - 1;
9766 
9767 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9768 		SD_ERROR(SD_LOG_READ_WRITE, un,
9769 		    "sdawrite: file offset not modulo %d\n",
9770 		    un->un_sys_blocksize);
9771 		err = EINVAL;
9772 	} else if (uio->uio_iov->iov_len & (secmask)) {
9773 		SD_ERROR(SD_LOG_READ_WRITE, un,
9774 		    "sdawrite: transfer length not modulo %d\n",
9775 		    un->un_sys_blocksize);
9776 		err = EINVAL;
9777 	} else {
9778 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
9779 	}
9780 	return (err);
9781 }
9782 
9783 
9784 
9785 
9786 
9787 /*
9788  * Driver IO processing follows the following sequence:
9789  *
9790  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
9791  *         |                |                     ^
9792  *         v                v                     |
9793  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
9794  *         |                |                     |                   |
9795  *         v                |                     |                   |
9796  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
9797  *         |                |                     ^                   ^
9798  *         v                v                     |                   |
9799  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
9800  *         |                |                     |                   |
9801  *     +---+                |                     +------------+      +-------+
9802  *     |                    |                                  |              |
9803  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9804  *     |                    v                                  |              |
9805  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
9806  *     |                    |                                  ^              |
9807  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9808  *     |                    v                                  |              |
9809  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
9810  *     |                    |                                  ^              |
9811  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9812  *     |                    v                                  |              |
9813  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
9814  *     |                    |                                  ^              |
9815  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
9816  *     |                    v                                  |              |
9817  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
9818  *     |                    |                                  ^              |
9819  *     |                    |                                  |              |
9820  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
9821  *                          |                           ^
9822  *                          v                           |
9823  *                   sd_core_iostart()                  |
9824  *                          |                           |
9825  *                          |                           +------>(*destroypkt)()
9826  *                          +-> sd_start_cmds() <-+     |           |
9827  *                          |                     |     |           v
9828  *                          |                     |     |  scsi_destroy_pkt(9F)
9829  *                          |                     |     |
9830  *                          +->(*initpkt)()       +- sdintr()
9831  *                          |  |                        |  |
9832  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
9833  *                          |  +-> scsi_setup_cdb(9F)   |
9834  *                          |                           |
9835  *                          +--> scsi_transport(9F)     |
9836  *                                     |                |
9837  *                                     +----> SCSA ---->+
9838  *
9839  *
9840  * This code is based upon the following presumptions:
9841  *
9842  *   - iostart and iodone functions operate on buf(9S) structures. These
9843  *     functions perform the necessary operations on the buf(9S) and pass
9844  *     them along to the next function in the chain by using the macros
9845  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
9846  *     (for iodone side functions).
9847  *
9848  *   - The iostart side functions may sleep. The iodone side functions
9849  *     are called under interrupt context and may NOT sleep. Therefore
9850  *     iodone side functions also may not call iostart side functions.
9851  *     (NOTE: iostart side functions should NOT sleep for memory, as
9852  *     this could result in deadlock.)
9853  *
9854  *   - An iostart side function may call its corresponding iodone side
9855  *     function directly (if necessary).
9856  *
9857  *   - In the event of an error, an iostart side function can return a buf(9S)
9858  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
9859  *     b_error in the usual way of course).
9860  *
9861  *   - The taskq mechanism may be used by the iodone side functions to dispatch
9862  *     requests to the iostart side functions.  The iostart side functions in
9863  *     this case would be called under the context of a taskq thread, so it's
9864  *     OK for them to block/sleep/spin in this case.
9865  *
9866  *   - iostart side functions may allocate "shadow" buf(9S) structs and
9867  *     pass them along to the next function in the chain.  The corresponding
9868  *     iodone side functions must coalesce the "shadow" bufs and return
9869  *     the "original" buf to the next higher layer.
9870  *
9871  *   - The b_private field of the buf(9S) struct holds a pointer to
9872  *     an sd_xbuf struct, which contains information needed to
9873  *     construct the scsi_pkt for the command.
9874  *
9875  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
9876  *     layer must acquire & release the SD_MUTEX(un) as needed.
9877  */
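/*
 * Illustrative sketch (not the driver's actual definitions): the chain
 * macros in the diagram above can be thought of as indexed dispatch into
 * per-layer function tables. The guard macro and "_SKETCH" names below are
 * hypothetical; see the real chain tables and the SD_BEGIN_IOSTART()/
 * SD_NEXT_IOSTART()/SD_NEXT_IODONE() definitions elsewhere in this file.
 */
#ifdef SD_CHAIN_SKETCH
typedef void (*sd_chain_fn_t)(int index, struct sd_lun *un, struct buf *bp);

extern sd_chain_fn_t sd_iostart_chain_sketch[];	/* iostart side, top down */
extern sd_chain_fn_t sd_iodone_chain_sketch[];	/* iodone side, bottom up */

/* Enter the iostart side at the layer given by 'index'. */
#define	SD_BEGIN_IOSTART_SKETCH(index, un, bp)	\
	((*(sd_iostart_chain_sketch[(index)]))((index), (un), (bp)))

/* Hand the buf to the next layer down the iostart side... */
#define	SD_NEXT_IOSTART_SKETCH(index, un, bp)	\
	((*(sd_iostart_chain_sketch[(index) + 1]))((index) + 1, (un), (bp)))

/* ...and to the next layer back up the iodone side. */
#define	SD_NEXT_IODONE_SKETCH(index, un, bp)	\
	((*(sd_iodone_chain_sketch[(index) - 1]))((index) - 1, (un), (bp)))
#endif	/* SD_CHAIN_SKETCH */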
9878 
9879 
9880 /*
9881  * Create taskq for all targets in the system. This is created at
9882  * _init(9E) and destroyed at _fini(9E).
9883  *
9884  * Note: here we set the minalloc to a reasonably high number to ensure that
9885  * we will have an adequate supply of task entries available at interrupt time.
9886  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
9887  * sd_taskq_create().  Since we do not want to sleep for allocations at
9888  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
9889  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
9890  * requests at any one instant in time.
9891  */
9892 #define	SD_TASKQ_NUMTHREADS	8
9893 #define	SD_TASKQ_MINALLOC	256
9894 #define	SD_TASKQ_MAXALLOC	256
9895 
9896 static taskq_t	*sd_tq = NULL;
9897 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
9898 
9899 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
9900 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
9901 
9902 /*
9903  * The following task queue is created for the write phase of
9904  * read-modify-write operations on devices with a non-512-byte block size.
9905  * Limit the number of threads to 1 for now. This number was chosen
9906  * because the queue currently applies only to DVD-RAM and MO drives,
9907  * for which performance is not the primary criterion at this stage.
9908  * Note: whether a single taskq could suffice remains to be explored.
9909  */
9910 #define	SD_WMR_TASKQ_NUMTHREADS	1
9911 static taskq_t	*sd_wmr_tq = NULL;
9912 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
9913 
9914 /*
9915  *    Function: sd_taskq_create
9916  *
9917  * Description: Create taskq thread(s) and preallocate task entries
9918  *
9919  * Return Code: Returns a pointer to the allocated taskq_t.
9920  *
9921  *     Context: Can sleep. Requires blockable context.
9922  *
9923  *       Notes: - The taskq() facility currently is NOT part of the DDI.
9924  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
9925  *		- taskq_create() will block for memory, also it will panic
9926  *		  if it cannot create the requested number of threads.
9927  *		- Currently taskq_create() creates threads that cannot be
9928  *		  swapped.
9929  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
9930  *		  supply of taskq entries at interrupt time (ie, so that we
9931  *		  do not have to sleep for memory)
9932  */
9933 
9934 static void
9935 sd_taskq_create(void)
9936 {
9937 	char	taskq_name[TASKQ_NAMELEN];
9938 
9939 	ASSERT(sd_tq == NULL);
9940 	ASSERT(sd_wmr_tq == NULL);
9941 
9942 	(void) snprintf(taskq_name, sizeof (taskq_name),
9943 	    "%s_drv_taskq", sd_label);
9944 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
9945 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9946 	    TASKQ_PREPOPULATE));
9947 
9948 	(void) snprintf(taskq_name, sizeof (taskq_name),
9949 	    "%s_rmw_taskq", sd_label);
9950 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
9951 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9952 	    TASKQ_PREPOPULATE));
9953 }
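/*
 * Usage sketch (hypothetical helper, not part of the driver): because the
 * taskqs above are created with maxalloc == minalloc plus TASKQ_PREPOPULATE,
 * a caller in interrupt context must dispatch with a no-sleep flag and be
 * prepared for the dispatch to fail once all preallocated entries are in use.
 */
#ifdef SD_TASKQ_USAGE_SKETCH
static int
sd_example_dispatch(void (*func)(void *), void *arg)
{
	/* taskq_dispatch() returns 0 when no taskq entry is available */
	if (taskq_dispatch(sd_tq, func, arg, KM_NOSLEEP) == 0) {
		/* Out of preallocated entries: fail rather than sleep */
		return (EAGAIN);
	}
	return (0);
}
#endif	/* SD_TASKQ_USAGE_SKETCH */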
9954 
9955 
9956 /*
9957  *    Function: sd_taskq_delete
9958  *
9959  * Description: Complementary cleanup routine for sd_taskq_create().
9960  *
9961  *     Context: Kernel thread context.
9962  */
9963 
9964 static void
9965 sd_taskq_delete(void)
9966 {
9967 	ASSERT(sd_tq != NULL);
9968 	ASSERT(sd_wmr_tq != NULL);
9969 	taskq_destroy(sd_tq);
9970 	taskq_destroy(sd_wmr_tq);
9971 	sd_tq = NULL;
9972 	sd_wmr_tq = NULL;
9973 }
9974 
9975 
9976 /*
9977  *    Function: sdstrategy
9978  *
9979  * Description: Driver's strategy (9E) entry point function.
9980  *
9981  *   Arguments: bp - pointer to buf(9S)
9982  *
9983  * Return Code: Always returns zero
9984  *
9985  *     Context: Kernel thread context.
9986  */
9987 
9988 static int
9989 sdstrategy(struct buf *bp)
9990 {
9991 	struct sd_lun *un;
9992 
9993 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
9994 	if (un == NULL) {
9995 		bioerror(bp, EIO);
9996 		bp->b_resid = bp->b_bcount;
9997 		biodone(bp);
9998 		return (0);
9999 	}
10000 	/* As was done in the past, fail new cmds if state is dumping. */
10001 	if (un->un_state == SD_STATE_DUMPING) {
10002 		bioerror(bp, ENXIO);
10003 		bp->b_resid = bp->b_bcount;
10004 		biodone(bp);
10005 		return (0);
10006 	}
10007 
10008 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10009 
10010 	/*
10011 	 * Commands may sneak in while we release the mutex in
10012 	 * DDI_SUSPEND; we should block new commands. However, old
10013 	 * commands that are still in the driver at this point should
10014 	 * still be allowed to drain.
10015 	 */
10016 	mutex_enter(SD_MUTEX(un));
10017 	/*
10018 	 * Must wait here if either the device is suspended or
10019 	 * if its power level is changing.
10020 	 */
10021 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10022 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10023 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10024 	}
10025 
10026 	un->un_ncmds_in_driver++;
10027 
10028 	/*
10029 	 * atapi: Since we are running the CD in PIO mode for now, we need to
10030 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
10031 	 * context under the HBA's init_pkt routine.
10032 	 */
10033 	if (un->un_f_cfg_is_atapi == TRUE) {
10034 		mutex_exit(SD_MUTEX(un));
10035 		bp_mapin(bp);
10036 		mutex_enter(SD_MUTEX(un));
10037 	}
10038 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10039 	    un->un_ncmds_in_driver);
10040 
10041 	mutex_exit(SD_MUTEX(un));
10042 
10043 	/*
10044 	 * This will (eventually) allocate the sd_xbuf area and
10045 	 * call sd_xbuf_strategy().  We just want to return the
10046 	 * result of ddi_xbuf_qstrategy so that we have an
10047 	 * optimized tail call which saves us a stack frame.
10048 	 */
10049 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10050 }
10051 
10052 
10053 /*
10054  *    Function: sd_xbuf_strategy
10055  *
10056  * Description: Function for initiating IO operations via the
10057  *		ddi_xbuf_qstrategy() mechanism.
10058  *
10059  *     Context: Kernel thread context.
10060  */
10061 
10062 static void
10063 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10064 {
10065 	struct sd_lun *un = arg;
10066 
10067 	ASSERT(bp != NULL);
10068 	ASSERT(xp != NULL);
10069 	ASSERT(un != NULL);
10070 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10071 
10072 	/*
10073 	 * Initialize the fields in the xbuf and save a pointer to the
10074 	 * xbuf in bp->b_private.
10075 	 */
10076 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10077 
10078 	/* Send the buf down the iostart chain */
10079 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10080 }
10081 
10082 
10083 /*
10084  *    Function: sd_xbuf_init
10085  *
10086  * Description: Prepare the given sd_xbuf struct for use.
10087  *
10088  *   Arguments: un - ptr to softstate
10089  *		bp - ptr to associated buf(9S)
10090  *		xp - ptr to associated sd_xbuf
10091  *		chain_type - IO chain type to use:
10092  *			SD_CHAIN_NULL
10093  *			SD_CHAIN_BUFIO
10094  *			SD_CHAIN_USCSI
10095  *			SD_CHAIN_DIRECT
10096  *			SD_CHAIN_DIRECT_PRIORITY
10097  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10098  *			initialization; may be NULL if none.
10099  *
10100  *     Context: Kernel thread context
10101  */
10102 
10103 static void
10104 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10105 	uchar_t chain_type, void *pktinfop)
10106 {
10107 	int index;
10108 
10109 	ASSERT(un != NULL);
10110 	ASSERT(bp != NULL);
10111 	ASSERT(xp != NULL);
10112 
10113 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10114 	    bp, chain_type);
10115 
10116 	xp->xb_un	= un;
10117 	xp->xb_pktp	= NULL;
10118 	xp->xb_pktinfo	= pktinfop;
10119 	xp->xb_private	= bp->b_private;
10120 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10121 
10122 	/*
10123 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10124 	 * upon the specified chain type to use.
10125 	 */
10126 	switch (chain_type) {
10127 	case SD_CHAIN_NULL:
10128 		/*
10129 		 * Fall through to just use the values for the buf type, even
10130 		 * though for the NULL chain these values will never be used.
10131 		 */
10132 		/* FALLTHRU */
10133 	case SD_CHAIN_BUFIO:
10134 		index = un->un_buf_chain_type;
10135 		break;
10136 	case SD_CHAIN_USCSI:
10137 		index = un->un_uscsi_chain_type;
10138 		break;
10139 	case SD_CHAIN_DIRECT:
10140 		index = un->un_direct_chain_type;
10141 		break;
10142 	case SD_CHAIN_DIRECT_PRIORITY:
10143 		index = un->un_priority_chain_type;
10144 		break;
10145 	default:
10146 		/* We're really broken if we ever get here... */
10147 		panic("sd_xbuf_init: illegal chain type!");
10148 		/*NOTREACHED*/
10149 	}
10150 
10151 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10152 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10153 
10154 	/*
10155 	 * It might be a bit easier to simply bzero the entire xbuf above,
10156 	 * but it turns out that since we init a fair number of members anyway,
10157 	 * we save a fair number of cycles by doing explicit assignment of zero.
10158 	 */
10159 	xp->xb_pkt_flags	= 0;
10160 	xp->xb_dma_resid	= 0;
10161 	xp->xb_retry_count	= 0;
10162 	xp->xb_victim_retry_count = 0;
10163 	xp->xb_ua_retry_count	= 0;
10164 	xp->xb_sense_bp		= NULL;
10165 	xp->xb_sense_status	= 0;
10166 	xp->xb_sense_state	= 0;
10167 	xp->xb_sense_resid	= 0;
10168 
10169 	bp->b_private	= xp;
10170 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10171 	bp->b_resid	= 0;
10172 	bp->av_forw	= NULL;
10173 	bp->av_back	= NULL;
10174 	bioerror(bp, 0);
10175 
10176 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10177 }
10178 
10179 
10180 /*
10181  *    Function: sd_uscsi_strategy
10182  *
10183  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10184  *
10185  *   Arguments: bp - buf struct ptr
10186  *
10187  * Return Code: Always returns 0
10188  *
10189  *     Context: Kernel thread context
10190  */
10191 
10192 static int
10193 sd_uscsi_strategy(struct buf *bp)
10194 {
10195 	struct sd_lun		*un;
10196 	struct sd_uscsi_info	*uip;
10197 	struct sd_xbuf		*xp;
10198 	uchar_t			chain_type;
10199 
10200 	ASSERT(bp != NULL);
10201 
10202 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10203 	if (un == NULL) {
10204 		bioerror(bp, EIO);
10205 		bp->b_resid = bp->b_bcount;
10206 		biodone(bp);
10207 		return (0);
10208 	}
10209 
10210 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10211 
10212 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10213 
10214 	mutex_enter(SD_MUTEX(un));
10215 	/*
10216 	 * atapi: Since we are running the CD in PIO mode for now, we need to
10217 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
10218 	 * context under the HBA's init_pkt routine.
10219 	 */
10220 	if (un->un_f_cfg_is_atapi == TRUE) {
10221 		mutex_exit(SD_MUTEX(un));
10222 		bp_mapin(bp);
10223 		mutex_enter(SD_MUTEX(un));
10224 	}
10225 	un->un_ncmds_in_driver++;
10226 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10227 	    un->un_ncmds_in_driver);
10228 	mutex_exit(SD_MUTEX(un));
10229 
10230 	/*
10231 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10232 	 */
10233 	ASSERT(bp->b_private != NULL);
10234 	uip = (struct sd_uscsi_info *)bp->b_private;
10235 
10236 	switch (uip->ui_flags) {
10237 	case SD_PATH_DIRECT:
10238 		chain_type = SD_CHAIN_DIRECT;
10239 		break;
10240 	case SD_PATH_DIRECT_PRIORITY:
10241 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10242 		break;
10243 	default:
10244 		chain_type = SD_CHAIN_USCSI;
10245 		break;
10246 	}
10247 
10248 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10249 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10250 
10251 	/* Use the index obtained within xbuf_init */
10252 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10253 
10254 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10255 
10256 	return (0);
10257 }
10258 
10259 /*
10260  *    Function: sd_send_scsi_cmd
10261  *
10262  * Description: Runs a USCSI command for user (when called thru sdioctl),
10263  *		or for the driver
10264  *
10265  *   Arguments: dev - the dev_t for the device
10266  *		incmd - ptr to a valid uscsi_cmd struct
10267  *		flag - bit flag, indicating open settings, 32/64 bit type
10268  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10269  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10270  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10271  *			to use the USCSI "direct" chain and bypass the normal
10272  *			command waitq.
10273  *
10274  * Return Code: 0 -  successful completion of the given command
10275  *		EIO - scsi_uscsi_handle_command() failed
10276  *		ENXIO  - soft state not found for specified dev
10277  *		EINVAL
10278  *		EFAULT - copyin/copyout error
10279  *		return code of scsi_uscsi_handle_command():
10280  *			EIO
10281  *			ENXIO
10282  *			EACCES
10283  *
10284  *     Context: Waits for command to complete. Can sleep.
10285  */
10286 
10287 static int
10288 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10289 	enum uio_seg dataspace, int path_flag)
10290 {
10291 	struct sd_uscsi_info	*uip;
10292 	struct uscsi_cmd	*uscmd;
10293 	struct sd_lun	*un;
10294 	int	format = 0;
10295 	int	rval;
10296 
10297 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10298 	if (un == NULL) {
10299 		return (ENXIO);
10300 	}
10301 
10302 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10303 
10304 #ifdef SDDEBUG
10305 	switch (dataspace) {
10306 	case UIO_USERSPACE:
10307 		SD_TRACE(SD_LOG_IO, un,
10308 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10309 		break;
10310 	case UIO_SYSSPACE:
10311 		SD_TRACE(SD_LOG_IO, un,
10312 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10313 		break;
10314 	default:
10315 		SD_TRACE(SD_LOG_IO, un,
10316 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10317 		break;
10318 	}
10319 #endif
10320 
10321 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10322 	    SD_ADDRESS(un), &uscmd);
10323 	if (rval != 0) {
10324 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
10325 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
10326 		return (rval);
10327 	}
10328 
10329 	if ((uscmd->uscsi_cdb != NULL) &&
10330 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10331 		mutex_enter(SD_MUTEX(un));
10332 		un->un_f_format_in_progress = TRUE;
10333 		mutex_exit(SD_MUTEX(un));
10334 		format = 1;
10335 	}
10336 
10337 	/*
10338 	 * Allocate an sd_uscsi_info struct and fill it with the info
10339 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10340 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10341 	 * since we allocate the buf here in this function, we do not
10342 	 * need to preserve the prior contents of b_private.
10343 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10344 	 */
10345 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10346 	uip->ui_flags = path_flag;
10347 	uip->ui_cmdp = uscmd;
10348 
10349 	/*
10350 	 * Commands sent with priority are intended for error recovery
10351 	 * situations, and do not have retries performed.
10352 	 */
10353 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10354 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10355 	}
10356 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10357 
10358 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10359 	    sd_uscsi_strategy, NULL, uip);
10360 
10361 #ifdef SDDEBUG
10362 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10363 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10364 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10365 	if (uscmd->uscsi_bufaddr != NULL) {
10366 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10367 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10368 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10369 		if (dataspace == UIO_SYSSPACE) {
10370 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10371 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10372 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10373 		}
10374 	}
10375 #endif
10376 
10377 	if (format == 1) {
10378 		mutex_enter(SD_MUTEX(un));
10379 		un->un_f_format_in_progress = FALSE;
10380 		mutex_exit(SD_MUTEX(un));
10381 	}
10382 
10383 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10384 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10385 
10386 	return (rval);
10387 }
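/*
 * Usage sketch (hypothetical, for illustration only): a kernel-internal
 * caller would fill in a uscsi_cmd and run it through sd_send_scsi_cmd()
 * on the direct chain. The helper name, flag choices, and timeout below
 * are assumptions, not taken from this file.
 */
#ifdef SD_USCSI_USAGE_SKETCH
static int
sd_example_test_unit_ready(dev_t dev)
{
	struct uscsi_cmd	ucmd;
	union scsi_cdb		cdb;

	bzero(&ucmd, sizeof (ucmd));
	bzero(&cdb, sizeof (cdb));
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd.uscsi_cdb		= (caddr_t)&cdb;
	ucmd.uscsi_cdblen	= CDB_GROUP0;
	ucmd.uscsi_bufaddr	= NULL;
	ucmd.uscsi_buflen	= 0;
	ucmd.uscsi_flags	= USCSI_SILENT;
	ucmd.uscsi_timeout	= 60;

	/* Kernel caller: the command and any data live in system space */
	return (sd_send_scsi_cmd(dev, &ucmd, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_DIRECT));
}
#endif	/* SD_USCSI_USAGE_SKETCH */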
10388 
10389 
10390 /*
10391  *    Function: sd_buf_iodone
10392  *
10393  * Description: Frees the sd_xbuf & returns the buf to its originator.
10394  *
10395  *     Context: May be called from interrupt context.
10396  */
10397 /* ARGSUSED */
10398 static void
10399 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10400 {
10401 	struct sd_xbuf *xp;
10402 
10403 	ASSERT(un != NULL);
10404 	ASSERT(bp != NULL);
10405 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10406 
10407 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10408 
10409 	xp = SD_GET_XBUF(bp);
10410 	ASSERT(xp != NULL);
10411 
10412 	mutex_enter(SD_MUTEX(un));
10413 
10414 	/*
10415 	 * Grab the time when the cmd completed.
10416 	 * This is used for determining if the system has been
10417 	 * idle long enough to declare it idle to the PM framework.
10418 	 * This lowers the overhead, and therefore improves
10419 	 * performance per I/O operation.
10420 	 */
10421 	un->un_pm_idle_time = ddi_get_time();
10422 
10423 	un->un_ncmds_in_driver--;
10424 	ASSERT(un->un_ncmds_in_driver >= 0);
10425 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10426 	    un->un_ncmds_in_driver);
10427 
10428 	mutex_exit(SD_MUTEX(un));
10429 
10430 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10431 	biodone(bp);				/* bp is gone after this */
10432 
10433 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10434 }
10435 
10436 
10437 /*
10438  *    Function: sd_uscsi_iodone
10439  *
10440  * Description: Frees the sd_xbuf & returns the buf to its originator.
10441  *
10442  *     Context: May be called from interrupt context.
10443  */
10444 /* ARGSUSED */
10445 static void
10446 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10447 {
10448 	struct sd_xbuf *xp;
10449 
10450 	ASSERT(un != NULL);
10451 	ASSERT(bp != NULL);
10452 
10453 	xp = SD_GET_XBUF(bp);
10454 	ASSERT(xp != NULL);
10455 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10456 
10457 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10458 
10459 	bp->b_private = xp->xb_private;
10460 
10461 	mutex_enter(SD_MUTEX(un));
10462 
10463 	/*
10464 	 * Grab the time when the cmd completed.
10465 	 * This is used for determining if the system has been
10466 	 * idle long enough to declare it idle to the PM framework.
10467 	 * This lowers the overhead, and therefore improves
10468 	 * performance per I/O operation.
10469 	 */
10470 	un->un_pm_idle_time = ddi_get_time();
10471 
10472 	un->un_ncmds_in_driver--;
10473 	ASSERT(un->un_ncmds_in_driver >= 0);
10474 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10475 	    un->un_ncmds_in_driver);
10476 
10477 	mutex_exit(SD_MUTEX(un));
10478 
10479 	kmem_free(xp, sizeof (struct sd_xbuf));
10480 	biodone(bp);
10481 
10482 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10483 }
10484 
10485 
10486 /*
10487  *    Function: sd_mapblockaddr_iostart
10488  *
10489  * Description: Verify the request lies within the partition limits for
10490  *		the indicated minor device.  Issue an "overrun" buf if the
10491  *		request would exceed the partition range.  Convert the
10492  *		partition-relative block address to an absolute one.
10493  *
10494  *     Context: Can sleep
10495  *
10496  *      Issues: This follows what the old code did, in terms of accessing
10497  *		some of the partition info in the unit struct without holding
10498  *		the mutex.  This is a general issue: if the partition info
10499  *		can be altered while IO is in progress... as soon as we send
10500  *		a buf, its partitioning can be invalid before it gets to the
10501  *		device.  Probably the right fix is to move partitioning out
10502  *		of the driver entirely.
10503  */
10504 
10505 static void
10506 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10507 {
10508 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10509 	daddr_t	blocknum;	/* Block number specified by the buf */
10510 	size_t	requested_nblocks;
10511 	size_t	available_nblocks;
10512 	int	partition;
10513 	diskaddr_t	partition_offset;
10514 	struct sd_xbuf *xp;
10515 
10516 
10517 	ASSERT(un != NULL);
10518 	ASSERT(bp != NULL);
10519 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10520 
10521 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10522 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10523 
10524 	xp = SD_GET_XBUF(bp);
10525 	ASSERT(xp != NULL);
10526 
10527 	/*
10528 	 * If the geometry is not indicated as valid, attempt to access
10529 	 * the unit & verify the geometry/label. This can be the case for
10530 	 * removable-media devices, or if the device was opened in
10531 	 * NDELAY/NONBLOCK mode.
10532 	 */
10533 	if (!SD_IS_VALID_LABEL(un) &&
10534 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10535 		/*
10536 		 * For removable devices it is possible to start an I/O
10537 		 * without media by opening the device in nodelay mode.
10538 		 * Also, for writable CDs there can be many scenarios where
10539 		 * there is no geometry yet but the volume manager is trying
10540 		 * to issue a read() just because it can see the TOC on the
10541 		 * CD. So do not print a message for removables.
10542 		 */
10543 		if (!un->un_f_has_removable_media) {
10544 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10545 			    "i/o to invalid geometry\n");
10546 		}
10547 		bioerror(bp, EIO);
10548 		bp->b_resid = bp->b_bcount;
10549 		SD_BEGIN_IODONE(index, un, bp);
10550 		return;
10551 	}
10552 
10553 	partition = SDPART(bp->b_edev);
10554 
10555 	nblocks = 0;
10556 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10557 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10558 
10559 	/*
10560 	 * blocknum is the starting block number of the request. At this
10561 	 * point it is still relative to the start of the minor device.
10562 	 */
10563 	blocknum = xp->xb_blkno;
10564 
10565 	/*
10566 	 * Legacy: If the starting block number is one past the last block
10567 	 * in the partition, do not set B_ERROR in the buf.
10568 	 */
10569 	if (blocknum == nblocks)  {
10570 		goto error_exit;
10571 	}
10572 
10573 	/*
10574 	 * Confirm that the first block of the request lies within the
10575 	 * partition limits. Also the requested number of bytes must be
10576 	 * a multiple of the system block size.
10577 	 */
10578 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10579 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10580 		bp->b_flags |= B_ERROR;
10581 		goto error_exit;
10582 	}
10583 
10584 	/*
10585 	 * If the requested # blocks exceeds the available # blocks, that
10586 	 * is an overrun of the partition.
10587 	 */
10588 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10589 	available_nblocks = (size_t)(nblocks - blocknum);
10590 	ASSERT(nblocks >= blocknum);
10591 
10592 	if (requested_nblocks > available_nblocks) {
10593 		/*
10594 		 * Allocate an "overrun" buf to allow the request to proceed
10595 		 * for the amount of space available in the partition. The
10596 		 * amount not transferred will be added into the b_resid
10597 		 * when the operation is complete. The overrun buf
10598 		 * replaces the original buf here, and the original buf
10599 		 * is saved inside the overrun buf, for later use.
10600 		 */
10601 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10602 		    (offset_t)(requested_nblocks - available_nblocks));
10603 		size_t count = bp->b_bcount - resid;
10604 		/*
10605 		 * Note: count is an unsigned entity, thus it'll NEVER
10606 		 * be less than 0, so ASSERT that the original values are
10607 		 * correct.
10608 		 */
10609 		ASSERT(bp->b_bcount >= resid);
10610 
10611 		bp = sd_bioclone_alloc(bp, count, blocknum,
10612 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
10613 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10614 		ASSERT(xp != NULL);
10615 	}
10616 
10617 	/* At this point there should be no residual for this buf. */
10618 	ASSERT(bp->b_resid == 0);
10619 
10620 	/* Convert the block number to an absolute address. */
10621 	xp->xb_blkno += partition_offset;
10622 
10623 	SD_NEXT_IOSTART(index, un, bp);
10624 
10625 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10626 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10627 
10628 	return;
10629 
10630 error_exit:
10631 	bp->b_resid = bp->b_bcount;
10632 	SD_BEGIN_IODONE(index, un, bp);
10633 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10634 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10635 }
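/*
 * Worked example of the partition-overrun arithmetic above, using
 * hypothetical numbers and assuming a 512-byte system block size. Not
 * part of the driver.
 */
#ifdef SD_OVERRUN_MATH_SKETCH
static void
sd_example_overrun_math(void)
{
	diskaddr_t	nblocks = 100;		/* blocks in the partition */
	diskaddr_t	partition_offset = 1000; /* abs. start of partition */
	daddr_t		blocknum = 96;		/* partition-relative start */
	size_t		b_bcount = 8 * 512;	/* request: 8 system blocks */
	size_t		requested_nblocks, available_nblocks;
	size_t		resid, count;

	requested_nblocks = b_bcount / 512;		  /* 8 */
	available_nblocks = (size_t)(nblocks - blocknum); /* 100 - 96 = 4 */

	/*
	 * Overrun: clone a buf sized for the part of the request that fits;
	 * the remainder shows up in b_resid when the IO completes.
	 */
	resid = (requested_nblocks - available_nblocks) * 512;	/* 2048 */
	count = b_bcount - resid;				/* 2048 */

	/* Finally the start block is made absolute: 96 + 1000 = 1096 */
	cmn_err(CE_NOTE, "count=%lu resid=%lu abs_blkno=%ld",
	    (ulong_t)count, (ulong_t)resid,
	    (long)(blocknum + (daddr_t)partition_offset));
}
#endif	/* SD_OVERRUN_MATH_SKETCH */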
10636 
10637 
10638 /*
10639  *    Function: sd_mapblockaddr_iodone
10640  *
10641  * Description: Completion-side processing for partition management.
10642  *
10643  *     Context: May be called under interrupt context
10644  */
10645 
10646 static void
10647 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10648 {
10649 	/* int	partition; */	/* Not used, see below. */
10650 	ASSERT(un != NULL);
10651 	ASSERT(bp != NULL);
10652 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10653 
10654 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10655 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10656 
10657 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10658 		/*
10659 		 * We have an "overrun" buf to deal with...
10660 		 */
10661 		struct sd_xbuf	*xp;
10662 		struct buf	*obp;	/* ptr to the original buf */
10663 
10664 		xp = SD_GET_XBUF(bp);
10665 		ASSERT(xp != NULL);
10666 
10667 		/* Retrieve the pointer to the original buf */
10668 		obp = (struct buf *)xp->xb_private;
10669 		ASSERT(obp != NULL);
10670 
10671 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10672 		bioerror(obp, bp->b_error);
10673 
10674 		sd_bioclone_free(bp);
10675 
10676 		/*
10677 		 * Get back the original buf.
10678 		 * Note that since the restoration of xb_blkno below
10679 		 * was removed, the sd_xbuf is not needed.
10680 		 */
10681 		bp = obp;
10682 		/*
10683 		 * xp = SD_GET_XBUF(bp);
10684 		 * ASSERT(xp != NULL);
10685 		 */
10686 	}
10687 
10688 	/*
10689 	 * Convert sd->xb_blkno back to a minor-device relative value.
10690 	 * Note: this has been commented out, as it is not needed in the
10691 	 * current implementation of the driver (ie, this function
10692 	 * is at the top of the layering chains, so the info will be
10693 	 * discarded) and it is in the "hot" IO path.
10694 	 *
10695 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10696 	 * xp->xb_blkno -= un->un_offset[partition];
10697 	 */
10698 
10699 	SD_NEXT_IODONE(index, un, bp);
10700 
10701 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10702 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10703 }
10704 
10705 
10706 /*
10707  *    Function: sd_mapblocksize_iostart
10708  *
10709  * Description: Convert between system block size (un->un_sys_blocksize)
10710  *		and target block size (un->un_tgt_blocksize).
10711  *
10712  *     Context: Can sleep to allocate resources.
10713  *
10714  * Assumptions: A higher layer has already performed any partition validation,
10715  *		and converted the xp->xb_blkno to an absolute value relative
10716  *		to the start of the device.
10717  *
10718  *		It is also assumed that the higher layer has implemented
10719  *		an "overrun" mechanism for the case where the request would
10720  *		read/write beyond the end of a partition.  In this case we
10721  *		assume (and ASSERT) that bp->b_resid == 0.
10722  *
10723  *		Note: The implementation for this routine assumes the target
10724  *		block size remains constant between allocation and transport.
10725  */
10726 
10727 static void
10728 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10729 {
10730 	struct sd_mapblocksize_info	*bsp;
10731 	struct sd_xbuf			*xp;
10732 	offset_t first_byte;
10733 	daddr_t	start_block, end_block;
10734 	daddr_t	request_bytes;
10735 	ushort_t is_aligned = FALSE;
10736 
10737 	ASSERT(un != NULL);
10738 	ASSERT(bp != NULL);
10739 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10740 	ASSERT(bp->b_resid == 0);
10741 
10742 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10743 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10744 
10745 	/*
10746 	 * For a non-writable CD, a write request is an error
10747 	 */
10748 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10749 	    (un->un_f_mmc_writable_media == FALSE)) {
10750 		bioerror(bp, EIO);
10751 		bp->b_resid = bp->b_bcount;
10752 		SD_BEGIN_IODONE(index, un, bp);
10753 		return;
10754 	}
10755 
10756 	/*
10757 	 * We do not need a shadow buf if the device is using
10758 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10759 	 * In this case there is no layer-private data block allocated.
10760 	 */
10761 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10762 	    (bp->b_bcount == 0)) {
10763 		goto done;
10764 	}
10765 
10766 #if defined(__i386) || defined(__amd64)
10767 	/* We do not support non-block-aligned transfers for ROD devices */
10768 	ASSERT(!ISROD(un));
10769 #endif
10770 
10771 	xp = SD_GET_XBUF(bp);
10772 	ASSERT(xp != NULL);
10773 
10774 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10775 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
10776 	    un->un_tgt_blocksize, un->un_sys_blocksize);
10777 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10778 	    "request start block:0x%x\n", xp->xb_blkno);
10779 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10780 	    "request len:0x%x\n", bp->b_bcount);
10781 
10782 	/*
10783 	 * Allocate the layer-private data area for the mapblocksize layer.
10784 	 * Layers are allowed to use the xb_private member of the sd_xbuf
10785 	 * struct to store the pointer to their layer-private data block, but
10786 	 * each layer also has the responsibility of restoring the prior
10787 	 * contents of xb_private before returning the buf/xbuf to the
10788 	 * higher layer that sent it.
10789 	 *
10790 	 * Here we save the prior contents of xp->xb_private into the
10791 	 * bsp->mbs_oprivate field of our layer-private data area. This value
10792 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
10793 	 * the layer-private area and returning the buf/xbuf to the layer
10794 	 * that sent it.
10795 	 *
10796 	 * Note that here we use kmem_zalloc for the allocation as there are
10797 	 * parts of the mapblocksize code that expect certain fields to be
10798 	 * zero unless explicitly set to a required value.
10799 	 */
10800 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10801 	bsp->mbs_oprivate = xp->xb_private;
10802 	xp->xb_private = bsp;
10803 
10804 	/*
10805 	 * This treats the data on the disk (target) as an array of bytes.
10806 	 * first_byte is the byte offset, from the beginning of the device,
10807 	 * to the location of the request. This is converted from a
10808 	 * un->un_sys_blocksize block address to a byte offset, and then back
10809 	 * to a block address based upon a un->un_tgt_blocksize block size.
10810 	 *
10811 	 * xp->xb_blkno should be absolute upon entry into this function,
10812 	 * but it is based upon partitions that use the "system"
10813 	 * block size. It must be adjusted to reflect the block size of
10814 	 * the target.
10815 	 *
10816 	 * Note that end_block is actually the block that follows the last
10817 	 * block of the request, but that's what is needed for the computation.
10818 	 */
10819 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
10820 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
10821 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
10822 	    un->un_tgt_blocksize;
10823 
10824 	/* request_bytes is rounded up to a multiple of the target block size */
10825 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
10826 
10827 	/*
10828 	 * See if the starting address of the request and the request
10829 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
10830 	 * then we do not need to allocate a shadow buf to handle the request.
10831 	 */
10832 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
10833 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
10834 		is_aligned = TRUE;
10835 	}
10836 
10837 	if ((bp->b_flags & B_READ) == 0) {
10838 		/*
10839 		 * Lock the range for a write operation. An aligned request is
10840 		 * considered a simple write; otherwise the request must be a
10841 		 * read-modify-write.
10842 		 */
10843 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
10844 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
10845 	}
10846 
10847 	/*
10848 	 * Alloc a shadow buf if the request is not aligned. Also, this is
10849 	 * where the READ command is generated for a read-modify-write. (The
10850 	 * write phase is deferred until after the read completes.)
10851 	 */
10852 	if (is_aligned == FALSE) {
10853 
10854 		struct sd_mapblocksize_info	*shadow_bsp;
10855 		struct sd_xbuf	*shadow_xp;
10856 		struct buf	*shadow_bp;
10857 
10858 		/*
10859 		 * Allocate the shadow buf and its associated xbuf. Note that
10860 		 * after this call the xb_blkno value in both the original
10861 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
10862 		 * same: absolute, relative to the start of the device, and
10863 		 * adjusted for the target block size. The b_blkno in the
10864 		 * shadow buf will also be set to this value. We should never
10865 		 * change b_blkno in the original bp however.
10866 		 *
10867 		 * Note also that the shadow buf will always need to be a
10868 		 * READ command, regardless of whether the incoming command
10869 		 * is a READ or a WRITE.
10870 		 */
10871 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
10872 		    xp->xb_blkno,
10873 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
10874 
10875 		shadow_xp = SD_GET_XBUF(shadow_bp);
10876 
10877 		/*
10878 		 * Allocate the layer-private data for the shadow buf.
10879 		 * (No need to preserve xb_private in the shadow xbuf.)
10880 		 */
10881 		shadow_xp->xb_private = shadow_bsp =
10882 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10883 
10884 		/*
10885 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
10886 		 * to figure out where the start of the user data is (based upon
10887 		 * the system block size) in the data returned by the READ
10888 		 * command (which will be based upon the target blocksize). Note
10889 		 * that this is only really used if the request is unaligned.
10890 		 */
10891 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
10892 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
10893 		ASSERT((bsp->mbs_copy_offset >= 0) &&
10894 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
10895 
10896 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
10897 
10898 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
10899 
10900 		/* Transfer the wmap (if any) to the shadow buf */
10901 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
10902 		bsp->mbs_wmp = NULL;
10903 
10904 		/*
10905 		 * The shadow buf goes on from here in place of the
10906 		 * original buf.
10907 		 */
10908 		shadow_bsp->mbs_orig_bp = bp;
10909 		bp = shadow_bp;
10910 	}
10911 
10912 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10913 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
10914 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10915 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
10916 	    request_bytes);
10917 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10918 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
10919 
10920 done:
10921 	SD_NEXT_IOSTART(index, un, bp);
10922 
10923 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10924 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
10925 }
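/*
 * Worked example of the block size conversion above, using hypothetical
 * numbers: un_sys_blocksize = 512, un_tgt_blocksize = 2048, and a
 * 1024-byte request starting at (absolute) system block 5. Not part of
 * the driver.
 */
#ifdef SD_BLOCKSIZE_MATH_SKETCH
static void
sd_example_blocksize_math(void)
{
	offset_t	first_byte;
	daddr_t		start_block, end_block;
	daddr_t		request_bytes;
	ssize_t		copy_offset;
	daddr_t		xb_blkno = 5;	/* absolute, in 512-byte blocks */
	size_t		b_bcount = 1024; /* request length in bytes */
	uint32_t	tgt_bs = 2048;	/* target block size */

	first_byte  = (offset_t)xb_blkno * 512;			/* 2560 */
	start_block = first_byte / tgt_bs;			/* 1 */
	end_block   = (first_byte + b_bcount + tgt_bs - 1) / tgt_bs; /* 2 */
	request_bytes = (end_block - start_block) * tgt_bs;	/* 2048 */

	/*
	 * first_byte % tgt_bs == 512, so the request is NOT aligned: a
	 * 2048-byte shadow READ is issued for target block 1, and the
	 * caller's data starts at this offset within the shadow buffer:
	 */
	copy_offset = (ssize_t)(first_byte -
	    ((offset_t)start_block * tgt_bs));			/* 512 */

	cmn_err(CE_NOTE, "request_bytes=%ld copy_offset=%ld",
	    (long)request_bytes, (long)copy_offset);
}
#endif	/* SD_BLOCKSIZE_MATH_SKETCH */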
10926 
10927 
10928 /*
10929  *    Function: sd_mapblocksize_iodone
10930  *
10931  * Description: Completion side processing for block-size mapping.
10932  *
10933  *     Context: May be called under interrupt context
10934  */
10935 
10936 static void
10937 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
10938 {
10939 	struct sd_mapblocksize_info	*bsp;
10940 	struct sd_xbuf	*xp;
10941 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
10942 	struct buf	*orig_bp;	/* ptr to the original buf */
10943 	offset_t	shadow_end;
10944 	offset_t	request_end;
10945 	offset_t	shadow_start;
10946 	ssize_t		copy_offset;
10947 	size_t		copy_length;
10948 	size_t		shortfall;
10949 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
10950 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
10951 
10952 	ASSERT(un != NULL);
10953 	ASSERT(bp != NULL);
10954 
10955 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10956 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
10957 
10958 	/*
10959 	 * There is no shadow buf or layer-private data if the target is
10960 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
10961 	 */
10962 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10963 	    (bp->b_bcount == 0)) {
10964 		goto exit;
10965 	}
10966 
10967 	xp = SD_GET_XBUF(bp);
10968 	ASSERT(xp != NULL);
10969 
10970 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
10971 	bsp = xp->xb_private;
10972 
10973 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
10974 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
10975 
10976 	if (is_write) {
10977 		/*
10978 		 * For a WRITE request we must free up the block range that
10979 		 * we have locked up.  This holds regardless of whether this is
10980 		 * an aligned write request or a read-modify-write request.
10981 		 */
10982 		sd_range_unlock(un, bsp->mbs_wmp);
10983 		bsp->mbs_wmp = NULL;
10984 	}
10985 
10986 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
10987 		/*
10988 		 * An aligned read or write command will have no shadow buf;
10989 		 * there is not much else to do with it.
10990 		 */
10991 		goto done;
10992 	}
10993 
10994 	orig_bp = bsp->mbs_orig_bp;
10995 	ASSERT(orig_bp != NULL);
10996 	orig_xp = SD_GET_XBUF(orig_bp);
10997 	ASSERT(orig_xp != NULL);
10998 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10999 
11000 	if (!is_write && has_wmap) {
11001 		/*
11002 		 * A READ with a wmap means this is the READ phase of a
11003 		 * read-modify-write. If an error occurred on the READ then
11004 		 * we do not proceed with the WRITE phase or copy any data.
11005 		 * Just release the write maps and return with an error.
11006 		 */
11007 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
11008 			orig_bp->b_resid = orig_bp->b_bcount;
11009 			bioerror(orig_bp, bp->b_error);
11010 			sd_range_unlock(un, bsp->mbs_wmp);
11011 			goto freebuf_done;
11012 		}
11013 	}
11014 
11015 	/*
11016 	 * Here is where we set up to copy the data from the shadow buf
11017 	 * into the space associated with the original buf.
11018 	 *
11019 	 * To deal with the conversion between block sizes, these
11020 	 * computations treat the data as an array of bytes, with the
11021 	 * first byte (byte 0) corresponding to the first byte in the
11022 	 * first block on the disk.
11023 	 */
11024 
11025 	/*
11026 	 * shadow_start and shadow_end delimit the location and extent of
11027 	 * the data returned with the shadow IO request.
11028 	 */
11029 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11030 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11031 
11032 	/*
11033 	 * copy_offset gives the offset (in bytes) from the start of the first
11034 	 * block of the READ request to the beginning of the data.  We retrieve
11035 	 * this value from mbs_copy_offset in the shadow buf's layer-private
11036 	 * data, where it was saved by sd_mapblocksize_iostart(). copy_length
11037 	 * gives the amount of data to be copied (in bytes).
11038 	 */
11039 	copy_offset  = bsp->mbs_copy_offset;
11040 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11041 	copy_length  = orig_bp->b_bcount;
11042 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11043 
11044 	/*
11045 	 * Set up the resid and error fields of orig_bp as appropriate.
11046 	 */
11047 	if (shadow_end >= request_end) {
11048 		/* We got all the requested data; set resid to zero */
11049 		orig_bp->b_resid = 0;
11050 	} else {
11051 		/*
11052 		 * We failed to get enough data to fully satisfy the original
11053 		 * request. Just copy back whatever data we got and set
11054 		 * up the residual and error code as required.
11055 		 *
11056 		 * 'shortfall' is the amount by which the data received with the
11057 		 * shadow buf has "fallen short" of the requested amount.
11058 		 */
11059 		shortfall = (size_t)(request_end - shadow_end);
11060 
11061 		if (shortfall > orig_bp->b_bcount) {
11062 			/*
11063 			 * We did not get enough data to even partially
11064 			 * fulfill the original request.  The residual is
11065 			 * equal to the amount requested.
11066 			 */
11067 			orig_bp->b_resid = orig_bp->b_bcount;
11068 		} else {
11069 			/*
11070 			 * We did not get all the data that we requested
11071 			 * from the device, but we will try to return what
11072 			 * portion we did get.
11073 			 */
11074 			orig_bp->b_resid = shortfall;
11075 		}
11076 		ASSERT(copy_length >= orig_bp->b_resid);
11077 		copy_length  -= orig_bp->b_resid;
11078 	}
11079 
11080 	/* Propagate the error code from the shadow buf to the original buf */
11081 	bioerror(orig_bp, bp->b_error);
11082 
11083 	if (is_write) {
11084 		goto freebuf_done;	/* No data copying for a WRITE */
11085 	}
11086 
11087 	if (has_wmap) {
11088 		/*
11089 		 * This is a READ command from the READ phase of a
11090 		 * read-modify-write request. We have to copy the data given
11091 		 * by the user OVER the data returned by the READ command,
11092 		 * then convert the command from a READ to a WRITE and send
11093 		 * it back to the target.
11094 		 */
11095 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11096 		    copy_length);
11097 
11098 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11099 
11100 		/*
11101 		 * Dispatch the WRITE command to the taskq thread, which
11102 		 * will in turn send the command to the target. When the
11103 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11104 		 * will get called again as part of the iodone chain
11105 		 * processing for it. Note that we will still be dealing
11106 		 * with the shadow buf at that point.
11107 		 */
11108 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11109 		    KM_NOSLEEP) != 0) {
11110 			/*
11111 			 * Dispatch was successful so we are done. Return
11112 			 * without going any higher up the iodone chain. Do
11113 			 * not free up any layer-private data until after the
11114 			 * WRITE completes.
11115 			 */
11116 			return;
11117 		}
11118 
11119 		/*
11120 		 * Dispatch of the WRITE command failed; set up the error
11121 		 * condition and send this IO back up the iodone chain.
11122 		 */
11123 		bioerror(orig_bp, EIO);
11124 		orig_bp->b_resid = orig_bp->b_bcount;
11125 
11126 	} else {
11127 		/*
11128 		 * This is a regular READ request (ie, not a RMW). Copy the
11129 		 * data from the shadow buf into the original buf. The
11130 		 * copy_offset compensates for any "misalignment" between the
11131 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11132 		 * original buf (with its un->un_sys_blocksize blocks).
11133 		 */
11134 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11135 		    copy_length);
11136 	}
11137 
11138 freebuf_done:
11139 
11140 	/*
11141 	 * At this point we still have both the shadow buf AND the original
11142 	 * buf to deal with, as well as the layer-private data area in each.
11143 	 * Local variables are as follows:
11144 	 *
11145 	 * bp -- points to shadow buf
11146 	 * xp -- points to xbuf of shadow buf
11147 	 * bsp -- points to layer-private data area of shadow buf
11148 	 * orig_bp -- points to original buf
11149 	 *
11150 	 * First free the shadow buf and its associated xbuf, then free the
11151 	 * layer-private data area from the shadow buf. There is no need to
11152 	 * restore xb_private in the shadow xbuf.
11153 	 */
11154 	sd_shadow_buf_free(bp);
11155 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11156 
11157 	/*
11158 	 * Now update the local variables to point to the original buf, xbuf,
11159 	 * and layer-private area.
11160 	 */
11161 	bp = orig_bp;
11162 	xp = SD_GET_XBUF(bp);
11163 	ASSERT(xp != NULL);
11164 	ASSERT(xp == orig_xp);
11165 	bsp = xp->xb_private;
11166 	ASSERT(bsp != NULL);
11167 
11168 done:
11169 	/*
11170 	 * Restore xb_private to whatever it was set to by the next higher
11171 	 * layer in the chain, then free the layer-private data area.
11172 	 */
11173 	xp->xb_private = bsp->mbs_oprivate;
11174 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11175 
11176 exit:
11177 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11178 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11179 
11180 	SD_NEXT_IODONE(index, un, bp);
11181 }
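/*
 * Worked example of the residual bookkeeping above, using hypothetical
 * numbers (un_tgt_blocksize = 2048): suppose the shadow READ for target
 * block 1 asked for 2048 bytes but came back with b_resid = 1024, while
 * the original request wanted 1024 bytes at copy_offset = 512. Not part
 * of the driver.
 */
#ifdef SD_SHORTFALL_MATH_SKETCH
static void
sd_example_shortfall_math(void)
{
	offset_t	shadow_start, shadow_end, request_end;
	size_t		shortfall, copy_length;
	size_t		orig_bcount = 1024;	/* original request bytes */
	ssize_t		copy_offset = 512;	/* from the iostart side */

	shadow_start = 1 * 2048;			/* 2048 */
	shadow_end   = shadow_start + 2048 - 1024;	/* 3072 */
	request_end  = shadow_start + copy_offset + orig_bcount; /* 3584 */

	/* The shadow IO fell 512 bytes short of the original request */
	shortfall = (size_t)(request_end - shadow_end);	/* 512 */

	/* orig_bp->b_resid = 512; copy back only what was received */
	copy_length = orig_bcount - shortfall;		/* 512 */

	cmn_err(CE_NOTE, "shortfall=%lu copy_length=%lu",
	    (ulong_t)shortfall, (ulong_t)copy_length);
}
#endif	/* SD_SHORTFALL_MATH_SKETCH */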
11182 
11183 
11184 /*
11185  *    Function: sd_checksum_iostart
11186  *
11187  * Description: A stub function for a layer that's currently not used.
11188  *		For now just a placeholder.
11189  *
11190  *     Context: Kernel thread context
11191  */
11192 
11193 static void
11194 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11195 {
11196 	ASSERT(un != NULL);
11197 	ASSERT(bp != NULL);
11198 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11199 	SD_NEXT_IOSTART(index, un, bp);
11200 }
11201 
11202 
11203 /*
11204  *    Function: sd_checksum_iodone
11205  *
11206  * Description: A stub function for a layer that's currently not used.
11207  *		For now just a placeholder.
11208  *
11209  *     Context: May be called under interrupt context
11210  */
11211 
11212 static void
11213 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11214 {
11215 	ASSERT(un != NULL);
11216 	ASSERT(bp != NULL);
11217 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11218 	SD_NEXT_IODONE(index, un, bp);
11219 }
11220 
11221 
11222 /*
11223  *    Function: sd_checksum_uscsi_iostart
11224  *
11225  * Description: A stub function for a layer that's currently not used.
11226  *		For now just a placeholder.
11227  *
11228  *     Context: Kernel thread context
11229  */
11230 
11231 static void
11232 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11233 {
11234 	ASSERT(un != NULL);
11235 	ASSERT(bp != NULL);
11236 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11237 	SD_NEXT_IOSTART(index, un, bp);
11238 }
11239 
11240 
11241 /*
11242  *    Function: sd_checksum_uscsi_iodone
11243  *
11244  * Description: A stub function for a layer that's currently not used.
11245  *		For now just a placeholder.
11246  *
11247  *     Context: May be called under interrupt context
11248  */
11249 
11250 static void
11251 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11252 {
11253 	ASSERT(un != NULL);
11254 	ASSERT(bp != NULL);
11255 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11256 	SD_NEXT_IODONE(index, un, bp);
11257 }
11258 
11259 
11260 /*
11261  *    Function: sd_pm_iostart
11262  *
11263  * Description: iostart-side routine for power management.
11264  *
11265  *     Context: Kernel thread context
11266  */
11267 
11268 static void
11269 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11270 {
11271 	ASSERT(un != NULL);
11272 	ASSERT(bp != NULL);
11273 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11274 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11275 
11276 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11277 
11278 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11279 		/*
11280 		 * Set up to return the failed buf back up the 'iodone'
11281 		 * side of the calling chain.
11282 		 */
11283 		bioerror(bp, EIO);
11284 		bp->b_resid = bp->b_bcount;
11285 
11286 		SD_BEGIN_IODONE(index, un, bp);
11287 
11288 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11289 		return;
11290 	}
11291 
11292 	SD_NEXT_IOSTART(index, un, bp);
11293 
11294 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11295 }
11296 
11297 
11298 /*
11299  *    Function: sd_pm_iodone
11300  *
11301  * Description: iodone-side routine for power management.
11302  *
11303  *     Context: may be called from interrupt context
11304  */
11305 
11306 static void
11307 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11308 {
11309 	ASSERT(un != NULL);
11310 	ASSERT(bp != NULL);
11311 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11312 
11313 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11314 
11315 	/*
11316 	 * After attach the following flag is only read, so don't
11317 	 * take the penalty of acquiring a mutex for it.
11318 	 */
11319 	if (un->un_f_pm_is_enabled == TRUE) {
11320 		sd_pm_exit(un);
11321 	}
11322 
11323 	SD_NEXT_IODONE(index, un, bp);
11324 
11325 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11326 }
11327 
11328 
11329 /*
11330  *    Function: sd_core_iostart
11331  *
11332  * Description: Primary driver function for enqueuing buf(9S) structs from
11333  *		the system and initiating IO to the target device
11334  *
11335  *     Context: Kernel thread context. Can sleep.
11336  *
11337  * Assumptions:  - The given xp->xb_blkno is absolute
11338  *		   (ie, relative to the start of the device).
11339  *		 - The IO is to be done using the native blocksize of
11340  *		   the device, as specified in un->un_tgt_blocksize.
11341  */
11342 /* ARGSUSED */
11343 static void
11344 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11345 {
11346 	struct sd_xbuf *xp;
11347 
11348 	ASSERT(un != NULL);
11349 	ASSERT(bp != NULL);
11350 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11351 	ASSERT(bp->b_resid == 0);
11352 
11353 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11354 
11355 	xp = SD_GET_XBUF(bp);
11356 	ASSERT(xp != NULL);
11357 
11358 	mutex_enter(SD_MUTEX(un));
11359 
11360 	/*
11361 	 * If we are currently in the failfast state, fail any new IO
11362 	 * that has B_FAILFAST set, then return.
11363 	 */
11364 	if ((bp->b_flags & B_FAILFAST) &&
11365 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11366 		mutex_exit(SD_MUTEX(un));
11367 		bioerror(bp, EIO);
11368 		bp->b_resid = bp->b_bcount;
11369 		SD_BEGIN_IODONE(index, un, bp);
11370 		return;
11371 	}
11372 
11373 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11374 		/*
11375 		 * Priority command -- transport it immediately.
11376 		 *
11377 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11378 		 * because all direct priority commands should be associated
11379 		 * with error recovery actions which we don't want to retry.
11380 		 */
11381 		sd_start_cmds(un, bp);
11382 	} else {
11383 		/*
11384 		 * Normal command -- add it to the wait queue, then start
11385 		 * transporting commands from the wait queue.
11386 		 */
11387 		sd_add_buf_to_waitq(un, bp);
11388 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11389 		sd_start_cmds(un, NULL);
11390 	}
11391 
11392 	mutex_exit(SD_MUTEX(un));
11393 
11394 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11395 }
11396 
11397 
11398 /*
11399  *    Function: sd_init_cdb_limits
11400  *
11401  * Description: This is to handle scsi_pkt initialization differences
11402  *		between the driver platforms.
11403  *
11404  *		Legacy behaviors:
11405  *
11406  *		If the block number or the sector count exceeds the
11407  *		capabilities of a Group 0 command, shift over to a
11408  *		Group 1 command. We don't blindly use Group 1
11409  *		commands because a) some drives (CDC Wren IVs) get a
11410  *		bit confused, and b) there is probably a fair amount
11411  *		of speed difference for a target to receive and decode
11412  *		a 10 byte command instead of a 6 byte command.
11413  *
11414  *		The xfer time difference of 6 vs 10 byte CDBs is
11415  *		still significant so this code is still worthwhile.
11416  *		10 byte CDBs are very inefficient with the fas HBA driver
11417  *		and older disks. Each CDB byte took 1 usec with some
11418  *		popular disks.
11419  *
11420  *     Context: Must be called at attach time
11421  */
11422 
11423 static void
11424 sd_init_cdb_limits(struct sd_lun *un)
11425 {
11426 	int hba_cdb_limit;
11427 
11428 	/*
11429 	 * Use CDB_GROUP1 commands for most devices except for
11430 	 * parallel SCSI fixed drives in which case we get better
11431 	 * performance using CDB_GROUP0 commands (where applicable).
11432 	 */
11433 	un->un_mincdb = SD_CDB_GROUP1;
11434 #if !defined(__fibre)
11435 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11436 	    !un->un_f_has_removable_media) {
11437 		un->un_mincdb = SD_CDB_GROUP0;
11438 	}
11439 #endif
11440 
11441 	/*
11442 	 * Try to read the max-cdb-length supported by HBA.
11443 	 */
11444 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11445 	if (0 >= un->un_max_hba_cdb) {
11446 		un->un_max_hba_cdb = CDB_GROUP4;
11447 		hba_cdb_limit = SD_CDB_GROUP4;
11448 	} else if (0 < un->un_max_hba_cdb &&
11449 	    un->un_max_hba_cdb < CDB_GROUP1) {
11450 		hba_cdb_limit = SD_CDB_GROUP0;
11451 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11452 	    un->un_max_hba_cdb < CDB_GROUP5) {
11453 		hba_cdb_limit = SD_CDB_GROUP1;
11454 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11455 	    un->un_max_hba_cdb < CDB_GROUP4) {
11456 		hba_cdb_limit = SD_CDB_GROUP5;
11457 	} else {
11458 		hba_cdb_limit = SD_CDB_GROUP4;
11459 	}
11460 
11461 	/*
11462 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11463 	 * commands for fixed disks unless we are building for a 32 bit
11464 	 * kernel.
11465 	 */
11466 #ifdef _LP64
11467 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11468 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11469 #else
11470 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11471 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11472 #endif
11473 
11474 	/*
11475 	 * x86 systems require the PKT_DMA_PARTIAL flag
11476 	 */
11477 #if defined(__x86)
11478 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11479 #else
11480 	un->un_pkt_flags = 0;
11481 #endif
11482 
11483 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11484 	    ? sizeof (struct scsi_arq_status) : 1);
11485 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11486 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11487 }
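/*
 * Illustrative example (assuming the default sd_io_time of 60 seconds):
 * after sd_init_cdb_limits() runs, un_cmd_timeout is 60 seconds for all
 * devices, and un_uscsi_timeout is 120 seconds for CD-ROM devices
 * (ISCD) versus 60 seconds for everything else.
 */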
11488 
11489 
11490 /*
11491  *    Function: sd_initpkt_for_buf
11492  *
11493  * Description: Allocate and initialize for transport a scsi_pkt struct,
11494  *		based upon the info specified in the given buf struct.
11495  *
11496  *		Assumes the xb_blkno in the request is absolute (i.e.,
11497  *		relative to the start of the device, NOT the partition).
11498  *		Also assumes that the request uses the native block
11499  *		size of the device (as returned by the READ CAPACITY
11500  *		command).
11501  *
11502  * Return Code: SD_PKT_ALLOC_SUCCESS
11503  *		SD_PKT_ALLOC_FAILURE
11504  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11505  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11506  *
11507  *     Context: Kernel thread and may be called from software interrupt context
11508  *		as part of a sdrunout callback. This function may not block or
11509  *		call routines that block
11510  */
11511 
11512 static int
11513 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11514 {
11515 	struct sd_xbuf	*xp;
11516 	struct scsi_pkt *pktp = NULL;
11517 	struct sd_lun	*un;
11518 	size_t		blockcount;
11519 	daddr_t		startblock;
11520 	int		rval;
11521 	int		cmd_flags;
11522 
11523 	ASSERT(bp != NULL);
11524 	ASSERT(pktpp != NULL);
11525 	xp = SD_GET_XBUF(bp);
11526 	ASSERT(xp != NULL);
11527 	un = SD_GET_UN(bp);
11528 	ASSERT(un != NULL);
11529 	ASSERT(mutex_owned(SD_MUTEX(un)));
11530 	ASSERT(bp->b_resid == 0);
11531 
11532 	SD_TRACE(SD_LOG_IO_CORE, un,
11533 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11534 
11535 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11536 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11537 		/*
11538 		 * Already have a scsi_pkt -- just need DMA resources.
11539 		 * We must recompute the CDB in case the mapping returns
11540 		 * a nonzero pkt_resid.
11541 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11542 		 * that is being retried, the unmap/remap of the DMA resources
11543 		 * will result in the entire transfer starting over again
11544 		 * from the very first block.
11545 		 */
11546 		ASSERT(xp->xb_pktp != NULL);
11547 		pktp = xp->xb_pktp;
11548 	} else {
11549 		pktp = NULL;
11550 	}
11551 #endif /* __i386 || __amd64 */
11552 
11553 	startblock = xp->xb_blkno;	/* Absolute block num. */
11554 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11555 
11556 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11557 
11558 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11559 
11560 #else
11561 
11562 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11563 
11564 #endif
11565 
11566 	/*
11567 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11568 	 * call scsi_init_pkt, and build the CDB.
11569 	 */
11570 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11571 	    cmd_flags, sdrunout, (caddr_t)un,
11572 	    startblock, blockcount);
11573 
11574 	if (rval == 0) {
11575 		/*
11576 		 * Success.
11577 		 *
11578 		 * If partial DMA is being used and is required for this
11579 		 * transfer, set it up here.
11580 		 */
11581 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11582 		    (pktp->pkt_resid != 0)) {
11583 
11584 			/*
11585 			 * Save the pkt_resid for the next xfer
11587 			 */
11588 			xp->xb_dma_resid = pktp->pkt_resid;
11589 
11590 			/* rezero resid */
11591 			pktp->pkt_resid = 0;
11592 
11593 		} else {
11594 			xp->xb_dma_resid = 0;
11595 		}
11596 
11597 		pktp->pkt_flags = un->un_tagflags;
11598 		pktp->pkt_time  = un->un_cmd_timeout;
11599 		pktp->pkt_comp  = sdintr;
11600 
11601 		pktp->pkt_private = bp;
11602 		*pktpp = pktp;
11603 
11604 		SD_TRACE(SD_LOG_IO_CORE, un,
11605 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11606 
11607 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11608 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11609 #endif
11610 
11611 		return (SD_PKT_ALLOC_SUCCESS);
11612 
11613 	}
11614 
11615 	/*
11616 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11617 	 * from sd_setup_rw_pkt.
11618 	 */
11619 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11620 
11621 	if (rval == SD_PKT_ALLOC_FAILURE) {
11622 		*pktpp = NULL;
11623 		/*
11624 		 * Set the driver state to RWAIT to indicate the driver
11625 		 * is waiting on resource allocations. The driver will not
11626 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11627 		 */
11628 		New_state(un, SD_STATE_RWAIT);
11629 
11630 		SD_ERROR(SD_LOG_IO_CORE, un,
11631 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11632 
11633 		if ((bp->b_flags & B_ERROR) != 0) {
11634 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11635 		}
11636 		return (SD_PKT_ALLOC_FAILURE);
11637 	} else {
11638 		/*
11639 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11640 		 *
11641 		 * This should never happen.  Maybe someone messed with the
11642 		 * kernel's minphys?
11643 		 */
11644 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11645 		    "Request rejected: too large for CDB: "
11646 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11647 		SD_ERROR(SD_LOG_IO_CORE, un,
11648 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11649 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11650 
11651 	}
11652 }
11653 
11654 
11655 /*
11656  *    Function: sd_destroypkt_for_buf
11657  *
11658  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11659  *
11660  *     Context: Kernel thread or interrupt context
11661  */
11662 
11663 static void
11664 sd_destroypkt_for_buf(struct buf *bp)
11665 {
11666 	ASSERT(bp != NULL);
11667 	ASSERT(SD_GET_UN(bp) != NULL);
11668 
11669 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11670 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11671 
11672 	ASSERT(SD_GET_PKTP(bp) != NULL);
11673 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11674 
11675 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11676 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11677 }
11678 
11679 /*
11680  *    Function: sd_setup_rw_pkt
11681  *
11682  * Description: Determines appropriate CDB group for the requested LBA
11683  *		and transfer length, calls scsi_init_pkt, and builds
11684  *		the CDB.  Do not use for partial DMA transfers except
11685  *		for the initial transfer since the CDB size must
11686  *		remain constant.
11687  *
11688  *     Context: Kernel thread and may be called from software interrupt
11689  *		context as part of a sdrunout callback. This function may not
11690  *		block or call routines that block
11691  */
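/*
 * For reference, the per-group addressing limits that drive the
 * sd_cdbtab selection below follow the standard SCSI READ/WRITE
 * command definitions (a sketch; the exact values live in sd_cdbtab):
 *
 *	CDB_GROUP0 (6-byte):  21-bit LBA, up to 256 blocks
 *	CDB_GROUP1 (10-byte): 32-bit LBA, up to 65535 blocks
 *	CDB_GROUP5 (12-byte): 32-bit LBA, 32-bit block count
 *	CDB_GROUP4 (16-byte): 64-bit LBA, 32-bit block count
 */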
11692 
11693 
11694 int
11695 sd_setup_rw_pkt(struct sd_lun *un,
11696     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11697     int (*callback)(caddr_t), caddr_t callback_arg,
11698     diskaddr_t lba, uint32_t blockcount)
11699 {
11700 	struct scsi_pkt *return_pktp;
11701 	union scsi_cdb *cdbp;
11702 	struct sd_cdbinfo *cp = NULL;
11703 	int i;
11704 
11705 	/*
11706 	 * See which size CDB to use, based upon the request.
11707 	 */
11708 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11709 
11710 		/*
11711 		 * Check lba and block count against sd_cdbtab limits.
11712 		 * In the partial DMA case, we have to use the same size
11713 		 * CDB for all the transfers.  Check lba + blockcount
11714 		 * against the max LBA so we know that segment of the
11715 		 * transfer can use the CDB we select.
11716 		 */
11717 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11718 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11719 
11720 			/*
11721 			 * The command will fit into the CDB type
11722 			 * specified by sd_cdbtab[i].
11723 			 */
11724 			cp = sd_cdbtab + i;
11725 
11726 			/*
11727 			 * Call scsi_init_pkt so we can fill in the
11728 			 * CDB.
11729 			 */
11730 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11731 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11732 			    flags, callback, callback_arg);
11733 
11734 			if (return_pktp != NULL) {
11735 
11736 				/*
11737 				 * Return new value of pkt
11738 				 */
11739 				*pktpp = return_pktp;
11740 
11741 				/*
11742 				 * To be safe, zero the CDB ensuring there is
11743 				 * no leftover data from a previous command.
11744 				 */
11745 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11746 
11747 				/*
11748 				 * Handle partial DMA mapping
11749 				 */
11750 				if (return_pktp->pkt_resid != 0) {
11751 
11752 					/*
11753 					 * Not going to xfer as many blocks as
11754 					 * originally expected
11755 					 */
11756 					blockcount -=
11757 					    SD_BYTES2TGTBLOCKS(un,
11758 						return_pktp->pkt_resid);
11759 				}
11760 
11761 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11762 
11763 				/*
11764 				 * Set command byte based on the CDB
11765 				 * type we matched.
11766 				 */
11767 				cdbp->scc_cmd = cp->sc_grpmask |
11768 				    ((bp->b_flags & B_READ) ?
11769 					SCMD_READ : SCMD_WRITE);
11770 
11771 				SD_FILL_SCSI1_LUN(un, return_pktp);
11772 
11773 				/*
11774 				 * Fill in LBA and length
11775 				 */
11776 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
11777 				    (cp->sc_grpcode == CDB_GROUP4) ||
11778 				    (cp->sc_grpcode == CDB_GROUP0) ||
11779 				    (cp->sc_grpcode == CDB_GROUP5));
11780 
11781 				if (cp->sc_grpcode == CDB_GROUP1) {
11782 					FORMG1ADDR(cdbp, lba);
11783 					FORMG1COUNT(cdbp, blockcount);
11784 					return (0);
11785 				} else if (cp->sc_grpcode == CDB_GROUP4) {
11786 					FORMG4LONGADDR(cdbp, lba);
11787 					FORMG4COUNT(cdbp, blockcount);
11788 					return (0);
11789 				} else if (cp->sc_grpcode == CDB_GROUP0) {
11790 					FORMG0ADDR(cdbp, lba);
11791 					FORMG0COUNT(cdbp, blockcount);
11792 					return (0);
11793 				} else if (cp->sc_grpcode == CDB_GROUP5) {
11794 					FORMG5ADDR(cdbp, lba);
11795 					FORMG5COUNT(cdbp, blockcount);
11796 					return (0);
11797 				}
11798 
11799 				/*
11800 				 * It should be impossible to not match one
11801 				 * of the CDB types above, so we should never
11802 				 * reach this point.  Set the CDB command byte
11803 				 * to test-unit-ready to avoid writing
11804 				 * to somewhere we don't intend.
11805 				 */
11806 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
11807 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11808 			} else {
11809 				/*
11810 				 * Couldn't get scsi_pkt
11811 				 */
11812 				return (SD_PKT_ALLOC_FAILURE);
11813 			}
11814 		}
11815 	}
11816 
11817 	/*
11818 	 * None of the available CDB types were suitable.  This really
11819 	 * should never happen:  on a 64 bit system we support
11820 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
11821 	 * and on a 32 bit system we will refuse to bind to a device
11822 	 * larger than 2TB so addresses will never be larger than 32 bits.
11823 	 */
11824 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11825 }
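/*
 * Illustrative sketch (not driver code): for the CDB_GROUP1 arm above,
 * a READ of 16 blocks at lba 0x12345678 produces a 10-byte READ(10)
 * CDB laid out roughly as follows:
 *
 *	byte 0:    0x28 (SCMD_READ | the Group 1 sc_grpmask)
 *	bytes 2-5: 0x12 0x34 0x56 0x78 (big-endian LBA, via FORMG1ADDR)
 *	bytes 7-8: 0x00 0x10 (big-endian block count, via FORMG1COUNT)
 */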
11826 
11827 #if defined(__i386) || defined(__amd64)
11828 /*
11829  *    Function: sd_setup_next_rw_pkt
11830  *
11831  * Description: Set up a packet for partial DMA transfers, except for the
11832  * 		initial transfer.  sd_setup_rw_pkt should be used for
11833  *		the initial transfer.
11834  *
11835  *     Context: Kernel thread and may be called from interrupt context.
11836  */
11837 
11838 int
11839 sd_setup_next_rw_pkt(struct sd_lun *un,
11840     struct scsi_pkt *pktp, struct buf *bp,
11841     diskaddr_t lba, uint32_t blockcount)
11842 {
11843 	uchar_t com;
11844 	union scsi_cdb *cdbp;
11845 	uchar_t cdb_group_id;
11846 
11847 	ASSERT(pktp != NULL);
11848 	ASSERT(pktp->pkt_cdbp != NULL);
11849 
11850 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
11851 	com = cdbp->scc_cmd;
11852 	cdb_group_id = CDB_GROUPID(com);
11853 
11854 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
11855 	    (cdb_group_id == CDB_GROUPID_1) ||
11856 	    (cdb_group_id == CDB_GROUPID_4) ||
11857 	    (cdb_group_id == CDB_GROUPID_5));
11858 
11859 	/*
11860 	 * Move pkt to the next portion of the xfer.
11861 	 * func is NULL_FUNC so we do not have to release
11862 	 * the disk mutex here.
11863 	 */
11864 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
11865 	    NULL_FUNC, NULL) == pktp) {
11866 		/* Success.  Handle partial DMA */
11867 		if (pktp->pkt_resid != 0) {
11868 			blockcount -=
11869 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
11870 		}
11871 
11872 		cdbp->scc_cmd = com;
11873 		SD_FILL_SCSI1_LUN(un, pktp);
11874 		if (cdb_group_id == CDB_GROUPID_1) {
11875 			FORMG1ADDR(cdbp, lba);
11876 			FORMG1COUNT(cdbp, blockcount);
11877 			return (0);
11878 		} else if (cdb_group_id == CDB_GROUPID_4) {
11879 			FORMG4LONGADDR(cdbp, lba);
11880 			FORMG4COUNT(cdbp, blockcount);
11881 			return (0);
11882 		} else if (cdb_group_id == CDB_GROUPID_0) {
11883 			FORMG0ADDR(cdbp, lba);
11884 			FORMG0COUNT(cdbp, blockcount);
11885 			return (0);
11886 		} else if (cdb_group_id == CDB_GROUPID_5) {
11887 			FORMG5ADDR(cdbp, lba);
11888 			FORMG5COUNT(cdbp, blockcount);
11889 			return (0);
11890 		}
11891 
11892 		/* Unreachable */
11893 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11894 	}
11895 
11896 	/*
11897 	 * Error setting up next portion of cmd transfer.
11898 	 * Something is definitely very wrong and this
11899 	 * should not happen.
11900 	 */
11901 	return (SD_PKT_ALLOC_FAILURE);
11902 }
11903 #endif /* defined(__i386) || defined(__amd64) */
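/*
 * Partial-DMA usage sketch (illustrative pseudo-flow, assuming the
 * caller tracks the remaining residual in xb_dma_resid):
 *
 *	sd_setup_rw_pkt(un, &pktp, bp, flags, cb, arg, lba, nblks);
 *	(transport; completion sees a nonzero xb_dma_resid)
 *	sd_setup_next_rw_pkt(un, pktp, bp, next_lba, remaining_nblks);
 *	(transport again, until the residual reaches zero)
 *
 * The CDB group chosen by the initial sd_setup_rw_pkt() call must
 * cover the entire range, since sd_setup_next_rw_pkt() rewrites only
 * the LBA and count fields and never changes the CDB size.
 */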
11904 
11905 /*
11906  *    Function: sd_initpkt_for_uscsi
11907  *
11908  * Description: Allocate and initialize for transport a scsi_pkt struct,
11909  *		based upon the info specified in the given uscsi_cmd struct.
11910  *
11911  * Return Code: SD_PKT_ALLOC_SUCCESS
11912  *		SD_PKT_ALLOC_FAILURE
11913  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11914  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11915  *
11916  *     Context: Kernel thread and may be called from software interrupt context
11917  *		as part of a sdrunout callback. This function may not block or
11918  *		call routines that block
11919  */
11920 
11921 static int
11922 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
11923 {
11924 	struct uscsi_cmd *uscmd;
11925 	struct sd_xbuf	*xp;
11926 	struct scsi_pkt	*pktp;
11927 	struct sd_lun	*un;
11928 	uint32_t	flags = 0;
11929 
11930 	ASSERT(bp != NULL);
11931 	ASSERT(pktpp != NULL);
11932 	xp = SD_GET_XBUF(bp);
11933 	ASSERT(xp != NULL);
11934 	un = SD_GET_UN(bp);
11935 	ASSERT(un != NULL);
11936 	ASSERT(mutex_owned(SD_MUTEX(un)));
11937 
11938 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
11939 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
11940 	ASSERT(uscmd != NULL);
11941 
11942 	SD_TRACE(SD_LOG_IO_CORE, un,
11943 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
11944 
11945 	/*
11946 	 * Allocate the scsi_pkt for the command.
11947 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
11948 	 *	 during scsi_init_pkt time and will continue to use the
11949 	 *	 same path as long as the same scsi_pkt is used without
11950 	 *	 intervening scsi_dma_free(). Since uscsi command does
11951 	 *	 intervening scsi_dma_free(). Since a uscsi command does
11952 	 *	 not call scsi_dmafree() before retrying a failed command,
11953 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
11954 	 *	 NOT set, so that scsi_vhci can use another available path
11955 	 *	 for the retry. Besides, a uscsi command does not allow DMA
11956 	 *	 breakup, so there is no need to set the PKT_DMA_PARTIAL flag.
11957 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
11958 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
11959 	    sizeof (struct scsi_arq_status), 0,
11960 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
11961 	    sdrunout, (caddr_t)un);
11962 
11963 	if (pktp == NULL) {
11964 		*pktpp = NULL;
11965 		/*
11966 		 * Set the driver state to RWAIT to indicate the driver
11967 		 * is waiting on resource allocations. The driver will not
11968 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
11969 		 */
11970 		New_state(un, SD_STATE_RWAIT);
11971 
11972 		SD_ERROR(SD_LOG_IO_CORE, un,
11973 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
11974 
11975 		if ((bp->b_flags & B_ERROR) != 0) {
11976 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11977 		}
11978 		return (SD_PKT_ALLOC_FAILURE);
11979 	}
11980 
11981 	/*
11982 	 * We do not do DMA breakup for USCSI commands, so return failure
11983 	 * here if all the needed DMA resources were not allocated.
11984 	 */
11985 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
11986 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
11987 		scsi_destroy_pkt(pktp);
11988 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
11989 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
11990 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
11991 	}
11992 
11993 	/* Init the cdb from the given uscsi struct */
11994 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
11995 	    uscmd->uscsi_cdb[0], 0, 0, 0);
11996 
11997 	SD_FILL_SCSI1_LUN(un, pktp);
11998 
11999 	/*
12000 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
12001 	 * for listing of the supported flags.
12002 	 */
12003 
12004 	if (uscmd->uscsi_flags & USCSI_SILENT) {
12005 		flags |= FLAG_SILENT;
12006 	}
12007 
12008 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
12009 		flags |= FLAG_DIAGNOSE;
12010 	}
12011 
12012 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
12013 		flags |= FLAG_ISOLATE;
12014 	}
12015 
12016 	if (un->un_f_is_fibre == FALSE) {
12017 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
12018 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
12019 		}
12020 	}
12021 
12022 	/*
12023 	 * Set the pkt flags here so we save time later.
12024 	 * Note: These flags are NOT documented in the uscsi(7I) man page.
12025 	 */
12026 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12027 		flags |= FLAG_HEAD;
12028 	}
12029 
12030 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12031 		flags |= FLAG_NOINTR;
12032 	}
12033 
12034 	/*
12035 	 * For tagged queueing, things get a bit complicated.
12036 	 * Check first for a head-of-queue tag and then for an ordered tag.
12037 	 * If neither is requested, use the default driver tag flags.
12038 	 */
12039 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12040 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12041 			flags |= FLAG_HTAG;
12042 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12043 			flags |= FLAG_OTAG;
12044 		} else {
12045 			flags |= un->un_tagflags & FLAG_TAGMASK;
12046 		}
12047 	}
12048 
12049 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12050 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12051 	}
12052 
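	/*
	 * Example (illustrative): a caller that sets USCSI_SILENT |
	 * USCSI_DIAGNOSE | USCSI_HEAD ends up with pkt_flags of
	 * FLAG_SILENT | FLAG_DIAGNOSE | FLAG_HEAD plus the default
	 * driver tag flags, since none of USCSI_NOTAG, USCSI_HTAG,
	 * or USCSI_OTAG is set.
	 */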
12053 	pktp->pkt_flags = flags;
12054 
12055 	/* Copy the caller's CDB into the pkt... */
12056 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12057 
12058 	if (uscmd->uscsi_timeout == 0) {
12059 		pktp->pkt_time = un->un_uscsi_timeout;
12060 	} else {
12061 		pktp->pkt_time = uscmd->uscsi_timeout;
12062 	}
12063 
12064 	/* need it later to identify USCSI request in sdintr */
12065 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12066 
12067 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12068 
12069 	pktp->pkt_private = bp;
12070 	pktp->pkt_comp = sdintr;
12071 	*pktpp = pktp;
12072 
12073 	SD_TRACE(SD_LOG_IO_CORE, un,
12074 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12075 
12076 	return (SD_PKT_ALLOC_SUCCESS);
12077 }
12078 
12079 
12080 /*
12081  *    Function: sd_destroypkt_for_uscsi
12082  *
12083  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12084  *		IOs. Also saves relevant info into the associated uscsi_cmd
12085  *		struct.
12086  *
12087  *     Context: May be called under interrupt context
12088  */
12089 
12090 static void
12091 sd_destroypkt_for_uscsi(struct buf *bp)
12092 {
12093 	struct uscsi_cmd *uscmd;
12094 	struct sd_xbuf	*xp;
12095 	struct scsi_pkt	*pktp;
12096 	struct sd_lun	*un;
12097 
12098 	ASSERT(bp != NULL);
12099 	xp = SD_GET_XBUF(bp);
12100 	ASSERT(xp != NULL);
12101 	un = SD_GET_UN(bp);
12102 	ASSERT(un != NULL);
12103 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12104 	pktp = SD_GET_PKTP(bp);
12105 	ASSERT(pktp != NULL);
12106 
12107 	SD_TRACE(SD_LOG_IO_CORE, un,
12108 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12109 
12110 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12111 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12112 	ASSERT(uscmd != NULL);
12113 
12114 	/* Save the status and the residual into the uscsi_cmd struct */
12115 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12116 	uscmd->uscsi_resid  = bp->b_resid;
12117 
12118 	/*
12119 	 * If enabled, copy any saved sense data into the area specified
12120 	 * by the uscsi command.
12121 	 */
12122 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12123 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12124 		/*
12125 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12126 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12127 		 */
12128 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12129 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12130 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12131 	}
12132 
12133 	/* We are done with the scsi_pkt; free it now */
12134 	ASSERT(SD_GET_PKTP(bp) != NULL);
12135 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12136 
12137 	SD_TRACE(SD_LOG_IO_CORE, un,
12138 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12139 }
12140 
12141 
12142 /*
12143  *    Function: sd_bioclone_alloc
12144  *
12145  * Description: Allocate a buf(9S) and init it as per the given buf
12146  *		and the various arguments.  The associated sd_xbuf
12147  *		struct is (nearly) duplicated.  The struct buf *bp
12148  *		argument is saved in new_xp->xb_private.
12149  *
12150  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12151  *		datalen - size of data area for the shadow bp
12152  *		blkno - starting LBA
12153  *		func - function pointer for b_iodone in the shadow buf. (May
12154  *			be NULL if none.)
12155  *
12156  * Return Code: Pointer to the allocated buf(9S) struct
12157  *
12158  *     Context: Can sleep.
12159  */
12160 
12161 static struct buf *
12162 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12163 	daddr_t blkno, int (*func)(struct buf *))
12164 {
12165 	struct	sd_lun	*un;
12166 	struct	sd_xbuf	*xp;
12167 	struct	sd_xbuf	*new_xp;
12168 	struct	buf	*new_bp;
12169 
12170 	ASSERT(bp != NULL);
12171 	xp = SD_GET_XBUF(bp);
12172 	ASSERT(xp != NULL);
12173 	un = SD_GET_UN(bp);
12174 	ASSERT(un != NULL);
12175 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12176 
12177 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12178 	    NULL, KM_SLEEP);
12179 
12180 	new_bp->b_lblkno	= blkno;
12181 
12182 	/*
12183 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12184 	 * original xbuf into it.
12185 	 */
12186 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12187 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12188 
12189 	/*
12190 	 * The given bp is automatically saved in the xb_private member
12191 	 * of the new xbuf.  Callers are allowed to depend on this.
12192 	 */
12193 	new_xp->xb_private = bp;
12194 
12195 	new_bp->b_private  = new_xp;
12196 
12197 	return (new_bp);
12198 }
12199 
12200 /*
12201  *    Function: sd_shadow_buf_alloc
12202  *
12203  * Description: Allocate a buf(9S) and init it as per the given buf
12204  *		and the various arguments.  The associated sd_xbuf
12205  *		struct is (nearly) duplicated.  The struct buf *bp
12206  *		argument is saved in new_xp->xb_private.
12207  *
12208  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12209  *		datalen - size of data area for the shadow bp
12210  *		bflags - B_READ or B_WRITE (pseudo flag)
12211  *		blkno - starting LBA
12212  *		func - function pointer for b_iodone in the shadow buf. (May
12213  *			be NULL if none.)
12214  *
12215  * Return Code: Pointer to the allocated buf(9S) struct
12216  *
12217  *     Context: Can sleep.
12218  */
12219 
12220 static struct buf *
12221 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12222 	daddr_t blkno, int (*func)(struct buf *))
12223 {
12224 	struct	sd_lun	*un;
12225 	struct	sd_xbuf	*xp;
12226 	struct	sd_xbuf	*new_xp;
12227 	struct	buf	*new_bp;
12228 
12229 	ASSERT(bp != NULL);
12230 	xp = SD_GET_XBUF(bp);
12231 	ASSERT(xp != NULL);
12232 	un = SD_GET_UN(bp);
12233 	ASSERT(un != NULL);
12234 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12235 
12236 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12237 		bp_mapin(bp);
12238 	}
12239 
12240 	bflags &= (B_READ | B_WRITE);
12241 #if defined(__i386) || defined(__amd64)
12242 	new_bp = getrbuf(KM_SLEEP);
12243 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12244 	new_bp->b_bcount = datalen;
12245 	new_bp->b_flags = bflags |
12246 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12247 #else
12248 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12249 	    datalen, bflags, SLEEP_FUNC, NULL);
12250 #endif
12251 	new_bp->av_forw	= NULL;
12252 	new_bp->av_back	= NULL;
12253 	new_bp->b_dev	= bp->b_dev;
12254 	new_bp->b_blkno	= blkno;
12255 	new_bp->b_iodone = func;
12256 	new_bp->b_edev	= bp->b_edev;
12257 	new_bp->b_resid	= 0;
12258 
12259 	/* We need to preserve the B_FAILFAST flag */
12260 	if (bp->b_flags & B_FAILFAST) {
12261 		new_bp->b_flags |= B_FAILFAST;
12262 	}
12263 
12264 	/*
12265 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12266 	 * original xbuf into it.
12267 	 */
12268 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12269 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12270 
12271 	/* Needed later to copy data between the shadow buf & original buf */
12272 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12273 
12274 	/*
12275 	 * The given bp is automatically saved in the xb_private member
12276 	 * of the new xbuf.  Callers are allowed to depend on this.
12277 	 */
12278 	new_xp->xb_private = bp;
12279 
12280 	new_bp->b_private  = new_xp;
12281 
12282 	return (new_bp);
12283 }
12284 
12285 /*
12286  *    Function: sd_bioclone_free
12287  *
12288  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12289  *		in the larger-than-partition operation.
12290  *
12291  *     Context: May be called under interrupt context
12292  */
12293 
12294 static void
12295 sd_bioclone_free(struct buf *bp)
12296 {
12297 	struct sd_xbuf	*xp;
12298 
12299 	ASSERT(bp != NULL);
12300 	xp = SD_GET_XBUF(bp);
12301 	ASSERT(xp != NULL);
12302 
12303 	/*
12304 	 * Call bp_mapout() before freeing the buf, in case a lower
12305 	 * layer or HBA had done a bp_mapin(). We must do this here
12306 	 * as we are the "originator" of the shadow buf.
12307 	 */
12308 	bp_mapout(bp);
12309 
12310 	/*
12311 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12312 	 * never gets confused by a stale value in this field. (Just a little
12313 	 * extra defensiveness here.)
12314 	 */
12315 	bp->b_iodone = NULL;
12316 
12317 	freerbuf(bp);
12318 
12319 	kmem_free(xp, sizeof (struct sd_xbuf));
12320 }
12321 
12322 /*
12323  *    Function: sd_shadow_buf_free
12324  *
12325  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12326  *
12327  *     Context: May be called under interrupt context
12328  */
12329 
12330 static void
12331 sd_shadow_buf_free(struct buf *bp)
12332 {
12333 	struct sd_xbuf	*xp;
12334 
12335 	ASSERT(bp != NULL);
12336 	xp = SD_GET_XBUF(bp);
12337 	ASSERT(xp != NULL);
12338 
12339 #if defined(__sparc)
12340 	/*
12341 	 * Call bp_mapout() before freeing the buf, in case a lower
12342 	 * layer or HBA had done a bp_mapin(). We must do this here
12343 	 * as we are the "originator" of the shadow buf.
12344 	 */
12345 	bp_mapout(bp);
12346 #endif
12347 
12348 	/*
12349 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12350 	 * never gets confused by a stale value in this field. (Just a little
12351 	 * extra defensiveness here.)
12352 	 */
12353 	bp->b_iodone = NULL;
12354 
12355 #if defined(__i386) || defined(__amd64)
12356 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12357 	freerbuf(bp);
12358 #else
12359 	scsi_free_consistent_buf(bp);
12360 #endif
12361 
12362 	kmem_free(xp, sizeof (struct sd_xbuf));
12363 }
12364 
12365 
12366 /*
12367  *    Function: sd_print_transport_rejected_message
12368  *
12369  * Description: This implements the ludicrously complex rules for printing
12370  *		a "transport rejected" message.  This is to address the
12371  *		specific problem of having a flood of this error message
12372  *		produced when a failover occurs.
12373  *
12374  *     Context: Any.
12375  */
12376 
12377 static void
12378 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12379 	int code)
12380 {
12381 	ASSERT(un != NULL);
12382 	ASSERT(mutex_owned(SD_MUTEX(un)));
12383 	ASSERT(xp != NULL);
12384 
12385 	/*
12386 	 * Print the "transport rejected" message under any of the
12387 	 * following conditions:
12388 	 *
12389 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set.
12390 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12391 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12392 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12393 	 *   scsi_transport(9F) (which indicates that the target might have
12394 	 *   gone off-line).  This uses un->un_tran_fatal_count, which is
12395 	 *   incremented whenever a TRAN_FATAL_ERROR is received and reset
12396 	 *   to zero whenever a TRAN_ACCEPT is returned from
12397 	 *   scsi_transport().
12398 	 *
12399 	 * In ALL of the preceding cases, FLAG_SILENT must be CLEAR in the
12400 	 * scsi_pkt for the message to be printed.
12401 	 */
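	/*
	 * Example: a failover that produces a burst of TRAN_FATAL_ERROR
	 * rejections logs only the first one (un_tran_fatal_count == 1);
	 * the rest are suppressed until a TRAN_ACCEPT resets the count.
	 */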
12402 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12403 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12404 		    (code != TRAN_FATAL_ERROR) ||
12405 		    (un->un_tran_fatal_count == 1)) {
12406 			switch (code) {
12407 			case TRAN_BADPKT:
12408 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12409 				    "transport rejected bad packet\n");
12410 				break;
12411 			case TRAN_FATAL_ERROR:
12412 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12413 				    "transport rejected fatal error\n");
12414 				break;
12415 			default:
12416 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12417 				    "transport rejected (%d)\n", code);
12418 				break;
12419 			}
12420 		}
12421 	}
12422 }
12423 
12424 
12425 /*
12426  *    Function: sd_add_buf_to_waitq
12427  *
12428  * Description: Add the given buf(9S) struct to the wait queue for the
12429  *		instance.  If sorting is enabled, then the buf is added
12430  *		to the queue via an elevator sort algorithm (a la
12431  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12432  *		If sorting is not enabled, then the buf is just added
12433  *		to the end of the wait queue.
12434  *
12435  * Return Code: void
12436  *
12437  *     Context: Does not sleep/block, therefore technically can be called
12438  *		from any context.  However if sorting is enabled then the
12439  *		execution time is indeterminate and may become long if
12440  *		the wait queue grows large.
12441  */
12442 
12443 static void
12444 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12445 {
12446 	struct buf *ap;
12447 
12448 	ASSERT(bp != NULL);
12449 	ASSERT(un != NULL);
12450 	ASSERT(mutex_owned(SD_MUTEX(un)));
12451 
12452 	/* If the queue is empty, add the buf as the only entry & return. */
12453 	if (un->un_waitq_headp == NULL) {
12454 		ASSERT(un->un_waitq_tailp == NULL);
12455 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12456 		bp->av_forw = NULL;
12457 		return;
12458 	}
12459 
12460 	ASSERT(un->un_waitq_tailp != NULL);
12461 
12462 	/*
12463 	 * If sorting is disabled, just add the buf to the tail end of
12464 	 * the wait queue and return.
12465 	 */
12466 	if (un->un_f_disksort_disabled) {
12467 		un->un_waitq_tailp->av_forw = bp;
12468 		un->un_waitq_tailp = bp;
12469 		bp->av_forw = NULL;
12470 		return;
12471 	}
12472 
12473 	/*
12474 	 * Sort through the list of requests currently on the wait queue
12475 	 * and add the new buf request at the appropriate position.
12476 	 *
12477 	 * The un->un_waitq_headp is an activity chain pointer on which
12478 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12479 	 * first queue holds those requests which are positioned after
12480 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12481 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12482 	 * Thus we implement a one-way scan, retracting after reaching
12483 	 * the end of the drive to the first request on the second
12484 	 * queue, at which time it becomes the first queue.
12485 	 * A one-way scan is natural because of the way UNIX read-ahead
12486 	 * blocks are allocated.
12487 	 *
12488 	 * If we lie after the first request, then we must locate the
12489 	 * second request list and add ourselves to it.
12490 	 */
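	/*
	 * Worked example (illustrative): given a wait queue of block
	 * numbers [20, 40, 60, 5, 10] (first list 20-60, second list
	 * 5-10, with the inversion at 60 -> 5):
	 *
	 *	- a request for block 50 sorts into the first list,
	 *	  between 40 and 60;
	 *	- a request for block 7 lies before the head (7 < 20),
	 *	  so it sorts into the second list, between 5 and 10;
	 *	- a request for block 15 (also < 20, but larger than
	 *	  everything in the second list) goes after 10, at the
	 *	  tail of the queue.
	 */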
12491 	ap = un->un_waitq_headp;
12492 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12493 		while (ap->av_forw != NULL) {
12494 			/*
12495 			 * Look for an "inversion" in the (normally
12496 			 * ascending) block numbers. This indicates
12497 			 * the start of the second request list.
12498 			 */
12499 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12500 				/*
12501 				 * Search the second request list for the
12502 				 * first request at a larger block number.
12503 				 * We go before that; however if there is
12504 				 * no such request, we go at the end.
12505 				 */
12506 				do {
12507 					if (SD_GET_BLKNO(bp) <
12508 					    SD_GET_BLKNO(ap->av_forw)) {
12509 						goto insert;
12510 					}
12511 					ap = ap->av_forw;
12512 				} while (ap->av_forw != NULL);
12513 				goto insert;		/* after last */
12514 			}
12515 			ap = ap->av_forw;
12516 		}
12517 
12518 		/*
12519 		 * No inversions... we will go after the last, and
12520 		 * be the first request in the second request list.
12521 		 */
12522 		goto insert;
12523 	}
12524 
12525 	/*
12526 	 * Request is at/after the current request...
12527 	 * sort in the first request list.
12528 	 */
12529 	while (ap->av_forw != NULL) {
12530 		/*
12531 		 * We want to go after the current request (1) if
12532 		 * there is an inversion after it (i.e. it is the end
12533 		 * of the first request list), or (2) if the next
12534 		 * request is a larger block no. than our request.
12535 		 */
12536 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12537 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12538 			goto insert;
12539 		}
12540 		ap = ap->av_forw;
12541 	}
12542 
12543 	/*
12544 	 * Neither a second list nor a larger request, therefore
12545 	 * we go at the end of the first list (which is the same
12546 	 * as the end of the whole shebang).
12547 	 */
12548 insert:
12549 	bp->av_forw = ap->av_forw;
12550 	ap->av_forw = bp;
12551 
12552 	/*
12553 	 * If we inserted onto the tail end of the waitq, make sure the
12554 	 * tail pointer is updated.
12555 	 */
12556 	if (ap == un->un_waitq_tailp) {
12557 		un->un_waitq_tailp = bp;
12558 	}
12559 }
12560 
12561 
12562 /*
12563  *    Function: sd_start_cmds
12564  *
12565  * Description: Remove and transport cmds from the driver queues.
12566  *
12567  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12568  *
12569  *		immed_bp - ptr to a buf to be transported immediately. Only
12570  *		the immed_bp is transported; bufs on the waitq are not
12571  *		processed and the un_retry_bp is not checked.  If immed_bp is
12572  *		NULL, then normal queue processing is performed.
12573  *
12574  *     Context: May be called from kernel thread context, interrupt context,
12575  *		or runout callback context. This function may not block or
12576  *		call routines that block.
12577  */
12578 
12579 static void
12580 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12581 {
12582 	struct	sd_xbuf	*xp;
12583 	struct	buf	*bp;
12584 	void	(*statp)(kstat_io_t *);
12585 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12586 	void	(*saved_statp)(kstat_io_t *);
12587 #endif
12588 	int	rval;
12589 
12590 	ASSERT(un != NULL);
12591 	ASSERT(mutex_owned(SD_MUTEX(un)));
12592 	ASSERT(un->un_ncmds_in_transport >= 0);
12593 	ASSERT(un->un_throttle >= 0);
12594 
12595 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12596 
12597 	do {
12598 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12599 		saved_statp = NULL;
12600 #endif
12601 
12602 		/*
12603 		 * If we are syncing or dumping, fail the command to
12604 		 * avoid recursively calling back into scsi_transport().
12605 		 * The dump I/O itself uses a separate code path so this
12606 		 * only prevents non-dump I/O from being sent while dumping.
12607 		 * File system sync takes place before dumping begins.
12608 		 * During panic, filesystem I/O is allowed provided
12609 		 * un_in_callback is <= 1.  This is to prevent recursion
12610 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12611 		 * sd_start_cmds and so on.  See panic.c for more information
12612 		 * about the states the system can be in during panic.
12613 		 */
12614 		if ((un->un_state == SD_STATE_DUMPING) ||
12615 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12616 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12617 			    "sd_start_cmds: panicking\n");
12618 			goto exit;
12619 		}
12620 
12621 		if ((bp = immed_bp) != NULL) {
12622 			/*
12623 			 * We have a bp that must be transported immediately.
12624 			 * It's OK to transport the immed_bp here without doing
12625 			 * the throttle limit check because the immed_bp is
12626 			 * always used in a retry/recovery case. This means
12627 			 * that we know we are not at the throttle limit by
12628 			 * virtue of the fact that to get here we must have
12629 			 * already gotten a command back via sdintr(). This also
12630 			 * relies on (1) the command on un_retry_bp preventing
12631 			 * further commands from the waitq from being issued;
12632 			 * and (2) the code in sd_retry_command checking the
12633 			 * throttle limit before issuing a delayed or immediate
12634 			 * retry. This holds even if the throttle limit is
12635 			 * currently ratcheted down from its maximum value.
12636 			 */
12637 			statp = kstat_runq_enter;
12638 			if (bp == un->un_retry_bp) {
12639 				ASSERT((un->un_retry_statp == NULL) ||
12640 				    (un->un_retry_statp == kstat_waitq_enter) ||
12641 				    (un->un_retry_statp ==
12642 				    kstat_runq_back_to_waitq));
12643 				/*
12644 				 * If the waitq kstat was incremented when
12645 				 * sd_set_retry_bp() queued this bp for a retry,
12646 				 * then we must set up statp so that the waitq
12647 				 * count will get decremented correctly below.
12648 				 * Also we must clear un->un_retry_statp to
12649 				 * ensure that we do not act on a stale value
12650 				 * in this field.
12651 				 */
12652 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12653 				    (un->un_retry_statp ==
12654 				    kstat_runq_back_to_waitq)) {
12655 					statp = kstat_waitq_to_runq;
12656 				}
12657 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12658 				saved_statp = un->un_retry_statp;
12659 #endif
12660 				un->un_retry_statp = NULL;
12661 
12662 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12663 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12664 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12665 				    un, un->un_retry_bp, un->un_throttle,
12666 				    un->un_ncmds_in_transport);
12667 			} else {
12668 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12669 				    "processing priority bp:0x%p\n", bp);
12670 			}
12671 
12672 		} else if ((bp = un->un_waitq_headp) != NULL) {
12673 			/*
12674 			 * A command on the waitq is ready to go, but do not
12675 			 * send it if:
12676 			 *
12677 			 * (1) the throttle limit has been reached, or
12678 			 * (2) a retry is pending, or
12679 			 * (3) a START_STOP_UNIT callback is pending, or
12680 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
12681 			 *	command is pending.
12682 			 *
12683 			 * For all of these conditions, IO processing will
12684 			 * restart after the condition is cleared.
12685 			 */
12686 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12687 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12688 				    "sd_start_cmds: exiting, "
12689 				    "throttle limit reached!\n");
12690 				goto exit;
12691 			}
12692 			if (un->un_retry_bp != NULL) {
12693 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12694 				    "sd_start_cmds: exiting, retry pending!\n");
12695 				goto exit;
12696 			}
12697 			if (un->un_startstop_timeid != NULL) {
12698 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12699 				    "sd_start_cmds: exiting, "
12700 				    "START_STOP pending!\n");
12701 				goto exit;
12702 			}
12703 			if (un->un_direct_priority_timeid != NULL) {
12704 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12705 				    "sd_start_cmds: exiting, "
12706 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12707 				goto exit;
12708 			}
12709 
12710 			/* Dequeue the command */
12711 			un->un_waitq_headp = bp->av_forw;
12712 			if (un->un_waitq_headp == NULL) {
12713 				un->un_waitq_tailp = NULL;
12714 			}
12715 			bp->av_forw = NULL;
12716 			statp = kstat_waitq_to_runq;
12717 			SD_TRACE(SD_LOG_IO_CORE, un,
12718 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12719 
12720 		} else {
12721 			/* No work to do so bail out now */
12722 			SD_TRACE(SD_LOG_IO_CORE, un,
12723 			    "sd_start_cmds: no more work, exiting!\n");
12724 			goto exit;
12725 		}
12726 
12727 		/*
12728 		 * Reset the state to normal. This is the mechanism by which
12729 		 * the state transitions from either SD_STATE_RWAIT or
12730 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12731 		 * If state is SD_STATE_PM_CHANGING then this command is
12732 		 * part of the device power control and the state must
12733 	 * not be put back to normal. Doing so would allow new
12734 	 * commands to proceed when they shouldn't, as the device
12735 	 * may be powering off.
12736 		 */
12737 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12738 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12739 			New_state(un, SD_STATE_NORMAL);
12740 		}
12741 
12742 		xp = SD_GET_XBUF(bp);
12743 		ASSERT(xp != NULL);
12744 
12745 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12746 		/*
12747 		 * Allocate the scsi_pkt if we need one, or attach DMA
12748 		 * resources if we have a scsi_pkt that needs them. The
12749 		 * latter should only occur for commands that are being
12750 		 * retried.
12751 		 */
12752 		if ((xp->xb_pktp == NULL) ||
12753 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12754 #else
12755 		if (xp->xb_pktp == NULL) {
12756 #endif
12757 			/*
12758 			 * There is no scsi_pkt allocated for this buf. Call
12759 			 * the initpkt function to allocate & init one.
12760 			 *
12761 			 * The scsi_init_pkt runout callback functionality is
12762 			 * implemented as follows:
12763 			 *
12764 			 * 1) The initpkt function always calls
12765 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12766 			 *    callback routine.
12767 			 * 2) A successful packet allocation is initialized and
12768 			 *    the I/O is transported.
12769 			 * 3) The I/O associated with an allocation resource
12770 			 *    failure is left on its queue to be retried via
12771 			 *    runout or the next I/O.
12772 			 * 4) The I/O associated with a DMA error is removed
12773 			 *    from the queue and failed with EIO. Processing of
12774 			 *    the transport queues is also halted to be
12775 			 *    restarted via runout or the next I/O.
12776 			 * 5) The I/O associated with a CDB size or packet
12777 			 *    size error is removed from the queue and failed
12778 			 *    with EIO. Processing of the transport queues is
12779 			 *    continued.
12780 			 *
12781 			 * Note: there is no interface for canceling a runout
12782 			 * callback. To prevent the driver from detaching or
12783 			 * suspending while a runout is pending the driver
12784 			 * state is set to SD_STATE_RWAIT
12785 			 *
12786 			 * Note: using the scsi_init_pkt callback facility can
12787 			 * result in an I/O request persisting at the head of
12788 			 * the list which cannot be satisfied even after
12789 			 * multiple retries. In the future the driver may
12790 			 * implement some kind of maximum runout count before
12791 			 * failing an I/O.
12792 			 *
12793 			 * Note: the use of funcp below may seem superfluous,
12794 			 * but it helps warlock figure out the correct
12795 			 * initpkt function calls (see [s]sd.wlcmd).
12796 			 */
12797 			struct scsi_pkt	*pktp;
12798 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
12799 
12800 			ASSERT(bp != un->un_rqs_bp);
12801 
12802 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
12803 			switch ((*funcp)(bp, &pktp)) {
12804 			case  SD_PKT_ALLOC_SUCCESS:
12805 				xp->xb_pktp = pktp;
12806 				SD_TRACE(SD_LOG_IO_CORE, un,
12807 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
12808 				    pktp);
12809 				goto got_pkt;
12810 
12811 			case SD_PKT_ALLOC_FAILURE:
12812 				/*
12813 				 * Temporary (hopefully) resource depletion.
12814 				 * Since retries and RQS commands always have a
12815 				 * scsi_pkt allocated, these cases should never
12816 				 * get here. So the only cases this needs to
12817 				 * handle are a bp from the waitq (which we put
12818 				 * back onto the waitq for sdrunout), or a bp
12819 				 * sent as an immed_bp (which we just fail).
12820 				 */
12821 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12822 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
12823 
12824 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12825 
12826 				if (bp == immed_bp) {
12827 					/*
12828 					 * If SD_XB_DMA_FREED is clear, then
12829 					 * this is a failure to allocate a
12830 					 * scsi_pkt, and we must fail the
12831 					 * command.
12832 					 */
12833 					if ((xp->xb_pkt_flags &
12834 					    SD_XB_DMA_FREED) == 0) {
12835 						break;
12836 					}
12837 
12838 					/*
12839 					 * If this immediate command is NOT our
12840 					 * un_retry_bp, then we must fail it.
12841 					 */
12842 					if (bp != un->un_retry_bp) {
12843 						break;
12844 					}
12845 
12846 					/*
12847 					 * We get here if this cmd is our
12848 					 * un_retry_bp that was DMAFREED, but
12849 					 * scsi_init_pkt() failed to reallocate
12850 					 * DMA resources when we attempted to
12851 					 * retry it. This can happen when an
12852 					 * mpxio failover is in progress, but
12853 					 * we don't want to just fail the
12854 					 * command in this case.
12855 					 *
12856 					 * Use timeout(9F) to restart it after
12857 					 * a 100ms delay.  We don't want to
12858 					 * let sdrunout() restart it, because
12859 					 * sdrunout() is just supposed to start
12860 					 * commands that are sitting on the
12861 					 * wait queue.  The un_retry_bp stays
12862 					 * set until the command completes, but
12863 					 * sdrunout can be called many times
12864 					 * before that happens.  Since sdrunout
12865 					 * cannot tell if the un_retry_bp is
12866 					 * already in the transport, it could
12867 					 * end up calling scsi_transport() for
12868 					 * the un_retry_bp multiple times.
12869 					 *
12870 					 * Also: don't schedule the callback
12871 					 * if some other callback is already
12872 					 * pending.
12873 					 */
12874 					if (un->un_retry_statp == NULL) {
12875 						/*
12876 						 * restore the kstat pointer to
12877 						 * keep kstat counts coherent
12878 						 * when we do retry the command.
12879 						 */
12880 						un->un_retry_statp =
12881 						    saved_statp;
12882 					}
12883 
12884 					if ((un->un_startstop_timeid == NULL) &&
12885 					    (un->un_retry_timeid == NULL) &&
12886 					    (un->un_direct_priority_timeid ==
12887 					    NULL)) {
12888 
12889 						un->un_retry_timeid =
12890 						    timeout(
12891 						    sd_start_retry_command,
12892 						    un, SD_RESTART_TIMEOUT);
12893 					}
12894 					goto exit;
12895 				}
12896 
12897 #else
12898 				if (bp == immed_bp) {
12899 					break;	/* Just fail the command */
12900 				}
12901 #endif
12902 
12903 				/* Add the buf back to the head of the waitq */
12904 				bp->av_forw = un->un_waitq_headp;
12905 				un->un_waitq_headp = bp;
12906 				if (un->un_waitq_tailp == NULL) {
12907 					un->un_waitq_tailp = bp;
12908 				}
12909 				goto exit;
12910 
12911 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
12912 				/*
12913 				 * HBA DMA resource failure. Fail the command
12914 				 * and continue processing of the queues.
12915 				 */
12916 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12917 				    "sd_start_cmds: "
12918 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
12919 				break;
12920 
12921 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
12922 				/*
12923 				 * Note:x86: Partial DMA mapping not supported
12924 				 * for USCSI commands, and all the needed DMA
12925 				 * resources were not allocated.
12926 				 */
12927 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12928 				    "sd_start_cmds: "
12929 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
12930 				break;
12931 
12932 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
12933 				/*
12934 				 * Note:x86: Request cannot fit into CDB based
12935 				 * on lba and len.
12936 				 */
12937 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12938 				    "sd_start_cmds: "
12939 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
12940 				break;
12941 
12942 			default:
12943 				/* Should NEVER get here! */
12944 				panic("scsi_initpkt error");
12945 				/*NOTREACHED*/
12946 			}
12947 
12948 			/*
12949 			 * Fatal error in allocating a scsi_pkt for this buf.
12950 			 * Update kstats & return the buf with an error code.
12951 			 * We must use sd_return_failed_command_no_restart() to
12952 			 * avoid a recursive call back into sd_start_cmds().
12953 			 * However this also means that we must keep processing
12954 			 * the waitq here in order to avoid stalling.
12955 			 */
12956 			if (statp == kstat_waitq_to_runq) {
12957 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
12958 			}
12959 			sd_return_failed_command_no_restart(un, bp, EIO);
12960 			if (bp == immed_bp) {
12961 				/* immed_bp is gone by now, so clear this */
12962 				immed_bp = NULL;
12963 			}
12964 			continue;
12965 		}
12966 got_pkt:
12967 		if (bp == immed_bp) {
12968 			/* goto the head of the class.... */
12969 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
12970 		}
12971 
12972 		un->un_ncmds_in_transport++;
12973 		SD_UPDATE_KSTATS(un, statp, bp);
12974 
12975 		/*
12976 		 * Call scsi_transport() to send the command to the target.
12977 		 * According to SCSA architecture, we must drop the mutex here
12978 		 * before calling scsi_transport() in order to avoid deadlock.
12979 		 * Note that the scsi_pkt's completion routine can be executed
12980 		 * (from interrupt context) even before the call to
12981 		 * scsi_transport() returns.
12982 		 */
12983 		SD_TRACE(SD_LOG_IO_CORE, un,
12984 		    "sd_start_cmds: calling scsi_transport()\n");
12985 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
12986 
12987 		mutex_exit(SD_MUTEX(un));
12988 		rval = scsi_transport(xp->xb_pktp);
12989 		mutex_enter(SD_MUTEX(un));
12990 
12991 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12992 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
12993 
12994 		switch (rval) {
12995 		case TRAN_ACCEPT:
12996 			/* Clear this with every pkt accepted by the HBA */
12997 			un->un_tran_fatal_count = 0;
12998 			break;	/* Success; try the next cmd (if any) */
12999 
13000 		case TRAN_BUSY:
13001 			un->un_ncmds_in_transport--;
13002 			ASSERT(un->un_ncmds_in_transport >= 0);
13003 
13004 			/*
13005 			 * Don't retry request sense, the sense data
13006 			 * is lost when another request is sent.
13007 			 * Free up the rqs buf and retry
13008 			 * the original failed cmd.  Update kstat.
13009 			 */
13010 			if (bp == un->un_rqs_bp) {
13011 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13012 				bp = sd_mark_rqs_idle(un, xp);
13013 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
13014 				    NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
13015 				    kstat_waitq_enter);
13016 				goto exit;
13017 			}
13018 
13019 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13020 			/*
13021 			 * Free the DMA resources for the scsi_pkt. This will
13022 			 * allow mpxio to select another path the next time
13023 			 * we call scsi_transport() with this scsi_pkt.
13024 			 * See sdintr() for the rationale behind this.
13025 			 */
13026 			if ((un->un_f_is_fibre == TRUE) &&
13027 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13028 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13029 				scsi_dmafree(xp->xb_pktp);
13030 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13031 			}
13032 #endif
13033 
13034 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13035 				/*
13036 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13037 				 * are for error recovery situations. These do
13038 				 * not use the normal command waitq, so if they
13039 				 * get a TRAN_BUSY we cannot put them back onto
13040 				 * the waitq for later retry. One possible
13041 				 * problem is that there could already be some
13042 				 * other command on un_retry_bp that is waiting
13043 				 * for this one to complete, so we would be
13044 				 * deadlocked if we put this command back onto
13045 				 * the waitq for later retry (since un_retry_bp
13046 				 * must complete before the driver gets back to
13047 				 * commands on the waitq).
13048 				 *
13049 				 * To avoid deadlock we must schedule a callback
13050 				 * that will restart this command after a set
13051 				 * interval.  This should keep retrying for as
13052 				 * long as the underlying transport keeps
13053 				 * returning TRAN_BUSY (just like for other
13054 				 * commands).  Use the same timeout interval as
13055 				 * for the ordinary TRAN_BUSY retry.
13056 				 */
13057 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13058 				    "sd_start_cmds: scsi_transport() returned "
13059 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13060 
13061 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13062 				un->un_direct_priority_timeid =
13063 				    timeout(sd_start_direct_priority_command,
13064 				    bp, SD_BSY_TIMEOUT / 500);
13065 
13066 				goto exit;
13067 			}
13068 
13069 			/*
13070 			 * For TRAN_BUSY, we want to reduce the throttle value,
13071 			 * unless we are retrying a command.
13072 			 */
13073 			if (bp != un->un_retry_bp) {
13074 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13075 			}
13076 
13077 			/*
13078 			 * Set up the bp to be tried again 10 ms later.
13079 			 * Note:x86: Is there a timeout value in the sd_lun
13080 			 * for this condition?
13081 			 */
13082 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13083 			    kstat_runq_back_to_waitq);
13084 			goto exit;
13085 
13086 		case TRAN_FATAL_ERROR:
13087 			un->un_tran_fatal_count++;
13088 			/* FALLTHRU */
13089 
13090 		case TRAN_BADPKT:
13091 		default:
13092 			un->un_ncmds_in_transport--;
13093 			ASSERT(un->un_ncmds_in_transport >= 0);
13094 
13095 			/*
13096 			 * If this is our REQUEST SENSE command with a
13097 			 * transport error, we must get back the pointers
13098 			 * to the original buf, and mark the REQUEST
13099 			 * SENSE command as "available".
13100 			 */
13101 			if (bp == un->un_rqs_bp) {
13102 				bp = sd_mark_rqs_idle(un, xp);
13103 				xp = SD_GET_XBUF(bp);
13104 			} else {
13105 				/*
13106 				 * Legacy behavior: the transport error count
13107 				 * is not updated for request sense commands;
13108 				 * update it for all other commands.
13108 				 */
13109 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13110 			}
13111 
13112 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13113 			sd_print_transport_rejected_message(un, xp, rval);
13114 
13115 			/*
13116 			 * We must use sd_return_failed_command_no_restart() to
13117 			 * avoid a recursive call back into sd_start_cmds().
13118 			 * However this also means that we must keep processing
13119 			 * the waitq here in order to avoid stalling.
13120 			 */
13121 			sd_return_failed_command_no_restart(un, bp, EIO);
13122 
13123 			/*
13124 			 * Notify any threads waiting in sd_ddi_suspend() that
13125 			 * a command completion has occurred.
13126 			 */
13127 			if (un->un_state == SD_STATE_SUSPENDED) {
13128 				cv_broadcast(&un->un_disk_busy_cv);
13129 			}
13130 
13131 			if (bp == immed_bp) {
13132 				/* immed_bp is gone by now, so clear this */
13133 				immed_bp = NULL;
13134 			}
13135 			break;
13136 		}
13137 
13138 	} while (immed_bp == NULL);
13139 
13140 exit:
13141 	ASSERT(mutex_owned(SD_MUTEX(un)));
13142 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13143 }
13144 
13145 
13146 /*
13147  *    Function: sd_return_command
13148  *
13149  * Description: Returns a command to its originator (with or without an
13150  *		error).  Also starts commands waiting to be transported
13151  *		to the target.
13152  *
13153  *     Context: May be called from interrupt, kernel, or timeout context
13154  */
13155 
13156 static void
13157 sd_return_command(struct sd_lun *un, struct buf *bp)
13158 {
13159 	struct sd_xbuf *xp;
13160 #if defined(__i386) || defined(__amd64)
13161 	struct scsi_pkt *pktp;
13162 #endif
13163 
13164 	ASSERT(bp != NULL);
13165 	ASSERT(un != NULL);
13166 	ASSERT(mutex_owned(SD_MUTEX(un)));
13167 	ASSERT(bp != un->un_rqs_bp);
13168 	xp = SD_GET_XBUF(bp);
13169 	ASSERT(xp != NULL);
13170 
13171 #if defined(__i386) || defined(__amd64)
13172 	pktp = SD_GET_PKTP(bp);
13173 #endif
13174 
13175 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13176 
13177 #if defined(__i386) || defined(__amd64)
13178 	/*
13179 	 * Note:x86: check for the "sdrestart failed" case.
13180 	 */
13181 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13182 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13183 	    (xp->xb_pktp->pkt_resid == 0)) {
13184 
13185 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13186 			/*
13187 			 * Successfully set up next portion of cmd
13188 			 * transfer, try sending it
13189 			 */
13190 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13191 			    NULL, NULL, 0, (clock_t)0, NULL);
13192 			sd_start_cmds(un, NULL);
13193 			return;	/* Note:x86: need a return here? */
13194 		}
13195 	}
13196 #endif
13197 
13198 	/*
13199 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13200 	 * can happen if upon being re-tried the failfast bp either
13201 	 * succeeded or encountered another error (possibly even a different
13202 	 * error than the one that precipitated the failfast state, but in
13203 	 * that case it would have had to exhaust retries as well). Regardless,
13204 	 * this should never occur while the instance is in the active
13205 	 * failfast state.
13206 	 */
13207 	if (bp == un->un_failfast_bp) {
13208 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13209 		un->un_failfast_bp = NULL;
13210 	}
13211 
13212 	/*
13213 	 * Clear the failfast state upon successful completion of ANY cmd.
13214 	 */
13215 	if (bp->b_error == 0) {
13216 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13217 	}
13218 
13219 	/*
13220 	 * This is used if the command was retried one or more times. Show that
13221 	 * we are done with it, and allow processing of the waitq to resume.
13222 	 */
13223 	if (bp == un->un_retry_bp) {
13224 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13225 		    "sd_return_command: un:0x%p: "
13226 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13227 		un->un_retry_bp = NULL;
13228 		un->un_retry_statp = NULL;
13229 	}
13230 
13231 	SD_UPDATE_RDWR_STATS(un, bp);
13232 	SD_UPDATE_PARTITION_STATS(un, bp);
13233 
13234 	switch (un->un_state) {
13235 	case SD_STATE_SUSPENDED:
13236 		/*
13237 		 * Notify any threads waiting in sd_ddi_suspend() that
13238 		 * a command completion has occurred.
13239 		 */
13240 		cv_broadcast(&un->un_disk_busy_cv);
13241 		break;
13242 	default:
13243 		sd_start_cmds(un, NULL);
13244 		break;
13245 	}
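	/*
	 * Note that SD_MUTEX is dropped below before the buf is handed up
	 * the iodone chain, presumably so that biodone(9F) and any
	 * upper-layer callbacks are not invoked with the softstate mutex
	 * held; it is re-acquired afterwards, as the caller expects the
	 * mutex to be held on return.
	 */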
13246 
13247 	/* Return this command up the iodone chain to its originator. */
13248 	mutex_exit(SD_MUTEX(un));
13249 
13250 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13251 	xp->xb_pktp = NULL;
13252 
13253 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13254 
13255 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13256 	mutex_enter(SD_MUTEX(un));
13257 
13258 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13259 }
13260 
13261 
13262 /*
13263  *    Function: sd_return_failed_command
13264  *
13265  * Description: Command completion when an error occurred.
13266  *
13267  *     Context: May be called from interrupt context
13268  */
13269 
13270 static void
13271 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13272 {
13273 	ASSERT(bp != NULL);
13274 	ASSERT(un != NULL);
13275 	ASSERT(mutex_owned(SD_MUTEX(un)));
13276 
13277 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13278 	    "sd_return_failed_command: entry\n");
13279 
13280 	/*
13281 	 * b_resid could already be nonzero due to a partial data
13282 	 * transfer, so do not change it here.
13283 	 */
13284 	SD_BIOERROR(bp, errcode);
13285 
13286 	sd_return_command(un, bp);
13287 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13288 	    "sd_return_failed_command: exit\n");
13289 }
13290 
13291 
13292 /*
13293  *    Function: sd_return_failed_command_no_restart
13294  *
13295  * Description: Same as sd_return_failed_command, but ensures that no
13296  *		call back into sd_start_cmds will be issued.
13297  *
13298  *     Context: May be called from interrupt context
13299  */
13300 
13301 static void
13302 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13303 	int errcode)
13304 {
13305 	struct sd_xbuf *xp;
13306 
13307 	ASSERT(bp != NULL);
13308 	ASSERT(un != NULL);
13309 	ASSERT(mutex_owned(SD_MUTEX(un)));
13310 	xp = SD_GET_XBUF(bp);
13311 	ASSERT(xp != NULL);
13312 	ASSERT(errcode != 0);
13313 
13314 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13315 	    "sd_return_failed_command_no_restart: entry\n");
13316 
13317 	/*
13318 	 * b_resid could already be nonzero due to a partial data
13319 	 * transfer, so do not change it here.
13320 	 */
13321 	SD_BIOERROR(bp, errcode);
13322 
13323 	/*
13324 	 * If this is the failfast bp, clear it. This can happen if the
13325 	 * failfast bp encountered a fatal error when we attempted to
13326 	 * re-try it (such as a scsi_transport(9F) failure).  However
13327 	 * we should NOT be in an active failfast state if the failfast
13328 	 * bp is not NULL.
13329 	 */
13330 	if (bp == un->un_failfast_bp) {
13331 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13332 		un->un_failfast_bp = NULL;
13333 	}
13334 
13335 	if (bp == un->un_retry_bp) {
13336 		/*
13337 		 * This command was retried one or more times. Show that we are
13338 		 * done with it, and allow processing of the waitq to resume.
13339 		 */
13340 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13341 		    "sd_return_failed_command_no_restart: "
13342 		    "un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13343 		un->un_retry_bp = NULL;
13344 		un->un_retry_statp = NULL;
13345 	}
13346 
13347 	SD_UPDATE_RDWR_STATS(un, bp);
13348 	SD_UPDATE_PARTITION_STATS(un, bp);
13349 
13350 	mutex_exit(SD_MUTEX(un));
13351 
13352 	if (xp->xb_pktp != NULL) {
13353 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13354 		xp->xb_pktp = NULL;
13355 	}
13356 
13357 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13358 
13359 	mutex_enter(SD_MUTEX(un));
13360 
13361 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13362 	    "sd_return_failed_command_no_restart: exit\n");
13363 }
13364 
13365 
13366 /*
13367  *    Function: sd_retry_command
13368  *
13369  * Description: queue up a command for retry, or (optionally) fail it
13370  *		if retry counts are exhausted.
13371  *
13372  *   Arguments: un - Pointer to the sd_lun struct for the target.
13373  *
13374  *		bp - Pointer to the buf for the command to be retried.
13375  *
13376  *		retry_check_flag - Flag to see which (if any) of the retry
13377  *		   counts should be decremented/checked. If the indicated
13378  *		   retry count is exhausted, then the command will not be
13379  *		   retried; it will be failed instead. This should use a
13380  *		   value equal to one of the following:
13381  *
13382  *			SD_RETRIES_NOCHECK
13383  *			SD_RETRIES_STANDARD
13384  *			SD_RETRIES_VICTIM, SD_RETRIES_UA, SD_RETRIES_BUSY
13385  *
13386  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13387  *		   if the check should be made to see if FLAG_ISOLATE is set
13388  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13389  *		   not retried, it is simply failed.
13390  *
13391  *		user_funcp - Ptr to function to call before dispatching the
13392  *		   command. May be NULL if no action needs to be performed.
13393  *		   (Primarily intended for printing messages.)
13394  *
13395  *		user_arg - Optional argument to be passed along to
13396  *		   the user_funcp call.
13397  *
13398  *		failure_code - errno return code to set in the bp if the
13399  *		   command is going to be failed.
13400  *
13401  *		retry_delay - Retry delay interval in (clock_t) units. May
13402  *		   be zero, which indicates that the command should be
13403  *		   retried immediately (i.e., without an intervening delay).
13404  *
13405  *		statp - Ptr to kstat function to be updated if the command
13406  *		   is queued for a delayed retry. May be NULL if no kstat
13407  *		   update is desired.
13408  *
13409  *     Context: May be called from interrupt context.
13410  */
13411 
13412 static void
13413 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13414 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
13415 	code), void *user_arg, int failure_code,  clock_t retry_delay,
13416 	void (*statp)(kstat_io_t *))
13417 {
13418 	struct sd_xbuf	*xp;
13419 	struct scsi_pkt	*pktp;
13420 
13421 	ASSERT(un != NULL);
13422 	ASSERT(mutex_owned(SD_MUTEX(un)));
13423 	ASSERT(bp != NULL);
13424 	xp = SD_GET_XBUF(bp);
13425 	ASSERT(xp != NULL);
13426 	pktp = SD_GET_PKTP(bp);
13427 	ASSERT(pktp != NULL);
13428 
13429 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13430 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13431 
13432 	/*
13433 	 * If we are syncing or dumping, fail the command to avoid
13434 	 * recursively calling back into scsi_transport().
13435 	 */
13436 	if (ddi_in_panic()) {
13437 		goto fail_command_no_log;
13438 	}
13439 
13440 	/*
13441 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13442 	 * log an error and fail the command.
13443 	 */
13444 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13445 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13446 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13447 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13448 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13449 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13450 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13451 		goto fail_command;
13452 	}
13453 
13454 	/*
13455 	 * If we are suspended, then put the command onto the head of the
13456 	 * wait queue since we don't want to start more commands.
13457 	 */
13458 	switch (un->un_state) {
13459 	case SD_STATE_SUSPENDED:
13460 	case SD_STATE_DUMPING:
13461 		bp->av_forw = un->un_waitq_headp;
13462 		un->un_waitq_headp = bp;
13463 		if (un->un_waitq_tailp == NULL) {
13464 			un->un_waitq_tailp = bp;
13465 		}
13466 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13467 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13468 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13469 		return;
13470 	default:
13471 		break;
13472 	}
13473 
13474 	/*
13475 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13476 	 * is set; if it is then we do not want to retry the command.
13477 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13478 	 */
13479 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13480 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13481 			goto fail_command;
13482 		}
13483 	}
13484 
13485 
13486 	/*
13487 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13488 	 * command timeout or a selection timeout has occurred. This means
13489 	 * that we were unable to establish any kind of communication with
13490 	 * the target, and subsequent retries and/or commands are likely
13491 	 * to encounter similar results and take a long time to complete.
13492 	 *
13493 	 * If this is a failfast error condition, we need to update the
13494 	 * failfast state, even if this bp does not have B_FAILFAST set.
13495 	 */
13496 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13497 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13498 			ASSERT(un->un_failfast_bp == NULL);
13499 			/*
13500 			 * If we are already in the active failfast state, and
13501 			 * another failfast error condition has been detected,
13502 			 * then fail this command if it has B_FAILFAST set.
13503 			 * If B_FAILFAST is clear, then maintain the legacy
13504 			 * behavior of retrying heroically, even though this will
13505 			 * take a lot more time to fail the command.
13506 			 */
13507 			if (bp->b_flags & B_FAILFAST) {
13508 				goto fail_command;
13509 			}
13510 		} else {
13511 			/*
13512 			 * We're not in the active failfast state, but we
13513 			 * have a failfast error condition, so we must begin
13514 			 * transition to the next state. We do this regardless
13515 			 * of whether or not this bp has B_FAILFAST set.
13516 			 */
13517 			if (un->un_failfast_bp == NULL) {
13518 				/*
13519 				 * This is the first bp to meet a failfast
13520 				 * condition so save it on un_failfast_bp &
13521 				 * do normal retry processing. Do not enter
13522 				 * active failfast state yet. This marks
13523 				 * entry into the "failfast pending" state.
13524 				 */
13525 				un->un_failfast_bp = bp;
13526 
13527 			} else if (un->un_failfast_bp == bp) {
13528 				/*
13529 				 * This is the second time *this* bp has
13530 				 * encountered a failfast error condition,
13531 				 * so enter active failfast state & flush
13532 				 * queues as appropriate.
13533 				 */
13534 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13535 				un->un_failfast_bp = NULL;
13536 				sd_failfast_flushq(un);
13537 
13538 				/*
13539 				 * Fail this bp now if B_FAILFAST set;
13540 				 * otherwise continue with retries. (It would
13541 				 * be pretty ironic if this bp succeeded on a
13542 				 * subsequent retry after we just flushed all
13543 				 * the queues).
13544 				 */
13545 				if (bp->b_flags & B_FAILFAST) {
13546 					goto fail_command;
13547 				}
13548 
13549 #if !defined(lint) && !defined(__lint)
13550 			} else {
13551 				/*
13552 				 * If neither of the preceding conditionals
13553 				 * was true, it means that there is some
13554 				 * *other* bp that has met an initial failfast
13555 				 * condition and is currently either being
13556 				 * retried or is waiting to be retried. In
13557 				 * that case we should perform normal retry
13558 				 * processing on *this* bp, since there is a
13559 				 * chance that the current failfast condition
13560 				 * is transient and recoverable. If that does
13561 				 * not turn out to be the case, then retries
13562 				 * will be cleared when the wait queue is
13563 				 * flushed anyway.
13564 				 */
13565 #endif
13566 			}
13567 		}
13568 	} else {
13569 		/*
13570 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13571 		 * likely were able to at least establish some level of
13572 		 * communication with the target and subsequent commands
13573 	 * and/or retries are likely to get through to the target.
13574 		 * In this case we want to be aggressive about clearing
13575 		 * the failfast state. Note that this does not affect
13576 		 * the "failfast pending" condition.
13577 		 */
13578 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13579 	}
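	/*
	 * To summarize the failfast state machine implemented above (as a
	 * reading aid only; no behavior beyond the code above is implied):
	 *
	 *	INACTIVE --(1st failfast error)--> "pending"
	 *	    (un_failfast_bp records the bp; retries proceed normally)
	 *	"pending" --(2nd failfast error on the same bp)--> ACTIVE
	 *	    (queues are flushed via sd_failfast_flushq())
	 *	any state --(non-failfast retry condition)--> INACTIVE
	 *
	 * While ACTIVE, bufs with B_FAILFAST set are failed immediately;
	 * all others retain the legacy retry behavior.
	 */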
13580 
13581 
13582 	/*
13583 	 * Check the specified retry count to see if we can still do
13584 	 * any retries with this pkt before we should fail it.
13585 	 */
13586 	switch (retry_check_flag & SD_RETRIES_MASK) {
13587 	case SD_RETRIES_VICTIM:
13588 		/*
13589 		 * Check the victim retry count. If exhausted, then fall
13590 		 * thru & check against the standard retry count.
13591 		 */
13592 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13593 			/* Increment count & proceed with the retry */
13594 			xp->xb_victim_retry_count++;
13595 			break;
13596 		}
13597 		/* Victim retries exhausted, fall back to std. retries... */
13598 		/* FALLTHRU */
13599 
13600 	case SD_RETRIES_STANDARD:
13601 		if (xp->xb_retry_count >= un->un_retry_count) {
13602 			/* Retries exhausted, fail the command */
13603 			SD_TRACE(SD_LOG_IO_CORE, un,
13604 			    "sd_retry_command: retries exhausted!\n");
13605 			/*
13606 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13607 			 * commands with nonzero pkt_resid.
13608 			 */
13609 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13610 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13611 			    (pktp->pkt_resid != 0)) {
13612 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13613 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13614 					SD_UPDATE_B_RESID(bp, pktp);
13615 				}
13616 			}
13617 			goto fail_command;
13618 		}
13619 		xp->xb_retry_count++;
13620 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13621 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13622 		break;
13623 
13624 	case SD_RETRIES_UA:
13625 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13626 			/* Retries exhausted, fail the command */
13627 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13628 			    "Unit Attention retries exhausted. "
13629 			    "Check the target.\n");
13630 			goto fail_command;
13631 		}
13632 		xp->xb_ua_retry_count++;
13633 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13634 		    "sd_retry_command: retry count:%d\n",
13635 		    xp->xb_ua_retry_count);
13636 		break;
13637 
13638 	case SD_RETRIES_BUSY:
13639 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13640 			/* Retries exhausted, fail the command */
13641 			SD_TRACE(SD_LOG_IO_CORE, un,
13642 			    "sd_retry_command: retries exhausted!\n");
13643 			goto fail_command;
13644 		}
13645 		xp->xb_retry_count++;
13646 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13647 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13648 		break;
13649 
13650 	case SD_RETRIES_NOCHECK:
13651 	default:
13652 		/* No retry count to check. Just proceed with the retry */
13653 		break;
13654 	}
13655 
13656 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13657 
13658 	/*
13659 	 * If we were given a zero timeout, we must attempt to retry the
13660 	 * command immediately (ie, without a delay).
13661 	 */
13662 	if (retry_delay == 0) {
13663 		/*
13664 		 * Check some limiting conditions to see if we can actually
13665 		 * do the immediate retry.  If we cannot, then we must
13666 		 * fall back to queueing up a delayed retry.
13667 		 */
13668 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13669 			/*
13670 			 * We are at the throttle limit for the target,
13671 			 * fall back to delayed retry.
13672 			 */
13673 			retry_delay = SD_BSY_TIMEOUT;
13674 			statp = kstat_waitq_enter;
13675 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13676 			    "sd_retry_command: immed. retry hit "
13677 			    "throttle!\n");
13678 		} else {
13679 			/*
13680 			 * We're clear to proceed with the immediate retry.
13681 			 * First call the user-provided function (if any)
13682 			 */
13683 			if (user_funcp != NULL) {
13684 				(*user_funcp)(un, bp, user_arg,
13685 				    SD_IMMEDIATE_RETRY_ISSUED);
13686 #ifdef __lock_lint
13687 				sd_print_incomplete_msg(un, bp, user_arg,
13688 				    SD_IMMEDIATE_RETRY_ISSUED);
13689 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13690 				    SD_IMMEDIATE_RETRY_ISSUED);
13691 				sd_print_sense_failed_msg(un, bp, user_arg,
13692 				    SD_IMMEDIATE_RETRY_ISSUED);
13693 #endif
13694 			}
13695 
13696 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13697 			    "sd_retry_command: issuing immediate retry\n");
13698 
13699 			/*
13700 			 * Call sd_start_cmds() to transport the command to
13701 			 * the target.
13702 			 */
13703 			sd_start_cmds(un, bp);
13704 
13705 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13706 			    "sd_retry_command exit\n");
13707 			return;
13708 		}
13709 	}
13710 
13711 	/*
13712 	 * Set up to retry the command after a delay.
13713 	 * First call the user-provided function (if any)
13714 	 */
13715 	if (user_funcp != NULL) {
13716 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13717 	}
13718 
13719 	sd_set_retry_bp(un, bp, retry_delay, statp);
13720 
13721 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13722 	return;
13723 
13724 fail_command:
13725 
13726 	if (user_funcp != NULL) {
13727 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13728 	}
13729 
13730 fail_command_no_log:
13731 
13732 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13733 	    "sd_retry_command: returning failed command\n");
13734 
13735 	sd_return_failed_command(un, bp, failure_code);
13736 
13737 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13738 }
13739 
13740 
13741 /*
13742  *    Function: sd_set_retry_bp
13743  *
13744  * Description: Set up the given bp for retry.
13745  *
13746  *   Arguments: un - ptr to associated softstate
13747  *		bp - ptr to buf(9S) for the command
13748  *		retry_delay - time interval before issuing retry (may be 0)
13749  *		statp - optional pointer to kstat function
13750  *
13751  *     Context: May be called under interrupt context
13752  */
13753 
13754 static void
13755 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13756 	void (*statp)(kstat_io_t *))
13757 {
13758 	ASSERT(un != NULL);
13759 	ASSERT(mutex_owned(SD_MUTEX(un)));
13760 	ASSERT(bp != NULL);
13761 
13762 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13763 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13764 
13765 	/*
13766 	 * Indicate that the command is being retried. This will not allow any
13767 	 * other commands on the wait queue to be transported to the target
13768 	 * until this command has been completed (success or failure). The
13769 	 * "retry command" is not transported to the target until the given
13770 	 * time delay expires, unless the user specified a 0 retry_delay.
13771 	 *
13772 	 * Note: the timeout(9F) callback routine is what actually calls
13773 	 * sd_start_cmds() to transport the command, with the exception of a
13774 	 * zero retry_delay. The only current implementor of a zero retry delay
13775 	 * is the case where a START_STOP_UNIT is sent to spin up a device.
13776 	 */
13777 	if (un->un_retry_bp == NULL) {
13778 		ASSERT(un->un_retry_statp == NULL);
13779 		un->un_retry_bp = bp;
13780 
13781 		/*
13782 		 * If the user has not specified a delay the command should
13783 		 * be queued and no timeout should be scheduled.
13784 		 */
13785 		if (retry_delay == 0) {
13786 			/*
13787 			 * Save the kstat pointer that will be used in the
13788 			 * call to SD_UPDATE_KSTATS() below, so that
13789 			 * sd_start_cmds() can correctly decrement the waitq
13790 			 * count when it is time to transport this command.
13791 			 */
13792 			un->un_retry_statp = statp;
13793 			goto done;
13794 		}
13795 	}
13796 
13797 	if (un->un_retry_bp == bp) {
13798 		/*
13799 		 * Save the kstat pointer that will be used in the call to
13800 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
13801 		 * correctly decrement the waitq count when it is time to
13802 		 * transport this command.
13803 		 */
13804 		un->un_retry_statp = statp;
13805 
13806 		/*
13807 		 * Schedule a timeout if:
13808 		 *   1) The user has specified a delay.
13809 		 *   2) There is not a START_STOP_UNIT callback pending.
13810 		 *
13811 		 * If no delay has been specified, then it is up to the caller
13812 		 * to ensure that IO processing continues without stalling.
13813 		 * Effectively, this means that the caller will issue the
13814 		 * required call to sd_start_cmds(). The START_STOP_UNIT
13815 		 * callback does this after the START STOP UNIT command has
13816 		 * completed. In either of these cases we should not schedule
13817 		 * a timeout callback here.  Also don't schedule the timeout if
13818 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
13819 		 */
13820 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
13821 		    (un->un_direct_priority_timeid == NULL)) {
13822 			un->un_retry_timeid =
13823 			    timeout(sd_start_retry_command, un, retry_delay);
13824 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13825 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
13826 			    " bp:0x%p un_retry_timeid:0x%p\n",
13827 			    un, bp, un->un_retry_timeid);
13828 		}
13829 	} else {
13830 		/*
13831 		 * We only get in here if there is already another command
13832 		 * waiting to be retried.  In this case, we just put the
13833 		 * given command onto the wait queue, so it can be transported
13834 		 * after the current retry command has completed.
13835 		 *
13836 		 * Also we have to make sure that if the command at the head
13837 		 * of the wait queue is the un_failfast_bp, that we do not
13838 		 * put ahead of it any other commands that are to be retried.
13839 		 */
13840 		if ((un->un_failfast_bp != NULL) &&
13841 		    (un->un_failfast_bp == un->un_waitq_headp)) {
13842 			/*
13843 			 * Enqueue this command AFTER the first command on
13844 			 * the wait queue (which is also un_failfast_bp).
13845 			 */
13846 			bp->av_forw = un->un_waitq_headp->av_forw;
13847 			un->un_waitq_headp->av_forw = bp;
13848 			if (un->un_waitq_headp == un->un_waitq_tailp) {
13849 				un->un_waitq_tailp = bp;
13850 			}
13851 		} else {
13852 			/* Enqueue this command at the head of the waitq. */
13853 			bp->av_forw = un->un_waitq_headp;
13854 			un->un_waitq_headp = bp;
13855 			if (un->un_waitq_tailp == NULL) {
13856 				un->un_waitq_tailp = bp;
13857 			}
13858 		}
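		/*
		 * For example, if the waitq is (failfast_bp -> A -> B) when
		 * a new retry bp R arrives here, the first branch above
		 * yields (failfast_bp -> R -> A -> B); if the failfast bp is
		 * not at the head, R is simply inserted at the head.
		 */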
13859 
13860 		if (statp == NULL) {
13861 			statp = kstat_waitq_enter;
13862 		}
13863 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13864 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
13865 	}
13866 
13867 done:
13868 	if (statp != NULL) {
13869 		SD_UPDATE_KSTATS(un, statp, bp);
13870 	}
13871 
13872 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13873 	    "sd_set_retry_bp: exit un:0x%p\n", un);
13874 }
13875 
13876 
13877 /*
13878  *    Function: sd_start_retry_command
13879  *
13880  * Description: Start the command that has been waiting on the target's
13881  *		retry queue.  Called from timeout(9F) context after the
13882  *		retry delay interval has expired.
13883  *
13884  *   Arguments: arg - pointer to associated softstate for the device.
13885  *
13886  *     Context: timeout(9F) thread context.  May not sleep.
13887  */
13888 
13889 static void
13890 sd_start_retry_command(void *arg)
13891 {
13892 	struct sd_lun *un = arg;
13893 
13894 	ASSERT(un != NULL);
13895 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13896 
13897 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13898 	    "sd_start_retry_command: entry\n");
13899 
13900 	mutex_enter(SD_MUTEX(un));
13901 
13902 	un->un_retry_timeid = NULL;
13903 
13904 	if (un->un_retry_bp != NULL) {
13905 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13906 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
13907 		    un, un->un_retry_bp);
13908 		sd_start_cmds(un, un->un_retry_bp);
13909 	}
13910 
13911 	mutex_exit(SD_MUTEX(un));
13912 
13913 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13914 	    "sd_start_retry_command: exit\n");
13915 }
13916 
13917 
13918 /*
13919  *    Function: sd_start_direct_priority_command
13920  *
13921  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
13922  *		received TRAN_BUSY when we called scsi_transport() to send it
13923  *		to the underlying HBA. This function is called from timeout(9F)
13924  *		context after the delay interval has expired.
13925  *
13926  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
13927  *
13928  *     Context: timeout(9F) thread context.  May not sleep.
13929  */
13930 
13931 static void
13932 sd_start_direct_priority_command(void *arg)
13933 {
13934 	struct buf	*priority_bp = arg;
13935 	struct sd_lun	*un;
13936 
13937 	ASSERT(priority_bp != NULL);
13938 	un = SD_GET_UN(priority_bp);
13939 	ASSERT(un != NULL);
13940 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13941 
13942 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13943 	    "sd_start_direct_priority_command: entry\n");
13944 
13945 	mutex_enter(SD_MUTEX(un));
13946 	un->un_direct_priority_timeid = NULL;
13947 	sd_start_cmds(un, priority_bp);
13948 	mutex_exit(SD_MUTEX(un));
13949 
13950 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13951 	    "sd_start_direct_priority_command: exit\n");
13952 }
13953 
13954 
13955 /*
13956  *    Function: sd_send_request_sense_command
13957  *
13958  * Description: Sends a REQUEST SENSE command to the target
13959  *
13960  *     Context: May be called from interrupt context.
13961  */
13962 
13963 static void
13964 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
13965 	struct scsi_pkt *pktp)
13966 {
13967 	ASSERT(bp != NULL);
13968 	ASSERT(un != NULL);
13969 	ASSERT(mutex_owned(SD_MUTEX(un)));
13970 
13971 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
13972 	    "entry: buf:0x%p\n", bp);
13973 
13974 	/*
13975 	 * If we are syncing or dumping, then fail the command to avoid a
13976 	 * recursive callback into scsi_transport(). Also fail the command
13977 	 * if we are suspended (legacy behavior).
13978 	 */
13979 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
13980 	    (un->un_state == SD_STATE_DUMPING)) {
13981 		sd_return_failed_command(un, bp, EIO);
13982 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13983 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
13984 		return;
13985 	}
13986 
13987 	/*
13988 	 * Retry the failed command and don't issue the request sense if:
13989 	 *    1) the sense buf is busy
13990 	 *    2) we have 1 or more outstanding commands on the target
13991 	 *    (the sense data will be cleared or invalidated anyway)
13992 	 *
13993 	 * Note: There could be an issue with not checking a retry limit here;
13994 	 * the problem is determining which retry limit to check.
13995 	 */
13996 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
13997 		/* Don't retry if the command is flagged as non-retryable */
13998 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
13999 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14000 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
14001 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14002 			    "sd_send_request_sense_command: "
14003 			    "at full throttle, retrying exit\n");
14004 		} else {
14005 			sd_return_failed_command(un, bp, EIO);
14006 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14007 			    "sd_send_request_sense_command: "
14008 			    "at full throttle, non-retryable exit\n");
14009 		}
14010 		return;
14011 	}
14012 
14013 	sd_mark_rqs_busy(un, bp);
14014 	sd_start_cmds(un, un->un_rqs_bp);
14015 
14016 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14017 	    "sd_send_request_sense_command: exit\n");
14018 }
14019 
14020 
14021 /*
14022  *    Function: sd_mark_rqs_busy
14023  *
14024  * Description: Indicate that the request sense bp for this instance is
14025  *		in use.
14026  *
14027  *     Context: May be called under interrupt context
14028  */
14029 
14030 static void
14031 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14032 {
14033 	struct sd_xbuf	*sense_xp;
14034 
14035 	ASSERT(un != NULL);
14036 	ASSERT(bp != NULL);
14037 	ASSERT(mutex_owned(SD_MUTEX(un)));
14038 	ASSERT(un->un_sense_isbusy == 0);
14039 
14040 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14041 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14042 
14043 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14044 	ASSERT(sense_xp != NULL);
14045 
14046 	SD_INFO(SD_LOG_IO, un,
14047 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14048 
14049 	ASSERT(sense_xp->xb_pktp != NULL);
14050 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14051 	    == (FLAG_SENSING | FLAG_HEAD));
14052 
14053 	un->un_sense_isbusy = 1;
14054 	un->un_rqs_bp->b_resid = 0;
14055 	sense_xp->xb_pktp->pkt_resid  = 0;
14056 	sense_xp->xb_pktp->pkt_reason = 0;
14057 
14058 	/* So we can get back the bp at interrupt time! */
14059 	sense_xp->xb_sense_bp = bp;
14060 
14061 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14062 
14063 	/*
14064 	 * Mark this buf as awaiting sense data. (This is already set in
14065 	 * the pkt_flags for the RQS packet.)
14066 	 */
14067 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14068 
14069 	sense_xp->xb_retry_count	= 0;
14070 	sense_xp->xb_victim_retry_count = 0;
14071 	sense_xp->xb_ua_retry_count	= 0;
14072 	sense_xp->xb_dma_resid  = 0;
14073 
14074 	/* Clean up the fields for auto-request sense */
14075 	sense_xp->xb_sense_status = 0;
14076 	sense_xp->xb_sense_state  = 0;
14077 	sense_xp->xb_sense_resid  = 0;
14078 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14079 
14080 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14081 }
14082 
14083 
14084 /*
14085  *    Function: sd_mark_rqs_idle
14086  *
14087  * Description: Marks the request sense bp as idle. SD_MUTEX must be held
14088  *		continuously through this routine to prevent reuse of the
14089  *		rqs struct before the caller can complete its processing.
14090  *
14091  * Return Code: Pointer to the RQS buf
14092  *
14093  *     Context: May be called under interrupt context
14094  */
14095 
14096 static struct buf *
14097 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14098 {
14099 	struct buf *bp;
14100 	ASSERT(un != NULL);
14101 	ASSERT(sense_xp != NULL);
14102 	ASSERT(mutex_owned(SD_MUTEX(un)));
14103 	ASSERT(un->un_sense_isbusy != 0);
14104 
14105 	un->un_sense_isbusy = 0;
14106 	bp = sense_xp->xb_sense_bp;
14107 	sense_xp->xb_sense_bp = NULL;
14108 
14109 	/* This pkt is no longer interested in getting sense data */
14110 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14111 
14112 	return (bp);
14113 }
14114 
14115 
14116 
14117 /*
14118  *    Function: sd_alloc_rqs
14119  *
14120  * Description: Set up the unit to receive auto request sense data
14121  *
14122  * Return Code: DDI_SUCCESS or DDI_FAILURE
14123  *
14124  *     Context: Called under attach(9E) context
14125  */
14126 
14127 static int
14128 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14129 {
14130 	struct sd_xbuf *xp;
14131 
14132 	ASSERT(un != NULL);
14133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14134 	ASSERT(un->un_rqs_bp == NULL);
14135 	ASSERT(un->un_rqs_pktp == NULL);
14136 
14137 	/*
14138 	 * First allocate the required buf and scsi_pkt structs, then set up
14139 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14140 	 */
14141 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14142 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14143 	if (un->un_rqs_bp == NULL) {
14144 		return (DDI_FAILURE);
14145 	}
14146 
14147 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14148 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14149 
14150 	if (un->un_rqs_pktp == NULL) {
14151 		sd_free_rqs(un);
14152 		return (DDI_FAILURE);
14153 	}
14154 
14155 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14156 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14157 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14158 
14159 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14160 
14161 	/* Set up the other needed members in the ARQ scsi_pkt. */
14162 	un->un_rqs_pktp->pkt_comp   = sdintr;
14163 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14164 	un->un_rqs_pktp->pkt_flags |=
14165 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14166 
14167 	/*
14168 	 * Allocate & initialize the sd_xbuf struct for the RQS command. Do
14169 	 * not provide any initpkt/destroypkt routines, as we take care of
14170 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14171 	 */
14172 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14173 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14174 	xp->xb_pktp = un->un_rqs_pktp;
14175 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14176 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14177 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14178 
14179 	/*
14180 	 * Save the pointer to the request sense private bp so it can
14181 	 * be retrieved in sdintr.
14182 	 */
14183 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14184 	ASSERT(un->un_rqs_bp->b_private == xp);
14185 
14186 	/*
14187 	 * See if the HBA supports auto-request sense for the specified
14188 	 * target/lun. If it does, then try to enable it (if not already
14189 	 * enabled).
14190 	 *
14191 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14192 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14193 	 * return success.  However, in both of these cases ARQ is always
14194 	 * enabled and scsi_ifgetcap will always return true. The best approach
14195 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14196 	 *
14197 	 * A third case is an HBA (adp) that always returns enabled on
14198 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
14199 	 * there is to issue a scsi_ifsetcap() and then a scsi_ifgetcap().
14200 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
14201 	 */
14202 
14203 	if (un->un_f_is_fibre == TRUE) {
14204 		un->un_f_arq_enabled = TRUE;
14205 	} else {
14206 #if defined(__i386) || defined(__amd64)
14207 		/*
14208 		 * Circumvent the Adaptec bug, remove this code when
14209 		 * the bug is fixed
14210 		 */
14211 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14212 #endif
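		/*
		 * The unconditional scsi_ifsetcap() above, followed by the
		 * scsi_ifgetcap() below, is the set-then-get sequence
		 * described in the block comment above for HBAs that
		 * misreport ARQ status.
		 */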
14213 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14214 		case 0:
14215 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14216 			    "sd_alloc_rqs: HBA supports ARQ\n");
14217 			/*
14218 			 * ARQ is supported by this HBA but currently is not
14219 			 * enabled. Attempt to enable it and if successful then
14220 			 * mark this instance as ARQ enabled.
14221 			 */
14222 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14223 			    == 1) {
14224 				/* Successfully enabled ARQ in the HBA */
14225 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14226 				    "sd_alloc_rqs: ARQ enabled\n");
14227 				un->un_f_arq_enabled = TRUE;
14228 			} else {
14229 				/* Could not enable ARQ in the HBA */
14230 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14231 				    "sd_alloc_rqs: failed ARQ enable\n");
14232 				un->un_f_arq_enabled = FALSE;
14233 			}
14234 			break;
14235 		case 1:
14236 			/*
14237 			 * ARQ is supported by this HBA and is already enabled.
14238 			 * Just mark ARQ as enabled for this instance.
14239 			 */
14240 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14241 			    "sd_alloc_rqs: ARQ already enabled\n");
14242 			un->un_f_arq_enabled = TRUE;
14243 			break;
14244 		default:
14245 			/*
14246 			 * ARQ is not supported by this HBA; disable it for this
14247 			 * instance.
14248 			 */
14249 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14250 			    "sd_alloc_rqs: HBA does not support ARQ\n");
14251 			un->un_f_arq_enabled = FALSE;
14252 			break;
14253 		}
14254 	}
14255 
14256 	return (DDI_SUCCESS);
14257 }
14258 
14259 
14260 /*
14261  *    Function: sd_free_rqs
14262  *
14263  * Description: Cleanup for the pre-instance RQS command.
14264  *
14265  *     Context: Kernel thread context
14266  */
14267 
14268 static void
14269 sd_free_rqs(struct sd_lun *un)
14270 {
14271 	ASSERT(un != NULL);
14272 
14273 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14274 
14275 	/*
14276 	 * If consistent memory is bound to a scsi_pkt, the pkt
14277 	 * has to be destroyed *before* freeing the consistent memory.
14278 	 * Do not change the sequence of these operations.
14279 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
14280 	 * after it was freed in scsi_free_consistent_buf().
14281 	 */
14282 	if (un->un_rqs_pktp != NULL) {
14283 		scsi_destroy_pkt(un->un_rqs_pktp);
14284 		un->un_rqs_pktp = NULL;
14285 	}
14286 
14287 	if (un->un_rqs_bp != NULL) {
14288 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14289 		scsi_free_consistent_buf(un->un_rqs_bp);
14290 		un->un_rqs_bp = NULL;
14291 	}
14292 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14293 }
14294 
14295 
14296 
14297 /*
14298  *    Function: sd_reduce_throttle
14299  *
14300  * Description: Reduces the maximum # of outstanding commands on a
14301  *		target to the current number of outstanding commands.
14302  *		Queues a timeout(9F) callback to restore the limit
14303  *		after a specified interval has elapsed.
14304  *		Typically used when we get a TRAN_BUSY return code
14305  *		back from scsi_transport().
14306  *
14307  *   Arguments: un - ptr to the sd_lun softstate struct
14308  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14309  *
14310  *     Context: May be called from interrupt context
14311  */
14312 
14313 static void
14314 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14315 {
14316 	ASSERT(un != NULL);
14317 	ASSERT(mutex_owned(SD_MUTEX(un)));
14318 	ASSERT(un->un_ncmds_in_transport >= 0);
14319 
14320 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14321 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14322 	    un, un->un_throttle, un->un_ncmds_in_transport);
14323 
14324 	if (un->un_throttle > 1) {
14325 		if (un->un_f_use_adaptive_throttle == TRUE) {
14326 			switch (throttle_type) {
14327 			case SD_THROTTLE_TRAN_BUSY:
14328 				if (un->un_busy_throttle == 0) {
14329 					un->un_busy_throttle = un->un_throttle;
14330 				}
14331 				break;
14332 			case SD_THROTTLE_QFULL:
14333 				un->un_busy_throttle = 0;
14334 				break;
14335 			default:
14336 				ASSERT(FALSE);
14337 			}
14338 
14339 			if (un->un_ncmds_in_transport > 0) {
14340 				un->un_throttle = un->un_ncmds_in_transport;
14341 			}
14342 
14343 		} else {
14344 			if (un->un_ncmds_in_transport == 0) {
14345 				un->un_throttle = 1;
14346 			} else {
14347 				un->un_throttle = un->un_ncmds_in_transport;
14348 			}
14349 		}
14350 	}
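	/*
	 * For example, with un_throttle == 16 and 5 commands in transport:
	 * a TRAN_BUSY under adaptive throttling saves 16 in
	 * un_busy_throttle and drops un_throttle to 5, while a QFULL drops
	 * un_throttle to 5 without saving the old value, so the restore
	 * callback must ramp the throttle back up incrementally.
	 */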
14351 
14352 	/* Reschedule the timeout if none is currently active */
14353 	if (un->un_reset_throttle_timeid == NULL) {
14354 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14355 		    un, SD_THROTTLE_RESET_INTERVAL);
14356 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14357 		    "sd_reduce_throttle: timeout scheduled!\n");
14358 	}
14359 
14360 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14361 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14362 }
14363 
14364 
14365 
14366 /*
14367  *    Function: sd_restore_throttle
14368  *
14369  * Description: Callback function for timeout(9F).  Resets the current
14370  *		value of un->un_throttle to its default.
14371  *
14372  *   Arguments: arg - pointer to associated softstate for the device.
14373  *
14374  *     Context: May be called from interrupt context
14375  */
14376 
14377 static void
14378 sd_restore_throttle(void *arg)
14379 {
14380 	struct sd_lun	*un = arg;
14381 
14382 	ASSERT(un != NULL);
14383 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14384 
14385 	mutex_enter(SD_MUTEX(un));
14386 
14387 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14388 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14389 
14390 	un->un_reset_throttle_timeid = NULL;
14391 
14392 	if (un->un_f_use_adaptive_throttle == TRUE) {
14393 		/*
14394 		 * If un_busy_throttle is nonzero, then it contains the
14395 		 * value that un_throttle was when we got a TRAN_BUSY back
14396 		 * from scsi_transport(). We want to revert back to this
14397 		 * value.
14398 		 *
14399 		 * In the QFULL case, the throttle limit will incrementally
14400 		 * increase until it reaches max throttle.
14401 		 */
14402 		if (un->un_busy_throttle > 0) {
14403 			un->un_throttle = un->un_busy_throttle;
14404 			un->un_busy_throttle = 0;
14405 		} else {
14406 			/*
14407 			 * Increase the throttle by 10% to open the gate
14408 			 * slowly; schedule another restore if the saved
14409 			 * throttle has not yet been reached.
14410 			 */
14411 			short throttle;
14412 			if (sd_qfull_throttle_enable) {
14413 				throttle = un->un_throttle +
14414 				    max((un->un_throttle / 10), 1);
14415 				un->un_throttle =
14416 				    (throttle < un->un_saved_throttle) ?
14417 				    throttle : un->un_saved_throttle;
14418 				if (un->un_throttle < un->un_saved_throttle) {
14419 					un->un_reset_throttle_timeid =
14420 					    timeout(sd_restore_throttle, un,
14421 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
14422 				}
14423 			}
14424 		}
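		/*
		 * For example, with un_throttle == 10 and un_saved_throttle
		 * == 20, the ramp above computes 10 + max(10 / 10, 1) == 11
		 * and, since 11 is still below the saved value, reschedules
		 * this callback; repeated invocations step the throttle back
		 * up until it reaches 20.
		 */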
14425 
14426 		/*
14427 		 * If un_throttle has fallen below the low-water mark, we
14428 		 * restore the maximum value here (and allow it to ratchet
14429 		 * down again if necessary).
14430 		 */
14431 		if (un->un_throttle < un->un_min_throttle) {
14432 			un->un_throttle = un->un_saved_throttle;
14433 		}
14434 	} else {
14435 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14436 		    "restoring limit from 0x%x to 0x%x\n",
14437 		    un->un_throttle, un->un_saved_throttle);
14438 		un->un_throttle = un->un_saved_throttle;
14439 	}
14440 
14441 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14442 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14443 
14444 	sd_start_cmds(un, NULL);
14445 
14446 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14447 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14448 	    un, un->un_throttle);
14449 
14450 	mutex_exit(SD_MUTEX(un));
14451 
14452 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14453 }
14454 
14455 /*
14456  *    Function: sdrunout
14457  *
14458  * Description: Callback routine for scsi_init_pkt when a resource allocation
14459  *		fails.
14460  *
14461  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14462  *		soft state instance.
14463  *
14464  * Return Code: The scsi_init_pkt routine allows for the callback function to
14465  *		return a 0 indicating the callback should be rescheduled or a 1
14466  *		indicating not to reschedule. This routine always returns 1
14467  *		because the driver always provides a callback function to
14468  *		scsi_init_pkt. This results in a callback always being scheduled
14469  *		(via the scsi_init_pkt callback implementation) if a resource
14470  *		failure occurs.
14471  *
14472  *     Context: This callback function may not block or call routines that block
14473  *
14474  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14475  *		request persisting at the head of the list which cannot be
14476  *		satisfied even after multiple retries. In the future the driver
14477  *		may implement some type of maximum runout count before failing
14478  *		an I/O.
14479  */
14480 
14481 static int
14482 sdrunout(caddr_t arg)
14483 {
14484 	struct sd_lun	*un = (struct sd_lun *)arg;
14485 
14486 	ASSERT(un != NULL);
14487 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14488 
14489 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14490 
14491 	mutex_enter(SD_MUTEX(un));
14492 	sd_start_cmds(un, NULL);
14493 	mutex_exit(SD_MUTEX(un));
14494 	/*
14495 	 * This callback routine always returns 1 (i.e. do not reschedule)
14496 	 * because we always specify sdrunout as the callback handler for
14497 	 * scsi_init_pkt inside the call to sd_start_cmds.
14498 	 */
14499 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14500 	return (1);
14501 }
14502 
14503 
14504 /*
14505  *    Function: sdintr
14506  *
14507  * Description: Completion callback routine for scsi_pkt(9S) structs
14508  *		sent to the HBA driver via scsi_transport(9F).
14509  *
14510  *     Context: Interrupt context
14511  */
14512 
14513 static void
14514 sdintr(struct scsi_pkt *pktp)
14515 {
14516 	struct buf	*bp;
14517 	struct sd_xbuf	*xp;
14518 	struct sd_lun	*un;
14519 
14520 	ASSERT(pktp != NULL);
14521 	bp = (struct buf *)pktp->pkt_private;
14522 	ASSERT(bp != NULL);
14523 	xp = SD_GET_XBUF(bp);
14524 	ASSERT(xp != NULL);
14525 	ASSERT(xp->xb_pktp != NULL);
14526 	un = SD_GET_UN(bp);
14527 	ASSERT(un != NULL);
14528 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14529 
14530 #ifdef SD_FAULT_INJECTION
14531 
14532 	SD_INFO(SD_LOG_IOERR, un, "sdintr: calling fault injection\n");
14533 	/* SD FaultInjection */
14534 	sd_faultinjection(pktp);
14535 
14536 #endif /* SD_FAULT_INJECTION */
14537 
14538 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14539 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14540 
14541 	mutex_enter(SD_MUTEX(un));
14542 
14543 	/* Reduce the count of the # of commands currently in transport */
14544 	un->un_ncmds_in_transport--;
14545 	ASSERT(un->un_ncmds_in_transport >= 0);
14546 
14547 	/* Increment counter to indicate that the callback routine is active */
14548 	un->un_in_callback++;
14549 
14550 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14551 
14552 #ifdef	SDDEBUG
14553 	if (bp == un->un_retry_bp) {
14554 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14555 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14556 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14557 	}
14558 #endif
14559 
14560 	/*
14561 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
14562 	 */
14563 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14564 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14565 		    "Device is gone\n");
14566 		sd_return_failed_command(un, bp, EIO);
14567 		goto exit;
14568 	}
14569 
14570 	/*
14571 	 * First see if the pkt has auto-request sense data with it....
14572 	 * Look at the packet state first so we don't take a performance
14573 	 * hit looking at the arq enabled flag unless absolutely necessary.
14574 	 */
14575 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14576 	    (un->un_f_arq_enabled == TRUE)) {
14577 		/*
14578 		 * The HBA did an auto request sense for this command so check
14579 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14580 		 * driver command that should not be retried.
14581 		 */
14582 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14583 			/*
14584 			 * Save the relevant sense info into the xp for the
14585 			 * original cmd.
14586 			 */
14587 			struct scsi_arq_status *asp;
14588 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14589 			xp->xb_sense_status =
14590 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14591 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14592 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14593 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14594 			    min(sizeof (struct scsi_extended_sense),
14595 			    SENSE_LENGTH));
14596 
14597 			/* fail the command */
14598 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14599 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14600 			sd_return_failed_command(un, bp, EIO);
14601 			goto exit;
14602 		}
14603 
14604 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14605 		/*
14606 		 * We want to either retry or fail this command, so free
14607 		 * the DMA resources here.  If we retry the command then
14608 		 * the DMA resources will be reallocated in sd_start_cmds().
14609 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14610 		 * causes the *entire* transfer to start over again from the
14611 		 * beginning of the request, even for PARTIAL chunks that
14612 		 * have already transferred successfully.
14613 		 */
14614 		if ((un->un_f_is_fibre == TRUE) &&
14615 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14616 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14617 			scsi_dmafree(pktp);
14618 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14619 		}
14620 #endif
14621 
14622 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14623 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14624 
14625 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14626 		goto exit;
14627 	}
14628 
14629 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14630 	if (pktp->pkt_flags & FLAG_SENSING)  {
14631 		/* This pktp is from the unit's REQUEST_SENSE command */
14632 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14633 		    "sdintr: sd_handle_request_sense\n");
14634 		sd_handle_request_sense(un, bp, xp, pktp);
14635 		goto exit;
14636 	}
14637 
14638 	/*
14639 	 * Check to see if the command successfully completed as requested;
14640 	 * this is the most common case (and also the hot performance path).
14641 	 *
14642 	 * Requirements for successful completion are:
14643 	 * pkt_reason is CMD_CMPLT and packet status is status good.
14644 	 * In addition:
14645 	 * - A residual of zero indicates successful completion no matter what
14646 	 *   the command is.
14647 	 * - If the residual is not zero and the command is not a read or
14648 	 *   write, then it's still defined as successful completion. In other
14649 	 *   words, if the command is a read or write the residual must be
14650 	 *   zero for successful completion.
14651 	 * - If the residual is not zero and the command is a read or
14652 	 *   write, and it's a USCSICMD, then it's still defined as
14653 	 *   successful completion.
14654 	 */
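	/*
	 * (Equivalently: given CMD_CMPLT and STATUS_GOOD, the command is
	 * treated as successful when the residual is zero, when it is not
	 * a read or write, or when it is a USCSI command.)
	 */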
14655 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14656 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14657 
14658 		/*
14659 		 * Since this command is returned with a good status, we
14660 		 * can reset the count for Sonoma failover.
14661 		 */
14662 		un->un_sonoma_failure_count = 0;
14663 
14664 		/*
14665 		 * Return all USCSI commands on good status
14666 		 */
14667 		if (pktp->pkt_resid == 0) {
14668 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14669 			    "sdintr: returning command for resid == 0\n");
14670 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14671 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14672 			SD_UPDATE_B_RESID(bp, pktp);
14673 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14674 			    "sdintr: returning command for resid != 0\n");
14675 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14676 			SD_UPDATE_B_RESID(bp, pktp);
14677 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14678 			    "sdintr: returning uscsi command\n");
14679 		} else {
14680 			goto not_successful;
14681 		}
14682 		sd_return_command(un, bp);
14683 
14684 		/*
14685 		 * Decrement counter to indicate that the callback routine
14686 		 * is done.
14687 		 */
14688 		un->un_in_callback--;
14689 		ASSERT(un->un_in_callback >= 0);
14690 		mutex_exit(SD_MUTEX(un));
14691 
14692 		return;
14693 	}
14694 
14695 not_successful:
14696 
14697 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14698 	/*
14699 	 * The following is based upon knowledge of the underlying transport
14700 	 * and its use of DMA resources.  This code should be removed when
14701 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14702 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14703 	 * and sd_start_cmds().
14704 	 *
14705 	 * Free any DMA resources associated with this command if there
14706 	 * is a chance it could be retried or enqueued for later retry.
14707 	 * If we keep the DMA binding then mpxio cannot reissue the
14708 	 * command on another path whenever a path failure occurs.
14709 	 *
14710 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14711 	 * causes the *entire* transfer to start over again from the
14712 	 * beginning of the request, even for PARTIAL chunks that
14713 	 * have already transferred successfully.
14714 	 *
14715 	 * This is only done for non-uscsi commands (and also skipped for the
14716 	 * driver's internal RQS command). Also just do this for Fibre Channel
14717 	 * devices as these are the only ones that support mpxio.
14718 	 */
14719 	if ((un->un_f_is_fibre == TRUE) &&
14720 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14721 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14722 		scsi_dmafree(pktp);
14723 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14724 	}
14725 #endif
14726 
14727 	/*
14728 	 * The command did not successfully complete as requested so check
14729 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14730 	 * driver command that should not be retried so just return. If
14731 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14732 	 */
14733 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14734 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14735 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14736 		/*
14737 		 * Issue a request sense if a check condition caused the error
14738 		 * (we handle the auto request sense case above), otherwise
14739 		 * just fail the command.
14740 		 */
14741 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14742 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14743 			sd_send_request_sense_command(un, bp, pktp);
14744 		} else {
14745 			sd_return_failed_command(un, bp, EIO);
14746 		}
14747 		goto exit;
14748 	}
14749 
14750 	/*
14751 	 * The command did not successfully complete as requested so process
14752 	 * the error, retry, and/or attempt recovery.
14753 	 */
14754 	switch (pktp->pkt_reason) {
14755 	case CMD_CMPLT:
14756 		switch (SD_GET_PKT_STATUS(pktp)) {
14757 		case STATUS_GOOD:
14758 			/*
14759 			 * The command completed successfully with a non-zero
14760 			 * residual
14761 			 */
14762 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14763 			    "sdintr: STATUS_GOOD \n");
14764 			sd_pkt_status_good(un, bp, xp, pktp);
14765 			break;
14766 
14767 		case STATUS_CHECK:
14768 		case STATUS_TERMINATED:
14769 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14770 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
14771 			sd_pkt_status_check_condition(un, bp, xp, pktp);
14772 			break;
14773 
14774 		case STATUS_BUSY:
14775 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14776 			    "sdintr: STATUS_BUSY\n");
14777 			sd_pkt_status_busy(un, bp, xp, pktp);
14778 			break;
14779 
14780 		case STATUS_RESERVATION_CONFLICT:
14781 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14782 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
14783 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
14784 			break;
14785 
14786 		case STATUS_QFULL:
14787 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14788 			    "sdintr: STATUS_QFULL\n");
14789 			sd_pkt_status_qfull(un, bp, xp, pktp);
14790 			break;
14791 
14792 		case STATUS_MET:
14793 		case STATUS_INTERMEDIATE:
14794 		case STATUS_SCSI2:
14795 		case STATUS_INTERMEDIATE_MET:
14796 		case STATUS_ACA_ACTIVE:
14797 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14798 			    "Unexpected SCSI status received: 0x%x\n",
14799 			    SD_GET_PKT_STATUS(pktp));
14800 			sd_return_failed_command(un, bp, EIO);
14801 			break;
14802 
14803 		default:
14804 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14805 			    "Invalid SCSI status received: 0x%x\n",
14806 			    SD_GET_PKT_STATUS(pktp));
14807 			sd_return_failed_command(un, bp, EIO);
14808 			break;
14809 
14810 		}
14811 		break;
14812 
14813 	case CMD_INCOMPLETE:
14814 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14815 		    "sdintr:  CMD_INCOMPLETE\n");
14816 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
14817 		break;
14818 	case CMD_TRAN_ERR:
14819 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14820 		    "sdintr: CMD_TRAN_ERR\n");
14821 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
14822 		break;
14823 	case CMD_RESET:
14824 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14825 		    "sdintr: CMD_RESET \n");
14826 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
14827 		break;
14828 	case CMD_ABORTED:
14829 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14830 		    "sdintr: CMD_ABORTED \n");
14831 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
14832 		break;
14833 	case CMD_TIMEOUT:
14834 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14835 		    "sdintr: CMD_TIMEOUT\n");
14836 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
14837 		break;
14838 	case CMD_UNX_BUS_FREE:
14839 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14840 		    "sdintr: CMD_UNX_BUS_FREE \n");
14841 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
14842 		break;
14843 	case CMD_TAG_REJECT:
14844 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14845 		    "sdintr: CMD_TAG_REJECT\n");
14846 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
14847 		break;
14848 	default:
14849 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14850 		    "sdintr: default\n");
14851 		sd_pkt_reason_default(un, bp, xp, pktp);
14852 		break;
14853 	}
14854 
14855 exit:
14856 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
14857 
14858 	/* Decrement counter to indicate that the callback routine is done. */
14859 	un->un_in_callback--;
14860 	ASSERT(un->un_in_callback >= 0);
14861 
14862 	/*
14863 	 * At this point, the pkt has been dispatched, i.e., it is either
14864 	 * being re-tried or has been returned to its caller and should
14865 	 * not be referenced.
14866 	 */
14867 
14868 	mutex_exit(SD_MUTEX(un));
14869 }
14870 
14871 
14872 /*
14873  *    Function: sd_print_incomplete_msg
14874  *
14875  * Description: Prints the error message for a CMD_INCOMPLETE error.
14876  *
14877  *   Arguments: un - ptr to associated softstate for the device.
14878  *		bp - ptr to the buf(9S) for the command.
14879  *		arg - message string ptr
14880  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
14881  *			or SD_NO_RETRY_ISSUED.
14882  *
14883  *     Context: May be called under interrupt context
14884  */
14885 
14886 static void
14887 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
14888 {
14889 	struct scsi_pkt	*pktp;
14890 	char	*msgp;
14891 	char	*cmdp = arg;
14892 
14893 	ASSERT(un != NULL);
14894 	ASSERT(mutex_owned(SD_MUTEX(un)));
14895 	ASSERT(bp != NULL);
14896 	ASSERT(arg != NULL);
14897 	pktp = SD_GET_PKTP(bp);
14898 	ASSERT(pktp != NULL);
14899 
14900 	switch (code) {
14901 	case SD_DELAYED_RETRY_ISSUED:
14902 	case SD_IMMEDIATE_RETRY_ISSUED:
14903 		msgp = "retrying";
14904 		break;
14905 	case SD_NO_RETRY_ISSUED:
14906 	default:
14907 		msgp = "giving up";
14908 		break;
14909 	}
14910 
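	/*
	 * The resulting message reads, for example,
	 * "incomplete read - retrying" or "incomplete write - giving up".
	 */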
14911 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
14912 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14913 		    "incomplete %s - %s\n", cmdp, msgp);
14914 	}
14915 }
14916 
14917 
14918 
14919 /*
14920  *    Function: sd_pkt_status_good
14921  *
14922  * Description: Processing for a STATUS_GOOD code in pkt_status.
14923  *
14924  *     Context: May be called under interrupt context
14925  */
14926 
14927 static void
14928 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
14929 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
14930 {
14931 	char	*cmdp;
14932 
14933 	ASSERT(un != NULL);
14934 	ASSERT(mutex_owned(SD_MUTEX(un)));
14935 	ASSERT(bp != NULL);
14936 	ASSERT(xp != NULL);
14937 	ASSERT(pktp != NULL);
14938 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
14939 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
14940 	ASSERT(pktp->pkt_resid != 0);
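	/*
	 * We only get here from sdintr() when a command has completed with
	 * STATUS_GOOD but a non-zero residual; the ASSERTs above capture
	 * that contract.
	 */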
14941 
14942 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
14943 
14944 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
14945 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
14946 	case SCMD_READ:
14947 		cmdp = "read";
14948 		break;
14949 	case SCMD_WRITE:
14950 		cmdp = "write";
14951 		break;
14952 	default:
14953 		SD_UPDATE_B_RESID(bp, pktp);
14954 		sd_return_command(un, bp);
14955 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14956 		return;
14957 	}
14958 
14959 	/*
14960 	 * See if we can retry the read/write, preferably immediately.
14961 	 * If retries are exhausted, then sd_retry_command() will update
14962 	 * the b_resid count.
14963 	 */
14964 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
14965 	    cmdp, EIO, (clock_t)0, NULL);
14966 
14967 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14968 }
14969 
14970 
14971 
14972 
14973 
14974 /*
14975  *    Function: sd_handle_request_sense
14976  *
14977  * Description: Processing for non-auto Request Sense command.
14978  *
14979  *   Arguments: un - ptr to associated softstate
14980  *		sense_bp - ptr to buf(9S) for the RQS command
14981  *		sense_xp - ptr to the sd_xbuf for the RQS command
14982  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
14983  *
14984  *     Context: May be called under interrupt context
14985  */
14986 
14987 static void
14988 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
14989 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
14990 {
14991 	struct buf	*cmd_bp;	/* buf for the original command */
14992 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
14993 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
14994 
14995 	ASSERT(un != NULL);
14996 	ASSERT(mutex_owned(SD_MUTEX(un)));
14997 	ASSERT(sense_bp != NULL);
14998 	ASSERT(sense_xp != NULL);
14999 	ASSERT(sense_pktp != NULL);
15000 
15001 	/*
15002 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
15003 	 * RQS command and not the original command.
15004 	 */
15005 	ASSERT(sense_pktp == un->un_rqs_pktp);
15006 	ASSERT(sense_bp   == un->un_rqs_bp);
15007 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
15008 	    (FLAG_SENSING | FLAG_HEAD));
15009 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
15010 	    FLAG_SENSING) == FLAG_SENSING);
15011 
15012 	/* These are the bp, xp, and pktp for the original command */
15013 	cmd_bp = sense_xp->xb_sense_bp;
15014 	cmd_xp = SD_GET_XBUF(cmd_bp);
15015 	cmd_pktp = SD_GET_PKTP(cmd_bp);
15016 
15017 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
15018 		/*
15019 		 * The REQUEST SENSE command failed.  Release the REQUEST
15020 		 * SENSE command for re-use, get back the bp for the original
15021 		 * command, and attempt to re-try the original command if
15022 		 * FLAG_DIAGNOSE is not set in the original packet.
15023 		 */
15024 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15025 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15026 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15027 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15028 			    NULL, NULL, EIO, (clock_t)0, NULL);
15029 			return;
15030 		}
15031 	}
15032 
15033 	/*
15034 	 * Save the relevant sense info into the xp for the original cmd.
15035 	 *
15036 	 * Note: if the request sense failed, the state info will be zero,
15037 	 * as set in sd_mark_rqs_busy().
15038 	 */
15039 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15040 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15041 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15042 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15043 
15044 	/*
15045 	 *  Free up the RQS command....
15046 	 *  NOTE:
15047 	 *	Must do this BEFORE calling sd_validate_sense_data!
15048 	 *	sd_validate_sense_data may return the original command in
15049 	 *	which case the pkt will be freed and the flags can no
15050 	 *	longer be touched.
15051 	 *	SD_MUTEX is held through this process until the command
15052 	 *	is dispatched based upon the sense data, so there are
15053 	 *	no race conditions.
15054 	 */
15055 	(void) sd_mark_rqs_idle(un, sense_xp);
15056 
15057 	/*
15058 	 * For a retryable command, see if we have valid sense data; if so,
15059 	 * turn it over to sd_decode_sense() to figure out the right course
15060 	 * of action. Just fail a non-retryable command.
15061 	 */
15062 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15063 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15064 		    SD_SENSE_DATA_IS_VALID) {
15065 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15066 		}
15067 	} else {
15068 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15069 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15070 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15071 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15072 		sd_return_failed_command(un, cmd_bp, EIO);
15073 	}
15074 }
15075 
15076 
15077 
15078 
15079 /*
15080  *    Function: sd_handle_auto_request_sense
15081  *
15082  * Description: Processing for auto-request sense information.
15083  *
15084  *   Arguments: un - ptr to associated softstate
15085  *		bp - ptr to buf(9S) for the command
15086  *		xp - ptr to the sd_xbuf for the command
15087  *		pktp - ptr to the scsi_pkt(9S) for the command
15088  *
15089  *     Context: May be called under interrupt context
15090  */
15091 
15092 static void
15093 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15094 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15095 {
15096 	struct scsi_arq_status *asp;
15097 
15098 	ASSERT(un != NULL);
15099 	ASSERT(mutex_owned(SD_MUTEX(un)));
15100 	ASSERT(bp != NULL);
15101 	ASSERT(xp != NULL);
15102 	ASSERT(pktp != NULL);
15103 	ASSERT(pktp != un->un_rqs_pktp);
15104 	ASSERT(bp   != un->un_rqs_bp);
15105 
15106 	/*
15107 	 * For auto-request sense, we get a scsi_arq_status back from
15108 	 * the HBA, with the sense data in the sts_sensedata member.
15109 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15110 	 */
15111 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
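	/*
	 * scsi_arq_status(9S) carries both the status of the original
	 * command (sts_status) and the reason/status/state/resid of the
	 * implicit REQUEST SENSE (the sts_rqpkt_* members used below),
	 * plus the sense bytes themselves in sts_sensedata.
	 */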
15112 
15113 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15114 		/*
15115 		 * The auto REQUEST SENSE failed; see if we can re-try
15116 		 * the original command.
15117 		 */
15118 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15119 		    "auto request sense failed (reason=%s)\n",
15120 		    scsi_rname(asp->sts_rqpkt_reason));
15121 
15122 		sd_reset_target(un, pktp);
15123 
15124 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15125 		    NULL, NULL, EIO, (clock_t)0, NULL);
15126 		return;
15127 	}
15128 
15129 	/* Save the relevant sense info into the xp for the original cmd. */
15130 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15131 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15132 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
15133 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15134 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
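	/*
	 * The min() above is defensive: the copy is bounded both by the
	 * size of the sts_sensedata structure and by SENSE_LENGTH, the
	 * amount xb_sense_data is sized to hold.
	 */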
15135 
15136 	/*
15137 	 * See if we have valid sense data; if so, turn it over to
15138 	 * sd_decode_sense() to figure out the right course of action.
15139 	 */
15140 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15141 		sd_decode_sense(un, bp, xp, pktp);
15142 	}
15143 }
15144 
15145 
15146 /*
15147  *    Function: sd_print_sense_failed_msg
15148  *
15149  * Description: Print log message when RQS has failed.
15150  *
15151  *   Arguments: un - ptr to associated softstate
15152  *		bp - ptr to buf(9S) for the command
15153  *		arg - generic message string ptr
15154  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15155  *			or SD_NO_RETRY_ISSUED
15156  *
15157  *     Context: May be called from interrupt context
15158  */
15159 
15160 static void
15161 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15162 	int code)
15163 {
15164 	char	*msgp = arg;
15165 
15166 	ASSERT(un != NULL);
15167 	ASSERT(mutex_owned(SD_MUTEX(un)));
15168 	ASSERT(bp != NULL);
15169 
15170 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15171 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "%s", msgp);
15172 	}
15173 }
15174 
15175 
15176 /*
15177  *    Function: sd_validate_sense_data
15178  *
15179  * Description: Check the given sense data for validity.
15180  *		If the sense data is not valid, the command will
15181  *		be either failed or retried!
15182  *
15183  * Return Code: SD_SENSE_DATA_IS_INVALID
15184  *		SD_SENSE_DATA_IS_VALID
15185  *
15186  *     Context: May be called from interrupt context
15187  */
15188 
15189 static int
15190 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15191 {
15192 	struct scsi_extended_sense *esp;
15193 	struct	scsi_pkt *pktp;
15194 	size_t	actual_len;
15195 	char	*msgp = NULL;
15196 
15197 	ASSERT(un != NULL);
15198 	ASSERT(mutex_owned(SD_MUTEX(un)));
15199 	ASSERT(bp != NULL);
15200 	ASSERT(bp != un->un_rqs_bp);
15201 	ASSERT(xp != NULL);
15202 
15203 	pktp = SD_GET_PKTP(bp);
15204 	ASSERT(pktp != NULL);
15205 
15206 	/*
15207 	 * Check the status of the RQS command (auto or manual).
15208 	 */
15209 	switch (xp->xb_sense_status & STATUS_MASK) {
15210 	case STATUS_GOOD:
15211 		break;
15212 
15213 	case STATUS_RESERVATION_CONFLICT:
15214 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15215 		return (SD_SENSE_DATA_IS_INVALID);
15216 
15217 	case STATUS_BUSY:
15218 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15219 		    "Busy Status on REQUEST SENSE\n");
15220 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15221 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15222 		return (SD_SENSE_DATA_IS_INVALID);
15223 
15224 	case STATUS_QFULL:
15225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15226 		    "QFULL Status on REQUEST SENSE\n");
15227 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15228 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15229 		return (SD_SENSE_DATA_IS_INVALID);
15230 
15231 	case STATUS_CHECK:
15232 	case STATUS_TERMINATED:
15233 		msgp = "Check Condition on REQUEST SENSE\n";
15234 		goto sense_failed;
15235 
15236 	default:
15237 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
15238 		goto sense_failed;
15239 	}
15240 
15241 	/*
15242 	 * See if we got the minimum required amount of sense data.
15243 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
15244 	 * or less.
15245 	 */
15246 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
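	/*
	 * For example, if the device transferred all of the requested
	 * sense bytes then xb_sense_resid is 0 and actual_len equals
	 * SENSE_LENGTH; if nothing was transferred, actual_len is 0.
	 */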
15247 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15248 	    (actual_len == 0)) {
15249 		msgp = "Request Sense couldn't get sense data\n";
15250 		goto sense_failed;
15251 	}
15252 
15253 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15254 		msgp = "Not enough sense information\n";
15255 		goto sense_failed;
15256 	}
15257 
15258 	/*
15259 	 * We require the extended sense data
15260 	 */
15261 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15262 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15263 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15264 			static char tmp[8];
15265 			static char buf[148];
15266 			char *p = (char *)(xp->xb_sense_data);
15267 			int i;
15268 
15269 			mutex_enter(&sd_sense_mutex);
15270 			(void) strcpy(buf, "undecodable sense information:");
15271 			for (i = 0; i < actual_len; i++) {
15272 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15273 				(void) strcpy(&buf[strlen(buf)], tmp);
15274 			}
15275 			i = strlen(buf);
15276 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15277 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "%s", buf);
15278 			mutex_exit(&sd_sense_mutex);
15279 		}
15280 		/* Note: Legacy behavior, fail the command with no retry */
15281 		sd_return_failed_command(un, bp, EIO);
15282 		return (SD_SENSE_DATA_IS_INVALID);
15283 	}
15284 
15285 	/*
15286 	 * Check that es_code is valid (es_class concatenated with es_code
15287 	 * makes up the "response code" field).  es_class will always be 7, so
15288 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
15289 	 * sense data format.
15290 	 */
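	/*
	 * Viewed as a single byte, the response code is
	 * (es_class << 4) | es_code, so the values accepted below
	 * correspond to response codes 0x70-0x73 and 0x7F.
	 */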
15291 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15292 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15293 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15294 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15295 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15296 		goto sense_failed;
15297 	}
15298 
15299 	return (SD_SENSE_DATA_IS_VALID);
15300 
15301 sense_failed:
15302 	/*
15303 	 * If the request sense failed (for whatever reason), attempt
15304 	 * to retry the original command.
15305 	 */
15306 #if defined(__i386) || defined(__amd64)
15307 	/*
15308 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
15309 	 * sddef.h for the SPARC platform, while x86 uses one binary
15310 	 * for both SCSI and FC.
15311 	 * The SD_RETRY_DELAY value below needs to be adjusted whenever
15312 	 * SD_RETRY_DELAY changes in sddef.h.
15313 	 */
15314 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15315 	    sd_print_sense_failed_msg, msgp, EIO,
15316 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
15317 #else
15318 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15319 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15320 #endif
15321 
15322 	return (SD_SENSE_DATA_IS_INVALID);
15323 }
15324 
15325 
15326 
15327 /*
15328  *    Function: sd_decode_sense
15329  *
15330  * Description: Take recovery action(s) when SCSI Sense Data is received.
15331  *
15332  *     Context: Interrupt context.
15333  */
15334 
15335 static void
15336 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15337 	struct scsi_pkt *pktp)
15338 {
15339 	uint8_t sense_key;
15340 
15341 	ASSERT(un != NULL);
15342 	ASSERT(mutex_owned(SD_MUTEX(un)));
15343 	ASSERT(bp != NULL);
15344 	ASSERT(bp != un->un_rqs_bp);
15345 	ASSERT(xp != NULL);
15346 	ASSERT(pktp != NULL);
15347 
15348 	sense_key = scsi_sense_key(xp->xb_sense_data);
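	/*
	 * Note: scsi_sense_key(9F) extracts the sense key from either
	 * fixed format or descriptor format sense data.
	 */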
15349 
15350 	switch (sense_key) {
15351 	case KEY_NO_SENSE:
15352 		sd_sense_key_no_sense(un, bp, xp, pktp);
15353 		break;
15354 	case KEY_RECOVERABLE_ERROR:
15355 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15356 		    bp, xp, pktp);
15357 		break;
15358 	case KEY_NOT_READY:
15359 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15360 		    bp, xp, pktp);
15361 		break;
15362 	case KEY_MEDIUM_ERROR:
15363 	case KEY_HARDWARE_ERROR:
15364 		sd_sense_key_medium_or_hardware_error(un,
15365 		    xp->xb_sense_data, bp, xp, pktp);
15366 		break;
15367 	case KEY_ILLEGAL_REQUEST:
15368 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15369 		break;
15370 	case KEY_UNIT_ATTENTION:
15371 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15372 		    bp, xp, pktp);
15373 		break;
15374 	case KEY_WRITE_PROTECT:
15375 	case KEY_VOLUME_OVERFLOW:
15376 	case KEY_MISCOMPARE:
15377 		sd_sense_key_fail_command(un, bp, xp, pktp);
15378 		break;
15379 	case KEY_BLANK_CHECK:
15380 		sd_sense_key_blank_check(un, bp, xp, pktp);
15381 		break;
15382 	case KEY_ABORTED_COMMAND:
15383 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15384 		break;
15385 	case KEY_VENDOR_UNIQUE:
15386 	case KEY_COPY_ABORTED:
15387 	case KEY_EQUAL:
15388 	case KEY_RESERVED:
15389 	default:
15390 		sd_sense_key_default(un, xp->xb_sense_data,
15391 		    bp, xp, pktp);
15392 		break;
15393 	}
15394 }
15395 
15396 
15397 /*
15398  *    Function: sd_dump_memory
15399  *
15400  * Description: Debug logging routine to print the contents of a user provided
15401  *		buffer. The output of the buffer is broken up into 256-byte
15402  *		segments due to a size constraint of the scsi_log
15403  *		implementation.
15404  *
15405  *   Arguments: un - ptr to softstate
15406  *		comp - component mask
15407  *		title - "title" string to precede data when printed
15408  *		data - ptr to data block to be printed
15409  *		len - size of data block to be printed
15410  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15411  *
15412  *     Context: May be called from interrupt context
15413  */
15414 
15415 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15416 
15417 static char *sd_dump_format_string[] = {
15418 		" 0x%02x",
15419 		" %c"
15420 };
15421 
15422 static void
15423 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15424     int len, int fmt)
15425 {
15426 	int	i, j;
15427 	int	avail_count;
15428 	int	start_offset;
15429 	int	end_offset;
15430 	size_t	entry_len;
15431 	char	*bufp;
15432 	char	*local_buf;
15433 	char	*format_string;
15434 
15435 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15436 
15437 	/*
15438 	 * In the debug version of the driver, this function is called from a
15439 	 * number of places which are NOPs in the release driver.
15440 	 * The debug driver therefore has additional methods of filtering
15441 	 * debug output.
15442 	 */
15443 #ifdef SDDEBUG
15444 	/*
15445 	 * In the debug version of the driver we can reduce the amount of debug
15446 	 * messages by setting sd_error_level to something other than
15447 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15448 	 * sd_component_mask.
15449 	 */
15450 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15451 	    (sd_error_level != SCSI_ERR_ALL)) {
15452 		return;
15453 	}
15454 	if (((sd_component_mask & comp) == 0) ||
15455 	    (sd_error_level != SCSI_ERR_ALL)) {
15456 		return;
15457 	}
15458 #else
15459 	if (sd_error_level != SCSI_ERR_ALL) {
15460 		return;
15461 	}
15462 #endif
15463 
15464 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15465 	bufp = local_buf;
15466 	/*
15467 	 * Available length is the length of local_buf[], minus the
15468 	 * length of the title string, minus one for the ":", minus
15469 	 * one for the newline, minus one for the NULL terminator.
15470 	 * This gives the #bytes available for holding the printed
15471 	 * values from the given data buffer.
15472 	 */
15473 	if (fmt == SD_LOG_HEX) {
15474 		format_string = sd_dump_format_string[0];
15475 	} else /* SD_LOG_CHAR */ {
15476 		format_string = sd_dump_format_string[1];
15477 	}
15478 	/*
15479 	 * Available count is the number of elements from the given
15480 	 * data buffer that we can fit into the available length.
15481 	 * This is based upon the size of the format string used.
15482 	 * Make one entry and find its size.
15483 	 */
15484 	(void) sprintf(bufp, format_string, data[0]);
15485 	entry_len = strlen(bufp);
15486 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
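	/*
	 * For example, in SD_LOG_HEX mode each entry is 5 characters
	 * (" 0x%02x" prints strings like " 0x2a"), so a 10-character
	 * title yields avail_count = (256 - 10 - 3) / 5 = 48 entries
	 * per log line.
	 */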
15487 
15488 	j = 0;
15489 	while (j < len) {
15490 		bufp = local_buf;
15491 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15492 		start_offset = j;
15493 
15494 		end_offset = start_offset + avail_count;
15495 
15496 		(void) sprintf(bufp, "%s:", title);
15497 		bufp += strlen(bufp);
15498 		for (i = start_offset; ((i < end_offset) && (j < len));
15499 		    i++, j++) {
15500 			(void) sprintf(bufp, format_string, data[i]);
15501 			bufp += entry_len;
15502 		}
15503 		(void) sprintf(bufp, "\n");
15504 
15505 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15506 	}
15507 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15508 }
15509 
15510 /*
15511  *    Function: sd_print_sense_msg
15512  *
15513  * Description: Log a message based upon the given sense data.
15514  *
15515  *   Arguments: un - ptr to associated softstate
15516  *		bp - ptr to buf(9S) for the command
15517  *		arg - ptr to associate sd_sense_info struct
15518  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15519  *			or SD_NO_RETRY_ISSUED
15520  *
15521  *     Context: May be called from interrupt context
15522  */
15523 
15524 static void
15525 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15526 {
15527 	struct sd_xbuf	*xp;
15528 	struct scsi_pkt	*pktp;
15529 	uint8_t *sensep;
15530 	daddr_t request_blkno;
15531 	diskaddr_t err_blkno;
15532 	int severity;
15533 	int pfa_flag;
15534 	extern struct scsi_key_strings scsi_cmds[];
15535 
15536 	ASSERT(un != NULL);
15537 	ASSERT(mutex_owned(SD_MUTEX(un)));
15538 	ASSERT(bp != NULL);
15539 	xp = SD_GET_XBUF(bp);
15540 	ASSERT(xp != NULL);
15541 	pktp = SD_GET_PKTP(bp);
15542 	ASSERT(pktp != NULL);
15543 	ASSERT(arg != NULL);
15544 
15545 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15546 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15547 
15548 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15549 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15550 		severity = SCSI_ERR_RETRYABLE;
15551 	}
15552 
15553 	/* Use absolute block number for the request block number */
15554 	request_blkno = xp->xb_blkno;
15555 
15556 	/*
15557 	 * Now try to get the error block number from the sense data
15558 	 */
15559 	sensep = xp->xb_sense_data;
15560 
15561 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15562 	    (uint64_t *)&err_blkno)) {
15563 		/*
15564 		 * We retrieved the error block number from the information
15565 		 * portion of the sense data.
15566 		 *
15567 		 * For USCSI commands we are better off using the error
15568 		 * block no. as the requested block no. (This is the best
15569 		 * we can estimate.)
15570 		 */
15571 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15572 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15573 			request_blkno = err_blkno;
15574 		}
15575 	} else {
15576 		/*
15577 		 * Without the es_valid bit set (for fixed format) or an
15578 		 * information descriptor (for descriptor format) we cannot
15579 		 * be certain of the error blkno, so just use the
15580 		 * request_blkno.
15581 		 */
15582 		err_blkno = (diskaddr_t)request_blkno;
15583 	}
15584 
15585 	/*
15586 	 * The following will log the buffer contents for the release driver
15587 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15588 	 * level is set to verbose.
15589 	 */
15590 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15591 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15592 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15593 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15594 
15595 	if (pfa_flag == FALSE) {
15596 		/* This is normally only set for USCSI */
15597 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15598 			return;
15599 		}
15600 
15601 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15602 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15603 		    (severity < sd_error_level))) {
15604 			return;
15605 		}
15606 	}
15607 
15608 	/*
15609 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
15610 	 * Check for Sonoma Failover and keep a count of how many failed I/Os
15611 	if ((SD_IS_LSI(un)) &&
15612 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15613 	    (scsi_sense_asc(sensep) == 0x94) &&
15614 	    (scsi_sense_ascq(sensep) == 0x01)) {
15615 		un->un_sonoma_failure_count++;
15616 		if (un->un_sonoma_failure_count > 1) {
15617 			return;
15618 		}
15619 	}
15620 
15621 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15622 	    request_blkno, err_blkno, scsi_cmds,
15623 	    (struct scsi_extended_sense *)sensep,
15624 	    un->un_additional_codes, NULL);
15625 }
15626 
15627 /*
15628  *    Function: sd_sense_key_no_sense
15629  *
15630  * Description: Recovery action when sense data was not received.
15631  *
15632  *     Context: May be called from interrupt context
15633  */
15634 
15635 static void
15636 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15637 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15638 {
15639 	struct sd_sense_info	si;
15640 
15641 	ASSERT(un != NULL);
15642 	ASSERT(mutex_owned(SD_MUTEX(un)));
15643 	ASSERT(bp != NULL);
15644 	ASSERT(xp != NULL);
15645 	ASSERT(pktp != NULL);
15646 
15647 	si.ssi_severity = SCSI_ERR_FATAL;
15648 	si.ssi_pfa_flag = FALSE;
15649 
15650 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15651 
15652 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15653 	    &si, EIO, (clock_t)0, NULL);
15654 }
15655 
15656 
15657 /*
15658  *    Function: sd_sense_key_recoverable_error
15659  *
15660  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15661  *
15662  *     Context: May be called from interrupt context
15663  */
15664 
15665 static void
15666 sd_sense_key_recoverable_error(struct sd_lun *un,
15667 	uint8_t *sense_datap,
15668 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15669 {
15670 	struct sd_sense_info	si;
15671 	uint8_t asc = scsi_sense_asc(sense_datap);
15672 
15673 	ASSERT(un != NULL);
15674 	ASSERT(mutex_owned(SD_MUTEX(un)));
15675 	ASSERT(bp != NULL);
15676 	ASSERT(xp != NULL);
15677 	ASSERT(pktp != NULL);
15678 
15679 	/*
15680 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15681 	 */
15682 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15683 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15684 		si.ssi_severity = SCSI_ERR_INFO;
15685 		si.ssi_pfa_flag = TRUE;
15686 	} else {
15687 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15688 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15689 		si.ssi_severity = SCSI_ERR_RECOVERED;
15690 		si.ssi_pfa_flag = FALSE;
15691 	}
15692 
15693 	if (pktp->pkt_resid == 0) {
15694 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15695 		sd_return_command(un, bp);
15696 		return;
15697 	}
15698 
15699 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15700 	    &si, EIO, (clock_t)0, NULL);
15701 }
15702 
15703 
15704 
15705 
15706 /*
15707  *    Function: sd_sense_key_not_ready
15708  *
15709  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15710  *
15711  *     Context: May be called from interrupt context
15712  */
15713 
15714 static void
15715 sd_sense_key_not_ready(struct sd_lun *un,
15716 	uint8_t *sense_datap,
15717 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15718 {
15719 	struct sd_sense_info	si;
15720 	uint8_t asc = scsi_sense_asc(sense_datap);
15721 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15722 
15723 	ASSERT(un != NULL);
15724 	ASSERT(mutex_owned(SD_MUTEX(un)));
15725 	ASSERT(bp != NULL);
15726 	ASSERT(xp != NULL);
15727 	ASSERT(pktp != NULL);
15728 
15729 	si.ssi_severity = SCSI_ERR_FATAL;
15730 	si.ssi_pfa_flag = FALSE;
15731 
15732 	/*
15733 	 * Update error stats after first NOT READY error. Disks may have
15734 	 * been powered down and may need to be restarted.  For CDROMs,
15735 	 * report NOT READY errors only if media is present.
15736 	 */
15737 	if ((ISCD(un) && (asc == 0x3A)) ||
15738 	    (xp->xb_retry_count > 0)) {
15739 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15740 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15741 	}
15742 
15743 	/*
15744 	 * Just fail if the "not ready" retry limit has been reached.
15745 	 */
15746 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
15747 		/* Special check for error message printing for removables. */
15748 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15749 		    (ascq >= 0x04)) {
15750 			si.ssi_severity = SCSI_ERR_ALL;
15751 		}
15752 		goto fail_command;
15753 	}
15754 
15755 	/*
15756 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15757 	 * what to do.
15758 	 */
15759 	switch (asc) {
15760 	case 0x04:	/* LOGICAL UNIT NOT READY */
15761 		/*
15762 		 * Disk drives that don't spin up result in a very long delay
15763 		 * in format without warning messages. We will log a message
15764 		 * if the error level is set to verbose.
15765 		 */
15766 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15767 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15768 			    "logical unit not ready, resetting disk\n");
15769 		}
15770 
15771 		/*
15772 		 * There are different requirements for CDROMs and disks for
15773 		 * the number of retries.  If a CD-ROM is giving this, it is
15774 		 * probably reading TOC and is in the process of getting
15775 		 * ready, so we should keep on trying for a long time to make
15776 		 * sure that all types of media are taken into account (for
15777 		 * some media the drive takes a long time to read TOC).  For
15778 		 * disks we do not want to retry this too many times as this
15779 		 * can cause a long hang in format when the drive refuses to
15780 		 * spin up (a very common failure).
15781 		 */
15782 		switch (ascq) {
15783 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
15784 			/*
15785 			 * Disk drives frequently refuse to spin up, which
15786 			 * results in a very long hang in format without
15787 			 * warning messages.
15788 			 *
15789 			 * Note: This code preserves the legacy behavior of
15790 			 * comparing xb_retry_count against zero for fibre
15791 			 * channel targets instead of comparing against the
15792 			 * un_reset_retry_count value.  The reason for this
15793 			 * discrepancy has been so utterly lost beneath the
15794 			 * Sands of Time that even Indiana Jones could not
15795 			 * find it.
15796 			 */
15797 			if (un->un_f_is_fibre == TRUE) {
15798 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15799 					(xp->xb_retry_count > 0)) &&
15800 					(un->un_startstop_timeid == NULL)) {
15801 					scsi_log(SD_DEVINFO(un), sd_label,
15802 					CE_WARN, "logical unit not ready, "
15803 					"resetting disk\n");
15804 					sd_reset_target(un, pktp);
15805 				}
15806 			} else {
15807 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15808 					(xp->xb_retry_count >
15809 					un->un_reset_retry_count)) &&
15810 					(un->un_startstop_timeid == NULL)) {
15811 					scsi_log(SD_DEVINFO(un), sd_label,
15812 					CE_WARN, "logical unit not ready, "
15813 					"resetting disk\n");
15814 					sd_reset_target(un, pktp);
15815 				}
15816 			}
15817 			break;
15818 
15819 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
15820 			/*
15821 			 * If the target is in the process of becoming
15822 			 * ready, just proceed with the retry. This can
15823 			 * happen with CD-ROMs that take a long time to
15824 			 * read TOC after a power cycle or reset.
15825 			 */
15826 			goto do_retry;
15827 
15828 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
15829 			break;
15830 
15831 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
15832 			/*
15833 			 * Retries cannot help here so just fail right away.
15834 			 */
15835 			goto fail_command;
15836 
15837 		case 0x88:
15838 			/*
15839 			 * Vendor-unique code for T3/T4: it indicates a
15840 			 * path problem in a multipathed config, but as far as
15841 			 * the target driver is concerned it equates to a fatal
15842 			 * error, so we should just fail the command right away
15843 			 * (without printing anything to the console). If this
15844 			 * is not a T3/T4, fall thru to the default recovery
15845 			 * action.
15846 			 * T3/T4 is FC only, don't need to check is_fibre
15847 			 */
15848 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
15849 				sd_return_failed_command(un, bp, EIO);
15850 				return;
15851 			}
15852 			/* FALLTHRU */
15853 
15854 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
15855 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
15856 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
15857 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
15858 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
15859 		default:    /* Possible future codes in SCSI spec? */
15860 			/*
15861 			 * For removable-media devices, do not retry if
15862 			 * ASCQ > 2 as these result mostly from USCSI commands
15863 			 * on MMC devices issued to check status of an
15864 			 * operation initiated in immediate mode.  Also for
15865 			 * ASCQ >= 4 do not print console messages as these
15866 			 * mainly represent a user-initiated operation
15867 			 * instead of a system failure.
15868 			 */
15869 			if (un->un_f_has_removable_media) {
15870 				si.ssi_severity = SCSI_ERR_ALL;
15871 				goto fail_command;
15872 			}
15873 			break;
15874 		}
15875 
15876 		/*
15877 		 * As part of our recovery attempt for the NOT READY
15878 		 * condition, we issue a START STOP UNIT command. However
15879 		 * we want to wait for a short delay before attempting this
15880 		 * as there may still be more commands coming back from the
15881 		 * target with the check condition. To do this we use
15882 		 * timeout(9F) to call sd_start_stop_unit_callback() after
15883 		 * the delay interval expires. (sd_start_stop_unit_callback()
15884 		 * dispatches sd_start_stop_unit_task(), which will issue
15885 		 * the actual START STOP UNIT command.) The delay interval
15886 		 * is one-half of the delay that we will use to retry the
15887 		 * command that generated the NOT READY condition.
15888 		 *
15889 		 * Note that we could just dispatch sd_start_stop_unit_task()
15890 		 * from here and allow it to sleep for the delay interval,
15891 		 * but then we would be tying up the taskq thread
15892 		 * unnecessarily for the duration of the delay.
15893 		 *
15894 		 * Do not issue the START STOP UNIT if the current command
15895 		 * is already a START STOP UNIT.
15896 		 */
15897 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
15898 			break;
15899 		}
15900 
15901 		/*
15902 		 * Do not schedule the timeout if one is already pending.
15903 		 */
15904 		if (un->un_startstop_timeid != NULL) {
15905 			SD_INFO(SD_LOG_ERROR, un,
15906 			    "sd_sense_key_not_ready: restart already issued to"
15907 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
15908 			    ddi_get_instance(SD_DEVINFO(un)));
15909 			break;
15910 		}
15911 
15912 		/*
15913 		 * Schedule the START STOP UNIT command, then queue the command
15914 		 * for a retry.
15915 		 *
15916 		 * Note: A timeout is not scheduled for this retry because we
15917 		 * want the retry to be serial with the START_STOP_UNIT. The
15918 		 * retry will be started when the START_STOP_UNIT is completed
15919 		 * in sd_start_stop_unit_task.
15920 		 */
15921 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
15922 		    un, SD_BSY_TIMEOUT / 2);
15923 		xp->xb_retry_count++;
15924 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
15925 		return;
15926 
15927 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
15928 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15929 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15930 			    "unit does not respond to selection\n");
15931 		}
15932 		break;
15933 
15934 	case 0x3A:	/* MEDIUM NOT PRESENT */
15935 		if (sd_error_level >= SCSI_ERR_FATAL) {
15936 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15937 			    "Caddy not inserted in drive\n");
15938 		}
15939 
15940 		sr_ejected(un);
15941 		un->un_mediastate = DKIO_EJECTED;
15942 		/* The state has changed, inform the media watch routines */
15943 		cv_broadcast(&un->un_state_cv);
15944 		/* Just fail if no media is present in the drive. */
15945 		goto fail_command;
15946 
15947 	default:
15948 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15949 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15950 			    "Unit not Ready. Additional sense code 0x%x\n",
15951 			    asc);
15952 		}
15953 		break;
15954 	}
15955 
15956 do_retry:
15957 
15958 	/*
15959 	 * Retry the command, as some targets may report NOT READY for
15960 	 * several seconds after being reset.
15961 	 */
15962 	xp->xb_retry_count++;
15963 	si.ssi_severity = SCSI_ERR_RETRYABLE;
15964 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
15965 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
15966 
15967 	return;
15968 
15969 fail_command:
15970 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15971 	sd_return_failed_command(un, bp, EIO);
15972 }
15973 
15974 
15975 
15976 /*
15977  *    Function: sd_sense_key_medium_or_hardware_error
15978  *
15979  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
15980  *		sense key.
15981  *
15982  *     Context: May be called from interrupt context
15983  */
15984 
15985 static void
15986 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
15987 	uint8_t *sense_datap,
15988 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15989 {
15990 	struct sd_sense_info	si;
15991 	uint8_t sense_key = scsi_sense_key(sense_datap);
15992 	uint8_t asc = scsi_sense_asc(sense_datap);
15993 
15994 	ASSERT(un != NULL);
15995 	ASSERT(mutex_owned(SD_MUTEX(un)));
15996 	ASSERT(bp != NULL);
15997 	ASSERT(xp != NULL);
15998 	ASSERT(pktp != NULL);
15999 
16000 	si.ssi_severity = SCSI_ERR_FATAL;
16001 	si.ssi_pfa_flag = FALSE;
16002 
16003 	if (sense_key == KEY_MEDIUM_ERROR) {
16004 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
16005 	}
16006 
16007 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16008 
16009 	if ((un->un_reset_retry_count != 0) &&
16010 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
16011 		mutex_exit(SD_MUTEX(un));
16012 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
16013 		if (un->un_f_allow_bus_device_reset == TRUE) {
16014 
16015 			boolean_t try_resetting_target = B_TRUE;
16016 
16017 			/*
16018 			 * We need to be able to handle specific ASC when we are
16019 			 * handling a KEY_HARDWARE_ERROR. In particular
16020 			 * taking the default action of resetting the target may
16021 			 * not be the appropriate way to attempt recovery.
16022 			 * Resetting a target because of a single LUN failure
16023 			 * victimizes all LUNs on that target.
16024 			 *
16025 			 * This is true for the LSI arrays, if an LSI
16026 			 * This is true for the LSI arrays: if an LSI
16027 			 * array controller returns an ASC of 0x84 (LUN Dead),
16028 			 * we should trust it.
16029 
16030 			if (sense_key == KEY_HARDWARE_ERROR) {
16031 				switch (asc) {
16032 				case 0x84:
16033 					if (SD_IS_LSI(un)) {
16034 						try_resetting_target = B_FALSE;
16035 					}
16036 					break;
16037 				default:
16038 					break;
16039 				}
16040 			}
16041 
16042 			if (try_resetting_target == B_TRUE) {
16043 				int reset_retval = 0;
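				/*
				 * scsi_reset(9F) returns 1 on success and 0
				 * on failure, so a failed (or skipped)
				 * RESET_LUN falls through to the RESET_TARGET
				 * attempt below.
				 */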
16044 				if (un->un_f_lun_reset_enabled == TRUE) {
16045 					SD_TRACE(SD_LOG_IO_CORE, un,
16046 					    "sd_sense_key_medium_or_hardware_"
16047 					    "error: issuing RESET_LUN\n");
16048 					reset_retval =
16049 					    scsi_reset(SD_ADDRESS(un),
16050 					    RESET_LUN);
16051 				}
16052 				if (reset_retval == 0) {
16053 					SD_TRACE(SD_LOG_IO_CORE, un,
16054 					    "sd_sense_key_medium_or_hardware_"
16055 					    "error: issuing RESET_TARGET\n");
16056 					(void) scsi_reset(SD_ADDRESS(un),
16057 					    RESET_TARGET);
16058 				}
16059 			}
16060 		}
16061 		mutex_enter(SD_MUTEX(un));
16062 	}
16063 
16064 	/*
16065 	 * This really ought to be a fatal error, but we will retry anyway
16066 	 * as some drives report this as a spurious error.
16067 	 */
16068 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16069 	    &si, EIO, (clock_t)0, NULL);
16070 }
16071 
16072 
16073 
16074 /*
16075  *    Function: sd_sense_key_illegal_request
16076  *
16077  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16078  *
16079  *     Context: May be called from interrupt context
16080  */
16081 
16082 static void
16083 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16084 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16085 {
16086 	struct sd_sense_info	si;
16087 
16088 	ASSERT(un != NULL);
16089 	ASSERT(mutex_owned(SD_MUTEX(un)));
16090 	ASSERT(bp != NULL);
16091 	ASSERT(xp != NULL);
16092 	ASSERT(pktp != NULL);
16093 
16094 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16095 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16096 
16097 	si.ssi_severity = SCSI_ERR_INFO;
16098 	si.ssi_pfa_flag = FALSE;
16099 
16100 	/* Pointless to retry if the target thinks it's an illegal request */
16101 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16102 	sd_return_failed_command(un, bp, EIO);
16103 }
16104 
16105 
16106 
16107 
16108 /*
16109  *    Function: sd_sense_key_unit_attention
16110  *
16111  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16112  *
16113  *     Context: May be called from interrupt context
16114  */
16115 
16116 static void
16117 sd_sense_key_unit_attention(struct sd_lun *un,
16118 	uint8_t *sense_datap,
16119 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16120 {
16121 	/*
16122 	 * For UNIT ATTENTION we allow retries for one minute. Devices
16123 	 * like Sonoma can return UNIT ATTENTION for close to a minute
16124 	 * under certain conditions.
16125 	 */
16126 	int	retry_check_flag = SD_RETRIES_UA;
16127 	boolean_t	kstat_updated = B_FALSE;
16128 	struct	sd_sense_info		si;
16129 	uint8_t asc = scsi_sense_asc(sense_datap);
16130 
16131 	ASSERT(un != NULL);
16132 	ASSERT(mutex_owned(SD_MUTEX(un)));
16133 	ASSERT(bp != NULL);
16134 	ASSERT(xp != NULL);
16135 	ASSERT(pktp != NULL);
16136 
16137 	si.ssi_severity = SCSI_ERR_INFO;
16138 	si.ssi_pfa_flag = FALSE;
16139 
16140 
16141 	switch (asc) {
16142 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16143 		if (sd_report_pfa != 0) {
16144 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16145 			si.ssi_pfa_flag = TRUE;
16146 			retry_check_flag = SD_RETRIES_STANDARD;
16147 			goto do_retry;
16148 		}
16149 
16150 		break;
16151 
16152 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
16153 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16154 			un->un_resvd_status |=
16155 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16156 		}
16157 #ifdef _LP64
16158 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16159 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16160 			    un, KM_NOSLEEP) == 0) {
16161 				/*
16162 				 * If we can't dispatch the task we'll just
16163 				 * live without descriptor sense.  We can
16164 				 * try again on the next "unit attention".
16165 				 */
16166 				SD_ERROR(SD_LOG_ERROR, un,
16167 				    "sd_sense_key_unit_attention: "
16168 				    "Could not dispatch "
16169 				    "sd_reenable_dsense_task\n");
16170 			}
16171 		}
16172 #endif /* _LP64 */
16173 		/* FALLTHRU */
16174 
16175 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16176 		if (!un->un_f_has_removable_media) {
16177 			break;
16178 		}
16179 
16180 		/*
16181 		 * When we get a unit attention from a removable-media device,
16182 		 * it may be in a state that will take a long time to recover
16183 		 * (e.g., from a reset).  Since we are executing in interrupt
16184 		 * context here, we cannot wait around for the device to come
16185 		 * back. So hand this command off to sd_media_change_task()
16186 		 * for deferred processing under taskq thread context. (Note
16187 		 * that the command still may be failed if a problem is
16188 		 * encountered at a later time.)
16189 		 */
16190 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16191 		    KM_NOSLEEP) == 0) {
16192 			/*
16193 			 * Cannot dispatch the request so fail the command.
16194 			 */
16195 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16196 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16197 			si.ssi_severity = SCSI_ERR_FATAL;
16198 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16199 			sd_return_failed_command(un, bp, EIO);
16200 		}
16201 
16202 		/*
16203 		 * If the dispatch of sd_media_change_task() failed, the kstat
16204 		 * was already updated above. If the dispatch succeeded, the
16205 		 * kstat will be updated later if the task encounters an error.
16206 		 * Either way, set the kstat_updated flag here.
16207 		 */
16208 		kstat_updated = B_TRUE;
16209 
16210 		/*
16211 		 * Either the command has been successfully dispatched to a
16212 		 * task Q for retrying, or the dispatch failed. In either case
16213 		 * do NOT retry again by calling sd_retry_command. This sets up
16214 		 * two retries of the same command and when one completes and
16215 		 * frees the resources the other will access freed memory,
16216 		 * a bad thing.
16217 		 */
16218 		return;
16219 
16220 	default:
16221 		break;
16222 	}
16223 
16224 	/*
16225 	 * Update kstat if we haven't done that.
16226 	 */
16227 	if (!kstat_updated) {
16228 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16229 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16230 	}
16231 
16232 do_retry:
16233 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16234 	    EIO, SD_UA_RETRY_DELAY, NULL);
16235 }
16236 
16237 
16238 
16239 /*
16240  *    Function: sd_sense_key_fail_command
16241  *
16242  * Description: Used to fail a command when we don't like the sense key that
16243  *		was returned.
16244  *
16245  *     Context: May be called from interrupt context
16246  */
16247 
16248 static void
16249 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16250 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16251 {
16252 	struct sd_sense_info	si;
16253 
16254 	ASSERT(un != NULL);
16255 	ASSERT(mutex_owned(SD_MUTEX(un)));
16256 	ASSERT(bp != NULL);
16257 	ASSERT(xp != NULL);
16258 	ASSERT(pktp != NULL);
16259 
16260 	si.ssi_severity = SCSI_ERR_FATAL;
16261 	si.ssi_pfa_flag = FALSE;
16262 
16263 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16264 	sd_return_failed_command(un, bp, EIO);
16265 }
16266 
16267 
16268 
16269 /*
16270  *    Function: sd_sense_key_blank_check
16271  *
16272  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16273  *		Has no monetary connotation.
16274  *
16275  *     Context: May be called from interrupt context
16276  */
16277 
16278 static void
16279 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16280 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16281 {
16282 	struct sd_sense_info	si;
16283 
16284 	ASSERT(un != NULL);
16285 	ASSERT(mutex_owned(SD_MUTEX(un)));
16286 	ASSERT(bp != NULL);
16287 	ASSERT(xp != NULL);
16288 	ASSERT(pktp != NULL);
16289 
16290 	/*
16291 	 * Blank check is not fatal for removable devices, therefore
16292 	 * it does not require a console message.
16293 	 */
16294 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16295 	    SCSI_ERR_FATAL;
16296 	si.ssi_pfa_flag = FALSE;
16297 
16298 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16299 	sd_return_failed_command(un, bp, EIO);
16300 }
16301 
16302 
16303 
16304 
16305 /*
16306  *    Function: sd_sense_key_aborted_command
16307  *
16308  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16309  *
16310  *     Context: May be called from interrupt context
16311  */
16312 
16313 static void
16314 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16315 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16316 {
16317 	struct sd_sense_info	si;
16318 
16319 	ASSERT(un != NULL);
16320 	ASSERT(mutex_owned(SD_MUTEX(un)));
16321 	ASSERT(bp != NULL);
16322 	ASSERT(xp != NULL);
16323 	ASSERT(pktp != NULL);
16324 
16325 	si.ssi_severity = SCSI_ERR_FATAL;
16326 	si.ssi_pfa_flag = FALSE;
16327 
16328 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16329 
16330 	/*
16331 	 * This really ought to be a fatal error, but we will retry anyway
16332 	 * as some drives report this as a spurious error.
16333 	 */
16334 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16335 	    &si, EIO, (clock_t)0, NULL);
16336 }
16337 
16338 
16339 
16340 /*
16341  *    Function: sd_sense_key_default
16342  *
16343  * Description: Default recovery action for several SCSI sense keys (basically
16344  *		attempts a retry).
16345  *
16346  *     Context: May be called from interrupt context
16347  */
16348 
16349 static void
16350 sd_sense_key_default(struct sd_lun *un,
16351 	uint8_t *sense_datap,
16352 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16353 {
16354 	struct sd_sense_info	si;
16355 	uint8_t sense_key = scsi_sense_key(sense_datap);
16356 
16357 	ASSERT(un != NULL);
16358 	ASSERT(mutex_owned(SD_MUTEX(un)));
16359 	ASSERT(bp != NULL);
16360 	ASSERT(xp != NULL);
16361 	ASSERT(pktp != NULL);
16362 
16363 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16364 
16365 	/*
16366 	 * Undecoded sense key.	Attempt retries and hope that will fix
16367 	 * the problem.  Otherwise, we're dead.
16368 	 */
16369 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16370 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16371 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16372 	}
16373 
16374 	si.ssi_severity = SCSI_ERR_FATAL;
16375 	si.ssi_pfa_flag = FALSE;
16376 
16377 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16378 	    &si, EIO, (clock_t)0, NULL);
16379 }
16380 
16381 
16382 
16383 /*
16384  *    Function: sd_print_retry_msg
16385  *
16386  * Description: Print a message indicating the retry action being taken.
16387  *
16388  *   Arguments: un - ptr to associated softstate
16389  *		bp - ptr to buf(9S) for the command
16390  *		arg - not used.
16391  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16392  *			or SD_NO_RETRY_ISSUED
16393  *
16394  *     Context: May be called from interrupt context
16395  */
16396 /* ARGSUSED */
16397 static void
16398 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16399 {
16400 	struct sd_xbuf	*xp;
16401 	struct scsi_pkt *pktp;
16402 	char *reasonp;
16403 	char *msgp;
16404 
16405 	ASSERT(un != NULL);
16406 	ASSERT(mutex_owned(SD_MUTEX(un)));
16407 	ASSERT(bp != NULL);
16408 	pktp = SD_GET_PKTP(bp);
16409 	ASSERT(pktp != NULL);
16410 	xp = SD_GET_XBUF(bp);
16411 	ASSERT(xp != NULL);
16412 
16413 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16414 	mutex_enter(&un->un_pm_mutex);
16415 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16416 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16417 	    (pktp->pkt_flags & FLAG_SILENT)) {
16418 		mutex_exit(&un->un_pm_mutex);
16419 		goto update_pkt_reason;
16420 	}
16421 	mutex_exit(&un->un_pm_mutex);
16422 
16423 	/*
16424 	 * Suppress messages if they are all the same pkt_reason; with
16425 	 * TQ, many (up to 256) are returned with the same pkt_reason.
16426 	 * If we are in panic, then suppress the retry messages.
16427 	 */
16428 	switch (flag) {
16429 	case SD_NO_RETRY_ISSUED:
16430 		msgp = "giving up";
16431 		break;
16432 	case SD_IMMEDIATE_RETRY_ISSUED:
16433 	case SD_DELAYED_RETRY_ISSUED:
16434 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16435 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16436 		    (sd_error_level != SCSI_ERR_ALL))) {
16437 			return;
16438 		}
16439 		msgp = "retrying command";
16440 		break;
16441 	default:
16442 		goto update_pkt_reason;
16443 	}
16444 
16445 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16446 	    scsi_rname(pktp->pkt_reason));
16447 
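	/*
	 * This produces messages of the form, for example:
	 * "SCSI transport failed: reason 'timeout': retrying command"
	 */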
16448 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16449 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16450 
16451 update_pkt_reason:
16452 	/*
16453 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16454 	 * This is to prevent multiple console messages for the same failure
16455 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
16456 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if and
16457 	 * when the command is retried successfully, because there still may be
16458 	 */
16459 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16460 		un->un_last_pkt_reason = pktp->pkt_reason;
16461 	}
16462 }
16463 
16464 
16465 /*
16466  *    Function: sd_print_cmd_incomplete_msg
16467  *
16468  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16469  *
16470  *   Arguments: un - ptr to associated softstate
16471  *		bp - ptr to buf(9S) for the command
16472  *		arg - passed to sd_print_retry_msg()
16473  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16474  *			or SD_NO_RETRY_ISSUED
16475  *
16476  *     Context: May be called from interrupt context
16477  */
16478 
16479 static void
16480 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16481 	int code)
16482 {
16483 	dev_info_t	*dip;
16484 
16485 	ASSERT(un != NULL);
16486 	ASSERT(mutex_owned(SD_MUTEX(un)));
16487 	ASSERT(bp != NULL);
16488 
16489 	switch (code) {
16490 	case SD_NO_RETRY_ISSUED:
16491 		/* Command was failed. Someone turned off this target? */
16492 		if (un->un_state != SD_STATE_OFFLINE) {
16493 			/*
16494 			 * Suppress message if we are detaching and
16495 			 * device has been disconnected
16496 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
16497 			 * private interface and not part of the DDI
16498 			 */
16499 			dip = un->un_sd->sd_dev;
16500 			if (!(DEVI_IS_DETACHING(dip) &&
16501 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16502 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16503 				    "disk not responding to selection\n");
16504 			}
16505 			New_state(un, SD_STATE_OFFLINE);
16506 		}
16507 		break;
16508 
16509 	case SD_DELAYED_RETRY_ISSUED:
16510 	case SD_IMMEDIATE_RETRY_ISSUED:
16511 	default:
16512 		/* Command was successfully queued for retry */
16513 		sd_print_retry_msg(un, bp, arg, code);
16514 		break;
16515 	}
16516 }
16517 
16518 
16519 /*
16520  *    Function: sd_pkt_reason_cmd_incomplete
16521  *
16522  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16523  *
16524  *     Context: May be called from interrupt context
16525  */
16526 
16527 static void
16528 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16529 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16530 {
16531 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16532 
16533 	ASSERT(un != NULL);
16534 	ASSERT(mutex_owned(SD_MUTEX(un)));
16535 	ASSERT(bp != NULL);
16536 	ASSERT(xp != NULL);
16537 	ASSERT(pktp != NULL);
16538 
16539 	/* Do not do a reset if selection did not complete */
16540 	/* Note: Should this not just check the bit? */
16541 	if (pktp->pkt_state != STATE_GOT_BUS) {
16542 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16543 		sd_reset_target(un, pktp);
16544 	}
16545 
16546 	/*
16547 	 * If the target was not successfully selected, then set
16548 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16549 	 * with the target, and further retries and/or commands are
16550 	 * likely to take a long time.
16551 	 */
16552 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16553 		flag |= SD_RETRIES_FAILFAST;
16554 	}
16555 
16556 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16557 
16558 	sd_retry_command(un, bp, flag,
16559 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16560 }
16561 
16562 
16563 
16564 /*
16565  *    Function: sd_pkt_reason_cmd_tran_err
16566  *
16567  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16568  *
16569  *     Context: May be called from interrupt context
16570  */
16571 
16572 static void
16573 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16574 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16575 {
16576 	ASSERT(un != NULL);
16577 	ASSERT(mutex_owned(SD_MUTEX(un)));
16578 	ASSERT(bp != NULL);
16579 	ASSERT(xp != NULL);
16580 	ASSERT(pktp != NULL);
16581 
16582 	/*
16583 	 * Do not reset if we got a parity error, or if
16584 	 * selection did not complete.
16585 	 */
16586 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16587 	/* Note: Should this not just check the bit for pkt_state? */
16588 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16589 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16590 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16591 		sd_reset_target(un, pktp);
16592 	}
16593 
16594 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16595 
16596 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16597 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16598 }
16599 
16600 
16601 
16602 /*
16603  *    Function: sd_pkt_reason_cmd_reset
16604  *
16605  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16606  *
16607  *     Context: May be called from interrupt context
16608  */
16609 
16610 static void
16611 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16612 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16613 {
16614 	ASSERT(un != NULL);
16615 	ASSERT(mutex_owned(SD_MUTEX(un)));
16616 	ASSERT(bp != NULL);
16617 	ASSERT(xp != NULL);
16618 	ASSERT(pktp != NULL);
16619 
16620 	/* The target may still be running the command, so try to reset. */
16621 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16622 	sd_reset_target(un, pktp);
16623 
16624 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16625 
16626 	/*
16627 	 * If pkt_reason is CMD_RESET chances are that this pkt got
16628 	 * reset because another target on this bus caused it. The target
16629 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16630 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16631 	 */
16632 
16633 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16634 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16635 }
16636 
16637 
16638 
16639 
16640 /*
16641  *    Function: sd_pkt_reason_cmd_aborted
16642  *
16643  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16644  *
16645  *     Context: May be called from interrupt context
16646  */
16647 
16648 static void
16649 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16650 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16651 {
16652 	ASSERT(un != NULL);
16653 	ASSERT(mutex_owned(SD_MUTEX(un)));
16654 	ASSERT(bp != NULL);
16655 	ASSERT(xp != NULL);
16656 	ASSERT(pktp != NULL);
16657 
16658 	/* The target may still be running the command, so try to reset. */
16659 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16660 	sd_reset_target(un, pktp);
16661 
16662 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16663 
16664 	/*
16665 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
16666 	 * aborted because another target on this bus caused it. The target
16667 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16668 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16669 	 */
16670 
16671 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16672 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16673 }
16674 
16675 
16676 
16677 /*
16678  *    Function: sd_pkt_reason_cmd_timeout
16679  *
16680  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16681  *
16682  *     Context: May be called from interrupt context
16683  */
16684 
16685 static void
16686 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16687 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16688 {
16689 	ASSERT(un != NULL);
16690 	ASSERT(mutex_owned(SD_MUTEX(un)));
16691 	ASSERT(bp != NULL);
16692 	ASSERT(xp != NULL);
16693 	ASSERT(pktp != NULL);
16694 
16695 
16696 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16697 	sd_reset_target(un, pktp);
16698 
16699 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16700 
16701 	/*
16702 	 * A command timeout indicates that we could not establish
16703 	 * communication with the target, so set SD_RETRIES_FAILFAST
16704 	 * as further retries/commands are likely to take a long time.
16705 	 */
16706 	sd_retry_command(un, bp,
16707 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16708 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16709 }
16710 
16711 
16712 
16713 /*
16714  *    Function: sd_pkt_reason_cmd_unx_bus_free
16715  *
16716  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16717  *
16718  *     Context: May be called from interrupt context
16719  */
16720 
16721 static void
16722 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16723 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16724 {
16725 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16726 
16727 	ASSERT(un != NULL);
16728 	ASSERT(mutex_owned(SD_MUTEX(un)));
16729 	ASSERT(bp != NULL);
16730 	ASSERT(xp != NULL);
16731 	ASSERT(pktp != NULL);
16732 
16733 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16734 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16735 
16736 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
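	/*
	 * Suppress the retry message when the unexpected bus free was
	 * caused by a parity error.
	 */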
16737 	    sd_print_retry_msg : NULL;
16738 
16739 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16740 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16741 }
16742 
16743 
16744 /*
16745  *    Function: sd_pkt_reason_cmd_tag_reject
16746  *
16747  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16748  *
16749  *     Context: May be called from interrupt context
16750  */
16751 
16752 static void
16753 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16754 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16755 {
16756 	ASSERT(un != NULL);
16757 	ASSERT(mutex_owned(SD_MUTEX(un)));
16758 	ASSERT(bp != NULL);
16759 	ASSERT(xp != NULL);
16760 	ASSERT(pktp != NULL);
16761 
16762 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
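	/*
	 * The target rejected the tag, so disable tagged queueing for
	 * this device and lower the throttle before retrying untagged.
	 */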
16763 	pktp->pkt_flags = 0;
16764 	un->un_tagflags = 0;
16765 	if (un->un_f_opt_queueing == TRUE) {
16766 		un->un_throttle = min(un->un_throttle, 3);
16767 	} else {
16768 		un->un_throttle = 1;
16769 	}
16770 	mutex_exit(SD_MUTEX(un));
16771 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
16772 	mutex_enter(SD_MUTEX(un));
16773 
16774 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16775 
16776 	/* Legacy behavior not to check retry counts here. */
16777 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
16778 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16779 }
16780 
16781 
16782 /*
16783  *    Function: sd_pkt_reason_default
16784  *
16785  * Description: Default recovery actions for SCSA pkt_reason values that
16786  *		do not have more explicit recovery actions.
16787  *
16788  *     Context: May be called from interrupt context
16789  */
16790 
16791 static void
16792 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
16793 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16794 {
16795 	ASSERT(un != NULL);
16796 	ASSERT(mutex_owned(SD_MUTEX(un)));
16797 	ASSERT(bp != NULL);
16798 	ASSERT(xp != NULL);
16799 	ASSERT(pktp != NULL);
16800 
16801 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16802 	sd_reset_target(un, pktp);
16803 
16804 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16805 
16806 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16807 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16808 }
16809 
16810 
16811 
16812 /*
16813  *    Function: sd_pkt_status_check_condition
16814  *
16815  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
16816  *
16817  *     Context: May be called from interrupt context
16818  */
16819 
16820 static void
16821 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
16822 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16823 {
16824 	ASSERT(un != NULL);
16825 	ASSERT(mutex_owned(SD_MUTEX(un)));
16826 	ASSERT(bp != NULL);
16827 	ASSERT(xp != NULL);
16828 	ASSERT(pktp != NULL);
16829 
16830 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
16831 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
16832 
16833 	/*
16834 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
16835 	 * command will be retried after the request sense). Otherwise, retry
16836 	 * the command. Note: we are issuing the request sense even though the
16837 	 * retry limit may have been reached for the failed command.
16838 	 */
16839 	if (un->un_f_arq_enabled == FALSE) {
16840 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16841 		    "no ARQ, sending request sense command\n");
16842 		sd_send_request_sense_command(un, bp, pktp);
16843 	} else {
16844 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16845 		    "ARQ, retrying request sense command\n");
16846 #if defined(__i386) || defined(__amd64)
16847 		/*
16848 		 * The SD_RETRY_DELAY value used here needs to be adjusted
16849 		 * whenever SD_RETRY_DELAY changes in sddef.h.
16850 		 */
16851 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
16852 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
16853 		    NULL);
16854 #else
16855 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
16856 		    EIO, SD_RETRY_DELAY, NULL);
16857 #endif
16858 	}
16859 
16860 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
16861 }
16862 
16863 
16864 /*
16865  *    Function: sd_pkt_status_busy
16866  *
16867  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
16868  *
16869  *     Context: May be called from interrupt context
16870  */
16871 
16872 static void
16873 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16874 	struct scsi_pkt *pktp)
16875 {
16876 	ASSERT(un != NULL);
16877 	ASSERT(mutex_owned(SD_MUTEX(un)));
16878 	ASSERT(bp != NULL);
16879 	ASSERT(xp != NULL);
16880 	ASSERT(pktp != NULL);
16881 
16882 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16883 	    "sd_pkt_status_busy: entry\n");
16884 
16885 	/* If retries are exhausted, just fail the command. */
16886 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
16887 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16888 		    "device busy too long\n");
16889 		sd_return_failed_command(un, bp, EIO);
16890 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16891 		    "sd_pkt_status_busy: exit\n");
16892 		return;
16893 	}
16894 	xp->xb_retry_count++;
16895 
16896 	/*
16897 	 * Try to reset the target. However, we do not want to perform
16898 	 * more than one reset if the device continues to fail. The reset
16899 	 * will be performed when the retry count reaches the reset
16900 	 * threshold.  This threshold should be set such that at least
16901 	 * one retry is issued before the reset is performed.
16902 	 */
16903 	if (xp->xb_retry_count ==
16904 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
16905 		int rval = 0;
16906 		mutex_exit(SD_MUTEX(un));
16907 		if (un->un_f_allow_bus_device_reset == TRUE) {
16908 			/*
16909 			 * First try to reset the LUN; if we cannot then
16910 			 * try to reset the target.
16911 			 */
16912 			if (un->un_f_lun_reset_enabled == TRUE) {
16913 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16914 				    "sd_pkt_status_busy: RESET_LUN\n");
16915 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
16916 			}
16917 			if (rval == 0) {
16918 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16919 				    "sd_pkt_status_busy: RESET_TARGET\n");
16920 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
16921 			}
16922 		}
16923 		if (rval == 0) {
16924 			/*
16925 			 * If the RESET_LUN and/or RESET_TARGET failed,
16926 			 * try RESET_ALL
16927 			 */
16928 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16929 			    "sd_pkt_status_busy: RESET_ALL\n");
16930 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
16931 		}
16932 		mutex_enter(SD_MUTEX(un));
16933 		if (rval == 0) {
16934 			/*
16935 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
16936 			 * At this point we give up & fail the command.
16937 			 */
16938 			sd_return_failed_command(un, bp, EIO);
16939 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16940 			    "sd_pkt_status_busy: exit (failed cmd)\n");
16941 			return;
16942 		}
16943 	}
16944 
16945 	/*
16946 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
16947 	 * we have already checked the retry counts above.
16948 	 */
16949 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
16950 	    EIO, SD_BSY_TIMEOUT, NULL);
16951 
16952 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16953 	    "sd_pkt_status_busy: exit\n");
16954 }
16955 
16956 
16957 /*
16958  *    Function: sd_pkt_status_reservation_conflict
16959  *
16960  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
16961  *		command status.
16962  *
16963  *     Context: May be called from interrupt context
16964  */
16965 
16966 static void
16967 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
16968 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16969 {
16970 	ASSERT(un != NULL);
16971 	ASSERT(mutex_owned(SD_MUTEX(un)));
16972 	ASSERT(bp != NULL);
16973 	ASSERT(xp != NULL);
16974 	ASSERT(pktp != NULL);
16975 
16976 	/*
16977 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
16978 	 * conflict could be due to various reasons, such as incorrect keys
16979 	 * or not being registered/reserved, so we return EACCES to the caller.
16980 	 */
16981 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
16982 		int cmd = SD_GET_PKT_OPCODE(pktp);
16983 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
16984 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
16985 			sd_return_failed_command(un, bp, EACCES);
16986 			return;
16987 		}
16988 	}
16989 
16990 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
16991 
16992 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
16993 		if (sd_failfast_enable != 0) {
16994 			/* By definition, we must panic here.... */
16995 			sd_panic_for_res_conflict(un);
16996 			/*NOTREACHED*/
16997 		}
16998 		SD_ERROR(SD_LOG_IO, un,
16999 		    "sd_pkt_status_reservation_conflict: Disk Reserved\n");
17000 		sd_return_failed_command(un, bp, EACCES);
17001 		return;
17002 	}
17003 
17004 	/*
17005 	 * 1147670: retry only if sd_retry_on_reservation_conflict
17006 	 * property is set (default is 1). Retries will not succeed
17007 	 * on a disk reserved by another initiator. HA systems
17008 	 * may reset this via sd.conf to avoid these retries.
17009 	 *
17010 	 * Note: The legacy return code for this failure is EIO, however EACCES
17011 	 * seems more appropriate for a reservation conflict.
17012 	 */
17013 	if (sd_retry_on_reservation_conflict == 0) {
17014 		SD_ERROR(SD_LOG_IO, un,
17015 		    "sd_pkt_status_reservation_conflict: Device Reserved\n");
17016 		sd_return_failed_command(un, bp, EIO);
17017 		return;
17018 	}
17019 
17020 	/*
17021 	 * Retry the command if we can.
17022 	 *
17023 	 * Note: The legacy return code for this failure is EIO, however EACCES
17024 	 * seems more appropriate for a reservation conflict.
17025 	 */
17026 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17027 	    (clock_t)2, NULL);
17028 }
17029 
17030 
17031 
17032 /*
17033  *    Function: sd_pkt_status_qfull
17034  *
17035  * Description: Handle a QUEUE FULL condition from the target.  This can
17036  *		occur if the HBA does not handle the queue full condition.
17037  *		(Basically this means third-party HBAs as Sun HBAs will
17038  *		handle the queue full condition.)  Note that if there are
17039  *		some commands already in the transport, then the queue full
17040  *		has occurred because the queue for this nexus is actually
17041  *		full. If there are no commands in the transport, then the
17042  *		queue full is resulting from some other initiator or lun
17043  *		consuming all the resources at the target.
17044  *
17045  *     Context: May be called from interrupt context
17046  */
17047 
17048 static void
17049 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17050 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17051 {
17052 	ASSERT(un != NULL);
17053 	ASSERT(mutex_owned(SD_MUTEX(un)));
17054 	ASSERT(bp != NULL);
17055 	ASSERT(xp != NULL);
17056 	ASSERT(pktp != NULL);
17057 
17058 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17059 	    "sd_pkt_status_qfull: entry\n");
17060 
17061 	/*
17062 	 * Just lower the QFULL throttle and retry the command.  Note that
17063 	 * we do not limit the number of retries here.
17064 	 */
17065 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17066 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17067 	    SD_RESTART_TIMEOUT, NULL);
17068 
17069 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17070 	    "sd_pkt_status_qfull: exit\n");
17071 }
17072 
17073 
17074 /*
17075  *    Function: sd_reset_target
17076  *
17077  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17078  *		RESET_TARGET, or RESET_ALL.
17079  *
17080  *     Context: May be called under interrupt context.
17081  */
17082 
17083 static void
17084 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17085 {
17086 	int rval = 0;
17087 
17088 	ASSERT(un != NULL);
17089 	ASSERT(mutex_owned(SD_MUTEX(un)));
17090 	ASSERT(pktp != NULL);
17091 
17092 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17093 
17094 	/*
17095 	 * No need to reset if the transport layer has already done so.
17096 	 */
17097 	if ((pktp->pkt_statistics &
17098 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17099 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17100 		    "sd_reset_target: no reset\n");
17101 		return;
17102 	}
17103 
17104 	mutex_exit(SD_MUTEX(un));
17105 
17106 	if (un->un_f_allow_bus_device_reset == TRUE) {
17107 		if (un->un_f_lun_reset_enabled == TRUE) {
17108 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17109 			    "sd_reset_target: RESET_LUN\n");
17110 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17111 		}
17112 		if (rval == 0) {
17113 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17114 			    "sd_reset_target: RESET_TARGET\n");
17115 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17116 		}
17117 	}
17118 
17119 	if (rval == 0) {
17120 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17121 		    "sd_reset_target: RESET_ALL\n");
17122 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17123 	}
17124 
17125 	mutex_enter(SD_MUTEX(un));
17126 
17127 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17128 }
17129 
17130 
17131 /*
17132  *    Function: sd_media_change_task
17133  *
17134  * Description: Recovery action that waits for a CDROM to become available.
17135  *
17136  *     Context: Executes in a taskq() thread context
17137  */
17138 
17139 static void
17140 sd_media_change_task(void *arg)
17141 {
17142 	struct	scsi_pkt	*pktp = arg;
17143 	struct	sd_lun		*un;
17144 	struct	buf		*bp;
17145 	struct	sd_xbuf		*xp;
17146 	int	err		= 0;
17147 	int	retry_count	= 0;
17148 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
17149 	struct	sd_sense_info	si;
17150 
17151 	ASSERT(pktp != NULL);
17152 	bp = (struct buf *)pktp->pkt_private;
17153 	ASSERT(bp != NULL);
17154 	xp = SD_GET_XBUF(bp);
17155 	ASSERT(xp != NULL);
17156 	un = SD_GET_UN(bp);
17157 	ASSERT(un != NULL);
17158 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17159 	ASSERT(un->un_f_monitor_media_state);
17160 
17161 	si.ssi_severity = SCSI_ERR_INFO;
17162 	si.ssi_pfa_flag = FALSE;
17163 
17164 	/*
17165 	 * When a reset is issued on a CDROM, it takes a long time to
17166 	 * recover. The first few attempts to read the capacity and other
17167 	 * things related to handling the unit attention fail (with an ASC
17168 	 * of 0x4 and an ASCQ of 0x1). In that case we want to allow enough
17169 	 * retries, while still limiting the retries in other cases of
17170 	 * genuine failures like no media in the drive.
17171 	 */
17172 	while (retry_count++ < retry_limit) {
17173 		if ((err = sd_handle_mchange(un)) == 0) {
17174 			break;
17175 		}
17176 		if (err == EAGAIN) {
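			/* Device is still becoming ready; allow the full retry limit */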
17177 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17178 		}
17179 		/* Sleep for 0.5 sec. & try again */
17180 		delay(drv_usectohz(500000));
17181 	}
17182 
17183 	/*
17184 	 * Dispatch (retry or fail) the original command here,
17185 	 * along with appropriate console messages....
17186 	 *
17187 	 * Must grab the mutex before calling sd_retry_command,
17188 	 * sd_print_sense_msg and sd_return_failed_command.
17189 	 */
17190 	mutex_enter(SD_MUTEX(un));
17191 	if (err != SD_CMD_SUCCESS) {
17192 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17193 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17194 		si.ssi_severity = SCSI_ERR_FATAL;
17195 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17196 		sd_return_failed_command(un, bp, EIO);
17197 	} else {
17198 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17199 		    &si, EIO, (clock_t)0, NULL);
17200 	}
17201 	mutex_exit(SD_MUTEX(un));
17202 }
17203 
17204 
17205 
17206 /*
17207  *    Function: sd_handle_mchange
17208  *
17209  * Description: Perform geometry validation & other recovery after the
17210  *		medium in a CDROM drive has changed.
17211  *
17212  * Return Code: 0 for success
17213  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17214  *		sd_send_scsi_READ_CAPACITY()
17215  *
17216  *     Context: Executes in a taskq() thread context
17217  */
17218 
17219 static int
17220 sd_handle_mchange(struct sd_lun *un)
17221 {
17222 	uint64_t	capacity;
17223 	uint32_t	lbasize;
17224 	int		rval;
17225 
17226 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17227 	ASSERT(un->un_f_monitor_media_state);
17228 
17229 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17230 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17231 		return (rval);
17232 	}
17233 
17234 	mutex_enter(SD_MUTEX(un));
17235 	sd_update_block_info(un, lbasize, capacity);
17236 
17237 	if (un->un_errstats != NULL) {
17238 		struct	sd_errstats *stp =
17239 		    (struct sd_errstats *)un->un_errstats->ks_data;
17240 		stp->sd_capacity.value.ui64 = (uint64_t)
17241 		    ((uint64_t)un->un_blockcount *
17242 		    (uint64_t)un->un_tgt_blocksize);
17243 	}
17244 
17245 
17246 	/*
17247 	 * Check if the media in the device is writable or not
17248 	 */
17249 	if (ISCD(un))
17250 		sd_check_for_writable_cd(un, SD_PATH_DIRECT_PRIORITY);
17251 
17252 	/*
17253 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17254 	 * valid geometry.
17255 	 */
17256 	mutex_exit(SD_MUTEX(un));
17257 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17258 
17259 
17260 	if (cmlb_validate(un->un_cmlbhandle, 0,
17261 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17262 		return (EIO);
17263 	} else {
17264 		if (un->un_f_pkstats_enabled) {
17265 			sd_set_pstats(un);
17266 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17267 			    "sd_handle_mchange: un:0x%p pstats created and "
17268 			    "set\n", un);
17269 		}
17270 	}
17271 
17272 
17273 	/*
17274 	 * Try to lock the door
17275 	 */
17276 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17277 	    SD_PATH_DIRECT_PRIORITY));
17278 }
17279 
17280 
17281 /*
17282  *    Function: sd_send_scsi_DOORLOCK
17283  *
17284  * Description: Issue the scsi DOOR LOCK command
17285  *
17286  *   Arguments: un    - pointer to driver soft state (unit) structure for
17287  *			this target.
17288  *		flag  - SD_REMOVAL_ALLOW
17289  *			SD_REMOVAL_PREVENT
17290  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17291  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17292  *			to use the USCSI "direct" chain and bypass the normal
17293  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17294  *			command is issued as part of an error recovery action.
17295  *
17296  * Return Code: 0   - Success
17297  *		errno return code from sd_send_scsi_cmd()
17298  *
17299  *     Context: Can sleep.
17300  */
17301 
17302 static int
17303 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17304 {
17305 	union scsi_cdb		cdb;
17306 	struct uscsi_cmd	ucmd_buf;
17307 	struct scsi_extended_sense	sense_buf;
17308 	int			status;
17309 
17310 	ASSERT(un != NULL);
17311 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17312 
17313 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17314 
17315 	/* already determined doorlock is not supported, fake success */
17316 	if (un->un_f_doorlock_supported == FALSE) {
17317 		return (0);
17318 	}
17319 
17320 	/*
17321 	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
17322 	 * ignore the command so that we can complete the
17323 	 * eject operation.
17324 	 */
17325 	if (flag == SD_REMOVAL_PREVENT) {
17326 		mutex_enter(SD_MUTEX(un));
17327 		if (un->un_f_ejecting == TRUE) {
17328 			mutex_exit(SD_MUTEX(un));
17329 			return (EAGAIN);
17330 		}
17331 		mutex_exit(SD_MUTEX(un));
17332 	}
17333 
17334 	bzero(&cdb, sizeof (cdb));
17335 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17336 
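	/*
	 * Byte 4 of the PREVENT/ALLOW MEDIUM REMOVAL CDB carries the
	 * prevent bit: SD_REMOVAL_PREVENT locks the door and
	 * SD_REMOVAL_ALLOW unlocks it.
	 */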
17337 	cdb.scc_cmd = SCMD_DOORLOCK;
17338 	cdb.cdb_opaque[4] = (uchar_t)flag;
17339 
17340 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17341 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17342 	ucmd_buf.uscsi_bufaddr	= NULL;
17343 	ucmd_buf.uscsi_buflen	= 0;
17344 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17345 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17346 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17347 	ucmd_buf.uscsi_timeout	= 15;
17348 
17349 	SD_TRACE(SD_LOG_IO, un,
17350 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17351 
17352 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17353 	    UIO_SYSSPACE, path_flag);
17354 
17355 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17356 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17357 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17358 		/* fake success and skip subsequent doorlock commands */
17359 		un->un_f_doorlock_supported = FALSE;
17360 		return (0);
17361 	}
17362 
17363 	return (status);
17364 }
17365 
17366 /*
17367  *    Function: sd_send_scsi_READ_CAPACITY
17368  *
17369  * Description: This routine uses the scsi READ CAPACITY command to determine
17370  *		the device capacity in number of blocks and the device native
17371  *		block size. If this function returns a failure, then the
17372  *		values in *capp and *lbap are undefined.  If the capacity
17373  *		returned is 0xffffffff then the lun is too large for a
17374  *		normal READ CAPACITY command and the results of a
17375  *		READ CAPACITY 16 will be used instead.
17376  *
17377  *   Arguments: un   - ptr to soft state struct for the target
17378  *		capp - ptr to unsigned 64-bit variable to receive the
17379  *			capacity value from the command.
17380 	 *		lbap - ptr to unsigned 32-bit variable to receive the
17381  *			block size value from the command
17382  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17383  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17384  *			to use the USCSI "direct" chain and bypass the normal
17385  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17386  *			command is issued as part of an error recovery action.
17387  *
17388  * Return Code: 0   - Success
17389  *		EIO - IO error
17390  *		EACCES - Reservation conflict detected
17391  *		EAGAIN - Device is becoming ready
17392  *		errno return code from sd_send_scsi_cmd()
17393  *
17394  *     Context: Can sleep.  Blocks until command completes.
17395  */
17396 
17397 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17398 
17399 static int
17400 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17401 	int path_flag)
17402 {
17403 	struct	scsi_extended_sense	sense_buf;
17404 	struct	uscsi_cmd	ucmd_buf;
17405 	union	scsi_cdb	cdb;
17406 	uint32_t		*capacity_buf;
17407 	uint64_t		capacity;
17408 	uint32_t		lbasize;
17409 	int			status;
17410 
17411 	ASSERT(un != NULL);
17412 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17413 	ASSERT(capp != NULL);
17414 	ASSERT(lbap != NULL);
17415 
17416 	SD_TRACE(SD_LOG_IO, un,
17417 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17418 
17419 	/*
17420 	 * First send a READ_CAPACITY command to the target.
17421 	 * (This command is mandatory under SCSI-2.)
17422 	 *
17423 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17424 	 * Medium Indicator bit is cleared.  The address field must be
17425 	 * zero if the PMI bit is zero.
17426 	 */
17427 	bzero(&cdb, sizeof (cdb));
17428 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17429 
17430 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17431 
17432 	cdb.scc_cmd = SCMD_READ_CAPACITY;
17433 
17434 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17435 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17436 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17437 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17438 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17439 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17440 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17441 	ucmd_buf.uscsi_timeout	= 60;
17442 
17443 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17444 	    UIO_SYSSPACE, path_flag);
17445 
17446 	switch (status) {
17447 	case 0:
17448 		/* Return failure if we did not get valid capacity data. */
17449 		if (ucmd_buf.uscsi_resid != 0) {
17450 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17451 			return (EIO);
17452 		}
17453 
17454 		/*
17455 		 * Read capacity and block size from the READ CAPACITY 10 data.
17456 		 * This data may be adjusted later due to device specific
17457 		 * issues.
17458 		 *
17459 		 * According to the SCSI spec, the READ CAPACITY 10
17460 		 * command returns the following:
17461 		 *
17462 		 *  bytes 0-3: Maximum logical block address available.
17463 		 *		(MSB in byte:0 & LSB in byte:3)
17464 		 *
17465 		 *  bytes 4-7: Block length in bytes
17466 		 *		(MSB in byte:4 & LSB in byte:7)
17467 		 *
17468 		 */
17469 		capacity = BE_32(capacity_buf[0]);
17470 		lbasize = BE_32(capacity_buf[1]);
17471 
17472 		/*
17473 		 * Done with capacity_buf
17474 		 */
17475 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17476 
17477 		/*
17478 		 * if the reported capacity is set to all 0xf's, then
17479 		 * this disk is too large and requires SBC-2 commands.
17480 		 * Reissue the request using READ CAPACITY 16.
17481 		 */
17482 		if (capacity == 0xffffffff) {
17483 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17484 			    &lbasize, path_flag);
17485 			if (status != 0) {
17486 				return (status);
17487 			}
17488 		}
17489 		break;	/* Success! */
17490 	case EIO:
17491 		switch (ucmd_buf.uscsi_status) {
17492 		case STATUS_RESERVATION_CONFLICT:
17493 			status = EACCES;
17494 			break;
17495 		case STATUS_CHECK:
17496 			/*
17497 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17498 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17499 			 */
17500 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17501 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17502 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17503 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17504 				return (EAGAIN);
17505 			}
17506 			break;
17507 		default:
17508 			break;
17509 		}
17510 		/* FALLTHRU */
17511 	default:
17512 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17513 		return (status);
17514 	}
17515 
17516 	/*
17517 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17518 	 * (2352 and 0 are common), so for these devices always force the value
17519 	 * to 2048 as required by the ATAPI specs.
17520 	 */
17521 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17522 		lbasize = 2048;
17523 	}
17524 
17525 	/*
17526 	 * Get the maximum LBA value from the READ CAPACITY data.
17527 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17528 	 * was cleared when issuing the command. This means that the LBA
17529 	 * returned from the device is the LBA of the last logical block
17530 	 * on the logical unit.  The actual logical block count will be
17531 	 * this value plus one.
17532 	 *
17533 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17534 	 * so scale the capacity value to reflect this.
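	 * For example, with a 2048-byte lbasize and a 512-byte
	 * un_sys_blocksize, each device block counts as four system blocks.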
17535 	 */
17536 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
17537 
17538 	/*
17539 	 * Copy the values from the READ CAPACITY command into the space
17540 	 * provided by the caller.
17541 	 */
17542 	*capp = capacity;
17543 	*lbap = lbasize;
17544 
17545 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17546 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17547 
17548 	/*
17549 	 * Both the lbasize and capacity from the device must be nonzero,
17550 	 * otherwise we assume that the values are not valid and return
17551 	 * failure to the caller. (4203735)
17552 	 */
17553 	if ((capacity == 0) || (lbasize == 0)) {
17554 		return (EIO);
17555 	}
17556 
17557 	return (0);
17558 }
17559 
17560 /*
17561  *    Function: sd_send_scsi_READ_CAPACITY_16
17562  *
17563  * Description: This routine uses the scsi READ CAPACITY 16 command to
17564  *		determine the device capacity in number of blocks and the
17565  *		device native block size.  If this function returns a failure,
17566  *		then the values in *capp and *lbap are undefined.
17567  *		This routine should always be called by
17568  *		sd_send_scsi_READ_CAPACITY, which will apply any device
17569  *		specific adjustments to capacity and lbasize.
17570  *
17571  *   Arguments: un   - ptr to soft state struct for the target
17572  *		capp - ptr to unsigned 64-bit variable to receive the
17573  *			capacity value from the command.
17574  *		lbap - ptr to unsigned 32-bit varaible to receive the
17575  *		lbap - ptr to unsigned 32-bit variable to receive the
17576  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17577  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17578  *			to use the USCSI "direct" chain and bypass the normal
17579  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17580  *			this command is issued as part of an error recovery
17581  *			action.
17582  *
17583  * Return Code: 0   - Success
17584  *		EIO - IO error
17585  *		EACCES - Reservation conflict detected
17586  *		EAGAIN - Device is becoming ready
17587  *		errno return code from sd_send_scsi_cmd()
17588  *
17589  *     Context: Can sleep.  Blocks until command completes.
17590  */
17591 
17592 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17593 
17594 static int
17595 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17596 	uint32_t *lbap, int path_flag)
17597 {
17598 	struct	scsi_extended_sense	sense_buf;
17599 	struct	uscsi_cmd	ucmd_buf;
17600 	union	scsi_cdb	cdb;
17601 	uint64_t		*capacity16_buf;
17602 	uint64_t		capacity;
17603 	uint32_t		lbasize;
17604 	int			status;
17605 
17606 	ASSERT(un != NULL);
17607 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17608 	ASSERT(capp != NULL);
17609 	ASSERT(lbap != NULL);
17610 
17611 	SD_TRACE(SD_LOG_IO, un,
17612 	    "sd_send_scsi_READ_CAPACITY_16: entry: un:0x%p\n", un);
17613 
17614 	/*
17615 	 * First send a READ_CAPACITY_16 command to the target.
17616 	 *
17617 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17618 	 * Medium Indicator bit is cleared.  The address field must be
17619 	 * zero if the PMI bit is zero.
17620 	 */
17621 	bzero(&cdb, sizeof (cdb));
17622 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17623 
17624 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17625 
17626 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17627 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17628 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17629 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17630 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17631 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17632 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17633 	ucmd_buf.uscsi_timeout	= 60;
17634 
17635 	/*
17636 	 * Read Capacity (16) is a Service Action In command.  One
17637 	 * command byte (0x9E) is overloaded for multiple operations,
17638 	 * with the second CDB byte specifying the desired operation
17639 	 */
17640 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17641 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
17642 
17643 	/*
17644 	 * Fill in allocation length field
17645 	 */
17646 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17647 
17648 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17649 	    UIO_SYSSPACE, path_flag);
17650 
17651 	switch (status) {
17652 	case 0:
17653 		/* Return failure if we did not get valid capacity data. */
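		/* (resid > 20 would leave fewer than the 12 bytes we use) */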
17654 		if (ucmd_buf.uscsi_resid > 20) {
17655 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17656 			return (EIO);
17657 		}
17658 
17659 		/*
17660 		 * Read capacity and block size from the READ CAPACITY 16 data.
17661 		 * This data may be adjusted later due to device specific
17662 		 * issues.
17663 		 *
17664 		 * According to the SCSI spec, the READ CAPACITY 16
17665 		 * command returns the following:
17666 		 *
17667 		 *  bytes 0-7: Maximum logical block address available.
17668 		 *		(MSB in byte:0 & LSB in byte:7)
17669 		 *
17670 		 *  bytes 8-11: Block length in bytes
17671 		 *		(MSB in byte:8 & LSB in byte:11)
17672 		 *
17673 		 */
17674 		capacity = BE_64(capacity16_buf[0]);
17675 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
17676 
17677 		/*
17678 		 * Done with capacity16_buf
17679 		 */
17680 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17681 
17682 		/*
17683 		 * if the reported capacity is set to all 0xf's, then
17684 		 * this disk is too large.  This could only happen with
17685 		 * a device that supports LBAs larger than 64 bits which
17686 		 * are not defined by any current T10 standards.
17687 		 */
17688 		if (capacity == 0xffffffffffffffff) {
17689 			return (EIO);
17690 		}
17691 		break;	/* Success! */
17692 	case EIO:
17693 		switch (ucmd_buf.uscsi_status) {
17694 		case STATUS_RESERVATION_CONFLICT:
17695 			status = EACCES;
17696 			break;
17697 		case STATUS_CHECK:
17698 			/*
17699 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17700 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17701 			 */
17702 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17703 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17704 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17705 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17706 				return (EAGAIN);
17707 			}
17708 			break;
17709 		default:
17710 			break;
17711 		}
17712 		/* FALLTHRU */
17713 	default:
17714 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17715 		return (status);
17716 	}
17717 
17718 	*capp = capacity;
17719 	*lbap = lbasize;
17720 
17721 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17722 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17723 
17724 	return (0);
17725 }
17726 
17727 
17728 /*
17729  *    Function: sd_send_scsi_START_STOP_UNIT
17730  *
17731  * Description: Issue a scsi START STOP UNIT command to the target.
17732  *
17733  *   Arguments: un    - pointer to driver soft state (unit) structure for
17734  *			this target.
17735  *		flag  - SD_TARGET_START
17736  *			SD_TARGET_STOP
17737  *			SD_TARGET_EJECT
17738  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17739  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17740  *			to use the USCSI "direct" chain and bypass the normal
17741  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17742  *			command is issued as part of an error recovery action.
17743  *
17744  * Return Code: 0   - Success
17745  *		EIO - IO error
17746  *		EACCES - Reservation conflict detected
17747  *		ENXIO  - Not Ready, medium not present
17748  *		errno return code from sd_send_scsi_cmd()
17749  *
17750  *     Context: Can sleep.
17751  */
17752 
17753 static int
17754 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17755 {
17756 	struct	scsi_extended_sense	sense_buf;
17757 	union scsi_cdb		cdb;
17758 	struct uscsi_cmd	ucmd_buf;
17759 	int			status;
17760 
17761 	ASSERT(un != NULL);
17762 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17763 
17764 	SD_TRACE(SD_LOG_IO, un,
17765 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
17766 
17767 	if (un->un_f_check_start_stop &&
17768 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
17769 	    (un->un_f_start_stop_supported != TRUE)) {
17770 		return (0);
17771 	}
17772 
17773 	/*
17774 	 * If we are performing an eject operation and
17775 	 * we receive any command other than SD_TARGET_EJECT,
17776 	 * we should return immediately.
17777 	 */
17778 	if (flag != SD_TARGET_EJECT) {
17779 		mutex_enter(SD_MUTEX(un));
17780 		if (un->un_f_ejecting == TRUE) {
17781 			mutex_exit(SD_MUTEX(un));
17782 			return (EAGAIN);
17783 		}
17784 		mutex_exit(SD_MUTEX(un));
17785 	}
17786 
17787 	bzero(&cdb, sizeof (cdb));
17788 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17789 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
17790 
17791 	cdb.scc_cmd = SCMD_START_STOP;
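	/*
	 * Byte 4 of the START STOP UNIT CDB carries the START and LOEJ
	 * bits; the SD_TARGET_* flag values supply them directly.
	 */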
17792 	cdb.cdb_opaque[4] = (uchar_t)flag;
17793 
17794 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17795 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17796 	ucmd_buf.uscsi_bufaddr	= NULL;
17797 	ucmd_buf.uscsi_buflen	= 0;
17798 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17799 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
17800 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17801 	ucmd_buf.uscsi_timeout	= 200;
17802 
17803 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17804 	    UIO_SYSSPACE, path_flag);
17805 
17806 	switch (status) {
17807 	case 0:
17808 		break;	/* Success! */
17809 	case EIO:
17810 		switch (ucmd_buf.uscsi_status) {
17811 		case STATUS_RESERVATION_CONFLICT:
17812 			status = EACCES;
17813 			break;
17814 		case STATUS_CHECK:
17815 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
17816 				switch (scsi_sense_key(
17817 				    (uint8_t *)&sense_buf)) {
17818 				case KEY_ILLEGAL_REQUEST:
17819 					status = ENOTSUP;
17820 					break;
17821 				case KEY_NOT_READY:
17822 					if (scsi_sense_asc(
17823 					    (uint8_t *)&sense_buf) ==
17824 					    0x3A) {
17825 						status = ENXIO;
17826 					}
17827 					break;
17828 				default:
17829 					break;
17830 				}
17831 			}
17832 			break;
17833 		default:
17834 			break;
17835 		}
17836 		break;
17837 	default:
17838 		break;
17839 	}
17840 
17841 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
17842 
17843 	return (status);
17844 }
17845 
17846 
17847 /*
17848  *    Function: sd_start_stop_unit_callback
17849  *
17850  * Description: timeout(9F) callback to begin recovery process for a
17851  *		device that has spun down.
17852  *
17853  *   Arguments: arg - pointer to associated softstate struct.
17854  *
17855  *     Context: Executes in a timeout(9F) thread context
17856  */
17857 
17858 static void
17859 sd_start_stop_unit_callback(void *arg)
17860 {
17861 	struct sd_lun	*un = arg;
17862 	ASSERT(un != NULL);
17863 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17864 
17865 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
17866 
17867 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
17868 }
17869 
17870 
17871 /*
17872  *    Function: sd_start_stop_unit_task
17873  *
17874  * Description: Recovery procedure when a drive is spun down.
17875  *
17876  *   Arguments: arg - pointer to associated softstate struct.
17877  *
17878  *     Context: Executes in a taskq() thread context
17879  */
17880 
17881 static void
17882 sd_start_stop_unit_task(void *arg)
17883 {
17884 	struct sd_lun	*un = arg;
17885 
17886 	ASSERT(un != NULL);
17887 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17888 
17889 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
17890 
17891 	/*
17892 	 * Some unformatted drives report not ready error, no need to
17893 	 * restart if format has been initiated.
17894 	 */
17895 	mutex_enter(SD_MUTEX(un));
17896 	if (un->un_f_format_in_progress == TRUE) {
17897 		mutex_exit(SD_MUTEX(un));
17898 		return;
17899 	}
17900 	mutex_exit(SD_MUTEX(un));
17901 
17902 	/*
17903 	 * When a START STOP command is issued from here, it is part of a
17904 	 * failure recovery operation and must be issued before any other
17905 	 * commands, including any pending retries. Thus it must be sent
17906 	 * using SD_PATH_DIRECT_PRIORITY. Whether or not the spin up
17907 	 * succeeds, we will start I/O after the attempt.
17908 	 */
17909 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
17910 	    SD_PATH_DIRECT_PRIORITY);
17911 
17912 	/*
17913 	 * The above call blocks until the START_STOP_UNIT command completes.
17914 	 * Now that it has completed, we must re-try the original IO that
17915 	 * received the NOT READY condition in the first place. There are
17916 	 * three possible conditions here:
17917 	 *
17918 	 *  (1) The original IO is on un_retry_bp.
17919 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
17920 	 *	is NULL.
17921 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
17922 	 *	points to some other, unrelated bp.
17923 	 *
17924 	 * For each case, we must call sd_start_cmds() with un_retry_bp
17925 	 * as the argument. If un_retry_bp is NULL, this will initiate
17926 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
17927 	 * then this will process the bp on un_retry_bp. That may or may not
17928 	 * be the original IO, but that does not matter: the important thing
17929 	 * is to keep the IO processing going at this point.
17930 	 *
17931 	 * Note: This is a very specific error recovery sequence associated
17932 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
17933 	 * serialize the I/O with completion of the spin-up.
17934 	 */
17935 	mutex_enter(SD_MUTEX(un));
17936 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17937 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
17938 	    un, un->un_retry_bp);
17939 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
17940 	sd_start_cmds(un, un->un_retry_bp);
17941 	mutex_exit(SD_MUTEX(un));
17942 
17943 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
17944 }
17945 
17946 
17947 /*
17948  *    Function: sd_send_scsi_INQUIRY
17949  *
17950  * Description: Issue the scsi INQUIRY command.
17951  *
17952  *   Arguments: un
17953  *		bufaddr
17954  *		buflen
17955  *		evpd
17956  *		page_code
17957  *		residp
17958  *
17959  * Return Code: 0   - Success
17960  *		errno return code from sd_send_scsi_cmd()
17961  *
17962  *     Context: Can sleep. Does not return until command is completed.
17963  */
17964 
17965 static int
17966 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
17967 	uchar_t evpd, uchar_t page_code, size_t *residp)
17968 {
17969 	union scsi_cdb		cdb;
17970 	struct uscsi_cmd	ucmd_buf;
17971 	int			status;
17972 
17973 	ASSERT(un != NULL);
17974 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17975 	ASSERT(bufaddr != NULL);
17976 
17977 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
17978 
17979 	bzero(&cdb, sizeof (cdb));
17980 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17981 	bzero(bufaddr, buflen);
17982 
17983 	cdb.scc_cmd = SCMD_INQUIRY;
17984 	cdb.cdb_opaque[1] = evpd;
17985 	cdb.cdb_opaque[2] = page_code;
17986 	FORMG0COUNT(&cdb, buflen);
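	/* FORMG0COUNT puts the allocation length into byte 4 of the CDB */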
17987 
17988 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17989 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17990 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
17991 	ucmd_buf.uscsi_buflen	= buflen;
17992 	ucmd_buf.uscsi_rqbuf	= NULL;
17993 	ucmd_buf.uscsi_rqlen	= 0;
17994 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
17995 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
17996 
17997 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17998 	    UIO_SYSSPACE, SD_PATH_DIRECT);
17999 
18000 	if ((status == 0) && (residp != NULL)) {
18001 		*residp = ucmd_buf.uscsi_resid;
18002 	}
18003 
18004 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
18005 
18006 	return (status);
18007 }
18008 
18009 
18010 /*
18011  *    Function: sd_send_scsi_TEST_UNIT_READY
18012  *
18013  * Description: Issue the scsi TEST UNIT READY command.
18014  *		This routine can be told to set the flag USCSI_DIAGNOSE to
18015  *		prevent retrying failed commands. Use this when the intent
18016  *		is either to check for device readiness, to clear a Unit
18017  *		Attention, or to clear any outstanding sense data.
18018  *		However under specific conditions the expected behavior
18019  *		is for retries to bring a device ready, so use the flag
18020  *		with caution.
18021  *
18022  *   Arguments: un
18023  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
18024  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
18025  *			0: dont check for media present, do retries on cmd.
18026  *			0: don't check for media present, do retries on cmd.
18027  * Return Code: 0   - Success
18028  *		EIO - IO error
18029  *		EACCES - Reservation conflict detected
18030  *		ENXIO  - Not Ready, medium not present
18031  *		errno return code from sd_send_scsi_cmd()
18032  *
18033  *     Context: Can sleep. Does not return until command is completed.
18034  */
18035 
18036 static int
18037 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18038 {
18039 	struct	scsi_extended_sense	sense_buf;
18040 	union scsi_cdb		cdb;
18041 	struct uscsi_cmd	ucmd_buf;
18042 	int			status;
18043 
18044 	ASSERT(un != NULL);
18045 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18046 
18047 	SD_TRACE(SD_LOG_IO, un,
18048 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18049 
18050 	/*
18051 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
18052 	 * timeouts when they receive a TUR and the queue is not empty. Check
18053 	 * the configuration flag set during attach (indicating the drive has
18054 	 * this firmware bug) and un_ncmds_in_transport before issuing the
18055 	 * TUR. If there are
18056 	 * TUR. If there are pending commands, return success; this is
18057 	 * a bit arbitrary, but it is OK both for non-removables (i.e.
18058 	 * the elite1 disks) and for non-clustering
18059 	 * configurations.
18060 	if (un->un_f_cfg_tur_check == TRUE) {
18061 		mutex_enter(SD_MUTEX(un));
18062 		if (un->un_ncmds_in_transport != 0) {
18063 			mutex_exit(SD_MUTEX(un));
18064 			return (0);
18065 		}
18066 		mutex_exit(SD_MUTEX(un));
18067 	}
18068 
18069 	bzero(&cdb, sizeof (cdb));
18070 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18071 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18072 
18073 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18074 
18075 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18076 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18077 	ucmd_buf.uscsi_bufaddr	= NULL;
18078 	ucmd_buf.uscsi_buflen	= 0;
18079 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18080 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18081 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18082 
18083 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18084 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18085 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18086 	}
18087 	ucmd_buf.uscsi_timeout	= 60;
18088 
18089 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18090 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18091 	    SD_PATH_STANDARD));
18092 
18093 	switch (status) {
18094 	case 0:
18095 		break;	/* Success! */
18096 	case EIO:
18097 		switch (ucmd_buf.uscsi_status) {
18098 		case STATUS_RESERVATION_CONFLICT:
18099 			status = EACCES;
18100 			break;
18101 		case STATUS_CHECK:
18102 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18103 				break;
18104 			}
18105 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18106 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18107 			    KEY_NOT_READY) &&
18108 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18109 				status = ENXIO;
18110 			}
18111 			break;
18112 		default:
18113 			break;
18114 		}
18115 		break;
18116 	default:
18117 		break;
18118 	}
18119 
18120 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18121 
18122 	return (status);
18123 }
18124 
18125 
18126 /*
18127  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18128  *
18129  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18130  *
18131  *   Arguments: un, usr_cmd (SD_READ_KEYS or SD_READ_RESV), data_len, data_bufp
18132  *
18133  * Return Code: 0   - Success
18134  *		EACCES
18135  *		ENOTSUP
18136  *		errno return code from sd_send_scsi_cmd()
18137  *
18138  *     Context: Can sleep. Does not return until command is completed.
18139  */
18140 
18141 static int
18142 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18143 	uint16_t data_len, uchar_t *data_bufp)
18144 {
18145 	struct scsi_extended_sense	sense_buf;
18146 	union scsi_cdb		cdb;
18147 	struct uscsi_cmd	ucmd_buf;
18148 	int			status;
18149 	int			no_caller_buf = FALSE;
18150 
18151 	ASSERT(un != NULL);
18152 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18153 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18154 
18155 	SD_TRACE(SD_LOG_IO, un,
18156 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18157 
18158 	bzero(&cdb, sizeof (cdb));
18159 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18160 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18161 	if (data_bufp == NULL) {
18162 		/* Allocate a default buf if the caller did not give one */
18163 		ASSERT(data_len == 0);
18164 		data_len  = MHIOC_RESV_KEY_SIZE;
18165 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18166 		no_caller_buf = TRUE;
18167 	}
18168 
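	/*
	 * Build the PERSISTENT RESERVE IN CDB: the service action
	 * (READ KEYS or READ RESERVATION) goes in byte 1 and the
	 * allocation length is set via FORMG1COUNT.
	 */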
18169 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18170 	cdb.cdb_opaque[1] = usr_cmd;
18171 	FORMG1COUNT(&cdb, data_len);
18172 
18173 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18174 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18175 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18176 	ucmd_buf.uscsi_buflen	= data_len;
18177 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18178 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18179 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18180 	ucmd_buf.uscsi_timeout	= 60;
18181 
18182 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18183 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18184 
18185 	switch (status) {
18186 	case 0:
18187 		break;	/* Success! */
18188 	case EIO:
18189 		switch (ucmd_buf.uscsi_status) {
18190 		case STATUS_RESERVATION_CONFLICT:
18191 			status = EACCES;
18192 			break;
18193 		case STATUS_CHECK:
18194 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18195 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18196 				KEY_ILLEGAL_REQUEST)) {
18197 				status = ENOTSUP;
18198 			}
18199 			break;
18200 		default:
18201 			break;
18202 		}
18203 		break;
18204 	default:
18205 		break;
18206 	}
18207 
18208 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18209 
18210 	if (no_caller_buf == TRUE) {
18211 		kmem_free(data_bufp, data_len);
18212 	}
18213 
18214 	return (status);
18215 }
18216 
18217 
18218 /*
18219  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18220  *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
 *		commands to the device.
 *
 *   Arguments: un  -   Pointer to soft state struct for the target.
 *		usr_cmd SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user provided pointer to a register, reserve
 *			descriptor, or preempt and abort structure
 *			(mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t,
 *			mhioc_registerandignorekey_t)
18233  *
18234  * Return Code: 0   - Success
18235  *		EACCES
18236  *		ENOTSUP
18237  *		errno return code from sd_send_scsi_cmd()
18238  *
18239  *     Context: Can sleep. Does not return until command is completed.
18240  */
18241 
18242 static int
18243 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18244 	uchar_t	*usr_bufp)
18245 {
18246 	struct scsi_extended_sense	sense_buf;
18247 	union scsi_cdb		cdb;
18248 	struct uscsi_cmd	ucmd_buf;
18249 	int			status;
18250 	uchar_t			data_len = sizeof (sd_prout_t);
18251 	sd_prout_t		*prp;
18252 
18253 	ASSERT(un != NULL);
18254 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18255 	ASSERT(data_len == 24);	/* required by scsi spec */
18256 
18257 	SD_TRACE(SD_LOG_IO, un,
18258 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18259 
18260 	if (usr_bufp == NULL) {
18261 		return (EINVAL);
18262 	}
18263 
18264 	bzero(&cdb, sizeof (cdb));
18265 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18266 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18267 	prp = kmem_zalloc(data_len, KM_SLEEP);
18268 
18269 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18270 	cdb.cdb_opaque[1] = usr_cmd;
18271 	FORMG1COUNT(&cdb, data_len);
18272 
18273 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18274 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18275 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18276 	ucmd_buf.uscsi_buflen	= data_len;
18277 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18278 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18279 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18280 	ucmd_buf.uscsi_timeout	= 60;
18281 
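	/*
	 * Fill in the 24-byte parameter list and any service action
	 * specific CDB fields, based on the requested command.
	 */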
18282 	switch (usr_cmd) {
18283 	case SD_SCSI3_REGISTER: {
18284 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18285 
18286 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18287 		bcopy(ptr->newkey.key, prp->service_key,
18288 		    MHIOC_RESV_KEY_SIZE);
18289 		prp->aptpl = ptr->aptpl;
18290 		break;
18291 	}
18292 	case SD_SCSI3_RESERVE:
18293 	case SD_SCSI3_RELEASE: {
18294 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18295 
18296 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18297 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18298 		cdb.cdb_opaque[2] = ptr->type;
18299 		break;
18300 	}
18301 	case SD_SCSI3_PREEMPTANDABORT: {
18302 		mhioc_preemptandabort_t *ptr =
18303 		    (mhioc_preemptandabort_t *)usr_bufp;
18304 
18305 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18306 		bcopy(ptr->victim_key.key, prp->service_key,
18307 		    MHIOC_RESV_KEY_SIZE);
18308 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18309 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
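		/* A preempt/abort request goes to the head of the queue. */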
18310 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18311 		break;
18312 	}
18313 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18314 	{
18315 		mhioc_registerandignorekey_t *ptr;
18316 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18317 		bcopy(ptr->newkey.key,
18318 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18319 		prp->aptpl = ptr->aptpl;
18320 		break;
18321 	}
18322 	default:
18323 		ASSERT(FALSE);
18324 		break;
18325 	}
18326 
18327 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18328 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18329 
18330 	switch (status) {
18331 	case 0:
18332 		break;	/* Success! */
18333 	case EIO:
18334 		switch (ucmd_buf.uscsi_status) {
18335 		case STATUS_RESERVATION_CONFLICT:
18336 			status = EACCES;
18337 			break;
18338 		case STATUS_CHECK:
18339 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18340 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18341 				KEY_ILLEGAL_REQUEST)) {
18342 				status = ENOTSUP;
18343 			}
18344 			break;
18345 		default:
18346 			break;
18347 		}
18348 		break;
18349 	default:
18350 		break;
18351 	}
18352 
18353 	kmem_free(prp, data_len);
18354 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18355 	return (status);
18356 }
18357 
18358 
18359 /*
18360  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18361  *
18362  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18363  *
 *   Arguments: un - pointer to the target's soft state struct
 *		dkc - pointer to the callback structure; if non-NULL the
 *			command is issued asynchronously and the callback
 *			runs at completion, otherwise this routine blocks
 *			until the command completes
18365  *
18366  * Return Code: 0 - success
18367  *		errno-type error code
18368  *
18369  *     Context: kernel thread context only.
18370  */
18371 
18372 static int
18373 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18374 {
18375 	struct sd_uscsi_info	*uip;
18376 	struct uscsi_cmd	*uscmd;
18377 	union scsi_cdb		*cdb;
18378 	struct buf		*bp;
18379 	int			rval = 0;
18380 
18381 	SD_TRACE(SD_LOG_IO, un,
18382 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18383 
18384 	ASSERT(un != NULL);
18385 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18386 
	/*
	 * First get some memory for the uscsi_cmd struct and cdb
	 * and initialize them for the SYNCHRONIZE CACHE cmd.
	 */
	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;

	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	uscmd->uscsi_cdblen = CDB_GROUP1;
	uscmd->uscsi_cdb = (caddr_t)cdb;
18397 	uscmd->uscsi_bufaddr = NULL;
18398 	uscmd->uscsi_buflen = 0;
18399 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18400 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18401 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18402 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18403 	uscmd->uscsi_timeout = sd_io_time;
18404 
18405 	/*
18406 	 * Allocate an sd_uscsi_info struct and fill it with the info
18407 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18408 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18409 	 * since we allocate the buf here in this function, we do not
18410 	 * need to preserve the prior contents of b_private.
18411 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy().
18413 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18414 	uip->ui_flags = SD_PATH_DIRECT;
18415 	uip->ui_cmdp  = uscmd;
18416 
18417 	bp = getrbuf(KM_SLEEP);
18418 	bp->b_private = uip;
18419 
18420 	/*
18421 	 * Setup buffer to carry uscsi request.
18422 	 */
18423 	bp->b_flags  = B_BUSY;
18424 	bp->b_bcount = 0;
18425 	bp->b_blkno  = 0;
18426 
18427 	if (dkc != NULL) {
18428 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18429 		uip->ui_dkc = *dkc;
18430 	}
18431 
18432 	bp->b_edev = SD_GET_DEV(un);
18433 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18434 
18435 	(void) sd_uscsi_strategy(bp);
18436 
	/*
	 * If this is a synchronous request, wait for completion.
	 * If it is an async request, just return and let the b_iodone
	 * callback do the cleanup.
	 * NOTE: On return, un_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
18445 	if (dkc == NULL) {
18446 		(void) biowait(bp);
18447 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18448 	}
18449 
18450 	return (rval);
18451 }
18452 
18453 
18454 static int
18455 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18456 {
18457 	struct sd_uscsi_info *uip;
18458 	struct uscsi_cmd *uscmd;
18459 	uint8_t *sense_buf;
18460 	struct sd_lun *un;
18461 	int status;
18462 
18463 	uip = (struct sd_uscsi_info *)(bp->b_private);
18464 	ASSERT(uip != NULL);
18465 
18466 	uscmd = uip->ui_cmdp;
18467 	ASSERT(uscmd != NULL);
18468 
18469 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18470 	ASSERT(sense_buf != NULL);
18471 
18472 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18473 	ASSERT(un != NULL);
18474 
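	/*
	 * Map the command's completion status to a final return value.
	 * A reservation conflict is ignored; a CHECK CONDITION with sense
	 * key ILLEGAL REQUEST means the device does not support
	 * SYNCHRONIZE CACHE, so remember that and return ENOTSUP.
	 */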
18475 	status = geterror(bp);
18476 	switch (status) {
18477 	case 0:
18478 		break;	/* Success! */
18479 	case EIO:
18480 		switch (uscmd->uscsi_status) {
18481 		case STATUS_RESERVATION_CONFLICT:
18482 			/* Ignore reservation conflict */
18483 			status = 0;
18484 			goto done;
18485 
18486 		case STATUS_CHECK:
18487 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18488 			    (scsi_sense_key(sense_buf) ==
18489 				KEY_ILLEGAL_REQUEST)) {
18490 				/* Ignore Illegal Request error */
18491 				mutex_enter(SD_MUTEX(un));
18492 				un->un_f_sync_cache_supported = FALSE;
18493 				mutex_exit(SD_MUTEX(un));
18494 				status = ENOTSUP;
18495 				goto done;
18496 			}
18497 			break;
18498 		default:
18499 			break;
18500 		}
18501 		/* FALLTHRU */
18502 	default:
18503 		/*
18504 		 * Don't log an error message if this device
18505 		 * has removable media.
18506 		 */
18507 		if (!un->un_f_has_removable_media) {
18508 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18509 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18510 		}
18511 		break;
18512 	}
18513 
18514 done:
18515 	if (uip->ui_dkc.dkc_callback != NULL) {
18516 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18517 	}
18518 
18519 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18520 	freerbuf(bp);
18521 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18522 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18523 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18524 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18525 
18526 	return (status);
18527 }
18528 
18529 
18530 /*
18531  *    Function: sd_send_scsi_GET_CONFIGURATION
18532  *
 * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info.
 *		The caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
 *   Arguments: un - pointer to the target's soft state struct
 *		ucmdbuf - uscsi command buffer to use for the request
 *		rqbuf - buffer to receive request sense data
 *		rqbuflen - length of rqbuf, in bytes
 *		bufaddr - buffer to receive the configuration data
 *		buflen - length of bufaddr, in bytes
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain
 *			and the normal command waitq, or
 *			SD_PATH_DIRECT_PRIORITY to bypass the normal waitq
18543  *
18544  * Return Code: 0   - Success
18545  *		errno return code from sd_send_scsi_cmd()
18546  *
18547  *     Context: Can sleep. Does not return until command is completed.
18548  *
18549  */
18550 
18551 static int
18552 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18553 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
18554 	int path_flag)
18555 {
18556 	char	cdb[CDB_GROUP1];
18557 	int	status;
18558 
18559 	ASSERT(un != NULL);
18560 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18561 	ASSERT(bufaddr != NULL);
18562 	ASSERT(ucmdbuf != NULL);
18563 	ASSERT(rqbuf != NULL);
18564 
18565 	SD_TRACE(SD_LOG_IO, un,
18566 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18567 
18568 	bzero(cdb, sizeof (cdb));
18569 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18570 	bzero(rqbuf, rqbuflen);
18571 	bzero(bufaddr, buflen);
18572 
18573 	/*
18574 	 * Set up cdb field for the get configuration command.
18575 	 */
18576 	cdb[0] = SCMD_GET_CONFIGURATION;
18577 	cdb[1] = 0x02;  /* Requested Type */
18578 	cdb[8] = SD_PROFILE_HEADER_LEN;
18579 	ucmdbuf->uscsi_cdb = cdb;
18580 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18581 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18582 	ucmdbuf->uscsi_buflen = buflen;
18583 	ucmdbuf->uscsi_timeout = sd_io_time;
18584 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18585 	ucmdbuf->uscsi_rqlen = rqbuflen;
18586 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18587 
18588 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18589 	    UIO_SYSSPACE, path_flag);
18590 
18591 	switch (status) {
18592 	case 0:
18593 		break;  /* Success! */
18594 	case EIO:
18595 		switch (ucmdbuf->uscsi_status) {
18596 		case STATUS_RESERVATION_CONFLICT:
18597 			status = EACCES;
18598 			break;
18599 		default:
18600 			break;
18601 		}
18602 		break;
18603 	default:
18604 		break;
18605 	}
18606 
18607 	if (status == 0) {
18608 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18609 		    "sd_send_scsi_GET_CONFIGURATION: data",
18610 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18611 	}
18612 
18613 	SD_TRACE(SD_LOG_IO, un,
18614 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18615 
18616 	return (status);
18617 }
18618 
18619 /*
18620  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18621  *
 * Description: Issues the get configuration command to the device to
 *              retrieve a specific feature. Called from
 *		sd_check_for_writable_cd & sd_set_mmc_caps.
 *   Arguments: un - pointer to the target's soft state struct
 *              ucmdbuf - uscsi command buffer to use for the request
 *              rqbuf - buffer to receive request sense data
 *              rqbuflen - length of rqbuf, in bytes
 *              bufaddr - buffer to receive the feature data
 *              buflen - length of bufaddr, in bytes
 *		feature - feature code of the feature to retrieve
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain
 *			and the normal command waitq, or
 *			SD_PATH_DIRECT_PRIORITY to bypass the normal waitq
18632  *
18633  * Return Code: 0   - Success
18634  *              errno return code from sd_send_scsi_cmd()
18635  *
18636  *     Context: Can sleep. Does not return until command is completed.
18637  *
18638  */
18639 static int
18640 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18641 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18642 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
18643 {
18644 	char    cdb[CDB_GROUP1];
18645 	int	status;
18646 
18647 	ASSERT(un != NULL);
18648 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18649 	ASSERT(bufaddr != NULL);
18650 	ASSERT(ucmdbuf != NULL);
18651 	ASSERT(rqbuf != NULL);
18652 
18653 	SD_TRACE(SD_LOG_IO, un,
18654 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18655 
18656 	bzero(cdb, sizeof (cdb));
18657 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18658 	bzero(rqbuf, rqbuflen);
18659 	bzero(bufaddr, buflen);
18660 
18661 	/*
18662 	 * Set up cdb field for the get configuration command.
18663 	 */
18664 	cdb[0] = SCMD_GET_CONFIGURATION;
18665 	cdb[1] = 0x02;  /* Requested Type */
18666 	cdb[3] = feature;
18667 	cdb[8] = buflen;
18668 	ucmdbuf->uscsi_cdb = cdb;
18669 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18670 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18671 	ucmdbuf->uscsi_buflen = buflen;
18672 	ucmdbuf->uscsi_timeout = sd_io_time;
18673 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18674 	ucmdbuf->uscsi_rqlen = rqbuflen;
18675 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18676 
18677 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18678 	    UIO_SYSSPACE, path_flag);
18679 
18680 	switch (status) {
18681 	case 0:
18682 		break;  /* Success! */
18683 	case EIO:
18684 		switch (ucmdbuf->uscsi_status) {
18685 		case STATUS_RESERVATION_CONFLICT:
18686 			status = EACCES;
18687 			break;
18688 		default:
18689 			break;
18690 		}
18691 		break;
18692 	default:
18693 		break;
18694 	}
18695 
18696 	if (status == 0) {
18697 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18698 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
18699 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18700 	}
18701 
18702 	SD_TRACE(SD_LOG_IO, un,
18703 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
18704 
18705 	return (status);
18706 }
18707 
18708 
18709 /*
18710  *    Function: sd_send_scsi_MODE_SENSE
18711  *
18712  * Description: Utility function for issuing a scsi MODE SENSE command.
18713  *		Note: This routine uses a consistent implementation for Group0,
18714  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
 *
 *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
18720  *		bufaddr - buffer for page data retrieved from the target.
18721  *		buflen - size of page to be retrieved.
18722  *		page_code - page code of data to be retrieved from the target.
18723  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18724  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18725  *			to use the USCSI "direct" chain and bypass the normal
18726  *			command waitq.
18727  *
18728  * Return Code: 0   - Success
18729  *		errno return code from sd_send_scsi_cmd()
18730  *
18731  *     Context: Can sleep. Does not return until command is completed.
18732  */
18733 
18734 static int
18735 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18736 	size_t buflen,  uchar_t page_code, int path_flag)
18737 {
18738 	struct	scsi_extended_sense	sense_buf;
18739 	union scsi_cdb		cdb;
18740 	struct uscsi_cmd	ucmd_buf;
18741 	int			status;
18742 	int			headlen;
18743 
18744 	ASSERT(un != NULL);
18745 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18746 	ASSERT(bufaddr != NULL);
18747 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18748 	    (cdbsize == CDB_GROUP2));
18749 
18750 	SD_TRACE(SD_LOG_IO, un,
18751 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
18752 
18753 	bzero(&cdb, sizeof (cdb));
18754 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18755 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18756 	bzero(bufaddr, buflen);
18757 
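	/*
	 * Build either the 6-byte or the 10-byte form of MODE SENSE.
	 * The two forms return mode parameter headers of different
	 * lengths (MODE_HEADER_LENGTH vs. MODE_HEADER_LENGTH_GRP2).
	 */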
18758 	if (cdbsize == CDB_GROUP0) {
18759 		cdb.scc_cmd = SCMD_MODE_SENSE;
18760 		cdb.cdb_opaque[2] = page_code;
18761 		FORMG0COUNT(&cdb, buflen);
18762 		headlen = MODE_HEADER_LENGTH;
18763 	} else {
18764 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
18765 		cdb.cdb_opaque[2] = page_code;
18766 		FORMG1COUNT(&cdb, buflen);
18767 		headlen = MODE_HEADER_LENGTH_GRP2;
18768 	}
18769 
18770 	ASSERT(headlen <= buflen);
18771 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18772 
18773 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18774 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18775 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18776 	ucmd_buf.uscsi_buflen	= buflen;
18777 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18778 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18779 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18780 	ucmd_buf.uscsi_timeout	= 60;
18781 
18782 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18783 	    UIO_SYSSPACE, path_flag);
18784 
18785 	switch (status) {
18786 	case 0:
18787 		/*
		 * sr_check_wp() uses the 0x3f page code and checks the header
		 * of the mode page to determine if the target device is
		 * write-protected. But some USB devices return 0 bytes for
		 * the 0x3f page code. For this case, make sure that at least
		 * the mode page header is returned.
18793 		 */
		if (buflen - ucmd_buf.uscsi_resid < headlen)
18795 			status = EIO;
18796 		break;	/* Success! */
18797 	case EIO:
18798 		switch (ucmd_buf.uscsi_status) {
18799 		case STATUS_RESERVATION_CONFLICT:
18800 			status = EACCES;
18801 			break;
18802 		default:
18803 			break;
18804 		}
18805 		break;
18806 	default:
18807 		break;
18808 	}
18809 
18810 	if (status == 0) {
18811 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
18812 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18813 	}
18814 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
18815 
18816 	return (status);
18817 }
18818 
18819 
18820 /*
18821  *    Function: sd_send_scsi_MODE_SELECT
18822  *
18823  * Description: Utility function for issuing a scsi MODE SELECT command.
18824  *		Note: This routine uses a consistent implementation for Group0,
18825  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
 *
 *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
 *		bufaddr - buffer containing the page data to be sent to
 *			the target.
 *		buflen - size of the page to be transferred.
 *		save_page - boolean to determine if the SP bit should be set.
18834  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18835  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18836  *			to use the USCSI "direct" chain and bypass the normal
18837  *			command waitq.
18838  *
18839  * Return Code: 0   - Success
18840  *		errno return code from sd_send_scsi_cmd()
18841  *
18842  *     Context: Can sleep. Does not return until command is completed.
18843  */
18844 
18845 static int
18846 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18847 	size_t buflen,  uchar_t save_page, int path_flag)
18848 {
18849 	struct	scsi_extended_sense	sense_buf;
18850 	union scsi_cdb		cdb;
18851 	struct uscsi_cmd	ucmd_buf;
18852 	int			status;
18853 
18854 	ASSERT(un != NULL);
18855 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18856 	ASSERT(bufaddr != NULL);
18857 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18858 	    (cdbsize == CDB_GROUP2));
18859 
18860 	SD_TRACE(SD_LOG_IO, un,
18861 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
18862 
18863 	bzero(&cdb, sizeof (cdb));
18864 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18865 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18866 
18867 	/* Set the PF bit for many third party drives */
18868 	cdb.cdb_opaque[1] = 0x10;
18869 
18870 	/* Set the savepage(SP) bit if given */
18871 	if (save_page == SD_SAVE_PAGE) {
18872 		cdb.cdb_opaque[1] |= 0x01;
18873 	}
18874 
18875 	if (cdbsize == CDB_GROUP0) {
18876 		cdb.scc_cmd = SCMD_MODE_SELECT;
18877 		FORMG0COUNT(&cdb, buflen);
18878 	} else {
18879 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
18880 		FORMG1COUNT(&cdb, buflen);
18881 	}
18882 
18883 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18884 
18885 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18886 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18887 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18888 	ucmd_buf.uscsi_buflen	= buflen;
18889 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18890 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18891 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18892 	ucmd_buf.uscsi_timeout	= 60;
18893 
18894 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18895 	    UIO_SYSSPACE, path_flag);
18896 
18897 	switch (status) {
18898 	case 0:
18899 		break;	/* Success! */
18900 	case EIO:
18901 		switch (ucmd_buf.uscsi_status) {
18902 		case STATUS_RESERVATION_CONFLICT:
18903 			status = EACCES;
18904 			break;
18905 		default:
18906 			break;
18907 		}
18908 		break;
18909 	default:
18910 		break;
18911 	}
18912 
18913 	if (status == 0) {
18914 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
18915 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18916 	}
18917 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
18918 
18919 	return (status);
18920 }
18921 
18922 
18923 /*
18924  *    Function: sd_send_scsi_RDWR
18925  *
18926  * Description: Issue a scsi READ or WRITE command with the given parameters.
18927  *
18928  *   Arguments: un:      Pointer to the sd_lun struct for the target.
18929  *		cmd:	 SCMD_READ or SCMD_WRITE
 *		bufaddr: Address of caller's buffer for the RDWR data
 *			 (filled in on a read, sent on a write).
 *		buflen:  Length of caller's buffer for the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
18936  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18937  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18938  *			to use the USCSI "direct" chain and bypass the normal
18939  *			command waitq.
18940  *
18941  * Return Code: 0   - Success
18942  *		errno return code from sd_send_scsi_cmd()
18943  *
18944  *     Context: Can sleep. Does not return until command is completed.
18945  */
18946 
18947 static int
18948 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
18949 	size_t buflen, daddr_t start_block, int path_flag)
18950 {
18951 	struct	scsi_extended_sense	sense_buf;
18952 	union scsi_cdb		cdb;
18953 	struct uscsi_cmd	ucmd_buf;
18954 	uint32_t		block_count;
18955 	int			status;
18956 	int			cdbsize;
18957 	uchar_t			flag;
18958 
18959 	ASSERT(un != NULL);
18960 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18961 	ASSERT(bufaddr != NULL);
18962 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
18963 
18964 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
18965 
18966 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
18967 		return (EINVAL);
18968 	}
18969 
18970 	mutex_enter(SD_MUTEX(un));
18971 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
18972 	mutex_exit(SD_MUTEX(un));
18973 
18974 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
18975 
18976 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
18977 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
18978 	    bufaddr, buflen, start_block, block_count);
18979 
18980 	bzero(&cdb, sizeof (cdb));
18981 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18982 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18983 
	/*
	 * Compute the CDB size to use: a 6-byte (Group 0) CDB can address
	 * only 21 bits of LBA, a 10-byte (Group 1) CDB 32 bits, and a
	 * 16-byte (Group 4) CDB 64 bits. ATAPI devices do not support the
	 * 6-byte form, so they always get at least a Group 1 CDB.
	 */
18985 	if (start_block > 0xffffffff)
18986 		cdbsize = CDB_GROUP4;
18987 	else if ((start_block & 0xFFE00000) ||
18988 	    (un->un_f_cfg_is_atapi == TRUE))
18989 		cdbsize = CDB_GROUP1;
18990 	else
18991 		cdbsize = CDB_GROUP0;
18992 
18993 	switch (cdbsize) {
18994 	case CDB_GROUP0:	/* 6-byte CDBs */
18995 		cdb.scc_cmd = cmd;
18996 		FORMG0ADDR(&cdb, start_block);
18997 		FORMG0COUNT(&cdb, block_count);
18998 		break;
18999 	case CDB_GROUP1:	/* 10-byte CDBs */
19000 		cdb.scc_cmd = cmd | SCMD_GROUP1;
19001 		FORMG1ADDR(&cdb, start_block);
19002 		FORMG1COUNT(&cdb, block_count);
19003 		break;
19004 	case CDB_GROUP4:	/* 16-byte CDBs */
19005 		cdb.scc_cmd = cmd | SCMD_GROUP4;
19006 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
19007 		FORMG4COUNT(&cdb, block_count);
19008 		break;
19009 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
19010 	default:
19011 		/* All others reserved */
19012 		return (EINVAL);
19013 	}
19014 
19015 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
19016 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19017 
19018 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19019 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19020 	ucmd_buf.uscsi_bufaddr	= bufaddr;
19021 	ucmd_buf.uscsi_buflen	= buflen;
19022 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19023 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19024 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
19025 	ucmd_buf.uscsi_timeout	= 60;
19026 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19027 	    UIO_SYSSPACE, path_flag);
19028 	switch (status) {
19029 	case 0:
19030 		break;	/* Success! */
19031 	case EIO:
19032 		switch (ucmd_buf.uscsi_status) {
19033 		case STATUS_RESERVATION_CONFLICT:
19034 			status = EACCES;
19035 			break;
19036 		default:
19037 			break;
19038 		}
19039 		break;
19040 	default:
19041 		break;
19042 	}
19043 
19044 	if (status == 0) {
19045 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19046 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19047 	}
19048 
19049 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19050 
19051 	return (status);
19052 }
19053 
19054 
19055 /*
19056  *    Function: sd_send_scsi_LOG_SENSE
19057  *
19058  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19059  *
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Length of the buffer, in bytes.
 *		page_code: Page code of the log page to retrieve.
 *		page_control: Page control field for byte 2 of the CDB.
 *		param_ptr: Parameter pointer of the first parameter to return.
 *		path_flag: SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to bypass the normal command waitq.
19061  *
19062  * Return Code: 0   - Success
19063  *		errno return code from sd_send_scsi_cmd()
19064  *
19065  *     Context: Can sleep. Does not return until command is completed.
19066  */
19067 
19068 static int
19069 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19070 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19071 	int path_flag)
19072 
19073 {
19074 	struct	scsi_extended_sense	sense_buf;
19075 	union scsi_cdb		cdb;
19076 	struct uscsi_cmd	ucmd_buf;
19077 	int			status;
19078 
19079 	ASSERT(un != NULL);
19080 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19081 
19082 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19083 
19084 	bzero(&cdb, sizeof (cdb));
19085 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19086 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19087 
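	/*
	 * Build the LOG SENSE CDB: the page control and page code share
	 * byte 2, the parameter pointer occupies bytes 5-6, and the
	 * allocation length is set via FORMG1COUNT.
	 */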
19088 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19089 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19090 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19091 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19092 	FORMG1COUNT(&cdb, buflen);
19093 
19094 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19095 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19096 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19097 	ucmd_buf.uscsi_buflen	= buflen;
19098 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19099 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19100 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19101 	ucmd_buf.uscsi_timeout	= 60;
19102 
19103 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19104 	    UIO_SYSSPACE, path_flag);
19105 
19106 	switch (status) {
19107 	case 0:
19108 		break;
19109 	case EIO:
19110 		switch (ucmd_buf.uscsi_status) {
19111 		case STATUS_RESERVATION_CONFLICT:
19112 			status = EACCES;
19113 			break;
19114 		case STATUS_CHECK:
19115 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19116 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19117 				KEY_ILLEGAL_REQUEST) &&
19118 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19119 				/*
19120 				 * ASC 0x24: INVALID FIELD IN CDB
19121 				 */
19122 				switch (page_code) {
19123 				case START_STOP_CYCLE_PAGE:
19124 					/*
19125 					 * The start stop cycle counter is
19126 					 * implemented as page 0x31 in earlier
19127 					 * generation disks. In new generation
19128 					 * disks the start stop cycle counter is
19129 					 * implemented as page 0xE. To properly
19130 					 * handle this case if an attempt for
19131 					 * log page 0xE is made and fails we
19132 					 * will try again using page 0x31.
19133 					 *
19134 					 * Network storage BU committed to
19135 					 * maintain the page 0x31 for this
19136 					 * purpose and will not have any other
19137 					 * page implemented with page code 0x31
19138 					 * until all disks transition to the
19139 					 * standard page.
19140 					 */
19141 					mutex_enter(SD_MUTEX(un));
19142 					un->un_start_stop_cycle_page =
19143 					    START_STOP_CYCLE_VU_PAGE;
19144 					cdb.cdb_opaque[2] =
19145 					    (char)(page_control << 6) |
19146 					    un->un_start_stop_cycle_page;
19147 					mutex_exit(SD_MUTEX(un));
19148 					status = sd_send_scsi_cmd(
19149 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19150 					    UIO_SYSSPACE, path_flag);
19151 
19152 					break;
19153 				case TEMPERATURE_PAGE:
19154 					status = ENOTTY;
19155 					break;
19156 				default:
19157 					break;
19158 				}
19159 			}
19160 			break;
19161 		default:
19162 			break;
19163 		}
19164 		break;
19165 	default:
19166 		break;
19167 	}
19168 
19169 	if (status == 0) {
19170 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19171 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19172 	}
19173 
19174 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19175 
19176 	return (status);
19177 }
19178 
19179 
19180 /*
19181  *    Function: sdioctl
19182  *
19183  * Description: Driver's ioctl(9e) entry point function.
19184  *
19185  *   Arguments: dev     - device number
19186  *		cmd     - ioctl operation to be performed
19187  *		arg     - user argument, contains data to be set or reference
19188  *			  parameter for get
19189  *		flag    - bit flag, indicating open settings, 32/64 bit type
19190  *		cred_p  - user credential pointer
19191  *		rval_p  - calling process return value (OPT)
19192  *
19193  * Return Code: EINVAL
19194  *		ENOTTY
19195  *		ENXIO
19196  *		EIO
19197  *		EFAULT
19198  *		ENOTSUP
19199  *		EPERM
19200  *
19201  *     Context: Called from the device switch at normal priority.
19202  */
19203 
19204 static int
19205 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19206 {
19207 	struct sd_lun	*un = NULL;
19208 	int		err = 0;
19209 	int		i = 0;
19210 	cred_t		*cr;
19211 	int		tmprval = EINVAL;
	int		is_valid;
19213 
	/*
	 * All device accesses go through sdstrategy, where we check on
	 * suspend status.
	 */
19218 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19219 		return (ENXIO);
19220 	}
19221 
19222 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19223 
19224 
19225 	is_valid = SD_IS_VALID_LABEL(un);
19226 
	/*
	 * Moved this wait from sd_uscsi_strategy to here for
	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
	 * in a call to sd_uscsi_strategy.
	 */
19233 	mutex_enter(SD_MUTEX(un));
19234 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19235 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19236 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19237 	}
	/*
	 * Incrementing the counter here protects commands from now
	 * through to the top of sd_uscsi_strategy. Without the
	 * increment, a power down, for example, could get in
	 * after the above state check is made and before execution
	 * gets to the top of sd_uscsi_strategy.
	 * That would cause problems.
	 */
19246 	un->un_ncmds_in_driver++;
19247 
19248 	if (!is_valid &&
19249 	    (flag & (FNDELAY | FNONBLOCK))) {
19250 		switch (cmd) {
19251 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19252 		case DKIOCGVTOC:
19253 		case DKIOCGAPART:
19254 		case DKIOCPARTINFO:
19255 		case DKIOCSGEOM:
19256 		case DKIOCSAPART:
19257 		case DKIOCGETEFI:
19258 		case DKIOCPARTITION:
19259 		case DKIOCSVTOC:
19260 		case DKIOCSETEFI:
19261 		case DKIOCGMBOOT:
19262 		case DKIOCSMBOOT:
19263 		case DKIOCG_PHYGEOM:
19264 		case DKIOCG_VIRTGEOM:
19265 			/* let cmlb handle it */
19266 			goto skip_ready_valid;
19267 
19268 		case CDROMPAUSE:
19269 		case CDROMRESUME:
19270 		case CDROMPLAYMSF:
19271 		case CDROMPLAYTRKIND:
19272 		case CDROMREADTOCHDR:
19273 		case CDROMREADTOCENTRY:
19274 		case CDROMSTOP:
19275 		case CDROMSTART:
19276 		case CDROMVOLCTRL:
19277 		case CDROMSUBCHNL:
19278 		case CDROMREADMODE2:
19279 		case CDROMREADMODE1:
19280 		case CDROMREADOFFSET:
19281 		case CDROMSBLKMODE:
19282 		case CDROMGBLKMODE:
19283 		case CDROMGDRVSPEED:
19284 		case CDROMSDRVSPEED:
19285 		case CDROMCDDA:
19286 		case CDROMCDXA:
19287 		case CDROMSUBCODE:
19288 			if (!ISCD(un)) {
19289 				un->un_ncmds_in_driver--;
19290 				ASSERT(un->un_ncmds_in_driver >= 0);
19291 				mutex_exit(SD_MUTEX(un));
19292 				return (ENOTTY);
19293 			}
19294 			break;
19295 		case FDEJECT:
19296 		case DKIOCEJECT:
19297 		case CDROMEJECT:
19298 			if (!un->un_f_eject_media_supported) {
19299 				un->un_ncmds_in_driver--;
19300 				ASSERT(un->un_ncmds_in_driver >= 0);
19301 				mutex_exit(SD_MUTEX(un));
19302 				return (ENOTTY);
19303 			}
19304 			break;
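		/*
		 * Before skipping the ready/valid check for a cache flush,
		 * make sure the unit is at least ready; if it is not, fail
		 * the request with EIO.
		 */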
19305 		case DKIOCFLUSHWRITECACHE:
19306 			mutex_exit(SD_MUTEX(un));
19307 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19308 			if (err != 0) {
19309 				mutex_enter(SD_MUTEX(un));
19310 				un->un_ncmds_in_driver--;
19311 				ASSERT(un->un_ncmds_in_driver >= 0);
19312 				mutex_exit(SD_MUTEX(un));
19313 				return (EIO);
19314 			}
19315 			mutex_enter(SD_MUTEX(un));
19316 			/* FALLTHROUGH */
19317 		case DKIOCREMOVABLE:
19318 		case DKIOCHOTPLUGGABLE:
19319 		case DKIOCINFO:
19320 		case DKIOCGMEDIAINFO:
19321 		case MHIOCENFAILFAST:
19322 		case MHIOCSTATUS:
19323 		case MHIOCTKOWN:
19324 		case MHIOCRELEASE:
19325 		case MHIOCGRP_INKEYS:
19326 		case MHIOCGRP_INRESV:
19327 		case MHIOCGRP_REGISTER:
19328 		case MHIOCGRP_RESERVE:
19329 		case MHIOCGRP_PREEMPTANDABORT:
19330 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19331 		case CDROMCLOSETRAY:
19332 		case USCSICMD:
19333 			goto skip_ready_valid;
19334 		default:
19335 			break;
19336 		}
19337 
19338 		mutex_exit(SD_MUTEX(un));
19339 		err = sd_ready_and_valid(un);
19340 		mutex_enter(SD_MUTEX(un));
19341 
19342 		if (err != SD_READY_VALID) {
19343 			switch (cmd) {
19344 			case DKIOCSTATE:
19345 			case CDROMGDRVSPEED:
19346 			case CDROMSDRVSPEED:
19347 			case FDEJECT:	/* for eject command */
19348 			case DKIOCEJECT:
19349 			case CDROMEJECT:
19350 			case DKIOCREMOVABLE:
19351 			case DKIOCHOTPLUGGABLE:
19352 				break;
19353 			default:
19354 				if (un->un_f_has_removable_media) {
19355 					err = ENXIO;
19356 				} else {
19357 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19358 					if (err == SD_RESERVED_BY_OTHERS) {
19359 						err = EACCES;
19360 					} else {
19361 						err = EIO;
19362 					}
19363 				}
19364 				un->un_ncmds_in_driver--;
19365 				ASSERT(un->un_ncmds_in_driver >= 0);
19366 				mutex_exit(SD_MUTEX(un));
19367 				return (err);
19368 			}
19369 		}
19370 	}
19371 
19372 skip_ready_valid:
19373 	mutex_exit(SD_MUTEX(un));
19374 
19375 	switch (cmd) {
19376 	case DKIOCINFO:
19377 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19378 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19379 		break;
19380 
19381 	case DKIOCGMEDIAINFO:
19382 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19383 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19384 		break;
19385 
19386 	case DKIOCGGEOM:
19387 	case DKIOCGVTOC:
19388 	case DKIOCGAPART:
19389 	case DKIOCPARTINFO:
19390 	case DKIOCSGEOM:
19391 	case DKIOCSAPART:
19392 	case DKIOCGETEFI:
19393 	case DKIOCPARTITION:
19394 	case DKIOCSVTOC:
19395 	case DKIOCSETEFI:
19396 	case DKIOCGMBOOT:
19397 	case DKIOCSMBOOT:
19398 	case DKIOCG_PHYGEOM:
19399 	case DKIOCG_VIRTGEOM:
19400 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19401 
19402 		/* TUR should spin up */
19403 
19404 		if (un->un_f_has_removable_media)
19405 			err = sd_send_scsi_TEST_UNIT_READY(un,
19406 			    SD_CHECK_FOR_MEDIA);
19407 		else
19408 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19409 
19410 		if (err != 0)
19411 			break;
19412 
19413 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19414 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19415 
		if ((err == 0) &&
		    ((cmd == DKIOCSETEFI) ||
		    ((un->un_f_pkstats_enabled) &&
		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC)))) {
19420 
19421 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19422 			    (void *)SD_PATH_DIRECT);
19423 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19424 				sd_set_pstats(un);
19425 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19426 				    "sd_ioctl: un:0x%p pstats created and "
19427 				    "set\n", un);
19428 			}
19429 		}
19430 
19431 		if ((cmd == DKIOCSVTOC) ||
19432 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19433 
19434 			mutex_enter(SD_MUTEX(un));
19435 			if (un->un_f_devid_supported &&
19436 			    (un->un_f_opt_fab_devid == TRUE)) {
19437 				if (un->un_devid == NULL) {
19438 					sd_register_devid(un, SD_DEVINFO(un),
19439 					    SD_TARGET_IS_UNRESERVED);
19440 				} else {
19441 					/*
19442 					 * The device id for this disk
19443 					 * has been fabricated. The
19444 					 * device id must be preserved
19445 					 * by writing it back out to
19446 					 * disk.
19447 					 */
19448 					if (sd_write_deviceid(un) != 0) {
19449 						ddi_devid_free(un->un_devid);
19450 						un->un_devid = NULL;
19451 					}
19452 				}
19453 			}
19454 			mutex_exit(SD_MUTEX(un));
19455 		}
19456 
19457 		break;
19458 
19459 	case DKIOCLOCK:
19460 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19461 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19462 		    SD_PATH_STANDARD);
19463 		break;
19464 
19465 	case DKIOCUNLOCK:
19466 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19467 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19468 		    SD_PATH_STANDARD);
19469 		break;
19470 
19471 	case DKIOCSTATE: {
19472 		enum dkio_state		state;
19473 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19474 
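		/*
		 * sd_check_media() waits for the media state to change from
		 * the state passed in; un_mediastate then holds the new
		 * state to report back to the caller.
		 */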
19475 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19476 			err = EFAULT;
19477 		} else {
19478 			err = sd_check_media(dev, state);
19479 			if (err == 0) {
19480 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19481 				    sizeof (int), flag) != 0)
19482 					err = EFAULT;
19483 			}
19484 		}
19485 		break;
19486 	}
19487 
19488 	case DKIOCREMOVABLE:
19489 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19490 		i = un->un_f_has_removable_media ? 1 : 0;
19491 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19492 			err = EFAULT;
19493 		} else {
19494 			err = 0;
19495 		}
19496 		break;
19497 
19498 	case DKIOCHOTPLUGGABLE:
19499 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19500 		i = un->un_f_is_hotpluggable ? 1 : 0;
19501 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19502 			err = EFAULT;
19503 		} else {
19504 			err = 0;
19505 		}
19506 		break;
19507 
19508 	case DKIOCGTEMPERATURE:
19509 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19510 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19511 		break;
19512 
19513 	case MHIOCENFAILFAST:
19514 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19515 		if ((err = drv_priv(cred_p)) == 0) {
19516 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19517 		}
19518 		break;
19519 
19520 	case MHIOCTKOWN:
19521 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19522 		if ((err = drv_priv(cred_p)) == 0) {
19523 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19524 		}
19525 		break;
19526 
19527 	case MHIOCRELEASE:
19528 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19529 		if ((err = drv_priv(cred_p)) == 0) {
19530 			err = sd_mhdioc_release(dev);
19531 		}
19532 		break;
19533 
19534 	case MHIOCSTATUS:
19535 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19536 		if ((err = drv_priv(cred_p)) == 0) {
19537 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19538 			case 0:
19539 				err = 0;
19540 				break;
19541 			case EACCES:
19542 				*rval_p = 1;
19543 				err = 0;
19544 				break;
19545 			default:
19546 				err = EIO;
19547 				break;
19548 			}
19549 		}
19550 		break;
19551 
19552 	case MHIOCQRESERVE:
19553 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19554 		if ((err = drv_priv(cred_p)) == 0) {
19555 			err = sd_reserve_release(dev, SD_RESERVE);
19556 		}
19557 		break;
19558 
19559 	case MHIOCREREGISTERDEVID:
19560 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19561 		if (drv_priv(cred_p) == EPERM) {
19562 			err = EPERM;
19563 		} else if (!un->un_f_devid_supported) {
19564 			err = ENOTTY;
19565 		} else {
19566 			err = sd_mhdioc_register_devid(dev);
19567 		}
19568 		break;
19569 
19570 	case MHIOCGRP_INKEYS:
19571 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19572 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19573 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19574 				err = ENOTSUP;
19575 			} else {
19576 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19577 				    flag);
19578 			}
19579 		}
19580 		break;
19581 
19582 	case MHIOCGRP_INRESV:
19583 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19584 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19585 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19586 				err = ENOTSUP;
19587 			} else {
19588 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19589 			}
19590 		}
19591 		break;
19592 
19593 	case MHIOCGRP_REGISTER:
19594 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19595 		if ((err = drv_priv(cred_p)) != EPERM) {
19596 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19597 				err = ENOTSUP;
19598 			} else if (arg != NULL) {
19599 				mhioc_register_t reg;
19600 				if (ddi_copyin((void *)arg, &reg,
19601 				    sizeof (mhioc_register_t), flag) != 0) {
19602 					err = EFAULT;
19603 				} else {
19604 					err =
19605 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19606 					    un, SD_SCSI3_REGISTER,
19607 					    (uchar_t *)&reg);
19608 				}
19609 			}
19610 		}
19611 		break;
19612 
19613 	case MHIOCGRP_RESERVE:
19614 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19615 		if ((err = drv_priv(cred_p)) != EPERM) {
19616 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19617 				err = ENOTSUP;
19618 			} else if (arg != NULL) {
19619 				mhioc_resv_desc_t resv_desc;
19620 				if (ddi_copyin((void *)arg, &resv_desc,
19621 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19622 					err = EFAULT;
19623 				} else {
19624 					err =
19625 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19626 					    un, SD_SCSI3_RESERVE,
19627 					    (uchar_t *)&resv_desc);
19628 				}
19629 			}
19630 		}
19631 		break;
19632 
19633 	case MHIOCGRP_PREEMPTANDABORT:
19634 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19635 		if ((err = drv_priv(cred_p)) != EPERM) {
19636 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19637 				err = ENOTSUP;
19638 			} else if (arg != NULL) {
19639 				mhioc_preemptandabort_t preempt_abort;
19640 				if (ddi_copyin((void *)arg, &preempt_abort,
19641 				    sizeof (mhioc_preemptandabort_t),
19642 				    flag) != 0) {
19643 					err = EFAULT;
19644 				} else {
19645 					err =
19646 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19647 					    un, SD_SCSI3_PREEMPTANDABORT,
19648 					    (uchar_t *)&preempt_abort);
19649 				}
19650 			}
19651 		}
19652 		break;
19653 
19654 	case MHIOCGRP_REGISTERANDIGNOREKEY:
19655 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19656 		if ((err = drv_priv(cred_p)) != EPERM) {
19657 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19658 				err = ENOTSUP;
19659 			} else if (arg != NULL) {
19660 				mhioc_registerandignorekey_t r_and_i;
19661 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19662 				    sizeof (mhioc_registerandignorekey_t),
19663 				    flag) != 0) {
19664 					err = EFAULT;
19665 				} else {
19666 					err =
19667 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19668 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19669 					    (uchar_t *)&r_and_i);
19670 				}
19671 			}
19672 		}
19673 		break;
19674 
19675 	case USCSICMD:
19676 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
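		/*
		 * Allow the command if either the ioctl credentials or the
		 * current thread's credentials carry sufficient privilege.
		 */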
19677 		cr = ddi_get_cred();
19678 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
19679 			err = EPERM;
19680 		} else {
19681 			enum uio_seg	uioseg;
19682 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
19683 			    UIO_USERSPACE;
19684 			if (un->un_f_format_in_progress == TRUE) {
19685 				err = EAGAIN;
19686 				break;
19687 			}
19688 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
19689 			    flag, uioseg, SD_PATH_STANDARD);
19690 		}
19691 		break;
19692 
19693 	case CDROMPAUSE:
19694 	case CDROMRESUME:
19695 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
19696 		if (!ISCD(un)) {
19697 			err = ENOTTY;
19698 		} else {
19699 			err = sr_pause_resume(dev, cmd);
19700 		}
19701 		break;
19702 
19703 	case CDROMPLAYMSF:
19704 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
19705 		if (!ISCD(un)) {
19706 			err = ENOTTY;
19707 		} else {
19708 			err = sr_play_msf(dev, (caddr_t)arg, flag);
19709 		}
19710 		break;
19711 
19712 	case CDROMPLAYTRKIND:
19713 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
19714 #if defined(__i386) || defined(__amd64)
19715 		/*
19716 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
19717 		 */
19718 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19719 #else
19720 		if (!ISCD(un)) {
19721 #endif
19722 			err = ENOTTY;
19723 		} else {
19724 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
19725 		}
19726 		break;
19727 
19728 	case CDROMREADTOCHDR:
19729 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
19730 		if (!ISCD(un)) {
19731 			err = ENOTTY;
19732 		} else {
19733 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
19734 		}
19735 		break;
19736 
19737 	case CDROMREADTOCENTRY:
19738 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
19739 		if (!ISCD(un)) {
19740 			err = ENOTTY;
19741 		} else {
19742 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
19743 		}
19744 		break;
19745 
19746 	case CDROMSTOP:
19747 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
19748 		if (!ISCD(un)) {
19749 			err = ENOTTY;
19750 		} else {
19751 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
19752 			    SD_PATH_STANDARD);
19753 		}
19754 		break;
19755 
19756 	case CDROMSTART:
19757 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
19758 		if (!ISCD(un)) {
19759 			err = ENOTTY;
19760 		} else {
19761 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19762 			    SD_PATH_STANDARD);
19763 		}
19764 		break;
19765 
19766 	case CDROMCLOSETRAY:
19767 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
19768 		if (!ISCD(un)) {
19769 			err = ENOTTY;
19770 		} else {
19771 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
19772 			    SD_PATH_STANDARD);
19773 		}
19774 		break;
19775 
19776 	case FDEJECT:	/* for eject command */
19777 	case DKIOCEJECT:
19778 	case CDROMEJECT:
19779 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
19780 		if (!un->un_f_eject_media_supported) {
19781 			err = ENOTTY;
19782 		} else {
19783 			err = sr_eject(dev);
19784 		}
19785 		break;
19786 
19787 	case CDROMVOLCTRL:
19788 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
19789 		if (!ISCD(un)) {
19790 			err = ENOTTY;
19791 		} else {
19792 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
19793 		}
19794 		break;
19795 
19796 	case CDROMSUBCHNL:
19797 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
19798 		if (!ISCD(un)) {
19799 			err = ENOTTY;
19800 		} else {
19801 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
19802 		}
19803 		break;
19804 
19805 	case CDROMREADMODE2:
19806 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
19807 		if (!ISCD(un)) {
19808 			err = ENOTTY;
19809 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19810 			/*
19811 			 * If the drive supports READ CD, use that instead of
19812 			 * switching the LBA size via a MODE SELECT
19813 			 * Block Descriptor
19814 			 */
19815 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
19816 		} else {
19817 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
19818 		}
19819 		break;
19820 
19821 	case CDROMREADMODE1:
19822 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
19823 		if (!ISCD(un)) {
19824 			err = ENOTTY;
19825 		} else {
19826 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
19827 		}
19828 		break;
19829 
19830 	case CDROMREADOFFSET:
19831 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
19832 		if (!ISCD(un)) {
19833 			err = ENOTTY;
19834 		} else {
19835 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
19836 			    flag);
19837 		}
19838 		break;
19839 
19840 	case CDROMSBLKMODE:
19841 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
19842 		/*
		 * There is no means of changing the block size on ATAPI
		 * drives, so return ENOTTY if the drive type is ATAPI.
19845 		 */
19846 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19847 			err = ENOTTY;
19848 		} else if (un->un_f_mmc_cap == TRUE) {
19849 
19850 			/*
			 * MMC devices do not support changing the
			 * logical block size.
			 *
			 * Note: EINVAL is being returned instead of ENOTTY to
			 * maintain consistency with the original mmc
			 * driver update.
19857 			 */
19858 			err = EINVAL;
19859 		} else {
19860 			mutex_enter(SD_MUTEX(un));
19861 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
19862 			    (un->un_ncmds_in_transport > 0)) {
19863 				mutex_exit(SD_MUTEX(un));
19864 				err = EINVAL;
19865 			} else {
19866 				mutex_exit(SD_MUTEX(un));
19867 				err = sr_change_blkmode(dev, cmd, arg, flag);
19868 			}
19869 		}
19870 		break;
19871 
19872 	case CDROMGBLKMODE:
19873 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
19874 		if (!ISCD(un)) {
19875 			err = ENOTTY;
19876 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
19877 		    (un->un_f_blockcount_is_valid != FALSE)) {
19878 			/*
			 * The drive is an ATAPI drive, so return the target
			 * block size since we cannot change the blocksize on
			 * ATAPI drives. Used primarily to detect if an ATAPI
			 * cdrom is present.
19883 			 */
19884 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
19885 			    sizeof (int), flag) != 0) {
19886 				err = EFAULT;
19887 			} else {
19888 				err = 0;
19889 			}
19890 
19891 		} else {
19892 			/*
19893 			 * Drive supports changing block sizes via a Mode
19894 			 * Select.
19895 			 */
19896 			err = sr_change_blkmode(dev, cmd, arg, flag);
19897 		}
19898 		break;
19899 
19900 	case CDROMGDRVSPEED:
19901 	case CDROMSDRVSPEED:
19902 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
19903 		if (!ISCD(un)) {
19904 			err = ENOTTY;
19905 		} else if (un->un_f_mmc_cap == TRUE) {
19906 			/*
			 * Note: In the future the driver implementation
			 * for getting and setting cd speed should entail:
			 * 1) If non-mmc try the Toshiba mode page
			 *    (sr_change_speed)
			 * 2) If mmc but no support for Real Time Streaming try
			 *    the SET CD SPEED (0xBB) command
			 *    (sr_atapi_change_speed)
			 * 3) If mmc and support for Real Time Streaming
			 *    try the GET PERFORMANCE and SET STREAMING
			 *    commands (not yet implemented, 4380808)
19918 			 */
19919 			/*
			 * As per the recent MMC spec, CD-ROM speed is variable
			 * and changes with LBA. Since there is no such
			 * thing as a single drive speed now, fail this ioctl.
			 *
			 * Note: EINVAL is returned for consistency with the
			 * original implementation, which supported getting
			 * the drive speed of mmc devices but not setting
			 * it; thus EINVAL would be returned
			 * if a set request was made for an mmc device.
			 * We no longer support get or set speed for
			 * mmc but need to remain consistent with regard
			 * to the error code returned.
19932 			 */
19933 			err = EINVAL;
19934 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19935 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
19936 		} else {
19937 			err = sr_change_speed(dev, cmd, arg, flag);
19938 		}
19939 		break;
19940 
19941 	case CDROMCDDA:
19942 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
19943 		if (!ISCD(un)) {
19944 			err = ENOTTY;
19945 		} else {
19946 			err = sr_read_cdda(dev, (void *)arg, flag);
19947 		}
19948 		break;
19949 
19950 	case CDROMCDXA:
19951 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
19952 		if (!ISCD(un)) {
19953 			err = ENOTTY;
19954 		} else {
19955 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
19956 		}
19957 		break;
19958 
19959 	case CDROMSUBCODE:
19960 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
19961 		if (!ISCD(un)) {
19962 			err = ENOTTY;
19963 		} else {
19964 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
19965 		}
19966 		break;
19967 
19968 
19969 #ifdef SDDEBUG
19970 /* RESET/ABORTS testing ioctls */
19971 	case DKIOCRESET: {
19972 		int	reset_level;
19973 
19974 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
19975 			err = EFAULT;
19976 		} else {
19977 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
19978 			    "reset_level = 0x%x\n", reset_level);
19979 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
19980 				err = 0;
19981 			} else {
19982 				err = EIO;
19983 			}
19984 		}
19985 		break;
19986 	}
19987 
19988 	case DKIOCABORT:
19989 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
19990 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
19991 			err = 0;
19992 		} else {
19993 			err = EIO;
19994 		}
19995 		break;
19996 #endif
19997 
19998 #ifdef SD_FAULT_INJECTION
19999 /* SDIOC FaultInjection testing ioctls */
20000 	case SDIOCSTART:
20001 	case SDIOCSTOP:
20002 	case SDIOCINSERTPKT:
20003 	case SDIOCINSERTXB:
20004 	case SDIOCINSERTUN:
20005 	case SDIOCINSERTARQ:
20006 	case SDIOCPUSH:
20007 	case SDIOCRETRIEVE:
20008 	case SDIOCRUN:
20009 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
20010 		    "SDIOC detected cmd:0x%X:\n", cmd);
20011 		/* call error generator */
20012 		sd_faultinjection_ioctl(cmd, arg, un);
20013 		err = 0;
20014 		break;
20015 
20016 #endif /* SD_FAULT_INJECTION */
20017 
20018 	case DKIOCFLUSHWRITECACHE:
20019 		{
20020 			struct dk_callback *dkc = (struct dk_callback *)arg;
20021 
20022 			mutex_enter(SD_MUTEX(un));
20023 			if (!un->un_f_sync_cache_supported ||
20024 			    !un->un_f_write_cache_enabled) {
20025 				err = un->un_f_sync_cache_supported ?
20026 				    0 : ENOTSUP;
20027 				mutex_exit(SD_MUTEX(un));
20028 				if ((flag & FKIOCTL) && dkc != NULL &&
20029 				    dkc->dkc_callback != NULL) {
20030 					(*dkc->dkc_callback)(dkc->dkc_cookie,
20031 					    err);
20032 					/*
20033 					 * Did callback and reported error.
20034 					 * Since we did a callback, ioctl
20035 					 * should return 0.
20036 					 */
20037 					err = 0;
20038 				}
20039 				break;
20040 			}
20041 			mutex_exit(SD_MUTEX(un));
20042 
20043 			if ((flag & FKIOCTL) && dkc != NULL &&
20044 			    dkc->dkc_callback != NULL) {
20045 				/* async SYNC CACHE request */
20046 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20047 			} else {
20048 				/* synchronous SYNC CACHE request */
20049 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20050 			}
20051 		}
20052 		break;
20053 
20054 	case DKIOCGETWCE: {
20055 
20056 		int wce;
20057 
20058 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20059 			break;
20060 		}
20061 
20062 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20063 			err = EFAULT;
20064 		}
20065 		break;
20066 	}
20067 
20068 	case DKIOCSETWCE: {
20069 
20070 		int wce, sync_supported;
20071 
20072 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20073 			err = EFAULT;
20074 			break;
20075 		}
20076 
20077 		/*
20078 		 * Synchronize multiple threads trying to enable
20079 		 * or disable the cache via the un_f_wcc_cv
20080 		 * condition variable.
20081 		 */
20082 		mutex_enter(SD_MUTEX(un));
20083 
20084 		/*
20085 		 * Don't allow the cache to be enabled if the
20086 		 * config file has it disabled.
20087 		 */
20088 		if (un->un_f_opt_disable_cache && wce) {
20089 			mutex_exit(SD_MUTEX(un));
20090 			err = EINVAL;
20091 			break;
20092 		}
20093 
20094 		/*
20095 		 * Wait for write cache change in progress
20096 		 * bit to be clear before proceeding.
20097 		 */
20098 		while (un->un_f_wcc_inprog)
20099 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20100 
20101 		un->un_f_wcc_inprog = 1;
20102 
20103 		if (un->un_f_write_cache_enabled && wce == 0) {
20104 			/*
20105 			 * Disable the write cache.  Don't clear
20106 			 * un_f_write_cache_enabled until after
20107 			 * the mode select and flush are complete.
20108 			 */
20109 			sync_supported = un->un_f_sync_cache_supported;
20110 			mutex_exit(SD_MUTEX(un));
20111 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20112 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
20113 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20114 			}
20115 
20116 			mutex_enter(SD_MUTEX(un));
20117 			if (err == 0) {
20118 				un->un_f_write_cache_enabled = 0;
20119 			}
20120 
20121 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20122 			/*
20123 			 * Set un_f_write_cache_enabled first, so there is
20124 			 * no window where the cache is enabled, but the
20125 			 * bit says it isn't.
20126 			 */
20127 			un->un_f_write_cache_enabled = 1;
20128 			mutex_exit(SD_MUTEX(un));
20129 
20130 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20131 			    SD_CACHE_ENABLE);
20132 
20133 			mutex_enter(SD_MUTEX(un));
20134 
20135 			if (err) {
20136 				un->un_f_write_cache_enabled = 0;
20137 			}
20138 		}
20139 
20140 		un->un_f_wcc_inprog = 0;
20141 		cv_broadcast(&un->un_wcc_cv);
20142 		mutex_exit(SD_MUTEX(un));
20143 		break;
20144 	}
20145 
20146 	default:
20147 		err = ENOTTY;
20148 		break;
20149 	}
20150 	mutex_enter(SD_MUTEX(un));
20151 	un->un_ncmds_in_driver--;
20152 	ASSERT(un->un_ncmds_in_driver >= 0);
20153 	mutex_exit(SD_MUTEX(un));
20154 
20155 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20156 	return (err);
20157 }
20158 
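/*
 * A minimal, hypothetical userland sketch of the write cache ioctls
 * handled in sdioctl() above (not part of this driver); the device
 * path is illustrative only. DKIOCSETWCE likewise takes a pointer to
 * an int (nonzero to enable the cache):
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int wce;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCGETWCE, &wce) == 0) {
 *		(void) printf("write cache %sabled\n", wce ? "en" : "dis");
 *		if (wce) {
 *			(void) ioctl(fd, DKIOCFLUSHWRITECACHE, NULL);
 *		}
 *	}
 */
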
20159 
20160 /*
20161  *    Function: sd_dkio_ctrl_info
20162  *
20163  * Description: This routine is the driver entry point for handling controller
20164  *		information ioctl requests (DKIOCINFO).
20165  *
20166  *   Arguments: dev  - the device number
20167  *		arg  - pointer to user provided dk_cinfo structure
20168  *		       specifying the controller type and attributes.
20169  *		flag - this argument is a pass through to ddi_copyxxx()
20170  *		       directly from the mode argument of ioctl().
20171  *
20172  * Return Code: 0
20173  *		EFAULT
20174  *		ENXIO
20175  */
20176 
20177 static int
20178 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20179 {
20180 	struct sd_lun	*un = NULL;
20181 	struct dk_cinfo	*info;
20182 	dev_info_t	*pdip;
20183 	int		lun, tgt;
20184 
20185 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20186 		return (ENXIO);
20187 	}
20188 
20189 	info = (struct dk_cinfo *)
20190 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20191 
20192 	switch (un->un_ctype) {
20193 	case CTYPE_CDROM:
20194 		info->dki_ctype = DKC_CDROM;
20195 		break;
20196 	default:
20197 		info->dki_ctype = DKC_SCSI_CCS;
20198 		break;
20199 	}
20200 	pdip = ddi_get_parent(SD_DEVINFO(un));
20201 	info->dki_cnum = ddi_get_instance(pdip);
20202 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20203 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20204 	} else {
20205 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20206 		    DK_DEVLEN - 1);
20207 	}
20208 
20209 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20210 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20211 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20212 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20213 
20214 	/* Unit Information */
20215 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20216 	info->dki_slave = ((tgt << 3) | lun);
20217 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20218 	    DK_DEVLEN - 1);
20219 	info->dki_flags = DKI_FMTVOL;
20220 	info->dki_partition = SDPART(dev);
20221 
20222 	/* Max Transfer size of this device in blocks */
20223 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20224 	info->dki_addr = 0;
20225 	info->dki_space = 0;
20226 	info->dki_prio = 0;
20227 	info->dki_vec = 0;
20228 
20229 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20230 		kmem_free(info, sizeof (struct dk_cinfo));
20231 		return (EFAULT);
20232 	} else {
20233 		kmem_free(info, sizeof (struct dk_cinfo));
20234 		return (0);
20235 	}
20236 }
20237 
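/*
 * A minimal, hypothetical userland sketch of the DKIOCINFO ioctl
 * implemented above (not part of this driver); the device path is
 * illustrative only:
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct dk_cinfo info;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCINFO, &info) == 0) {
 *		(void) printf("ctrl %s%d, unit %d, max xfer %u blocks\n",
 *		    info.dki_cname, info.dki_cnum, info.dki_unit,
 *		    info.dki_maxtransfer);
 *	}
 */
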
20238 
20239 /*
20240  *    Function: sd_get_media_info
20241  *
20242  * Description: This routine is the driver entry point for handling ioctl
20243  *		requests for the media type or command set profile used by the
20244  *		drive to operate on the media (DKIOCGMEDIAINFO).
20245  *
20246  *   Arguments: dev	- the device number
20247  *		arg	- pointer to user provided dk_minfo structure
20248  *			  specifying the media type, logical block size and
20249  *			  drive capacity.
20250  *		flag	- this argument is a pass through to ddi_copyxxx()
20251  *			  directly from the mode argument of ioctl().
20252  *
20253  * Return Code: 0
20254  *		EACCES
20255  *		EFAULT
20256  *		ENXIO
20257  *		EIO
20258  */
20259 
20260 static int
20261 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20262 {
20263 	struct sd_lun		*un = NULL;
20264 	struct uscsi_cmd	com;
20265 	struct scsi_inquiry	*sinq;
20266 	struct dk_minfo		media_info;
20267 	u_longlong_t		media_capacity;
20268 	uint64_t		capacity;
20269 	uint_t			lbasize;
20270 	uchar_t			*out_data;
20271 	uchar_t			*rqbuf;
20272 	int			rval = 0;
20273 	int			rtn;
20274 
20275 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20276 	    (un->un_state == SD_STATE_OFFLINE)) {
20277 		return (ENXIO);
20278 	}
20279 
20280 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20281 
20282 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20283 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20284 
20285 	/* Issue a TUR to determine if the drive is ready with media present */
20286 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20287 	if (rval == ENXIO) {
20288 		goto done;
20289 	}
20290 
20291 	/* Now get configuration data */
20292 	if (ISCD(un)) {
20293 		media_info.dki_media_type = DK_CDROM;
20294 
20295 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20296 		if (un->un_f_mmc_cap == TRUE) {
20297 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20298 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
20299 			    SD_PATH_STANDARD);
20300 
20301 			if (rtn) {
20302 				/*
20303 				 * Failed for other than an illegal request
20304 				 * or command not supported
20305 				 */
20306 				if ((com.uscsi_status == STATUS_CHECK) &&
20307 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20308 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20309 					    (rqbuf[12] != 0x20)) {
20310 						rval = EIO;
20311 						goto done;
20312 					}
20313 				}
20314 			} else {
20315 				/*
20316 				 * The GET CONFIGURATION command succeeded
20317 				 * so set the media type according to the
20318 				 * returned data
20319 				 */
20320 				media_info.dki_media_type = out_data[6];
20321 				media_info.dki_media_type <<= 8;
20322 				media_info.dki_media_type |= out_data[7];
20323 			}
20324 		}
20325 	} else {
20326 		/*
20327 		 * The profile list is not available, so we attempt to identify
20328 		 * the media type based on the inquiry data
20329 		 */
20330 		sinq = un->un_sd->sd_inq;
20331 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
20332 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
20333 			/* This is a direct access device  or optical disk */
20334 			media_info.dki_media_type = DK_FIXED_DISK;
20335 
20336 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20337 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20338 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20339 					media_info.dki_media_type = DK_ZIP;
20340 				} else if (
20341 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20342 					media_info.dki_media_type = DK_JAZ;
20343 				}
20344 			}
20345 		} else {
20346 			/*
20347 			 * Not a CD, direct access, or optical disk, so
20348 			 * return unknown media.
20349 			 */
20350 			media_info.dki_media_type = DK_UNKNOWN;
20351 		}
20352 	}
20353 
20354 	/* Now read the capacity so we can provide the lbasize and capacity */
20355 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20356 	    SD_PATH_DIRECT)) {
20357 	case 0:
20358 		break;
20359 	case EACCES:
20360 		rval = EACCES;
20361 		goto done;
20362 	default:
20363 		rval = EIO;
20364 		goto done;
20365 	}
20366 
20367 	media_info.dki_lbsize = lbasize;
20368 	media_capacity = capacity;
20369 
20370 	/*
20371 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
20372 	 * un->un_sys_blocksize chunks. So we need to convert it into
20373 	 * lbasize chunks.
20374 	 */
20375 	media_capacity *= un->un_sys_blocksize;
20376 	media_capacity /= lbasize;
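	/*
	 * For example (hypothetical numbers): a capacity of 1000000
	 * 512-byte system blocks with a 2048-byte lbasize yields
	 * (1000000 * 512) / 2048 = 250000 blocks for dki_capacity.
	 */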
20377 	media_info.dki_capacity = media_capacity;
20378 
20379 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20380 		rval = EFAULT;
20381 		/* Use a goto; code may be added below this in the future. */
20382 		goto done;
20383 	}
20384 done:
20385 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20386 	kmem_free(rqbuf, SENSE_LENGTH);
20387 	return (rval);
20388 }
20389 
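/*
 * A minimal, hypothetical userland sketch of the DKIOCGMEDIAINFO ioctl
 * implemented above (not part of this driver); the device path is
 * illustrative only:
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct dk_minfo minfo;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
 *		(void) printf("type 0x%x, %llu blocks of %u bytes\n",
 *		    minfo.dki_media_type,
 *		    (u_longlong_t)minfo.dki_capacity, minfo.dki_lbsize);
 *	}
 */
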
20390 
20391 /*
20392  *    Function: sd_check_media
20393  *
20394  * Description: This utility routine implements the functionality for the
20395  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20396  *		driver state changes from that specified by the user
20397  *		(inserted or ejected). For example, if the user specifies
20398  *		DKIO_EJECTED and the current media state is inserted this
20399  *		routine will immediately return DKIO_INSERTED. However, if the
20400  *		current media state is not inserted the user thread will be
20401  *		blocked until the drive state changes. If DKIO_NONE is specified
20402  *		the user thread will block until a drive state change occurs.
20403  *
20404  *   Arguments: dev  - the device number
20405  *		state  - user pointer to a dkio_state, updated with the current
20406  *			drive state at return.
20407  *
20408  * Return Code: ENXIO
20409  *		EIO
20410  *		EAGAIN
20411  *		EINTR
20412  */
20413 
20414 static int
20415 sd_check_media(dev_t dev, enum dkio_state state)
20416 {
20417 	struct sd_lun		*un = NULL;
20418 	enum dkio_state		prev_state;
20419 	opaque_t		token = NULL;
20420 	int			rval = 0;
20421 
20422 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20423 		return (ENXIO);
20424 	}
20425 
20426 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20427 
20428 	mutex_enter(SD_MUTEX(un));
20429 
20430 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20431 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20432 
20433 	prev_state = un->un_mediastate;
20434 
20435 	/* is there anything to do? */
20436 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20437 		/*
20438 		 * submit the request to the scsi_watch service;
20439 		 * scsi_media_watch_cb() does the real work
20440 		 */
20441 		mutex_exit(SD_MUTEX(un));
20442 
20443 		/*
20444 		 * This change handles the case where a scsi watch request is
20445 		 * added to a device that is powered down. To accomplish this
20446 		 * we power up the device before adding the scsi watch request,
20447 		 * since the scsi watch sends a TUR directly to the device
20448 		 * which the device cannot handle if it is powered down.
20449 		 */
20450 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20451 			mutex_enter(SD_MUTEX(un));
20452 			goto done;
20453 		}
20454 
20455 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20456 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20457 		    (caddr_t)dev);
20458 
20459 		sd_pm_exit(un);
20460 
20461 		mutex_enter(SD_MUTEX(un));
20462 		if (token == NULL) {
20463 			rval = EAGAIN;
20464 			goto done;
20465 		}
20466 
20467 		/*
20468 		 * This is a special case IOCTL that doesn't return
20469 		 * until the media state changes. Routine sdpower
20470 		 * knows about and handles this so don't count it
20471 		 * as an active cmd in the driver, which would
20472 		 * keep the device busy from the pm framework's view.
20473 		 * If the count isn't decremented the device can't
20474 		 * be powered down.
20475 		 */
20476 		un->un_ncmds_in_driver--;
20477 		ASSERT(un->un_ncmds_in_driver >= 0);
20478 
20479 		/*
20480 		 * if a prior request had been made, this will be the same
20481 		 * token, as scsi_watch was designed that way.
20482 		 */
20483 		un->un_swr_token = token;
20484 		un->un_specified_mediastate = state;
20485 
20486 		/*
20487 		 * Now wait for the media change. We should not be signalled
20488 		 * while mediastate == state, but it is still better to test
20489 		 * for this condition, since there is a 2 sec cv_broadcast
20490 		 * delay when mediastate == DKIO_INSERTED.
20491 		 */
20492 		SD_TRACE(SD_LOG_COMMON, un,
20493 		    "sd_check_media: waiting for media state change\n");
20494 		while (un->un_mediastate == state) {
20495 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20496 				SD_TRACE(SD_LOG_COMMON, un,
20497 				    "sd_check_media: waiting for media state "
20498 				    "was interrupted\n");
20499 				un->un_ncmds_in_driver++;
20500 				rval = EINTR;
20501 				goto done;
20502 			}
20503 			SD_TRACE(SD_LOG_COMMON, un,
20504 			    "sd_check_media: received signal, state=%x\n",
20505 			    un->un_mediastate);
20506 		}
20507 		/*
20508 		 * Inc the counter to indicate the device once again
20509 		 * has an active outstanding cmd.
20510 		 */
20511 		un->un_ncmds_in_driver++;
20512 	}
20513 
20514 	/* invalidate geometry */
20515 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20516 		sr_ejected(un);
20517 	}
20518 
20519 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20520 		uint64_t	capacity;
20521 		uint_t		lbasize;
20522 
20523 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20524 		mutex_exit(SD_MUTEX(un));
20525 		/*
20526 		 * Since the following routines use SD_PATH_DIRECT, we must
20527 		 * call PM directly before the upcoming disk accesses. This
20528 		 * may cause the disk to be powered up and spun up.
20529 		 */
20530 
20531 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20532 			rval = sd_send_scsi_READ_CAPACITY(un,
20533 			    &capacity,
20534 			    &lbasize, SD_PATH_DIRECT);
20535 			if (rval != 0) {
20536 				sd_pm_exit(un);
20537 				mutex_enter(SD_MUTEX(un));
20538 				goto done;
20539 			}
20540 		} else {
20541 			rval = EIO;
20542 			mutex_enter(SD_MUTEX(un));
20543 			goto done;
20544 		}
20545 		mutex_enter(SD_MUTEX(un));
20546 
20547 		sd_update_block_info(un, lbasize, capacity);
20548 
20549 		/*
20550 		 *  Check if the media in the device is writable or not
20551 		 */
20552 		if (ISCD(un))
20553 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
20554 
20555 		mutex_exit(SD_MUTEX(un));
20556 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20557 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20558 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20559 			sd_set_pstats(un);
20560 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20561 			    "sd_check_media: un:0x%p pstats created and "
20562 			    "set\n", un);
20563 		}
20564 
20565 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20566 		    SD_PATH_DIRECT);
20567 		sd_pm_exit(un);
20568 
20569 		mutex_enter(SD_MUTEX(un));
20570 	}
20571 done:
20572 	un->un_f_watcht_stopped = FALSE;
20573 	if (un->un_swr_token) {
20574 		/*
20575 		 * Use of this local token and the mutex ensures that we avoid
20576 		 * some race conditions associated with terminating the
20577 		 * scsi watch.
20578 		 */
20579 		token = un->un_swr_token;
20580 		un->un_swr_token = (opaque_t)NULL;
20581 		mutex_exit(SD_MUTEX(un));
20582 		(void) scsi_watch_request_terminate(token,
20583 		    SCSI_WATCH_TERMINATE_WAIT);
20584 		mutex_enter(SD_MUTEX(un));
20585 	}
20586 
20587 	/*
20588 	 * Update the capacity kstat value, if no media previously
20589 	 * (capacity kstat is 0) and media has been inserted
20590 	 * (un_f_blockcount_is_valid == TRUE)
20591 	 */
20592 	if (un->un_errstats) {
20593 		struct sd_errstats	*stp = NULL;
20594 
20595 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20596 		if ((stp->sd_capacity.value.ui64 == 0) &&
20597 		    (un->un_f_blockcount_is_valid == TRUE)) {
20598 			stp->sd_capacity.value.ui64 =
20599 			    (uint64_t)((uint64_t)un->un_blockcount *
20600 			    un->un_sys_blocksize);
20601 		}
20602 	}
20603 	mutex_exit(SD_MUTEX(un));
20604 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20605 	return (rval);
20606 }
20607 
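/*
 * A minimal, hypothetical userland sketch of the DKIOCSTATE ioctl that
 * sd_check_media() implements (not part of this driver): the caller
 * passes in the last state it saw and the ioctl blocks until the state
 * changes; the device path is illustrative only.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	enum dkio_state state = DKIO_NONE;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	while (fd >= 0 && ioctl(fd, DKIOCSTATE, &state) == 0 &&
 *	    state != DKIO_INSERTED)
 *		continue;
 */
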
20608 
20609 /*
20610  *    Function: sd_delayed_cv_broadcast
20611  *
20612  * Description: Delayed cv_broadcast to allow for target to recover from media
20613  *		insertion.
20614  *
20615  *   Arguments: arg - driver soft state (unit) structure
20616  */
20617 
20618 static void
20619 sd_delayed_cv_broadcast(void *arg)
20620 {
20621 	struct sd_lun *un = arg;
20622 
20623 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20624 
20625 	mutex_enter(SD_MUTEX(un));
20626 	un->un_dcvb_timeid = NULL;
20627 	cv_broadcast(&un->un_state_cv);
20628 	mutex_exit(SD_MUTEX(un));
20629 }
20630 
20631 
20632 /*
20633  *    Function: sd_media_watch_cb
20634  *
20635  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20636  *		routine processes the TUR sense data and updates the driver
20637  *		state if a transition has occurred. The user thread
20638  *		(sd_check_media) is then signalled.
20639  *
20640  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20641  *			among multiple watches that share this callback function
20642  *		resultp - scsi watch facility result packet containing scsi
20643  *			  packet, status byte and sense data
20644  *
20645  * Return Code: 0 for success, -1 for failure
20646  */
20647 
20648 static int
20649 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20650 {
20651 	struct sd_lun			*un;
20652 	struct scsi_status		*statusp = resultp->statusp;
20653 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
20654 	enum dkio_state			state = DKIO_NONE;
20655 	dev_t				dev = (dev_t)arg;
20656 	uchar_t				actual_sense_length;
20657 	uint8_t				skey, asc, ascq;
20658 
20659 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20660 		return (-1);
20661 	}
20662 	actual_sense_length = resultp->actual_sense_length;
20663 
20664 	mutex_enter(SD_MUTEX(un));
20665 	SD_TRACE(SD_LOG_COMMON, un,
20666 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
20667 	    *((char *)statusp), (void *)sensep, actual_sense_length);
20668 
20669 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
20670 		un->un_mediastate = DKIO_DEV_GONE;
20671 		cv_broadcast(&un->un_state_cv);
20672 		mutex_exit(SD_MUTEX(un));
20673 
20674 		return (0);
20675 	}
20676 
20677 	/*
20678 	 * If there was a check condition then sensep points to valid sense
20679 	 * data. If the status was not a check condition but a reservation or
20680 	 * busy status then the new state is DKIO_NONE.
20681 	 */
20682 	if (sensep != NULL) {
20683 		skey = scsi_sense_key(sensep);
20684 		asc = scsi_sense_asc(sensep);
20685 		ascq = scsi_sense_ascq(sensep);
20686 
20687 		SD_INFO(SD_LOG_COMMON, un,
20688 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
20689 		    skey, asc, ascq);
20690 		/* This routine only uses up to 13 bytes of sense data. */
20691 		if (actual_sense_length >= 13) {
20692 			if (skey == KEY_UNIT_ATTENTION) {
20693 				if (asc == 0x28) {
20694 					state = DKIO_INSERTED;
20695 				}
20696 			} else if (skey == KEY_NOT_READY) {
20697 				/*
20698 				 * Sense data 02/04/02 means that the
20699 				 * host should send a start command.
20700 				 * Explicitly leave the media state as
20701 				 * is (inserted), as the media is
20702 				 * inserted and the host has stopped the
20703 				 * device for PM reasons. The next true
20704 				 * read/write to this media will bring
20705 				 * the device to the right state for
20706 				 * media access.
20707 				 */
20708 				if (asc == 0x3a) {
20709 					state = DKIO_EJECTED;
20710 				} else {
20711 					/*
20712 					 * If the drive is busy with an
20713 					 * operation or long write, keep the
20714 					 * media in an inserted state.
20715 					 */
20716 
20717 					if ((asc == 0x04) &&
20718 					    ((ascq == 0x02) ||
20719 					    (ascq == 0x07) ||
20720 					    (ascq == 0x08))) {
20721 						state = DKIO_INSERTED;
20722 					}
20723 				}
20724 			} else if (skey == KEY_NO_SENSE) {
20725 				if ((asc == 0x00) && (ascq == 0x00)) {
20726 					/*
20727 					 * Sense Data 00/00/00 does not provide
20728 					 * any information about the state of
20729 					 * the media. Ignore it.
20730 					 */
20731 					mutex_exit(SD_MUTEX(un));
20732 					return (0);
20733 				}
20734 			}
20735 		}
20736 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
20737 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
20738 		state = DKIO_INSERTED;
20739 	}
20740 
20741 	SD_TRACE(SD_LOG_COMMON, un,
20742 	    "sd_media_watch_cb: state=%x, specified=%x\n",
20743 	    state, un->un_specified_mediastate);
20744 
20745 	/*
20746 	 * now signal the waiting thread if this is *not* the specified state;
20747 	 * delay the signal if the state is DKIO_INSERTED to allow the target
20748 	 * to recover
20749 	 */
20750 	if (state != un->un_specified_mediastate) {
20751 		un->un_mediastate = state;
20752 		if (state == DKIO_INSERTED) {
20753 			/*
20754 			 * delay the signal to give the drive a chance
20755 			 * to do what it apparently needs to do
20756 			 */
20757 			SD_TRACE(SD_LOG_COMMON, un,
20758 			    "sd_media_watch_cb: delayed cv_broadcast\n");
20759 			if (un->un_dcvb_timeid == NULL) {
20760 				un->un_dcvb_timeid =
20761 				    timeout(sd_delayed_cv_broadcast, un,
20762 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
20763 			}
20764 		} else {
20765 			SD_TRACE(SD_LOG_COMMON, un,
20766 			    "sd_media_watch_cb: immediate cv_broadcast\n");
20767 			cv_broadcast(&un->un_state_cv);
20768 		}
20769 	}
20770 	mutex_exit(SD_MUTEX(un));
20771 	return (0);
20772 }
20773 
20774 
20775 /*
20776  *    Function: sd_dkio_get_temp
20777  *
20778  * Description: This routine is the driver entry point for handling ioctl
20779  *		requests to get the disk temperature.
20780  *
20781  *   Arguments: dev  - the device number
20782  *		arg  - pointer to user provided dk_temperature structure.
20783  *		flag - this argument is a pass through to ddi_copyxxx()
20784  *		       directly from the mode argument of ioctl().
20785  *
20786  * Return Code: 0
20787  *		EFAULT
20788  *		ENXIO
20789  *		EAGAIN
20790  */
20791 
20792 static int
20793 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
20794 {
20795 	struct sd_lun		*un = NULL;
20796 	struct dk_temperature	*dktemp = NULL;
20797 	uchar_t			*temperature_page;
20798 	int			rval = 0;
20799 	int			path_flag = SD_PATH_STANDARD;
20800 
20801 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20802 		return (ENXIO);
20803 	}
20804 
20805 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
20806 
20807 	/* copyin the disk temp argument to get the user flags */
20808 	if (ddi_copyin((void *)arg, dktemp,
20809 	    sizeof (struct dk_temperature), flag) != 0) {
20810 		rval = EFAULT;
20811 		goto done;
20812 	}
20813 
20814 	/* Initialize the temperature to invalid. */
20815 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20816 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20817 
20818 	/*
20819 	 * Note: Investigate removing the "bypass pm" semantic.
20820 	 * Can we just bypass PM always?
20821 	 */
20822 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
20823 		path_flag = SD_PATH_DIRECT;
20824 		ASSERT(!mutex_owned(&un->un_pm_mutex));
20825 		mutex_enter(&un->un_pm_mutex);
20826 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
20827 			/*
20828 			 * If DKT_BYPASS_PM is set, and the drive happens to be
20829 			 * in low power mode, we cannot wake it up; we need to
20830 			 * return EAGAIN.
20831 			 */
20832 			mutex_exit(&un->un_pm_mutex);
20833 			rval = EAGAIN;
20834 			goto done;
20835 		} else {
20836 			/*
20837 			 * Indicate to PM the device is busy. This is required
20838 			 * to avoid a race - i.e. the ioctl is issuing a
20839 			 * command and the pm framework brings down the device
20840 			 * to low power mode (possible power cut-off on some
20841 			 * platforms).
20842 			 */
20843 			mutex_exit(&un->un_pm_mutex);
20844 			if (sd_pm_entry(un) != DDI_SUCCESS) {
20845 				rval = EAGAIN;
20846 				goto done;
20847 			}
20848 		}
20849 	}
20850 
20851 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
20852 
20853 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
20854 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
20855 		goto done2;
20856 	}
20857 
20858 	/*
20859 	 * For the current temperature verify that the parameter length is 0x02
20860 	 * and the parameter code is 0x00
20861 	 */
20862 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
20863 	    (temperature_page[5] == 0x00)) {
20864 		if (temperature_page[9] == 0xFF) {
20865 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20866 		} else {
20867 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
20868 		}
20869 	}
20870 
20871 	/*
20872 	 * For the reference temperature verify that the parameter
20873 	 * length is 0x02 and the parameter code is 0x01
20874 	 */
20875 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
20876 	    (temperature_page[11] == 0x01)) {
20877 		if (temperature_page[15] == 0xFF) {
20878 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20879 		} else {
20880 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
20881 		}
20882 	}
20883 
20884 	/* Do the copyout regardless of the temperature commands status. */
20885 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
20886 	    flag) != 0) {
20887 		rval = EFAULT;
20888 	}
20889 
20890 done2:
20891 	if (path_flag == SD_PATH_DIRECT) {
20892 		sd_pm_exit(un);
20893 	}
20894 
20895 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
20896 done:
20897 	if (dktemp != NULL) {
20898 		kmem_free(dktemp, sizeof (struct dk_temperature));
20899 	}
20900 
20901 	return (rval);
20902 }
20903 
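/*
 * A minimal, hypothetical userland sketch of the DKIOCGTEMPERATURE
 * ioctl implemented above (not part of this driver); the device path
 * is illustrative only. With DKT_BYPASS_PM set, a powered-down drive
 * fails with EAGAIN rather than being spun up:
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct dk_temperature dktemp;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	dktemp.dkt_flags = DKT_BYPASS_PM;
 *	if (fd >= 0 && ioctl(fd, DKIOCGTEMPERATURE, &dktemp) == 0 &&
 *	    dktemp.dkt_cur_temp != DKT_INVALID_TEMP)
 *		(void) printf("current temp: %d C\n", dktemp.dkt_cur_temp);
 */
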
20904 
20905 /*
20906  *    Function: sd_log_page_supported
20907  *
20908  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
20909  *		supported log pages.
20910  *
20911  *   Arguments: un - driver soft state (unit) structure
20912  *		log_page - the log page code to search for
20913  *
20914  * Return Code: -1 - on error (log sense is optional and may not be supported).
20915  *		0  - log page not found.
20916  *		1  - log page found.
20917  */
20918 
20919 static int
20920 sd_log_page_supported(struct sd_lun *un, int log_page)
20921 {
20922 	uchar_t *log_page_data;
20923 	int	i;
20924 	int	match = 0;
20925 	int	log_size;
20926 
20927 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
20928 
20929 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
20930 	    SD_PATH_DIRECT) != 0) {
20931 		SD_ERROR(SD_LOG_COMMON, un,
20932 		    "sd_log_page_supported: failed log page retrieval\n");
20933 		kmem_free(log_page_data, 0xFF);
20934 		return (-1);
20935 	}
20936 	log_size = log_page_data[3];
20937 
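	/*
	 * Sketch of the data returned by LOG SENSE for page 0x00 (the
	 * supported log pages page), per the SCSI spec:
	 *
	 *	byte 0:		page code (0x00)
	 *	byte 1:		reserved
	 *	bytes 2-3:	page length (n)
	 *	bytes 4..n+3:	supported page codes, one per byte
	 *
	 * Only the low-order length byte (byte 3) is used here, since
	 * the supported-pages list is well under 255 bytes long.
	 */
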
20938 	/*
20939 	 * The list of supported log pages starts at the fourth byte. Check
20940 	 * until we run out of log pages or a match is found.
20941 	 */
20942 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
20943 		if (log_page_data[i] == log_page) {
20944 			match++;
20945 		}
20946 	}
20947 	kmem_free(log_page_data, 0xFF);
20948 	return (match);
20949 }
20950 
20951 
20952 /*
20953  *    Function: sd_mhdioc_failfast
20954  *
20955  * Description: This routine is the driver entry point for handling ioctl
20956  *		requests to enable/disable the multihost failfast option.
20957  *		(MHIOCENFAILFAST)
20958  *
20959  *   Arguments: dev	- the device number
20960  *		arg	- user specified probing interval.
20961  *		flag	- this argument is a pass through to ddi_copyxxx()
20962  *			  directly from the mode argument of ioctl().
20963  *
20964  * Return Code: 0
20965  *		EFAULT
20966  *		ENXIO
20967  */
20968 
20969 static int
20970 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
20971 {
20972 	struct sd_lun	*un = NULL;
20973 	int		mh_time;
20974 	int		rval = 0;
20975 
20976 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20977 		return (ENXIO);
20978 	}
20979 
20980 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
20981 		return (EFAULT);
20982 
20983 	if (mh_time) {
20984 		mutex_enter(SD_MUTEX(un));
20985 		un->un_resvd_status |= SD_FAILFAST;
20986 		mutex_exit(SD_MUTEX(un));
20987 		/*
20988 		 * If mh_time is INT_MAX, then this ioctl is being used for
20989 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
20990 		 */
20991 		if (mh_time != INT_MAX) {
20992 			rval = sd_check_mhd(dev, mh_time);
20993 		}
20994 	} else {
20995 		(void) sd_check_mhd(dev, 0);
20996 		mutex_enter(SD_MUTEX(un));
20997 		un->un_resvd_status &= ~SD_FAILFAST;
20998 		mutex_exit(SD_MUTEX(un));
20999 	}
21000 	return (rval);
21001 }
21002 
21003 
21004 /*
21005  *    Function: sd_mhdioc_takeown
21006  *
21007  * Description: This routine is the driver entry point for handling ioctl
21008  *		requests to forcefully acquire exclusive access rights to the
21009  *		multihost disk (MHIOCTKOWN).
21010  *
21011  *   Arguments: dev	- the device number
21012  *		arg	- user provided structure specifying the delay
21013  *			  parameters in milliseconds
21014  *		flag	- this argument is a pass through to ddi_copyxxx()
21015  *			  directly from the mode argument of ioctl().
21016  *
21017  * Return Code: 0
21018  *		EFAULT
21019  *		ENXIO
21020  */
21021 
21022 static int
21023 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
21024 {
21025 	struct sd_lun		*un = NULL;
21026 	struct mhioctkown	*tkown = NULL;
21027 	int			rval = 0;
21028 
21029 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21030 		return (ENXIO);
21031 	}
21032 
21033 	if (arg != NULL) {
21034 		tkown = (struct mhioctkown *)
21035 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
21036 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
21037 		if (rval != 0) {
21038 			rval = EFAULT;
21039 			goto error;
21040 		}
21041 	}
21042 
21043 	rval = sd_take_ownership(dev, tkown);
21044 	mutex_enter(SD_MUTEX(un));
21045 	if (rval == 0) {
21046 		un->un_resvd_status |= SD_RESERVE;
21047 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
21048 			sd_reinstate_resv_delay =
21049 			    tkown->reinstate_resv_delay * 1000;
21050 		} else {
21051 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
21052 		}
21053 		/*
21054 		 * Give precedence here to the scsi_watch interval
21055 		 * set by the MHIOCENFAILFAST ioctl.
21056 		 */
21057 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21058 			mutex_exit(SD_MUTEX(un));
21059 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21060 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21061 			    "sd_mhdioc_takeown : %d\n",
21062 			    sd_reinstate_resv_delay);
21063 		} else {
21064 			mutex_exit(SD_MUTEX(un));
21065 		}
21066 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21067 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21068 	} else {
21069 		un->un_resvd_status &= ~SD_RESERVE;
21070 		mutex_exit(SD_MUTEX(un));
21071 	}
21072 
21073 error:
21074 	if (tkown != NULL) {
21075 		kmem_free(tkown, sizeof (struct mhioctkown));
21076 	}
21077 	return (rval);
21078 }
21079 
21080 
21081 /*
21082  *    Function: sd_mhdioc_release
21083  *
21084  * Description: This routine is the driver entry point for handling ioctl
21085  *		requests to release exclusive access rights to the multihost
21086  *		disk (MHIOCRELEASE).
21087  *
21088  *   Arguments: dev	- the device number
21089  *
21090  * Return Code: 0
21091  *		ENXIO
21092  */
21093 
21094 static int
21095 sd_mhdioc_release(dev_t dev)
21096 {
21097 	struct sd_lun		*un = NULL;
21098 	timeout_id_t		resvd_timeid_save;
21099 	int			resvd_status_save;
21100 	int			rval = 0;
21101 
21102 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21103 		return (ENXIO);
21104 	}
21105 
21106 	mutex_enter(SD_MUTEX(un));
21107 	resvd_status_save = un->un_resvd_status;
21108 	un->un_resvd_status &=
21109 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21110 	if (un->un_resvd_timeid) {
21111 		resvd_timeid_save = un->un_resvd_timeid;
21112 		un->un_resvd_timeid = NULL;
21113 		mutex_exit(SD_MUTEX(un));
21114 		(void) untimeout(resvd_timeid_save);
21115 	} else {
21116 		mutex_exit(SD_MUTEX(un));
21117 	}
21118 
21119 	/*
21120 	 * destroy any pending timeout thread that may be attempting to
21121 	 * reinstate reservation on this device.
21122 	 */
21123 	sd_rmv_resv_reclaim_req(dev);
21124 
21125 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21126 		mutex_enter(SD_MUTEX(un));
21127 		if ((un->un_mhd_token) &&
21128 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21129 			mutex_exit(SD_MUTEX(un));
21130 			(void) sd_check_mhd(dev, 0);
21131 		} else {
21132 			mutex_exit(SD_MUTEX(un));
21133 		}
21134 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21135 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21136 	} else {
21137 		/*
21138 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21139 		 */
21140 		mutex_enter(SD_MUTEX(un));
21141 		un->un_resvd_status = resvd_status_save;
21142 		mutex_exit(SD_MUTEX(un));
21143 	}
21144 	return (rval);
21145 }
21146 
21147 
21148 /*
21149  *    Function: sd_mhdioc_register_devid
21150  *
21151  * Description: This routine is the driver entry point for handling ioctl
21152  *		requests to register the device id (MHIOCREREGISTERDEVID).
21153  *
21154  *		Note: The implementation for this ioctl has been updated to
21155  *		be consistent with the original PSARC case (1999/357)
21156  *		(4375899, 4241671, 4220005)
21157  *
21158  *   Arguments: dev	- the device number
21159  *
21160  * Return Code: 0
21161  *		ENXIO
21162  */
21163 
21164 static int
21165 sd_mhdioc_register_devid(dev_t dev)
21166 {
21167 	struct sd_lun	*un = NULL;
21168 	int		rval = 0;
21169 
21170 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21171 		return (ENXIO);
21172 	}
21173 
21174 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21175 
21176 	mutex_enter(SD_MUTEX(un));
21177 
21178 	/* If a devid already exists, de-register it */
21179 	if (un->un_devid != NULL) {
21180 		ddi_devid_unregister(SD_DEVINFO(un));
21181 		/*
21182 		 * After unregister devid, needs to free devid memory
21183 		 * After unregistering the devid, free the devid memory.
21184 		ddi_devid_free(un->un_devid);
21185 		un->un_devid = NULL;
21186 	}
21187 
21188 	/* Check for reservation conflict */
21189 	mutex_exit(SD_MUTEX(un));
21190 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21191 	mutex_enter(SD_MUTEX(un));
21192 
21193 	switch (rval) {
21194 	case 0:
21195 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21196 		break;
21197 	case EACCES:
21198 		break;
21199 	default:
21200 		rval = EIO;
21201 	}
21202 
21203 	mutex_exit(SD_MUTEX(un));
21204 	return (rval);
21205 }
21206 
21207 
21208 /*
21209  *    Function: sd_mhdioc_inkeys
21210  *
21211  * Description: This routine is the driver entry point for handling ioctl
21212  *		requests to issue the SCSI-3 Persistent In Read Keys command
21213  *		requests to issue the SCSI-3 PERSISTENT RESERVE IN (READ
21214  *		KEYS) command to the device (MHIOCGRP_INKEYS).
21215  *   Arguments: dev	- the device number
21216  *		arg	- user provided in_keys structure
21217  *		flag	- this argument is a pass through to ddi_copyxxx()
21218  *			  directly from the mode argument of ioctl().
21219  *
21220  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21221  *		ENXIO
21222  *		EFAULT
21223  */
21224 
21225 static int
21226 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21227 {
21228 	struct sd_lun		*un;
21229 	mhioc_inkeys_t		inkeys;
21230 	int			rval = 0;
21231 
21232 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21233 		return (ENXIO);
21234 	}
21235 
21236 #ifdef _MULTI_DATAMODEL
21237 	switch (ddi_model_convert_from(flag & FMODELS)) {
21238 	case DDI_MODEL_ILP32: {
21239 		struct mhioc_inkeys32	inkeys32;
21240 
21241 		if (ddi_copyin(arg, &inkeys32,
21242 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21243 			return (EFAULT);
21244 		}
21245 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21246 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21247 		    &inkeys, flag)) != 0) {
21248 			return (rval);
21249 		}
21250 		inkeys32.generation = inkeys.generation;
21251 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21252 		    flag) != 0) {
21253 			return (EFAULT);
21254 		}
21255 		break;
21256 	}
21257 	case DDI_MODEL_NONE:
21258 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21259 		    flag) != 0) {
21260 			return (EFAULT);
21261 		}
21262 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21263 		    &inkeys, flag)) != 0) {
21264 			return (rval);
21265 		}
21266 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21267 		    flag) != 0) {
21268 			return (EFAULT);
21269 		}
21270 		break;
21271 	}
21272 
21273 #else /* ! _MULTI_DATAMODEL */
21274 
21275 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21276 		return (EFAULT);
21277 	}
21278 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21279 	if (rval != 0) {
21280 		return (rval);
21281 	}
21282 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21283 		return (EFAULT);
21284 	}
21285 
21286 #endif /* _MULTI_DATAMODEL */
21287 
21288 	return (rval);
21289 }
21290 
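/*
 * A minimal, hypothetical userland sketch of the MHIOCGRP_INKEYS ioctl
 * implemented above (not part of this driver): the caller supplies a
 * key-list buffer and listlen reports how many keys are registered;
 * the device path and list size are illustrative only.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	mhioc_resv_key_t keys[8];
 *	mhioc_key_list_t klist;
 *	mhioc_inkeys_t inkeys;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	klist.listsize = 8;
 *	klist.list = keys;
 *	inkeys.li = &klist;
 *	if (fd >= 0 && ioctl(fd, MHIOCGRP_INKEYS, &inkeys) == 0)
 *		(void) printf("generation %u, %u key(s) registered\n",
 *		    inkeys.generation, klist.listlen);
 */
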
21291 
21292 /*
21293  *    Function: sd_mhdioc_inresv
21294  *
21295  * Description: This routine is the driver entry point for handling ioctl
21296  *		requests to issue the SCSI-3 PERSISTENT RESERVE IN (READ
21297  *		RESERVATIONS) command to the device (MHIOCGRP_INRESV).
21298  *
21299  *   Arguments: dev	- the device number
21300  *		arg	- user provided in_resv structure
21301  *		flag	- this argument is a pass through to ddi_copyxxx()
21302  *			  directly from the mode argument of ioctl().
21303  *
21304  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21305  *		ENXIO
21306  *		EFAULT
21307  */
21308 
21309 static int
21310 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21311 {
21312 	struct sd_lun		*un;
21313 	mhioc_inresvs_t		inresvs;
21314 	int			rval = 0;
21315 
21316 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21317 		return (ENXIO);
21318 	}
21319 
21320 #ifdef _MULTI_DATAMODEL
21321 
21322 	switch (ddi_model_convert_from(flag & FMODELS)) {
21323 	case DDI_MODEL_ILP32: {
21324 		struct mhioc_inresvs32	inresvs32;
21325 
21326 		if (ddi_copyin(arg, &inresvs32,
21327 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21328 			return (EFAULT);
21329 		}
21330 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21331 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21332 		    &inresvs, flag)) != 0) {
21333 			return (rval);
21334 		}
21335 		inresvs32.generation = inresvs.generation;
21336 		if (ddi_copyout(&inresvs32, arg,
21337 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21338 			return (EFAULT);
21339 		}
21340 		break;
21341 	}
21342 	case DDI_MODEL_NONE:
21343 		if (ddi_copyin(arg, &inresvs,
21344 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21345 			return (EFAULT);
21346 		}
21347 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21348 		    &inresvs, flag)) != 0) {
21349 			return (rval);
21350 		}
21351 		if (ddi_copyout(&inresvs, arg,
21352 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21353 			return (EFAULT);
21354 		}
21355 		break;
21356 	}
21357 
21358 #else /* ! _MULTI_DATAMODEL */
21359 
21360 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21361 		return (EFAULT);
21362 	}
21363 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21364 	if (rval != 0) {
21365 		return (rval);
21366 	}
21367 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
21368 		return (EFAULT);
21369 	}
21370 
21371 #endif /* ! _MULTI_DATAMODEL */
21372 
21373 	return (rval);
21374 }
21375 
21376 
21377 /*
21378  * The following routines support the clustering functionality described below
21379  * and implement lost reservation reclaim functionality.
21380  *
21381  * Clustering
21382  * ----------
21383  * The clustering code uses two different, independent forms of SCSI
21384  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
21385  * Persistent Group Reservations. For any particular disk, it will use either
21386  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
21387  *
21388  * SCSI-2
21389  * The cluster software takes ownership of a multi-hosted disk by issuing the
21390  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
21391  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: just
21392  * after taking ownership of the disk with MHIOCTKOWN, a cluster issues
21393  * MHIOCENFAILFAST.  This ioctl "enables failfast" in the driver. The
21394  * meaning of failfast is that if the driver (on this host) ever encounters the
21395  * scsi error return code RESERVATION_CONFLICT from the device, it should
21396  * immediately panic the host. The motivation for this ioctl is that if this
21397  * host does encounter reservation conflict, the underlying cause is that some
21398  * other host of the cluster has decided that this host is no longer in the
21399  * cluster and has seized control of the disks for itself. Since this host is no
21400  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
21401  * does two things:
21402  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21403  *      error to panic the host
21404  *      (b) it sets up a periodic timer to test whether this host still has
21405  *      "access" (in that no other host has reserved the device):  if the
21406  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21407  *      purpose of that periodic timer is to handle scenarios where the host is
21408  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21409  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21410  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21411  * the device itself.
21412  *
21413  * SCSI-3 PGR
21414  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21415  * facility is supported through the shared multihost disk ioctls
21416  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21417  * MHIOCGRP_PREEMPTANDABORT)
21418  *
21419  * Reservation Reclaim:
21420  * --------------------
21421  * To support the lost reservation reclaim operations this driver creates a
21422  * single thread to handle reinstating reservations on all devices that have
21423  * lost reservations. sd_resv_reclaim_requests are logged for all devices
21424  * that have LOST RESERVATIONS when the scsi watch facility calls back
21425  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
21426  * requests to regain the lost reservations.
21427  */
21428 
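/*
 * A minimal, hypothetical sketch of the SCSI-2 take-ownership sequence
 * described above, as a cluster node might issue it from userland (not
 * part of this driver); the device path and delay values are
 * illustrative only.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	struct mhioctkown tkown;
 *	int ff_interval = 1000;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *
 *	tkown.reinstate_resv_delay = 1000;
 *	tkown.min_ownership_delay = 6000;
 *	tkown.max_ownership_delay = 30000;
 *	if (fd >= 0 && ioctl(fd, MHIOCTKOWN, &tkown) == 0) {
 *		(void) ioctl(fd, MHIOCENFAILFAST, &ff_interval);
 *		... perform I/O as the owning node ...
 *		(void) ioctl(fd, MHIOCRELEASE, NULL);
 *	}
 */
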
21429 /*
21430  *    Function: sd_check_mhd()
21431  *
21432  * Description: This function sets up and submits a scsi watch request or
21433  *		terminates an existing watch request. This routine is used in
21434  *		support of reservation reclaim.
21435  *
21436  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21437  *			 among multiple watches that share the callback function
21438  *		interval - the number of milliseconds specifying the watch
21439  *			   interval for issuing TEST UNIT READY commands. If
21440  *			   set to 0 the watch should be terminated. If the
21441  *			   interval is set to 0 and if the device is required
21442  *			   to hold reservation while disabling failfast, the
21443  *			   watch is restarted with an interval of
21444  *			   reinstate_resv_delay.
21445  *
21446  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21447  *		ENXIO      - Indicates an invalid device was specified
21448  *		EAGAIN     - Unable to submit the scsi watch request
21449  */
21450 
21451 static int
21452 sd_check_mhd(dev_t dev, int interval)
21453 {
21454 	struct sd_lun	*un;
21455 	opaque_t	token;
21456 
21457 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21458 		return (ENXIO);
21459 	}
21460 
21461 	/* is this a watch termination request? */
21462 	if (interval == 0) {
21463 		mutex_enter(SD_MUTEX(un));
21464 		/* if there is an existing watch task then terminate it */
21465 		if (un->un_mhd_token) {
21466 			token = un->un_mhd_token;
21467 			un->un_mhd_token = NULL;
21468 			mutex_exit(SD_MUTEX(un));
21469 			(void) scsi_watch_request_terminate(token,
21470 			    SCSI_WATCH_TERMINATE_WAIT);
21471 			mutex_enter(SD_MUTEX(un));
21472 		} else {
21473 			mutex_exit(SD_MUTEX(un));
21474 			/*
21475 			 * Note: If we return here we don't check for the
21476 			 * failfast case. This is the original legacy
21477 			 * implementation but perhaps we should be checking
21478 			 * the failfast case.
21479 			 */
21480 			return (0);
21481 		}
21482 		/*
21483 		 * If the device is required to hold reservation while
21484 		 * disabling failfast, we need to restart the scsi_watch
21485 		 * routine with an interval of reinstate_resv_delay.
21486 		 */
21487 		if (un->un_resvd_status & SD_RESERVE) {
21488 			interval = sd_reinstate_resv_delay/1000;
21489 		} else {
21490 			/* no failfast so bail */
21491 			mutex_exit(SD_MUTEX(un));
21492 			return (0);
21493 		}
21494 		mutex_exit(SD_MUTEX(un));
21495 	}
21496 
21497 	/*
21498 	 * adjust minimum time interval to 1 second,
21499 	 * and convert from msecs to usecs
21500 	 */
21501 	if (interval > 0 && interval < 1000) {
21502 		interval = 1000;
21503 	}
21504 	interval *= 1000;
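	/*
	 * For example, a requested interval of 500 msec is first raised
	 * to the 1000 msec minimum and then converted to 1000000 usec
	 * for scsi_watch_request_submit() below.
	 */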
21505 
21506 	/*
21507 	 * submit the request to the scsi_watch service
21508 	 */
21509 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21510 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21511 	if (token == NULL) {
21512 		return (EAGAIN);
21513 	}
21514 
21515 	/*
21516 	 * save token for termination later on
21517 	 */
21518 	mutex_enter(SD_MUTEX(un));
21519 	un->un_mhd_token = token;
21520 	mutex_exit(SD_MUTEX(un));
21521 	return (0);
21522 }
21523 
21524 
21525 /*
21526  *    Function: sd_mhd_watch_cb()
21527  *
21528  * Description: This function is the call back function used by the scsi watch
21529  *		facility. The scsi watch facility sends the "Test Unit Ready"
21530  *		and processes the status. If applicable (i.e. a "Unit Attention"
21531  *		status and automatic "Request Sense" not used) the scsi watch
21532  *		facility will send a "Request Sense" and retrieve the sense data
21533  *		to be passed to this callback function. In either case the
21534  *		to be passed to this callback function. In either case,
21535  *		automatic "Request Sense" or facility-submitted, this
21536  *
21537  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21538  *			among multiple watches that share this callback function
21539  *		resultp - scsi watch facility result packet containing scsi
21540  *			  packet, status byte and sense data
21541  *
21542  * Return Code: 0 - continue the watch task
21543  *		non-zero - terminate the watch task
21544  */
21545 
21546 static int
21547 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21548 {
21549 	struct sd_lun			*un;
21550 	struct scsi_status		*statusp;
21551 	uint8_t				*sensep;
21552 	struct scsi_pkt			*pkt;
21553 	uchar_t				actual_sense_length;
21554 	dev_t  				dev = (dev_t)arg;
21555 
21556 	ASSERT(resultp != NULL);
21557 	statusp			= resultp->statusp;
21558 	sensep			= (uint8_t *)resultp->sensep;
21559 	pkt			= resultp->pkt;
21560 	actual_sense_length	= resultp->actual_sense_length;
21561 
21562 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21563 		return (ENXIO);
21564 	}
21565 
21566 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21567 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21568 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21569 
21570 	/* Begin processing of the status and/or sense data */
21571 	if (pkt->pkt_reason != CMD_CMPLT) {
21572 		/* Handle the incomplete packet */
21573 		sd_mhd_watch_incomplete(un, pkt);
21574 		return (0);
21575 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21576 		if (*((unsigned char *)statusp)
21577 		    == STATUS_RESERVATION_CONFLICT) {
21578 			/*
21579 			 * Handle a reservation conflict by panicking if
21580 			 * configured for failfast or by logging the conflict
21581 			 * and updating the reservation status
21582 			 */
21583 			mutex_enter(SD_MUTEX(un));
21584 			if ((un->un_resvd_status & SD_FAILFAST) &&
21585 			    (sd_failfast_enable)) {
21586 				sd_panic_for_res_conflict(un);
21587 				/*NOTREACHED*/
21588 			}
21589 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21590 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21591 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21592 			mutex_exit(SD_MUTEX(un));
21593 		}
21594 	}
21595 
21596 	if (sensep != NULL) {
21597 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21598 			mutex_enter(SD_MUTEX(un));
21599 			if ((scsi_sense_asc(sensep) ==
21600 			    SD_SCSI_RESET_SENSE_CODE) &&
21601 			    (un->un_resvd_status & SD_RESERVE)) {
21602 				/*
21603 				 * The additional sense code indicates a power
21604 				 * on or bus device reset has occurred; update
21605 				 * the reservation status.
21606 				 */
21607 				un->un_resvd_status |=
21608 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21609 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21610 				    "sd_mhd_watch_cb: Lost Reservation\n");
21611 			}
21612 		} else {
21613 			return (0);
21614 		}
21615 	} else {
21616 		mutex_enter(SD_MUTEX(un));
21617 	}
21618 
21619 	if ((un->un_resvd_status & SD_RESERVE) &&
21620 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21621 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21622 			/*
21623 			 * A reset occurred in between the last probe and this
21624 			 * one so if a timeout is pending cancel it.
21625 			 */
21626 			if (un->un_resvd_timeid) {
21627 				timeout_id_t temp_id = un->un_resvd_timeid;
21628 				un->un_resvd_timeid = NULL;
21629 				mutex_exit(SD_MUTEX(un));
21630 				(void) untimeout(temp_id);
21631 				mutex_enter(SD_MUTEX(un));
21632 			}
21633 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21634 		}
21635 		if (un->un_resvd_timeid == 0) {
21636 			/* Schedule a timeout to handle the lost reservation */
21637 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21638 			    (void *)dev,
21639 			    drv_usectohz(sd_reinstate_resv_delay));
21640 		}
21641 	}
21642 	mutex_exit(SD_MUTEX(un));
21643 	return (0);
21644 }
21645 
21646 
21647 /*
21648  *    Function: sd_mhd_watch_incomplete()
21649  *
21650  * Description: This function is used to find out why a scsi pkt sent by the
21651  *		scsi watch facility was not completed. Under some scenarios this
21652  *		routine will return. Otherwise it will send a bus reset to see
21653  *		if the drive is still online.
21654  *
21655  *   Arguments: un  - driver soft state (unit) structure
21656  *		pkt - incomplete scsi pkt
21657  */
21658 
21659 static void
21660 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
21661 {
21662 	int	be_chatty;
21663 	int	perr;
21664 
21665 	ASSERT(pkt != NULL);
21666 	ASSERT(un != NULL);
21667 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
21668 	perr		= (pkt->pkt_statistics & STAT_PERR);
21669 
21670 	mutex_enter(SD_MUTEX(un));
21671 	if (un->un_state == SD_STATE_DUMPING) {
21672 		mutex_exit(SD_MUTEX(un));
21673 		return;
21674 	}
21675 
21676 	switch (pkt->pkt_reason) {
21677 	case CMD_UNX_BUS_FREE:
21678 		/*
21679 		 * If we had a parity error that caused the target to drop BSY*,
21680 		 * don't be chatty about it.
21681 		 */
21682 		if (perr && be_chatty) {
21683 			be_chatty = 0;
21684 		}
21685 		break;
21686 	case CMD_TAG_REJECT:
21687 		/*
21688 		 * The SCSI-2 spec states that a tag reject will be sent by the
21689 		 * target if tagged queuing is not supported. A tag reject may
21690 		 * also be sent during certain initialization periods or to
21691 		 * control internal resources. For the latter case the target
21692 		 * may also return Queue Full.
21693 		 *
21694 		 * If this driver receives a tag reject from a target that is
21695 		 * going through an init period or controlling internal
21696 		 * resources, tagged queuing will be disabled. This is less
21697 		 * than optimal behavior, but the driver is unable to determine
21698 		 * the target state and so assumes tagged queuing is unsupported.
21699 		 */
21700 		pkt->pkt_flags = 0;
21701 		un->un_tagflags = 0;
21702 
21703 		if (un->un_f_opt_queueing == TRUE) {
21704 			un->un_throttle = min(un->un_throttle, 3);
21705 		} else {
21706 			un->un_throttle = 1;
21707 		}
21708 		mutex_exit(SD_MUTEX(un));
21709 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
21710 		mutex_enter(SD_MUTEX(un));
21711 		break;
21712 	case CMD_INCOMPLETE:
21713 		/*
21714 		 * The transport stopped with an abnormal state, fallthrough and
21715 		 * reset the target and/or bus unless selection did not complete
21716 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
21717 		 * go through a target/bus reset
21718 		 */
21719 		if (pkt->pkt_state == STATE_GOT_BUS) {
21720 			break;
21721 		}
21722 		/*FALLTHROUGH*/
21723 
21724 	case CMD_TIMEOUT:
21725 	default:
21726 		/*
21727 		 * The lun may still be running the command, so a lun reset
21728 		 * should be attempted. If the lun reset fails or cannot be
21729 		 * issued, then try a target reset. Lastly, try a bus reset.
21730 		 */
21731 		if ((pkt->pkt_statistics &
21732 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
21733 			int reset_retval = 0;
21734 			mutex_exit(SD_MUTEX(un));
21735 			if (un->un_f_allow_bus_device_reset == TRUE) {
21736 				if (un->un_f_lun_reset_enabled == TRUE) {
21737 					reset_retval =
21738 					    scsi_reset(SD_ADDRESS(un),
21739 					    RESET_LUN);
21740 				}
21741 				if (reset_retval == 0) {
21742 					reset_retval =
21743 					    scsi_reset(SD_ADDRESS(un),
21744 					    RESET_TARGET);
21745 				}
21746 			}
21747 			if (reset_retval == 0) {
21748 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
21749 			}
21750 			mutex_enter(SD_MUTEX(un));
21751 		}
21752 		break;
21753 	}
21754 
21755 	/* A device/bus reset has occurred; update the reservation status. */
21756 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
21757 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
21758 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21759 			un->un_resvd_status |=
21760 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21761 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21762 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
21763 		}
21764 	}
21765 
21766 	/*
21767 	 * The disk has been turned off; Update the device state.
21768 	 *
21769 	 * Note: Should we be offlining the disk here?
21770 	 */
21771 	if (pkt->pkt_state == STATE_GOT_BUS) {
21772 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
21773 		    "Disk not responding to selection\n");
21774 		if (un->un_state != SD_STATE_OFFLINE) {
21775 			New_state(un, SD_STATE_OFFLINE);
21776 		}
21777 	} else if (be_chatty) {
21778 		/*
21779 		 * suppress messages if they are all the same pkt reason;
21780 		 * with TQ, many (up to 256) are returned with the same
21781 		 * pkt_reason
21782 		 */
21783 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
21784 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
21785 			    "sd_mhd_watch_incomplete: "
21786 			    "SCSI transport failed: reason '%s'\n",
21787 			    scsi_rname(pkt->pkt_reason));
21788 		}
21789 	}
21790 	un->un_last_pkt_reason = pkt->pkt_reason;
21791 	mutex_exit(SD_MUTEX(un));
21792 }
21793 
21794 
21795 /*
21796  *    Function: sd_sname()
21797  *
21798  * Description: This is a simple little routine to return a string containing
21799  *		a printable description of command status byte for use in
21800  *		logging.
21801  *
21802  *   Arguments: status - pointer to a status byte
21803  *
21804  * Return Code: char * - string containing status description.
21805  */
21806 
21807 static char *
21808 sd_sname(uchar_t status)
21809 {
21810 	switch (status & STATUS_MASK) {
21811 	case STATUS_GOOD:
21812 		return ("good status");
21813 	case STATUS_CHECK:
21814 		return ("check condition");
21815 	case STATUS_MET:
21816 		return ("condition met");
21817 	case STATUS_BUSY:
21818 		return ("busy");
21819 	case STATUS_INTERMEDIATE:
21820 		return ("intermediate");
21821 	case STATUS_INTERMEDIATE_MET:
21822 		return ("intermediate - condition met");
21823 	case STATUS_RESERVATION_CONFLICT:
21824 		return ("reservation_conflict");
21825 	case STATUS_TERMINATED:
21826 		return ("command terminated");
21827 	case STATUS_QFULL:
21828 		return ("queue full");
21829 	default:
21830 		return ("<unknown status>");
21831 	}
21832 }
21833 
21834 
21835 /*
21836  *    Function: sd_mhd_resvd_recover()
21837  *
21838  * Description: This function adds a reservation entry to the
21839  *		sd_resv_reclaim_request list and signals the reservation
21840  *		reclaim thread that there is work pending. If the reservation
21841  *		reclaim thread has not been previously created this function
21842  *		will kick it off.
21843  *
21844  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21845  *			among multiple watches that share this callback function
21846  *
21847  *     Context: This routine is called by timeout() and is run in interrupt
21848  *		context. It must not sleep or call other functions which may
21849  *		sleep.
21850  */
21851 
21852 static void
21853 sd_mhd_resvd_recover(void *arg)
21854 {
21855 	dev_t			dev = (dev_t)arg;
21856 	struct sd_lun		*un;
21857 	struct sd_thr_request	*sd_treq = NULL;
21858 	struct sd_thr_request	*sd_cur = NULL;
21859 	struct sd_thr_request	*sd_prev = NULL;
21860 	int			already_there = 0;
21861 
21862 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21863 		return;
21864 	}
21865 
21866 	mutex_enter(SD_MUTEX(un));
21867 	un->un_resvd_timeid = NULL;
21868 	if (un->un_resvd_status & SD_WANT_RESERVE) {
21869 		/*
21870 		 * There was a reset so don't issue the reserve, allow the
21871 		 * sd_mhd_watch_cb callback function to notice this and
21872 		 * reschedule the timeout for reservation.
21873 		 */
21874 		mutex_exit(SD_MUTEX(un));
21875 		return;
21876 	}
21877 	mutex_exit(SD_MUTEX(un));
21878 
21879 	/*
21880 	 * Add this device to the sd_resv_reclaim_request list and the
21881 	 * sd_resv_reclaim_thread should take care of the rest.
21882 	 *
21883 	 * Note: We can't sleep in this context so if the memory allocation
21884 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
21885 	 * reschedule the timeout for reservation.  (4378460)
21886 	 */
21887 	sd_treq = (struct sd_thr_request *)
21888 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
21889 	if (sd_treq == NULL) {
21890 		return;
21891 	}
21892 
21893 	sd_treq->sd_thr_req_next = NULL;
21894 	sd_treq->dev = dev;
21895 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21896 	if (sd_tr.srq_thr_req_head == NULL) {
21897 		sd_tr.srq_thr_req_head = sd_treq;
21898 	} else {
21899 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
21900 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
21901 			if (sd_cur->dev == dev) {
21902 				/*
21903 				 * already in the queue, so don't log
21904 				 * another request for the device
21905 				 */
21906 				already_there = 1;
21907 				break;
21908 			}
21909 			sd_prev = sd_cur;
21910 		}
21911 		if (!already_there) {
21912 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
21913 			    "logging request for %lx\n", dev);
21914 			sd_prev->sd_thr_req_next = sd_treq;
21915 		} else {
21916 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
21917 		}
21918 	}
21919 
21920 	/*
21921 	 * Create a kernel thread to do the reservation reclaim and free up this
21922 	 * thread. We cannot block this thread while we go off to do the
21923 	 * reservation reclaim.
21924 	 */
21925 	if (sd_tr.srq_resv_reclaim_thread == NULL)
21926 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
21927 		    sd_resv_reclaim_thread, NULL,
21928 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
21929 
21930 	/* Tell the reservation reclaim thread that it has work to do */
21931 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
21932 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21933 }
21934 
21935 /*
21936  *    Function: sd_resv_reclaim_thread()
21937  *
21938  * Description: This function implements the reservation reclaim operations
21939  *
21940  *   Arguments: none; pending requests are dequeued from the global
21941  *		      sd_resv_reclaim_request list (sd_tr)
21942  */
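/*
 * Lifecycle sketch (from the code below): the thread is created on
 * demand by sd_mhd_resvd_recover(), waits on srq_resv_reclaim_cv for
 * work, drains sd_tr.srq_thr_req_head under srq_resv_reclaim_mutex,
 * re-reserving each queued device via sd_reserve_release(), and exits
 * (clearing srq_resv_reclaim_thread) once the queue is empty.
 */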
21943 
21944 static void
21945 sd_resv_reclaim_thread()
21946 {
21947 	struct sd_lun		*un;
21948 	struct sd_thr_request	*sd_mhreq;
21949 
21950 	/* Wait for work */
21951 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21952 	if (sd_tr.srq_thr_req_head == NULL) {
21953 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
21954 		    &sd_tr.srq_resv_reclaim_mutex);
21955 	}
21956 
21957 	/* Loop while we have work */
21958 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
21959 		un = ddi_get_soft_state(sd_state,
21960 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
21961 		if (un == NULL) {
21962 			/*
21963 			 * softstate structure is NULL so just
21964 			 * dequeue the request and continue
21965 			 */
21966 			sd_tr.srq_thr_req_head =
21967 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21968 			kmem_free(sd_tr.srq_thr_cur_req,
21969 			    sizeof (struct sd_thr_request));
21970 			continue;
21971 		}
21972 
21973 		/* dequeue the request */
21974 		sd_mhreq = sd_tr.srq_thr_cur_req;
21975 		sd_tr.srq_thr_req_head =
21976 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21977 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21978 
21979 		/*
21980 		 * Reclaim reservation only if SD_RESERVE is still set. There
21981 		 * may have been a call to MHIOCRELEASE before we got here.
21982 		 */
21983 		mutex_enter(SD_MUTEX(un));
21984 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21985 			/*
21986 			 * Note: The SD_LOST_RESERVE flag is cleared before
21987 			 * reclaiming the reservation. If this is done after the
21988 			 * call to sd_reserve_release a reservation loss in the
21989 			 * window between pkt completion of reserve cmd and
21990 			 * mutex_enter below may not be recognized
21991 			 */
21992 			un->un_resvd_status &= ~SD_LOST_RESERVE;
21993 			mutex_exit(SD_MUTEX(un));
21994 
21995 			if (sd_reserve_release(sd_mhreq->dev,
21996 			    SD_RESERVE) == 0) {
21997 				mutex_enter(SD_MUTEX(un));
21998 				un->un_resvd_status |= SD_RESERVE;
21999 				mutex_exit(SD_MUTEX(un));
22000 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22001 				    "sd_resv_reclaim_thread: "
22002 				    "Reservation Recovered\n");
22003 			} else {
22004 				mutex_enter(SD_MUTEX(un));
22005 				un->un_resvd_status |= SD_LOST_RESERVE;
22006 				mutex_exit(SD_MUTEX(un));
22007 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22008 				    "sd_resv_reclaim_thread: Failed "
22009 				    "Reservation Recovery\n");
22010 			}
22011 		} else {
22012 			mutex_exit(SD_MUTEX(un));
22013 		}
22014 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22015 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
22016 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22017 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
22018 		/*
22019 		 * wakeup the destroy thread if anyone is waiting on
22020 		 * us to complete.
22021 		 */
22022 		cv_signal(&sd_tr.srq_inprocess_cv);
22023 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
22024 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
22025 	}
22026 
22027 	/*
22028 	 * Clean up the sd_tr structure now that this thread is exiting.
22029 	 */
22030 	ASSERT(sd_tr.srq_thr_req_head == NULL);
22031 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
22032 	sd_tr.srq_resv_reclaim_thread = NULL;
22033 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22034 	thread_exit();
22035 }
22036 
22037 
22038 /*
22039  *    Function: sd_rmv_resv_reclaim_req()
22040  *
22041  * Description: This function removes any pending reservation reclaim requests
22042  *		for the specified device.
22043  *
22044  *   Arguments: dev - the device 'dev_t'
22045  */
22046 
22047 static void
22048 sd_rmv_resv_reclaim_req(dev_t dev)
22049 {
22050 	struct sd_thr_request *sd_mhreq;
22051 	struct sd_thr_request *sd_prev;
22052 
22053 	/* Remove a reservation reclaim request from the list */
22054 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22055 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22056 		/*
22057 		 * We are attempting to reinstate reservation for
22058 		 * this device. We wait for sd_reserve_release()
22059 		 * to return before we return.
22060 		 */
22061 		cv_wait(&sd_tr.srq_inprocess_cv,
22062 		    &sd_tr.srq_resv_reclaim_mutex);
22063 	} else {
22064 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22065 		if (sd_mhreq && sd_mhreq->dev == dev) {
22066 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22067 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22068 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22069 			return;
22070 		}
22071 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22072 			if (sd_mhreq && sd_mhreq->dev == dev) {
22073 				break;
22074 			}
22075 			sd_prev = sd_mhreq;
22076 		}
22077 		if (sd_mhreq != NULL) {
22078 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22079 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22080 		}
22081 	}
22082 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22083 }
22084 
22085 
22086 /*
22087  *    Function: sd_mhd_reset_notify_cb()
22088  *
22089  * Description: This is a call back function for scsi_reset_notify. This
22090  *		function updates the softstate reserved status and logs the
22091  *		reset. The driver scsi watch facility callback function
22092  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22093  *		will reclaim the reservation.
22094  *
22095  *   Arguments: arg  - driver soft state (unit) structure
22096  */
22097 
22098 static void
22099 sd_mhd_reset_notify_cb(caddr_t arg)
22100 {
22101 	struct sd_lun *un = (struct sd_lun *)arg;
22102 
22103 	mutex_enter(SD_MUTEX(un));
22104 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22105 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22106 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22107 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22108 	}
22109 	mutex_exit(SD_MUTEX(un));
22110 }
22111 
22112 
22113 /*
22114  *    Function: sd_take_ownership()
22115  *
22116  * Description: This routine implements an algorithm to achieve a stable
22117  *		reservation on disks which don't implement priority reserve,
22118  *		and makes sure that other hosts lose any re-reservation attempts.
22119  *		This algorithm consists of a loop that keeps issuing the RESERVE
22120  *		for some period of time (min_ownership_delay, default 6 seconds).
22121  *		During that loop, it looks to see if there has been a bus device
22122  *		reset or bus reset (both of which cause an existing reservation
22123  *		to be lost). If the reservation is lost, issue RESERVE until a
22124  *		period of min_ownership_delay with no resets has gone by, or
22125  *		until max_ownership_delay has expired. This loop ensures that
22126  *		the host really did manage to reserve the device, in spite of
22127  *		resets. The looping for min_ownership_delay (default six
22128  *		seconds) is important to early generation clustering products,
22129  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22130  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22131  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22132  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22133  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22134  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22135  *		no longer "owns" the disk and will have panicked itself.  Thus,
22136  *		the host issuing the MHIOCTKOWN is assured (with timing
22137  *		dependencies) that by the time it actually starts to use the
22138  *		disk for real work, the old owner is no longer accessing it.
22139  *
22140  *		min_ownership_delay is the minimum amount of time for which the
22141  *		disk must be reserved continuously devoid of resets before the
22142  *		MHIOCTKOWN ioctl will return success.
22143  *
22144  *		max_ownership_delay is the maximum amount of time within which
22145  *		the take ownership must succeed or time out with an error.
22146  *
22147  *   Arguments: dev - the device 'dev_t'
22148  *		*p  - struct containing timing info.
22149  *
22150  * Return Code: 0 for success or error code
22151  */
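/*
 * Illustrative timing (a sketch, assuming the defaults used below):
 * with min_ownership_delay = 6000000 usec and max_ownership_delay =
 * 30000000 usec, each loop iteration sleeps 500000 usec, so roughly
 * twelve conflict-free iterations (with reservation_count >= 4 also
 * required) must elapse before MHIOCTKOWN returns 0; if resets keep
 * arriving, the attempt is abandoned with EACCES after about 30
 * seconds.
 */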
22152 
22153 static int
22154 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22155 {
22156 	struct sd_lun	*un;
22157 	int		rval;
22158 	int		err;
22159 	int		reservation_count   = 0;
22160 	int		min_ownership_delay =  6000000; /* in usec */
22161 	int		max_ownership_delay = 30000000; /* in usec */
22162 	clock_t		start_time;	/* starting time of this algorithm */
22163 	clock_t		end_time;	/* time limit for giving up */
22164 	clock_t		ownership_time;	/* time limit for stable ownership */
22165 	clock_t		current_time;
22166 	clock_t		previous_current_time;
22167 
22168 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22169 		return (ENXIO);
22170 	}
22171 
22172 	/*
22173 	 * Attempt a device reservation. A priority reservation is requested.
22174 	 */
22175 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22176 	    != SD_SUCCESS) {
22177 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22178 		    "sd_take_ownership: return(1)=%d\n", rval);
22179 		return (rval);
22180 	}
22181 
22182 	/* Update the softstate reserved status to indicate the reservation */
22183 	mutex_enter(SD_MUTEX(un));
22184 	un->un_resvd_status |= SD_RESERVE;
22185 	un->un_resvd_status &=
22186 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22187 	mutex_exit(SD_MUTEX(un));
22188 
22189 	if (p != NULL) {
22190 		if (p->min_ownership_delay != 0) {
22191 			min_ownership_delay = p->min_ownership_delay * 1000;
22192 		}
22193 		if (p->max_ownership_delay != 0) {
22194 			max_ownership_delay = p->max_ownership_delay * 1000;
22195 		}
22196 	}
22197 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22198 	    "sd_take_ownership: min, max delays: %d, %d\n",
22199 	    min_ownership_delay, max_ownership_delay);
22200 
22201 	start_time = ddi_get_lbolt();
22202 	current_time	= start_time;
22203 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22204 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22205 
22206 	while (current_time - end_time < 0) {
22207 		delay(drv_usectohz(500000));
22208 
22209 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22210 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22211 				mutex_enter(SD_MUTEX(un));
22212 				rval = (un->un_resvd_status &
22213 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22214 				mutex_exit(SD_MUTEX(un));
22215 				break;
22216 			}
22217 		}
22218 		previous_current_time = current_time;
22219 		current_time = ddi_get_lbolt();
22220 		mutex_enter(SD_MUTEX(un));
22221 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22222 			ownership_time = ddi_get_lbolt() +
22223 			    drv_usectohz(min_ownership_delay);
22224 			reservation_count = 0;
22225 		} else {
22226 			reservation_count++;
22227 		}
22228 		un->un_resvd_status |= SD_RESERVE;
22229 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22230 		mutex_exit(SD_MUTEX(un));
22231 
22232 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22233 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22234 		    "reservation=%s\n", (current_time - previous_current_time),
22235 		    reservation_count ? "ok" : "reclaimed");
22236 
22237 		if (current_time - ownership_time >= 0 &&
22238 		    reservation_count >= 4) {
22239 			rval = 0; /* Achieved a stable ownership */
22240 			break;
22241 		}
22242 		if (current_time - end_time >= 0) {
22243 			rval = EACCES; /* No ownership in max possible time */
22244 			break;
22245 		}
22246 	}
22247 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22248 	    "sd_take_ownership: return(2)=%d\n", rval);
22249 	return (rval);
22250 }
22251 
22252 
22253 /*
22254  *    Function: sd_reserve_release()
22255  *
22256  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22257  *		PRIORITY RESERVE commands based on a user specified command type
22258  *
22259  *   Arguments: dev - the device 'dev_t'
22260  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22261  *		      SD_RESERVE, SD_RELEASE
22262  *
22263  * Return Code: 0 or Error Code
22264  */
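/*
 * For reference (standard SCSI-2 opcodes, not defined in this file):
 * RESERVE(6) (SCMD_RESERVE, 0x16) and RELEASE(6) (SCMD_RELEASE, 0x17)
 * are both 6-byte CDB_GROUP0 commands, which is why only cdb[0] needs
 * to be filled in below; the remaining five bytes stay zero.
 */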
22265 
22266 static int
22267 sd_reserve_release(dev_t dev, int cmd)
22268 {
22269 	struct uscsi_cmd	*com = NULL;
22270 	struct sd_lun		*un = NULL;
22271 	char			cdb[CDB_GROUP0];
22272 	int			rval;
22273 
22274 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22275 	    (cmd == SD_PRIORITY_RESERVE));
22276 
22277 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22278 		return (ENXIO);
22279 	}
22280 
22281 	/* instantiate and initialize the command and cdb */
22282 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22283 	bzero(cdb, CDB_GROUP0);
22284 	com->uscsi_flags   = USCSI_SILENT;
22285 	com->uscsi_timeout = un->un_reserve_release_time;
22286 	com->uscsi_cdblen  = CDB_GROUP0;
22287 	com->uscsi_cdb	   = cdb;
22288 	if (cmd == SD_RELEASE) {
22289 		cdb[0] = SCMD_RELEASE;
22290 	} else {
22291 		cdb[0] = SCMD_RESERVE;
22292 	}
22293 
22294 	/* Send the command. */
22295 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22296 	    SD_PATH_STANDARD);
22297 
22298 	/*
22299 	 * "break" a reservation that is held by another host, by issuing a
22300 	 * reset if priority reserve is desired, and we could not get the
22301 	 * device.
22302 	 */
22303 	if ((cmd == SD_PRIORITY_RESERVE) &&
22304 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22305 		/*
22306 		 * First try to reset the LUN. If we cannot, then try a target
22307 		 * reset, followed by a bus reset if the target reset fails.
22308 		 */
22309 		int reset_retval = 0;
22310 		if (un->un_f_lun_reset_enabled == TRUE) {
22311 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22312 		}
22313 		if (reset_retval == 0) {
22314 			/* The LUN reset either failed or was not issued */
22315 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22316 		}
22317 		if ((reset_retval == 0) &&
22318 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22319 			rval = EIO;
22320 			kmem_free(com, sizeof (*com));
22321 			return (rval);
22322 		}
22323 
22324 		bzero(com, sizeof (struct uscsi_cmd));
22325 		com->uscsi_flags   = USCSI_SILENT;
22326 		com->uscsi_cdb	   = cdb;
22327 		com->uscsi_cdblen  = CDB_GROUP0;
22328 		com->uscsi_timeout = 5;
22329 
22330 		/*
22331 		 * Reissue the last reserve command, this time without request
22332 		 * sense.  Assume that it is just a regular reserve command.
22333 		 */
22334 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22335 		    SD_PATH_STANDARD);
22336 	}
22337 
22338 	/* Return an error if still getting a reservation conflict. */
22339 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22340 		rval = EACCES;
22341 	}
22342 
22343 	kmem_free(com, sizeof (*com));
22344 	return (rval);
22345 }
22346 
22347 
22348 #define	SD_NDUMP_RETRIES	12
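/*
 * Retry-budget sketch: both the packet-allocation loop and the write
 * loop in sddump() below try up to SD_NDUMP_RETRIES (12) times, and
 * the write loop throws in a bus reset halfway through (at
 * i == SD_NDUMP_RETRIES/2) if lun/target resets are not making
 * progress.
 */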
22349 /*
22350  *	System Crash Dump routine
22351  */
22352 
22353 static int
22354 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22355 {
22356 	int		instance;
22357 	int		partition;
22358 	int		i;
22359 	int		err;
22360 	struct sd_lun	*un;
22361 	struct scsi_pkt *wr_pktp;
22362 	struct buf	*wr_bp;
22363 	struct buf	wr_buf;
22364 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22365 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22366 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22367 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22368 	size_t		io_start_offset;
22369 	int		doing_rmw = FALSE;
22370 	int		rval;
22371 #if defined(__i386) || defined(__amd64)
22372 	ssize_t dma_resid;
22373 	daddr_t oblkno;
22374 #endif
22375 	diskaddr_t	nblks = 0;
22376 	diskaddr_t	start_block;
22377 
22378 	instance = SDUNIT(dev);
22379 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22380 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22381 		return (ENXIO);
22382 	}
22383 
22384 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22385 
22386 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22387 
22388 	partition = SDPART(dev);
22389 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22390 
22391 	/* Validate the blocks to dump against the partition size. */
22392 
22393 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22394 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22395 
22396 	if ((blkno + nblk) > nblks) {
22397 		SD_TRACE(SD_LOG_DUMP, un,
22398 		    "sddump: dump range larger than partition: "
22399 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22400 		    blkno, nblk, nblks);
22401 		return (EINVAL);
22402 	}
22403 
22404 	mutex_enter(&un->un_pm_mutex);
22405 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22406 		struct scsi_pkt *start_pktp;
22407 
22408 		mutex_exit(&un->un_pm_mutex);
22409 
22410 		/*
22411 		 * Use the pm framework to power on the HBA first.
22412 		 */
22413 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22414 
22415 		/*
22416 		 * Dump no longer uses sdpower to power on a device; it is
22417 		 * in-line here so it can be done in polled mode.
22418 		 */
22419 
22420 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22421 
22422 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22423 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22424 
22425 		if (start_pktp == NULL) {
22426 			/* We were not given a SCSI packet, fail. */
22427 			return (EIO);
22428 		}
22429 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22430 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22431 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22432 		start_pktp->pkt_flags = FLAG_NOINTR;
22433 
22434 		mutex_enter(SD_MUTEX(un));
22435 		SD_FILL_SCSI1_LUN(un, start_pktp);
22436 		mutex_exit(SD_MUTEX(un));
22437 		/*
22438 		 * Scsi_poll returns 0 (success) if the command completes and
22439 		 * the status block is STATUS_GOOD.
22440 		 */
22441 		if (sd_scsi_poll(un, start_pktp) != 0) {
22442 			scsi_destroy_pkt(start_pktp);
22443 			return (EIO);
22444 		}
22445 		scsi_destroy_pkt(start_pktp);
22446 		(void) sd_ddi_pm_resume(un);
22447 	} else {
22448 		mutex_exit(&un->un_pm_mutex);
22449 	}
22450 
22451 	mutex_enter(SD_MUTEX(un));
22452 	un->un_throttle = 0;
22453 
22454 	/*
22455 	 * The first time through, reset the specific target device.
22456 	 * However, when cpr calls sddump we know that sd is in a
22457 	 * good state, so no bus reset is required.
22458 	 * Clear sense data via Request Sense cmd.
22459 	 * In sddump we don't care about allow_bus_device_reset anymore
22460 	 */
22461 
22462 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22463 	    (un->un_state != SD_STATE_DUMPING)) {
22464 
22465 		New_state(un, SD_STATE_DUMPING);
22466 
22467 		if (un->un_f_is_fibre == FALSE) {
22468 			mutex_exit(SD_MUTEX(un));
22469 			/*
22470 			 * Attempt a bus reset for parallel scsi.
22471 			 *
22472 			 * Note: A bus reset is required because on some host
22473 			 * systems (i.e. E420R) a bus device reset is
22474 			 * insufficient to reset the state of the target.
22475 			 *
22476 			 * Note: Don't issue the reset for fibre-channel,
22477 			 * because this tends to hang the bus (loop) for
22478 			 * too long while everyone is logging out and in
22479 			 * and the deadman timer for dumping will fire
22480 			 * before the dump is complete.
22481 			 */
22482 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22483 				mutex_enter(SD_MUTEX(un));
22484 				Restore_state(un);
22485 				mutex_exit(SD_MUTEX(un));
22486 				return (EIO);
22487 			}
22488 
22489 			/* Delay to give the device some recovery time. */
22490 			drv_usecwait(10000);
22491 
22492 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22493 				SD_INFO(SD_LOG_DUMP, un,
22494 				    "sddump: sd_send_polled_RQS failed\n");
22495 			}
22496 			mutex_enter(SD_MUTEX(un));
22497 		}
22498 	}
22499 
22500 	/*
22501 	 * Convert the partition-relative block number to a
22502 	 * disk physical block number.
22503 	 */
22504 	blkno += start_block;
22505 
22506 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22507 
22508 
22509 	/*
22510 	 * Check if the device has a non-512 block size.
22511 	 */
22512 	wr_bp = NULL;
22513 	if (NOT_DEVBSIZE(un)) {
22514 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22515 		tgt_byte_count = nblk * un->un_sys_blocksize;
22516 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22517 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22518 			doing_rmw = TRUE;
22519 			/*
22520 			 * Calculate the block number and number of block
22521 			 * in terms of the media block size.
22522 			 */
22523 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22524 			tgt_nblk =
22525 			    ((tgt_byte_offset + tgt_byte_count +
22526 				(un->un_tgt_blocksize - 1)) /
22527 				un->un_tgt_blocksize) - tgt_blkno;
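			/*
			 * Worked example (hypothetical numbers): with
			 * un_sys_blocksize = 512 and un_tgt_blocksize
			 * = 2048, blkno = 3 and nblk = 2 give
			 * tgt_byte_offset = 1536 and tgt_byte_count =
			 * 1024; neither is 2048-aligned, so tgt_blkno
			 * = 0, tgt_nblk = 2, and io_start_offset
			 * below works out to 1536.
			 */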
22528 
22529 			/*
22530 			 * Invoke the routine which is going to do read part
22531 			 * of read-modify-write.
22532 			 * Note that this routine returns a pointer to
22533 			 * a valid bp in wr_bp.
22534 			 */
22535 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22536 			    &wr_bp);
22537 			if (err) {
22538 				mutex_exit(SD_MUTEX(un));
22539 				return (err);
22540 			}
22541 			/*
22542 			 * Offset is being calculated as -
22543 			 * (original block # * system block size) -
22544 			 * (new block # * target block size)
22545 			 */
22546 			io_start_offset =
22547 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22548 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22549 
22550 			ASSERT((io_start_offset >= 0) &&
22551 			    (io_start_offset < un->un_tgt_blocksize));
22552 			/*
22553 			 * Do the modify portion of read modify write.
22554 			 */
22555 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22556 			    (size_t)nblk * un->un_sys_blocksize);
22557 		} else {
22558 			doing_rmw = FALSE;
22559 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22560 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22561 		}
22562 
22563 		/* Convert blkno and nblk to target blocks */
22564 		blkno = tgt_blkno;
22565 		nblk = tgt_nblk;
22566 	} else {
22567 		wr_bp = &wr_buf;
22568 		bzero(wr_bp, sizeof (struct buf));
22569 		wr_bp->b_flags		= B_BUSY;
22570 		wr_bp->b_un.b_addr	= addr;
22571 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
22572 		wr_bp->b_resid		= 0;
22573 	}
22574 
22575 	mutex_exit(SD_MUTEX(un));
22576 
22577 	/*
22578 	 * Obtain a SCSI packet for the write command.
22579 	 * It should be safe to call the allocator here without
22580 	 * worrying about being locked for DVMA mapping because
22581 	 * the address we're passed is already a DVMA mapping
22582 	 *
22583 	 * We are also not going to worry about semaphore ownership
22584 	 * in the dump buffer. Dumping is single threaded at present.
22585 	 */
22586 
22587 	wr_pktp = NULL;
22588 
22589 #if defined(__i386) || defined(__amd64)
22590 	dma_resid = wr_bp->b_bcount;
22591 	oblkno = blkno;
22592 	while (dma_resid != 0) {
22593 #endif
22594 
22595 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22596 		wr_bp->b_flags &= ~B_ERROR;
22597 
22598 #if defined(__i386) || defined(__amd64)
22599 		blkno = oblkno +
22600 			((wr_bp->b_bcount - dma_resid) /
22601 			    un->un_tgt_blocksize);
22602 		nblk = dma_resid / un->un_tgt_blocksize;
22603 
22604 		if (wr_pktp) {
22605 			/* Partial DMA transfers after initial transfer */
22606 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22607 			    blkno, nblk);
22608 		} else {
22609 			/* Initial transfer */
22610 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22611 			    un->un_pkt_flags, NULL_FUNC, NULL,
22612 			    blkno, nblk);
22613 		}
22614 #else
22615 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22616 		    0, NULL_FUNC, NULL, blkno, nblk);
22617 #endif
22618 
22619 		if (rval == 0) {
22620 			/* We were given a SCSI packet, continue. */
22621 			break;
22622 		}
22623 
22624 		if (i == 0) {
22625 			if (wr_bp->b_flags & B_ERROR) {
22626 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22627 				    "no resources for dumping; "
22628 				    "error code: 0x%x, retrying",
22629 				    geterror(wr_bp));
22630 			} else {
22631 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22632 				    "no resources for dumping; retrying");
22633 			}
22634 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22635 			if (wr_bp->b_flags & B_ERROR) {
22636 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22637 				    "no resources for dumping; error code: "
22638 				    "0x%x, retrying\n", geterror(wr_bp));
22639 			}
22640 		} else {
22641 			if (wr_bp->b_flags & B_ERROR) {
22642 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22643 				    "no resources for dumping; "
22644 				    "error code: 0x%x, retries failed, "
22645 				    "giving up.\n", geterror(wr_bp));
22646 			} else {
22647 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22648 				    "no resources for dumping; "
22649 				    "retries failed, giving up.\n");
22650 			}
22651 			mutex_enter(SD_MUTEX(un));
22652 			Restore_state(un);
22653 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
22654 				mutex_exit(SD_MUTEX(un));
22655 				scsi_free_consistent_buf(wr_bp);
22656 			} else {
22657 				mutex_exit(SD_MUTEX(un));
22658 			}
22659 			return (EIO);
22660 		}
22661 		drv_usecwait(10000);
22662 	}
22663 
22664 #if defined(__i386) || defined(__amd64)
22665 	/*
22666 	 * save the resid from PARTIAL_DMA
22667 	 */
22668 	dma_resid = wr_pktp->pkt_resid;
22669 	if (dma_resid != 0)
22670 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
22671 	wr_pktp->pkt_resid = 0;
22672 #endif
22673 
22674 	/* SunBug 1222170 */
22675 	wr_pktp->pkt_flags = FLAG_NOINTR;
22676 
22677 	err = EIO;
22678 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22679 
22680 		/*
22681 		 * Scsi_poll returns 0 (success) if the command completes and
22682 		 * the status block is STATUS_GOOD.  We should only check
22683 		 * errors if this condition is not true.  Even then we should
22684 		 * send our own request sense packet only if we have a check
22685 		 * condition and auto request sense has not been performed by
22686 		 * the hba.
22687 		 */
22688 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
22689 
22690 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
22691 		    (wr_pktp->pkt_resid == 0)) {
22692 			err = SD_SUCCESS;
22693 			break;
22694 		}
22695 
22696 		/*
22697 		 * Check CMD_DEV_GONE first; give up if the device is gone.
22698 		 */
22699 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
22700 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22701 			    "Device is gone\n");
22702 			break;
22703 		}
22704 
22705 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
22706 			SD_INFO(SD_LOG_DUMP, un,
22707 			    "sddump: write failed with CHECK, try # %d\n", i);
22708 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
22709 				(void) sd_send_polled_RQS(un);
22710 			}
22711 
22712 			continue;
22713 		}
22714 
22715 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
22716 			int reset_retval = 0;
22717 
22718 			SD_INFO(SD_LOG_DUMP, un,
22719 			    "sddump: write failed with BUSY, try # %d\n", i);
22720 
22721 			if (un->un_f_lun_reset_enabled == TRUE) {
22722 				reset_retval = scsi_reset(SD_ADDRESS(un),
22723 				    RESET_LUN);
22724 			}
22725 			if (reset_retval == 0) {
22726 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22727 			}
22728 			(void) sd_send_polled_RQS(un);
22729 
22730 		} else {
22731 			SD_INFO(SD_LOG_DUMP, un,
22732 			    "sddump: write failed with 0x%x, try # %d\n",
22733 			    SD_GET_PKT_STATUS(wr_pktp), i);
22734 			mutex_enter(SD_MUTEX(un));
22735 			sd_reset_target(un, wr_pktp);
22736 			mutex_exit(SD_MUTEX(un));
22737 		}
22738 
22739 		/*
22740 		 * If we are not getting anywhere with lun/target resets,
22741 		 * let's reset the bus.
22742 		 */
22743 		if (i == SD_NDUMP_RETRIES/2) {
22744 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22745 			(void) sd_send_polled_RQS(un);
22746 		}
22747 
22748 	}
22749 #if defined(__i386) || defined(__amd64)
22750 	}	/* dma_resid */
22751 #endif
22752 
22753 	scsi_destroy_pkt(wr_pktp);
22754 	mutex_enter(SD_MUTEX(un));
22755 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
22756 		mutex_exit(SD_MUTEX(un));
22757 		scsi_free_consistent_buf(wr_bp);
22758 	} else {
22759 		mutex_exit(SD_MUTEX(un));
22760 	}
22761 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
22762 	return (err);
22763 }
22764 
22765 /*
22766  *    Function: sd_scsi_poll()
22767  *
22768  * Description: This is a wrapper for the scsi_poll call.
22769  *
22770  *   Arguments: sd_lun - The unit structure
22771  *              scsi_pkt - The scsi packet being sent to the device.
22772  *
22773  * Return Code: 0 - Command completed successfully with good status
22774  *             -1 - Command failed.  This could indicate a check condition
22775  *                  or other status value requiring recovery action.
22776  *
22777  */
22778 
22779 static int
22780 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
22781 {
22782 	int status;
22783 
22784 	ASSERT(un != NULL);
22785 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22786 	ASSERT(pktp != NULL);
22787 
22788 	status = SD_SUCCESS;
22789 
22790 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
22791 		pktp->pkt_flags |= un->un_tagflags;
22792 		pktp->pkt_flags &= ~FLAG_NODISCON;
22793 	}
22794 
22795 	status = sd_ddi_scsi_poll(pktp);
22796 	/*
22797 	 * Scsi_poll returns 0 (success) if the command completes and the
22798 	 * status block is STATUS_GOOD.  We should only check errors if this
22799 	 * condition is not true.  Even then we should send our own request
22800 	 * sense packet only if we have a check condition and auto
22801 	 * request sense has not been performed by the hba.
22802 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
22803 	 */
22804 	if ((status != SD_SUCCESS) &&
22805 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
22806 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
22807 	    (pktp->pkt_reason != CMD_DEV_GONE))
22808 		(void) sd_send_polled_RQS(un);
22809 
22810 	return (status);
22811 }
22812 
22813 /*
22814  *    Function: sd_send_polled_RQS()
22815  *
22816  * Description: This sends the request sense command to a device.
22817  *
22818  *   Arguments: sd_lun - The unit structure
22819  *
22820  * Return Code: 0 - Command completed successfully with good status
22821  *             -1 - Command failed.
22822  *
22823  */
22824 
22825 static int
22826 sd_send_polled_RQS(struct sd_lun *un)
22827 {
22828 	int	ret_val;
22829 	struct	scsi_pkt	*rqs_pktp;
22830 	struct	buf		*rqs_bp;
22831 
22832 	ASSERT(un != NULL);
22833 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22834 
22835 	ret_val = SD_SUCCESS;
22836 
22837 	rqs_pktp = un->un_rqs_pktp;
22838 	rqs_bp	 = un->un_rqs_bp;
22839 
22840 	mutex_enter(SD_MUTEX(un));
22841 
22842 	if (un->un_sense_isbusy) {
22843 		ret_val = SD_FAILURE;
22844 		mutex_exit(SD_MUTEX(un));
22845 		return (ret_val);
22846 	}
22847 
22848 	/*
22849 	 * If the request sense buffer (and packet) is not in use,
22850 	 * let's set the un_sense_isbusy and send our packet
22851 	 */
22852 	un->un_sense_isbusy 	= 1;
22853 	rqs_pktp->pkt_resid  	= 0;
22854 	rqs_pktp->pkt_reason 	= 0;
22855 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
22856 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
22857 
22858 	mutex_exit(SD_MUTEX(un));
22859 
22860 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
22861 	    " 0x%p\n", rqs_bp->b_un.b_addr);
22862 
22863 	/*
22864 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
22865 	 * axle - it has a call into us!
22866 	 */
22867 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
22868 		SD_INFO(SD_LOG_COMMON, un,
22869 		    "sd_send_polled_RQS: RQS failed\n");
22870 	}
22871 
22872 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
22873 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
22874 
22875 	mutex_enter(SD_MUTEX(un));
22876 	un->un_sense_isbusy = 0;
22877 	mutex_exit(SD_MUTEX(un));
22878 
22879 	return (ret_val);
22880 }
22881 
22882 /*
22883  * Defines needed for localized version of the scsi_poll routine.
22884  */
22885 #define	SD_CSEC		10000			/* usecs */
22886 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
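/*
 * A worked example of the units above: SD_CSEC is one polling tick of
 * 10000 usec (10 msec), so SD_SEC_TO_CSEC = 1000000/10000 = 100 ticks
 * per second.  A pkt_time of 60 seconds therefore yields a busy_count
 * limit of 60 * 100 = 6000 iterations in sd_ddi_scsi_poll() below.
 */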
22887 
22888 
22889 /*
22890  *    Function: sd_ddi_scsi_poll()
22891  *
22892  * Description: Localized version of the scsi_poll routine.  The purpose is to
22893  *		send a scsi_pkt to a device as a polled command.  This version
22894  *		is to ensure more robust handling of transport errors.
22895  *		Specifically this routine cures not ready, coming ready
22896  *		transition for power up and reset of sonoma's.  This can take
22897  *		up to 45 seconds for power-on and 20 seconds for reset of a
22898  * 		sonoma lun.
22899  *
22900  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
22901  *
22902  * Return Code: 0 - Command completed successfully with good status
22903  *             -1 - Command failed.
22904  *
22905  */
22906 
22907 static int
22908 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
22909 {
22910 	int busy_count;
22911 	int timeout;
22912 	int rval = SD_FAILURE;
22913 	int savef;
22914 	uint8_t *sensep;
22915 	long savet;
22916 	void (*savec)();
22917 	/*
22918 	 * The following is defined in machdep.c and is used in determining if
22919 	 * the scsi transport system will do polled I/O instead of interrupt
22920 	 * I/O when called from xx_dump().
22921 	 */
22922 	extern int do_polled_io;
22923 
22924 	/*
22925 	 * save old flags in pkt, to restore at end
22926 	 */
22927 	savef = pkt->pkt_flags;
22928 	savec = pkt->pkt_comp;
22929 	savet = pkt->pkt_time;
22930 
22931 	pkt->pkt_flags |= FLAG_NOINTR;
22932 
22933 	/*
22934 	 * XXX there is nothing in the SCSA spec that states that we should not
22935 	 * do a callback for polled cmds; however, removing this will break sd
22936 	 * and probably other target drivers
22937 	 */
22938 	pkt->pkt_comp = NULL;
22939 
22940 	/*
22941 	 * we don't like a polled command without timeout.
22942 	 * 60 seconds seems long enough.
22943 	 */
22944 	if (pkt->pkt_time == 0) {
22945 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
22946 	}
22947 
22948 	/*
22949 	 * Send polled cmd.
22950 	 *
22951 	 * We do some error recovery for various errors.  Tran_busy,
22952 	 * queue full, and non-dispatched commands are retried every 10 msec.
22953 	 * as they are typically transient failures.  Busy status and Not
22954 	 * Ready are retried every second as this status takes a while to
22955 	 * change.  Unit attention is retried for pkt_time (60) times
22956 	 * with no delay.
22957 	 */
22958 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
22959 
22960 	for (busy_count = 0; busy_count < timeout; busy_count++) {
22961 		int rc;
22962 		int poll_delay;
22963 
22964 		/*
22965 		 * Initialize pkt status variables.
22966 		 */
22967 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
22968 
22969 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
22970 			if (rc != TRAN_BUSY) {
22971 				/* Transport failed - give up. */
22972 				break;
22973 			} else {
22974 				/* Transport busy - try again. */
22975 				poll_delay = 1 * SD_CSEC; /* 10 msec */
22976 			}
22977 		} else {
22978 			/*
22979 			 * Transport accepted - check pkt status.
22980 			 */
22981 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
22982 			if (pkt->pkt_reason == CMD_CMPLT &&
22983 			    rc == STATUS_CHECK &&
22984 			    pkt->pkt_state & STATE_ARQ_DONE) {
22985 				struct scsi_arq_status *arqstat =
22986 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
22987 
22988 				sensep = (uint8_t *)&arqstat->sts_sensedata;
22989 			} else {
22990 				sensep = NULL;
22991 			}
22992 
22993 			if ((pkt->pkt_reason == CMD_CMPLT) &&
22994 			    (rc == STATUS_GOOD)) {
22995 				/* No error - we're done */
22996 				rval = SD_SUCCESS;
22997 				break;
22998 
22999 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
23000 				/* Lost connection - give up */
23001 				break;
23002 
23003 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
23004 			    (pkt->pkt_state == 0)) {
23005 				/* Pkt not dispatched - try again. */
23006 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23007 
23008 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23009 			    (rc == STATUS_QFULL)) {
23010 				/* Queue full - try again. */
23011 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23012 
23013 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23014 			    (rc == STATUS_BUSY)) {
23015 				/* Busy - try again. */
23016 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23017 				busy_count += (SD_SEC_TO_CSEC - 1);
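				/*
				 * Note: the 1-second delay below consumes
				 * SD_SEC_TO_CSEC (100) ticks of the budget:
				 * busy_count advances by 99 here plus 1 at
				 * the top of the loop.
				 */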
23018 
23019 			} else if ((sensep != NULL) &&
23020 			    (scsi_sense_key(sensep) ==
23021 				KEY_UNIT_ATTENTION)) {
23022 				/* Unit Attention - try again */
23023 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
23024 				continue;
23025 
23026 			} else if ((sensep != NULL) &&
23027 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
23028 			    (scsi_sense_asc(sensep) == 0x04) &&
23029 			    (scsi_sense_ascq(sensep) == 0x01)) {
23030 				/* Not ready -> ready - try again. */
23031 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23032 				busy_count += (SD_SEC_TO_CSEC - 1);
23033 
23034 			} else {
23035 				/* BAD status - give up. */
23036 				break;
23037 			}
23038 		}
23039 
23040 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
23041 		    !do_polled_io) {
23042 			delay(drv_usectohz(poll_delay));
23043 		} else {
23044 			/* we busy wait during cpr_dump or interrupt threads */
23045 			drv_usecwait(poll_delay);
23046 		}
23047 	}
23048 
23049 	pkt->pkt_flags = savef;
23050 	pkt->pkt_comp = savec;
23051 	pkt->pkt_time = savet;
23052 	return (rval);
23053 }
23054 
23055 
23056 /*
23057  *    Function: sd_persistent_reservation_in_read_keys
23058  *
23059  * Description: This routine is the driver entry point for handling CD-ROM
23060  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23061  *		by sending the SCSI-3 PRIN commands to the device.
23062  *		Processes the read keys command response by copying the
23063  *		reservation key information into the user provided buffer.
23064  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23065  *
23066  *   Arguments: un   -  Pointer to soft state struct for the target.
23067  *		usrp -	user provided pointer to multihost Persistent In Read
23068  *			Keys structure (mhioc_inkeys_t)
23069  *		flag -	this argument is a pass through to ddi_copyxxx()
23070  *			directly from the mode argument of ioctl().
23071  *
23072  * Return Code: 0   - Success
23073  *		EACCES
23074  *		ENOTSUP
23075  *		errno return code from sd_send_scsi_cmd()
23076  *
23077  *     Context: Can sleep. Does not return until command is completed.
23078  */
23079 
23080 static int
23081 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23082     mhioc_inkeys_t *usrp, int flag)
23083 {
23084 #ifdef _MULTI_DATAMODEL
23085 	struct mhioc_key_list32	li32;
23086 #endif
23087 	sd_prin_readkeys_t	*in;
23088 	mhioc_inkeys_t		*ptr;
23089 	mhioc_key_list_t	li;
23090 	uchar_t			*data_bufp;
23091 	int 			data_len;
23092 	int			rval;
23093 	size_t			copysz;
23094 
23095 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
23096 		return (EINVAL);
23097 	}
23098 	bzero(&li, sizeof (mhioc_key_list_t));
23099 
23100 	/*
23101 	 * Get the listsize from user
23102 	 */
23103 #ifdef _MULTI_DATAMODEL
23104 
23105 	switch (ddi_model_convert_from(flag & FMODELS)) {
23106 	case DDI_MODEL_ILP32:
23107 		copysz = sizeof (struct mhioc_key_list32);
23108 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23109 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23110 			    "sd_persistent_reservation_in_read_keys: "
23111 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23112 			rval = EFAULT;
23113 			goto done;
23114 		}
23115 		li.listsize = li32.listsize;
23116 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23117 		break;
23118 
23119 	case DDI_MODEL_NONE:
23120 		copysz = sizeof (mhioc_key_list_t);
23121 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23122 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23123 			    "sd_persistent_reservation_in_read_keys: "
23124 			    "failed ddi_copyin: mhioc_key_list_t\n");
23125 			rval = EFAULT;
23126 			goto done;
23127 		}
23128 		break;
23129 	}
23130 
23131 #else /* ! _MULTI_DATAMODEL */
23132 	copysz = sizeof (mhioc_key_list_t);
23133 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23134 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23135 		    "sd_persistent_reservation_in_read_keys: "
23136 		    "failed ddi_copyin: mhioc_key_list_t\n");
23137 		rval = EFAULT;
23138 		goto done;
23139 	}
23140 #endif
23141 
23142 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23143 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
23144 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
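	/*
	 * A sizing sketch: the PRIN READ KEYS response parsed below is a
	 * fixed header (sd_prin_readkeys_t minus its trailing pointer
	 * field, per the sizeof (caddr_t) subtraction above) followed by
	 * up to listsize reservation keys of MHIOC_RESV_KEY_SIZE (8)
	 * bytes each, which is what data_len accounts for.
	 */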
23145 
23146 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23147 	    data_len, data_bufp)) != 0) {
23148 		goto done;
23149 	}
23150 	in = (sd_prin_readkeys_t *)data_bufp;
23151 	ptr->generation = BE_32(in->generation);
23152 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23153 
23154 	/*
23155 	 * Return the min(listsize, listlen) keys
23156 	 */
23157 #ifdef _MULTI_DATAMODEL
23158 
23159 	switch (ddi_model_convert_from(flag & FMODELS)) {
23160 	case DDI_MODEL_ILP32:
23161 		li32.listlen = li.listlen;
23162 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23163 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23164 			    "sd_persistent_reservation_in_read_keys: "
23165 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23166 			rval = EFAULT;
23167 			goto done;
23168 		}
23169 		break;
23170 
23171 	case DDI_MODEL_NONE:
23172 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23173 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23174 			    "sd_persistent_reservation_in_read_keys: "
23175 			    "failed ddi_copyout: mhioc_key_list_t\n");
23176 			rval = EFAULT;
23177 			goto done;
23178 		}
23179 		break;
23180 	}
23181 
23182 #else /* ! _MULTI_DATAMODEL */
23183 
23184 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23185 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23186 		    "sd_persistent_reservation_in_read_keys: "
23187 		    "failed ddi_copyout: mhioc_key_list_t\n");
23188 		rval = EFAULT;
23189 		goto done;
23190 	}
23191 
23192 #endif /* _MULTI_DATAMODEL */
23193 
23194 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23195 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23196 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23197 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23198 		    "sd_persistent_reservation_in_read_keys: "
23199 		    "failed ddi_copyout: keylist\n");
23200 		rval = EFAULT;
23201 	}
23202 done:
23203 	kmem_free(data_bufp, data_len);
23204 	return (rval);
23205 }
23206 
23207 
23208 /*
23209  *    Function: sd_persistent_reservation_in_read_resv
23210  *
23211  * Description: This routine is the driver entry point for handling CD-ROM
23212  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23213  *		by sending the SCSI-3 PRIN commands to the device.
23214  *		Process the read persistent reservations command response by
23215  *		copying the reservation information into the user provided
23216  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
23217  *
23218  *   Arguments: un   -  Pointer to soft state struct for the target.
23219  *		usrp -	user provided pointer to multihost Persistent In Read
23220  *			Keys structure (mhioc_inkeys_t)
23221  *		flag -	this argument is a pass through to ddi_copyxxx()
23222  *			directly from the mode argument of ioctl().
23223  *
23224  * Return Code: 0   - Success
23225  *		EACCES
23226  *		ENOTSUP
23227  *		errno return code from sd_send_scsi_cmd()
23228  *
23229  *     Context: Can sleep. Does not return until command is completed.
23230  */
23231 
23232 static int
23233 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23234     mhioc_inresvs_t *usrp, int flag)
23235 {
23236 #ifdef _MULTI_DATAMODEL
23237 	struct mhioc_resv_desc_list32 resvlist32;
23238 #endif
23239 	sd_prin_readresv_t	*in;
23240 	mhioc_inresvs_t		*ptr;
23241 	sd_readresv_desc_t	*readresv_ptr;
23242 	mhioc_resv_desc_list_t	resvlist;
23243 	mhioc_resv_desc_t 	resvdesc;
23244 	uchar_t			*data_bufp;
23245 	int 			data_len;
23246 	int			rval;
23247 	int			i;
23248 	size_t			copysz;
23249 	mhioc_resv_desc_t	*bufp;
23250 
23251 	if ((ptr = usrp) == NULL) {
23252 		return (EINVAL);
23253 	}
23254 
23255 	/*
23256 	 * Get the listsize from user
23257 	 */
23258 #ifdef _MULTI_DATAMODEL
23259 	switch (ddi_model_convert_from(flag & FMODELS)) {
23260 	case DDI_MODEL_ILP32:
23261 		copysz = sizeof (struct mhioc_resv_desc_list32);
23262 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23263 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23264 			    "sd_persistent_reservation_in_read_resv: "
23265 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23266 			rval = EFAULT;
23267 			goto done;
23268 		}
23269 		resvlist.listsize = resvlist32.listsize;
23270 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23271 		break;
23272 
23273 	case DDI_MODEL_NONE:
23274 		copysz = sizeof (mhioc_resv_desc_list_t);
23275 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23276 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23277 			    "sd_persistent_reservation_in_read_resv: "
23278 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23279 			rval = EFAULT;
23280 			goto done;
23281 		}
23282 		break;
23283 	}
23284 #else /* ! _MULTI_DATAMODEL */
23285 	copysz = sizeof (mhioc_resv_desc_list_t);
23286 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23287 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23288 		    "sd_persistent_reservation_in_read_resv: "
23289 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23290 		rval = EFAULT;
23291 		goto done;
23292 	}
23293 #endif /* ! _MULTI_DATAMODEL */
23294 
23295 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23296 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23297 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23298 
23299 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23300 	    data_len, data_bufp)) != 0) {
23301 		goto done;
23302 	}
23303 	in = (sd_prin_readresv_t *)data_bufp;
23304 	ptr->generation = BE_32(in->generation);
23305 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23306 
23307 	/*
23308 	 * Return the min(listsize, listlen) keys
23309 	 */
23310 #ifdef _MULTI_DATAMODEL
23311 
23312 	switch (ddi_model_convert_from(flag & FMODELS)) {
23313 	case DDI_MODEL_ILP32:
23314 		resvlist32.listlen = resvlist.listlen;
23315 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23316 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23317 			    "sd_persistent_reservation_in_read_resv: "
23318 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23319 			rval = EFAULT;
23320 			goto done;
23321 		}
23322 		break;
23323 
23324 	case DDI_MODEL_NONE:
23325 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23326 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23327 			    "sd_persistent_reservation_in_read_resv: "
23328 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23329 			rval = EFAULT;
23330 			goto done;
23331 		}
23332 		break;
23333 	}
23334 
23335 #else /* ! _MULTI_DATAMODEL */
23336 
23337 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23338 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23339 		    "sd_persistent_reservation_in_read_resv: "
23340 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23341 		rval = EFAULT;
23342 		goto done;
23343 	}
23344 
23345 #endif /* ! _MULTI_DATAMODEL */
23346 
23347 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23348 	bufp = resvlist.list;
23349 	copysz = sizeof (mhioc_resv_desc_t);
23350 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23351 	    i++, readresv_ptr++, bufp++) {
23352 
23353 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23354 		    MHIOC_RESV_KEY_SIZE);
23355 		resvdesc.type  = readresv_ptr->type;
23356 		resvdesc.scope = readresv_ptr->scope;
23357 		resvdesc.scope_specific_addr =
23358 		    BE_32(readresv_ptr->scope_specific_addr);
23359 
23360 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23361 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23362 			    "sd_persistent_reservation_in_read_resv: "
23363 			    "failed ddi_copyout: resvlist\n");
23364 			rval = EFAULT;
23365 			goto done;
23366 		}
23367 	}
23368 done:
23369 	kmem_free(data_bufp, data_len);
23370 	return (rval);
23371 }
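
/*
 * Illustrative userland sketch (not part of the driver) of how the routine
 * above might be reached, presumably via the MHIOCGRP_INRESV ioctl.  Struct
 * and field names follow sys/mhd.h and should be confirmed there; error
 * handling is omitted for brevity.
 *
 *	mhioc_resv_desc_t	descs[4];
 *	mhioc_resv_desc_list_t	dlist;
 *	mhioc_inresvs_t		inresv;
 *
 *	dlist.listsize = 4;		(capacity of descs[])
 *	dlist.list = descs;
 *	inresv.li = &dlist;
 *	(void) ioctl(fd, MHIOCGRP_INRESV, &inresv);
 *
 * On return, inresv.generation and dlist.listlen are filled in, and at most
 * min(listsize, listlen) reservation descriptors are copied out to descs[].
 */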
23372 
23373 
23374 /*
23375  *    Function: sr_change_blkmode()
23376  *
23377  * Description: This routine is the driver entry point for handling CD-ROM
23378  *		block mode ioctl requests. Support for returning and changing
23379  *		the current block size in use by the device is implemented. The
23380  *		LBA size is changed via a MODE SELECT Block Descriptor.
23381  *
23382  *		This routine issues a mode sense with an allocation length of
23383  *		12 bytes for the mode page header and a single block descriptor.
23384  *
23385  *   Arguments: dev - the device 'dev_t'
23386  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23387  *		      CDROMSBLKMODE (set)
23388  *		data - current block size or requested block size
23389  *		flag - this argument is a pass through to ddi_copyxxx() directly
23390  *		       from the mode argument of ioctl().
23391  *
23392  * Return Code: the code returned by sd_send_scsi_cmd()
23393  *		EINVAL if invalid arguments are provided
23394  *		EFAULT if ddi_copyxxx() fails
23395  *		ENXIO if fail ddi_get_soft_state
23396  *		EIO if invalid mode sense block descriptor length
23397  *
23398  */
23399 
23400 static int
23401 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23402 {
23403 	struct sd_lun			*un = NULL;
23404 	struct mode_header		*sense_mhp, *select_mhp;
23405 	struct block_descriptor		*sense_desc, *select_desc;
23406 	int				current_bsize;
23407 	int				rval = EINVAL;
23408 	uchar_t				*sense = NULL;
23409 	uchar_t				*select = NULL;
23410 
23411 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23412 
23413 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23414 		return (ENXIO);
23415 	}
23416 
23417 	/*
23418 	 * The block length is changed via the Mode Select block descriptor; the
23419 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23420 	 * required as part of this routine. Therefore the mode sense allocation
23421 	 * length is specified to be the length of a mode page header and a
23422 	 * block descriptor.
23423 	 */
23424 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23425 
23426 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23427 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23428 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23429 		    "sr_change_blkmode: Mode Sense Failed\n");
23430 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23431 		return (rval);
23432 	}
23433 
23434 	/* Check the block descriptor len to handle only 1 block descriptor */
23435 	sense_mhp = (struct mode_header *)sense;
23436 	if ((sense_mhp->bdesc_length == 0) ||
23437 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23438 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23439 		    "sr_change_blkmode: Mode Sense returned invalid block"
23440 		    " descriptor length\n");
23441 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23442 		return (EIO);
23443 	}
23444 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23445 	current_bsize = ((sense_desc->blksize_hi << 16) |
23446 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23447 
23448 	/* Process command */
23449 	switch (cmd) {
23450 	case CDROMGBLKMODE:
23451 		/* Return the block size obtained during the mode sense */
23452 		if (ddi_copyout(&current_bsize, (void *)data,
23453 		    sizeof (int), flag) != 0)
23454 			rval = EFAULT;
23455 		break;
23456 	case CDROMSBLKMODE:
23457 		/* Validate the requested block size */
23458 		switch (data) {
23459 		case CDROM_BLK_512:
23460 		case CDROM_BLK_1024:
23461 		case CDROM_BLK_2048:
23462 		case CDROM_BLK_2056:
23463 		case CDROM_BLK_2336:
23464 		case CDROM_BLK_2340:
23465 		case CDROM_BLK_2352:
23466 		case CDROM_BLK_2368:
23467 		case CDROM_BLK_2448:
23468 		case CDROM_BLK_2646:
23469 		case CDROM_BLK_2647:
23470 			break;
23471 		default:
23472 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23473 			    "sr_change_blkmode: "
23474 			    "Block Size '%ld' Not Supported\n", data);
23475 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23476 			return (EINVAL);
23477 		}
23478 
23479 		/*
23480 		 * The current block size matches the requested block size so
23481 		 * there is no need to send the mode select to change the size
23482 		 */
23483 		if (current_bsize == data) {
23484 			break;
23485 		}
23486 
23487 		/* Build the select data for the requested block size */
23488 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23489 		select_mhp = (struct mode_header *)select;
23490 		select_desc =
23491 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23492 		/*
23493 		 * The LBA size is changed via the block descriptor, so the
23494 		 * descriptor is built according to the user data
23495 		 */
23496 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23497 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23498 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23499 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23500 
23501 		/* Send the mode select for the requested block size */
23502 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23503 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23504 		    SD_PATH_STANDARD)) != 0) {
23505 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23506 			    "sr_change_blkmode: Mode Select Failed\n");
23507 			/*
23508 			 * The mode select failed for the requested block size,
23509 			 * so reset the data for the original block size and
23510 			 * send it to the target. The error is indicated by the
23511 			 * return value for the failed mode select.
23512 			 */
23513 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23514 			select_desc->blksize_mid = sense_desc->blksize_mid;
23515 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23516 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23517 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23518 			    SD_PATH_STANDARD);
23519 		} else {
23520 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23521 			mutex_enter(SD_MUTEX(un));
23522 			sd_update_block_info(un, (uint32_t)data, 0);
23523 			mutex_exit(SD_MUTEX(un));
23524 		}
23525 		break;
23526 	default:
23527 		/* should not reach here, but check anyway */
23528 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23529 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23530 		rval = EINVAL;
23531 		break;
23532 	}
23533 
23534 	if (select) {
23535 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23536 	}
23537 	if (sense) {
23538 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23539 	}
23540 	return (rval);
23541 }
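
/*
 * Illustrative userland sketch (not part of the driver) of the ioctls
 * serviced by sr_change_blkmode().  Note that CDROMGBLKMODE passes a
 * pointer to an int while CDROMSBLKMODE passes the size itself as the
 * ioctl argument, matching the ddi_copyout()/data usage above.  fd is
 * assumed to be an open CD-ROM device; error handling is omitted.
 *
 *	int bsize;
 *
 *	(void) ioctl(fd, CDROMGBLKMODE, &bsize);	  (query)
 *	(void) ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048);  (set to 2048)
 */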
23542 
23543 
23544 /*
23545  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23546  * implement driver support for getting and setting the CD speed. The command
23547  * set used will be based on the device type. If the device has not been
23548  * identified as MMC the Toshiba vendor specific mode page will be used. If
23549  * identified as MMC, the Toshiba vendor specific mode page will be used. If
23550  * the device is MMC but does not support the Real Time Streaming feature,
23551  * be used to read the speed.
23552  */
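
/*
 * Illustrative userland sketch (not part of the driver) of the speed
 * ioctls handled by the two routines below.  As with the block mode
 * ioctls, the get variant takes a pointer and the set variant takes the
 * speed code itself; error handling is omitted.
 *
 *	int speed;
 *
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_DOUBLE_SPEED);
 */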
23553 
23554 /*
23555  *    Function: sr_change_speed()
23556  *
23557  * Description: This routine is the driver entry point for handling CD-ROM
23558  *		drive speed ioctl requests for devices supporting the Toshiba
23559  *		vendor specific drive speed mode page. Support for returning
23560  *		and changing the current drive speed in use by the device is
23561  *		implemented.
23562  *
23563  *   Arguments: dev - the device 'dev_t'
23564  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23565  *		      CDROMSDRVSPEED (set)
23566  *		data - current drive speed or requested drive speed
23567  *		flag - this argument is a pass through to ddi_copyxxx() directly
23568  *		       from the mode argument of ioctl().
23569  *
23570  * Return Code: the code returned by sd_send_scsi_cmd()
23571  *		EINVAL if invalid arguments are provided
23572  *		EFAULT if ddi_copyxxx() fails
23573  *		ENXIO if fail ddi_get_soft_state
23574  *		EIO if invalid mode sense block descriptor length
23575  */
23576 
23577 static int
23578 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23579 {
23580 	struct sd_lun			*un = NULL;
23581 	struct mode_header		*sense_mhp, *select_mhp;
23582 	struct mode_speed		*sense_page, *select_page;
23583 	int				current_speed;
23584 	int				rval = EINVAL;
23585 	int				bd_len;
23586 	uchar_t				*sense = NULL;
23587 	uchar_t				*select = NULL;
23588 
23589 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23590 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23591 		return (ENXIO);
23592 	}
23593 
23594 	/*
23595 	 * Note: The drive speed is being modified here according to a Toshiba
23596 	 * vendor specific mode page (0x31).
23597 	 */
23598 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23599 
23600 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23601 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23602 	    SD_PATH_STANDARD)) != 0) {
23603 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23604 		    "sr_change_speed: Mode Sense Failed\n");
23605 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23606 		return (rval);
23607 	}
23608 	sense_mhp  = (struct mode_header *)sense;
23609 
23610 	/* Check the block descriptor len to handle only 1 block descriptor */
23611 	bd_len = sense_mhp->bdesc_length;
23612 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23613 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23614 		    "sr_change_speed: Mode Sense returned invalid block "
23615 		    "descriptor length\n");
23616 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23617 		return (EIO);
23618 	}
23619 
23620 	sense_page = (struct mode_speed *)
23621 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23622 	current_speed = sense_page->speed;
23623 
23624 	/* Process command */
23625 	switch (cmd) {
23626 	case CDROMGDRVSPEED:
23627 		/* Return the drive speed obtained during the mode sense */
23628 		if (current_speed == 0x2) {
23629 			current_speed = CDROM_TWELVE_SPEED;
23630 		}
23631 		if (ddi_copyout(&current_speed, (void *)data,
23632 		    sizeof (int), flag) != 0) {
23633 			rval = EFAULT;
23634 		}
23635 		break;
23636 	case CDROMSDRVSPEED:
23637 		/* Validate the requested drive speed */
23638 		switch ((uchar_t)data) {
23639 		case CDROM_TWELVE_SPEED:
23640 			data = 0x2;
23641 			/*FALLTHROUGH*/
23642 		case CDROM_NORMAL_SPEED:
23643 		case CDROM_DOUBLE_SPEED:
23644 		case CDROM_QUAD_SPEED:
23645 		case CDROM_MAXIMUM_SPEED:
23646 			break;
23647 		default:
23648 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23649 			    "sr_change_speed: "
23650 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23651 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23652 			return (EINVAL);
23653 		}
23654 
23655 		/*
23656 		 * The current drive speed matches the requested drive speed so
23657 		 * there is no need to send the mode select to change the speed
23658 		 */
23659 		if (current_speed == data) {
23660 			break;
23661 		}
23662 
23663 		/* Build the select data for the requested drive speed */
23664 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23665 		select_mhp = (struct mode_header *)select;
23666 		select_mhp->bdesc_length = 0;
23667 	select_page =
23668 	    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23671 		select_page->mode_page.code = CDROM_MODE_SPEED;
23672 		select_page->mode_page.length = 2;
23673 		select_page->speed = (uchar_t)data;
23674 
23675 		/* Send the mode select for the requested drive speed */
23676 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23677 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23678 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
23679 			/*
23680 			 * The mode select failed for the requested drive speed,
23681 			 * so reset the data for the original drive speed and
23682 			 * send it to the target. The error is indicated by the
23683 			 * return value for the failed mode select.
23684 			 */
23685 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23686 			    "sr_drive_speed: Mode Select Failed\n");
23687 			select_page->speed = sense_page->speed;
23688 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23689 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23690 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
23691 		}
23692 		break;
23693 	default:
23694 		/* should not reach here, but check anyway */
23695 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23696 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
23697 		rval = EINVAL;
23698 		break;
23699 	}
23700 
23701 	if (select) {
23702 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
23703 	}
23704 	if (sense) {
23705 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23706 	}
23707 
23708 	return (rval);
23709 }
23710 
23711 
23712 /*
23713  *    Function: sr_atapi_change_speed()
23714  *
23715  * Description: This routine is the driver entry point for handling CD-ROM
23716  *		drive speed ioctl requests for MMC devices that do not support
23717  *		the Real Time Streaming feature (0x107).
23718  *
23719  *		Note: This routine will use the SET SPEED command which may not
23720  *		be supported by all devices.
23721  *
23722  *   Arguments: dev- the device 'dev_t'
23723  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
23724  *		     CDROMSDRVSPEED (set)
23725  *		data- current drive speed or requested drive speed
23726  *		flag- this argument is a pass through to ddi_copyxxx() directly
23727  *		      from the mode argument of ioctl().
23728  *
23729  * Return Code: the code returned by sd_send_scsi_cmd()
23730  *		EINVAL if invalid arguments are provided
23731  *		EFAULT if ddi_copyxxx() fails
23732  *		ENXIO if fail ddi_get_soft_state
23733  *		EIO if invalid mode sense block descriptor length
23734  */
23735 
23736 static int
23737 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23738 {
23739 	struct sd_lun			*un;
23740 	struct uscsi_cmd		*com = NULL;
23741 	struct mode_header_grp2		*sense_mhp;
23742 	uchar_t				*sense_page;
23743 	uchar_t				*sense = NULL;
23744 	char				cdb[CDB_GROUP5];
23745 	int				bd_len;
23746 	int				current_speed = 0;
23747 	int				max_speed = 0;
23748 	int				rval;
23749 
23750 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23751 
23752 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23753 		return (ENXIO);
23754 	}
23755 
23756 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
23757 
23758 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
23759 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
23760 	    SD_PATH_STANDARD)) != 0) {
23761 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23762 		    "sr_atapi_change_speed: Mode Sense Failed\n");
23763 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23764 		return (rval);
23765 	}
23766 
23767 	/* Check the block descriptor len to handle only 1 block descriptor */
23768 	sense_mhp = (struct mode_header_grp2 *)sense;
23769 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
23770 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23771 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23772 		    "sr_atapi_change_speed: Mode Sense returned invalid "
23773 		    "block descriptor length\n");
23774 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23775 		return (EIO);
23776 	}
23777 
23778 	/* Calculate the current and maximum drive speeds */
23779 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
23780 	current_speed = (sense_page[14] << 8) | sense_page[15];
23781 	max_speed = (sense_page[8] << 8) | sense_page[9];
23782 
23783 	/* Process the command */
23784 	switch (cmd) {
23785 	case CDROMGDRVSPEED:
23786 		current_speed /= SD_SPEED_1X;
23787 		if (ddi_copyout(&current_speed, (void *)data,
23788 		    sizeof (int), flag) != 0)
23789 			rval = EFAULT;
23790 		break;
23791 	case CDROMSDRVSPEED:
23792 		/* Convert the speed code to KB/sec */
23793 		switch ((uchar_t)data) {
23794 		case CDROM_NORMAL_SPEED:
23795 			current_speed = SD_SPEED_1X;
23796 			break;
23797 		case CDROM_DOUBLE_SPEED:
23798 			current_speed = 2 * SD_SPEED_1X;
23799 			break;
23800 		case CDROM_QUAD_SPEED:
23801 			current_speed = 4 * SD_SPEED_1X;
23802 			break;
23803 		case CDROM_TWELVE_SPEED:
23804 			current_speed = 12 * SD_SPEED_1X;
23805 			break;
23806 		case CDROM_MAXIMUM_SPEED:
23807 			current_speed = 0xffff;
23808 			break;
23809 		default:
23810 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23811 			    "sr_atapi_change_speed: invalid drive speed %d\n",
23812 			    (uchar_t)data);
23813 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23814 			return (EINVAL);
23815 		}
23816 
23817 		/* Check the request against the drive's max speed. */
23818 		if (current_speed != 0xffff) {
23819 			if (current_speed > max_speed) {
23820 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23821 				return (EINVAL);
23822 			}
23823 		}
23824 
23825 		/*
23826 		 * Build and send the SET SPEED command
23827 		 *
23828 		 * Note: The SET SPEED (0xBB) command used in this routine is
23829 		 * obsolete per the SCSI MMC spec but still supported in the
23830 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
23831 		 * therefore the command is still implemented in this routine.
23832 		 */
23833 		bzero(cdb, sizeof (cdb));
23834 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
23835 		cdb[2] = (uchar_t)(current_speed >> 8);
23836 		cdb[3] = (uchar_t)current_speed;
23837 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23838 		com->uscsi_cdb	   = (caddr_t)cdb;
23839 		com->uscsi_cdblen  = CDB_GROUP5;
23840 		com->uscsi_bufaddr = NULL;
23841 		com->uscsi_buflen  = 0;
23842 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
23843 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
23844 		break;
23845 	default:
23846 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23847 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
23848 		rval = EINVAL;
23849 	}
23850 
23851 	if (sense) {
23852 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23853 	}
23854 	if (com) {
23855 		kmem_free(com, sizeof (*com));
23856 	}
23857 	return (rval);
23858 }
23859 
23860 
23861 /*
23862  *    Function: sr_pause_resume()
23863  *
23864  * Description: This routine is the driver entry point for handling CD-ROM
23865  *		pause/resume ioctl requests. This only affects the audio play
23866  *		operation.
23867  *
23868  *   Arguments: dev - the device 'dev_t'
23869  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
23870  *		      for setting the resume bit of the cdb.
23871  *
23872  * Return Code: the code returned by sd_send_scsi_cmd()
23873  *		EINVAL if invalid mode specified
23874  *
23875  */
23876 
23877 static int
23878 sr_pause_resume(dev_t dev, int cmd)
23879 {
23880 	struct sd_lun		*un;
23881 	struct uscsi_cmd	*com;
23882 	char			cdb[CDB_GROUP1];
23883 	int			rval;
23884 
23885 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23886 		return (ENXIO);
23887 	}
23888 
23889 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23890 	bzero(cdb, CDB_GROUP1);
23891 	cdb[0] = SCMD_PAUSE_RESUME;
23892 	switch (cmd) {
23893 	case CDROMRESUME:
23894 		cdb[8] = 1;
23895 		break;
23896 	case CDROMPAUSE:
23897 		cdb[8] = 0;
23898 		break;
23899 	default:
23900 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
23901 		    " Command '%x' Not Supported\n", cmd);
23902 		rval = EINVAL;
23903 		goto done;
23904 	}
23905 
23906 	com->uscsi_cdb    = cdb;
23907 	com->uscsi_cdblen = CDB_GROUP1;
23908 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23909 
23910 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23911 	    SD_PATH_STANDARD);
23912 
23913 done:
23914 	kmem_free(com, sizeof (*com));
23915 	return (rval);
23916 }
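
/*
 * Illustrative userland sketch (not part of the driver): the pause and
 * resume ioctls serviced by sr_pause_resume() carry no payload, so the
 * third ioctl argument is unused.
 *
 *	(void) ioctl(fd, CDROMPAUSE, 0);
 *	(void) ioctl(fd, CDROMRESUME, 0);
 */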
23917 
23918 
23919 /*
23920  *    Function: sr_play_msf()
23921  *
23922  * Description: This routine is the driver entry point for handling CD-ROM
23923  *		ioctl requests to output the audio signals at the specified
23924  *		starting address and continue the audio play until the specified
23925  *		ending address (CDROMPLAYMSF). The address is in Minute Second
23926  *		Frame (MSF) format.
23927  *
23928  *   Arguments: dev	- the device 'dev_t'
23929  *		data	- pointer to user provided audio msf structure,
23930  *		          specifying start/end addresses.
23931  *		flag	- this argument is a pass through to ddi_copyxxx()
23932  *		          directly from the mode argument of ioctl().
23933  *
23934  * Return Code: the code returned by sd_send_scsi_cmd()
23935  *		EFAULT if ddi_copyxxx() fails
23936  *		ENXIO if fail ddi_get_soft_state
23937  *		EINVAL if data pointer is NULL
23938  */
23939 
23940 static int
23941 sr_play_msf(dev_t dev, caddr_t data, int flag)
23942 {
23943 	struct sd_lun		*un;
23944 	struct uscsi_cmd	*com;
23945 	struct cdrom_msf	msf_struct;
23946 	struct cdrom_msf	*msf = &msf_struct;
23947 	char			cdb[CDB_GROUP1];
23948 	int			rval;
23949 
23950 	if (data == NULL) {
23951 		return (EINVAL);
23952 	}
23953 
23954 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23955 		return (ENXIO);
23956 	}
23957 
23958 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
23959 		return (EFAULT);
23960 	}
23961 
23962 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23963 	bzero(cdb, CDB_GROUP1);
23964 	cdb[0] = SCMD_PLAYAUDIO_MSF;
23965 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
23966 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
23967 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
23968 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
23969 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
23970 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
23971 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
23972 	} else {
23973 		cdb[3] = msf->cdmsf_min0;
23974 		cdb[4] = msf->cdmsf_sec0;
23975 		cdb[5] = msf->cdmsf_frame0;
23976 		cdb[6] = msf->cdmsf_min1;
23977 		cdb[7] = msf->cdmsf_sec1;
23978 		cdb[8] = msf->cdmsf_frame1;
23979 	}
23980 	com->uscsi_cdb    = cdb;
23981 	com->uscsi_cdblen = CDB_GROUP1;
23982 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23983 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23984 	    SD_PATH_STANDARD);
23985 	kmem_free(com, sizeof (*com));
23986 	return (rval);
23987 }
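
/*
 * Illustrative userland sketch (not part of the driver) of CDROMPLAYMSF
 * as serviced by sr_play_msf().  The MSF values below are arbitrary
 * examples; error handling is omitted.
 *
 *	struct cdrom_msf msf;
 *
 *	msf.cdmsf_min0 = 0;  msf.cdmsf_sec0 = 2;  msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;  msf.cdmsf_sec1 = 0;  msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */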
23988 
23989 
23990 /*
23991  *    Function: sr_play_trkind()
23992  *
23993  * Description: This routine is the driver entry point for handling CD-ROM
23994  *		ioctl requests to output the audio signals at the specified
23995  *		starting address and continue the audio play until the specified
23996  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
23997  *		format.
23998  *
23999  *   Arguments: dev	- the device 'dev_t'
24000  *		data	- pointer to user provided audio track/index structure,
24001  *		          specifying start/end addresses.
24002  *		flag	- this argument is a pass through to ddi_copyxxx()
24003  *		          directly from the mode argument of ioctl().
24004  *
24005  * Return Code: the code returned by sd_send_scsi_cmd()
24006  *		EFAULT if ddi_copyxxx() fails
24007  *		ENXIO if fail ddi_get_soft_state
24008  *		EINVAL if data pointer is NULL
24009  */
24010 
24011 static int
24012 sr_play_trkind(dev_t dev, caddr_t data, int flag)
24013 {
24014 	struct cdrom_ti		ti_struct;
24015 	struct cdrom_ti		*ti = &ti_struct;
24016 	struct uscsi_cmd	*com = NULL;
24017 	char			cdb[CDB_GROUP1];
24018 	int			rval;
24019 
24020 	if (data == NULL) {
24021 		return (EINVAL);
24022 	}
24023 
24024 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
24025 		return (EFAULT);
24026 	}
24027 
24028 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24029 	bzero(cdb, CDB_GROUP1);
24030 	cdb[0] = SCMD_PLAYAUDIO_TI;
24031 	cdb[4] = ti->cdti_trk0;
24032 	cdb[5] = ti->cdti_ind0;
24033 	cdb[7] = ti->cdti_trk1;
24034 	cdb[8] = ti->cdti_ind1;
24035 	com->uscsi_cdb    = cdb;
24036 	com->uscsi_cdblen = CDB_GROUP1;
24037 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24038 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24039 	    SD_PATH_STANDARD);
24040 	kmem_free(com, sizeof (*com));
24041 	return (rval);
24042 }
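
/*
 * Illustrative userland sketch (not part of the driver) of
 * CDROMPLAYTRKIND as serviced by sr_play_trkind().  Track/index values
 * are arbitrary examples; error handling is omitted.
 *
 *	struct cdrom_ti ti;
 *
 *	ti.cdti_trk0 = 1;  ti.cdti_ind0 = 1;	(start track/index)
 *	ti.cdti_trk1 = 2;  ti.cdti_ind1 = 1;	(end track/index)
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */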
24043 
24044 
24045 /*
24046  *    Function: sr_read_all_subcodes()
24047  *
24048  * Description: This routine is the driver entry point for handling CD-ROM
24049  *		ioctl requests to return raw subcode data while the target is
24050  *		playing audio (CDROMSUBCODE).
24051  *
24052  *   Arguments: dev	- the device 'dev_t'
24053  *		data	- pointer to user provided cdrom subcode structure,
24054  *		          specifying the transfer length and address.
24055  *		flag	- this argument is a pass through to ddi_copyxxx()
24056  *		          directly from the mode argument of ioctl().
24057  *
24058  * Return Code: the code returned by sd_send_scsi_cmd()
24059  *		EFAULT if ddi_copyxxx() fails
24060  *		ENXIO if fail ddi_get_soft_state
24061  *		EINVAL if data pointer is NULL
24062  */
24063 
24064 static int
24065 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24066 {
24067 	struct sd_lun		*un = NULL;
24068 	struct uscsi_cmd	*com = NULL;
24069 	struct cdrom_subcode	*subcode = NULL;
24070 	int			rval;
24071 	size_t			buflen;
24072 	char			cdb[CDB_GROUP5];
24073 
24074 #ifdef _MULTI_DATAMODEL
24075 	/* To support ILP32 applications in an LP64 world */
24076 	struct cdrom_subcode32		cdrom_subcode32;
24077 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24078 #endif
24079 	if (data == NULL) {
24080 		return (EINVAL);
24081 	}
24082 
24083 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24084 		return (ENXIO);
24085 	}
24086 
24087 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24088 
24089 #ifdef _MULTI_DATAMODEL
24090 	switch (ddi_model_convert_from(flag & FMODELS)) {
24091 	case DDI_MODEL_ILP32:
24092 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24093 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24094 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24095 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24096 			return (EFAULT);
24097 		}
24098 		/* Convert the ILP32 uscsi data from the application to LP64 */
24099 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24100 		break;
24101 	case DDI_MODEL_NONE:
24102 		if (ddi_copyin(data, subcode,
24103 		    sizeof (struct cdrom_subcode), flag)) {
24104 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24105 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24106 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24107 			return (EFAULT);
24108 		}
24109 		break;
24110 	}
24111 #else /* ! _MULTI_DATAMODEL */
24112 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24113 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24114 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24115 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24116 		return (EFAULT);
24117 	}
24118 #endif /* _MULTI_DATAMODEL */
24119 
24120 	/*
24121 	 * Since MMC-2 expects max 3 bytes for length, check if the
24122 	 * length input is greater than 3 bytes
24123 	 */
24124 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24125 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24126 		    "sr_read_all_subcodes: "
24127 		    "cdrom transfer length too large: %d (limit %d)\n",
24128 		    subcode->cdsc_length, 0xFFFFFF);
24129 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24130 		return (EINVAL);
24131 	}
24132 
24133 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24134 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24135 	bzero(cdb, CDB_GROUP5);
24136 
24137 	if (un->un_f_mmc_cap == TRUE) {
24138 		cdb[0] = (char)SCMD_READ_CD;
24139 		cdb[2] = (char)0xff;
24140 		cdb[3] = (char)0xff;
24141 		cdb[4] = (char)0xff;
24142 		cdb[5] = (char)0xff;
24143 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24144 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24145 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24146 		cdb[10] = 1;
24147 	} else {
24148 		/*
24149 		 * Note: A vendor specific command (0xDF) is being used here to
24150 		 * request a read of all subcodes.
24151 		 */
24152 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24153 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24154 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24155 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24156 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24157 	}
24158 	com->uscsi_cdb	   = cdb;
24159 	com->uscsi_cdblen  = CDB_GROUP5;
24160 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24161 	com->uscsi_buflen  = buflen;
24162 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24163 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24164 	    SD_PATH_STANDARD);
24165 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24166 	kmem_free(com, sizeof (*com));
24167 	return (rval);
24168 }
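
/*
 * Illustrative userland sketch (not part of the driver) of CDROMSUBCODE
 * as serviced by sr_read_all_subcodes().  Per the buflen computation
 * above, the caller supplies CDROM_BLK_SUBCODE bytes of buffer space for
 * each block requested in cdsc_length; error handling is omitted.
 *
 *	struct cdrom_subcode sc;
 *	char buf[10 * CDROM_BLK_SUBCODE];
 *
 *	sc.cdsc_length = 10;
 *	sc.cdsc_addr = buf;
 *	(void) ioctl(fd, CDROMSUBCODE, &sc);
 */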
24169 
24170 
24171 /*
24172  *    Function: sr_read_subchannel()
24173  *
24174  * Description: This routine is the driver entry point for handling CD-ROM
24175  *		ioctl requests to return the Q sub-channel data of the CD
24176  *		current position block (CDROMSUBCHNL). The data includes the
24177  *		track number, index number, absolute CD-ROM address (LBA or MSF
24178  *		format per the user), track relative CD-ROM address (LBA or MSF
24179  *		format per the user), control data and audio status.
24180  *
24181  *   Arguments: dev	- the device 'dev_t'
24182  *		data	- pointer to user provided cdrom sub-channel structure
24183  *		flag	- this argument is a pass through to ddi_copyxxx()
24184  *		          directly from the mode argument of ioctl().
24185  *
24186  * Return Code: the code returned by sd_send_scsi_cmd()
24187  *		EFAULT if ddi_copyxxx() fails
24188  *		ENXIO if fail ddi_get_soft_state
24189  *		EINVAL if data pointer is NULL
24190  */
24191 
24192 static int
24193 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24194 {
24195 	struct sd_lun		*un;
24196 	struct uscsi_cmd	*com;
24197 	struct cdrom_subchnl	subchannel;
24198 	struct cdrom_subchnl	*subchnl = &subchannel;
24199 	char			cdb[CDB_GROUP1];
24200 	caddr_t			buffer;
24201 	int			rval;
24202 
24203 	if (data == NULL) {
24204 		return (EINVAL);
24205 	}
24206 
24207 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24208 	    (un->un_state == SD_STATE_OFFLINE)) {
24209 		return (ENXIO);
24210 	}
24211 
24212 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24213 		return (EFAULT);
24214 	}
24215 
24216 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24217 	bzero(cdb, CDB_GROUP1);
24218 	cdb[0] = SCMD_READ_SUBCHANNEL;
24219 	/* Set the MSF bit based on the user requested address format */
24220 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24221 	/*
24222 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
24223 	 * returned
24224 	 */
24225 	cdb[2] = 0x40;
24226 	/*
24227 	 * Set byte 3 to specify the return data format. A value of 0x01
24228 	 * indicates that the CD-ROM current position should be returned.
24229 	 */
24230 	cdb[3] = 0x01;
24231 	cdb[8] = 0x10;
24232 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24233 	com->uscsi_cdb	   = cdb;
24234 	com->uscsi_cdblen  = CDB_GROUP1;
24235 	com->uscsi_bufaddr = buffer;
24236 	com->uscsi_buflen  = 16;
24237 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24238 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24239 	    SD_PATH_STANDARD);
24240 	if (rval != 0) {
24241 		kmem_free(buffer, 16);
24242 		kmem_free(com, sizeof (*com));
24243 		return (rval);
24244 	}
24245 
24246 	/* Process the returned Q sub-channel data */
24247 	subchnl->cdsc_audiostatus = buffer[1];
24248 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24249 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24250 	subchnl->cdsc_trk	= buffer[6];
24251 	subchnl->cdsc_ind	= buffer[7];
24252 	if (subchnl->cdsc_format & CDROM_LBA) {
24253 		subchnl->cdsc_absaddr.lba =
24254 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24255 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24256 		subchnl->cdsc_reladdr.lba =
24257 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24258 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24259 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24260 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24261 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24262 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24263 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24264 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24265 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24266 	} else {
24267 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24268 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24269 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24270 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24271 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24272 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24273 	}
24274 	kmem_free(buffer, 16);
24275 	kmem_free(com, sizeof (*com));
24276 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24277 	    != 0) {
24278 		return (EFAULT);
24279 	}
24280 	return (rval);
24281 }
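
/*
 * Illustrative userland sketch (not part of the driver) of CDROMSUBCHNL
 * as serviced by sr_read_subchannel().  Only cdsc_format is an input;
 * the remaining fields are filled in from the Q sub-channel data as
 * decoded above.  Error handling is omitted.
 *
 *	struct cdrom_subchnl sc;
 *
 *	sc.cdsc_format = CDROM_MSF;	(or CDROM_LBA)
 *	(void) ioctl(fd, CDROMSUBCHNL, &sc);
 *	(sc.cdsc_audiostatus, sc.cdsc_trk, sc.cdsc_absaddr, etc. are now
 *	valid)
 */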
24282 
24283 
24284 /*
24285  *    Function: sr_read_tocentry()
24286  *
24287  * Description: This routine is the driver entry point for handling CD-ROM
24288  *		ioctl requests to read from the Table of Contents (TOC)
24289  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24290  *		fields, the starting address (LBA or MSF format per the user)
24291  *		and the data mode if the user specified track is a data track.
24292  *
24293  *		Note: The READ HEADER (0x44) command used in this routine is
24294  *		obsolete per the SCSI MMC spec but still supported in the
24295  *		MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
24296  *		therefore the command is still implemented in this routine.
24297  *
24298  *   Arguments: dev	- the device 'dev_t'
24299  *		data	- pointer to user provided toc entry structure,
24300  *			  specifying the track # and the address format
24301  *			  (LBA or MSF).
24302  *		flag	- this argument is a pass through to ddi_copyxxx()
24303  *		          directly from the mode argument of ioctl().
24304  *
24305  * Return Code: the code returned by sd_send_scsi_cmd()
24306  *		EFAULT if ddi_copyxxx() fails
24307  *		ENXIO if fail ddi_get_soft_state
24308  *		EINVAL if data pointer is NULL
24309  */
24310 
24311 static int
24312 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24313 {
24314 	struct sd_lun		*un = NULL;
24315 	struct uscsi_cmd	*com;
24316 	struct cdrom_tocentry	toc_entry;
24317 	struct cdrom_tocentry	*entry = &toc_entry;
24318 	caddr_t			buffer;
24319 	int			rval;
24320 	char			cdb[CDB_GROUP1];
24321 
24322 	if (data == NULL) {
24323 		return (EINVAL);
24324 	}
24325 
24326 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24327 	    (un->un_state == SD_STATE_OFFLINE)) {
24328 		return (ENXIO);
24329 	}
24330 
24331 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24332 		return (EFAULT);
24333 	}
24334 
24335 	/* Validate the requested track and address format */
24336 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24337 		return (EINVAL);
24338 	}
24339 
24340 	if (entry->cdte_track == 0) {
24341 		return (EINVAL);
24342 	}
24343 
24344 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24345 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24346 	bzero(cdb, CDB_GROUP1);
24347 
24348 	cdb[0] = SCMD_READ_TOC;
24349 	/* Set the MSF bit based on the user requested address format  */
24350 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24351 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24352 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24353 	} else {
24354 		cdb[6] = entry->cdte_track;
24355 	}
24356 
24357 	/*
24358 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24359 	 * (4 byte TOC response header + 8 byte track descriptor)
24360 	 */
24361 	cdb[8] = 12;
24362 	com->uscsi_cdb	   = cdb;
24363 	com->uscsi_cdblen  = CDB_GROUP1;
24364 	com->uscsi_bufaddr = buffer;
24365 	com->uscsi_buflen  = 0x0C;
24366 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24367 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24368 	    SD_PATH_STANDARD);
24369 	if (rval != 0) {
24370 		kmem_free(buffer, 12);
24371 		kmem_free(com, sizeof (*com));
24372 		return (rval);
24373 	}
24374 
24375 	/* Process the toc entry */
24376 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24377 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24378 	if (entry->cdte_format & CDROM_LBA) {
24379 		entry->cdte_addr.lba =
24380 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24381 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24382 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24383 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24384 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24385 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24386 		/*
24387 		 * Send a READ TOC command using the LBA address format to get
24388 		 * the LBA for the track requested so it can be used in the
24389 		 * READ HEADER request
24390 		 *
24391 		 * Note: The MSF bit of the READ HEADER command specifies the
24392 		 * output format. The block address specified in that command
24393 		 * must be in LBA format.
24394 		 */
24395 		cdb[1] = 0;
24396 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24397 		    SD_PATH_STANDARD);
24398 		if (rval != 0) {
24399 			kmem_free(buffer, 12);
24400 			kmem_free(com, sizeof (*com));
24401 			return (rval);
24402 		}
24403 	} else {
24404 		entry->cdte_addr.msf.minute	= buffer[9];
24405 		entry->cdte_addr.msf.second	= buffer[10];
24406 		entry->cdte_addr.msf.frame	= buffer[11];
24407 		/*
24408 		 * Send a READ TOC command using the LBA address format to get
24409 		 * the LBA for the track requested so it can be used in the
24410 		 * READ HEADER request
24411 		 *
24412 		 * Note: The MSF bit of the READ HEADER command specifies the
24413 		 * output format. The block address specified in that command
24414 		 * must be in LBA format.
24415 		 */
24416 		cdb[1] = 0;
24417 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24418 		    SD_PATH_STANDARD);
24419 		if (rval != 0) {
24420 			kmem_free(buffer, 12);
24421 			kmem_free(com, sizeof (*com));
24422 			return (rval);
24423 		}
24424 	}
24425 
24426 	/*
24427 	 * Build and send the READ HEADER command to determine the data mode of
24428 	 * the user specified track.
24429 	 */
24430 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24431 	    (entry->cdte_track != CDROM_LEADOUT)) {
24432 		bzero(cdb, CDB_GROUP1);
24433 		cdb[0] = SCMD_READ_HEADER;
24434 		cdb[2] = buffer[8];
24435 		cdb[3] = buffer[9];
24436 		cdb[4] = buffer[10];
24437 		cdb[5] = buffer[11];
24438 		cdb[8] = 0x08;
24439 		com->uscsi_buflen = 0x08;
24440 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24441 		    SD_PATH_STANDARD);
24442 		if (rval == 0) {
24443 			entry->cdte_datamode = buffer[0];
24444 		} else {
24445 			/*
24446 			 * The READ HEADER command failed. Since it is
24447 			 * obsolete in the MMC spec, it is better to return
24448 			 * -1 for an invalid track so that we can still
24449 			 * receive the rest of the TOC data.
24450 			 */
24451 			entry->cdte_datamode = (uchar_t)-1;
24452 		}
24453 	} else {
24454 		entry->cdte_datamode = (uchar_t)-1;
24455 	}
24456 
24457 	kmem_free(buffer, 12);
24458 	kmem_free(com, sizeof (*com));
24459 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
24460 		return (EFAULT);
24461 
24462 	return (rval);
24463 }
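
/*
 * Illustrative userland sketch (not part of the driver) of
 * CDROMREADTOCENTRY as serviced by sr_read_tocentry().  cdte_track and
 * cdte_format are inputs; the address, ADR/CTRL, and data mode fields
 * are returned.  Error handling is omitted.
 *
 *	struct cdrom_tocentry te;
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_MSF;	(or CDROM_LBA)
 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 *	(te.cdte_datamode is (uchar_t)-1 for audio tracks or when the
 *	READ HEADER command fails)
 */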
24464 
24465 
24466 /*
24467  *    Function: sr_read_tochdr()
24468  *
24469  * Description: This routine is the driver entry point for handling CD-ROM
24470  * 		ioctl requests to read the Table of Contents (TOC) header
24471  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
24472  *		and ending track numbers.
24473  *
24474  *   Arguments: dev	- the device 'dev_t'
24475  *		data	- pointer to user provided toc header structure,
24476  *			  specifying the starting and ending track numbers.
24477  *		flag	- this argument is a pass through to ddi_copyxxx()
24478  *			  directly from the mode argument of ioctl().
24479  *
24480  * Return Code: the code returned by sd_send_scsi_cmd()
24481  *		EFAULT if ddi_copyxxx() fails
24482  *		ENXIO if fail ddi_get_soft_state
24483  *		EINVAL if data pointer is NULL
24484  */
24485 
24486 static int
24487 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24488 {
24489 	struct sd_lun		*un;
24490 	struct uscsi_cmd	*com;
24491 	struct cdrom_tochdr	toc_header;
24492 	struct cdrom_tochdr	*hdr = &toc_header;
24493 	char			cdb[CDB_GROUP1];
24494 	int			rval;
24495 	caddr_t			buffer;
24496 
24497 	if (data == NULL) {
24498 		return (EINVAL);
24499 	}
24500 
24501 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24502 	    (un->un_state == SD_STATE_OFFLINE)) {
24503 		return (ENXIO);
24504 	}
24505 
24506 	buffer = kmem_zalloc(4, KM_SLEEP);
24507 	bzero(cdb, CDB_GROUP1);
24508 	cdb[0] = SCMD_READ_TOC;
24509 	/*
24510 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24511 	 * that the TOC header should be returned
24512 	 */
24513 	cdb[6] = 0x00;
24514 	/*
24515 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24516 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24517 	 */
24518 	cdb[8] = 0x04;
24519 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24520 	com->uscsi_cdb	   = cdb;
24521 	com->uscsi_cdblen  = CDB_GROUP1;
24522 	com->uscsi_bufaddr = buffer;
24523 	com->uscsi_buflen  = 0x04;
24524 	com->uscsi_timeout = 300;
24525 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24526 
24527 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24528 	    SD_PATH_STANDARD);
24529 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24530 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24531 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24532 	} else {
24533 		hdr->cdth_trk0 = buffer[2];
24534 		hdr->cdth_trk1 = buffer[3];
24535 	}
24536 	kmem_free(buffer, 4);
24537 	kmem_free(com, sizeof (*com));
24538 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24539 		return (EFAULT);
24540 	}
24541 	return (rval);
24542 }
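
/*
 * Illustrative userland sketch (not part of the driver) of
 * CDROMREADTOCHDR as serviced by sr_read_tochdr(), combined with
 * CDROMREADTOCENTRY to walk every track on the disk.  Error handling is
 * omitted.
 *
 *	struct cdrom_tochdr th;
 *	struct cdrom_tocentry te;
 *	int trk;
 *
 *	(void) ioctl(fd, CDROMREADTOCHDR, &th);
 *	for (trk = th.cdth_trk0; trk <= th.cdth_trk1; trk++) {
 *		te.cdte_track = trk;
 *		te.cdte_format = CDROM_LBA;
 *		(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 *	}
 */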
24543 
24544 
24545 /*
24546  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24547  * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
24548  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24549  * digital audio and extended architecture digital audio. These modes are
24550  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24551  * MMC specs.
24552  *
24553  * In addition to support for the various data formats these routines also
24554  * include support for devices that implement only the direct access READ
24555  * commands (0x08, 0x28), devices that implement the READ_CD commands
24556  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24557  * READ CDXA commands (0xD8, 0xDB)
24558  */
24559 
24560 /*
24561  *    Function: sr_read_mode1()
24562  *
24563  * Description: This routine is the driver entry point for handling CD-ROM
24564  *		ioctl read mode1 requests (CDROMREADMODE1).
24565  *
24566  *   Arguments: dev	- the device 'dev_t'
24567  *		data	- pointer to user provided cd read structure specifying
24568  *			  the lba buffer address and length.
24569  *		flag	- this argument is a pass through to ddi_copyxxx()
24570  *			  directly from the mode argument of ioctl().
24571  *
24572  * Return Code: the code returned by sd_send_scsi_cmd()
24573  *		EFAULT if ddi_copyxxx() fails
24574  *		ENXIO if fail ddi_get_soft_state
24575  *		EINVAL if data pointer is NULL
24576  */
24577 
24578 static int
24579 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24580 {
24581 	struct sd_lun		*un;
24582 	struct cdrom_read	mode1_struct;
24583 	struct cdrom_read	*mode1 = &mode1_struct;
24584 	int			rval;
24585 #ifdef _MULTI_DATAMODEL
24586 	/* To support ILP32 applications in an LP64 world */
24587 	struct cdrom_read32	cdrom_read32;
24588 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24589 #endif /* _MULTI_DATAMODEL */
24590 
24591 	if (data == NULL) {
24592 		return (EINVAL);
24593 	}
24594 
24595 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24596 	    (un->un_state == SD_STATE_OFFLINE)) {
24597 		return (ENXIO);
24598 	}
24599 
24600 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24601 	    "sd_read_mode1: entry: un:0x%p\n", un);
24602 
24603 #ifdef _MULTI_DATAMODEL
24604 	switch (ddi_model_convert_from(flag & FMODELS)) {
24605 	case DDI_MODEL_ILP32:
24606 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24607 			return (EFAULT);
24608 		}
24609 		/* Convert the ILP32 uscsi data from the application to LP64 */
24610 		cdrom_read32tocdrom_read(cdrd32, mode1);
24611 		break;
24612 	case DDI_MODEL_NONE:
24613 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24614 			return (EFAULT);
24615 		}
24616 	}
24617 #else /* ! _MULTI_DATAMODEL */
24618 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24619 		return (EFAULT);
24620 	}
24621 #endif /* _MULTI_DATAMODEL */
24622 
24623 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24624 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24625 
24626 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24627 	    "sd_read_mode1: exit: un:0x%p\n", un);
24628 
24629 	return (rval);
24630 }
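
/*
 * Illustrative userland sketch (not part of the driver) of
 * CDROMREADMODE1 as serviced by sr_read_mode1().  The structure supplies
 * the starting logical block, the destination buffer, and the transfer
 * length; the LBA below is an arbitrary example.  Error handling is
 * omitted.
 *
 *	struct cdrom_read cr;
 *	char buf[2048];
 *
 *	cr.cdread_lba = 16;		(arbitrary example block)
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	(void) ioctl(fd, CDROMREADMODE1, &cr);
 */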
24631 
24632 
24633 /*
24634  *    Function: sr_read_cd_mode2()
24635  *
24636  * Description: This routine is the driver entry point for handling CD-ROM
24637  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24638  *		support the READ CD (0xBE) command or the 1st generation
24639  *		READ CD (0xD4) command.
24640  *
24641  *   Arguments: dev	- the device 'dev_t'
24642  *		data	- pointer to user provided cd read structure specifying
24643  *			  the lba buffer address and length.
24644  *		flag	- this argument is a pass through to ddi_copyxxx()
24645  *			  directly from the mode argument of ioctl().
24646  *
24647  * Return Code: the code returned by sd_send_scsi_cmd()
24648  *		EFAULT if ddi_copyxxx() fails
24649  *		ENXIO if fail ddi_get_soft_state
24650  *		EINVAL if data pointer is NULL
24651  */
24652 
24653 static int
24654 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
24655 {
24656 	struct sd_lun		*un;
24657 	struct uscsi_cmd	*com;
24658 	struct cdrom_read	mode2_struct;
24659 	struct cdrom_read	*mode2 = &mode2_struct;
24660 	uchar_t			cdb[CDB_GROUP5];
24661 	int			nblocks;
24662 	int			rval;
24663 #ifdef _MULTI_DATAMODEL
24664 	/*  To support ILP32 applications in an LP64 world */
24665 	struct cdrom_read32	cdrom_read32;
24666 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24667 #endif /* _MULTI_DATAMODEL */
24668 
24669 	if (data == NULL) {
24670 		return (EINVAL);
24671 	}
24672 
24673 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24674 	    (un->un_state == SD_STATE_OFFLINE)) {
24675 		return (ENXIO);
24676 	}
24677 
24678 #ifdef _MULTI_DATAMODEL
24679 	switch (ddi_model_convert_from(flag & FMODELS)) {
24680 	case DDI_MODEL_ILP32:
24681 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24682 			return (EFAULT);
24683 		}
24684 		/* Convert the ILP32 uscsi data from the application to LP64 */
24685 		cdrom_read32tocdrom_read(cdrd32, mode2);
24686 		break;
24687 	case DDI_MODEL_NONE:
24688 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24689 			return (EFAULT);
24690 		}
24691 		break;
24692 	}
24693 
24694 #else /* ! _MULTI_DATAMODEL */
24695 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24696 		return (EFAULT);
24697 	}
24698 #endif /* _MULTI_DATAMODEL */
24699 
24700 	bzero(cdb, sizeof (cdb));
24701 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
24702 		/* Read command supported by 1st generation atapi drives */
24703 		cdb[0] = SCMD_READ_CDD4;
24704 	} else {
24705 		/* Universal CD Access Command */
24706 		cdb[0] = SCMD_READ_CD;
24707 	}
24708 
24709 	/*
24710 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
24711 	 */
24712 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
24713 
24714 	/* set the start address */
24715 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
24716 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
24717 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24718 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
24719 
24720 	/* set the transfer length */
24721 	nblocks = mode2->cdread_buflen / 2336;
24722 	cdb[6] = (uchar_t)(nblocks >> 16);
24723 	cdb[7] = (uchar_t)(nblocks >> 8);
24724 	cdb[8] = (uchar_t)nblocks;
24725 
24726 	/* set the filter bits */
24727 	cdb[9] = CDROM_READ_CD_USERDATA;
24728 
24729 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24730 	com->uscsi_cdb = (caddr_t)cdb;
24731 	com->uscsi_cdblen = sizeof (cdb);
24732 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24733 	com->uscsi_buflen = mode2->cdread_buflen;
24734 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24735 
24736 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24737 	    SD_PATH_STANDARD);
24738 	kmem_free(com, sizeof (*com));
24739 	return (rval);
24740 }
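
/*
 * Illustrative userland sketch (not part of the driver) of
 * CDROMREADMODE2.  Since the transfer length is computed above as
 * cdread_buflen / 2336, the buffer should be a multiple of 2336 bytes
 * (the mode 2 sector size).  Error handling is omitted.
 *
 *	struct cdrom_read cr;
 *	char buf[4 * 2336];
 *
 *	cr.cdread_lba = 16;		(arbitrary example block)
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	(void) ioctl(fd, CDROMREADMODE2, &cr);
 */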
24741 
24742 
24743 /*
24744  *    Function: sr_read_mode2()
24745  *
24746  * Description: This routine is the driver entry point for handling CD-ROM
24747  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24748  *		do not support the READ CD (0xBE) command.
24749  *
24750  *   Arguments: dev	- the device 'dev_t'
24751  *		data	- pointer to user provided cd read structure specifying
24752  *			  the lba buffer address and length.
24753  *		flag	- this argument is a pass through to ddi_copyxxx()
24754  *			  directly from the mode argument of ioctl().
24755  *
24756  * Return Code: the code returned by sd_send_scsi_cmd()
24757  *		EFAULT if ddi_copyxxx() fails
24758  *		ENXIO if fail ddi_get_soft_state
24759  *		EINVAL if data pointer is NULL
24760  *		EIO if fail to reset block size
24761  *		EAGAIN if commands are in progress in the driver
24762  */
24763 
24764 static int
24765 sr_read_mode2(dev_t dev, caddr_t data, int flag)
24766 {
24767 	struct sd_lun		*un;
24768 	struct cdrom_read	mode2_struct;
24769 	struct cdrom_read	*mode2 = &mode2_struct;
24770 	int			rval;
24771 	uint32_t		restore_blksize;
24772 	struct uscsi_cmd	*com;
24773 	uchar_t			cdb[CDB_GROUP0];
24774 	int			nblocks;
24775 
24776 #ifdef _MULTI_DATAMODEL
24777 	/* To support ILP32 applications in an LP64 world */
24778 	struct cdrom_read32	cdrom_read32;
24779 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24780 #endif /* _MULTI_DATAMODEL */
24781 
24782 	if (data == NULL) {
24783 		return (EINVAL);
24784 	}
24785 
24786 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24787 	    (un->un_state == SD_STATE_OFFLINE)) {
24788 		return (ENXIO);
24789 	}
24790 
24791 	/*
24792 	 * Because this routine will update the device and driver block size
24793 	 * being used we want to make sure there are no commands in progress.
24794 	 * If commands are in progress the user will have to try again.
24795 	 *
24796 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
24797 	 * in sdioctl to protect commands from sdioctl through to the top of
24798 	 * sd_uscsi_strategy. See sdioctl for details.
24799 	 */
24800 	mutex_enter(SD_MUTEX(un));
24801 	if (un->un_ncmds_in_driver != 1) {
24802 		mutex_exit(SD_MUTEX(un));
24803 		return (EAGAIN);
24804 	}
24805 	mutex_exit(SD_MUTEX(un));
24806 
24807 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24808 	    "sd_read_mode2: entry: un:0x%p\n", un);
24809 
24810 #ifdef _MULTI_DATAMODEL
24811 	switch (ddi_model_convert_from(flag & FMODELS)) {
24812 	case DDI_MODEL_ILP32:
24813 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24814 			return (EFAULT);
24815 		}
24816 		/* Convert the ILP32 uscsi data from the application to LP64 */
24817 		cdrom_read32tocdrom_read(cdrd32, mode2);
24818 		break;
24819 	case DDI_MODEL_NONE:
24820 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24821 			return (EFAULT);
24822 		}
24823 		break;
24824 	}
24825 #else /* ! _MULTI_DATAMODEL */
24826 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
24827 		return (EFAULT);
24828 	}
24829 #endif /* _MULTI_DATAMODEL */
24830 
24831 	/* Store the current target block size for restoration later */
24832 	restore_blksize = un->un_tgt_blocksize;
24833 
24834 	/* Change the device and soft state target block size to 2336 */
24835 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
24836 		rval = EIO;
24837 		goto done;
24838 	}
24839 
24840 
24842 
24843 	/* set READ operation */
24844 	cdb[0] = SCMD_READ;
24845 
24846 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
24847 	mode2->cdread_lba >>= 2;
24848 
24849 	/* set the start address */
24850 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
24851 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24852 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
24853 
24854 	/* set the transfer length */
24855 	nblocks = mode2->cdread_buflen / 2336;
24856 	cdb[4] = (uchar_t)nblocks & 0xFF;
24857 
24858 	/* build command */
24859 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24860 	com->uscsi_cdb = (caddr_t)cdb;
24861 	com->uscsi_cdblen = sizeof (cdb);
24862 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24863 	com->uscsi_buflen = mode2->cdread_buflen;
24864 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24865 
24866 	/*
24867 	 * Issue SCSI command with user space address for read buffer.
24868 	 *
24869 	 * This sends the command through main channel in the driver.
24870 	 *
24871 	 * Since this is accessed via an IOCTL call, we go through the
24872 	 * standard path, so that if the device was powered down, then
24873 	 * it would be 'awakened' to handle the command.
24874 	 */
24875 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24876 	    SD_PATH_STANDARD);
24877 
24878 	kmem_free(com, sizeof (*com));
24879 
24880 	/* Restore the device and soft state target block size */
24881 	if (sr_sector_mode(dev, restore_blksize) != 0) {
24882 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24883 		    "can't do switch back to mode 1\n");
24884 		/*
24885 		 * If sd_send_scsi_READ succeeded we still need to report
24886 		 * an error because we failed to reset the block size
24887 		 */
24888 		if (rval == 0) {
24889 			rval = EIO;
24890 		}
24891 	}
24892 
24893 done:
24894 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24895 	    "sd_read_mode2: exit: un:0x%p\n", un);
24896 
24897 	return (rval);
24898 }
24899 
24900 
24901 /*
24902  *    Function: sr_sector_mode()
24903  *
24904  * Description: This utility function is used by sr_read_mode2 to set the target
24905  *		block size based on the user specified size. This is a legacy
24906  *		implementation based upon a vendor specific mode page.
24907  *
24908  *   Arguments: dev	- the device 'dev_t'
24909  *		blksize	- the block size being set; either 2336 or
24910  *			  512.
24911  *
24912  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
24913  *		sd_send_scsi_MODE_SELECT()
24914  *		ENXIO if fail ddi_get_soft_state
24916  */
24917 
24918 static int
24919 sr_sector_mode(dev_t dev, uint32_t blksize)
24920 {
24921 	struct sd_lun	*un;
24922 	uchar_t		*sense;
24923 	uchar_t		*select;
24924 	int		rval;
24925 
24926 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24927 	    (un->un_state == SD_STATE_OFFLINE)) {
24928 		return (ENXIO);
24929 	}
24930 
24931 	sense = kmem_zalloc(20, KM_SLEEP);
24932 
24933 	/* Note: This is a vendor specific mode page (0x81) */
24934 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
24935 	    SD_PATH_STANDARD)) != 0) {
24936 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24937 		    "sr_sector_mode: Mode Sense failed\n");
24938 		kmem_free(sense, 20);
24939 		return (rval);
24940 	}
24941 	select = kmem_zalloc(20, KM_SLEEP);
24942 	select[3] = 0x08;
24943 	select[10] = ((blksize >> 8) & 0xff);
24944 	select[11] = (blksize & 0xff);
24945 	select[12] = 0x01;
24946 	select[13] = 0x06;
24947 	select[14] = sense[14];
24948 	select[15] = sense[15];
24949 	if (blksize == SD_MODE2_BLKSIZE) {
24950 		select[14] |= 0x01;
24951 	}
24952 
24953 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
24954 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24955 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24956 		    "sr_sector_mode: Mode Select failed\n");
24957 	} else {
24958 		/*
24959 		 * Only update the softstate block size if we successfully
24960 		 * changed the device block mode.
24961 		 */
24962 		mutex_enter(SD_MUTEX(un));
24963 		sd_update_block_info(un, blksize, 0);
24964 		mutex_exit(SD_MUTEX(un));
24965 	}
24966 	kmem_free(sense, 20);
24967 	kmem_free(select, 20);
24968 	return (rval);
24969 }
24970 
24971 
24972 /*
24973  *    Function: sr_read_cdda()
24974  *
24975  * Description: This routine is the driver entry point for handling CD-ROM
 * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests to return CD-DA or subcode data (CDROMCDDA).
 *		If the target supports CDDA these requests are handled via a
 *		vendor specific command (0xD8); if the target does not
 *		support CDDA these requests are handled via the READ CD
 *		command (0xBE).
24980  *
24981  *   Arguments: dev	- the device 'dev_t'
24982  *		data	- pointer to user provided CD-DA structure specifying
24983  *			  the track starting address, transfer length, and
24984  *			  subcode options.
24985  *		flag	- this argument is a pass through to ddi_copyxxx()
24986  *			  directly from the mode argument of ioctl().
24987  *
24988  * Return Code: the code returned by sd_send_scsi_cmd()
24989  *		EFAULT if ddi_copyxxx() fails
24990  *		ENXIO if fail ddi_get_soft_state
24991  *		EINVAL if invalid arguments are provided
 *		ENOTTY if the requested subcode is not supported via READ CD
24993  */
24994 
24995 static int
24996 sr_read_cdda(dev_t dev, caddr_t data, int flag)
24997 {
24998 	struct sd_lun			*un;
24999 	struct uscsi_cmd		*com;
25000 	struct cdrom_cdda		*cdda;
25001 	int				rval;
25002 	size_t				buflen;
25003 	char				cdb[CDB_GROUP5];
25004 
25005 #ifdef _MULTI_DATAMODEL
25006 	/* To support ILP32 applications in an LP64 world */
25007 	struct cdrom_cdda32	cdrom_cdda32;
25008 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
25009 #endif /* _MULTI_DATAMODEL */
25010 
25011 	if (data == NULL) {
25012 		return (EINVAL);
25013 	}
25014 
25015 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25016 		return (ENXIO);
25017 	}
25018 
25019 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
25020 
25021 #ifdef _MULTI_DATAMODEL
25022 	switch (ddi_model_convert_from(flag & FMODELS)) {
25023 	case DDI_MODEL_ILP32:
25024 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
25025 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25026 			    "sr_read_cdda: ddi_copyin Failed\n");
25027 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25028 			return (EFAULT);
25029 		}
25030 		/* Convert the ILP32 uscsi data from the application to LP64 */
25031 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
25032 		break;
25033 	case DDI_MODEL_NONE:
25034 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25035 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25036 			    "sr_read_cdda: ddi_copyin Failed\n");
25037 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25038 			return (EFAULT);
25039 		}
25040 		break;
25041 	}
25042 #else /* ! _MULTI_DATAMODEL */
25043 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25044 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25045 		    "sr_read_cdda: ddi_copyin Failed\n");
25046 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25047 		return (EFAULT);
25048 	}
25049 #endif /* _MULTI_DATAMODEL */
25050 
25051 	/*
	 * Since MMC-2 allows at most 3 bytes for the transfer length,
	 * reject a length value that does not fit in 3 bytes.
25054 	 */
25055 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25056 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25057 		    "cdrom transfer length too large: %d (limit %d)\n",
25058 		    cdda->cdda_length, 0xFFFFFF);
25059 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25060 		return (EINVAL);
25061 	}
25062 
25063 	switch (cdda->cdda_subcode) {
25064 	case CDROM_DA_NO_SUBCODE:
25065 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25066 		break;
25067 	case CDROM_DA_SUBQ:
25068 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25069 		break;
25070 	case CDROM_DA_ALL_SUBCODE:
25071 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25072 		break;
25073 	case CDROM_DA_SUBCODE_ONLY:
25074 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25075 		break;
25076 	default:
25077 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25078 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25079 		    cdda->cdda_subcode);
25080 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25081 		return (EINVAL);
25082 	}
25083 
25084 	/* Build and send the command */
25085 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25086 	bzero(cdb, CDB_GROUP5);
25087 
25088 	if (un->un_f_cfg_cdda == TRUE) {
25089 		cdb[0] = (char)SCMD_READ_CD;
25090 		cdb[1] = 0x04;
25091 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25092 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25093 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25094 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25095 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25096 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25097 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25098 		cdb[9] = 0x10;
25099 		switch (cdda->cdda_subcode) {
25100 		case CDROM_DA_NO_SUBCODE :
25101 			cdb[10] = 0x0;
25102 			break;
25103 		case CDROM_DA_SUBQ :
25104 			cdb[10] = 0x2;
25105 			break;
25106 		case CDROM_DA_ALL_SUBCODE :
25107 			cdb[10] = 0x1;
25108 			break;
25109 		case CDROM_DA_SUBCODE_ONLY :
25110 			/* FALLTHROUGH */
25111 		default :
25112 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25113 			kmem_free(com, sizeof (*com));
25114 			return (ENOTTY);
25115 		}
25116 	} else {
25117 		cdb[0] = (char)SCMD_READ_CDDA;
25118 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25119 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25120 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25121 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25122 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25123 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25124 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25125 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25126 		cdb[10] = cdda->cdda_subcode;
25127 	}
25128 
25129 	com->uscsi_cdb = cdb;
25130 	com->uscsi_cdblen = CDB_GROUP5;
25131 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25132 	com->uscsi_buflen = buflen;
25133 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25134 
25135 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25136 	    SD_PATH_STANDARD);
25137 
25138 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25139 	kmem_free(com, sizeof (*com));
25140 	return (rval);
25141 }
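
/*
 * Illustrative sketch (not part of the driver): reading one block of
 * CD-DA data through the CDROMCDDA ioctl handled above. The subcode
 * option selects the per-block size (2352 bytes with no subcode, per the
 * switch above); fd is an open descriptor as in the earlier example.
 *
 *	uchar_t buf[2352];
 *	struct cdrom_cdda cdda;
 *	cdda.cdda_addr = 0;			(starting block; example)
 *	cdda.cdda_length = 1;			(one block)
 *	cdda.cdda_data = (caddr_t)buf;
 *	cdda.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *	if (ioctl(fd, CDROMCDDA, &cdda) < 0)
 *		perror("CDROMCDDA");
 */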
25142 
25143 
25144 /*
25145  *    Function: sr_read_cdxa()
25146  *
25147  * Description: This routine is the driver entry point for handling CD-ROM
25148  *		ioctl requests to return CD-XA (Extended Architecture) data.
25149  *		(CDROMCDXA).
25150  *
25151  *   Arguments: dev	- the device 'dev_t'
25152  *		data	- pointer to user provided CD-XA structure specifying
25153  *			  the data starting address, transfer length, and format
25154  *		flag	- this argument is a pass through to ddi_copyxxx()
25155  *			  directly from the mode argument of ioctl().
25156  *
25157  * Return Code: the code returned by sd_send_scsi_cmd()
25158  *		EFAULT if ddi_copyxxx() fails
25159  *		ENXIO if fail ddi_get_soft_state
25160  *		EINVAL if data pointer is NULL
25161  */
25162 
25163 static int
25164 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25165 {
25166 	struct sd_lun		*un;
25167 	struct uscsi_cmd	*com;
25168 	struct cdrom_cdxa	*cdxa;
25169 	int			rval;
25170 	size_t			buflen;
25171 	char			cdb[CDB_GROUP5];
25172 	uchar_t			read_flags;
25173 
25174 #ifdef _MULTI_DATAMODEL
25175 	/* To support ILP32 applications in an LP64 world */
25176 	struct cdrom_cdxa32		cdrom_cdxa32;
25177 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25178 #endif /* _MULTI_DATAMODEL */
25179 
25180 	if (data == NULL) {
25181 		return (EINVAL);
25182 	}
25183 
25184 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25185 		return (ENXIO);
25186 	}
25187 
25188 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25189 
25190 #ifdef _MULTI_DATAMODEL
25191 	switch (ddi_model_convert_from(flag & FMODELS)) {
25192 	case DDI_MODEL_ILP32:
25193 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25194 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25195 			return (EFAULT);
25196 		}
25197 		/*
25198 		 * Convert the ILP32 uscsi data from the
25199 		 * application to LP64 for internal use.
25200 		 */
25201 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25202 		break;
25203 	case DDI_MODEL_NONE:
25204 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25205 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25206 			return (EFAULT);
25207 		}
25208 		break;
25209 	}
25210 #else /* ! _MULTI_DATAMODEL */
25211 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25212 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25213 		return (EFAULT);
25214 	}
25215 #endif /* _MULTI_DATAMODEL */
25216 
25217 	/*
	 * Since MMC-2 allows at most 3 bytes for the transfer length,
	 * reject a length value that does not fit in 3 bytes.
25220 	 */
25221 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25222 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25223 		    "cdrom transfer length too large: %d (limit %d)\n",
25224 		    cdxa->cdxa_length, 0xFFFFFF);
25225 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25226 		return (EINVAL);
25227 	}
25228 
25229 	switch (cdxa->cdxa_format) {
25230 	case CDROM_XA_DATA:
25231 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25232 		read_flags = 0x10;
25233 		break;
25234 	case CDROM_XA_SECTOR_DATA:
25235 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25236 		read_flags = 0xf8;
25237 		break;
25238 	case CDROM_XA_DATA_W_ERROR:
25239 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25240 		read_flags = 0xfc;
25241 		break;
25242 	default:
25243 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25244 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25245 		    cdxa->cdxa_format);
25246 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25247 		return (EINVAL);
25248 	}
25249 
25250 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25251 	bzero(cdb, CDB_GROUP5);
25252 	if (un->un_f_mmc_cap == TRUE) {
25253 		cdb[0] = (char)SCMD_READ_CD;
25254 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25255 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25256 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25257 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25258 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25259 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25260 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25261 		cdb[9] = (char)read_flags;
25262 	} else {
25263 		/*
		 * Note: A vendor specific command (0xDB) is being used here
		 * to request a read of all subcodes.
25266 		 */
25267 		cdb[0] = (char)SCMD_READ_CDXA;
25268 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25269 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25270 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25271 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25272 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25273 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25274 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25275 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25276 		cdb[10] = cdxa->cdxa_format;
25277 	}
25278 	com->uscsi_cdb	   = cdb;
25279 	com->uscsi_cdblen  = CDB_GROUP5;
25280 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25281 	com->uscsi_buflen  = buflen;
25282 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25283 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25284 	    SD_PATH_STANDARD);
25285 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25286 	kmem_free(com, sizeof (*com));
25287 	return (rval);
25288 }
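
/*
 * Illustrative sketch (not part of the driver): a CDROMCDXA request
 * against the handler above. cdxa_format selects both the per-block size
 * and the READ CD flags; CDROM_XA_DATA returns the 2048 byte user data
 * area of each sector. Setup is assumed as in the previous examples.
 *
 *	struct cdrom_cdxa cdxa;
 *	cdxa.cdxa_addr = 0;
 *	cdxa.cdxa_length = 1;
 *	cdxa.cdxa_data = (caddr_t)buf;		(buf >= 2048 bytes)
 *	cdxa.cdxa_format = CDROM_XA_DATA;
 *	if (ioctl(fd, CDROMCDXA, &cdxa) < 0)
 *		perror("CDROMCDXA");
 */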
25289 
25290 
25291 /*
25292  *    Function: sr_eject()
25293  *
25294  * Description: This routine is the driver entry point for handling CD-ROM
25295  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25296  *
25297  *   Arguments: dev	- the device 'dev_t'
25298  *
25299  * Return Code: the code returned by sd_send_scsi_cmd()
25300  */
25301 
25302 static int
25303 sr_eject(dev_t dev)
25304 {
25305 	struct sd_lun	*un;
25306 	int		rval;
25307 
25308 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25309 	    (un->un_state == SD_STATE_OFFLINE)) {
25310 		return (ENXIO);
25311 	}
25312 
25313 	/*
	 * To prevent race conditions with the eject command, keep track
	 * of an eject command as it progresses. If we are already
	 * handling an eject command in the driver for the given unit and
	 * another request to eject is received, immediately return EAGAIN
	 * so we don't lose the command if the current eject command fails.
25321 	 */
25322 	mutex_enter(SD_MUTEX(un));
25323 	if (un->un_f_ejecting == TRUE) {
25324 		mutex_exit(SD_MUTEX(un));
25325 		return (EAGAIN);
25326 	}
25327 	un->un_f_ejecting = TRUE;
25328 	mutex_exit(SD_MUTEX(un));
25329 
25330 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25331 	    SD_PATH_STANDARD)) != 0) {
25332 		mutex_enter(SD_MUTEX(un));
25333 		un->un_f_ejecting = FALSE;
25334 		mutex_exit(SD_MUTEX(un));
25335 		return (rval);
25336 	}
25337 
25338 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25339 	    SD_PATH_STANDARD);
25340 
25341 	if (rval == 0) {
25342 		mutex_enter(SD_MUTEX(un));
25343 		sr_ejected(un);
25344 		un->un_mediastate = DKIO_EJECTED;
25345 		un->un_f_ejecting = FALSE;
25346 		cv_broadcast(&un->un_state_cv);
25347 		mutex_exit(SD_MUTEX(un));
25348 	} else {
25349 		mutex_enter(SD_MUTEX(un));
25350 		un->un_f_ejecting = FALSE;
25351 		mutex_exit(SD_MUTEX(un));
25352 	}
25353 	return (rval);
25354 }
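
/*
 * Illustrative sketch (not part of the driver): sr_eject() is reached
 * from userland via the eject ioctls named above, e.g.:
 *
 *	if (ioctl(fd, CDROMEJECT, 0) < 0)
 *		perror("CDROMEJECT");	(EAGAIN if an eject is in progress)
 */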
25355 
25356 
25357 /*
25358  *    Function: sr_ejected()
25359  *
25360  * Description: This routine updates the soft state structure to invalidate the
25361  *		geometry information after the media has been ejected or a
25362  *		media eject has been detected.
25363  *
25364  *   Arguments: un - driver soft state (unit) structure
25365  */
25366 
25367 static void
25368 sr_ejected(struct sd_lun *un)
25369 {
25370 	struct sd_errstats *stp;
25371 
25372 	ASSERT(un != NULL);
25373 	ASSERT(mutex_owned(SD_MUTEX(un)));
25374 
25375 	un->un_f_blockcount_is_valid	= FALSE;
25376 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25377 	mutex_exit(SD_MUTEX(un));
25378 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25379 	mutex_enter(SD_MUTEX(un));
25380 
25381 	if (un->un_errstats != NULL) {
25382 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25383 		stp->sd_capacity.value.ui64 = 0;
25384 	}
25385 }
25386 
25387 
25388 /*
25389  *    Function: sr_check_wp()
25390  *
25391  * Description: This routine checks the write protection of a removable
25392  *      media disk and hotpluggable devices via the write protect bit of
 *      the Mode Page Header device specific field. Some devices choke
 *      on an unsupported mode page. To work around this issue, this
 *      routine uses the 0x3f mode page (request for all pages) for all
 *      device types.
25397  *
25398  *   Arguments: dev		- the device 'dev_t'
25399  *
25400  * Return Code: int indicating if the device is write protected (1) or not (0)
25401  *
25402  *     Context: Kernel thread.
25403  *
25404  */
25405 
25406 static int
25407 sr_check_wp(dev_t dev)
25408 {
25409 	struct sd_lun	*un;
25410 	uchar_t		device_specific;
25411 	uchar_t		*sense;
25412 	int		hdrlen;
25413 	int		rval = FALSE;
25414 
25415 	/*
25416 	 * Note: The return codes for this routine should be reworked to
25417 	 * properly handle the case of a NULL softstate.
25418 	 */
25419 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25420 		return (FALSE);
25421 	}
25422 
25423 	if (un->un_f_cfg_is_atapi == TRUE) {
25424 		/*
25425 		 * The mode page contents are not required; set the allocation
25426 		 * length for the mode page header only
25427 		 */
25428 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25429 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25430 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25431 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25432 			goto err_exit;
25433 		device_specific =
25434 		    ((struct mode_header_grp2 *)sense)->device_specific;
25435 	} else {
25436 		hdrlen = MODE_HEADER_LENGTH;
25437 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25438 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25439 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25440 			goto err_exit;
25441 		device_specific =
25442 		    ((struct mode_header *)sense)->device_specific;
25443 	}
25444 
25445 	/*
	 * Check the write protect bit of the device specific field. (If
	 * the mode sense above failed, we returned FALSE: not all disks
	 * understand this query, and such devices are assumed writable.)
25449 	 */
25450 	if (device_specific & WRITE_PROTECT) {
25451 		rval = TRUE;
25452 	}
25453 
25454 err_exit:
25455 	kmem_free(sense, hdrlen);
25456 	return (rval);
25457 }
25458 
25459 /*
25460  *    Function: sr_volume_ctrl()
25461  *
25462  * Description: This routine is the driver entry point for handling CD-ROM
25463  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25464  *
25465  *   Arguments: dev	- the device 'dev_t'
25466  *		data	- pointer to user audio volume control structure
25467  *		flag	- this argument is a pass through to ddi_copyxxx()
25468  *			  directly from the mode argument of ioctl().
25469  *
25470  * Return Code: the code returned by sd_send_scsi_cmd()
25471  *		EFAULT if ddi_copyxxx() fails
25472  *		ENXIO if fail ddi_get_soft_state
25473  *		EINVAL if data pointer is NULL
25474  *
25475  */
25476 
25477 static int
25478 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25479 {
25480 	struct sd_lun		*un;
25481 	struct cdrom_volctrl    volume;
25482 	struct cdrom_volctrl    *vol = &volume;
25483 	uchar_t			*sense_page;
25484 	uchar_t			*select_page;
25485 	uchar_t			*sense;
25486 	uchar_t			*select;
25487 	int			sense_buflen;
25488 	int			select_buflen;
25489 	int			rval;
25490 
25491 	if (data == NULL) {
25492 		return (EINVAL);
25493 	}
25494 
25495 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25496 	    (un->un_state == SD_STATE_OFFLINE)) {
25497 		return (ENXIO);
25498 	}
25499 
25500 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25501 		return (EFAULT);
25502 	}
25503 
25504 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25505 		struct mode_header_grp2		*sense_mhp;
25506 		struct mode_header_grp2		*select_mhp;
25507 		int				bd_len;
25508 
25509 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25510 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25511 		    MODEPAGE_AUDIO_CTRL_LEN;
25512 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25513 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25514 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25515 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25516 		    SD_PATH_STANDARD)) != 0) {
25517 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25518 			    "sr_volume_ctrl: Mode Sense Failed\n");
25519 			kmem_free(sense, sense_buflen);
25520 			kmem_free(select, select_buflen);
25521 			return (rval);
25522 		}
25523 		sense_mhp = (struct mode_header_grp2 *)sense;
25524 		select_mhp = (struct mode_header_grp2 *)select;
25525 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25526 		    sense_mhp->bdesc_length_lo;
25527 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25528 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25529 			    "sr_volume_ctrl: Mode Sense returned invalid "
25530 			    "block descriptor length\n");
25531 			kmem_free(sense, sense_buflen);
25532 			kmem_free(select, select_buflen);
25533 			return (EIO);
25534 		}
25535 		sense_page = (uchar_t *)
25536 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25537 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25538 		select_mhp->length_msb = 0;
25539 		select_mhp->length_lsb = 0;
25540 		select_mhp->bdesc_length_hi = 0;
25541 		select_mhp->bdesc_length_lo = 0;
25542 	} else {
25543 		struct mode_header		*sense_mhp, *select_mhp;
25544 
25545 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25546 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25547 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25548 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25549 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25550 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25551 		    SD_PATH_STANDARD)) != 0) {
25552 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25553 			    "sr_volume_ctrl: Mode Sense Failed\n");
25554 			kmem_free(sense, sense_buflen);
25555 			kmem_free(select, select_buflen);
25556 			return (rval);
25557 		}
25558 		sense_mhp  = (struct mode_header *)sense;
25559 		select_mhp = (struct mode_header *)select;
25560 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25561 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25562 			    "sr_volume_ctrl: Mode Sense returned invalid "
25563 			    "block descriptor length\n");
25564 			kmem_free(sense, sense_buflen);
25565 			kmem_free(select, select_buflen);
25566 			return (EIO);
25567 		}
25568 		sense_page = (uchar_t *)
25569 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25570 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25571 		select_mhp->length = 0;
25572 		select_mhp->bdesc_length = 0;
25573 	}
25574 	/*
	 * Note: An audio control data structure could be created and overlaid
	 * on the following in place of the array indexing method implemented.
25577 	 */
25578 
25579 	/* Build the select data for the user volume data */
25580 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25581 	select_page[1] = 0xE;
25582 	/* Set the immediate bit */
25583 	select_page[2] = 0x04;
25584 	/* Zero out reserved fields */
25585 	select_page[3] = 0x00;
25586 	select_page[4] = 0x00;
25587 	/* Return sense data for fields not to be modified */
25588 	select_page[5] = sense_page[5];
25589 	select_page[6] = sense_page[6];
25590 	select_page[7] = sense_page[7];
25591 	/* Set the user specified volume levels for channel 0 and 1 */
25592 	select_page[8] = 0x01;
25593 	select_page[9] = vol->channel0;
25594 	select_page[10] = 0x02;
25595 	select_page[11] = vol->channel1;
	/* Channels 2 and 3 are currently unsupported, so return sense data */
25597 	select_page[12] = sense_page[12];
25598 	select_page[13] = sense_page[13];
25599 	select_page[14] = sense_page[14];
25600 	select_page[15] = sense_page[15];
25601 
25602 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25603 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25604 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25605 	} else {
25606 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25607 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25608 	}
25609 
25610 	kmem_free(sense, sense_buflen);
25611 	kmem_free(select, select_buflen);
25612 	return (rval);
25613 }
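
/*
 * Illustrative sketch (not part of the driver): setting the playback
 * volume through the CDROMVOLCTRL ioctl handled above. Only channels 0
 * and 1 are honored, per the routine's comments.
 *
 *	struct cdrom_volctrl v;
 *	v.channel0 = 255;			(left output, full volume)
 *	v.channel1 = 255;			(right output, full volume)
 *	v.channel2 = v.channel3 = 0;		(currently unsupported)
 *	if (ioctl(fd, CDROMVOLCTRL, &v) < 0)
 *		perror("CDROMVOLCTRL");
 */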
25614 
25615 
25616 /*
25617  *    Function: sr_read_sony_session_offset()
25618  *
25619  * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests for session offset information (CDROMREADOFFSET).
 *		The address of the first track in the last session of a
 *		multi-session CD-ROM is returned.
25623  *
25624  *		Note: This routine uses a vendor specific key value in the
25625  *		command control field without implementing any vendor check here
25626  *		or in the ioctl routine.
25627  *
25628  *   Arguments: dev	- the device 'dev_t'
25629  *		data	- pointer to an int to hold the requested address
25630  *		flag	- this argument is a pass through to ddi_copyxxx()
25631  *			  directly from the mode argument of ioctl().
25632  *
25633  * Return Code: the code returned by sd_send_scsi_cmd()
25634  *		EFAULT if ddi_copyxxx() fails
25635  *		ENXIO if fail ddi_get_soft_state
25636  *		EINVAL if data pointer is NULL
25637  */
25638 
25639 static int
25640 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25641 {
25642 	struct sd_lun		*un;
25643 	struct uscsi_cmd	*com;
25644 	caddr_t			buffer;
25645 	char			cdb[CDB_GROUP1];
25646 	int			session_offset = 0;
25647 	int			rval;
25648 
25649 	if (data == NULL) {
25650 		return (EINVAL);
25651 	}
25652 
25653 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25654 	    (un->un_state == SD_STATE_OFFLINE)) {
25655 		return (ENXIO);
25656 	}
25657 
25658 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
25659 	bzero(cdb, CDB_GROUP1);
25660 	cdb[0] = SCMD_READ_TOC;
25661 	/*
	 * Bytes 7 & 8 are the allocation length: 12 bytes for a single entry
	 * (4 byte TOC response header + 8 bytes of response data).
25664 	 */
25665 	cdb[8] = SONY_SESSION_OFFSET_LEN;
25666 	/* Byte 9 is the control byte. A vendor specific value is used */
25667 	cdb[9] = SONY_SESSION_OFFSET_KEY;
25668 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25669 	com->uscsi_cdb = cdb;
25670 	com->uscsi_cdblen = CDB_GROUP1;
25671 	com->uscsi_bufaddr = buffer;
25672 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
25673 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25674 
25675 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25676 	    SD_PATH_STANDARD);
25677 	if (rval != 0) {
25678 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25679 		kmem_free(com, sizeof (*com));
25680 		return (rval);
25681 	}
25682 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
25683 		session_offset =
25684 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
25685 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
25686 		/*
		 * The offset is returned in units of the current target
		 * block size. Convert to 2K blocks before returning it to
		 * the user.
25689 		 */
25690 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
25691 			session_offset >>= 2;
25692 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
25693 			session_offset >>= 1;
25694 		}
25695 	}
25696 
25697 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
25698 		rval = EFAULT;
25699 	}
25700 
25701 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25702 	kmem_free(com, sizeof (*com));
25703 	return (rval);
25704 }
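
/*
 * Illustrative sketch (not part of the driver): the CDROMREADOFFSET ioctl
 * handled above returns the address of the first track of the last
 * session as an int, in 2K blocks:
 *
 *	int offset;
 *	if (ioctl(fd, CDROMREADOFFSET, &offset) == 0)
 *		(void) printf("last session starts at 2K block %d\n", offset);
 */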
25705 
25706 
25707 /*
25708  *    Function: sd_wm_cache_constructor()
25709  *
25710  * Description: Cache Constructor for the wmap cache for the read/modify/write
25711  * 		devices.
25712  *
25713  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25714  *		un	- sd_lun structure for the device.
25715  *		flag	- the km flags passed to constructor
25716  *
25717  * Return Code: 0 on success.
25718  *		-1 on failure.
25719  */
25720 
25721 /*ARGSUSED*/
25722 static int
25723 sd_wm_cache_constructor(void *wm, void *un, int flags)
25724 {
25725 	bzero(wm, sizeof (struct sd_w_map));
25726 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
25727 	return (0);
25728 }
25729 
25730 
25731 /*
25732  *    Function: sd_wm_cache_destructor()
25733  *
25734  * Description: Cache destructor for the wmap cache for the read/modify/write
25735  * 		devices.
25736  *
 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
25738  *		un	- sd_lun structure for the device.
25739  */
25740 /*ARGSUSED*/
25741 static void
25742 sd_wm_cache_destructor(void *wm, void *un)
25743 {
25744 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
25745 }
25746 
25747 
25748 /*
25749  *    Function: sd_range_lock()
25750  *
 * Description: Lock the specified range of blocks to ensure that a
 *		read-modify-write is atomic and no other I/O writes to the
 *		same location. The range is specified in terms of start and
 *		end blocks. Block numbers are the actual media block numbers,
 *		not system block numbers.
25756  *
25757  *   Arguments: un	- sd_lun structure for the device.
25758  *		startb - The starting block number
25759  *		endb - The end block number
25760  *		typ - type of i/o - simple/read_modify_write
25761  *
25762  * Return Code: wm  - pointer to the wmap structure.
25763  *
25764  *     Context: This routine can sleep.
25765  */
25766 
25767 static struct sd_w_map *
25768 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
25769 {
25770 	struct sd_w_map *wmp = NULL;
25771 	struct sd_w_map *sl_wmp = NULL;
25772 	struct sd_w_map *tmp_wmp;
25773 	wm_state state = SD_WM_CHK_LIST;
25774 
25775 
25776 	ASSERT(un != NULL);
25777 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25778 
25779 	mutex_enter(SD_MUTEX(un));
25780 
25781 	while (state != SD_WM_DONE) {
25782 
25783 		switch (state) {
25784 		case SD_WM_CHK_LIST:
25785 			/*
25786 			 * This is the starting state. Check the wmap list
25787 			 * to see if the range is currently available.
25788 			 */
25789 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
25790 				/*
25791 				 * If this is a simple write and no rmw
25792 				 * i/o is pending then try to lock the
25793 				 * range as the range should be available.
25794 				 */
25795 				state = SD_WM_LOCK_RANGE;
25796 			} else {
25797 				tmp_wmp = sd_get_range(un, startb, endb);
25798 				if (tmp_wmp != NULL) {
25799 					if ((wmp != NULL) && ONLIST(un, wmp)) {
25800 						/*
						 * Should not keep an onlist
						 * wmp while waiting; this
						 * macro also sets wmp = NULL.
25804 						 */
25805 						FREE_ONLIST_WMAP(un, wmp);
25806 					}
25807 					/*
					 * sl_wmp is the wmap on which the
					 * wait is done. Since tmp_wmp points
					 * to the in-use wmap, set sl_wmp to
					 * tmp_wmp and change the state to
					 * wait.
25812 					 */
25813 					sl_wmp = tmp_wmp;
25814 					state = SD_WM_WAIT_MAP;
25815 				} else {
25816 					state = SD_WM_LOCK_RANGE;
25817 				}
25818 
25819 			}
25820 			break;
25821 
25822 		case SD_WM_LOCK_RANGE:
25823 			ASSERT(un->un_wm_cache);
25824 			/*
			 * The range needs to be locked; try to get a wmap.
			 * First attempt it with KM_NOSLEEP: we want to avoid
			 * sleeping if possible, since we would have to
			 * release the sd mutex in order to sleep.
25829 			 */
25830 			if (wmp == NULL)
25831 				wmp = kmem_cache_alloc(un->un_wm_cache,
25832 				    KM_NOSLEEP);
25833 			if (wmp == NULL) {
25834 				mutex_exit(SD_MUTEX(un));
25835 				_NOTE(DATA_READABLE_WITHOUT_LOCK
25836 				    (sd_lun::un_wm_cache))
25837 				wmp = kmem_cache_alloc(un->un_wm_cache,
25838 				    KM_SLEEP);
25839 				mutex_enter(SD_MUTEX(un));
25840 				/*
25841 				 * we released the mutex so recheck and go to
25842 				 * check list state.
25843 				 */
25844 				state = SD_WM_CHK_LIST;
25845 			} else {
25846 				/*
				 * We exit the state machine since we have
				 * the wmap. Do the housekeeping first:
				 * place the wmap on the wmap list if it is
				 * not on it already, then set the state to
				 * done.
25851 				 */
25852 				wmp->wm_start = startb;
25853 				wmp->wm_end = endb;
25854 				wmp->wm_flags = typ | SD_WM_BUSY;
25855 				if (typ & SD_WTYPE_RMW) {
25856 					un->un_rmw_count++;
25857 				}
25858 				/*
25859 				 * If not already on the list then link
25860 				 */
25861 				if (!ONLIST(un, wmp)) {
25862 					wmp->wm_next = un->un_wm;
25863 					wmp->wm_prev = NULL;
25864 					if (wmp->wm_next)
25865 						wmp->wm_next->wm_prev = wmp;
25866 					un->un_wm = wmp;
25867 				}
25868 				state = SD_WM_DONE;
25869 			}
25870 			break;
25871 
25872 		case SD_WM_WAIT_MAP:
25873 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
25874 			/*
25875 			 * Wait is done on sl_wmp, which is set in the
25876 			 * check_list state.
25877 			 */
25878 			sl_wmp->wm_wanted_count++;
25879 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
25880 			sl_wmp->wm_wanted_count--;
25881 			/*
25882 			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
25884 			 * waiting for it.
25885 			 */
25886 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
25887 			if (sl_wmp->wm_wanted_count == 0) {
25888 				if (wmp != NULL)
25889 					CHK_N_FREEWMP(un, wmp);
25890 				wmp = sl_wmp;
25891 			}
25892 			sl_wmp = NULL;
25893 			/*
25894 			 * After waking up, need to recheck for availability of
25895 			 * range.
25896 			 */
25897 			state = SD_WM_CHK_LIST;
25898 			break;
25899 
25900 		default:
25901 			panic("sd_range_lock: "
25902 			    "Unknown state %d in sd_range_lock", state);
25903 			/*NOTREACHED*/
25904 		} /* switch(state) */
25905 
25906 	} /* while(state != SD_WM_DONE) */
25907 
25908 	mutex_exit(SD_MUTEX(un));
25909 
25910 	ASSERT(wmp != NULL);
25911 
25912 	return (wmp);
25913 }
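
/*
 * Illustrative sketch (not part of the driver): the lock/unlock pairing a
 * read-modify-write path would use around a block range, assuming the
 * caller does not hold SD_MUTEX:
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, startb, endb, SD_WTYPE_RMW);
 *	(... read the target blocks, modify them, write them back ...)
 *	sd_range_unlock(un, wm);
 */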
25914 
25915 
25916 /*
25917  *    Function: sd_get_range()
25918  *
 * Description: Find whether there is any I/O overlapping this one.
 *		Returns the write map of the first such I/O, NULL otherwise.
25921  *
25922  *   Arguments: un	- sd_lun structure for the device.
25923  *		startb - The starting block number
25924  *		endb - The end block number
25925  *
25926  * Return Code: wm  - pointer to the wmap structure.
25927  */
25928 
25929 static struct sd_w_map *
25930 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
25931 {
25932 	struct sd_w_map *wmp;
25933 
25934 	ASSERT(un != NULL);
25935 
25936 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
25937 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
25938 			continue;
25939 		}
25940 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
25941 			break;
25942 		}
25943 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
25944 			break;
25945 		}
25946 	}
25947 
25948 	return (wmp);
25949 }
25950 
25951 
25952 /*
25953  *    Function: sd_free_inlist_wmap()
25954  *
25955  * Description: Unlink and free a write map struct.
25956  *
25957  *   Arguments: un      - sd_lun structure for the device.
25958  *		wmp	- sd_w_map which needs to be unlinked.
25959  */
25960 
25961 static void
25962 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
25963 {
25964 	ASSERT(un != NULL);
25965 
25966 	if (un->un_wm == wmp) {
25967 		un->un_wm = wmp->wm_next;
25968 	} else {
25969 		wmp->wm_prev->wm_next = wmp->wm_next;
25970 	}
25971 
25972 	if (wmp->wm_next) {
25973 		wmp->wm_next->wm_prev = wmp->wm_prev;
25974 	}
25975 
25976 	wmp->wm_next = wmp->wm_prev = NULL;
25977 
25978 	kmem_cache_free(un->un_wm_cache, wmp);
25979 }
25980 
25981 
25982 /*
25983  *    Function: sd_range_unlock()
25984  *
25985  * Description: Unlock the range locked by wm.
25986  *		Free write map if nobody else is waiting on it.
25987  *
25988  *   Arguments: un      - sd_lun structure for the device.
25989  *              wmp     - sd_w_map which needs to be unlinked.
25990  */
25991 
25992 static void
25993 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
25994 {
25995 	ASSERT(un != NULL);
25996 	ASSERT(wm != NULL);
25997 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25998 
25999 	mutex_enter(SD_MUTEX(un));
26000 
26001 	if (wm->wm_flags & SD_WTYPE_RMW) {
26002 		un->un_rmw_count--;
26003 	}
26004 
26005 	if (wm->wm_wanted_count) {
26006 		wm->wm_flags = 0;
26007 		/*
26008 		 * Broadcast that the wmap is available now.
26009 		 */
26010 		cv_broadcast(&wm->wm_avail);
26011 	} else {
26012 		/*
		 * If no one is waiting on the map, it should be freed.
26014 		 */
26015 		sd_free_inlist_wmap(un, wm);
26016 	}
26017 
26018 	mutex_exit(SD_MUTEX(un));
26019 }
26020 
26021 
26022 /*
26023  *    Function: sd_read_modify_write_task
26024  *
26025  * Description: Called from a taskq thread to initiate the write phase of
26026  *		a read-modify-write request.  This is used for targets where
26027  *		un->un_sys_blocksize != un->un_tgt_blocksize.
26028  *
26029  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
26030  *
26031  *     Context: Called under taskq thread context.
26032  */
26033 
26034 static void
26035 sd_read_modify_write_task(void *arg)
26036 {
26037 	struct sd_mapblocksize_info	*bsp;
26038 	struct buf	*bp;
26039 	struct sd_xbuf	*xp;
26040 	struct sd_lun	*un;
26041 
26042 	bp = arg;	/* The bp is given in arg */
26043 	ASSERT(bp != NULL);
26044 
26045 	/* Get the pointer to the layer-private data struct */
26046 	xp = SD_GET_XBUF(bp);
26047 	ASSERT(xp != NULL);
26048 	bsp = xp->xb_private;
26049 	ASSERT(bsp != NULL);
26050 
26051 	un = SD_GET_UN(bp);
26052 	ASSERT(un != NULL);
26053 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26054 
26055 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26056 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26057 
26058 	/*
26059 	 * This is the write phase of a read-modify-write request, called
	 * under the context of a taskq thread in response to the read
	 * portion of the rmw request completing under interrupt
26062 	 * context. The write request must be sent from here down the iostart
26063 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
26064 	 * we use the layer index saved in the layer-private data area.
26065 	 */
26066 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26067 
26068 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26069 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26070 }
26071 
26072 
26073 /*
26074  *    Function: sddump_do_read_of_rmw()
26075  *
 * Description: This routine will be called from sddump. If sddump is called
 *		with an I/O which is not aligned on a device blocksize
 *		boundary, then the write has to be converted to a
 *		read-modify-write. Do the read part here in order to keep
 *		sddump simple. Note that the sd_mutex is held across the
 *		call to this routine.
26082  *
26083  *   Arguments: un	- sd_lun
26084  *		blkno	- block number in terms of media block size.
26085  *		nblk	- number of blocks.
26086  *		bpp	- pointer to pointer to the buf structure. On return
26087  *			from this function, *bpp points to the valid buffer
26088  *			to which the write has to be done.
26089  *
26090  * Return Code: 0 for success or errno-type return code
26091  */
26092 
26093 static int
26094 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26095 	struct buf **bpp)
26096 {
26097 	int err;
26098 	int i;
26099 	int rval;
26100 	struct buf *bp;
26101 	struct scsi_pkt *pkt = NULL;
26102 	uint32_t target_blocksize;
26103 
26104 	ASSERT(un != NULL);
26105 	ASSERT(mutex_owned(SD_MUTEX(un)));
26106 
26107 	target_blocksize = un->un_tgt_blocksize;
26108 
26109 	mutex_exit(SD_MUTEX(un));
26110 
26111 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26112 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26113 	if (bp == NULL) {
26114 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26115 		    "no resources for dumping; giving up");
26116 		err = ENOMEM;
26117 		goto done;
26118 	}
26119 
26120 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26121 	    blkno, nblk);
26122 	if (rval != 0) {
26123 		scsi_free_consistent_buf(bp);
26124 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26125 		    "no resources for dumping; giving up");
26126 		err = ENOMEM;
26127 		goto done;
26128 	}
26129 
26130 	pkt->pkt_flags |= FLAG_NOINTR;
26131 
26132 	err = EIO;
26133 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26134 
26135 		/*
26136 		 * Scsi_poll returns 0 (success) if the command completes and
26137 		 * the status block is STATUS_GOOD.  We should only check
26138 		 * errors if this condition is not true.  Even then we should
26139 		 * send our own request sense packet only if we have a check
26140 		 * condition and auto request sense has not been performed by
26141 		 * the hba.
26142 		 */
26143 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26144 
26145 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26146 			err = 0;
26147 			break;
26148 		}
26149 
26150 		/*
		 * Check CMD_DEV_GONE first; if the device is gone, give up,
		 * as there is no need to read RQS data.
26153 		 */
26154 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26155 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26156 			    "Device is gone\n");
26157 			break;
26158 		}
26159 
26160 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26161 			SD_INFO(SD_LOG_DUMP, un,
26162 			    "sddump: read failed with CHECK, try # %d\n", i);
26163 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26164 				(void) sd_send_polled_RQS(un);
26165 			}
26166 
26167 			continue;
26168 		}
26169 
26170 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26171 			int reset_retval = 0;
26172 
26173 			SD_INFO(SD_LOG_DUMP, un,
26174 			    "sddump: read failed with BUSY, try # %d\n", i);
26175 
26176 			if (un->un_f_lun_reset_enabled == TRUE) {
26177 				reset_retval = scsi_reset(SD_ADDRESS(un),
26178 				    RESET_LUN);
26179 			}
26180 			if (reset_retval == 0) {
26181 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26182 			}
26183 			(void) sd_send_polled_RQS(un);
26184 
26185 		} else {
26186 			SD_INFO(SD_LOG_DUMP, un,
26187 			    "sddump: read failed with 0x%x, try # %d\n",
26188 			    SD_GET_PKT_STATUS(pkt), i);
26189 			mutex_enter(SD_MUTEX(un));
26190 			sd_reset_target(un, pkt);
26191 			mutex_exit(SD_MUTEX(un));
26192 		}
26193 
26194 		/*
26195 		 * If we are not getting anywhere with lun/target resets,
26196 		 * let's reset the bus.
26197 		 */
26198 		if (i > SD_NDUMP_RETRIES/2) {
26199 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26200 			(void) sd_send_polled_RQS(un);
26201 		}
26202 
26203 	}
26204 	scsi_destroy_pkt(pkt);
26205 
26206 	if (err != 0) {
26207 		scsi_free_consistent_buf(bp);
26208 		*bpp = NULL;
26209 	} else {
26210 		*bpp = bp;
26211 	}
26212 
26213 done:
26214 	mutex_enter(SD_MUTEX(un));
26215 	return (err);
26216 }
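
/*
 * Illustrative sketch (not part of the driver): how a caller such as
 * sddump() would use the read phase above to build an aligned write. The
 * overlay offset and length names here are hypothetical:
 *
 *	struct buf *wr_bp;
 *
 *	if (sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk, &wr_bp) == 0) {
 *		(overlay the unaligned write data onto the read image)
 *		bcopy(addr, wr_bp->b_un.b_addr + pad_offset, wr_len);
 *		(... then write wr_bp back out to the media ...)
 *	}
 */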
26217 
26218 
26219 /*
26220  *    Function: sd_failfast_flushq
26221  *
26222  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26223  *		in b_flags and move them onto the failfast queue, then kick
26224  *		off a thread to return all bp's on the failfast queue to
26225  *		their owners with an error set.
26226  *
26227  *   Arguments: un - pointer to the soft state struct for the instance.
26228  *
26229  *     Context: may execute in interrupt context.
26230  */
26231 
26232 static void
26233 sd_failfast_flushq(struct sd_lun *un)
26234 {
26235 	struct buf *bp;
26236 	struct buf *next_waitq_bp;
26237 	struct buf *prev_waitq_bp = NULL;
26238 
26239 	ASSERT(un != NULL);
26240 	ASSERT(mutex_owned(SD_MUTEX(un)));
26241 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26242 	ASSERT(un->un_failfast_bp == NULL);
26243 
26244 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26245 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26246 
26247 	/*
26248 	 * Check if we should flush all bufs when entering failfast state, or
26249 	 * just those with B_FAILFAST set.
26250 	 */
26251 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26252 		/*
26253 		 * Move *all* bp's on the wait queue to the failfast flush
26254 		 * queue, including those that do NOT have B_FAILFAST set.
26255 		 */
26256 		if (un->un_failfast_headp == NULL) {
26257 			ASSERT(un->un_failfast_tailp == NULL);
26258 			un->un_failfast_headp = un->un_waitq_headp;
26259 		} else {
26260 			ASSERT(un->un_failfast_tailp != NULL);
26261 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26262 		}
26263 
26264 		un->un_failfast_tailp = un->un_waitq_tailp;
26265 
26266 		/* update kstat for each bp moved out of the waitq */
26267 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26268 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26269 		}
26270 
26271 		/* empty the waitq */
26272 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26273 
26274 	} else {
26275 		/*
26276 		 * Go thru the wait queue, pick off all entries with
26277 		 * B_FAILFAST set, and move these onto the failfast queue.
26278 		 */
26279 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26280 			/*
26281 			 * Save the pointer to the next bp on the wait queue,
26282 			 * so we get to it on the next iteration of this loop.
26283 			 */
26284 			next_waitq_bp = bp->av_forw;
26285 
26286 			/*
26287 			 * If this bp from the wait queue does NOT have
26288 			 * B_FAILFAST set, just move on to the next element
26289 			 * in the wait queue. Note, this is the only place
26290 			 * where it is correct to set prev_waitq_bp.
26291 			 */
26292 			if ((bp->b_flags & B_FAILFAST) == 0) {
26293 				prev_waitq_bp = bp;
26294 				continue;
26295 			}
26296 
26297 			/*
26298 			 * Remove the bp from the wait queue.
26299 			 */
26300 			if (bp == un->un_waitq_headp) {
26301 				/* The bp is the first element of the waitq. */
26302 				un->un_waitq_headp = next_waitq_bp;
26303 				if (un->un_waitq_headp == NULL) {
26304 					/* The wait queue is now empty */
26305 					un->un_waitq_tailp = NULL;
26306 				}
26307 			} else {
26308 				/*
26309 				 * The bp is either somewhere in the middle
26310 				 * or at the end of the wait queue.
26311 				 */
26312 				ASSERT(un->un_waitq_headp != NULL);
26313 				ASSERT(prev_waitq_bp != NULL);
26314 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26315 				    == 0);
26316 				if (bp == un->un_waitq_tailp) {
26317 					/* bp is the last entry on the waitq. */
26318 					ASSERT(next_waitq_bp == NULL);
26319 					un->un_waitq_tailp = prev_waitq_bp;
26320 				}
26321 				prev_waitq_bp->av_forw = next_waitq_bp;
26322 			}
26323 			bp->av_forw = NULL;
26324 
26325 			/*
26326 			 * update kstat since the bp is moved out of
26327 			 * the waitq
26328 			 */
26329 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26330 
26331 			/*
26332 			 * Now put the bp onto the failfast queue.
26333 			 */
26334 			if (un->un_failfast_headp == NULL) {
26335 				/* failfast queue is currently empty */
26336 				ASSERT(un->un_failfast_tailp == NULL);
26337 				un->un_failfast_headp =
26338 				    un->un_failfast_tailp = bp;
26339 			} else {
26340 				/* Add the bp to the end of the failfast q */
26341 				ASSERT(un->un_failfast_tailp != NULL);
26342 				ASSERT(un->un_failfast_tailp->b_flags &
26343 				    B_FAILFAST);
26344 				un->un_failfast_tailp->av_forw = bp;
26345 				un->un_failfast_tailp = bp;
26346 			}
26347 		}
26348 	}
26349 
26350 	/*
26351 	 * Now return all bp's on the failfast queue to their owners.
26352 	 */
26353 	while ((bp = un->un_failfast_headp) != NULL) {
26354 
26355 		un->un_failfast_headp = bp->av_forw;
26356 		if (un->un_failfast_headp == NULL) {
26357 			un->un_failfast_tailp = NULL;
26358 		}
26359 
26360 		/*
26361 		 * We want to return the bp with a failure error code, but
26362 		 * we do not want a call to sd_start_cmds() to occur here,
26363 		 * so use sd_return_failed_command_no_restart() instead of
26364 		 * sd_return_failed_command().
26365 		 */
26366 		sd_return_failed_command_no_restart(un, bp, EIO);
26367 	}
26368 
26369 	/* Flush the xbuf queues if required. */
26370 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26371 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26372 	}
26373 
26374 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26375 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26376 }
26377 
26378 
26379 /*
26380  *    Function: sd_failfast_flushq_callback
26381  *
26382  * Description: Return TRUE if the given bp meets the criteria for failfast
26383  *		flushing. Used with ddi_xbuf_flushq(9F).
26384  *
26385  *   Arguments: bp - ptr to buf struct to be examined.
26386  *
26387  *     Context: Any
26388  */
26389 
26390 static int
26391 sd_failfast_flushq_callback(struct buf *bp)
26392 {
26393 	/*
26394 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26395 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26396 	 */
26397 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26398 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26399 }
26400 
26401 
26402 
26403 #if defined(__i386) || defined(__amd64)
26404 /*
26405  * Function: sd_setup_next_xfer
26406  *
26407  * Description: Prepare next I/O operation using DMA_PARTIAL
26408  *
26409  */
26410 
26411 static int
26412 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26413     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26414 {
26415 	ssize_t	num_blks_not_xfered;
26416 	daddr_t	strt_blk_num;
26417 	ssize_t	bytes_not_xfered;
26418 	int	rval;
26419 
26420 	ASSERT(pkt->pkt_resid == 0);
26421 
26422 	/*
26423 	 * Calculate next block number and amount to be transferred.
26424 	 *
	 * How much data has NOT been transferred to the HBA yet.
26426 	 */
26427 	bytes_not_xfered = xp->xb_dma_resid;
26428 
26429 	/*
	 * Figure out how many blocks have NOT been transferred to the HBA yet.
26431 	 */
26432 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26433 
26434 	/*
	 * Set the starting block number to the end of what WAS transferred.
26436 	 */
26437 	strt_blk_num = xp->xb_blkno +
26438 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
26439 
26440 	/*
26441 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
26443 	 * the disk mutex here.
26444 	 */
26445 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26446 	    strt_blk_num, num_blks_not_xfered);
26447 
26448 	if (rval == 0) {
26449 
26450 		/*
26451 		 * Success.
26452 		 *
26453 		 * Adjust things if there are still more blocks to be
		 * transferred.
26455 		 */
26456 		xp->xb_dma_resid = pkt->pkt_resid;
26457 		pkt->pkt_resid = 0;
26458 
26459 		return (1);
26460 	}
26461 
26462 	/*
	 * There's really only one possible error return from
	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
	 * returns NULL.
26466 	 */
26467 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26468 
26469 	bp->b_resid = bp->b_bcount;
26470 	bp->b_flags |= B_ERROR;
26471 
26472 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26473 	    "Error setting up next portion of DMA transfer\n");
26474 
26475 	return (0);
26476 }
26477 #endif
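
/*
 * Illustrative arithmetic for sd_setup_next_xfer() (assumed numbers):
 * with b_bcount = 1 MB, a 512 byte target block size, and xb_dma_resid =
 * 256 KB remaining after the first transfer, the next request covers:
 *
 *	num_blks_not_xfered = 256 KB / 512 = 512 blocks
 *	strt_blk_num = xb_blkno + (1 MB - 256 KB) / 512 = xb_blkno + 1536
 */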
26478 
26479 /*
26480  *    Function: sd_panic_for_res_conflict
26481  *
 * Description: Call panic with a string formatted with "Reservation Conflict"
26483  *		and a human readable identifier indicating the SD instance
26484  *		that experienced the reservation conflict.
26485  *
26486  *   Arguments: un - pointer to the soft state struct for the instance.
26487  *
26488  *     Context: may execute in interrupt context.
26489  */
26490 
26491 #define	SD_RESV_CONFLICT_FMT_LEN 40
26492 void
26493 sd_panic_for_res_conflict(struct sd_lun *un)
26494 {
26495 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
26496 	char path_str[MAXPATHLEN];
26497 
26498 	(void) snprintf(panic_str, sizeof (panic_str),
26499 	    "Reservation Conflict\nDisk: %s",
26500 	    ddi_pathname(SD_DEVINFO(un), path_str));
26501 
26502 	panic(panic_str);
26503 }
26504 
26505 /*
 * Note: The following sd_faultinjection_ioctl() routines implement
 * driver support for handling fault injection for error analysis,
 * causing faults in multiple layers of the driver.
26509  *
26510  */
26511 
26512 #ifdef SD_FAULT_INJECTION
26513 static uint_t   sd_fault_injection_on = 0;
26514 
26515 /*
26516  *    Function: sd_faultinjection_ioctl()
26517  *
26518  * Description: This routine is the driver entry point for handling
26519  *              faultinjection ioctls to inject errors into the
26520  *              layer model
26521  *
 *   Arguments: cmd	- the ioctl cmd received
 *		arg	- the argument from the user; also used to return data
26524  */
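
/*
 * Illustrative sketch (not part of the driver): a test harness would
 * typically drive the fault injection ioctls below in this order; the
 * staged struct contents and the descriptor are assumptions.
 *
 *	uint_t one = 1;
 *	struct sd_fi_pkt fi_pkt;	(fields describing the fault)
 *
 *	(void) ioctl(fd, SDIOCSTART, NULL);		(reset fifo and log)
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	(stage a pkt fault)
 *	(void) ioctl(fd, SDIOCPUSH, &one);		(push 1 staged entry)
 *	(void) ioctl(fd, SDIOCRUN, NULL);		(arm injection)
 *	(... perform I/O to trigger the fault ...)
 *	(void) ioctl(fd, SDIOCRETRIEVE, logbuf);	(fetch the log)
 *	(void) ioctl(fd, SDIOCSTOP, NULL);
 */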
26525 
26526 static void
sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
{
26528 
26529 	uint_t i;
26530 	uint_t rval;
26531 
26532 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26533 
26534 	mutex_enter(SD_MUTEX(un));
26535 
26536 	switch (cmd) {
26537 	case SDIOCRUN:
26538 		/* Allow pushed faults to be injected */
26539 		SD_INFO(SD_LOG_SDTEST, un,
26540 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26541 
26542 		sd_fault_injection_on = 1;
26543 
26544 		SD_INFO(SD_LOG_IOERR, un,
26545 		    "sd_faultinjection_ioctl: run finished\n");
26546 		break;
26547 
26548 	case SDIOCSTART:
26549 		/* Start Injection Session */
26550 		SD_INFO(SD_LOG_SDTEST, un,
26551 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26552 
26553 		sd_fault_injection_on = 0;
26554 		un->sd_injection_mask = 0xFFFFFFFF;
26555 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26556 			un->sd_fi_fifo_pkt[i] = NULL;
26557 			un->sd_fi_fifo_xb[i] = NULL;
26558 			un->sd_fi_fifo_un[i] = NULL;
26559 			un->sd_fi_fifo_arq[i] = NULL;
26560 		}
26561 		un->sd_fi_fifo_start = 0;
26562 		un->sd_fi_fifo_end = 0;
26563 
26564 		mutex_enter(&(un->un_fi_mutex));
26565 		un->sd_fi_log[0] = '\0';
26566 		un->sd_fi_buf_len = 0;
26567 		mutex_exit(&(un->un_fi_mutex));
26568 
26569 		SD_INFO(SD_LOG_IOERR, un,
26570 		    "sd_faultinjection_ioctl: start finished\n");
26571 		break;
26572 
26573 	case SDIOCSTOP:
26574 		/* Stop Injection Session */
26575 		SD_INFO(SD_LOG_SDTEST, un,
26576 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26577 		sd_fault_injection_on = 0;
26578 		un->sd_injection_mask = 0x0;
26579 
		/* Empty stray or unused structs from fifo */
26581 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26582 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26583 				kmem_free(un->sd_fi_fifo_pkt[i],
26584 				    sizeof (struct sd_fi_pkt));
26585 			}
26586 			if (un->sd_fi_fifo_xb[i] != NULL) {
26587 				kmem_free(un->sd_fi_fifo_xb[i],
26588 				    sizeof (struct sd_fi_xb));
26589 			}
26590 			if (un->sd_fi_fifo_un[i] != NULL) {
26591 				kmem_free(un->sd_fi_fifo_un[i],
26592 				    sizeof (struct sd_fi_un));
26593 			}
26594 			if (un->sd_fi_fifo_arq[i] != NULL) {
26595 				kmem_free(un->sd_fi_fifo_arq[i],
26596 				    sizeof (struct sd_fi_arq));
26597 			}
26598 			un->sd_fi_fifo_pkt[i] = NULL;
26599 			un->sd_fi_fifo_un[i] = NULL;
26600 			un->sd_fi_fifo_xb[i] = NULL;
26601 			un->sd_fi_fifo_arq[i] = NULL;
26602 		}
26603 		un->sd_fi_fifo_start = 0;
26604 		un->sd_fi_fifo_end = 0;
26605 
26606 		SD_INFO(SD_LOG_IOERR, un,
26607 		    "sd_faultinjection_ioctl: stop finished\n");
26608 		break;
26609 
26610 	case SDIOCINSERTPKT:
26611 		/* Store a packet struct to be pushed onto fifo */
26612 		SD_INFO(SD_LOG_SDTEST, un,
26613 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26614 
26615 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26616 
26617 		sd_fault_injection_on = 0;
26618 
		/* No more than SD_FI_MAX_ERROR allowed in the queue */
26620 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26621 			kmem_free(un->sd_fi_fifo_pkt[i],
26622 			    sizeof (struct sd_fi_pkt));
26623 		}
26624 		if (arg != NULL) {
26625 			un->sd_fi_fifo_pkt[i] =
26626 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26627 			if (un->sd_fi_fifo_pkt[i] == NULL) {
26628 				/* Alloc failed don't store anything */
26629 				break;
26630 			}
26631 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26632 			    sizeof (struct sd_fi_pkt), 0);
26633 			if (rval == -1) {
26634 				kmem_free(un->sd_fi_fifo_pkt[i],
26635 				    sizeof (struct sd_fi_pkt));
26636 				un->sd_fi_fifo_pkt[i] = NULL;
26637 			}
26638 		} else {
26639 			SD_INFO(SD_LOG_IOERR, un,
26640 			    "sd_faultinjection_ioctl: pkt null\n");
26641 		}
26642 		break;
26643 
26644 	case SDIOCINSERTXB:
26645 		/* Store a xb struct to be pushed onto fifo */
26646 		SD_INFO(SD_LOG_SDTEST, un,
26647 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
26648 
26649 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26650 
26651 		sd_fault_injection_on = 0;
26652 
26653 		if (un->sd_fi_fifo_xb[i] != NULL) {
26654 			kmem_free(un->sd_fi_fifo_xb[i],
26655 			    sizeof (struct sd_fi_xb));
26656 			un->sd_fi_fifo_xb[i] = NULL;
26657 		}
26658 		if (arg != NULL) {
26659 			un->sd_fi_fifo_xb[i] =
26660 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
26661 			if (un->sd_fi_fifo_xb[i] == NULL) {
26662 				/* Alloc failed don't store anything */
26663 				break;
26664 			}
26665 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
26666 			    sizeof (struct sd_fi_xb), 0);
26667 
26668 			if (rval == -1) {
26669 				kmem_free(un->sd_fi_fifo_xb[i],
26670 				    sizeof (struct sd_fi_xb));
26671 				un->sd_fi_fifo_xb[i] = NULL;
26672 			}
26673 		} else {
26674 			SD_INFO(SD_LOG_IOERR, un,
26675 			    "sd_faultinjection_ioctl: xb null\n");
26676 		}
26677 		break;
26678 
26679 	case SDIOCINSERTUN:
26680 		/* Store a un struct to be pushed onto the fifo */
26681 		SD_INFO(SD_LOG_SDTEST, un,
26682 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
26683 
26684 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26685 
26686 		sd_fault_injection_on = 0;
26687 
26688 		if (un->sd_fi_fifo_un[i] != NULL) {
26689 			kmem_free(un->sd_fi_fifo_un[i],
26690 			    sizeof (struct sd_fi_un));
26691 			un->sd_fi_fifo_un[i] = NULL;
26692 		}
26693 		if (arg != NULL) {
26694 			un->sd_fi_fifo_un[i] =
26695 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
26696 			if (un->sd_fi_fifo_un[i] == NULL) {
26697 				/* Alloc failed; don't store anything */
26698 				break;
26699 			}
26700 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
26701 			    sizeof (struct sd_fi_un), 0);
26702 			if (rval == -1) {
26703 				kmem_free(un->sd_fi_fifo_un[i],
26704 				    sizeof (struct sd_fi_un));
26705 				un->sd_fi_fifo_un[i] = NULL;
26706 			}
26707 
26708 		} else {
26709 			SD_INFO(SD_LOG_IOERR, un,
26710 			    "sd_faultinjection_ioctl: un null\n");
26711 		}
26712 
26713 		break;
26714 
26715 	case SDIOCINSERTARQ:
26716 		/* Store an arq struct to be pushed onto the fifo */
26717 		SD_INFO(SD_LOG_SDTEST, un,
26718 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
26719 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26720 
26721 		sd_fault_injection_on = 0;
26722 
26723 		if (un->sd_fi_fifo_arq[i] != NULL) {
26724 			kmem_free(un->sd_fi_fifo_arq[i],
26725 			    sizeof (struct sd_fi_arq));
26726 			un->sd_fi_fifo_arq[i] = NULL;
26727 		}
26728 		if (arg != NULL) {
26729 			un->sd_fi_fifo_arq[i] =
26730 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
26731 			if (un->sd_fi_fifo_arq[i] == NULL) {
26732 				/* Alloc failed; don't store anything */
26733 				break;
26734 			}
26735 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
26736 			    sizeof (struct sd_fi_arq), 0);
26737 			if (rval == -1) {
26738 				kmem_free(un->sd_fi_fifo_arq[i],
26739 				    sizeof (struct sd_fi_arq));
26740 				un->sd_fi_fifo_arq[i] = NULL;
26741 			}
26742 
26743 		} else {
26744 			SD_INFO(SD_LOG_IOERR, un,
26745 			    "sd_faultinjection_ioctl: arq null\n");
26746 		}
26747 
26748 		break;
26749 
26750 	case SDIOCPUSH:
26751 		/* Push stored xb, pkt, un, and arq onto the fifo */
26752 		sd_fault_injection_on = 0;
26753 
26754 		if (arg != NULL) {
26755 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
26756 			if (rval != -1 &&
26757 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26758 				un->sd_fi_fifo_end += i;
26759 			}
26760 		} else {
26761 			SD_INFO(SD_LOG_IOERR, un,
26762 			    "sd_faultinjection_ioctl: push arg null\n");
26763 			/* arg was null, so push by one (i is uninitialized here) */
26763 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
26764 				un->sd_fi_fifo_end++;
26765 			}
26766 		}
26767 		SD_INFO(SD_LOG_IOERR, un,
26768 		    "sd_faultinjection_ioctl: push to end=%d\n",
26769 		    un->sd_fi_fifo_end);
26770 		break;
26771 
26772 	case SDIOCRETRIEVE:
26773 		/* Return the log buffer from the injection session */
26774 		SD_INFO(SD_LOG_SDTEST, un,
26775 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve\n");
26776 
26777 		sd_fault_injection_on = 0;
26778 
26779 		mutex_enter(&(un->un_fi_mutex));
26780 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
26781 		    un->sd_fi_buf_len + 1, 0);
26782 		mutex_exit(&(un->un_fi_mutex));
26783 
26784 		if (rval == -1) {
26785 			/*
26786 			 * arg is possibly invalid; set
26787 			 * it to NULL for the return.
26788 			 */
26789 			arg = NULL;
26790 		}
26791 		break;
26792 	}
26793 
26794 	mutex_exit(SD_MUTEX(un));
26795 	SD_TRACE(SD_LOG_IOERR, un,
26796 	    "sd_faultinjection_ioctl: exit\n");
26797 }
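
/*
 * Illustrative userland sketch (editor's addition, not part of the
 * driver): a minimal harness that drives the SDIOC* fault injection
 * ioctls handled above.  It assumes a kernel built with
 * SD_FAULT_INJECTION, that the SDIOC* commands, SD_FI_MAX_BUF, and
 * struct sd_fi_pkt are visible via <sys/scsi/targets/sddef.h>, and a
 * hypothetical device path.
 *
 *	#include <sys/scsi/targets/sddef.h>
 *	#include <fcntl.h>
 *	#include <stropts.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR); // hypothetical
 *		struct sd_fi_pkt fi_pkt;
 *		uint_t push = 1;
 *		char log[SD_FI_MAX_BUF];
 *
 *		(void) ioctl(fd, SDIOCSTART, NULL);   // begin a session
 *		// ... fill fi_pkt with the pkt_* overrides to inject ...
 *		(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt); // stage at fifo end
 *		(void) ioctl(fd, SDIOCPUSH, &push);   // advance sd_fi_fifo_end
 *		(void) ioctl(fd, SDIOCRETRIEVE, log); // copy out injection log
 *		(void) close(fd);
 *		return (0);
 *	}
 */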
26798 
26799 
26800 /*
26801  *    Function: sd_injection_log()
26802  *
26803  * Description: This routine appends buf to the existing injection log
26804  *              for later retrieval via sd_faultinjection_ioctl(), for
26805  *              use in fault detection and recovery.
26806  *
26807  *   Arguments: buf - the string to add to the log
 *              un  - the sd_lun soft state that owns the log
26808  */
26809 
26810 static void
26811 sd_injection_log(char *buf, struct sd_lun *un)
26812 {
26813 	uint_t len;
26814 
26815 	ASSERT(un != NULL);
26816 	ASSERT(buf != NULL);
26817 
26818 	mutex_enter(&(un->un_fi_mutex));
26819 
26820 	len = min(strlen(buf), 255);
26821 	/* Add logged value to Injection log to be returned later */
26822 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
26823 		uint_t	offset = strlen((char *)un->sd_fi_log);
26824 		char *destp = (char *)un->sd_fi_log + offset;
26825 		int i;
26826 		for (i = 0; i < len; i++) {
26827 			*destp++ = *buf++;
26828 		}
26829 		un->sd_fi_buf_len += len;
26830 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
26831 	}
26832 
26833 	mutex_exit(&(un->un_fi_mutex));
26834 }
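
/*
 * Usage sketch (editor's addition): callers elsewhere in this file log
 * injection activity with calls such as the hypothetical
 *
 *	sd_injection_log("sdintr: pkt_reason overridden\n", un);
 *
 * Each call appends at most 255 bytes, is dropped once the log would
 * reach SD_FI_MAX_BUF, and the buffer is drained via SDIOCRETRIEVE
 * above.
 */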
26835 
26836 
26837 /*
26838  *    Function: sd_faultinjection()
26839  *
26840  * Description: This routine takes the pkt and changes its
26841  *		content based on error injection scenerio.
26842  *		content based on the error injection scenario.
26843  *   Arguments: pktp	- packet to be changed
26844  */
26845 
26846 static void
26847 sd_faultinjection(struct scsi_pkt *pktp)
26848 {
26849 	uint_t i;
26850 	struct sd_fi_pkt *fi_pkt;
26851 	struct sd_fi_xb *fi_xb;
26852 	struct sd_fi_un *fi_un;
26853 	struct sd_fi_arq *fi_arq;
26854 	struct buf *bp;
26855 	struct sd_xbuf *xb;
26856 	struct sd_lun *un;
26857 
26858 	ASSERT(pktp != NULL);
26859 
26860 	/* pull bp, xb, and un from pktp */
26861 	bp = (struct buf *)pktp->pkt_private;
26862 	xb = SD_GET_XBUF(bp);
26863 	un = SD_GET_UN(bp);
26864 
26865 	ASSERT(un != NULL);
26866 
26867 	mutex_enter(SD_MUTEX(un));
26868 
26869 	SD_TRACE(SD_LOG_SDTEST, un,
26870 	    "sd_faultinjection: entry Injection from sdintr\n");
26871 
26872 	/* if injection is off return */
26873 	if (sd_fault_injection_on == 0 ||
26874 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
26875 		mutex_exit(SD_MUTEX(un));
26876 		return;
26877 	}
26878 
26879 
26880 	/* take next set off fifo */
26881 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
26882 
26883 	fi_pkt = un->sd_fi_fifo_pkt[i];
26884 	fi_xb = un->sd_fi_fifo_xb[i];
26885 	fi_un = un->sd_fi_fifo_un[i];
26886 	fi_arq = un->sd_fi_fifo_arq[i];
26887 
26888 
26889 	/* set variables accordingly */
26890 	/* set pkt if it was on fifo */
26891 	if (fi_pkt != NULL) {
26892 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
26893 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
26894 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
26895 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
26896 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
26897 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
26898 
26899 	}
26900 
26901 	/* set xb if it was on fifo */
26902 	if (fi_xb != NULL) {
26903 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
26904 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
26905 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
26906 		SD_CONDSET(xb, xb, xb_victim_retry_count,
26907 		    "xb_victim_retry_count");
26908 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
26909 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
26910 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
26911 
26912 		/* copy in block data from sense */
26913 		if (fi_xb->xb_sense_data[0] != -1) {
26914 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
26915 			    SENSE_LENGTH);
26916 		}
26917 
26918 		/* copy in extended sense codes */
26919 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
26920 		    "es_code");
26921 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
26922 		    "es_key");
26923 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
26924 		    "es_add_code");
26925 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
26926 		    es_qual_code, "es_qual_code");
26927 	}
26928 
26929 	/* set un if it was on fifo */
26930 	if (fi_un != NULL) {
26931 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
26932 		SD_CONDSET(un, un, un_ctype, "un_ctype");
26933 		SD_CONDSET(un, un, un_reset_retry_count,
26934 		    "un_reset_retry_count");
26935 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
26936 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
26937 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
26938 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
26939 		    "un_f_allow_bus_device_reset");
26940 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
26941 
26942 	}
26943 
26944 	/* copy in auto request sense if it was on fifo */
26945 	if (fi_arq != NULL) {
26946 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
26947 	}
26948 
26949 	/* free structs */
26950 	if (un->sd_fi_fifo_pkt[i] != NULL) {
26951 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
26952 	}
26953 	if (un->sd_fi_fifo_xb[i] != NULL) {
26954 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
26955 	}
26956 	if (un->sd_fi_fifo_un[i] != NULL) {
26957 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
26958 	}
26959 	if (un->sd_fi_fifo_arq[i] != NULL) {
26960 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
26961 	}
26962 
26963 	/*
26964 	 * kmem_free() does not guarantee that the pointer is set to
26965 	 * NULL.  Since we use these pointers to determine whether
26966 	 * values were set, confirm that they are always NULL after
26967 	 * the free.
26968 	 */
26969 	un->sd_fi_fifo_pkt[i] = NULL;
26970 	un->sd_fi_fifo_un[i] = NULL;
26971 	un->sd_fi_fifo_xb[i] = NULL;
26972 	un->sd_fi_fifo_arq[i] = NULL;
26973 
26974 	un->sd_fi_fifo_start++;
26975 
26976 	mutex_exit(SD_MUTEX(un));
26977 
26978 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
26979 }
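
/*
 * Editor's note: SD_CONDSET() is defined elsewhere (see sddef.h), so the
 * following is only a hedged sketch of the shape the code above relies
 * on: a staged sd_fi_* struct supplies a field only when that field was
 * set to something other than a "not set" sentinel (compare the explicit
 * xb_sense_data[0] != -1 check above), roughly:
 *
 *	#define	SD_CONDSET(a, b, c, d)				\
 *		if (fi_##b->c != 0xFF) {			\
 *			a->c = fi_##b->c;			\
 *			sd_injection_log("setting " d "\n", un);	\
 *		}
 *
 * This is not the authoritative definition.
 */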
26980 
26981 #endif /* SD_FAULT_INJECTION */
26982 
26983 /*
26984  * This routine is invoked in sd_unit_attach(). Before calling it, the
26985  * properties in conf file should be processed already, and "hotpluggable"
26986  * property was processed also.
26987  *
26988  * The sd driver distinguishes 3 different type of devices: removable media,
26989  * non-removable media, and hotpluggable. Below the differences are defined:
26990  *
26991  * 1. Device ID
26992  *
26993  *     The device ID of a device is used to identify this device. Refer to
26994  *     ddi_devid_register(9F).
26995  *
26996  *     For a non-removable media disk device which can provide 0x80 or 0x83
26997  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
26998  *     device ID is created to identify this device. For other non-removable
26999  *     media devices, a default device ID is created only if this device has
27000  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
27001  *
27002  *     -------------------------------------------------------
27003  *     removable media   hotpluggable  | Can Have Device ID
27004  *     -------------------------------------------------------
27005  *         false             false     |     Yes
27006  *         false             true      |     Yes
27007  *         true                x       |     No
27008  *     ------------------------------------------------------
27009  *
27010  *
27011  * 2. SCSI group 4 commands
27012  *
27013  *     In SCSI specs, only some commands in group 4 command set can use
27014  *     8-byte addresses that can be used to access >2TB storage spaces.
27015  *     Other commands have no such capability. Without supporting group4,
27016  *     it is impossible to make full use of storage spaces of a disk with
27017  *     capacity larger than 2TB.
27018  *
27019  *     -----------------------------------------------
27020  *     removable media   hotpluggable   LP64  |  Group
27021  *     -----------------------------------------------
27022  *           false          false       false |   1
27023  *           false          false       true  |   4
27024  *           false          true        false |   1
27025  *           false          true        true  |   4
27026  *           true             x           x   |   5
27027  *     -----------------------------------------------
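 *
 *     For example (editor's note): READ(10), a group-1 command, has a
 *     4-byte LBA field, so with 512-byte blocks it can address at most
 *     2^32 * 512 bytes = 2TB.  READ(16), a group-4 command, has an
 *     8-byte LBA field, which removes that limit.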
27028  *
27029  *
27030  * 3. Check for VTOC Label
27031  *
27032  *     If a direct-access disk has no EFI label, sd will check if it has a
27033  *     valid VTOC label. Now, sd also does that check for removable media
27034  *     and hotpluggable devices.
27035  *
27036  *     --------------------------------------------------------------
27037  *     Direct-Access   removable media    hotpluggable |  Check Label
27038  *     -------------------------------------------------------------
27039  *         false          false           false        |   No
27040  *         false          false           true         |   No
27041  *         false          true            false        |   Yes
27042  *         false          true            true         |   Yes
27043  *         true            x                x          |   Yes
27044  *     --------------------------------------------------------------
27045  *
27046  *
27047  * 4. Building default VTOC label
27048  *
27049  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
27050  *     If those devices have no valid VTOC label, sd(7d) will attempt to
27051  *     create default VTOC for them. Currently sd creates default VTOC label
27052  *     for all devices on x86 platform (VTOC_16), but only for removable
27053  *     media devices on SPARC (VTOC_8).
27054  *
27055  *     -----------------------------------------------------------
27056  *       removable media hotpluggable platform   |   Default Label
27057  *     -----------------------------------------------------------
27058  *             false          false    sparc     |     No
27059  *             false          true      x86      |     Yes
27060  *             false          true     sparc     |     Yes
27061  *             true             x        x       |     Yes
27062  *     ----------------------------------------------------------
27063  *
27064  *
27065  * 5. Supported blocksizes of target devices
27066  *
27067  *     Sd supports non-512-byte blocksize for removable media devices only.
27068  *     For other devices, only 512-byte blocksize is supported. This may be
27069  *     changed in near future because some RAID devices require non-512-byte
27070  *     blocksize
27071  *
27072  *     -----------------------------------------------------------
27073  *     removable media    hotpluggable    | non-512-byte blocksize
27074  *     -----------------------------------------------------------
27075  *           false          false         |   No
27076  *           false          true          |   No
27077  *           true             x           |   Yes
27078  *     -----------------------------------------------------------
27079  *
27080  *
27081  * 6. Automatic mount & unmount
27082  *
27083  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
27084  *     query whether a device is a removable media device. It returns 1 for
27085  *     removable media devices, and 0 for others, as shown below.
27086  *
27087  *     The automatic mounting subsystem should distinguish between the types
27088  *     of devices and apply automounting policies to each.
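 *
 *     Userland usage sketch (editor's addition), where fd is a hypothetical
 *     file descriptor for the raw device:
 *
 *		int removable = 0;
 *		(void) ioctl(fd, DKIOCREMOVABLE, &removable);
 *		// removable is now 1 for removable media devices, else 0
 *
 *     DKIOCREMOVABLE is defined in <sys/dkio.h>.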
27089  *
27090  *
27091  * 7. fdisk partition management
27092  *
27093  *     Fdisk is the traditional partitioning method on the x86 platform. The
27094  *     sd(7d) driver supports fdisk partitions only on x86; on SPARC, sd
27095  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
27096  *     fdisk partitions on both the x86 and SPARC platforms.
27097  *
27098  *     -----------------------------------------------------------
27099  *       platform   removable media  USB/1394  |  fdisk supported
27100  *     -----------------------------------------------------------
27101  *        x86         X               X        |       true
27102  *     ------------------------------------------------------------
27103  *        sparc       X               X        |       false
27104  *     ------------------------------------------------------------
27105  *
27106  *
27107  * 8. MBOOT/MBR
27108  *
27109  *     Although sd(7d) doesn't support fdisk on SPARC, it does support mboot
27110  *     read/write for removable media and USB/1394 devices on SPARC.
27111  *
27112  *     -----------------------------------------------------------
27113  *       platform   removable media  USB/1394  |  mboot supported
27114  *     -----------------------------------------------------------
27115  *        x86         X               X        |       true
27116  *     ------------------------------------------------------------
27117  *        sparc      false           false     |       false
27118  *        sparc      false           true      |       true
27119  *        sparc      true            false     |       true
27120  *        sparc      true            true      |       true
27121  *     ------------------------------------------------------------
27122  *
27123  *
27124  * 9.  Error handling when opening a device
27125  *
27126  *     If a disk device fails to open, an errno is returned. For some kinds
27127  *     of errors, a different errno is returned depending on whether the
27128  *     device is a removable media device. This brings USB/1394 hard disks in
27129  *     line with expected hard disk behavior. It is not expected that this
27130  *     breaks any application.
27131  *
27132  *     ------------------------------------------------------
27133  *       removable media    hotpluggable   |  errno
27134  *     ------------------------------------------------------
27135  *             false          false        |   EIO
27136  *             false          true         |   EIO
27137  *             true             x          |   ENXIO
27138  *     ------------------------------------------------------
27139  *
27140  *
27141  * 11. ioctls: DKIOCEJECT, CDROMEJECT
27142  *
27143  *     These IOCTLs are applicable only to removable media devices.
27144  *
27145  *     -----------------------------------------------------------
27146  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27147  *     -----------------------------------------------------------
27148  *             false          false        |     No
27149  *             false          true         |     No
27150  *             true            x           |     Yes
27151  *     -----------------------------------------------------------
27152  *
27153  *
27154  * 12. Kstats for partitions
27155  *
27156  *     sd creates partition kstats for non-removable media devices. USB and
27157  *     Firewire hard disks now have partition kstats.
27158  *
27159  *      ------------------------------------------------------
27160  *       removable media    hotpluggable   |   kstat
27161  *      ------------------------------------------------------
27162  *             false          false        |    Yes
27163  *             false          true         |    Yes
27164  *             true             x          |    No
27165  *       ------------------------------------------------------
27166  *
27167  *
27168  * 13. Removable media & hotpluggable properties
27169  *
27170  *     The sd driver creates a "removable-media" property for removable media
27171  *     devices. A parent nexus driver creates a "hotpluggable" property if
27172  *     it supports hotplugging.
27173  *
27174  *     ---------------------------------------------------------------------
27175  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27176  *     ---------------------------------------------------------------------
27177  *       false            false       |    No                   No
27178  *       false            true        |    No                   Yes
27179  *       true             false       |    Yes                  No
27180  *       true             true        |    Yes                  Yes
27181  *     ---------------------------------------------------------------------
27182  *
27183  *
27184  * 14. Power Management
27185  *
27186  *     sd only power manages removable media devices or devices that support
27187  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
27188  *
27189  *     A parent nexus that supports hotplugging can also set "pm-capable"
27190  *     if the disk can be power managed.
27191  *
27192  *     ------------------------------------------------------------
27193  *       removable media hotpluggable pm-capable  |   power manage
27194  *     ------------------------------------------------------------
27195  *             false          false     false     |     No
27196  *             false          false     true      |     Yes
27197  *             false          true      false     |     No
27198  *             false          true      true      |     Yes
27199  *             true             x        x        |     Yes
27200  *     ------------------------------------------------------------
27201  *
27202  *      USB and Firewire hard disks can now be power managed independently
27203  *      of the framebuffer.
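 *
 *      Sketch (editor's addition) of how a parent nexus driver might
 *      export the property for a power-manageable child disk; child_dip
 *      is a hypothetical child dev_info pointer:
 *
 *		(void) ddi_prop_update_int(DDI_DEV_T_NONE, child_dip,
 *		    "pm-capable", 1);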
27204  *
27205  *
27206  * 15. Support for USB disks with capacity larger than 1TB
27207  *
27208  *     Currently, sd doesn't permit a fixed disk device with a capacity
27209  *     larger than 1TB to be used in a 32-bit operating system environment.
27210  *     However, sd doesn't enforce this for removable media devices. Instead,
27211  *     it assumes that removable media devices cannot have a capacity larger
27212  *     than 1TB. Therefore, using such devices on a 32-bit system is only
27213  *     partially supported, which can cause unexpected results.
27214  *
27215  *     ---------------------------------------------------------------------
27216  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27217  *     ---------------------------------------------------------------------
27218  *             false          false  |   true         |     no
27219  *             false          true   |   true         |     no
27220  *             true           false  |   true         |     Yes
27221  *             true           true   |   true         |     Yes
27222  *     ---------------------------------------------------------------------
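 *
 *     (Editor's note on the 1TB figure: 2^31 512-byte blocks, the most a
 *     32-bit signed daddr_t can address, is 2^31 * 512 = 2^40 bytes, i.e.
 *     exactly 1TB.)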
27223  *
27224  *
27225  * 16. Check write-protection at open time
27226  *
27227  *     When a removable media device is opened for writing without the NDELAY
27228  *     flag, sd checks whether the device is writable. An attempt to open a
27229  *     write-protected device for writing without the NDELAY flag will fail.
27230  *
27231  *     ------------------------------------------------------------
27232  *       removable media    USB/1394   |   WP Check
27233  *     ------------------------------------------------------------
27234  *             false          false    |     No
27235  *             false          true     |     No
27236  *             true           false    |     Yes
27237  *             true           true     |     Yes
27238  *     ------------------------------------------------------------
27239  *
27240  *
27241  * 17. syslog when corrupted VTOC is encountered
27242  *
27243  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
27244  *      message only for fixed SCSI disks.
27245  *     ------------------------------------------------------------
27246  *       removable media    USB/1394   |   print syslog
27247  *     ------------------------------------------------------------
27248  *             false          false    |     Yes
27249  *             false          true     |     No
27250  *             true           false    |     No
27251  *             true           true     |     No
27252  *     ------------------------------------------------------------
27253  */
27254 static void
27255 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27256 {
27257 	int	pm_capable_prop;
27258 
27259 	ASSERT(un->un_sd);
27260 	ASSERT(un->un_sd->sd_inq);
27261 
27262 	/*
27263 	 * Enable SYNC CACHE support for all devices.
27264 	 */
27265 	un->un_f_sync_cache_supported = TRUE;
27266 
27267 	if (un->un_sd->sd_inq->inq_rmb) {
27268 		/*
27269 		 * The media of this device is removable, and for this kind
27270 		 * of device the medium can be changed after the device is
27271 		 * opened. Thus we should support this operation.
27272 		 */
27273 		un->un_f_has_removable_media = TRUE;
27274 
27275 		/*
27276 		 * support non-512-byte blocksize of removable media devices
27277 		 * Support non-512-byte blocksizes for removable media devices.
27278 		un->un_f_non_devbsize_supported = TRUE;
27279 
27280 		/*
27281 		 * Assume that all removable media devices support DOOR_LOCK
27282 		 */
27283 		un->un_f_doorlock_supported = TRUE;
27284 
27285 		/*
27286 		 * A removable media device may be opened with the NDELAY flag
27287 		 * when there is no media in the drive; in this case we don't
27288 		 * care whether the device is writable. Without the NDELAY
27289 		 * flag, we need to check if the media is write-protected.
27290 		 */
27291 		un->un_f_chk_wp_open = TRUE;
27292 
27293 		/*
27294 		 * Need to start a SCSI watch thread to monitor the media
27295 		 * state; when media is inserted or ejected, notify syseventd.
27296 		 */
27297 		un->un_f_monitor_media_state = TRUE;
27298 
27299 		/*
27300 		 * Some devices don't support the START_STOP_UNIT command.
27301 		 * Therefore, we should check whether a device supports it
27302 		 * before sending it.
27303 		 */
27304 		un->un_f_check_start_stop = TRUE;
27305 
27306 		/*
27307 		 * Support the eject media ioctls:
27308 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27309 		 */
27310 		un->un_f_eject_media_supported = TRUE;
27311 
27312 		/*
27313 		 * Because many removable media devices don't support
27314 		 * LOG_SENSE, we cannot use that command to check whether
27315 		 * a removable media device supports power management.
27316 		 * We assume that they support power management via the
27317 		 * START_STOP_UNIT command and can be spun up and down
27318 		 * without limitations.
27319 		 */
27320 		un->un_f_pm_supported = TRUE;
27321 
27322 		/*
27323 		 * Need to create a zero-length (Boolean) property
27324 		 * "removable-media" for removable media devices.
27325 		 * Note that the return value is not checked: if the
27326 		 * property cannot be created, we do not want the attach
27327 		 * to fail altogether. This is consistent with other
27328 		 * property creation in attach.
27329 		 */
27330 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27331 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27332 
27333 	} else {
27334 		/*
27335 		 * create device ID for device
27336 		 */
27337 		un->un_f_devid_supported = TRUE;
27338 
27339 		/*
27340 		 * Spin up non-removable-media devices once it is attached
27341 		 * Spin up non-removable media devices once they are attached.
27342 		un->un_f_attach_spinup = TRUE;
27343 
27344 		/*
27345 		 * According to the SCSI specification, sense data has two
27346 		 * formats: fixed and descriptor. At present, we
27347 		 * don't support descriptor format sense data for removable
27348 		 * media.
27349 		 */
27350 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27351 			un->un_f_descr_format_supported = TRUE;
27352 		}
27353 
27354 		/*
27355 		 * kstats are created only for non-removable media devices.
27356 		 *
27357 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27358 		 * default is 1, so they are enabled by default.
27359 		 */
27360 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27361 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
27362 		    "enable-partition-kstats", 1));
27363 
27364 		/*
27365 		 * Check if HBA has set the "pm-capable" property.
27366 		 * If "pm-capable" exists and is non-zero then we can
27367 		 * power manage the device without checking the start/stop
27368 		 * cycle count log sense page.
27369 		 *
27370 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27371 		 * then we should not power manage the device.
27372 		 *
27373 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27374 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27375 		 * sd will check the start/stop cycle count log sense page
27376 		 * and power manage the device if the cycle count limit has
27377 		 * not been exceeded.
27378 		 */
27379 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27380 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27381 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27382 			un->un_f_log_sense_supported = TRUE;
27383 		} else {
27384 			/*
27385 			 * pm-capable property exists.
27386 			 *
27387 			 * Convert "TRUE" values for pm_capable_prop to
27388 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27389 			 * later. "TRUE" values are any values except
27390 			 * SD_PM_CAPABLE_FALSE (0) and
27391 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27392 			 */
27393 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27394 				un->un_f_log_sense_supported = FALSE;
27395 			} else {
27396 				un->un_f_pm_supported = TRUE;
27397 			}
27398 
27399 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27400 			    "sd_unit_attach: un:0x%p pm-capable "
27401 			    "property set to %d.\n", un, un->un_f_pm_supported);
27402 		}
27403 	}
27404 
27405 	if (un->un_f_is_hotpluggable) {
27406 
27407 		/*
27408 		 * Have to watch hotpluggable devices as well, since
27409 		 * that's the only way for userland applications to
27410 		 * detect hot removal while device is busy/mounted.
27411 		 */
27412 		un->un_f_monitor_media_state = TRUE;
27413 
27414 		un->un_f_check_start_stop = TRUE;
27415 
27416 	}
27417 }
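
/*
 * Illustrative sd.conf fragment (editor's addition) for the
 * "enable-partition-kstats" property read above; the default is 1, so
 * partition kstats are enabled unless this is set:
 *
 *	enable-partition-kstats=0;
 */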
27418 
27419 /*
27420  * sd_tg_rdwr:
27421  * Provides read/write access for cmlb via sd_tgops. The start_block is
27422  * in units of the system block size; reqlength is in bytes.
27423  *
27424  */
27425 static int
27426 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27427     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27428 {
27429 	struct sd_lun *un;
27430 	int path_flag = (int)(uintptr_t)tg_cookie;
27431 	char *dkl = NULL;
27432 	diskaddr_t real_addr = start_block;
27433 	diskaddr_t first_byte, end_block;
27434 
27435 	size_t	buffer_size = reqlength;
27436 	int rval;
27437 	diskaddr_t	cap;
27438 	uint32_t	lbasize;
27439 
27440 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27441 	if (un == NULL)
27442 		return (ENXIO);
27443 
27444 	if (cmd != TG_READ && cmd != TG_WRITE)
27445 		return (EINVAL);
27446 
27447 	mutex_enter(SD_MUTEX(un));
27448 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27449 		mutex_exit(SD_MUTEX(un));
27450 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27451 		    &lbasize, path_flag);
27452 		if (rval != 0)
27453 			return (rval);
27454 		mutex_enter(SD_MUTEX(un));
27455 		sd_update_block_info(un, lbasize, cap);
27456 		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27457 			mutex_exit(SD_MUTEX(un));
27458 			return (EIO);
27459 		}
27460 	}
27461 
27462 	if (NOT_DEVBSIZE(un)) {
27463 		/*
27464 		 * sys_blocksize != tgt_blocksize, need to re-adjust
27465 		 * blkno and save the index to beginning of dk_label
27466 		 */
27467 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27468 		real_addr = first_byte / un->un_tgt_blocksize;
27469 
27470 		end_block = (first_byte + reqlength +
27471 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27472 
27473 		/* round up buffer size to multiple of target block size */
27474 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27475 
27476 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr: "
27477 		    "label_addr: 0x%x allocation size: 0x%x\n",
27478 		    real_addr, buffer_size);
27479 
27480 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27481 		    (reqlength % un->un_tgt_blocksize) != 0)
27482 			/* the request is not aligned */
27483 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27484 	}
27485 
27486 	/*
27487 	 * The MMC standard allows READ CAPACITY to be
27488 	 * inaccurate by a bounded amount (in the interest of
27489 	 * response latency).  As a result, failed READs are
27490 	 * commonplace (due to the reading of metadata and not
27491 	 * data). Depending on the per-Vendor/drive Sense data,
27492 	 * the failed READ can cause many (unnecessary) retries.
27493 	 */
27494 
27495 	if (ISCD(un) && (cmd == TG_READ) &&
27496 	    (un->un_f_blockcount_is_valid == TRUE) &&
27497 	    ((start_block == (un->un_blockcount - 1)) ||
27498 	    (start_block == (un->un_blockcount - 2)))) {
27499 		path_flag = SD_PATH_DIRECT_PRIORITY;
27500 	}
27501 
27502 	mutex_exit(SD_MUTEX(un));
27503 	if (cmd == TG_READ) {
27504 		rval = sd_send_scsi_READ(un, (dkl != NULL) ? dkl : bufaddr,
27505 		    buffer_size, real_addr, path_flag);
27506 		if (dkl != NULL)
27507 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27508 			    real_addr), bufaddr, reqlength);
27509 	} else {
27510 		if (dkl) {
27511 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27512 			    real_addr, path_flag);
27513 			if (rval) {
27514 				kmem_free(dkl, buffer_size);
27515 				return (rval);
27516 			}
27517 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27518 			    real_addr), reqlength);
27519 		}
27520 		rval = sd_send_scsi_WRITE(un, (dkl != NULL) ? dkl : bufaddr,
27521 		    buffer_size, real_addr, path_flag);
27522 	}
27523 
27524 	if (dkl != NULL)
27525 		kmem_free(dkl, buffer_size);
27526 
27527 	return (rval);
27528 }
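
/*
 * Worked example (editor's addition) for the NOT_DEVBSIZE() adjustment
 * in sd_tg_rdwr() above, assuming a 512-byte system block size and a
 * 2048-byte target block size:
 *
 *	start_block = 5, reqlength = 1024
 *	first_byte  = 5 * 512 = 2560
 *	real_addr   = 2560 / 2048 = 1	(target block 1)
 *	end_block   = (2560 + 1024 + 2047) / 2048 = 2
 *	buffer_size = (2 - 1) * 2048 = 2048
 *
 * first_byte % 2048 == 512, so the request is unaligned and a bounce
 * buffer (dkl) is allocated; for TG_READ the 1024 requested bytes are
 * then copied out of it starting at byte offset 512.
 */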
27529 
27530 
27531 static int
27532 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
27533 {
27534 
27535 	struct sd_lun *un;
27536 	diskaddr_t	cap;
27537 	uint32_t	lbasize;
27538 	int		path_flag = (int)(uintptr_t)tg_cookie;
27539 	int		ret = 0;
27540 
27541 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27542 	if (un == NULL)
27543 		return (ENXIO);
27544 
27545 	switch (cmd) {
27546 	case TG_GETPHYGEOM:
27547 	case TG_GETVIRTGEOM:
27548 	case TG_GETCAPACITY:
27549 	case TG_GETBLOCKSIZE:
27550 		mutex_enter(SD_MUTEX(un));
27551 
27552 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27553 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27554 			cap = un->un_blockcount;
27555 			lbasize = un->un_tgt_blocksize;
27556 			mutex_exit(SD_MUTEX(un));
27557 		} else {
27558 			mutex_exit(SD_MUTEX(un));
27559 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27560 			    &lbasize, path_flag);
27561 			if (ret != 0)
27562 				return (ret);
27563 			mutex_enter(SD_MUTEX(un));
27564 			sd_update_block_info(un, lbasize, cap);
27565 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27566 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27567 				mutex_exit(SD_MUTEX(un));
27568 				return (EIO);
27569 			}
27570 			mutex_exit(SD_MUTEX(un));
27571 		}
27572 
27573 		if (cmd == TG_GETCAPACITY) {
27574 			*(diskaddr_t *)arg = cap;
27575 			return (0);
27576 		}
27577 
27578 		if (cmd == TG_GETBLOCKSIZE) {
27579 			*(uint32_t *)arg = lbasize;
27580 			return (0);
27581 		}
27582 
27583 		if (cmd == TG_GETPHYGEOM)
27584 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27585 			    cap, lbasize, path_flag);
27586 		else
27587 			/* TG_GETVIRTGEOM */
27588 			ret = sd_get_virtual_geometry(un,
27589 			    (cmlb_geom_t *)arg, cap, lbasize);
27590 
27591 		return (ret);
27592 
27593 	case TG_GETATTR:
27594 		mutex_enter(SD_MUTEX(un));
27595 		((tg_attribute_t *)arg)->media_is_writable =
27596 		    un->un_f_mmc_writable_media;
27597 		mutex_exit(SD_MUTEX(un));
27598 		return (0);
27599 	default:
27600 		return (ENOTTY);
27601 
27602 	}
27603 
27604 }
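
/*
 * Usage sketch (editor's addition): sd_tg_rdwr() and sd_tg_getinfo() are
 * the target ops sd hands to cmlb, which calls back into them; a capacity
 * query through this interface looks roughly like (hypothetical locals):
 *
 *	diskaddr_t cap;
 *	int rv = sd_tg_getinfo(devi, TG_GETCAPACITY, &cap,
 *	    (void *)(uintptr_t)SD_PATH_DIRECT);
 *	// on rv == 0, cap holds the block count
 */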
27605