/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
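
/*
 * Illustrative sketch (not actual driver code): how the interconnect
 * type could be queried from the HBA via scsi_ifgetcap(9F) at attach
 * time.  The fallback and the assignments shown here are simplified
 * assumptions for illustration only.
 *
 *	int itype = scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", 1);
 *
 *	if ((itype == INTERCONNECT_FIBRE) ||
 *	    (itype == INTERCONNECT_SSA) ||
 *	    (itype == INTERCONNECT_FABRIC)) {
 *		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
 *	} else if (itype == -1) {
 *		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
 *	} else {
 *		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
 *	}
 */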

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
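
/*
 * Example (illustrative): since these masks are ordinary kernel
 * variables, debug output can be enabled without recompiling, e.g.
 * from /etc/system (the mask values below are placeholders; see
 * sddef.h for the real bit definitions, and use the ssd: prefix for
 * the ssd module):
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0x7
 */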

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain. Each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * with the controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
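
/*
 * Sketch (hypothetical helper, equivalent in spirit to
 * sd_scsi_get_target_lun_count() declared later in this file) of how
 * the chain above is consulted to find the lun count for one target:
 *
 *	struct sd_scsi_hba_tgt_lun *cp;
 *	int cnt = 0;
 *
 *	mutex_enter(&sd_scsi_target_lun_mutex);
 *	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == pdip)
 *			break;
 *	}
 *	if (cp != NULL)
 *		cnt = cp->nlun[target];
 *	mutex_exit(&sd_scsi_target_lun_mutex);
 */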

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
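
/*
 * Sketch of the intended lookup (hypothetical; the real logic lives in
 * sd_scsi_probe_with_cache()): a target whose cached scsi_probe()
 * result was SCSIPROBE_NORESP is not probed again, avoiding another
 * long probe timeout.
 *
 *	struct sd_scsi_probe_cache *cp;
 *
 *	mutex_enter(&sd_scsi_probe_cache_mutex);
 *	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == pdip)
 *			break;
 *	}
 *	if ((cp != NULL) && (cp->cache[tgt] == SCSIPROBE_NORESP)) {
 *		mutex_exit(&sd_scsi_probe_cache_mutex);
 *		return (SCSIPROBE_NORESP);
 *	}
 *	mutex_exit(&sd_scsi_probe_cache_mutex);
 */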


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps,
 * other adjustments as defined by the flags at some future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".  (A sketch of this comparison follows the
 * table below.)
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
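
/*
 * Sketch of the blank-collapsing comparison described above the table
 * (hypothetical helper, analogous in spirit to sd_blank_cmp() declared
 * later; trailing-blank handling is omitted for brevity): runs of
 * consecutive blanks in either string compare equal to a single blank.
 *
 *	static int
 *	example_blank_cmp(const char *pat, const char *id)
 *	{
 *		while (*pat != '\0' && *id != '\0') {
 *			if (*pat == ' ' && *id == ' ') {
 *				while (*pat == ' ')
 *					pat++;
 *				while (*id == ' ')
 *					id++;
 *			} else if (*pat++ != *id++) {
 *				return (0);
 *			}
 *		}
 *		return (*pat == *id);
 *	}
 */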


#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
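
/*
 * Example (illustrative) of one consistent reading of the offsets
 * above, applied to a raw VPD page returned by INQUIRY with the EVPD
 * bit set (buf is an assumption for the sketch; the payload placement
 * follows the short SPC VPD header layout):
 *
 *	uchar_t	page_code = buf[VPD_MODE_PAGE];
 *	size_t	page_len  = buf[VPD_PAGE_LENGTH];
 *	uchar_t	*payload  = &buf[VPD_HEAD_OFFSET + 1];
 */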

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)	\
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
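
/*
 * Typical usage (illustrative; SD_STATE_SUSPENDED is one of the unit
 * states defined in sddef.h), with SD_MUTEX(un) held:
 *
 *	New_state(un, SD_STATE_SUSPENDED);
 *	...
 *	Restore_state(un);
 */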

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
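
/*
 * The columns above give, in order: the CDB length (the CDB_GROUPn
 * constants are byte counts), the command group code, the largest
 * addressable LBA, and the largest transfer length.  This reading of
 * the columns is inferred from the values; see struct sd_cdbinfo in
 * sddef.h for the authoritative field names.
 */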

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);
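
/*
 * Usage example (illustrative): enabling the write cache while leaving
 * the read-cache setting alone would look like
 *
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 *
 * where rcd_flag drives the Read Cache Disable (RCD) bit and wce_flag
 * the Write Cache Enable (WCE) bit of the caching mode page; the bit
 * mapping is an assumption based on the flag names.
 */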
1175 
1176 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1177 	uint64_t capacity);
1178 
1179 /*
1180  * Driver entry point functions.
1181  */
1182 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1183 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1184 static int  sd_ready_and_valid(struct sd_lun *un);
1185 
1186 static void sdmin(struct buf *bp);
1187 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1188 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1189 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1190 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1191 
1192 static int sdstrategy(struct buf *bp);
1193 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1194 
1195 /*
1196  * Function prototypes for layering functions in the iostart chain.
1197  */
1198 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1199 	struct buf *bp);
1200 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1201 	struct buf *bp);
1202 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1203 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1204 	struct buf *bp);
1205 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1206 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1207 
1208 /*
1209  * Function prototypes for layering functions in the iodone chain.
1210  */
1211 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1212 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1213 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1214 	struct buf *bp);
1215 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1216 	struct buf *bp);
1217 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1218 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1219 	struct buf *bp);
1220 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1221 
1222 /*
1223  * Prototypes for functions to support buf(9S) based IO.
1224  */
1225 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1226 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1227 static void sd_destroypkt_for_buf(struct buf *);
1228 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1229 	struct buf *bp, int flags,
1230 	int (*callback)(caddr_t), caddr_t callback_arg,
1231 	diskaddr_t lba, uint32_t blockcount);
1232 #if defined(__i386) || defined(__amd64)
1233 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1234 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1235 #endif /* defined(__i386) || defined(__amd64) */
1236 
1237 /*
1238  * Prototypes for functions to support USCSI IO.
1239  */
1240 static int sd_uscsi_strategy(struct buf *bp);
1241 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1242 static void sd_destroypkt_for_uscsi(struct buf *);
1243 
1244 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1245 	uchar_t chain_type, void *pktinfop);
1246 
1247 static int  sd_pm_entry(struct sd_lun *un);
1248 static void sd_pm_exit(struct sd_lun *un);
1249 
1250 static void sd_pm_idletimeout_handler(void *arg);
1251 
1252 /*
1253  * sd_core internal functions (used at the sd_core_io layer).
1254  */
1255 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1256 static void sdintr(struct scsi_pkt *pktp);
1257 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1258 
1259 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1260 	enum uio_seg dataspace, int path_flag);
1261 
1262 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1263 	daddr_t blkno, int (*func)(struct buf *));
1264 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1265 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1266 static void sd_bioclone_free(struct buf *bp);
1267 static void sd_shadow_buf_free(struct buf *bp);
1268 
1269 static void sd_print_transport_rejected_message(struct sd_lun *un,
1270 	struct sd_xbuf *xp, int code);
1271 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1272     void *arg, int code);
1273 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1274     void *arg, int code);
1275 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1276     void *arg, int code);
1277 
1278 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1279 	int retry_check_flag,
1280 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1281 		int c),
1282 	void *user_arg, int failure_code,  clock_t retry_delay,
1283 	void (*statp)(kstat_io_t *));
1284 
1285 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1286 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1287 
1288 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1289 	struct scsi_pkt *pktp);
1290 static void sd_start_retry_command(void *arg);
1291 static void sd_start_direct_priority_command(void *arg);
1292 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1293 	int errcode);
1294 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1295 	struct buf *bp, int errcode);
1296 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1297 static void sd_sync_with_callback(struct sd_lun *un);
1298 static int sdrunout(caddr_t arg);
1299 
1300 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1301 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1302 
1303 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1304 static void sd_restore_throttle(void *arg);
1305 
1306 static void sd_init_cdb_limits(struct sd_lun *un);
1307 
1308 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1309 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1310 
1311 /*
1312  * Error handling functions
1313  */
1314 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1315 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1316 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1317 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1318 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1319 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1320 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1321 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1322 
1323 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1324 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1325 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1326 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1327 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1328 	struct sd_xbuf *xp);
1329 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1330 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1331 
1332 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1333 	void *arg, int code);
1334 
1335 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1336 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1337 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1338 	uint8_t *sense_datap,
1339 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1340 static void sd_sense_key_not_ready(struct sd_lun *un,
1341 	uint8_t *sense_datap,
1342 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1343 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1344 	uint8_t *sense_datap,
1345 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1346 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1347 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1348 static void sd_sense_key_unit_attention(struct sd_lun *un,
1349 	uint8_t *sense_datap,
1350 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1351 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1352 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1353 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1354 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1355 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1356 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1357 static void sd_sense_key_default(struct sd_lun *un,
1358 	uint8_t *sense_datap,
1359 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1360 
1361 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1362 	void *arg, int flag);
1363 
1364 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1365 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1366 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1369 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1370 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1371 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1372 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1377 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1382 
1383 static void sd_start_stop_unit_callback(void *arg);
1384 static void sd_start_stop_unit_task(void *arg);
1385 
1386 static void sd_taskq_create(void);
1387 static void sd_taskq_delete(void);
1388 static void sd_media_change_task(void *arg);
1389 
1390 static int sd_handle_mchange(struct sd_lun *un);
1391 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1392 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1393 	uint32_t *lbap, int path_flag);
1394 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1395 	uint32_t *lbap, int path_flag);
1396 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1397 	int path_flag);
1398 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1399 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1400 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1401 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1402 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1403 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1404 	uchar_t usr_cmd, uchar_t *usr_bufp);
1405 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1406 	struct dk_callback *dkc);
1407 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1408 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1409 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1410 	uchar_t *bufaddr, uint_t buflen);
1411 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1412 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1413 	uchar_t *bufaddr, uint_t buflen, char feature);
1414 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1415 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1416 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1417 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1418 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1419 	size_t buflen, daddr_t start_block, int path_flag);
1420 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1421 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1422 	path_flag)
1423 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1424 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1425 	path_flag)
1426 
1427 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1428 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1429 	uint16_t param_ptr, int path_flag);
1430 
1431 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1432 static void sd_free_rqs(struct sd_lun *un);
1433 
1434 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1435 	uchar_t *data, int len, int fmt);
1436 static void sd_panic_for_res_conflict(struct sd_lun *un);
1437 
1438 /*
1439  * Disk Ioctl Function Prototypes
1440  */
1441 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1442 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1443 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1444 
1445 /*
1446  * Multi-host Ioctl Prototypes
1447  */
1448 static int sd_check_mhd(dev_t dev, int interval);
1449 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1450 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1451 static char *sd_sname(uchar_t status);
1452 static void sd_mhd_resvd_recover(void *arg);
1453 static void sd_resv_reclaim_thread(void);
1454 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1455 static int sd_reserve_release(dev_t dev, int cmd);
1456 static void sd_rmv_resv_reclaim_req(dev_t dev);
1457 static void sd_mhd_reset_notify_cb(caddr_t arg);
1458 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1459 	mhioc_inkeys_t *usrp, int flag);
1460 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1461 	mhioc_inresvs_t *usrp, int flag);
1462 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1463 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_release(dev_t dev);
1465 static int sd_mhdioc_register_devid(dev_t dev);
1466 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1467 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1468 
1469 /*
1470  * SCSI removable prototypes
1471  */
1472 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1473 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_pause_resume(dev_t dev, int mode);
1476 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1477 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1478 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1488 static int sr_eject(dev_t dev);
1489 static void sr_ejected(register struct sd_lun *un);
1490 static int sr_check_wp(dev_t dev);
1491 static int sd_check_media(dev_t dev, enum dkio_state state);
1492 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1493 static void sd_delayed_cv_broadcast(void *arg);
1494 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1495 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1496 
1497 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1498 
1499 /*
1500  * Function prototypes for the non-512 support (DVDRAM, MO, etc.) functions.
1501  */
1502 static void sd_check_for_writable_cd(struct sd_lun *un);
1503 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1504 static void sd_wm_cache_destructor(void *wm, void *un);
1505 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1506 	daddr_t endb, ushort_t typ);
1507 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1508 	daddr_t endb);
1509 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1510 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1511 static void sd_read_modify_write_task(void *arg);
1512 static int
1513 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1514 	struct buf **bpp);
1515 
1516 
1517 /*
1518  * Function prototypes for failfast support.
1519  */
1520 static void sd_failfast_flushq(struct sd_lun *un);
1521 static int sd_failfast_flushq_callback(struct buf *bp);
1522 
1523 /*
1524  * Function prototypes to check for LSI devices
1525  */
1526 static void sd_is_lsi(struct sd_lun *un);
1527 
1528 /*
1529  * Function prototypes for x86 support
1530  */
1531 #if defined(__i386) || defined(__amd64)
1532 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1533 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1534 #endif
1535 
1536 
1537 /* Function prototypes for cmlb */
1538 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1539     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1540 
1541 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1542 
1543 /*
1544  * Constants for failfast support:
1545  *
1546  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1547  * failfast processing being performed.
1548  *
1549  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1550  * failfast processing on all bufs with B_FAILFAST set.
1551  */
1552 
1553 #define	SD_FAILFAST_INACTIVE		0
1554 #define	SD_FAILFAST_ACTIVE		1
1555 
1556 /*
1557  * Bitmask to control behavior of buf(9S) flushes when a transition to
1558  * the failfast state occurs. Optional bits include:
1559  *
1560  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1561  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1562  * be flushed.
1563  *
1564  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1565  * driver, in addition to the regular wait queue. This includes the xbuf
1566  * queues. When clear, only the driver's wait queue will be flushed.
1567  */
1568 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1569 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1570 
1571 /*
1572  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1573  * to flush all queues within the driver.
1574  */
1575 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
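
/*
 * Illustration only: an administrator who wanted both optional flush
 * behaviors could set this variable to
 * (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES), for
 * example via /etc/system (assuming the symbol is visible to the loader):
 *
 *	set sd:sd_failfast_flushctl = 0x3
 */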
1576 
1577 
1578 /*
1579  * SD Testing Fault Injection
1580  */
1581 #ifdef SD_FAULT_INJECTION
1582 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1583 static void sd_faultinjection(struct scsi_pkt *pktp);
1584 static void sd_injection_log(char *buf, struct sd_lun *un);
1585 #endif
1586 
1587 /*
1588  * Device driver ops vector
1589  */
1590 static struct cb_ops sd_cb_ops = {
1591 	sdopen,			/* open */
1592 	sdclose,		/* close */
1593 	sdstrategy,		/* strategy */
1594 	nodev,			/* print */
1595 	sddump,			/* dump */
1596 	sdread,			/* read */
1597 	sdwrite,		/* write */
1598 	sdioctl,		/* ioctl */
1599 	nodev,			/* devmap */
1600 	nodev,			/* mmap */
1601 	nodev,			/* segmap */
1602 	nochpoll,		/* poll */
1603 	sd_prop_op,		/* cb_prop_op */
1604 	0,			/* streamtab  */
1605 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1606 	CB_REV,			/* cb_rev */
1607 	sdaread, 		/* async I/O read entry point */
1608 	sdawrite		/* async I/O write entry point */
1609 };
1610 
1611 static struct dev_ops sd_ops = {
1612 	DEVO_REV,		/* devo_rev, */
1613 	0,			/* refcnt  */
1614 	sdinfo,			/* info */
1615 	nulldev,		/* identify */
1616 	sdprobe,		/* probe */
1617 	sdattach,		/* attach */
1618 	sddetach,		/* detach */
1619 	nodev,			/* reset */
1620 	&sd_cb_ops,		/* driver operations */
1621 	NULL,			/* bus operations */
1622 	sdpower			/* power */
1623 };
1624 
1625 
1626 /*
1627  * This is the loadable module wrapper.
1628  */
1629 #include <sys/modctl.h>
1630 
1631 static struct modldrv modldrv = {
1632 	&mod_driverops,		/* Type of module. This one is a driver */
1633 	SD_MODULE_NAME,		/* Module name. */
1634 	&sd_ops			/* driver ops */
1635 };
1636 
1637 
1638 static struct modlinkage modlinkage = {
1639 	MODREV_1,
1640 	&modldrv,
1641 	NULL
1642 };
1643 
1644 static cmlb_tg_ops_t sd_tgops = {
1645 	TG_DK_OPS_VERSION_1,
1646 	sd_tg_rdwr,
1647 	sd_tg_getinfo
1648 };
1649 
1650 static struct scsi_asq_key_strings sd_additional_codes[] = {
1651 	0x81, 0, "Logical Unit is Reserved",
1652 	0x85, 0, "Audio Address Not Valid",
1653 	0xb6, 0, "Media Load Mechanism Failed",
1654 	0xB9, 0, "Audio Play Operation Aborted",
1655 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1656 	0x53, 2, "Medium removal prevented",
1657 	0x6f, 0, "Authentication failed during key exchange",
1658 	0x6f, 1, "Key not present",
1659 	0x6f, 2, "Key not established",
1660 	0x6f, 3, "Read without proper authentication",
1661 	0x6f, 4, "Mismatched region to this logical unit",
1662 	0x6f, 5, "Region reset count error",
1663 	0xffff, 0x0, NULL
1664 };
1665 
1666 
1667 /*
1668  * Struct for passing printing information for sense data messages
1669  */
1670 struct sd_sense_info {
1671 	int	ssi_severity;
1672 	int	ssi_pfa_flag;
1673 };
1674 
1675 /*
1676  * Table of function pointers for iostart-side routines. Separate "chains"
1677  * of layered function calls are formed by placing the function pointers
1678  * sequentially in the desired order. Functions are called according to an
1679  * incrementing table index ordering. The last function in each chain must
1680  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1681  * in the sd_iodone_chain[] array.
1682  *
1683  * Note: It may seem more natural to organize both the iostart and iodone
1684  * functions together, into an array of structures (or some similar
1685  * organization) with a common index, rather than two separate arrays which
1686  * must be maintained in synchronization. The purpose of this division is
1687  * to achieve improved performance: individual arrays allow for more
1688  * effective cache line utilization on certain platforms.
1689  */
1690 
1691 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1692 
1693 
1694 static sd_chain_t sd_iostart_chain[] = {
1695 
1696 	/* Chain for buf IO for disk drive targets (PM enabled) */
1697 	sd_mapblockaddr_iostart,	/* Index: 0 */
1698 	sd_pm_iostart,			/* Index: 1 */
1699 	sd_core_iostart,		/* Index: 2 */
1700 
1701 	/* Chain for buf IO for disk drive targets (PM disabled) */
1702 	sd_mapblockaddr_iostart,	/* Index: 3 */
1703 	sd_core_iostart,		/* Index: 4 */
1704 
1705 	/* Chain for buf IO for removable-media targets (PM enabled) */
1706 	sd_mapblockaddr_iostart,	/* Index: 5 */
1707 	sd_mapblocksize_iostart,	/* Index: 6 */
1708 	sd_pm_iostart,			/* Index: 7 */
1709 	sd_core_iostart,		/* Index: 8 */
1710 
1711 	/* Chain for buf IO for removable-media targets (PM disabled) */
1712 	sd_mapblockaddr_iostart,	/* Index: 9 */
1713 	sd_mapblocksize_iostart,	/* Index: 10 */
1714 	sd_core_iostart,		/* Index: 11 */
1715 
1716 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1717 	sd_mapblockaddr_iostart,	/* Index: 12 */
1718 	sd_checksum_iostart,		/* Index: 13 */
1719 	sd_pm_iostart,			/* Index: 14 */
1720 	sd_core_iostart,		/* Index: 15 */
1721 
1722 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1723 	sd_mapblockaddr_iostart,	/* Index: 16 */
1724 	sd_checksum_iostart,		/* Index: 17 */
1725 	sd_core_iostart,		/* Index: 18 */
1726 
1727 	/* Chain for USCSI commands (all targets) */
1728 	sd_pm_iostart,			/* Index: 19 */
1729 	sd_core_iostart,		/* Index: 20 */
1730 
1731 	/* Chain for checksumming USCSI commands (all targets) */
1732 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1733 	sd_pm_iostart,			/* Index: 22 */
1734 	sd_core_iostart,		/* Index: 23 */
1735 
1736 	/* Chain for "direct" USCSI commands (all targets) */
1737 	sd_core_iostart,		/* Index: 24 */
1738 
1739 	/* Chain for "direct priority" USCSI commands (all targets) */
1740 	sd_core_iostart,		/* Index: 25 */
1741 };
1742 
1743 /*
1744  * Macros to locate the first function of each iostart chain in the
1745  * sd_iostart_chain[] array. These are located by the index in the array.
1746  */
1747 #define	SD_CHAIN_DISK_IOSTART			0
1748 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1749 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1750 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1751 #define	SD_CHAIN_CHKSUM_IOSTART			12
1752 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1753 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1754 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1755 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1756 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
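
/*
 * For example, a buf started at SD_CHAIN_RMMEDIA_IOSTART (index 5)
 * walks the chain in increasing index order:
 *
 *	sd_mapblockaddr_iostart (5)
 *	  -> sd_mapblocksize_iostart (6)
 *	    -> sd_pm_iostart (7)
 *	      -> sd_core_iostart (8)
 */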
1757 
1758 
1759 /*
1760  * Table of function pointers for the iodone-side routines for the driver-
1761  * internal layering mechanism.  The calling sequence for iodone routines
1762  * uses a decrementing table index, so the last routine called in a chain
1763  * must be at the lowest array index location for that chain.  The last
1764  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1765  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1766  * of the functions in an iodone side chain must correspond to the ordering
1767  * of the iostart routines for that chain.  Note that there is no iodone
1768  * side routine that corresponds to sd_core_iostart(), so there is no
1769  * entry in the table for this.
1770  */
1771 
1772 static sd_chain_t sd_iodone_chain[] = {
1773 
1774 	/* Chain for buf IO for disk drive targets (PM enabled) */
1775 	sd_buf_iodone,			/* Index: 0 */
1776 	sd_mapblockaddr_iodone,		/* Index: 1 */
1777 	sd_pm_iodone,			/* Index: 2 */
1778 
1779 	/* Chain for buf IO for disk drive targets (PM disabled) */
1780 	sd_buf_iodone,			/* Index: 3 */
1781 	sd_mapblockaddr_iodone,		/* Index: 4 */
1782 
1783 	/* Chain for buf IO for removable-media targets (PM enabled) */
1784 	sd_buf_iodone,			/* Index: 5 */
1785 	sd_mapblockaddr_iodone,		/* Index: 6 */
1786 	sd_mapblocksize_iodone,		/* Index: 7 */
1787 	sd_pm_iodone,			/* Index: 8 */
1788 
1789 	/* Chain for buf IO for removable-media targets (PM disabled) */
1790 	sd_buf_iodone,			/* Index: 9 */
1791 	sd_mapblockaddr_iodone,		/* Index: 10 */
1792 	sd_mapblocksize_iodone,		/* Index: 11 */
1793 
1794 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1795 	sd_buf_iodone,			/* Index: 12 */
1796 	sd_mapblockaddr_iodone,		/* Index: 13 */
1797 	sd_checksum_iodone,		/* Index: 14 */
1798 	sd_pm_iodone,			/* Index: 15 */
1799 
1800 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1801 	sd_buf_iodone,			/* Index: 16 */
1802 	sd_mapblockaddr_iodone,		/* Index: 17 */
1803 	sd_checksum_iodone,		/* Index: 18 */
1804 
1805 	/* Chain for USCSI commands (non-checksum targets) */
1806 	sd_uscsi_iodone,		/* Index: 19 */
1807 	sd_pm_iodone,			/* Index: 20 */
1808 
1809 	/* Chain for USCSI commands (checksum targets) */
1810 	sd_uscsi_iodone,		/* Index: 21 */
1811 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1812 	sd_pm_iodone,			/* Index: 23 */
1813 
1814 	/* Chain for "direct" USCSI commands (all targets) */
1815 	sd_uscsi_iodone,		/* Index: 24 */
1816 
1817 	/* Chain for "direct priority" USCSI commands (all targets) */
1818 	sd_uscsi_iodone,		/* Index: 25 */
1819 };
1820 
1821 
1822 /*
1823  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1824  * each iodone-side chain. These are located by the array index, but as the
1825  * iodone side functions are called in a decrementing-index order, the
1826  * highest index number in each chain must be specified (as these correspond
1827  * to the first function in the iodone chain that will be called by the core
1828  * at IO completion time).
1829  */
1830 
1831 #define	SD_CHAIN_DISK_IODONE			2
1832 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1833 #define	SD_CHAIN_RMMEDIA_IODONE			8
1834 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1835 #define	SD_CHAIN_CHKSUM_IODONE			15
1836 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1837 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1838 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1839 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1840 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
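
/*
 * For example, a buf completing on the removable-media (PM enabled)
 * chain begins at SD_CHAIN_RMMEDIA_IODONE (index 8) and walks the
 * chain in decreasing index order:
 *
 *	sd_pm_iodone (8)
 *	  -> sd_mapblocksize_iodone (7)
 *	    -> sd_mapblockaddr_iodone (6)
 *	      -> sd_buf_iodone (5)
 */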
1841 
1842 
1843 
1844 
1845 /*
1846  * Array to map a layering chain index to the appropriate initpkt routine.
1847  * The redundant entries are present so that the index used for accessing
1848  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1849  * with this table as well.
1850  */
1851 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1852 
1853 static sd_initpkt_t	sd_initpkt_map[] = {
1854 
1855 	/* Chain for buf IO for disk drive targets (PM enabled) */
1856 	sd_initpkt_for_buf,		/* Index: 0 */
1857 	sd_initpkt_for_buf,		/* Index: 1 */
1858 	sd_initpkt_for_buf,		/* Index: 2 */
1859 
1860 	/* Chain for buf IO for disk drive targets (PM disabled) */
1861 	sd_initpkt_for_buf,		/* Index: 3 */
1862 	sd_initpkt_for_buf,		/* Index: 4 */
1863 
1864 	/* Chain for buf IO for removable-media targets (PM enabled) */
1865 	sd_initpkt_for_buf,		/* Index: 5 */
1866 	sd_initpkt_for_buf,		/* Index: 6 */
1867 	sd_initpkt_for_buf,		/* Index: 7 */
1868 	sd_initpkt_for_buf,		/* Index: 8 */
1869 
1870 	/* Chain for buf IO for removable-media targets (PM disabled) */
1871 	sd_initpkt_for_buf,		/* Index: 9 */
1872 	sd_initpkt_for_buf,		/* Index: 10 */
1873 	sd_initpkt_for_buf,		/* Index: 11 */
1874 
1875 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1876 	sd_initpkt_for_buf,		/* Index: 12 */
1877 	sd_initpkt_for_buf,		/* Index: 13 */
1878 	sd_initpkt_for_buf,		/* Index: 14 */
1879 	sd_initpkt_for_buf,		/* Index: 15 */
1880 
1881 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1882 	sd_initpkt_for_buf,		/* Index: 16 */
1883 	sd_initpkt_for_buf,		/* Index: 17 */
1884 	sd_initpkt_for_buf,		/* Index: 18 */
1885 
1886 	/* Chain for USCSI commands (non-checksum targets) */
1887 	sd_initpkt_for_uscsi,		/* Index: 19 */
1888 	sd_initpkt_for_uscsi,		/* Index: 20 */
1889 
1890 	/* Chain for USCSI commands (checksum targets) */
1891 	sd_initpkt_for_uscsi,		/* Index: 21 */
1892 	sd_initpkt_for_uscsi,		/* Index: 22 */
1893 	sd_initpkt_for_uscsi,		/* Index: 23 */
1894 
1895 	/* Chain for "direct" USCSI commands (all targets) */
1896 	sd_initpkt_for_uscsi,		/* Index: 24 */
1897 
1898 	/* Chain for "direct priority" USCSI commands (all targets) */
1899 	sd_initpkt_for_uscsi,		/* Index: 25 */
1900 
1901 };
1902 
1903 
1904 /*
1905  * Array to map a layering chain index to the appropriate destroypkt routine.
1906  * The redundant entries are present so that the index used for accessing
1907  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1908  * with this table as well.
1909  */
1910 typedef void (*sd_destroypkt_t)(struct buf *);
1911 
1912 static sd_destroypkt_t	sd_destroypkt_map[] = {
1913 
1914 	/* Chain for buf IO for disk drive targets (PM enabled) */
1915 	sd_destroypkt_for_buf,		/* Index: 0 */
1916 	sd_destroypkt_for_buf,		/* Index: 1 */
1917 	sd_destroypkt_for_buf,		/* Index: 2 */
1918 
1919 	/* Chain for buf IO for disk drive targets (PM disabled) */
1920 	sd_destroypkt_for_buf,		/* Index: 3 */
1921 	sd_destroypkt_for_buf,		/* Index: 4 */
1922 
1923 	/* Chain for buf IO for removable-media targets (PM enabled) */
1924 	sd_destroypkt_for_buf,		/* Index: 5 */
1925 	sd_destroypkt_for_buf,		/* Index: 6 */
1926 	sd_destroypkt_for_buf,		/* Index: 7 */
1927 	sd_destroypkt_for_buf,		/* Index: 8 */
1928 
1929 	/* Chain for buf IO for removable-media targets (PM disabled) */
1930 	sd_destroypkt_for_buf,		/* Index: 9 */
1931 	sd_destroypkt_for_buf,		/* Index: 10 */
1932 	sd_destroypkt_for_buf,		/* Index: 11 */
1933 
1934 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1935 	sd_destroypkt_for_buf,		/* Index: 12 */
1936 	sd_destroypkt_for_buf,		/* Index: 13 */
1937 	sd_destroypkt_for_buf,		/* Index: 14 */
1938 	sd_destroypkt_for_buf,		/* Index: 15 */
1939 
1940 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1941 	sd_destroypkt_for_buf,		/* Index: 16 */
1942 	sd_destroypkt_for_buf,		/* Index: 17 */
1943 	sd_destroypkt_for_buf,		/* Index: 18 */
1944 
1945 	/* Chain for USCSI commands (non-checksum targets) */
1946 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1947 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1948 
1949 	/* Chain for USCSI commands (checksum targets) */
1950 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1951 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1952 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1953 
1954 	/* Chain for "direct" USCSI commands (all targets) */
1955 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1956 
1957 	/* Chain for "direct priority" USCSI commands (all targets) */
1958 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1959 
1960 };
1961 
1962 
1963 
1964 /*
1965  * Array to map a layering chain index to the appropriate chain "type".
1966  * The chain type indicates a specific property/usage of the chain.
1967  * The redundant entries are present so that the index used for accessing
1968  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1969  * with this table as well.
1970  */
1971 
1972 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1973 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1974 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1975 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1976 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1977 						/* (for error recovery) */
1978 
1979 static int sd_chain_type_map[] = {
1980 
1981 	/* Chain for buf IO for disk drive targets (PM enabled) */
1982 	SD_CHAIN_BUFIO,			/* Index: 0 */
1983 	SD_CHAIN_BUFIO,			/* Index: 1 */
1984 	SD_CHAIN_BUFIO,			/* Index: 2 */
1985 
1986 	/* Chain for buf IO for disk drive targets (PM disabled) */
1987 	SD_CHAIN_BUFIO,			/* Index: 3 */
1988 	SD_CHAIN_BUFIO,			/* Index: 4 */
1989 
1990 	/* Chain for buf IO for removable-media targets (PM enabled) */
1991 	SD_CHAIN_BUFIO,			/* Index: 5 */
1992 	SD_CHAIN_BUFIO,			/* Index: 6 */
1993 	SD_CHAIN_BUFIO,			/* Index: 7 */
1994 	SD_CHAIN_BUFIO,			/* Index: 8 */
1995 
1996 	/* Chain for buf IO for removable-media targets (PM disabled) */
1997 	SD_CHAIN_BUFIO,			/* Index: 9 */
1998 	SD_CHAIN_BUFIO,			/* Index: 10 */
1999 	SD_CHAIN_BUFIO,			/* Index: 11 */
2000 
2001 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2002 	SD_CHAIN_BUFIO,			/* Index: 12 */
2003 	SD_CHAIN_BUFIO,			/* Index: 13 */
2004 	SD_CHAIN_BUFIO,			/* Index: 14 */
2005 	SD_CHAIN_BUFIO,			/* Index: 15 */
2006 
2007 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2008 	SD_CHAIN_BUFIO,			/* Index: 16 */
2009 	SD_CHAIN_BUFIO,			/* Index: 17 */
2010 	SD_CHAIN_BUFIO,			/* Index: 18 */
2011 
2012 	/* Chain for USCSI commands (non-checksum targets) */
2013 	SD_CHAIN_USCSI,			/* Index: 19 */
2014 	SD_CHAIN_USCSI,			/* Index: 20 */
2015 
2016 	/* Chain for USCSI commands (checksum targets) */
2017 	SD_CHAIN_USCSI,			/* Index: 21 */
2018 	SD_CHAIN_USCSI,			/* Index: 22 */
2019 	SD_CHAIN_USCSI,			/* Index: 23 */
2020 
2021 	/* Chain for "direct" USCSI commands (all targets) */
2022 	SD_CHAIN_DIRECT,		/* Index: 24 */
2023 
2024 	/* Chain for "direct priority" USCSI commands (all targets) */
2025 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2026 };
2027 
2028 
2029 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2030 #define	SD_IS_BUFIO(xp)			\
2031 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2032 
2033 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2034 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2035 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2036 
2037 
2038 
2039 /*
2040  * Struct, array, and macros to map a specific chain to the appropriate
2041  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2042  *
2043  * The sd_chain_index_map[] array is used at attach time to set the various
2044  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2045  * chain to be used with the instance. This allows different instances to use
2046  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2047  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2048  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2049  * dynamically & without the use of locking; and (2) a layer may update the
2050  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2051  * to allow for deferred processing of an IO within the same chain from a
2052  * different execution context.
2053  */
2054 
2055 struct sd_chain_index {
2056 	int	sci_iostart_index;
2057 	int	sci_iodone_index;
2058 };
2059 
2060 static struct sd_chain_index	sd_chain_index_map[] = {
2061 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2062 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2063 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2064 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2065 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2066 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2067 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2068 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2069 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2070 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2071 };
2072 
2073 
2074 /*
2075  * The following are indexes into the sd_chain_index_map[] array.
2076  */
2077 
2078 /* un->un_buf_chain_type must be set to one of these */
2079 #define	SD_CHAIN_INFO_DISK		0
2080 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2081 #define	SD_CHAIN_INFO_RMMEDIA		2
2082 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2083 #define	SD_CHAIN_INFO_CHKSUM		4
2084 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2085 
2086 /* un->un_uscsi_chain_type must be set to one of these */
2087 #define	SD_CHAIN_INFO_USCSI_CMD		6
2088 /* USCSI with PM disabled is the same as DIRECT */
2089 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2090 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2091 
2092 /* un->un_direct_chain_type must be set to one of these */
2093 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2094 
2095 /* un->un_priority_chain_type must be set to one of these */
2096 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
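
/*
 * Sketch of how the two levels of indexes fit together (illustrative
 * only; the actual assignments are made in the attach and sd_xbuf
 * init paths). At attach time an instance might record:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *
 * and at IO time the xbuf would then be seeded with:
 *
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */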
2097 
2098 /* size for devid inquiries */
2099 #define	MAX_INQUIRY_SIZE		0xF0
2100 
2101 /*
2102  * Macros used by functions to pass a given buf(9S) struct along to the
2103  * next function in the layering chain for further processing.
2104  *
2105  * In the following macros, passing more than three arguments to the called
2106  * routines causes the optimizer for the SPARC compiler to stop doing tail
2107  * call elimination, which results in significant performance degradation.
2108  */
2109 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2110 	((*(sd_iostart_chain[index]))(index, un, bp))
2111 
2112 #define	SD_BEGIN_IODONE(index, un, bp)	\
2113 	((*(sd_iodone_chain[index]))(index, un, bp))
2114 
2115 #define	SD_NEXT_IOSTART(index, un, bp)				\
2116 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2117 
2118 #define	SD_NEXT_IODONE(index, un, bp)				\
2119 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
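
/*
 * A minimal sketch of how a layering routine uses these macros
 * (sd_example_iostart is hypothetical; real layers such as
 * sd_pm_iostart follow this shape):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer processing of bp ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * The iodone side is symmetric: each layer finishes by calling
 * SD_NEXT_IODONE(index, un, bp) to continue down its chain.
 */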
2120 
2121 /*
2122  *    Function: _init
2123  *
2124  * Description: This is the driver _init(9E) entry point.
2125  *
2126  * Return Code: Returns the value from mod_install(9F) or
2127  *		ddi_soft_state_init(9F) as appropriate.
2128  *
2129  *     Context: Called when driver module loaded.
2130  */
2131 
2132 int
2133 _init(void)
2134 {
2135 	int	err;
2136 
2137 	/* establish driver name from module name */
2138 	sd_label = mod_modname(&modlinkage);
2139 
2140 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2141 		SD_MAXUNIT);
2142 
2143 	if (err != 0) {
2144 		return (err);
2145 	}
2146 
2147 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2148 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2149 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2150 
2151 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2152 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2153 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2154 
2155 	/*
2156 	 * it's ok to init here even for fibre devices
2157 	 */
2158 	sd_scsi_probe_cache_init();
2159 
2160 	sd_scsi_target_lun_init();
2161 
2162 	/*
2163 	 * Creating taskq before mod_install ensures that all callers (threads)
2164 	 * that enter the module after a successful mod_install encounter
2165 	 * a valid taskq.
2166 	 */
2167 	sd_taskq_create();
2168 
2169 	err = mod_install(&modlinkage);
2170 	if (err != 0) {
2171 		/* delete taskq if install fails */
2172 		sd_taskq_delete();
2173 
2174 		mutex_destroy(&sd_detach_mutex);
2175 		mutex_destroy(&sd_log_mutex);
2176 		mutex_destroy(&sd_label_mutex);
2177 
2178 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2179 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2180 		cv_destroy(&sd_tr.srq_inprocess_cv);
2181 
2182 		sd_scsi_probe_cache_fini();
2183 
2184 		sd_scsi_target_lun_fini();
2185 
2186 		ddi_soft_state_fini(&sd_state);
2187 		return (err);
2188 	}
2189 
2190 	return (err);
2191 }
2192 
2193 
2194 /*
2195  *    Function: _fini
2196  *
2197  * Description: This is the driver _fini(9E) entry point.
2198  *
2199  * Return Code: Returns the value from mod_remove(9F)
2200  *
2201  *     Context: Called when driver module is unloaded.
2202  */
2203 
2204 int
2205 _fini(void)
2206 {
2207 	int err;
2208 
2209 	if ((err = mod_remove(&modlinkage)) != 0) {
2210 		return (err);
2211 	}
2212 
2213 	sd_taskq_delete();
2214 
2215 	mutex_destroy(&sd_detach_mutex);
2216 	mutex_destroy(&sd_log_mutex);
2217 	mutex_destroy(&sd_label_mutex);
2218 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2219 
2220 	sd_scsi_probe_cache_fini();
2221 
2222 	sd_scsi_target_lun_fini();
2223 
2224 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2225 	cv_destroy(&sd_tr.srq_inprocess_cv);
2226 
2227 	ddi_soft_state_fini(&sd_state);
2228 
2229 	return (err);
2230 }
2231 
2232 
2233 /*
2234  *    Function: _info
2235  *
2236  * Description: This is the driver _info(9E) entry point.
2237  *
2238  *   Arguments: modinfop - pointer to the driver modinfo structure
2239  *
2240  * Return Code: Returns the value from mod_info(9F).
2241  *
2242  *     Context: Kernel thread context
2243  */
2244 
2245 int
2246 _info(struct modinfo *modinfop)
2247 {
2248 	return (mod_info(&modlinkage, modinfop));
2249 }
2250 
2251 
2252 /*
2253  * The following routines implement the driver message logging facility.
2254  * They provide component- and level-based debug output filtering.
2255  * Output may also be restricted to messages for a single instance by
2256  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2257  * to NULL, then messages for all instances are printed.
2258  *
2259  * These routines have been cloned from each other due to the language
2260  * constraints of macros and variable argument list processing.
2261  */
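
/*
 * For example, callers do not invoke these routines directly but go
 * through the corresponding wrapper macros defined in sddef.h, e.g.
 * (sd_foo and state are illustrative only):
 *
 *	SD_ERROR(SD_LOG_COMMON, un, "sd_foo: unexpected state %d\n", state);
 */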
2262 
2263 
2264 /*
2265  *    Function: sd_log_err
2266  *
2267  * Description: This routine is called by the SD_ERROR macro for debug
2268  *		logging of error conditions.
2269  *
2270  *   Arguments: comp - driver component being logged
2271  *		un   - pointer to driver soft state (unit) structure
2272  *		fmt  - error string and format to be logged
2273  */
2274 
2275 static void
2276 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2277 {
2278 	va_list		ap;
2279 	dev_info_t	*dev;
2280 
2281 	ASSERT(un != NULL);
2282 	dev = SD_DEVINFO(un);
2283 	ASSERT(dev != NULL);
2284 
2285 	/*
2286 	 * Filter messages based on the global component and level masks.
2287 	 * Also print if un matches the value of sd_debug_un, or if
2288 	 * sd_debug_un is set to NULL.
2289 	 */
2290 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2291 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2292 		mutex_enter(&sd_log_mutex);
2293 		va_start(ap, fmt);
2294 		(void) vsprintf(sd_log_buf, fmt, ap);
2295 		va_end(ap);
2296 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2297 		mutex_exit(&sd_log_mutex);
2298 	}
2299 #ifdef SD_FAULT_INJECTION
2300 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2301 	if (un->sd_injection_mask & comp) {
2302 		mutex_enter(&sd_log_mutex);
2303 		va_start(ap, fmt);
2304 		(void) vsprintf(sd_log_buf, fmt, ap);
2305 		va_end(ap);
2306 		sd_injection_log(sd_log_buf, un);
2307 		mutex_exit(&sd_log_mutex);
2308 	}
2309 #endif
2310 }
2311 
2312 
2313 /*
2314  *    Function: sd_log_info
2315  *
2316  * Description: This routine is called by the SD_INFO macro for debug
2317  *		logging of general purpose informational conditions.
2318  *
2319  *   Arguments: comp - driver component being logged
2320  *		un   - pointer to driver soft state (unit) structure
2321  *		fmt  - info string and format to be logged
2322  */
2323 
2324 static void
2325 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2326 {
2327 	va_list		ap;
2328 	dev_info_t	*dev;
2329 
2330 	ASSERT(un != NULL);
2331 	dev = SD_DEVINFO(un);
2332 	ASSERT(dev != NULL);
2333 
2334 	/*
2335 	 * Filter messages based on the global component and level masks.
2336 	 * Also print if un matches the value of sd_debug_un, or if
2337 	 * sd_debug_un is set to NULL.
2338 	 */
2339 	if ((sd_component_mask & component) &&
2340 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2341 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2342 		mutex_enter(&sd_log_mutex);
2343 		va_start(ap, fmt);
2344 		(void) vsprintf(sd_log_buf, fmt, ap);
2345 		va_end(ap);
2346 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2347 		mutex_exit(&sd_log_mutex);
2348 	}
2349 #ifdef SD_FAULT_INJECTION
2350 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2351 	if (un->sd_injection_mask & component) {
2352 		mutex_enter(&sd_log_mutex);
2353 		va_start(ap, fmt);
2354 		(void) vsprintf(sd_log_buf, fmt, ap);
2355 		va_end(ap);
2356 		sd_injection_log(sd_log_buf, un);
2357 		mutex_exit(&sd_log_mutex);
2358 	}
2359 #endif
2360 }
2361 
2362 
2363 /*
2364  *    Function: sd_log_trace
2365  *
2366  * Description: This routine is called by the SD_TRACE macro for debug
2367  *		logging of trace conditions (i.e. function entry/exit).
2368  *
2369  *   Arguments: comp - driver component being logged
2370  *		un   - pointer to driver soft state (unit) structure
2371  *		fmt  - trace string and format to be logged
2372  */
2373 
2374 static void
2375 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2376 {
2377 	va_list		ap;
2378 	dev_info_t	*dev;
2379 
2380 	ASSERT(un != NULL);
2381 	dev = SD_DEVINFO(un);
2382 	ASSERT(dev != NULL);
2383 
2384 	/*
2385 	 * Filter messages based on the global component and level masks.
2386 	 * Also print if un matches the value of sd_debug_un, or if
2387 	 * sd_debug_un is set to NULL.
2388 	 */
2389 	if ((sd_component_mask & component) &&
2390 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2391 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2392 		mutex_enter(&sd_log_mutex);
2393 		va_start(ap, fmt);
2394 		(void) vsprintf(sd_log_buf, fmt, ap);
2395 		va_end(ap);
2396 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2397 		mutex_exit(&sd_log_mutex);
2398 	}
2399 #ifdef SD_FAULT_INJECTION
2400 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2401 	if (un->sd_injection_mask & component) {
2402 		mutex_enter(&sd_log_mutex);
2403 		va_start(ap, fmt);
2404 		(void) vsprintf(sd_log_buf, fmt, ap);
2405 		va_end(ap);
2406 		sd_injection_log(sd_log_buf, un);
2407 		mutex_exit(&sd_log_mutex);
2408 	}
2409 #endif
2410 }
2411 
2412 
2413 /*
2414  *    Function: sdprobe
2415  *
2416  * Description: This is the driver probe(9e) entry point function.
2417  *
2418  *   Arguments: devi - opaque device info handle
2419  *
2420  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2421  *              DDI_PROBE_FAILURE: If the probe failed.
2422  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2423  *				   but may be present in the future.
2424  */
2425 
2426 static int
2427 sdprobe(dev_info_t *devi)
2428 {
2429 	struct scsi_device	*devp;
2430 	int			rval;
2431 	int			instance;
2432 
2433 	/*
2434 	 * if it wasn't for pln, sdprobe could actually be nulldev
2435 	 * in the "__fibre" case.
2436 	 */
2437 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2438 		return (DDI_PROBE_DONTCARE);
2439 	}
2440 
2441 	devp = ddi_get_driver_private(devi);
2442 
2443 	if (devp == NULL) {
2444 		/* Oops... nexus driver is misconfigured... */
2445 		return (DDI_PROBE_FAILURE);
2446 	}
2447 
2448 	instance = ddi_get_instance(devi);
2449 
2450 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2451 		return (DDI_PROBE_PARTIAL);
2452 	}
2453 
2454 	/*
2455 	 * Call the SCSA utility probe routine to see if we actually
2456 	 * have a target at this SCSI nexus.
2457 	 */
2458 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2459 	case SCSIPROBE_EXISTS:
2460 		switch (devp->sd_inq->inq_dtype) {
2461 		case DTYPE_DIRECT:
2462 			rval = DDI_PROBE_SUCCESS;
2463 			break;
2464 		case DTYPE_RODIRECT:
2465 			/* CDs etc. Can be removable media */
2466 			rval = DDI_PROBE_SUCCESS;
2467 			break;
2468 		case DTYPE_OPTICAL:
2469 			/*
2470 			 * Rewritable optical drive HP115AA
2471 			 * Can also be removable media
2472 			 */
2473 
2474 			/*
2475 			 * Do not attempt to bind to DTYPE_OPTICAL if
2476 			 * pre-Solaris 9 SPARC sd behavior is required
2477 			 *
2478 			 * If first time through and sd_dtype_optical_bind
2479 			 * has not been set in /etc/system check properties
2480 			 */
2481 
2482 			if (sd_dtype_optical_bind < 0) {
2483 				sd_dtype_optical_bind = ddi_prop_get_int(
2484 				    DDI_DEV_T_ANY, devi, 0,
2485 				    "optical-device-bind", 1);
2486 			}
2487 
2488 			if (sd_dtype_optical_bind == 0) {
2489 				rval = DDI_PROBE_FAILURE;
2490 			} else {
2491 				rval = DDI_PROBE_SUCCESS;
2492 			}
2493 			break;
2494 
2495 		case DTYPE_NOTPRESENT:
2496 		default:
2497 			rval = DDI_PROBE_FAILURE;
2498 			break;
2499 		}
2500 		break;
2501 	default:
2502 		rval = DDI_PROBE_PARTIAL;
2503 		break;
2504 	}
2505 
2506 	/*
2507 	 * This routine checks for resource allocation prior to freeing,
2508 	 * so it will take care of the "smart probing" case where a
2509 	 * scsi_probe() may or may not have been issued and will *not*
2510 	 * free previously-freed resources.
2511 	 */
2512 	scsi_unprobe(devp);
2513 	return (rval);
2514 }
2515 
2516 
2517 /*
2518  *    Function: sdinfo
2519  *
2520  * Description: This is the driver getinfo(9e) entry point function.
2521  * 		Given the device number, return the devinfo pointer from
2522  *		the scsi_device structure or the instance number
2523  *		associated with the dev_t.
2524  *
2525  *   Arguments: dip     - pointer to device info structure
2526  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2527  *			  DDI_INFO_DEVT2INSTANCE)
2528  *		arg     - driver dev_t
2529  *		resultp - user buffer for request response
2530  *
2531  * Return Code: DDI_SUCCESS
2532  *              DDI_FAILURE
2533  */
2534 /* ARGSUSED */
2535 static int
2536 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2537 {
2538 	struct sd_lun	*un;
2539 	dev_t		dev;
2540 	int		instance;
2541 	int		error;
2542 
2543 	switch (infocmd) {
2544 	case DDI_INFO_DEVT2DEVINFO:
2545 		dev = (dev_t)arg;
2546 		instance = SDUNIT(dev);
2547 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2548 			return (DDI_FAILURE);
2549 		}
2550 		*result = (void *) SD_DEVINFO(un);
2551 		error = DDI_SUCCESS;
2552 		break;
2553 	case DDI_INFO_DEVT2INSTANCE:
2554 		dev = (dev_t)arg;
2555 		instance = SDUNIT(dev);
2556 		*result = (void *)(uintptr_t)instance;
2557 		error = DDI_SUCCESS;
2558 		break;
2559 	default:
2560 		error = DDI_FAILURE;
2561 	}
2562 	return (error);
2563 }
2564 
2565 /*
2566  *    Function: sd_prop_op
2567  *
2568  * Description: This is the driver prop_op(9e) entry point function.
2569  *		Return the number of blocks for the partition in question
2570  *		or forward the request to the property facilities.
2571  *
2572  *   Arguments: dev       - device number
2573  *		dip       - pointer to device info structure
2574  *		prop_op   - property operator
2575  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2576  *		name      - pointer to property name
2577  *		valuep    - pointer or address of the user buffer
2578  *		lengthp   - property length
2579  *
2580  * Return Code: DDI_PROP_SUCCESS
2581  *              DDI_PROP_NOT_FOUND
2582  *              DDI_PROP_UNDEFINED
2583  *              DDI_PROP_NO_MEMORY
2584  *              DDI_PROP_BUF_TOO_SMALL
2585  */
2586 
2587 static int
2588 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2589 	char *name, caddr_t valuep, int *lengthp)
2590 {
2591 	int		instance = ddi_get_instance(dip);
2592 	struct sd_lun	*un;
2593 	uint64_t	nblocks64;
2594 
2595 	/*
2596 	 * Our dynamic properties are all device specific and size oriented.
2597 	 * Requests issued under conditions where size is valid are passed
2598 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2599 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2600 	 */
2601 	un = ddi_get_soft_state(sd_state, instance);
2602 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2603 	    !SD_IS_VALID_LABEL(un)) {
2604 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2605 		    name, valuep, lengthp));
2606 	} else {
2607 		/* get nblocks value */
2608 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2609 
2610 		(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2611 		    (diskaddr_t *)&nblocks64, NULL, NULL, NULL,
2612 		    (void *)SD_PATH_DIRECT);
2613 
2614 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2615 		    name, valuep, lengthp, nblocks64));
2616 	}
2617 }
2618 
2619 /*
2620  * The following functions are for smart probing:
2621  * sd_scsi_probe_cache_init()
2622  * sd_scsi_probe_cache_fini()
2623  * sd_scsi_clear_probe_cache()
2624  * sd_scsi_probe_with_cache()
2625  */
2626 
2627 /*
2628  *    Function: sd_scsi_probe_cache_init
2629  *
2630  * Description: Initializes the probe response cache mutex and head pointer.
2631  *
2632  *     Context: Kernel thread context
2633  */
2634 
2635 static void
2636 sd_scsi_probe_cache_init(void)
2637 {
2638 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2639 	sd_scsi_probe_cache_head = NULL;
2640 }
2641 
2642 
2643 /*
2644  *    Function: sd_scsi_probe_cache_fini
2645  *
2646  * Description: Frees all resources associated with the probe response cache.
2647  *
2648  *     Context: Kernel thread context
2649  */
2650 
2651 static void
2652 sd_scsi_probe_cache_fini(void)
2653 {
2654 	struct sd_scsi_probe_cache *cp;
2655 	struct sd_scsi_probe_cache *ncp;
2656 
2657 	/* Clean up our smart probing linked list */
2658 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2659 		ncp = cp->next;
2660 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2661 	}
2662 	sd_scsi_probe_cache_head = NULL;
2663 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2664 }
2665 
2666 
2667 /*
2668  *    Function: sd_scsi_clear_probe_cache
2669  *
2670  * Description: This routine clears the probe response cache. This is
2671  *		done when open() returns ENXIO so that when deferred
2672  *		attach is attempted (possibly after a device has been
2673  *		turned on) we will retry the probe. Since we don't know
2674  *		which target we failed to open, we just clear the
2675  *		entire cache.
2676  *
2677  *     Context: Kernel thread context
2678  */
2679 
2680 static void
2681 sd_scsi_clear_probe_cache(void)
2682 {
2683 	struct sd_scsi_probe_cache	*cp;
2684 	int				i;
2685 
2686 	mutex_enter(&sd_scsi_probe_cache_mutex);
2687 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2688 		/*
2689 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2690 		 * force probing to be performed the next time
2691 		 * sd_scsi_probe_with_cache is called.
2692 		 */
2693 		for (i = 0; i < NTARGETS_WIDE; i++) {
2694 			cp->cache[i] = SCSIPROBE_EXISTS;
2695 		}
2696 	}
2697 	mutex_exit(&sd_scsi_probe_cache_mutex);
2698 }
2699 
2700 
2701 /*
2702  *    Function: sd_scsi_probe_with_cache
2703  *
2704  * Description: This routine implements support for a scsi device probe
2705  *		with cache. The driver maintains a cache of the target
2706  *		responses to scsi probes. If we get no response from a
2707  *		target during a probe inquiry, we remember that, and we
2708  *		avoid additional calls to scsi_probe on non-zero LUNs
2709  *		on the same target until the cache is cleared. By doing
2710  *		so we avoid the 1/4 sec selection timeout for nonzero
2711  *		LUNs. LUN 0 of a target is always probed.
2712  *
2713  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2714  *              waitfunc - indicates what the allocator routines should
2715  *			   do when resources are not available. This value
2716  *			   is passed on to scsi_probe() when that routine
2717  *			   is called.
2718  *
2719  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2720  *		otherwise the value returned by scsi_probe(9F).
2721  *
2722  *     Context: Kernel thread context
2723  */
2724 
2725 static int
2726 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2727 {
2728 	struct sd_scsi_probe_cache	*cp;
2729 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2730 	int		lun, tgt;
2731 
2732 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2733 	    SCSI_ADDR_PROP_LUN, 0);
2734 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2735 	    SCSI_ADDR_PROP_TARGET, -1);
2736 
2737 	/* Make sure caching enabled and target in range */
2738 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2739 		/* do it the old way (no cache) */
2740 		return (scsi_probe(devp, waitfn));
2741 	}
2742 
2743 	mutex_enter(&sd_scsi_probe_cache_mutex);
2744 
2745 	/* Find the cache for this scsi bus instance */
2746 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2747 		if (cp->pdip == pdip) {
2748 			break;
2749 		}
2750 	}
2751 
2752 	/* If we can't find a cache for this pdip, create one */
2753 	if (cp == NULL) {
2754 		int i;
2755 
2756 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2757 		    KM_SLEEP);
2758 		cp->pdip = pdip;
2759 		cp->next = sd_scsi_probe_cache_head;
2760 		sd_scsi_probe_cache_head = cp;
2761 		for (i = 0; i < NTARGETS_WIDE; i++) {
2762 			cp->cache[i] = SCSIPROBE_EXISTS;
2763 		}
2764 	}
2765 
2766 	mutex_exit(&sd_scsi_probe_cache_mutex);
2767 
2768 	/* Recompute the cache for this target if LUN zero */
2769 	if (lun == 0) {
2770 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2771 	}
2772 
2773 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2774 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2775 		return (SCSIPROBE_NORESP);
2776 	}
2777 
2778 	/* Do the actual probe; save & return the result */
2779 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2780 }
2781 
2782 
2783 /*
2784  *    Function: sd_scsi_target_lun_init
2785  *
2786  * Description: Initializes the attached lun chain mutex and head pointer.
2787  *
2788  *     Context: Kernel thread context
2789  */
2790 
2791 static void
2792 sd_scsi_target_lun_init(void)
2793 {
2794 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2795 	sd_scsi_target_lun_head = NULL;
2796 }
2797 
2798 
2799 /*
2800  *    Function: sd_scsi_target_lun_fini
2801  *
2802  * Description: Frees all resources associated with the attached lun
2803  *              chain
2804  *
2805  *     Context: Kernel thread context
2806  */
2807 
2808 static void
2809 sd_scsi_target_lun_fini(void)
2810 {
2811 	struct sd_scsi_hba_tgt_lun	*cp;
2812 	struct sd_scsi_hba_tgt_lun	*ncp;
2813 
2814 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2815 		ncp = cp->next;
2816 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2817 	}
2818 	sd_scsi_target_lun_head = NULL;
2819 	mutex_destroy(&sd_scsi_target_lun_mutex);
2820 }
2821 
2822 
2823 /*
2824  *    Function: sd_scsi_get_target_lun_count
2825  *
2826  * Description: This routine will check in the attached lun chain to see
2827  * 		how many luns are attached on the required SCSI controller
2828  * 		and target. Currently, some capabilities like tagged queueing
2829  *		are supported by the HBA on a per-target basis, so all luns
2830  *		on a target have the same capabilities. Based on this
2831  * 		assumption, sd should only set these capabilities once per
2832  *		target. This function is called when sd needs to decide how
2833  *		many luns are already attached on a target.
2834  *
2835  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2836  *			  controller device.
2837  *              target	- The target ID on the controller's SCSI bus.
2838  *
2839  * Return Code: The number of luns attached on the required target and
2840  *		controller.
2841  *		-1 if target ID is not in parallel SCSI scope or the given
2842  * 		dip is not in the chain.
2843  *
2844  *     Context: Kernel thread context
2845  */
2846 
2847 static int
2848 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2849 {
2850 	struct sd_scsi_hba_tgt_lun	*cp;
2851 
2852 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2853 		return (-1);
2854 	}
2855 
2856 	mutex_enter(&sd_scsi_target_lun_mutex);
2857 
2858 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2859 		if (cp->pdip == dip) {
2860 			break;
2861 		}
2862 	}
2863 
2864 	mutex_exit(&sd_scsi_target_lun_mutex);
2865 
2866 	if (cp == NULL) {
2867 		return (-1);
2868 	}
2869 
2870 	return (cp->nlun[target]);
2871 }
2872 
2873 
2874 /*
2875  *    Function: sd_scsi_update_lun_on_target
2876  *
2877  * Description: This routine is used to update the attached lun chain when a
2878  *		lun is attached or detached on a target.
2879  *
2880  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2881  *                        controller device.
2882  *              target  - The target ID on the controller's SCSI bus.
2883  *		flag	- Indicates whether the lun is attached or detached.
2884  *
2885  *     Context: Kernel thread context
2886  */
2887 
2888 static void
2889 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2890 {
2891 	struct sd_scsi_hba_tgt_lun	*cp;
2892 
2893 	mutex_enter(&sd_scsi_target_lun_mutex);
2894 
2895 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2896 		if (cp->pdip == dip) {
2897 			break;
2898 		}
2899 	}
2900 
2901 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2902 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2903 		    KM_SLEEP);
2904 		cp->pdip = dip;
2905 		cp->next = sd_scsi_target_lun_head;
2906 		sd_scsi_target_lun_head = cp;
2907 	}
2908 
2909 	mutex_exit(&sd_scsi_target_lun_mutex);
2910 
2911 	if (cp != NULL) {
2912 		if (flag == SD_SCSI_LUN_ATTACH) {
2913 			cp->nlun[target]++;
2914 		} else {
2915 			cp->nlun[target]--;
2916 		}
2917 	}
2918 }
2919 
2920 
2921 /*
2922  *    Function: sd_spin_up_unit
2923  *
2924  * Description: Issues the following commands to spin-up the device:
2925  * Description: Issues the following commands to spin up the device:
2926  *		START STOP UNIT and INQUIRY.
2927  *   Arguments: un - driver soft state (unit) structure
2928  *
2929  * Return Code: 0 - success
2930  *		EIO - failure
2931  *		EACCES - reservation conflict
2932  *
2933  *     Context: Kernel thread context
2934  */
2935 
2936 static int
2937 sd_spin_up_unit(struct sd_lun *un)
2938 {
2939 	size_t	resid		= 0;
2940 	int	has_conflict	= FALSE;
2941 	uchar_t *bufaddr;
2942 
2943 	ASSERT(un != NULL);
2944 
2945 	/*
2946 	 * Send a throwaway START UNIT command.
2947 	 *
2948 	 * If we fail on this, we don't care presently what precisely
2949 	 * is wrong.  EMC's arrays will also fail this with a check
2950 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2951 	 * we don't want to fail the attach because it may become
2952 	 * "active" later.
2953 	 */
2954 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2955 	    == EACCES)
2956 		has_conflict = TRUE;
2957 
2958 	/*
2959 	 * Send another INQUIRY command to the target. This is necessary for
2960 	 * non-removable media direct access devices because their INQUIRY data
2961 	 * may not be fully qualified until they are spun up (perhaps via the
2962 	 * START command above).  (Note: This seems to be needed for some
2963 	 * legacy devices only.)  The INQUIRY command should succeed even if a
2964 	 * Reservation Conflict is present.
2965 	 */
2966 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2967 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2968 		kmem_free(bufaddr, SUN_INQSIZE);
2969 		return (EIO);
2970 	}
2971 
2972 	/*
2973 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2974 	 * Note that this routine does not return a failure here even if the
2975 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2976 	 */
2977 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2978 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2979 	}
2980 
2981 	kmem_free(bufaddr, SUN_INQSIZE);
2982 
2983 	/* If we hit a reservation conflict above, tell the caller. */
2984 	if (has_conflict == TRUE) {
2985 		return (EACCES);
2986 	}
2987 
2988 	return (0);
2989 }
2990 
2991 #ifdef _LP64
2992 /*
2993  *    Function: sd_enable_descr_sense
2994  *
2995  * Description: This routine attempts to select descriptor sense format
2996  *		using the Control mode page.  Devices that support 64 bit
2997  *		LBAs (for >2TB luns) should also implement descriptor
2998  *		sense data so we will call this function whenever we see
2999  *		a lun larger than 2TB.  If for some reason the device
3000  *		supports 64 bit LBAs but doesn't support descriptor sense,
3001  *		presumably the mode select will fail.  Everything will
3002  *		continue to work normally except that we will not get
3003  *		complete sense data for commands that fail with an LBA
3004  *		larger than 32 bits.
3005  *
3006  *   Arguments: un - driver soft state (unit) structure
3007  *
3008  *     Context: Kernel thread context only
3009  */
3010 
3011 static void
3012 sd_enable_descr_sense(struct sd_lun *un)
3013 {
3014 	uchar_t			*header;
3015 	struct mode_control_scsi3 *ctrl_bufp;
3016 	size_t			buflen;
3017 	size_t			bd_len;
3018 
3019 	/*
3020 	 * Read MODE SENSE page 0xA, Control Mode Page
3021 	 */
3022 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3023 	    sizeof (struct mode_control_scsi3);
3024 	header = kmem_zalloc(buflen, KM_SLEEP);
3025 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3026 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3027 		SD_ERROR(SD_LOG_COMMON, un,
3028 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3029 		goto eds_exit;
3030 	}
3031 
3032 	/*
3033 	 * Determine size of Block Descriptors in order to locate
3034 	 * the mode page data. ATAPI devices return 0, SCSI devices
3035 	 * should return MODE_BLK_DESC_LENGTH.
3036 	 */
3037 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3038 
3039 	ctrl_bufp = (struct mode_control_scsi3 *)
3040 	    (header + MODE_HEADER_LENGTH + bd_len);
3041 
3042 	/*
3043 	 * Clear PS bit for MODE SELECT
3044 	 */
3045 	ctrl_bufp->mode_page.ps = 0;
3046 
3047 	/*
3048 	 * Set D_SENSE to enable descriptor sense format.
3049 	 */
3050 	ctrl_bufp->d_sense = 1;
3051 
3052 	/*
3053 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3054 	 */
3055 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3056 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3057 		SD_INFO(SD_LOG_COMMON, un,
3058 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3059 		goto eds_exit;
3060 	}
3061 
3062 eds_exit:
3063 	kmem_free(header, buflen);
3064 }
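
/*
 * Illustrative sketch (hypothetical helper, not called by the driver):
 * the buffer walk used above.  The mode page data follows the 4-byte
 * group 0 mode header plus however many block descriptor bytes the
 * target reported -- 0 for ATAPI devices, MODE_BLK_DESC_LENGTH for
 * typical SCSI devices.
 */
static struct mode_control_scsi3 *
sd_example_locate_ctrl_page(uchar_t *header)
{
	size_t	bd_len = ((struct mode_header *)header)->bdesc_length;

	return ((struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len));
}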
3065 
3066 /*
3067  *    Function: sd_reenable_dsense_task
3068  *
3069  * Description: Re-enable descriptor sense after device or bus reset
3070  *
3071  *     Context: Executes in a taskq() thread context
3072  */
3073 static void
3074 sd_reenable_dsense_task(void *arg)
3075 {
3076 	struct	sd_lun	*un = arg;
3077 
3078 	ASSERT(un != NULL);
3079 	sd_enable_descr_sense(un);
3080 }
3081 #endif /* _LP64 */
3082 
3083 /*
3084  *    Function: sd_set_mmc_caps
3085  *
3086  * Description: This routine determines if the device is MMC compliant and if
3087  *		the device supports CDDA via a mode sense of the CDVD
3088  *		capabilities mode page. Also checks if the device is a
3089  *		dvdram writable device.
3090  *
3091  *   Arguments: un - driver soft state (unit) structure
3092  *
3093  *     Context: Kernel thread context only
3094  */
3095 
3096 static void
3097 sd_set_mmc_caps(struct sd_lun *un)
3098 {
3099 	struct mode_header_grp2		*sense_mhp;
3100 	uchar_t				*sense_page;
3101 	caddr_t				buf;
3102 	int				bd_len;
3103 	int				status;
3104 	struct uscsi_cmd		com;
3105 	int				rtn;
3106 	uchar_t				*out_data_rw, *out_data_hd;
3107 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3108 
3109 	ASSERT(un != NULL);
3110 
3111 	/*
3112 	 * The flags set in this function are: mmc compliant, dvdram
3113 	 * writable device, and cdda support.  Initialize them to FALSE;
3114 	 * if a capability is detected, its flag will be set to TRUE.
3115 	 */
3116 	un->un_f_mmc_cap = FALSE;
3117 	un->un_f_dvdram_writable_device = FALSE;
3118 	un->un_f_cfg_cdda = FALSE;
3119 
3120 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3121 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3122 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3123 
3124 	if (status != 0) {
3125 		/* command failed; just return */
3126 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3127 		return;
3128 	}
3129 	/*
3130 	 * If the mode sense request for the CDROM CAPABILITIES
3131 	 * page (0x2A) succeeds, the device is assumed to be MMC.
3132 	 */
3133 	un->un_f_mmc_cap = TRUE;
3134 
3135 	/* Get to the page data */
3136 	sense_mhp = (struct mode_header_grp2 *)buf;
3137 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3138 	    sense_mhp->bdesc_length_lo;
3139 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3140 		/*
3141 		 * We did not get back the expected block descriptor
3142 		 * length so we cannot determine if the device supports
3143 		 * CDDA. However, we still indicate the device is MMC
3144 		 * according to the successful response to the page
3145 		 * 0x2A mode sense request.
3146 		 */
3147 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3148 		    "sd_set_mmc_caps: Mode Sense returned "
3149 		    "invalid block descriptor length\n");
3150 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3151 		return;
3152 	}
3153 
3154 	/* See if read CDDA is supported */
3155 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3156 	    bd_len);
3157 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3158 
3159 	/* See if writing DVD RAM is supported. */
3160 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3161 	if (un->un_f_dvdram_writable_device == TRUE) {
3162 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3163 		return;
3164 	}
3165 
3166 	/*
3167 	 * If the device presents DVD or CD capabilities in the mode
3168 	 * page, we can return here since an RRD will not have
3169 	 * these capabilities.
3170 	 */
3171 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3172 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3173 		return;
3174 	}
3175 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3176 
3177 	/*
3178 	 * If un->un_f_dvdram_writable_device is still FALSE,
3179 	 * check for a Removable Rigid Disk (RRD).  An RRD
3180 	 * device is identified by the features RANDOM_WRITABLE and
3181 	 * HARDWARE_DEFECT_MANAGEMENT.
3182 	 */
3183 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3184 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3185 
3186 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3187 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3188 	    RANDOM_WRITABLE);
3189 	if (rtn != 0) {
3190 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3191 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3192 		return;
3193 	}
3194 
3195 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3196 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3197 
3198 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3199 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3200 	    HARDWARE_DEFECT_MANAGEMENT);
3201 	if (rtn == 0) {
3202 		/*
3203 		 * We have good information, check for random writable
3204 		 * and hardware defect features.
3205 		 */
3206 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3207 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3208 			un->un_f_dvdram_writable_device = TRUE;
3209 		}
3210 	}
3211 
3212 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3213 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3214 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3215 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3216 }
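
/*
 * Illustrative sketch (hypothetical helper): the two page 0x2A bits
 * tested above, in isolation.  sense_page points at the CD capabilities
 * mode page itself; byte 5 bit 0 reports CD-DA read support and
 * byte 3 bit 5 reports DVD-RAM write support.
 */
static void
sd_example_decode_cdrom_cap(uchar_t *sense_page, int *cdda_p, int *dvdram_p)
{
	*cdda_p = (sense_page[5] & 0x01) ? TRUE : FALSE;
	*dvdram_p = (sense_page[3] & 0x20) ? TRUE : FALSE;
}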
3217 
3218 /*
3219  *    Function: sd_check_for_writable_cd
3220  *
3221  * Description: This routine determines if the media in the device is
3222  *		writable or not. It uses the get configuration command (0x46)
3223  *		to determine if the media is writable.
3224  *
3225  *   Arguments: un - driver soft state (unit) structure
3226  *
3227  *     Context: Never called at interrupt context.
3228  */
3229 
3230 static void
3231 sd_check_for_writable_cd(struct sd_lun *un)
3232 {
3233 	struct uscsi_cmd		com;
3234 	uchar_t				*out_data;
3235 	uchar_t				*rqbuf;
3236 	int				rtn;
3237 	uchar_t				*out_data_rw, *out_data_hd;
3238 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3239 	struct mode_header_grp2		*sense_mhp;
3240 	uchar_t				*sense_page;
3241 	caddr_t				buf;
3242 	int				bd_len;
3243 	int				status;
3244 
3245 	ASSERT(un != NULL);
3246 	ASSERT(mutex_owned(SD_MUTEX(un)));
3247 
3248 	/*
3249 	 * Initialize writable media to FALSE; it is set to TRUE only if
3250 	 * the configuration info tells us the media is writable.
3251 	 */
3252 	un->un_f_mmc_writable_media = FALSE;
3253 	mutex_exit(SD_MUTEX(un));
3254 
3255 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3256 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3257 
3258 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3259 	    out_data, SD_PROFILE_HEADER_LEN);
3260 
3261 	mutex_enter(SD_MUTEX(un));
3262 	if (rtn == 0) {
3263 		/*
3264 		 * We have good information, check for writable DVD.
3265 		 */
3266 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3267 			un->un_f_mmc_writable_media = TRUE;
3268 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3269 			kmem_free(rqbuf, SENSE_LENGTH);
3270 			return;
3271 		}
3272 	}
3273 
3274 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3275 	kmem_free(rqbuf, SENSE_LENGTH);
3276 
3277 	/*
3278 	 * Determine if this is a RRD type device.
3279 	 */
3280 	mutex_exit(SD_MUTEX(un));
3281 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3282 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3283 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3284 	mutex_enter(SD_MUTEX(un));
3285 	if (status != 0) {
3286 		/* command failed; just return */
3287 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3288 		return;
3289 	}
3290 
3291 	/* Get to the page data */
3292 	sense_mhp = (struct mode_header_grp2 *)buf;
3293 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3294 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3295 		/*
3296 		 * We did not get back the expected block descriptor length so
3297 		 * we cannot check the mode page.
3298 		 */
3299 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3300 		    "sd_check_for_writable_cd: Mode Sense returned "
3301 		    "invalid block descriptor length\n");
3302 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3303 		return;
3304 	}
3305 
3306 	/*
3307 	 * If the device presents DVD or CD capabilities in the mode
3308 	 * page, we can return here since an RRD device will not have
3309 	 * these capabilities.
3310 	 */
3311 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3312 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3313 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3314 		return;
3315 	}
3316 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3317 
3318 	/*
3319 	 * If un->un_f_mmc_writable_media is still FALSE,
3320 	 * check for RRD type media.  An RRD device is identified
3321 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3322 	 */
3323 	mutex_exit(SD_MUTEX(un));
3324 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3325 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3326 
3327 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3328 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3329 	    RANDOM_WRITABLE);
3330 	if (rtn != 0) {
3331 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3332 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3333 		mutex_enter(SD_MUTEX(un));
3334 		return;
3335 	}
3336 
3337 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3338 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3339 
3340 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3341 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3342 	    HARDWARE_DEFECT_MANAGEMENT);
3343 	mutex_enter(SD_MUTEX(un));
3344 	if (rtn == 0) {
3345 		/*
3346 		 * We have good information, check for random writable
3347 		 * and hardware defect features as current.
3348 		 */
3349 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3350 		    (out_data_rw[10] & 0x1) &&
3351 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3352 		    (out_data_hd[10] & 0x1)) {
3353 			un->un_f_mmc_writable_media = TRUE;
3354 		}
3355 	}
3356 
3357 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3358 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3359 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3360 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3361 }
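
/*
 * Illustrative sketch (hypothetical helper): the current-profile test
 * used above.  Bytes 6-7 of the GET CONFIGURATION feature header hold
 * the current profile, big-endian; per MMC, profile 0x0012 is DVD-RAM,
 * which the routine above treats as writable media.
 */
static int
sd_example_profile_is_dvdram(uchar_t *feature_hdr)
{
	uint16_t	profile;

	profile = (feature_hdr[6] << 8) | feature_hdr[7];
	return ((profile == 0x0012) ? TRUE : FALSE);
}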
3362 
3363 /*
3364  *    Function: sd_read_unit_properties
3365  *
3366  * Description: The following implements a property lookup mechanism.
3367  *		Properties for particular disks (keyed on vendor, model
3368  *		and rev numbers) are sought in the sd.conf file via
3369  *		sd_process_sdconf_file(), and if not found there, are
3370  *		looked for in a list hardcoded in this driver via
3371  *		sd_process_sdconf_table().  Once located, the properties
3372  *		are used to update the driver unit structure.
3373  *
3374  *   Arguments: un - driver soft state (unit) structure
3375  */
3376 
3377 static void
3378 sd_read_unit_properties(struct sd_lun *un)
3379 {
3380 	/*
3381 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3382 	 * the "sd-config-list" property (from the sd.conf file) or if
3383 	 * there was not a match for the inquiry vid/pid. If this event
3384 	 * occurs the static driver configuration table is searched for
3385 	 * a match.
3386 	 */
3387 	ASSERT(un != NULL);
3388 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3389 		sd_process_sdconf_table(un);
3390 	}
3391 
3392 	/* check for LSI device */
3393 	sd_is_lsi(un);
3394 
3395 
3396 }
3397 
3398 
3399 /*
3400  *    Function: sd_process_sdconf_file
3401  *
3402  * Description: Use ddi_getlongprop to obtain the properties from the
3403  *		driver's config file (i.e., sd.conf) and update the driver
3404  *		soft state structure accordingly.
3405  *
3406  *   Arguments: un - driver soft state (unit) structure
3407  *
3408  * Return Code: SD_SUCCESS - The properties were successfully set according
3409  *			     to the driver configuration file.
3410  *		SD_FAILURE - The driver config list was not obtained or
3411  *			     there was no vid/pid match. This indicates that
3412  *			     the static config table should be used.
3413  *
3414  * The config file has a property, "sd-config-list", which consists of
3415  * one or more duplets as follows:
3416  *
3417  *  sd-config-list=
3418  *	<duplet>,
3419  *	[<duplet>,]
3420  *	[<duplet>];
3421  *
3422  * The structure of each duplet is as follows:
3423  *
3424  *  <duplet>:= <vid+pid>,<data-property-name_list>
3425  *
3426  * The first entry of the duplet is the device ID string (the concatenated
3427  * vid & pid; not to be confused with a device_id).  This is defined in
3428  * the same way as in the sd_disk_table.
3429  *
3430  * The second part of the duplet is a string that identifies a
3431  * data-property-name-list. The data-property-name-list is defined as
3432  * follows:
3433  *
3434  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3435  *
3436  * The syntax of <data-property-name> depends on the <version> field.
3437  *
3438  * If version = SD_CONF_VERSION_1 we have the following syntax:
3439  *
3440  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3441  *
3442  * where the prop0 value will be used to set prop0 if bit0 is set in the
3443  * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3444  *
3445  */
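
/*
 * For illustration, a hypothetical sd.conf fragment using the version 1
 * syntax above (the vid/pid, property name, and values are invented):
 *
 *	sd-config-list= "ACME    ExampleDisk", "acme-disk-data";
 *	acme-disk-data= 1,0x1,32;
 *
 * Here <version> is SD_CONF_VERSION_1, the flags word 0x1 has only bit0
 * set (assumed here to select the throttle property), and 32 is the
 * value prop0 (the throttle) would be set to.
 */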
3446 
3447 static int
3448 sd_process_sdconf_file(struct sd_lun *un)
3449 {
3450 	char	*config_list = NULL;
3451 	int	config_list_len;
3452 	int	len;
3453 	int	dupletlen = 0;
3454 	char	*vidptr;
3455 	int	vidlen;
3456 	char	*dnlist_ptr;
3457 	char	*dataname_ptr;
3458 	int	dnlist_len;
3459 	int	dataname_len;
3460 	int	*data_list;
3461 	int	data_list_len;
3462 	int	rval = SD_FAILURE;
3463 	int	i;
3464 
3465 	ASSERT(un != NULL);
3466 
3467 	/* Obtain the configuration list associated with the .conf file */
3468 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3469 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3470 	    != DDI_PROP_SUCCESS) {
3471 		return (SD_FAILURE);
3472 	}
3473 
3474 	/*
3475 	 * Compare vids in each duplet to the inquiry vid - if a match is
3476 	 * made, get the data value and update the soft state structure
3477 	 * accordingly.
3478 	 *
3479 	 * Note: This algorithm is complex and difficult to maintain. It should
3480 	 * be replaced with a more robust implementation.
3481 	 */
3482 	for (len = config_list_len, vidptr = config_list; len > 0;
3483 	    vidptr += dupletlen, len -= dupletlen) {
3484 		/*
3485 		 * Note: The assumption here is that each vid entry is on
3486 		 * a separate line from its associated duplet.
3487 		 */
3488 		vidlen = dupletlen = (int)strlen(vidptr);
3489 		if ((vidlen == 0) ||
3490 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3491 			dupletlen++;
3492 			continue;
3493 		}
3494 
3495 		/*
3496 		 * dnlist contains 1 or more blank separated
3497 		 * data-property-name entries
3498 		 */
3499 		dnlist_ptr = vidptr + vidlen + 1;
3500 		dnlist_len = (int)strlen(dnlist_ptr);
3501 		dupletlen += dnlist_len + 2;
3502 
3503 		/*
3504 		 * Set a pointer for the first data-property-name
3505 		 * entry in the list
3506 		 */
3507 		dataname_ptr = dnlist_ptr;
3508 		dataname_len = 0;
3509 
3510 		/*
3511 		 * Loop through all data-property-name entries in the
3512 		 * data-property-name-list setting the properties for each.
3513 		 */
3514 		while (dataname_len < dnlist_len) {
3515 			int version;
3516 
3517 			/*
3518 			 * Determine the length of the current
3519 			 * data-property-name entry by indexing until a
3520 			 * blank or NULL is encountered. When the space is
3521 			 * encountered, reset it to a NULL for compliance
3522 			 * with ddi_getlongprop().
3523 			 */
3524 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3525 			    (dataname_ptr[i] != '\0')); i++) {
3526 				;
3527 			}
3528 
3529 			dataname_len += i;
3530 			/* If not null terminated, make it so */
3531 			if (dataname_ptr[i] == ' ') {
3532 				dataname_ptr[i] = '\0';
3533 			}
3534 			dataname_len++;
3535 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3536 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3537 			    vidptr, dataname_ptr);
3538 
3539 			/* Get the data list */
3540 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3541 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3542 			    != DDI_PROP_SUCCESS) {
3543 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3544 				    "sd_process_sdconf_file: data property (%s)"
3545 				    " has no value\n", dataname_ptr);
3546 				dataname_ptr = dnlist_ptr + dataname_len;
3547 				continue;
3548 			}
3549 
3550 			version = data_list[0];
3551 
3552 			if (version == SD_CONF_VERSION_1) {
3553 				sd_tunables values;
3554 
3555 				/* Set the properties */
3556 				if (sd_chk_vers1_data(un, data_list[1],
3557 				    &data_list[2], data_list_len, dataname_ptr)
3558 				    == SD_SUCCESS) {
3559 					sd_get_tunables_from_conf(un,
3560 					    data_list[1], &data_list[2],
3561 					    &values);
3562 					sd_set_vers1_properties(un,
3563 					    data_list[1], &values);
3564 					rval = SD_SUCCESS;
3565 				} else {
3566 					rval = SD_FAILURE;
3567 				}
3568 			} else {
3569 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3570 				    "data property %s version 0x%x is invalid.",
3571 				    dataname_ptr, version);
3572 				rval = SD_FAILURE;
3573 			}
3574 			kmem_free(data_list, data_list_len);
3575 			dataname_ptr = dnlist_ptr + dataname_len;
3576 		}
3577 	}
3578 
3579 	/* free up the memory allocated by ddi_getlongprop */
3580 	if (config_list) {
3581 		kmem_free(config_list, config_list_len);
3582 	}
3583 
3584 	return (rval);
3585 }
3586 
3587 /*
3588  *    Function: sd_get_tunables_from_conf()
3589  *
3590  *
3591  *    This function reads the data list from the sd.conf file and pulls
3592  *    This function reads the data list from the sd.conf file and
3593  *    pulls the numeric property values, placing each value in the
3594  *    appropriate sd_tunables member.
3595  *    Since the order of the data list members varies across platforms,
3596  *    this function reads them from the data list in a platform-specific
3597  *    order and places them into the correct sd_tunables member, which
3598  *    is consistent across all platforms.
3599 static void
3600 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3601     sd_tunables *values)
3602 {
3603 	int i;
3604 	int mask;
3605 
3606 	bzero(values, sizeof (sd_tunables));
3607 
3608 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3609 
3610 		mask = 1 << i;
3611 		if (mask > flags) {
3612 			break;
3613 		}
3614 
3615 		switch (mask & flags) {
3616 		case 0:	/* This mask bit not set in flags */
3617 			continue;
3618 		case SD_CONF_BSET_THROTTLE:
3619 			values->sdt_throttle = data_list[i];
3620 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3621 			    "sd_get_tunables_from_conf: throttle = %d\n",
3622 			    values->sdt_throttle);
3623 			break;
3624 		case SD_CONF_BSET_CTYPE:
3625 			values->sdt_ctype = data_list[i];
3626 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3627 			    "sd_get_tunables_from_conf: ctype = %d\n",
3628 			    values->sdt_ctype);
3629 			break;
3630 		case SD_CONF_BSET_NRR_COUNT:
3631 			values->sdt_not_rdy_retries = data_list[i];
3632 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3633 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3634 			    values->sdt_not_rdy_retries);
3635 			break;
3636 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3637 			values->sdt_busy_retries = data_list[i];
3638 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3639 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3640 			    values->sdt_busy_retries);
3641 			break;
3642 		case SD_CONF_BSET_RST_RETRIES:
3643 			values->sdt_reset_retries = data_list[i];
3644 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3645 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3646 			    values->sdt_reset_retries);
3647 			break;
3648 		case SD_CONF_BSET_RSV_REL_TIME:
3649 			values->sdt_reserv_rel_time = data_list[i];
3650 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3651 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3652 			    values->sdt_reserv_rel_time);
3653 			break;
3654 		case SD_CONF_BSET_MIN_THROTTLE:
3655 			values->sdt_min_throttle = data_list[i];
3656 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3657 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3658 			    values->sdt_min_throttle);
3659 			break;
3660 		case SD_CONF_BSET_DISKSORT_DISABLED:
3661 			values->sdt_disk_sort_dis = data_list[i];
3662 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3663 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3664 			    values->sdt_disk_sort_dis);
3665 			break;
3666 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3667 			values->sdt_lun_reset_enable = data_list[i];
3668 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3669 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3670 			    "\n", values->sdt_lun_reset_enable);
3671 			break;
3672 		}
3673 	}
3674 }
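
/*
 * Illustrative sketch (hypothetical helper): the positional convention
 * walked above.  The value for the property assigned to flag bit i
 * always lives at data_list[i], whether or not lower-numbered bits are
 * set, so unset bits leave unused holes in the list rather than
 * compacting it.
 */
static int
sd_example_get_prop_value(int flags, int *data_list, int bit, int *value_p)
{
	if ((flags & (1 << bit)) == 0) {
		return (SD_FAILURE);	/* property not supplied */
	}
	*value_p = data_list[bit];
	return (SD_SUCCESS);
}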
3675 
3676 /*
3677  *    Function: sd_process_sdconf_table
3678  *
3679  * Description: Search the static configuration table for a match on the
3680  *		inquiry vid/pid and update the driver soft state structure
3681  *		according to the table property values for the device.
3682  *
3683  *		The form of a configuration table entry is:
3684  *		  <vid+pid>,<flags>,<property-data>
3685  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3686  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3687  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3688  *
3689  *   Arguments: un - driver soft state (unit) structure
3690  */
3691 
3692 static void
3693 sd_process_sdconf_table(struct sd_lun *un)
3694 {
3695 	char	*id = NULL;
3696 	int	table_index;
3697 	int	idlen;
3698 
3699 	ASSERT(un != NULL);
3700 	for (table_index = 0; table_index < sd_disk_table_size;
3701 	    table_index++) {
3702 		id = sd_disk_table[table_index].device_id;
3703 		idlen = strlen(id);
3704 		if (idlen == 0) {
3705 			continue;
3706 		}
3707 
3708 		/*
3709 		 * The static configuration table currently does not
3710 		 * implement version 10 properties. Additionally,
3711 		 * multiple data-property-name entries are not
3712 		 * implemented in the static configuration table.
3713 		 */
3714 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3715 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3716 			    "sd_process_sdconf_table: disk %s\n", id);
3717 			sd_set_vers1_properties(un,
3718 			    sd_disk_table[table_index].flags,
3719 			    sd_disk_table[table_index].properties);
3720 			break;
3721 		}
3722 	}
3723 }
3724 
3725 
3726 /*
3727  *    Function: sd_sdconf_id_match
3728  *
3729  * Description: This local function implements a case insensitive vid/pid
3730  *		comparison as well as the boundary cases of wild card and
3731  *		multiple blanks.
3732  *
3733  *		Note: An implicit assumption made here is that the scsi
3734  *		inquiry structure will always keep the vid, pid and
3735  *		revision strings in consecutive sequence, so they can be
3736  *		read as a single string. If this assumption is not the
3737  *		case, a separate string, to be used for the check, needs
3738  *		to be built with these strings concatenated.
3739  *
3740  *   Arguments: un - driver soft state (unit) structure
3741  *		id - table or config file vid/pid
3742  *		idlen  - length of the vid/pid (bytes)
3743  *
3744  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3745  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3746  */
3747 
3748 static int
3749 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3750 {
3751 	struct scsi_inquiry	*sd_inq;
3752 	int 			rval = SD_SUCCESS;
3753 
3754 	ASSERT(un != NULL);
3755 	sd_inq = un->un_sd->sd_inq;
3756 	ASSERT(id != NULL);
3757 
3758 	/*
3759 	 * We use the inq_vid as a pointer to a buffer containing the
3760 	 * vid and pid and use the entire vid/pid length of the table
3761 	 * entry for the comparison. This works because the inq_pid
3762 	 * data member follows inq_vid in the scsi_inquiry structure.
3763 	 */
3764 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3765 		/*
3766 		 * The user id string is compared to the inquiry vid/pid
3767 		 * using a case insensitive comparison and ignoring
3768 		 * multiple spaces.
3769 		 */
3770 		rval = sd_blank_cmp(un, id, idlen);
3771 		if (rval != SD_SUCCESS) {
3772 			/*
3773 			 * User id strings that start and end with a "*"
3774 			 * are a special case. These do not have a
3775 			 * specific vendor, and the product string can
3776 			 * appear anywhere in the 16 byte PID portion of
3777 			 * the inquiry data. This is a simple strstr()
3778 			 * type search for the user id in the inquiry data.
3779 			 */
3780 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3781 				char	*pidptr = &id[1];
3782 				int	i;
3783 				int	j;
3784 				int	pidstrlen = idlen - 2;
3785 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3786 				    pidstrlen;
3787 
3788 				if (j < 0) {
3789 					return (SD_FAILURE);
3790 				}
3791 				for (i = 0; i <= j; i++) {
3792 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3793 					    pidptr, pidstrlen) == 0) {
3794 						rval = SD_SUCCESS;
3795 						break;
3796 					}
3797 				}
3798 			}
3799 		}
3800 	}
3801 	return (rval);
3802 }
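
/*
 * Illustrative sketch (hypothetical helper): the "*str*" wildcard scan
 * performed above, as a self-contained routine.  The pattern body is
 * slid across every window of the fixed-length, unterminated pid field,
 * so the product string may appear anywhere within it.
 */
static int
sd_example_wildcard_match(char *pid, int pidlen, char *pattern, int patlen)
{
	char	*body = &pattern[1];	/* skip the leading '*' */
	int	bodylen = patlen - 2;	/* drop both '*' characters */
	int	i;

	if ((bodylen <= 0) || (bodylen > pidlen)) {
		return (SD_FAILURE);
	}
	for (i = 0; i <= (pidlen - bodylen); i++) {
		if (bcmp(&pid[i], body, bodylen) == 0) {
			return (SD_SUCCESS);
		}
	}
	return (SD_FAILURE);
}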
3803 
3804 
3805 /*
3806  *    Function: sd_blank_cmp
3807  *
3808  * Description: If the id string starts and ends with a space, treat
3809  *		multiple consecutive spaces as equivalent to a single
3810  *		space. For example, this causes a sd_disk_table entry
3811  *		of " NEC CDROM " to match a device's id string of
3812  *		"NEC       CDROM".
3813  *
3814  *		Note: The success exit condition for this routine is that
3815  *		the table entry pointer has reached its '\0' terminator and
3816  *		the count of remaining inquiry bytes is zero.  This happens
3817  *		if the device's inquiry string is padded with spaces to be
3818  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3819  *		SCSI spec states that the inquiry string is to be padded with
3820  *		spaces.
3821  *
3822  *   Arguments: un - driver soft state (unit) structure
3823  *		id - table or config file vid/pid
3824  *		idlen  - length of the vid/pid (bytes)
3825  *
3826  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3827  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3828  */
3829 
3830 static int
3831 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3832 {
3833 	char		*p1;
3834 	char		*p2;
3835 	int		cnt;
3836 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3837 	    sizeof (SD_INQUIRY(un)->inq_pid);
3838 
3839 	ASSERT(un != NULL);
3840 	p2 = un->un_sd->sd_inq->inq_vid;
3841 	ASSERT(id != NULL);
3842 	p1 = id;
3843 
3844 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3845 		/*
3846 		 * Note: string p1 is terminated by a NUL but string p2
3847 		 * isn't.  The end of p2 is determined by cnt.
3848 		 */
3849 		for (;;) {
3850 			/* skip over any extra blanks in both strings */
3851 			while ((*p1 != '\0') && (*p1 == ' ')) {
3852 				p1++;
3853 			}
3854 			while ((cnt != 0) && (*p2 == ' ')) {
3855 				p2++;
3856 				cnt--;
3857 			}
3858 
3859 			/* compare the two strings */
3860 			if ((cnt == 0) ||
3861 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3862 				break;
3863 			}
3864 			while ((cnt > 0) &&
3865 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3866 				p1++;
3867 				p2++;
3868 				cnt--;
3869 			}
3870 		}
3871 	}
3872 
3873 	/* return SD_SUCCESS if both strings match */
3874 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3875 }
3876 
3877 
3878 /*
3879  *    Function: sd_chk_vers1_data
3880  *
3881  * Description: Verify the version 1 device properties provided by the
3882  *		user via the configuration file
3883  *
3884  *   Arguments: un	     - driver soft state (unit) structure
3885  *		flags	     - integer mask indicating properties to be set
3886  *		prop_list    - integer list of property values
3887  *		list_len     - length of user provided data
3888  *
3889  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3890  *		SD_FAILURE - Indicates the user provided data is invalid
3891  */
3892 
3893 static int
3894 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3895     int list_len, char *dataname_ptr)
3896 {
3897 	int i;
3898 	int mask = 1;
3899 	int index = 0;
3900 
3901 	ASSERT(un != NULL);
3902 
3903 	/* Check for a NULL property name and list */
3904 	if (dataname_ptr == NULL) {
3905 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3906 		    "sd_chk_vers1_data: NULL data property name.");
3907 		return (SD_FAILURE);
3908 	}
3909 	if (prop_list == NULL) {
3910 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3911 		    "sd_chk_vers1_data: %s NULL data property list.",
3912 		    dataname_ptr);
3913 		return (SD_FAILURE);
3914 	}
3915 
3916 	/* Display a warning if undefined bits are set in the flags */
3917 	if (flags & ~SD_CONF_BIT_MASK) {
3918 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3919 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3920 		    "Properties not set.",
3921 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3922 		return (SD_FAILURE);
3923 	}
3924 
3925 	/*
3926 	 * Verify the length of the list by counting the bits set in the
3927 	 * flags and validating that the property list is long enough to
3928 	 * hold a value for each set bit, plus the version and flags words.
3929 	 */
3930 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3931 		if (flags & mask) {
3932 			index++;
3933 		}
3934 		mask <<= 1;	/* test the next flag bit */
3935 	}
3936 	if ((list_len / sizeof (int)) < (index + 2)) {
3937 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3938 		    "sd_chk_vers1_data: "
3939 		    "Data property list %s size is incorrect. "
3940 		    "Properties not set.", dataname_ptr);
3941 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3942 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3943 		return (SD_FAILURE);
3944 	}
3945 	return (SD_SUCCESS);
3946 }
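
/*
 * Illustrative worked example of the length rule above (hypothetical
 * values): flags = 0x5 sets bit0 and bit2, so index ends up at 2 and
 * the list must hold at least index + 2 = 4 words -- <version>,
 * <flags>, and one word per set bit.  A 3-word list would be rejected.
 */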
3947 
3948 
3949 /*
3950  *    Function: sd_set_vers1_properties
3951  *
3952  * Description: Set version 1 device properties based on a property list
3953  *		retrieved from the driver configuration file or static
3954  *		configuration table. Version 1 properties have the format:
3955  *
3956  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3957  *
3958  *		where the prop0 value will be used to set prop0 if bit0
3959  *		is set in the flags
3960  *
3961  *   Arguments: un	     - driver soft state (unit) structure
3962  *		flags	     - integer mask indicating properties to be set
3963  *		prop_list    - integer list of property values
3964  */
3965 
3966 static void
3967 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3968 {
3969 	ASSERT(un != NULL);
3970 
3971 	/*
3972 	 * Set the flag to indicate cache is to be disabled. An attempt
3973 	 * to disable the cache via sd_cache_control() will be made
3974 	 * later during attach once the basic initialization is complete.
3975 	 */
3976 	if (flags & SD_CONF_BSET_NOCACHE) {
3977 		un->un_f_opt_disable_cache = TRUE;
3978 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3979 		    "sd_set_vers1_properties: caching disabled flag set\n");
3980 	}
3981 
3982 	/* CD-specific configuration parameters */
3983 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3984 		un->un_f_cfg_playmsf_bcd = TRUE;
3985 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3986 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3987 	}
3988 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3989 		un->un_f_cfg_readsub_bcd = TRUE;
3990 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3991 		    "sd_set_vers1_properties: readsub_bcd set\n");
3992 	}
3993 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3994 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3995 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3996 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3997 	}
3998 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3999 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4000 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4001 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4002 	}
4003 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4004 		un->un_f_cfg_no_read_header = TRUE;
4005 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4006 		    "sd_set_vers1_properties: no_read_header set\n");
4007 	}
4008 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4009 		un->un_f_cfg_read_cd_xd4 = TRUE;
4010 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4011 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4012 	}
4013 
4014 	/* Support for devices which do not have valid/unique serial numbers */
4015 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4016 		un->un_f_opt_fab_devid = TRUE;
4017 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4018 		    "sd_set_vers1_properties: fab_devid bit set\n");
4019 	}
4020 
4021 	/* Support for user throttle configuration */
4022 	if (flags & SD_CONF_BSET_THROTTLE) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_saved_throttle = un->un_throttle =
4025 		    prop_list->sdt_throttle;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: throttle set to %d\n",
4028 		    prop_list->sdt_throttle);
4029 	}
4030 
4031 	/* Set the per disk retry count according to the conf file or table. */
4032 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4033 		ASSERT(prop_list != NULL);
4034 		if (prop_list->sdt_not_rdy_retries) {
4035 			un->un_notready_retry_count =
4036 			    prop_list->sdt_not_rdy_retries;
4037 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4038 			    "sd_set_vers1_properties: not ready retry count"
4039 			    " set to %d\n", un->un_notready_retry_count);
4040 		}
4041 	}
4042 
4043 	/* The controller type is reported for generic disk driver ioctls */
4044 	if (flags & SD_CONF_BSET_CTYPE) {
4045 		ASSERT(prop_list != NULL);
4046 		switch (prop_list->sdt_ctype) {
4047 		case CTYPE_CDROM:
4048 			un->un_ctype = prop_list->sdt_ctype;
4049 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4050 			    "sd_set_vers1_properties: ctype set to "
4051 			    "CTYPE_CDROM\n");
4052 			break;
4053 		case CTYPE_CCS:
4054 			un->un_ctype = prop_list->sdt_ctype;
4055 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4056 			    "sd_set_vers1_properties: ctype set to "
4057 			    "CTYPE_CCS\n");
4058 			break;
4059 		case CTYPE_ROD:		/* RW optical */
4060 			un->un_ctype = prop_list->sdt_ctype;
4061 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4062 			    "sd_set_vers1_properties: ctype set to "
4063 			    "CTYPE_ROD\n");
4064 			break;
4065 		default:
4066 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4067 			    "sd_set_vers1_properties: Could not set "
4068 			    "invalid ctype value (%d)",
4069 			    prop_list->sdt_ctype);
4070 		}
4071 	}
4072 
4073 	/* Purple failover timeout */
4074 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4075 		ASSERT(prop_list != NULL);
4076 		un->un_busy_retry_count =
4077 		    prop_list->sdt_busy_retries;
4078 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4079 		    "sd_set_vers1_properties: "
4080 		    "busy retry count set to %d\n",
4081 		    un->un_busy_retry_count);
4082 	}
4083 
4084 	/* Purple reset retry count */
4085 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4086 		ASSERT(prop_list != NULL);
4087 		un->un_reset_retry_count =
4088 		    prop_list->sdt_reset_retries;
4089 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4090 		    "sd_set_vers1_properties: "
4091 		    "reset retry count set to %d\n",
4092 		    un->un_reset_retry_count);
4093 	}
4094 
4095 	/* Purple reservation release timeout */
4096 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4097 		ASSERT(prop_list != NULL);
4098 		un->un_reserve_release_time =
4099 		    prop_list->sdt_reserv_rel_time;
4100 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4101 		    "sd_set_vers1_properties: "
4102 		    "reservation release timeout set to %d\n",
4103 		    un->un_reserve_release_time);
4104 	}
4105 
4106 	/*
4107 	 * Flag telling the driver to verify that no commands are pending
4108 	 * for a device before issuing a Test Unit Ready. This is a workaround
4109 	 * for a firmware bug in some Seagate eliteI drives.
4110 	 */
4111 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4112 		un->un_f_cfg_tur_check = TRUE;
4113 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4114 		    "sd_set_vers1_properties: tur queue check set\n");
4115 	}
4116 
4117 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4118 		un->un_min_throttle = prop_list->sdt_min_throttle;
4119 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4120 		    "sd_set_vers1_properties: min throttle set to %d\n",
4121 		    un->un_min_throttle);
4122 	}
4123 
4124 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4125 		un->un_f_disksort_disabled =
4126 		    (prop_list->sdt_disk_sort_dis != 0) ?
4127 		    TRUE : FALSE;
4128 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4129 		    "sd_set_vers1_properties: disksort disabled "
4130 		    "flag set to %d\n",
4131 		    prop_list->sdt_disk_sort_dis);
4132 	}
4133 
4134 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4135 		un->un_f_lun_reset_enabled =
4136 		    (prop_list->sdt_lun_reset_enable != 0) ?
4137 		    TRUE : FALSE;
4138 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4139 		    "sd_set_vers1_properties: lun reset enabled "
4140 		    "flag set to %d\n",
4141 		    prop_list->sdt_lun_reset_enable);
4142 	}
4143 
4144 	/*
4145 	 * Validate the throttle values.
4146 	 * If any of the numbers are invalid, set everything to defaults.
4147 	 */
4148 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4149 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4150 	    (un->un_min_throttle > un->un_throttle)) {
4151 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4152 		un->un_min_throttle = sd_min_throttle;
4153 	}
4154 }
4155 
4156 /*
4157  *   Function: sd_is_lsi()
4158  *
4159  *   Description: Check for lsi devices, step throught the static device
4160  *   Description: Check for LSI devices by stepping through the static
4161  *	device table to match the vid/pid.
4162  *   Args: un - ptr to sd_lun
4163  *
4164  *   Notes:  When creating a new LSI property, it must also be added
4165  *		to this function.
4166  */
4167 static void
4168 sd_is_lsi(struct sd_lun *un)
4169 {
4170 	char	*id = NULL;
4171 	int	table_index;
4172 	int	idlen;
4173 	void	*prop;
4174 
4175 	ASSERT(un != NULL);
4176 	for (table_index = 0; table_index < sd_disk_table_size;
4177 	    table_index++) {
4178 		id = sd_disk_table[table_index].device_id;
4179 		idlen = strlen(id);
4180 		if (idlen == 0) {
4181 			continue;
4182 		}
4183 
4184 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4185 			prop = sd_disk_table[table_index].properties;
4186 			if (prop == &lsi_properties ||
4187 			    prop == &lsi_oem_properties ||
4188 			    prop == &lsi_properties_scsi ||
4189 			    prop == &symbios_properties) {
4190 				un->un_f_cfg_is_lsi = TRUE;
4191 			}
4192 			break;
4193 		}
4194 	}
4195 }
4196 
4197 /*
4198  *    Function: sd_get_physical_geometry
4199  *
4200  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4201  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4202  *		target, and use this information to initialize the physical
4203  *		geometry cache specified by pgeom_p.
4204  *
4205  *		MODE SENSE is an optional command, so failure in this case
4206  *		does not necessarily denote an error. We want to use the
4207  *		MODE SENSE commands to derive the physical geometry of the
4208  *		device, but if either command fails, the logical geometry is
4209  *		used as the fallback for disk label geometry in cmlb.
4210  *
4211  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4212  *		have already been initialized for the current target and
4213  *		that the current values be passed as args so that we don't
4214  *		end up ever trying to use -1 as a valid value. This could
4215  *		happen if either value is reset while we're not holding
4216  *		the mutex.
4217  *
4218  *   Arguments: un - driver soft state (unit) structure
4219  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4220  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4221  *			to use the USCSI "direct" chain and bypass the normal
4222  *			command waitq.
4223  *
4224  *     Context: Kernel thread only (can sleep).
4225  */
4226 
4227 static int
4228 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4229 	diskaddr_t capacity, int lbasize, int path_flag)
4230 {
4231 	struct	mode_format	*page3p;
4232 	struct	mode_geometry	*page4p;
4233 	struct	mode_header	*headerp;
4234 	int	sector_size;
4235 	int	nsect;
4236 	int	nhead;
4237 	int	ncyl;
4238 	int	intrlv;
4239 	int	spc;
4240 	diskaddr_t	modesense_capacity;
4241 	int	rpm;
4242 	int	bd_len;
4243 	int	mode_header_length;
4244 	uchar_t	*p3bufp;
4245 	uchar_t	*p4bufp;
4246 	int	cdbsize;
4247 	int 	ret = EIO;
4248 
4249 	ASSERT(un != NULL);
4250 
4251 	if (lbasize == 0) {
4252 		if (ISCD(un)) {
4253 			lbasize = 2048;
4254 		} else {
4255 			lbasize = un->un_sys_blocksize;
4256 		}
4257 	}
4258 	pgeom_p->g_secsize = (unsigned short)lbasize;
4259 
4260 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4261 
4262 	/*
4263 	 * Retrieve MODE SENSE page 3 - Format Device Page
4264 	 */
4265 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4266 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4267 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4268 	    != 0) {
4269 		SD_ERROR(SD_LOG_COMMON, un,
4270 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4271 		goto page3_exit;
4272 	}
4273 
4274 	/*
4275 	 * Determine size of Block Descriptors in order to locate the mode
4276 	 * page data.  ATAPI devices return 0, SCSI devices should return
4277 	 * MODE_BLK_DESC_LENGTH.
4278 	 */
4279 	headerp = (struct mode_header *)p3bufp;
4280 	if (un->un_f_cfg_is_atapi == TRUE) {
4281 		struct mode_header_grp2 *mhp =
4282 		    (struct mode_header_grp2 *)headerp;
4283 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4284 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4285 	} else {
4286 		mode_header_length = MODE_HEADER_LENGTH;
4287 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4288 	}
4289 
4290 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4291 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4292 		    "received unexpected bd_len of %d, page3\n", bd_len);
4293 		goto page3_exit;
4294 	}
4295 
4296 	page3p = (struct mode_format *)
4297 	    ((caddr_t)headerp + mode_header_length + bd_len);
4298 
4299 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4300 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4301 		    "mode sense pg3 code mismatch %d\n",
4302 		    page3p->mode_page.code);
4303 		goto page3_exit;
4304 	}
4305 
4306 	/*
4307 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4308 	 * complete successfully; otherwise, revert to the logical geometry.
4309 	 * So, we need to save everything in temporary variables.
4310 	 */
4311 	sector_size = BE_16(page3p->data_bytes_sect);
4312 
4313 	/*
4314 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4315 	 */
4316 	if (sector_size == 0) {
4317 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
4318 	} else {
4319 		sector_size &= ~(un->un_sys_blocksize - 1);
4320 	}
4321 
4322 	nsect  = BE_16(page3p->sect_track);
4323 	intrlv = BE_16(page3p->interleave);
4324 
4325 	SD_INFO(SD_LOG_COMMON, un,
4326 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4327 	SD_INFO(SD_LOG_COMMON, un,
4328 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4329 	    page3p->mode_page.code, nsect, sector_size);
4330 	SD_INFO(SD_LOG_COMMON, un,
4331 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4332 	    BE_16(page3p->track_skew),
4333 	    BE_16(page3p->cylinder_skew));
4334 
4335 
4336 	/*
4337 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4338 	 */
4339 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4340 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4341 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4342 	    != 0) {
4343 		SD_ERROR(SD_LOG_COMMON, un,
4344 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4345 		goto page4_exit;
4346 	}
4347 
4348 	/*
4349 	 * Determine size of Block Descriptors in order to locate the mode
4350 	 * page data.  ATAPI devices return 0, SCSI devices should return
4351 	 * MODE_BLK_DESC_LENGTH.
4352 	 */
4353 	headerp = (struct mode_header *)p4bufp;
4354 	if (un->un_f_cfg_is_atapi == TRUE) {
4355 		struct mode_header_grp2 *mhp =
4356 		    (struct mode_header_grp2 *)headerp;
4357 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4358 	} else {
4359 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4360 	}
4361 
4362 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4363 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4364 		    "received unexpected bd_len of %d, page4\n", bd_len);
4365 		goto page4_exit;
4366 	}
4367 
4368 	page4p = (struct mode_geometry *)
4369 	    ((caddr_t)headerp + mode_header_length + bd_len);
4370 
4371 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4372 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4373 		    "mode sense pg4 code mismatch %d\n",
4374 		    page4p->mode_page.code);
4375 		goto page4_exit;
4376 	}
4377 
4378 	/*
4379 	 * Stash the data now, after we know that both commands completed.
4380 	 */
4381 
4382 
4383 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4384 	spc   = nhead * nsect;
4385 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4386 	rpm   = BE_16(page4p->rpm);
4387 
4388 	modesense_capacity = spc * ncyl;
4389 
4390 	SD_INFO(SD_LOG_COMMON, un,
4391 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4392 	SD_INFO(SD_LOG_COMMON, un,
4393 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4394 	SD_INFO(SD_LOG_COMMON, un,
4395 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4396 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4397 	    (void *)pgeom_p, capacity);
4398 
4399 	/*
4400 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4401 	 * the product of C * H * S returned by MODE SENSE >= that returned
4402 	 * by read capacity. This is an idiosyncrasy of the original x86
4403 	 * disk subsystem.
4404 	 */
4405 	if (modesense_capacity >= capacity) {
4406 		SD_INFO(SD_LOG_COMMON, un,
4407 		    "sd_get_physical_geometry: adjusting acyl; "
4408 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4409 		    (modesense_capacity - capacity + spc - 1) / spc);
4410 		if (sector_size != 0) {
4411 			/* 1243403: NEC D38x7 drives don't support sec size */
4412 			pgeom_p->g_secsize = (unsigned short)sector_size;
4413 		}
4414 		pgeom_p->g_nsect    = (unsigned short)nsect;
4415 		pgeom_p->g_nhead    = (unsigned short)nhead;
4416 		pgeom_p->g_capacity = capacity;
4417 		pgeom_p->g_acyl	    =
4418 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4419 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4420 	}
4421 
4422 	pgeom_p->g_rpm    = (unsigned short)rpm;
4423 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4424 	ret = 0;
4425 
4426 	SD_INFO(SD_LOG_COMMON, un,
4427 	    "sd_get_physical_geometry: mode sense geometry:\n");
4428 	SD_INFO(SD_LOG_COMMON, un,
4429 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4430 	    nsect, sector_size, intrlv);
4431 	SD_INFO(SD_LOG_COMMON, un,
4432 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4433 	    nhead, ncyl, rpm, modesense_capacity);
4434 	SD_INFO(SD_LOG_COMMON, un,
4435 	    "sd_get_physical_geometry: (cached)\n");
4436 	SD_INFO(SD_LOG_COMMON, un,
4437 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4438 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4439 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4440 	SD_INFO(SD_LOG_COMMON, un,
4441 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4442 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4443 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4444 
4445 page4_exit:
4446 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4447 page3_exit:
4448 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4449 
4450 	return (ret);
4451 }
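
/*
 * Illustrative worked example of the alternate-cylinder adjustment
 * above (hypothetical numbers): with nhead = 16 and nsect = 63,
 * spc = 1008.  If page 4 reports ncyl = 1000 (modesense_capacity =
 * 1008000 blocks) while READ CAPACITY reports 1000000 blocks, the
 * 8000 excess blocks become g_acyl = (8000 + 1007) / 1008 = 8
 * alternate cylinders, leaving g_ncyl = 1000 - 8 = 992.
 */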
4452 
4453 /*
4454  *    Function: sd_get_virtual_geometry
4455  *
4456  * Description: Ask the controller to tell us about the target device.
4457  *
4458  *   Arguments: un - pointer to softstate
4459  *		capacity - disk capacity in #blocks
4460  *		lbasize - disk block size in bytes
4461  *
4462  *     Context: Kernel thread only
4463  */
4464 
4465 static int
4466 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4467     diskaddr_t capacity, int lbasize)
4468 {
4469 	uint_t	geombuf;
4470 	int	spc;
4471 
4472 	ASSERT(un != NULL);
4473 
4474 	/* Set sector size, and total number of sectors */
4475 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4476 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4477 
4478 	/* Let the HBA tell us its geometry */
4479 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4480 
4481 	/* A value of -1 indicates an undefined "geometry" property */
4482 	if (geombuf == (-1)) {
4483 		return (EINVAL);
4484 	}
4485 
4486 	/* Initialize the logical geometry cache. */
4487 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4488 	lgeom_p->g_nsect   = geombuf & 0xffff;
4489 	lgeom_p->g_secsize = un->un_sys_blocksize;
4490 
4491 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4492 
4493 	/*
4494 	 * Note: The driver originally converted the capacity value from
4495 	 * target blocks to system blocks. However, the capacity value passed
4496 	 * to this routine is already in terms of system blocks (this scaling
4497 	 * is done when the READ CAPACITY command is issued and processed).
4498 	 * This 'error' may have gone undetected because the usage of g_ncyl
4499 	 * (which is based upon g_capacity) is very limited within the driver.
4500 	 */
4501 	lgeom_p->g_capacity = capacity;
4502 
4503 	/*
4504 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value.
4505 	 * The HBA may return zero values if the device has been removed.
4506 	 */
4507 	if (spc == 0) {
4508 		lgeom_p->g_ncyl = 0;
4509 	} else {
4510 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4511 	}
4512 	lgeom_p->g_acyl = 0;
4513 
4514 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4515 	return (0);
4516 
4517 }
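
/*
 * Illustrative sketch (hypothetical helper): the packing of the HBA
 * "geometry" capability word decoded above -- heads in the upper
 * 16 bits, sectors per track in the lower 16 bits.
 */
static void
sd_example_decode_geom_cap(uint_t geombuf, int *nhead_p, int *nsect_p)
{
	*nhead_p = (geombuf >> 16) & 0xffff;
	*nsect_p = geombuf & 0xffff;
}
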
4518 /*
4519  *    Function: sd_update_block_info
4520  *
4521  * Description: Save the new target sector size and capacity in the
4522  *		soft state, marking each nonzero value as valid.
4523  *
4524  *   Arguments: un: unit struct.
4525  *		lbasize: new target sector size
4526  *		capacity: new target capacity, i.e., block count
4527  *
4528  *     Context: Kernel thread context
4529  */
4530 
4531 static void
4532 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4533 {
4534 	if (lbasize != 0) {
4535 		un->un_tgt_blocksize = lbasize;
4536 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4537 	}
4538 
4539 	if (capacity != 0) {
4540 		un->un_blockcount		= capacity;
4541 		un->un_f_blockcount_is_valid	= TRUE;
4542 	}
4543 }
4544 
4545 
4546 /*
4547  *    Function: sd_register_devid
4548  *
4549  * Description: This routine will obtain the device id information from the
4550  *		target, obtain the serial number, and register the device
4551  *		id with the ddi framework.
4552  *
4553  *   Arguments: devi - the system's dev_info_t for the device.
4554  *		un - driver soft state (unit) structure
4555  *		reservation_flag - indicates if a reservation conflict
4556  *		occurred during attach
4557  *
4558  *     Context: Kernel Thread
4559  */
4560 static void
4561 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4562 {
4563 	int		rval		= 0;
4564 	uchar_t		*inq80		= NULL;
4565 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4566 	size_t		inq80_resid	= 0;
4567 	uchar_t		*inq83		= NULL;
4568 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4569 	size_t		inq83_resid	= 0;
4570 
4571 	ASSERT(un != NULL);
4572 	ASSERT(mutex_owned(SD_MUTEX(un)));
4573 	ASSERT((SD_DEVINFO(un)) == devi);
4574 
4575 	/*
4576 	 * This is the case of antiquated Sun disk drives that have the
4577 	 * FAB_DEVID property set in the disk_table.  These drives
4578 	 * manage the devids by storing them in the last 2 available sectors
4579 	 * on the drive and have them fabricated by the ddi layer by calling
4580 	 * ddi_devid_init and passing the DEVID_FAB flag.
4581 	 */
4582 	if (un->un_f_opt_fab_devid == TRUE) {
4583 		/*
4584 		 * Depending on EINVAL isn't reliable, since a reserved disk
4585 		 * may result in invalid geometry, so check to make sure a
4586 		 * reservation conflict did not occur during attach.
4587 		 */
4588 		if ((sd_get_devid(un) == EINVAL) &&
4589 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4590 			/*
4591 			 * The devid is invalid AND there is no reservation
4592 			 * conflict.  Fabricate a new devid.
4593 			 */
4594 			(void) sd_create_devid(un);
4595 		}
4596 
4597 		/* Register the devid if it exists */
4598 		if (un->un_devid != NULL) {
4599 			(void) ddi_devid_register(SD_DEVINFO(un),
4600 			    un->un_devid);
4601 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4602 			    "sd_register_devid: Devid Fabricated\n");
4603 		}
4604 		return;
4605 	}
4606 
4607 	/*
4608 	 * We check the availability of the World Wide Name (0x83) and Unit
4609 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
4610 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
4611 	 * 0x83 is available, that is the best choice.  Our next choice is
4612 	 * 0x80.  If neither is available, we munge the devid from the device
4613 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
4614 	 * to fabricate a devid for non-Sun qualified disks.
4615 	 */
4616 	if (sd_check_vpd_page_support(un) == 0) {
4617 		/* collect page 80 data if available */
4618 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4619 
4620 			mutex_exit(SD_MUTEX(un));
4621 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4622 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4623 			    0x01, 0x80, &inq80_resid);
4624 
4625 			if (rval != 0) {
4626 				kmem_free(inq80, inq80_len);
4627 				inq80 = NULL;
4628 				inq80_len = 0;
4629 			}
4630 			mutex_enter(SD_MUTEX(un));
4631 		}
4632 
4633 		/* collect page 83 data if available */
4634 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4635 			mutex_exit(SD_MUTEX(un));
4636 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4637 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4638 			    0x01, 0x83, &inq83_resid);
4639 
4640 			if (rval != 0) {
4641 				kmem_free(inq83, inq83_len);
4642 				inq83 = NULL;
4643 				inq83_len = 0;
4644 			}
4645 			mutex_enter(SD_MUTEX(un));
4646 		}
4647 	}
4648 
4649 	/* encode best devid possible based on data available */
4650 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4651 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4652 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4653 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4654 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4655 
4656 		/* devid successfully encoded, register devid */
4657 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4658 
4659 	} else {
4660 		/*
4661 		 * Unable to encode a devid based on data available.
4662 		 * This is not a Sun qualified disk.  Older Sun disk
4663 		 * drives that have the SD_FAB_DEVID property
4664 		 * set in the disk_table and non-Sun qualified
4665 		 * disks are treated in the same manner.  These
4666 		 * drives manage the devids by storing them in
4667 		 * the last 2 available sectors on the drive and
4668 		 * have them fabricated by the ddi layer by
4669 		 * calling ddi_devid_init and passing the
4670 		 * DEVID_FAB flag.
4671 		 * Fabricate a devid only if one does not
4672 		 * already exist.
4673 		 */
4674 		if (sd_get_devid(un) == EINVAL) {
4675 			(void) sd_create_devid(un);
4676 		}
4677 		un->un_f_opt_fab_devid = TRUE;
4678 
4679 		/* Register the devid if it exists */
4680 		if (un->un_devid != NULL) {
4681 			(void) ddi_devid_register(SD_DEVINFO(un),
4682 			    un->un_devid);
4683 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4684 			    "sd_register_devid: devid fabricated using "
4685 			    "ddi framework\n");
4686 		}
4687 	}
4688 
4689 	/* clean up resources */
4690 	if (inq80 != NULL) {
4691 		kmem_free(inq80, inq80_len);
4692 	}
4693 	if (inq83 != NULL) {
4694 		kmem_free(inq83, inq83_len);
4695 	}
4696 }
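/*
 * Illustrative sketch, not part of the original driver: the devid
 * source preference implemented above reduces to the ordering below.
 * The function name sd_devid_source() and the enum are hypothetical,
 * shown only to make the decision explicit; the SD_VPD_* mask bits
 * are the driver's own.
 */
#if 0	/* illustration only */
enum sd_devid_source {
	SD_DEVID_FROM_WWN,	/* INQUIRY VPD page 0x83 (best) */
	SD_DEVID_FROM_SERIAL,	/* INQUIRY VPD page 0x80 (next best) */
	SD_DEVID_FABRICATED	/* ddi_devid_init() with DEVID_FAB */
};

static enum sd_devid_source
sd_devid_source(uint_t vpd_page_mask)
{
	if (vpd_page_mask & SD_VPD_DEVID_WWN_PG)
		return (SD_DEVID_FROM_WWN);
	if (vpd_page_mask & SD_VPD_UNIT_SERIAL_PG)
		return (SD_DEVID_FROM_SERIAL);
	return (SD_DEVID_FABRICATED);
}
#endif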
4697 
4698 
4699 
4700 /*
4701  *    Function: sd_get_devid
4702  *
4703  * Description: This routine will return 0 if a valid device id has been
4704  *		obtained from the target and stored in the soft state. If a
4705  *		valid device id has not been previously read and stored, a
4706  *		read attempt will be made.
4707  *
4708  *   Arguments: un - driver soft state (unit) structure
4709  *
4710  * Return Code: 0 if we successfully get the device id, else an errno
4711  *
4712  *     Context: Kernel Thread
4713  */
4714 
4715 static int
4716 sd_get_devid(struct sd_lun *un)
4717 {
4718 	struct dk_devid		*dkdevid;
4719 	ddi_devid_t		tmpid;
4720 	uint_t			*ip;
4721 	size_t			sz;
4722 	diskaddr_t		blk;
4723 	int			status;
4724 	int			chksum;
4725 	int			i;
4726 	size_t			buffer_size;
4727 
4728 	ASSERT(un != NULL);
4729 	ASSERT(mutex_owned(SD_MUTEX(un)));
4730 
4731 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4732 	    un);
4733 
4734 	if (un->un_devid != NULL) {
4735 		return (0);
4736 	}
4737 
4738 	mutex_exit(SD_MUTEX(un));
4739 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4740 	    (void *)SD_PATH_DIRECT) != 0) {
4741 		mutex_enter(SD_MUTEX(un));
4742 		return (EINVAL);
4743 	}
4744 
4745 	/*
4746 	 * Read and verify device id, stored in the reserved cylinders at the
4747 	 * end of the disk. Backup label is on the odd sectors of the last
4748 	 * track of the last cylinder. The device id is on a track of the
4749 	 * next-to-last cylinder.
4750 	 */
4751 	mutex_enter(SD_MUTEX(un));
4752 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4753 	mutex_exit(SD_MUTEX(un));
4754 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4755 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4756 	    SD_PATH_DIRECT);
4757 	if (status != 0) {
4758 		goto error;
4759 	}
4760 
4761 	/* Validate the revision */
4762 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4763 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4764 		status = EINVAL;
4765 		goto error;
4766 	}
4767 
4768 	/* Calculate the checksum */
4769 	chksum = 0;
4770 	ip = (uint_t *)dkdevid;
4771 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4772 	    i++) {
4773 		chksum ^= ip[i];
4774 	}
4775 
4776 	/* Compare the checksums */
4777 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4778 		status = EINVAL;
4779 		goto error;
4780 	}
4781 
4782 	/* Validate the device id */
4783 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4784 		status = EINVAL;
4785 		goto error;
4786 	}
4787 
4788 	/*
4789 	 * Store the device id in the driver soft state
4790 	 */
4791 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4792 	tmpid = kmem_alloc(sz, KM_SLEEP);
4793 
4794 	mutex_enter(SD_MUTEX(un));
4795 
4796 	un->un_devid = tmpid;
4797 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4798 
4799 	kmem_free(dkdevid, buffer_size);
4800 
4801 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4802 
4803 	return (status);
4804 error:
4805 	mutex_enter(SD_MUTEX(un));
4806 	kmem_free(dkdevid, buffer_size);
4807 	return (status);
4808 }
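/*
 * Illustrative sketch, not part of the original driver: the dk_devid
 * block read above is protected by an XOR checksum over every 32-bit
 * word of the block except the last, which holds the stored checksum.
 * A standalone form of that computation (the helper name is made up;
 * a caller would compare the result against DKD_GETCHKSUM()):
 */
#if 0	/* illustration only */
static uint_t
dk_devid_compute_chksum(const uint_t *block, size_t blocksize)
{
	uint_t	chksum = 0;
	size_t	i;

	/* XOR all words except the final (checksum) word */
	for (i = 0; i < (blocksize - sizeof (uint_t)) / sizeof (uint_t); i++)
		chksum ^= block[i];

	return (chksum);
}
#endif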
4809 
4810 
4811 /*
4812  *    Function: sd_create_devid
4813  *
4814  * Description: This routine will fabricate the device id and write it
4815  *		to the disk.
4816  *
4817  *   Arguments: un - driver soft state (unit) structure
4818  *
4819  * Return Code: the fabricated device id, or NULL on failure
4820  *
4821  *     Context: Kernel Thread
4822  */
4823 
4824 static ddi_devid_t
4825 sd_create_devid(struct sd_lun *un)
4826 {
4827 	ASSERT(un != NULL);
4828 
4829 	/* Fabricate the devid */
4830 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4831 	    == DDI_FAILURE) {
4832 		return (NULL);
4833 	}
4834 
4835 	/* Write the devid to disk */
4836 	if (sd_write_deviceid(un) != 0) {
4837 		ddi_devid_free(un->un_devid);
4838 		un->un_devid = NULL;
4839 	}
4840 
4841 	return (un->un_devid);
4842 }
4843 
4844 
4845 /*
4846  *    Function: sd_write_deviceid
4847  *
4848  * Description: This routine will write the device id to the disk
4849  *		reserved sector.
4850  *
4851  *   Arguments: un - driver soft state (unit) structure
4852  *
4853  * Return Code: -1 if the devid block cannot be located
4854  *		otherwise the value returned by sd_send_scsi_WRITE
4855  *
4856  *     Context: Kernel Thread
4857  */
4858 
4859 static int
4860 sd_write_deviceid(struct sd_lun *un)
4861 {
4862 	struct dk_devid		*dkdevid;
4863 	diskaddr_t		blk;
4864 	uint_t			*ip, chksum;
4865 	int			status;
4866 	int			i;
4867 
4868 	ASSERT(mutex_owned(SD_MUTEX(un)));
4869 
4870 	mutex_exit(SD_MUTEX(un));
4871 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4872 	    (void *)SD_PATH_DIRECT) != 0) {
4873 		mutex_enter(SD_MUTEX(un));
4874 		return (-1);
4875 	}
4876 
4877 
4878 	/* Allocate the buffer */
4879 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
4880 
4881 	/* Fill in the revision */
4882 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
4883 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
4884 
4885 	/* Copy in the device id */
4886 	mutex_enter(SD_MUTEX(un));
4887 	bcopy(un->un_devid, &dkdevid->dkd_devid,
4888 	    ddi_devid_sizeof(un->un_devid));
4889 	mutex_exit(SD_MUTEX(un));
4890 
4891 	/* Calculate the checksum */
4892 	chksum = 0;
4893 	ip = (uint_t *)dkdevid;
4894 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4895 	    i++) {
4896 		chksum ^= ip[i];
4897 	}
4898 
4899 	/* Fill-in checksum */
4900 	DKD_FORMCHKSUM(chksum, dkdevid);
4901 
4902 	/* Write the reserved sector */
4903 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
4904 	    SD_PATH_DIRECT);
4905 
4906 	kmem_free(dkdevid, un->un_sys_blocksize);
4907 
4908 	mutex_enter(SD_MUTEX(un));
4909 	return (status);
4910 }
4911 
4912 
4913 /*
4914  *    Function: sd_check_vpd_page_support
4915  *
4916  * Description: This routine sends an inquiry command with the EVPD bit set and
4917  *		a page code of 0x00 to the device. It is used to determine which
4918  *		vital product pages are available to find the devid. We are
4919  *		looking for pages 0x83 or 0x80.  If we return -1, the
4920  *		device does not support that command.
4921  *
4922  *   Arguments: un  - driver soft state (unit) structure
4923  *
4924  * Return Code: 0 - success
4925  *		-1 - device does not support VPD pages
4926  *
4927  *     Context: This routine can sleep.
4928  */
4929 
4930 static int
4931 sd_check_vpd_page_support(struct sd_lun *un)
4932 {
4933 	uchar_t	*page_list	= NULL;
4934 	uchar_t	page_length	= 0xff;	/* Use max possible length */
4935 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
4936 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
4937 	int	rval		= 0;
4938 	int	counter;
4939 
4940 	ASSERT(un != NULL);
4941 	ASSERT(mutex_owned(SD_MUTEX(un)));
4942 
4943 	mutex_exit(SD_MUTEX(un));
4944 
4945 	/*
4946 	 * We'll set the page length to the maximum to save figuring it out
4947 	 * with an additional call.
4948 	 */
4949 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
4950 
4951 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
4952 	    page_code, NULL);
4953 
4954 	mutex_enter(SD_MUTEX(un));
4955 
4956 	/*
4957 	 * Now we must validate that the device accepted the command, as some
4958 	 * drives do not support it.  If the drive does support it, we will
4959 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
4960 	 * not, we return -1.
4961 	 */
4962 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
4963 		/* Loop to find one of the 2 pages we need */
4964 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
4965 
4966 		/*
4967 		 * Pages are returned in ascending order, and 0x83 is what we
4968 		 * are hoping for.
4969 		 */
4970 		while ((page_list[counter] <= 0x83) &&
4971 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
4972 		    VPD_HEAD_OFFSET))) {
4973 			/*
4974 			 * page_list[VPD_PAGE_LENGTH] (byte 3) counts the page
4975 			 * codes after the 4-byte header, bounding the scan.
4976 			 */
4977 
4978 			switch (page_list[counter]) {
4979 			case 0x00:
4980 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
4981 				break;
4982 			case 0x80:
4983 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
4984 				break;
4985 			case 0x81:
4986 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
4987 				break;
4988 			case 0x82:
4989 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
4990 				break;
4991 			case 0x83:
4992 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
4993 				break;
4994 			}
4995 			counter++;
4996 		}
4997 
4998 	} else {
4999 		rval = -1;
5000 
5001 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5002 		    "sd_check_vpd_page_support: This drive does not implement "
5003 		    "VPD pages.\n");
5004 	}
5005 
5006 	kmem_free(page_list, page_length);
5007 
5008 	return (rval);
5009 }
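/*
 * Illustrative sketch, not part of the original driver: decoding a
 * "Supported VPD Pages" (0x00) response the way the loop above does.
 * Byte 3 of the response gives the number of page codes that follow
 * the 4-byte header, and the codes start at byte 4 in ascending
 * order.  The helper name is made up; the SD_VPD_* bits are the
 * driver's own.
 */
#if 0	/* illustration only */
static uint_t
sd_vpd_list_to_mask(const uchar_t *page_list)
{
	uint_t	mask = 0;
	int	i;

	for (i = 0; i < page_list[3]; i++) {
		switch (page_list[4 + i]) {
		case 0x00:
			mask |= SD_VPD_SUPPORTED_PG;
			break;
		case 0x80:
			mask |= SD_VPD_UNIT_SERIAL_PG;
			break;
		case 0x83:
			mask |= SD_VPD_DEVID_WWN_PG;
			break;
		}
	}
	return (mask);
}
#endif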
5010 
5011 
5012 /*
5013  *    Function: sd_setup_pm
5014  *
5015  * Description: Initialize Power Management on the device
5016  *
5017  *     Context: Kernel Thread
5018  */
5019 
5020 static void
5021 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5022 {
5023 	uint_t	log_page_size;
5024 	uchar_t	*log_page_data;
5025 	int	rval;
5026 
5027 	/*
5028 	 * Since we are called from attach, holding a mutex for
5029 	 * un is unnecessary. Because some of the routines called
5030 	 * from here require SD_MUTEX to not be held, assert this
5031 	 * right up front.
5032 	 */
5033 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5034 	/*
5035 	 * Since the sd device does not have the 'reg' property,
5036 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5037 	 * The following code is to tell cpr that this device
5038 	 * DOES need to be suspended and resumed.
5039 	 */
5040 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5041 	    "pm-hardware-state", "needs-suspend-resume");
5042 
5043 	/*
5044 	 * This complies with the new power management framework
5045 	 * for certain desktop machines. Create the pm_components
5046 	 * property as a string array property.
5047 	 */
5048 	if (un->un_f_pm_supported) {
5049 		/*
5050 		 * Not all devices have a motor, so try it first.
5051 		 * Some devices may return ILLEGAL REQUEST, and
5052 		 * some will hang.
5053 		 * The following START_STOP_UNIT is used to check
5054 		 * whether the target device has a motor.
5055 		 */
5056 		un->un_f_start_stop_supported = TRUE;
5057 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5058 		    SD_PATH_DIRECT) != 0) {
5059 			un->un_f_start_stop_supported = FALSE;
5060 		}
5061 
5062 		/*
5063 		 * Create the pm properties anyway; otherwise the parent
5064 		 * can't go to sleep.
5065 		 */
5066 		(void) sd_create_pm_components(devi, un);
5067 		un->un_f_pm_is_enabled = TRUE;
5068 		return;
5069 	}
5070 
5071 	if (!un->un_f_log_sense_supported) {
5072 		un->un_power_level = SD_SPINDLE_ON;
5073 		un->un_f_pm_is_enabled = FALSE;
5074 		return;
5075 	}
5076 
5077 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5078 
5079 #ifdef	SDDEBUG
5080 	if (sd_force_pm_supported) {
5081 		/* Force a successful result */
5082 		rval = 1;
5083 	}
5084 #endif
5085 
5086 	/*
5087 	 * If the start-stop cycle counter log page is not supported
5088 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5089 	 * then we should not create the pm_components property.
5090 	 */
5091 	if (rval == -1) {
5092 		/*
5093 		 * Error.
5094 		 * Reading log sense failed, most likely this is
5095 		 * an older drive that does not support log sense.
5096 		 * If this fails auto-pm is not supported.
5097 		 */
5098 		un->un_power_level = SD_SPINDLE_ON;
5099 		un->un_f_pm_is_enabled = FALSE;
5100 
5101 	} else if (rval == 0) {
5102 		/*
5103 		 * Page not found.
5104 		 * The start stop cycle counter is implemented as page
5105 		 * The start/stop cycle counter is implemented as page
5106 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
5107 		 */
5108 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5109 			/*
5110 			 * Page found, use this one.
5111 			 */
5112 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5113 			un->un_f_pm_is_enabled = TRUE;
5114 		} else {
5115 			/*
5116 			 * Error or page not found.
5117 			 * auto-pm is not supported for this device.
5118 			 */
5119 			un->un_power_level = SD_SPINDLE_ON;
5120 			un->un_f_pm_is_enabled = FALSE;
5121 		}
5122 	} else {
5123 		/*
5124 		 * Page found, use it.
5125 		 */
5126 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5127 		un->un_f_pm_is_enabled = TRUE;
5128 	}
5129 
5130 
5131 	if (un->un_f_pm_is_enabled == TRUE) {
5132 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5133 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5134 
5135 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5136 		    log_page_size, un->un_start_stop_cycle_page,
5137 		    0x01, 0, SD_PATH_DIRECT);
5138 #ifdef	SDDEBUG
5139 		if (sd_force_pm_supported) {
5140 			/* Force a successful result */
5141 			rval = 0;
5142 		}
5143 #endif
5144 
5145 		/*
5146 		 * If the LOG SENSE for the start/stop cycle counter page
5147 		 * succeeds, then power management is supported and we can
5148 		 * enable auto-pm.
5149 		 */
5150 		if (rval == 0)  {
5151 			(void) sd_create_pm_components(devi, un);
5152 		} else {
5153 			un->un_power_level = SD_SPINDLE_ON;
5154 			un->un_f_pm_is_enabled = FALSE;
5155 		}
5156 
5157 		kmem_free(log_page_data, log_page_size);
5158 	}
5159 }
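/*
 * Illustrative sketch, not part of the original driver: the start/stop
 * cycle page probing above reduces to this three-way decision.  The
 * helper name is made up; sd_log_page_supported() returns 1 when the
 * page is listed, 0 when it is not, and -1 when LOG SENSE fails.
 */
#if 0	/* illustration only */
static int
sd_pick_cycle_page(struct sd_lun *un)
{
	switch (sd_log_page_supported(un, START_STOP_CYCLE_PAGE)) {
	case 1:
		/* newer disks: standard page 0xE */
		return (START_STOP_CYCLE_PAGE);
	case 0:
		/* older disks: try the vendor-unique page 0x31 */
		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1)
			return (START_STOP_CYCLE_VU_PAGE);
		/* FALLTHROUGH */
	default:
		return (-1);	/* LOG SENSE unusable; no auto-pm */
	}
}
#endif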
5160 
5161 
5162 /*
5163  *    Function: sd_create_pm_components
5164  *
5165  * Description: Initialize PM property.
5166  *
5167  *     Context: Kernel thread context
5168  */
5169 
5170 static void
5171 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5172 {
5173 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5174 
5175 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5176 
5177 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5178 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5179 		/*
5180 		 * When components are initially created they are idle,
5181 		 * power up any non-removables.
5182 		 * Note: the return value of pm_raise_power can't be used
5183 		 * for determining if PM should be enabled for this device.
5184 		 * Even if you check the return values and remove this
5185 		 * property created above, the PM framework will not honor the
5186 		 * change after the first call to pm_raise_power. Hence,
5187 		 * removal of that property does not help if pm_raise_power
5188 		 * fails. In the case of removable media, the start/stop
5189 		 * will fail if the media is not present.
5190 		 */
5191 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5192 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5193 			mutex_enter(SD_MUTEX(un));
5194 			un->un_power_level = SD_SPINDLE_ON;
5195 			mutex_enter(&un->un_pm_mutex);
5196 			/* Set to on and not busy. */
5197 			un->un_pm_count = 0;
5198 		} else {
5199 			mutex_enter(SD_MUTEX(un));
5200 			un->un_power_level = SD_SPINDLE_OFF;
5201 			mutex_enter(&un->un_pm_mutex);
5202 			/* Set to off. */
5203 			un->un_pm_count = -1;
5204 		}
5205 		mutex_exit(&un->un_pm_mutex);
5206 		mutex_exit(SD_MUTEX(un));
5207 	} else {
5208 		un->un_power_level = SD_SPINDLE_ON;
5209 		un->un_f_pm_is_enabled = FALSE;
5210 	}
5211 }
5212 
5213 
5214 /*
5215  *    Function: sd_ddi_suspend
5216  *
5217  * Description: Performs system power-down operations. This includes
5218  *		setting the drive state to indicate it is suspended so
5219  *		that no new commands will be accepted. Also, wait for
5220  *		all commands that are in transport or queued to a timer
5221  *		for retry to complete. All timeout threads are cancelled.
5222  *
5223  * Return Code: DDI_FAILURE or DDI_SUCCESS
5224  *
5225  *     Context: Kernel thread context
5226  */
5227 
5228 static int
5229 sd_ddi_suspend(dev_info_t *devi)
5230 {
5231 	struct	sd_lun	*un;
5232 	clock_t		wait_cmds_complete;
5233 
5234 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5235 	if (un == NULL) {
5236 		return (DDI_FAILURE);
5237 	}
5238 
5239 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5240 
5241 	mutex_enter(SD_MUTEX(un));
5242 
5243 	/* Return success if the device is already suspended. */
5244 	if (un->un_state == SD_STATE_SUSPENDED) {
5245 		mutex_exit(SD_MUTEX(un));
5246 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5247 		    "device already suspended, exiting\n");
5248 		return (DDI_SUCCESS);
5249 	}
5250 
5251 	/* Return failure if the device is being used by HA */
5252 	if (un->un_resvd_status &
5253 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5254 		mutex_exit(SD_MUTEX(un));
5255 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5256 		    "device in use by HA, exiting\n");
5257 		return (DDI_FAILURE);
5258 	}
5259 
5260 	/*
5261 	 * Return failure if the device is in a resource wait
5262 	 * or power changing state.
5263 	 */
5264 	if ((un->un_state == SD_STATE_RWAIT) ||
5265 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5266 		mutex_exit(SD_MUTEX(un));
5267 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5268 		    "device in resource wait state, exiting\n");
5269 		return (DDI_FAILURE);
5270 	}
5271 
5272 
5273 	un->un_save_state = un->un_last_state;
5274 	New_state(un, SD_STATE_SUSPENDED);
5275 
5276 	/*
5277 	 * Wait for all commands that are in transport or queued to a timer
5278 	 * for retry to complete.
5279 	 *
5280 	 * While waiting, no new commands will be accepted or sent because of
5281 	 * the new state we set above.
5282 	 *
5283 	 * Wait till current operation has completed. If we are in the resource
5284 	 * wait state (with an intr outstanding) then we need to wait till the
5285 	 * intr completes and starts the next cmd. We want to wait for
5286 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5287 	 */
5288 	wait_cmds_complete = ddi_get_lbolt() +
5289 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5290 
5291 	while (un->un_ncmds_in_transport != 0) {
5292 		/*
5293 		 * Fail if commands do not finish in the specified time.
5294 		 */
5295 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5296 		    wait_cmds_complete) == -1) {
5297 			/*
5298 			 * Undo the state changes made above. Everything
5299 			 * must go back to its original value.
5300 			 */
5301 			Restore_state(un);
5302 			un->un_last_state = un->un_save_state;
5303 			/* Wake up any threads that might be waiting. */
5304 			cv_broadcast(&un->un_suspend_cv);
5305 			mutex_exit(SD_MUTEX(un));
5306 			SD_ERROR(SD_LOG_IO_PM, un,
5307 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5308 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5309 			return (DDI_FAILURE);
5310 		}
5311 	}
5312 
5313 	/*
5314 	 * Cancel SCSI watch thread and timeouts, if any are active
5315 	 */
5316 
5317 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5318 		opaque_t temp_token = un->un_swr_token;
5319 		mutex_exit(SD_MUTEX(un));
5320 		scsi_watch_suspend(temp_token);
5321 		mutex_enter(SD_MUTEX(un));
5322 	}
5323 
5324 	if (un->un_reset_throttle_timeid != NULL) {
5325 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5326 		un->un_reset_throttle_timeid = NULL;
5327 		mutex_exit(SD_MUTEX(un));
5328 		(void) untimeout(temp_id);
5329 		mutex_enter(SD_MUTEX(un));
5330 	}
5331 
5332 	if (un->un_dcvb_timeid != NULL) {
5333 		timeout_id_t temp_id = un->un_dcvb_timeid;
5334 		un->un_dcvb_timeid = NULL;
5335 		mutex_exit(SD_MUTEX(un));
5336 		(void) untimeout(temp_id);
5337 		mutex_enter(SD_MUTEX(un));
5338 	}
5339 
5340 	mutex_enter(&un->un_pm_mutex);
5341 	if (un->un_pm_timeid != NULL) {
5342 		timeout_id_t temp_id = un->un_pm_timeid;
5343 		un->un_pm_timeid = NULL;
5344 		mutex_exit(&un->un_pm_mutex);
5345 		mutex_exit(SD_MUTEX(un));
5346 		(void) untimeout(temp_id);
5347 		mutex_enter(SD_MUTEX(un));
5348 	} else {
5349 		mutex_exit(&un->un_pm_mutex);
5350 	}
5351 
5352 	if (un->un_retry_timeid != NULL) {
5353 		timeout_id_t temp_id = un->un_retry_timeid;
5354 		un->un_retry_timeid = NULL;
5355 		mutex_exit(SD_MUTEX(un));
5356 		(void) untimeout(temp_id);
5357 		mutex_enter(SD_MUTEX(un));
5358 	}
5359 
5360 	if (un->un_direct_priority_timeid != NULL) {
5361 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5362 		un->un_direct_priority_timeid = NULL;
5363 		mutex_exit(SD_MUTEX(un));
5364 		(void) untimeout(temp_id);
5365 		mutex_enter(SD_MUTEX(un));
5366 	}
5367 
5368 	if (un->un_f_is_fibre == TRUE) {
5369 		/*
5370 		 * Remove callbacks for insert and remove events
5371 		 */
5372 		if (un->un_insert_event != NULL) {
5373 			mutex_exit(SD_MUTEX(un));
5374 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5375 			mutex_enter(SD_MUTEX(un));
5376 			un->un_insert_event = NULL;
5377 		}
5378 
5379 		if (un->un_remove_event != NULL) {
5380 			mutex_exit(SD_MUTEX(un));
5381 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5382 			mutex_enter(SD_MUTEX(un));
5383 			un->un_remove_event = NULL;
5384 		}
5385 	}
5386 
5387 	mutex_exit(SD_MUTEX(un));
5388 
5389 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5390 
5391 	return (DDI_SUCCESS);
5392 }
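/*
 * Illustrative sketch, not part of the original driver: the command
 * drain loop above waits against one absolute deadline expressed in
 * lbolt ticks, so repeated wakeups all draw from the same time
 * budget.  A minimal standalone form of the pattern (all names are
 * made up):
 */
#if 0	/* illustration only */
static int
drain_with_deadline(kcondvar_t *cv, kmutex_t *mp, int *ncmds, int seconds)
{
	clock_t	deadline;

	ASSERT(mutex_owned(mp));
	deadline = ddi_get_lbolt() + (seconds * drv_usectohz(1000000));

	while (*ncmds != 0) {
		/* cv_timedwait(9F) returns -1 once the deadline passes */
		if (cv_timedwait(cv, mp, deadline) == -1)
			return (-1);	/* commands did not drain in time */
	}
	return (0);
}
#endif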
5393 
5394 
5395 /*
5396  *    Function: sd_ddi_pm_suspend
5397  *
5398  * Description: Set the drive state to low power.
5399  *		Someone else is required to actually change the drive
5400  *		power level.
5401  *
5402  *   Arguments: un - driver soft state (unit) structure
5403  *
5404  * Return Code: DDI_FAILURE or DDI_SUCCESS
5405  *
5406  *     Context: Kernel thread context
5407  */
5408 
5409 static int
5410 sd_ddi_pm_suspend(struct sd_lun *un)
5411 {
5412 	ASSERT(un != NULL);
5413 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5414 
5415 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5416 	mutex_enter(SD_MUTEX(un));
5417 
5418 	/*
5419 	 * Exit if power management is not enabled for this device, or if
5420 	 * the device is being used by HA.
5421 	 */
5422 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5423 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5424 		mutex_exit(SD_MUTEX(un));
5425 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5426 		return (DDI_SUCCESS);
5427 	}
5428 
5429 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5430 	    un->un_ncmds_in_driver);
5431 
5432 	/*
5433 	 * See if the device is not busy, ie.:
5434 	 *    - we have no commands in the driver for this device
5435 	 *    - not waiting for resources
5436 	 */
5437 	if ((un->un_ncmds_in_driver == 0) &&
5438 	    (un->un_state != SD_STATE_RWAIT)) {
5439 		/*
5440 		 * The device is not busy, so it is OK to go to low power state.
5441 		 * Indicate low power, but rely on someone else to actually
5442 		 * change it.
5443 		 */
5444 		mutex_enter(&un->un_pm_mutex);
5445 		un->un_pm_count = -1;
5446 		mutex_exit(&un->un_pm_mutex);
5447 		un->un_power_level = SD_SPINDLE_OFF;
5448 	}
5449 
5450 	mutex_exit(SD_MUTEX(un));
5451 
5452 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5453 
5454 	return (DDI_SUCCESS);
5455 }
5456 
5457 
5458 /*
5459  *    Function: sd_ddi_resume
5460  *
5461  * Description: Performs system power-up operations.
5462  *
5463  * Return Code: DDI_SUCCESS
5464  *		DDI_FAILURE
5465  *
5466  *     Context: Kernel thread context
5467  */
5468 
5469 static int
5470 sd_ddi_resume(dev_info_t *devi)
5471 {
5472 	struct	sd_lun	*un;
5473 
5474 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5475 	if (un == NULL) {
5476 		return (DDI_FAILURE);
5477 	}
5478 
5479 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5480 
5481 	mutex_enter(SD_MUTEX(un));
5482 	Restore_state(un);
5483 
5484 	/*
5485 	 * Restore the state which was saved to give the
5486 	 * right state in un_last_state.
5487 	 */
5488 	un->un_last_state = un->un_save_state;
5489 	/*
5490 	 * Note: throttle comes back at full.
5491 	 * Also note: this MUST be done before calling pm_raise_power
5492 	 * otherwise the system can get hung in biowait. The scenario where
5493 	 * this'll happen is under cpr suspend. Writing of the system
5494 	 * state goes through sddump, which writes 0 to un_throttle. If
5495 	 * writing the system state then fails, for example if the partition
5496 	 * is too small, then cpr attempts a resume. If throttle isn't restored
5497 	 * from the saved value until after calling pm_raise_power then
5498 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5499 	 * in biowait.
5500 	 */
5501 	un->un_throttle = un->un_saved_throttle;
5502 
5503 	/*
5504 	 * The chance of failure is very rare, as the only command issued in
5505 	 * the power entry point is START, on the transition from 0->1 or
5506 	 * unknown->1. Put the device in the SPINDLE ON state regardless of
5507 	 * the state in which it was suspended. Ignore the return value, as
5508 	 * the resume should not be failed. In the case of removable media
5509 	 * the media need not be inserted, so there is a chance that raise
5510 	 * power will fail with media not present.
5511 	 */
5512 	if (un->un_f_attach_spinup) {
5513 		mutex_exit(SD_MUTEX(un));
5514 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5515 		mutex_enter(SD_MUTEX(un));
5516 	}
5517 
5518 	/*
5519 	 * Don't broadcast to the suspend cv and therefore possibly
5520 	 * start I/O until after power has been restored.
5521 	 */
5522 	cv_broadcast(&un->un_suspend_cv);
5523 	cv_broadcast(&un->un_state_cv);
5524 
5525 	/* restart thread */
5526 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5527 		scsi_watch_resume(un->un_swr_token);
5528 	}
5529 
5530 #if (defined(__fibre))
5531 	if (un->un_f_is_fibre == TRUE) {
5532 		/*
5533 		 * Add callbacks for insert and remove events
5534 		 */
5535 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5536 			sd_init_event_callbacks(un);
5537 		}
5538 	}
5539 #endif
5540 
5541 	/*
5542 	 * Transport any pending commands to the target.
5543 	 *
5544 	 * If this is a low-activity device, commands in queue will have to wait
5545 	 * until new commands come in, which may take awhile. Also, we
5546 	 * specifically don't check un_ncmds_in_transport because we know that
5547 	 * there really are no commands in progress after the unit was
5548 	 * suspended and we could have reached the throttle level, been
5549 	 * suspended, and have no new commands coming in for awhile. Highly
5550 	 * unlikely, but so is the low-activity disk scenario.
5551 	 */
5552 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5553 
5554 	sd_start_cmds(un, NULL);
5555 	mutex_exit(SD_MUTEX(un));
5556 
5557 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5558 
5559 	return (DDI_SUCCESS);
5560 }
5561 
5562 
5563 /*
5564  *    Function: sd_ddi_pm_resume
5565  *
5566  * Description: Set the drive state to powered on.
5567  *		Someone else is required to actually change the drive
5568  *		power level.
5569  *
5570  *   Arguments: un - driver soft state (unit) structure
5571  *
5572  * Return Code: DDI_SUCCESS
5573  *
5574  *     Context: Kernel thread context
5575  */
5576 
5577 static int
5578 sd_ddi_pm_resume(struct sd_lun *un)
5579 {
5580 	ASSERT(un != NULL);
5581 
5582 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5583 	mutex_enter(SD_MUTEX(un));
5584 	un->un_power_level = SD_SPINDLE_ON;
5585 
5586 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5587 	mutex_enter(&un->un_pm_mutex);
5588 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5589 		un->un_pm_count++;
5590 		ASSERT(un->un_pm_count == 0);
5591 		/*
5592 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5593 		 * un_suspend_cv is for a system resume, not a power management
5594 		 * device resume. (4297749)
5595 		 *	 cv_broadcast(&un->un_suspend_cv);
5596 		 */
5597 	}
5598 	mutex_exit(&un->un_pm_mutex);
5599 	mutex_exit(SD_MUTEX(un));
5600 
5601 	return (DDI_SUCCESS);
5602 }
5603 
5604 
5605 /*
5606  *    Function: sd_pm_idletimeout_handler
5607  *
5608  * Description: A timer routine that's active only while a device is busy.
5609  *		The purpose is to extend slightly the pm framework's busy
5610  *		view of the device to prevent busy/idle thrashing for
5611  *		back-to-back commands. Do this by comparing the current time
5612  *		to the time at which the last command completed and when the
5613  *		difference is greater than sd_pm_idletime, call
5614  *		pm_idle_component. In addition to indicating idle to the pm
5615  *		framework, update the chain type to again use the internal pm
5616  *		layers of the driver.
5617  *
5618  *   Arguments: arg - driver soft state (unit) structure
5619  *
5620  *     Context: Executes in a timeout(9F) thread context
5621  */
5622 
5623 static void
5624 sd_pm_idletimeout_handler(void *arg)
5625 {
5626 	struct sd_lun *un = arg;
5627 
5628 	time_t	now;
5629 
5630 	mutex_enter(&sd_detach_mutex);
5631 	if (un->un_detach_count != 0) {
5632 		/* Abort if the instance is detaching */
5633 		mutex_exit(&sd_detach_mutex);
5634 		return;
5635 	}
5636 	mutex_exit(&sd_detach_mutex);
5637 
5638 	now = ddi_get_time();
5639 	/*
5640 	 * Grab both mutexes, in the proper order, since we're accessing
5641 	 * both PM and softstate variables.
5642 	 */
5643 	mutex_enter(SD_MUTEX(un));
5644 	mutex_enter(&un->un_pm_mutex);
5645 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5646 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5647 		/*
5648 		 * Update the chain types.
5649 		 * This takes effect on the next new command received.
5650 		 */
5651 		if (un->un_f_non_devbsize_supported) {
5652 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5653 		} else {
5654 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5655 		}
5656 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5657 
5658 		SD_TRACE(SD_LOG_IO_PM, un,
5659 		    "sd_pm_idletimeout_handler: idling device\n");
5660 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5661 		un->un_pm_idle_timeid = NULL;
5662 	} else {
5663 		un->un_pm_idle_timeid =
5664 		    timeout(sd_pm_idletimeout_handler, un,
5665 		    (drv_usectohz((clock_t)300000)));	/* 300 ms. */
5666 	}
5667 	mutex_exit(&un->un_pm_mutex);
5668 	mutex_exit(SD_MUTEX(un));
5669 }
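/*
 * Illustrative sketch, not part of the original driver: the handler
 * above is a self-rearming timeout(9F).  It re-registers itself while
 * the device stays busy and lets the chain die out once the idle
 * condition holds.  The structure and names below are made up to show
 * only that shape.
 */
#if 0	/* illustration only */
struct poll_state {
	kmutex_t	lock;
	int		busy;
	timeout_id_t	tid;
};

static void
poll_until_idle(void *arg)
{
	struct poll_state *ps = arg;

	mutex_enter(&ps->lock);
	if (ps->busy) {
		/* still busy: re-arm for another 300 ms */
		ps->tid = timeout(poll_until_idle, ps,
		    drv_usectohz(300000));
	} else {
		ps->tid = NULL;	/* idle: stop re-arming */
	}
	mutex_exit(&ps->lock);
}
#endif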
5670 
5671 
5672 /*
5673  *    Function: sd_pm_timeout_handler
5674  *
5675  * Description: Callback to tell framework we are idle.
5676  *
5677  *     Context: timeout(9f) thread context.
5678  */
5679 
5680 static void
5681 sd_pm_timeout_handler(void *arg)
5682 {
5683 	struct sd_lun *un = arg;
5684 
5685 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5686 	mutex_enter(&un->un_pm_mutex);
5687 	un->un_pm_timeid = NULL;
5688 	mutex_exit(&un->un_pm_mutex);
5689 }
5690 
5691 
5692 /*
5693  *    Function: sdpower
5694  *
5695  * Description: PM entry point.
5696  *
5697  * Return Code: DDI_SUCCESS
5698  *		DDI_FAILURE
5699  *
5700  *     Context: Kernel thread context
5701  */
5702 
5703 static int
5704 sdpower(dev_info_t *devi, int component, int level)
5705 {
5706 	struct sd_lun	*un;
5707 	int		instance;
5708 	int		rval = DDI_SUCCESS;
5709 	uint_t		i, log_page_size, maxcycles, ncycles;
5710 	uchar_t		*log_page_data;
5711 	int		log_sense_page;
5712 	int		medium_present;
5713 	time_t		intvlp;
5714 	dev_t		dev;
5715 	struct pm_trans_data	sd_pm_tran_data;
5716 	uchar_t		save_state;
5717 	int		sval;
5718 	uchar_t		state_before_pm;
5719 	int		got_semaphore_here;
5720 
5721 	instance = ddi_get_instance(devi);
5722 
5723 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5724 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5725 	    component != 0) {
5726 		return (DDI_FAILURE);
5727 	}
5728 
5729 	dev = sd_make_device(SD_DEVINFO(un));
5730 
5731 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5732 
5733 	/*
5734 	 * Must synchronize power down with close.
5735 	 * Attempt to decrement/acquire the open/close semaphore,
5736 	 * but do NOT wait on it. If it's not greater than zero,
5737 	 * ie. it can't be decremented without waiting, then
5738 	 * someone else, either open or close, already has it
5739 	 * and the try returns 0. Use that knowledge here to determine
5740 	 * if it's OK to change the device power level.
5741 	 * Also, only increment it on exit if it was decremented, ie. gotten,
5742 	 * here.
5743 	 */
5744 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5745 
5746 	mutex_enter(SD_MUTEX(un));
5747 
5748 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5749 	    un->un_ncmds_in_driver);
5750 
5751 	/*
5752 	 * If un_ncmds_in_driver is non-zero it indicates commands are
5753 	 * already being processed in the driver, or if the semaphore was
5754 	 * not gotten here it indicates an open or close is being processed.
5755 	 * At the same time somebody is requesting to go to low power, which
5756 	 * can't be allowed, so we return failure.
5757 	 */
5758 	if ((level == SD_SPINDLE_OFF) &&
5759 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5760 		mutex_exit(SD_MUTEX(un));
5761 
5762 		if (got_semaphore_here != 0) {
5763 			sema_v(&un->un_semoclose);
5764 		}
5765 		SD_TRACE(SD_LOG_IO_PM, un,
5766 		    "sdpower: exit, device has queued cmds.\n");
5767 		return (DDI_FAILURE);
5768 	}
5769 
5770 	/*
5771 	 * If the state is OFFLINE the disk is completely dead. We change
5772 	 * power levels by sending commands to the disk, and of course those
5773 	 * would fail anyway, so fail the request here.
5774 	 *
5775 	 * Power changes to a device that's OFFLINE or SUSPENDED
5776 	 * are not allowed.
5777 	 */
5778 	if ((un->un_state == SD_STATE_OFFLINE) ||
5779 	    (un->un_state == SD_STATE_SUSPENDED)) {
5780 		mutex_exit(SD_MUTEX(un));
5781 
5782 		if (got_semaphore_here != 0) {
5783 			sema_v(&un->un_semoclose);
5784 		}
5785 		SD_TRACE(SD_LOG_IO_PM, un,
5786 		    "sdpower: exit, device is off-line.\n");
5787 		return (DDI_FAILURE);
5788 	}
5789 
5790 	/*
5791 	 * Change the device's state to indicate its power level
5792 	 * is being changed. Do this to prevent a power off in the
5793 	 * middle of commands, which is especially bad on devices
5794 	 * that are really powered off instead of just spun down.
5795 	 */
5796 	state_before_pm = un->un_state;
5797 	un->un_state = SD_STATE_PM_CHANGING;
5798 
5799 	mutex_exit(SD_MUTEX(un));
5800 
5801 	/*
5802 	 * If "pm-capable" property is set to TRUE by HBA drivers,
5803 	 * bypass the following checking, otherwise, check the log
5804 	 * sense information for this device
5805 	 */
5806 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5807 		/*
5808 		 * Get the log sense information to understand whether the
5809 		 * power cycle counts have gone beyond the threshold.
5810 		 */
5811 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5812 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5813 
5814 		mutex_enter(SD_MUTEX(un));
5815 		log_sense_page = un->un_start_stop_cycle_page;
5816 		mutex_exit(SD_MUTEX(un));
5817 
5818 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5819 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5820 #ifdef	SDDEBUG
5821 		if (sd_force_pm_supported) {
5822 			/* Force a successful result */
5823 			rval = 0;
5824 		}
5825 #endif
5826 		if (rval != 0) {
5827 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5828 			    "Log Sense Failed\n");
5829 			kmem_free(log_page_data, log_page_size);
5830 			/* Cannot support power management on those drives */
5831 
5832 			if (got_semaphore_here != 0) {
5833 				sema_v(&un->un_semoclose);
5834 			}
5835 			/*
5836 			 * On exit put the state back to its original value
5837 			 * and broadcast to anyone waiting for the power
5838 			 * change completion.
5839 			 */
5840 			mutex_enter(SD_MUTEX(un));
5841 			un->un_state = state_before_pm;
5842 			cv_broadcast(&un->un_suspend_cv);
5843 			mutex_exit(SD_MUTEX(un));
5844 			SD_TRACE(SD_LOG_IO_PM, un,
5845 			    "sdpower: exit, Log Sense Failed.\n");
5846 			return (DDI_FAILURE);
5847 		}
5848 
5849 		/*
5850 		 * From the page data - Convert the essential information to
5851 		 * pm_trans_data
5852 		 */
5853 		maxcycles =
5854 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5855 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5856 
5857 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5858 
5859 		ncycles =
5860 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5861 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5862 
5863 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5864 
5865 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5866 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5867 			    log_page_data[8+i];
5868 		}
5869 
5870 		kmem_free(log_page_data, log_page_size);
5871 
5872 		/*
5873 		 * Call pm_trans_check routine to get the Ok from
5874 		 * the global policy
5875 		 */
5876 
5877 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
5878 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
5879 
5880 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
5881 #ifdef	SDDEBUG
5882 		if (sd_force_pm_supported) {
5883 			/* Force a successful result */
5884 			rval = 1;
5885 		}
5886 #endif
5887 		switch (rval) {
5888 		case 0:
5889 			/*
5890 			 * Not OK to power cycle, or error in the parameters
5891 			 * passed; we were given the advised time before a
5892 			 * power cycle should be considered.  Based on the new
5893 			 * intvlp parameter we are supposed to pretend we are
5894 			 * busy so that the pm framework will never call our
5895 			 * power entry point.  Because of that, install a
5896 			 * timeout handler and wait for the recommended time to
5897 			 * elapse so that power management can be effective again.
5898 			 *
5899 			 * To effect this behavior, call pm_busy_component to
5900 			 * indicate to the framework this device is busy.
5901 			 * By not adjusting un_pm_count the rest of PM in
5902 			 * the driver will function normally, independent of
5903 			 * this; but because the framework is told the device
5904 			 * is busy it won't attempt powering down until it gets
5905 			 * a matching idle. The timeout handler sends this.
5906 			 * Note: sd_pm_entry can't be called here to do this
5907 			 * because sdpower may have been called as a result
5908 			 * of a call to pm_raise_power from within sd_pm_entry.
5909 			 *
5910 			 * If a timeout handler is already active then
5911 			 * don't install another.
5912 			 */
5913 			mutex_enter(&un->un_pm_mutex);
5914 			if (un->un_pm_timeid == NULL) {
5915 				un->un_pm_timeid =
5916 				    timeout(sd_pm_timeout_handler,
5917 				    un, intvlp * drv_usectohz(1000000));
5918 				mutex_exit(&un->un_pm_mutex);
5919 				(void) pm_busy_component(SD_DEVINFO(un), 0);
5920 			} else {
5921 				mutex_exit(&un->un_pm_mutex);
5922 			}
5923 			if (got_semaphore_here != 0) {
5924 				sema_v(&un->un_semoclose);
5925 			}
5926 			/*
5927 			 * On exit put the state back to its original value
5928 			 * and broadcast to anyone waiting for the power
5929 			 * change completion.
5930 			 */
5931 			mutex_enter(SD_MUTEX(un));
5932 			un->un_state = state_before_pm;
5933 			cv_broadcast(&un->un_suspend_cv);
5934 			mutex_exit(SD_MUTEX(un));
5935 
5936 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
5937 			    "trans check Failed, not ok to power cycle.\n");
5938 			return (DDI_FAILURE);
5939 
5940 		case -1:
5941 			if (got_semaphore_here != 0) {
5942 				sema_v(&un->un_semoclose);
5943 			}
5944 			/*
5945 			 * On exit put the state back to its original value
5946 			 * and broadcast to anyone waiting for the power
5947 			 * change completion.
5948 			 */
5949 			mutex_enter(SD_MUTEX(un));
5950 			un->un_state = state_before_pm;
5951 			cv_broadcast(&un->un_suspend_cv);
5952 			mutex_exit(SD_MUTEX(un));
5953 			SD_TRACE(SD_LOG_IO_PM, un,
5954 			    "sdpower: exit, trans check command Failed.\n");
5955 			return (DDI_FAILURE);
5956 		}
5957 	}
5958 
5959 	if (level == SD_SPINDLE_OFF) {
5960 		/*
5961 		 * Save the last state; if the STOP fails we need it
5962 		 * for restoring.
5963 		 */
5964 		mutex_enter(SD_MUTEX(un));
5965 		save_state = un->un_last_state;
5966 		/*
5967 		 * There must not be any commands getting processed
5968 		 * in the driver when we get here. Power to the
5969 		 * device is potentially going off.
5970 		 */
5971 		ASSERT(un->un_ncmds_in_driver == 0);
5972 		mutex_exit(SD_MUTEX(un));
5973 
5974 		/*
5975 		 * For now suspend the device completely before spindle is
5976 		 * turned off
5977 		 */
5978 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
5979 			if (got_semaphore_here != 0) {
5980 				sema_v(&un->un_semoclose);
5981 			}
5982 			/*
5983 			 * On exit put the state back to its original value
5984 			 * and broadcast to anyone waiting for the power
5985 			 * change completion.
5986 			 */
5987 			mutex_enter(SD_MUTEX(un));
5988 			un->un_state = state_before_pm;
5989 			cv_broadcast(&un->un_suspend_cv);
5990 			mutex_exit(SD_MUTEX(un));
5991 			SD_TRACE(SD_LOG_IO_PM, un,
5992 			    "sdpower: exit, PM suspend Failed.\n");
5993 			return (DDI_FAILURE);
5994 		}
5995 	}
5996 
5997 	/*
5998 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
5999 	 * close, or strategy. Dump no longer uses this routine; it uses its
6000 	 * own code so it can be done in polled mode.
6001 	 */
6002 
6003 	medium_present = TRUE;
6004 
6005 	/*
6006 	 * When powering up, issue a TUR in case the device is at unit
6007 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6008 	 * a deadlock on un_pm_busy_cv will occur.
6009 	 */
6010 	if (level == SD_SPINDLE_ON) {
6011 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6012 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6013 	}
6014 
6015 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6016 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6017 
6018 	sval = sd_send_scsi_START_STOP_UNIT(un,
6019 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6020 	    SD_PATH_DIRECT);
6021 	/* Command failed, check for media present. */
6022 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6023 		medium_present = FALSE;
6024 	}
6025 
6026 	/*
6027 	 * The conditions of interest here are:
6028 	 *   if a spindle off with media present fails,
6029 	 *	then restore the state and return an error.
6030 	 *   else if a spindle on fails,
6031 	 *	then return an error (there's no state to restore).
6032 	 * In all other cases we setup for the new state
6033 	 * and return success.
6034 	 */
6035 	switch (level) {
6036 	case SD_SPINDLE_OFF:
6037 		if ((medium_present == TRUE) && (sval != 0)) {
6038 			/* The stop command from above failed */
6039 			rval = DDI_FAILURE;
6040 			/*
6041 			 * The stop command failed, and we have media
6042 			 * present. Put the level back by calling
6043 			 * sd_ddi_pm_resume() and set the state back to
6044 			 * its previous value.
6045 			 */
6046 			(void) sd_ddi_pm_resume(un);
6047 			mutex_enter(SD_MUTEX(un));
6048 			un->un_last_state = save_state;
6049 			mutex_exit(SD_MUTEX(un));
6050 			break;
6051 		}
6052 		/*
6053 		 * The stop command from above succeeded.
6054 		 */
6055 		if (un->un_f_monitor_media_state) {
6056 			/*
6057 			 * Terminate watch thread in case of removable media
6058 			 * devices going into low power state. This is as per
6059 			 * the requirements of the pm framework; otherwise commands
6060 			 * will be generated for the device (through watch
6061 			 * thread), even when the device is in low power state.
6062 			 */
6063 			mutex_enter(SD_MUTEX(un));
6064 			un->un_f_watcht_stopped = FALSE;
6065 			if (un->un_swr_token != NULL) {
6066 				opaque_t temp_token = un->un_swr_token;
6067 				un->un_f_watcht_stopped = TRUE;
6068 				un->un_swr_token = NULL;
6069 				mutex_exit(SD_MUTEX(un));
6070 				(void) scsi_watch_request_terminate(temp_token,
6071 				    SCSI_WATCH_TERMINATE_WAIT);
6072 			} else {
6073 				mutex_exit(SD_MUTEX(un));
6074 			}
6075 		}
6076 		break;
6077 
6078 	default:	/* The level requested is spindle on... */
6079 		/*
6080 		 * Legacy behavior: return success on a failed spinup
6081 		 * if there is no media in the drive.
6082 		 * Do this by looking at medium_present here.
6083 		 */
6084 		if ((sval != 0) && medium_present) {
6085 			/* The start command from above failed */
6086 			rval = DDI_FAILURE;
6087 			break;
6088 		}
6089 		/*
6090 		 * The start command from above succeeded
6091 		 * Resume the devices now that we have
6092 		 * started the disks
6093 		 */
6094 		(void) sd_ddi_pm_resume(un);
6095 
6096 		/*
6097 		 * Resume the watch thread since it was suspended
6098 		 * when the device went into low power mode.
6099 		 */
6100 		if (un->un_f_monitor_media_state) {
6101 			mutex_enter(SD_MUTEX(un));
6102 			if (un->un_f_watcht_stopped == TRUE) {
6103 				opaque_t temp_token;
6104 
6105 				un->un_f_watcht_stopped = FALSE;
6106 				mutex_exit(SD_MUTEX(un));
6107 				temp_token = scsi_watch_request_submit(
6108 				    SD_SCSI_DEVP(un),
6109 				    sd_check_media_time,
6110 				    SENSE_LENGTH, sd_media_watch_cb,
6111 				    (caddr_t)dev);
6112 				mutex_enter(SD_MUTEX(un));
6113 				un->un_swr_token = temp_token;
6114 			}
6115 			mutex_exit(SD_MUTEX(un));
6116 		}
6117 	}
6118 	if (got_semaphore_here != 0) {
6119 		sema_v(&un->un_semoclose);
6120 	}
6121 	/*
6122 	 * On exit put the state back to its original value
6123 	 * and broadcast to anyone waiting for the power
6124 	 * change completion.
6125 	 */
6126 	mutex_enter(SD_MUTEX(un));
6127 	un->un_state = state_before_pm;
6128 	cv_broadcast(&un->un_suspend_cv);
6129 	mutex_exit(SD_MUTEX(un));
6130 
6131 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6132 
6133 	return (rval);
6134 }
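/*
 * Illustrative sketch, not part of the original driver: sdpower()
 * assembles its 32-bit cycle counters byte-by-byte because the log
 * page stores them big-endian.  The same extraction as a helper (the
 * name is made up; 0x1c and 0x24 are the offsets used above):
 *
 *	maxcycles = sd_log_be32(log_page_data, 0x1c);
 *	ncycles   = sd_log_be32(log_page_data, 0x24);
 */
#if 0	/* illustration only */
static uint_t
sd_log_be32(const uchar_t *page, size_t off)
{
	return (((uint_t)page[off] << 24) | ((uint_t)page[off + 1] << 16) |
	    ((uint_t)page[off + 2] << 8) | (uint_t)page[off + 3]);
}
#endif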
6135 
6136 
6137 
6138 /*
6139  *    Function: sdattach
6140  *
6141  * Description: Driver's attach(9e) entry point function.
6142  *
6143  *   Arguments: devi - opaque device info handle
6144  *		cmd  - attach  type
6145  *
6146  * Return Code: DDI_SUCCESS
6147  *		DDI_FAILURE
6148  *
6149  *     Context: Kernel thread context
6150  */
6151 
6152 static int
6153 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6154 {
6155 	switch (cmd) {
6156 	case DDI_ATTACH:
6157 		return (sd_unit_attach(devi));
6158 	case DDI_RESUME:
6159 		return (sd_ddi_resume(devi));
6160 	default:
6161 		break;
6162 	}
6163 	return (DDI_FAILURE);
6164 }
6165 
6166 
6167 /*
6168  *    Function: sddetach
6169  *
6170  * Description: Driver's detach(9E) entry point function.
6171  *
6172  *   Arguments: devi - opaque device info handle
6173  *		cmd  - detach  type
6174  *
6175  * Return Code: DDI_SUCCESS
6176  *		DDI_FAILURE
6177  *
6178  *     Context: Kernel thread context
6179  */
6180 
6181 static int
6182 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6183 {
6184 	switch (cmd) {
6185 	case DDI_DETACH:
6186 		return (sd_unit_detach(devi));
6187 	case DDI_SUSPEND:
6188 		return (sd_ddi_suspend(devi));
6189 	default:
6190 		break;
6191 	}
6192 	return (DDI_FAILURE);
6193 }
6194 
6195 
6196 /*
6197  *     Function: sd_sync_with_callback
6198  *
6199  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6200  *		 state while the callback routine is active.
6201  *
6202  *    Arguments: un: softstate structure for the instance
6203  *
6204  *	Context: Kernel thread context
6205  */
6206 
6207 static void
6208 sd_sync_with_callback(struct sd_lun *un)
6209 {
6210 	ASSERT(un != NULL);
6211 
6212 	mutex_enter(SD_MUTEX(un));
6213 
6214 	ASSERT(un->un_in_callback >= 0);
6215 
6216 	while (un->un_in_callback > 0) {
6217 		mutex_exit(SD_MUTEX(un));
6218 		delay(2);
6219 		mutex_enter(SD_MUTEX(un));
6220 	}
6221 
6222 	mutex_exit(SD_MUTEX(un));
6223 }
6224 
6225 /*
6226  *    Function: sd_unit_attach
6227  *
6228  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6229  *		the soft state structure for the device and performs
6230  *		all necessary structure and device initializations.
6231  *
6232  *   Arguments: devi: the system's dev_info_t for the device.
6233  *
6234  * Return Code: DDI_SUCCESS if attach is successful.
6235  *		DDI_FAILURE if any part of the attach fails.
6236  *
6237  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6238  *		Kernel thread context only.  Can sleep.
6239  */
6240 
6241 static int
6242 sd_unit_attach(dev_info_t *devi)
6243 {
6244 	struct	scsi_device	*devp;
6245 	struct	sd_lun		*un;
6246 	char			*variantp;
6247 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6248 	int	instance;
6249 	int	rval;
6250 	int	wc_enabled;
6251 	int	tgt;
6252 	uint64_t	capacity;
6253 	uint_t		lbasize = 0;
6254 	dev_info_t	*pdip = ddi_get_parent(devi);
6255 	int		offbyone = 0;
6256 	int		geom_label_valid = 0;
6257 
6258 	/*
6259 	 * Retrieve the target driver's private data area. This was set
6260 	 * up by the HBA.
6261 	 */
6262 	devp = ddi_get_driver_private(devi);
6263 
6264 	/*
6265 	 * Retrieve the target ID of the device.
6266 	 */
6267 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6268 	    SCSI_ADDR_PROP_TARGET, -1);
6269 
6270 	/*
6271 	 * Since we have no idea what state things were left in by the last
6272 	 * user of the device, set up some 'default' settings, ie. turn 'em
6273 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6274 	 * Do this before the scsi_probe, which sends an inquiry.
6275 	 * This is a fix for bug (4430280).
6276 	 * Of special importance is wide-xfer. The drive could have been left
6277 	 * in wide transfer mode by the last driver to communicate with it,
6278 	 * this includes us. If that's the case, and if the following is not
6279 	 * setup properly or we don't re-negotiate with the drive prior to
6280 	 * transferring data to/from the drive, it causes bus parity errors,
6281 	 * data overruns, and unexpected interrupts. This first occurred when
6282 	 * the fix for bug (4378686) was made.
6283 	 */
6284 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6285 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6286 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6287 
6288 	/*
6289 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6290 	 * on a target. Setting it per lun instance actually sets the
6291 	 * capability of this target, which affects those luns already
6292 	 * attached on the same target. So during attach, we can disable
6293 	 * this capability only when no other lun has been attached on this
6294 	 * target. By doing this, we assume a target has the same tagged-qing
6295 	 * capability for every lun. The condition can be removed when HBA
6296 	 * is changed to support per lun based tagged-qing capability.
6297 	 */
6298 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6299 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6300 	}
6301 
6302 	/*
6303 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6304 	 * This call will allocate and fill in the scsi_inquiry structure
6305 	 * and point the sd_inq member of the scsi_device structure to it.
6306 	 * If the attach succeeds, then this memory will not be de-allocated
6307 	 * (via scsi_unprobe()) until the instance is detached.
6308 	 */
6309 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6310 		goto probe_failed;
6311 	}
6312 
6313 	/*
6314 	 * Check the device type as specified in the inquiry data and
6315 	 * claim it if it is of a type that we support.
6316 	 */
6317 	switch (devp->sd_inq->inq_dtype) {
6318 	case DTYPE_DIRECT:
6319 		break;
6320 	case DTYPE_RODIRECT:
6321 		break;
6322 	case DTYPE_OPTICAL:
6323 		break;
6324 	case DTYPE_NOTPRESENT:
6325 	default:
6326 		/* Unsupported device type; fail the attach. */
6327 		goto probe_failed;
6328 	}
6329 
6330 	/*
6331 	 * Allocate the soft state structure for this unit.
6332 	 *
6333 	 * We rely upon this memory being set to all zeroes by
6334 	 * ddi_soft_state_zalloc().  We assume that any member of the
6335 	 * soft state structure that is not explicitly initialized by
6336 	 * this routine will have a value of zero.
6337 	 */
6338 	instance = ddi_get_instance(devp->sd_dev);
6339 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6340 		goto probe_failed;
6341 	}
6342 
6343 	/*
6344 	 * Retrieve a pointer to the newly-allocated soft state.
6345 	 *
6346 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6347 	 * was successful, unless something has gone horribly wrong and the
6348 	 * ddi's soft state internals are corrupt (in which case it is
6349 	 * probably better to halt here than just fail the attach....)
6350 	 */
6351 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6352 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6353 		    instance);
6354 		/*NOTREACHED*/
6355 	}
6356 
6357 	/*
6358 	 * Link the back ptr of the driver soft state to the scsi_device
6359 	 * struct for this lun.
6360 	 * Save a pointer to the softstate in the driver-private area of
6361 	 * the scsi_device struct.
6362 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6363 	 * we first set un->un_sd below.
6364 	 */
6365 	un->un_sd = devp;
6366 	devp->sd_private = (opaque_t)un;
6367 
6368 	/*
6369 	 * The following must be after devp is stored in the soft state struct.
6370 	 */
6371 #ifdef SDDEBUG
6372 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6373 	    "%s_unit_attach: un:0x%p instance:%d\n",
6374 	    ddi_driver_name(devi), un, instance);
6375 #endif
6376 
6377 	/*
6378 	 * Set up the device type and node type (for the minor nodes).
6379 	 * By default we assume that the device can at least support the
6380 	 * Common Command Set. Call it a CD-ROM if it reports itself
6381 	 * as a RODIRECT device.
6382 	 */
6383 	switch (devp->sd_inq->inq_dtype) {
6384 	case DTYPE_RODIRECT:
6385 		un->un_node_type = DDI_NT_CD_CHAN;
6386 		un->un_ctype	 = CTYPE_CDROM;
6387 		break;
6388 	case DTYPE_OPTICAL:
6389 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6390 		un->un_ctype	 = CTYPE_ROD;
6391 		break;
6392 	default:
6393 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6394 		un->un_ctype	 = CTYPE_CCS;
6395 		break;
6396 	}
6397 
6398 	/*
6399 	 * Try to read the interconnect type from the HBA.
6400 	 *
6401 	 * Note: This driver is currently compiled as two binaries, a parallel
6402 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6403 	 * differences are determined at compile time. In the future a single
6404 	 * binary will be provided and the interconnect type will be used to
6405 	 * differentiate between fibre and parallel scsi behaviors. At that time
6406 	 * it will be necessary for all fibre channel HBAs to support this
6407 	 * property.
6408 	 *
6409 	 * set un_f_is_fiber to TRUE ( default fiber )
6410 	 * Set un_f_is_fibre to TRUE (default fibre).
6411 	un->un_f_is_fibre = TRUE;
6412 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6413 	case INTERCONNECT_SSA:
6414 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6415 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6416 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6417 		break;
6418 	case INTERCONNECT_PARALLEL:
6419 		un->un_f_is_fibre = FALSE;
6420 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6421 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6422 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6423 		break;
6424 	case INTERCONNECT_SATA:
6425 		un->un_f_is_fibre = FALSE;
6426 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6427 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6428 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6429 		break;
6430 	case INTERCONNECT_FIBRE:
6431 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6432 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6433 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6434 		break;
6435 	case INTERCONNECT_FABRIC:
6436 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6437 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6438 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6439 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6440 		break;
6441 	default:
6442 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6443 		/*
6444 		 * The HBA does not support the "interconnect-type" property
6445 		 * (or did not provide a recognized type).
6446 		 *
6447 		 * Note: This will be obsoleted when a single fibre channel
6448 		 * and parallel scsi driver is delivered. In the meantime the
6449 		 * interconnect type will be set to the platform default. If that
6450 		 * type is not parallel SCSI, it means that we should be
6451 		 * assuming "ssd" semantics. However, here this also means that
6452 		 * the FC HBA is not supporting the "interconnect-type" property
6453 		 * like we expect it to, so log this occurrence.
6454 		 */
6455 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6456 		if (!SD_IS_PARALLEL_SCSI(un)) {
6457 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6458 			    "sd_unit_attach: un:0x%p Assuming "
6459 			    "INTERCONNECT_FIBRE\n", un);
6460 		} else {
6461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6462 			    "sd_unit_attach: un:0x%p Assuming "
6463 			    "INTERCONNECT_PARALLEL\n", un);
6464 			un->un_f_is_fibre = FALSE;
6465 		}
6466 #else
6467 		/*
6468 		 * Note: This source will be implemented when a single fibre
6469 		 * channel and parallel scsi driver is delivered. The default
6470 		 * will be to assume that if a device does not support the
6471 		 * "interconnect-type" property it is a parallel SCSI HBA and
6472 		 * we will set the interconnect type for parallel scsi.
6473 		 */
6474 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6475 		un->un_f_is_fibre = FALSE;
6476 #endif
6477 		break;
6478 	}
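	/*
	 * Note: scsi_ifgetcap(9F) returns -1 for a capability the HBA
	 * leaves undefined, so HBAs that do not implement the
	 * "interconnect-type" property land in the default case above.
	 */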
6479 
6480 	if (un->un_f_is_fibre == TRUE) {
6481 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6482 		    SCSI_VERSION_3) {
6483 			switch (un->un_interconnect_type) {
6484 			case SD_INTERCONNECT_FIBRE:
6485 			case SD_INTERCONNECT_SSA:
6486 				un->un_node_type = DDI_NT_BLOCK_WWN;
6487 				break;
6488 			default:
6489 				break;
6490 			}
6491 		}
6492 	}
6493 
6494 	/*
6495 	 * Initialize the Request Sense command for the target
6496 	 */
6497 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6498 		goto alloc_rqs_failed;
6499 	}
6500 
6501 	/*
6502 	 * Set un_retry_count to SD_RETRY_COUNT; this is fine for SPARC,
6503 	 * which has separate binaries for sd and ssd.
6504 	 *
6505 	 * x86 has one binary, so un_retry_count is set based on the
6506 	 * interconnect type. These hardcoded values will go away when
6507 	 * SPARC also uses a single binary for sd and ssd; they need to
6508 	 * match SD_RETRY_COUNT in sddef.h.
6509 	 * The value used is based on the interconnect type:
6510 	 * fibre = 3, parallel = 5
6511 	 */
6512 #if defined(__i386) || defined(__amd64)
6513 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6514 #else
6515 	un->un_retry_count = SD_RETRY_COUNT;
6516 #endif
6517 
6518 	/*
6519 	 * Set the per disk retry count to the default number of retries
6520 	 * for disks and CDROMs. This value can be overridden by the
6521 	 * disk property list or an entry in sd.conf.
6522 	 */
6523 	un->un_notready_retry_count =
6524 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6525 			: DISK_NOT_READY_RETRY_COUNT(un);
6526 
6527 	/*
6528 	 * Set the busy retry count to the default value of un_retry_count.
6529 	 * This can be overridden by entries in sd.conf or the device
6530 	 * config table.
6531 	 */
6532 	un->un_busy_retry_count = un->un_retry_count;
6533 
6534 	/*
6535 	 * Init the reset threshold for retries.  This number determines
6536 	 * how many retries must be performed before a reset can be issued
6537 	 * (for certain error conditions). This can be overridden by entries
6538 	 * in sd.conf or the device config table.
6539 	 */
6540 	un->un_reset_retry_count = (un->un_retry_count / 2);
6541 
6542 	/*
6543 	 * Set the victim_retry_count to twice the default un_retry_count.
6544 	 */
6545 	un->un_victim_retry_count = (2 * un->un_retry_count);
6546 
6547 	/*
6548 	 * Set the reservation release timeout to the default value of
6549 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6550 	 * device config table.
6551 	 */
6552 	un->un_reserve_release_time = 5;
6553 
6554 	/*
6555 	 * Set up the default maximum transfer size. Note that this may
6556 	 * get updated later in the attach, when setting up default wide
6557 	 * operations for disks.
6558 	 */
6559 #if defined(__i386) || defined(__amd64)
6560 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6561 #else
6562 	un->un_max_xfer_size = (uint_t)maxphys;
6563 #endif
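	/*
	 * Note: maxphys is the system-wide maximum physical I/O size; an
	 * administrator could raise it with an /etc/system entry such as
	 * the following (illustrative value only):
	 *
	 *	set maxphys = 0x100000
	 */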
6564 
6565 	/*
6566 	 * Get "allow bus device reset" property (defaults to "enabled" if
6567 	 * the property was not defined). This is to disable bus resets for
6568 	 * certain kinds of error recovery. Note: In the future when a run-time
6569 	 * fibre check is available the soft state flag should default to
6570 	 * enabled.
6571 	 */
6572 	if (un->un_f_is_fibre == TRUE) {
6573 		un->un_f_allow_bus_device_reset = TRUE;
6574 	} else {
6575 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6576 			"allow-bus-device-reset", 1) != 0) {
6577 			un->un_f_allow_bus_device_reset = TRUE;
6578 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6579 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
6580 				un);
6581 		} else {
6582 			un->un_f_allow_bus_device_reset = FALSE;
6583 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6584 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
6585 				un);
6586 		}
6587 	}
6588 
6589 	/*
6590 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6591 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6592 	 *
6593 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6594 	 * property. The new "variant" property with a value of "atapi" has been
6595 	 * introduced so that future 'variants' of standard SCSI behavior (like
6596 	 * atapi) could be specified by the underlying HBA drivers by supplying
6597 	 * a new value for the "variant" property, instead of having to define a
6598 	 * new property.
6599 	 */
6600 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6601 		un->un_f_cfg_is_atapi = TRUE;
6602 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6603 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6604 	}
6605 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6606 	    &variantp) == DDI_PROP_SUCCESS) {
6607 		if (strcmp(variantp, "atapi") == 0) {
6608 			un->un_f_cfg_is_atapi = TRUE;
6609 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6610 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6611 		}
6612 		ddi_prop_free(variantp);
6613 	}
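	/*
	 * For illustration, an HBA nexus driver might publish this property
	 * on the child node with something like the following (hypothetical
	 * sketch; child_dip is the target's devinfo node):
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 *
	 * which the ddi_prop_lookup_string() call above then detects.
	 */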
6614 
6615 	un->un_cmd_timeout	= SD_IO_TIME;
6616 
6617 	/* Info on current states, statuses, etc. (Updated frequently) */
6618 	un->un_state		= SD_STATE_NORMAL;
6619 	un->un_last_state	= SD_STATE_NORMAL;
6620 
6621 	/* Control & status info for command throttling */
6622 	un->un_throttle		= sd_max_throttle;
6623 	un->un_saved_throttle	= sd_max_throttle;
6624 	un->un_min_throttle	= sd_min_throttle;
6625 
6626 	if (un->un_f_is_fibre == TRUE) {
6627 		un->un_f_use_adaptive_throttle = TRUE;
6628 	} else {
6629 		un->un_f_use_adaptive_throttle = FALSE;
6630 	}
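	/*
	 * Adaptive throttling allows the driver to lower un_throttle in
	 * response to transport flow control (e.g. TRAN_BUSY) and later
	 * ramp it back toward un_saved_throttle; see sd_reduce_throttle()
	 * and sd_restore_throttle().
	 */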
6631 
6632 	/* Removable media support. */
6633 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6634 	un->un_mediastate		= DKIO_NONE;
6635 	un->un_specified_mediastate	= DKIO_NONE;
6636 
6637 	/* CVs for suspend/resume (PM or DR) */
6638 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6639 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6640 
6641 	/* Power management support. */
6642 	un->un_power_level = SD_SPINDLE_UNINIT;
6643 
6644 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6645 	un->un_f_wcc_inprog = 0;
6646 
6647 	/*
6648 	 * The open/close semaphore is used to serialize threads executing
6649 	 * in the driver's open & close entry point routines for a given
6650 	 * instance.
6651 	 */
6652 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
6653 
6654 	/*
6655 	 * The conf file entry and softstate variable are a forceful override,
6656 	 * meaning a non-zero value must be entered to change the default.
6657 	 */
6658 	un->un_f_disksort_disabled = FALSE;
6659 
6660 	/*
6661 	 * Retrieve the properties from the static driver table or the driver
6662 	 * configuration file (.conf) for this unit and update the soft state
6663 	 * for the device as needed for the indicated properties.
6664 	 * Note: the property configuration needs to occur here as some of the
6665 	 * following routines may have dependencies on soft state flags set
6666 	 * as part of the driver property configuration.
6667 	 */
6668 	sd_read_unit_properties(un);
6669 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6670 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6671 
6672 	/*
6673 	 * A device is treated as hotpluggable only if it has the
6674 	 * "hotpluggable" property; otherwise it is regarded as
6675 	 * non-hotpluggable.
6676 	 */
6677 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6678 	    -1) != -1) {
6679 		un->un_f_is_hotpluggable = TRUE;
6680 	}
6681 
6682 	/*
6683 	 * Set the unit's attributes (flags) according to the "hotpluggable"
6684 	 * property and the RMB bit in the INQUIRY data.
6685 	 */
6686 	sd_set_unit_attributes(un, devi);
6687 
6688 	/*
6689 	 * By default, we mark the capacity, lbasize, and geometry
6690 	 * as invalid. Only if we successfully read a valid capacity
6691 	 * will we update the un_blockcount and un_tgt_blocksize with the
6692 	 * valid values (the geometry will be validated later).
6693 	 */
6694 	un->un_f_blockcount_is_valid	= FALSE;
6695 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6696 
6697 	/*
6698 	 * Use DEV_BSIZE (512 bytes) and DEV_BSHIFT as defaults, until we
6699 	 * can determine otherwise.
6700 	 */
6701 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6702 	un->un_blockcount = 0;
6703 
6704 	/*
6705 	 * Set up the per-instance info needed to determine the correct
6706 	 * CDBs and other info for issuing commands to the target.
6707 	 */
6708 	sd_init_cdb_limits(un);
6709 
6710 	/*
6711 	 * Set up the IO chains to use, based upon the target type.
6712 	 */
6713 	if (un->un_f_non_devbsize_supported) {
6714 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6715 	} else {
6716 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6717 	}
6718 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6719 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6720 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6721 
6722 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6723 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6724 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6725 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
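	/*
	 * The xbuf attribute created above ties this instance into the
	 * driver-private xbuf facility (see sd_xbuf.h), which queues
	 * incoming bufs and, roughly speaking, bounds the number being
	 * actively processed at sd_xbuf_active_limit.
	 */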
6726 
6727 
6728 	if (ISCD(un)) {
6729 		un->un_additional_codes = sd_additional_codes;
6730 	} else {
6731 		un->un_additional_codes = NULL;
6732 	}
6733 
6734 	/*
6735 	 * Create the kstats here so they can be available for attach-time
6736 	 * routines that send commands to the unit (either polled or via
6737 	 * sd_send_scsi_cmd).
6738 	 *
6739 	 * Note: This is a critical sequence that needs to be maintained:
6740 	 *	1) Instantiate the kstats here, before any routines using the
6741 	 *	   iopath (i.e. sd_send_scsi_cmd).
6742 	 *	2) Instantiate and initialize the partition stats
6743 	 *	   (sd_set_pstats).
6744 	 *	3) Initialize the error stats (sd_set_errstats), following
6745 	 *	   sd_validate_geometry(), sd_register_devid(),
6746 	 *	   and sd_cache_control().
6747 	 */
6748 
6749 	un->un_stats = kstat_create(sd_label, instance,
6750 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6751 	if (un->un_stats != NULL) {
6752 		un->un_stats->ks_lock = SD_MUTEX(un);
6753 		kstat_install(un->un_stats);
6754 	}
6755 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6756 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6757 
6758 	sd_create_errstats(un, instance);
6759 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6760 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6761 
6762 	/*
6763 	 * The following if/else code was relocated here from below as part
6764 	 * of the fix for bug (4430280). However with the default setup added
6765 	 * on entry to this routine, it's no longer absolutely necessary for
6766 	 * this to be before the call to sd_spin_up_unit.
6767 	 */
6768 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6769 		/*
6770 		 * If SCSI-2 tagged queueing is supported by the target
6771 		 * and by the host adapter then we will enable it.
6772 		 */
6773 		un->un_tagflags = 0;
6774 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6775 		    (devp->sd_inq->inq_cmdque) &&
6776 		    (un->un_f_arq_enabled == TRUE)) {
6777 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6778 			    1, 1) == 1) {
6779 				un->un_tagflags = FLAG_STAG;
6780 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6781 				    "sd_unit_attach: un:0x%p tag queueing "
6782 				    "enabled\n", un);
6783 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6784 			    "untagged-qing", 0) == 1) {
6785 				un->un_f_opt_queueing = TRUE;
6786 				un->un_saved_throttle = un->un_throttle =
6787 				    min(un->un_throttle, 3);
6788 			} else {
6789 				un->un_f_opt_queueing = FALSE;
6790 				un->un_saved_throttle = un->un_throttle = 1;
6791 			}
6792 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6793 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6794 			/* The Host Adapter supports internal queueing. */
6795 			un->un_f_opt_queueing = TRUE;
6796 			un->un_saved_throttle = un->un_throttle =
6797 			    min(un->un_throttle, 3);
6798 		} else {
6799 			un->un_f_opt_queueing = FALSE;
6800 			un->un_saved_throttle = un->un_throttle = 1;
6801 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6802 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6803 		}
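		/*
		 * To summarize the decision above:
		 *	tagged queueing works	-> un_tagflags = FLAG_STAG
		 *	only untagged queueing	-> throttle capped at 3
		 *	neither			-> throttle forced to 1
		 */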
6804 
6805 		/*
6806 		 * Enable large transfers for SATA/SAS drives
6807 		 */
6808 		if (SD_IS_SERIAL(un)) {
6809 			un->un_max_xfer_size =
6810 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6811 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6812 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6813 			    "sd_unit_attach: un:0x%p max transfer "
6814 			    "size=0x%x\n", un, un->un_max_xfer_size);
6815 
6816 		}
6817 
6818 		/* Set up or tear down default wide operations for disks */
6819 
6820 		/*
6821 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6822 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6823 		 * system and be set to different values. In the future this
6824 		 * code may need to be updated when the ssd module is
6825 		 * obsoleted and removed from the system. (4299588)
6826 		 */
6827 		if (SD_IS_PARALLEL_SCSI(un) &&
6828 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6829 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6830 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6831 			    1, 1) == 1) {
6832 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6833 				    "sd_unit_attach: un:0x%p Wide Transfer "
6834 				    "enabled\n", un);
6835 			}
6836 
6837 			/*
6838 			 * If tagged queuing has also been enabled, then
6839 			 * enable large xfers
6840 			 */
6841 			if (un->un_saved_throttle == sd_max_throttle) {
6842 				un->un_max_xfer_size =
6843 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6844 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6845 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6846 				    "sd_unit_attach: un:0x%p max transfer "
6847 				    "size=0x%x\n", un, un->un_max_xfer_size);
6848 			}
6849 		} else {
6850 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6851 			    0, 1) == 1) {
6852 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6853 				    "sd_unit_attach: un:0x%p "
6854 				    "Wide Transfer disabled\n", un);
6855 			}
6856 		}
6857 	} else {
6858 		un->un_tagflags = FLAG_STAG;
6859 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6860 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6861 	}
6862 
6863 	/*
6864 	 * If this target supports LUN reset, try to enable it.
6865 	 */
6866 	if (un->un_f_lun_reset_enabled) {
6867 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6868 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6869 			    "un:0x%p lun_reset capability set\n", un);
6870 		} else {
6871 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6872 			    "un:0x%p lun-reset capability not set\n", un);
6873 		}
6874 	}
6875 
6876 	/*
6877 	 * At this point in the attach, we have enough info in the
6878 	 * soft state to be able to issue commands to the target.
6879 	 *
6880 	 * All command paths used below MUST issue their commands as
6881 	 * SD_PATH_DIRECT. This is important as intermediate layers
6882 	 * are not all initialized yet (such as PM).
6883 	 */
6884 
6885 	/*
6886 	 * Send a TEST UNIT READY command to the device. This should clear
6887 	 * any outstanding UNIT ATTENTION that may be present.
6888 	 *
6889 	 * Note: Don't check for success; just track whether there is a
6890 	 * reservation. This is a throw-away command to clear unit attentions.
6891 	 *
6892 	 * Note: This MUST be the first command issued to the target during
6893 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
6894 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
6895 	 * with attempts at spinning up a device with no media.
6896 	 */
6897 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
6898 		reservation_flag = SD_TARGET_IS_RESERVED;
6899 	}
6900 
6901 	/*
6902 	 * If the device is NOT a removable media device, attempt to spin
6903 	 * it up (using the START_STOP_UNIT command) and read its capacity
6904 	 * (using the READ CAPACITY command).  Note, however, that either
6905 	 * of these could fail and in some cases we would continue with
6906 	 * the attach despite the failure (see below).
6907 	 */
6908 	if (un->un_f_descr_format_supported) {
6909 		switch (sd_spin_up_unit(un)) {
6910 		case 0:
6911 			/*
6912 			 * Spin-up was successful; now try to read the
6913 			 * capacity.  If successful then save the results
6914 			 * and mark the capacity & lbasize as valid.
6915 			 */
6916 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6917 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
6918 
6919 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
6920 			    &lbasize, SD_PATH_DIRECT)) {
6921 			case 0: {
6922 				if (capacity > DK_MAX_BLOCKS) {
6923 #ifdef _LP64
6924 					if (capacity + 1 >
6925 					    SD_GROUP1_MAX_ADDRESS) {
6926 						/*
6927 						 * Enable descriptor format
6928 						 * sense data so that we can
6929 						 * get 64 bit sense data
6930 						 * fields.
6931 						 */
6932 						sd_enable_descr_sense(un);
6933 					}
6934 #else
6935 					/* 32-bit kernels can't handle this */
6936 					scsi_log(SD_DEVINFO(un),
6937 					    sd_label, CE_WARN,
6938 					    "disk has %llu blocks, which "
6939 					    "is too large for a 32-bit "
6940 					    "kernel", capacity);
6941 
6942 #if defined(__i386) || defined(__amd64)
6943 					/*
6944 					 * A 1TB disk was treated as
6945 					 * (1T - 512)B in the past, so it
6946 					 * might have a valid VTOC and
6947 					 * Solaris partitions; we have to
6948 					 * allow it to continue to work.
6949 					 */
6950 					if (capacity - 1 > DK_MAX_BLOCKS)
6951 #endif
6952 					goto spinup_failed;
6953 #endif
6954 				}
6955 
6956 				/*
6957 				 * It is not necessary here to check whether
6958 				 * the device capacity exceeds what the max
6959 				 * hba cdb can address, because
6960 				 * sd_send_scsi_READ_CAPACITY retrieves the
6961 				 * capacity via a USCSI command, which is
6962 				 * itself constrained by the max hba cdb. In
6963 				 * fact, sd_send_scsi_READ_CAPACITY returns
6964 				 * EINVAL when a bigger cdb than the hba
6965 				 * supports would be required; that case is
6966 				 * handled below in "case EINVAL".
6967 				 */
6968 
6969 				/*
6970 				 * The following relies on
6971 				 * sd_send_scsi_READ_CAPACITY never
6972 				 * returning 0 for capacity and/or lbasize.
6973 				 */
6974 				sd_update_block_info(un, lbasize, capacity);
6975 
6976 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6977 				    "sd_unit_attach: un:0x%p capacity = %ld "
6978 				    "blocks; lbasize= %ld.\n", un,
6979 				    un->un_blockcount, un->un_tgt_blocksize);
6980 
6981 				break;
6982 			}
6983 			case EINVAL:
6984 				/*
6985 				 * In the case where the max-cdb-length property
6986 				 * is smaller than the required CDB length for
6987 				 * a SCSI device, a target driver can fail to
6988 				 * attach to that device.
6989 				 */
6990 				scsi_log(SD_DEVINFO(un),
6991 				    sd_label, CE_WARN,
6992 				    "disk capacity is too large "
6993 				    "for current cdb length");
6994 				goto spinup_failed;
6995 			case EACCES:
6996 				/*
6997 				 * Should never get here if the spin-up
6998 				 * succeeded, but code it in anyway.
6999 				 * From here, just continue with the attach...
7000 				 */
7001 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7002 				    "sd_unit_attach: un:0x%p "
7003 				    "sd_send_scsi_READ_CAPACITY "
7004 				    "returned reservation conflict\n", un);
7005 				reservation_flag = SD_TARGET_IS_RESERVED;
7006 				break;
7007 			default:
7008 				/*
7009 				 * Likewise, should never get here if the
7010 				 * spin-up succeeded. Just continue with
7011 				 * the attach...
7012 				 */
7013 				break;
7014 			}
7015 			break;
7016 		case EACCES:
7017 			/*
7018 			 * Device is reserved by another host.  In this case
7019 			 * we could not spin it up or read the capacity, but
7020 			 * we continue with the attach anyway.
7021 			 */
7022 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7023 			    "sd_unit_attach: un:0x%p spin-up reservation "
7024 			    "conflict.\n", un);
7025 			reservation_flag = SD_TARGET_IS_RESERVED;
7026 			break;
7027 		default:
7028 			/* Fail the attach if the spin-up failed. */
7029 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7030 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7031 			goto spinup_failed;
7032 		}
7033 	}
7034 
7035 	/*
7036 	 * Check to see if this is a MMC drive
7037 	 */
7038 	if (ISCD(un)) {
7039 		sd_set_mmc_caps(un);
7040 	}
7041 
7042 
7043 	/*
7044 	 * Add a zero-length attribute to tell the world we support
7045 	 * kernel ioctls (for layered drivers)
7046 	 */
7047 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7048 	    DDI_KERNEL_IOCTL, NULL, 0);
7049 
7050 	/*
7051 	 * Add a boolean property to tell the world we support
7052 	 * the B_FAILFAST flag (for layered drivers)
7053 	 */
7054 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7055 	    "ddi-failfast-supported", NULL, 0);
7056 
7057 	/*
7058 	 * Initialize power management
7059 	 */
7060 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7061 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7062 	sd_setup_pm(un, devi);
7063 	if (un->un_f_pm_is_enabled == FALSE) {
7064 		/*
7065 		 * For performance, point to a jump table that does
7066 		 * not include pm.
7067 		 * The direct and priority chains don't change with PM.
7068 		 *
7069 		 * Note: this is currently done based on individual device
7070 		 * capabilities. When an interface for determining system
7071 		 * power enabled state becomes available, or when additional
7072 		 * layers are added to the command chain, these values will
7073 		 * have to be re-evaluated for correctness.
7074 		 */
7075 		if (un->un_f_non_devbsize_supported) {
7076 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7077 		} else {
7078 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7079 		}
7080 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7081 	}
7082 
7083 	/*
7084 	 * This property is set to 0 by HA software to avoid retries
7085 	 * on a reserved disk. (The preferred property name is
7086 	 * "retry-on-reservation-conflict") (1189689)
7087 	 *
7088 	 * Note: The use of a global here can have unintended consequences. A
7089 	 * per instance variable is preferable to match the capabilities of
7090 	 * different underlying hba's (4402600)
7091 	 */
7092 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7093 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7094 	    sd_retry_on_reservation_conflict);
7095 	if (sd_retry_on_reservation_conflict != 0) {
7096 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7097 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7098 		    sd_retry_on_reservation_conflict);
7099 	}
7100 
7101 	/* Set up options for QFULL handling. */
7102 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7103 	    "qfull-retries", -1)) != -1) {
7104 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7105 		    rval, 1);
7106 	}
7107 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7108 	    "qfull-retry-interval", -1)) != -1) {
7109 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7110 		    rval, 1);
7111 	}
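	/*
	 * For illustration, a hypothetical sd.conf fragment tuning the
	 * QFULL handling above (values are examples only):
	 *
	 *	qfull-retries=10;
	 *	qfull-retry-interval=100;
	 */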
7112 
7113 	/*
7114 	 * This just prints a message that announces the existence of the
7115 	 * device. The message is always printed in the system logfile, but
7116 	 * only appears on the console if the system is booted with the
7117 	 * -v (verbose) argument.
7118 	 */
7119 	ddi_report_dev(devi);
7120 
7121 	un->un_mediastate = DKIO_NONE;
7122 
7123 	cmlb_alloc_handle(&un->un_cmlbhandle);
7124 
7125 #if defined(__i386) || defined(__amd64)
7126 	/*
7127 	 * On x86, compensate for off-by-1 legacy error
7128 	 */
7129 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7130 	    (lbasize == un->un_sys_blocksize))
7131 		offbyone = CMLB_OFF_BY_ONE;
7132 #endif
7133 
7134 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7135 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7136 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7137 	    (void *)SD_PATH_DIRECT) != 0) {
7138 		goto cmlb_attach_failed;
7139 	}
7140 
7141 
7142 	/*
7143 	 * Read and validate the device's geometry (ie, disk label)
7144 	 * A new unformatted drive will not have a valid geometry, but
7145 	 * the driver needs to successfully attach to this device so
7146 	 * the drive can be formatted via ioctls.
7147 	 */
7148 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7149 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7150 
7151 	mutex_enter(SD_MUTEX(un));
7152 
7153 	/*
7154 	 * Read and initialize the devid for the unit.
7155 	 */
7156 	ASSERT(un->un_errstats != NULL);
7157 	if (un->un_f_devid_supported) {
7158 		sd_register_devid(un, devi, reservation_flag);
7159 	}
7160 	mutex_exit(SD_MUTEX(un));
7161 
7162 #if (defined(__fibre))
7163 	/*
7164 	 * Register callbacks for fibre only.  You can't do this solely
7165 	 * on the basis of the devid_type because this is hba specific.
7166 	 * We need to query our hba capabilities to find out whether to
7167 	 * register or not.
7168 	 */
7169 	if (un->un_f_is_fibre) {
7170 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN) != 0) {
7171 			sd_init_event_callbacks(un);
7172 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7173 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
7174 		}
7175 	}
7176 #endif
7177 
7178 	if (un->un_f_opt_disable_cache == TRUE) {
7179 		/*
7180 		 * Disable both read cache and write cache.  This is
7181 		 * the historic behavior of the keywords in the config file.
7182 		 */
7183 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7184 		    0) {
7185 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7186 			    "sd_unit_attach: un:0x%p Could not disable "
7187 			    "caching", un);
7188 			goto devid_failed;
7189 		}
7190 	}
7191 
7192 	/*
7193 	 * Check the value of the WCE bit now and
7194 	 * set un_f_write_cache_enabled accordingly.
7195 	 */
7196 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7197 	mutex_enter(SD_MUTEX(un));
7198 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7199 	mutex_exit(SD_MUTEX(un));
7200 
7201 	/*
7202 	 * Set the pstat and error stat values here, so data obtained during the
7203 	 * previous attach-time routines is available.
7204 	 *
7205 	 * Note: This is a critical sequence that needs to be maintained:
7206 	 *	1) Instantiate the kstats before any routines using the iopath
7207 	 *	   (i.e. sd_send_scsi_cmd).
7208 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7209 	 *	   stats (sd_set_pstats) here, following
7210 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7211 	 *	   sd_cache_control().
7212 	 */
7213 
7214 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7215 		sd_set_pstats(un);
7216 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7217 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7218 	}
7219 
7220 	sd_set_errstats(un);
7221 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7222 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7223 
7224 	/*
7225 	 * Find out what type of reservation this disk supports.
7226 	 */
7227 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7228 	case 0:
7229 		/*
7230 		 * SCSI-3 reservations are supported.
7231 		 */
7232 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7233 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7234 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7235 		break;
7236 	case ENOTSUP:
7237 		/*
7238 		 * The PERSISTENT RESERVE IN command would not be recognized by
7239 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7240 		 */
7241 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7242 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7243 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7244 		break;
7245 	default:
7246 		/*
7247 		 * default to SCSI-3 reservations
7248 		 */
7249 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7250 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7251 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7252 		break;
7253 	}
7254 
7255 	/*
7256 	 * After successfully attaching an instance, we record the information
7257 	 * of how many luns have been attached to the corresponding target and
7258 	 * controller for parallel SCSI. This information is used when sd tries
7259 	 * to set the tagged queuing capability in HBA.
7260 	 */
7261 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7262 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7263 	}
7264 
7265 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7266 	    "sd_unit_attach: un:0x%p exit success\n", un);
7267 
7268 	return (DDI_SUCCESS);
7269 
7270 	/*
7271 	 * An error occurred during the attach; clean up & return failure.
7272 	 */
7273 
7274 devid_failed:
7275 
7276 setup_pm_failed:
7277 	ddi_remove_minor_node(devi, NULL);
7278 
7279 cmlb_attach_failed:
7280 	/*
7281 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7282 	 */
7283 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7284 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7285 
7286 	/*
7287 	 * Refer to the comments of setting tagged-qing in the beginning of
7288 	 * sd_unit_attach. We can only disable tagged queuing when there is
7289 	 * no lun attached on the target.
7290 	 */
7291 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7292 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7293 	}
7294 
7295 	if (un->un_f_is_fibre == FALSE) {
7296 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7297 	}
7298 
7299 spinup_failed:
7300 
7301 	mutex_enter(SD_MUTEX(un));
7302 
7303 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
7304 	if (un->un_direct_priority_timeid != NULL) {
7305 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7306 		un->un_direct_priority_timeid = NULL;
7307 		mutex_exit(SD_MUTEX(un));
7308 		(void) untimeout(temp_id);
7309 		mutex_enter(SD_MUTEX(un));
7310 	}
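	/*
	 * Note: in this and each cancellation below, the timeout id is
	 * captured and cleared under SD_MUTEX, then the mutex is dropped
	 * before calling untimeout(9F). untimeout() may wait for a running
	 * callback to complete, and the callback itself may need SD_MUTEX,
	 * so holding the mutex across the call could deadlock.
	 */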
7311 
7312 	/* Cancel any pending start/stop timeouts */
7313 	if (un->un_startstop_timeid != NULL) {
7314 		timeout_id_t temp_id = un->un_startstop_timeid;
7315 		un->un_startstop_timeid = NULL;
7316 		mutex_exit(SD_MUTEX(un));
7317 		(void) untimeout(temp_id);
7318 		mutex_enter(SD_MUTEX(un));
7319 	}
7320 
7321 	/* Cancel any pending reset-throttle timeouts */
7322 	if (un->un_reset_throttle_timeid != NULL) {
7323 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7324 		un->un_reset_throttle_timeid = NULL;
7325 		mutex_exit(SD_MUTEX(un));
7326 		(void) untimeout(temp_id);
7327 		mutex_enter(SD_MUTEX(un));
7328 	}
7329 
7330 	/* Cancel any pending retry timeouts */
7331 	if (un->un_retry_timeid != NULL) {
7332 		timeout_id_t temp_id = un->un_retry_timeid;
7333 		un->un_retry_timeid = NULL;
7334 		mutex_exit(SD_MUTEX(un));
7335 		(void) untimeout(temp_id);
7336 		mutex_enter(SD_MUTEX(un));
7337 	}
7338 
7339 	/* Cancel any pending delayed cv broadcast timeouts */
7340 	if (un->un_dcvb_timeid != NULL) {
7341 		timeout_id_t temp_id = un->un_dcvb_timeid;
7342 		un->un_dcvb_timeid = NULL;
7343 		mutex_exit(SD_MUTEX(un));
7344 		(void) untimeout(temp_id);
7345 		mutex_enter(SD_MUTEX(un));
7346 	}
7347 
7348 	mutex_exit(SD_MUTEX(un));
7349 
7350 	/* There should not be any in-progress I/O so ASSERT this check */
7351 	ASSERT(un->un_ncmds_in_transport == 0);
7352 	ASSERT(un->un_ncmds_in_driver == 0);
7353 
7354 	/* Do not free the softstate if the callback routine is active */
7355 	sd_sync_with_callback(un);
7356 
7357 	/*
7358 	 * Partition stats apparently are not used with removables. These would
7359 	 * not have been created during attach, so no need to clean them up...
7360 	 */
7361 	if (un->un_stats != NULL) {
7362 		kstat_delete(un->un_stats);
7363 		un->un_stats = NULL;
7364 	}
7365 	if (un->un_errstats != NULL) {
7366 		kstat_delete(un->un_errstats);
7367 		un->un_errstats = NULL;
7368 	}
7369 
7370 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7371 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7372 
7373 	ddi_prop_remove_all(devi);
7374 	sema_destroy(&un->un_semoclose);
7375 	cv_destroy(&un->un_state_cv);
7376 
7377 getrbuf_failed:
7378 
7379 	sd_free_rqs(un);
7380 
7381 alloc_rqs_failed:
7382 
7383 	devp->sd_private = NULL;
7384 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7385 
7386 get_softstate_failed:
7387 	/*
7388 	 * Note: the man pages are unclear as to whether or not doing a
7389 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7390 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7391 	 * ddi_get_soft_state() fails.  The implication seems to be
7392 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7393 	 */
7394 	ddi_soft_state_free(sd_state, instance);
7395 
7396 probe_failed:
7397 	scsi_unprobe(devp);
7398 #ifdef SDDEBUG
7399 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
7400 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
7401 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
7402 		    (void *)un);
7403 	}
7404 #endif
7405 	return (DDI_FAILURE);
7406 }
7407 
7408 
7409 /*
7410  *    Function: sd_unit_detach
7411  *
7412  * Description: Performs DDI_DETACH processing for sddetach().
7413  *
7414  * Return Code: DDI_SUCCESS
7415  *		DDI_FAILURE
7416  *
7417  *     Context: Kernel thread context
7418  */
7419 
7420 static int
7421 sd_unit_detach(dev_info_t *devi)
7422 {
7423 	struct scsi_device	*devp;
7424 	struct sd_lun		*un;
7425 	int			i;
7426 	int			tgt;
7427 	dev_t			dev;
7428 	dev_info_t		*pdip = ddi_get_parent(devi);
7429 	int			instance = ddi_get_instance(devi);
7430 
7431 	mutex_enter(&sd_detach_mutex);
7432 
7433 	/*
7434 	 * Fail the detach for any of the following:
7435 	 *  - Unable to get the sd_lun struct for the instance
7436 	 *  - A layered driver has an outstanding open on the instance
7437 	 *  - Another thread is already detaching this instance
7438 	 *  - Another thread is currently performing an open
7439 	 */
7440 	devp = ddi_get_driver_private(devi);
7441 	if ((devp == NULL) ||
7442 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7443 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7444 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7445 		mutex_exit(&sd_detach_mutex);
7446 		return (DDI_FAILURE);
7447 	}
7448 
7449 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7450 
7451 	/*
7452 	 * Mark this instance as currently in a detach, to inhibit any
7453 	 * opens from a layered driver.
7454 	 */
7455 	un->un_detach_count++;
7456 	mutex_exit(&sd_detach_mutex);
7457 
7458 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7459 	    SCSI_ADDR_PROP_TARGET, -1);
7460 
7461 	dev = sd_make_device(SD_DEVINFO(un));
7462 
7463 #ifndef lint
7464 	_NOTE(COMPETING_THREADS_NOW);
7465 #endif
7466 
7467 	mutex_enter(SD_MUTEX(un));
7468 
7469 	/*
7470 	 * Fail the detach if there are any outstanding layered
7471 	 * opens on this device.
7472 	 */
7473 	for (i = 0; i < NDKMAP; i++) {
7474 		if (un->un_ocmap.lyropen[i] != 0) {
7475 			goto err_notclosed;
7476 		}
7477 	}
7478 
7479 	/*
7480 	 * Verify there are NO outstanding commands issued to this device.
7481 	 * ie, un_ncmds_in_transport == 0.
7482 	 * It's possible to have outstanding commands through the physio
7483 	 * code path, even though everything's closed.
7484 	 */
7485 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7486 	    (un->un_direct_priority_timeid != NULL) ||
7487 	    (un->un_state == SD_STATE_RWAIT)) {
7488 		mutex_exit(SD_MUTEX(un));
7489 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7490 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7491 		goto err_stillbusy;
7492 	}
7493 
7494 	/*
7495 	 * If we have the device reserved, release the reservation.
7496 	 */
7497 	if ((un->un_resvd_status & SD_RESERVE) &&
7498 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7499 		mutex_exit(SD_MUTEX(un));
7500 		/*
7501 		 * Note: sd_reserve_release sends a command to the device
7502 		 * via the sd_ioctlcmd() path, and can sleep.
7503 		 */
7504 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7505 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7506 			    "sd_dr_detach: Cannot release reservation \n");
7507 		}
7508 	} else {
7509 		mutex_exit(SD_MUTEX(un));
7510 	}
7511 
7512 	/*
7513 	 * Untimeout any reserve recover, throttle reset, restart unit
7514 	 * and delayed broadcast timeout threads. Protect the timeout pointer
7515 	 * from getting nulled by their callback functions.
7516 	 */
7517 	mutex_enter(SD_MUTEX(un));
7518 	if (un->un_resvd_timeid != NULL) {
7519 		timeout_id_t temp_id = un->un_resvd_timeid;
7520 		un->un_resvd_timeid = NULL;
7521 		mutex_exit(SD_MUTEX(un));
7522 		(void) untimeout(temp_id);
7523 		mutex_enter(SD_MUTEX(un));
7524 	}
7525 
7526 	if (un->un_reset_throttle_timeid != NULL) {
7527 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7528 		un->un_reset_throttle_timeid = NULL;
7529 		mutex_exit(SD_MUTEX(un));
7530 		(void) untimeout(temp_id);
7531 		mutex_enter(SD_MUTEX(un));
7532 	}
7533 
7534 	if (un->un_startstop_timeid != NULL) {
7535 		timeout_id_t temp_id = un->un_startstop_timeid;
7536 		un->un_startstop_timeid = NULL;
7537 		mutex_exit(SD_MUTEX(un));
7538 		(void) untimeout(temp_id);
7539 		mutex_enter(SD_MUTEX(un));
7540 	}
7541 
7542 	if (un->un_dcvb_timeid != NULL) {
7543 		timeout_id_t temp_id = un->un_dcvb_timeid;
7544 		un->un_dcvb_timeid = NULL;
7545 		mutex_exit(SD_MUTEX(un));
7546 		(void) untimeout(temp_id);
7547 	} else {
7548 		mutex_exit(SD_MUTEX(un));
7549 	}
7550 
7551 	/* Remove any pending reservation reclaim requests for this device */
7552 	sd_rmv_resv_reclaim_req(dev);
7553 
7554 	mutex_enter(SD_MUTEX(un));
7555 
7556 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7557 	if (un->un_direct_priority_timeid != NULL) {
7558 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7559 		un->un_direct_priority_timeid = NULL;
7560 		mutex_exit(SD_MUTEX(un));
7561 		(void) untimeout(temp_id);
7562 		mutex_enter(SD_MUTEX(un));
7563 	}
7564 
7565 	/* Cancel any active multi-host disk watch thread requests */
7566 	if (un->un_mhd_token != NULL) {
7567 		mutex_exit(SD_MUTEX(un));
7568 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7569 		if (scsi_watch_request_terminate(un->un_mhd_token,
7570 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7571 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7572 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7573 			/*
7574 			 * Note: We are returning here after having removed
7575 			 * some driver timeouts above. This is consistent with
7576 			 * the legacy implementation but perhaps the watch
7577 			 * terminate call should be made with the wait flag set.
7578 			 */
7579 			goto err_stillbusy;
7580 		}
7581 		mutex_enter(SD_MUTEX(un));
7582 		un->un_mhd_token = NULL;
7583 	}
7584 
7585 	if (un->un_swr_token != NULL) {
7586 		mutex_exit(SD_MUTEX(un));
7587 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7588 		if (scsi_watch_request_terminate(un->un_swr_token,
7589 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7590 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7591 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7592 			/*
7593 			 * Note: We are returning here after having removed
7594 			 * some driver timeouts above. This is consistent with
7595 			 * the legacy implementation but perhaps the watch
7596 			 * terminate call should be made with the wait flag set.
7597 			 */
7598 			goto err_stillbusy;
7599 		}
7600 		mutex_enter(SD_MUTEX(un));
7601 		un->un_swr_token = NULL;
7602 	}
7603 
7604 	mutex_exit(SD_MUTEX(un));
7605 
7606 	/*
7607 	 * Clear any scsi_reset_notifies. We clear the reset notifies
7608 	 * even if we have not registered one.
7609 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7610 	 */
7611 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7612 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7613 
7614 	/*
7615 	 * Protect the timeout pointers from getting nulled by
7616 	 * their callback functions during the cancellation process.
7617 	 * In such a scenario untimeout can be invoked with a null value.
7618 	 */
7619 	_NOTE(NO_COMPETING_THREADS_NOW);
7620 
7621 	mutex_enter(&un->un_pm_mutex);
7622 	if (un->un_pm_idle_timeid != NULL) {
7623 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7624 		un->un_pm_idle_timeid = NULL;
7625 		mutex_exit(&un->un_pm_mutex);
7626 
7627 		/*
7628 		 * Timeout is active; cancel it.
7629 		 * Note that it'll never be active on a device
7630 		 * that does not support PM, therefore we don't
7631 		 * have to check before calling pm_idle_component.
7632 		 */
7633 		(void) untimeout(temp_id);
7634 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7635 		mutex_enter(&un->un_pm_mutex);
7636 	}
7637 
7638 	/*
7639 	 * Check whether there is already a timeout scheduled for power
7640 	 * management. If yes, then don't lower the power here; that's
7641 	 * the timeout handler's job.
7642 	 */
7643 	if (un->un_pm_timeid != NULL) {
7644 		timeout_id_t temp_id = un->un_pm_timeid;
7645 		un->un_pm_timeid = NULL;
7646 		mutex_exit(&un->un_pm_mutex);
7647 		/*
7648 		 * Timeout is active; cancel it.
7649 		 * Note that it'll never be active on a device
7650 		 * that does not support PM, therefore we don't
7651 		 * have to check before calling pm_idle_component.
7652 		 */
7653 		(void) untimeout(temp_id);
7654 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7655 
7656 	} else {
7657 		mutex_exit(&un->un_pm_mutex);
7658 		if ((un->un_f_pm_is_enabled == TRUE) &&
7659 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7660 		    DDI_SUCCESS)) {
7661 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7662 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
7663 			/*
7664 			 * Fix for bug: 4297749, item # 13
7665 			 * The above test now includes a check to see if PM is
7666 	 * supported by this device before calling
7667 			 * pm_lower_power().
7668 			 * Note, the following is not dead code. The call to
7669 			 * pm_lower_power above will generate a call back into
7670 			 * our sdpower routine which might result in a timeout
7671 			 * handler getting activated. Therefore the following
7672 			 * code is valid and necessary.
7673 			 */
7674 			mutex_enter(&un->un_pm_mutex);
7675 			if (un->un_pm_timeid != NULL) {
7676 				timeout_id_t temp_id = un->un_pm_timeid;
7677 				un->un_pm_timeid = NULL;
7678 				mutex_exit(&un->un_pm_mutex);
7679 				(void) untimeout(temp_id);
7680 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7681 			} else {
7682 				mutex_exit(&un->un_pm_mutex);
7683 			}
7684 		}
7685 	}
7686 
7687 	/*
7688 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7689 	 * Relocated here from above to be after the call to
7690 	 * pm_lower_power, which was getting errors.
7691 	 */
7692 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7693 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7694 
7695 	/*
7696 	 * Currently, tagged queuing is supported per-target by the HBA.
7697 	 * Setting it for a lun instance actually sets the capability for
7698 	 * the whole target, which affects the luns already attached on
7699 	 * the same target. So during detach, we can only disable this
7700 	 * capability when this is the only lun left on the target. By
7701 	 * doing this, we assume a target has the same tagged queuing
7702 	 * capability for every lun. The condition can be removed when the
7703 	 * HBA is changed to support per-lun tagged queuing.
7704 	 */
7705 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7706 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7707 	}
7708 
7709 	if (un->un_f_is_fibre == FALSE) {
7710 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7711 	}
7712 
7713 	/*
7714 	 * Remove any event callbacks, fibre only
7715 	 */
7716 	if (un->un_f_is_fibre == TRUE) {
7717 		if ((un->un_insert_event != NULL) &&
7718 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
7719 		    DDI_SUCCESS)) {
7720 			/*
7721 			 * Note: We are returning here after having done
7722 			 * substantial cleanup above. This is consistent
7723 			 * with the legacy implementation but this may not
7724 			 * be the right thing to do.
7725 			 */
7726 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7727 				"sd_dr_detach: Cannot cancel insert event\n");
7728 			goto err_remove_event;
7729 		}
7730 		un->un_insert_event = NULL;
7731 
7732 		if ((un->un_remove_event != NULL) &&
7733 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
7734 		    DDI_SUCCESS)) {
7735 			/*
7736 			 * Note: We are returning here after having done
7737 			 * substantial cleanup above. This is consistent
7738 			 * with the legacy implementation but this may not
7739 			 * be the right thing to do.
7740 			 */
7741 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7742 				"sd_dr_detach: Cannot cancel remove event\n");
7743 			goto err_remove_event;
7744 		}
7745 		un->un_remove_event = NULL;
7746 	}
7747 
7748 	/* Do not free the softstate if the callback routine is active */
7749 	sd_sync_with_callback(un);
7750 
7751 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7752 	cmlb_free_handle(&un->un_cmlbhandle);
7753 
7754 	/*
7755 	 * Hold the detach mutex here, to make sure that no other threads ever
7756 	 * can access a (partially) freed soft state structure.
7757 	 */
7758 	mutex_enter(&sd_detach_mutex);
7759 
7760 	/*
7761 	 * Clean up the soft state struct.
7762 	 * Cleanup is done in reverse order of allocs/inits.
7763 	 * At this point there should be no competing threads anymore.
7764 	 */
7765 
7766 	/* Unregister and free device id. */
7767 	ddi_devid_unregister(devi);
7768 	if (un->un_devid) {
7769 		ddi_devid_free(un->un_devid);
7770 		un->un_devid = NULL;
7771 	}
7772 
7773 	/*
7774 	 * Destroy wmap cache if it exists.
7775 	 */
7776 	if (un->un_wm_cache != NULL) {
7777 		kmem_cache_destroy(un->un_wm_cache);
7778 		un->un_wm_cache = NULL;
7779 	}
7780 
7781 	/*
7782 	 * kstat cleanup is done in detach for all device types (4363169).
7783 	 * We do not want to fail detach if the device kstats are not deleted
7784 	 * since there is confusion about the devo_refcnt for the device.
7785 	 * We just delete the kstats and let detach complete successfully.
7786 	 */
7787 	if (un->un_stats != NULL) {
7788 		kstat_delete(un->un_stats);
7789 		un->un_stats = NULL;
7790 	}
7791 	if (un->un_errstats != NULL) {
7792 		kstat_delete(un->un_errstats);
7793 		un->un_errstats = NULL;
7794 	}
7795 
7796 	/* Remove partition stats */
7797 	if (un->un_f_pkstats_enabled) {
7798 		for (i = 0; i < NSDMAP; i++) {
7799 			if (un->un_pstats[i] != NULL) {
7800 				kstat_delete(un->un_pstats[i]);
7801 				un->un_pstats[i] = NULL;
7802 			}
7803 		}
7804 	}
7805 
7806 	/* Remove xbuf registration */
7807 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7808 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7809 
7810 	/* Remove driver properties */
7811 	ddi_prop_remove_all(devi);
7812 
7813 	mutex_destroy(&un->un_pm_mutex);
7814 	cv_destroy(&un->un_pm_busy_cv);
7815 
7816 	cv_destroy(&un->un_wcc_cv);
7817 
7818 	/* Open/close semaphore */
7819 	sema_destroy(&un->un_semoclose);
7820 
7821 	/* Removable media condvar. */
7822 	cv_destroy(&un->un_state_cv);
7823 
7824 	/* Suspend/resume condvar. */
7825 	cv_destroy(&un->un_suspend_cv);
7826 	cv_destroy(&un->un_disk_busy_cv);
7827 
7828 	sd_free_rqs(un);
7829 
7830 	/* Free up soft state */
7831 	devp->sd_private = NULL;
7832 
7833 	bzero(un, sizeof (struct sd_lun));
7834 	ddi_soft_state_free(sd_state, instance);
7835 
7836 	mutex_exit(&sd_detach_mutex);
7837 
7838 	/* This frees up the INQUIRY data associated with the device. */
7839 	scsi_unprobe(devp);
7840 
7841 	/*
7842 	 * After successfully detaching an instance, we update the information
7843 	 * of how many luns have been attached to the corresponding target and
7844 	 * controller for parallel SCSI. This information is used when sd tries
7845 	 * to set the tagged queuing capability in HBA.
7846 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7847 	 * check if the device is parallel SCSI. However, we don't need to
7848 	 * check here because we've already checked during attach. No device
7849 	 * that is not parallel SCSI is in the chain.
7850 	 */
7851 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7852 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7853 	}
7854 
7855 	return (DDI_SUCCESS);
7856 
7857 err_notclosed:
7858 	mutex_exit(SD_MUTEX(un));
7859 
7860 err_stillbusy:
7861 	_NOTE(NO_COMPETING_THREADS_NOW);
7862 
7863 err_remove_event:
7864 	mutex_enter(&sd_detach_mutex);
7865 	un->un_detach_count--;
7866 	mutex_exit(&sd_detach_mutex);
7867 
7868 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
7869 	return (DDI_FAILURE);
7870 }
7871 
7872 
7873 /*
7874  *    Function: sd_create_errstats
7875  *
7876  * Description: This routine instantiates the device error stats.
7877  *
7878  *		Note: During attach the stats are instantiated first so they are
7879  *		available for attach-time routines that utilize the driver
7880  *		iopath to send commands to the device. The stats are initialized
7881  *		separately so data obtained during some attach-time routines is
7882  *		available. (4362483)
7883  *
7884  *   Arguments: un - driver soft state (unit) structure
7885  *		instance - driver instance
7886  *
7887  *     Context: Kernel thread context
7888  */
7889 
7890 static void
7891 sd_create_errstats(struct sd_lun *un, int instance)
7892 {
7893 	struct	sd_errstats	*stp;
7894 	char	kstatmodule_err[KSTAT_STRLEN];
7895 	char	kstatname[KSTAT_STRLEN];
7896 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
7897 
7898 	ASSERT(un != NULL);
7899 
7900 	if (un->un_errstats != NULL) {
7901 		return;
7902 	}
7903 
7904 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
7905 	    "%serr", sd_label);
7906 	(void) snprintf(kstatname, sizeof (kstatname),
7907 	    "%s%d,err", sd_label, instance);
7908 
7909 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
7910 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
7911 
7912 	if (un->un_errstats == NULL) {
7913 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7914 		    "sd_create_errstats: Failed kstat_create\n");
7915 		return;
7916 	}
7917 
7918 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7919 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
7920 	    KSTAT_DATA_UINT32);
7921 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
7922 	    KSTAT_DATA_UINT32);
7923 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
7924 	    KSTAT_DATA_UINT32);
7925 	kstat_named_init(&stp->sd_vid,		"Vendor",
7926 	    KSTAT_DATA_CHAR);
7927 	kstat_named_init(&stp->sd_pid,		"Product",
7928 	    KSTAT_DATA_CHAR);
7929 	kstat_named_init(&stp->sd_revision,	"Revision",
7930 	    KSTAT_DATA_CHAR);
7931 	kstat_named_init(&stp->sd_serial,	"Serial No",
7932 	    KSTAT_DATA_CHAR);
7933 	kstat_named_init(&stp->sd_capacity,	"Size",
7934 	    KSTAT_DATA_ULONGLONG);
7935 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
7936 	    KSTAT_DATA_UINT32);
7937 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
7938 	    KSTAT_DATA_UINT32);
7939 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
7940 	    KSTAT_DATA_UINT32);
7941 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
7942 	    KSTAT_DATA_UINT32);
7943 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
7944 	    KSTAT_DATA_UINT32);
7945 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
7946 	    KSTAT_DATA_UINT32);
7947 
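	/*
	 * These named kstats (class "device_error") back the per-device
	 * error summary reported by iostat(1M) -E.
	 */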
7948 	un->un_errstats->ks_private = un;
7949 	un->un_errstats->ks_update  = nulldev;
7950 
7951 	kstat_install(un->un_errstats);
7952 }
7953 
7954 
7955 /*
7956  *    Function: sd_set_errstats
7957  *
7958  * Description: This routine sets the value of the vendor id, product id,
7959  *		revision, serial number, and capacity device error stats.
7960  *
7961  *		Note: During attach the stats are instantiated first so they are
7962  *		available for attach-time routines that utilize the driver
7963  *		iopath to send commands to the device. The stats are initialized
7964  *		separately so data obtained during some attach-time routines is
7965  *		available. (4362483)
7966  *
7967  *   Arguments: un - driver soft state (unit) structure
7968  *
7969  *     Context: Kernel thread context
7970  */
7971 
7972 static void
7973 sd_set_errstats(struct sd_lun *un)
7974 {
7975 	struct	sd_errstats	*stp;
7976 
7977 	ASSERT(un != NULL);
7978 	ASSERT(un->un_errstats != NULL);
7979 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7980 	ASSERT(stp != NULL);
7981 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
7982 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
7983 	(void) strncpy(stp->sd_revision.value.c,
7984 	    un->un_sd->sd_inq->inq_revision, 4);
7985 
7986 	/*
7987 	 * All the errstats are persistent across detach/attach, so
7988 	 * reset them all here to handle hot replacement of disk
7989 	 * drives, except when the drive is an unchanged Sun
7990 	 * qualified drive.
7991 	 */
7992 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
7993 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
7994 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
7995 		stp->sd_softerrs.value.ui32 = 0;
7996 		stp->sd_harderrs.value.ui32 = 0;
7997 		stp->sd_transerrs.value.ui32 = 0;
7998 		stp->sd_rq_media_err.value.ui32 = 0;
7999 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8000 		stp->sd_rq_nodev_err.value.ui32 = 0;
8001 		stp->sd_rq_recov_err.value.ui32 = 0;
8002 		stp->sd_rq_illrq_err.value.ui32 = 0;
8003 		stp->sd_rq_pfa_err.value.ui32 = 0;
8004 	}
8005 
8006 	/*
8007 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8008 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8009 	 * (4376302))
8010 	 */
8011 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8012 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8013 		    sizeof (SD_INQUIRY(un)->inq_serial));
8014 	}
8015 
8016 	if (un->un_f_blockcount_is_valid != TRUE) {
8017 		/*
8018 		 * Set capacity error stat to 0 for no media. This ensures
8019 		 * a valid capacity is displayed in response to 'iostat -E'
8020 		 * when no media is present in the device.
8021 		 */
8022 		stp->sd_capacity.value.ui64 = 0;
8023 	} else {
8024 		/*
8025 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8026 		 * capacity.
8027 		 *
8028 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8029 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8030 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8031 		 */
8032 		stp->sd_capacity.value.ui64 = (uint64_t)
8033 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8034 	}
8035 }
8036 
8037 
8038 /*
8039  *    Function: sd_set_pstats
8040  *
8041  * Description: This routine instantiates and initializes the partition
8042  *              stats for each partition with more than zero blocks.
8043  *		(4363169)
8044  *
8045  *   Arguments: un - driver soft state (unit) structure
8046  *
8047  *     Context: Kernel thread context
8048  */
8049 
8050 static void
8051 sd_set_pstats(struct sd_lun *un)
8052 {
8053 	char	kstatname[KSTAT_STRLEN];
8054 	int	instance;
8055 	int	i;
8056 	diskaddr_t	nblks = 0;
8057 	char	*partname = NULL;
8058 
8059 	ASSERT(un != NULL);
8060 
8061 	instance = ddi_get_instance(SD_DEVINFO(un));
8062 
8063 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8064 	for (i = 0; i < NSDMAP; i++) {
8065 
8066 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8067 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8068 			continue;
8069 		mutex_enter(SD_MUTEX(un));
8070 
8071 		if ((un->un_pstats[i] == NULL) &&
8072 		    (nblks != 0)) {
8073 
8074 			(void) snprintf(kstatname, sizeof (kstatname),
8075 			    "%s%d,%s", sd_label, instance,
8076 			    partname);
8077 
8078 			un->un_pstats[i] = kstat_create(sd_label,
8079 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8080 			    1, KSTAT_FLAG_PERSISTENT);
8081 			if (un->un_pstats[i] != NULL) {
8082 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8083 				kstat_install(un->un_pstats[i]);
8084 			}
8085 		}
8086 		mutex_exit(SD_MUTEX(un));
8087 	}
8088 }
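
/*
 * Example of the kstat naming above (illustrative only, assuming
 * sd_label is "sd"): instance 1 with partition name "a" yields the
 * kstat name "sd1,a", so that partition's I/O statistics appear
 * under sd1,a in kstat(1M) and iostat(1M) output.
 */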
8089 
8090 
8091 #if (defined(__fibre))
8092 /*
8093  *    Function: sd_init_event_callbacks
8094  *
8095  * Description: This routine initializes the insertion and removal event
8096  *		callbacks. (fibre only)
8097  *
8098  *   Arguments: un - driver soft state (unit) structure
8099  *
8100  *     Context: Kernel thread context
8101  */
8102 
8103 static void
8104 sd_init_event_callbacks(struct sd_lun *un)
8105 {
8106 	ASSERT(un != NULL);
8107 
8108 	if ((un->un_insert_event == NULL) &&
8109 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8110 	    &un->un_insert_event) == DDI_SUCCESS)) {
8111 		/*
8112 		 * Add the callback for an insertion event
8113 		 */
8114 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8115 		    un->un_insert_event, sd_event_callback, (void *)un,
8116 		    &(un->un_insert_cb_id));
8117 	}
8118 
8119 	if ((un->un_remove_event == NULL) &&
8120 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8121 	    &un->un_remove_event) == DDI_SUCCESS)) {
8122 		/*
8123 		 * Add the callback for a removal event
8124 		 */
8125 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8126 		    un->un_remove_event, sd_event_callback, (void *)un,
8127 		    &(un->un_remove_cb_id));
8128 	}
8129 }
8130 
8131 
8132 /*
8133  *    Function: sd_event_callback
8134  *
8135  * Description: This routine handles insert/remove events (photon). The
8136  *		state is changed to OFFLINE, which can be used to suppress
8137  *		error messages. (fibre only)
8138  *
8139  *   Arguments: un - driver soft state (unit) structure
8140  *
8141  *     Context: Callout thread context
8142  */
8143 /* ARGSUSED */
8144 static void
8145 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8146     void *bus_impldata)
8147 {
8148 	struct sd_lun *un = (struct sd_lun *)arg;
8149 
8150 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8151 	if (event == un->un_insert_event) {
8152 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8153 		mutex_enter(SD_MUTEX(un));
8154 		if (un->un_state == SD_STATE_OFFLINE) {
8155 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8156 				un->un_state = un->un_last_state;
8157 			} else {
8158 				/*
8159 				 * We have gone through SUSPEND/RESUME while
8160 				 * we were offline. Restore the last state
8161 				 */
8162 				un->un_state = un->un_save_state;
8163 			}
8164 		}
8165 		mutex_exit(SD_MUTEX(un));
8166 
8167 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8168 	} else if (event == un->un_remove_event) {
8169 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8170 		mutex_enter(SD_MUTEX(un));
8171 		/*
8172 		 * We need to handle an event callback that occurs during
8173 		 * the suspend operation, since we don't prevent it.
8174 		 */
8175 		if (un->un_state != SD_STATE_OFFLINE) {
8176 			if (un->un_state != SD_STATE_SUSPENDED) {
8177 				New_state(un, SD_STATE_OFFLINE);
8178 			} else {
8179 				un->un_last_state = SD_STATE_OFFLINE;
8180 			}
8181 		}
8182 		mutex_exit(SD_MUTEX(un));
8183 	} else {
8184 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8185 		    "!Unknown event\n");
8186 	}
8187 
8188 }
8189 #endif
8190 
8191 /*
8192  *    Function: sd_cache_control()
8193  *
8194  * Description: This routine is the driver entry point for setting
8195  *		read and write caching by modifying the WCE (write cache
8196  *		enable) and RCD (read cache disable) bits of mode
8197  *		page 8 (MODEPAGE_CACHING).
8198  *
8199  *   Arguments: un - driver soft state (unit) structure
8200  *		rcd_flag - flag for controlling the read cache
8201  *		wce_flag - flag for controlling the write cache
8202  *
8203  * Return Code: EIO
8204  *		code returned by sd_send_scsi_MODE_SENSE and
8205  *		sd_send_scsi_MODE_SELECT
8206  *
8207  *     Context: Kernel Thread
8208  */
8209 
8210 static int
8211 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8212 {
8213 	struct mode_caching	*mode_caching_page;
8214 	uchar_t			*header;
8215 	size_t			buflen;
8216 	int			hdrlen;
8217 	int			bd_len;
8218 	int			rval = 0;
8219 	struct mode_header_grp2	*mhp;
8220 
8221 	ASSERT(un != NULL);
8222 
8223 	/*
8224 	 * Do a test unit ready, otherwise a mode sense may not work if this
8225 	 * is the first command sent to the device after boot.
8226 	 */
8227 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8228 
8229 	if (un->un_f_cfg_is_atapi == TRUE) {
8230 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8231 	} else {
8232 		hdrlen = MODE_HEADER_LENGTH;
8233 	}
8234 
8235 	/*
8236 	 * Allocate memory for the retrieved mode page and its headers.  Set
8237 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
8238 	 * we get all of the mode sense data; otherwise, the mode select
8239 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8240 	 */
8241 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8242 	    sizeof (struct mode_cache_scsi3);
8243 
8244 	header = kmem_zalloc(buflen, KM_SLEEP);
8245 
8246 	/* Get the information from the device. */
8247 	if (un->un_f_cfg_is_atapi == TRUE) {
8248 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8249 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8250 	} else {
8251 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8252 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8253 	}
8254 	if (rval != 0) {
8255 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8256 		    "sd_cache_control: Mode Sense Failed\n");
8257 		kmem_free(header, buflen);
8258 		return (rval);
8259 	}
8260 
8261 	/*
8262 	 * Determine size of Block Descriptors in order to locate
8263 	 * the mode page data. ATAPI devices return 0, SCSI devices
8264 	 * should return MODE_BLK_DESC_LENGTH.
8265 	 */
8266 	if (un->un_f_cfg_is_atapi == TRUE) {
8267 		mhp	= (struct mode_header_grp2 *)header;
8268 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8269 	} else {
8270 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8271 	}
8272 
8273 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8274 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8275 		    "sd_cache_control: Mode Sense returned invalid "
8276 		    "block descriptor length\n");
8277 		kmem_free(header, buflen);
8278 		return (EIO);
8279 	}
8280 
8281 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8282 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8283 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8284 		    " caching page code mismatch %d\n",
8285 		    mode_caching_page->mode_page.code);
8286 		kmem_free(header, buflen);
8287 		return (EIO);
8288 	}
8289 
8290 	/* Check the relevant bits on successful mode sense. */
8291 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8292 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8293 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8294 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8295 
8296 		size_t sbuflen;
8297 		uchar_t save_pg;
8298 
8299 		/*
8300 		 * Construct select buffer length based on the
8301 		 * length of the sense data returned.
8302 		 */
8303 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
8304 		    sizeof (struct mode_page) +
8305 		    (int)mode_caching_page->mode_page.length;
8306 
8307 		/*
8308 		 * Set the caching bits as requested.
8309 		 */
8310 		if (rcd_flag == SD_CACHE_ENABLE)
8311 			mode_caching_page->rcd = 0;
8312 		else if (rcd_flag == SD_CACHE_DISABLE)
8313 			mode_caching_page->rcd = 1;
8314 
8315 		if (wce_flag == SD_CACHE_ENABLE)
8316 			mode_caching_page->wce = 1;
8317 		else if (wce_flag == SD_CACHE_DISABLE)
8318 			mode_caching_page->wce = 0;
8319 
8320 		/*
8321 		 * Save the page if the mode sense says the
8322 		 * drive supports it.
8323 		 */
8324 		save_pg = mode_caching_page->mode_page.ps ?
8325 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8326 
8327 		/* Clear reserved bits before mode select. */
8328 		mode_caching_page->mode_page.ps = 0;
8329 
8330 		/*
8331 		 * Clear out mode header for mode select.
8332 		 * The rest of the retrieved page will be reused.
8333 		 */
8334 		bzero(header, hdrlen);
8335 
8336 		if (un->un_f_cfg_is_atapi == TRUE) {
8337 			mhp = (struct mode_header_grp2 *)header;
8338 			mhp->bdesc_length_hi = bd_len >> 8;
8339 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8340 		} else {
8341 			((struct mode_header *)header)->bdesc_length = bd_len;
8342 		}
8343 
8344 		/* Issue mode select to change the cache settings */
8345 		if (un->un_f_cfg_is_atapi == TRUE) {
8346 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8347 			    sbuflen, save_pg, SD_PATH_DIRECT);
8348 		} else {
8349 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8350 			    sbuflen, save_pg, SD_PATH_DIRECT);
8351 		}
8352 	}
8353 
8354 	kmem_free(header, buflen);
8355 	return (rval);
8356 }
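
/*
 * Usage sketch (illustrative only, not an actual call site): a
 * caller that wants both caches on would pass SD_CACHE_ENABLE for
 * both flags, which the code above translates to rcd = 0 and
 * wce = 1 before issuing the mode select:
 *
 *	if (sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_ENABLE) != 0) {
 *		... mode sense/select failed; caching state unchanged ...
 *	}
 */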
8357 
8358 
8359 /*
8360  *    Function: sd_get_write_cache_enabled()
8361  *
8362  * Description: This routine is the driver entry point for determining if
8363  *		write caching is enabled.  It examines the WCE (write cache
8364  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8365  *
8366  *   Arguments: un - driver soft state (unit) structure
8367  *		is_enabled - pointer to int where write cache enabled state
8368  *		is returned (non-zero -> write cache enabled)
8369  *
8370  *
8371  * Return Code: EIO
8372  *		code returned by sd_send_scsi_MODE_SENSE
8373  *
8374  *     Context: Kernel Thread
8375  *
8376  * NOTE: If ioctl is added to disable write cache, this sequence should
8377  * be followed so that no locking is required for accesses to
8378  * un->un_f_write_cache_enabled:
8379  * 	do mode select to clear wce
8380  * 	do synchronize cache to flush cache
8381  * 	set un->un_f_write_cache_enabled = FALSE
8382  *
8383  * Conversely, an ioctl to enable the write cache should be done
8384  * in this order:
8385  * 	set un->un_f_write_cache_enabled = TRUE
8386  * 	do mode select to set wce
8387  */
8388 
8389 static int
8390 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8391 {
8392 	struct mode_caching	*mode_caching_page;
8393 	uchar_t			*header;
8394 	size_t			buflen;
8395 	int			hdrlen;
8396 	int			bd_len;
8397 	int			rval = 0;
8398 
8399 	ASSERT(un != NULL);
8400 	ASSERT(is_enabled != NULL);
8401 
8402 	/* in case of error, flag as enabled */
8403 	*is_enabled = TRUE;
8404 
8405 	/*
8406 	 * Do a test unit ready, otherwise a mode sense may not work if this
8407 	 * is the first command sent to the device after boot.
8408 	 */
8409 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8410 
8411 	if (un->un_f_cfg_is_atapi == TRUE) {
8412 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8413 	} else {
8414 		hdrlen = MODE_HEADER_LENGTH;
8415 	}
8416 
8417 	/*
8418 	 * Allocate memory for the retrieved mode page and its headers.  Set
8419 	 * a pointer to the page itself.
8420 	 */
8421 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8422 	header = kmem_zalloc(buflen, KM_SLEEP);
8423 
8424 	/* Get the information from the device. */
8425 	if (un->un_f_cfg_is_atapi == TRUE) {
8426 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8427 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8428 	} else {
8429 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8430 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8431 	}
8432 	if (rval != 0) {
8433 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8434 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8435 		kmem_free(header, buflen);
8436 		return (rval);
8437 	}
8438 
8439 	/*
8440 	 * Determine size of Block Descriptors in order to locate
8441 	 * the mode page data. ATAPI devices return 0, SCSI devices
8442 	 * should return MODE_BLK_DESC_LENGTH.
8443 	 */
8444 	if (un->un_f_cfg_is_atapi == TRUE) {
8445 		struct mode_header_grp2	*mhp;
8446 		mhp	= (struct mode_header_grp2 *)header;
8447 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8448 	} else {
8449 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8450 	}
8451 
8452 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8453 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8454 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8455 		    "block descriptor length\n");
8456 		kmem_free(header, buflen);
8457 		return (EIO);
8458 	}
8459 
8460 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8461 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8462 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled: Mode Sense"
8463 		    " caching page code mismatch %d\n",
8464 		    mode_caching_page->mode_page.code);
8465 		kmem_free(header, buflen);
8466 		return (EIO);
8467 	}
8468 	*is_enabled = mode_caching_page->wce;
8469 
8470 	kmem_free(header, buflen);
8471 	return (0);
8472 }
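
/*
 * Sketch of the write-cache-disable ordering described in the NOTE
 * above (illustrative only; no such ioctl exists in this file).
 * Publishing un_f_write_cache_enabled last ensures a lock-free
 * reader never sees the flag off while the device write cache may
 * still hold unflushed data:
 *
 *	... mode select to clear wce, read cache left enabled ...
 *	if (sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_DISABLE) == 0 &&
 *	    sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL) == 0) {
 *		un->un_f_write_cache_enabled = FALSE;
 *	}
 */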
8473 
8474 
8475 /*
8476  *    Function: sd_make_device
8477  *
8478  * Description: Utility routine to return the Solaris device number from
8479  *		the data in the device's dev_info structure.
8480  *
8481  * Return Code: The Solaris device number
8482  *
8483  *     Context: Any
8484  */
8485 
8486 static dev_t
8487 sd_make_device(dev_info_t *devi)
8488 {
8489 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8490 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8491 }
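
/*
 * Round-trip sketch (illustrative only): the dev_t built here carries
 * the instance in the upper minor bits, and the SDUNIT/SDPART macros
 * used throughout this file recover the pieces:
 *
 *	dev_t dev = sd_make_device(devi);
 *	int instance = SDUNIT(dev);	... == ddi_get_instance(devi)
 *	int part = SDPART(dev);		... == 0 for the dev_t built here
 */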
8492 
8493 
8494 /*
8495  *    Function: sd_pm_entry
8496  *
8497  * Description: Called at the start of a new command to manage power
8498  *		and busy status of a device. This includes determining whether
8499  *		the current power state of the device is sufficient for
8500  *		performing the command or whether it must be changed.
8501  *		The PM framework is notified appropriately.
8502  *		Only with a return status of DDI_SUCCESS will the
8503  *		component be marked busy to the framework.
8504  *
8505  *		All callers of sd_pm_entry must check the return status
8506  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
8507  *		of DDI_FAILURE indicates the device failed to power up.
8508  *		In this case un_pm_count has been adjusted so the result
8509  *		on exit is still powered down, i.e. count is less than 0.
8510  *		Calling sd_pm_exit with this count value hits an ASSERT.
8511  *
8512  * Return Code: DDI_SUCCESS or DDI_FAILURE
8513  *
8514  *     Context: Kernel thread context.
8515  */
8516 
8517 static int
8518 sd_pm_entry(struct sd_lun *un)
8519 {
8520 	int return_status = DDI_SUCCESS;
8521 
8522 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8523 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8524 
8525 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8526 
8527 	if (un->un_f_pm_is_enabled == FALSE) {
8528 		SD_TRACE(SD_LOG_IO_PM, un,
8529 		    "sd_pm_entry: exiting, PM not enabled\n");
8530 		return (return_status);
8531 	}
8532 
8533 	/*
8534 	 * Just increment a counter if PM is enabled. On the transition from
8535 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8536 	 * the count with each IO and mark the device as idle when the count
8537 	 * hits 0.
8538 	 *
8539 	 * If the count is less than 0 the device is powered down. If a powered
8540 	 * down device is successfully powered up then the count must be
8541 	 * incremented to reflect the power up. Note that it'll get incremented
8542 	 * a second time to become busy.
8543 	 *
8544 	 * Because the following has the potential to change the device state
8545 	 * and must release the un_pm_mutex to do so, only one thread can be
8546 	 * allowed through at a time.
8547 	 */
8548 
8549 	mutex_enter(&un->un_pm_mutex);
8550 	while (un->un_pm_busy == TRUE) {
8551 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8552 	}
8553 	un->un_pm_busy = TRUE;
8554 
8555 	if (un->un_pm_count < 1) {
8556 
8557 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8558 
8559 		/*
8560 		 * Indicate we are now busy so the framework won't attempt to
8561 		 * power down the device. This call will only fail if either
8562 		 * we passed a bad component number or the device has no
8563 		 * components. Neither of these should ever happen.
8564 		 */
8565 		mutex_exit(&un->un_pm_mutex);
8566 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8567 		ASSERT(return_status == DDI_SUCCESS);
8568 
8569 		mutex_enter(&un->un_pm_mutex);
8570 
8571 		if (un->un_pm_count < 0) {
8572 			mutex_exit(&un->un_pm_mutex);
8573 
8574 			SD_TRACE(SD_LOG_IO_PM, un,
8575 			    "sd_pm_entry: power up component\n");
8576 
8577 			/*
8578 			 * pm_raise_power will cause sdpower to be called
8579 			 * which brings the device power level to the
8580 			 * desired state, ON in this case. If successful,
8581 			 * un_pm_count and un_power_level will be updated
8582 			 * appropriately.
8583 			 */
8584 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8585 			    SD_SPINDLE_ON);
8586 
8587 			mutex_enter(&un->un_pm_mutex);
8588 
8589 			if (return_status != DDI_SUCCESS) {
8590 				/*
8591 				 * Power up failed.
8592 				 * Idle the device and adjust the count
8593 				 * so the result on exit is that we're
8594 				 * still powered down, i.e. count is less than 0.
8595 				 */
8596 				SD_TRACE(SD_LOG_IO_PM, un,
8597 				    "sd_pm_entry: power up failed,"
8598 				    " idle the component\n");
8599 
8600 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8601 				un->un_pm_count--;
8602 			} else {
8603 				/*
8604 				 * Device is powered up, verify the
8605 				 * count is non-negative.
8606 				 * This is debug only.
8607 				 */
8608 				ASSERT(un->un_pm_count == 0);
8609 			}
8610 		}
8611 
8612 		if (return_status == DDI_SUCCESS) {
8613 			/*
8614 			 * For performance, now that the device has been tagged
8615 			 * as busy, and it's known to be powered up, update the
8616 			 * chain types to use jump tables that do not include
8617 			 * pm. This significantly lowers the overhead and
8618 			 * therefore improves performance.
8619 			 */
8620 
8621 			mutex_exit(&un->un_pm_mutex);
8622 			mutex_enter(SD_MUTEX(un));
8623 			SD_TRACE(SD_LOG_IO_PM, un,
8624 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8625 			    un->un_uscsi_chain_type);
8626 
8627 			if (un->un_f_non_devbsize_supported) {
8628 				un->un_buf_chain_type =
8629 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8630 			} else {
8631 				un->un_buf_chain_type =
8632 				    SD_CHAIN_INFO_DISK_NO_PM;
8633 			}
8634 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8635 
8636 			SD_TRACE(SD_LOG_IO_PM, un,
8637 			    "             changed  uscsi_chain_type to   %d\n",
8638 			    un->un_uscsi_chain_type);
8639 			mutex_exit(SD_MUTEX(un));
8640 			mutex_enter(&un->un_pm_mutex);
8641 
8642 			if (un->un_pm_idle_timeid == NULL) {
8643 				/* 300 ms. */
8644 				un->un_pm_idle_timeid =
8645 				    timeout(sd_pm_idletimeout_handler, un,
8646 				    (drv_usectohz((clock_t)300000)));
8647 				/*
8648 				 * Include an extra busy call to keep the
8649 				 * device busy with respect to the PM layer
8650 				 * until the timer fires, at which time it'll
8651 				 * get the extra idle call.
8652 				 */
8653 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8654 			}
8655 		}
8656 	}
8657 	un->un_pm_busy = FALSE;
8658 	/* Next... */
8659 	cv_signal(&un->un_pm_busy_cv);
8660 
8661 	un->un_pm_count++;
8662 
8663 	SD_TRACE(SD_LOG_IO_PM, un,
8664 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8665 
8666 	mutex_exit(&un->un_pm_mutex);
8667 
8668 	return (return_status);
8669 }
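
/*
 * Canonical pairing (illustrative; this is the pattern sdopen and
 * sdclose use): sd_pm_exit() may only be called when sd_pm_entry()
 * returned DDI_SUCCESS; after a failure un_pm_count still reflects a
 * powered-down device and sd_pm_exit() would trip its ASSERT:
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		... issue command(s) requiring a powered-up device ...
 *		sd_pm_exit(un);
 *	} else {
 *		... fail the request, e.g. with EIO ...
 *	}
 */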
8670 
8671 
8672 /*
8673  *    Function: sd_pm_exit
8674  *
8675  * Description: Called at the completion of a command to manage busy
8676  *		status for the device. If the device becomes idle the
8677  *		PM framework is notified.
8678  *
8679  *     Context: Kernel thread context
8680  */
8681 
8682 static void
8683 sd_pm_exit(struct sd_lun *un)
8684 {
8685 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8686 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8687 
8688 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8689 
8690 	/*
8691 	 * After attach the following flag is only read, so don't
8692 	 * take the penalty of acquiring a mutex for it.
8693 	 */
8694 	if (un->un_f_pm_is_enabled == TRUE) {
8695 
8696 		mutex_enter(&un->un_pm_mutex);
8697 		un->un_pm_count--;
8698 
8699 		SD_TRACE(SD_LOG_IO_PM, un,
8700 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8701 
8702 		ASSERT(un->un_pm_count >= 0);
8703 		if (un->un_pm_count == 0) {
8704 			mutex_exit(&un->un_pm_mutex);
8705 
8706 			SD_TRACE(SD_LOG_IO_PM, un,
8707 			    "sd_pm_exit: idle component\n");
8708 
8709 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8710 
8711 		} else {
8712 			mutex_exit(&un->un_pm_mutex);
8713 		}
8714 	}
8715 
8716 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8717 }
8718 
8719 
8720 /*
8721  *    Function: sdopen
8722  *
8723  * Description: Driver's open(9e) entry point function.
8724  *
8725  *   Arguments: dev_p   - pointer to device number
8726  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8727  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8728  *		cred_p  - user credential pointer
8729  *
8730  * Return Code: EINVAL
8731  *		ENXIO
8732  *		EIO
8733  *		EROFS
8734  *		EBUSY
8735  *
8736  *     Context: Kernel thread context
8737  */
8738 /* ARGSUSED */
8739 static int
8740 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8741 {
8742 	struct sd_lun	*un;
8743 	int		nodelay;
8744 	int		part;
8745 	uint64_t	partmask;
8746 	int		instance;
8747 	dev_t		dev;
8748 	int		rval = EIO;
8749 	diskaddr_t	nblks = 0;
8750 
8751 	/* Validate the open type */
8752 	if (otyp >= OTYPCNT) {
8753 		return (EINVAL);
8754 	}
8755 
8756 	dev = *dev_p;
8757 	instance = SDUNIT(dev);
8758 	mutex_enter(&sd_detach_mutex);
8759 
8760 	/*
8761 	 * Fail the open if there is no softstate for the instance, or
8762 	 * if another thread somewhere is trying to detach the instance.
8763 	 */
8764 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
8765 	    (un->un_detach_count != 0)) {
8766 		mutex_exit(&sd_detach_mutex);
8767 		/*
8768 		 * The probe cache only needs to be cleared when open(9e)
8769 		 * fails with ENXIO (4238046).
8770 		 */
8771 		/*
8772 		 * Unconditionally clearing the probe cache is fine with
8773 		 * separate sd/ssd binaries; on the x86 platform, where
8774 		 * parallel SCSI and fibre share a single binary, doing
8775 		 * so could be an issue.
8776 		 */
8777 		sd_scsi_clear_probe_cache();
8778 		return (ENXIO);
8779 	}
8780 
8781 	/*
8782 	 * The un_layer_count is to prevent another thread in specfs from
8783 	 * trying to detach the instance, which can happen when we are
8784 	 * called from a higher-layer driver instead of thru specfs.
8785 	 * This will not be needed when DDI provides a layered driver
8786 	 * interface that allows specfs to know that an instance is in
8787 	 * use by a layered driver & should not be detached.
8788 	 *
8789 	 * Note: the semantics for layered driver opens are exactly one
8790 	 * close for every open.
8791 	 */
8792 	if (otyp == OTYP_LYR) {
8793 		un->un_layer_count++;
8794 	}
8795 
8796 	/*
8797 	 * Keep a count of the current # of opens in progress. This is because
8798 	 * some layered drivers try to call us as a regular open. This can
8799 	 * cause problems that we cannot prevent; however, by keeping this count
8800 	 * we can at least keep our open and detach routines from racing against
8801 	 * each other under such conditions.
8802 	 */
8803 	un->un_opens_in_progress++;
8804 	mutex_exit(&sd_detach_mutex);
8805 
8806 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
8807 	part	 = SDPART(dev);
8808 	partmask = 1 << part;
8809 
8810 	/*
8811 	 * We use a semaphore here in order to serialize
8812 	 * open and close requests on the device.
8813 	 */
8814 	sema_p(&un->un_semoclose);
8815 
8816 	mutex_enter(SD_MUTEX(un));
8817 
8818 	/*
8819 	 * All device accesses go thru sdstrategy() where we check
8820 	 * on suspend status but there could be a scsi_poll command,
8821 	 * on suspend status, but there could be a scsi_poll command,
8822 	 * status.
8823 	 */
8824 
8825 	if (!nodelay) {
8826 		while ((un->un_state == SD_STATE_SUSPENDED) ||
8827 		    (un->un_state == SD_STATE_PM_CHANGING)) {
8828 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
8829 		}
8830 
8831 		mutex_exit(SD_MUTEX(un));
8832 		if (sd_pm_entry(un) != DDI_SUCCESS) {
8833 			rval = EIO;
8834 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
8835 			    "sdopen: sd_pm_entry failed\n");
8836 			goto open_failed_with_pm;
8837 		}
8838 		mutex_enter(SD_MUTEX(un));
8839 	}
8840 
8841 	/* check for previous exclusive open */
8842 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
8843 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
8844 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
8845 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
8846 
8847 	if (un->un_exclopen & (partmask)) {
8848 		goto excl_open_fail;
8849 	}
8850 
8851 	if (flag & FEXCL) {
8852 		int i;
8853 		if (un->un_ocmap.lyropen[part]) {
8854 			goto excl_open_fail;
8855 		}
8856 		for (i = 0; i < (OTYPCNT - 1); i++) {
8857 			if (un->un_ocmap.regopen[i] & (partmask)) {
8858 				goto excl_open_fail;
8859 			}
8860 		}
8861 	}
8862 
8863 	/*
8864 	 * Check the write permission if this is a removable media device,
8865 	 * NDELAY has not been set, and write permission is requested.
8866 	 *
8867 	 * Note: If NDELAY was set and this is write-protected media the WRITE
8868 	 * attempt will fail with EIO as part of the I/O processing. This is a
8869 	 * more permissive implementation that allows the open to succeed and
8870 	 * WRITE attempts to fail when appropriate.
8871 	 */
8872 	if (un->un_f_chk_wp_open) {
8873 		if ((flag & FWRITE) && (!nodelay)) {
8874 			mutex_exit(SD_MUTEX(un));
8875 			/*
8876 			 * For a writable DVD drive, defer the write-permission
8877 			 * check until sdstrategy and do not fail the open even
8878 			 * if FWRITE is set, since whether the device is
8879 			 * writable depends on the media, and the media can
8880 			 * change after the call to open().
8881 			 */
8882 			if (un->un_f_dvdram_writable_device == FALSE) {
8883 				if (ISCD(un) || sr_check_wp(dev)) {
8884 					rval = EROFS;
8885 					mutex_enter(SD_MUTEX(un));
8886 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8887 					    "write to cd or write protected media\n");
8888 					goto open_fail;
8889 				}
8890 			}
8891 			mutex_enter(SD_MUTEX(un));
8892 		}
8893 	}
8894 
8895 	/*
8896 	 * If opening in NDELAY/NONBLOCK mode, just return.
8897 	 * Check if disk is ready and has a valid geometry later.
8898 	 */
8899 	if (!nodelay) {
8900 		mutex_exit(SD_MUTEX(un));
8901 		rval = sd_ready_and_valid(un);
8902 		mutex_enter(SD_MUTEX(un));
8903 		/*
8904 		 * Fail if device is not ready or if the number of disk
8905 		 * blocks is zero or negative for non-CD devices.
8906 		 */
8907 
8908 		nblks = 0;
8909 
8910 		if (rval == SD_READY_VALID && (!ISCD(un))) {
8911 			/* if cmlb_partinfo fails, nblks remains 0 */
8912 			mutex_exit(SD_MUTEX(un));
8913 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
8914 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
8915 			mutex_enter(SD_MUTEX(un));
8916 		}
8917 
8918 		if ((rval != SD_READY_VALID) ||
8919 		    (!ISCD(un) && nblks <= 0)) {
8920 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
8921 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8922 			    "device not ready or invalid disk block value\n");
8923 			goto open_fail;
8924 		}
8925 #if defined(__i386) || defined(__amd64)
8926 	} else {
8927 		uchar_t *cp;
8928 		/*
8929 		 * x86 requires special nodelay handling, so that p0 is
8930 		 * always defined and accessible.
8931 		 * Invalidate geometry only if device is not already open.
8932 		 */
8933 		cp = &un->un_ocmap.chkd[0];
8934 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
8935 			if (*cp != (uchar_t)0) {
8936 				break;
8937 			}
8938 			cp++;
8939 		}
8940 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
8941 			mutex_exit(SD_MUTEX(un));
8942 			cmlb_invalidate(un->un_cmlbhandle,
8943 			    (void *)SD_PATH_DIRECT);
8944 			mutex_enter(SD_MUTEX(un));
8945 		}
8946 
8947 #endif
8948 	}
8949 
8950 	if (otyp == OTYP_LYR) {
8951 		un->un_ocmap.lyropen[part]++;
8952 	} else {
8953 		un->un_ocmap.regopen[otyp] |= partmask;
8954 	}
8955 
8956 	/* Set up open and exclusive open flags */
8957 	if (flag & FEXCL) {
8958 		un->un_exclopen |= (partmask);
8959 	}
8960 
8961 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8962 	    "open of part %d type %d\n", part, otyp);
8963 
8964 	mutex_exit(SD_MUTEX(un));
8965 	if (!nodelay) {
8966 		sd_pm_exit(un);
8967 	}
8968 
8969 	sema_v(&un->un_semoclose);
8970 
8971 	mutex_enter(&sd_detach_mutex);
8972 	un->un_opens_in_progress--;
8973 	mutex_exit(&sd_detach_mutex);
8974 
8975 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
8976 	return (DDI_SUCCESS);
8977 
8978 excl_open_fail:
8979 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
8980 	rval = EBUSY;
8981 
8982 open_fail:
8983 	mutex_exit(SD_MUTEX(un));
8984 
8985 	/*
8986 	 * On a failed open we must exit the pm management.
8987 	 */
8988 	if (!nodelay) {
8989 		sd_pm_exit(un);
8990 	}
8991 open_failed_with_pm:
8992 	sema_v(&un->un_semoclose);
8993 
8994 	mutex_enter(&sd_detach_mutex);
8995 	un->un_opens_in_progress--;
8996 	if (otyp == OTYP_LYR) {
8997 		un->un_layer_count--;
8998 	}
8999 	mutex_exit(&sd_detach_mutex);
9000 
9001 	return (rval);
9002 }
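
/*
 * Worked example of the exclusive-open bookkeeping above
 * (illustrative only): an FEXCL open of partition 2 computes
 * partmask = 1 << 2 = 0x4 and sets that bit in un_exclopen.  Any
 * subsequent open of partition 2 then fails the
 * "un->un_exclopen & partmask" test and returns EBUSY until the
 * exclusive holder closes.
 */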
9003 
9004 
9005 /*
9006  *    Function: sdclose
9007  *
9008  * Description: Driver's close(9e) entry point function.
9009  *
9010  *   Arguments: dev    - device number
9011  *		flag   - file status flag, informational only
9012  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9013  *		cred_p - user credential pointer
9014  *
9015  * Return Code: ENXIO
9016  *
9017  *     Context: Kernel thread context
9018  */
9019 /* ARGSUSED */
9020 static int
9021 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9022 {
9023 	struct sd_lun	*un;
9024 	uchar_t		*cp;
9025 	int		part;
9026 	int		nodelay;
9027 	int		rval = 0;
9028 
9029 	/* Validate the open type */
9030 	if (otyp >= OTYPCNT) {
9031 		return (ENXIO);
9032 	}
9033 
9034 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9035 		return (ENXIO);
9036 	}
9037 
9038 	part = SDPART(dev);
9039 	nodelay = flag & (FNDELAY | FNONBLOCK);
9040 
9041 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9042 	    "sdclose: close of part %d type %d\n", part, otyp);
9043 
9044 	/*
9045 	 * We use a semaphore here in order to serialize
9046 	 * open and close requests on the device.
9047 	 */
9048 	sema_p(&un->un_semoclose);
9049 
9050 	mutex_enter(SD_MUTEX(un));
9051 
9052 	/* Don't proceed if power is being changed. */
9053 	while (un->un_state == SD_STATE_PM_CHANGING) {
9054 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9055 	}
9056 
9057 	if (un->un_exclopen & (1 << part)) {
9058 		un->un_exclopen &= ~(1 << part);
9059 	}
9060 
9061 	/* Update the open partition map */
9062 	if (otyp == OTYP_LYR) {
9063 		un->un_ocmap.lyropen[part] -= 1;
9064 	} else {
9065 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9066 	}
9067 
9068 	cp = &un->un_ocmap.chkd[0];
9069 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9070 		if (*cp != (uchar_t)0) {
9071 			break;
9072 		}
9073 		cp++;
9074 	}
9075 
9076 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9077 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9078 
9079 		/*
9080 		 * We avoid persistence upon the last close, and set
9081 		 * the throttle back to the maximum.
9082 		 */
9083 		un->un_throttle = un->un_saved_throttle;
9084 
9085 		if (un->un_state == SD_STATE_OFFLINE) {
9086 			if (un->un_f_is_fibre == FALSE) {
9087 				scsi_log(SD_DEVINFO(un), sd_label,
9088 				    CE_WARN, "offline\n");
9089 			}
9090 			mutex_exit(SD_MUTEX(un));
9091 			cmlb_invalidate(un->un_cmlbhandle,
9092 			    (void *)SD_PATH_DIRECT);
9093 			mutex_enter(SD_MUTEX(un));
9094 
9095 		} else {
9096 			/*
9097 			 * Flush any outstanding writes in NVRAM cache.
9098 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9099 			 * cmd, it may not work for non-Pluto devices.
9100 			 * command; it may not work for non-Pluto devices.
9101 			 * except DVD-RAM drives.
9102 			 *
9103 			 * Also note: because SYNCHRONIZE CACHE is currently
9104 			 * the only command issued here that requires the
9105 			 * drive be powered up, only do the power up before
9106 			 * sending the Sync Cache command. If additional
9107 			 * commands are added which require a powered up
9108 			 * drive, the following sequence may have to change.
9109 			 *
9110 			 * And finally, note that parallel SCSI on SPARC
9111 			 * only issues a Sync Cache to DVD-RAM, a newly
9112 			 * supported device.
9113 			 */
9114 #if defined(__i386) || defined(__amd64)
9115 			if (un->un_f_sync_cache_supported ||
9116 			    un->un_f_dvdram_writable_device == TRUE) {
9117 #else
9118 			if (un->un_f_dvdram_writable_device == TRUE) {
9119 #endif
9120 				mutex_exit(SD_MUTEX(un));
9121 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9122 					rval =
9123 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9124 					    NULL);
9125 					/* ignore error if not supported */
9126 					if (rval == ENOTSUP) {
9127 						rval = 0;
9128 					} else if (rval != 0) {
9129 						rval = EIO;
9130 					}
9131 					sd_pm_exit(un);
9132 				} else {
9133 					rval = EIO;
9134 				}
9135 				mutex_enter(SD_MUTEX(un));
9136 			}
9137 
9138 			/*
9139 			 * For devices which support DOOR_LOCK, send an ALLOW
9140 			 * MEDIA REMOVAL command, but don't get upset if it
9141 			 * fails. We need to power up the drive before we can
9142 			 * call sd_send_scsi_DOORLOCK().
9143 			 */
9144 			if (un->un_f_doorlock_supported) {
9145 				mutex_exit(SD_MUTEX(un));
9146 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9147 					rval = sd_send_scsi_DOORLOCK(un,
9148 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9149 
9150 					sd_pm_exit(un);
9151 					if (ISCD(un) && (rval != 0) &&
9152 					    (nodelay != 0)) {
9153 						rval = ENXIO;
9154 					}
9155 				} else {
9156 					rval = EIO;
9157 				}
9158 				mutex_enter(SD_MUTEX(un));
9159 			}
9160 
9161 			/*
9162 			 * If a device has removable media, invalidate all
9163 			 * parameters related to media, such as geometry,
9164 			 * blocksize, and blockcount.
9165 			 */
9166 			if (un->un_f_has_removable_media) {
9167 				sr_ejected(un);
9168 			}
9169 
9170 			/*
9171 			 * Destroy the cache (if it exists) which was
9172 			 * allocated for the write maps since this is
9173 			 * the last close for this media.
9174 			 */
9175 			if (un->un_wm_cache) {
9176 				/*
9177 				 * Check if there are pending commands;
9178 				 * if there are, give a warning and do
9179 				 * not destroy the cache.
9180 				 */
9181 				if (un->un_ncmds_in_driver > 0) {
9182 					scsi_log(SD_DEVINFO(un),
9183 					    sd_label, CE_WARN,
9184 					    "Unable to clean up memory "
9185 					    "because of pending I/O\n");
9186 				} else {
9187 					kmem_cache_destroy(
9188 					    un->un_wm_cache);
9189 					un->un_wm_cache = NULL;
9190 				}
9191 			}
9192 			mutex_exit(SD_MUTEX(un));
9193 			(void) cmlb_close(un->un_cmlbhandle,
9194 			    (void *)SD_PATH_DIRECT);
9195 			mutex_enter(SD_MUTEX(un));
9196 
9197 		}
9198 	}
9199 
9200 	mutex_exit(SD_MUTEX(un));
9201 	sema_v(&un->un_semoclose);
9202 
9203 	if (otyp == OTYP_LYR) {
9204 		mutex_enter(&sd_detach_mutex);
9205 		/*
9206 		 * The detach routine may run when the layer count
9207 		 * drops to zero.
9208 		 */
9209 		un->un_layer_count--;
9210 		mutex_exit(&sd_detach_mutex);
9211 	}
9212 
9213 	return (rval);
9214 }
9215 
9216 
9217 /*
9218  *    Function: sd_ready_and_valid
9219  *
9220  * Description: Test if device is ready and has a valid geometry.
9221  *
9222  *   Arguments: un - driver soft state (unit) structure
9224  *
9225  * Return Code: SD_READY_VALID		ready and valid label
9226  *		SD_NOT_READY_VALID	not ready, no label
9227  *		SD_RESERVED_BY_OTHERS	reservation conflict
9228  *
9229  *     Context: Never called at interrupt context.
9230  */
9231 
9232 static int
9233 sd_ready_and_valid(struct sd_lun *un)
9234 {
9235 	struct sd_errstats	*stp;
9236 	uint64_t		capacity;
9237 	uint_t			lbasize;
9238 	int			rval = SD_READY_VALID;
9239 	char			name_str[48];
9240 	int			is_valid;
9241 
9242 	ASSERT(un != NULL);
9243 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9244 
9245 	mutex_enter(SD_MUTEX(un));
9246 	/*
9247 	 * If a device has removable media, we must check if media is
9248 	 * ready when checking if this device is ready and valid.
9249 	 */
9250 	if (un->un_f_has_removable_media) {
9251 		mutex_exit(SD_MUTEX(un));
9252 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9253 			rval = SD_NOT_READY_VALID;
9254 			mutex_enter(SD_MUTEX(un));
9255 			goto done;
9256 		}
9257 
9258 		is_valid = SD_IS_VALID_LABEL(un);
9259 		mutex_enter(SD_MUTEX(un));
9260 		if (!is_valid ||
9261 		    (un->un_f_blockcount_is_valid == FALSE) ||
9262 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9263 
9264 			/* capacity has to be read every open. */
9265 			mutex_exit(SD_MUTEX(un));
9266 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9267 			    &lbasize, SD_PATH_DIRECT) != 0) {
9268 				cmlb_invalidate(un->un_cmlbhandle,
9269 				    (void *)SD_PATH_DIRECT);
9270 				mutex_enter(SD_MUTEX(un));
9271 				rval = SD_NOT_READY_VALID;
9272 				goto done;
9273 			} else {
9274 				mutex_enter(SD_MUTEX(un));
9275 				sd_update_block_info(un, lbasize, capacity);
9276 			}
9277 		}
9278 
9279 		/*
9280 		 * Check if the media in the device is writable or not.
9281 		 */
9282 		if (!is_valid && ISCD(un)) {
9283 			sd_check_for_writable_cd(un);
9284 		}
9285 
9286 	} else {
9287 		/*
9288 		 * Do a test unit ready to clear any unit attention from non-cd
9289 		 * devices.
9290 		 */
9291 		mutex_exit(SD_MUTEX(un));
9292 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9293 		mutex_enter(SD_MUTEX(un));
9294 	}
9295 
9296 
9297 	/*
9298 	 * If this is a non-512 block device, allocate space for
9299 	 * the wmap cache. This is done here since this routine is
9300 	 * called every time the media is changed, and the block
9301 	 * size is a function of the media rather than the device.
9302 	 */
9303 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9304 		if (!(un->un_wm_cache)) {
9305 			(void) snprintf(name_str, sizeof (name_str),
9306 			    "%s%d_cache",
9307 			    ddi_driver_name(SD_DEVINFO(un)),
9308 			    ddi_get_instance(SD_DEVINFO(un)));
9309 			un->un_wm_cache = kmem_cache_create(
9310 			    name_str, sizeof (struct sd_w_map),
9311 			    8, sd_wm_cache_constructor,
9312 			    sd_wm_cache_destructor, NULL,
9313 			    (void *)un, NULL, 0);
9314 			if (!(un->un_wm_cache)) {
9315 				rval = ENOMEM;
9316 				goto done;
9317 			}
9318 		}
9319 	}
9320 
9321 	if (un->un_state == SD_STATE_NORMAL) {
9322 		/*
9323 		 * If the target is not yet ready here (defined by a TUR
9324 		 * failure), invalidate the geometry and print an 'offline'
9325 		 * message. This is a legacy message, as the state of the
9326 		 * target is not actually changed to SD_STATE_OFFLINE.
9327 		 *
9328 		 * If the TUR fails for EACCES (Reservation Conflict),
9329 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9330 		 * reservation conflict. If the TUR fails for other
9331 		 * reasons, SD_NOT_READY_VALID will be returned.
9332 		 */
9333 		int err;
9334 
9335 		mutex_exit(SD_MUTEX(un));
9336 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9337 		mutex_enter(SD_MUTEX(un));
9338 
9339 		if (err != 0) {
9340 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9341 			    "offline or reservation conflict\n");
9342 			mutex_exit(SD_MUTEX(un));
9343 			cmlb_invalidate(un->un_cmlbhandle,
9344 			    (void *)SD_PATH_DIRECT);
9345 			mutex_enter(SD_MUTEX(un));
9346 			if (err == EACCES) {
9347 				rval = SD_RESERVED_BY_OTHERS;
9348 			} else {
9349 				rval = SD_NOT_READY_VALID;
9350 			}
9351 			goto done;
9352 		}
9353 	}
9354 
9355 	if (un->un_f_format_in_progress == FALSE) {
9356 		mutex_exit(SD_MUTEX(un));
9357 		if (cmlb_validate(un->un_cmlbhandle, 0,
9358 		    (void *)SD_PATH_DIRECT) != 0) {
9359 			rval = SD_NOT_READY_VALID;
9360 			mutex_enter(SD_MUTEX(un));
9361 			goto done;
9362 		}
9363 		if (un->un_f_pkstats_enabled) {
9364 			sd_set_pstats(un);
9365 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9366 			    "sd_ready_and_valid: un:0x%p pstats created and "
9367 			    "set\n", un);
9368 		}
9369 		mutex_enter(SD_MUTEX(un));
9370 	}
9371 
9372 	/*
9373 	 * If this device supports the DOOR_LOCK command, try to send
9374 	 * it to PREVENT MEDIA REMOVAL, but don't get upset if it
9375 	 * fails. For a CD, however, a failure is an error.
9376 	 */
9377 	if (un->un_f_doorlock_supported) {
9378 		mutex_exit(SD_MUTEX(un));
9379 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9380 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9381 			rval = SD_NOT_READY_VALID;
9382 			mutex_enter(SD_MUTEX(un));
9383 			goto done;
9384 		}
9385 		mutex_enter(SD_MUTEX(un));
9386 	}
9387 
9388 	/* The state has changed, inform the media watch routines */
9389 	un->un_mediastate = DKIO_INSERTED;
9390 	cv_broadcast(&un->un_state_cv);
9391 	rval = SD_READY_VALID;
9392 
9393 done:
9394 
9395 	/*
9396 	 * Initialize the capacity kstat value if there was no media
9397 	 * previously (capacity kstat is 0) and media has been inserted
9398 	 * (un_blockcount > 0).
9399 	 */
9400 	if (un->un_errstats != NULL) {
9401 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9402 		if ((stp->sd_capacity.value.ui64 == 0) &&
9403 		    (un->un_f_blockcount_is_valid == TRUE)) {
9404 			stp->sd_capacity.value.ui64 =
9405 			    (uint64_t)((uint64_t)un->un_blockcount *
9406 			    un->un_sys_blocksize);
9407 		}
9408 	}
9409 
9410 	mutex_exit(SD_MUTEX(un));
9411 	return (rval);
9412 }
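
/*
 * Caller-side sketch (illustrative only; compare the non-nodelay
 * path in sdopen above): a caller could distinguish the return
 * codes to map a reservation conflict to EACCES instead of a
 * generic error:
 *
 *	switch (sd_ready_and_valid(un)) {
 *	case SD_READY_VALID:
 *		break;
 *	case SD_RESERVED_BY_OTHERS:
 *		return (EACCES);
 *	default:		... SD_NOT_READY_VALID, etc. ...
 *		return (EIO);
 *	}
 */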
9413 
9414 
9415 /*
9416  *    Function: sdmin
9417  *
9418  * Description: Routine to limit the size of a data transfer. Used in
9419  *		conjunction with physio(9F).
9420  *
9421  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9422  *
9423  *     Context: Kernel thread context.
9424  */
9425 
9426 static void
9427 sdmin(struct buf *bp)
9428 {
9429 	struct sd_lun	*un;
9430 	int		instance;
9431 
9432 	instance = SDUNIT(bp->b_edev);
9433 
9434 	un = ddi_get_soft_state(sd_state, instance);
9435 	ASSERT(un != NULL);
9436 
9437 	if (bp->b_bcount > un->un_max_xfer_size) {
9438 		bp->b_bcount = un->un_max_xfer_size;
9439 	}
9440 }
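
/*
 * Example (illustrative only, sizes assumed): with un_max_xfer_size
 * of 0x40000 (256 KB), a 1 MB request has b_bcount clamped here to
 * 256 KB, and physio(9F) then completes the I/O as four successive
 * 256 KB transfers.
 */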
9441 
9442 
9443 /*
9444  *    Function: sdread
9445  *
9446  * Description: Driver's read(9e) entry point function.
9447  *
9448  *   Arguments: dev   - device number
9449  *		uio   - structure pointer describing where data is to be stored
9450  *			in user's space
9451  *		cred_p  - user credential pointer
9452  *
9453  * Return Code: ENXIO
9454  *		EIO
9455  *		EINVAL
9456  *		value returned by physio
9457  *
9458  *     Context: Kernel thread context.
9459  */
9460 /* ARGSUSED */
9461 static int
9462 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9463 {
9464 	struct sd_lun	*un = NULL;
9465 	int		secmask;
9466 	int		err;
9467 
9468 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9469 		return (ENXIO);
9470 	}
9471 
9472 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9473 
9474 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9475 		mutex_enter(SD_MUTEX(un));
9476 		/*
9477 		 * Because the call to sd_ready_and_valid will issue I/O we
9478 		 * must wait here if either the device is suspended or
9479 		 * if it's power level is changing.
9480 		 * if its power level is changing.
9481 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9482 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9483 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9484 		}
9485 		un->un_ncmds_in_driver++;
9486 		mutex_exit(SD_MUTEX(un));
9487 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9488 			mutex_enter(SD_MUTEX(un));
9489 			un->un_ncmds_in_driver--;
9490 			ASSERT(un->un_ncmds_in_driver >= 0);
9491 			mutex_exit(SD_MUTEX(un));
9492 			return (EIO);
9493 		}
9494 		mutex_enter(SD_MUTEX(un));
9495 		un->un_ncmds_in_driver--;
9496 		ASSERT(un->un_ncmds_in_driver >= 0);
9497 		mutex_exit(SD_MUTEX(un));
9498 	}
9499 
9500 	/*
9501 	 * Read requests are restricted to multiples of the system block size.
9502 	 */
9503 	secmask = un->un_sys_blocksize - 1;
9504 
9505 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9506 		SD_ERROR(SD_LOG_READ_WRITE, un,
9507 		    "sdread: file offset not modulo %d\n",
9508 		    un->un_sys_blocksize);
9509 		err = EINVAL;
9510 	} else if (uio->uio_iov->iov_len & (secmask)) {
9511 		SD_ERROR(SD_LOG_READ_WRITE, un,
9512 		    "sdread: transfer length not modulo %d\n",
9513 		    un->un_sys_blocksize);
9514 		err = EINVAL;
9515 	} else {
9516 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9517 	}
9518 	return (err);
9519 }
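
/*
 * Alignment example for the secmask checks above (illustrative
 * only): with un_sys_blocksize == 512, secmask == 0x1FF.  A file
 * offset of 8192 passes (8192 & 0x1FF == 0), while an offset of
 * 8200 fails with EINVAL (8200 & 0x1FF == 8); the same test is
 * applied to the transfer length.  sdwrite, sdaread, and sdawrite
 * below perform the identical checks.
 */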
9520 
9521 
9522 /*
9523  *    Function: sdwrite
9524  *
9525  * Description: Driver's write(9e) entry point function.
9526  *
9527  *   Arguments: dev   - device number
9528  *		uio   - structure pointer describing where data is stored in
9529  *			user's space
9530  *		cred_p  - user credential pointer
9531  *
9532  * Return Code: ENXIO
9533  *		EIO
9534  *		EINVAL
9535  *		value returned by physio
9536  *
9537  *     Context: Kernel thread context.
9538  */
9539 /* ARGSUSED */
9540 static int
9541 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9542 {
9543 	struct sd_lun	*un = NULL;
9544 	int		secmask;
9545 	int		err;
9546 
9547 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9548 		return (ENXIO);
9549 	}
9550 
9551 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9552 
9553 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9554 		mutex_enter(SD_MUTEX(un));
9555 		/*
9556 		 * Because the call to sd_ready_and_valid will issue I/O we
9557 		 * must wait here if either the device is suspended or
9558 		 * if its power level is changing.
9559 		 */
9560 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9561 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9562 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9563 		}
9564 		un->un_ncmds_in_driver++;
9565 		mutex_exit(SD_MUTEX(un));
9566 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9567 			mutex_enter(SD_MUTEX(un));
9568 			un->un_ncmds_in_driver--;
9569 			ASSERT(un->un_ncmds_in_driver >= 0);
9570 			mutex_exit(SD_MUTEX(un));
9571 			return (EIO);
9572 		}
9573 		mutex_enter(SD_MUTEX(un));
9574 		un->un_ncmds_in_driver--;
9575 		ASSERT(un->un_ncmds_in_driver >= 0);
9576 		mutex_exit(SD_MUTEX(un));
9577 	}
9578 
9579 	/*
9580 	 * Write requests are restricted to multiples of the system block size.
9581 	 */
9582 	secmask = un->un_sys_blocksize - 1;
9583 
9584 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9585 		SD_ERROR(SD_LOG_READ_WRITE, un,
9586 		    "sdwrite: file offset not modulo %d\n",
9587 		    un->un_sys_blocksize);
9588 		err = EINVAL;
9589 	} else if (uio->uio_iov->iov_len & (secmask)) {
9590 		SD_ERROR(SD_LOG_READ_WRITE, un,
9591 		    "sdwrite: transfer length not modulo %d\n",
9592 		    un->un_sys_blocksize);
9593 		err = EINVAL;
9594 	} else {
9595 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9596 	}
9597 	return (err);
9598 }
9599 
9600 
9601 /*
9602  *    Function: sdaread
9603  *
9604  * Description: Driver's aread(9e) entry point function.
9605  *
9606  *   Arguments: dev   - device number
9607  *		aio   - structure pointer describing where data is to be stored
9608  *		cred_p  - user credential pointer
9609  *
9610  * Return Code: ENXIO
9611  *		EIO
9612  *		EINVAL
9613  *		value returned by aphysio
9614  *
9615  *     Context: Kernel thread context.
9616  */
9617 /* ARGSUSED */
9618 static int
9619 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9620 {
9621 	struct sd_lun	*un = NULL;
9622 	struct uio	*uio = aio->aio_uio;
9623 	int		secmask;
9624 	int		err;
9625 
9626 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9627 		return (ENXIO);
9628 	}
9629 
9630 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9631 
9632 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9633 		mutex_enter(SD_MUTEX(un));
9634 		/*
9635 		 * Because the call to sd_ready_and_valid will issue I/O we
9636 		 * must wait here if either the device is suspended or
9637 		 * if its power level is changing.
9638 		 */
9639 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9640 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9641 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9642 		}
9643 		un->un_ncmds_in_driver++;
9644 		mutex_exit(SD_MUTEX(un));
9645 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9646 			mutex_enter(SD_MUTEX(un));
9647 			un->un_ncmds_in_driver--;
9648 			ASSERT(un->un_ncmds_in_driver >= 0);
9649 			mutex_exit(SD_MUTEX(un));
9650 			return (EIO);
9651 		}
9652 		mutex_enter(SD_MUTEX(un));
9653 		un->un_ncmds_in_driver--;
9654 		ASSERT(un->un_ncmds_in_driver >= 0);
9655 		mutex_exit(SD_MUTEX(un));
9656 	}
9657 
9658 	/*
9659 	 * Read requests are restricted to multiples of the system block size.
9660 	 */
9661 	secmask = un->un_sys_blocksize - 1;
9662 
9663 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9664 		SD_ERROR(SD_LOG_READ_WRITE, un,
9665 		    "sdaread: file offset not modulo %d\n",
9666 		    un->un_sys_blocksize);
9667 		err = EINVAL;
9668 	} else if (uio->uio_iov->iov_len & (secmask)) {
9669 		SD_ERROR(SD_LOG_READ_WRITE, un,
9670 		    "sdaread: transfer length not modulo %d\n",
9671 		    un->un_sys_blocksize);
9672 		err = EINVAL;
9673 	} else {
9674 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9675 	}
9676 	return (err);
9677 }
9678 
9679 
9680 /*
9681  *    Function: sdawrite
9682  *
9683  * Description: Driver's awrite(9e) entry point function.
9684  *
9685  *   Arguments: dev   - device number
9686  *		aio   - structure pointer describing where data is stored
9687  *		cred_p  - user credential pointer
9688  *
9689  * Return Code: ENXIO
9690  *		EIO
9691  *		EINVAL
9692  *		value returned by aphysio
9693  *
9694  *     Context: Kernel thread context.
9695  */
9696 /* ARGSUSED */
9697 static int
9698 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9699 {
9700 	struct sd_lun	*un = NULL;
9701 	struct uio	*uio = aio->aio_uio;
9702 	int		secmask;
9703 	int		err;
9704 
9705 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9706 		return (ENXIO);
9707 	}
9708 
9709 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9710 
9711 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9712 		mutex_enter(SD_MUTEX(un));
9713 		/*
9714 		 * Because the call to sd_ready_and_valid will issue I/O we
9715 		 * must wait here if either the device is suspended or
9716 		 * if its power level is changing.
9717 		 */
9718 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9719 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9720 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9721 		}
9722 		un->un_ncmds_in_driver++;
9723 		mutex_exit(SD_MUTEX(un));
9724 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9725 			mutex_enter(SD_MUTEX(un));
9726 			un->un_ncmds_in_driver--;
9727 			ASSERT(un->un_ncmds_in_driver >= 0);
9728 			mutex_exit(SD_MUTEX(un));
9729 			return (EIO);
9730 		}
9731 		mutex_enter(SD_MUTEX(un));
9732 		un->un_ncmds_in_driver--;
9733 		ASSERT(un->un_ncmds_in_driver >= 0);
9734 		mutex_exit(SD_MUTEX(un));
9735 	}
9736 
9737 	/*
9738 	 * Write requests are restricted to multiples of the system block size.
9739 	 */
9740 	secmask = un->un_sys_blocksize - 1;
9741 
9742 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9743 		SD_ERROR(SD_LOG_READ_WRITE, un,
9744 		    "sdawrite: file offset not modulo %d\n",
9745 		    un->un_sys_blocksize);
9746 		err = EINVAL;
9747 	} else if (uio->uio_iov->iov_len & (secmask)) {
9748 		SD_ERROR(SD_LOG_READ_WRITE, un,
9749 		    "sdawrite: transfer length not modulo %d\n",
9750 		    un->un_sys_blocksize);
9751 		err = EINVAL;
9752 	} else {
9753 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
9754 	}
9755 	return (err);
9756 }
9757 
9758 
9759 
9760 
9761 
9762 /*
9763  * Driver IO processing follows the following sequence:
9764  *
9765  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
9766  *         |                |                     ^
9767  *         v                v                     |
9768  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
9769  *         |                |                     |                   |
9770  *         v                |                     |                   |
9771  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
9772  *         |                |                     ^                   ^
9773  *         v                v                     |                   |
9774  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
9775  *         |                |                     |                   |
9776  *     +---+                |                     +------------+      +-------+
9777  *     |                    |                                  |              |
9778  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9779  *     |                    v                                  |              |
9780  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
9781  *     |                    |                                  ^              |
9782  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9783  *     |                    v                                  |              |
9784  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
9785  *     |                    |                                  ^              |
9786  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9787  *     |                    v                                  |              |
9788  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
9789  *     |                    |                                  ^              |
9790  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
9791  *     |                    v                                  |              |
9792  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
9793  *     |                    |                                  ^              |
9794  *     |                    |                                  |              |
9795  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
9796  *                          |                           ^
9797  *                          v                           |
9798  *                   sd_core_iostart()                  |
9799  *                          |                           |
9800  *                          |                           +------>(*destroypkt)()
9801  *                          +-> sd_start_cmds() <-+     |           |
9802  *                          |                     |     |           v
9803  *                          |                     |     |  scsi_destroy_pkt(9F)
9804  *                          |                     |     |
9805  *                          +->(*initpkt)()       +- sdintr()
9806  *                          |  |                        |  |
9807  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
9808  *                          |  +-> scsi_setup_cdb(9F)   |
9809  *                          |                           |
9810  *                          +--> scsi_transport(9F)     |
9811  *                                     |                |
9812  *                                     +----> SCSA ---->+
9813  *
9814  *
9815  * This code is based upon the following presumptions:
9816  *
9817  *   - iostart and iodone functions operate on buf(9S) structures. These
9818  *     functions perform the necessary operations on the buf(9S) and pass
9819  *     them along to the next function in the chain by using the macros
9820  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
9821  *     (for iodone side functions).
9822  *
9823  *   - The iostart side functions may sleep. The iodone side functions
9824  *     are called under interrupt context and may NOT sleep. Therefore
9825  *     iodone side functions also may not call iostart side functions.
9826  *     (NOTE: iostart side functions should NOT sleep for memory, as
9827  *     this could result in deadlock.)
9828  *
9829  *   - An iostart side function may call its corresponding iodone side
9830  *     function directly (if necessary).
9831  *
9832  *   - In the event of an error, an iostart side function can return a buf(9S)
9833  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
9834  *     b_error in the usual way of course).
9835  *
9836  *   - The taskq mechanism may be used by the iodone side functions to dispatch
9837  *     requests to the iostart side functions.  The iostart side functions in
9838  *     this case would be called under the context of a taskq thread, so it's
9839  *     OK for them to block/sleep/spin in this case.
9840  *
9841  *   - iostart side functions may allocate "shadow" buf(9S) structs and
9842  *     pass them along to the next function in the chain.  The corresponding
9843  *     iodone side functions must coalesce the "shadow" bufs and return
9844  *     the "original" buf to the next higher layer.
9845  *
9846  *   - The b_private field of the buf(9S) struct holds a pointer to
9847  *     an sd_xbuf struct, which contains information needed to
9848  *     construct the scsi_pkt for the command.
9849  *
9850  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
9851  *     layer must acquire & release the SD_MUTEX(un) as needed.
9852  */
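/*
 * Illustrative sketch only (not part of the driver): a minimal
 * pass-through layer pair following the conventions above. The names
 * sd_example_iostart/sd_example_iodone and the condition "fatal_error"
 * are hypothetical.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		ASSERT(un != NULL);
 *		ASSERT(bp != NULL);
 *		ASSERT(!mutex_owned(SD_MUTEX(un)));
 *
 *		if (fatal_error) {
 *			(on error, return the buf up the iodone side)
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		(otherwise pass the buf to the next iostart layer)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(interrupt context: must not sleep here)
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */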
9853 
9854 
9855 /*
9856  * Create taskq for all targets in the system. This is created at
9857  * _init(9E) and destroyed at _fini(9E).
9858  *
9859  * Note: here we set the minalloc to a reasonably high number to ensure that
9860  * we will have an adequate supply of task entries available at interrupt time.
9861  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
9862  * sd_taskq_create().  Since we do not want to sleep for allocations at
9863  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
9864  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
9865  * requests at any one instant in time.
9866  */
9867 #define	SD_TASKQ_NUMTHREADS	8
9868 #define	SD_TASKQ_MINALLOC	256
9869 #define	SD_TASKQ_MAXALLOC	256
9870 
9871 static taskq_t	*sd_tq = NULL;
9872 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
9873 
9874 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
9875 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
9876 
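/*
 * Illustrative sketch only: because maxalloc equals minalloc, a dispatch
 * from interrupt context that would exceed the preallocated task entries
 * fails rather than sleeping, and the caller must fail the command
 * itself. The callback name sd_example_task is hypothetical.
 *
 *	if (taskq_dispatch(sd_tq, sd_example_task, bp, KM_NOSLEEP) == 0) {
 *		bioerror(bp, EIO);
 *		bp->b_resid = bp->b_bcount;
 *		biodone(bp);
 *	}
 */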
9877 /*
9878  * The following task queue is being created for the write part of
9879  * read-modify-write of non-512 block size devices.
9880  * Limit the number of threads to 1 for now. This number was chosen
9881  * considering that it currently applies only to DVD-RAM and MO drives,
9882  * for which performance is not the main criterion at this stage.
9883  * Note: whether a single taskq could suffice should be explored in future.
9884  */
9885 #define	SD_WMR_TASKQ_NUMTHREADS	1
9886 static taskq_t	*sd_wmr_tq = NULL;
9887 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
9888 
9889 /*
9890  *    Function: sd_taskq_create
9891  *
9892  * Description: Create taskq thread(s) and preallocate task entries
9893  *
9894  * Return Code: Returns a pointer to the allocated taskq_t.
9895  *
9896  *     Context: Can sleep. Requires blockable context.
9897  *
9898  *       Notes: - The taskq() facility currently is NOT part of the DDI.
9899  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
9900  *		- taskq_create() will block for memory, also it will panic
9901  *		  if it cannot create the requested number of threads.
9902  *		- Currently taskq_create() creates threads that cannot be
9903  *		  swapped.
9904  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
9905  *		  supply of taskq entries at interrupt time (ie, so that we
9906  *		  do not have to sleep for memory)
9907  */
9908 
9909 static void
9910 sd_taskq_create(void)
9911 {
9912 	char	taskq_name[TASKQ_NAMELEN];
9913 
9914 	ASSERT(sd_tq == NULL);
9915 	ASSERT(sd_wmr_tq == NULL);
9916 
9917 	(void) snprintf(taskq_name, sizeof (taskq_name),
9918 	    "%s_drv_taskq", sd_label);
9919 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
9920 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9921 	    TASKQ_PREPOPULATE));
9922 
9923 	(void) snprintf(taskq_name, sizeof (taskq_name),
9924 	    "%s_rmw_taskq", sd_label);
9925 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
9926 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9927 	    TASKQ_PREPOPULATE));
9928 }
9929 
9930 
9931 /*
9932  *    Function: sd_taskq_delete
9933  *
9934  * Description: Complementary cleanup routine for sd_taskq_create().
9935  *
9936  *     Context: Kernel thread context.
9937  */
9938 
9939 static void
9940 sd_taskq_delete(void)
9941 {
9942 	ASSERT(sd_tq != NULL);
9943 	ASSERT(sd_wmr_tq != NULL);
9944 	taskq_destroy(sd_tq);
9945 	taskq_destroy(sd_wmr_tq);
9946 	sd_tq = NULL;
9947 	sd_wmr_tq = NULL;
9948 }
9949 
9950 
9951 /*
9952  *    Function: sdstrategy
9953  *
9954  * Description: Driver's strategy (9E) entry point function.
9955  *
9956  *   Arguments: bp - pointer to buf(9S)
9957  *
9958  * Return Code: Always returns zero
9959  *
9960  *     Context: Kernel thread context.
9961  */
9962 
9963 static int
9964 sdstrategy(struct buf *bp)
9965 {
9966 	struct sd_lun *un;
9967 
9968 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
9969 	if (un == NULL) {
9970 		bioerror(bp, EIO);
9971 		bp->b_resid = bp->b_bcount;
9972 		biodone(bp);
9973 		return (0);
9974 	}
9975 	/* As was done in the past, fail new cmds if the state is dumping. */
9976 	if (un->un_state == SD_STATE_DUMPING) {
9977 		bioerror(bp, ENXIO);
9978 		bp->b_resid = bp->b_bcount;
9979 		biodone(bp);
9980 		return (0);
9981 	}
9982 
9983 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9984 
9985 	/*
9986 	 * Commands may sneak in while we released the mutex in
9987 	 * DDI_SUSPEND, we should block new commands. However, old
9988 	 * commands that are still in the driver at this point should
9989 	 * still be allowed to drain.
9990 	 */
9991 	mutex_enter(SD_MUTEX(un));
9992 	/*
9993 	 * Must wait here if either the device is suspended or
9994 	 * if its power level is changing.
9995 	 */
9996 	while ((un->un_state == SD_STATE_SUSPENDED) ||
9997 	    (un->un_state == SD_STATE_PM_CHANGING)) {
9998 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9999 	}
10000 
10001 	un->un_ncmds_in_driver++;
10002 
10003 	/*
10004 	 * atapi: Since for now we run the CD in PIO mode, we need to call
10005 	 * bp_mapin here to avoid bp_mapin being called in interrupt context
10006 	 * from the HBA's init_pkt routine.
10007 	 */
10008 	if (un->un_f_cfg_is_atapi == TRUE) {
10009 		mutex_exit(SD_MUTEX(un));
10010 		bp_mapin(bp);
10011 		mutex_enter(SD_MUTEX(un));
10012 	}
10013 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10014 	    un->un_ncmds_in_driver);
10015 
10016 	mutex_exit(SD_MUTEX(un));
10017 
10018 	/*
10019 	 * This will (eventually) allocate the sd_xbuf area and
10020 	 * call sd_xbuf_strategy().  We just want to return the
10021 	 * result of ddi_xbuf_qstrategy so that we get an optimized
10022 	 * tail call, which saves us a stack frame.
10023 	 */
10024 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10025 }
10026 
10027 
10028 /*
10029  *    Function: sd_xbuf_strategy
10030  *
10031  * Description: Function for initiating IO operations via the
10032  *		ddi_xbuf_qstrategy() mechanism.
10033  *
10034  *     Context: Kernel thread context.
10035  */
10036 
10037 static void
10038 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10039 {
10040 	struct sd_lun *un = arg;
10041 
10042 	ASSERT(bp != NULL);
10043 	ASSERT(xp != NULL);
10044 	ASSERT(un != NULL);
10045 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10046 
10047 	/*
10048 	 * Initialize the fields in the xbuf and save a pointer to the
10049 	 * xbuf in bp->b_private.
10050 	 */
10051 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10052 
10053 	/* Send the buf down the iostart chain */
10054 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10055 }
10056 
10057 
10058 /*
10059  *    Function: sd_xbuf_init
10060  *
10061  * Description: Prepare the given sd_xbuf struct for use.
10062  *
10063  *   Arguments: un - ptr to softstate
10064  *		bp - ptr to associated buf(9S)
10065  *		xp - ptr to associated sd_xbuf
10066  *		chain_type - IO chain type to use:
10067  *			SD_CHAIN_NULL
10068  *			SD_CHAIN_BUFIO
10069  *			SD_CHAIN_USCSI
10070  *			SD_CHAIN_DIRECT
10071  *			SD_CHAIN_DIRECT_PRIORITY
10072  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10073  *			initialization; may be NULL if none.
10074  *
10075  *     Context: Kernel thread context
10076  */
10077 
10078 static void
10079 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10080 	uchar_t chain_type, void *pktinfop)
10081 {
10082 	int index;
10083 
10084 	ASSERT(un != NULL);
10085 	ASSERT(bp != NULL);
10086 	ASSERT(xp != NULL);
10087 
10088 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10089 	    bp, chain_type);
10090 
10091 	xp->xb_un	= un;
10092 	xp->xb_pktp	= NULL;
10093 	xp->xb_pktinfo	= pktinfop;
10094 	xp->xb_private	= bp->b_private;
10095 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10096 
10097 	/*
10098 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10099 	 * upon the specified chain type to use.
10100 	 */
10101 	switch (chain_type) {
10102 	case SD_CHAIN_NULL:
10103 		/*
10104 		 * Fall through to just use the values for the buf type, even
10105 		 * though for the NULL chain these values will never be used.
10106 		 */
10107 		/* FALLTHRU */
10108 	case SD_CHAIN_BUFIO:
10109 		index = un->un_buf_chain_type;
10110 		break;
10111 	case SD_CHAIN_USCSI:
10112 		index = un->un_uscsi_chain_type;
10113 		break;
10114 	case SD_CHAIN_DIRECT:
10115 		index = un->un_direct_chain_type;
10116 		break;
10117 	case SD_CHAIN_DIRECT_PRIORITY:
10118 		index = un->un_priority_chain_type;
10119 		break;
10120 	default:
10121 		/* We're really broken if we ever get here... */
10122 		panic("sd_xbuf_init: illegal chain type!");
10123 		/*NOTREACHED*/
10124 	}
10125 
10126 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10127 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10128 
10129 	/*
10130 	 * It might be a bit easier to simply bzero the entire xbuf above,
10131 	 * but it turns out that since we init a fair number of members anyway,
10132 	 * we save a fair number of cycles by explicitly assigning zero.
10133 	 */
10134 	xp->xb_pkt_flags	= 0;
10135 	xp->xb_dma_resid	= 0;
10136 	xp->xb_retry_count	= 0;
10137 	xp->xb_victim_retry_count = 0;
10138 	xp->xb_ua_retry_count	= 0;
10139 	xp->xb_sense_bp		= NULL;
10140 	xp->xb_sense_status	= 0;
10141 	xp->xb_sense_state	= 0;
10142 	xp->xb_sense_resid	= 0;
10143 
10144 	bp->b_private	= xp;
10145 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10146 	bp->b_resid	= 0;
10147 	bp->av_forw	= NULL;
10148 	bp->av_back	= NULL;
10149 	bioerror(bp, 0);
10150 
10151 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10152 }
10153 
10154 
10155 /*
10156  *    Function: sd_uscsi_strategy
10157  *
10158  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10159  *
10160  *   Arguments: bp - buf struct ptr
10161  *
10162  * Return Code: Always returns 0
10163  *
10164  *     Context: Kernel thread context
10165  */
10166 
10167 static int
10168 sd_uscsi_strategy(struct buf *bp)
10169 {
10170 	struct sd_lun		*un;
10171 	struct sd_uscsi_info	*uip;
10172 	struct sd_xbuf		*xp;
10173 	uchar_t			chain_type;
10174 
10175 	ASSERT(bp != NULL);
10176 
10177 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10178 	if (un == NULL) {
10179 		bioerror(bp, EIO);
10180 		bp->b_resid = bp->b_bcount;
10181 		biodone(bp);
10182 		return (0);
10183 	}
10184 
10185 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10186 
10187 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10188 
10189 	mutex_enter(SD_MUTEX(un));
10190 	/*
10191 	 * atapi: Since for now we run the CD in PIO mode, we need to call
10192 	 * bp_mapin here to avoid bp_mapin being called in interrupt context
10193 	 * from the HBA's init_pkt routine.
10194 	 */
10195 	if (un->un_f_cfg_is_atapi == TRUE) {
10196 		mutex_exit(SD_MUTEX(un));
10197 		bp_mapin(bp);
10198 		mutex_enter(SD_MUTEX(un));
10199 	}
10200 	un->un_ncmds_in_driver++;
10201 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10202 	    un->un_ncmds_in_driver);
10203 	mutex_exit(SD_MUTEX(un));
10204 
10205 	/*
10206 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10207 	 */
10208 	ASSERT(bp->b_private != NULL);
10209 	uip = (struct sd_uscsi_info *)bp->b_private;
10210 
10211 	switch (uip->ui_flags) {
10212 	case SD_PATH_DIRECT:
10213 		chain_type = SD_CHAIN_DIRECT;
10214 		break;
10215 	case SD_PATH_DIRECT_PRIORITY:
10216 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10217 		break;
10218 	default:
10219 		chain_type = SD_CHAIN_USCSI;
10220 		break;
10221 	}
10222 
10223 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10224 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10225 
10226 	/* Use the index obtained within xbuf_init */
10227 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10228 
10229 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10230 
10231 	return (0);
10232 }
10233 
10234 /*
10235  *    Function: sd_send_scsi_cmd
10236  *
10237  * Description: Runs a USCSI command for a user (when called thru sdioctl),
10238  *		or for the driver
10239  *
10240  *   Arguments: dev - the dev_t for the device
10241  *		incmd - ptr to a valid uscsi_cmd struct
10242  *		flag - bit flag, indicating open settings, 32/64 bit type
10243  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10244  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10245  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10246  *			to use the USCSI "direct" chain and bypass the normal
10247  *			command waitq.
10248  *
10249  * Return Code: 0 -  successful completion of the given command
10250  *		EIO - scsi_uscsi_handle_command() failed
10251  *		ENXIO  - soft state not found for specified dev
10252  *		EINVAL
10253  *		EFAULT - copyin/copyout error
10254  *		return code of scsi_uscsi_handle_command():
10255  *			EIO
10256  *			ENXIO
10257  *			EACCES
10258  *
10259  *     Context: Waits for command to complete. Can sleep.
10260  */
10261 
10262 static int
10263 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10264 	enum uio_seg dataspace, int path_flag)
10265 {
10266 	struct sd_uscsi_info	*uip;
10267 	struct uscsi_cmd	*uscmd;
10268 	struct sd_lun	*un;
10269 	int	format = 0;
10270 	int	rval;
10271 
10272 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10273 	if (un == NULL) {
10274 		return (ENXIO);
10275 	}
10276 
10277 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10278 
10279 #ifdef SDDEBUG
10280 	switch (dataspace) {
10281 	case UIO_USERSPACE:
10282 		SD_TRACE(SD_LOG_IO, un,
10283 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10284 		break;
10285 	case UIO_SYSSPACE:
10286 		SD_TRACE(SD_LOG_IO, un,
10287 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10288 		break;
10289 	default:
10290 		SD_TRACE(SD_LOG_IO, un,
10291 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10292 		break;
10293 	}
10294 #endif
10295 
10296 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10297 	    SD_ADDRESS(un), &uscmd);
10298 	if (rval != 0) {
10299 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10300 		    "scsi_uscsi_alloc_and_copyin failed\n");
10301 		return (rval);
10302 	}
10303 
10304 	if ((uscmd->uscsi_cdb != NULL) &&
10305 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10306 		mutex_enter(SD_MUTEX(un));
10307 		un->un_f_format_in_progress = TRUE;
10308 		mutex_exit(SD_MUTEX(un));
10309 		format = 1;
10310 	}
10311 
10312 	/*
10313 	 * Allocate an sd_uscsi_info struct and fill it with the info
10314 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10315 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10316 	 * since we allocate the buf here in this function, we do not
10317 	 * need to preserve the prior contents of b_private.
10318 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10319 	 */
10320 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10321 	uip->ui_flags = path_flag;
10322 	uip->ui_cmdp = uscmd;
10323 
10324 	/*
10325 	 * Commands sent with priority are intended for error recovery
10326 	 * situations, and do not have retries performed.
10327 	 */
10328 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10329 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10330 	}
10331 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10332 
10333 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10334 	    sd_uscsi_strategy, NULL, uip);
10335 
10336 #ifdef SDDEBUG
10337 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10338 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10339 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10340 	if (uscmd->uscsi_bufaddr != NULL) {
10341 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10342 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10343 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10344 		if (dataspace == UIO_SYSSPACE) {
10345 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10346 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10347 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10348 		}
10349 	}
10350 #endif
10351 
10352 	if (format == 1) {
10353 		mutex_enter(SD_MUTEX(un));
10354 		un->un_f_format_in_progress = FALSE;
10355 		mutex_exit(SD_MUTEX(un));
10356 	}
10357 
10358 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10359 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10360 
10361 	return (rval);
10362 }
10363 
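/*
 * Illustrative usage sketch only: given the device's dev_t, an in-driver
 * caller might issue a TEST UNIT READY through this routine roughly as
 * follows. This example is hypothetical and abbreviated; actual callers
 * in this driver build their specific commands in a similar fashion.
 *
 *	union scsi_cdb		cdb;
 *	struct uscsi_cmd	ucmd;
 *
 *	bzero(&cdb, sizeof (cdb));
 *	bzero(&ucmd, sizeof (ucmd));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = sd_io_time;
 *	(void) sd_send_scsi_cmd(dev, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 */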
10364 
10365 /*
10366  *    Function: sd_buf_iodone
10367  *
10368  * Description: Frees the sd_xbuf & returns the buf to its originator.
10369  *
10370  *     Context: May be called from interrupt context.
10371  */
10372 /* ARGSUSED */
10373 static void
10374 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10375 {
10376 	struct sd_xbuf *xp;
10377 
10378 	ASSERT(un != NULL);
10379 	ASSERT(bp != NULL);
10380 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10381 
10382 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10383 
10384 	xp = SD_GET_XBUF(bp);
10385 	ASSERT(xp != NULL);
10386 
10387 	mutex_enter(SD_MUTEX(un));
10388 
10389 	/*
10390 	 * Grab time when the cmd completed.
10391 	 * This is used for determining if the system has been
10392 	 * idle long enough to make it idle to the PM framework.
10393 	 * This is for lowering the overhead, and therefore improving
10394 	 * performance per I/O operation.
10395 	 */
10396 	un->un_pm_idle_time = ddi_get_time();
10397 
10398 	un->un_ncmds_in_driver--;
10399 	ASSERT(un->un_ncmds_in_driver >= 0);
10400 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10401 	    un->un_ncmds_in_driver);
10402 
10403 	mutex_exit(SD_MUTEX(un));
10404 
10405 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10406 	biodone(bp);				/* bp is gone after this */
10407 
10408 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10409 }
10410 
10411 
10412 /*
10413  *    Function: sd_uscsi_iodone
10414  *
10415  * Description: Frees the sd_xbuf & returns the buf to its originator.
10416  *
10417  *     Context: May be called from interrupt context.
10418  */
10419 /* ARGSUSED */
10420 static void
10421 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10422 {
10423 	struct sd_xbuf *xp;
10424 
10425 	ASSERT(un != NULL);
10426 	ASSERT(bp != NULL);
10427 
10428 	xp = SD_GET_XBUF(bp);
10429 	ASSERT(xp != NULL);
10430 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10431 
10432 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10433 
10434 	bp->b_private = xp->xb_private;
10435 
10436 	mutex_enter(SD_MUTEX(un));
10437 
10438 	/*
10439 	 * Grab time when the cmd completed.
10440 	 * This is used for determining if the system has been
10441 	 * idle long enough to make it idle to the PM framework.
10442 	 * This is for lowering the overhead, and therefore improving
10443 	 * performance per I/O operation.
10444 	 */
10445 	un->un_pm_idle_time = ddi_get_time();
10446 
10447 	un->un_ncmds_in_driver--;
10448 	ASSERT(un->un_ncmds_in_driver >= 0);
10449 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10450 	    un->un_ncmds_in_driver);
10451 
10452 	mutex_exit(SD_MUTEX(un));
10453 
10454 	kmem_free(xp, sizeof (struct sd_xbuf));
10455 	biodone(bp);
10456 
10457 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10458 }
10459 
10460 
10461 /*
10462  *    Function: sd_mapblockaddr_iostart
10463  *
10464  * Description: Verify request lies within the partition limits for
10465  *		the indicated minor device.  Issue "overrun" buf if
10466  *		request would exceed partition range.  Converts
10467  *		partition-relative block address to absolute.
10468  *
10469  *     Context: Can sleep
10470  *
10471  *      Issues: This follows what the old code did, in terms of accessing
10472  *		some of the partition info in the unit struct without holding
10473  *		the mutex.  This is a general issue: if the partition info
10474  *		can be altered while IO is in progress... as soon as we send
10475  *		a buf, its partitioning can be invalid before it gets to the
10476  *		device.  Probably the right fix is to move partitioning out
10477  *		of the driver entirely.
10478  */
10479 
10480 static void
10481 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10482 {
10483 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10484 	daddr_t	blocknum;	/* Block number specified by the buf */
10485 	size_t	requested_nblocks;
10486 	size_t	available_nblocks;
10487 	int	partition;
10488 	diskaddr_t	partition_offset;
10489 	struct sd_xbuf *xp;
10490 
10491 
10492 	ASSERT(un != NULL);
10493 	ASSERT(bp != NULL);
10494 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10495 
10496 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10497 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10498 
10499 	xp = SD_GET_XBUF(bp);
10500 	ASSERT(xp != NULL);
10501 
10502 	/*
10503 	 * If the geometry is not indicated as valid, attempt to access
10504 	 * the unit & verify the geometry/label. This can be the case for
10505 	 * removable-media devices, or if the device was opened in
10506 	 * NDELAY/NONBLOCK mode.
10507 	 */
10508 	if (!SD_IS_VALID_LABEL(un) &&
10509 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10510 		/*
10511 		 * For removable devices it is possible to start an I/O
10512 		 * without media present by opening the device in nodelay mode.
10513 		 * Also for writable CDs there can be many scenarios where
10514 		 * there is no geometry yet but the volume manager is trying to
10515 		 * issue a read() just because it can see the TOC on the CD. So
10516 		 * do not print a message for removables.
10517 		 */
10518 		if (!un->un_f_has_removable_media) {
10519 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10520 			    "i/o to invalid geometry\n");
10521 		}
10522 		bioerror(bp, EIO);
10523 		bp->b_resid = bp->b_bcount;
10524 		SD_BEGIN_IODONE(index, un, bp);
10525 		return;
10526 	}
10527 
10528 	partition = SDPART(bp->b_edev);
10529 
10530 	nblocks = 0;
10531 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10532 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10533 
10534 	/*
10535 	 * blocknum is the starting block number of the request. At this
10536 	 * point it is still relative to the start of the minor device.
10537 	 */
10538 	blocknum = xp->xb_blkno;
10539 
10540 	/*
10541 	 * Legacy: If the starting block number is one past the last block
10542 	 * in the partition, do not set B_ERROR in the buf.
10543 	 */
10544 	if (blocknum == nblocks)  {
10545 		goto error_exit;
10546 	}
10547 
10548 	/*
10549 	 * Confirm that the first block of the request lies within the
10550 	 * partition limits. Also the requested number of bytes must be
10551 	 * a multiple of the system block size.
10552 	 */
10553 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10554 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10555 		bp->b_flags |= B_ERROR;
10556 		goto error_exit;
10557 	}
10558 
10559 	/*
10560 	 * If the requested # blocks exceeds the available # blocks, that
10561 	 * is an overrun of the partition.
10562 	 */
10563 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10564 	available_nblocks = (size_t)(nblocks - blocknum);
10565 	ASSERT(nblocks >= blocknum);
10566 
10567 	if (requested_nblocks > available_nblocks) {
10568 		/*
10569 		 * Allocate an "overrun" buf to allow the request to proceed
10570 		 * for the amount of space available in the partition. The
10571 		 * amount not transferred will be added into the b_resid
10572 		 * when the operation is complete. The overrun buf
10573 		 * replaces the original buf here, and the original buf
10574 		 * is saved inside the overrun buf, for later use.
10575 		 */
10576 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10577 		    (offset_t)(requested_nblocks - available_nblocks));
10578 		size_t count = bp->b_bcount - resid;
10579 		/*
10580 		 * Note: count is an unsigned entity thus it'll NEVER
10581 		 * be less than 0 so ASSERT the original values are
10582 		 * correct.
10583 		 */
10584 		ASSERT(bp->b_bcount >= resid);
10585 
10586 		bp = sd_bioclone_alloc(bp, count, blocknum,
10587 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
10588 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10589 		ASSERT(xp != NULL);
10590 	}
10591 
10592 	/* At this point there should be no residual for this buf. */
10593 	ASSERT(bp->b_resid == 0);
10594 
10595 	/* Convert the block number to an absolute address. */
10596 	xp->xb_blkno += partition_offset;
10597 
10598 	SD_NEXT_IOSTART(index, un, bp);
10599 
10600 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10601 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10602 
10603 	return;
10604 
10605 error_exit:
10606 	bp->b_resid = bp->b_bcount;
10607 	SD_BEGIN_IODONE(index, un, bp);
10608 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10609 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10610 }
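/*
 * Worked example for the overrun logic above (illustrative numbers):
 * with nblocks == 1000, a request at blocknum == 990 for 20 system
 * blocks gives requested_nblocks == 20 but available_nblocks == 10,
 * so the request is cloned into an overrun buf covering only the 10
 * available blocks. The bytes not transferred are reflected in the
 * original buf's b_resid by sd_mapblockaddr_iodone() when the clone
 * completes.
 */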
10611 
10612 
10613 /*
10614  *    Function: sd_mapblockaddr_iodone
10615  *
10616  * Description: Completion-side processing for partition management.
10617  *
10618  *     Context: May be called under interrupt context
10619  */
10620 
10621 static void
10622 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10623 {
10624 	/* int	partition; */	/* Not used, see below. */
10625 	ASSERT(un != NULL);
10626 	ASSERT(bp != NULL);
10627 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10628 
10629 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10630 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10631 
10632 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10633 		/*
10634 		 * We have an "overrun" buf to deal with...
10635 		 */
10636 		struct sd_xbuf	*xp;
10637 		struct buf	*obp;	/* ptr to the original buf */
10638 
10639 		xp = SD_GET_XBUF(bp);
10640 		ASSERT(xp != NULL);
10641 
10642 		/* Retrieve the pointer to the original buf */
10643 		obp = (struct buf *)xp->xb_private;
10644 		ASSERT(obp != NULL);
10645 
10646 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10647 		bioerror(obp, bp->b_error);
10648 
10649 		sd_bioclone_free(bp);
10650 
10651 		/*
10652 		 * Get back the original buf.
10653 		 * Note that since the restoration of xb_blkno below
10654 		 * was removed, the sd_xbuf is not needed.
10655 		 */
10656 		bp = obp;
10657 		/*
10658 		 * xp = SD_GET_XBUF(bp);
10659 		 * ASSERT(xp != NULL);
10660 		 */
10661 	}
10662 
10663 	/*
10664 	 * Convert xp->xb_blkno back to a minor-device relative value.
10665 	 * Note: this has been commented out, as it is not needed in the
10666 	 * current implementation of the driver (ie, since this function
10667 	 * is at the top of the layering chains, so the info will be
10668 	 * discarded) and it is in the "hot" IO path.
10669 	 *
10670 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10671 	 * xp->xb_blkno -= un->un_offset[partition];
10672 	 */
10673 
10674 	SD_NEXT_IODONE(index, un, bp);
10675 
10676 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10677 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10678 }
10679 
10680 
10681 /*
10682  *    Function: sd_mapblocksize_iostart
10683  *
10684  * Description: Convert between system block size (un->un_sys_blocksize)
10685  *		and target block size (un->un_tgt_blocksize).
10686  *
10687  *     Context: Can sleep to allocate resources.
10688  *
10689  * Assumptions: A higher layer has already performed any partition validation,
10690  *		and converted the xp->xb_blkno to an absolute value relative
10691  *		to the start of the device.
10692  *
10693  *		It is also assumed that the higher layer has implemented
10694  *		an "overrun" mechanism for the case where the request would
10695  *		read/write beyond the end of a partition.  In this case we
10696  *		assume (and ASSERT) that bp->b_resid == 0.
10697  *
10698  *		Note: The implementation for this routine assumes the target
10699  *		block size remains constant between allocation and transport.
10700  */
10701 
10702 static void
10703 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10704 {
10705 	struct sd_mapblocksize_info	*bsp;
10706 	struct sd_xbuf			*xp;
10707 	offset_t first_byte;
10708 	daddr_t	start_block, end_block;
10709 	daddr_t	request_bytes;
10710 	ushort_t is_aligned = FALSE;
10711 
10712 	ASSERT(un != NULL);
10713 	ASSERT(bp != NULL);
10714 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10715 	ASSERT(bp->b_resid == 0);
10716 
10717 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10718 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10719 
10720 	/*
10721 	 * For a non-writable CD, a write request is an error
10722 	 */
10723 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10724 	    (un->un_f_mmc_writable_media == FALSE)) {
10725 		bioerror(bp, EIO);
10726 		bp->b_resid = bp->b_bcount;
10727 		SD_BEGIN_IODONE(index, un, bp);
10728 		return;
10729 	}
10730 
10731 	/*
10732 	 * We do not need a shadow buf if the device is using
10733 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10734 	 * In this case there is no layer-private data block allocated.
10735 	 */
10736 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10737 	    (bp->b_bcount == 0)) {
10738 		goto done;
10739 	}
10740 
10741 #if defined(__i386) || defined(__amd64)
10742 	/* We do not support non-block-aligned transfers for ROD devices */
10743 	ASSERT(!ISROD(un));
10744 #endif
10745 
10746 	xp = SD_GET_XBUF(bp);
10747 	ASSERT(xp != NULL);
10748 
10749 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10750 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
10751 	    un->un_tgt_blocksize, un->un_sys_blocksize);
10752 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10753 	    "request start block:0x%x\n", xp->xb_blkno);
10754 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10755 	    "request len:0x%x\n", bp->b_bcount);
10756 
10757 	/*
10758 	 * Allocate the layer-private data area for the mapblocksize layer.
10759 	 * Layers are allowed to use the xp_private member of the sd_xbuf
10760 	 * Layers are allowed to use the xb_private member of the sd_xbuf
10761 	 * each layer also has the responsibility of restoring the prior
10762 	 * contents of xb_private before returning the buf/xbuf to the
10763 	 * higher layer that sent it.
10764 	 *
10765 	 * Here we save the prior contents of xp->xb_private into the
10766 	 * bsp->mbs_oprivate field of our layer-private data area. This value
10767 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
10768 	 * the layer-private area and returning the buf/xbuf to the layer
10769 	 * that sent it.
10770 	 *
10771 	 * Note that here we use kmem_zalloc for the allocation as there are
10772 	 * parts of the mapblocksize code that expect certain fields to be
10773 	 * zero unless explicitly set to a required value.
10774 	 */
10775 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10776 	bsp->mbs_oprivate = xp->xb_private;
10777 	xp->xb_private = bsp;
10778 
10779 	/*
10780 	 * This treats the data on the disk (target) as an array of bytes.
10781 	 * first_byte is the byte offset, from the beginning of the device,
10782 	 * to the location of the request. This is converted from a
10783 	 * un->un_sys_blocksize block address to a byte offset, and then back
10784 	 * to a block address based upon a un->un_tgt_blocksize block size.
10785 	 *
10786 	 * xp->xb_blkno should be absolute upon entry into this function,
10787 	 * but it is based upon partitions that use the "system"
10788 	 * block size. It must be adjusted to reflect the block size of
10789 	 * the target.
10790 	 *
10791 	 * Note that end_block is actually the block that follows the last
10792 	 * block of the request, but that's what is needed for the computation.
10793 	 */
10794 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
10795 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
10796 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
10797 	    un->un_tgt_blocksize;
10798 
10799 	/* request_bytes is rounded up to a multiple of the target block size */
10800 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
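	/*
	 * Worked example (illustrative numbers): with un_sys_blocksize
	 * == 512, un_tgt_blocksize == 2048, xb_blkno == 3 and b_bcount
	 * == 1024: first_byte == 1536, start_block == 0, end_block ==
	 * (1536 + 1024 + 2047) / 2048 == 2, and request_bytes == 4096.
	 * Since 1536 % 2048 != 0 the request is unaligned, so a shadow
	 * buf will be allocated below and mbs_copy_offset will be 1536.
	 */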
10801 
10802 	/*
10803 	 * See if the starting address of the request and the request
10804 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
10805 	 * then we do not need to allocate a shadow buf to handle the request.
10806 	 */
10807 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
10808 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
10809 		is_aligned = TRUE;
10810 	}
10811 
10812 	if ((bp->b_flags & B_READ) == 0) {
10813 		/*
10814 		 * Lock the range for a write operation. An aligned request is
10815 		 * considered a simple write; otherwise the request must be a
10816 		 * read-modify-write.
10817 		 */
10818 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
10819 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
10820 	}
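	/*
	 * Illustrative scenario: with a 2048-byte target block, a
	 * 512-byte write into the middle of a block can only be done by
	 * reading the full 2048-byte block, merging in the new 512
	 * bytes, and writing the block back. The SD_WTYPE_RMW range
	 * lock keeps a concurrent writer to the same target block from
	 * interleaving between the read and write phases.
	 */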
10821 
10822 	/*
10823 	 * Alloc a shadow buf if the request is not aligned. Also, this is
10824 	 * where the READ command is generated for a read-modify-write. (The
10825 	 * write phase is deferred until after the read completes.)
10826 	 */
10827 	if (is_aligned == FALSE) {
10828 
10829 		struct sd_mapblocksize_info	*shadow_bsp;
10830 		struct sd_xbuf	*shadow_xp;
10831 		struct buf	*shadow_bp;
10832 
10833 		/*
10834 		 * Allocate the shadow buf and its associated xbuf. Note that
10835 		 * after this call the xb_blkno value in both the original
10836 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
10837 		 * same: absolute (relative to the start of the device) and
10838 		 * adjusted for the target block size. The b_blkno in the
10839 		 * shadow buf will also be set to this value. We should never
10840 		 * change b_blkno in the original bp however.
10841 		 *
10842 		 * Note also that the shadow buf will always need to be a
10843 		 * READ command, regardless of whether the incoming command
10844 		 * is a READ or a WRITE.
10845 		 */
10846 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
10847 		    xp->xb_blkno,
10848 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
10849 
10850 		shadow_xp = SD_GET_XBUF(shadow_bp);
10851 
10852 		/*
10853 		 * Allocate the layer-private data for the shadow buf.
10854 		 * (No need to preserve xb_private in the shadow xbuf.)
10855 		 */
10856 		shadow_xp->xb_private = shadow_bsp =
10857 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10858 
10859 		/*
10860 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
10861 		 * to figure out where the start of the user data is (based upon
10862 		 * the system block size) in the data returned by the READ
10863 		 * command (which will be based upon the target blocksize). Note
10864 		 * that this is only really used if the request is unaligned.
10865 		 */
10866 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
10867 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
10868 		ASSERT((bsp->mbs_copy_offset >= 0) &&
10869 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
10870 
10871 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
10872 
10873 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
10874 
10875 		/* Transfer the wmap (if any) to the shadow buf */
10876 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
10877 		bsp->mbs_wmp = NULL;
10878 
10879 		/*
10880 		 * The shadow buf goes on from here in place of the
10881 		 * original buf.
10882 		 */
10883 		shadow_bsp->mbs_orig_bp = bp;
10884 		bp = shadow_bp;
10885 	}
10886 
10887 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10888 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
10889 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10890 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
10891 	    request_bytes);
10892 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10893 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
10894 
10895 done:
10896 	SD_NEXT_IOSTART(index, un, bp);
10897 
10898 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10899 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
10900 }
10901 
10902 
10903 /*
10904  *    Function: sd_mapblocksize_iodone
10905  *
10906  * Description: Completion side processing for block-size mapping.
10907  *
10908  *     Context: May be called under interrupt context
10909  */
10910 
10911 static void
10912 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
10913 {
10914 	struct sd_mapblocksize_info	*bsp;
10915 	struct sd_xbuf	*xp;
10916 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
10917 	struct buf	*orig_bp;	/* ptr to the original buf */
10918 	offset_t	shadow_end;
10919 	offset_t	request_end;
10920 	offset_t	shadow_start;
10921 	ssize_t		copy_offset;
10922 	size_t		copy_length;
10923 	size_t		shortfall;
10924 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
10925 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
10926 
10927 	ASSERT(un != NULL);
10928 	ASSERT(bp != NULL);
10929 
10930 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10931 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
10932 
10933 	/*
10934 	 * There is no shadow buf or layer-private data if the target is
10935 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
10936 	 */
10937 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10938 	    (bp->b_bcount == 0)) {
10939 		goto exit;
10940 	}
10941 
10942 	xp = SD_GET_XBUF(bp);
10943 	ASSERT(xp != NULL);
10944 
10945 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
10946 	bsp = xp->xb_private;
10947 
10948 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
10949 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
10950 
10951 	if (is_write) {
10952 		/*
10953 		 * For a WRITE request we must free up the block range that
10954 		 * we have locked up.  This holds regardless of whether this is
10955 		 * an aligned write request or a read-modify-write request.
10956 		 */
10957 		sd_range_unlock(un, bsp->mbs_wmp);
10958 		bsp->mbs_wmp = NULL;
10959 	}
10960 
10961 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
10962 		/*
10963 		 * An aligned read or write command will have no shadow buf;
10964 		 * there is not much else to do with it.
10965 		 */
10966 		goto done;
10967 	}
10968 
10969 	orig_bp = bsp->mbs_orig_bp;
10970 	ASSERT(orig_bp != NULL);
10971 	orig_xp = SD_GET_XBUF(orig_bp);
10972 	ASSERT(orig_xp != NULL);
10973 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10974 
10975 	if (!is_write && has_wmap) {
10976 		/*
10977 		 * A READ with a wmap means this is the READ phase of a
10978 		 * read-modify-write. If an error occurred on the READ then
10979 		 * we do not proceed with the WRITE phase or copy any data.
10980 		 * Just release the write maps and return with an error.
10981 		 */
10982 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
10983 			orig_bp->b_resid = orig_bp->b_bcount;
10984 			bioerror(orig_bp, bp->b_error);
10985 			sd_range_unlock(un, bsp->mbs_wmp);
10986 			goto freebuf_done;
10987 		}
10988 	}
10989 
10990 	/*
10991 	 * Here is where we set up to copy the data from the shadow buf
10992 	 * into the space associated with the original buf.
10993 	 *
10994 	 * To deal with the conversion between block sizes, these
10995 	 * computations treat the data as an array of bytes, with the
10996 	 * first byte (byte 0) corresponding to the first byte in the
10997 	 * first block on the disk.
10998 	 */
10999 
11000 	/*
11001 	 * shadow_start and shadow_len indicate the location and size of
11002 	 * shadow_start and shadow_end delimit the range of
11003 	 */
11004 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11005 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11006 
11007 	/*
11008 	 * copy_offset gives the offset (in bytes) from the start of the first
11009 	 * block of the READ request to the beginning of the data.  We retrieve
11010 	 * this value from the layer-private data area, where it was saved
11011 	 * by sd_mapblocksize_iostart(). copy_length gives the amount of
11012 	 * data to be copied (in bytes).
11013 	 */
11014 	copy_offset  = bsp->mbs_copy_offset;
11015 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11016 	copy_length  = orig_bp->b_bcount;
11017 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11018 
11019 	/*
11020 	 * Set up the resid and error fields of orig_bp as appropriate.
11021 	 */
11022 	if (shadow_end >= request_end) {
11023 		/* We got all the requested data; set resid to zero */
11024 		orig_bp->b_resid = 0;
11025 	} else {
11026 		/*
11027 		 * We failed to get enough data to fully satisfy the original
11028 		 * request. Just copy back whatever data we got and set
11029 		 * up the residual and error code as required.
11030 		 *
11031 		 * 'shortfall' is the amount by which the data received with the
11032 		 * shadow buf has "fallen short" of the requested amount.
11033 		 */
11034 		shortfall = (size_t)(request_end - shadow_end);
11035 
11036 		if (shortfall > orig_bp->b_bcount) {
11037 			/*
11038 			 * We did not get enough data to even partially
11039 			 * fulfill the original request.  The residual is
11040 			 * equal to the amount requested.
11041 			 */
11042 			orig_bp->b_resid = orig_bp->b_bcount;
11043 		} else {
11044 			/*
11045 			 * We did not get all the data that we requested
11046 			 * from the device, but we will try to return what
11047 			 * portion we did get.
11048 			 */
11049 			orig_bp->b_resid = shortfall;
11050 		}
11051 		ASSERT(copy_length >= orig_bp->b_resid);
11052 		copy_length  -= orig_bp->b_resid;
11053 	}
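	/*
	 * Worked example (illustrative numbers, continuing the example
	 * in sd_mapblocksize_iostart()): shadow_start == 0, copy_offset
	 * == 1536, orig b_bcount == 1024, so request_end == 2560. If
	 * the shadow READ returned all 4096 bytes then shadow_end ==
	 * 4096 >= 2560 and orig b_resid == 0. If only 2048 bytes came
	 * back, shortfall == 512, orig b_resid == 512, and just 512
	 * bytes are copied out below.
	 */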
11054 
11055 	/* Propagate the error code from the shadow buf to the original buf */
11056 	bioerror(orig_bp, bp->b_error);
11057 
11058 	if (is_write) {
11059 		goto freebuf_done;	/* No data copying for a WRITE */
11060 	}
11061 
11062 	if (has_wmap) {
11063 		/*
11064 		 * This is a READ command from the READ phase of a
11065 		 * read-modify-write request. We have to copy the data given
11066 		 * by the user OVER the data returned by the READ command,
11067 		 * then convert the command from a READ to a WRITE and send
11068 		 * it back to the target.
11069 		 */
11070 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11071 		    copy_length);
11072 
11073 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11074 
11075 		/*
11076 		 * Dispatch the WRITE command to the taskq thread, which
11077 		 * will in turn send the command to the target. When the
11078 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11079 		 * will get called again as part of the iodone chain
11080 		 * processing for it. Note that we will still be dealing
11081 		 * with the shadow buf at that point.
11082 		 */
11083 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11084 		    KM_NOSLEEP) != 0) {
11085 			/*
11086 			 * Dispatch was successful so we are done. Return
11087 			 * without going any higher up the iodone chain. Do
11088 			 * not free up any layer-private data until after the
11089 			 * WRITE completes.
11090 			 */
11091 			return;
11092 		}
11093 
11094 		/*
11095 		 * Dispatch of the WRITE command failed; set up the error
11096 		 * condition and send this IO back up the iodone chain.
11097 		 */
11098 		bioerror(orig_bp, EIO);
11099 		orig_bp->b_resid = orig_bp->b_bcount;
11100 
11101 	} else {
11102 		/*
11103 		 * This is a regular READ request (ie, not a RMW). Copy the
11104 		 * data from the shadow buf into the original buf. The
11105 		 * copy_offset compensates for any "misalignment" between the
11106 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11107 		 * original buf (with its un->un_sys_blocksize blocks).
11108 		 */
11109 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11110 		    copy_length);
11111 	}
11112 
11113 freebuf_done:
11114 
11115 	/*
11116 	 * At this point we still have both the shadow buf AND the original
11117 	 * buf to deal with, as well as the layer-private data area in each.
11118 	 * Local variables are as follows:
11119 	 *
11120 	 * bp -- points to shadow buf
11121 	 * xp -- points to xbuf of shadow buf
11122 	 * bsp -- points to layer-private data area of shadow buf
11123 	 * orig_bp -- points to original buf
11124 	 *
11125 	 * First free the shadow buf and its associated xbuf, then free the
11126 	 * layer-private data area from the shadow buf. There is no need to
11127 	 * restore xb_private in the shadow xbuf.
11128 	 */
11129 	sd_shadow_buf_free(bp);
11130 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11131 
11132 	/*
11133 	 * Now update the local variables to point to the original buf, xbuf,
11134 	 * and layer-private area.
11135 	 */
11136 	bp = orig_bp;
11137 	xp = SD_GET_XBUF(bp);
11138 	ASSERT(xp != NULL);
11139 	ASSERT(xp == orig_xp);
11140 	bsp = xp->xb_private;
11141 	ASSERT(bsp != NULL);
11142 
11143 done:
11144 	/*
11145 	 * Restore xb_private to whatever it was set to by the next higher
11146 	 * layer in the chain, then free the layer-private data area.
11147 	 */
11148 	xp->xb_private = bsp->mbs_oprivate;
11149 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11150 
11151 exit:
11152 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11153 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11154 
11155 	SD_NEXT_IODONE(index, un, bp);
11156 }
11157 
11158 
11159 /*
11160  *    Function: sd_checksum_iostart
11161  *
11162  * Description: A stub function for a layer that's currently not used.
11163  *		For now just a placeholder.
11164  *
11165  *     Context: Kernel thread context
11166  */
11167 
11168 static void
11169 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11170 {
11171 	ASSERT(un != NULL);
11172 	ASSERT(bp != NULL);
11173 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11174 	SD_NEXT_IOSTART(index, un, bp);
11175 }
11176 
11177 
11178 /*
11179  *    Function: sd_checksum_iodone
11180  *
11181  * Description: A stub function for a layer that's currently not used.
11182  *		For now just a placeholder.
11183  *
11184  *     Context: May be called under interrupt context
11185  */
11186 
11187 static void
11188 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11189 {
11190 	ASSERT(un != NULL);
11191 	ASSERT(bp != NULL);
11192 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11193 	SD_NEXT_IODONE(index, un, bp);
11194 }
11195 
11196 
11197 /*
11198  *    Function: sd_checksum_uscsi_iostart
11199  *
11200  * Description: A stub function for a layer that's currently not used.
11201  *		For now just a placeholder.
11202  *
11203  *     Context: Kernel thread context
11204  */
11205 
11206 static void
11207 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11208 {
11209 	ASSERT(un != NULL);
11210 	ASSERT(bp != NULL);
11211 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11212 	SD_NEXT_IOSTART(index, un, bp);
11213 }
11214 
11215 
11216 /*
11217  *    Function: sd_checksum_uscsi_iodone
11218  *
11219  * Description: A stub function for a layer that's currently not used.
11220  *		For now just a placeholder.
11221  *
11222  *     Context: May be called under interrupt context
11223  */
11224 
11225 static void
11226 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11227 {
11228 	ASSERT(un != NULL);
11229 	ASSERT(bp != NULL);
11230 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11231 	SD_NEXT_IODONE(index, un, bp);
11232 }
11233 
11234 
11235 /*
11236  *    Function: sd_pm_iostart
11237  *
11238  * Description: iostart-side routine for power management.
11239  *
11240  *     Context: Kernel thread context
11241  */
11242 
11243 static void
11244 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11245 {
11246 	ASSERT(un != NULL);
11247 	ASSERT(bp != NULL);
11248 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11249 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11250 
11251 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11252 
11253 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11254 		/*
11255 		 * Set up to return the failed buf back up the 'iodone'
11256 		 * side of the calling chain.
11257 		 */
11258 		bioerror(bp, EIO);
11259 		bp->b_resid = bp->b_bcount;
11260 
11261 		SD_BEGIN_IODONE(index, un, bp);
11262 
11263 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11264 		return;
11265 	}
11266 
11267 	SD_NEXT_IOSTART(index, un, bp);
11268 
11269 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11270 }
11271 
11272 
11273 /*
11274  *    Function: sd_pm_iodone
11275  *
11276  * Description: iodone-side routine for power management.
11277  *
11278  *     Context: may be called from interrupt context
11279  */
11280 
11281 static void
11282 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11283 {
11284 	ASSERT(un != NULL);
11285 	ASSERT(bp != NULL);
11286 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11287 
11288 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11289 
11290 	/*
11291 	 * After attach the following flag is only read, so don't
11292 	 * take the penalty of acquiring a mutex for it.
11293 	 */
11294 	if (un->un_f_pm_is_enabled == TRUE) {
11295 		sd_pm_exit(un);
11296 	}
11297 
11298 	SD_NEXT_IODONE(index, un, bp);
11299 
11300 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11301 }
11302 
11303 
11304 /*
11305  *    Function: sd_core_iostart
11306  *
11307  * Description: Primary driver function for enqueuing buf(9S) structs from
11308  *		the system and initiating IO to the target device
11309  *
11310  *     Context: Kernel thread context. Can sleep.
11311  *
11312  * Assumptions:  - The given xp->xb_blkno is absolute
11313  *		   (ie, relative to the start of the device).
11314  *		 - The IO is to be done using the native blocksize of
11315  *		   the device, as specified in un->un_tgt_blocksize.
11316  */
11317 /* ARGSUSED */
11318 static void
11319 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11320 {
11321 	struct sd_xbuf *xp;
11322 
11323 	ASSERT(un != NULL);
11324 	ASSERT(bp != NULL);
11325 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11326 	ASSERT(bp->b_resid == 0);
11327 
11328 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11329 
11330 	xp = SD_GET_XBUF(bp);
11331 	ASSERT(xp != NULL);
11332 
11333 	mutex_enter(SD_MUTEX(un));
11334 
11335 	/*
11336 	 * If we are currently in the failfast state, fail any new IO
11337 	 * that has B_FAILFAST set, then return.
11338 	 */
11339 	if ((bp->b_flags & B_FAILFAST) &&
11340 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11341 		mutex_exit(SD_MUTEX(un));
11342 		bioerror(bp, EIO);
11343 		bp->b_resid = bp->b_bcount;
11344 		SD_BEGIN_IODONE(index, un, bp);
11345 		return;
11346 	}
11347 
11348 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11349 		/*
11350 		 * Priority command -- transport it immediately.
11351 		 *
11352 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11353 		 * because all direct priority commands should be associated
11354 		 * with error recovery actions which we don't want to retry.
11355 		 */
11356 		sd_start_cmds(un, bp);
11357 	} else {
11358 		/*
11359 		 * Normal command -- add it to the wait queue, then start
11360 		 * transporting commands from the wait queue.
11361 		 */
11362 		sd_add_buf_to_waitq(un, bp);
11363 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11364 		sd_start_cmds(un, NULL);
11365 	}
11366 
11367 	mutex_exit(SD_MUTEX(un));
11368 
11369 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11370 }
11371 
11372 
11373 /*
11374  *    Function: sd_init_cdb_limits
11375  *
11376  * Description: This is to handle scsi_pkt initialization differences
11377  *		between the driver platforms.
11378  *
11379  *		Legacy behaviors:
11380  *
11381  *		If the block number or the sector count exceeds the
11382  *		capabilities of a Group 0 command, shift over to a
11383  *		Group 1 command. We don't blindly use Group 1
11384  *		commands because a) some drives (CDC Wren IVs) get a
11385  *		bit confused, and b) there is probably a fair amount
11386  *		of speed difference for a target to receive and decode
11387  *		a 10 byte command instead of a 6 byte command.
11388  *
11389  *		The xfer time difference of 6 vs 10 byte CDBs is
11390  *		still significant so this code is still worthwhile.
11391  *		10 byte CDBs are very inefficient with the fas HBA driver
11392  *		and older disks. Each CDB byte took 1 usec with some
11393  *		popular disks.
11394  *
11395  *     Context: Must be called at attach time
11396  */
11397 
11398 static void
11399 sd_init_cdb_limits(struct sd_lun *un)
11400 {
11401 	int hba_cdb_limit;
11402 
11403 	/*
11404 	 * Use CDB_GROUP1 commands for most devices except for
11405 	 * parallel SCSI fixed drives in which case we get better
11406 	 * performance using CDB_GROUP0 commands (where applicable).
11407 	 */
11408 	un->un_mincdb = SD_CDB_GROUP1;
11409 #if !defined(__fibre)
11410 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11411 	    !un->un_f_has_removable_media) {
11412 		un->un_mincdb = SD_CDB_GROUP0;
11413 	}
11414 #endif
11415 
11416 	/*
11417 	 * Try to read the max-cdb-length supported by HBA.
11418 	 */
11419 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11420 	if (0 >= un->un_max_hba_cdb) {
11421 		un->un_max_hba_cdb = CDB_GROUP4;
11422 		hba_cdb_limit = SD_CDB_GROUP4;
11423 	} else if (0 < un->un_max_hba_cdb &&
11424 	    un->un_max_hba_cdb < CDB_GROUP1) {
11425 		hba_cdb_limit = SD_CDB_GROUP0;
11426 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11427 	    un->un_max_hba_cdb < CDB_GROUP5) {
11428 		hba_cdb_limit = SD_CDB_GROUP1;
11429 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11430 	    un->un_max_hba_cdb < CDB_GROUP4) {
11431 		hba_cdb_limit = SD_CDB_GROUP5;
11432 	} else {
11433 		hba_cdb_limit = SD_CDB_GROUP4;
11434 	}
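	/*
	 * The ladder above maps the HBA's reported max-cdb-length onto
	 * the largest usable CDB group: a report below 10 bytes allows
	 * only Group 0 (6-byte CDBs), 10-11 allows Group 1 (10-byte),
	 * 12-15 allows Group 5 (12-byte), and 16 or more allows Group 4
	 * (16-byte); with no valid report the limit defaults to Group 4.
	 */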
11435 
11436 	/*
11437 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11438 	 * commands for fixed disks unless we are building for a 32 bit
11439 	 * kernel.
11440 	 */
11441 #ifdef _LP64
11442 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11443 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11444 #else
11445 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11446 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11447 #endif
11448 
11449 	/*
11450 	 * x86 systems require the PKT_DMA_PARTIAL flag
11451 	 */
11452 #if defined(__x86)
11453 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11454 #else
11455 	un->un_pkt_flags = 0;
11456 #endif
11457 
11458 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11459 	    ? sizeof (struct scsi_arq_status) : 1);
11460 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11461 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11462 }
11463 
11464 
11465 /*
11466  *    Function: sd_initpkt_for_buf
11467  *
11468  * Description: Allocate and initialize for transport a scsi_pkt struct,
11469  *		based upon the info specified in the given buf struct.
11470  *
11471  *		Assumes the xb_blkno in the request is absolute (ie,
11472  *		relative to the start of the device, NOT the partition).
11473  *		Also assumes that the request is using the native block
11474  *		size of the device (as returned by the READ CAPACITY
11475  *		command).
11476  *
11477  * Return Code: SD_PKT_ALLOC_SUCCESS
11478  *		SD_PKT_ALLOC_FAILURE
11479  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11480  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11481  *
11482  *     Context: Kernel thread and may be called from software interrupt context
11483  *		as part of a sdrunout callback. This function may not block or
11484  *		call routines that block
11485  */
11486 
11487 static int
11488 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11489 {
11490 	struct sd_xbuf	*xp;
11491 	struct scsi_pkt *pktp = NULL;
11492 	struct sd_lun	*un;
11493 	size_t		blockcount;
11494 	daddr_t		startblock;
11495 	int		rval;
11496 	int		cmd_flags;
11497 
11498 	ASSERT(bp != NULL);
11499 	ASSERT(pktpp != NULL);
11500 	xp = SD_GET_XBUF(bp);
11501 	ASSERT(xp != NULL);
11502 	un = SD_GET_UN(bp);
11503 	ASSERT(un != NULL);
11504 	ASSERT(mutex_owned(SD_MUTEX(un)));
11505 	ASSERT(bp->b_resid == 0);
11506 
11507 	SD_TRACE(SD_LOG_IO_CORE, un,
11508 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11509 
11510 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11511 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11512 		/*
11513 		 * Already have a scsi_pkt -- just need DMA resources.
11514 		 * We must recompute the CDB in case the mapping returns
11515 		 * a nonzero pkt_resid.
11516 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11517 		 * that is being retried, the unmap/remap of the DMA resources
11518 		 * will result in the entire transfer starting over again
11519 		 * from the very first block.
11520 		 */
11521 		ASSERT(xp->xb_pktp != NULL);
11522 		pktp = xp->xb_pktp;
11523 	} else {
11524 		pktp = NULL;
11525 	}
11526 #endif /* __i386 || __amd64 */
11527 
11528 	startblock = xp->xb_blkno;	/* Absolute block num. */
11529 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11530 
11531 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11532 
11533 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11534 
11535 #else
11536 
11537 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11538 
11539 #endif
11540 
11541 	/*
11542 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11543 	 * call scsi_init_pkt, and build the CDB.
11544 	 */
11545 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11546 	    cmd_flags, sdrunout, (caddr_t)un,
11547 	    startblock, blockcount);
11548 
11549 	if (rval == 0) {
11550 		/*
11551 		 * Success.
11552 		 *
11553 		 * If partial DMA is being used and required for this
11554 		 * transfer, set it up here.
11555 		 */
11556 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11557 		    (pktp->pkt_resid != 0)) {
11558 
11559 			/*
11560 			 * Save the pkt_resid for the next xfer; the
11561 			 * CDB size stays constant across the windows.
11562 			 */
11563 			xp->xb_dma_resid = pktp->pkt_resid;
11564 
11565 			/* rezero resid */
11566 			pktp->pkt_resid = 0;
11567 
11568 		} else {
11569 			xp->xb_dma_resid = 0;
11570 		}
11571 
11572 		pktp->pkt_flags = un->un_tagflags;
11573 		pktp->pkt_time  = un->un_cmd_timeout;
11574 		pktp->pkt_comp  = sdintr;
11575 
11576 		pktp->pkt_private = bp;
11577 		*pktpp = pktp;
11578 
11579 		SD_TRACE(SD_LOG_IO_CORE, un,
11580 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11581 
11582 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11583 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11584 #endif
11585 
11586 		return (SD_PKT_ALLOC_SUCCESS);
11587 
11588 	}
11589 
11590 	/*
11591 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11592 	 * from sd_setup_rw_pkt.
11593 	 */
11594 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11595 
11596 	if (rval == SD_PKT_ALLOC_FAILURE) {
11597 		*pktpp = NULL;
11598 		/*
11599 		 * Set the driver state to RWAIT to indicate the driver
11600 		 * is waiting on resource allocations. The driver will not
11601 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11602 		 */
11603 		New_state(un, SD_STATE_RWAIT);
11604 
11605 		SD_ERROR(SD_LOG_IO_CORE, un,
11606 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11607 
11608 		if ((bp->b_flags & B_ERROR) != 0) {
11609 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11610 		}
11611 		return (SD_PKT_ALLOC_FAILURE);
11612 	} else {
11613 		/*
11614 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11615 		 *
11616 		 * This should never happen.  Maybe someone messed with the
11617 		 * kernel's minphys?
11618 		 */
11619 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11620 		    "Request rejected: too large for CDB: "
11621 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11622 		SD_ERROR(SD_LOG_IO_CORE, un,
11623 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11624 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11625 
11626 	}
11627 }
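/*
 * Note for callers: sd_start_cmds() reacts differently to each of the
 * failure codes above.  SD_PKT_ALLOC_FAILURE is treated as transient
 * (the bp is requeued for sdrunout), while
 * SD_PKT_ALLOC_FAILURE_NO_DMA and SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
 * cause the bp to be failed with EIO.
 */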
11628 
11629 
11630 /*
11631  *    Function: sd_destroypkt_for_buf
11632  *
11633  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11634  *
11635  *     Context: Kernel thread or interrupt context
11636  */
11637 
11638 static void
11639 sd_destroypkt_for_buf(struct buf *bp)
11640 {
11641 	ASSERT(bp != NULL);
11642 	ASSERT(SD_GET_UN(bp) != NULL);
11643 
11644 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11645 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11646 
11647 	ASSERT(SD_GET_PKTP(bp) != NULL);
11648 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11649 
11650 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11651 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11652 }
11653 
11654 /*
11655  *    Function: sd_setup_rw_pkt
11656  *
11657  * Description: Determines appropriate CDB group for the requested LBA
11658  *		and transfer length, calls scsi_init_pkt, and builds
11659  *		the CDB.  Do not use for partial DMA transfers except
11660  *		for the initial transfer since the CDB size must
11661  *		remain constant.
11662  *
11663  *     Context: Kernel thread and may be called from software interrupt
11664  *		context as part of a sdrunout callback. This function may not
11665  *		block or call routines that block
11666  */
11667 
11668 
11669 int
11670 sd_setup_rw_pkt(struct sd_lun *un,
11671     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11672     int (*callback)(caddr_t), caddr_t callback_arg,
11673     diskaddr_t lba, uint32_t blockcount)
11674 {
11675 	struct scsi_pkt *return_pktp;
11676 	union scsi_cdb *cdbp;
11677 	struct sd_cdbinfo *cp = NULL;
11678 	int i;
11679 
11680 	/*
11681 	 * See which size CDB to use, based upon the request.
11682 	 */
11683 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11684 
11685 		/*
11686 		 * Check lba and block count against sd_cdbtab limits.
11687 		 * In the partial DMA case, we have to use the same size
11688 		 * CDB for all the transfers.  Check lba + blockcount
11689 		 * against the max LBA so we know that segment of the
11690 		 * transfer can use the CDB we select.
11691 		 */
11692 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11693 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11694 
11695 			/*
11696 			 * The command will fit into the CDB type
11697 			 * specified by sd_cdbtab[i].
11698 			 */
11699 			cp = sd_cdbtab + i;
11700 
11701 			/*
11702 			 * Call scsi_init_pkt so we can fill in the
11703 			 * CDB.
11704 			 */
11705 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11706 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11707 			    flags, callback, callback_arg);
11708 
11709 			if (return_pktp != NULL) {
11710 
11711 				/*
11712 				 * Return new value of pkt
11713 				 */
11714 				*pktpp = return_pktp;
11715 
11716 				/*
11717 				 * To be safe, zero the CDB ensuring there is
11718 				 * no leftover data from a previous command.
11719 				 */
11720 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11721 
11722 				/*
11723 				 * Handle partial DMA mapping
11724 				 */
11725 				if (return_pktp->pkt_resid != 0) {
11726 
11727 					/*
11728 					 * Not going to xfer as many blocks as
11729 					 * originally expected
11730 					 */
11731 					blockcount -=
11732 					    SD_BYTES2TGTBLOCKS(un,
11733 					    return_pktp->pkt_resid);
11734 				}
11735 
11736 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11737 
11738 				/*
11739 				 * Set command byte based on the CDB
11740 				 * type we matched.
11741 				 */
11742 				cdbp->scc_cmd = cp->sc_grpmask |
11743 				    ((bp->b_flags & B_READ) ?
11744 				    SCMD_READ : SCMD_WRITE);
11745 
11746 				SD_FILL_SCSI1_LUN(un, return_pktp);
11747 
11748 				/*
11749 				 * Fill in LBA and length
11750 				 */
11751 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
11752 				    (cp->sc_grpcode == CDB_GROUP4) ||
11753 				    (cp->sc_grpcode == CDB_GROUP0) ||
11754 				    (cp->sc_grpcode == CDB_GROUP5));
11755 
11756 				if (cp->sc_grpcode == CDB_GROUP1) {
11757 					FORMG1ADDR(cdbp, lba);
11758 					FORMG1COUNT(cdbp, blockcount);
11759 					return (0);
11760 				} else if (cp->sc_grpcode == CDB_GROUP4) {
11761 					FORMG4LONGADDR(cdbp, lba);
11762 					FORMG4COUNT(cdbp, blockcount);
11763 					return (0);
11764 				} else if (cp->sc_grpcode == CDB_GROUP0) {
11765 					FORMG0ADDR(cdbp, lba);
11766 					FORMG0COUNT(cdbp, blockcount);
11767 					return (0);
11768 				} else if (cp->sc_grpcode == CDB_GROUP5) {
11769 					FORMG5ADDR(cdbp, lba);
11770 					FORMG5COUNT(cdbp, blockcount);
11771 					return (0);
11772 				}
11773 
11774 				/*
11775 				 * It should be impossible to not match one
11776 				 * of the CDB types above, so we should never
11777 				 * reach this point.  Set the CDB command byte
11778 				 * to test-unit-ready to avoid writing
11779 				 * to somewhere we don't intend.
11780 				 */
11781 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
11782 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11783 			} else {
11784 				/*
11785 				 * Couldn't get scsi_pkt
11786 				 */
11787 				return (SD_PKT_ALLOC_FAILURE);
11788 			}
11789 		}
11790 	}
11791 
11792 	/*
11793 	 * None of the available CDB types were suitable.  This really
11794 	 * should never happen:  on a 64 bit system we support
11795 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
11796 	 * and on a 32 bit system we will refuse to bind to a device
11797 	 * larger than 2TB so addresses will never be larger than 32 bits.
11798 	 */
11799 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11800 }
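/*
 * Worked example of the CDB built above (a sketch; byte values assume
 * the standard big-endian field layout from scsi/impl/commands.h):
 * a READ of 16 blocks at lba 0x12345678 fits CDB_GROUP1, the opcode
 * becomes sc_grpmask (0x20) | SCMD_READ (0x08) == 0x28 (READ(10)),
 * and FORMG1ADDR/FORMG1COUNT fill in the remaining fields:
 *
 *	28 00 12 34 56 78 00 00 10 00
 */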
11801 
11802 #if defined(__i386) || defined(__amd64)
11803 /*
11804  *    Function: sd_setup_next_rw_pkt
11805  *
11806  * Description: Setup packet for partial DMA transfers, except for the
11807  * 		initial transfer.  sd_setup_rw_pkt should be used for
11808  *		the initial transfer.
11809  *
11810  *     Context: Kernel thread and may be called from interrupt context.
11811  */
11812 
11813 int
11814 sd_setup_next_rw_pkt(struct sd_lun *un,
11815     struct scsi_pkt *pktp, struct buf *bp,
11816     diskaddr_t lba, uint32_t blockcount)
11817 {
11818 	uchar_t com;
11819 	union scsi_cdb *cdbp;
11820 	uchar_t cdb_group_id;
11821 
11822 	ASSERT(pktp != NULL);
11823 	ASSERT(pktp->pkt_cdbp != NULL);
11824 
11825 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
11826 	com = cdbp->scc_cmd;
11827 	cdb_group_id = CDB_GROUPID(com);
11828 
11829 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
11830 	    (cdb_group_id == CDB_GROUPID_1) ||
11831 	    (cdb_group_id == CDB_GROUPID_4) ||
11832 	    (cdb_group_id == CDB_GROUPID_5));
11833 
11834 	/*
11835 	 * Move pkt to the next portion of the xfer.
11836 	 * func is NULL_FUNC so we do not have to release
11837 	 * the disk mutex here.
11838 	 */
11839 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
11840 	    NULL_FUNC, NULL) == pktp) {
11841 		/* Success.  Handle partial DMA */
11842 		if (pktp->pkt_resid != 0) {
11843 			blockcount -=
11844 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
11845 		}
11846 
11847 		cdbp->scc_cmd = com;
11848 		SD_FILL_SCSI1_LUN(un, pktp);
11849 		if (cdb_group_id == CDB_GROUPID_1) {
11850 			FORMG1ADDR(cdbp, lba);
11851 			FORMG1COUNT(cdbp, blockcount);
11852 			return (0);
11853 		} else if (cdb_group_id == CDB_GROUPID_4) {
11854 			FORMG4LONGADDR(cdbp, lba);
11855 			FORMG4COUNT(cdbp, blockcount);
11856 			return (0);
11857 		} else if (cdb_group_id == CDB_GROUPID_0) {
11858 			FORMG0ADDR(cdbp, lba);
11859 			FORMG0COUNT(cdbp, blockcount);
11860 			return (0);
11861 		} else if (cdb_group_id == CDB_GROUPID_5) {
11862 			FORMG5ADDR(cdbp, lba);
11863 			FORMG5COUNT(cdbp, blockcount);
11864 			return (0);
11865 		}
11866 
11867 		/* Unreachable */
11868 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11869 	}
11870 
11871 	/*
11872 	 * Error setting up next portion of cmd transfer.
11873 	 * Something is definitely very wrong and this
11874 	 * should not happen.
11875 	 */
11876 	return (SD_PKT_ALLOC_FAILURE);
11877 }
11878 #endif /* defined(__i386) || defined(__amd64) */
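/*
 * Example of why the CDB size must remain constant across partial-DMA
 * windows (numbers are illustrative): if a 0x300000-byte transfer is
 * mapped in 0x100000-byte windows, sd_setup_rw_pkt() selects a CDB
 * group large enough for the lba range of the *entire* transfer, and
 * sd_setup_next_rw_pkt() then only rewrites the address and count
 * fields for the second and third windows.  Had the first window
 * picked a smaller CDB based on its own lba alone, a later window
 * could require an lba beyond that CDB's maximum.
 */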
11879 
11880 /*
11881  *    Function: sd_initpkt_for_uscsi
11882  *
11883  * Description: Allocate and initialize for transport a scsi_pkt struct,
11884  *		based upon the info specified in the given uscsi_cmd struct.
11885  *
11886  * Return Code: SD_PKT_ALLOC_SUCCESS
11887  *		SD_PKT_ALLOC_FAILURE
11888  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11889  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11890  *
11891  *     Context: Kernel thread and may be called from software interrupt context
11892  *		as part of a sdrunout callback. This function may not block or
11893  *		call routines that block
11894  */
11895 
11896 static int
11897 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
11898 {
11899 	struct uscsi_cmd *uscmd;
11900 	struct sd_xbuf	*xp;
11901 	struct scsi_pkt	*pktp;
11902 	struct sd_lun	*un;
11903 	uint32_t	flags = 0;
11904 
11905 	ASSERT(bp != NULL);
11906 	ASSERT(pktpp != NULL);
11907 	xp = SD_GET_XBUF(bp);
11908 	ASSERT(xp != NULL);
11909 	un = SD_GET_UN(bp);
11910 	ASSERT(un != NULL);
11911 	ASSERT(mutex_owned(SD_MUTEX(un)));
11912 
11913 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
11914 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
11915 	ASSERT(uscmd != NULL);
11916 
11917 	SD_TRACE(SD_LOG_IO_CORE, un,
11918 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
11919 
11920 	/*
11921 	 * Allocate the scsi_pkt for the command.
11922 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
11923 	 *	 during scsi_init_pkt time and will continue to use the
11924 	 *	 same path as long as the same scsi_pkt is used without
11925 	 *	 intervening scsi_dma_free(). Since uscsi command does
11926 	 *	 not call scsi_dmafree() before retry failed command, it
11927 	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
11928 	 *	 set such that scsi_vhci can use other available path for
11929 	 *	 retry. Besides, ucsci command does not allow DMA breakup,
11930 	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
11931 	 */
11932 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
11933 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
11934 	    sizeof (struct scsi_arq_status), 0,
11935 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
11936 	    sdrunout, (caddr_t)un);
11937 
11938 	if (pktp == NULL) {
11939 		*pktpp = NULL;
11940 		/*
11941 		 * Set the driver state to RWAIT to indicate the driver
11942 		 * is waiting on resource allocations. The driver will not
11943 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11944 		 */
11945 		New_state(un, SD_STATE_RWAIT);
11946 
11947 		SD_ERROR(SD_LOG_IO_CORE, un,
11948 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
11949 
11950 		if ((bp->b_flags & B_ERROR) != 0) {
11951 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11952 		}
11953 		return (SD_PKT_ALLOC_FAILURE);
11954 	}
11955 
11956 	/*
11957 	 * We do not do DMA breakup for USCSI commands, so return failure
11958 	 * here if all the needed DMA resources were not allocated.
11959 	 */
11960 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
11961 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
11962 		scsi_destroy_pkt(pktp);
11963 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
11964 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
11965 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
11966 	}
11967 
11968 	/* Init the cdb from the given uscsi struct */
11969 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
11970 	    uscmd->uscsi_cdb[0], 0, 0, 0);
11971 
11972 	SD_FILL_SCSI1_LUN(un, pktp);
11973 
11974 	/*
11975 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
11976 	 * for listing of the supported flags.
11977 	 */
11978 
11979 	if (uscmd->uscsi_flags & USCSI_SILENT) {
11980 		flags |= FLAG_SILENT;
11981 	}
11982 
11983 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
11984 		flags |= FLAG_DIAGNOSE;
11985 	}
11986 
11987 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
11988 		flags |= FLAG_ISOLATE;
11989 	}
11990 
11991 	if (un->un_f_is_fibre == FALSE) {
11992 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
11993 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
11994 		}
11995 	}
11996 
11997 	/*
11998 	 * Set the pkt flags here so we save time later.
11999 	 * Note: These flags are NOT in the uscsi man page!!!
12000 	 */
12001 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12002 		flags |= FLAG_HEAD;
12003 	}
12004 
12005 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12006 		flags |= FLAG_NOINTR;
12007 	}
12008 
12009 	/*
12010 	 * For tagged queueing, things get a bit complicated.
12011 	 * Check first for head of queue and last for ordered queue.
12012 	 * If neither head nor ordered, use the default driver tag flags.
12013 	 */
12014 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12015 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12016 			flags |= FLAG_HTAG;
12017 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12018 			flags |= FLAG_OTAG;
12019 		} else {
12020 			flags |= un->un_tagflags & FLAG_TAGMASK;
12021 		}
12022 	}
12023 
12024 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12025 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12026 	}
12027 
12028 	pktp->pkt_flags = flags;
12029 
12030 	/* Copy the caller's CDB into the pkt... */
12031 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12032 
12033 	if (uscmd->uscsi_timeout == 0) {
12034 		pktp->pkt_time = un->un_uscsi_timeout;
12035 	} else {
12036 		pktp->pkt_time = uscmd->uscsi_timeout;
12037 	}
12038 
12039 	/* need it later to identify USCSI request in sdintr */
12040 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12041 
12042 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12043 
12044 	pktp->pkt_private = bp;
12045 	pktp->pkt_comp = sdintr;
12046 	*pktpp = pktp;
12047 
12048 	SD_TRACE(SD_LOG_IO_CORE, un,
12049 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12050 
12051 	return (SD_PKT_ALLOC_SUCCESS);
12052 }
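/*
 * For context, a minimal user-level sketch of the uscsi(7I) request
 * that eventually reaches sd_initpkt_for_uscsi().  This is a hedged
 * illustration only (excluded from the build): error handling is
 * omitted and the device path is hypothetical.
 */
#if 0
#include <sys/scsi/impl/uscsi.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
send_test_unit_ready(const char *dev)
{
	struct uscsi_cmd ucmd;
	char cdb[6];				/* 6-byte group 0 CDB */
	int fd = open(dev, O_RDONLY);		/* e.g. "/dev/rdsk/..." */

	(void) memset(&ucmd, 0, sizeof (ucmd));
	(void) memset(cdb, 0, sizeof (cdb));	/* opcode 0x00 == TUR */
	ucmd.uscsi_cdb = cdb;
	ucmd.uscsi_cdblen = sizeof (cdb);
	ucmd.uscsi_flags = USCSI_SILENT;	/* maps to FLAG_SILENT above */
	ucmd.uscsi_timeout = 30;		/* 0 would use un_uscsi_timeout */
	return (ioctl(fd, USCSICMD, &ucmd));
}
#endif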
12053 
12054 
12055 /*
12056  *    Function: sd_destroypkt_for_uscsi
12057  *
12058  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12059  *		IOs.. Also saves relevant info into the associated uscsi_cmd
12060  *		struct.
12061  *
12062  *     Context: May be called under interrupt context
12063  */
12064 
12065 static void
12066 sd_destroypkt_for_uscsi(struct buf *bp)
12067 {
12068 	struct uscsi_cmd *uscmd;
12069 	struct sd_xbuf	*xp;
12070 	struct scsi_pkt	*pktp;
12071 	struct sd_lun	*un;
12072 
12073 	ASSERT(bp != NULL);
12074 	xp = SD_GET_XBUF(bp);
12075 	ASSERT(xp != NULL);
12076 	un = SD_GET_UN(bp);
12077 	ASSERT(un != NULL);
12078 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12079 	pktp = SD_GET_PKTP(bp);
12080 	ASSERT(pktp != NULL);
12081 
12082 	SD_TRACE(SD_LOG_IO_CORE, un,
12083 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12084 
12085 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12086 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12087 	ASSERT(uscmd != NULL);
12088 
12089 	/* Save the status and the residual into the uscsi_cmd struct */
12090 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12091 	uscmd->uscsi_resid  = bp->b_resid;
12092 
12093 	/*
12094 	 * If enabled, copy any saved sense data into the area specified
12095 	 * by the uscsi command.
12096 	 */
12097 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12098 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12099 		/*
12100 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12101 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12102 		 */
12103 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12104 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12105 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12106 	}
12107 
12108 	/* We are done with the scsi_pkt; free it now */
12109 	ASSERT(SD_GET_PKTP(bp) != NULL);
12110 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12111 
12112 	SD_TRACE(SD_LOG_IO_CORE, un,
12113 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12114 }
12115 
12116 
12117 /*
12118  *    Function: sd_bioclone_alloc
12119  *
12120  * Description: Allocate a buf(9S) and init it as per the given buf
12121  *		and the various arguments.  The associated sd_xbuf
12122  *		struct is (nearly) duplicated.  The struct buf *bp
12123  *		argument is saved in new_xp->xb_private.
12124  *
12125  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12126  *		datalen - size of data area for the shadow bp
12127  *		blkno - starting LBA
12128  *		func - function pointer for b_iodone in the shadow buf. (May
12129  *			be NULL if none.)
12130  *
12131  * Return Code: Pointer to the allocated buf(9S) struct
12132  *
12133  *     Context: Can sleep.
12134  */
12135 
12136 static struct buf *
12137 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12138 	daddr_t blkno, int (*func)(struct buf *))
12139 {
12140 	struct	sd_lun	*un;
12141 	struct	sd_xbuf	*xp;
12142 	struct	sd_xbuf	*new_xp;
12143 	struct	buf	*new_bp;
12144 
12145 	ASSERT(bp != NULL);
12146 	xp = SD_GET_XBUF(bp);
12147 	ASSERT(xp != NULL);
12148 	un = SD_GET_UN(bp);
12149 	ASSERT(un != NULL);
12150 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12151 
12152 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12153 	    NULL, KM_SLEEP);
12154 
12155 	new_bp->b_lblkno	= blkno;
12156 
12157 	/*
12158 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12159 	 * original xbuf into it.
12160 	 */
12161 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12162 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12163 
12164 	/*
12165 	 * The given bp is automatically saved in the xb_private member
12166 	 * of the new xbuf.  Callers are allowed to depend on this.
12167 	 */
12168 	new_xp->xb_private = bp;
12169 
12170 	new_bp->b_private  = new_xp;
12171 
12172 	return (new_bp);
12173 }
12174 
12175 /*
12176  *    Function: sd_shadow_buf_alloc
12177  *
12178  * Description: Allocate a buf(9S) and init it as per the given buf
12179  *		and the various arguments.  The associated sd_xbuf
12180  *		struct is (nearly) duplicated.  The struct buf *bp
12181  *		argument is saved in new_xp->xb_private.
12182  *
12183  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12184  *		datalen - size of data area for the shadow bp
12185  *		bflags - B_READ or B_WRITE (pseudo flag)
12186  *		blkno - starting LBA
12187  *		func - function pointer for b_iodone in the shadow buf. (May
12188  *			be NULL if none.)
12189  *
12190  * Return Code: Pointer to the allocated buf(9S) struct
12191  *
12192  *     Context: Can sleep.
12193  */
12194 
12195 static struct buf *
12196 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12197 	daddr_t blkno, int (*func)(struct buf *))
12198 {
12199 	struct	sd_lun	*un;
12200 	struct	sd_xbuf	*xp;
12201 	struct	sd_xbuf	*new_xp;
12202 	struct	buf	*new_bp;
12203 
12204 	ASSERT(bp != NULL);
12205 	xp = SD_GET_XBUF(bp);
12206 	ASSERT(xp != NULL);
12207 	un = SD_GET_UN(bp);
12208 	ASSERT(un != NULL);
12209 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12210 
12211 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12212 		bp_mapin(bp);
12213 	}
12214 
12215 	bflags &= (B_READ | B_WRITE);
12216 #if defined(__i386) || defined(__amd64)
12217 	new_bp = getrbuf(KM_SLEEP);
12218 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12219 	new_bp->b_bcount = datalen;
12220 	new_bp->b_flags = bflags |
12221 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12222 #else
12223 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12224 	    datalen, bflags, SLEEP_FUNC, NULL);
12225 #endif
12226 	new_bp->av_forw	= NULL;
12227 	new_bp->av_back	= NULL;
12228 	new_bp->b_dev	= bp->b_dev;
12229 	new_bp->b_blkno	= blkno;
12230 	new_bp->b_iodone = func;
12231 	new_bp->b_edev	= bp->b_edev;
12232 	new_bp->b_resid	= 0;
12233 
12234 	/* We need to preserve the B_FAILFAST flag */
12235 	if (bp->b_flags & B_FAILFAST) {
12236 		new_bp->b_flags |= B_FAILFAST;
12237 	}
12238 
12239 	/*
12240 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12241 	 * original xbuf into it.
12242 	 */
12243 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12244 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12245 
12246 	/* Needed later to copy data between the shadow buf & original buf! */
12247 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12248 
12249 	/*
12250 	 * The given bp is automatically saved in the xb_private member
12251 	 * of the new xbuf.  Callers are allowed to depend on this.
12252 	 */
12253 	new_xp->xb_private = bp;
12254 
12255 	new_bp->b_private  = new_xp;
12256 
12257 	return (new_bp);
12258 }
12259 
12260 /*
12261  *    Function: sd_bioclone_free
12262  *
12263  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12264  *		when the operation is larger than the partition.
12265  *
12266  *     Context: May be called under interrupt context
12267  */
12268 
12269 static void
12270 sd_bioclone_free(struct buf *bp)
12271 {
12272 	struct sd_xbuf	*xp;
12273 
12274 	ASSERT(bp != NULL);
12275 	xp = SD_GET_XBUF(bp);
12276 	ASSERT(xp != NULL);
12277 
12278 	/*
12279 	 * Call bp_mapout() before freeing the buf, in case a lower
12280 	 * layer or HBA had done a bp_mapin().  We must do this here
12281 	 * as we are the "originator" of the shadow buf.
12282 	 */
12283 	bp_mapout(bp);
12284 
12285 	/*
12286 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12287 	 * never gets confused by a stale value in this field. (Just a little
12288 	 * extra defensiveness here.)
12289 	 */
12290 	bp->b_iodone = NULL;
12291 
12292 	freerbuf(bp);
12293 
12294 	kmem_free(xp, sizeof (struct sd_xbuf));
12295 }
12296 
12297 /*
12298  *    Function: sd_shadow_buf_free
12299  *
12300  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12301  *
12302  *     Context: May be called under interrupt context
12303  */
12304 
12305 static void
12306 sd_shadow_buf_free(struct buf *bp)
12307 {
12308 	struct sd_xbuf	*xp;
12309 
12310 	ASSERT(bp != NULL);
12311 	xp = SD_GET_XBUF(bp);
12312 	ASSERT(xp != NULL);
12313 
12314 #if defined(__sparc)
12315 	/*
12316 	 * Call bp_mapout() before freeing the buf, in case a lower
12317 	 * layer or HBA had done a bp_mapin().  We must do this here
12318 	 * as we are the "originator" of the shadow buf.
12319 	 */
12320 	bp_mapout(bp);
12321 #endif
12322 
12323 	/*
12324 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12325 	 * never gets confused by a stale value in this field. (Just a little
12326 	 * extra defensiveness here.)
12327 	 */
12328 	bp->b_iodone = NULL;
12329 
12330 #if defined(__i386) || defined(__amd64)
12331 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12332 	freerbuf(bp);
12333 #else
12334 	scsi_free_consistent_buf(bp);
12335 #endif
12336 
12337 	kmem_free(xp, sizeof (struct sd_xbuf));
12338 }
12339 
12340 
12341 /*
12342  *    Function: sd_print_transport_rejected_message
12343  *
12344  * Description: This implements the ludicrously complex rules for printing
12345  *		a "transport rejected" message.  This is to address the
12346  *		specific problem of having a flood of this error message
12347  *		produced when a failover occurs.
12348  *
12349  *     Context: Any.
12350  */
12351 
12352 static void
12353 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12354 	int code)
12355 {
12356 	ASSERT(un != NULL);
12357 	ASSERT(mutex_owned(SD_MUTEX(un)));
12358 	ASSERT(xp != NULL);
12359 
12360 	/*
12361 	 * Print the "transport rejected" message under any of the
12362 	 * following conditions:
12363 	 *
12364 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
12365 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12366 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12367 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12368 	 *   scsi_transport(9F) (which indicates that the target might have
12369 	 *   gone off-line).  This uses the un->un_tran_fatal_count
12370 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
12371 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
12372 	 *   from scsi_transport().
12373 	 *
12374 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
12375 	 * the preceeding cases in order for the message to be printed.
12376 	 */
12377 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12378 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12379 		    (code != TRAN_FATAL_ERROR) ||
12380 		    (un->un_tran_fatal_count == 1)) {
12381 			switch (code) {
12382 			case TRAN_BADPKT:
12383 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12384 				    "transport rejected bad packet\n");
12385 				break;
12386 			case TRAN_FATAL_ERROR:
12387 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12388 				    "transport rejected fatal error\n");
12389 				break;
12390 			default:
12391 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12392 				    "transport rejected (%d)\n", code);
12393 				break;
12394 			}
12395 		}
12396 	}
12397 }
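/*
 * Example of the rules above in action: during a failover where every
 * scsi_transport() call returns TRAN_FATAL_ERROR, only the first
 * rejection is logged (un_tran_fatal_count == 1).  The count is reset
 * by the next TRAN_ACCEPT in sd_start_cmds(), so a later failover
 * again logs exactly one message.  Setting the SD_LOGMASK_DIAG bit in
 * sd_level_mask restores per-command logging.
 */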
12398 
12399 
12400 /*
12401  *    Function: sd_add_buf_to_waitq
12402  *
12403  * Description: Add the given buf(9S) struct to the wait queue for the
12404  *		instance.  If sorting is enabled, then the buf is added
12405  *		to the queue via an elevator sort algorithm (a la
12406  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12407  *		If sorting is not enabled, then the buf is just added
12408  *		to the end of the wait queue.
12409  *
12410  * Return Code: void
12411  *
12412  *     Context: Does not sleep/block, therefore technically can be called
12413  *		from any context.  However if sorting is enabled then the
12414  *		execution time is indeterminate and may become long if
12415  *		the wait queue grows large.
12416  */
12417 
12418 static void
12419 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12420 {
12421 	struct buf *ap;
12422 
12423 	ASSERT(bp != NULL);
12424 	ASSERT(un != NULL);
12425 	ASSERT(mutex_owned(SD_MUTEX(un)));
12426 
12427 	/* If the queue is empty, add the buf as the only entry & return. */
12428 	if (un->un_waitq_headp == NULL) {
12429 		ASSERT(un->un_waitq_tailp == NULL);
12430 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12431 		bp->av_forw = NULL;
12432 		return;
12433 	}
12434 
12435 	ASSERT(un->un_waitq_tailp != NULL);
12436 
12437 	/*
12438 	 * If sorting is disabled, just add the buf to the tail end of
12439 	 * the wait queue and return.
12440 	 */
12441 	if (un->un_f_disksort_disabled) {
12442 		un->un_waitq_tailp->av_forw = bp;
12443 		un->un_waitq_tailp = bp;
12444 		bp->av_forw = NULL;
12445 		return;
12446 	}
12447 
12448 	/*
12449 	 * Sort thru the list of requests currently on the wait queue
12450 	 * and add the new buf request at the appropriate position.
12451 	 *
12452 	 * The un->un_waitq_headp is an activity chain pointer on which
12453 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12454 	 * first queue holds those requests which are positioned after
12455 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12456 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12457 	 * Thus we implement a one way scan, retracting after reaching
12458 	 * the end of the drive to the first request on the second
12459 	 * queue, at which time it becomes the first queue.
12460 	 * A one-way scan is natural because of the way UNIX read-ahead
12461 	 * blocks are allocated.
12462 	 *
12463 	 * If we lie after the first request, then we must locate the
12464 	 * second request list and add ourselves to it.
12465 	 */
12466 	ap = un->un_waitq_headp;
12467 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12468 		while (ap->av_forw != NULL) {
12469 			/*
12470 			 * Look for an "inversion" in the (normally
12471 			 * ascending) block numbers. This indicates
12472 			 * the start of the second request list.
12473 			 */
12474 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12475 				/*
12476 				 * Search the second request list for the
12477 				 * first request at a larger block number.
12478 				 * We go before that; however if there is
12479 				 * no such request, we go at the end.
12480 				 */
12481 				do {
12482 					if (SD_GET_BLKNO(bp) <
12483 					    SD_GET_BLKNO(ap->av_forw)) {
12484 						goto insert;
12485 					}
12486 					ap = ap->av_forw;
12487 				} while (ap->av_forw != NULL);
12488 				goto insert;		/* after last */
12489 			}
12490 			ap = ap->av_forw;
12491 		}
12492 
12493 		/*
12494 		 * No inversions... we will go after the last, and
12495 		 * be the first request in the second request list.
12496 		 */
12497 		goto insert;
12498 	}
12499 
12500 	/*
12501 	 * Request is at/after the current request...
12502 	 * sort in the first request list.
12503 	 */
12504 	while (ap->av_forw != NULL) {
12505 		/*
12506 		 * We want to go after the current request (1) if
12507 		 * there is an inversion after it (i.e. it is the end
12508 		 * of the first request list), or (2) if the next
12509 		 * request is a larger block no. than our request.
12510 		 */
12511 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12512 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12513 			goto insert;
12514 		}
12515 		ap = ap->av_forw;
12516 	}
12517 
12518 	/*
12519 	 * Neither a second list nor a larger request, therefore
12520 	 * we go at the end of the first list (which is the same
12521 	 * as the end of the whole shebang).
12522 	 */
12523 insert:
12524 	bp->av_forw = ap->av_forw;
12525 	ap->av_forw = bp;
12526 
12527 	/*
12528 	 * If we inserted onto the tail end of the waitq, make sure the
12529 	 * tail pointer is updated.
12530 	 */
12531 	if (ap == un->un_waitq_tailp) {
12532 		un->un_waitq_tailp = bp;
12533 	}
12534 }
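/*
 * Worked example of the two-list elevator above (block numbers are
 * illustrative): with a waitq of 100 -> 300 -> 500 -> 20 -> 60, the
 * inversion at 500 -> 20 marks the start of the second list.  A new
 * request at blkno 400 sorts into the first list between 300 and 500,
 * while a new request at blkno 40 (behind the current head) is
 * carried past the inversion and lands between 20 and 60.
 */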
12535 
12536 
12537 /*
12538  *    Function: sd_start_cmds
12539  *
12540  * Description: Remove and transport cmds from the driver queues.
12541  *
12542  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12543  *
12544  *		immed_bp - ptr to a buf to be transported immediately. Only
12545  *		the immed_bp is transported; bufs on the waitq are not
12546  *		processed and the un_retry_bp is not checked.  If immed_bp is
12547  *		NULL, then normal queue processing is performed.
12548  *
12549  *     Context: May be called from kernel thread context, interrupt context,
12550  *		or runout callback context. This function may not block or
12551  *		call routines that block.
12552  */
12553 
12554 static void
12555 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12556 {
12557 	struct	sd_xbuf	*xp;
12558 	struct	buf	*bp;
12559 	void	(*statp)(kstat_io_t *);
12560 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12561 	void	(*saved_statp)(kstat_io_t *);
12562 #endif
12563 	int	rval;
12564 
12565 	ASSERT(un != NULL);
12566 	ASSERT(mutex_owned(SD_MUTEX(un)));
12567 	ASSERT(un->un_ncmds_in_transport >= 0);
12568 	ASSERT(un->un_throttle >= 0);
12569 
12570 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12571 
12572 	do {
12573 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12574 		saved_statp = NULL;
12575 #endif
12576 
12577 		/*
12578 		 * If we are syncing or dumping, fail the command to
12579 		 * avoid recursively calling back into scsi_transport().
12580 		 * The dump I/O itself uses a separate code path so this
12581 		 * only prevents non-dump I/O from being sent while dumping.
12582 		 * File system sync takes place before dumping begins.
12583 		 * During panic, filesystem I/O is allowed provided
12584 		 * un_in_callback is <= 1.  This is to prevent recursion
12585 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12586 		 * sd_start_cmds and so on.  See panic.c for more information
12587 		 * about the states the system can be in during panic.
12588 		 */
12589 		if ((un->un_state == SD_STATE_DUMPING) ||
12590 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12591 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12592 			    "sd_start_cmds: panicking\n");
12593 			goto exit;
12594 		}
12595 
12596 		if ((bp = immed_bp) != NULL) {
12597 			/*
12598 			 * We have a bp that must be transported immediately.
12599 			 * It's OK to transport the immed_bp here without doing
12600 			 * the throttle limit check because the immed_bp is
12601 			 * always used in a retry/recovery case. This means
12602 			 * that we know we are not at the throttle limit by
12603 			 * virtue of the fact that to get here we must have
12604 			 * already gotten a command back via sdintr(). This also
12605 			 * relies on (1) the command on un_retry_bp preventing
12606 			 * further commands from the waitq from being issued;
12607 			 * and (2) the code in sd_retry_command checking the
12608 			 * throttle limit before issuing a delayed or immediate
12609 			 * retry. This holds even if the throttle limit is
12610 			 * currently ratcheted down from its maximum value.
12611 			 */
12612 			statp = kstat_runq_enter;
12613 			if (bp == un->un_retry_bp) {
12614 				ASSERT((un->un_retry_statp == NULL) ||
12615 				    (un->un_retry_statp == kstat_waitq_enter) ||
12616 				    (un->un_retry_statp ==
12617 				    kstat_runq_back_to_waitq));
12618 				/*
12619 				 * If the waitq kstat was incremented when
12620 				 * sd_set_retry_bp() queued this bp for a retry,
12621 				 * then we must set up statp so that the waitq
12622 				 * count will get decremented correctly below.
12623 				 * Also we must clear un->un_retry_statp to
12624 				 * ensure that we do not act on a stale value
12625 				 * in this field.
12626 				 */
12627 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12628 				    (un->un_retry_statp ==
12629 				    kstat_runq_back_to_waitq)) {
12630 					statp = kstat_waitq_to_runq;
12631 				}
12632 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12633 				saved_statp = un->un_retry_statp;
12634 #endif
12635 				un->un_retry_statp = NULL;
12636 
12637 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12638 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12639 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12640 				    un, un->un_retry_bp, un->un_throttle,
12641 				    un->un_ncmds_in_transport);
12642 			} else {
12643 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12644 				    "processing priority bp:0x%p\n", bp);
12645 			}
12646 
12647 		} else if ((bp = un->un_waitq_headp) != NULL) {
12648 			/*
12649 			 * A command on the waitq is ready to go, but do not
12650 			 * send it if:
12651 			 *
12652 			 * (1) the throttle limit has been reached, or
12653 			 * (2) a retry is pending, or
12654 			 * (3) a START_STOP_UNIT callback is pending, or
12655 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
12656 			 *	command is pending.
12657 			 *
12658 			 * For all of these conditions, IO processing will
12659 			 * restart after the condition is cleared.
12660 			 */
12661 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12662 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12663 				    "sd_start_cmds: exiting, "
12664 				    "throttle limit reached!\n");
12665 				goto exit;
12666 			}
12667 			if (un->un_retry_bp != NULL) {
12668 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12669 				    "sd_start_cmds: exiting, retry pending!\n");
12670 				goto exit;
12671 			}
12672 			if (un->un_startstop_timeid != NULL) {
12673 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12674 				    "sd_start_cmds: exiting, "
12675 				    "START_STOP pending!\n");
12676 				goto exit;
12677 			}
12678 			if (un->un_direct_priority_timeid != NULL) {
12679 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12680 				    "sd_start_cmds: exiting, "
12681 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12682 				goto exit;
12683 			}
12684 
12685 			/* Dequeue the command */
12686 			un->un_waitq_headp = bp->av_forw;
12687 			if (un->un_waitq_headp == NULL) {
12688 				un->un_waitq_tailp = NULL;
12689 			}
12690 			bp->av_forw = NULL;
12691 			statp = kstat_waitq_to_runq;
12692 			SD_TRACE(SD_LOG_IO_CORE, un,
12693 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12694 
12695 		} else {
12696 			/* No work to do so bail out now */
12697 			SD_TRACE(SD_LOG_IO_CORE, un,
12698 			    "sd_start_cmds: no more work, exiting!\n");
12699 			goto exit;
12700 		}
12701 
12702 		/*
12703 		 * Reset the state to normal. This is the mechanism by which
12704 		 * the state transitions from either SD_STATE_RWAIT or
12705 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12706 		 * If state is SD_STATE_PM_CHANGING then this command is
12707 		 * part of the device power control and the state must
12708 	 * not be put back to normal. Doing so would allow new
12709 	 * commands to proceed when they shouldn't, as the
12710 	 * device may be going off.
12711 		 */
12712 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12713 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12714 			New_state(un, SD_STATE_NORMAL);
12715 		}
12716 
12717 		xp = SD_GET_XBUF(bp);
12718 		ASSERT(xp != NULL);
12719 
12720 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12721 		/*
12722 		 * Allocate the scsi_pkt if we need one, or attach DMA
12723 		 * resources if we have a scsi_pkt that needs them. The
12724 		 * latter should only occur for commands that are being
12725 		 * retried.
12726 		 */
12727 		if ((xp->xb_pktp == NULL) ||
12728 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12729 #else
12730 		if (xp->xb_pktp == NULL) {
12731 #endif
12732 			/*
12733 			 * There is no scsi_pkt allocated for this buf. Call
12734 			 * the initpkt function to allocate & init one.
12735 			 *
12736 			 * The scsi_init_pkt runout callback functionality is
12737 			 * implemented as follows:
12738 			 *
12739 			 * 1) The initpkt function always calls
12740 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12741 			 *    callback routine.
12742 			 * 2) A successful packet allocation is initialized and
12743 			 *    the I/O is transported.
12744 			 * 3) The I/O associated with an allocation resource
12745 			 *    failure is left on its queue to be retried via
12746 			 *    runout or the next I/O.
12747 			 * 4) The I/O associated with a DMA error is removed
12748 			 *    from the queue and failed with EIO. Processing of
12749 			 *    the transport queues is also halted to be
12750 			 *    restarted via runout or the next I/O.
12751 			 * 5) The I/O associated with a CDB size or packet
12752 			 *    size error is removed from the queue and failed
12753 			 *    with EIO. Processing of the transport queues is
12754 			 *    continued.
12755 			 *
12756 			 * Note: there is no interface for canceling a runout
12757 			 * callback. To prevent the driver from detaching or
12758 			 * suspending while a runout is pending the driver
12759 			 * state is set to SD_STATE_RWAIT
12760 			 *
12761 			 * Note: using the scsi_init_pkt callback facility can
12762 			 * result in an I/O request persisting at the head of
12763 			 * the list which cannot be satisfied even after
12764 			 * multiple retries. In the future the driver may
12765 			 * implement some kind of maximum runout count before
12766 			 * failing an I/O.
12767 			 *
12768 			 * Note: the use of funcp below may seem superfluous,
12769 			 * but it helps warlock figure out the correct
12770 			 * initpkt function calls (see [s]sd.wlcmd).
12771 			 */
12772 			struct scsi_pkt	*pktp;
12773 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
12774 
12775 			ASSERT(bp != un->un_rqs_bp);
12776 
12777 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
12778 			switch ((*funcp)(bp, &pktp)) {
12779 			case  SD_PKT_ALLOC_SUCCESS:
12780 				xp->xb_pktp = pktp;
12781 				SD_TRACE(SD_LOG_IO_CORE, un,
12782 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
12783 				    pktp);
12784 				goto got_pkt;
12785 
12786 			case SD_PKT_ALLOC_FAILURE:
12787 				/*
12788 				 * Temporary (hopefully) resource depletion.
12789 				 * Since retries and RQS commands always have a
12790 				 * scsi_pkt allocated, these cases should never
12791 				 * get here. So the only cases this needs to
12792 				 * handle is a bp from the waitq (which we put
12793 				 * back onto the waitq for sdrunout), or a bp
12794 				 * sent as an immed_bp (which we just fail).
12795 				 */
12796 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12797 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
12798 
12799 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12800 
12801 				if (bp == immed_bp) {
12802 					/*
12803 					 * If SD_XB_DMA_FREED is clear, then
12804 					 * this is a failure to allocate a
12805 					 * scsi_pkt, and we must fail the
12806 					 * command.
12807 					 */
12808 					if ((xp->xb_pkt_flags &
12809 					    SD_XB_DMA_FREED) == 0) {
12810 						break;
12811 					}
12812 
12813 					/*
12814 					 * If this immediate command is NOT our
12815 					 * un_retry_bp, then we must fail it.
12816 					 */
12817 					if (bp != un->un_retry_bp) {
12818 						break;
12819 					}
12820 
12821 					/*
12822 					 * We get here if this cmd is our
12823 					 * un_retry_bp that was DMAFREED, but
12824 					 * scsi_init_pkt() failed to reallocate
12825 					 * DMA resources when we attempted to
12826 					 * retry it. This can happen when an
12827 					 * mpxio failover is in progress, but
12828 					 * we don't want to just fail the
12829 					 * command in this case.
12830 					 *
12831 					 * Use timeout(9F) to restart it after
12832 					 * a 100ms delay.  We don't want to
12833 					 * let sdrunout() restart it, because
12834 					 * sdrunout() is just supposed to start
12835 					 * commands that are sitting on the
12836 					 * wait queue.  The un_retry_bp stays
12837 					 * set until the command completes, but
12838 					 * sdrunout can be called many times
12839 					 * before that happens.  Since sdrunout
12840 					 * cannot tell if the un_retry_bp is
12841 					 * already in the transport, it could
12842 					 * end up calling scsi_transport() for
12843 					 * the un_retry_bp multiple times.
12844 					 *
12845 					 * Also: don't schedule the callback
12846 					 * if some other callback is already
12847 					 * pending.
12848 					 */
12849 					if (un->un_retry_statp == NULL) {
12850 						/*
12851 						 * restore the kstat pointer to
12852 						 * keep kstat counts coherent
12853 						 * when we do retry the command.
12854 						 */
12855 						un->un_retry_statp =
12856 						    saved_statp;
12857 					}
12858 
12859 					if ((un->un_startstop_timeid == NULL) &&
12860 					    (un->un_retry_timeid == NULL) &&
12861 					    (un->un_direct_priority_timeid ==
12862 					    NULL)) {
12863 
12864 						un->un_retry_timeid =
12865 						    timeout(
12866 						    sd_start_retry_command,
12867 						    un, SD_RESTART_TIMEOUT);
12868 					}
12869 					goto exit;
12870 				}
12871 
12872 #else
12873 				if (bp == immed_bp) {
12874 					break;	/* Just fail the command */
12875 				}
12876 #endif
12877 
12878 				/* Add the buf back to the head of the waitq */
12879 				bp->av_forw = un->un_waitq_headp;
12880 				un->un_waitq_headp = bp;
12881 				if (un->un_waitq_tailp == NULL) {
12882 					un->un_waitq_tailp = bp;
12883 				}
12884 				goto exit;
12885 
12886 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
12887 				/*
12888 				 * HBA DMA resource failure. Fail the command
12889 				 * and continue processing of the queues.
12890 				 */
12891 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12892 				    "sd_start_cmds: "
12893 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
12894 				break;
12895 
12896 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
12897 				/*
12898 				 * Note:x86: Partial DMA mapping not supported
12899 				 * for USCSI commands, and all the needed DMA
12900 				 * resources were not allocated.
12901 				 */
12902 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12903 				    "sd_start_cmds: "
12904 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
12905 				break;
12906 
12907 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
12908 				/*
12909 				 * Note:x86: Request cannot fit into CDB based
12910 				 * on lba and len.
12911 				 */
12912 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12913 				    "sd_start_cmds: "
12914 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
12915 				break;
12916 
12917 			default:
12918 				/* Should NEVER get here! */
12919 				panic("scsi_initpkt error");
12920 				/*NOTREACHED*/
12921 			}
12922 
12923 			/*
12924 			 * Fatal error in allocating a scsi_pkt for this buf.
12925 			 * Update kstats & return the buf with an error code.
12926 			 * We must use sd_return_failed_command_no_restart() to
12927 			 * avoid a recursive call back into sd_start_cmds().
12928 			 * However this also means that we must keep processing
12929 			 * the waitq here in order to avoid stalling.
12930 			 */
12931 			if (statp == kstat_waitq_to_runq) {
12932 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
12933 			}
12934 			sd_return_failed_command_no_restart(un, bp, EIO);
12935 			if (bp == immed_bp) {
12936 				/* immed_bp is gone by now, so clear this */
12937 				immed_bp = NULL;
12938 			}
12939 			continue;
12940 		}
12941 got_pkt:
12942 		if (bp == immed_bp) {
12943 			/* goto the head of the class.... */
12944 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
12945 		}
12946 
12947 		un->un_ncmds_in_transport++;
12948 		SD_UPDATE_KSTATS(un, statp, bp);
12949 
12950 		/*
12951 		 * Call scsi_transport() to send the command to the target.
12952 		 * According to SCSA architecture, we must drop the mutex here
12953 		 * before calling scsi_transport() in order to avoid deadlock.
12954 		 * Note that the scsi_pkt's completion routine can be executed
12955 		 * (from interrupt context) even before the call to
12956 		 * scsi_transport() returns.
12957 		 */
12958 		SD_TRACE(SD_LOG_IO_CORE, un,
12959 		    "sd_start_cmds: calling scsi_transport()\n");
12960 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
12961 
12962 		mutex_exit(SD_MUTEX(un));
12963 		rval = scsi_transport(xp->xb_pktp);
12964 		mutex_enter(SD_MUTEX(un));
12965 
12966 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12967 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
12968 
12969 		switch (rval) {
12970 		case TRAN_ACCEPT:
12971 			/* Clear this with every pkt accepted by the HBA */
12972 			un->un_tran_fatal_count = 0;
12973 			break;	/* Success; try the next cmd (if any) */
12974 
12975 		case TRAN_BUSY:
12976 			un->un_ncmds_in_transport--;
12977 			ASSERT(un->un_ncmds_in_transport >= 0);
12978 
12979 			/*
12980 			 * Don't retry request sense, the sense data
12981 			 * is lost when another request is sent.
12982 			 * Free up the rqs buf and retry
12983 			 * the original failed cmd.  Update kstat.
12984 			 */
12985 			if (bp == un->un_rqs_bp) {
12986 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
12987 				bp = sd_mark_rqs_idle(un, xp);
12988 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
12989 				    NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
12990 				    kstat_waitq_enter);
12991 				goto exit;
12992 			}
12993 
12994 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12995 			/*
12996 			 * Free the DMA resources for the  scsi_pkt. This will
12997 			 * allow mpxio to select another path the next time
12998 			 * we call scsi_transport() with this scsi_pkt.
12999 			 * See sdintr() for the rationale behind this.
13000 			 */
13001 			if ((un->un_f_is_fibre == TRUE) &&
13002 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13003 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13004 				scsi_dmafree(xp->xb_pktp);
13005 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13006 			}
13007 #endif
13008 
13009 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13010 				/*
13011 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13012 				 * are for error recovery situations. These do
13013 				 * not use the normal command waitq, so if they
13014 				 * get a TRAN_BUSY we cannot put them back onto
13015 				 * the waitq for later retry. One possible
13016 				 * problem is that there could already be some
13017 				 * other command on un_retry_bp that is waiting
13018 				 * for this one to complete, so we would be
13019 				 * deadlocked if we put this command back onto
13020 				 * the waitq for later retry (since un_retry_bp
13021 				 * must complete before the driver gets back to
13022 				 * commands on the waitq).
13023 				 *
13024 				 * To avoid deadlock we must schedule a callback
13025 				 * that will restart this command after a set
13026 				 * interval.  This should keep retrying for as
13027 				 * long as the underlying transport keeps
13028 				 * returning TRAN_BUSY (just like for other
13029 				 * commands).  Use the same timeout interval as
13030 				 * for the ordinary TRAN_BUSY retry.
13031 				 */
13032 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13033 				    "sd_start_cmds: scsi_transport() returned "
13034 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13035 
13036 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13037 				un->un_direct_priority_timeid =
13038 				    timeout(sd_start_direct_priority_command,
13039 				    bp, SD_BSY_TIMEOUT / 500);
13040 
13041 				goto exit;
13042 			}
13043 
13044 			/*
13045 			 * For TRAN_BUSY, we want to reduce the throttle value,
13046 			 * unless we are retrying a command.
13047 			 */
13048 			if (bp != un->un_retry_bp) {
13049 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13050 			}
13051 
13052 			/*
13053 			 * Set up the bp to be tried again 10 ms later.
13054 			 * Note:x86: Is there a timeout value in the sd_lun
13055 			 * for this condition?
13056 			 */
13057 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13058 			    kstat_runq_back_to_waitq);
13059 			goto exit;
13060 
13061 		case TRAN_FATAL_ERROR:
13062 			un->un_tran_fatal_count++;
13063 			/* FALLTHRU */
13064 
13065 		case TRAN_BADPKT:
13066 		default:
13067 			un->un_ncmds_in_transport--;
13068 			ASSERT(un->un_ncmds_in_transport >= 0);
13069 
13070 			/*
13071 			 * If this is our REQUEST SENSE command with a
13072 			 * transport error, we must get back the pointers
13073 			 * to the original buf, and mark the REQUEST
13074 			 * SENSE command as "available".
13075 			 */
13076 			if (bp == un->un_rqs_bp) {
13077 				bp = sd_mark_rqs_idle(un, xp);
13078 				xp = SD_GET_XBUF(bp);
13079 			} else {
13080 				/*
13081 				 * Legacy behavior: do not update transport
13082 				 * error count for request sense commands.
13083 				 */
13084 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13085 			}
13086 
13087 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13088 			sd_print_transport_rejected_message(un, xp, rval);
13089 
13090 			/*
13091 			 * We must use sd_return_failed_command_no_restart() to
13092 			 * avoid a recursive call back into sd_start_cmds().
13093 			 * However this also means that we must keep processing
13094 			 * the waitq here in order to avoid stalling.
13095 			 */
13096 			sd_return_failed_command_no_restart(un, bp, EIO);
13097 
13098 			/*
13099 			 * Notify any threads waiting in sd_ddi_suspend() that
13100 			 * a command completion has occurred.
13101 			 */
13102 			if (un->un_state == SD_STATE_SUSPENDED) {
13103 				cv_broadcast(&un->un_disk_busy_cv);
13104 			}
13105 
13106 			if (bp == immed_bp) {
13107 				/* immed_bp is gone by now, so clear this */
13108 				immed_bp = NULL;
13109 			}
13110 			break;
13111 		}
13112 
13113 	} while (immed_bp == NULL);
13114 
13115 exit:
13116 	ASSERT(mutex_owned(SD_MUTEX(un)));
13117 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13118 }
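/*
 * Summary of the kstat bookkeeping in sd_start_cmds() above: a bp
 * dequeued from the waitq moves with kstat_waitq_to_runq; an immed_bp
 * enters the runq directly (kstat_runq_enter) unless it is the
 * un_retry_bp and was previously counted on the waitq, in which case
 * kstat_waitq_to_runq is used so the waitq count is decremented
 * correctly.  On TRAN_BUSY the bp leaves the runq again, either via
 * kstat_runq_exit or kstat_runq_back_to_waitq in sd_set_retry_bp().
 */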
13119 
13120 
13121 /*
13122  *    Function: sd_return_command
13123  *
13124  * Description: Returns a command to its originator (with or without an
13125  *		error).  Also starts commands waiting to be transported
13126  *		to the target.
13127  *
13128  *     Context: May be called from interrupt, kernel, or timeout context
13129  */
13130 
13131 static void
13132 sd_return_command(struct sd_lun *un, struct buf *bp)
13133 {
13134 	struct sd_xbuf *xp;
13135 #if defined(__i386) || defined(__amd64)
13136 	struct scsi_pkt *pktp;
13137 #endif
13138 
13139 	ASSERT(bp != NULL);
13140 	ASSERT(un != NULL);
13141 	ASSERT(mutex_owned(SD_MUTEX(un)));
13142 	ASSERT(bp != un->un_rqs_bp);
13143 	xp = SD_GET_XBUF(bp);
13144 	ASSERT(xp != NULL);
13145 
13146 #if defined(__i386) || defined(__amd64)
13147 	pktp = SD_GET_PKTP(bp);
13148 #endif
13149 
13150 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13151 
13152 #if defined(__i386) || defined(__amd64)
13153 	/*
13154 	 * Note:x86: check for the "sdrestart failed" case.
13155 	 */
13156 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13157 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13158 		(xp->xb_pktp->pkt_resid == 0)) {
13159 
13160 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13161 			/*
13162 			 * Successfully set up next portion of cmd
13163 			 * transfer, try sending it
13164 			 */
13165 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13166 			    NULL, NULL, 0, (clock_t)0, NULL);
13167 			sd_start_cmds(un, NULL);
13168 			return;	/* Note:x86: need a return here? */
13169 		}
13170 	}
13171 #endif
13172 
13173 	/*
13174 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13175 	 * can happen if, upon being retried, the failfast bp either
13176 	 * succeeded or encountered another error (possibly even a different
13177 	 * error than the one that precipitated the failfast state, but in
13178 	 * that case it would have had to exhaust retries as well). Regardless,
13179 	 * this should not occur whenever the instance is in the active
13180 	 * failfast state.
13181 	 */
13182 	if (bp == un->un_failfast_bp) {
13183 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13184 		un->un_failfast_bp = NULL;
13185 	}
13186 
13187 	/*
13188 	 * Clear the failfast state upon successful completion of ANY cmd.
13189 	 */
13190 	if (bp->b_error == 0) {
13191 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13192 	}
13193 
13194 	/*
13195 	 * This is used if the command was retried one or more times. Show that
13196 	 * we are done with it, and allow processing of the waitq to resume.
13197 	 */
13198 	if (bp == un->un_retry_bp) {
13199 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13200 		    "sd_return_command: un:0x%p: "
13201 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13202 		un->un_retry_bp = NULL;
13203 		un->un_retry_statp = NULL;
13204 	}
13205 
13206 	SD_UPDATE_RDWR_STATS(un, bp);
13207 	SD_UPDATE_PARTITION_STATS(un, bp);
13208 
13209 	switch (un->un_state) {
13210 	case SD_STATE_SUSPENDED:
13211 		/*
13212 		 * Notify any threads waiting in sd_ddi_suspend() that
13213 		 * a command completion has occurred.
13214 		 */
13215 		cv_broadcast(&un->un_disk_busy_cv);
13216 		break;
13217 	default:
13218 		sd_start_cmds(un, NULL);
13219 		break;
13220 	}
13221 
13222 	/* Return this command up the iodone chain to its originator. */
13223 	mutex_exit(SD_MUTEX(un));
13224 
13225 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13226 	xp->xb_pktp = NULL;
13227 
13228 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13229 
13230 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13231 	mutex_enter(SD_MUTEX(un));
13232 
13233 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13234 }
13235 
13236 
13237 /*
13238  *    Function: sd_return_failed_command
13239  *
13240  * Description: Command completion when an error occurred.
13241  *
13242  *     Context: May be called from interrupt context
13243  */
13244 
13245 static void
13246 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13247 {
13248 	ASSERT(bp != NULL);
13249 	ASSERT(un != NULL);
13250 	ASSERT(mutex_owned(SD_MUTEX(un)));
13251 
13252 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13253 	    "sd_return_failed_command: entry\n");
13254 
13255 	/*
13256 	 * b_resid could already be nonzero due to a partial data
13257 	 * transfer, so do not change it here.
13258 	 */
13259 	SD_BIOERROR(bp, errcode);
13260 
13261 	sd_return_command(un, bp);
13262 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13263 	    "sd_return_failed_command: exit\n");
13264 }
13265 
13266 
13267 /*
13268  *    Function: sd_return_failed_command_no_restart
13269  *
13270  * Description: Same as sd_return_failed_command, but ensures that no
13271  *		call back into sd_start_cmds will be issued.
13272  *
13273  *     Context: May be called from interrupt context
13274  */
13275 
13276 static void
13277 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13278 	int errcode)
13279 {
13280 	struct sd_xbuf *xp;
13281 
13282 	ASSERT(bp != NULL);
13283 	ASSERT(un != NULL);
13284 	ASSERT(mutex_owned(SD_MUTEX(un)));
13285 	xp = SD_GET_XBUF(bp);
13286 	ASSERT(xp != NULL);
13287 	ASSERT(errcode != 0);
13288 
13289 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13290 	    "sd_return_failed_command_no_restart: entry\n");
13291 
13292 	/*
13293 	 * b_resid could already be nonzero due to a partial data
13294 	 * transfer, so do not change it here.
13295 	 */
13296 	SD_BIOERROR(bp, errcode);
13297 
13298 	/*
13299 	 * If this is the failfast bp, clear it. This can happen if the
13300 	 * failfast bp encountered a fatal error when we attempted to
13301 	 * re-try it (such as a scsi_transport(9F) failure).  However
13302 	 * we should NOT be in an active failfast state if the failfast
13303 	 * bp is not NULL.
13304 	 */
13305 	if (bp == un->un_failfast_bp) {
13306 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13307 		un->un_failfast_bp = NULL;
13308 	}
13309 
13310 	if (bp == un->un_retry_bp) {
13311 		/*
13312 		 * This command was retried one or more times. Show that we are
13313 		 * done with it, and allow processing of the waitq to resume.
13314 		 */
13315 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13316 		    "sd_return_failed_command_no_restart: "
13317 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13318 		un->un_retry_bp = NULL;
13319 		un->un_retry_statp = NULL;
13320 	}
13321 
13322 	SD_UPDATE_RDWR_STATS(un, bp);
13323 	SD_UPDATE_PARTITION_STATS(un, bp);
13324 
13325 	mutex_exit(SD_MUTEX(un));
13326 
13327 	if (xp->xb_pktp != NULL) {
13328 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13329 		xp->xb_pktp = NULL;
13330 	}
13331 
13332 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13333 
13334 	mutex_enter(SD_MUTEX(un));
13335 
13336 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13337 	    "sd_return_failed_command_no_restart: exit\n");
13338 }
13339 
13340 
13341 /*
13342  *    Function: sd_retry_command
13343  *
13344  * Description: Queue up a command for retry, or (optionally) fail it
13345  *		if retry counts are exhausted.
13346  *
13347  *   Arguments: un - Pointer to the sd_lun struct for the target.
13348  *
13349  *		bp - Pointer to the buf for the command to be retried.
13350  *
13351  *		retry_check_flag - Flag to see which (if any) of the retry
13352  *		   counts should be decremented/checked. If the indicated
13353  *		   retry count is exhausted, then the command will not be
13354  *		   retried; it will be failed instead. This should use a
13355  *		   value equal to one of the following:
13356  *
13357  *			SD_RETRIES_NOCHECK
13358  *			SD_RETRIES_STANDARD
13359  *			SD_RETRIES_VICTIM
13360  *
13361  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13362  *		   if the check should be made to see if FLAG_ISOLATE is set
13363  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13364  *		   not retried; it is simply failed.
13365  *
13366  *		user_funcp - Ptr to function to call before dispatching the
13367  *		   command. May be NULL if no action needs to be performed.
13368  *		   (Primarily intended for printing messages.)
13369  *
13370  *		user_arg - Optional argument to be passed along to
13371  *		   the user_funcp call.
13372  *
13373  *		failure_code - errno return code to set in the bp if the
13374  *		   command is going to be failed.
13375  *
13376  *		retry_delay - Retry delay interval in (clock_t) units. May
13377  *		   be zero, which indicates that the command should be
13378  *		   retried immediately (i.e., without an intervening delay).
13379  *
13380  *		statp - Ptr to kstat function to be updated if the command
13381  *		   is queued for a delayed retry. May be NULL if no kstat
13382  *		   update is desired.
13383  *
13384  *     Context: May be called from interrupt context.
13385  */
13386 
13387 static void
13388 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13389 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
13390 	code), void *user_arg, int failure_code,  clock_t retry_delay,
13391 	void (*statp)(kstat_io_t *))
13392 {
13393 	struct sd_xbuf	*xp;
13394 	struct scsi_pkt	*pktp;
13395 
13396 	ASSERT(un != NULL);
13397 	ASSERT(mutex_owned(SD_MUTEX(un)));
13398 	ASSERT(bp != NULL);
13399 	xp = SD_GET_XBUF(bp);
13400 	ASSERT(xp != NULL);
13401 	pktp = SD_GET_PKTP(bp);
13402 	ASSERT(pktp != NULL);
13403 
13404 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13405 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13406 
13407 	/*
13408 	 * If we are syncing or dumping, fail the command to avoid
13409 	 * recursively calling back into scsi_transport().
13410 	 */
13411 	if (ddi_in_panic()) {
13412 		goto fail_command_no_log;
13413 	}
13414 
13415 	/*
13416 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13417 	 * log an error and fail the command.
13418 	 */
13419 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13420 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13421 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13422 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13423 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13424 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13425 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13426 		goto fail_command;
13427 	}
13428 
13429 	/*
13430 	 * If we are suspended, then put the command onto the head of the
13431 	 * wait queue since we don't want to start more commands.
13432 	 */
13433 	switch (un->un_state) {
13434 	case SD_STATE_SUSPENDED:
13435 	case SD_STATE_DUMPING:
13436 		bp->av_forw = un->un_waitq_headp;
13437 		un->un_waitq_headp = bp;
13438 		if (un->un_waitq_tailp == NULL) {
13439 			un->un_waitq_tailp = bp;
13440 		}
13441 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13442 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13443 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13444 		return;
13445 	default:
13446 		break;
13447 	}
13448 
13449 	/*
13450 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13451 	 * is set; if it is then we do not want to retry the command.
13452 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13453 	 */
13454 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13455 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13456 			goto fail_command;
13457 		}
13458 	}
13459 
13460 
13461 	/*
13462 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13463 	 * command timeout or a selection timeout has occurred. This means
13464 	 * that we were unable to establish any kind of communication with
13465 	 * the target, and subsequent retries and/or commands are likely
13466 	 * to encounter similar results and take a long time to complete.
13467 	 *
13468 	 * If this is a failfast error condition, we need to update the
13469 	 * failfast state, even if this bp does not have B_FAILFAST set.
13470 	 */
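	/*
	 * Summary of the failfast state machine implemented below:
	 * from INACTIVE, a first failfast-class error saves the bp in
	 * un_failfast_bp ("failfast pending"); a second failfast-class
	 * error on that same bp moves the state to ACTIVE and flushes the
	 * queues; any retry that is not classified SD_RETRIES_FAILFAST
	 * resets the state to INACTIVE. While ACTIVE, a further
	 * failfast-class error on a buf with B_FAILFAST set fails the buf
	 * immediately rather than retrying it.
	 */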
13471 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13472 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13473 			ASSERT(un->un_failfast_bp == NULL);
13474 			/*
13475 			 * If we are already in the active failfast state, and
13476 			 * another failfast error condition has been detected,
13477 			 * then fail this command if it has B_FAILFAST set.
13478 			 * If B_FAILFAST is clear, then maintain the legacy
13479 	 * behavior of retrying heroically, even though this will
13480 			 * take a lot more time to fail the command.
13481 			 */
13482 			if (bp->b_flags & B_FAILFAST) {
13483 				goto fail_command;
13484 			}
13485 		} else {
13486 			/*
13487 			 * We're not in the active failfast state, but we
13488 			 * have a failfast error condition, so we must begin
13489 			 * transition to the next state. We do this regardless
13490 			 * of whether or not this bp has B_FAILFAST set.
13491 			 */
13492 			if (un->un_failfast_bp == NULL) {
13493 				/*
13494 				 * This is the first bp to meet a failfast
13495 				 * condition so save it on un_failfast_bp &
13496 				 * do normal retry processing. Do not enter
13497 				 * active failfast state yet. This marks
13498 				 * entry into the "failfast pending" state.
13499 				 */
13500 				un->un_failfast_bp = bp;
13501 
13502 			} else if (un->un_failfast_bp == bp) {
13503 				/*
13504 				 * This is the second time *this* bp has
13505 				 * encountered a failfast error condition,
13506 				 * so enter active failfast state & flush
13507 				 * queues as appropriate.
13508 				 */
13509 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13510 				un->un_failfast_bp = NULL;
13511 				sd_failfast_flushq(un);
13512 
13513 				/*
13514 				 * Fail this bp now if B_FAILFAST set;
13515 				 * otherwise continue with retries. (It would
13516 				 * be pretty ironic if this bp succeeded on a
13517 				 * subsequent retry after we just flushed all
13518 				 * the queues).
13519 				 */
13520 				if (bp->b_flags & B_FAILFAST) {
13521 					goto fail_command;
13522 				}
13523 
13524 #if !defined(lint) && !defined(__lint)
13525 			} else {
13526 				/*
13527 				 * If neither of the preceding conditionals
13528 				 * was true, it means that there is some
13529 				 * *other* bp that has met an initial failfast
13530 				 * condition and is currently either being
13531 				 * retried or is waiting to be retried. In
13532 				 * that case we should perform normal retry
13533 				 * processing on *this* bp, since there is a
13534 				 * chance that the current failfast condition
13535 				 * is transient and recoverable. If that does
13536 				 * not turn out to be the case, then retries
13537 				 * will be cleared when the wait queue is
13538 				 * flushed anyway.
13539 				 */
13540 #endif
13541 			}
13542 		}
13543 	} else {
13544 		/*
13545 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13546 		 * likely were able to at least establish some level of
13547 		 * communication with the target and subsequent commands
13548 		 * and/or retries are likely to get through to the target.
13549 		 * In this case we want to be aggressive about clearing
13550 		 * the failfast state. Note that this does not affect
13551 		 * the "failfast pending" condition.
13552 		 */
13553 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13554 	}
13555 
13556 
13557 	/*
13558 	 * Check the specified retry count to see if we can still do
13559 	 * any retries with this pkt before we should fail it.
13560 	 */
13561 	switch (retry_check_flag & SD_RETRIES_MASK) {
13562 	case SD_RETRIES_VICTIM:
13563 		/*
13564 		 * Check the victim retry count. If exhausted, then fall
13565 		 * thru & check against the standard retry count.
13566 		 */
13567 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13568 			/* Increment count & proceed with the retry */
13569 			xp->xb_victim_retry_count++;
13570 			break;
13571 		}
13572 		/* Victim retries exhausted, fall back to std. retries... */
13573 		/* FALLTHRU */
13574 
13575 	case SD_RETRIES_STANDARD:
13576 		if (xp->xb_retry_count >= un->un_retry_count) {
13577 			/* Retries exhausted, fail the command */
13578 			SD_TRACE(SD_LOG_IO_CORE, un,
13579 			    "sd_retry_command: retries exhausted!\n");
13580 			/*
13581 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13582 			 * commands with nonzero pkt_resid.
13583 			 */
13584 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13585 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13586 			    (pktp->pkt_resid != 0)) {
13587 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13588 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13589 					SD_UPDATE_B_RESID(bp, pktp);
13590 				}
13591 			}
13592 			goto fail_command;
13593 		}
13594 		xp->xb_retry_count++;
13595 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13596 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13597 		break;
13598 
13599 	case SD_RETRIES_UA:
13600 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13601 			/* Retries exhausted, fail the command */
13602 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13603 			    "Unit Attention retries exhausted. "
13604 			    "Check the target.\n");
13605 			goto fail_command;
13606 		}
13607 		xp->xb_ua_retry_count++;
13608 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13609 		    "sd_retry_command: retry count:%d\n",
13610 		    xp->xb_ua_retry_count);
13611 		break;
13612 
13613 	case SD_RETRIES_BUSY:
13614 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13615 			/* Retries exhausted, fail the command */
13616 			SD_TRACE(SD_LOG_IO_CORE, un,
13617 			    "sd_retry_command: retries exhausted!\n");
13618 			goto fail_command;
13619 		}
13620 		xp->xb_retry_count++;
13621 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13622 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13623 		break;
13624 
13625 	case SD_RETRIES_NOCHECK:
13626 	default:
13627 		/* No retry count to check. Just proceed with the retry */
13628 		break;
13629 	}
13630 
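	/*
	 * FLAG_HEAD requests that the transport place this command at the
	 * head of its queue, so the retry runs ahead of commands that were
	 * queued after the original attempt.
	 */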
13631 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13632 
13633 	/*
13634 	 * If we were given a zero timeout, we must attempt to retry the
13635 	 * command immediately (ie, without a delay).
13636 	 */
13637 	if (retry_delay == 0) {
13638 		/*
13639 		 * Check some limiting conditions to see if we can actually
13640 		 * do the immediate retry.  If we cannot, then we must
13641 		 * fall back to queueing up a delayed retry.
13642 		 */
13643 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13644 			/*
13645 			 * We are at the throttle limit for the target,
13646 			 * fall back to delayed retry.
13647 			 */
13648 			retry_delay = SD_BSY_TIMEOUT;
13649 			statp = kstat_waitq_enter;
13650 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13651 			    "sd_retry_command: immed. retry hit "
13652 			    "throttle!\n");
13653 		} else {
13654 			/*
13655 			 * We're clear to proceed with the immediate retry.
13656 			 * First call the user-provided function (if any)
13657 			 */
13658 			if (user_funcp != NULL) {
13659 				(*user_funcp)(un, bp, user_arg,
13660 				    SD_IMMEDIATE_RETRY_ISSUED);
13661 #ifdef __lock_lint
13662 				sd_print_incomplete_msg(un, bp, user_arg,
13663 				    SD_IMMEDIATE_RETRY_ISSUED);
13664 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13665 				    SD_IMMEDIATE_RETRY_ISSUED);
13666 				sd_print_sense_failed_msg(un, bp, user_arg,
13667 				    SD_IMMEDIATE_RETRY_ISSUED);
13668 #endif
13669 			}
13670 
13671 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13672 			    "sd_retry_command: issuing immediate retry\n");
13673 
13674 			/*
13675 			 * Call sd_start_cmds() to transport the command to
13676 			 * the target.
13677 			 */
13678 			sd_start_cmds(un, bp);
13679 
13680 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13681 			    "sd_retry_command exit\n");
13682 			return;
13683 		}
13684 	}
13685 
13686 	/*
13687 	 * Set up to retry the command after a delay.
13688 	 * First call the user-provided function (if any)
13689 	 */
13690 	if (user_funcp != NULL) {
13691 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13692 	}
13693 
13694 	sd_set_retry_bp(un, bp, retry_delay, statp);
13695 
13696 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13697 	return;
13698 
13699 fail_command:
13700 
13701 	if (user_funcp != NULL) {
13702 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13703 	}
13704 
13705 fail_command_no_log:
13706 
13707 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13708 	    "sd_retry_command: returning failed command\n");
13709 
13710 	sd_return_failed_command(un, bp, failure_code);
13711 
13712 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13713 }
13714 
13715 
13716 /*
13717  *    Function: sd_set_retry_bp
13718  *
13719  * Description: Set up the given bp for retry.
13720  *
13721  *   Arguments: un - ptr to associated softstate
13722  *		bp - ptr to buf(9S) for the command
13723  *		retry_delay - time interval before issuing retry (may be 0)
13724  *		statp - optional pointer to kstat function
13725  *
13726  *     Context: May be called under interrupt context
13727  */
13728 
13729 static void
13730 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13731 	void (*statp)(kstat_io_t *))
13732 {
13733 	ASSERT(un != NULL);
13734 	ASSERT(mutex_owned(SD_MUTEX(un)));
13735 	ASSERT(bp != NULL);
13736 
13737 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13738 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13739 
13740 	/*
13741 	 * Indicate that the command is being retried. This will not allow any
13742 	 * other commands on the wait queue to be transported to the target
13743 	 * until this command has been completed (success or failure). The
13744 	 * "retry command" is not transported to the target until the given
13745 	 * time delay expires, unless the user specified a 0 retry_delay.
13746 	 *
13747 	 * Note: the timeout(9F) callback routine is what actually calls
13748 	 * sd_start_cmds() to transport the command, with the exception of a
13749 	 * zero retry_delay. The only current implementor of a zero retry delay
13750 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
13751 	 */
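	/*
	 * The three cases handled below, in brief:
	 * 1) No retry is pending: claim un_retry_bp for this bp.
	 * 2) This bp already is un_retry_bp: record statp and schedule the
	 *    retry timeout, unless a START_STOP_UNIT or DIRECT_PRIORITY
	 *    callback will restart I/O processing instead.
	 * 3) Some other bp is being retried: requeue this bp at (or just
	 *    behind) the head of the wait queue.
	 */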
13752 	if (un->un_retry_bp == NULL) {
13753 		ASSERT(un->un_retry_statp == NULL);
13754 		un->un_retry_bp = bp;
13755 
13756 		/*
13757 		 * If the user has not specified a delay, the command should
13758 		 * be queued and no timeout should be scheduled.
13759 		 */
13760 		if (retry_delay == 0) {
13761 			/*
13762 			 * Save the kstat pointer that will be used in the
13763 			 * call to SD_UPDATE_KSTATS() below, so that
13764 			 * sd_start_cmds() can correctly decrement the waitq
13765 			 * count when it is time to transport this command.
13766 			 */
13767 			un->un_retry_statp = statp;
13768 			goto done;
13769 		}
13770 	}
13771 
13772 	if (un->un_retry_bp == bp) {
13773 		/*
13774 		 * Save the kstat pointer that will be used in the call to
13775 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
13776 		 * correctly decrement the waitq count when it is time to
13777 		 * transport this command.
13778 		 */
13779 		un->un_retry_statp = statp;
13780 
13781 		/*
13782 		 * Schedule a timeout if:
13783 		 *   1) The user has specified a delay.
13784 		 *   2) There is not a START_STOP_UNIT callback pending.
13785 		 *
13786 		 * If no delay has been specified, then it is up to the caller
13787 		 * to ensure that IO processing continues without stalling.
13788 		 * Effectively, this means that the caller will issue the
13789 		 * required call to sd_start_cmds(). The START_STOP_UNIT
13790 		 * callback does this after the START STOP UNIT command has
13791 		 * completed. In either of these cases we should not schedule
13792 		 * a timeout callback here.  Also don't schedule the timeout if
13793 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
13794 		 */
13795 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
13796 		    (un->un_direct_priority_timeid == NULL)) {
13797 			un->un_retry_timeid =
13798 			    timeout(sd_start_retry_command, un, retry_delay);
13799 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13800 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
13801 			    " bp:0x%p un_retry_timeid:0x%p\n",
13802 			    un, bp, un->un_retry_timeid);
13803 		}
13804 	} else {
13805 		/*
13806 		 * We only get in here if there is already another command
13807 		 * waiting to be retried.  In this case, we just put the
13808 		 * given command onto the wait queue, so it can be transported
13809 		 * after the current retry command has completed.
13810 		 *
13811 		 * Also we have to make sure that if the command at the head
13812 		 * of the wait queue is the un_failfast_bp, we do not put any
13813 		 * other commands that are to be retried ahead of it.
13814 		 */
13815 		if ((un->un_failfast_bp != NULL) &&
13816 		    (un->un_failfast_bp == un->un_waitq_headp)) {
13817 			/*
13818 			 * Enqueue this command AFTER the first command on
13819 			 * the wait queue (which is also un_failfast_bp).
13820 			 */
13821 			bp->av_forw = un->un_waitq_headp->av_forw;
13822 			un->un_waitq_headp->av_forw = bp;
13823 			if (un->un_waitq_headp == un->un_waitq_tailp) {
13824 				un->un_waitq_tailp = bp;
13825 			}
13826 		} else {
13827 			/* Enqueue this command at the head of the waitq. */
13828 			bp->av_forw = un->un_waitq_headp;
13829 			un->un_waitq_headp = bp;
13830 			if (un->un_waitq_tailp == NULL) {
13831 				un->un_waitq_tailp = bp;
13832 			}
13833 		}
13834 
13835 		if (statp == NULL) {
13836 			statp = kstat_waitq_enter;
13837 		}
13838 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13839 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
13840 	}
13841 
13842 done:
13843 	if (statp != NULL) {
13844 		SD_UPDATE_KSTATS(un, statp, bp);
13845 	}
13846 
13847 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13848 	    "sd_set_retry_bp: exit un:0x%p\n", un);
13849 }
13850 
13851 
13852 /*
13853  *    Function: sd_start_retry_command
13854  *
13855  * Description: Start the command that has been waiting on the target's
13856  *		retry queue.  Called from timeout(9F) context after the
13857  *		retry delay interval has expired.
13858  *
13859  *   Arguments: arg - pointer to associated softstate for the device.
13860  *
13861  *     Context: timeout(9F) thread context.  May not sleep.
13862  */
13863 
13864 static void
13865 sd_start_retry_command(void *arg)
13866 {
13867 	struct sd_lun *un = arg;
13868 
13869 	ASSERT(un != NULL);
13870 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13871 
13872 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13873 	    "sd_start_retry_command: entry\n");
13874 
13875 	mutex_enter(SD_MUTEX(un));
13876 
13877 	un->un_retry_timeid = NULL;
13878 
13879 	if (un->un_retry_bp != NULL) {
13880 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13881 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
13882 		    un, un->un_retry_bp);
13883 		sd_start_cmds(un, un->un_retry_bp);
13884 	}
13885 
13886 	mutex_exit(SD_MUTEX(un));
13887 
13888 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13889 	    "sd_start_retry_command: exit\n");
13890 }
13891 
13892 
13893 /*
13894  *    Function: sd_start_direct_priority_command
13895  *
13896  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
13897  *		received TRAN_BUSY when we called scsi_transport() to send it
13898  *		to the underlying HBA. This function is called from timeout(9F)
13899  *		context after the delay interval has expired.
13900  *
13901  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
13902  *
13903  *     Context: timeout(9F) thread context.  May not sleep.
13904  */
13905 
13906 static void
13907 sd_start_direct_priority_command(void *arg)
13908 {
13909 	struct buf	*priority_bp = arg;
13910 	struct sd_lun	*un;
13911 
13912 	ASSERT(priority_bp != NULL);
13913 	un = SD_GET_UN(priority_bp);
13914 	ASSERT(un != NULL);
13915 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13916 
13917 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13918 	    "sd_start_direct_priority_command: entry\n");
13919 
13920 	mutex_enter(SD_MUTEX(un));
13921 	un->un_direct_priority_timeid = NULL;
13922 	sd_start_cmds(un, priority_bp);
13923 	mutex_exit(SD_MUTEX(un));
13924 
13925 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13926 	    "sd_start_direct_priority_command: exit\n");
13927 }
13928 
13929 
13930 /*
13931  *    Function: sd_send_request_sense_command
13932  *
13933  * Description: Sends a REQUEST SENSE command to the target
13934  *
13935  *     Context: May be called from interrupt context.
13936  */
13937 
13938 static void
13939 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
13940 	struct scsi_pkt *pktp)
13941 {
13942 	ASSERT(bp != NULL);
13943 	ASSERT(un != NULL);
13944 	ASSERT(mutex_owned(SD_MUTEX(un)));
13945 
13946 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
13947 	    "entry: buf:0x%p\n", bp);
13948 
13949 	/*
13950 	 * If we are syncing or dumping, then fail the command to avoid a
13951 	 * recursive callback into scsi_transport(). Also fail the command
13952 	 * if we are suspended (legacy behavior).
13953 	 */
13954 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
13955 	    (un->un_state == SD_STATE_DUMPING)) {
13956 		sd_return_failed_command(un, bp, EIO);
13957 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13958 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
13959 		return;
13960 	}
13961 
13962 	/*
13963 	 * Retry the failed command and don't issue the request sense if:
13964 	 *    1) the sense buf is busy
13965 	 *    2) we have 1 or more outstanding commands on the target
13966 	 *    (the sense data will be cleared or invalidated anyway)
13967 	 *
13968 	 * Note: There could be an issue with not checking a retry limit here;
13969 	 * the problem is determining which retry limit to check.
13970 	 */
13971 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
13972 		/* Don't retry if the command is flagged as non-retryable */
13973 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
13974 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13975 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
13976 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13977 			    "sd_send_request_sense_command: "
13978 			    "at full throttle, retrying exit\n");
13979 		} else {
13980 			sd_return_failed_command(un, bp, EIO);
13981 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13982 			    "sd_send_request_sense_command: "
13983 			    "at full throttle, non-retryable exit\n");
13984 		}
13985 		return;
13986 	}
13987 
13988 	sd_mark_rqs_busy(un, bp);
13989 	sd_start_cmds(un, un->un_rqs_bp);
13990 
13991 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13992 	    "sd_send_request_sense_command: exit\n");
13993 }
13994 
13995 
13996 /*
13997  *    Function: sd_mark_rqs_busy
13998  *
13999  * Description: Indicate that the request sense bp for this instance is
14000  *		in use.
14001  *
14002  *     Context: May be called under interrupt context
14003  */
14004 
14005 static void
14006 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14007 {
14008 	struct sd_xbuf	*sense_xp;
14009 
14010 	ASSERT(un != NULL);
14011 	ASSERT(bp != NULL);
14012 	ASSERT(mutex_owned(SD_MUTEX(un)));
14013 	ASSERT(un->un_sense_isbusy == 0);
14014 
14015 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14016 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14017 
14018 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14019 	ASSERT(sense_xp != NULL);
14020 
14021 	SD_INFO(SD_LOG_IO, un,
14022 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14023 
14024 	ASSERT(sense_xp->xb_pktp != NULL);
14025 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14026 	    == (FLAG_SENSING | FLAG_HEAD));
14027 
14028 	un->un_sense_isbusy = 1;
14029 	un->un_rqs_bp->b_resid = 0;
14030 	sense_xp->xb_pktp->pkt_resid  = 0;
14031 	sense_xp->xb_pktp->pkt_reason = 0;
14032 
14033 	/* So we can get back the bp at interrupt time! */
14034 	sense_xp->xb_sense_bp = bp;
14035 
14036 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14037 
14038 	/*
14039 	 * Mark this buf as awaiting sense data. (This is already set in
14040 	 * the pkt_flags for the RQS packet.)
14041 	 */
14042 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14043 
14044 	sense_xp->xb_retry_count	= 0;
14045 	sense_xp->xb_victim_retry_count = 0;
14046 	sense_xp->xb_ua_retry_count	= 0;
14047 	sense_xp->xb_dma_resid  = 0;
14048 
14049 	/* Clean up the fields for auto-request sense */
14050 	sense_xp->xb_sense_status = 0;
14051 	sense_xp->xb_sense_state  = 0;
14052 	sense_xp->xb_sense_resid  = 0;
14053 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14054 
14055 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14056 }
14057 
14058 
14059 /*
14060  *    Function: sd_mark_rqs_idle
14061  *
14062  * Description: SD_MUTEX must be held continuously through this routine
14063  *		to prevent reuse of the rqs struct before the caller can
14064  *		complete its processing.
14065  *
14066  * Return Code: Pointer to the RQS buf
14067  *
14068  *     Context: May be called under interrupt context
14069  */
14070 
14071 static struct buf *
14072 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14073 {
14074 	struct buf *bp;
14075 	ASSERT(un != NULL);
14076 	ASSERT(sense_xp != NULL);
14077 	ASSERT(mutex_owned(SD_MUTEX(un)));
14078 	ASSERT(un->un_sense_isbusy != 0);
14079 
14080 	un->un_sense_isbusy = 0;
14081 	bp = sense_xp->xb_sense_bp;
14082 	sense_xp->xb_sense_bp = NULL;
14083 
14084 	/* This pkt is no longer interested in getting sense data */
14085 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14086 
14087 	return (bp);
14088 }
14089 
14090 
14091 
14092 /*
14093  *    Function: sd_alloc_rqs
14094  *
14095  * Description: Set up the unit to receive auto request sense data
14096  *
14097  * Return Code: DDI_SUCCESS or DDI_FAILURE
14098  *
14099  *     Context: Called under attach(9E) context
14100  */
14101 
14102 static int
14103 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14104 {
14105 	struct sd_xbuf *xp;
14106 
14107 	ASSERT(un != NULL);
14108 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14109 	ASSERT(un->un_rqs_bp == NULL);
14110 	ASSERT(un->un_rqs_pktp == NULL);
14111 
14112 	/*
14113 	 * First allocate the required buf and scsi_pkt structs, then set up
14114 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14115 	 */
14116 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14117 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14118 	if (un->un_rqs_bp == NULL) {
14119 		return (DDI_FAILURE);
14120 	}
14121 
14122 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14123 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14124 
14125 	if (un->un_rqs_pktp == NULL) {
14126 		sd_free_rqs(un);
14127 		return (DDI_FAILURE);
14128 	}
14129 
14130 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14131 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14132 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14133 
14134 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14135 
14136 	/* Set up the other needed members in the ARQ scsi_pkt. */
14137 	un->un_rqs_pktp->pkt_comp   = sdintr;
14138 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14139 	un->un_rqs_pktp->pkt_flags |=
14140 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14141 
14142 	/*
14143 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
14144 	 * provide any initpkt/destroypkt routines, as we take care of
14145 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14146 	 */
14147 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14148 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14149 	xp->xb_pktp = un->un_rqs_pktp;
14150 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14151 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14152 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14153 
14154 	/*
14155 	 * Save the pointer to the request sense private bp so it can
14156 	 * be retrieved in sdintr.
14157 	 */
14158 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14159 	ASSERT(un->un_rqs_bp->b_private == xp);
14160 
14161 	/*
14162 	 * See if the HBA supports auto-request sense for the specified
14163 	 * target/lun. If it does, then try to enable it (if not already
14164 	 * enabled).
14165 	 *
14166 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14167 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14168 	 * return success.  However, in both of these cases ARQ is always
14169 	 * enabled and scsi_ifgetcap will always return true. The best approach
14170 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14171 	 *
14172 	 * The third case is an HBA (adp) that always returns enabled on
14173 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
14174 	 * is to issue a scsi_ifsetcap and then a scsi_ifgetcap.
14175 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
14176 	 */
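	/*
	 * Resulting probe order for the non-fibre path below, in brief:
	 * on x86, first scsi_ifsetcap("auto-rqsense", 1, 1) as the Adaptec
	 * workaround; then scsi_ifgetcap("auto-rqsense", 1), where 0 means
	 * supported but disabled (so attempt to enable it), 1 means already
	 * enabled, and anything else means ARQ is unsupported.
	 */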
14177 
14178 	if (un->un_f_is_fibre == TRUE) {
14179 		un->un_f_arq_enabled = TRUE;
14180 	} else {
14181 #if defined(__i386) || defined(__amd64)
14182 		/*
14183 		 * Circumvent the Adaptec bug, remove this code when
14184 		 * the bug is fixed
14185 		 */
14186 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14187 #endif
14188 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14189 		case 0:
14190 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14191 				"sd_alloc_rqs: HBA supports ARQ\n");
14192 			/*
14193 			 * ARQ is supported by this HBA but currently is not
14194 			 * enabled. Attempt to enable it and if successful then
14195 			 * mark this instance as ARQ enabled.
14196 			 */
14197 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14198 				== 1) {
14199 				/* Successfully enabled ARQ in the HBA */
14200 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14201 					"sd_alloc_rqs: ARQ enabled\n");
14202 				un->un_f_arq_enabled = TRUE;
14203 			} else {
14204 				/* Could not enable ARQ in the HBA */
14205 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14206 				"sd_alloc_rqs: failed ARQ enable\n");
14207 				un->un_f_arq_enabled = FALSE;
14208 			}
14209 			break;
14210 		case 1:
14211 			/*
14212 			 * ARQ is supported by this HBA and is already enabled.
14213 			 * Just mark ARQ as enabled for this instance.
14214 			 */
14215 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14216 				"sd_alloc_rqs: ARQ already enabled\n");
14217 			un->un_f_arq_enabled = TRUE;
14218 			break;
14219 		default:
14220 			/*
14221 			 * ARQ is not supported by this HBA; disable it for this
14222 			 * instance.
14223 			 */
14224 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14225 				"sd_alloc_rqs: HBA does not support ARQ\n");
14226 			un->un_f_arq_enabled = FALSE;
14227 			break;
14228 		}
14229 	}
14230 
14231 	return (DDI_SUCCESS);
14232 }
14233 
14234 
14235 /*
14236  *    Function: sd_free_rqs
14237  *
14238  * Description: Cleanup for the per-instance RQS command.
14239  *
14240  *     Context: Kernel thread context
14241  */
14242 
14243 static void
14244 sd_free_rqs(struct sd_lun *un)
14245 {
14246 	ASSERT(un != NULL);
14247 
14248 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14249 
14250 	/*
14251 	 * If consistent memory is bound to a scsi_pkt, the pkt
14252 	 * has to be destroyed *before* freeing the consistent memory.
14253 	 * Do not change the sequence of these operations.
14254 	 * scsi_destroy_pkt() might otherwise access memory after it has
14255 	 * been freed by scsi_free_consistent_buf(), which isn't allowed.
14256 	 */
14257 	if (un->un_rqs_pktp != NULL) {
14258 		scsi_destroy_pkt(un->un_rqs_pktp);
14259 		un->un_rqs_pktp = NULL;
14260 	}
14261 
14262 	if (un->un_rqs_bp != NULL) {
14263 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14264 		scsi_free_consistent_buf(un->un_rqs_bp);
14265 		un->un_rqs_bp = NULL;
14266 	}
14267 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14268 }
14269 
14270 
14271 
14272 /*
14273  *    Function: sd_reduce_throttle
14274  *
14275  * Description: Reduces the maximum # of outstanding commands on a
14276  *		target to the current number of outstanding commands.
14277  *		Queues a timeout(9F) callback to restore the limit
14278  *		after a specified interval has elapsed.
14279  *		Typically used when we get a TRAN_BUSY return code
14280  *		back from scsi_transport().
14281  *
14282  *   Arguments: un - ptr to the sd_lun softstate struct
14283  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14284  *
14285  *     Context: May be called from interrupt context
14286  */
14287 
14288 static void
14289 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14290 {
14291 	ASSERT(un != NULL);
14292 	ASSERT(mutex_owned(SD_MUTEX(un)));
14293 	ASSERT(un->un_ncmds_in_transport >= 0);
14294 
14295 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14296 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14297 	    un, un->un_throttle, un->un_ncmds_in_transport);
14298 
14299 	if (un->un_throttle > 1) {
14300 		if (un->un_f_use_adaptive_throttle == TRUE) {
14301 			switch (throttle_type) {
14302 			case SD_THROTTLE_TRAN_BUSY:
14303 				if (un->un_busy_throttle == 0) {
14304 					un->un_busy_throttle = un->un_throttle;
14305 				}
14306 				break;
14307 			case SD_THROTTLE_QFULL:
14308 				un->un_busy_throttle = 0;
14309 				break;
14310 			default:
14311 				ASSERT(FALSE);
14312 			}
14313 
14314 			if (un->un_ncmds_in_transport > 0) {
14315 				un->un_throttle = un->un_ncmds_in_transport;
14316 			}
14317 
14318 		} else {
14319 			if (un->un_ncmds_in_transport == 0) {
14320 				un->un_throttle = 1;
14321 			} else {
14322 				un->un_throttle = un->un_ncmds_in_transport;
14323 			}
14324 		}
14325 	}
14326 
14327 	/* Reschedule the timeout if none is currently active */
14328 	if (un->un_reset_throttle_timeid == NULL) {
14329 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14330 		    un, SD_THROTTLE_RESET_INTERVAL);
14331 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14332 		    "sd_reduce_throttle: timeout scheduled!\n");
14333 	}
14334 
14335 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14336 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14337 }
14338 
14339 
14340 
14341 /*
14342  *    Function: sd_restore_throttle
14343  *
14344  * Description: Callback function for timeout(9F).  Resets the current
14345  *		value of un->un_throttle to its default.
14346  *
14347  *   Arguments: arg - pointer to associated softstate for the device.
14348  *
14349  *     Context: May be called from interrupt context
14350  */
14351 
14352 static void
14353 sd_restore_throttle(void *arg)
14354 {
14355 	struct sd_lun	*un = arg;
14356 
14357 	ASSERT(un != NULL);
14358 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14359 
14360 	mutex_enter(SD_MUTEX(un));
14361 
14362 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14363 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14364 
14365 	un->un_reset_throttle_timeid = NULL;
14366 
14367 	if (un->un_f_use_adaptive_throttle == TRUE) {
14368 		/*
14369 		 * If un_busy_throttle is nonzero, then it contains the
14370 		 * value that un_throttle had when we got a TRAN_BUSY back
14371 		 * from scsi_transport(). We want to revert to this
14372 		 * value.
14373 		 *
14374 		 * In the QFULL case, the throttle limit will incrementally
14375 		 * increase until it reaches max throttle.
14376 		 */
14377 		if (un->un_busy_throttle > 0) {
14378 			un->un_throttle = un->un_busy_throttle;
14379 			un->un_busy_throttle = 0;
14380 		} else {
14381 			/*
14382 			 * Increase the throttle by 10% to open the gate
14383 			 * slowly; schedule another restore if the saved
14384 			 * throttle has not yet been reached.
14385 			 */
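			/*
			 * Illustrative ramp: the step is
			 * max(un_throttle / 10, 1), so the throttle grows by
			 * one per interval while it is below 20, then by
			 * roughly 10% per interval (20, 22, 24, ...), and is
			 * capped at un_saved_throttle.
			 */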
14386 			short throttle;
14387 			if (sd_qfull_throttle_enable) {
14388 				throttle = un->un_throttle +
14389 				    max((un->un_throttle / 10), 1);
14390 				un->un_throttle =
14391 				    (throttle < un->un_saved_throttle) ?
14392 				    throttle : un->un_saved_throttle;
14393 				if (un->un_throttle < un->un_saved_throttle) {
14394 					un->un_reset_throttle_timeid =
14395 					    timeout(sd_restore_throttle, un,
14396 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
14397 				}
14398 			}
14399 		}
14400 
14401 		/*
14402 		 * If un_throttle has fallen below the low-water mark, we
14403 		 * restore the maximum value here (and allow it to ratchet
14404 		 * down again if necessary).
14405 		 */
14406 		if (un->un_throttle < un->un_min_throttle) {
14407 			un->un_throttle = un->un_saved_throttle;
14408 		}
14409 	} else {
14410 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14411 		    "restoring limit from 0x%x to 0x%x\n",
14412 		    un->un_throttle, un->un_saved_throttle);
14413 		un->un_throttle = un->un_saved_throttle;
14414 	}
14415 
14416 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14417 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14418 
14419 	sd_start_cmds(un, NULL);
14420 
14421 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14422 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14423 	    un, un->un_throttle);
14424 
14425 	mutex_exit(SD_MUTEX(un));
14426 
14427 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14428 }
14429 
14430 /*
14431  *    Function: sdrunout
14432  *
14433  * Description: Callback routine for scsi_init_pkt when a resource allocation
14434  *		fails.
14435  *
14436  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14437  *		soft state instance.
14438  *
14439  * Return Code: The scsi_init_pkt routine allows for the callback function to
14440  *		return a 0 indicating the callback should be rescheduled or a 1
14441  *		indicating not to reschedule. This routine always returns 1
14442  *		because the driver always provides a callback function to
14443  *		scsi_init_pkt. This results in a callback always being scheduled
14444  *		(via the scsi_init_pkt callback implementation) if a resource
14445  *		failure occurs.
14446  *
14447  *     Context: This callback function may not block or call routines that block.
14448  *
14449  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14450  *		request persisting at the head of the list, which cannot be
14451  *		satisfied even after multiple retries. In the future the driver
14452  *		may implement some type of maximum runout count before failing
14453  *		an I/O.
14454  */
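/*
 * Registration sketch (illustrative only; the actual call is made from the
 * initpkt routines invoked by sd_start_cmds()):
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, cmdlen,
 *	    statuslen, privatelen, flags, sdrunout, (caddr_t)un);
 *
 * A NULL return with a callback supplied means the HBA framework will
 * invoke sdrunout() when resources may be available again.
 */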
14455 
14456 static int
14457 sdrunout(caddr_t arg)
14458 {
14459 	struct sd_lun	*un = (struct sd_lun *)arg;
14460 
14461 	ASSERT(un != NULL);
14462 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14463 
14464 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14465 
14466 	mutex_enter(SD_MUTEX(un));
14467 	sd_start_cmds(un, NULL);
14468 	mutex_exit(SD_MUTEX(un));
14469 	/*
14470 	 * This callback routine always returns 1 (i.e. do not reschedule)
14471 	 * because we always specify sdrunout as the callback handler for
14472 	 * scsi_init_pkt inside the call to sd_start_cmds.
14473 	 */
14474 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14475 	return (1);
14476 }
14477 
14478 
14479 /*
14480  *    Function: sdintr
14481  *
14482  * Description: Completion callback routine for scsi_pkt(9S) structs
14483  *		sent to the HBA driver via scsi_transport(9F).
14484  *
14485  *     Context: Interrupt context
14486  */
14487 
14488 static void
14489 sdintr(struct scsi_pkt *pktp)
14490 {
14491 	struct buf	*bp;
14492 	struct sd_xbuf	*xp;
14493 	struct sd_lun	*un;
14494 
14495 	ASSERT(pktp != NULL);
14496 	bp = (struct buf *)pktp->pkt_private;
14497 	ASSERT(bp != NULL);
14498 	xp = SD_GET_XBUF(bp);
14499 	ASSERT(xp != NULL);
14500 	ASSERT(xp->xb_pktp != NULL);
14501 	un = SD_GET_UN(bp);
14502 	ASSERT(un != NULL);
14503 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14504 
14505 #ifdef SD_FAULT_INJECTION
14506 
14507 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
14508 	/* SD FaultInjection */
14509 	sd_faultinjection(pktp);
14510 
14511 #endif /* SD_FAULT_INJECTION */
14512 
14513 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14514 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14515 
14516 	mutex_enter(SD_MUTEX(un));
14517 
14518 	/* Reduce the count of the #commands currently in transport */
14519 	un->un_ncmds_in_transport--;
14520 	ASSERT(un->un_ncmds_in_transport >= 0);
14521 
14522 	/* Increment counter to indicate that the callback routine is active */
14523 	un->un_in_callback++;
14524 
14525 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14526 
14527 #ifdef	SDDEBUG
14528 	if (bp == un->un_retry_bp) {
14529 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14530 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14531 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14532 	}
14533 #endif
14534 
14535 	/*
14536 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
14537 	 */
14538 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14539 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14540 			    "Device is gone\n");
14541 		sd_return_failed_command(un, bp, EIO);
14542 		goto exit;
14543 	}
14544 
14545 	/*
14546 	 * First see if the pkt has auto-request sense data with it....
14547 	 * Look at the packet state first so we don't take a performance
14548 	 * hit looking at the arq enabled flag unless absolutely necessary.
14549 	 */
14550 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14551 	    (un->un_f_arq_enabled == TRUE)) {
14552 		/*
14553 		 * The HBA did an auto request sense for this command so check
14554 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14555 		 * driver command that should not be retried.
14556 		 */
14557 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14558 			/*
14559 			 * Save the relevant sense info into the xp for the
14560 			 * original cmd.
14561 			 */
14562 			struct scsi_arq_status *asp;
14563 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14564 			xp->xb_sense_status =
14565 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14566 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14567 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14568 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14569 			    min(sizeof (struct scsi_extended_sense),
14570 			    SENSE_LENGTH));
14571 
14572 			/* fail the command */
14573 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14574 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14575 			sd_return_failed_command(un, bp, EIO);
14576 			goto exit;
14577 		}
14578 
14579 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14580 		/*
14581 		 * We want to either retry or fail this command, so free
14582 		 * the DMA resources here.  If we retry the command then
14583 		 * the DMA resources will be reallocated in sd_start_cmds().
14584 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14585 		 * causes the *entire* transfer to start over again from the
14586 		 * beginning of the request, even for PARTIAL chunks that
14587 		 * have already transferred successfully.
14588 		 */
14589 		if ((un->un_f_is_fibre == TRUE) &&
14590 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14591 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14592 			scsi_dmafree(pktp);
14593 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14594 		}
14595 #endif
14596 
14597 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14598 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14599 
14600 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14601 		goto exit;
14602 	}
14603 
14604 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14605 	if (pktp->pkt_flags & FLAG_SENSING)  {
14606 		/* This pktp is from the unit's REQUEST_SENSE command */
14607 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14608 		    "sdintr: sd_handle_request_sense\n");
14609 		sd_handle_request_sense(un, bp, xp, pktp);
14610 		goto exit;
14611 	}
14612 
14613 	/*
14614 	 * Check to see if the command successfully completed as requested;
14615 	 * this is the most common case (and also the hot performance path).
14616 	 *
14617 	 * Requirements for successful completion are:
14618 	 * pkt_reason is CMD_CMPLT and packet status is status good.
14619 	 * In addition:
14620 	 * - A residual of zero indicates successful completion no matter what
14621 	 *   the command is.
14622 	 * - If the residual is not zero and the command is not a read or
14623 	 *   write, then it's still defined as successful completion. In other
14624 	 *   words, if the command is a read or write the residual must be
14625 	 *   zero for successful completion.
14626 	 * - If the residual is not zero and the command is a read or
14627 	 *   write, and it's a USCSICMD, then it's still defined as
14628 	 *   successful completion.
14629 	 */
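	/*
	 * The checks below reduce to the following decision table:
	 *
	 *	pkt_resid == 0				-> success
	 *	pkt_resid != 0, not READ/WRITE		-> success, set b_resid
	 *	pkt_resid != 0, READ/WRITE, USCSI	-> success, set b_resid
	 *	pkt_resid != 0, READ/WRITE, non-USCSI	-> not_successful
	 */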
14630 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14631 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14632 
14633 		/*
14634 		 * Since this command is returned with a good status, we
14635 		 * can reset the count for Sonoma failover.
14636 		 */
14637 		un->un_sonoma_failure_count = 0;
14638 
14639 		/*
14640 		 * Return all USCSI commands on good status
14641 		 */
14642 		if (pktp->pkt_resid == 0) {
14643 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14644 			    "sdintr: returning command for resid == 0\n");
14645 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14646 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14647 			SD_UPDATE_B_RESID(bp, pktp);
14648 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14649 			    "sdintr: returning command for resid != 0\n");
14650 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14651 			SD_UPDATE_B_RESID(bp, pktp);
14652 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14653 				"sdintr: returning uscsi command\n");
14654 		} else {
14655 			goto not_successful;
14656 		}
14657 		sd_return_command(un, bp);
14658 
14659 		/*
14660 		 * Decrement counter to indicate that the callback routine
14661 		 * is done.
14662 		 */
14663 		un->un_in_callback--;
14664 		ASSERT(un->un_in_callback >= 0);
14665 		mutex_exit(SD_MUTEX(un));
14666 
14667 		return;
14668 	}
14669 
14670 not_successful:
14671 
14672 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14673 	/*
14674 	 * The following is based upon knowledge of the underlying transport
14675 	 * and its use of DMA resources.  This code should be removed when
14676 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14677 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14678 	 * and sd_start_cmds().
14679 	 *
14680 	 * Free any DMA resources associated with this command if there
14681 	 * is a chance it could be retried or enqueued for later retry.
14682 	 * If we keep the DMA binding then mpxio cannot reissue the
14683 	 * command on another path whenever a path failure occurs.
14684 	 *
14685 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14686 	 * causes the *entire* transfer to start over again from the
14687 	 * beginning of the request, even for PARTIAL chunks that
14688 	 * have already transferred successfully.
14689 	 *
14690 	 * This is only done for non-uscsi commands (and also skipped for the
14691 	 * driver's internal RQS command). Also just do this for Fibre Channel
14692 	 * devices as these are the only ones that support mpxio.
14693 	 */
14694 	if ((un->un_f_is_fibre == TRUE) &&
14695 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14696 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14697 		scsi_dmafree(pktp);
14698 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14699 	}
14700 #endif
14701 
14702 	/*
14703 	 * The command did not successfully complete as requested so check
14704 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14705 	 * driver command that should not be retried so just return. If
14706 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14707 	 */
14708 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14709 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14710 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14711 		/*
14712 		 * Issue a request sense if a check condition caused the error
14713 		 * (we handle the auto request sense case above), otherwise
14714 		 * just fail the command.
14715 		 */
14716 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14717 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14718 			sd_send_request_sense_command(un, bp, pktp);
14719 		} else {
14720 			sd_return_failed_command(un, bp, EIO);
14721 		}
14722 		goto exit;
14723 	}
14724 
14725 	/*
14726 	 * The command did not successfully complete as requested so process
14727 	 * the error, retry, and/or attempt recovery.
14728 	 */
14729 	switch (pktp->pkt_reason) {
14730 	case CMD_CMPLT:
14731 		switch (SD_GET_PKT_STATUS(pktp)) {
14732 		case STATUS_GOOD:
14733 			/*
14734 			 * The command completed successfully with a non-zero
14735 			 * residual
14736 			 */
14737 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14738 			    "sdintr: STATUS_GOOD \n");
14739 			sd_pkt_status_good(un, bp, xp, pktp);
14740 			break;
14741 
14742 		case STATUS_CHECK:
14743 		case STATUS_TERMINATED:
14744 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14745 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
14746 			sd_pkt_status_check_condition(un, bp, xp, pktp);
14747 			break;
14748 
14749 		case STATUS_BUSY:
14750 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14751 			    "sdintr: STATUS_BUSY\n");
14752 			sd_pkt_status_busy(un, bp, xp, pktp);
14753 			break;
14754 
14755 		case STATUS_RESERVATION_CONFLICT:
14756 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14757 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
14758 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
14759 			break;
14760 
14761 		case STATUS_QFULL:
14762 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14763 			    "sdintr: STATUS_QFULL\n");
14764 			sd_pkt_status_qfull(un, bp, xp, pktp);
14765 			break;
14766 
14767 		case STATUS_MET:
14768 		case STATUS_INTERMEDIATE:
14769 		case STATUS_SCSI2:
14770 		case STATUS_INTERMEDIATE_MET:
14771 		case STATUS_ACA_ACTIVE:
14772 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14773 			    "Unexpected SCSI status received: 0x%x\n",
14774 			    SD_GET_PKT_STATUS(pktp));
14775 			sd_return_failed_command(un, bp, EIO);
14776 			break;
14777 
14778 		default:
14779 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14780 			    "Invalid SCSI status received: 0x%x\n",
14781 			    SD_GET_PKT_STATUS(pktp));
14782 			sd_return_failed_command(un, bp, EIO);
14783 			break;
14784 
14785 		}
14786 		break;
14787 
14788 	case CMD_INCOMPLETE:
14789 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14790 		    "sdintr:  CMD_INCOMPLETE\n");
14791 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
14792 		break;
14793 	case CMD_TRAN_ERR:
14794 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14795 		    "sdintr: CMD_TRAN_ERR\n");
14796 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
14797 		break;
14798 	case CMD_RESET:
14799 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14800 		    "sdintr: CMD_RESET \n");
14801 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
14802 		break;
14803 	case CMD_ABORTED:
14804 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14805 		    "sdintr: CMD_ABORTED \n");
14806 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
14807 		break;
14808 	case CMD_TIMEOUT:
14809 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14810 		    "sdintr: CMD_TIMEOUT\n");
14811 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
14812 		break;
14813 	case CMD_UNX_BUS_FREE:
14814 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14815 		    "sdintr: CMD_UNX_BUS_FREE \n");
14816 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
14817 		break;
14818 	case CMD_TAG_REJECT:
14819 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14820 		    "sdintr: CMD_TAG_REJECT\n");
14821 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
14822 		break;
14823 	default:
14824 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14825 		    "sdintr: default\n");
14826 		sd_pkt_reason_default(un, bp, xp, pktp);
14827 		break;
14828 	}
14829 
14830 exit:
14831 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
14832 
14833 	/* Decrement counter to indicate that the callback routine is done. */
14834 	un->un_in_callback--;
14835 	ASSERT(un->un_in_callback >= 0);
14836 
14837 	/*
14838 	 * At this point, the pkt has been dispatched, ie, it is either
14839 	 * being re-tried or has been returned to its caller and should
14840 	 * not be referenced.
14841 	 */
14842 
14843 	mutex_exit(SD_MUTEX(un));
14844 }
14845 
14846 
14847 /*
14848  *    Function: sd_print_incomplete_msg
14849  *
14850  * Description: Prints the error message for a CMD_INCOMPLETE error.
14851  *
14852  *   Arguments: un - ptr to associated softstate for the device.
14853  *		bp - ptr to the buf(9S) for the command.
14854  *		arg - message string ptr
14855  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
14856  *			or SD_NO_RETRY_ISSUED.
14857  *
14858  *     Context: May be called under interrupt context
14859  */
14860 
14861 static void
14862 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
14863 {
14864 	struct scsi_pkt	*pktp;
14865 	char	*msgp;
14866 	char	*cmdp = arg;
14867 
14868 	ASSERT(un != NULL);
14869 	ASSERT(mutex_owned(SD_MUTEX(un)));
14870 	ASSERT(bp != NULL);
14871 	ASSERT(arg != NULL);
14872 	pktp = SD_GET_PKTP(bp);
14873 	ASSERT(pktp != NULL);
14874 
14875 	switch (code) {
14876 	case SD_DELAYED_RETRY_ISSUED:
14877 	case SD_IMMEDIATE_RETRY_ISSUED:
14878 		msgp = "retrying";
14879 		break;
14880 	case SD_NO_RETRY_ISSUED:
14881 	default:
14882 		msgp = "giving up";
14883 		break;
14884 	}
14885 
14886 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
14887 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14888 		    "incomplete %s- %s\n", cmdp, msgp);
14889 	}
14890 }
14891 
14892 
14893 
14894 /*
14895  *    Function: sd_pkt_status_good
14896  *
14897  * Description: Processing for a STATUS_GOOD code in pkt_status.
14898  *
14899  *     Context: May be called under interrupt context
14900  */
14901 
14902 static void
14903 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
14904 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
14905 {
14906 	char	*cmdp;
14907 
14908 	ASSERT(un != NULL);
14909 	ASSERT(mutex_owned(SD_MUTEX(un)));
14910 	ASSERT(bp != NULL);
14911 	ASSERT(xp != NULL);
14912 	ASSERT(pktp != NULL);
14913 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
14914 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
14915 	ASSERT(pktp->pkt_resid != 0);
14916 
14917 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
14918 
14919 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
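	/*
	 * Mask off the CDB group code (upper three bits of the opcode) so
	 * that every READ and WRITE variant maps to SCMD_READ or SCMD_WRITE.
	 */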
14920 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
14921 	case SCMD_READ:
14922 		cmdp = "read";
14923 		break;
14924 	case SCMD_WRITE:
14925 		cmdp = "write";
14926 		break;
14927 	default:
14928 		SD_UPDATE_B_RESID(bp, pktp);
14929 		sd_return_command(un, bp);
14930 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14931 		return;
14932 	}
14933 
14934 	/*
14935 	 * See if we can retry the read/write, preferably immediately.
14936 	 * If retries are exhausted, then sd_retry_command() will update
14937 	 * the b_resid count.
14938 	 */
14939 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
14940 	    cmdp, EIO, (clock_t)0, NULL);
14941 
14942 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14943 }
14944 
14945 
14946 
14947 
14948 
14949 /*
14950  *    Function: sd_handle_request_sense
14951  *
14952  * Description: Processing for non-auto Request Sense command.
14953  *
14954  *   Arguments: un - ptr to associated softstate
14955  *		sense_bp - ptr to buf(9S) for the RQS command
14956  *		sense_xp - ptr to the sd_xbuf for the RQS command
14957  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
14958  *
14959  *     Context: May be called under interrupt context
14960  */
14961 
14962 static void
14963 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
14964 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
14965 {
14966 	struct buf	*cmd_bp;	/* buf for the original command */
14967 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
14968 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
14969 
14970 	ASSERT(un != NULL);
14971 	ASSERT(mutex_owned(SD_MUTEX(un)));
14972 	ASSERT(sense_bp != NULL);
14973 	ASSERT(sense_xp != NULL);
14974 	ASSERT(sense_pktp != NULL);
14975 
14976 	/*
14977 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
14978 	 * RQS command and not the original command.
14979 	 */
14980 	ASSERT(sense_pktp == un->un_rqs_pktp);
14981 	ASSERT(sense_bp   == un->un_rqs_bp);
14982 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
14983 	    (FLAG_SENSING | FLAG_HEAD));
14984 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
14985 	    FLAG_SENSING) == FLAG_SENSING);
14986 
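	/*
	 * The RQS xbuf's xb_sense_bp links back to the original command,
	 * whose packet carries FLAG_SENSING while the RQS is outstanding
	 * (as the ASSERTs above verify).
	 */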
14987 	/* These are the bp, xp, and pktp for the original command */
14988 	cmd_bp = sense_xp->xb_sense_bp;
14989 	cmd_xp = SD_GET_XBUF(cmd_bp);
14990 	cmd_pktp = SD_GET_PKTP(cmd_bp);
14991 
14992 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
14993 		/*
14994 		 * The REQUEST SENSE command failed.  Release the REQUEST
14995 		 * SENSE command for re-use, get back the bp for the original
14996 		 * command, and attempt to re-try the original command if
14997 		 * FLAG_DIAGNOSE is not set in the original packet.
14998 		 */
14999 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15000 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15001 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15002 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15003 			    NULL, NULL, EIO, (clock_t)0, NULL);
15004 			return;
15005 		}
15006 	}
15007 
15008 	/*
15009 	 * Save the relevant sense info into the xp for the original cmd.
15010 	 *
15011 	 * Note: if the request sense failed the state info will be zero
15012 	 * as set in sd_mark_rqs_busy()
15013 	 */
15014 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15015 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15016 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15017 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15018 
15019 	/*
15020 	 *  Free up the RQS command....
15021 	 *  NOTE:
15022 	 *	Must do this BEFORE calling sd_validate_sense_data!
15023 	 *	sd_validate_sense_data may return the original command in
15024 	 *	which case the pkt will be freed and the flags can no
15025 	 *	longer be touched.
15026 	 *	SD_MUTEX is held through this process until the command
15027 	 *	is dispatched based upon the sense data, so there are
15028 	 *	no race conditions.
15029 	 */
15030 	(void) sd_mark_rqs_idle(un, sense_xp);
15031 
15032 	/*
15033 	 * For a retryable command see if we have valid sense data, if so then
15034 	 * turn it over to sd_decode_sense() to figure out the right course of
15035 	 * action. Just fail a non-retryable command.
15036 	 */
15037 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15038 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15039 		    SD_SENSE_DATA_IS_VALID) {
15040 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15041 		}
15042 	} else {
15043 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15044 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15045 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15046 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15047 		sd_return_failed_command(un, cmd_bp, EIO);
15048 	}
15049 }
15050 
15051 
15052 
15053 
15054 /*
15055  *    Function: sd_handle_auto_request_sense
15056  *
15057  * Description: Processing for auto-request sense information.
15058  *
15059  *   Arguments: un - ptr to associated softstate
15060  *		bp - ptr to buf(9S) for the command
15061  *		xp - ptr to the sd_xbuf for the command
15062  *		pktp - ptr to the scsi_pkt(9S) for the command
15063  *
15064  *     Context: May be called under interrupt context
15065  */
15066 
15067 static void
15068 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15069 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15070 {
15071 	struct scsi_arq_status *asp;
15072 
15073 	ASSERT(un != NULL);
15074 	ASSERT(mutex_owned(SD_MUTEX(un)));
15075 	ASSERT(bp != NULL);
15076 	ASSERT(xp != NULL);
15077 	ASSERT(pktp != NULL);
15078 	ASSERT(pktp != un->un_rqs_pktp);
15079 	ASSERT(bp   != un->un_rqs_bp);
15080 
15081 	/*
15082 	 * For auto-request sense, we get a scsi_arq_status back from
15083 	 * the HBA, with the sense data in the sts_sensedata member.
15084 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15085 	 */
15086 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
15087 
15088 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15089 		/*
15090 		 * The auto REQUEST SENSE failed; see if we can re-try
15091 		 * the original command.
15092 		 */
15093 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15094 		    "auto request sense failed (reason=%s)\n",
15095 		    scsi_rname(asp->sts_rqpkt_reason));
15096 
15097 		sd_reset_target(un, pktp);
15098 
15099 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15100 		    NULL, NULL, EIO, (clock_t)0, NULL);
15101 		return;
15102 	}
15103 
15104 	/* Save the relevant sense info into the xp for the original cmd. */
15105 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15106 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15107 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
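	/*
	 * Copy at most SENSE_LENGTH bytes, in case the HBA's extended
	 * sense structure and the xbuf sense buffer differ in size.
	 */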
15108 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15109 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
15110 
15111 	/*
15112 	 * See if we have valid sense data, if so then turn it over to
15113 	 * sd_decode_sense() to figure out the right course of action.
15114 	 */
15115 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15116 		sd_decode_sense(un, bp, xp, pktp);
15117 	}
15118 }
15119 
15120 
15121 /*
15122  *    Function: sd_print_sense_failed_msg
15123  *
15124  * Description: Print log message when RQS has failed.
15125  *
15126  *   Arguments: un - ptr to associated softstate
15127  *		bp - ptr to buf(9S) for the command
15128  *		arg - generic message string ptr
15129  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15130  *			or SD_NO_RETRY_ISSUED
15131  *
15132  *     Context: May be called from interrupt context
15133  */
15134 
15135 static void
15136 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15137 	int code)
15138 {
15139 	char	*msgp = arg;
15140 
15141 	ASSERT(un != NULL);
15142 	ASSERT(mutex_owned(SD_MUTEX(un)));
15143 	ASSERT(bp != NULL);
15144 
15145 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15146 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
15147 	}
15148 }
15149 
15150 
15151 /*
15152  *    Function: sd_validate_sense_data
15153  *
15154  * Description: Check the given sense data for validity.
15155  *		If the sense data is not valid, the command will
15156  *		be either failed or retried!
15157  *
15158  * Return Code: SD_SENSE_DATA_IS_INVALID
15159  *		SD_SENSE_DATA_IS_VALID
15160  *
15161  *     Context: May be called from interrupt context
15162  */
15163 
15164 static int
15165 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15166 {
15167 	struct scsi_extended_sense *esp;
15168 	struct	scsi_pkt *pktp;
15169 	size_t	actual_len;
15170 	char	*msgp = NULL;
15171 
15172 	ASSERT(un != NULL);
15173 	ASSERT(mutex_owned(SD_MUTEX(un)));
15174 	ASSERT(bp != NULL);
15175 	ASSERT(bp != un->un_rqs_bp);
15176 	ASSERT(xp != NULL);
15177 
15178 	pktp = SD_GET_PKTP(bp);
15179 	ASSERT(pktp != NULL);
15180 
15181 	/*
15182 	 * Check the status of the RQS command (auto or manual).
15183 	 */
15184 	switch (xp->xb_sense_status & STATUS_MASK) {
15185 	case STATUS_GOOD:
15186 		break;
15187 
15188 	case STATUS_RESERVATION_CONFLICT:
15189 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15190 		return (SD_SENSE_DATA_IS_INVALID);
15191 
15192 	case STATUS_BUSY:
15193 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15194 		    "Busy Status on REQUEST SENSE\n");
15195 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15196 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15197 		return (SD_SENSE_DATA_IS_INVALID);
15198 
15199 	case STATUS_QFULL:
15200 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15201 		    "QFULL Status on REQUEST SENSE\n");
15202 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15203 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15204 		return (SD_SENSE_DATA_IS_INVALID);
15205 
15206 	case STATUS_CHECK:
15207 	case STATUS_TERMINATED:
15208 		msgp = "Check Condition on REQUEST SENSE\n";
15209 		goto sense_failed;
15210 
15211 	default:
15212 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
15213 		goto sense_failed;
15214 	}
15215 
15216 	/*
15217 	 * See if we got the minimum required amount of sense data:
15218 	 * xb_sense_resid is the untransferred byte count, so the difference
15219 	 * is the received length (assumed to be SENSE_LENGTH bytes or less).
15220 	 */
15221 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
15222 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15223 	    (actual_len == 0)) {
15224 		msgp = "Request Sense couldn't get sense data\n";
15225 		goto sense_failed;
15226 	}
15227 
15228 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15229 		msgp = "Not enough sense information\n";
15230 		goto sense_failed;
15231 	}
15232 
15233 	/*
15234 	 * We require the extended sense data
15235 	 */
15236 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15237 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15238 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15239 			static char tmp[8];
15240 			static char buf[148];
15241 			char *p = (char *)(xp->xb_sense_data);
15242 			int i;
15243 
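			/*
			 * tmp[] and buf[] are static and therefore shared,
			 * so sd_sense_mutex serializes access while the
			 * message is assembled.
			 */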
15244 			mutex_enter(&sd_sense_mutex);
15245 			(void) strcpy(buf, "undecodable sense information:");
15246 			for (i = 0; i < actual_len; i++) {
15247 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15248 				(void) strcpy(&buf[strlen(buf)], tmp);
15249 			}
15250 			i = strlen(buf);
15251 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15252 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
15253 			mutex_exit(&sd_sense_mutex);
15254 		}
15255 		/* Note: Legacy behavior, fail the command with no retry */
15256 		sd_return_failed_command(un, bp, EIO);
15257 		return (SD_SENSE_DATA_IS_INVALID);
15258 	}
15259 
15260 	/*
15261 	 * Check that es_code is valid (es_class concatenated with es_code
15262 	 * makes up the "response code" field).  es_class will always be 7, so
15263 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
15264 	 * sense data format.
15265 	 */
15266 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15267 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15268 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15269 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15270 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15271 		goto sense_failed;
15272 	}
15273 
15274 	return (SD_SENSE_DATA_IS_VALID);
15275 
15276 sense_failed:
15277 	/*
15278 	 * If the request sense failed (for whatever reason), attempt
15279 	 * to retry the original command.
15280 	 */
15281 #if defined(__i386) || defined(__amd64)
15282 	/*
15283 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
15284 	 * sddef.h for the Sparc platform, while x86 uses one binary
15285 	 * for both SCSI and FC.
15286 	 * The delay value used here needs to be adjusted whenever
15287 	 * SD_RETRY_DELAY changes in sddef.h.
15288 	 */
15289 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15290 	    sd_print_sense_failed_msg, msgp, EIO,
15291 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
15292 #else
15293 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15294 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15295 #endif
15296 
15297 	return (SD_SENSE_DATA_IS_INVALID);
15298 }
15299 
15300 
15301 
15302 /*
15303  *    Function: sd_decode_sense
15304  *
15305  * Description: Take recovery action(s) when SCSI Sense Data is received.
15306  *
15307  *     Context: Interrupt context.
15308  */
15309 
15310 static void
15311 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15312 	struct scsi_pkt *pktp)
15313 {
15314 	uint8_t sense_key;
15315 
15316 	ASSERT(un != NULL);
15317 	ASSERT(mutex_owned(SD_MUTEX(un)));
15318 	ASSERT(bp != NULL);
15319 	ASSERT(bp != un->un_rqs_bp);
15320 	ASSERT(xp != NULL);
15321 	ASSERT(pktp != NULL);
15322 
15323 	sense_key = scsi_sense_key(xp->xb_sense_data);
15324 
15325 	switch (sense_key) {
15326 	case KEY_NO_SENSE:
15327 		sd_sense_key_no_sense(un, bp, xp, pktp);
15328 		break;
15329 	case KEY_RECOVERABLE_ERROR:
15330 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15331 		    bp, xp, pktp);
15332 		break;
15333 	case KEY_NOT_READY:
15334 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15335 		    bp, xp, pktp);
15336 		break;
15337 	case KEY_MEDIUM_ERROR:
15338 	case KEY_HARDWARE_ERROR:
15339 		sd_sense_key_medium_or_hardware_error(un,
15340 		    xp->xb_sense_data, bp, xp, pktp);
15341 		break;
15342 	case KEY_ILLEGAL_REQUEST:
15343 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15344 		break;
15345 	case KEY_UNIT_ATTENTION:
15346 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15347 		    bp, xp, pktp);
15348 		break;
15349 	case KEY_WRITE_PROTECT:
15350 	case KEY_VOLUME_OVERFLOW:
15351 	case KEY_MISCOMPARE:
15352 		sd_sense_key_fail_command(un, bp, xp, pktp);
15353 		break;
15354 	case KEY_BLANK_CHECK:
15355 		sd_sense_key_blank_check(un, bp, xp, pktp);
15356 		break;
15357 	case KEY_ABORTED_COMMAND:
15358 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15359 		break;
15360 	case KEY_VENDOR_UNIQUE:
15361 	case KEY_COPY_ABORTED:
15362 	case KEY_EQUAL:
15363 	case KEY_RESERVED:
15364 	default:
15365 		sd_sense_key_default(un, xp->xb_sense_data,
15366 		    bp, xp, pktp);
15367 		break;
15368 	}
15369 }
15370 
15371 
15372 /*
15373  *    Function: sd_dump_memory
15374  *
15375  * Description: Debug logging routine to print the contents of a user provided
15376  *		buffer. The output is broken up into 256-byte segments
15377  *		due to a size constraint of the scsi_log
15378  *		implementation.
15379  *
15380  *   Arguments: un - ptr to softstate
15381  *		comp - component mask
15382  *		title - "title" string to preceed data when printed
15383  *		data - ptr to data block to be printed
15384  *		len - size of data block to be printed
15385  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15386  *
15387  *     Context: May be called from interrupt context
15388  */
15389 
15390 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15391 
15392 static char *sd_dump_format_string[] = {
15393 		" 0x%02x",
15394 		" %c"
15395 };
15396 
15397 static void
15398 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15399     int len, int fmt)
15400 {
15401 	int	i, j;
15402 	int	avail_count;
15403 	int	start_offset;
15404 	int	end_offset;
15405 	size_t	entry_len;
15406 	char	*bufp;
15407 	char	*local_buf;
15408 	char	*format_string;
15409 
15410 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15411 
15412 	/*
15413 	 * In the debug version of the driver, this function is called from a
15414 	 * number of places which are NOPs in the release driver.
15415 	 * The debug driver therefore has additional methods of filtering
15416 	 * debug output.
15417 	 */
15418 #ifdef SDDEBUG
15419 	/*
15420 	 * In the debug version of the driver we can reduce the amount of debug
15421 	 * messages by setting sd_error_level to something other than
15422 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15423 	 * sd_component_mask.
15424 	 */
15425 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15426 	    (sd_error_level != SCSI_ERR_ALL)) {
15427 		return;
15428 	}
15429 	if (((sd_component_mask & comp) == 0) ||
15430 	    (sd_error_level != SCSI_ERR_ALL)) {
15431 		return;
15432 	}
15433 #else
15434 	if (sd_error_level != SCSI_ERR_ALL) {
15435 		return;
15436 	}
15437 #endif
15438 
15439 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15440 	bufp = local_buf;
15441 	/*
15442 	 * Available length is the length of local_buf[], minus the
15443 	 * length of the title string, minus one for the ":", minus
15444 	 * one for the newline, minus one for the NULL terminator.
15445 	 * This gives the #bytes available for holding the printed
15446 	 * values from the given data buffer.
15447 	 */
15448 	if (fmt == SD_LOG_HEX) {
15449 		format_string = sd_dump_format_string[0];
15450 	} else /* SD_LOG_CHAR */ {
15451 		format_string = sd_dump_format_string[1];
15452 	}
15453 	/*
15454 	 * Available count is the number of elements from the given
15455 	 * data buffer that we can fit into the available length.
15456 	 * This is based upon the size of the format string used.
15457 	 * Make one entry and find its size.
15458 	 */
15459 	(void) sprintf(bufp, format_string, data[0]);
15460 	entry_len = strlen(bufp);
15461 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
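	/* The "- 3" reserves room for the ':', the newline, and the NUL. */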
15462 
15463 	j = 0;
15464 	while (j < len) {
15465 		bufp = local_buf;
15466 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15467 		start_offset = j;
15468 
15469 		end_offset = start_offset + avail_count;
15470 
15471 		(void) sprintf(bufp, "%s:", title);
15472 		bufp += strlen(bufp);
15473 		for (i = start_offset; ((i < end_offset) && (j < len));
15474 		    i++, j++) {
15475 			(void) sprintf(bufp, format_string, data[i]);
15476 			bufp += entry_len;
15477 		}
15478 		(void) sprintf(bufp, "\n");
15479 
15480 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15481 	}
15482 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15483 }
15484 
15485 /*
15486  *    Function: sd_print_sense_msg
15487  *
15488  * Description: Log a message based upon the given sense data.
15489  *
15490  *   Arguments: un - ptr to associated softstate
15491  *		bp - ptr to buf(9S) for the command
15492  *		arg - ptr to associate sd_sense_info struct
15493  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15494  *			or SD_NO_RETRY_ISSUED
15495  *
15496  *     Context: May be called from interrupt context
15497  */
15498 
15499 static void
15500 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15501 {
15502 	struct sd_xbuf	*xp;
15503 	struct scsi_pkt	*pktp;
15504 	uint8_t *sensep;
15505 	daddr_t request_blkno;
15506 	diskaddr_t err_blkno;
15507 	int severity;
15508 	int pfa_flag;
15509 	extern struct scsi_key_strings scsi_cmds[];
15510 
15511 	ASSERT(un != NULL);
15512 	ASSERT(mutex_owned(SD_MUTEX(un)));
15513 	ASSERT(bp != NULL);
15514 	xp = SD_GET_XBUF(bp);
15515 	ASSERT(xp != NULL);
15516 	pktp = SD_GET_PKTP(bp);
15517 	ASSERT(pktp != NULL);
15518 	ASSERT(arg != NULL);
15519 
15520 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15521 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15522 
15523 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15524 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15525 		severity = SCSI_ERR_RETRYABLE;
15526 	}
15527 
15528 	/* Use absolute block number for the request block number */
15529 	request_blkno = xp->xb_blkno;
15530 
15531 	/*
15532 	 * Now try to get the error block number from the sense data
15533 	 */
15534 	sensep = xp->xb_sense_data;
15535 
15536 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15537 		(uint64_t *)&err_blkno)) {
15538 		/*
15539 		 * We retrieved the error block number from the information
15540 		 * portion of the sense data.
15541 		 *
15542 		 * For USCSI commands we are better off using the error
15543 		 * block no. as the requested block no. (This is the best
15544 		 * we can estimate.)
15545 		 */
15546 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15547 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15548 			request_blkno = err_blkno;
15549 		}
15550 	} else {
15551 		/*
15552 		 * Without the es_valid bit set (for fixed format) or an
15553 		 * information descriptor (for descriptor format) we cannot
15554 		 * be certain of the error blkno, so just use the
15555 		 * request_blkno.
15556 		 */
15557 		err_blkno = (diskaddr_t)request_blkno;
15558 	}
15559 
15560 	/*
15561 	 * The following will log the buffer contents for the release driver
15562 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15563 	 * level is set to verbose.
15564 	 */
15565 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15566 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15567 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15568 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15569 
15570 	if (pfa_flag == FALSE) {
15571 		/* This is normally only set for USCSI */
15572 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15573 			return;
15574 		}
15575 
15576 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15577 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15578 		    (severity < sd_error_level))) {
15579 			return;
15580 		}
15581 	}
15582 
15583 	/*
15584 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
15585 	 */
15586 	if ((SD_IS_LSI(un)) &&
15587 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15588 	    (scsi_sense_asc(sensep) == 0x94) &&
15589 	    (scsi_sense_ascq(sensep) == 0x01)) {
15590 		un->un_sonoma_failure_count++;
15591 		if (un->un_sonoma_failure_count > 1) {
15592 			return;
15593 		}
15594 	}
15595 
15596 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15597 	    request_blkno, err_blkno, scsi_cmds,
15598 	    (struct scsi_extended_sense *)sensep,
15599 	    un->un_additional_codes, NULL);
15600 }
15601 
15602 /*
15603  *    Function: sd_sense_key_no_sense
15604  *
15605  * Description: Recovery action when sense data was not received.
15606  *
15607  *     Context: May be called from interrupt context
15608  */
15609 
15610 static void
15611 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15612 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15613 {
15614 	struct sd_sense_info	si;
15615 
15616 	ASSERT(un != NULL);
15617 	ASSERT(mutex_owned(SD_MUTEX(un)));
15618 	ASSERT(bp != NULL);
15619 	ASSERT(xp != NULL);
15620 	ASSERT(pktp != NULL);
15621 
15622 	si.ssi_severity = SCSI_ERR_FATAL;
15623 	si.ssi_pfa_flag = FALSE;
15624 
15625 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15626 
15627 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15628 		&si, EIO, (clock_t)0, NULL);
15629 }
15630 
15631 
15632 /*
15633  *    Function: sd_sense_key_recoverable_error
15634  *
15635  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15636  *
15637  *     Context: May be called from interrupt context
15638  */
15639 
15640 static void
15641 sd_sense_key_recoverable_error(struct sd_lun *un,
15642 	uint8_t *sense_datap,
15643 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15644 {
15645 	struct sd_sense_info	si;
15646 	uint8_t asc = scsi_sense_asc(sense_datap);
15647 
15648 	ASSERT(un != NULL);
15649 	ASSERT(mutex_owned(SD_MUTEX(un)));
15650 	ASSERT(bp != NULL);
15651 	ASSERT(xp != NULL);
15652 	ASSERT(pktp != NULL);
15653 
15654 	/*
15655 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15656 	 */
15657 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15658 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15659 		si.ssi_severity = SCSI_ERR_INFO;
15660 		si.ssi_pfa_flag = TRUE;
15661 	} else {
15662 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15663 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15664 		si.ssi_severity = SCSI_ERR_RECOVERED;
15665 		si.ssi_pfa_flag = FALSE;
15666 	}
15667 
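	/*
	 * If the entire transfer completed (zero residual), the recovered
	 * error needs no retry; log it and return the command as successful.
	 */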
15668 	if (pktp->pkt_resid == 0) {
15669 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15670 		sd_return_command(un, bp);
15671 		return;
15672 	}
15673 
15674 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15675 	    &si, EIO, (clock_t)0, NULL);
15676 }
15677 
15678 
15679 
15680 
15681 /*
15682  *    Function: sd_sense_key_not_ready
15683  *
15684  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15685  *
15686  *     Context: May be called from interrupt context
15687  */
15688 
15689 static void
15690 sd_sense_key_not_ready(struct sd_lun *un,
15691 	uint8_t *sense_datap,
15692 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15693 {
15694 	struct sd_sense_info	si;
15695 	uint8_t asc = scsi_sense_asc(sense_datap);
15696 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15697 
15698 	ASSERT(un != NULL);
15699 	ASSERT(mutex_owned(SD_MUTEX(un)));
15700 	ASSERT(bp != NULL);
15701 	ASSERT(xp != NULL);
15702 	ASSERT(pktp != NULL);
15703 
15704 	si.ssi_severity = SCSI_ERR_FATAL;
15705 	si.ssi_pfa_flag = FALSE;
15706 
15707 	/*
15708 	 * Update error stats after first NOT READY error. Disks may have
15709 	 * been powered down and may need to be restarted.  For CDROMs,
15710 	 * report NOT READY errors only if media is present.
15711 	 */
15712 	if ((ISCD(un) && (asc == 0x3A)) ||
15713 	    (xp->xb_retry_count > 0)) {
15714 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15715 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15716 	}
15717 
15718 	/*
15719 	 * Just fail if the "not ready" retry limit has been reached.
15720 	 */
15721 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
15722 		/* Special check for error message printing for removables. */
15723 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15724 		    (ascq >= 0x04)) {
15725 			si.ssi_severity = SCSI_ERR_ALL;
15726 		}
15727 		goto fail_command;
15728 	}
15729 
15730 	/*
15731 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15732 	 * what to do.
15733 	 */
15734 	switch (asc) {
15735 	case 0x04:	/* LOGICAL UNIT NOT READY */
15736 		/*
15737 		 * disk drives that don't spin up result in a very long delay
15738 		 * in format without warning messages. We will log a message
15739 		 * if the error level is set to verbose.
15740 		 */
15741 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15742 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15743 			    "logical unit not ready, resetting disk\n");
15744 		}
15745 
15746 		/*
15747 		 * CDROMs and disks have different requirements for the
15748 		 * number of retries.  If a CD-ROM reports this, it is
15749 		 * probably reading the TOC and is in the process of getting
15750 		 * ready, so we should keep trying for a long time to make
15751 		 * sure that all types of media are taken into account (for
15752 		 * some media the drive takes a long time to read the TOC).
15753 		 * For disks we do not want to retry too many times, as this
15754 		 * can cause a long hang in format when the drive refuses to
15755 		 * spin up (a very common failure).
15756 		 */
15757 		switch (ascq) {
15758 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
15759 			/*
15760 			 * Disk drives frequently refuse to spin up which
15761 			 * results in a very long hang in format without
15762 			 * warning messages.
15763 			 *
15764 			 * Note: This code preserves the legacy behavior of
15765 			 * comparing xb_retry_count against zero for fibre
15766 			 * channel targets instead of comparing against the
15767 			 * un_reset_retry_count value.  The reason for this
15768 			 * discrepancy has been so utterly lost beneath the
15769 			 * Sands of Time that even Indiana Jones could not
15770 			 * find it.
15771 			 */
15772 			if (un->un_f_is_fibre == TRUE) {
15773 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15774 				    (xp->xb_retry_count > 0)) &&
15775 				    (un->un_startstop_timeid == NULL)) {
15776 					scsi_log(SD_DEVINFO(un), sd_label,
15777 					    CE_WARN, "logical unit not ready, "
15778 					    "resetting disk\n");
15779 					sd_reset_target(un, pktp);
15780 				}
15781 			} else {
15782 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15783 				    (xp->xb_retry_count >
15784 				    un->un_reset_retry_count)) &&
15785 				    (un->un_startstop_timeid == NULL)) {
15786 					scsi_log(SD_DEVINFO(un), sd_label,
15787 					    CE_WARN, "logical unit not ready, "
15788 					    "resetting disk\n");
15789 					sd_reset_target(un, pktp);
15790 				}
15791 			}
15792 			break;
15793 
15794 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
15795 			/*
15796 			 * If the target is in the process of becoming
15797 			 * ready, just proceed with the retry. This can
15798 			 * happen with CD-ROMs that take a long time to
15799 			 * read TOC after a power cycle or reset.
15800 			 */
15801 			goto do_retry;
15802 
15803 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
15804 			break;
15805 
15806 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
15807 			/*
15808 			 * Retries cannot help here so just fail right away.
15809 			 */
15810 			goto fail_command;
15811 
15812 		case 0x88:
15813 			/*
15814 			 * Vendor-unique code for T3/T4: it indicates a
15815 			 * path problem in a multipathed config, but as far as
15816 			 * the target driver is concerned it equates to a fatal
15817 			 * error, so we should just fail the command right away
15818 			 * (without printing anything to the console). If this
15819 			 * is not a T3/T4, fall thru to the default recovery
15820 			 * action.
15821 			 * T3/T4 is FC only, don't need to check is_fibre
15822 			 */
15823 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
15824 				sd_return_failed_command(un, bp, EIO);
15825 				return;
15826 			}
15827 			/* FALLTHRU */
15828 
15829 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
15830 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
15831 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
15832 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
15833 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
15834 		default:    /* Possible future codes in SCSI spec? */
15835 			/*
15836 			 * For removable-media devices, do not retry if
15837 			 * ASCQ > 2 as these result mostly from USCSI commands
15838 			 * on MMC devices issued to check status of an
15839 			 * operation initiated in immediate mode.  Also for
15840 			 * ASCQ >= 4 do not print console messages as these
15841 			 * mainly represent a user-initiated operation
15842 			 * instead of a system failure.
15843 			 */
15844 			if (un->un_f_has_removable_media) {
15845 				si.ssi_severity = SCSI_ERR_ALL;
15846 				goto fail_command;
15847 			}
15848 			break;
15849 		}
15850 
15851 		/*
15852 		 * As part of our recovery attempt for the NOT READY
15853 		 * condition, we issue a START STOP UNIT command. However
15854 		 * we want to wait for a short delay before attempting this
15855 		 * as there may still be more commands coming back from the
15856 		 * target with the check condition. To do this we use
15857 		 * timeout(9F) to call sd_start_stop_unit_callback() after
15858 		 * the delay interval expires. (sd_start_stop_unit_callback()
15859 		 * dispatches sd_start_stop_unit_task(), which will issue
15860 		 * the actual START STOP UNIT command. The delay interval
15861 		 * is one-half of the delay that we will use to retry the
15862 		 * command that generated the NOT READY condition.
15863 		 *
15864 		 * Note that we could just dispatch sd_start_stop_unit_task()
15865 		 * from here and allow it to sleep for the delay interval,
15866 		 * but then we would be tying up the taskq thread
15867 		 * unnecessarily for the duration of the delay.
15868 		 *
15869 		 * Do not issue the START STOP UNIT if the current command
15870 		 * is already a START STOP UNIT.
15871 		 */
15872 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
15873 			break;
15874 		}
15875 
15876 		/*
15877 		 * Do not schedule the timeout if one is already pending.
15878 		 */
15879 		if (un->un_startstop_timeid != NULL) {
15880 			SD_INFO(SD_LOG_ERROR, un,
15881 			    "sd_sense_key_not_ready: restart already issued to"
15882 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
15883 			    ddi_get_instance(SD_DEVINFO(un)));
15884 			break;
15885 		}
15886 
15887 		/*
15888 		 * Schedule the START STOP UNIT command, then queue the command
15889 		 * for a retry.
15890 		 *
15891 		 * Note: A timeout is not scheduled for this retry because we
15892 		 * want the retry to be serial with the START_STOP_UNIT. The
15893 		 * retry will be started when the START_STOP_UNIT is completed
15894 		 * in sd_start_stop_unit_task.
15895 		 */
15896 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
15897 		    un, SD_BSY_TIMEOUT / 2);
15898 		xp->xb_retry_count++;
15899 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
15900 		return;
15901 
15902 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
15903 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15904 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15905 			    "unit does not respond to selection\n");
15906 		}
15907 		break;
15908 
15909 	case 0x3A:	/* MEDIUM NOT PRESENT */
15910 		if (sd_error_level >= SCSI_ERR_FATAL) {
15911 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15912 			    "Caddy not inserted in drive\n");
15913 		}
15914 
15915 		sr_ejected(un);
15916 		un->un_mediastate = DKIO_EJECTED;
15917 		/* The state has changed, inform the media watch routines */
15918 		cv_broadcast(&un->un_state_cv);
15919 		/* Just fail if no media is present in the drive. */
15920 		goto fail_command;
15921 
15922 	default:
15923 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15924 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15925 			    "Unit not Ready. Additional sense code 0x%x\n",
15926 			    asc);
15927 		}
15928 		break;
15929 	}
15930 
15931 do_retry:
15932 
15933 	/*
15934 	 * Retry the command, as some targets may report NOT READY for
15935 	 * several seconds after being reset.
15936 	 */
15937 	xp->xb_retry_count++;
15938 	si.ssi_severity = SCSI_ERR_RETRYABLE;
15939 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
15940 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
15941 
15942 	return;
15943 
15944 fail_command:
15945 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15946 	sd_return_failed_command(un, bp, EIO);
15947 }
15948 
15949 
15950 
15951 /*
15952  *    Function: sd_sense_key_medium_or_hardware_error
15953  *
15954  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
15955  *		sense key.
15956  *
15957  *     Context: May be called from interrupt context
15958  */
15959 
15960 static void
15961 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
15962 	uint8_t *sense_datap,
15963 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15964 {
15965 	struct sd_sense_info	si;
15966 	uint8_t sense_key = scsi_sense_key(sense_datap);
15967 	uint8_t asc = scsi_sense_asc(sense_datap);
15968 
15969 	ASSERT(un != NULL);
15970 	ASSERT(mutex_owned(SD_MUTEX(un)));
15971 	ASSERT(bp != NULL);
15972 	ASSERT(xp != NULL);
15973 	ASSERT(pktp != NULL);
15974 
15975 	si.ssi_severity = SCSI_ERR_FATAL;
15976 	si.ssi_pfa_flag = FALSE;
15977 
15978 	if (sense_key == KEY_MEDIUM_ERROR) {
15979 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
15980 	}
15981 
15982 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
15983 
15984 	if ((un->un_reset_retry_count != 0) &&
15985 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
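		/*
		 * Release SD_MUTEX across the reset attempt below, since
		 * scsi_reset() may block.
		 */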
15986 		mutex_exit(SD_MUTEX(un));
15987 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
15988 		if (un->un_f_allow_bus_device_reset == TRUE) {
15989 
15990 			boolean_t try_resetting_target = B_TRUE;
15991 
15992 			/*
15993 			 * We need to handle specific ASC values when we are
15994 			 * handling a KEY_HARDWARE_ERROR. In particular,
15995 			 * taking the default action of resetting the target may
15996 			 * not be the appropriate way to attempt recovery.
15997 			 * Resetting a target because of a single LUN failure
15998 			 * victimizes all LUNs on that target.
15999 			 *
16000 			 * This is true for LSI arrays: if an LSI array
16001 			 * controller returns an ASC of 0x84 (LUN Dead), we
16002 			 * should trust it.
16003 			 */
16004 
16005 			if (sense_key == KEY_HARDWARE_ERROR) {
16006 				switch (asc) {
16007 				case 0x84:
16008 					if (SD_IS_LSI(un)) {
16009 						try_resetting_target = B_FALSE;
16010 					}
16011 					break;
16012 				default:
16013 					break;
16014 				}
16015 			}
16016 
16017 			if (try_resetting_target == B_TRUE) {
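			/*
			 * Try the least intrusive reset first: a LUN reset
			 * if enabled, falling back to a target reset if the
			 * LUN reset fails or is unavailable.
			 */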
16018 				int reset_retval = 0;
16019 				if (un->un_f_lun_reset_enabled == TRUE) {
16020 					SD_TRACE(SD_LOG_IO_CORE, un,
16021 					    "sd_sense_key_medium_or_hardware_"
16022 					    "error: issuing RESET_LUN\n");
16023 					reset_retval =
16024 					    scsi_reset(SD_ADDRESS(un),
16025 					    RESET_LUN);
16026 				}
16027 				if (reset_retval == 0) {
16028 					SD_TRACE(SD_LOG_IO_CORE, un,
16029 					    "sd_sense_key_medium_or_hardware_"
16030 					    "error: issuing RESET_TARGET\n");
16031 					(void) scsi_reset(SD_ADDRESS(un),
16032 					    RESET_TARGET);
16033 				}
16034 			}
16035 		}
16036 		mutex_enter(SD_MUTEX(un));
16037 	}
16038 
16039 	/*
16040 	 * This really ought to be a fatal error, but we will retry anyway
16041 	 * as some drives report this as a spurious error.
16042 	 */
16043 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16044 	    &si, EIO, (clock_t)0, NULL);
16045 }
16046 
16047 
16048 
16049 /*
16050  *    Function: sd_sense_key_illegal_request
16051  *
16052  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16053  *
16054  *     Context: May be called from interrupt context
16055  */
16056 
16057 static void
16058 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16059 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16060 {
16061 	struct sd_sense_info	si;
16062 
16063 	ASSERT(un != NULL);
16064 	ASSERT(mutex_owned(SD_MUTEX(un)));
16065 	ASSERT(bp != NULL);
16066 	ASSERT(xp != NULL);
16067 	ASSERT(pktp != NULL);
16068 
16069 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16070 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16071 
16072 	si.ssi_severity = SCSI_ERR_INFO;
16073 	si.ssi_pfa_flag = FALSE;
16074 
16075 	/* Pointless to retry if the target thinks it's an illegal request */
16076 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16077 	sd_return_failed_command(un, bp, EIO);
16078 }
16079 
16080 
16081 
16082 
16083 /*
16084  *    Function: sd_sense_key_unit_attention
16085  *
16086  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16087  *
16088  *     Context: May be called from interrupt context
16089  */
16090 
16091 static void
16092 sd_sense_key_unit_attention(struct sd_lun *un,
16093 	uint8_t *sense_datap,
16094 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16095 {
16096 	/*
16097 	 * For UNIT ATTENTION we allow retries for up to one minute, as
16098 	 * devices like Sonoma can keep returning UNIT ATTENTION for close
16099 	 * to a minute under certain conditions.
16100 	 */
16101 	int	retry_check_flag = SD_RETRIES_UA;
16102 	boolean_t	kstat_updated = B_FALSE;
16103 	struct	sd_sense_info		si;
16104 	uint8_t asc = scsi_sense_asc(sense_datap);
16105 
16106 	ASSERT(un != NULL);
16107 	ASSERT(mutex_owned(SD_MUTEX(un)));
16108 	ASSERT(bp != NULL);
16109 	ASSERT(xp != NULL);
16110 	ASSERT(pktp != NULL);
16111 
16112 	si.ssi_severity = SCSI_ERR_INFO;
16113 	si.ssi_pfa_flag = FALSE;
16114 
16115 
16116 	switch (asc) {
16117 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16118 		if (sd_report_pfa != 0) {
16119 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16120 			si.ssi_pfa_flag = TRUE;
16121 			retry_check_flag = SD_RETRIES_STANDARD;
16122 			goto do_retry;
16123 		}
16124 
16125 		break;
16126 
16127 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
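		/*
		 * A reset releases any reservation we held; flag it as lost
		 * and wanted so that it can be reclaimed.
		 */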
16128 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16129 			un->un_resvd_status |=
16130 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16131 		}
16132 #ifdef _LP64
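		/*
		 * Devices too large for Group 1 (10-byte) CDB addressing
		 * depend on descriptor-format sense data, which the reset
		 * may have reverted; re-enable it from taskq context.
		 */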
16133 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16134 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16135 			    un, KM_NOSLEEP) == 0) {
16136 				/*
16137 				 * If we can't dispatch the task we'll just
16138 				 * live without descriptor sense.  We can
16139 				 * try again on the next "unit attention"
16140 				 */
16141 				SD_ERROR(SD_LOG_ERROR, un,
16142 				    "sd_sense_key_unit_attention: "
16143 				    "Could not dispatch "
16144 				    "sd_reenable_dsense_task\n");
16145 			}
16146 		}
16147 #endif /* _LP64 */
16148 		/* FALLTHRU */
16149 
16150 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16151 		if (!un->un_f_has_removable_media) {
16152 			break;
16153 		}
16154 
16155 		/*
16156 		 * When we get a unit attention from a removable-media device,
16157 		 * it may be in a state that will take a long time to recover
16158 		 * (e.g., from a reset).  Since we are executing in interrupt
16159 		 * context here, we cannot wait around for the device to come
16160 		 * back. So hand this command off to sd_media_change_task()
16161 		 * for deferred processing under taskq thread context. (Note
16162 		 * that the command still may be failed if a problem is
16163 		 * encountered at a later time.)
16164 		 */
16165 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16166 		    KM_NOSLEEP) == 0) {
16167 			/*
16168 			 * Cannot dispatch the request so fail the command.
16169 			 */
16170 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16171 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16172 			si.ssi_severity = SCSI_ERR_FATAL;
16173 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16174 			sd_return_failed_command(un, bp, EIO);
16175 		}
16176 
16177 		/*
16178 		 * If failed to dispatch sd_media_change_task(), we already
16179 		 * updated kstat. If succeed to dispatch sd_media_change_task(),
16180 		 * we should update kstat later if it encounters an error. So,
16181 		 * we update kstat_updated flag here.
16182 		 */
16183 		kstat_updated = B_TRUE;
16184 
16185 		/*
16186 		 * Either the command has been successfully dispatched to a
16187 		 * taskq for retrying, or the dispatch failed. In either case
16188 		 * do NOT retry again by calling sd_retry_command. This sets up
16189 		 * two retries of the same command and when one completes and
16190 		 * frees the resources the other will access freed memory,
16191 		 * a bad thing.
16192 		 */
16193 		return;
16194 
16195 	default:
16196 		break;
16197 	}
16198 
16199 	/*
16200 	 * Update kstat if we haven't done that.
16201 	 */
16202 	if (!kstat_updated) {
16203 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16204 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16205 	}
16206 
16207 do_retry:
16208 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16209 	    EIO, SD_UA_RETRY_DELAY, NULL);
16210 }
16211 
16212 
16213 
16214 /*
16215  *    Function: sd_sense_key_fail_command
16216  *
16217  * Description: Used to fail a command when we don't like the sense key
16218  *		that was returned.
16219  *
16220  *     Context: May be called from interrupt context
16221  */
16222 
16223 static void
16224 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16225 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16226 {
16227 	struct sd_sense_info	si;
16228 
16229 	ASSERT(un != NULL);
16230 	ASSERT(mutex_owned(SD_MUTEX(un)));
16231 	ASSERT(bp != NULL);
16232 	ASSERT(xp != NULL);
16233 	ASSERT(pktp != NULL);
16234 
16235 	si.ssi_severity = SCSI_ERR_FATAL;
16236 	si.ssi_pfa_flag = FALSE;
16237 
16238 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16239 	sd_return_failed_command(un, bp, EIO);
16240 }
16241 
16242 
16243 
16244 /*
16245  *    Function: sd_sense_key_blank_check
16246  *
16247  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16248  *		Has no monetary connotation.
16249  *
16250  *     Context: May be called from interrupt context
16251  */
16252 
16253 static void
16254 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16255 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16256 {
16257 	struct sd_sense_info	si;
16258 
16259 	ASSERT(un != NULL);
16260 	ASSERT(mutex_owned(SD_MUTEX(un)));
16261 	ASSERT(bp != NULL);
16262 	ASSERT(xp != NULL);
16263 	ASSERT(pktp != NULL);
16264 
16265 	/*
16266 	 * Blank check is not fatal for removable devices, therefore
16267 	 * it does not require a console message.
16268 	 */
16269 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16270 	    SCSI_ERR_FATAL;
16271 	si.ssi_pfa_flag = FALSE;
16272 
16273 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16274 	sd_return_failed_command(un, bp, EIO);
16275 }
16276 
16277 
16278 
16279 
16280 /*
16281  *    Function: sd_sense_key_aborted_command
16282  *
16283  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16284  *
16285  *     Context: May be called from interrupt context
16286  */
16287 
16288 static void
16289 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16290 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16291 {
16292 	struct sd_sense_info	si;
16293 
16294 	ASSERT(un != NULL);
16295 	ASSERT(mutex_owned(SD_MUTEX(un)));
16296 	ASSERT(bp != NULL);
16297 	ASSERT(xp != NULL);
16298 	ASSERT(pktp != NULL);
16299 
16300 	si.ssi_severity = SCSI_ERR_FATAL;
16301 	si.ssi_pfa_flag = FALSE;
16302 
16303 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16304 
16305 	/*
16306 	 * This really ought to be a fatal error, but we will retry anyway
16307 	 * as some drives report this as a spurious error.
16308 	 */
16309 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16310 	    &si, EIO, (clock_t)0, NULL);
16311 }
16312 
16313 
16314 
16315 /*
16316  *    Function: sd_sense_key_default
16317  *
16318  * Description: Default recovery action for several SCSI sense keys (basically
16319  *		attempts a retry).
16320  *
16321  *     Context: May be called from interrupt context
16322  */
16323 
16324 static void
16325 sd_sense_key_default(struct sd_lun *un,
16326 	uint8_t *sense_datap,
16327 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16328 {
16329 	struct sd_sense_info	si;
16330 	uint8_t sense_key = scsi_sense_key(sense_datap);
16331 
16332 	ASSERT(un != NULL);
16333 	ASSERT(mutex_owned(SD_MUTEX(un)));
16334 	ASSERT(bp != NULL);
16335 	ASSERT(xp != NULL);
16336 	ASSERT(pktp != NULL);
16337 
16338 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16339 
16340 	/*
16341 	 * Undecoded sense key.	Attempt retries and hope that will fix
16342 	 * the problem.  Otherwise, we're dead.
16343 	 */
16344 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16345 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16346 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16347 	}
16348 
16349 	si.ssi_severity = SCSI_ERR_FATAL;
16350 	si.ssi_pfa_flag = FALSE;
16351 
16352 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16353 	    &si, EIO, (clock_t)0, NULL);
16354 }
16355 
16356 
16357 
16358 /*
16359  *    Function: sd_print_retry_msg
16360  *
16361  * Description: Print a message indicating the retry action being taken.
16362  *
16363  *   Arguments: un - ptr to associated softstate
16364  *		bp - ptr to buf(9S) for the command
16365  *		arg - not used.
16366  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16367  *			or SD_NO_RETRY_ISSUED
16368  *
16369  *     Context: May be called from interrupt context
16370  */
16371 /* ARGSUSED */
16372 static void
16373 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16374 {
16375 	struct sd_xbuf	*xp;
16376 	struct scsi_pkt *pktp;
16377 	char *reasonp;
16378 	char *msgp;
16379 
16380 	ASSERT(un != NULL);
16381 	ASSERT(mutex_owned(SD_MUTEX(un)));
16382 	ASSERT(bp != NULL);
16383 	pktp = SD_GET_PKTP(bp);
16384 	ASSERT(pktp != NULL);
16385 	xp = SD_GET_XBUF(bp);
16386 	ASSERT(xp != NULL);
16387 
16388 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16389 	mutex_enter(&un->un_pm_mutex);
16390 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16391 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16392 	    (pktp->pkt_flags & FLAG_SILENT)) {
16393 		mutex_exit(&un->un_pm_mutex);
16394 		goto update_pkt_reason;
16395 	}
16396 	mutex_exit(&un->un_pm_mutex);
16397 
16398 	/*
16399 	 * Suppress messages if they are all the same pkt_reason; with
16400 	 * TQ, many (up to 256) are returned with the same pkt_reason.
16401 	 * If we are in panic, then suppress the retry messages.
16402 	 */
16403 	switch (flag) {
16404 	case SD_NO_RETRY_ISSUED:
16405 		msgp = "giving up";
16406 		break;
16407 	case SD_IMMEDIATE_RETRY_ISSUED:
16408 	case SD_DELAYED_RETRY_ISSUED:
16409 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16410 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16411 		    (sd_error_level != SCSI_ERR_ALL))) {
16412 			return;
16413 		}
16414 		msgp = "retrying command";
16415 		break;
16416 	default:
16417 		goto update_pkt_reason;
16418 	}
16419 
16420 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16421 	    scsi_rname(pktp->pkt_reason));
16422 
16423 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16424 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16425 
16426 update_pkt_reason:
16427 	/*
16428 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16429 	 * This is to prevent multiple console messages for the same failure
16430 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if and
16431 	 * when the command is retried successfully, because there may still
16432 	 * be more commands coming back with the same value of pktp->pkt_reason.
16433 	 */
16434 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16435 		un->un_last_pkt_reason = pktp->pkt_reason;
16436 	}
16437 }
16438 
16439 
16440 /*
16441  *    Function: sd_print_cmd_incomplete_msg
16442  *
16443  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16444  *
16445  *   Arguments: un - ptr to associated softstate
16446  *		bp - ptr to buf(9S) for the command
16447  *		arg - passed to sd_print_retry_msg()
16448  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16449  *			or SD_NO_RETRY_ISSUED
16450  *
16451  *     Context: May be called from interrupt context
16452  */
16453 
16454 static void
16455 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16456 	int code)
16457 {
16458 	dev_info_t	*dip;
16459 
16460 	ASSERT(un != NULL);
16461 	ASSERT(mutex_owned(SD_MUTEX(un)));
16462 	ASSERT(bp != NULL);
16463 
16464 	switch (code) {
16465 	case SD_NO_RETRY_ISSUED:
16466 		/* The command failed. Did someone turn off this target? */
16467 		if (un->un_state != SD_STATE_OFFLINE) {
16468 			/*
16469 			 * Suppress message if we are detaching and
16470 			 * device has been disconnected
16471 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
16472 			 * private interface and not part of the DDI
16473 			 */
16474 			dip = un->un_sd->sd_dev;
16475 			if (!(DEVI_IS_DETACHING(dip) &&
16476 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16477 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16478 				    "disk not responding to selection\n");
16479 			}
16480 			New_state(un, SD_STATE_OFFLINE);
16481 		}
16482 		break;
16483 
16484 	case SD_DELAYED_RETRY_ISSUED:
16485 	case SD_IMMEDIATE_RETRY_ISSUED:
16486 	default:
16487 		/* Command was successfully queued for retry */
16488 		sd_print_retry_msg(un, bp, arg, code);
16489 		break;
16490 	}
16491 }
16492 
16493 
16494 /*
16495  *    Function: sd_pkt_reason_cmd_incomplete
16496  *
16497  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16498  *
16499  *     Context: May be called from interrupt context
16500  */
16501 
16502 static void
16503 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16504 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16505 {
16506 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16507 
16508 	ASSERT(un != NULL);
16509 	ASSERT(mutex_owned(SD_MUTEX(un)));
16510 	ASSERT(bp != NULL);
16511 	ASSERT(xp != NULL);
16512 	ASSERT(pktp != NULL);
16513 
16514 	/* Do not do a reset if selection did not complete */
16515 	/* Note: Should this not just check the bit? */
16516 	if (pktp->pkt_state != STATE_GOT_BUS) {
16517 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16518 		sd_reset_target(un, pktp);
16519 	}
16520 
16521 	/*
16522 	 * If the target was not successfully selected, then set
16523 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16524 	 * with the target, and further retries and/or commands are
16525 	 * likely to take a long time.
16526 	 */
16527 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16528 		flag |= SD_RETRIES_FAILFAST;
16529 	}
16530 
16531 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16532 
16533 	sd_retry_command(un, bp, flag,
16534 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16535 }
16536 
16537 
16538 
16539 /*
16540  *    Function: sd_pkt_reason_cmd_tran_err
16541  *
16542  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16543  *
16544  *     Context: May be called from interrupt context
16545  */
16546 
16547 static void
16548 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16549 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16550 {
16551 	ASSERT(un != NULL);
16552 	ASSERT(mutex_owned(SD_MUTEX(un)));
16553 	ASSERT(bp != NULL);
16554 	ASSERT(xp != NULL);
16555 	ASSERT(pktp != NULL);
16556 
16557 	/*
16558 	 * Do not reset if we got a parity error, or if
16559 	 * selection did not complete.
16560 	 */
16561 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16562 	/* Note: Should this not just check the bit for pkt_state? */
16563 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16564 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16565 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16566 		sd_reset_target(un, pktp);
16567 	}
16568 
16569 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16570 
16571 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16572 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16573 }
16574 
16575 
16576 
16577 /*
16578  *    Function: sd_pkt_reason_cmd_reset
16579  *
16580  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16581  *
16582  *     Context: May be called from interrupt context
16583  */
16584 
16585 static void
16586 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16587 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16588 {
16589 	ASSERT(un != NULL);
16590 	ASSERT(mutex_owned(SD_MUTEX(un)));
16591 	ASSERT(bp != NULL);
16592 	ASSERT(xp != NULL);
16593 	ASSERT(pktp != NULL);
16594 
16595 	/* The target may still be running the command, so try to reset. */
16596 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16597 	sd_reset_target(un, pktp);
16598 
16599 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16600 
16601 	/*
16602 	 * If pkt_reason is CMD_RESET chances are that this pkt got
16603 	 * reset because another target on this bus caused it. The target
16604 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16605 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16606 	 */
16607 
16608 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16609 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16610 }
16611 
16612 
16613 
16614 
16615 /*
16616  *    Function: sd_pkt_reason_cmd_aborted
16617  *
16618  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16619  *
16620  *     Context: May be called from interrupt context
16621  */
16622 
16623 static void
16624 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16625 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16626 {
16627 	ASSERT(un != NULL);
16628 	ASSERT(mutex_owned(SD_MUTEX(un)));
16629 	ASSERT(bp != NULL);
16630 	ASSERT(xp != NULL);
16631 	ASSERT(pktp != NULL);
16632 
16633 	/* The target may still be running the command, so try to reset. */
16634 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16635 	sd_reset_target(un, pktp);
16636 
16637 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16638 
16639 	/*
16640 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
16641 	 * aborted because another target on this bus caused it. The target
16642 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16643 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16644 	 */
16645 
16646 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16647 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16648 }
16649 
16650 
16651 
16652 /*
16653  *    Function: sd_pkt_reason_cmd_timeout
16654  *
16655  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16656  *
16657  *     Context: May be called from interrupt context
16658  */
16659 
16660 static void
16661 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16662 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16663 {
16664 	ASSERT(un != NULL);
16665 	ASSERT(mutex_owned(SD_MUTEX(un)));
16666 	ASSERT(bp != NULL);
16667 	ASSERT(xp != NULL);
16668 	ASSERT(pktp != NULL);
16669 
16670 
16671 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16672 	sd_reset_target(un, pktp);
16673 
16674 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16675 
16676 	/*
16677 	 * A command timeout indicates that we could not establish
16678 	 * communication with the target, so set SD_RETRIES_FAILFAST
16679 	 * as further retries/commands are likely to take a long time.
16680 	 */
16681 	sd_retry_command(un, bp,
16682 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16683 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16684 }
16685 
16686 
16687 
16688 /*
16689  *    Function: sd_pkt_reason_cmd_unx_bus_free
16690  *
16691  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16692  *
16693  *     Context: May be called from interrupt context
16694  */
16695 
16696 static void
16697 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16698 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16699 {
16700 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16701 
16702 	ASSERT(un != NULL);
16703 	ASSERT(mutex_owned(SD_MUTEX(un)));
16704 	ASSERT(bp != NULL);
16705 	ASSERT(xp != NULL);
16706 	ASSERT(pktp != NULL);
16707 
16708 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16709 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16710 
16711 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
16712 	    sd_print_retry_msg : NULL;
16713 
16714 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16715 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16716 }
16717 
16718 
16719 /*
16720  *    Function: sd_pkt_reason_cmd_tag_reject
16721  *
16722  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16723  *
16724  *     Context: May be called from interrupt context
16725  */
16726 
16727 static void
16728 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16729 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16730 {
16731 	ASSERT(un != NULL);
16732 	ASSERT(mutex_owned(SD_MUTEX(un)));
16733 	ASSERT(bp != NULL);
16734 	ASSERT(xp != NULL);
16735 	ASSERT(pktp != NULL);
16736 
16737 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16738 	pktp->pkt_flags = 0;
16739 	un->un_tagflags = 0;
16740 	if (un->un_f_opt_queueing == TRUE) {
16741 		un->un_throttle = min(un->un_throttle, 3);
16742 	} else {
16743 		un->un_throttle = 1;
16744 	}
16745 	mutex_exit(SD_MUTEX(un));
16746 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
16747 	mutex_enter(SD_MUTEX(un));
16748 
16749 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16750 
16751 	/* Legacy behavior not to check retry counts here. */
16752 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
16753 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16754 }
16755 
16756 
16757 /*
16758  *    Function: sd_pkt_reason_default
16759  *
16760  * Description: Default recovery actions for SCSA pkt_reason values that
16761  *		do not have more explicit recovery actions.
16762  *
16763  *     Context: May be called from interrupt context
16764  */
16765 
16766 static void
16767 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
16768 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16769 {
16770 	ASSERT(un != NULL);
16771 	ASSERT(mutex_owned(SD_MUTEX(un)));
16772 	ASSERT(bp != NULL);
16773 	ASSERT(xp != NULL);
16774 	ASSERT(pktp != NULL);
16775 
16776 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16777 	sd_reset_target(un, pktp);
16778 
16779 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16780 
16781 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16782 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16783 }
16784 
16785 
16786 
16787 /*
16788  *    Function: sd_pkt_status_check_condition
16789  *
16790  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
16791  *
16792  *     Context: May be called from interrupt context
16793  */
16794 
16795 static void
16796 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
16797 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16798 {
16799 	ASSERT(un != NULL);
16800 	ASSERT(mutex_owned(SD_MUTEX(un)));
16801 	ASSERT(bp != NULL);
16802 	ASSERT(xp != NULL);
16803 	ASSERT(pktp != NULL);
16804 
16805 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
16806 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
16807 
16808 	/*
16809 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
16810 	 * command will be retried after the request sense). Otherwise, retry
16811 	 * the command. Note: we are issuing the request sense even though the
16812 	 * retry limit may have been reached for the failed command.
16813 	 */
16814 	if (un->un_f_arq_enabled == FALSE) {
16815 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16816 		    "no ARQ, sending request sense command\n");
16817 		sd_send_request_sense_command(un, bp, pktp);
16818 	} else {
16819 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16820 		    "ARQ, retrying request sense command\n");
16821 #if defined(__i386) || defined(__amd64)
16822 		/*
16823 		 * The SD_RETRY_DELAY value needs to be adjusted here
16824 		 * when SD_RETRY_DELAY changes in sddef.h.
16825 		 */
16826 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
16827 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
16828 		    NULL);
16829 #else
16830 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
16831 		    EIO, SD_RETRY_DELAY, NULL);
16832 #endif
16833 	}
16834 
16835 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
16836 }
16837 
16838 
16839 /*
16840  *    Function: sd_pkt_status_busy
16841  *
16842  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
16843  *
16844  *     Context: May be called from interrupt context
16845  */
16846 
16847 static void
16848 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16849 	struct scsi_pkt *pktp)
16850 {
16851 	ASSERT(un != NULL);
16852 	ASSERT(mutex_owned(SD_MUTEX(un)));
16853 	ASSERT(bp != NULL);
16854 	ASSERT(xp != NULL);
16855 	ASSERT(pktp != NULL);
16856 
16857 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16858 	    "sd_pkt_status_busy: entry\n");
16859 
16860 	/* If retries are exhausted, just fail the command. */
16861 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
16862 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16863 		    "device busy too long\n");
16864 		sd_return_failed_command(un, bp, EIO);
16865 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16866 		    "sd_pkt_status_busy: exit\n");
16867 		return;
16868 	}
16869 	xp->xb_retry_count++;
16870 
16871 	/*
16872 	 * Try to reset the target. However, we do not want to perform
16873 	 * more than one reset if the device continues to fail. The reset
16874 	 * will be performed when the retry count reaches the reset
16875 	 * threshold.  This threshold should be set such that at least
16876 	 * one retry is issued before the reset is performed.
16877 	 */
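	/*
	 * For example, with un_reset_retry_count == 5, retries 1 through 4
	 * proceed normally and the reset is attempted only when
	 * xb_retry_count reaches 5 (a minimum threshold of 2 is enforced
	 * below).
	 */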
16878 	if (xp->xb_retry_count ==
16879 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
16880 		int rval = 0;
16881 		mutex_exit(SD_MUTEX(un));
16882 		if (un->un_f_allow_bus_device_reset == TRUE) {
16883 			/*
16884 			 * First try to reset the LUN; if we cannot then
16885 			 * try to reset the target.
16886 			 */
16887 			if (un->un_f_lun_reset_enabled == TRUE) {
16888 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16889 				    "sd_pkt_status_busy: RESET_LUN\n");
16890 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
16891 			}
16892 			if (rval == 0) {
16893 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16894 				    "sd_pkt_status_busy: RESET_TARGET\n");
16895 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
16896 			}
16897 		}
16898 		if (rval == 0) {
16899 			/*
16900 			 * If the RESET_LUN and/or RESET_TARGET failed,
16901 			 * try RESET_ALL
16902 			 */
16903 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16904 			    "sd_pkt_status_busy: RESET_ALL\n");
16905 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
16906 		}
16907 		mutex_enter(SD_MUTEX(un));
16908 		if (rval == 0) {
16909 			/*
16910 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
16911 			 * At this point we give up & fail the command.
16912 			 */
16913 			sd_return_failed_command(un, bp, EIO);
16914 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16915 			    "sd_pkt_status_busy: exit (failed cmd)\n");
16916 			return;
16917 		}
16918 	}
16919 
16920 	/*
16921 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
16922 	 * we have already checked the retry counts above.
16923 	 */
16924 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
16925 	    EIO, SD_BSY_TIMEOUT, NULL);
16926 
16927 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16928 	    "sd_pkt_status_busy: exit\n");
16929 }
16930 
16931 
16932 /*
16933  *    Function: sd_pkt_status_reservation_conflict
16934  *
16935  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
16936  *		command status.
16937  *
16938  *     Context: May be called from interrupt context
16939  */
16940 
16941 static void
16942 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
16943 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16944 {
16945 	ASSERT(un != NULL);
16946 	ASSERT(mutex_owned(SD_MUTEX(un)));
16947 	ASSERT(bp != NULL);
16948 	ASSERT(xp != NULL);
16949 	ASSERT(pktp != NULL);
16950 
16951 	/*
16952 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then a reservation
16953 	 * conflict could be due to various reasons, such as incorrect keys or
16954 	 * not being registered or reserved, so we return EACCES to the caller.
16955 	 */
16956 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
16957 		int cmd = SD_GET_PKT_OPCODE(pktp);
16958 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
16959 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
16960 			sd_return_failed_command(un, bp, EACCES);
16961 			return;
16962 		}
16963 	}
16964 
16965 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
16966 
16967 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
16968 		if (sd_failfast_enable != 0) {
16969 			/* By definition, we must panic here.... */
16970 			sd_panic_for_res_conflict(un);
16971 			/*NOTREACHED*/
16972 		}
16973 		SD_ERROR(SD_LOG_IO, un,
16974 		    "sd_pkt_status_reservation_conflict: Disk Reserved\n");
16975 		sd_return_failed_command(un, bp, EACCES);
16976 		return;
16977 	}
16978 
16979 	/*
16980 	 * 1147670: retry only if sd_retry_on_reservation_conflict
16981 	 * property is set (default is 1). Retries will not succeed
16982 	 * on a disk reserved by another initiator. HA systems
16983 	 * may reset this via sd.conf to avoid these retries.
16984 	 *
16985 	 * Note: The legacy return code for this failure is EIO, however EACCES
16986 	 * seems more appropriate for a reservation conflict.
16987 	 */
16988 	if (sd_retry_on_reservation_conflict == 0) {
16989 		SD_ERROR(SD_LOG_IO, un,
16990 		    "sd_pkt_status_reservation_conflict: Device Reserved\n");
16991 		sd_return_failed_command(un, bp, EIO);
16992 		return;
16993 	}
16994 
16995 	/*
16996 	 * Retry the command if we can.
16997 	 *
16998 	 * Note: The legacy return code for this failure is EIO, however EACCES
16999 	 * seems more appropriate for a reservation conflict.
17000 	 */
17001 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17002 	    (clock_t)2, NULL);
17003 }
17004 
17005 
17006 
17007 /*
17008  *    Function: sd_pkt_status_qfull
17009  *
17010  * Description: Handle a QUEUE FULL condition from the target.  This can
17011  *		occur if the HBA does not handle the queue full condition.
17012  *		(Basically this means third-party HBAs, as Sun HBAs will
17013  *		handle the queue full condition.)  Note that if there are
17014  *		some commands already in the transport, then the queue full
17015  *		has occurred because the queue for this nexus is actually
17016  *		full. If there are no commands in the transport, then the
17017  *		queue full is resulting from some other initiator or lun
17018  *		consuming all the resources at the target.
17019  *
17020  *     Context: May be called from interrupt context
17021  */
17022 
17023 static void
17024 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17025 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17026 {
17027 	ASSERT(un != NULL);
17028 	ASSERT(mutex_owned(SD_MUTEX(un)));
17029 	ASSERT(bp != NULL);
17030 	ASSERT(xp != NULL);
17031 	ASSERT(pktp != NULL);
17032 
17033 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17034 	    "sd_pkt_status_qfull: entry\n");
17035 
17036 	/*
17037 	 * Just lower the QFULL throttle and retry the command.  Note that
17038 	 * we do not limit the number of retries here.
17039 	 */
17040 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17041 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17042 	    SD_RESTART_TIMEOUT, NULL);
17043 
17044 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17045 	    "sd_pkt_status_qfull: exit\n");
17046 }
17047 
17048 
17049 /*
17050  *    Function: sd_reset_target
17051  *
17052  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17053  *		RESET_TARGET, or RESET_ALL.
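 *		The resets escalate: RESET_LUN is tried first (when enabled),
 *		then RESET_TARGET, then RESET_ALL as a last resort.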
17054  *
17055  *     Context: May be called under interrupt context.
17056  */
17057 
17058 static void
17059 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17060 {
17061 	int rval = 0;
17062 
17063 	ASSERT(un != NULL);
17064 	ASSERT(mutex_owned(SD_MUTEX(un)));
17065 	ASSERT(pktp != NULL);
17066 
17067 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17068 
17069 	/*
17070 	 * No need to reset if the transport layer has already done so.
17071 	 */
17072 	if ((pktp->pkt_statistics &
17073 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17074 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17075 		    "sd_reset_target: no reset\n");
17076 		return;
17077 	}
17078 
17079 	mutex_exit(SD_MUTEX(un));
17080 
17081 	if (un->un_f_allow_bus_device_reset == TRUE) {
17082 		if (un->un_f_lun_reset_enabled == TRUE) {
17083 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17084 			    "sd_reset_target: RESET_LUN\n");
17085 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17086 		}
17087 		if (rval == 0) {
17088 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17089 			    "sd_reset_target: RESET_TARGET\n");
17090 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17091 		}
17092 	}
17093 
17094 	if (rval == 0) {
17095 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17096 		    "sd_reset_target: RESET_ALL\n");
17097 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17098 	}
17099 
17100 	mutex_enter(SD_MUTEX(un));
17101 
17102 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17103 }
17104 
17105 
17106 /*
17107  *    Function: sd_media_change_task
17108  *
17109  * Description: Recovery action for CDROM to become available.
17110  *
17111  *     Context: Executes in a taskq() thread context
17112  */
17113 
17114 static void
17115 sd_media_change_task(void *arg)
17116 {
17117 	struct	scsi_pkt	*pktp = arg;
17118 	struct	sd_lun		*un;
17119 	struct	buf		*bp;
17120 	struct	sd_xbuf		*xp;
17121 	int	err		= 0;
17122 	int	retry_count	= 0;
17123 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
17124 	struct	sd_sense_info	si;
17125 
17126 	ASSERT(pktp != NULL);
17127 	bp = (struct buf *)pktp->pkt_private;
17128 	ASSERT(bp != NULL);
17129 	xp = SD_GET_XBUF(bp);
17130 	ASSERT(xp != NULL);
17131 	un = SD_GET_UN(bp);
17132 	ASSERT(un != NULL);
17133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17134 	ASSERT(un->un_f_monitor_media_state);
17135 
17136 	si.ssi_severity = SCSI_ERR_INFO;
17137 	si.ssi_pfa_flag = FALSE;
17138 
17139 	/*
17140 	 * When a reset is issued on a CDROM, it takes a long time to
17141 	 * recover. The first few attempts to read capacity and other things
17142 	 * related to handling unit attention fail (with an ASC 0x4 and
17143 	 * ASCQ 0x1). In that case we want to do enough retries, while
17144 	 * limiting the retries in other cases of genuine failure, such as
17145 	 * no media in the drive.
17146 	 */
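	/*
	 * (The loop below starts with a tenth of the full retry budget and
	 * widens it to SD_UNIT_ATTENTION_RETRY only when EAGAIN indicates
	 * the becoming-ready case, sleeping 0.5 sec between attempts.)
	 */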
17147 	while (retry_count++ < retry_limit) {
17148 		if ((err = sd_handle_mchange(un)) == 0) {
17149 			break;
17150 		}
17151 		if (err == EAGAIN) {
17152 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17153 		}
17154 		/* Sleep for 0.5 sec. & try again */
17155 		delay(drv_usectohz(500000));
17156 	}
17157 
17158 	/*
17159 	 * Dispatch (retry or fail) the original command here,
17160 	 * along with appropriate console messages....
17161 	 *
17162 	 * Must grab the mutex before calling sd_retry_command,
17163 	 * sd_print_sense_msg and sd_return_failed_command.
17164 	 */
17165 	mutex_enter(SD_MUTEX(un));
17166 	if (err != SD_CMD_SUCCESS) {
17167 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17168 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17169 		si.ssi_severity = SCSI_ERR_FATAL;
17170 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17171 		sd_return_failed_command(un, bp, EIO);
17172 	} else {
17173 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17174 		    &si, EIO, (clock_t)0, NULL);
17175 	}
17176 	mutex_exit(SD_MUTEX(un));
17177 }
17178 
17179 
17180 
17181 /*
17182  *    Function: sd_handle_mchange
17183  *
17184  * Description: Perform geometry validation & other recovery when CDROM
17185  *		has been removed from drive.
17186  *
17187  * Return Code: 0 for success
17188  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17189  *		sd_send_scsi_READ_CAPACITY()
17190  *
17191  *     Context: Executes in a taskq() thread context
17192  */
17193 
17194 static int
17195 sd_handle_mchange(struct sd_lun *un)
17196 {
17197 	uint64_t	capacity;
17198 	uint32_t	lbasize;
17199 	int		rval;
17200 
17201 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17202 	ASSERT(un->un_f_monitor_media_state);
17203 
17204 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17205 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17206 		return (rval);
17207 	}
17208 
17209 	mutex_enter(SD_MUTEX(un));
17210 	sd_update_block_info(un, lbasize, capacity);
17211 
17212 	if (un->un_errstats != NULL) {
17213 		struct	sd_errstats *stp =
17214 		    (struct sd_errstats *)un->un_errstats->ks_data;
17215 		stp->sd_capacity.value.ui64 = (uint64_t)
17216 		    ((uint64_t)un->un_blockcount *
17217 		    (uint64_t)un->un_tgt_blocksize);
17218 	}
17219 
17220 	/*
17221 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17222 	 * valid geometry.
17223 	 */
17224 	mutex_exit(SD_MUTEX(un));
17225 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17226 
17227 
17228 	if (cmlb_validate(un->un_cmlbhandle, 0,
17229 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17230 		return (EIO);
17231 	} else {
17232 		if (un->un_f_pkstats_enabled) {
17233 			sd_set_pstats(un);
17234 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17235 			    "sd_handle_mchange: un:0x%p pstats created and "
17236 			    "set\n", un);
17237 		}
17238 	}
17239 
17240 
17241 	/*
17242 	 * Try to lock the door
17243 	 */
17244 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17245 	    SD_PATH_DIRECT_PRIORITY));
17246 }
17247 
17248 
17249 /*
17250  *    Function: sd_send_scsi_DOORLOCK
17251  *
17252  * Description: Issue the scsi DOOR LOCK command
17253  *
17254  *   Arguments: un    - pointer to driver soft state (unit) structure for
17255  *			this target.
17256  *		flag  - SD_REMOVAL_ALLOW
17257  *			SD_REMOVAL_PREVENT
17258  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17259  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17260  *			to use the USCSI "direct" chain and bypass the normal
17261  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17262  *			command is issued as part of an error recovery action.
17263  *
17264  * Return Code: 0   - Success
17265  *		errno return code from sd_send_scsi_cmd()
17266  *
17267  *     Context: Can sleep.
17268  */
17269 
17270 static int
17271 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17272 {
17273 	union scsi_cdb		cdb;
17274 	struct uscsi_cmd	ucmd_buf;
17275 	struct scsi_extended_sense	sense_buf;
17276 	int			status;
17277 
17278 	ASSERT(un != NULL);
17279 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17280 
17281 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17282 
17283 	/* already determined doorlock is not supported, fake success */
17284 	if (un->un_f_doorlock_supported == FALSE) {
17285 		return (0);
17286 	}
17287 
17288 	/*
17289 	 * If we are ejecting and see an SD_REMOVAL_PREVENT
17290 	 * ignore the command so we can complete the eject
17291 	 * operation.
17292 	 */
17293 	if (flag == SD_REMOVAL_PREVENT) {
17294 		mutex_enter(SD_MUTEX(un));
17295 		if (un->un_f_ejecting == TRUE) {
17296 			mutex_exit(SD_MUTEX(un));
17297 			return (EAGAIN);
17298 		}
17299 		mutex_exit(SD_MUTEX(un));
17300 	}
17301 
17302 	bzero(&cdb, sizeof (cdb));
17303 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17304 
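	/*
	 * SCMD_DOORLOCK is the PREVENT ALLOW MEDIUM REMOVAL command; the
	 * SD_REMOVAL_* flag value maps directly onto the Prevent field in
	 * byte 4 of the CDB.
	 */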
17305 	cdb.scc_cmd = SCMD_DOORLOCK;
17306 	cdb.cdb_opaque[4] = (uchar_t)flag;
17307 
17308 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17309 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17310 	ucmd_buf.uscsi_bufaddr	= NULL;
17311 	ucmd_buf.uscsi_buflen	= 0;
17312 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17313 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17314 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17315 	ucmd_buf.uscsi_timeout	= 15;
17316 
17317 	SD_TRACE(SD_LOG_IO, un,
17318 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17319 
17320 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17321 	    UIO_SYSSPACE, path_flag);
17322 
17323 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17324 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17325 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17326 		/* fake success and skip subsequent doorlock commands */
17327 		un->un_f_doorlock_supported = FALSE;
17328 		return (0);
17329 	}
17330 
17331 	return (status);
17332 }
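/*
 * Illustrative usage (editor's sketch, not new driver logic): lock the
 * door as part of an error recovery action, bypassing the normal command
 * waitq:
 *
 *	rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
 *	    SD_PATH_DIRECT_PRIORITY);
 *
 * as done from sd_handle_mchange() above.
 */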
17333 
17334 /*
17335  *    Function: sd_send_scsi_READ_CAPACITY
17336  *
17337  * Description: This routine uses the scsi READ CAPACITY command to determine
17338  *		the device capacity in number of blocks and the device native
17339  *		block size. If this function returns a failure, then the
17340  *		values in *capp and *lbap are undefined.  If the capacity
17341  *		returned is 0xffffffff then the lun is too large for a
17342  *		normal READ CAPACITY command and the results of a
17343  *		READ CAPACITY 16 will be used instead.
17344  *
17345  *   Arguments: un   - ptr to soft state struct for the target
17346  *		capp - ptr to unsigned 64-bit variable to receive the
17347  *			capacity value from the command.
17348  *		lbap - ptr to unsigned 32-bit varaible to receive the
17349  *		lbap - ptr to unsigned 32-bit variable to receive the
17350  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17351  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17352  *			to use the USCSI "direct" chain and bypass the normal
17353  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17354  *			command is issued as part of an error recovery action.
17355  *
17356  * Return Code: 0   - Success
17357  *		EIO - IO error
17358  *		EACCES - Reservation conflict detected
17359  *		EAGAIN - Device is becoming ready
17360  *		errno return code from sd_send_scsi_cmd()
17361  *
17362  *     Context: Can sleep.  Blocks until command completes.
17363  */
17364 
17365 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17366 
17367 static int
17368 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17369 	int path_flag)
17370 {
17371 	struct	scsi_extended_sense	sense_buf;
17372 	struct	uscsi_cmd	ucmd_buf;
17373 	union	scsi_cdb	cdb;
17374 	uint32_t		*capacity_buf;
17375 	uint64_t		capacity;
17376 	uint32_t		lbasize;
17377 	int			status;
17378 
17379 	ASSERT(un != NULL);
17380 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17381 	ASSERT(capp != NULL);
17382 	ASSERT(lbap != NULL);
17383 
17384 	SD_TRACE(SD_LOG_IO, un,
17385 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17386 
17387 	/*
17388 	 * First send a READ_CAPACITY command to the target.
17389 	 * (This command is mandatory under SCSI-2.)
17390 	 *
17391 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17392 	 * Medium Indicator bit is cleared.  The address field must be
17393 	 * zero if the PMI bit is zero.
17394 	 */
17395 	bzero(&cdb, sizeof (cdb));
17396 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17397 
17398 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17399 
17400 	cdb.scc_cmd = SCMD_READ_CAPACITY;
17401 
17402 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17403 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17404 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17405 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17406 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17407 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17408 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17409 	ucmd_buf.uscsi_timeout	= 60;
17410 
17411 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17412 	    UIO_SYSSPACE, path_flag);
17413 
17414 	switch (status) {
17415 	case 0:
17416 		/* Return failure if we did not get valid capacity data. */
17417 		if (ucmd_buf.uscsi_resid != 0) {
17418 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17419 			return (EIO);
17420 		}
17421 
17422 		/*
17423 		 * Read capacity and block size from the READ CAPACITY 10 data.
17424 		 * This data may be adjusted later due to device specific
17425 		 * issues.
17426 		 *
17427 		 * According to the SCSI spec, the READ CAPACITY 10
17428 		 * command returns the following:
17429 		 *
17430 		 *  bytes 0-3: Maximum logical block address available.
17431 		 *		(MSB in byte:0 & LSB in byte:3)
17432 		 *
17433 		 *  bytes 4-7: Block length in bytes
17434 		 *		(MSB in byte:4 & LSB in byte:7)
17435 		 *
17436 		 */
17437 		capacity = BE_32(capacity_buf[0]);
17438 		lbasize = BE_32(capacity_buf[1]);
17439 
17440 		/*
17441 		 * Done with capacity_buf
17442 		 */
17443 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17444 
17445 		/*
17446 		 * if the reported capacity is set to all 0xf's, then
17447 		 * this disk is too large and requires SBC-2 commands.
17448 		 * Reissue the request using READ CAPACITY 16.
17449 		 */
17450 		if (capacity == 0xffffffff) {
17451 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17452 			    &lbasize, path_flag);
17453 			if (status != 0) {
17454 				return (status);
17455 			}
17456 		}
17457 		break;	/* Success! */
17458 	case EIO:
17459 		switch (ucmd_buf.uscsi_status) {
17460 		case STATUS_RESERVATION_CONFLICT:
17461 			status = EACCES;
17462 			break;
17463 		case STATUS_CHECK:
17464 			/*
17465 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17466 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17467 			 */
17468 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17469 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17470 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17471 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17472 				return (EAGAIN);
17473 			}
17474 			break;
17475 		default:
17476 			break;
17477 		}
17478 		/* FALLTHRU */
17479 	default:
17480 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17481 		return (status);
17482 	}
17483 
17484 	/*
17485 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17486 	 * (2352 and 0 are common) so for these devices always force the value
17487 	 * to 2048 as required by the ATAPI specs.
17488 	 */
17489 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17490 		lbasize = 2048;
17491 	}
17492 
17493 	/*
17494 	 * Get the maximum LBA value from the READ CAPACITY data.
17495 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17496 	 * was cleared when issuing the command. This means that the LBA
17497 	 * returned from the device is the LBA of the last logical block
17498 	 * on the logical unit.  The actual logical block count will be
17499 	 * this value plus one.
17500 	 *
17501 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17502 	 * so scale the capacity value to reflect this.
17503 	 */
17504 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
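	/*
	 * For example (illustrative values only): a device reporting a
	 * maximum LBA of 262143 with a 2048-byte block size, on a system
	 * using 512-byte blocks, yields (262143 + 1) * (2048 / 512) =
	 * 1048576 system blocks.
	 */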
17505 
17506 	/*
17507 	 * Copy the values from the READ CAPACITY command into the space
17508 	 * provided by the caller.
17509 	 */
17510 	*capp = capacity;
17511 	*lbap = lbasize;
17512 
17513 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17514 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17515 
17516 	/*
17517 	 * Both the lbasize and capacity from the device must be nonzero,
17518 	 * otherwise we assume that the values are not valid and return
17519 	 * failure to the caller. (4203735)
17520 	 */
17521 	if ((capacity == 0) || (lbasize == 0)) {
17522 		return (EIO);
17523 	}
17524 
17525 	return (0);
17526 }
17527 
17528 /*
17529  *    Function: sd_send_scsi_READ_CAPACITY_16
17530  *
17531  * Description: This routine uses the scsi READ CAPACITY 16 command to
17532  *		determine the device capacity in number of blocks and the
17533  *		device native block size.  If this function returns a failure,
17534  *		then the values in *capp and *lbap are undefined.
17535  *		This routine should always be called by
17536  *		sd_send_scsi_READ_CAPACITY which will apply any device
17537  *		specific adjustments to capacity and lbasize.
17538  *
17539  *   Arguments: un   - ptr to soft state struct for the target
17540  *		capp - ptr to unsigned 64-bit variable to receive the
17541  *			capacity value from the command.
17542  *		lbap - ptr to unsigned 32-bit variable to receive the
17543  *			block size value from the command
17544  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17545  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17546  *			to use the USCSI "direct" chain and bypass the normal
17547  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17548  *			this command is issued as part of an error recovery
17549  *			action.
17550  *
17551  * Return Code: 0   - Success
17552  *		EIO - IO error
17553  *		EACCES - Reservation conflict detected
17554  *		EAGAIN - Device is becoming ready
17555  *		errno return code from sd_send_scsi_cmd()
17556  *
17557  *     Context: Can sleep.  Blocks until command completes.
17558  */
17559 
17560 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17561 
17562 static int
17563 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17564 	uint32_t *lbap, int path_flag)
17565 {
17566 	struct	scsi_extended_sense	sense_buf;
17567 	struct	uscsi_cmd	ucmd_buf;
17568 	union	scsi_cdb	cdb;
17569 	uint64_t		*capacity16_buf;
17570 	uint64_t		capacity;
17571 	uint32_t		lbasize;
17572 	int			status;
17573 
17574 	ASSERT(un != NULL);
17575 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17576 	ASSERT(capp != NULL);
17577 	ASSERT(lbap != NULL);
17578 
17579 	SD_TRACE(SD_LOG_IO, un,
17580 	    "sd_send_scsi_READ_CAPACITY_16: entry: un:0x%p\n", un);
17581 
17582 	/*
17583 	 * First send a READ_CAPACITY_16 command to the target.
17584 	 *
17585 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17586 	 * Medium Indicator bit is cleared.  The address field must be
17587 	 * zero if the PMI bit is zero.
17588 	 */
17589 	bzero(&cdb, sizeof (cdb));
17590 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17591 
17592 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17593 
17594 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17595 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17596 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17597 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17598 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17599 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17600 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17601 	ucmd_buf.uscsi_timeout	= 60;
17602 
17603 	/*
17604 	 * Read Capacity (16) is a Service Action In command.  One
17605 	 * command byte (0x9E) is overloaded for multiple operations,
17606 	 * with the second CDB byte specifying the desired operation
17607 	 */
17608 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17609 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
17610 
17611 	/*
17612 	 * Fill in allocation length field
17613 	 */
17614 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17615 
17616 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17617 	    UIO_SYSSPACE, path_flag);
17618 
17619 	switch (status) {
17620 	case 0:
17621 		/* Return failure if we did not get valid capacity data. */
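		/*
		 * (A residual greater than 20 implies that fewer than the 12
		 * bytes holding the capacity and block length arrived,
		 * assuming the 32-byte READ CAPACITY 16 parameter data.)
		 */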
17622 		if (ucmd_buf.uscsi_resid > 20) {
17623 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17624 			return (EIO);
17625 		}
17626 
17627 		/*
17628 		 * Read capacity and block size from the READ CAPACITY 16 data.
17629 		 * This data may be adjusted later due to device specific
17630 		 * issues.
17631 		 *
17632 		 * According to the SCSI spec, the READ CAPACITY 16
17633 		 * command returns the following:
17634 		 *
17635 		 *  bytes 0-7: Maximum logical block address available.
17636 		 *		(MSB in byte:0 & LSB in byte:7)
17637 		 *
17638 		 *  bytes 8-11: Block length in bytes
17639 		 *		(MSB in byte:8 & LSB in byte:11)
17640 		 *
17641 		 */
17642 		capacity = BE_64(capacity16_buf[0]);
17643 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
17644 
17645 		/*
17646 		 * Done with capacity16_buf
17647 		 */
17648 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17649 
17650 		/*
17651 		 * if the reported capacity is set to all 0xf's, then
17652 		 * this disk is too large.  This could only happen with
17653 		 * a device that supports LBAs larger than 64 bits which
17654 		 * are not defined by any current T10 standards.
17655 		 */
17656 		if (capacity == 0xffffffffffffffff) {
17657 			return (EIO);
17658 		}
17659 		break;	/* Success! */
17660 	case EIO:
17661 		switch (ucmd_buf.uscsi_status) {
17662 		case STATUS_RESERVATION_CONFLICT:
17663 			status = EACCES;
17664 			break;
17665 		case STATUS_CHECK:
17666 			/*
17667 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17668 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17669 			 */
17670 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17671 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17672 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17673 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17674 				return (EAGAIN);
17675 			}
17676 			break;
17677 		default:
17678 			break;
17679 		}
17680 		/* FALLTHRU */
17681 	default:
17682 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17683 		return (status);
17684 	}
17685 
17686 	*capp = capacity;
17687 	*lbap = lbasize;
17688 
17689 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17690 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17691 
17692 	return (0);
17693 }
17694 
17695 
17696 /*
17697  *    Function: sd_send_scsi_START_STOP_UNIT
17698  *
17699  * Description: Issue a scsi START STOP UNIT command to the target.
17700  *
17701  *   Arguments: un    - pointer to driver soft state (unit) structure for
17702  *			this target.
17703  *		flag  - SD_TARGET_START
17704  *			SD_TARGET_STOP
17705  *			SD_TARGET_EJECT
17706  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17707  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17708  *			to use the USCSI "direct" chain and bypass the normal
17709  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17710  *			command is issued as part of an error recovery action.
17711  *
17712  * Return Code: 0   - Success
17713  *		EIO - IO error
17714  *		EACCES - Reservation conflict detected
17715  *		ENXIO  - Not Ready, medium not present
17716  *		errno return code from sd_send_scsi_cmd()
17717  *
17718  *     Context: Can sleep.
17719  */
17720 
17721 static int
17722 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17723 {
17724 	struct	scsi_extended_sense	sense_buf;
17725 	union scsi_cdb		cdb;
17726 	struct uscsi_cmd	ucmd_buf;
17727 	int			status;
17728 
17729 	ASSERT(un != NULL);
17730 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17731 
17732 	SD_TRACE(SD_LOG_IO, un,
17733 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
17734 
17735 	if (un->un_f_check_start_stop &&
17736 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
17737 	    (un->un_f_start_stop_supported != TRUE)) {
17738 		return (0);
17739 	}
17740 
17741 	/*
17742 	 * If we are performing an eject operation and
17743 	 * we receive any command other than SD_TARGET_EJECT
17744 	 * we should immediately return.
17745 	 */
17746 	if (flag != SD_TARGET_EJECT) {
17747 		mutex_enter(SD_MUTEX(un));
17748 		if (un->un_f_ejecting == TRUE) {
17749 			mutex_exit(SD_MUTEX(un));
17750 			return (EAGAIN);
17751 		}
17752 		mutex_exit(SD_MUTEX(un));
17753 	}
17754 
17755 	bzero(&cdb, sizeof (cdb));
17756 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17757 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
17758 
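	/*
	 * Byte 4 of the START STOP UNIT CDB carries the Start and LoEj
	 * bits, so the SD_TARGET_* flag value is placed there directly.
	 */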
17759 	cdb.scc_cmd = SCMD_START_STOP;
17760 	cdb.cdb_opaque[4] = (uchar_t)flag;
17761 
17762 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17763 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17764 	ucmd_buf.uscsi_bufaddr	= NULL;
17765 	ucmd_buf.uscsi_buflen	= 0;
17766 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17767 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
17768 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17769 	ucmd_buf.uscsi_timeout	= 200;
17770 
17771 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17772 	    UIO_SYSSPACE, path_flag);
17773 
17774 	switch (status) {
17775 	case 0:
17776 		break;	/* Success! */
17777 	case EIO:
17778 		switch (ucmd_buf.uscsi_status) {
17779 		case STATUS_RESERVATION_CONFLICT:
17780 			status = EACCES;
17781 			break;
17782 		case STATUS_CHECK:
17783 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
17784 				switch (scsi_sense_key(
17785 				    (uint8_t *)&sense_buf)) {
17786 				case KEY_ILLEGAL_REQUEST:
17787 					status = ENOTSUP;
17788 					break;
17789 				case KEY_NOT_READY:
17790 					if (scsi_sense_asc(
17791 					    (uint8_t *)&sense_buf)
17792 					    == 0x3A) {
17793 						status = ENXIO;
17794 					}
17795 					break;
17796 				default:
17797 					break;
17798 				}
17799 			}
17800 			break;
17801 		default:
17802 			break;
17803 		}
17804 		break;
17805 	default:
17806 		break;
17807 	}
17808 
17809 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
17810 
17811 	return (status);
17812 }
17813 
17814 
17815 /*
17816  *    Function: sd_start_stop_unit_callback
17817  *
17818  * Description: timeout(9F) callback to begin recovery process for a
17819  *		device that has spun down.
17820  *
17821  *   Arguments: arg - pointer to associated softstate struct.
17822  *
17823  *     Context: Executes in a timeout(9F) thread context
17824  */
17825 
17826 static void
17827 sd_start_stop_unit_callback(void *arg)
17828 {
17829 	struct sd_lun	*un = arg;
17830 	ASSERT(un != NULL);
17831 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17832 
17833 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
17834 
17835 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
17836 }
17837 
17838 
17839 /*
17840  *    Function: sd_start_stop_unit_task
17841  *
17842  * Description: Recovery procedure when a drive is spun down.
17843  *
17844  *   Arguments: arg - pointer to associated softstate struct.
17845  *
17846  *     Context: Executes in a taskq() thread context
17847  */
17848 
17849 static void
17850 sd_start_stop_unit_task(void *arg)
17851 {
17852 	struct sd_lun	*un = arg;
17853 
17854 	ASSERT(un != NULL);
17855 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17856 
17857 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
17858 
17859 	/*
17860 	 * Some unformatted drives report not ready error, no need to
17861 	 * restart if format has been initiated.
17862 	 */
17863 	mutex_enter(SD_MUTEX(un));
17864 	if (un->un_f_format_in_progress == TRUE) {
17865 		mutex_exit(SD_MUTEX(un));
17866 		return;
17867 	}
17868 	mutex_exit(SD_MUTEX(un));
17869 
17870 	/*
17871 	 * When a START STOP command is issued from here, it is part of a
17872 	 * failure recovery operation and must be issued before any other
17873 	 * commands, including any pending retries. Thus it must be sent
17874 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
17875 	 * succeeds or not, we will start I/O after the attempt.
17876 	 */
17877 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
17878 	    SD_PATH_DIRECT_PRIORITY);
17879 
17880 	/*
17881 	 * The above call blocks until the START_STOP_UNIT command completes.
17882 	 * Now that it has completed, we must re-try the original IO that
17883 	 * received the NOT READY condition in the first place. There are
17884 	 * three possible conditions here:
17885 	 *
17886 	 *  (1) The original IO is on un_retry_bp.
17887 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
17888 	 *	is NULL.
17889 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
17890 	 *	points to some other, unrelated bp.
17891 	 *
17892 	 * For each case, we must call sd_start_cmds() with un_retry_bp
17893 	 * as the argument. If un_retry_bp is NULL, this will initiate
17894 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
17895 	 * then this will process the bp on un_retry_bp. That may or may not
17896 	 * be the original IO, but that does not matter: the important thing
17897 	 * is to keep the IO processing going at this point.
17898 	 *
17899 	 * Note: This is a very specific error recovery sequence associated
17900 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
17901 	 * serialize the I/O with completion of the spin-up.
17902 	 */
17903 	mutex_enter(SD_MUTEX(un));
17904 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17905 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
17906 	    un, un->un_retry_bp);
17907 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
17908 	sd_start_cmds(un, un->un_retry_bp);
17909 	mutex_exit(SD_MUTEX(un));
17910 
17911 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
17912 }
17913 
17914 
17915 /*
17916  *    Function: sd_send_scsi_INQUIRY
17917  *
17918  * Description: Issue the scsi INQUIRY command.
17919  *
17920  *   Arguments: un
17921  *		bufaddr
17922  *		buflen
17923  *		evpd
17924  *		page_code
17925  *		residp
17926  *
17927  * Return Code: 0   - Success
17928  *		errno return code from sd_send_scsi_cmd()
17929  *
17930  *     Context: Can sleep. Does not return until command is completed.
17931  */
17932 
17933 static int
17934 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
17935 	uchar_t evpd, uchar_t page_code, size_t *residp)
17936 {
17937 	union scsi_cdb		cdb;
17938 	struct uscsi_cmd	ucmd_buf;
17939 	int			status;
17940 
17941 	ASSERT(un != NULL);
17942 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17943 	ASSERT(bufaddr != NULL);
17944 
17945 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
17946 
17947 	bzero(&cdb, sizeof (cdb));
17948 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17949 	bzero(bufaddr, buflen);
17950 
17951 	cdb.scc_cmd = SCMD_INQUIRY;
17952 	cdb.cdb_opaque[1] = evpd;
17953 	cdb.cdb_opaque[2] = page_code;
17954 	FORMG0COUNT(&cdb, buflen);
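	/*
	 * (Group 0 CDBs carry a one-byte allocation length, so buflen
	 * must fit in eight bits.)
	 */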
17955 
17956 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17957 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17958 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
17959 	ucmd_buf.uscsi_buflen	= buflen;
17960 	ucmd_buf.uscsi_rqbuf	= NULL;
17961 	ucmd_buf.uscsi_rqlen	= 0;
17962 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
17963 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
17964 
17965 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17966 	    UIO_SYSSPACE, SD_PATH_DIRECT);
17967 
17968 	if ((status == 0) && (residp != NULL)) {
17969 		*residp = ucmd_buf.uscsi_resid;
17970 	}
17971 
17972 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
17973 
17974 	return (status);
17975 }
17976 
17977 
17978 /*
17979  *    Function: sd_send_scsi_TEST_UNIT_READY
17980  *
17981  * Description: Issue the scsi TEST UNIT READY command.
17982  *		This routine can be told to set the flag USCSI_DIAGNOSE to
17983  *		prevent retrying failed commands. Use this when the intent
17984  *		is either to check for device readiness, to clear a Unit
17985  *		Attention, or to clear any outstanding sense data.
17986  *		However under specific conditions the expected behavior
17987  *		is for retries to bring a device ready, so use the flag
17988  *		with caution.
17989  *
17990  *   Arguments: un
17991  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
17992  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
17993  *			0: don't check for media present, do retries on cmd.
17994  *
17995  * Return Code: 0   - Success
17996  *		EIO - IO error
17997  *		EACCES - Reservation conflict detected
17998  *		ENXIO  - Not Ready, medium not present
17999  *		errno return code from sd_send_scsi_cmd()
18000  *
18001  *     Context: Can sleep. Does not return until command is completed.
18002  */
18003 
18004 static int
18005 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18006 {
18007 	struct	scsi_extended_sense	sense_buf;
18008 	union scsi_cdb		cdb;
18009 	struct uscsi_cmd	ucmd_buf;
18010 	int			status;
18011 
18012 	ASSERT(un != NULL);
18013 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18014 
18015 	SD_TRACE(SD_LOG_IO, un,
18016 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18017 
18018 	/*
18019 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
18020 	 * timeouts when they receive a TUR and the queue is not empty. Check
18021 	 * the configuration flag set during attach (indicating the drive has
18022 	 * this firmware bug) and un_ncmds_in_transport before issuing the
18023 	 * TUR. If there are pending commands, return success; this is a bit
18024 	 * arbitrary but is OK for non-removables (i.e. the eliteI disks) and
18025 	 * non-clustering configurations.
18027 	 */
18028 	if (un->un_f_cfg_tur_check == TRUE) {
18029 		mutex_enter(SD_MUTEX(un));
18030 		if (un->un_ncmds_in_transport != 0) {
18031 			mutex_exit(SD_MUTEX(un));
18032 			return (0);
18033 		}
18034 		mutex_exit(SD_MUTEX(un));
18035 	}
18036 
18037 	bzero(&cdb, sizeof (cdb));
18038 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18039 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18040 
18041 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18042 
18043 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18044 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18045 	ucmd_buf.uscsi_bufaddr	= NULL;
18046 	ucmd_buf.uscsi_buflen	= 0;
18047 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18048 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18049 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18050 
18051 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18052 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18053 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18054 	}
18055 	ucmd_buf.uscsi_timeout	= 60;
18056 
18057 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18058 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18059 	    SD_PATH_STANDARD));
18060 
18061 	switch (status) {
18062 	case 0:
18063 		break;	/* Success! */
18064 	case EIO:
18065 		switch (ucmd_buf.uscsi_status) {
18066 		case STATUS_RESERVATION_CONFLICT:
18067 			status = EACCES;
18068 			break;
18069 		case STATUS_CHECK:
18070 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18071 				break;
18072 			}
18073 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18074 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18075 				KEY_NOT_READY) &&
18076 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18077 				status = ENXIO;
18078 			}
18079 			break;
18080 		default:
18081 			break;
18082 		}
18083 		break;
18084 	default:
18085 		break;
18086 	}
18087 
18088 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18089 
18090 	return (status);
18091 }
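/*
 * Illustrative usage (editor's sketch, not new driver logic): check for
 * media while preventing HBA-level retries of the TUR:
 *
 *	rval = sd_send_scsi_TEST_UNIT_READY(un,
 *	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR);
 *
 * where a return of ENXIO indicates that no medium is present.
 */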
18092 
18093 
18094 /*
18095  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18096  *
18097  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18098  *
18099  *   Arguments: un - ptr to soft state struct for the target
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV
 *		data_len - length of the data buffer (0 if data_bufp is NULL)
 *		data_bufp - buffer to receive the command data; if NULL, a
 *			MHIOC_RESV_KEY_SIZE buffer is allocated internally
18100  *
18101  * Return Code: 0   - Success
18102  *		EACCES
18103  *		ENOTSUP
18104  *		errno return code from sd_send_scsi_cmd()
18105  *
18106  *     Context: Can sleep. Does not return until command is completed.
18107  */
18108 
18109 static int
18110 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18111 	uint16_t data_len, uchar_t *data_bufp)
18112 {
18113 	struct scsi_extended_sense	sense_buf;
18114 	union scsi_cdb		cdb;
18115 	struct uscsi_cmd	ucmd_buf;
18116 	int			status;
18117 	int			no_caller_buf = FALSE;
18118 
18119 	ASSERT(un != NULL);
18120 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18121 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18122 
18123 	SD_TRACE(SD_LOG_IO, un,
18124 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18125 
18126 	bzero(&cdb, sizeof (cdb));
18127 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18128 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18129 	if (data_bufp == NULL) {
18130 		/* Allocate a default buf if the caller did not give one */
18131 		ASSERT(data_len == 0);
18132 		data_len  = MHIOC_RESV_KEY_SIZE;
18133 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18134 		no_caller_buf = TRUE;
18135 	}
18136 
18137 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18138 	cdb.cdb_opaque[1] = usr_cmd;
18139 	FORMG1COUNT(&cdb, data_len);
18140 
18141 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18142 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18143 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18144 	ucmd_buf.uscsi_buflen	= data_len;
18145 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18146 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18147 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18148 	ucmd_buf.uscsi_timeout	= 60;
18149 
18150 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18151 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18152 
18153 	switch (status) {
18154 	case 0:
18155 		break;	/* Success! */
18156 	case EIO:
18157 		switch (ucmd_buf.uscsi_status) {
18158 		case STATUS_RESERVATION_CONFLICT:
18159 			status = EACCES;
18160 			break;
18161 		case STATUS_CHECK:
18162 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18163 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18164 				KEY_ILLEGAL_REQUEST)) {
18165 				status = ENOTSUP;
18166 			}
18167 			break;
18168 		default:
18169 			break;
18170 		}
18171 		break;
18172 	default:
18173 		break;
18174 	}
18175 
18176 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18177 
18178 	if (no_caller_buf == TRUE) {
18179 		kmem_free(data_bufp, data_len);
18180 	}
18181 
18182 	return (status);
18183 }
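
/*
 * Illustrative sketch only (not part of the driver): one way a caller
 * might use sd_send_scsi_PERSISTENT_RESERVE_IN() to count the registered
 * keys. Per SPC-3, the READ KEYS parameter data carries a 4-byte
 * PRGENERATION, a 4-byte big-endian ADDITIONAL LENGTH, and then the
 * 8-byte keys. The helper name sd_example_count_keys and the 256-byte
 * buffer size are assumptions for illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_count_keys(struct sd_lun *un, int *nkeys)
{
	uchar_t		*buf;
	uint32_t	add_len;
	int		rval;

	buf = kmem_zalloc(256, KM_SLEEP);
	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
	    256, buf);
	if (rval == 0) {
		/* ADDITIONAL LENGTH is big-endian at bytes 4-7 */
		add_len = ((uint32_t)buf[4] << 24) |
		    ((uint32_t)buf[5] << 16) |
		    ((uint32_t)buf[6] << 8) | buf[7];
		*nkeys = (int)(add_len / MHIOC_RESV_KEY_SIZE);
	}
	kmem_free(buf, 256);
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */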
18184 
18185 
18186 /*
18187  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18188  *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
 *		commands to the device.
 *
 *   Arguments: un  -   Pointer to soft state struct for the target.
 *		usr_cmd SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user provided pointer to a register, reserve
 *			descriptor, preempt and abort, or register and
 *			ignore key structure (mhioc_register_t,
 *			mhioc_resv_desc_t, mhioc_preemptandabort_t,
 *			mhioc_registerandignorekey_t)
18201  *
18202  * Return Code: 0   - Success
18203  *		EACCES
18204  *		ENOTSUP
18205  *		errno return code from sd_send_scsi_cmd()
18206  *
18207  *     Context: Can sleep. Does not return until command is completed.
18208  */
18209 
18210 static int
18211 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18212 	uchar_t	*usr_bufp)
18213 {
18214 	struct scsi_extended_sense	sense_buf;
18215 	union scsi_cdb		cdb;
18216 	struct uscsi_cmd	ucmd_buf;
18217 	int			status;
18218 	uchar_t			data_len = sizeof (sd_prout_t);
18219 	sd_prout_t		*prp;
18220 
18221 	ASSERT(un != NULL);
18222 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18223 	ASSERT(data_len == 24);	/* required by scsi spec */
18224 
18225 	SD_TRACE(SD_LOG_IO, un,
18226 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18227 
18228 	if (usr_bufp == NULL) {
18229 		return (EINVAL);
18230 	}
18231 
18232 	bzero(&cdb, sizeof (cdb));
18233 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18234 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18235 	prp = kmem_zalloc(data_len, KM_SLEEP);
18236 
18237 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18238 	cdb.cdb_opaque[1] = usr_cmd;
18239 	FORMG1COUNT(&cdb, data_len);
18240 
18241 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18242 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18243 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18244 	ucmd_buf.uscsi_buflen	= data_len;
18245 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18246 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18247 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18248 	ucmd_buf.uscsi_timeout	= 60;
18249 
18250 	switch (usr_cmd) {
18251 	case SD_SCSI3_REGISTER: {
18252 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18253 
18254 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18255 		bcopy(ptr->newkey.key, prp->service_key,
18256 		    MHIOC_RESV_KEY_SIZE);
18257 		prp->aptpl = ptr->aptpl;
18258 		break;
18259 	}
18260 	case SD_SCSI3_RESERVE:
18261 	case SD_SCSI3_RELEASE: {
18262 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18263 
18264 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18265 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18266 		cdb.cdb_opaque[2] = ptr->type;
18267 		break;
18268 	}
18269 	case SD_SCSI3_PREEMPTANDABORT: {
18270 		mhioc_preemptandabort_t *ptr =
18271 		    (mhioc_preemptandabort_t *)usr_bufp;
18272 
18273 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18274 		bcopy(ptr->victim_key.key, prp->service_key,
18275 		    MHIOC_RESV_KEY_SIZE);
18276 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18277 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
18278 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18279 		break;
18280 	}
18281 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18282 	{
18283 		mhioc_registerandignorekey_t *ptr;
18284 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18285 		bcopy(ptr->newkey.key,
18286 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18287 		prp->aptpl = ptr->aptpl;
18288 		break;
18289 	}
18290 	default:
18291 		ASSERT(FALSE);
18292 		break;
18293 	}
18294 
18295 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18296 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18297 
18298 	switch (status) {
18299 	case 0:
18300 		break;	/* Success! */
18301 	case EIO:
18302 		switch (ucmd_buf.uscsi_status) {
18303 		case STATUS_RESERVATION_CONFLICT:
18304 			status = EACCES;
18305 			break;
18306 		case STATUS_CHECK:
18307 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18308 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18309 				KEY_ILLEGAL_REQUEST)) {
18310 				status = ENOTSUP;
18311 			}
18312 			break;
18313 		default:
18314 			break;
18315 		}
18316 		break;
18317 	default:
18318 		break;
18319 	}
18320 
18321 	kmem_free(prp, data_len);
18322 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18323 	return (status);
18324 }
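
/*
 * Illustrative sketch only (not part of the driver): registering a
 * reservation key through the routine above, much as the MHIOCGRP_REGISTER
 * ioctl handler does. The helper name sd_example_register_key is an
 * assumption for illustration; the mhioc_register_t layout comes from
 * sys/mhd.h.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_register_key(struct sd_lun *un, uint64_t key)
{
	mhioc_register_t	reg;

	bzero(&reg, sizeof (reg));
	/* oldkey stays zero for an initial registration */
	bcopy(&key, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
	reg.aptpl = B_FALSE;	/* do not persist across power loss */
	return (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
	    (uchar_t *)&reg));
}
#endif	/* SD_EXAMPLE_CODE */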
18325 
18326 
18327 /*
18328  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18329  *
18330  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18331  *
18332  *   Arguments: un - pointer to the target's soft state struct
18333  *
18334  * Return Code: 0 - success
18335  *		errno-type error code
18336  *
18337  *     Context: kernel thread context only.
18338  */
18339 
18340 static int
18341 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18342 {
18343 	struct sd_uscsi_info	*uip;
18344 	struct uscsi_cmd	*uscmd;
18345 	union scsi_cdb		*cdb;
18346 	struct buf		*bp;
18347 	int			rval = 0;
18348 
18349 	SD_TRACE(SD_LOG_IO, un,
18350 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18351 
18352 	ASSERT(un != NULL);
18353 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18354 
18355 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
18356 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
18357 
18358 	/*
18359 	 * First get some memory for the uscsi_cmd struct and cdb
18360 	 * and initialize for SYNCHRONIZE_CACHE cmd.
18361 	 */
18362 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
18363 	uscmd->uscsi_cdblen = CDB_GROUP1;
18364 	uscmd->uscsi_cdb = (caddr_t)cdb;
18365 	uscmd->uscsi_bufaddr = NULL;
18366 	uscmd->uscsi_buflen = 0;
18367 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18368 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18369 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18370 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18371 	uscmd->uscsi_timeout = sd_io_time;
18372 
18373 	/*
18374 	 * Allocate an sd_uscsi_info struct and fill it with the info
18375 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18376 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18377 	 * since we allocate the buf here in this function, we do not
18378 	 * need to preserve the prior contents of b_private.
18379 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
18380 	 */
18381 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18382 	uip->ui_flags = SD_PATH_DIRECT;
18383 	uip->ui_cmdp  = uscmd;
18384 
18385 	bp = getrbuf(KM_SLEEP);
18386 	bp->b_private = uip;
18387 
18388 	/*
18389 	 * Setup buffer to carry uscsi request.
18390 	 */
18391 	bp->b_flags  = B_BUSY;
18392 	bp->b_bcount = 0;
18393 	bp->b_blkno  = 0;
18394 
18395 	if (dkc != NULL) {
18396 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18397 		uip->ui_dkc = *dkc;
18398 	}
18399 
18400 	bp->b_edev = SD_GET_DEV(un);
18401 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18402 
18403 	(void) sd_uscsi_strategy(bp);
18404 
	/*
	 * If this is a synchronous request, wait for completion.
	 * If it is an async request, just return and let the b_iodone
	 * callback clean up.
	 * NOTE: On return, u_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
18413 	if (dkc == NULL) {
18414 		(void) biowait(bp);
18415 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18416 	}
18417 
18418 	return (rval);
18419 }
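
/*
 * Illustrative sketch only (not part of the driver): issuing the flush
 * asynchronously with a struct dk_callback, as the DKIOCFLUSHWRITECACHE
 * ioctl handler below does. The sd_example_ function names are
 * assumptions for illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static void
sd_example_flush_done(void *cookie, int error)
{
	/* Runs from biodone context once the SYNCHRONIZE CACHE ends */
	struct sd_lun *un = cookie;

	SD_TRACE(SD_LOG_IO, un, "example flush done: err %d\n", error);
}

static void
sd_example_flush_async(struct sd_lun *un)
{
	struct dk_callback dkc;

	bzero(&dkc, sizeof (dkc));
	dkc.dkc_callback = sd_example_flush_done;
	dkc.dkc_cookie = un;
	/* Returns immediately; completion is reported via the callback */
	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
}
#endif	/* SD_EXAMPLE_CODE */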
18420 
18421 
18422 static int
18423 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18424 {
18425 	struct sd_uscsi_info *uip;
18426 	struct uscsi_cmd *uscmd;
18427 	uint8_t *sense_buf;
18428 	struct sd_lun *un;
18429 	int status;
18430 
18431 	uip = (struct sd_uscsi_info *)(bp->b_private);
18432 	ASSERT(uip != NULL);
18433 
18434 	uscmd = uip->ui_cmdp;
18435 	ASSERT(uscmd != NULL);
18436 
18437 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18438 	ASSERT(sense_buf != NULL);
18439 
18440 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18441 	ASSERT(un != NULL);
18442 
18443 	status = geterror(bp);
18444 	switch (status) {
18445 	case 0:
18446 		break;	/* Success! */
18447 	case EIO:
18448 		switch (uscmd->uscsi_status) {
18449 		case STATUS_RESERVATION_CONFLICT:
18450 			/* Ignore reservation conflict */
18451 			status = 0;
18452 			goto done;
18453 
18454 		case STATUS_CHECK:
18455 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18456 			    (scsi_sense_key(sense_buf) ==
18457 				KEY_ILLEGAL_REQUEST)) {
18458 				/* Ignore Illegal Request error */
18459 				mutex_enter(SD_MUTEX(un));
18460 				un->un_f_sync_cache_supported = FALSE;
18461 				mutex_exit(SD_MUTEX(un));
18462 				status = ENOTSUP;
18463 				goto done;
18464 			}
18465 			break;
18466 		default:
18467 			break;
18468 		}
18469 		/* FALLTHRU */
18470 	default:
18471 		/*
18472 		 * Don't log an error message if this device
18473 		 * has removable media.
18474 		 */
18475 		if (!un->un_f_has_removable_media) {
18476 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18477 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18478 		}
18479 		break;
18480 	}
18481 
18482 done:
18483 	if (uip->ui_dkc.dkc_callback != NULL) {
18484 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18485 	}
18486 
18487 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18488 	freerbuf(bp);
18489 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18490 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18491 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18492 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18493 
18494 	return (status);
18495 }
18496 
18497 
18498 /*
18499  *    Function: sd_send_scsi_GET_CONFIGURATION
18500  *
18501  * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info;
 *		the caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
 *   Arguments: un       - pointer to the target's soft state struct
 *		ucmdbuf  - uscsi command struct (filled in by this routine)
 *		rqbuf    - caller's request sense buffer
 *		rqbuflen - length of rqbuf
 *		bufaddr  - buffer to receive the profile header
 *		buflen   - length of bufaddr (SD_PROFILE_HEADER_LEN)
18510  *
18511  * Return Code: 0   - Success
18512  *		errno return code from sd_send_scsi_cmd()
18513  *
18514  *     Context: Can sleep. Does not return until command is completed.
18515  *
18516  */
18517 
18518 static int
18519 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18520 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
18521 {
18522 	char	cdb[CDB_GROUP1];
18523 	int	status;
18524 
18525 	ASSERT(un != NULL);
18526 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18527 	ASSERT(bufaddr != NULL);
18528 	ASSERT(ucmdbuf != NULL);
18529 	ASSERT(rqbuf != NULL);
18530 
18531 	SD_TRACE(SD_LOG_IO, un,
18532 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18533 
18534 	bzero(cdb, sizeof (cdb));
18535 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18536 	bzero(rqbuf, rqbuflen);
18537 	bzero(bufaddr, buflen);
18538 
18539 	/*
18540 	 * Set up cdb field for the get configuration command.
18541 	 */
18542 	cdb[0] = SCMD_GET_CONFIGURATION;
18543 	cdb[1] = 0x02;  /* Requested Type */
18544 	cdb[8] = SD_PROFILE_HEADER_LEN;
18545 	ucmdbuf->uscsi_cdb = cdb;
18546 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18547 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18548 	ucmdbuf->uscsi_buflen = buflen;
18549 	ucmdbuf->uscsi_timeout = sd_io_time;
18550 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18551 	ucmdbuf->uscsi_rqlen = rqbuflen;
18552 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18553 
18554 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18555 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18556 
18557 	switch (status) {
18558 	case 0:
18559 		break;  /* Success! */
18560 	case EIO:
18561 		switch (ucmdbuf->uscsi_status) {
18562 		case STATUS_RESERVATION_CONFLICT:
18563 			status = EACCES;
18564 			break;
18565 		default:
18566 			break;
18567 		}
18568 		break;
18569 	default:
18570 		break;
18571 	}
18572 
18573 	if (status == 0) {
18574 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18575 		    "sd_send_scsi_GET_CONFIGURATION: data",
18576 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18577 	}
18578 
18579 	SD_TRACE(SD_LOG_IO, un,
18580 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18581 
18582 	return (status);
18583 }
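
/*
 * Illustrative sketch only (not part of the driver): extracting the
 * current profile from the header returned by the routine above. Per
 * MMC, the 8-byte GET CONFIGURATION header carries the current profile
 * as a big-endian value in bytes 6-7. The helper name is an assumption
 * for illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_current_profile(struct sd_lun *un, uint16_t *profile)
{
	struct uscsi_cmd	ucmd;
	uchar_t			rqbuf[SENSE_LENGTH];
	uchar_t			hdr[SD_PROFILE_HEADER_LEN];
	int			rval;

	rval = sd_send_scsi_GET_CONFIGURATION(un, &ucmd, rqbuf,
	    SENSE_LENGTH, hdr, SD_PROFILE_HEADER_LEN);
	if (rval == 0) {
		/* current profile is big-endian at bytes 6-7 */
		*profile = (uint16_t)((hdr[6] << 8) | hdr[7]);
	}
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */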
18584 
18585 /*
18586  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18587  *
18588  * Description: Issues the get configuration command to the device to
 *              retrieve a specific feature. Called from
 *		sd_check_for_writable_cd & sd_set_mmc_caps.
 *   Arguments: un       - pointer to the target's soft state struct
 *              ucmdbuf  - uscsi command struct (filled in by this routine)
 *              rqbuf    - caller's request sense buffer
 *              rqbuflen - length of rqbuf
 *              bufaddr  - buffer to receive the feature descriptor data
 *              buflen   - length of bufaddr
 *		feature  - MMC feature number to request
18598  *
18599  * Return Code: 0   - Success
18600  *              errno return code from sd_send_scsi_cmd()
18601  *
18602  *     Context: Can sleep. Does not return until command is completed.
18603  *
18604  */
18605 static int
18606 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18607 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18608 	uchar_t *bufaddr, uint_t buflen, char feature)
18609 {
18610 	char    cdb[CDB_GROUP1];
18611 	int	status;
18612 
18613 	ASSERT(un != NULL);
18614 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18615 	ASSERT(bufaddr != NULL);
18616 	ASSERT(ucmdbuf != NULL);
18617 	ASSERT(rqbuf != NULL);
18618 
18619 	SD_TRACE(SD_LOG_IO, un,
18620 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18621 
18622 	bzero(cdb, sizeof (cdb));
18623 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18624 	bzero(rqbuf, rqbuflen);
18625 	bzero(bufaddr, buflen);
18626 
18627 	/*
18628 	 * Set up cdb field for the get configuration command.
18629 	 */
18630 	cdb[0] = SCMD_GET_CONFIGURATION;
18631 	cdb[1] = 0x02;  /* Requested Type */
18632 	cdb[3] = feature;
18633 	cdb[8] = buflen;
18634 	ucmdbuf->uscsi_cdb = cdb;
18635 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18636 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18637 	ucmdbuf->uscsi_buflen = buflen;
18638 	ucmdbuf->uscsi_timeout = sd_io_time;
18639 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18640 	ucmdbuf->uscsi_rqlen = rqbuflen;
18641 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18642 
18643 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18644 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18645 
18646 	switch (status) {
18647 	case 0:
18648 		break;  /* Success! */
18649 	case EIO:
18650 		switch (ucmdbuf->uscsi_status) {
18651 		case STATUS_RESERVATION_CONFLICT:
18652 			status = EACCES;
18653 			break;
18654 		default:
18655 			break;
18656 		}
18657 		break;
18658 	default:
18659 		break;
18660 	}
18661 
18662 	if (status == 0) {
18663 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18664 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
18665 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18666 	}
18667 
18668 	SD_TRACE(SD_LOG_IO, un,
18669 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
18670 
18671 	return (status);
18672 }
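
/*
 * Illustrative sketch only (not part of the driver): testing whether a
 * given MMC feature is "current" (active for the mounted medium). Per
 * MMC, the first feature descriptor follows the 8-byte response header,
 * and bit 0 of its third byte (byte 10 of the response) is the "current"
 * bit. The helper name and the 16-byte buffer are assumptions.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_feature_is_current(struct sd_lun *un, char feature, int *current)
{
	struct uscsi_cmd	ucmd;
	uchar_t			rqbuf[SENSE_LENGTH];
	uchar_t			buf[16];
	int			rval;

	rval = sd_send_scsi_feature_GET_CONFIGURATION(un, &ucmd, rqbuf,
	    SENSE_LENGTH, buf, sizeof (buf), feature);
	if (rval == 0) {
		/* byte 10, bit 0: the "current" bit of the descriptor */
		*current = (buf[10] & 0x01) ? 1 : 0;
	}
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */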
18673 
18674 
18675 /*
18676  *    Function: sd_send_scsi_MODE_SENSE
18677  *
18678  * Description: Utility function for issuing a scsi MODE SENSE command.
18679  *		Note: This routine uses a consistent implementation for Group0,
18680  *		Group1, and Group2 commands across all platforms. ATAPI devices
18681  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
18682  *
18683  *   Arguments: un - pointer to the softstate struct for the target.
18684  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
 *			  CDB_GROUP[1|2] (10 byte)).
18686  *		bufaddr - buffer for page data retrieved from the target.
18687  *		buflen - size of page to be retrieved.
18688  *		page_code - page code of data to be retrieved from the target.
18689  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18690  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18691  *			to use the USCSI "direct" chain and bypass the normal
18692  *			command waitq.
18693  *
18694  * Return Code: 0   - Success
18695  *		errno return code from sd_send_scsi_cmd()
18696  *
18697  *     Context: Can sleep. Does not return until command is completed.
18698  */
18699 
18700 static int
18701 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18702 	size_t buflen,  uchar_t page_code, int path_flag)
18703 {
18704 	struct	scsi_extended_sense	sense_buf;
18705 	union scsi_cdb		cdb;
18706 	struct uscsi_cmd	ucmd_buf;
18707 	int			status;
18708 	int			headlen;
18709 
18710 	ASSERT(un != NULL);
18711 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18712 	ASSERT(bufaddr != NULL);
18713 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18714 	    (cdbsize == CDB_GROUP2));
18715 
18716 	SD_TRACE(SD_LOG_IO, un,
18717 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
18718 
18719 	bzero(&cdb, sizeof (cdb));
18720 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18721 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18722 	bzero(bufaddr, buflen);
18723 
18724 	if (cdbsize == CDB_GROUP0) {
18725 		cdb.scc_cmd = SCMD_MODE_SENSE;
18726 		cdb.cdb_opaque[2] = page_code;
18727 		FORMG0COUNT(&cdb, buflen);
18728 		headlen = MODE_HEADER_LENGTH;
18729 	} else {
18730 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
18731 		cdb.cdb_opaque[2] = page_code;
18732 		FORMG1COUNT(&cdb, buflen);
18733 		headlen = MODE_HEADER_LENGTH_GRP2;
18734 	}
18735 
18736 	ASSERT(headlen <= buflen);
18737 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18738 
18739 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18740 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18741 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18742 	ucmd_buf.uscsi_buflen	= buflen;
18743 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18744 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18745 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18746 	ucmd_buf.uscsi_timeout	= 60;
18747 
18748 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18749 	    UIO_SYSSPACE, path_flag);
18750 
18751 	switch (status) {
18752 	case 0:
		/*
		 * sr_check_wp() uses page code 0x3f and checks the mode
		 * page header to determine whether the target device is
		 * write-protected. But some USB devices return 0 bytes
		 * for page code 0x3f, so make sure that at least the
		 * mode page header is returned.
		 */
18760 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
18761 			status = EIO;
18762 		break;	/* Success! */
18763 	case EIO:
18764 		switch (ucmd_buf.uscsi_status) {
18765 		case STATUS_RESERVATION_CONFLICT:
18766 			status = EACCES;
18767 			break;
18768 		default:
18769 			break;
18770 		}
18771 		break;
18772 	default:
18773 		break;
18774 	}
18775 
18776 	if (status == 0) {
18777 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
18778 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18779 	}
18780 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
18781 
18782 	return (status);
18783 }
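
/*
 * Illustrative sketch only (not part of the driver): reading the WCE
 * (write cache enable) bit out of the caching page with the routine
 * above. The Group 1/2 mode parameter header is 8 bytes with the
 * big-endian block descriptor length in bytes 6-7, and WCE is bit 2 of
 * byte 2 of mode page 0x08. The helper name and 64-byte buffer are
 * assumptions for illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_get_wce(struct sd_lun *un, int *wce)
{
	uchar_t		buf[64];
	uchar_t		*pg;
	uint16_t	bdlen;
	int		rval;

	rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, buf, sizeof (buf),
	    MODEPAGE_CACHING, SD_PATH_DIRECT);
	if (rval == 0) {
		/* skip the header plus any block descriptors */
		bdlen = (uint16_t)((buf[6] << 8) | buf[7]);
		pg = buf + MODE_HEADER_LENGTH_GRP2 + bdlen;
		*wce = (pg[2] & 0x04) ? 1 : 0;
	}
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */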
18784 
18785 
18786 /*
18787  *    Function: sd_send_scsi_MODE_SELECT
18788  *
18789  * Description: Utility function for issuing a scsi MODE SELECT command.
18790  *		Note: This routine uses a consistent implementation for Group0,
18791  *		Group1, and Group2 commands across all platforms. ATAPI devices
18792  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
18793  *
18794  *   Arguments: un - pointer to the softstate struct for the target.
18795  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
 *			  CDB_GROUP[1|2] (10 byte)).
18797  *		bufaddr - buffer for page data retrieved from the target.
18798  *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if the SP bit should be set.
18800  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18801  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18802  *			to use the USCSI "direct" chain and bypass the normal
18803  *			command waitq.
18804  *
18805  * Return Code: 0   - Success
18806  *		errno return code from sd_send_scsi_cmd()
18807  *
18808  *     Context: Can sleep. Does not return until command is completed.
18809  */
18810 
18811 static int
18812 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18813 	size_t buflen,  uchar_t save_page, int path_flag)
18814 {
18815 	struct	scsi_extended_sense	sense_buf;
18816 	union scsi_cdb		cdb;
18817 	struct uscsi_cmd	ucmd_buf;
18818 	int			status;
18819 
18820 	ASSERT(un != NULL);
18821 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18822 	ASSERT(bufaddr != NULL);
18823 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18824 	    (cdbsize == CDB_GROUP2));
18825 
18826 	SD_TRACE(SD_LOG_IO, un,
18827 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
18828 
18829 	bzero(&cdb, sizeof (cdb));
18830 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18831 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18832 
18833 	/* Set the PF bit for many third party drives */
18834 	cdb.cdb_opaque[1] = 0x10;
18835 
	/* Set the save page (SP) bit if requested */
18837 	if (save_page == SD_SAVE_PAGE) {
18838 		cdb.cdb_opaque[1] |= 0x01;
18839 	}
18840 
18841 	if (cdbsize == CDB_GROUP0) {
18842 		cdb.scc_cmd = SCMD_MODE_SELECT;
18843 		FORMG0COUNT(&cdb, buflen);
18844 	} else {
18845 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
18846 		FORMG1COUNT(&cdb, buflen);
18847 	}
18848 
18849 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18850 
18851 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18852 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18853 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18854 	ucmd_buf.uscsi_buflen	= buflen;
18855 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18856 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18857 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18858 	ucmd_buf.uscsi_timeout	= 60;
18859 
18860 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18861 	    UIO_SYSSPACE, path_flag);
18862 
18863 	switch (status) {
18864 	case 0:
18865 		break;	/* Success! */
18866 	case EIO:
18867 		switch (ucmd_buf.uscsi_status) {
18868 		case STATUS_RESERVATION_CONFLICT:
18869 			status = EACCES;
18870 			break;
18871 		default:
18872 			break;
18873 		}
18874 		break;
18875 	default:
18876 		break;
18877 	}
18878 
18879 	if (status == 0) {
18880 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
18881 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18882 	}
18883 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
18884 
18885 	return (status);
18886 }
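
/*
 * Illustrative sketch only (not part of the driver): a read-modify-write
 * of the caching page pairing the two routines above, similar in spirit
 * to sd_cache_control(). Bytes 0-1 of the mode parameter header (mode
 * data length) are reserved for MODE SELECT and are cleared before the
 * select. The helper name and buffer size are assumptions.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_set_wce(struct sd_lun *un, int enable)
{
	uchar_t		buf[64];
	uchar_t		*pg;
	uint16_t	bdlen;
	size_t		select_len;
	int		rval;

	rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, buf, sizeof (buf),
	    MODEPAGE_CACHING, SD_PATH_DIRECT);
	if (rval != 0)
		return (rval);

	bdlen = (uint16_t)((buf[6] << 8) | buf[7]);
	pg = buf + MODE_HEADER_LENGTH_GRP2 + bdlen;
	if (enable)
		pg[2] |= 0x04;		/* set WCE */
	else
		pg[2] &= ~0x04;		/* clear WCE */

	buf[0] = buf[1] = 0;		/* mode data length is reserved */
	/* header + block descriptors + page (page length field + 2) */
	select_len = MODE_HEADER_LENGTH_GRP2 + bdlen + pg[1] + 2;
	return (sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, buf, select_len,
	    SD_SAVE_PAGE, SD_PATH_DIRECT));
}
#endif	/* SD_EXAMPLE_CODE */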
18887 
18888 
18889 /*
18890  *    Function: sd_send_scsi_RDWR
18891  *
18892  * Description: Issue a scsi READ or WRITE command with the given parameters.
18893  *
18894  *   Arguments: un:      Pointer to the sd_lun struct for the target.
18895  *		cmd:	 SCMD_READ or SCMD_WRITE
 *		bufaddr: Address of the caller's data buffer (the source
 *			 for a WRITE, the destination for a READ).
 *		buflen:  Length of the caller's buffer.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
18902  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18903  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18904  *			to use the USCSI "direct" chain and bypass the normal
18905  *			command waitq.
18906  *
18907  * Return Code: 0   - Success
18908  *		errno return code from sd_send_scsi_cmd()
18909  *
18910  *     Context: Can sleep. Does not return until command is completed.
18911  */
18912 
18913 static int
18914 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
18915 	size_t buflen, daddr_t start_block, int path_flag)
18916 {
18917 	struct	scsi_extended_sense	sense_buf;
18918 	union scsi_cdb		cdb;
18919 	struct uscsi_cmd	ucmd_buf;
18920 	uint32_t		block_count;
18921 	int			status;
18922 	int			cdbsize;
18923 	uchar_t			flag;
18924 
18925 	ASSERT(un != NULL);
18926 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18927 	ASSERT(bufaddr != NULL);
18928 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
18929 
18930 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
18931 
18932 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
18933 		return (EINVAL);
18934 	}
18935 
18936 	mutex_enter(SD_MUTEX(un));
18937 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
18938 	mutex_exit(SD_MUTEX(un));
18939 
18940 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
18941 
18942 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
18943 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
18944 	    bufaddr, buflen, start_block, block_count);
18945 
18946 	bzero(&cdb, sizeof (cdb));
18947 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18948 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18949 
18950 	/* Compute CDB size to use */
18951 	if (start_block > 0xffffffff)
18952 		cdbsize = CDB_GROUP4;
18953 	else if ((start_block & 0xFFE00000) ||
18954 	    (un->un_f_cfg_is_atapi == TRUE))
18955 		cdbsize = CDB_GROUP1;
18956 	else
18957 		cdbsize = CDB_GROUP0;
18958 
18959 	switch (cdbsize) {
18960 	case CDB_GROUP0:	/* 6-byte CDBs */
18961 		cdb.scc_cmd = cmd;
18962 		FORMG0ADDR(&cdb, start_block);
18963 		FORMG0COUNT(&cdb, block_count);
18964 		break;
18965 	case CDB_GROUP1:	/* 10-byte CDBs */
18966 		cdb.scc_cmd = cmd | SCMD_GROUP1;
18967 		FORMG1ADDR(&cdb, start_block);
18968 		FORMG1COUNT(&cdb, block_count);
18969 		break;
18970 	case CDB_GROUP4:	/* 16-byte CDBs */
18971 		cdb.scc_cmd = cmd | SCMD_GROUP4;
18972 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
18973 		FORMG4COUNT(&cdb, block_count);
18974 		break;
18975 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
18976 	default:
18977 		/* All others reserved */
18978 		return (EINVAL);
18979 	}
18980 
18981 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
18982 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18983 
18984 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18985 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18986 	ucmd_buf.uscsi_bufaddr	= bufaddr;
18987 	ucmd_buf.uscsi_buflen	= buflen;
18988 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18989 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18990 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
18991 	ucmd_buf.uscsi_timeout	= 60;
18992 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18993 	    UIO_SYSSPACE, path_flag);
18994 	switch (status) {
18995 	case 0:
18996 		break;	/* Success! */
18997 	case EIO:
18998 		switch (ucmd_buf.uscsi_status) {
18999 		case STATUS_RESERVATION_CONFLICT:
19000 			status = EACCES;
19001 			break;
19002 		default:
19003 			break;
19004 		}
19005 		break;
19006 	default:
19007 		break;
19008 	}
19009 
19010 	if (status == 0) {
19011 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19012 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19013 	}
19014 
19015 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19016 
19017 	return (status);
19018 }
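
/*
 * Illustrative sketch only (not part of the driver): reading the first
 * target block through the routine above. The buffer must hold at least
 * one target-native block. The helper name is an assumption for
 * illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_read_block0(struct sd_lun *un)
{
	uchar_t	*buf;
	size_t	len;
	int	rval;

	len = un->un_tgt_blocksize;
	buf = kmem_zalloc(len, KM_SLEEP);
	rval = sd_send_scsi_RDWR(un, SCMD_READ, buf, len, (daddr_t)0,
	    SD_PATH_STANDARD);
	kmem_free(buf, len);
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */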
19019 
19020 
19021 /*
19022  *    Function: sd_send_scsi_LOG_SENSE
19023  *
19024  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19025  *
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: buffer to receive the log page data.
 *		buflen:  length of the caller's buffer.
 *		page_code: log page to be requested.
 *		page_control: page control field (PC) for the CDB.
 *		param_ptr: parameter pointer field for the CDB.
 *		path_flag: SD_PATH_DIRECT or SD_PATH_STANDARD.
19027  *
19028  * Return Code: 0   - Success
19029  *		errno return code from sd_send_scsi_cmd()
19030  *
19031  *     Context: Can sleep. Does not return until command is completed.
19032  */
19033 
19034 static int
19035 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19036 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19037 	int path_flag)
19038 
19039 {
19040 	struct	scsi_extended_sense	sense_buf;
19041 	union scsi_cdb		cdb;
19042 	struct uscsi_cmd	ucmd_buf;
19043 	int			status;
19044 
19045 	ASSERT(un != NULL);
19046 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19047 
19048 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19049 
19050 	bzero(&cdb, sizeof (cdb));
19051 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19052 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19053 
19054 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19055 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19056 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19057 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19058 	FORMG1COUNT(&cdb, buflen);
19059 
19060 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19061 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19062 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19063 	ucmd_buf.uscsi_buflen	= buflen;
19064 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19065 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19066 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19067 	ucmd_buf.uscsi_timeout	= 60;
19068 
19069 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19070 	    UIO_SYSSPACE, path_flag);
19071 
19072 	switch (status) {
19073 	case 0:
19074 		break;
19075 	case EIO:
19076 		switch (ucmd_buf.uscsi_status) {
19077 		case STATUS_RESERVATION_CONFLICT:
19078 			status = EACCES;
19079 			break;
19080 		case STATUS_CHECK:
19081 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19082 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19083 				KEY_ILLEGAL_REQUEST) &&
19084 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19085 				/*
19086 				 * ASC 0x24: INVALID FIELD IN CDB
19087 				 */
19088 				switch (page_code) {
19089 				case START_STOP_CYCLE_PAGE:
19090 					/*
19091 					 * The start stop cycle counter is
19092 					 * implemented as page 0x31 in earlier
19093 					 * generation disks. In new generation
19094 					 * disks the start stop cycle counter is
19095 					 * implemented as page 0xE. To properly
19096 					 * handle this case if an attempt for
19097 					 * log page 0xE is made and fails we
19098 					 * will try again using page 0x31.
19099 					 *
19100 					 * Network storage BU committed to
19101 					 * maintain the page 0x31 for this
19102 					 * purpose and will not have any other
19103 					 * page implemented with page code 0x31
19104 					 * until all disks transition to the
19105 					 * standard page.
19106 					 */
19107 					mutex_enter(SD_MUTEX(un));
19108 					un->un_start_stop_cycle_page =
19109 					    START_STOP_CYCLE_VU_PAGE;
19110 					cdb.cdb_opaque[2] =
19111 					    (char)(page_control << 6) |
19112 					    un->un_start_stop_cycle_page;
19113 					mutex_exit(SD_MUTEX(un));
19114 					status = sd_send_scsi_cmd(
19115 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19116 					    UIO_SYSSPACE, path_flag);
19117 
19118 					break;
19119 				case TEMPERATURE_PAGE:
19120 					status = ENOTTY;
19121 					break;
19122 				default:
19123 					break;
19124 				}
19125 			}
19126 			break;
19127 		default:
19128 			break;
19129 		}
19130 		break;
19131 	default:
19132 		break;
19133 	}
19134 
19135 	if (status == 0) {
19136 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19137 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19138 	}
19139 
19140 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19141 
19142 	return (status);
19143 }
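
/*
 * Illustrative sketch only (not part of the driver): fetching the
 * current temperature via the routine above, in the spirit of
 * sd_dkio_get_temp(). Per SPC-3, parameter 0x0000 of the temperature
 * page carries the current temperature in degrees Celsius in its last
 * byte, i.e. byte 9 of the page. The helper name and 16-byte buffer
 * are assumptions for illustration.
 */
#ifdef SD_EXAMPLE_CODE	/* hypothetical guard; never defined */
static int
sd_example_get_temp(struct sd_lun *un, uchar_t *temp)
{
	uchar_t	buf[16];
	int	rval;

	rval = sd_send_scsi_LOG_SENSE(un, buf, sizeof (buf),
	    TEMPERATURE_PAGE, 1, 0, SD_PATH_STANDARD);
	if (rval == 0) {
		/* 4-byte page header + 5 bytes into the parameter */
		*temp = buf[9];
	}
	return (rval);
}
#endif	/* SD_EXAMPLE_CODE */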
19144 
19145 
19146 /*
19147  *    Function: sdioctl
19148  *
19149  * Description: Driver's ioctl(9e) entry point function.
19150  *
19151  *   Arguments: dev     - device number
19152  *		cmd     - ioctl operation to be performed
19153  *		arg     - user argument, contains data to be set or reference
19154  *			  parameter for get
19155  *		flag    - bit flag, indicating open settings, 32/64 bit type
19156  *		cred_p  - user credential pointer
19157  *		rval_p  - calling process return value (OPT)
19158  *
19159  * Return Code: EINVAL
19160  *		ENOTTY
19161  *		ENXIO
19162  *		EIO
19163  *		EFAULT
19164  *		ENOTSUP
19165  *		EPERM
19166  *
19167  *     Context: Called from the device switch at normal priority.
19168  */
19169 
19170 static int
19171 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19172 {
19173 	struct sd_lun	*un = NULL;
19174 	int		err = 0;
19175 	int		i = 0;
19176 	cred_t		*cr;
19177 	int		tmprval = EINVAL;
19178 	int 		is_valid;
19179 
19180 	/*
19181 	 * All device accesses go thru sdstrategy where we check on suspend
19182 	 * status
19183 	 */
19184 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19185 		return (ENXIO);
19186 	}
19187 
19188 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19189 
19190 
19191 	is_valid = SD_IS_VALID_LABEL(un);
19192 
19193 	/*
19194 	 * Moved this wait from sd_uscsi_strategy to here for
19195 	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
19197 	 * in a call to sd_uscsi_strategy.
19198 	 */
19199 	mutex_enter(SD_MUTEX(un));
19200 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19201 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19202 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19203 	}
	/*
	 * Twiddling the counter here protects commands from now
	 * through to the top of sd_uscsi_strategy. Without the
	 * counter increment, a power down, for example, could slip
	 * in after the above state check is made and before
	 * execution gets to the top of sd_uscsi_strategy.
	 * That would cause problems.
	 */
19212 	un->un_ncmds_in_driver++;
19213 
19214 	if (!is_valid &&
19215 	    (flag & (FNDELAY | FNONBLOCK))) {
19216 		switch (cmd) {
19217 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19218 		case DKIOCGVTOC:
19219 		case DKIOCGAPART:
19220 		case DKIOCPARTINFO:
19221 		case DKIOCSGEOM:
19222 		case DKIOCSAPART:
19223 		case DKIOCGETEFI:
19224 		case DKIOCPARTITION:
19225 		case DKIOCSVTOC:
19226 		case DKIOCSETEFI:
19227 		case DKIOCGMBOOT:
19228 		case DKIOCSMBOOT:
19229 		case DKIOCG_PHYGEOM:
19230 		case DKIOCG_VIRTGEOM:
19231 			/* let cmlb handle it */
19232 			goto skip_ready_valid;
19233 
19234 		case CDROMPAUSE:
19235 		case CDROMRESUME:
19236 		case CDROMPLAYMSF:
19237 		case CDROMPLAYTRKIND:
19238 		case CDROMREADTOCHDR:
19239 		case CDROMREADTOCENTRY:
19240 		case CDROMSTOP:
19241 		case CDROMSTART:
19242 		case CDROMVOLCTRL:
19243 		case CDROMSUBCHNL:
19244 		case CDROMREADMODE2:
19245 		case CDROMREADMODE1:
19246 		case CDROMREADOFFSET:
19247 		case CDROMSBLKMODE:
19248 		case CDROMGBLKMODE:
19249 		case CDROMGDRVSPEED:
19250 		case CDROMSDRVSPEED:
19251 		case CDROMCDDA:
19252 		case CDROMCDXA:
19253 		case CDROMSUBCODE:
19254 			if (!ISCD(un)) {
19255 				un->un_ncmds_in_driver--;
19256 				ASSERT(un->un_ncmds_in_driver >= 0);
19257 				mutex_exit(SD_MUTEX(un));
19258 				return (ENOTTY);
19259 			}
19260 			break;
19261 		case FDEJECT:
19262 		case DKIOCEJECT:
19263 		case CDROMEJECT:
19264 			if (!un->un_f_eject_media_supported) {
19265 				un->un_ncmds_in_driver--;
19266 				ASSERT(un->un_ncmds_in_driver >= 0);
19267 				mutex_exit(SD_MUTEX(un));
19268 				return (ENOTTY);
19269 			}
19270 			break;
19271 		case DKIOCFLUSHWRITECACHE:
19272 			mutex_exit(SD_MUTEX(un));
19273 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19274 			if (err != 0) {
19275 				mutex_enter(SD_MUTEX(un));
19276 				un->un_ncmds_in_driver--;
19277 				ASSERT(un->un_ncmds_in_driver >= 0);
19278 				mutex_exit(SD_MUTEX(un));
19279 				return (EIO);
19280 			}
19281 			mutex_enter(SD_MUTEX(un));
19282 			/* FALLTHROUGH */
19283 		case DKIOCREMOVABLE:
19284 		case DKIOCHOTPLUGGABLE:
19285 		case DKIOCINFO:
19286 		case DKIOCGMEDIAINFO:
19287 		case MHIOCENFAILFAST:
19288 		case MHIOCSTATUS:
19289 		case MHIOCTKOWN:
19290 		case MHIOCRELEASE:
19291 		case MHIOCGRP_INKEYS:
19292 		case MHIOCGRP_INRESV:
19293 		case MHIOCGRP_REGISTER:
19294 		case MHIOCGRP_RESERVE:
19295 		case MHIOCGRP_PREEMPTANDABORT:
19296 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19297 		case CDROMCLOSETRAY:
19298 		case USCSICMD:
19299 			goto skip_ready_valid;
19300 		default:
19301 			break;
19302 		}
19303 
19304 		mutex_exit(SD_MUTEX(un));
19305 		err = sd_ready_and_valid(un);
19306 		mutex_enter(SD_MUTEX(un));
19307 
19308 		if (err != SD_READY_VALID) {
19309 			switch (cmd) {
19310 			case DKIOCSTATE:
19311 			case CDROMGDRVSPEED:
19312 			case CDROMSDRVSPEED:
19313 			case FDEJECT:	/* for eject command */
19314 			case DKIOCEJECT:
19315 			case CDROMEJECT:
19316 			case DKIOCREMOVABLE:
19317 			case DKIOCHOTPLUGGABLE:
19318 				break;
19319 			default:
19320 				if (un->un_f_has_removable_media) {
19321 					err = ENXIO;
19322 				} else {
19323 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19324 					if (err == SD_RESERVED_BY_OTHERS) {
19325 						err = EACCES;
19326 					} else {
19327 						err = EIO;
19328 					}
19329 				}
19330 				un->un_ncmds_in_driver--;
19331 				ASSERT(un->un_ncmds_in_driver >= 0);
19332 				mutex_exit(SD_MUTEX(un));
19333 				return (err);
19334 			}
19335 		}
19336 	}
19337 
19338 skip_ready_valid:
19339 	mutex_exit(SD_MUTEX(un));
19340 
19341 	switch (cmd) {
19342 	case DKIOCINFO:
19343 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19344 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19345 		break;
19346 
19347 	case DKIOCGMEDIAINFO:
19348 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19349 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19350 		break;
19351 
19352 	case DKIOCGGEOM:
19353 	case DKIOCGVTOC:
19354 	case DKIOCGAPART:
19355 	case DKIOCPARTINFO:
19356 	case DKIOCSGEOM:
19357 	case DKIOCSAPART:
19358 	case DKIOCGETEFI:
19359 	case DKIOCPARTITION:
19360 	case DKIOCSVTOC:
19361 	case DKIOCSETEFI:
19362 	case DKIOCGMBOOT:
19363 	case DKIOCSMBOOT:
19364 	case DKIOCG_PHYGEOM:
19365 	case DKIOCG_VIRTGEOM:
19366 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19367 
19368 		/* TUR should spin up */
19369 
19370 		if (un->un_f_has_removable_media)
19371 			err = sd_send_scsi_TEST_UNIT_READY(un,
19372 			    SD_CHECK_FOR_MEDIA);
19373 		else
19374 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19375 
19376 		if (err != 0)
19377 			break;
19378 
19379 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19380 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19381 
		if ((err == 0) &&
		    ((cmd == DKIOCSETEFI) ||
		    ((un->un_f_pkstats_enabled) &&
		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC)))) {
19386 
19387 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19388 			    (void *)SD_PATH_DIRECT);
19389 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19390 				sd_set_pstats(un);
19391 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19392 				    "sd_ioctl: un:0x%p pstats created and "
19393 				    "set\n", un);
19394 			}
19395 		}
19396 
19397 		if ((cmd == DKIOCSVTOC) ||
19398 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19399 
19400 			mutex_enter(SD_MUTEX(un));
19401 			if (un->un_f_devid_supported &&
19402 			    (un->un_f_opt_fab_devid == TRUE)) {
19403 				if (un->un_devid == NULL) {
19404 					sd_register_devid(un, SD_DEVINFO(un),
19405 					    SD_TARGET_IS_UNRESERVED);
19406 				} else {
19407 					/*
19408 					 * The device id for this disk
19409 					 * has been fabricated. The
19410 					 * device id must be preserved
19411 					 * by writing it back out to
19412 					 * disk.
19413 					 */
19414 					if (sd_write_deviceid(un) != 0) {
19415 						ddi_devid_free(un->un_devid);
19416 						un->un_devid = NULL;
19417 					}
19418 				}
19419 			}
19420 			mutex_exit(SD_MUTEX(un));
19421 		}
19422 
19423 		break;
19424 
19425 	case DKIOCLOCK:
19426 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19427 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19428 		    SD_PATH_STANDARD);
19429 		break;
19430 
19431 	case DKIOCUNLOCK:
19432 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19433 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19434 		    SD_PATH_STANDARD);
19435 		break;
19436 
19437 	case DKIOCSTATE: {
19438 		enum dkio_state		state;
19439 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19440 
19441 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19442 			err = EFAULT;
19443 		} else {
19444 			err = sd_check_media(dev, state);
19445 			if (err == 0) {
19446 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19447 				    sizeof (int), flag) != 0)
19448 					err = EFAULT;
19449 			}
19450 		}
19451 		break;
19452 	}
19453 
19454 	case DKIOCREMOVABLE:
19455 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19456 		i = un->un_f_has_removable_media ? 1 : 0;
19457 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19458 			err = EFAULT;
19459 		} else {
19460 			err = 0;
19461 		}
19462 		break;
19463 
19464 	case DKIOCHOTPLUGGABLE:
19465 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19466 		i = un->un_f_is_hotpluggable ? 1 : 0;
19467 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19468 			err = EFAULT;
19469 		} else {
19470 			err = 0;
19471 		}
19472 		break;
19473 
19474 	case DKIOCGTEMPERATURE:
19475 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19476 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19477 		break;
19478 
19479 	case MHIOCENFAILFAST:
19480 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19481 		if ((err = drv_priv(cred_p)) == 0) {
19482 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19483 		}
19484 		break;
19485 
19486 	case MHIOCTKOWN:
19487 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19488 		if ((err = drv_priv(cred_p)) == 0) {
19489 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19490 		}
19491 		break;
19492 
19493 	case MHIOCRELEASE:
19494 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19495 		if ((err = drv_priv(cred_p)) == 0) {
19496 			err = sd_mhdioc_release(dev);
19497 		}
19498 		break;
19499 
19500 	case MHIOCSTATUS:
19501 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19502 		if ((err = drv_priv(cred_p)) == 0) {
19503 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19504 			case 0:
19505 				err = 0;
19506 				break;
19507 			case EACCES:
19508 				*rval_p = 1;
19509 				err = 0;
19510 				break;
19511 			default:
19512 				err = EIO;
19513 				break;
19514 			}
19515 		}
19516 		break;
19517 
19518 	case MHIOCQRESERVE:
19519 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19520 		if ((err = drv_priv(cred_p)) == 0) {
19521 			err = sd_reserve_release(dev, SD_RESERVE);
19522 		}
19523 		break;
19524 
19525 	case MHIOCREREGISTERDEVID:
19526 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19527 		if (drv_priv(cred_p) == EPERM) {
19528 			err = EPERM;
19529 		} else if (!un->un_f_devid_supported) {
19530 			err = ENOTTY;
19531 		} else {
19532 			err = sd_mhdioc_register_devid(dev);
19533 		}
19534 		break;
19535 
19536 	case MHIOCGRP_INKEYS:
19537 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19538 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19539 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19540 				err = ENOTSUP;
19541 			} else {
19542 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19543 				    flag);
19544 			}
19545 		}
19546 		break;
19547 
19548 	case MHIOCGRP_INRESV:
19549 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19550 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19551 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19552 				err = ENOTSUP;
19553 			} else {
19554 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19555 			}
19556 		}
19557 		break;
19558 
19559 	case MHIOCGRP_REGISTER:
19560 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19561 		if ((err = drv_priv(cred_p)) != EPERM) {
19562 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19563 				err = ENOTSUP;
19564 			} else if (arg != NULL) {
19565 				mhioc_register_t reg;
19566 				if (ddi_copyin((void *)arg, &reg,
19567 				    sizeof (mhioc_register_t), flag) != 0) {
19568 					err = EFAULT;
19569 				} else {
19570 					err =
19571 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19572 					    un, SD_SCSI3_REGISTER,
19573 					    (uchar_t *)&reg);
19574 				}
19575 			}
19576 		}
19577 		break;
19578 
19579 	case MHIOCGRP_RESERVE:
19580 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19581 		if ((err = drv_priv(cred_p)) != EPERM) {
19582 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19583 				err = ENOTSUP;
19584 			} else if (arg != NULL) {
19585 				mhioc_resv_desc_t resv_desc;
19586 				if (ddi_copyin((void *)arg, &resv_desc,
19587 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19588 					err = EFAULT;
19589 				} else {
19590 					err =
19591 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19592 					    un, SD_SCSI3_RESERVE,
19593 					    (uchar_t *)&resv_desc);
19594 				}
19595 			}
19596 		}
19597 		break;
19598 
19599 	case MHIOCGRP_PREEMPTANDABORT:
19600 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19601 		if ((err = drv_priv(cred_p)) != EPERM) {
19602 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19603 				err = ENOTSUP;
19604 			} else if (arg != NULL) {
19605 				mhioc_preemptandabort_t preempt_abort;
19606 				if (ddi_copyin((void *)arg, &preempt_abort,
19607 				    sizeof (mhioc_preemptandabort_t),
19608 				    flag) != 0) {
19609 					err = EFAULT;
19610 				} else {
19611 					err =
19612 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19613 					    un, SD_SCSI3_PREEMPTANDABORT,
19614 					    (uchar_t *)&preempt_abort);
19615 				}
19616 			}
19617 		}
19618 		break;
19619 
19620 	case MHIOCGRP_REGISTERANDIGNOREKEY:
		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19622 		if ((err = drv_priv(cred_p)) != EPERM) {
19623 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19624 				err = ENOTSUP;
19625 			} else if (arg != NULL) {
19626 				mhioc_registerandignorekey_t r_and_i;
19627 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19628 				    sizeof (mhioc_registerandignorekey_t),
19629 				    flag) != 0) {
19630 					err = EFAULT;
19631 				} else {
19632 					err =
19633 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19634 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19635 					    (uchar_t *)&r_and_i);
19636 				}
19637 			}
19638 		}
19639 		break;
19640 
19641 	case USCSICMD:
19642 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
19643 		cr = ddi_get_cred();
19644 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
19645 			err = EPERM;
19646 		} else {
19647 			enum uio_seg	uioseg;
19648 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
19649 			    UIO_USERSPACE;
19650 			if (un->un_f_format_in_progress == TRUE) {
19651 				err = EAGAIN;
19652 				break;
19653 			}
19654 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
19655 			    flag, uioseg, SD_PATH_STANDARD);
19656 		}
19657 		break;
19658 
19659 	case CDROMPAUSE:
19660 	case CDROMRESUME:
19661 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
19662 		if (!ISCD(un)) {
19663 			err = ENOTTY;
19664 		} else {
19665 			err = sr_pause_resume(dev, cmd);
19666 		}
19667 		break;
19668 
19669 	case CDROMPLAYMSF:
19670 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
19671 		if (!ISCD(un)) {
19672 			err = ENOTTY;
19673 		} else {
19674 			err = sr_play_msf(dev, (caddr_t)arg, flag);
19675 		}
19676 		break;
19677 
19678 	case CDROMPLAYTRKIND:
19679 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
19680 #if defined(__i386) || defined(__amd64)
19681 		/*
19682 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
19683 		 */
19684 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19685 #else
19686 		if (!ISCD(un)) {
19687 #endif
19688 			err = ENOTTY;
19689 		} else {
19690 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
19691 		}
19692 		break;
19693 
19694 	case CDROMREADTOCHDR:
19695 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
19696 		if (!ISCD(un)) {
19697 			err = ENOTTY;
19698 		} else {
19699 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
19700 		}
19701 		break;
19702 
19703 	case CDROMREADTOCENTRY:
19704 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
19705 		if (!ISCD(un)) {
19706 			err = ENOTTY;
19707 		} else {
19708 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
19709 		}
19710 		break;
19711 
19712 	case CDROMSTOP:
19713 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
19714 		if (!ISCD(un)) {
19715 			err = ENOTTY;
19716 		} else {
19717 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
19718 			    SD_PATH_STANDARD);
19719 		}
19720 		break;
19721 
19722 	case CDROMSTART:
19723 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
19724 		if (!ISCD(un)) {
19725 			err = ENOTTY;
19726 		} else {
19727 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19728 			    SD_PATH_STANDARD);
19729 		}
19730 		break;
19731 
19732 	case CDROMCLOSETRAY:
19733 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
19734 		if (!ISCD(un)) {
19735 			err = ENOTTY;
19736 		} else {
19737 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
19738 			    SD_PATH_STANDARD);
19739 		}
19740 		break;
19741 
19742 	case FDEJECT:	/* for eject command */
19743 	case DKIOCEJECT:
19744 	case CDROMEJECT:
19745 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
19746 		if (!un->un_f_eject_media_supported) {
19747 			err = ENOTTY;
19748 		} else {
19749 			err = sr_eject(dev);
19750 		}
19751 		break;
19752 
19753 	case CDROMVOLCTRL:
19754 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
19755 		if (!ISCD(un)) {
19756 			err = ENOTTY;
19757 		} else {
19758 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
19759 		}
19760 		break;
19761 
19762 	case CDROMSUBCHNL:
19763 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
19764 		if (!ISCD(un)) {
19765 			err = ENOTTY;
19766 		} else {
19767 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
19768 		}
19769 		break;
19770 
19771 	case CDROMREADMODE2:
19772 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
19773 		if (!ISCD(un)) {
19774 			err = ENOTTY;
19775 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19776 			/*
19777 			 * If the drive supports READ CD, use that instead of
19778 			 * switching the LBA size via a MODE SELECT
19779 			 * Block Descriptor
19780 			 */
19781 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
19782 		} else {
19783 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
19784 		}
19785 		break;
19786 
19787 	case CDROMREADMODE1:
19788 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
19789 		if (!ISCD(un)) {
19790 			err = ENOTTY;
19791 		} else {
19792 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
19793 		}
19794 		break;
19795 
19796 	case CDROMREADOFFSET:
19797 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
19798 		if (!ISCD(un)) {
19799 			err = ENOTTY;
19800 		} else {
19801 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
19802 			    flag);
19803 		}
19804 		break;
19805 
19806 	case CDROMSBLKMODE:
19807 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
		/*
		 * There is no means of changing the block size on ATAPI
		 * drives, so return ENOTTY if the drive type is ATAPI.
		 */
19812 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19813 			err = ENOTTY;
19814 		} else if (un->un_f_mmc_cap == TRUE) {
19815 
			/*
			 * MMC devices do not support changing the
			 * logical block size.
			 *
			 * Note: EINVAL is returned instead of ENOTTY to
			 * maintain consistency with the original mmc
			 * driver update.
			 */
19824 			err = EINVAL;
19825 		} else {
19826 			mutex_enter(SD_MUTEX(un));
19827 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
19828 			    (un->un_ncmds_in_transport > 0)) {
19829 				mutex_exit(SD_MUTEX(un));
19830 				err = EINVAL;
19831 			} else {
19832 				mutex_exit(SD_MUTEX(un));
19833 				err = sr_change_blkmode(dev, cmd, arg, flag);
19834 			}
19835 		}
19836 		break;
19837 
19838 	case CDROMGBLKMODE:
19839 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
19840 		if (!ISCD(un)) {
19841 			err = ENOTTY;
19842 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
19843 		    (un->un_f_blockcount_is_valid != FALSE)) {
			/*
			 * The drive is an ATAPI drive, so return the target
			 * block size; the block size cannot be changed on
			 * ATAPI drives. Used primarily to detect whether an
			 * ATAPI cdrom is present.
			 */
19850 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
19851 			    sizeof (int), flag) != 0) {
19852 				err = EFAULT;
19853 			} else {
19854 				err = 0;
19855 			}
19856 
19857 		} else {
19858 			/*
19859 			 * Drive supports changing block sizes via a Mode
19860 			 * Select.
19861 			 */
19862 			err = sr_change_blkmode(dev, cmd, arg, flag);
19863 		}
19864 		break;
19865 
19866 	case CDROMGDRVSPEED:
19867 	case CDROMSDRVSPEED:
19868 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
19869 		if (!ISCD(un)) {
19870 			err = ENOTTY;
19871 		} else if (un->un_f_mmc_cap == TRUE) {
			/*
			 * Note: In the future the driver implementation
			 * for getting and setting cd speed should entail:
			 * 1) If non-mmc try the Toshiba mode page
			 *    (sr_change_speed)
			 * 2) If mmc but no support for Real Time Streaming
			 *    try the SET CD SPEED (0xBB) command
			 *    (sr_atapi_change_speed)
			 * 3) If mmc and support for Real Time Streaming
			 *    try the GET PERFORMANCE and SET STREAMING
			 *    commands (not yet implemented, 4380808)
			 */
			/*
			 * As per the recent MMC spec, CD-ROM speed is
			 * variable and changes with LBA. Since there is no
			 * longer any such thing as a single drive speed,
			 * fail this ioctl.
			 *
			 * Note: EINVAL is returned for consistency with the
			 * original implementation, which supported getting
			 * the drive speed of mmc devices but not setting
			 * it; thus EINVAL would be returned if a set request
			 * was made for an mmc device. We no longer support
			 * get or set speed for mmc, but need to remain
			 * consistent with regard to the error code returned.
			 */
19899 			err = EINVAL;
19900 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19901 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
19902 		} else {
19903 			err = sr_change_speed(dev, cmd, arg, flag);
19904 		}
19905 		break;
19906 
19907 	case CDROMCDDA:
19908 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
19909 		if (!ISCD(un)) {
19910 			err = ENOTTY;
19911 		} else {
19912 			err = sr_read_cdda(dev, (void *)arg, flag);
19913 		}
19914 		break;
19915 
19916 	case CDROMCDXA:
19917 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
19918 		if (!ISCD(un)) {
19919 			err = ENOTTY;
19920 		} else {
19921 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
19922 		}
19923 		break;
19924 
19925 	case CDROMSUBCODE:
19926 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
19927 		if (!ISCD(un)) {
19928 			err = ENOTTY;
19929 		} else {
19930 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
19931 		}
19932 		break;
19933 
19934 
19935 #ifdef SDDEBUG
19936 /* RESET/ABORTS testing ioctls */
19937 	case DKIOCRESET: {
19938 		int	reset_level;
19939 
19940 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
19941 			err = EFAULT;
19942 		} else {
19943 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
19944 			    "reset_level = 0x%lx\n", reset_level);
19945 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
19946 				err = 0;
19947 			} else {
19948 				err = EIO;
19949 			}
19950 		}
19951 		break;
19952 	}
19953 
19954 	case DKIOCABORT:
19955 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
19956 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
19957 			err = 0;
19958 		} else {
19959 			err = EIO;
19960 		}
19961 		break;
19962 #endif
19963 
19964 #ifdef SD_FAULT_INJECTION
19965 /* SDIOC FaultInjection testing ioctls */
19966 	case SDIOCSTART:
19967 	case SDIOCSTOP:
19968 	case SDIOCINSERTPKT:
19969 	case SDIOCINSERTXB:
19970 	case SDIOCINSERTUN:
19971 	case SDIOCINSERTARQ:
19972 	case SDIOCPUSH:
19973 	case SDIOCRETRIEVE:
19974 	case SDIOCRUN:
19975 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
19976 		    "SDIOC detected cmd:0x%X:\n", cmd);
19977 		/* call error generator */
19978 		sd_faultinjection_ioctl(cmd, arg, un);
19979 		err = 0;
19980 		break;
19981 
19982 #endif /* SD_FAULT_INJECTION */
19983 
19984 	case DKIOCFLUSHWRITECACHE:
19985 		{
19986 			struct dk_callback *dkc = (struct dk_callback *)arg;
19987 
19988 			mutex_enter(SD_MUTEX(un));
19989 			if (!un->un_f_sync_cache_supported ||
19990 			    !un->un_f_write_cache_enabled) {
19991 				err = un->un_f_sync_cache_supported ?
19992 					0 : ENOTSUP;
19993 				mutex_exit(SD_MUTEX(un));
19994 				if ((flag & FKIOCTL) && dkc != NULL &&
19995 				    dkc->dkc_callback != NULL) {
19996 					(*dkc->dkc_callback)(dkc->dkc_cookie,
19997 					    err);
19998 					/*
19999 					 * Did callback and reported error.
20000 					 * Since we did a callback, ioctl
20001 					 * should return 0.
20002 					 */
20003 					err = 0;
20004 				}
20005 				break;
20006 			}
20007 			mutex_exit(SD_MUTEX(un));
20008 
20009 			if ((flag & FKIOCTL) && dkc != NULL &&
20010 			    dkc->dkc_callback != NULL) {
20011 				/* async SYNC CACHE request */
20012 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20013 			} else {
20014 				/* synchronous SYNC CACHE request */
20015 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20016 			}
20017 		}
20018 		break;
20019 
20020 	case DKIOCGETWCE: {
20021 
20022 		int wce;
20023 
20024 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20025 			break;
20026 		}
20027 
20028 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20029 			err = EFAULT;
20030 		}
20031 		break;
20032 	}
20033 
20034 	case DKIOCSETWCE: {
20035 
20036 		int wce, sync_supported;
20037 
20038 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20039 			err = EFAULT;
20040 			break;
20041 		}
20042 
20043 		/*
20044 		 * Synchronize multiple threads trying to enable
20045 		 * or disable the cache via the un_f_wcc_cv
20046 		 * condition variable.
20047 		 */
20048 		mutex_enter(SD_MUTEX(un));
20049 
20050 		/*
20051 		 * Don't allow the cache to be enabled if the
20052 		 * config file has it disabled.
20053 		 */
20054 		if (un->un_f_opt_disable_cache && wce) {
20055 			mutex_exit(SD_MUTEX(un));
20056 			err = EINVAL;
20057 			break;
20058 		}
20059 
20060 		/*
20061 		 * Wait for write cache change in progress
20062 		 * bit to be clear before proceeding.
20063 		 */
20064 		while (un->un_f_wcc_inprog)
20065 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20066 
20067 		un->un_f_wcc_inprog = 1;
20068 
20069 		if (un->un_f_write_cache_enabled && wce == 0) {
20070 			/*
20071 			 * Disable the write cache.  Don't clear
20072 			 * un_f_write_cache_enabled until after
20073 			 * the mode select and flush are complete.
20074 			 */
20075 			sync_supported = un->un_f_sync_cache_supported;
20076 			mutex_exit(SD_MUTEX(un));
20077 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20078 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
20079 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20080 			}
20081 
20082 			mutex_enter(SD_MUTEX(un));
20083 			if (err == 0) {
20084 				un->un_f_write_cache_enabled = 0;
20085 			}
20086 
20087 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20088 			/*
20089 			 * Set un_f_write_cache_enabled first, so there is
20090 			 * no window where the cache is enabled, but the
20091 			 * bit says it isn't.
20092 			 */
20093 			un->un_f_write_cache_enabled = 1;
20094 			mutex_exit(SD_MUTEX(un));
20095 
20096 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20097 				SD_CACHE_ENABLE);
20098 
20099 			mutex_enter(SD_MUTEX(un));
20100 
20101 			if (err) {
20102 				un->un_f_write_cache_enabled = 0;
20103 			}
20104 		}
20105 
20106 		un->un_f_wcc_inprog = 0;
20107 		cv_broadcast(&un->un_wcc_cv);
20108 		mutex_exit(SD_MUTEX(un));
20109 		break;
20110 	}
20111 
20112 	default:
20113 		err = ENOTTY;
20114 		break;
20115 	}
20116 	mutex_enter(SD_MUTEX(un));
20117 	un->un_ncmds_in_driver--;
20118 	ASSERT(un->un_ncmds_in_driver >= 0);
20119 	mutex_exit(SD_MUTEX(un));
20120 
20121 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20122 	return (err);
20123 }
20124 
20125 
20126 /*
20127  *    Function: sd_dkio_ctrl_info
20128  *
20129  * Description: This routine is the driver entry point for handling controller
20130  *		information ioctl requests (DKIOCINFO).
20131  *
20132  *   Arguments: dev  - the device number
20133  *		arg  - pointer to user provided dk_cinfo structure
20134  *		       specifying the controller type and attributes.
20135  *		flag - this argument is a pass through to ddi_copyxxx()
20136  *		       directly from the mode argument of ioctl().
20137  *
20138  * Return Code: 0
20139  *		EFAULT
20140  *		ENXIO
20141  */
20142 
20143 static int
20144 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20145 {
20146 	struct sd_lun	*un = NULL;
20147 	struct dk_cinfo	*info;
20148 	dev_info_t	*pdip;
20149 	int		lun, tgt;
20150 
20151 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20152 		return (ENXIO);
20153 	}
20154 
20155 	info = (struct dk_cinfo *)
20156 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20157 
20158 	switch (un->un_ctype) {
20159 	case CTYPE_CDROM:
20160 		info->dki_ctype = DKC_CDROM;
20161 		break;
20162 	default:
20163 		info->dki_ctype = DKC_SCSI_CCS;
20164 		break;
20165 	}
20166 	pdip = ddi_get_parent(SD_DEVINFO(un));
20167 	info->dki_cnum = ddi_get_instance(pdip);
20168 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20169 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20170 	} else {
20171 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20172 		    DK_DEVLEN - 1);
20173 	}
20174 
20175 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20176 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20177 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20178 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20179 
20180 	/* Unit Information */
20181 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20182 	info->dki_slave = ((tgt << 3) | lun);
20183 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20184 	    DK_DEVLEN - 1);
20185 	info->dki_flags = DKI_FMTVOL;
20186 	info->dki_partition = SDPART(dev);
20187 
20188 	/* Max Transfer size of this device in blocks */
20189 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20190 	info->dki_addr = 0;
20191 	info->dki_space = 0;
20192 	info->dki_prio = 0;
20193 	info->dki_vec = 0;
20194 
20195 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20196 		kmem_free(info, sizeof (struct dk_cinfo));
20197 		return (EFAULT);
20198 	} else {
20199 		kmem_free(info, sizeof (struct dk_cinfo));
20200 		return (0);
20201 	}
20202 }
20203 
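/*
 * Illustrative user-level sketch (not part of this driver) of how a
 * caller might exercise the DKIOCINFO handler above; the device path is
 * an assumption.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *	struct dk_cinfo ci;
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCINFO, &ci) == 0) {
 *		(void) printf("ctlr %s%d unit %u maxxfer %u blocks\n",
 *		    ci.dki_cname, ci.dki_cnum, ci.dki_unit,
 *		    ci.dki_maxtransfer);
 *	}
 */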
20204 
20205 /*
20206  *    Function: sd_get_media_info
20207  *
20208  * Description: This routine is the driver entry point for handling ioctl
20209  *		requests for the media type or command set profile used by the
20210  *		drive to operate on the media (DKIOCGMEDIAINFO).
20211  *
20212  *   Arguments: dev	- the device number
20213  *		arg	- pointer to user provided dk_minfo structure
20214  *			  specifying the media type, logical block size and
20215  *			  drive capacity.
20216  *		flag	- this argument is a pass through to ddi_copyxxx()
20217  *			  directly from the mode argument of ioctl().
20218  *
20219  * Return Code: 0
20220  *		EACCES
20221  *		EFAULT
20222  *		ENXIO
20223  *		EIO
20224  */
20225 
20226 static int
20227 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20228 {
20229 	struct sd_lun		*un = NULL;
20230 	struct uscsi_cmd	com;
20231 	struct scsi_inquiry	*sinq;
20232 	struct dk_minfo		media_info;
20233 	u_longlong_t		media_capacity;
20234 	uint64_t		capacity;
20235 	uint_t			lbasize;
20236 	uchar_t			*out_data;
20237 	uchar_t			*rqbuf;
20238 	int			rval = 0;
20239 	int			rtn;
20240 
20241 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20242 	    (un->un_state == SD_STATE_OFFLINE)) {
20243 		return (ENXIO);
20244 	}
20245 
20246 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20247 
20248 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20249 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20250 
20251 	/* Issue a TUR to determine if the drive is ready with media present */
20252 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20253 	if (rval == ENXIO) {
20254 		goto done;
20255 	}
20256 
20257 	/* Now get configuration data */
20258 	if (ISCD(un)) {
20259 		media_info.dki_media_type = DK_CDROM;
20260 
20261 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20262 		if (un->un_f_mmc_cap == TRUE) {
20263 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20264 				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
20265 
20266 			if (rtn) {
20267 				/*
20268 				 * Failed for other than an illegal request
20269 				 * or command not supported
20270 				 */
20271 				if ((com.uscsi_status == STATUS_CHECK) &&
20272 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20273 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20274 					    (rqbuf[12] != 0x20)) {
20275 						rval = EIO;
20276 						goto done;
20277 					}
20278 				}
20279 			} else {
20280 				/*
20281 				 * The GET CONFIGURATION command succeeded
20282 				 * so set the media type according to the
20283 				 * returned data
20284 				 */
20285 				media_info.dki_media_type = out_data[6];
20286 				media_info.dki_media_type <<= 8;
20287 				media_info.dki_media_type |= out_data[7];
20288 			}
20289 		}
20290 	} else {
20291 		/*
20292 		 * The profile list is not available, so we attempt to identify
20293 		 * the media type based on the inquiry data
20294 		 */
20295 		sinq = un->un_sd->sd_inq;
20296 		if (sinq->inq_qual == 0) {
20297 			/* This is a direct access device */
20298 			media_info.dki_media_type = DK_FIXED_DISK;
20299 
20300 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20301 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20302 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20303 					media_info.dki_media_type = DK_ZIP;
20304 				} else if (
20305 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20306 					media_info.dki_media_type = DK_JAZ;
20307 				}
20308 			}
20309 		} else {
20310 			/* Not a CD or direct access so return unknown media */
20311 			media_info.dki_media_type = DK_UNKNOWN;
20312 		}
20313 	}
20314 
20315 	/* Now read the capacity so we can provide the lbasize and capacity */
20316 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20317 	    SD_PATH_DIRECT)) {
20318 	case 0:
20319 		break;
20320 	case EACCES:
20321 		rval = EACCES;
20322 		goto done;
20323 	default:
20324 		rval = EIO;
20325 		goto done;
20326 	}
20327 
20328 	media_info.dki_lbsize = lbasize;
20329 	media_capacity = capacity;
20330 
20331 	/*
20332 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
20333 	 * un->un_sys_blocksize chunks, so convert it into lbasize chunks;
20334 	 * e.g. 4194304 512-byte blocks on 2048-byte media become 1048576.
20335 	 */
20336 	media_capacity *= un->un_sys_blocksize;
20337 	media_capacity /= lbasize;
20338 	media_info.dki_capacity = media_capacity;
20339 
20340 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20341 		rval = EFAULT;
20342 		/* Keep the goto; code might be added below this point in future */
20343 		goto done;
20344 	}
20345 done:
20346 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20347 	kmem_free(rqbuf, SENSE_LENGTH);
20348 	return (rval);
20349 }
20350 
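/*
 * Illustrative user-level sketch (not part of this driver): fetching the
 * media info assembled above and computing the media size in bytes; the
 * device path is an assumption.
 *
 *	#include <sys/types.h>
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *	struct dk_minfo mi;
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0) {
 *		u_longlong_t nbytes = mi.dki_capacity *
 *		    (u_longlong_t)mi.dki_lbsize;
 *	}
 *
 * On success mi.dki_media_type holds a DK_* value (e.g. DK_CDROM or
 * DK_FIXED_DISK) matching the assignments in the routine above.
 */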
20351 
20352 /*
20353  *    Function: sd_check_media
20354  *
20355  * Description: This utility routine implements the functionality for the
20356  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20357  *		driver state changes from that specified by the user
20358  *		(inserted or ejected). For example, if the user specifies
20359  *		DKIO_EJECTED and the current media state is inserted this
20360  *		routine will immediately return DKIO_INSERTED. However, if the
20361  *		current media state is not inserted the user thread will be
20362  *		blocked until the drive state changes. If DKIO_NONE is specified
20363  *		the user thread will block until a drive state change occurs.
20364  *
20365  *   Arguments: dev  - the device number
20366  *		state - the state specified by the user; this routine blocks
20367  *			until the drive's media state differs from this value.
20368  *
20369  * Return Code: ENXIO
20370  *		EIO
20371  *		EAGAIN
20372  *		EINTR
20373  */
20374 
20375 static int
20376 sd_check_media(dev_t dev, enum dkio_state state)
20377 {
20378 	struct sd_lun		*un = NULL;
20379 	enum dkio_state		prev_state;
20380 	opaque_t		token = NULL;
20381 	int			rval = 0;
20382 
20383 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20384 		return (ENXIO);
20385 	}
20386 
20387 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20388 
20389 	mutex_enter(SD_MUTEX(un));
20390 
20391 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20392 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20393 
20394 	prev_state = un->un_mediastate;
20395 
20396 	/* is there anything to do? */
20397 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20398 		/*
20399 		 * submit the request to the scsi_watch service;
20400 		 * scsi_media_watch_cb() does the real work
20401 		 */
20402 		mutex_exit(SD_MUTEX(un));
20403 
20404 		/*
20405 		 * This change handles the case where a scsi watch request is
20406 		 * added to a device that is powered down. To accomplish this
20407 		 * we power up the device before adding the scsi watch request,
20408 		 * since the scsi watch sends a TUR directly to the device
20409 		 * which the device cannot handle if it is powered down.
20410 		 */
20411 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20412 			mutex_enter(SD_MUTEX(un));
20413 			goto done;
20414 		}
20415 
20416 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20417 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20418 		    (caddr_t)dev);
20419 
20420 		sd_pm_exit(un);
20421 
20422 		mutex_enter(SD_MUTEX(un));
20423 		if (token == NULL) {
20424 			rval = EAGAIN;
20425 			goto done;
20426 		}
20427 
20428 		/*
20429 		 * This is a special case IOCTL that doesn't return
20430 		 * until the media state changes. Routine sdpower
20431 		 * knows about and handles this so don't count it
20432 		 * as an active cmd in the driver, which would
20433 		 * keep the device busy to the pm framework.
20434 		 * If the count isn't decremented the device can't
20435 		 * be powered down.
20436 		 */
20437 		un->un_ncmds_in_driver--;
20438 		ASSERT(un->un_ncmds_in_driver >= 0);
20439 
20440 		/*
20441 		 * if a prior request had been made, this will be the same
20442 		 * token, as scsi_watch was designed that way.
20443 		 */
20444 		un->un_swr_token = token;
20445 		un->un_specified_mediastate = state;
20446 
20447 		/*
20448 		 * Now wait for the media state to change. We will not be
20449 		 * signalled until un_mediastate differs from the specified state,
20450 		 * but it is still better to test for this condition, since there
20451 		 * is a 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
20452 		 */
20453 		SD_TRACE(SD_LOG_COMMON, un,
20454 		    "sd_check_media: waiting for media state change\n");
20455 		while (un->un_mediastate == state) {
20456 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20457 				SD_TRACE(SD_LOG_COMMON, un,
20458 				    "sd_check_media: waiting for media state "
20459 				    "was interrupted\n");
20460 				un->un_ncmds_in_driver++;
20461 				rval = EINTR;
20462 				goto done;
20463 			}
20464 			SD_TRACE(SD_LOG_COMMON, un,
20465 			    "sd_check_media: received signal, state=%x\n",
20466 			    un->un_mediastate);
20467 		}
20468 		/*
20469 		 * Inc the counter to indicate the device once again
20470 		 * has an active outstanding cmd.
20471 		 */
20472 		un->un_ncmds_in_driver++;
20473 	}
20474 
20475 	/* invalidate geometry */
20476 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20477 		sr_ejected(un);
20478 	}
20479 
20480 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20481 		uint64_t	capacity;
20482 		uint_t		lbasize;
20483 
20484 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20485 		mutex_exit(SD_MUTEX(un));
20486 		/*
20487 		 * Since the following routines use SD_PATH_DIRECT, we must
20488 		 * call PM directly before the upcoming disk accesses. This
20489 		 * may cause the disk to be powered up and spun up.
20490 		 */
20491 
20492 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20493 			rval = sd_send_scsi_READ_CAPACITY(un,
20494 			    &capacity,
20495 			    &lbasize, SD_PATH_DIRECT);
20496 			if (rval != 0) {
20497 				sd_pm_exit(un);
20498 				mutex_enter(SD_MUTEX(un));
20499 				goto done;
20500 			}
20501 		} else {
20502 			rval = EIO;
20503 			mutex_enter(SD_MUTEX(un));
20504 			goto done;
20505 		}
20506 		mutex_enter(SD_MUTEX(un));
20507 
20508 		sd_update_block_info(un, lbasize, capacity);
20509 
20510 		mutex_exit(SD_MUTEX(un));
20511 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20512 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20513 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20514 			sd_set_pstats(un);
20515 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20516 			    "sd_check_media: un:0x%p pstats created and "
20517 			    "set\n", un);
20518 		}
20519 
20520 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20521 		    SD_PATH_DIRECT);
20522 		sd_pm_exit(un);
20523 
20524 		mutex_enter(SD_MUTEX(un));
20525 	}
20526 done:
20527 	un->un_f_watcht_stopped = FALSE;
20528 	if (un->un_swr_token) {
20529 		/*
20530 		 * Use of this local token and the mutex ensures that we avoid
20531 		 * some race conditions associated with terminating the
20532 		 * scsi watch.
20533 		 */
20534 		token = un->un_swr_token;
20535 		un->un_swr_token = (opaque_t)NULL;
20536 		mutex_exit(SD_MUTEX(un));
20537 		(void) scsi_watch_request_terminate(token,
20538 		    SCSI_WATCH_TERMINATE_WAIT);
20539 		mutex_enter(SD_MUTEX(un));
20540 	}
20541 
20542 	/*
20543 	 * Update the capacity kstat value, if no media previously
20544 	 * (capacity kstat is 0) and media has been inserted
20545 	 * (un_f_blockcount_is_valid == TRUE).
20546 	 */
20547 	if (un->un_errstats) {
20548 		struct sd_errstats	*stp = NULL;
20549 
20550 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20551 		if ((stp->sd_capacity.value.ui64 == 0) &&
20552 		    (un->un_f_blockcount_is_valid == TRUE)) {
20553 			stp->sd_capacity.value.ui64 =
20554 			    (uint64_t)((uint64_t)un->un_blockcount *
20555 			    un->un_sys_blocksize);
20556 		}
20557 	}
20558 	mutex_exit(SD_MUTEX(un));
20559 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20560 	return (rval);
20561 }
20562 
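/*
 * Illustrative user-level sketch (not part of this driver) of the
 * DKIOCSTATE pattern this routine implements: pass in the last known
 * state and block until the media state differs; the device path is an
 * assumption.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	enum dkio_state state = DKIO_NONE;
 *
 *	while (fd >= 0 && ioctl(fd, DKIOCSTATE, &state) == 0 &&
 *	    state != DKIO_INSERTED)
 *		continue;
 *
 * Each ioctl call blocks in sd_check_media() until un_mediastate differs
 * from the state passed in, then returns the new state to the caller.
 */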
20563 
20564 /*
20565  *    Function: sd_delayed_cv_broadcast
20566  *
20567  * Description: Delayed cv_broadcast to allow for target to recover from media
20568  *		insertion.
20569  *
20570  *   Arguments: arg - driver soft state (unit) structure
20571  */
20572 
20573 static void
20574 sd_delayed_cv_broadcast(void *arg)
20575 {
20576 	struct sd_lun *un = arg;
20577 
20578 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20579 
20580 	mutex_enter(SD_MUTEX(un));
20581 	un->un_dcvb_timeid = NULL;
20582 	cv_broadcast(&un->un_state_cv);
20583 	mutex_exit(SD_MUTEX(un));
20584 }
20585 
20586 
20587 /*
20588  *    Function: sd_media_watch_cb
20589  *
20590  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20591  *		routine processes the TUR sense data and updates the driver
20592  *		state if a transition has occurred. The user thread
20593  *		(sd_check_media) is then signalled.
20594  *
20595  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20596  *			among multiple watches that share this callback function
20597  *		resultp - scsi watch facility result packet containing scsi
20598  *			  packet, status byte and sense data
20599  *
20600  * Return Code: 0 for success, -1 for failure
20601  */
20602 
20603 static int
20604 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20605 {
20606 	struct sd_lun			*un;
20607 	struct scsi_status		*statusp = resultp->statusp;
20608 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
20609 	enum dkio_state			state = DKIO_NONE;
20610 	dev_t				dev = (dev_t)arg;
20611 	uchar_t				actual_sense_length;
20612 	uint8_t				skey, asc, ascq;
20613 
20614 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20615 		return (-1);
20616 	}
20617 	actual_sense_length = resultp->actual_sense_length;
20618 
20619 	mutex_enter(SD_MUTEX(un));
20620 	SD_TRACE(SD_LOG_COMMON, un,
20621 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
20622 	    *((char *)statusp), (void *)sensep, actual_sense_length);
20623 
20624 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
20625 		un->un_mediastate = DKIO_DEV_GONE;
20626 		cv_broadcast(&un->un_state_cv);
20627 		mutex_exit(SD_MUTEX(un));
20628 
20629 		return (0);
20630 	}
20631 
20632 	/*
20633 	 * If there was a check condition, then sensep points to valid sense
20634 	 * data. If the status was not a check condition but a reservation or
20635 	 * busy status, then the new state is DKIO_NONE.
20636 	 */
20637 	if (sensep != NULL) {
20638 		skey = scsi_sense_key(sensep);
20639 		asc = scsi_sense_asc(sensep);
20640 		ascq = scsi_sense_ascq(sensep);
20641 
20642 		SD_INFO(SD_LOG_COMMON, un,
20643 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
20644 		    skey, asc, ascq);
20645 		/* This routine only uses up to 13 bytes of sense data. */
20646 		if (actual_sense_length >= 13) {
20647 			if (skey == KEY_UNIT_ATTENTION) {
20648 				if (asc == 0x28) {
20649 					state = DKIO_INSERTED;
20650 				}
20651 			} else {
20652 				/*
20653 				 * Sense data 02/04/02 means that the
20654 				 * host should send a start command.
20655 				 * Explicitly leave the media state as
20656 				 * is (inserted), since the media is
20657 				 * present and the host has stopped the
20658 				 * device for PM reasons. The next true
20659 				 * read/write to this media will bring
20660 				 * the device to the right state for
20661 				 * media access.
20662 				 */
20663 				if ((skey == KEY_NOT_READY) &&
20664 				    (asc == 0x3a)) {
20665 					state = DKIO_EJECTED;
20666 				}
20667 
20668 				/*
20669 				 * If the drive is busy with an operation
20670 				 * or long write, keep the media in an
20671 				 * inserted state.
20672 				 */
20673 
20674 				if ((skey == KEY_NOT_READY) &&
20675 				    (asc == 0x04) &&
20676 				    ((ascq == 0x02) ||
20677 				    (ascq == 0x07) ||
20678 				    (ascq == 0x08))) {
20679 					state = DKIO_INSERTED;
20680 				}
20681 			}
20682 		}
20683 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
20684 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
20685 		state = DKIO_INSERTED;
20686 	}
20687 
20688 	SD_TRACE(SD_LOG_COMMON, un,
20689 	    "sd_media_watch_cb: state=%x, specified=%x\n",
20690 	    state, un->un_specified_mediastate);
20691 
20692 	/*
20693 	 * now signal the waiting thread if this is *not* the specified state;
20694 	 * delay the signal if the state is DKIO_INSERTED to allow the target
20695 	 * to recover
20696 	 */
20697 	if (state != un->un_specified_mediastate) {
20698 		un->un_mediastate = state;
20699 		if (state == DKIO_INSERTED) {
20700 			/*
20701 			 * delay the signal to give the drive a chance
20702 			 * to do what it apparently needs to do
20703 			 */
20704 			SD_TRACE(SD_LOG_COMMON, un,
20705 			    "sd_media_watch_cb: delayed cv_broadcast\n");
20706 			if (un->un_dcvb_timeid == NULL) {
20707 				un->un_dcvb_timeid =
20708 				    timeout(sd_delayed_cv_broadcast, un,
20709 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
20710 			}
20711 		} else {
20712 			SD_TRACE(SD_LOG_COMMON, un,
20713 			    "sd_media_watch_cb: immediate cv_broadcast\n");
20714 			cv_broadcast(&un->un_state_cv);
20715 		}
20716 	}
20717 	mutex_exit(SD_MUTEX(un));
20718 	return (0);
20719 }
20720 
20721 
20722 /*
20723  *    Function: sd_dkio_get_temp
20724  *
20725  * Description: This routine is the driver entry point for handling ioctl
20726  *		requests to get the disk temperature.
20727  *
20728  *   Arguments: dev  - the device number
20729  *		arg  - pointer to user provided dk_temperature structure.
20730  *		flag - this argument is a pass through to ddi_copyxxx()
20731  *		       directly from the mode argument of ioctl().
20732  *
20733  * Return Code: 0
20734  *		EFAULT
20735  *		ENXIO
20736  *		EAGAIN
20737  */
20738 
20739 static int
20740 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
20741 {
20742 	struct sd_lun		*un = NULL;
20743 	struct dk_temperature	*dktemp = NULL;
20744 	uchar_t			*temperature_page;
20745 	int			rval = 0;
20746 	int			path_flag = SD_PATH_STANDARD;
20747 
20748 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20749 		return (ENXIO);
20750 	}
20751 
20752 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
20753 
20754 	/* copyin the disk temp argument to get the user flags */
20755 	if (ddi_copyin((void *)arg, dktemp,
20756 	    sizeof (struct dk_temperature), flag) != 0) {
20757 		rval = EFAULT;
20758 		goto done;
20759 	}
20760 
20761 	/* Initialize the temperature to invalid. */
20762 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20763 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20764 
20765 	/*
20766 	 * Note: Investigate removing the "bypass pm" semantic.
20767 	 * Can we just bypass PM always?
20768 	 */
20769 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
20770 		path_flag = SD_PATH_DIRECT;
20771 		ASSERT(!mutex_owned(&un->un_pm_mutex));
20772 		mutex_enter(&un->un_pm_mutex);
20773 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
20774 			/*
20775 			 * If DKT_BYPASS_PM is set, and the drive happens to be
20776 			 * in low power mode, we cannot wake it up; we need to
20777 			 * return EAGAIN.
20778 			 */
20779 			mutex_exit(&un->un_pm_mutex);
20780 			rval = EAGAIN;
20781 			goto done;
20782 		} else {
20783 			/*
20784 			 * Indicate to PM the device is busy. This is required
20785 			 * to avoid a race - i.e. the ioctl is issuing a
20786 			 * command and the pm framework brings down the device
20787 			 * to low power mode (possible power cut-off on some
20788 			 * platforms).
20789 			 */
20790 			mutex_exit(&un->un_pm_mutex);
20791 			if (sd_pm_entry(un) != DDI_SUCCESS) {
20792 				rval = EAGAIN;
20793 				goto done;
20794 			}
20795 		}
20796 	}
20797 
20798 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
20799 
20800 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
20801 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
20802 		goto done2;
20803 	}
20804 
20805 	/*
20806 	 * For the current temperature verify that the parameter length is 0x02
20807 	 * and the parameter code is 0x00
20808 	 */
20809 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
20810 	    (temperature_page[5] == 0x00)) {
20811 		if (temperature_page[9] == 0xFF) {
20812 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20813 		} else {
20814 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
20815 		}
20816 	}
20817 
20818 	/*
20819 	 * For the reference temperature verify that the parameter
20820 	 * length is 0x02 and the parameter code is 0x01
20821 	 */
20822 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
20823 	    (temperature_page[11] == 0x01)) {
20824 		if (temperature_page[15] == 0xFF) {
20825 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20826 		} else {
20827 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
20828 		}
20829 	}
20830 
20831 	/* Do the copyout regardless of the temperature command's status. */
20832 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
20833 	    flag) != 0) {
20834 		rval = EFAULT;
20835 	}
20836 
20837 done2:
20838 	if (path_flag == SD_PATH_DIRECT) {
20839 		sd_pm_exit(un);
20840 	}
20841 
20842 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
20843 done:
20844 	if (dktemp != NULL) {
20845 		kmem_free(dktemp, sizeof (struct dk_temperature));
20846 	}
20847 
20848 	return (rval);
20849 }
20850 
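/*
 * Illustrative user-level sketch (not part of this driver): reading the
 * drive temperature through DKIOCGTEMPERATURE; the device path is an
 * assumption. Setting DKT_BYPASS_PM avoids spinning up a drive that is
 * in a low power state, at the cost of EAGAIN as handled above.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *	struct dk_temperature dkt = { 0 };
 *
 *	dkt.dkt_flags = DKT_BYPASS_PM;
 *	if (fd >= 0 && ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP)
 *		(void) printf("%d C\n", dkt.dkt_cur_temp);
 */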
20851 
20852 /*
20853  *    Function: sd_log_page_supported
20854  *
20855  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
20856  *		supported log pages.
20857  *
20858  *   Arguments: un - driver soft state (unit) structure
20859  *		log_page - the code of the log page being queried
20860  *
20861  * Return Code: -1 - on error (log sense is optional and may not be supported).
20862  *		0  - log page not found.
20863  *		1  - log page found.
20864  */
20865 
20866 static int
20867 sd_log_page_supported(struct sd_lun *un, int log_page)
20868 {
20869 	uchar_t *log_page_data;
20870 	int	i;
20871 	int	match = 0;
20872 	int	log_size;
20873 
20874 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
20875 
20876 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
20877 	    SD_PATH_DIRECT) != 0) {
20878 		SD_ERROR(SD_LOG_COMMON, un,
20879 		    "sd_log_page_supported: failed log page retrieval\n");
20880 		kmem_free(log_page_data, 0xFF);
20881 		return (-1);
20882 	}
20883 	log_size = log_page_data[3];
20884 
20885 	/*
20886 	 * The list of supported log pages starts at the fourth byte. Check
20887 	 * until we run out of log pages or a match is found.
20888 	 */
20889 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
20890 		if (log_page_data[i] == log_page) {
20891 			match++;
20892 		}
20893 	}
20894 	kmem_free(log_page_data, 0xFF);
20895 	return (match);
20896 }
20897 
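/*
 * Illustrative layout (assumed, typical values) of the LOG SENSE page
 * 0x00 response parsed above: a 4-byte header whose last byte holds the
 * page length, followed by one byte per supported page code.
 *
 *	byte:	0     1     2     3     4     5     6
 *	data:	0x00  0x00  0x00  0x03  0x00  0x0d  0x2f
 *
 * Here log_size is 3, and queries for pages 0x00 (supported pages),
 * 0x0d (temperature), and 0x2f (informational exceptions) would each
 * make this routine return 1.
 */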
20898 
20899 /*
20900  *    Function: sd_mhdioc_failfast
20901  *
20902  * Description: This routine is the driver entry point for handling ioctl
20903  *		requests to enable/disable the multihost failfast option.
20904  *		(MHIOCENFAILFAST)
20905  *
20906  *   Arguments: dev	- the device number
20907  *		arg	- user specified probing interval.
20908  *		flag	- this argument is a pass through to ddi_copyxxx()
20909  *			  directly from the mode argument of ioctl().
20910  *
20911  * Return Code: 0
20912  *		EFAULT
20913  *		ENXIO
20914  */
20915 
20916 static int
20917 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
20918 {
20919 	struct sd_lun	*un = NULL;
20920 	int		mh_time;
20921 	int		rval = 0;
20922 
20923 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20924 		return (ENXIO);
20925 	}
20926 
20927 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
20928 		return (EFAULT);
20929 
20930 	if (mh_time) {
20931 		mutex_enter(SD_MUTEX(un));
20932 		un->un_resvd_status |= SD_FAILFAST;
20933 		mutex_exit(SD_MUTEX(un));
20934 		/*
20935 		 * If mh_time is INT_MAX, then this ioctl is being used for
20936 		 * SCSI-3 PGR purposes, and we don't need to spawn a watch thread.
20937 		 */
20938 		if (mh_time != INT_MAX) {
20939 			rval = sd_check_mhd(dev, mh_time);
20940 		}
20941 	} else {
20942 		(void) sd_check_mhd(dev, 0);
20943 		mutex_enter(SD_MUTEX(un));
20944 		un->un_resvd_status &= ~SD_FAILFAST;
20945 		mutex_exit(SD_MUTEX(un));
20946 	}
20947 	return (rval);
20948 }
20949 
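/*
 * Illustrative user-level sketch (not part of this driver): enabling
 * failfast with a one second probe interval; the device path is an
 * assumption. The interval is in milliseconds.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	int mh_time = 1000;
 *
 *	if (fd >= 0)
 *		(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 *
 * Passing mh_time == 0 disables failfast again, and INT_MAX sets the
 * failfast flag without starting the probe thread (the SCSI-3 PGR case
 * noted above).
 */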
20950 
20951 /*
20952  *    Function: sd_mhdioc_takeown
20953  *
20954  * Description: This routine is the driver entry point for handling ioctl
20955  *		requests to forcefully acquire exclusive access rights to the
20956  *		multihost disk (MHIOCTKOWN).
20957  *
20958  *   Arguments: dev	- the device number
20959  *		arg	- user provided structure specifying the delay
20960  *			  parameters in milliseconds
20961  *		flag	- this argument is a pass through to ddi_copyxxx()
20962  *			  directly from the mode argument of ioctl().
20963  *
20964  * Return Code: 0
20965  *		EFAULT
20966  *		ENXIO
20967  */
20968 
20969 static int
20970 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
20971 {
20972 	struct sd_lun		*un = NULL;
20973 	struct mhioctkown	*tkown = NULL;
20974 	int			rval = 0;
20975 
20976 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20977 		return (ENXIO);
20978 	}
20979 
20980 	if (arg != NULL) {
20981 		tkown = (struct mhioctkown *)
20982 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
20983 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
20984 		if (rval != 0) {
20985 			rval = EFAULT;
20986 			goto error;
20987 		}
20988 	}
20989 
20990 	rval = sd_take_ownership(dev, tkown);
20991 	mutex_enter(SD_MUTEX(un));
20992 	if (rval == 0) {
20993 		un->un_resvd_status |= SD_RESERVE;
20994 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
20995 			sd_reinstate_resv_delay =
20996 			    tkown->reinstate_resv_delay * 1000;
20997 		} else {
20998 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
20999 		}
21000 		/*
21001 		 * Give the scsi_watch routine interval set by
21002 		 * the MHIOCENFAILFAST ioctl precedence here.
21003 		 */
21004 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21005 			mutex_exit(SD_MUTEX(un));
21006 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21007 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21008 			    "sd_mhdioc_takeown : %d\n",
21009 			    sd_reinstate_resv_delay);
21010 		} else {
21011 			mutex_exit(SD_MUTEX(un));
21012 		}
21013 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21014 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21015 	} else {
21016 		un->un_resvd_status &= ~SD_RESERVE;
21017 		mutex_exit(SD_MUTEX(un));
21018 	}
21019 
21020 error:
21021 	if (tkown != NULL) {
21022 		kmem_free(tkown, sizeof (struct mhioctkown));
21023 	}
21024 	return (rval);
21025 }
21026 
21027 
21028 /*
21029  *    Function: sd_mhdioc_release
21030  *
21031  * Description: This routine is the driver entry point for handling ioctl
21032  *		requests to release exclusive access rights to the multihost
21033  *		disk (MHIOCRELEASE).
21034  *
21035  *   Arguments: dev	- the device number
21036  *
21037  * Return Code: 0
21038  *		ENXIO
21039  */
21040 
21041 static int
21042 sd_mhdioc_release(dev_t dev)
21043 {
21044 	struct sd_lun		*un = NULL;
21045 	timeout_id_t		resvd_timeid_save;
21046 	int			resvd_status_save;
21047 	int			rval = 0;
21048 
21049 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21050 		return (ENXIO);
21051 	}
21052 
21053 	mutex_enter(SD_MUTEX(un));
21054 	resvd_status_save = un->un_resvd_status;
21055 	un->un_resvd_status &=
21056 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21057 	if (un->un_resvd_timeid) {
21058 		resvd_timeid_save = un->un_resvd_timeid;
21059 		un->un_resvd_timeid = NULL;
21060 		mutex_exit(SD_MUTEX(un));
21061 		(void) untimeout(resvd_timeid_save);
21062 	} else {
21063 		mutex_exit(SD_MUTEX(un));
21064 	}
21065 
21066 	/*
21067 	 * destroy any pending timeout thread that may be attempting to
21068 	 * reinstate reservation on this device.
21069 	 */
21070 	sd_rmv_resv_reclaim_req(dev);
21071 
21072 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21073 		mutex_enter(SD_MUTEX(un));
21074 		if ((un->un_mhd_token) &&
21075 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21076 			mutex_exit(SD_MUTEX(un));
21077 			(void) sd_check_mhd(dev, 0);
21078 		} else {
21079 			mutex_exit(SD_MUTEX(un));
21080 		}
21081 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21082 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21083 	} else {
21084 		/*
21085 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21086 		 */
21087 		mutex_enter(SD_MUTEX(un));
21088 		un->un_resvd_status = resvd_status_save;
21089 		mutex_exit(SD_MUTEX(un));
21090 	}
21091 	return (rval);
21092 }
21093 
21094 
21095 /*
21096  *    Function: sd_mhdioc_register_devid
21097  *
21098  * Description: This routine is the driver entry point for handling ioctl
21099  *		requests to register the device id (MHIOCREREGISTERDEVID).
21100  *
21101  *		Note: The implementation for this ioctl has been updated to
21102  *		be consistent with the original PSARC case (1999/357)
21103  *		(4375899, 4241671, 4220005)
21104  *
21105  *   Arguments: dev	- the device number
21106  *
21107  * Return Code: 0
21108  *		ENXIO
21109  */
21110 
21111 static int
21112 sd_mhdioc_register_devid(dev_t dev)
21113 {
21114 	struct sd_lun	*un = NULL;
21115 	int		rval = 0;
21116 
21117 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21118 		return (ENXIO);
21119 	}
21120 
21121 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21122 
21123 	mutex_enter(SD_MUTEX(un));
21124 
21125 	/* If a devid already exists, de-register it */
21126 	if (un->un_devid != NULL) {
21127 		ddi_devid_unregister(SD_DEVINFO(un));
21128 		/*
21129 		 * After unregistering the devid, free the devid memory.
21130 		 */
21131 		ddi_devid_free(un->un_devid);
21132 		un->un_devid = NULL;
21133 	}
21134 
21135 	/* Check for reservation conflict */
21136 	mutex_exit(SD_MUTEX(un));
21137 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21138 	mutex_enter(SD_MUTEX(un));
21139 
21140 	switch (rval) {
21141 	case 0:
21142 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21143 		break;
21144 	case EACCES:
21145 		break;
21146 	default:
21147 		rval = EIO;
21148 	}
21149 
21150 	mutex_exit(SD_MUTEX(un));
21151 	return (rval);
21152 }
21153 
21154 
21155 /*
21156  *    Function: sd_mhdioc_inkeys
21157  *
21158  * Description: This routine is the driver entry point for handling ioctl
21159  *		requests to issue the SCSI-3 Persistent In Read Keys command
21160  *		to the device (MHIOCGRP_INKEYS).
21161  *
21162  *   Arguments: dev	- the device number
21163  *		arg	- user provided in_keys structure
21164  *		flag	- this argument is a pass through to ddi_copyxxx()
21165  *			  directly from the mode argument of ioctl().
21166  *
21167  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21168  *		ENXIO
21169  *		EFAULT
21170  */
21171 
21172 static int
21173 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21174 {
21175 	struct sd_lun		*un;
21176 	mhioc_inkeys_t		inkeys;
21177 	int			rval = 0;
21178 
21179 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21180 		return (ENXIO);
21181 	}
21182 
21183 #ifdef _MULTI_DATAMODEL
21184 	switch (ddi_model_convert_from(flag & FMODELS)) {
21185 	case DDI_MODEL_ILP32: {
21186 		struct mhioc_inkeys32	inkeys32;
21187 
21188 		if (ddi_copyin(arg, &inkeys32,
21189 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21190 			return (EFAULT);
21191 		}
21192 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21193 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21194 		    &inkeys, flag)) != 0) {
21195 			return (rval);
21196 		}
21197 		inkeys32.generation = inkeys.generation;
21198 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21199 		    flag) != 0) {
21200 			return (EFAULT);
21201 		}
21202 		break;
21203 	}
21204 	case DDI_MODEL_NONE:
21205 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21206 		    flag) != 0) {
21207 			return (EFAULT);
21208 		}
21209 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21210 		    &inkeys, flag)) != 0) {
21211 			return (rval);
21212 		}
21213 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21214 		    flag) != 0) {
21215 			return (EFAULT);
21216 		}
21217 		break;
21218 	}
21219 
21220 #else /* ! _MULTI_DATAMODEL */
21221 
21222 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21223 		return (EFAULT);
21224 	}
21225 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21226 	if (rval != 0) {
21227 		return (rval);
21228 	}
21229 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21230 		return (EFAULT);
21231 	}
21232 
21233 #endif /* _MULTI_DATAMODEL */
21234 
21235 	return (rval);
21236 }
21237 
21238 
21239 /*
21240  *    Function: sd_mhdioc_inresv
21241  *
21242  * Description: This routine is the driver entry point for handling ioctl
21243  *		requests to issue the SCSI-3 Persistent In Read Reservations
21244  *		command to the device (MHIOCGRP_INRESV).
21245  *
21246  *   Arguments: dev	- the device number
21247  *		arg	- user provided in_resv structure
21248  *		flag	- this argument is a pass through to ddi_copyxxx()
21249  *			  directly from the mode argument of ioctl().
21250  *
21251  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21252  *		ENXIO
21253  *		EFAULT
21254  */
21255 
21256 static int
21257 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21258 {
21259 	struct sd_lun		*un;
21260 	mhioc_inresvs_t		inresvs;
21261 	int			rval = 0;
21262 
21263 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21264 		return (ENXIO);
21265 	}
21266 
21267 #ifdef _MULTI_DATAMODEL
21268 
21269 	switch (ddi_model_convert_from(flag & FMODELS)) {
21270 	case DDI_MODEL_ILP32: {
21271 		struct mhioc_inresvs32	inresvs32;
21272 
21273 		if (ddi_copyin(arg, &inresvs32,
21274 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21275 			return (EFAULT);
21276 		}
21277 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21278 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21279 		    &inresvs, flag)) != 0) {
21280 			return (rval);
21281 		}
21282 		inresvs32.generation = inresvs.generation;
21283 		if (ddi_copyout(&inresvs32, arg,
21284 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21285 			return (EFAULT);
21286 		}
21287 		break;
21288 	}
21289 	case DDI_MODEL_NONE:
21290 		if (ddi_copyin(arg, &inresvs,
21291 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21292 			return (EFAULT);
21293 		}
21294 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21295 		    &inresvs, flag)) != 0) {
21296 			return (rval);
21297 		}
21298 		if (ddi_copyout(&inresvs, arg,
21299 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21300 			return (EFAULT);
21301 		}
21302 		break;
21303 	}
21304 
21305 #else /* ! _MULTI_DATAMODEL */
21306 
21307 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21308 		return (EFAULT);
21309 	}
21310 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21311 	if (rval != 0) {
21312 		return (rval);
21313 	}
21314 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
21315 		return (EFAULT);
21316 	}
21317 
21318 #endif /* ! _MULTI_DATAMODEL */
21319 
21320 	return (rval);
21321 }
21322 
21323 
21324 /*
21325  * The following routines support the clustering functionality described below
21326  * and implement lost reservation reclaim functionality.
21327  *
21328  * Clustering
21329  * ----------
21330  * The clustering code uses two different, independent forms of SCSI
21331  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
21332  * Persistent Group Reservations. For any particular disk, it will use either
21333  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
21334  *
21335  * SCSI-2
21336  * The cluster software takes ownership of a multi-hosted disk by issuing the
21337  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
21338  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a cluster,
21339  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, then issues
21340  * the MHIOCENFAILFAST ioctl. This ioctl "enables failfast" in the driver. The
21341  * meaning of failfast is that if the driver (on this host) ever encounters the
21342  * scsi error return code RESERVATION_CONFLICT from the device, it should
21343  * immediately panic the host. The motivation for this ioctl is that if this
21344  * host does encounter reservation conflict, the underlying cause is that some
21345  * other host of the cluster has decided that this host is no longer in the
21346  * cluster and has seized control of the disks for itself. Since this host is no
21347  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
21348  * does two things:
21349  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21350  *      error to panic the host
21351  *      (b) it sets up a periodic timer to test whether this host still has
21352  *      "access" (in that no other host has reserved the device):  if the
21353  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21354  *      purpose of that periodic timer is to handle scenarios where the host is
21355  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21356  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21357  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21358  * the device itself.
21359  *
21360  * SCSI-3 PGR
21361  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21362  * facility is supported through the shared multihost disk ioctls
21363  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21364  * MHIOCGRP_PREEMPTANDABORT)
21365  *
21366  * Reservation Reclaim:
21367  * --------------------
21368  * To support the lost reservation reclaim operations this driver creates a
21369  * single thread to handle reinstating reservations on all devices that have
21370  * lost reservations. sd_resv_reclaim_requests are logged for all devices that
21371  * have LOST RESERVATIONS when the scsi watch facility calls back
21372  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
21373  * requests to regain the lost reservations.
21374  */
21375 
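/*
 * Illustrative user-level sketch (not part of this driver) of the SCSI-2
 * sequence described above: take ownership, enable failfast, and later
 * release ownership. The device path and delay values are assumptions.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	struct mhioctkown t = { 0 };
 *	int probe_ms = 1000;
 *
 *	t.reinstate_resv_delay = 1000;
 *	if (fd >= 0 && ioctl(fd, MHIOCTKOWN, &t) == 0)
 *		(void) ioctl(fd, MHIOCENFAILFAST, &probe_ms);
 *
 *	(void) ioctl(fd, MHIOCRELEASE, 0);
 */
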
21376 /*
21377  *    Function: sd_check_mhd()
21378  *
21379  * Description: This function sets up and submits a scsi watch request or
21380  *		terminates an existing watch request. This routine is used in
21381  *		support of reservation reclaim.
21382  *
21383  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21384  *			 among multiple watches that share the callback function
21385  *		interval - the number of milliseconds specifying the watch
21386  *			   interval for issuing TEST UNIT READY commands. If
21387  *			   set to 0 the watch should be terminated. If the
21388  *			   interval is set to 0 and if the device is required
21389  *			   to hold reservation while disabling failfast, the
21390  *			   watch is restarted with an interval of
21391  *			   reinstate_resv_delay.
21392  *
21393  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21394  *		ENXIO      - Indicates an invalid device was specified
21395  *		EAGAIN     - Unable to submit the scsi watch request
21396  */
21397 
21398 static int
21399 sd_check_mhd(dev_t dev, int interval)
21400 {
21401 	struct sd_lun	*un;
21402 	opaque_t	token;
21403 
21404 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21405 		return (ENXIO);
21406 	}
21407 
21408 	/* is this a watch termination request? */
21409 	if (interval == 0) {
21410 		mutex_enter(SD_MUTEX(un));
21411 		/* if there is an existing watch task then terminate it */
21412 		if (un->un_mhd_token) {
21413 			token = un->un_mhd_token;
21414 			un->un_mhd_token = NULL;
21415 			mutex_exit(SD_MUTEX(un));
21416 			(void) scsi_watch_request_terminate(token,
21417 			    SCSI_WATCH_TERMINATE_WAIT);
21418 			mutex_enter(SD_MUTEX(un));
21419 		} else {
21420 			mutex_exit(SD_MUTEX(un));
21421 			/*
21422 			 * Note: If we return here we don't check for the
21423 			 * failfast case. This is the original legacy
21424 			 * implementation but perhaps we should be checking
21425 			 * the failfast case.
21426 			 */
21427 			return (0);
21428 		}
21429 		/*
21430 		 * If the device is required to hold reservation while
21431 		 * disabling failfast, we need to restart the scsi_watch
21432 		 * routine with an interval of reinstate_resv_delay.
21433 		 */
21434 		if (un->un_resvd_status & SD_RESERVE) {
21435 			interval = sd_reinstate_resv_delay/1000;
21436 		} else {
21437 			/* no failfast so bail */
21438 			mutex_exit(SD_MUTEX(un));
21439 			return (0);
21440 		}
21441 		mutex_exit(SD_MUTEX(un));
21442 	}
21443 
21444 	/*
21445 	 * adjust minimum time interval to 1 second,
21446 	 * and convert from msecs to usecs
21447 	 */
21448 	if (interval > 0 && interval < 1000) {
21449 		interval = 1000;
21450 	}
21451 	interval *= 1000;
21452 
21453 	/*
21454 	 * submit the request to the scsi_watch service
21455 	 */
21456 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21457 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21458 	if (token == NULL) {
21459 		return (EAGAIN);
21460 	}
21461 
21462 	/*
21463 	 * save token for termination later on
21464 	 */
21465 	mutex_enter(SD_MUTEX(un));
21466 	un->un_mhd_token = token;
21467 	mutex_exit(SD_MUTEX(un));
21468 	return (0);
21469 }
21470 
21471 
21472 /*
21473  *    Function: sd_mhd_watch_cb()
21474  *
21475  * Description: This function is the call back function used by the scsi watch
21476  *		facility. The scsi watch facility sends the "Test Unit Ready"
21477  *		and processes the status. If applicable (i.e. a "Unit Attention"
21478  *		status and automatic "Request Sense" not used) the scsi watch
21479  *		facility will send a "Request Sense" and retrieve the sense data
21480  *		to be passed to this callback function. In either case the
21481  *		to be passed to this callback function. In either case, whether
21482  *		the automatic "Request Sense" was used or the facility submitted
21483  *		one, this callback is passed the status and sense data.
21484  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21485  *			among multiple watches that share this callback function
21486  *		resultp - scsi watch facility result packet containing scsi
21487  *			  packet, status byte and sense data
21488  *
21489  * Return Code: 0 - continue the watch task
21490  *		non-zero - terminate the watch task
21491  */
21492 
21493 static int
21494 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21495 {
21496 	struct sd_lun			*un;
21497 	struct scsi_status		*statusp;
21498 	uint8_t				*sensep;
21499 	struct scsi_pkt			*pkt;
21500 	uchar_t				actual_sense_length;
21501 	dev_t  				dev = (dev_t)arg;
21502 
21503 	ASSERT(resultp != NULL);
21504 	statusp			= resultp->statusp;
21505 	sensep			= (uint8_t *)resultp->sensep;
21506 	pkt			= resultp->pkt;
21507 	actual_sense_length	= resultp->actual_sense_length;
21508 
21509 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21510 		return (ENXIO);
21511 	}
21512 
21513 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21514 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21515 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21516 
21517 	/* Begin processing of the status and/or sense data */
21518 	if (pkt->pkt_reason != CMD_CMPLT) {
21519 		/* Handle the incomplete packet */
21520 		sd_mhd_watch_incomplete(un, pkt);
21521 		return (0);
21522 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21523 		if (*((unsigned char *)statusp)
21524 		    == STATUS_RESERVATION_CONFLICT) {
21525 			/*
21526 			 * Handle a reservation conflict by panicking if
21527 			 * configured for failfast or by logging the conflict
21528 			 * and updating the reservation status
21529 			 */
21530 			mutex_enter(SD_MUTEX(un));
21531 			if ((un->un_resvd_status & SD_FAILFAST) &&
21532 			    (sd_failfast_enable)) {
21533 				sd_panic_for_res_conflict(un);
21534 				/*NOTREACHED*/
21535 			}
21536 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21537 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21538 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21539 			mutex_exit(SD_MUTEX(un));
21540 		}
21541 	}
21542 
21543 	if (sensep != NULL) {
21544 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21545 			mutex_enter(SD_MUTEX(un));
21546 			if ((scsi_sense_asc(sensep) ==
21547 			    SD_SCSI_RESET_SENSE_CODE) &&
21548 			    (un->un_resvd_status & SD_RESERVE)) {
21549 				/*
21550 				 * The additional sense code indicates a power
21551 				 * on or bus device reset has occurred; update
21552 				 * the reservation status.
21553 				 */
21554 				un->un_resvd_status |=
21555 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21556 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21557 				    "sd_mhd_watch_cb: Lost Reservation\n");
21558 			}
21559 		} else {
21560 			return (0);
21561 		}
21562 	} else {
21563 		mutex_enter(SD_MUTEX(un));
21564 	}
21565 
21566 	if ((un->un_resvd_status & SD_RESERVE) &&
21567 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21568 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21569 			/*
21570 			 * A reset occurred in between the last probe and this
21571 			 * one so if a timeout is pending cancel it.
21572 			 */
21573 			if (un->un_resvd_timeid) {
21574 				timeout_id_t temp_id = un->un_resvd_timeid;
21575 				un->un_resvd_timeid = NULL;
21576 				mutex_exit(SD_MUTEX(un));
21577 				(void) untimeout(temp_id);
21578 				mutex_enter(SD_MUTEX(un));
21579 			}
21580 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21581 		}
21582 		if (un->un_resvd_timeid == 0) {
21583 			/* Schedule a timeout to handle the lost reservation */
21584 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21585 			    (void *)dev,
21586 			    drv_usectohz(sd_reinstate_resv_delay));
21587 		}
21588 	}
21589 	mutex_exit(SD_MUTEX(un));
21590 	return (0);
21591 }
21592 
21593 
21594 /*
21595  *    Function: sd_mhd_watch_incomplete()
21596  *
21597  * Description: This function is used to find out why a scsi pkt sent by the
21598  *		scsi watch facility was not completed. In some scenarios this
21599  *		routine simply returns; otherwise it escalates through lun,
21600  *		target, and bus resets to see if the drive is still online.
21601  *
21602  *   Arguments: un  - driver soft state (unit) structure
21603  *		pkt - incomplete scsi pkt
21604  */
21605 
21606 static void
21607 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
21608 {
21609 	int	be_chatty;
21610 	int	perr;
21611 
21612 	ASSERT(pkt != NULL);
21613 	ASSERT(un != NULL);
21614 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
21615 	perr		= (pkt->pkt_statistics & STAT_PERR);
21616 
21617 	mutex_enter(SD_MUTEX(un));
21618 	if (un->un_state == SD_STATE_DUMPING) {
21619 		mutex_exit(SD_MUTEX(un));
21620 		return;
21621 	}
21622 
21623 	switch (pkt->pkt_reason) {
21624 	case CMD_UNX_BUS_FREE:
21625 		/*
21626 		 * If we had a parity error that caused the target to drop BSY*,
21627 		 * don't be chatty about it.
21628 		 */
21629 		if (perr && be_chatty) {
21630 			be_chatty = 0;
21631 		}
21632 		break;
21633 	case CMD_TAG_REJECT:
21634 		/*
21635 		 * The SCSI-2 spec states that a tag reject will be sent by the
21636 		 * target if tagged queuing is not supported. A tag reject may
21637 		 * also be sent during certain initialization periods or to
21638 		 * control internal resources. For the latter case the target
21639 		 * may also return Queue Full.
21640 		 *
21641 		 * If this driver receives a tag reject from a target that is
21642 		 * going through an init period or controlling internal
21643 		 * resources tagged queuing will be disabled. This is a less
21644 		 * than optimal behavior but the driver is unable to determine
21645 		 * the target state and assumes tagged queueing is not supported.
21646 		 */
21647 		pkt->pkt_flags = 0;
21648 		un->un_tagflags = 0;
21649 
21650 		if (un->un_f_opt_queueing == TRUE) {
21651 			un->un_throttle = min(un->un_throttle, 3);
21652 		} else {
21653 			un->un_throttle = 1;
21654 		}
21655 		mutex_exit(SD_MUTEX(un));
21656 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
21657 		mutex_enter(SD_MUTEX(un));
21658 		break;
21659 	case CMD_INCOMPLETE:
21660 		/*
21661 		 * The transport stopped with an abnormal state, fallthrough and
21662 		 * reset the target and/or bus unless selection did not complete
21663 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
21664 		 * go through a target/bus reset
21665 		 */
21666 		if (pkt->pkt_state == STATE_GOT_BUS) {
21667 			break;
21668 		}
21669 		/*FALLTHROUGH*/
21670 
21671 	case CMD_TIMEOUT:
21672 	default:
21673 		/*
21674 		 * The lun may still be running the command, so a lun reset
21675 		 * should be attempted. If the lun reset fails or cannot be
21676 		 * issued, then try a target reset. Lastly try a bus reset.
21677 		 */
21678 		if ((pkt->pkt_statistics &
21679 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
21680 			int reset_retval = 0;
21681 			mutex_exit(SD_MUTEX(un));
21682 			if (un->un_f_allow_bus_device_reset == TRUE) {
21683 				if (un->un_f_lun_reset_enabled == TRUE) {
21684 					reset_retval =
21685 					    scsi_reset(SD_ADDRESS(un),
21686 					    RESET_LUN);
21687 				}
21688 				if (reset_retval == 0) {
21689 					reset_retval =
21690 					    scsi_reset(SD_ADDRESS(un),
21691 					    RESET_TARGET);
21692 				}
21693 			}
21694 			if (reset_retval == 0) {
21695 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
21696 			}
21697 			mutex_enter(SD_MUTEX(un));
21698 		}
21699 		break;
21700 	}
21701 
21702 	/* A device/bus reset has occurred; update the reservation status. */
21703 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
21704 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
21705 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21706 			un->un_resvd_status |=
21707 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21708 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21709 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
21710 		}
21711 	}
21712 
21713 	/*
21714 	 * The disk has been turned off; update the device state.
21715 	 *
21716 	 * Note: Should we be offlining the disk here?
21717 	 */
21718 	if (pkt->pkt_state == STATE_GOT_BUS) {
21719 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
21720 		    "Disk not responding to selection\n");
21721 		if (un->un_state != SD_STATE_OFFLINE) {
21722 			New_state(un, SD_STATE_OFFLINE);
21723 		}
21724 	} else if (be_chatty) {
21725 		/*
21726 		 * Suppress messages if they all have the same pkt_reason;
21727 		 * with tagged queuing, many (up to 256) can be returned with
21728 		 * the same pkt_reason.
21729 		 */
21730 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
21731 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
21732 			    "sd_mhd_watch_incomplete: "
21733 			    "SCSI transport failed: reason '%s'\n",
21734 			    scsi_rname(pkt->pkt_reason));
21735 		}
21736 	}
21737 	un->un_last_pkt_reason = pkt->pkt_reason;
21738 	mutex_exit(SD_MUTEX(un));
21739 }
21740 
21741 
21742 /*
21743  *    Function: sd_sname()
21744  *
21745  * Description: This is a simple little routine to return a string containing
21746  *		a printable description of a command status byte for use in
21747  *		logging.
21748  *
21749  *   Arguments: status - the SCSI status byte
21750  *
21751  * Return Code: char * - string containing status description.
21752  */
21753 
21754 static char *
21755 sd_sname(uchar_t status)
21756 {
21757 	switch (status & STATUS_MASK) {
21758 	case STATUS_GOOD:
21759 		return ("good status");
21760 	case STATUS_CHECK:
21761 		return ("check condition");
21762 	case STATUS_MET:
21763 		return ("condition met");
21764 	case STATUS_BUSY:
21765 		return ("busy");
21766 	case STATUS_INTERMEDIATE:
21767 		return ("intermediate");
21768 	case STATUS_INTERMEDIATE_MET:
21769 		return ("intermediate - condition met");
21770 	case STATUS_RESERVATION_CONFLICT:
21771 		return ("reservation conflict");
21772 	case STATUS_TERMINATED:
21773 		return ("command terminated");
21774 	case STATUS_QFULL:
21775 		return ("queue full");
21776 	default:
21777 		return ("<unknown status>");
21778 	}
21779 }
21780 
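/*
 * Usage sketch (hypothetical caller, not code from this driver): the
 * returned string is meant to be interpolated into a log message, e.g.
 *
 *	scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
 *	    "command completed with %s\n",
 *	    sd_sname(SD_GET_PKT_STATUS(pktp)));
 */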
21781 
21782 /*
21783  *    Function: sd_mhd_resvd_recover()
21784  *
21785  * Description: This function adds a reservation entry to the
21786  *		sd_resv_reclaim_request list and signals the reservation
21787  *		reclaim thread that there is work pending. If the reservation
21788  *		reclaim thread has not been previously created this function
21789  *		will kick it off.
21790  *
21791  *   Arguments: arg -   the device 'dev_t', used for context to discriminate
21792  *			among multiple timeouts that share this callback function
21793  *
21794  *     Context: This routine is called by timeout() and is run in interrupt
21795  *		context. It must not sleep or call other functions which may
21796  *		sleep.
21797  */
21798 
21799 static void
21800 sd_mhd_resvd_recover(void *arg)
21801 {
21802 	dev_t			dev = (dev_t)arg;
21803 	struct sd_lun		*un;
21804 	struct sd_thr_request	*sd_treq = NULL;
21805 	struct sd_thr_request	*sd_cur = NULL;
21806 	struct sd_thr_request	*sd_prev = NULL;
21807 	int			already_there = 0;
21808 
21809 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21810 		return;
21811 	}
21812 
21813 	mutex_enter(SD_MUTEX(un));
21814 	un->un_resvd_timeid = NULL;
21815 	if (un->un_resvd_status & SD_WANT_RESERVE) {
21816 		/*
21817 		 * There was a reset so don't issue the reserve, allow the
21818 		 * sd_mhd_watch_cb callback function to notice this and
21819 		 * reschedule the timeout for reservation.
21820 		 */
21821 		mutex_exit(SD_MUTEX(un));
21822 		return;
21823 	}
21824 	mutex_exit(SD_MUTEX(un));
21825 
21826 	/*
21827 	 * Add this device to the sd_resv_reclaim_request list and the
21828 	 * sd_resv_reclaim_thread should take care of the rest.
21829 	 *
21830 	 * Note: We can't sleep in this context so if the memory allocation
21831 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
21832 	 * reschedule the timeout for reservation.  (4378460)
21833 	 */
21834 	sd_treq = (struct sd_thr_request *)
21835 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
21836 	if (sd_treq == NULL) {
21837 		return;
21838 	}
21839 
21840 	sd_treq->sd_thr_req_next = NULL;
21841 	sd_treq->dev = dev;
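	/*
	 * Append the request to the global reclaim queue, unless an entry
	 * for this device is already pending; the queue is drained by
	 * sd_resv_reclaim_thread().
	 */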
21842 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21843 	if (sd_tr.srq_thr_req_head == NULL) {
21844 		sd_tr.srq_thr_req_head = sd_treq;
21845 	} else {
21846 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
21847 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
21848 			if (sd_cur->dev == dev) {
21849 				/*
21850 				 * Already in the queue, so don't log
21851 				 * another request for the device.
21852 				 */
21853 				already_there = 1;
21854 				break;
21855 			}
21856 			sd_prev = sd_cur;
21857 		}
21858 		if (!already_there) {
21859 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
21860 			    "logging request for %lx\n", dev);
21861 			sd_prev->sd_thr_req_next = sd_treq;
21862 		} else {
21863 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
21864 		}
21865 	}
21866 
21867 	/*
21868 	 * Create a kernel thread to do the reservation reclaim, freeing up
21869 	 * this (timeout) thread: we are in interrupt context and cannot
21870 	 * block while the reclaim is performed.
21871 	 */
21872 	if (sd_tr.srq_resv_reclaim_thread == NULL)
21873 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
21874 		    sd_resv_reclaim_thread, NULL,
21875 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
21876 
21877 	/* Tell the reservation reclaim thread that it has work to do */
21878 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
21879 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21880 }
21881 
21882 /*
21883  *    Function: sd_resv_reclaim_thread()
21884  *
21885  * Description: This function implements the reservation reclaim operations
21886  *
21887  *   Arguments: none; the requests to service are taken from the global
21888  *		      sd_tr.srq_thr_req_head list
21889  */
21890 
21891 static void
21892 sd_resv_reclaim_thread()
21893 {
21894 	struct sd_lun		*un;
21895 	struct sd_thr_request	*sd_mhreq;
21896 
21897 	/* Wait for work */
21898 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21899 	if (sd_tr.srq_thr_req_head == NULL) {
21900 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
21901 		    &sd_tr.srq_resv_reclaim_mutex);
21902 	}
21903 
21904 	/* Loop while we have work */
21905 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
21906 		un = ddi_get_soft_state(sd_state,
21907 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
21908 		if (un == NULL) {
21909 			/*
21910 			 * softstate structure is NULL so just
21911 			 * dequeue the request and continue
21912 			 */
21913 			sd_tr.srq_thr_req_head =
21914 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21915 			kmem_free(sd_tr.srq_thr_cur_req,
21916 			    sizeof (struct sd_thr_request));
21917 			continue;
21918 		}
21919 
21920 		/* dequeue the request */
21921 		sd_mhreq = sd_tr.srq_thr_cur_req;
21922 		sd_tr.srq_thr_req_head =
21923 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21924 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21925 
21926 		/*
21927 		 * Reclaim reservation only if SD_RESERVE is still set. There
21928 		 * may have been a call to MHIOCRELEASE before we got here.
21929 		 */
21930 		mutex_enter(SD_MUTEX(un));
21931 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21932 			/*
21933 			 * Note: The SD_LOST_RESERVE flag is cleared before
21934 			 * reclaiming the reservation. If this is done after the
21935 			 * call to sd_reserve_release, a reservation loss in the
21936 			 * window between pkt completion of the reserve cmd and
21937 			 * the mutex_enter below may not be recognized.
21938 			 */
21939 			un->un_resvd_status &= ~SD_LOST_RESERVE;
21940 			mutex_exit(SD_MUTEX(un));
21941 
21942 			if (sd_reserve_release(sd_mhreq->dev,
21943 			    SD_RESERVE) == 0) {
21944 				mutex_enter(SD_MUTEX(un));
21945 				un->un_resvd_status |= SD_RESERVE;
21946 				mutex_exit(SD_MUTEX(un));
21947 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21948 				    "sd_resv_reclaim_thread: "
21949 				    "Reservation Recovered\n");
21950 			} else {
21951 				mutex_enter(SD_MUTEX(un));
21952 				un->un_resvd_status |= SD_LOST_RESERVE;
21953 				mutex_exit(SD_MUTEX(un));
21954 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21955 				    "sd_resv_reclaim_thread: Failed "
21956 				    "Reservation Recovery\n");
21957 			}
21958 		} else {
21959 			mutex_exit(SD_MUTEX(un));
21960 		}
21961 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21962 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
21963 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
21964 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
21965 		/*
21966 		 * wakeup the destroy thread if anyone is waiting on
21967 		 * us to complete.
21968 		 */
21969 		cv_signal(&sd_tr.srq_inprocess_cv);
21970 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
21971 		    "sd_resv_reclaim_thread: cv_signaling current request\n");
21972 	}
21973 
21974 	/*
21975 	 * Clean up the sd_tr structure now that this thread is exiting.
21976 	 */
21977 	ASSERT(sd_tr.srq_thr_req_head == NULL);
21978 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
21979 	sd_tr.srq_resv_reclaim_thread = NULL;
21980 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21981 	thread_exit();
21982 }
21983 
21984 
21985 /*
21986  *    Function: sd_rmv_resv_reclaim_req()
21987  *
21988  * Description: This function removes any pending reservation reclaim requests
21989  *		for the specified device.
21990  *
21991  *   Arguments: dev - the device 'dev_t'
21992  */
21993 
21994 static void
21995 sd_rmv_resv_reclaim_req(dev_t dev)
21996 {
21997 	struct sd_thr_request *sd_mhreq;
21998 	struct sd_thr_request *sd_prev;
21999 
22000 	/* Remove a reservation reclaim request from the list */
22001 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22002 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22003 		/*
22004 		 * We are attempting to reinstate reservation for
22005 		 * this device. We wait for sd_reserve_release()
22006 		 * to return before we return.
22007 		 */
22008 		cv_wait(&sd_tr.srq_inprocess_cv,
22009 		    &sd_tr.srq_resv_reclaim_mutex);
22010 	} else {
22011 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22012 		if (sd_mhreq && sd_mhreq->dev == dev) {
22013 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22014 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22015 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22016 			return;
22017 		}
22018 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22019 			if (sd_mhreq->dev == dev) {
22020 				break;
22021 			}
22022 			sd_prev = sd_mhreq;
22023 		}
22024 		if (sd_mhreq != NULL) {
22025 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22026 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22027 		}
22028 	}
22029 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22030 }
22031 
22032 
22033 /*
22034  *    Function: sd_mhd_reset_notify_cb()
22035  *
22036  * Description: This is a callback function for scsi_reset_notify. This
22037  *		function updates the softstate reserved status and logs the
22038  *		reset. The driver scsi watch facility callback function
22039  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22040  *		will reclaim the reservation.
22041  *
22042  *   Arguments: arg  - driver soft state (unit) structure
22043  */
22044 
22045 static void
22046 sd_mhd_reset_notify_cb(caddr_t arg)
22047 {
22048 	struct sd_lun *un = (struct sd_lun *)arg;
22049 
22050 	mutex_enter(SD_MUTEX(un));
22051 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22052 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22053 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22054 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22055 	}
22056 	mutex_exit(SD_MUTEX(un));
22057 }
22058 
22059 
22060 /*
22061  *    Function: sd_take_ownership()
22062  *
22063  * Description: This routine implements an algorithm to achieve a stable
22064  *		reservation on disks which don't implement priority reserve,
22065  *		and makes sure that other host lose re-reservation attempts.
22066  *		and makes sure that other hosts' re-reservation attempts fail.
22067  *		This algorithm consists of a loop that keeps issuing the RESERVE
22068  *		for some period of time (min_ownership_delay, default 6 seconds).
22069  *		reset or bus reset (both of which cause an existing reservation
22070  *		to be lost). If the reservation is lost issue RESERVE until a
22071  *		period of min_ownership_delay with no resets has gone by, or
22072  *		until max_ownership_delay has expired. This loop ensures that
22073  *		the host really did manage to reserve the device, in spite of
22074  *		resets. The looping for min_ownership_delay (default six
22075  *		seconds) is important to early generation clustering products,
22076  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22077  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22078  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22079  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22080  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22081  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22082  *		no longer "owns" the disk and will have panicked itself.  Thus,
22083  *		the host issuing the MHIOCTKOWN is assured (with timing
22084  *		dependencies) that by the time it actually starts to use the
22085  *		disk for real work, the old owner is no longer accessing it.
22086  *
22087  *		min_ownership_delay is the minimum amount of time for which the
22088  *		disk must be reserved continuously devoid of resets before the
22089  *		MHIOCTKOWN ioctl will return success.
22090  *
22091  *		max_ownership_delay is the maximum amount of time within which
22092  *		the take ownership must either succeed or time out with an error.
22093  *
22094  *   Arguments: dev - the device 'dev_t'
22095  *		*p  - struct containing timing info.
22096  *
22097  * Return Code: 0 for success or error code
22098  */
22099 
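
/*
 * Worked example, assuming the defaults below: the loop delays 500 msec per
 * iteration, so stable ownership requires at least four consecutive
 * conflict-free RESERVEs (about 2 seconds) and a full min_ownership_delay
 * (6 seconds) with no intervening reset.  A peer host running the
 * MHIOCENFAILFAST polling at its 2-second period will therefore have polled,
 * seen the reservation conflict, and panicked well before this ioctl can
 * return success.
 */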
22100 static int
22101 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22102 {
22103 	struct sd_lun	*un;
22104 	int		rval;
22105 	int		err;
22106 	int		reservation_count   = 0;
22107 	int		min_ownership_delay =  6000000; /* in usec */
22108 	int		max_ownership_delay = 30000000; /* in usec */
22109 	clock_t		start_time;	/* starting time of this algorithm */
22110 	clock_t		end_time;	/* time limit for giving up */
22111 	clock_t		ownership_time;	/* time limit for stable ownership */
22112 	clock_t		current_time;
22113 	clock_t		previous_current_time;
22114 
22115 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22116 		return (ENXIO);
22117 	}
22118 
22119 	/*
22120 	 * Attempt a device reservation. A priority reservation is requested.
22121 	 */
22122 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22123 	    != SD_SUCCESS) {
22124 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22125 		    "sd_take_ownership: return(1)=%d\n", rval);
22126 		return (rval);
22127 	}
22128 
22129 	/* Update the softstate reserved status to indicate the reservation */
22130 	mutex_enter(SD_MUTEX(un));
22131 	un->un_resvd_status |= SD_RESERVE;
22132 	un->un_resvd_status &=
22133 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22134 	mutex_exit(SD_MUTEX(un));
22135 
22136 	if (p != NULL) {
22137 		if (p->min_ownership_delay != 0) {
22138 			min_ownership_delay = p->min_ownership_delay * 1000;
22139 		}
22140 		if (p->max_ownership_delay != 0) {
22141 			max_ownership_delay = p->max_ownership_delay * 1000;
22142 		}
22143 	}
22144 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22145 	    "sd_take_ownership: min, max delays: %d, %d\n",
22146 	    min_ownership_delay, max_ownership_delay);
22147 
22148 	start_time = ddi_get_lbolt();
22149 	current_time	= start_time;
22150 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22151 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22152 
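	/*
	 * Note: lbolt-derived times are compared by signed subtraction
	 * ("current_time - end_time < 0") rather than directly
	 * ("current_time < end_time") so that the tests below stay
	 * correct even if the lbolt counter wraps.
	 */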
22153 	while (current_time - end_time < 0) {
22154 		delay(drv_usectohz(500000));
22155 
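		/*
		 * Re-issue the RESERVE; on failure, retry once before
		 * giving up and mapping the reservation conflict to an
		 * errno.
		 */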
22156 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22157 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22158 				mutex_enter(SD_MUTEX(un));
22159 				rval = (un->un_resvd_status &
22160 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22161 				mutex_exit(SD_MUTEX(un));
22162 				break;
22163 			}
22164 		}
22165 		previous_current_time = current_time;
22166 		current_time = ddi_get_lbolt();
22167 		mutex_enter(SD_MUTEX(un));
22168 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22169 			ownership_time = ddi_get_lbolt() +
22170 			    drv_usectohz(min_ownership_delay);
22171 			reservation_count = 0;
22172 		} else {
22173 			reservation_count++;
22174 		}
22175 		un->un_resvd_status |= SD_RESERVE;
22176 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22177 		mutex_exit(SD_MUTEX(un));
22178 
22179 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22180 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22181 		    "reservation=%s\n", (current_time - previous_current_time),
22182 		    reservation_count ? "ok" : "reclaimed");
22183 
22184 		if (current_time - ownership_time >= 0 &&
22185 		    reservation_count >= 4) {
22186 			rval = 0; /* Achieved a stable ownership */
22187 			break;
22188 		}
22189 		if (current_time - end_time >= 0) {
22190 			rval = EACCES; /* No ownership in max possible time */
22191 			break;
22192 		}
22193 	}
22194 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22195 	    "sd_take_ownership: return(2)=%d\n", rval);
22196 	return (rval);
22197 }
22198 
22199 
22200 /*
22201  *    Function: sd_reserve_release()
22202  *
22203  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22204  *		PRIORITY RESERVE commands based on a user specified command type
22205  *
22206  *   Arguments: dev - the device 'dev_t'
22207  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22208  *		      SD_RESERVE, SD_RELEASE
22209  *
22210  * Return Code: 0 or Error Code
22211  */
22212 
22213 static int
22214 sd_reserve_release(dev_t dev, int cmd)
22215 {
22216 	struct uscsi_cmd	*com = NULL;
22217 	struct sd_lun		*un = NULL;
22218 	char			cdb[CDB_GROUP0];
22219 	int			rval;
22220 
22221 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22222 	    (cmd == SD_PRIORITY_RESERVE));
22223 
22224 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22225 		return (ENXIO);
22226 	}
22227 
22228 	/* instantiate and initialize the command and cdb */
22229 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22230 	bzero(cdb, CDB_GROUP0);
22231 	com->uscsi_flags   = USCSI_SILENT;
22232 	com->uscsi_timeout = un->un_reserve_release_time;
22233 	com->uscsi_cdblen  = CDB_GROUP0;
22234 	com->uscsi_cdb	   = cdb;
22235 	if (cmd == SD_RELEASE) {
22236 		cdb[0] = SCMD_RELEASE;
22237 	} else {
22238 		cdb[0] = SCMD_RESERVE;
22239 	}
22240 
22241 	/* Send the command. */
22242 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22243 	    SD_PATH_STANDARD);
22244 
22245 	/*
22246 	 * "Break" a reservation held by another host by issuing a reset,
22247 	 * if a priority reserve was requested and we could not get the
22248 	 * device.
22249 	 */
22250 	if ((cmd == SD_PRIORITY_RESERVE) &&
22251 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22252 		/*
22253 		 * First try to reset the LUN. If we cannot, then try a target
22254 		 * reset, followed by a bus reset if the target reset fails.
22255 		 */
22256 		int reset_retval = 0;
22257 		if (un->un_f_lun_reset_enabled == TRUE) {
22258 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22259 		}
22260 		if (reset_retval == 0) {
22261 			/* The LUN reset either failed or was not issued */
22262 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22263 		}
22264 		if ((reset_retval == 0) &&
22265 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22266 			rval = EIO;
22267 			kmem_free(com, sizeof (*com));
22268 			return (rval);
22269 		}
22270 
22271 		bzero(com, sizeof (struct uscsi_cmd));
22272 		com->uscsi_flags   = USCSI_SILENT;
22273 		com->uscsi_cdb	   = cdb;
22274 		com->uscsi_cdblen  = CDB_GROUP0;
22275 		com->uscsi_timeout = 5;
22276 
22277 		/*
22278 		 * Reissue the last reserve command, this time without request
22279 		 * sense.  Assume that it is just a regular reserve command.
22280 		 */
22281 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22282 		    SD_PATH_STANDARD);
22283 	}
22284 
22285 	/* Return an error if still getting a reservation conflict. */
22286 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22287 		rval = EACCES;
22288 	}
22289 
22290 	kmem_free(com, sizeof (*com));
22291 	return (rval);
22292 }
22293 
22294 
22295 #define	SD_NDUMP_RETRIES	12
22296 /*
22297  *	System Crash Dump routine
22298  */
22299 
22300 static int
22301 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22302 {
22303 	int		instance;
22304 	int		partition;
22305 	int		i;
22306 	int		err;
22307 	struct sd_lun	*un;
22308 	struct scsi_pkt *wr_pktp;
22309 	struct buf	*wr_bp;
22310 	struct buf	wr_buf;
22311 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22312 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22313 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22314 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22315 	size_t		io_start_offset;
22316 	int		doing_rmw = FALSE;
22317 	int		rval;
22318 #if defined(__i386) || defined(__amd64)
22319 	ssize_t dma_resid;
22320 	daddr_t oblkno;
22321 #endif
22322 	diskaddr_t	nblks = 0;
22323 	diskaddr_t	start_block;
22324 
22325 	instance = SDUNIT(dev);
22326 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22327 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22328 		return (ENXIO);
22329 	}
22330 
22331 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22332 
22333 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22334 
22335 	partition = SDPART(dev);
22336 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22337 
22338 	/* Validate the blocks to dump against the partition size. */
22339 
22340 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22341 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22342 
22343 	if ((blkno + nblk) > nblks) {
22344 		SD_TRACE(SD_LOG_DUMP, un,
22345 		    "sddump: dump range larger than partition: "
22346 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22347 		    blkno, nblk, nblks);
22348 		return (EINVAL);
22349 	}
22350 
22351 	mutex_enter(&un->un_pm_mutex);
22352 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22353 		struct scsi_pkt *start_pktp;
22354 
22355 		mutex_exit(&un->un_pm_mutex);
22356 
22357 		/*
22358 		 * Use the pm framework to power on the HBA first.
22359 		 */
22360 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22361 
22362 		/*
22363 		 * Dump no longer uses sdpower to power on a device; the
22364 		 * power-on is done in-line here so it can run in polled mode.
22365 		 */
22366 
22367 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22368 
22369 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22370 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22371 
22372 		if (start_pktp == NULL) {
22373 			/* Packet allocation failed; fail the dump. */
22374 			return (EIO);
22375 		}
22376 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22377 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22378 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22379 		start_pktp->pkt_flags = FLAG_NOINTR;
22380 
22381 		mutex_enter(SD_MUTEX(un));
22382 		SD_FILL_SCSI1_LUN(un, start_pktp);
22383 		mutex_exit(SD_MUTEX(un));
22384 		/*
22385 		 * Scsi_poll returns 0 (success) if the command completes and
22386 		 * the status block is STATUS_GOOD.
22387 		 */
22388 		if (sd_scsi_poll(un, start_pktp) != 0) {
22389 			scsi_destroy_pkt(start_pktp);
22390 			return (EIO);
22391 		}
22392 		scsi_destroy_pkt(start_pktp);
22393 		(void) sd_ddi_pm_resume(un);
22394 	} else {
22395 		mutex_exit(&un->un_pm_mutex);
22396 	}
22397 
22398 	mutex_enter(SD_MUTEX(un));
22399 	un->un_throttle = 0;
22400 
22401 	/*
22402 	 * The first time through, reset the specific target device.
22403 	 * However, when cpr calls sddump we know that sd is in
22404 	 * a good state, so no bus reset is required.
22405 	 * Clear sense data via a Request Sense cmd.
22406 	 * In sddump we don't care about allow_bus_device_reset anymore.
22407 	 */
22408 
22409 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22410 	    (un->un_state != SD_STATE_DUMPING)) {
22411 
22412 		New_state(un, SD_STATE_DUMPING);
22413 
22414 		if (un->un_f_is_fibre == FALSE) {
22415 			mutex_exit(SD_MUTEX(un));
22416 			/*
22417 			 * Attempt a bus reset for parallel scsi.
22418 			 *
22419 			 * Note: A bus reset is required because on some host
22420 			 * systems (i.e. E420R) a bus device reset is
22421 			 * insufficient to reset the state of the target.
22422 			 *
22423 			 * Note: Don't issue the reset for fibre-channel,
22424 			 * because this tends to hang the bus (loop) for
22425 			 * too long while everyone is logging out and in
22426 			 * and the deadman timer for dumping will fire
22427 			 * before the dump is complete.
22428 			 */
22429 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22430 				mutex_enter(SD_MUTEX(un));
22431 				Restore_state(un);
22432 				mutex_exit(SD_MUTEX(un));
22433 				return (EIO);
22434 			}
22435 
22436 			/* Delay to give the device some recovery time. */
22437 			drv_usecwait(10000);
22438 
22439 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22440 				SD_INFO(SD_LOG_DUMP, un,
22441 					"sddump: sd_send_polled_RQS failed\n");
22442 			}
22443 			mutex_enter(SD_MUTEX(un));
22444 		}
22445 	}
22446 
22447 	/*
22448 	 * Convert the partition-relative block number to a
22449 	 * disk physical block number.
22450 	 */
22451 	blkno += start_block;
22452 
22453 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22454 
22455 
22456 	/*
22457 	 * Check if the device has a non-512 block size.
22458 	 */
22459 	wr_bp = NULL;
22460 	if (NOT_DEVBSIZE(un)) {
22461 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22462 		tgt_byte_count = nblk * un->un_sys_blocksize;
22463 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22464 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22465 			doing_rmw = TRUE;
22466 			/*
22467 			 * Calculate the block number and number of blocks
22468 			 * in terms of the media block size.
22469 			 */
22470 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22471 			tgt_nblk =
22472 			    ((tgt_byte_offset + tgt_byte_count +
22473 				(un->un_tgt_blocksize - 1)) /
22474 				un->un_tgt_blocksize) - tgt_blkno;
22475 
22476 			/*
22477 			 * Invoke the routine which is going to do read part
22478 			 * of read-modify-write.
22479 			 * Note that this routine returns a pointer to
22480 			 * a valid bp in wr_bp.
22481 			 */
22482 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22483 			    &wr_bp);
22484 			if (err) {
22485 				mutex_exit(SD_MUTEX(un));
22486 				return (err);
22487 			}
22488 			/*
22489 			 * The offset into the first target block is:
22490 			 * (original block # * system block size) -
22491 			 * (target block # * target block size)
22492 			 */
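			/*
			 * For example, with 512-byte system blocks and a
			 * 2048-byte target block size, blkno 7 maps to
			 * tgt_blkno 1 (3584 / 2048) and io_start_offset is
			 * 7*512 - 1*2048 = 1536 bytes into that target block.
			 */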
22493 			io_start_offset =
22494 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22495 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22496 
22497 			ASSERT((io_start_offset >= 0) &&
22498 			    (io_start_offset < un->un_tgt_blocksize));
22499 			/*
22500 			 * Do the modify portion of read modify write.
22501 			 */
22502 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22503 			    (size_t)nblk * un->un_sys_blocksize);
22504 		} else {
22505 			doing_rmw = FALSE;
22506 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22507 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22508 		}
22509 
22510 		/* Convert blkno and nblk to target blocks */
22511 		blkno = tgt_blkno;
22512 		nblk = tgt_nblk;
22513 	} else {
22514 		wr_bp = &wr_buf;
22515 		bzero(wr_bp, sizeof (struct buf));
22516 		wr_bp->b_flags		= B_BUSY;
22517 		wr_bp->b_un.b_addr	= addr;
22518 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
22519 		wr_bp->b_resid		= 0;
22520 	}
22521 
22522 	mutex_exit(SD_MUTEX(un));
22523 
22524 	/*
22525 	 * Obtain a SCSI packet for the write command.
22526 	 * It should be safe to call the allocator here without
22527 	 * worrying about being locked for DVMA mapping because
22528 	 * the address we're passed is already a DVMA mapping
22529 	 *
22530 	 * We are also not going to worry about semaphore ownership
22531 	 * in the dump buffer. Dumping is single threaded at present.
22532 	 */
22533 
22534 	wr_pktp = NULL;
22535 
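	/*
	 * On x86, the HBA may break the transfer into partial DMA windows
	 * that are smaller than the dump buffer.  The while loop opened
	 * below (and closed by the matching #endif near the end of this
	 * routine) re-issues the write for the unfinished remainder,
	 * advancing blkno past the target blocks already transferred.
	 */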
22536 #if defined(__i386) || defined(__amd64)
22537 	dma_resid = wr_bp->b_bcount;
22538 	oblkno = blkno;
22539 	while (dma_resid != 0) {
22540 #endif
22541 
22542 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22543 		wr_bp->b_flags &= ~B_ERROR;
22544 
22545 #if defined(__i386) || defined(__amd64)
22546 		blkno = oblkno +
22547 			((wr_bp->b_bcount - dma_resid) /
22548 			    un->un_tgt_blocksize);
22549 		nblk = dma_resid / un->un_tgt_blocksize;
22550 
22551 		if (wr_pktp) {
22552 			/* Partial DMA transfers after initial transfer */
22553 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22554 			    blkno, nblk);
22555 		} else {
22556 			/* Initial transfer */
22557 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22558 			    un->un_pkt_flags, NULL_FUNC, NULL,
22559 			    blkno, nblk);
22560 		}
22561 #else
22562 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22563 		    0, NULL_FUNC, NULL, blkno, nblk);
22564 #endif
22565 
22566 		if (rval == 0) {
22567 			/* We were given a SCSI packet, continue. */
22568 			break;
22569 		}
22570 
22571 		if (i == 0) {
22572 			if (wr_bp->b_flags & B_ERROR) {
22573 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22574 				    "no resources for dumping; "
22575 				    "error code: 0x%x, retrying",
22576 				    geterror(wr_bp));
22577 			} else {
22578 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22579 				    "no resources for dumping; retrying");
22580 			}
22581 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22582 			if (wr_bp->b_flags & B_ERROR) {
22583 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22584 				    "no resources for dumping; error code: "
22585 				    "0x%x, retrying\n", geterror(wr_bp));
22586 			}
22587 		} else {
22588 			if (wr_bp->b_flags & B_ERROR) {
22589 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22590 				    "no resources for dumping; "
22591 				    "error code: 0x%x, retries failed, "
22592 				    "giving up.\n", geterror(wr_bp));
22593 			} else {
22594 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22595 				    "no resources for dumping; "
22596 				    "retries failed, giving up.\n");
22597 			}
22598 			mutex_enter(SD_MUTEX(un));
22599 			Restore_state(un);
22600 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
22601 				mutex_exit(SD_MUTEX(un));
22602 				scsi_free_consistent_buf(wr_bp);
22603 			} else {
22604 				mutex_exit(SD_MUTEX(un));
22605 			}
22606 			return (EIO);
22607 		}
22608 		drv_usecwait(10000);
22609 	}
22610 
22611 #if defined(__i386) || defined(__amd64)
22612 	/*
22613 	 * Save the resid from the partial DMA transfer.
22614 	 */
22615 	dma_resid = wr_pktp->pkt_resid;
22616 	if (dma_resid != 0)
22617 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
22618 	wr_pktp->pkt_resid = 0;
22619 #endif
22620 
22621 	/* SunBug 1222170 */
22622 	wr_pktp->pkt_flags = FLAG_NOINTR;
22623 
22624 	err = EIO;
22625 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22626 
22627 		/*
22628 		 * Scsi_poll returns 0 (success) if the command completes and
22629 		 * the status block is STATUS_GOOD.  We should only check
22630 		 * errors if this condition is not true.  Even then we should
22631 		 * send our own request sense packet only if we have a check
22632 		 * condition and auto request sense has not been performed by
22633 		 * the hba.
22634 		 */
22635 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
22636 
22637 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
22638 		    (wr_pktp->pkt_resid == 0)) {
22639 			err = SD_SUCCESS;
22640 			break;
22641 		}
22642 
22643 		/*
22644 		 * Check CMD_DEV_GONE first; give up if the device is gone.
22645 		 */
22646 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
22647 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22648 			    "Device is gone\n");
22649 			break;
22650 		}
22651 
22652 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
22653 			SD_INFO(SD_LOG_DUMP, un,
22654 			    "sddump: write failed with CHECK, try # %d\n", i);
22655 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
22656 				(void) sd_send_polled_RQS(un);
22657 			}
22658 
22659 			continue;
22660 		}
22661 
22662 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
22663 			int reset_retval = 0;
22664 
22665 			SD_INFO(SD_LOG_DUMP, un,
22666 			    "sddump: write failed with BUSY, try # %d\n", i);
22667 
22668 			if (un->un_f_lun_reset_enabled == TRUE) {
22669 				reset_retval = scsi_reset(SD_ADDRESS(un),
22670 				    RESET_LUN);
22671 			}
22672 			if (reset_retval == 0) {
22673 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22674 			}
22675 			(void) sd_send_polled_RQS(un);
22676 
22677 		} else {
22678 			SD_INFO(SD_LOG_DUMP, un,
22679 			    "sddump: write failed with 0x%x, try # %d\n",
22680 			    SD_GET_PKT_STATUS(wr_pktp), i);
22681 			mutex_enter(SD_MUTEX(un));
22682 			sd_reset_target(un, wr_pktp);
22683 			mutex_exit(SD_MUTEX(un));
22684 		}
22685 
22686 		/*
22687 		 * If we are not getting anywhere with lun/target resets,
22688 		 * let's reset the bus.
22689 		 */
22690 		if (i == SD_NDUMP_RETRIES/2) {
22691 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22692 			(void) sd_send_polled_RQS(un);
22693 		}
22694 
22695 	}
22696 #if defined(__i386) || defined(__amd64)
22697 	}	/* dma_resid */
22698 #endif
22699 
22700 	scsi_destroy_pkt(wr_pktp);
22701 	mutex_enter(SD_MUTEX(un));
22702 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
22703 		mutex_exit(SD_MUTEX(un));
22704 		scsi_free_consistent_buf(wr_bp);
22705 	} else {
22706 		mutex_exit(SD_MUTEX(un));
22707 	}
22708 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
22709 	return (err);
22710 }
22711 
22712 /*
22713  *    Function: sd_scsi_poll()
22714  *
22715  * Description: This is a wrapper for the scsi_poll call.
22716  *
22717  *   Arguments: sd_lun - The unit structure
22718  *              scsi_pkt - The scsi packet being sent to the device.
22719  *
22720  * Return Code: 0 - Command completed successfully with good status
22721  *             -1 - Command failed.  This could indicate a check condition
22722  *                  or other status value requiring recovery action.
22723  *
22724  */
22725 
22726 static int
22727 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
22728 {
22729 	int status;
22730 
22731 	ASSERT(un != NULL);
22732 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22733 	ASSERT(pktp != NULL);
22734 
22735 	status = SD_SUCCESS;
22736 
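	/*
	 * If the HBA (still) reports tagged queuing capability, tag this
	 * command like normal I/O and allow disconnect/reconnect;
	 * otherwise leave the packet flags untouched.
	 */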
22737 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
22738 		pktp->pkt_flags |= un->un_tagflags;
22739 		pktp->pkt_flags &= ~FLAG_NODISCON;
22740 	}
22741 
22742 	status = sd_ddi_scsi_poll(pktp);
22743 	/*
22744 	 * Scsi_poll returns 0 (success) if the command completes and the
22745 	 * status block is STATUS_GOOD.  We should only check errors if this
22746 	 * condition is not true.  Even then we should send our own request
22747 	 * sense packet only if we have a check condition and auto
22748 	 * request sense has not been performed by the hba.
22749 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
22750 	 */
22751 	if ((status != SD_SUCCESS) &&
22752 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
22753 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
22754 	    (pktp->pkt_reason != CMD_DEV_GONE))
22755 		(void) sd_send_polled_RQS(un);
22756 
22757 	return (status);
22758 }
22759 
22760 /*
22761  *    Function: sd_send_polled_RQS()
22762  *
22763  * Description: This sends the request sense command to a device.
22764  *
22765  *   Arguments: sd_lun - The unit structure
22766  *
22767  * Return Code: 0 - Command completed successfully with good status
22768  *             -1 - Command failed.
22769  *
22770  */
22771 
22772 static int
22773 sd_send_polled_RQS(struct sd_lun *un)
22774 {
22775 	int	ret_val;
22776 	struct	scsi_pkt	*rqs_pktp;
22777 	struct	buf		*rqs_bp;
22778 
22779 	ASSERT(un != NULL);
22780 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22781 
22782 	ret_val = SD_SUCCESS;
22783 
22784 	rqs_pktp = un->un_rqs_pktp;
22785 	rqs_bp	 = un->un_rqs_bp;
22786 
22787 	mutex_enter(SD_MUTEX(un));
22788 
22789 	if (un->un_sense_isbusy) {
22790 		ret_val = SD_FAILURE;
22791 		mutex_exit(SD_MUTEX(un));
22792 		return (ret_val);
22793 	}
22794 
22795 	/*
22796 	 * If the request sense buffer (and packet) is not in use,
22797 	 * let's set the un_sense_isbusy and send our packet
22798 	 */
22799 	un->un_sense_isbusy 	= 1;
22800 	rqs_pktp->pkt_resid  	= 0;
22801 	rqs_pktp->pkt_reason 	= 0;
22802 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
22803 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
22804 
22805 	mutex_exit(SD_MUTEX(un));
22806 
22807 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
22808 	    " 0x%p\n", rqs_bp->b_un.b_addr);
22809 
22810 	/*
22811 	 * Can't send this through sd_scsi_poll: we would wrap ourselves
22812 	 * around the axle, since sd_scsi_poll has a call back into us.
22813 	 */
22814 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
22815 		SD_INFO(SD_LOG_COMMON, un,
22816 		    "sd_send_polled_RQS: RQS failed\n");
22817 	}
22818 
22819 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
22820 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
22821 
22822 	mutex_enter(SD_MUTEX(un));
22823 	un->un_sense_isbusy = 0;
22824 	mutex_exit(SD_MUTEX(un));
22825 
22826 	return (ret_val);
22827 }
22828 
22829 /*
22830  * Defines needed for localized version of the scsi_poll routine.
22831  */
22832 #define	SD_CSEC		10000			/* usecs */
22833 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
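/*
 * One poll "tick" is thus a centisecond (10 msec), and SD_SEC_TO_CSEC (100)
 * is the number of ticks per second; sd_ddi_scsi_poll() accounts for its
 * timeout budget in these ticks.
 */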
22834 
22835 
22836 /*
22837  *    Function: sd_ddi_scsi_poll()
22838  *
22839  * Description: Localized version of the scsi_poll routine.  The purpose is to
22840  *		send a scsi_pkt to a device as a polled command.  This version
22841  *		is to ensure more robust handling of transport errors.
22842  *		Specifically, this routine cures the "not ready, becoming
22843  *		ready" transition seen during power-up and reset of Sonomas.
22844  *		This can take up to 45 seconds for power-on and 20 seconds
22845  *		for reset of a Sonoma lun.
22846  *
22847  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
22848  *
22849  * Return Code: 0 - Command completed successfully with good status
22850  *             -1 - Command failed.
22851  *
22852  */
22853 
22854 static int
22855 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
22856 {
22857 	int busy_count;
22858 	int timeout;
22859 	int rval = SD_FAILURE;
22860 	int savef;
22861 	uint8_t *sensep;
22862 	long savet;
22863 	void (*savec)();
22864 	/*
22865 	 * The following is defined in machdep.c and is used in determining if
22866 	 * the scsi transport system will do polled I/O instead of interrupt
22867 	 * I/O when called from xx_dump().
22868 	 */
22869 	extern int do_polled_io;
22870 
22871 	/*
22872 	 * save old flags in pkt, to restore at end
22873 	 */
22874 	savef = pkt->pkt_flags;
22875 	savec = pkt->pkt_comp;
22876 	savet = pkt->pkt_time;
22877 
22878 	pkt->pkt_flags |= FLAG_NOINTR;
22879 
22880 	/*
22881 	 * XXX there is nothing in the SCSA spec that states that we should not
22882 	 * do a callback for polled cmds; however, removing this will break sd
22883 	 * and probably other target drivers
22884 	 */
22885 	pkt->pkt_comp = NULL;
22886 
22887 	/*
22888 	 * we don't like a polled command without timeout.
22889 	 * 60 seconds seems long enough.
22890 	 */
22891 	if (pkt->pkt_time == 0) {
22892 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
22893 	}
22894 
22895 	/*
22896 	 * Send polled cmd.
22897 	 *
22898 	 * We do some error recovery for various errors.  Tran_busy,
22899 	 * queue full, and non-dispatched commands are retried every 10 msec,
22900 	 * as they are typically transient failures.  Busy status and Not
22901 	 * Ready are retried every second, as these states take a while to
22902 	 * change.  Unit attention is retried up to pkt_time (60) times
22903 	 * with no delay.
22904 	 */
22905 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
22906 
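	/*
	 * busy_count is kept in centisecond ticks: a 10-msec retry consumes
	 * the loop's own increment, while each 1-second retry adds
	 * SD_SEC_TO_CSEC - 1 extra ticks, so every path charges the timeout
	 * budget by roughly the real time it waited.
	 */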
22907 	for (busy_count = 0; busy_count < timeout; busy_count++) {
22908 		int rc;
22909 		int poll_delay;
22910 
22911 		/*
22912 		 * Initialize pkt status variables.
22913 		 */
22914 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
22915 
22916 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
22917 			if (rc != TRAN_BUSY) {
22918 				/* Transport failed - give up. */
22919 				break;
22920 			} else {
22921 				/* Transport busy - try again. */
22922 				poll_delay = 1 * SD_CSEC; /* 10 msec */
22923 			}
22924 		} else {
22925 			/*
22926 			 * Transport accepted - check pkt status.
22927 			 */
22928 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
22929 			if (pkt->pkt_reason == CMD_CMPLT &&
22930 			    rc == STATUS_CHECK &&
22931 			    pkt->pkt_state & STATE_ARQ_DONE) {
22932 				struct scsi_arq_status *arqstat =
22933 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
22934 
22935 				sensep = (uint8_t *)&arqstat->sts_sensedata;
22936 			} else {
22937 				sensep = NULL;
22938 			}
22939 
22940 			if ((pkt->pkt_reason == CMD_CMPLT) &&
22941 			    (rc == STATUS_GOOD)) {
22942 				/* No error - we're done */
22943 				rval = SD_SUCCESS;
22944 				break;
22945 
22946 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
22947 				/* Lost connection - give up */
22948 				break;
22949 
22950 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
22951 			    (pkt->pkt_state == 0)) {
22952 				/* Pkt not dispatched - try again. */
22953 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
22954 
22955 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
22956 			    (rc == STATUS_QFULL)) {
22957 				/* Queue full - try again. */
22958 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
22959 
22960 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
22961 			    (rc == STATUS_BUSY)) {
22962 				/* Busy - try again. */
22963 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
22964 				busy_count += (SD_SEC_TO_CSEC - 1);
22965 
22966 			} else if ((sensep != NULL) &&
22967 			    (scsi_sense_key(sensep) ==
22968 				KEY_UNIT_ATTENTION)) {
22969 				/* Unit Attention - try again */
22970 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec */
22971 				continue;
22972 
22973 			} else if ((sensep != NULL) &&
22974 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
22975 			    (scsi_sense_asc(sensep) == 0x04) &&
22976 			    (scsi_sense_ascq(sensep) == 0x01)) {
22977 				/* Not ready -> ready - try again. */
22978 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
22979 				busy_count += (SD_SEC_TO_CSEC - 1);
22980 
22981 			} else {
22982 				/* BAD status - give up. */
22983 				break;
22984 			}
22985 		}
22986 
22987 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
22988 		    !do_polled_io) {
22989 			delay(drv_usectohz(poll_delay));
22990 		} else {
22991 			/* we busy wait during cpr_dump or interrupt threads */
22992 			drv_usecwait(poll_delay);
22993 		}
22994 	}
22995 
22996 	pkt->pkt_flags = savef;
22997 	pkt->pkt_comp = savec;
22998 	pkt->pkt_time = savet;
22999 	return (rval);
23000 }
23001 
23002 
23003 /*
23004  *    Function: sd_persistent_reservation_in_read_keys
23005  *
23006  * Description: This routine is the driver entry point for handling CD-ROM
23007  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23008  *		by sending the SCSI-3 PRIN commands to the device.
23009  *		Processes the read keys command response by copying the
23010  *		reservation key information into the user provided buffer.
23011  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23012  *
23013  *   Arguments: un   -  Pointer to soft state struct for the target.
23014  *		usrp -	user provided pointer to multihost Persistent In Read
23015  *			Keys structure (mhioc_inkeys_t)
23016  *		flag -	this argument is a pass through to ddi_copyxxx()
23017  *			directly from the mode argument of ioctl().
23018  *
23019  * Return Code: 0   - Success
23020  *		EACCES
23021  *		ENOTSUP
23022  *		errno return code from sd_send_scsi_cmd()
23023  *
23024  *     Context: Can sleep. Does not return until command is completed.
23025  */
23026 
23027 static int
23028 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23029     mhioc_inkeys_t *usrp, int flag)
23030 {
23031 #ifdef _MULTI_DATAMODEL
23032 	struct mhioc_key_list32	li32;
23033 #endif
23034 	sd_prin_readkeys_t	*in;
23035 	mhioc_inkeys_t		*ptr;
23036 	mhioc_key_list_t	li;
23037 	uchar_t			*data_bufp;
23038 	int 			data_len;
23039 	int			rval;
23040 	size_t			copysz;
23041 
23042 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
23043 		return (EINVAL);
23044 	}
23045 	bzero(&li, sizeof (mhioc_key_list_t));
23046 
23047 	/*
23048 	 * Get the listsize from user
23049 	 */
23050 #ifdef _MULTI_DATAMODEL
23051 
23052 	switch (ddi_model_convert_from(flag & FMODELS)) {
23053 	case DDI_MODEL_ILP32:
23054 		copysz = sizeof (struct mhioc_key_list32);
23055 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23056 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23057 			    "sd_persistent_reservation_in_read_keys: "
23058 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23059 			rval = EFAULT;
23060 			goto done;
23061 		}
23062 		li.listsize = li32.listsize;
23063 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23064 		break;
23065 
23066 	case DDI_MODEL_NONE:
23067 		copysz = sizeof (mhioc_key_list_t);
23068 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23069 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23070 			    "sd_persistent_reservation_in_read_keys: "
23071 			    "failed ddi_copyin: mhioc_key_list_t\n");
23072 			rval = EFAULT;
23073 			goto done;
23074 		}
23075 		break;
23076 	}
23077 
23078 #else /* ! _MULTI_DATAMODEL */
23079 	copysz = sizeof (mhioc_key_list_t);
23080 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23081 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23082 		    "sd_persistent_reservation_in_read_keys: "
23083 		    "failed ddi_copyin: mhioc_key_list_t\n");
23084 		rval = EFAULT;
23085 		goto done;
23086 	}
23087 #endif
23088 
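	/*
	 * Size the buffer for the PRIN READ KEYS response: an 8-byte header
	 * (generation count plus additional length) followed by listsize
	 * 8-byte reservation keys.  The sizeof () arithmetic below accounts
	 * for the header by dropping the trailing keylist member from
	 * sd_prin_readkeys_t.
	 */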
23089 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23090 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
23091 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23092 
23093 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23094 	    data_len, data_bufp)) != 0) {
23095 		goto done;
23096 	}
23097 	in = (sd_prin_readkeys_t *)data_bufp;
23098 	ptr->generation = BE_32(in->generation);
23099 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23100 
23101 	/*
23102 	 * Return the min(listsize, listlen) keys
23103 	 */
23104 #ifdef _MULTI_DATAMODEL
23105 
23106 	switch (ddi_model_convert_from(flag & FMODELS)) {
23107 	case DDI_MODEL_ILP32:
23108 		li32.listlen = li.listlen;
23109 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23110 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23111 			    "sd_persistent_reservation_in_read_keys: "
23112 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23113 			rval = EFAULT;
23114 			goto done;
23115 		}
23116 		break;
23117 
23118 	case DDI_MODEL_NONE:
23119 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23120 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23121 			    "sd_persistent_reservation_in_read_keys: "
23122 			    "failed ddi_copyout: mhioc_key_list_t\n");
23123 			rval = EFAULT;
23124 			goto done;
23125 		}
23126 		break;
23127 	}
23128 
23129 #else /* ! _MULTI_DATAMODEL */
23130 
23131 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23132 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23133 		    "sd_persistent_reservation_in_read_keys: "
23134 		    "failed ddi_copyout: mhioc_key_list_t\n");
23135 		rval = EFAULT;
23136 		goto done;
23137 	}
23138 
23139 #endif /* _MULTI_DATAMODEL */
23140 
23141 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23142 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23143 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23144 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23145 		    "sd_persistent_reservation_in_read_keys: "
23146 		    "failed ddi_copyout: keylist\n");
23147 		rval = EFAULT;
23148 	}
23149 done:
23150 	kmem_free(data_bufp, data_len);
23151 	return (rval);
23152 }
23153 
23154 
23155 /*
23156  *    Function: sd_persistent_reservation_in_read_resv
23157  *
23158  * Description: This routine is the driver entry point for handling CD-ROM
23159  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23160  *		by sending the SCSI-3 PRIN commands to the device.
23161  *		Process the read persistent reservations command response by
23162  *		copying the reservation information into the user provided
23163  *		buffer. Support for 32/64-bit _MULTI_DATAMODEL is implemented.
23164  *
23165  *   Arguments: un   -  Pointer to soft state struct for the target.
23166  *		usrp -	user provided pointer to multihost Persistent In Read
23167  *			Reservations structure (mhioc_inresvs_t)
23168  *		flag -	this argument is a pass through to ddi_copyxxx()
23169  *			directly from the mode argument of ioctl().
23170  *
23171  * Return Code: 0   - Success
23172  *		EACCES
23173  *		ENOTSUP
23174  *		errno return code from sd_send_scsi_cmd()
23175  *
23176  *     Context: Can sleep. Does not return until command is completed.
23177  */
23178 
23179 static int
23180 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23181     mhioc_inresvs_t *usrp, int flag)
23182 {
23183 #ifdef _MULTI_DATAMODEL
23184 	struct mhioc_resv_desc_list32 resvlist32;
23185 #endif
23186 	sd_prin_readresv_t	*in;
23187 	mhioc_inresvs_t		*ptr;
23188 	sd_readresv_desc_t	*readresv_ptr;
23189 	mhioc_resv_desc_list_t	resvlist;
23190 	mhioc_resv_desc_t 	resvdesc;
23191 	uchar_t			*data_bufp;
23192 	int 			data_len;
23193 	int			rval;
23194 	int			i;
23195 	size_t			copysz;
23196 	mhioc_resv_desc_t	*bufp;
23197 
23198 	if ((ptr = usrp) == NULL) {
23199 		return (EINVAL);
23200 	}
23201 
23202 	/*
23203 	 * Get the listsize from user
23204 	 */
23205 #ifdef _MULTI_DATAMODEL
23206 	switch (ddi_model_convert_from(flag & FMODELS)) {
23207 	case DDI_MODEL_ILP32:
23208 		copysz = sizeof (struct mhioc_resv_desc_list32);
23209 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23210 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23211 			    "sd_persistent_reservation_in_read_resv: "
23212 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23213 			rval = EFAULT;
23214 			goto done;
23215 		}
23216 		resvlist.listsize = resvlist32.listsize;
23217 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23218 		break;
23219 
23220 	case DDI_MODEL_NONE:
23221 		copysz = sizeof (mhioc_resv_desc_list_t);
23222 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23223 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23224 			    "sd_persistent_reservation_in_read_resv: "
23225 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23226 			rval = EFAULT;
23227 			goto done;
23228 		}
23229 		break;
23230 	}
23231 #else /* ! _MULTI_DATAMODEL */
23232 	copysz = sizeof (mhioc_resv_desc_list_t);
23233 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23234 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23235 		    "sd_persistent_reservation_in_read_resv: "
23236 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23237 		rval = EFAULT;
23238 		goto done;
23239 	}
23240 #endif /* ! _MULTI_DATAMODEL */
23241 
23242 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23243 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23244 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23245 
23246 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23247 	    data_len, data_bufp)) != 0) {
23248 		goto done;
23249 	}
23250 	in = (sd_prin_readresv_t *)data_bufp;
23251 	ptr->generation = BE_32(in->generation);
23252 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23253 
23254 	/*
23255 	 * Return the min(listsize, listlen) keys
23256 	 */
23257 #ifdef _MULTI_DATAMODEL
23258 
23259 	switch (ddi_model_convert_from(flag & FMODELS)) {
23260 	case DDI_MODEL_ILP32:
23261 		resvlist32.listlen = resvlist.listlen;
23262 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23263 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23264 			    "sd_persistent_reservation_in_read_resv: "
23265 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23266 			rval = EFAULT;
23267 			goto done;
23268 		}
23269 		break;
23270 
23271 	case DDI_MODEL_NONE:
23272 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23273 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23274 			    "sd_persistent_reservation_in_read_resv: "
23275 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23276 			rval = EFAULT;
23277 			goto done;
23278 		}
23279 		break;
23280 	}
23281 
23282 #else /* ! _MULTI_DATAMODEL */
23283 
23284 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23285 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23286 		    "sd_persistent_reservation_in_read_resv: "
23287 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23288 		rval = EFAULT;
23289 		goto done;
23290 	}
23291 
23292 #endif /* ! _MULTI_DATAMODEL */
23293 
23294 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23295 	bufp = resvlist.list;
23296 	copysz = sizeof (mhioc_resv_desc_t);
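	/*
	 * Copy the descriptors out one at a time, converting the 32-bit
	 * scope-specific address from big-endian wire order to host order
	 * along the way.
	 */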
23297 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23298 	    i++, readresv_ptr++, bufp++) {
23299 
23300 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23301 		    MHIOC_RESV_KEY_SIZE);
23302 		resvdesc.type  = readresv_ptr->type;
23303 		resvdesc.scope = readresv_ptr->scope;
23304 		resvdesc.scope_specific_addr =
23305 		    BE_32(readresv_ptr->scope_specific_addr);
23306 
23307 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23308 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23309 			    "sd_persistent_reservation_in_read_resv: "
23310 			    "failed ddi_copyout: resvlist\n");
23311 			rval = EFAULT;
23312 			goto done;
23313 		}
23314 	}
23315 done:
23316 	kmem_free(data_bufp, data_len);
23317 	return (rval);
23318 }
23319 
23320 
23321 /*
23322  *    Function: sr_change_blkmode()
23323  *
23324  * Description: This routine is the driver entry point for handling CD-ROM
23325  *		block mode ioctl requests. Support for returning and changing
23326  *		the current block size in use by the device is implemented. The
23327  *		LBA size is changed via a MODE SELECT Block Descriptor.
23328  *
23329  *		This routine issues a mode sense with an allocation length of
23330  *		12 bytes for the mode page header and a single block descriptor.
23331  *
23332  *   Arguments: dev - the device 'dev_t'
23333  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23334  *		      CDROMSBLKMODE (set)
23335  *		data - current block size or requested block size
23336  *		flag - this argument is a pass through to ddi_copyxxx() directly
23337  *		       from the mode argument of ioctl().
23338  *
23339  * Return Code: the code returned by sd_send_scsi_cmd()
23340  *		EINVAL if invalid arguments are provided
23341  *		EFAULT if ddi_copyxxx() fails
23342  *		ENXIO if ddi_get_soft_state fails
23343  *		EIO if invalid mode sense block descriptor length
23344  *
23345  */
23346 
23347 static int
23348 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23349 {
23350 	struct sd_lun			*un = NULL;
23351 	struct mode_header		*sense_mhp, *select_mhp;
23352 	struct block_descriptor		*sense_desc, *select_desc;
23353 	int				current_bsize;
23354 	int				rval = EINVAL;
23355 	uchar_t				*sense = NULL;
23356 	uchar_t				*select = NULL;
23357 
23358 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23359 
23360 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23361 		return (ENXIO);
23362 	}
23363 
23364 	/*
23365 	 * The block length is changed via the Mode Select block descriptor; the
23366 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23367 	 * required as part of this routine. Therefore the mode sense allocation
23368 	 * length is specified to be the length of a mode page header and a
23369 	 * block descriptor.
23370 	 */
23371 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23372 
23373 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23374 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23375 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23376 		    "sr_change_blkmode: Mode Sense Failed\n");
23377 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23378 		return (rval);
23379 	}
23380 
23381 	/* Check the block descriptor len to handle only 1 block descriptor */
23382 	sense_mhp = (struct mode_header *)sense;
23383 	if ((sense_mhp->bdesc_length == 0) ||
23384 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23385 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23386 		    "sr_change_blkmode: Mode Sense returned invalid block"
23387 		    " descriptor length\n");
23388 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23389 		return (EIO);
23390 	}
23391 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23392 	current_bsize = ((sense_desc->blksize_hi << 16) |
23393 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23394 
23395 	/* Process command */
23396 	switch (cmd) {
23397 	case CDROMGBLKMODE:
23398 		/* Return the block size obtained during the mode sense */
23399 		if (ddi_copyout(&current_bsize, (void *)data,
23400 		    sizeof (int), flag) != 0)
23401 			rval = EFAULT;
23402 		break;
23403 	case CDROMSBLKMODE:
23404 		/* Validate the requested block size */
23405 		switch (data) {
23406 		case CDROM_BLK_512:
23407 		case CDROM_BLK_1024:
23408 		case CDROM_BLK_2048:
23409 		case CDROM_BLK_2056:
23410 		case CDROM_BLK_2336:
23411 		case CDROM_BLK_2340:
23412 		case CDROM_BLK_2352:
23413 		case CDROM_BLK_2368:
23414 		case CDROM_BLK_2448:
23415 		case CDROM_BLK_2646:
23416 		case CDROM_BLK_2647:
23417 			break;
23418 		default:
23419 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23420 			    "sr_change_blkmode: "
23421 			    "Block Size '%ld' Not Supported\n", data);
23422 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23423 			return (EINVAL);
23424 		}
23425 
23426 		/*
23427 		 * The current block size matches the requested block size so
23428 		 * there is no need to send the mode select to change the size
23429 		 */
23430 		if (current_bsize == data) {
23431 			break;
23432 		}
23433 
23434 		/* Build the select data for the requested block size */
23435 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23436 		select_mhp = (struct mode_header *)select;
23437 		select_desc =
23438 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23439 		/*
23440 		 * The LBA size is changed via the block descriptor, so the
23441 		 * descriptor is built according to the user data
23442 		 */
23443 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23444 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23445 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23446 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23447 
23448 		/* Send the mode select for the requested block size */
23449 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23450 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23451 		    SD_PATH_STANDARD)) != 0) {
23452 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23453 			    "sr_change_blkmode: Mode Select Failed\n");
23454 			/*
23455 			 * The mode select failed for the requested block size,
23456 			 * so reset the data for the original block size and
23457 			 * send it to the target. The error is indicated by the
23458 			 * return value for the failed mode select.
23459 			 */
23460 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23461 			select_desc->blksize_mid = sense_desc->blksize_mid;
23462 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23463 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23464 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23465 			    SD_PATH_STANDARD);
23466 		} else {
23467 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23468 			mutex_enter(SD_MUTEX(un));
23469 			sd_update_block_info(un, (uint32_t)data, 0);
23470 			mutex_exit(SD_MUTEX(un));
23471 		}
23472 		break;
23473 	default:
23474 		/* should not reach here, but check anyway */
23475 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23476 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23477 		rval = EINVAL;
23478 		break;
23479 	}
23480 
23481 	if (select) {
23482 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23483 	}
23484 	if (sense) {
23485 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23486 	}
23487 	return (rval);
23488 }
23489 
23490 
23491 /*
23492  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23493  * implement driver support for getting and setting the CD speed. The command
23494  * set used will be based on the device type. If the device has not been
23495  * identified as MMC, the Toshiba vendor specific mode page will be used. If
23496  * the device is MMC but does not support the Real Time Streaming feature,
23497  * the SET CD SPEED command will be used to set the speed, and mode page
23498  * 0x2A will be used to read the speed.
23499  */
23500 
23501 /*
23502  *    Function: sr_change_speed()
23503  *
23504  * Description: This routine is the driver entry point for handling CD-ROM
23505  *		drive speed ioctl requests for devices supporting the Toshiba
23506  *		vendor specific drive speed mode page. Support for returning
23507  *		and changing the current drive speed in use by the device is
23508  *		implemented.
23509  *
23510  *   Arguments: dev - the device 'dev_t'
23511  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23512  *		      CDROMSDRVSPEED (set)
23513  *		data - current drive speed or requested drive speed
23514  *		flag - this argument is a pass through to ddi_copyxxx() directly
23515  *		       from the mode argument of ioctl().
23516  *
23517  * Return Code: the code returned by sd_send_scsi_cmd()
23518  *		EINVAL if invalid arguments are provided
23519  *		EFAULT if ddi_copyxxx() fails
23520  *		ENXIO if ddi_get_soft_state fails
23521  *		EIO if invalid mode sense block descriptor length
23522  */
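/*
 * Usage sketch (a hypothetical userland caller; the device path and
 * includes are assumptions, error handling elided): read the current
 * speed code, then request double speed on a Toshiba-style drive.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	int speed = 0;
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_DOUBLE_SPEED);
 */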
23523 
23524 static int
23525 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23526 {
23527 	struct sd_lun			*un = NULL;
23528 	struct mode_header		*sense_mhp, *select_mhp;
23529 	struct mode_speed		*sense_page, *select_page;
23530 	int				current_speed;
23531 	int				rval = EINVAL;
23532 	int				bd_len;
23533 	uchar_t				*sense = NULL;
23534 	uchar_t				*select = NULL;
23535 
23536 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23537 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23538 		return (ENXIO);
23539 	}
23540 
23541 	/*
23542 	 * Note: The drive speed is being modified here according to a Toshiba
23543 	 * vendor specific mode page (0x31).
23544 	 */
23545 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23546 
23547 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23548 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23549 	    SD_PATH_STANDARD)) != 0) {
23550 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23551 		    "sr_change_speed: Mode Sense Failed\n");
23552 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23553 		return (rval);
23554 	}
23555 	sense_mhp  = (struct mode_header *)sense;
23556 
23557 	/* Check the block descriptor len to handle only 1 block descriptor */
23558 	bd_len = sense_mhp->bdesc_length;
23559 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23560 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23561 		    "sr_change_speed: Mode Sense returned invalid block "
23562 		    "descriptor length\n");
23563 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23564 		return (EIO);
23565 	}
23566 
23567 	sense_page = (struct mode_speed *)
23568 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23569 	current_speed = sense_page->speed;
23570 
23571 	/* Process command */
23572 	switch (cmd) {
23573 	case CDROMGDRVSPEED:
23574 		/* Return the drive speed obtained during the mode sense */
23575 		if (current_speed == 0x2) {
23576 			current_speed = CDROM_TWELVE_SPEED;
23577 		}
23578 		if (ddi_copyout(&current_speed, (void *)data,
23579 		    sizeof (int), flag) != 0) {
23580 			rval = EFAULT;
23581 		}
23582 		break;
23583 	case CDROMSDRVSPEED:
23584 		/* Validate the requested drive speed */
23585 		switch ((uchar_t)data) {
23586 		case CDROM_TWELVE_SPEED:
23587 			data = 0x2;
23588 			/*FALLTHROUGH*/
23589 		case CDROM_NORMAL_SPEED:
23590 		case CDROM_DOUBLE_SPEED:
23591 		case CDROM_QUAD_SPEED:
23592 		case CDROM_MAXIMUM_SPEED:
23593 			break;
23594 		default:
23595 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23596 			    "sr_change_speed: "
23597 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23598 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23599 			return (EINVAL);
23600 		}
23601 
23602 		/*
23603 		 * The current drive speed matches the requested drive speed so
23604 		 * there is no need to send the mode select to change the speed
23605 		 */
23606 		if (current_speed == data) {
23607 			break;
23608 		}
23609 
23610 		/* Build the select data for the requested drive speed */
23611 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23612 		select_mhp = (struct mode_header *)select;
23613 		select_mhp->bdesc_length = 0;
23614 		select_page =
23615 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23618 		select_page->mode_page.code = CDROM_MODE_SPEED;
23619 		select_page->mode_page.length = 2;
23620 		select_page->speed = (uchar_t)data;
23621 
23622 		/* Send the mode select for the requested drive speed */
23623 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23624 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23625 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
23626 			/*
23627 			 * The mode select failed for the requested drive speed,
23628 			 * so reset the data for the original drive speed and
23629 			 * send it to the target. The error is indicated by the
23630 			 * return value for the failed mode select.
23631 			 */
23632 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23633 			    "sr_change_speed: Mode Select Failed\n");
23634 			select_page->speed = sense_page->speed;
23635 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23636 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23637 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
23638 		}
23639 		break;
23640 	default:
23641 		/* should not reach here, but check anyway */
23642 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23643 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
23644 		rval = EINVAL;
23645 		break;
23646 	}
23647 
23648 	if (select) {
23649 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
23650 	}
23651 	if (sense) {
23652 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23653 	}
23654 
23655 	return (rval);
23656 }
23657 
23658 
23659 /*
23660  *    Function: sr_atapi_change_speed()
23661  *
23662  * Description: This routine is the driver entry point for handling CD-ROM
23663  *		drive speed ioctl requests for MMC devices that do not support
23664  *		the Real Time Streaming feature (0x107).
23665  *
23666  *		Note: This routine will use the SET SPEED command which may not
23667  *		be supported by all devices.
23668  *
23669  *   Arguments: dev	- the device 'dev_t'
23670  *		cmd	- the request type; one of CDROMGDRVSPEED (get) or
23671  *			  CDROMSDRVSPEED (set)
23672  *		data	- current drive speed or requested drive speed
23673  *		flag	- this argument is a pass through to ddi_copyxxx()
23674  *			  directly from the mode argument of ioctl().
23675  *
23676  * Return Code: the code returned by sd_send_scsi_cmd()
23677  *		EINVAL if invalid arguments are provided
23678  *		EFAULT if ddi_copyxxx() fails
23679  *		ENXIO if ddi_get_soft_state fails
23680  *		EIO if invalid mode sense block descriptor length
23681  */
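/*
 * Worked example of the SET CD SPEED CDB built below, assuming
 * SD_SPEED_1X is the 1x CD data rate in KB/sec (176 in this driver's
 * sddef.h): CDROM_QUAD_SPEED becomes 4 * 176 = 704 KB/sec = 0x02C0,
 * so cdb[2] = 0x02 and cdb[3] = 0xC0.  CDROM_MAXIMUM_SPEED is sent as
 * the reserved value 0xffff (cdb[2] = cdb[3] = 0xff).
 */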
23682 
23683 static int
23684 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23685 {
23686 	struct sd_lun			*un;
23687 	struct uscsi_cmd		*com = NULL;
23688 	struct mode_header_grp2		*sense_mhp;
23689 	uchar_t				*sense_page;
23690 	uchar_t				*sense = NULL;
23691 	char				cdb[CDB_GROUP5];
23692 	int				bd_len;
23693 	int				current_speed = 0;
23694 	int				max_speed = 0;
23695 	int				rval;
23696 
23697 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23698 
23699 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23700 		return (ENXIO);
23701 	}
23702 
23703 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
23704 
23705 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
23706 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
23707 	    SD_PATH_STANDARD)) != 0) {
23708 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23709 		    "sr_atapi_change_speed: Mode Sense Failed\n");
23710 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23711 		return (rval);
23712 	}
23713 
23714 	/* Check the block descriptor len to handle only 1 block descriptor */
23715 	sense_mhp = (struct mode_header_grp2 *)sense;
23716 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
23717 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23718 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23719 		    "sr_atapi_change_speed: Mode Sense returned invalid "
23720 		    "block descriptor length\n");
23721 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23722 		return (EIO);
23723 	}
23724 
23725 	/* Calculate the current and maximum drive speeds */
23726 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
23727 	current_speed = (sense_page[14] << 8) | sense_page[15];
23728 	max_speed = (sense_page[8] << 8) | sense_page[9];
23729 
23730 	/* Process the command */
23731 	switch (cmd) {
23732 	case CDROMGDRVSPEED:
23733 		current_speed /= SD_SPEED_1X;
23734 		if (ddi_copyout(&current_speed, (void *)data,
23735 		    sizeof (int), flag) != 0)
23736 			rval = EFAULT;
23737 		break;
23738 	case CDROMSDRVSPEED:
23739 		/* Convert the speed code to KB/sec */
23740 		switch ((uchar_t)data) {
23741 		case CDROM_NORMAL_SPEED:
23742 			current_speed = SD_SPEED_1X;
23743 			break;
23744 		case CDROM_DOUBLE_SPEED:
23745 			current_speed = 2 * SD_SPEED_1X;
23746 			break;
23747 		case CDROM_QUAD_SPEED:
23748 			current_speed = 4 * SD_SPEED_1X;
23749 			break;
23750 		case CDROM_TWELVE_SPEED:
23751 			current_speed = 12 * SD_SPEED_1X;
23752 			break;
23753 		case CDROM_MAXIMUM_SPEED:
23754 			current_speed = 0xffff;
23755 			break;
23756 		default:
23757 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23758 			    "sr_atapi_change_speed: invalid drive speed %d\n",
23759 			    (uchar_t)data);
23760 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23761 			return (EINVAL);
23762 		}
23763 
23764 		/* Check the request against the drive's max speed. */
23765 		if (current_speed != 0xffff) {
23766 			if (current_speed > max_speed) {
23767 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23768 				return (EINVAL);
23769 			}
23770 		}
23771 
23772 		/*
23773 		 * Build and send the SET SPEED command
23774 		 *
23775 		 * Note: The SET SPEED (0xBB) command used in this routine is
23776 		 * obsolete per the SCSI MMC spec but still supported in the
23777 		 * MT FUJI vendor spec. Most equipment still adheres to MT FUJI,
23778 		 * so the command remains implemented in this routine.
23779 		 */
23780 		bzero(cdb, sizeof (cdb));
23781 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
23782 		cdb[2] = (uchar_t)(current_speed >> 8);
23783 		cdb[3] = (uchar_t)current_speed;
23784 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23785 		com->uscsi_cdb	   = (caddr_t)cdb;
23786 		com->uscsi_cdblen  = CDB_GROUP5;
23787 		com->uscsi_bufaddr = NULL;
23788 		com->uscsi_buflen  = 0;
23789 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
23790 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
23791 		break;
23792 	default:
23793 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23794 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
23795 		rval = EINVAL;
23796 	}
23797 
23798 	if (sense) {
23799 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23800 	}
23801 	if (com) {
23802 		kmem_free(com, sizeof (*com));
23803 	}
23804 	return (rval);
23805 }
23806 
23807 
23808 /*
23809  *    Function: sr_pause_resume()
23810  *
23811  * Description: This routine is the driver entry point for handling CD-ROM
23812  *		pause/resume ioctl requests. This only affects the audio play
23813  *		operation.
23814  *
23815  *   Arguments: dev - the device 'dev_t'
23816  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
23817  *		      for setting the resume bit of the cdb.
23818  *
23819  * Return Code: the code returned by sd_send_scsi_cmd()
23820  *		EINVAL if invalid mode specified
23821  *
23822  */
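/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption, error handling elided):
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	(void) ioctl(fd, CDROMPAUSE, 0);	/+ pause audio playback +/
 *	(void) ioctl(fd, CDROMRESUME, 0);	/+ resume from the pause +/
 */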
23823 
23824 static int
23825 sr_pause_resume(dev_t dev, int cmd)
23826 {
23827 	struct sd_lun		*un;
23828 	struct uscsi_cmd	*com;
23829 	char			cdb[CDB_GROUP1];
23830 	int			rval;
23831 
23832 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23833 		return (ENXIO);
23834 	}
23835 
23836 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23837 	bzero(cdb, CDB_GROUP1);
23838 	cdb[0] = SCMD_PAUSE_RESUME;
23839 	switch (cmd) {
23840 	case CDROMRESUME:
23841 		cdb[8] = 1;
23842 		break;
23843 	case CDROMPAUSE:
23844 		cdb[8] = 0;
23845 		break;
23846 	default:
23847 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
23848 		    " Command '%x' Not Supported\n", cmd);
23849 		rval = EINVAL;
23850 		goto done;
23851 	}
23852 
23853 	com->uscsi_cdb    = cdb;
23854 	com->uscsi_cdblen = CDB_GROUP1;
23855 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23856 
23857 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23858 	    SD_PATH_STANDARD);
23859 
23860 done:
23861 	kmem_free(com, sizeof (*com));
23862 	return (rval);
23863 }
23864 
23865 
23866 /*
23867  *    Function: sr_play_msf()
23868  *
23869  * Description: This routine is the driver entry point for handling CD-ROM
23870  *		ioctl requests to output the audio signals at the specified
23871  *		starting address and continue the audio play until the specified
23872  *		ending address (CDROMPLAYMSF) The address is in Minute Second
23873  *		ending address (CDROMPLAYMSF). The address is in Minute Second
23874  *
23875  *   Arguments: dev	- the device 'dev_t'
23876  *		data	- pointer to user provided audio msf structure,
23877  *		          specifying start/end addresses.
23878  *		flag	- this argument is a pass through to ddi_copyxxx()
23879  *		          directly from the mode argument of ioctl().
23880  *
23881  * Return Code: the code returned by sd_send_scsi_cmd()
23882  *		EFAULT if ddi_copyxxx() fails
23883  *		ENXIO if ddi_get_soft_state fails
23884  *		EINVAL if data pointer is NULL
23885  */
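/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption, error handling elided): play from 2:00.00 to 5:30.00.
 * When un_f_cfg_playmsf_bcd is set, the routine below converts each
 * field with BYTE_TO_BCD, e.g. decimal 30 becomes 0x30.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	struct cdrom_msf msf;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	msf.cdmsf_min0 = 2;  msf.cdmsf_sec0 = 0;   msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;  msf.cdmsf_sec1 = 30;  msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */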
23886 
23887 static int
23888 sr_play_msf(dev_t dev, caddr_t data, int flag)
23889 {
23890 	struct sd_lun		*un;
23891 	struct uscsi_cmd	*com;
23892 	struct cdrom_msf	msf_struct;
23893 	struct cdrom_msf	*msf = &msf_struct;
23894 	char			cdb[CDB_GROUP1];
23895 	int			rval;
23896 
23897 	if (data == NULL) {
23898 		return (EINVAL);
23899 	}
23900 
23901 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23902 		return (ENXIO);
23903 	}
23904 
23905 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
23906 		return (EFAULT);
23907 	}
23908 
23909 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23910 	bzero(cdb, CDB_GROUP1);
23911 	cdb[0] = SCMD_PLAYAUDIO_MSF;
23912 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
23913 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
23914 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
23915 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
23916 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
23917 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
23918 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
23919 	} else {
23920 		cdb[3] = msf->cdmsf_min0;
23921 		cdb[4] = msf->cdmsf_sec0;
23922 		cdb[5] = msf->cdmsf_frame0;
23923 		cdb[6] = msf->cdmsf_min1;
23924 		cdb[7] = msf->cdmsf_sec1;
23925 		cdb[8] = msf->cdmsf_frame1;
23926 	}
23927 	com->uscsi_cdb    = cdb;
23928 	com->uscsi_cdblen = CDB_GROUP1;
23929 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23930 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23931 	    SD_PATH_STANDARD);
23932 	kmem_free(com, sizeof (*com));
23933 	return (rval);
23934 }
23935 
23936 
23937 /*
23938  *    Function: sr_play_trkind()
23939  *
23940  * Description: This routine is the driver entry point for handling CD-ROM
23941  *		ioctl requests to output the audio signals at the specified
23942  *		starting address and continue the audio play until the specified
23943  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
23944  *		format.
23945  *
23946  *   Arguments: dev	- the device 'dev_t'
23947  *		data	- pointer to user provided audio track/index structure,
23948  *		          specifying start/end addresses.
23949  *		flag	- this argument is a pass through to ddi_copyxxx()
23950  *		          directly from the mode argument of ioctl().
23951  *
23952  * Return Code: the code returned by sd_send_scsi_cmd()
23953  *		EFAULT if ddi_copyxxx() fails
23954  *		ENXIO if ddi_get_soft_state fails
23955  *		EINVAL if data pointer is NULL
23956  */
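/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption, error handling elided): play from track 1 index 1
 * through track 3 index 1.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	struct cdrom_ti ti;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	ti.cdti_trk0 = 1;  ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 3;  ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */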
23957 
23958 static int
23959 sr_play_trkind(dev_t dev, caddr_t data, int flag)
23960 {
23961 	struct cdrom_ti		ti_struct;
23962 	struct cdrom_ti		*ti = &ti_struct;
23963 	struct uscsi_cmd	*com = NULL;
23964 	char			cdb[CDB_GROUP1];
23965 	int			rval;
23966 
23967 	if (data == NULL) {
23968 		return (EINVAL);
23969 	}
23970 
23971 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
23972 		return (EFAULT);
23973 	}
23974 
23975 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23976 	bzero(cdb, CDB_GROUP1);
23977 	cdb[0] = SCMD_PLAYAUDIO_TI;
23978 	cdb[4] = ti->cdti_trk0;
23979 	cdb[5] = ti->cdti_ind0;
23980 	cdb[7] = ti->cdti_trk1;
23981 	cdb[8] = ti->cdti_ind1;
23982 	com->uscsi_cdb    = cdb;
23983 	com->uscsi_cdblen = CDB_GROUP1;
23984 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23985 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23986 	    SD_PATH_STANDARD);
23987 	kmem_free(com, sizeof (*com));
23988 	return (rval);
23989 }
23990 
23991 
23992 /*
23993  *    Function: sr_read_all_subcodes()
23994  *
23995  * Description: This routine is the driver entry point for handling CD-ROM
23996  *		ioctl requests to return raw subcode data while the target is
23997  *		playing audio (CDROMSUBCODE).
23998  *
23999  *   Arguments: dev	- the device 'dev_t'
24000  *		data	- pointer to user provided cdrom subcode structure,
24001  *		          specifying the transfer length and address.
24002  *		flag	- this argument is a pass through to ddi_copyxxx()
24003  *		          directly from the mode argument of ioctl().
24004  *
24005  * Return Code: the code returned by sd_send_scsi_cmd()
24006  *		EFAULT if ddi_copyxxx() fails
24007  *		ENXIO if ddi_get_soft_state fails
24008  *		EINVAL if data pointer is NULL
24009  */
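/*
 * Usage sketch (a hypothetical userland caller; the device path and
 * buffer sizing are assumptions, error handling elided): read raw
 * subcode data for 4 blocks.  cdsc_length is a block count; the
 * transfer size is cdsc_length * CDROM_BLK_SUBCODE (96) bytes, as
 * computed below.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	struct cdrom_subcode sc;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	sc.cdsc_length = 4;
 *	sc.cdsc_addr = malloc(4 * 96);
 *	(void) ioctl(fd, CDROMSUBCODE, &sc);
 */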
24010 
24011 static int
24012 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24013 {
24014 	struct sd_lun		*un = NULL;
24015 	struct uscsi_cmd	*com = NULL;
24016 	struct cdrom_subcode	*subcode = NULL;
24017 	int			rval;
24018 	size_t			buflen;
24019 	char			cdb[CDB_GROUP5];
24020 
24021 #ifdef _MULTI_DATAMODEL
24022 	/* To support ILP32 applications in an LP64 world */
24023 	struct cdrom_subcode32		cdrom_subcode32;
24024 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24025 #endif
24026 	if (data == NULL) {
24027 		return (EINVAL);
24028 	}
24029 
24030 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24031 		return (ENXIO);
24032 	}
24033 
24034 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24035 
24036 #ifdef _MULTI_DATAMODEL
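	/*
	 * ddi_model_convert_from() reports the data model of the ioctl
	 * caller: DDI_MODEL_ILP32 for a 32-bit process, whose smaller
	 * structure must be copied in and widened to the native form, or
	 * DDI_MODEL_NONE when the caller's model matches the kernel's and
	 * the structure can be copied in directly.
	 */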
24037 	switch (ddi_model_convert_from(flag & FMODELS)) {
24038 	case DDI_MODEL_ILP32:
24039 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24040 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24041 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24042 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24043 			return (EFAULT);
24044 		}
24045 		/* Convert the ILP32 uscsi data from the application to LP64 */
24046 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24047 		break;
24048 	case DDI_MODEL_NONE:
24049 		if (ddi_copyin(data, subcode,
24050 		    sizeof (struct cdrom_subcode), flag)) {
24051 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24052 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24053 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24054 			return (EFAULT);
24055 		}
24056 		break;
24057 	}
24058 #else /* ! _MULTI_DATAMODEL */
24059 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24060 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24061 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24062 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24063 		return (EFAULT);
24064 	}
24065 #endif /* _MULTI_DATAMODEL */
24066 
24067 	/*
24068 	 * Since MMC-2 expects max 3 bytes for length, check if the
24069 	 * length input is greater than 3 bytes
24070 	 */
24071 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24072 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24073 		    "sr_read_all_subcodes: "
24074 		    "cdrom transfer length too large: %d (limit %d)\n",
24075 		    subcode->cdsc_length, 0xFFFFFF);
24076 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24077 		return (EINVAL);
24078 	}
24079 
24080 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24081 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24082 	bzero(cdb, CDB_GROUP5);
24083 
24084 	if (un->un_f_mmc_cap == TRUE) {
24085 		cdb[0] = (char)SCMD_READ_CD;
24086 		cdb[2] = (char)0xff;
24087 		cdb[3] = (char)0xff;
24088 		cdb[4] = (char)0xff;
24089 		cdb[5] = (char)0xff;
24090 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24091 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24092 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24093 		cdb[10] = 1;
24094 	} else {
24095 		/*
24096 		 * Note: A vendor specific command (0xDF) is being used here to
24097 		 * request a read of all subcodes.
24098 		 */
24099 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24100 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24101 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24102 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24103 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24104 	}
24105 	com->uscsi_cdb	   = cdb;
24106 	com->uscsi_cdblen  = CDB_GROUP5;
24107 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24108 	com->uscsi_buflen  = buflen;
24109 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24110 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24111 	    SD_PATH_STANDARD);
24112 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24113 	kmem_free(com, sizeof (*com));
24114 	return (rval);
24115 }
24116 
24117 
24118 /*
24119  *    Function: sr_read_subchannel()
24120  *
24121  * Description: This routine is the driver entry point for handling CD-ROM
24122  *		ioctl requests to return the Q sub-channel data of the CD
24123  *		current position block (CDROMSUBCHNL). The data includes the
24124  *		track number, index number, absolute CD-ROM address (LBA or MSF
24125  *		format per the user), track relative CD-ROM address (LBA or MSF
24126  *		format per the user), control data and audio status.
24127  *
24128  *   Arguments: dev	- the device 'dev_t'
24129  *		data	- pointer to user provided cdrom sub-channel structure
24130  *		flag	- this argument is a pass through to ddi_copyxxx()
24131  *		          directly from the mode argument of ioctl().
24132  *
24133  * Return Code: the code returned by sd_send_scsi_cmd()
24134  *		EFAULT if ddi_copyxxx() fails
24135  *		ENXIO if ddi_get_soft_state fails
24136  *		EINVAL if data pointer is NULL
24137  */
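/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption, error handling elided): poll the current audio position
 * in MSF form.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_subchnl sc;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	sc.cdsc_format = CDROM_MSF;
 *	if (ioctl(fd, CDROMSUBCHNL, &sc) == 0)
 *		(void) printf("trk %d %02d:%02d\n", sc.cdsc_trk,
 *		    sc.cdsc_absaddr.msf.minute, sc.cdsc_absaddr.msf.second);
 */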
24138 
24139 static int
24140 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24141 {
24142 	struct sd_lun		*un;
24143 	struct uscsi_cmd	*com;
24144 	struct cdrom_subchnl	subchannel;
24145 	struct cdrom_subchnl	*subchnl = &subchannel;
24146 	char			cdb[CDB_GROUP1];
24147 	caddr_t			buffer;
24148 	int			rval;
24149 
24150 	if (data == NULL) {
24151 		return (EINVAL);
24152 	}
24153 
24154 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24155 	    (un->un_state == SD_STATE_OFFLINE)) {
24156 		return (ENXIO);
24157 	}
24158 
24159 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24160 		return (EFAULT);
24161 	}
24162 
24163 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24164 	bzero(cdb, CDB_GROUP1);
24165 	cdb[0] = SCMD_READ_SUBCHANNEL;
24166 	/* Set the MSF bit based on the user requested address format */
24167 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24168 	/*
24169 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data should
24170 	 * be returned
24171 	 */
24172 	cdb[2] = 0x40;
24173 	/*
24174 	 * Set byte 3 to specify the return data format. A value of 0x01
24175 	 * indicates that the CD-ROM current position should be returned.
24176 	 */
24177 	cdb[3] = 0x01;
24178 	cdb[8] = 0x10;
24179 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24180 	com->uscsi_cdb	   = cdb;
24181 	com->uscsi_cdblen  = CDB_GROUP1;
24182 	com->uscsi_bufaddr = buffer;
24183 	com->uscsi_buflen  = 16;
24184 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24185 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24186 	    SD_PATH_STANDARD);
24187 	if (rval != 0) {
24188 		kmem_free(buffer, 16);
24189 		kmem_free(com, sizeof (*com));
24190 		return (rval);
24191 	}
24192 
24193 	/* Process the returned Q sub-channel data */
24194 	subchnl->cdsc_audiostatus = buffer[1];
24195 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24196 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24197 	subchnl->cdsc_trk	= buffer[6];
24198 	subchnl->cdsc_ind	= buffer[7];
24199 	if (subchnl->cdsc_format & CDROM_LBA) {
24200 		subchnl->cdsc_absaddr.lba =
24201 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24202 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24203 		subchnl->cdsc_reladdr.lba =
24204 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24205 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24206 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24207 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24208 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24209 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24210 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24211 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24212 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24213 	} else {
24214 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24215 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24216 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24217 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24218 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24219 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24220 	}
24221 	kmem_free(buffer, 16);
24222 	kmem_free(com, sizeof (*com));
24223 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24224 	    != 0) {
24225 		return (EFAULT);
24226 	}
24227 	return (rval);
24228 }
24229 
24230 
24231 /*
24232  *    Function: sr_read_tocentry()
24233  *
24234  * Description: This routine is the driver entry point for handling CD-ROM
24235  *		ioctl requests to read from the Table of Contents (TOC)
24236  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24237  *		fields, the starting address (LBA or MSF format per the user)
24238  *		and the data mode if the user specified track is a data track.
24239  *
24240  *		Note: The READ HEADER (0x44) command used in this routine is
24241  *		obsolete per the SCSI MMC spec but still supported in the
24242  *		MT FUJI vendor spec. Most equipment still adheres to MT FUJI,
24243  *		so the command remains implemented in this routine.
24244  *
24245  *   Arguments: dev	- the device 'dev_t'
24246  *		data	- pointer to user provided toc entry structure,
24247  *			  specifying the track # and the address format
24248  *			  (LBA or MSF).
24249  *		flag	- this argument is a pass through to ddi_copyxxx()
24250  *		          directly from the mode argument of ioctl().
24251  *
24252  * Return Code: the code returned by sd_send_scsi_cmd()
24253  *		EFAULT if ddi_copyxxx() fails
24254  *		ENXIO if ddi_get_soft_state fails
24255  *		EINVAL if data pointer is NULL
24256  */
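/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption, error handling elided): fetch the start address of
 * track 1 in MSF form.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	struct cdrom_tocentry te;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_MSF;
 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 */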
24257 
24258 static int
24259 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24260 {
24261 	struct sd_lun		*un = NULL;
24262 	struct uscsi_cmd	*com;
24263 	struct cdrom_tocentry	toc_entry;
24264 	struct cdrom_tocentry	*entry = &toc_entry;
24265 	caddr_t			buffer;
24266 	int			rval;
24267 	char			cdb[CDB_GROUP1];
24268 
24269 	if (data == NULL) {
24270 		return (EINVAL);
24271 	}
24272 
24273 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24274 	    (un->un_state == SD_STATE_OFFLINE)) {
24275 		return (ENXIO);
24276 	}
24277 
24278 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24279 		return (EFAULT);
24280 	}
24281 
24282 	/* Validate the requested track and address format */
24283 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24284 		return (EINVAL);
24285 	}
24286 
24287 	if (entry->cdte_track == 0) {
24288 		return (EINVAL);
24289 	}
24290 
24291 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24292 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24293 	bzero(cdb, CDB_GROUP1);
24294 
24295 	cdb[0] = SCMD_READ_TOC;
24296 	/* Set the MSF bit based on the user requested address format  */
24297 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24298 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24299 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24300 	} else {
24301 		cdb[6] = entry->cdte_track;
24302 	}
24303 
24304 	/*
24305 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24306 	 * (4 byte TOC response header + 8 byte track descriptor)
24307 	 */
24308 	cdb[8] = 12;
24309 	com->uscsi_cdb	   = cdb;
24310 	com->uscsi_cdblen  = CDB_GROUP1;
24311 	com->uscsi_bufaddr = buffer;
24312 	com->uscsi_buflen  = 0x0C;
24313 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24314 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24315 	    SD_PATH_STANDARD);
24316 	if (rval != 0) {
24317 		kmem_free(buffer, 12);
24318 		kmem_free(com, sizeof (*com));
24319 		return (rval);
24320 	}
24321 
24322 	/* Process the toc entry */
24323 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24324 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24325 	if (entry->cdte_format & CDROM_LBA) {
24326 		entry->cdte_addr.lba =
24327 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24328 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24329 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24330 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24331 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24332 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24333 		/*
24334 		 * Send a READ TOC command using the LBA address format to get
24335 		 * the LBA for the track requested so it can be used in the
24336 		 * READ HEADER request
24337 		 *
24338 		 * Note: The MSF bit of the READ HEADER command specifies the
24339 		 * output format. The block address specified in that command
24340 		 * must be in LBA format.
24341 		 */
24342 		cdb[1] = 0;
24343 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24344 		    SD_PATH_STANDARD);
24345 		if (rval != 0) {
24346 			kmem_free(buffer, 12);
24347 			kmem_free(com, sizeof (*com));
24348 			return (rval);
24349 		}
24350 	} else {
24351 		entry->cdte_addr.msf.minute	= buffer[9];
24352 		entry->cdte_addr.msf.second	= buffer[10];
24353 		entry->cdte_addr.msf.frame	= buffer[11];
24354 		/*
24355 		 * Send a READ TOC command using the LBA address format to get
24356 		 * the LBA for the track requested so it can be used in the
24357 		 * READ HEADER request
24358 		 *
24359 		 * Note: The MSF bit of the READ HEADER command specifies the
24360 		 * output format. The block address specified in that command
24361 		 * must be in LBA format.
24362 		 */
24363 		cdb[1] = 0;
24364 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24365 		    SD_PATH_STANDARD);
24366 		if (rval != 0) {
24367 			kmem_free(buffer, 12);
24368 			kmem_free(com, sizeof (*com));
24369 			return (rval);
24370 		}
24371 	}
24372 
24373 	/*
24374 	 * Build and send the READ HEADER command to determine the data mode of
24375 	 * the user specified track.
24376 	 */
24377 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24378 	    (entry->cdte_track != CDROM_LEADOUT)) {
24379 		bzero(cdb, CDB_GROUP1);
24380 		cdb[0] = SCMD_READ_HEADER;
24381 		cdb[2] = buffer[8];
24382 		cdb[3] = buffer[9];
24383 		cdb[4] = buffer[10];
24384 		cdb[5] = buffer[11];
24385 		cdb[8] = 0x08;
24386 		com->uscsi_buflen = 0x08;
24387 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24388 		    SD_PATH_STANDARD);
24389 		if (rval == 0) {
24390 			entry->cdte_datamode = buffer[0];
24391 		} else {
24392 			/*
24393 			 * The READ HEADER command failed; since it is
24394 			 * obsolete in one spec, it is better to return
24395 			 * -1 for an invalid track so that we can still
24396 			 * receive the rest of the TOC data.
24397 			 */
24398 			entry->cdte_datamode = (uchar_t)-1;
24399 		}
24400 	} else {
24401 		entry->cdte_datamode = (uchar_t)-1;
24402 	}
24403 
24404 	kmem_free(buffer, 12);
24405 	kmem_free(com, sizeof (*com));
24406 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag))
24407 		return (EFAULT);
24408 
24409 	return (rval);
24410 }
24411 
24412 
24413 /*
24414  *    Function: sr_read_tochdr()
24415  *
24416  * Description: This routine is the driver entry point for handling CD-ROM
24417  *		ioctl requests to read the Table of Contents (TOC) header
24418  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
24419  *		and ending track numbers.
24420  *
24421  *   Arguments: dev	- the device 'dev_t'
24422  *		data	- pointer to user provided toc header structure,
24423  *			  specifying the starting and ending track numbers.
24424  *		flag	- this argument is a pass through to ddi_copyxxx()
24425  *			  directly from the mode argument of ioctl().
24426  *
24427  * Return Code: the code returned by sd_send_scsi_cmd()
24428  *		EFAULT if ddi_copyxxx() fails
24429  *		ENXIO if ddi_get_soft_state fails
24430  *		EINVAL if data pointer is NULL
24431  */
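/*
 * Usage sketch (a hypothetical userland caller; the device path is an
 * assumption): the returned header gives the first and last track
 * numbers, which bound the cdte_track values usable with
 * CDROMREADTOCENTRY.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_tochdr th;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	if (ioctl(fd, CDROMREADTOCHDR, &th) == 0)
 *		(void) printf("tracks %d-%d\n", th.cdth_trk0, th.cdth_trk1);
 */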
24432 
24433 static int
24434 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24435 {
24436 	struct sd_lun		*un;
24437 	struct uscsi_cmd	*com;
24438 	struct cdrom_tochdr	toc_header;
24439 	struct cdrom_tochdr	*hdr = &toc_header;
24440 	char			cdb[CDB_GROUP1];
24441 	int			rval;
24442 	caddr_t			buffer;
24443 
24444 	if (data == NULL) {
24445 		return (EINVAL);
24446 	}
24447 
24448 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24449 	    (un->un_state == SD_STATE_OFFLINE)) {
24450 		return (ENXIO);
24451 	}
24452 
24453 	buffer = kmem_zalloc(4, KM_SLEEP);
24454 	bzero(cdb, CDB_GROUP1);
24455 	cdb[0] = SCMD_READ_TOC;
24456 	/*
24457 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24458 	 * that the TOC header should be returned
24459 	 */
24460 	cdb[6] = 0x00;
24461 	/*
24462 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24463 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24464 	 */
24465 	cdb[8] = 0x04;
24466 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24467 	com->uscsi_cdb	   = cdb;
24468 	com->uscsi_cdblen  = CDB_GROUP1;
24469 	com->uscsi_bufaddr = buffer;
24470 	com->uscsi_buflen  = 0x04;
24471 	com->uscsi_timeout = 300;
24472 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24473 
24474 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24475 	    SD_PATH_STANDARD);
24476 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24477 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24478 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24479 	} else {
24480 		hdr->cdth_trk0 = buffer[2];
24481 		hdr->cdth_trk1 = buffer[3];
24482 	}
24483 	kmem_free(buffer, 4);
24484 	kmem_free(com, sizeof (*com));
24485 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24486 		return (EFAULT);
24487 	}
24488 	return (rval);
24489 }
24490 
24491 
24492 /*
24493  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24494  * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
24495  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24496  * digital audio and extended architecture digital audio. These modes are
24497  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24498  * MMC specs.
24499  *
24500  * In addition to support for the various data formats these routines also
24501  * include support for devices that implement only the direct access READ
24502  * commands (0x08, 0x28), devices that implement the READ_CD commands
24503  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24504  * READ CDXA commands (0xD8, 0xDB).
24505  */
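/*
 * Sketch of the command-to-routine mapping described above (derived
 * from the note; consult each routine for the exact selection logic):
 *
 *	direct access READ		0x08/0x28  sr_read_mode1(), sr_read_mode2()
 *	READ CD / READ CD (D4)		0xBE/0xD4  sr_read_cd_mode2()
 *	vendor READ CDDA / READ CDXA	0xD8/0xDB  sr_read_cdda(), sr_read_cdxa()
 */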
24506 
24507 /*
24508  *    Function: sr_read_mode1()
24509  *
24510  * Description: This routine is the driver entry point for handling CD-ROM
24511  *		ioctl read mode1 requests (CDROMREADMODE1).
24512  *
24513  *   Arguments: dev	- the device 'dev_t'
24514  *		data	- pointer to user provided cd read structure specifying
24515  *			  the lba buffer address and length.
24516  *		flag	- this argument is a pass through to ddi_copyxxx()
24517  *			  directly from the mode argument of ioctl().
24518  *
24519  * Return Code: the code returned by sd_send_scsi_cmd()
24520  *		EFAULT if ddi_copyxxx() fails
24521  *		ENXIO if ddi_get_soft_state fails
24522  *		EINVAL if data pointer is NULL
24523  */
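/*
 * Usage sketch (a hypothetical userland caller; the device path and
 * LBA are assumptions, error handling elided): read one 2048-byte
 * mode 1 sector at LBA 16.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *
 *	char buf[2048];
 *	struct cdrom_read cr;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	cr.cdread_lba = 16;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	(void) ioctl(fd, CDROMREADMODE1, &cr);
 */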
24524 
24525 static int
24526 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24527 {
24528 	struct sd_lun		*un;
24529 	struct cdrom_read	mode1_struct;
24530 	struct cdrom_read	*mode1 = &mode1_struct;
24531 	int			rval;
24532 #ifdef _MULTI_DATAMODEL
24533 	/* To support ILP32 applications in an LP64 world */
24534 	struct cdrom_read32	cdrom_read32;
24535 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24536 #endif /* _MULTI_DATAMODEL */
24537 
24538 	if (data == NULL) {
24539 		return (EINVAL);
24540 	}
24541 
24542 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24543 	    (un->un_state == SD_STATE_OFFLINE)) {
24544 		return (ENXIO);
24545 	}
24546 
24547 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24548 	    "sr_read_mode1: entry: un:0x%p\n", un);
24549 
24550 #ifdef _MULTI_DATAMODEL
24551 	switch (ddi_model_convert_from(flag & FMODELS)) {
24552 	case DDI_MODEL_ILP32:
24553 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24554 			return (EFAULT);
24555 		}
24556 		/* Convert the ILP32 uscsi data from the application to LP64 */
24557 		cdrom_read32tocdrom_read(cdrd32, mode1);
24558 		break;
24559 	case DDI_MODEL_NONE:
24560 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24561 			return (EFAULT);
24562 		}
24563 	}
24564 #else /* ! _MULTI_DATAMODEL */
24565 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24566 		return (EFAULT);
24567 	}
24568 #endif /* _MULTI_DATAMODEL */
24569 
24570 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24571 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24572 
24573 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24574 	    "sr_read_mode1: exit: un:0x%p\n", un);
24575 
24576 	return (rval);
24577 }
24578 
24579 
24580 /*
24581  *    Function: sr_read_cd_mode2()
24582  *
24583  * Description: This routine is the driver entry point for handling CD-ROM
24584  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24585  *		support the READ CD (0xBE) command or the 1st generation
24586  *		READ CD (0xD4) command.
24587  *
24588  *   Arguments: dev	- the device 'dev_t'
24589  *		data	- pointer to user provided cd read structure specifying
24590  *			  the lba buffer address and length.
24591  *		flag	- this argument is a pass through to ddi_copyxxx()
24592  *			  directly from the mode argument of ioctl().
24593  *
24594  * Return Code: the code returned by sd_send_scsi_cmd()
24595  *		EFAULT if ddi_copyxxx() fails
24596  *		ENXIO if ddi_get_soft_state fails
24597  *		EINVAL if data pointer is NULL
24598  */
24599 
24600 static int
24601 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
24602 {
24603 	struct sd_lun		*un;
24604 	struct uscsi_cmd	*com;
24605 	struct cdrom_read	mode2_struct;
24606 	struct cdrom_read	*mode2 = &mode2_struct;
24607 	uchar_t			cdb[CDB_GROUP5];
24608 	int			nblocks;
24609 	int			rval;
24610 #ifdef _MULTI_DATAMODEL
24611 	/*  To support ILP32 applications in an LP64 world */
24612 	struct cdrom_read32	cdrom_read32;
24613 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24614 #endif /* _MULTI_DATAMODEL */
24615 
24616 	if (data == NULL) {
24617 		return (EINVAL);
24618 	}
24619 
24620 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24621 	    (un->un_state == SD_STATE_OFFLINE)) {
24622 		return (ENXIO);
24623 	}
24624 
24625 #ifdef _MULTI_DATAMODEL
24626 	switch (ddi_model_convert_from(flag & FMODELS)) {
24627 	case DDI_MODEL_ILP32:
24628 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24629 			return (EFAULT);
24630 		}
24631 		/* Convert the ILP32 uscsi data from the application to LP64 */
24632 		cdrom_read32tocdrom_read(cdrd32, mode2);
24633 		break;
24634 	case DDI_MODEL_NONE:
24635 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24636 			return (EFAULT);
24637 		}
24638 		break;
24639 	}
24640 
24641 #else /* ! _MULTI_DATAMODEL */
24642 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24643 		return (EFAULT);
24644 	}
24645 #endif /* _MULTI_DATAMODEL */
24646 
24647 	bzero(cdb, sizeof (cdb));
24648 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
24649 		/* Read command supported by 1st generation atapi drives */
24650 		cdb[0] = SCMD_READ_CDD4;
24651 	} else {
24652 		/* Universal CD Access Command */
24653 		cdb[0] = SCMD_READ_CD;
24654 	}
24655 
24656 	/*
24657 	 * Set the expected sector type to 2336 byte, Mode 2 Yellow Book
24658 	 */
24659 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
24660 
24661 	/* set the start address */
24662 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0xFF);
24663 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0xFF);
24664 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24665 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
24666 
24667 	/* set the transfer length */
24668 	nblocks = mode2->cdread_buflen / 2336;
24669 	cdb[6] = (uchar_t)(nblocks >> 16);
24670 	cdb[7] = (uchar_t)(nblocks >> 8);
24671 	cdb[8] = (uchar_t)nblocks;
24672 
24673 	/* set the filter bits */
24674 	cdb[9] = CDROM_READ_CD_USERDATA;
24675 
24676 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24677 	com->uscsi_cdb = (caddr_t)cdb;
24678 	com->uscsi_cdblen = sizeof (cdb);
24679 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24680 	com->uscsi_buflen = mode2->cdread_buflen;
24681 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24682 
24683 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24684 	    SD_PATH_STANDARD);
24685 	kmem_free(com, sizeof (*com));
24686 	return (rval);
24687 }
24688 
24689 
24690 /*
24691  *    Function: sr_read_mode2()
24692  *
24693  * Description: This routine is the driver entry point for handling CD-ROM
24694  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24695  *		do not support the READ CD (0xBE) command.
24696  *
24697  *   Arguments: dev	- the device 'dev_t'
24698  *		data	- pointer to user provided cd read structure specifying
24699  *			  the lba buffer address and length.
24700  *		flag	- this argument is a pass through to ddi_copyxxx()
24701  *			  directly from the mode argument of ioctl().
24702  *
24703  * Return Code: the code returned by sd_send_scsi_cmd()
24704  *		EFAULT if ddi_copyxxx() fails
24705  *		ENXIO if ddi_get_soft_state fails
24706  *		EINVAL if data pointer is NULL
24707  *		EIO if the block size cannot be reset
24708  *		EAGAIN if commands are in progress in the driver
24709  */
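/*
 * Worked example of the address math used below (as the inline
 * comments state): the caller supplies cdread_lba in 512-byte units
 * while the READ(6) command built here addresses 2-Kbyte blocks, so
 * the LBA is shifted right by two (divided by four); a user LBA of 64
 * becomes block 16.  The transfer length is cdread_buflen / 2336
 * whole mode 2 sectors.
 */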
24710 
24711 static int
24712 sr_read_mode2(dev_t dev, caddr_t data, int flag)
24713 {
24714 	struct sd_lun		*un;
24715 	struct cdrom_read	mode2_struct;
24716 	struct cdrom_read	*mode2 = &mode2_struct;
24717 	int			rval;
24718 	uint32_t		restore_blksize;
24719 	struct uscsi_cmd	*com;
24720 	uchar_t			cdb[CDB_GROUP0];
24721 	int			nblocks;
24722 
24723 #ifdef _MULTI_DATAMODEL
24724 	/* To support ILP32 applications in an LP64 world */
24725 	struct cdrom_read32	cdrom_read32;
24726 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24727 #endif /* _MULTI_DATAMODEL */
24728 
24729 	if (data == NULL) {
24730 		return (EINVAL);
24731 	}
24732 
24733 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24734 	    (un->un_state == SD_STATE_OFFLINE)) {
24735 		return (ENXIO);
24736 	}
24737 
24738 	/*
24739 	 * Because this routine will update the device and driver block size
24740 	 * being used, we want to make sure there are no commands in progress.
24741 	 * If commands are in progress, the user will have to try again.
24742 	 *
24743 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
24744 	 * in sdioctl to protect commands from sdioctl through to the top of
24745 	 * sd_uscsi_strategy. See sdioctl for details.
24746 	 */
24747 	mutex_enter(SD_MUTEX(un));
24748 	if (un->un_ncmds_in_driver != 1) {
24749 		mutex_exit(SD_MUTEX(un));
24750 		return (EAGAIN);
24751 	}
24752 	mutex_exit(SD_MUTEX(un));
24753 
24754 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24755 	    "sr_read_mode2: entry: un:0x%p\n", un);
24756 
24757 #ifdef _MULTI_DATAMODEL
24758 	switch (ddi_model_convert_from(flag & FMODELS)) {
24759 	case DDI_MODEL_ILP32:
24760 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24761 			return (EFAULT);
24762 		}
24763 		/* Convert the ILP32 uscsi data from the application to LP64 */
24764 		cdrom_read32tocdrom_read(cdrd32, mode2);
24765 		break;
24766 	case DDI_MODEL_NONE:
24767 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24768 			return (EFAULT);
24769 		}
24770 		break;
24771 	}
24772 #else /* ! _MULTI_DATAMODEL */
24773 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
24774 		return (EFAULT);
24775 	}
24776 #endif /* _MULTI_DATAMODEL */
24777 
24778 	/* Store the current target block size for restoration later */
24779 	restore_blksize = un->un_tgt_blocksize;
24780 
24781 	/* Change the device and soft state target block size to 2336 */
24782 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
24783 		rval = EIO;
24784 		goto done;
24785 	}
24786 
24787 
24788 	bzero(cdb, sizeof (cdb));
24789 
24790 	/* set READ operation */
24791 	cdb[0] = SCMD_READ;
24792 
24793 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
24794 	mode2->cdread_lba >>= 2;
24795 
24796 	/* set the start address */
24797 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0x1F);
24798 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24799 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
24800 
24801 	/* set the transfer length */
24802 	nblocks = mode2->cdread_buflen / 2336;
24803 	cdb[4] = (uchar_t)nblocks & 0xFF;
24804 
24805 	/* build command */
24806 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24807 	com->uscsi_cdb = (caddr_t)cdb;
24808 	com->uscsi_cdblen = sizeof (cdb);
24809 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24810 	com->uscsi_buflen = mode2->cdread_buflen;
24811 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24812 
24813 	/*
24814 	 * Issue SCSI command with user space address for read buffer.
24815 	 *
24816 	 * This sends the command through the main channel in the driver.
24817 	 *
24818 	 * Since this is accessed via an IOCTL call, we go through the
24819 	 * standard path, so that if the device was powered down, then
24820 	 * it would be 'awakened' to handle the command.
24821 	 */
24822 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24823 	    SD_PATH_STANDARD);
24824 
24825 	kmem_free(com, sizeof (*com));
24826 
24827 	/* Restore the device and soft state target block size */
24828 	if (sr_sector_mode(dev, restore_blksize) != 0) {
24829 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24830 		    "cannot switch back to mode 1\n");
24831 		/*
24832 		 * If the READ command succeeded, we still need to report
24833 		 * an error because we failed to reset the block size.
24834 		 */
24835 		if (rval == 0) {
24836 			rval = EIO;
24837 		}
24838 	}
24839 
24840 done:
24841 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24842 	    "sr_read_mode2: exit: un:0x%p\n", un);
24843 
24844 	return (rval);
24845 }
24846 
24847 
24848 /*
24849  *    Function: sr_sector_mode()
24850  *
24851  * Description: This utility function is used by sr_read_mode2 to set the target
24852  *		block size based on the user specified size. This is a legacy
24853  *		implementation based upon a vendor specific mode page.
24854  *
24855  *   Arguments: dev	- the device 'dev_t'
24856  *		blksize	- flag indicating if the block size is being set to
24857  *			  2336 or 512.
24858  *
24859  * Return Code: the code returned by sd_send_scsi_cmd()
24860  *		ENXIO if ddi_get_soft_state fails
24863  */
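/*
 * Worked example of the mode select data built below: for
 * SD_MODE2_BLKSIZE (2336 = 0x0920) the block size bytes are
 * select[10] = 0x09 and select[11] = 0x20, and select[14] |= 0x01
 * marks mode 2; for 512 (0x0200) they are select[10] = 0x02 and
 * select[11] = 0x00.
 */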
24864 
24865 static int
24866 sr_sector_mode(dev_t dev, uint32_t blksize)
24867 {
24868 	struct sd_lun	*un;
24869 	uchar_t		*sense;
24870 	uchar_t		*select;
24871 	int		rval;
24872 
24873 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24874 	    (un->un_state == SD_STATE_OFFLINE)) {
24875 		return (ENXIO);
24876 	}
24877 
24878 	sense = kmem_zalloc(20, KM_SLEEP);
24879 
24880 	/* Note: This is a vendor specific mode page (0x81) */
24881 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
24882 	    SD_PATH_STANDARD)) != 0) {
24883 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24884 		    "sr_sector_mode: Mode Sense failed\n");
24885 		kmem_free(sense, 20);
24886 		return (rval);
24887 	}
24888 	select = kmem_zalloc(20, KM_SLEEP);
24889 	select[3] = 0x08;
24890 	select[10] = ((blksize >> 8) & 0xff);
24891 	select[11] = (blksize & 0xff);
24892 	select[12] = 0x01;
24893 	select[13] = 0x06;
24894 	select[14] = sense[14];
24895 	select[15] = sense[15];
24896 	if (blksize == SD_MODE2_BLKSIZE) {
24897 		select[14] |= 0x01;
24898 	}
24899 
24900 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
24901 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24902 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24903 		    "sr_sector_mode: Mode Select failed\n");
24904 	} else {
24905 		/*
24906 		 * Only update the softstate block size if we successfully
24907 		 * changed the device block mode.
24908 		 */
24909 		mutex_enter(SD_MUTEX(un));
24910 		sd_update_block_info(un, blksize, 0);
24911 		mutex_exit(SD_MUTEX(un));
24912 	}
24913 	kmem_free(sense, 20);
24914 	kmem_free(select, 20);
24915 	return (rval);
24916 }
24917 
24918 
24919 /*
24920  *    Function: sr_read_cdda()
24921  *
24922  * Description: This routine is the driver entry point for handling CD-ROM
24923  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
24924  *		the target supports CDDA these requests are handled via a vendor
24925  *		specific command (0xD8). If the target does not support CDDA
24926  *		these requests are handled via the READ CD command (0xBE).
24927  *
24928  *   Arguments: dev	- the device 'dev_t'
24929  *		data	- pointer to user provided CD-DA structure specifying
24930  *			  the track starting address, transfer length, and
24931  *			  subcode options.
24932  *		flag	- this argument is a pass through to ddi_copyxxx()
24933  *			  directly from the mode argument of ioctl().
24934  *
24935  * Return Code: the code returned by sd_send_scsi_cmd()
24936  *		EFAULT if ddi_copyxxx() fails
24937  *		ENXIO if ddi_get_soft_state fails
24938  *		EINVAL if invalid arguments are provided
24939  *		ENOTTY
24940  */
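/*
 * Usage sketch (a hypothetical userland caller; the device path and
 * sizing are assumptions, error handling elided): read 8 frames of
 * raw audio with no subcode, at 2352 bytes per frame as computed
 * below.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *
 *	struct cdrom_cdda cd;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	cd.cdda_addr = 0;
 *	cd.cdda_length = 8;
 *	cd.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *	cd.cdda_data = malloc(8 * 2352);
 *	(void) ioctl(fd, CDROMCDDA, &cd);
 */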
24941 
24942 static int
24943 sr_read_cdda(dev_t dev, caddr_t data, int flag)
24944 {
24945 	struct sd_lun			*un;
24946 	struct uscsi_cmd		*com;
24947 	struct cdrom_cdda		*cdda;
24948 	int				rval;
24949 	size_t				buflen;
24950 	char				cdb[CDB_GROUP5];
24951 
24952 #ifdef _MULTI_DATAMODEL
24953 	/* To support ILP32 applications in an LP64 world */
24954 	struct cdrom_cdda32	cdrom_cdda32;
24955 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
24956 #endif /* _MULTI_DATAMODEL */
24957 
24958 	if (data == NULL) {
24959 		return (EINVAL);
24960 	}
24961 
24962 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24963 		return (ENXIO);
24964 	}
24965 
24966 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
24967 
24968 #ifdef _MULTI_DATAMODEL
24969 	switch (ddi_model_convert_from(flag & FMODELS)) {
24970 	case DDI_MODEL_ILP32:
24971 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
24972 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24973 			    "sr_read_cdda: ddi_copyin Failed\n");
24974 			kmem_free(cdda, sizeof (struct cdrom_cdda));
24975 			return (EFAULT);
24976 		}
24977 		/* Convert the ILP32 uscsi data from the application to LP64 */
24978 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
24979 		break;
24980 	case DDI_MODEL_NONE:
24981 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
24982 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24983 			    "sr_read_cdda: ddi_copyin Failed\n");
24984 			kmem_free(cdda, sizeof (struct cdrom_cdda));
24985 			return (EFAULT);
24986 		}
24987 		break;
24988 	}
24989 #else /* ! _MULTI_DATAMODEL */
24990 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
24991 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24992 		    "sr_read_cdda: ddi_copyin Failed\n");
24993 		kmem_free(cdda, sizeof (struct cdrom_cdda));
24994 		return (EFAULT);
24995 	}
24996 #endif /* _MULTI_DATAMODEL */
24997 
24998 	/*
24999 	 * MMC-2 allows at most 3 bytes for the transfer length, so
25000 	 * reject any length that does not fit in 3 bytes.
25001 	 */
25002 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25003 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25004 		    "cdrom transfer length too large: %d (limit %d)\n",
25005 		    cdda->cdda_length, 0xFFFFFF);
25006 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25007 		return (EINVAL);
25008 	}
25009 
25010 	switch (cdda->cdda_subcode) {
25011 	case CDROM_DA_NO_SUBCODE:
25012 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25013 		break;
25014 	case CDROM_DA_SUBQ:
25015 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25016 		break;
25017 	case CDROM_DA_ALL_SUBCODE:
25018 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25019 		break;
25020 	case CDROM_DA_SUBCODE_ONLY:
25021 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25022 		break;
25023 	default:
25024 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25025 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25026 		    cdda->cdda_subcode);
25027 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25028 		return (EINVAL);
25029 	}
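	/*
	 * buflen is the exact user buffer size for the request; for
	 * example, a 10-frame CDROM_DA_SUBQ read needs 10 * 2368 =
	 * 23680 bytes (2352 audio bytes plus 16 Q-subcode bytes per
	 * frame).
	 */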
25030 
25031 	/* Build and send the command */
25032 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25033 	bzero(cdb, CDB_GROUP5);
25034 
25035 	if (un->un_f_cfg_cdda == TRUE) {
25036 		cdb[0] = (char)SCMD_READ_CD;
25037 		cdb[1] = 0x04;
25038 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25039 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25040 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25041 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25042 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25043 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25044 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25045 		cdb[9] = 0x10;
25046 		switch (cdda->cdda_subcode) {
25047 		case CDROM_DA_NO_SUBCODE :
25048 			cdb[10] = 0x0;
25049 			break;
25050 		case CDROM_DA_SUBQ :
25051 			cdb[10] = 0x2;
25052 			break;
25053 		case CDROM_DA_ALL_SUBCODE :
25054 			cdb[10] = 0x1;
25055 			break;
25056 		case CDROM_DA_SUBCODE_ONLY :
25057 			/* FALLTHROUGH */
25058 		default :
25059 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25060 			kmem_free(com, sizeof (*com));
25061 			return (ENOTTY);
25062 		}
25063 	} else {
25064 		cdb[0] = (char)SCMD_READ_CDDA;
25065 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25066 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25067 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25068 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25069 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25070 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25071 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25072 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25073 		cdb[10] = cdda->cdda_subcode;
25074 	}
25075 
25076 	com->uscsi_cdb = cdb;
25077 	com->uscsi_cdblen = CDB_GROUP5;
25078 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25079 	com->uscsi_buflen = buflen;
25080 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25081 
25082 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25083 	    SD_PATH_STANDARD);
25084 
25085 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25086 	kmem_free(com, sizeof (*com));
25087 	return (rval);
25088 }
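/*
 * Illustrative user-level sketch (not part of the driver) of issuing
 * the CDROMCDDA ioctl handled above; the device path and error handling
 * are hypothetical/elided. Field names follow struct cdrom_cdda.
 *
 *	struct cdrom_cdda c;
 *	uchar_t buf[10 * 2368];
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	c.cdda_addr = 0;			(starting LBA)
 *	c.cdda_length = 10;			(frames to read)
 *	c.cdda_data = (caddr_t)buf;		(destination buffer)
 *	c.cdda_subcode = CDROM_DA_SUBQ;		(audio + Q subcode)
 *	(void) ioctl(fd, CDROMCDDA, &c);
 */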
25089 
25090 
25091 /*
25092  *    Function: sr_read_cdxa()
25093  *
25094  * Description: This routine is the driver entry point for handling CD-ROM
25095  *		ioctl requests to return CD-XA (Extended Architecture) data.
25096  *		(CDROMCDXA).
25097  *
25098  *   Arguments: dev	- the device 'dev_t'
25099  *		data	- pointer to user provided CD-XA structure specifying
25100  *			  the data starting address, transfer length, and format
25101  *		flag	- this argument is a pass through to ddi_copyxxx()
25102  *			  directly from the mode argument of ioctl().
25103  *
25104  * Return Code: the code returned by sd_send_scsi_cmd()
25105  *		EFAULT if ddi_copyxxx() fails
25106  *		ENXIO if ddi_get_soft_state fails
25107  *		EINVAL if data pointer is NULL
25108  */
25109 
25110 static int
25111 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25112 {
25113 	struct sd_lun		*un;
25114 	struct uscsi_cmd	*com;
25115 	struct cdrom_cdxa	*cdxa;
25116 	int			rval;
25117 	size_t			buflen;
25118 	char			cdb[CDB_GROUP5];
25119 	uchar_t			read_flags;
25120 
25121 #ifdef _MULTI_DATAMODEL
25122 	/* To support ILP32 applications in an LP64 world */
25123 	struct cdrom_cdxa32		cdrom_cdxa32;
25124 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25125 #endif /* _MULTI_DATAMODEL */
25126 
25127 	if (data == NULL) {
25128 		return (EINVAL);
25129 	}
25130 
25131 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25132 		return (ENXIO);
25133 	}
25134 
25135 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25136 
25137 #ifdef _MULTI_DATAMODEL
25138 	switch (ddi_model_convert_from(flag & FMODELS)) {
25139 	case DDI_MODEL_ILP32:
25140 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25141 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25142 			return (EFAULT);
25143 		}
25144 		/*
25145 		 * Convert the ILP32 uscsi data from the
25146 		 * application to LP64 for internal use.
25147 		 */
25148 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25149 		break;
25150 	case DDI_MODEL_NONE:
25151 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25152 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25153 			return (EFAULT);
25154 		}
25155 		break;
25156 	}
25157 #else /* ! _MULTI_DATAMODEL */
25158 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25159 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25160 		return (EFAULT);
25161 	}
25162 #endif /* _MULTI_DATAMODEL */
25163 
25164 	/*
25165 	 * MMC-2 allows at most 3 bytes for the transfer length, so
25166 	 * reject any length that does not fit in 3 bytes.
25167 	 */
25168 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25169 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25170 		    "cdrom transfer length too large: %d (limit %d)\n",
25171 		    cdxa->cdxa_length, 0xFFFFFF);
25172 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25173 		return (EINVAL);
25174 	}
25175 
25176 	switch (cdxa->cdxa_format) {
25177 	case CDROM_XA_DATA:
25178 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25179 		read_flags = 0x10;
25180 		break;
25181 	case CDROM_XA_SECTOR_DATA:
25182 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25183 		read_flags = 0xf8;
25184 		break;
25185 	case CDROM_XA_DATA_W_ERROR:
25186 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25187 		read_flags = 0xfc;
25188 		break;
25189 	default:
25190 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25191 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25192 		    cdxa->cdxa_format);
25193 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25194 		return (EINVAL);
25195 	}
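	/*
	 * read_flags becomes byte 9 of the READ CD CDB and selects which
	 * fields of each sector are returned: 0x10 requests user data
	 * only, 0xf8 the full 2352-byte raw sector, and 0xfc the raw
	 * sector plus error information (values as used below; see the
	 * MMC READ CD definition for the full field encoding).
	 */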
25196 
25197 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25198 	bzero(cdb, CDB_GROUP5);
25199 	if (un->un_f_mmc_cap == TRUE) {
25200 		cdb[0] = (char)SCMD_READ_CD;
25201 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25202 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25203 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25204 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25205 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25206 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25207 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25208 		cdb[9] = (char)read_flags;
25209 	} else {
25210 		/*
25211 		 * Note: A vendor specific command (0xDB) is being used here to
25212 		 * request a read of all subcodes.
25213 		 */
25214 		cdb[0] = (char)SCMD_READ_CDXA;
25215 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25216 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25217 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25218 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25219 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25220 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25221 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25222 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25223 		cdb[10] = cdxa->cdxa_format;
25224 	}
25225 	com->uscsi_cdb	   = cdb;
25226 	com->uscsi_cdblen  = CDB_GROUP5;
25227 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25228 	com->uscsi_buflen  = buflen;
25229 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25230 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25231 	    SD_PATH_STANDARD);
25232 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25233 	kmem_free(com, sizeof (*com));
25234 	return (rval);
25235 }
25236 
25237 
25238 /*
25239  *    Function: sr_eject()
25240  *
25241  * Description: This routine is the driver entry point for handling CD-ROM
25242  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25243  *
25244  *   Arguments: dev	- the device 'dev_t'
25245  *
25246  * Return Code: the code returned by sd_send_scsi_cmd()
25247  */
25248 
25249 static int
25250 sr_eject(dev_t dev)
25251 {
25252 	struct sd_lun	*un;
25253 	int		rval;
25254 
25255 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25256 	    (un->un_state == SD_STATE_OFFLINE)) {
25257 		return (ENXIO);
25258 	}
25259 
25260 	/*
25261 	 * To prevent race conditions with the eject
25262 	 * command, keep track of an eject command as
25263 	 * it progresses. If we are already handling
25264 	 * an eject command in the driver for the given
25265 	 * unit and another request to eject is received,
25266 	 * immediately return EAGAIN so we don't lose
25267 	 * the command if the current eject command fails.
25268 	 */
25269 	mutex_enter(SD_MUTEX(un));
25270 	if (un->un_f_ejecting == TRUE) {
25271 		mutex_exit(SD_MUTEX(un));
25272 		return (EAGAIN);
25273 	}
25274 	un->un_f_ejecting = TRUE;
25275 	mutex_exit(SD_MUTEX(un));
25276 
25277 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25278 	    SD_PATH_STANDARD)) != 0) {
25279 		mutex_enter(SD_MUTEX(un));
25280 		un->un_f_ejecting = FALSE;
25281 		mutex_exit(SD_MUTEX(un));
25282 		return (rval);
25283 	}
25284 
25285 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25286 	    SD_PATH_STANDARD);
25287 
25288 	if (rval == 0) {
25289 		mutex_enter(SD_MUTEX(un));
25290 		sr_ejected(un);
25291 		un->un_mediastate = DKIO_EJECTED;
25292 		un->un_f_ejecting = FALSE;
25293 		cv_broadcast(&un->un_state_cv);
25294 		mutex_exit(SD_MUTEX(un));
25295 	} else {
25296 		mutex_enter(SD_MUTEX(un));
25297 		un->un_f_ejecting = FALSE;
25298 		mutex_exit(SD_MUTEX(un));
25299 	}
25300 	return (rval);
25301 }
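/*
 * Illustrative user-level sketch (not part of the driver): the eject
 * path above is reached via, e.g., the CDROMEJECT ioctl. The device
 * path and error handling are hypothetical/elided.
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	(void) ioctl(fd, CDROMEJECT, 0);
 */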
25302 
25303 
25304 /*
25305  *    Function: sr_ejected()
25306  *
25307  * Description: This routine updates the soft state structure to invalidate the
25308  *		geometry information after the media has been ejected or a
25309  *		media eject has been detected.
25310  *
25311  *   Arguments: un - driver soft state (unit) structure
25312  */
25313 
25314 static void
25315 sr_ejected(struct sd_lun *un)
25316 {
25317 	struct sd_errstats *stp;
25318 
25319 	ASSERT(un != NULL);
25320 	ASSERT(mutex_owned(SD_MUTEX(un)));
25321 
25322 	un->un_f_blockcount_is_valid	= FALSE;
25323 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25324 	mutex_exit(SD_MUTEX(un));
25325 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25326 	mutex_enter(SD_MUTEX(un));
25327 
25328 	if (un->un_errstats != NULL) {
25329 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25330 		stp->sd_capacity.value.ui64 = 0;
25331 	}
25332 }
25333 
25334 
25335 /*
25336  *    Function: sr_check_wp()
25337  *
25338  * Description: This routine checks the write protection of removable
25339  *      media disks and hotpluggable devices via the write protect bit
25340  *      of the mode page header's device specific field. Some devices
25341  *      choke on an unsupported mode page. To work around this issue,
25342  *      this routine uses mode page 0x3f (request for all pages) for
25343  *      all device types.
25344  *
25345  *   Arguments: dev		- the device 'dev_t'
25346  *
25347  * Return Code: int indicating if the device is write protected (1) or not (0)
25348  *
25349  *     Context: Kernel thread.
25350  *
25351  */
25352 
25353 static int
25354 sr_check_wp(dev_t dev)
25355 {
25356 	struct sd_lun	*un;
25357 	uchar_t		device_specific;
25358 	uchar_t		*sense;
25359 	int		hdrlen;
25360 	int		rval = FALSE;
25361 
25362 	/*
25363 	 * Note: The return codes for this routine should be reworked to
25364 	 * properly handle the case of a NULL softstate.
25365 	 */
25366 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25367 		return (FALSE);
25368 	}
25369 
25370 	if (un->un_f_cfg_is_atapi == TRUE) {
25371 		/*
25372 		 * The mode page contents are not required; set the allocation
25373 		 * length for the mode page header only
25374 		 */
25375 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25376 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25377 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25378 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25379 			goto err_exit;
25380 		device_specific =
25381 		    ((struct mode_header_grp2 *)sense)->device_specific;
25382 	} else {
25383 		hdrlen = MODE_HEADER_LENGTH;
25384 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25385 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25386 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25387 			goto err_exit;
25388 		device_specific =
25389 		    ((struct mode_header *)sense)->device_specific;
25390 	}
25391 
25392 	/*
25393 	 * Report the media as write protected if the write protect bit
25394 	 * is set in the device specific byte. If the mode sense failed
25395 	 * above, rval remains FALSE and the device is assumed writable.
25396 	 */
25397 	if (device_specific & WRITE_PROTECT) {
25398 		rval = TRUE;
25399 	}
25400 
25401 err_exit:
25402 	kmem_free(sense, hdrlen);
25403 	return (rval);
25404 }
25405 
25406 /*
25407  *    Function: sr_volume_ctrl()
25408  *
25409  * Description: This routine is the driver entry point for handling CD-ROM
25410  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25411  *
25412  *   Arguments: dev	- the device 'dev_t'
25413  *		data	- pointer to user audio volume control structure
25414  *		flag	- this argument is a pass through to ddi_copyxxx()
25415  *			  directly from the mode argument of ioctl().
25416  *
25417  * Return Code: the code returned by sd_send_scsi_cmd()
25418  *		EFAULT if ddi_copyxxx() fails
25419  *		ENXIO if ddi_get_soft_state fails
25420  *		EINVAL if data pointer is NULL
25421  *
25422  */
25423 
25424 static int
25425 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25426 {
25427 	struct sd_lun		*un;
25428 	struct cdrom_volctrl    volume;
25429 	struct cdrom_volctrl    *vol = &volume;
25430 	uchar_t			*sense_page;
25431 	uchar_t			*select_page;
25432 	uchar_t			*sense;
25433 	uchar_t			*select;
25434 	int			sense_buflen;
25435 	int			select_buflen;
25436 	int			rval;
25437 
25438 	if (data == NULL) {
25439 		return (EINVAL);
25440 	}
25441 
25442 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25443 	    (un->un_state == SD_STATE_OFFLINE)) {
25444 		return (ENXIO);
25445 	}
25446 
25447 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25448 		return (EFAULT);
25449 	}
25450 
25451 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25452 		struct mode_header_grp2		*sense_mhp;
25453 		struct mode_header_grp2		*select_mhp;
25454 		int				bd_len;
25455 
25456 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25457 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25458 		    MODEPAGE_AUDIO_CTRL_LEN;
25459 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25460 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25461 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25462 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25463 		    SD_PATH_STANDARD)) != 0) {
25464 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25465 			    "sr_volume_ctrl: Mode Sense Failed\n");
25466 			kmem_free(sense, sense_buflen);
25467 			kmem_free(select, select_buflen);
25468 			return (rval);
25469 		}
25470 		sense_mhp = (struct mode_header_grp2 *)sense;
25471 		select_mhp = (struct mode_header_grp2 *)select;
25472 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25473 		    sense_mhp->bdesc_length_lo;
25474 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25475 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25476 			    "sr_volume_ctrl: Mode Sense returned invalid "
25477 			    "block descriptor length\n");
25478 			kmem_free(sense, sense_buflen);
25479 			kmem_free(select, select_buflen);
25480 			return (EIO);
25481 		}
25482 		sense_page = (uchar_t *)
25483 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25484 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25485 		select_mhp->length_msb = 0;
25486 		select_mhp->length_lsb = 0;
25487 		select_mhp->bdesc_length_hi = 0;
25488 		select_mhp->bdesc_length_lo = 0;
25489 	} else {
25490 		struct mode_header		*sense_mhp, *select_mhp;
25491 
25492 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25493 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25494 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25495 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25496 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25497 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25498 		    SD_PATH_STANDARD)) != 0) {
25499 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25500 			    "sr_volume_ctrl: Mode Sense Failed\n");
25501 			kmem_free(sense, sense_buflen);
25502 			kmem_free(select, select_buflen);
25503 			return (rval);
25504 		}
25505 		sense_mhp  = (struct mode_header *)sense;
25506 		select_mhp = (struct mode_header *)select;
25507 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25508 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25509 			    "sr_volume_ctrl: Mode Sense returned invalid "
25510 			    "block descriptor length\n");
25511 			kmem_free(sense, sense_buflen);
25512 			kmem_free(select, select_buflen);
25513 			return (EIO);
25514 		}
25515 		sense_page = (uchar_t *)
25516 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25517 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25518 		select_mhp->length = 0;
25519 		select_mhp->bdesc_length = 0;
25520 	}
25521 	/*
25522 	 * Note: An audio control data structure could be created and overlaid
25523 	 * on the following in place of the array indexing method implemented.
25524 	 */
25525 
25526 	/* Build the select data for the user volume data */
25527 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25528 	select_page[1] = 0xE;
25529 	/* Set the immediate bit */
25530 	select_page[2] = 0x04;
25531 	/* Zero out reserved fields */
25532 	select_page[3] = 0x00;
25533 	select_page[4] = 0x00;
25534 	/* Return sense data for fields not to be modified */
25535 	select_page[5] = sense_page[5];
25536 	select_page[6] = sense_page[6];
25537 	select_page[7] = sense_page[7];
25538 	/* Set the user specified volume levels for channel 0 and 1 */
25539 	select_page[8] = 0x01;
25540 	select_page[9] = vol->channel0;
25541 	select_page[10] = 0x02;
25542 	select_page[11] = vol->channel1;
25543 	/* Channels 2 and 3 are currently unsupported, so return sense data */
25544 	select_page[12] = sense_page[12];
25545 	select_page[13] = sense_page[13];
25546 	select_page[14] = sense_page[14];
25547 	select_page[15] = sense_page[15];
25548 
25549 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25550 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25551 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25552 	} else {
25553 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25554 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25555 	}
25556 
25557 	kmem_free(sense, sense_buflen);
25558 	kmem_free(select, select_buflen);
25559 	return (rval);
25560 }
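/*
 * Illustrative user-level sketch (not part of the driver) of the
 * CDROMVOLCTRL ioctl handled above; error handling elided. Volume
 * levels range from 0 (muted) to 255 (full volume).
 *
 *	struct cdrom_volctrl v;
 *
 *	v.channel0 = 255;	(left channel at full volume)
 *	v.channel1 = 128;	(right channel at half volume)
 *	(void) ioctl(fd, CDROMVOLCTRL, &v);
 */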
25561 
25562 
25563 /*
25564  *    Function: sr_read_sony_session_offset()
25565  *
25566  * Description: This routine is the driver entry point for handling CD-ROM
25567  *		ioctl requests for session offset information. (CDROMREADOFFSET)
25568  *		The address of the first track in the last session of a
25569  *		multi-session CD-ROM is returned.
25570  *
25571  *		Note: This routine uses a vendor specific key value in the
25572  *		command control field without implementing any vendor check here
25573  *		or in the ioctl routine.
25574  *
25575  *   Arguments: dev	- the device 'dev_t'
25576  *		data	- pointer to an int to hold the requested address
25577  *		flag	- this argument is a pass through to ddi_copyxxx()
25578  *			  directly from the mode argument of ioctl().
25579  *
25580  * Return Code: the code returned by sd_send_scsi_cmd()
25581  *		EFAULT if ddi_copyxxx() fails
25582  *		ENXIO if ddi_get_soft_state fails
25583  *		EINVAL if data pointer is NULL
25584  */
25585 
25586 static int
25587 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25588 {
25589 	struct sd_lun		*un;
25590 	struct uscsi_cmd	*com;
25591 	caddr_t			buffer;
25592 	char			cdb[CDB_GROUP1];
25593 	int			session_offset = 0;
25594 	int			rval;
25595 
25596 	if (data == NULL) {
25597 		return (EINVAL);
25598 	}
25599 
25600 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25601 	    (un->un_state == SD_STATE_OFFLINE)) {
25602 		return (ENXIO);
25603 	}
25604 
25605 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
25606 	bzero(cdb, CDB_GROUP1);
25607 	cdb[0] = SCMD_READ_TOC;
25608 	/*
25609 	 * Bytes 7 & 8 of the CDB hold the allocation length: 12 bytes for
25610 	 * a single entry (4 byte TOC response header + 8 bytes of data).
25611 	 */
25612 	cdb[8] = SONY_SESSION_OFFSET_LEN;
25613 	/* Byte 9 is the control byte. A vendor specific value is used */
25614 	cdb[9] = SONY_SESSION_OFFSET_KEY;
25615 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25616 	com->uscsi_cdb = cdb;
25617 	com->uscsi_cdblen = CDB_GROUP1;
25618 	com->uscsi_bufaddr = buffer;
25619 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
25620 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25621 
25622 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25623 	    SD_PATH_STANDARD);
25624 	if (rval != 0) {
25625 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25626 		kmem_free(com, sizeof (*com));
25627 		return (rval);
25628 	}
25629 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
25630 		session_offset =
25631 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
25632 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
25633 		/*
25634 		 * The offset is returned in units of the current target
25635 		 * block size; convert it to 2K blocks for the user.
25636 		 */
25637 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
25638 			session_offset >>= 2;
25639 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
25640 			session_offset >>= 1;
25641 		}
25642 	}
25643 
25644 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
25645 		rval = EFAULT;
25646 	}
25647 
25648 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25649 	kmem_free(com, sizeof (*com));
25650 	return (rval);
25651 }
25652 
25653 
25654 /*
25655  *    Function: sd_wm_cache_constructor()
25656  *
25657  * Description: Cache Constructor for the wmap cache for the read/modify/write
25658  * 		devices.
25659  *
25660  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25661  *		un	- sd_lun structure for the device.
25662  *		flag	- the km flags passed to constructor
25663  *		flags	- the KM flags passed to the constructor
25664  * Return Code: 0 on success.
25665  *		-1 on failure.
25666  */
25667 
25668 /*ARGSUSED*/
25669 static int
25670 sd_wm_cache_constructor(void *wm, void *un, int flags)
25671 {
25672 	bzero(wm, sizeof (struct sd_w_map));
25673 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
25674 	return (0);
25675 }
25676 
25677 
25678 /*
25679  *    Function: sd_wm_cache_destructor()
25680  *
25681  * Description: Cache destructor for the wmap cache for the read/modify/write
25682  * 		devices.
25683  *
25684  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
25685  *		un	- sd_lun structure for the device.
25686  */
25687 /*ARGSUSED*/
25688 static void
25689 sd_wm_cache_destructor(void *wm, void *un)
25690 {
25691 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
25692 }
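/*
 * A minimal sketch (for illustration; the actual call lives in the
 * attach path elsewhere in this driver) of how a wmap cache would be
 * created with the constructor/destructor pair above:
 *
 *	un->un_wm_cache = kmem_cache_create(name,
 *	    sizeof (struct sd_w_map), 8,
 *	    sd_wm_cache_constructor, sd_wm_cache_destructor,
 *	    NULL, (void *)un, NULL, 0);
 */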
25693 
25694 
25695 /*
25696  *    Function: sd_range_lock()
25697  *
25698  * Description: Lock the specified range of blocks to ensure that a
25699  *		read-modify-write is atomic and no other i/o writes
25700  *		to the same location. The range is specified in terms
25701  *		of start and end blocks; block numbers are actual
25702  *		media block numbers, not system block numbers.
25703  *
25704  *   Arguments: un	- sd_lun structure for the device.
25705  *		startb - The starting block number
25706  *		endb - The end block number
25707  *		typ - type of i/o - simple/read_modify_write
25708  *
25709  * Return Code: wm  - pointer to the wmap structure.
25710  *
25711  *     Context: This routine can sleep.
25712  */
25713 
25714 static struct sd_w_map *
25715 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
25716 {
25717 	struct sd_w_map *wmp = NULL;
25718 	struct sd_w_map *sl_wmp = NULL;
25719 	struct sd_w_map *tmp_wmp;
25720 	wm_state state = SD_WM_CHK_LIST;
25721 
25722 
25723 	ASSERT(un != NULL);
25724 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25725 
25726 	mutex_enter(SD_MUTEX(un));
25727 
25728 	while (state != SD_WM_DONE) {
25729 
25730 		switch (state) {
25731 		case SD_WM_CHK_LIST:
25732 			/*
25733 			 * This is the starting state. Check the wmap list
25734 			 * to see if the range is currently available.
25735 			 */
25736 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
25737 				/*
25738 				 * If this is a simple write and no rmw
25739 				 * i/o is pending then try to lock the
25740 				 * range as the range should be available.
25741 				 */
25742 				state = SD_WM_LOCK_RANGE;
25743 			} else {
25744 				tmp_wmp = sd_get_range(un, startb, endb);
25745 				if (tmp_wmp != NULL) {
25746 					if ((wmp != NULL) && ONLIST(un, wmp)) {
25747 						/*
25748 						 * Should not keep onlist wmps
25749 						 * while waiting; this macro
25750 						 * also sets wmp = NULL.
25751 						 */
25752 						FREE_ONLIST_WMAP(un, wmp);
25753 					}
25754 					/*
25755 					 * sl_wmp is the wmap on which the
25756 					 * wait is done. Since tmp_wmp points
25757 					 * to the in-use wmap, set sl_wmp to
25758 					 * tmp_wmp and change the state to wait.
25759 					 */
25760 					sl_wmp = tmp_wmp;
25761 					state = SD_WM_WAIT_MAP;
25762 				} else {
25763 					state = SD_WM_LOCK_RANGE;
25764 				}
25765 
25766 			}
25767 			break;
25768 
25769 		case SD_WM_LOCK_RANGE:
25770 			ASSERT(un->un_wm_cache);
25771 			/*
25772 			 * The range needs to be locked; try to get a wmap.
25773 			 * First attempt it with KM_NOSLEEP, since we want to
25774 			 * avoid sleeping if possible: we would have to release
25775 			 * the sd mutex in order to sleep.
25776 			 */
25777 			if (wmp == NULL)
25778 				wmp = kmem_cache_alloc(un->un_wm_cache,
25779 				    KM_NOSLEEP);
25780 			if (wmp == NULL) {
25781 				mutex_exit(SD_MUTEX(un));
25782 				_NOTE(DATA_READABLE_WITHOUT_LOCK
25783 				    (sd_lun::un_wm_cache))
25784 				wmp = kmem_cache_alloc(un->un_wm_cache,
25785 				    KM_SLEEP);
25786 				mutex_enter(SD_MUTEX(un));
25787 				/*
25788 				 * we released the mutex so recheck and go to
25789 				 * check list state.
25790 				 */
25791 				state = SD_WM_CHK_LIST;
25792 			} else {
25793 				/*
25794 				 * We exit the state machine since we
25795 				 * have the wmap. Do the housekeeping first:
25796 				 * place the wmap on the wmap list if it is
25797 				 * not already there, then set the state to done.
25798 				 */
25799 				wmp->wm_start = startb;
25800 				wmp->wm_end = endb;
25801 				wmp->wm_flags = typ | SD_WM_BUSY;
25802 				if (typ & SD_WTYPE_RMW) {
25803 					un->un_rmw_count++;
25804 				}
25805 				/*
25806 				 * If not already on the list, link it in
25807 				 */
25808 				if (!ONLIST(un, wmp)) {
25809 					wmp->wm_next = un->un_wm;
25810 					wmp->wm_prev = NULL;
25811 					if (wmp->wm_next)
25812 						wmp->wm_next->wm_prev = wmp;
25813 					un->un_wm = wmp;
25814 				}
25815 				state = SD_WM_DONE;
25816 			}
25817 			break;
25818 
25819 		case SD_WM_WAIT_MAP:
25820 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
25821 			/*
25822 			 * Wait is done on sl_wmp, which is set in the
25823 			 * check_list state.
25824 			 */
25825 			sl_wmp->wm_wanted_count++;
25826 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
25827 			sl_wmp->wm_wanted_count--;
25828 			/*
25829 			 * We can reuse the memory from the completed sl_wmp
25830 			 * lock range for our new lock, but only if no one is
25831 			 * waiting for it.
25832 			 */
25833 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
25834 			if (sl_wmp->wm_wanted_count == 0) {
25835 				if (wmp != NULL)
25836 					CHK_N_FREEWMP(un, wmp);
25837 				wmp = sl_wmp;
25838 			}
25839 			sl_wmp = NULL;
25840 			/*
25841 			 * After waking up, need to recheck for availability of
25842 			 * range.
25843 			 */
25844 			state = SD_WM_CHK_LIST;
25845 			break;
25846 
25847 		default:
25848 			panic("sd_range_lock: "
25849 			    "Unknown state %d in sd_range_lock", state);
25850 			/*NOTREACHED*/
25851 		} /* switch(state) */
25852 
25853 	} /* while(state != SD_WM_DONE) */
25854 
25855 	mutex_exit(SD_MUTEX(un));
25856 
25857 	ASSERT(wmp != NULL);
25858 
25859 	return (wmp);
25860 }
25861 
25862 
25863 /*
25864  *    Function: sd_get_range()
25865  *
25866  * Description: Determine whether any outstanding I/O overlaps this one.
25867  *		Returns the write-map of the first such I/O, NULL otherwise.
25868  *
25869  *   Arguments: un	- sd_lun structure for the device.
25870  *		startb - The starting block number
25871  *		endb - The end block number
25872  *
25873  * Return Code: wm  - pointer to the wmap structure.
25874  */
25875 
25876 static struct sd_w_map *
25877 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
25878 {
25879 	struct sd_w_map *wmp;
25880 
25881 	ASSERT(un != NULL);
25882 
25883 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
25884 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
25885 			continue;
25886 		}
25887 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
25888 			break;
25889 		}
25890 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
25891 			break;
25892 		}
25893 	}
25894 
25895 	return (wmp);
25896 }
25897 
25898 
25899 /*
25900  *    Function: sd_free_inlist_wmap()
25901  *
25902  * Description: Unlink and free a write map struct.
25903  *
25904  *   Arguments: un      - sd_lun structure for the device.
25905  *		wmp	- sd_w_map which needs to be unlinked.
25906  */
25907 
25908 static void
25909 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
25910 {
25911 	ASSERT(un != NULL);
25912 
25913 	if (un->un_wm == wmp) {
25914 		un->un_wm = wmp->wm_next;
25915 	} else {
25916 		wmp->wm_prev->wm_next = wmp->wm_next;
25917 	}
25918 
25919 	if (wmp->wm_next) {
25920 		wmp->wm_next->wm_prev = wmp->wm_prev;
25921 	}
25922 
25923 	wmp->wm_next = wmp->wm_prev = NULL;
25924 
25925 	kmem_cache_free(un->un_wm_cache, wmp);
25926 }
25927 
25928 
25929 /*
25930  *    Function: sd_range_unlock()
25931  *
25932  * Description: Unlock the range locked by wm.
25933  *		Free write map if nobody else is waiting on it.
25934  *
25935  *   Arguments: un      - sd_lun structure for the device.
25936  *              wm      - sd_w_map whose range is to be unlocked.
25937  */
25938 
25939 static void
25940 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
25941 {
25942 	ASSERT(un != NULL);
25943 	ASSERT(wm != NULL);
25944 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25945 
25946 	mutex_enter(SD_MUTEX(un));
25947 
25948 	if (wm->wm_flags & SD_WTYPE_RMW) {
25949 		un->un_rmw_count--;
25950 	}
25951 
25952 	if (wm->wm_wanted_count) {
25953 		wm->wm_flags = 0;
25954 		/*
25955 		 * Broadcast that the wmap is available now.
25956 		 */
25957 		cv_broadcast(&wm->wm_avail);
25958 	} else {
25959 		/*
25960 		 * If no one is waiting on the map, it should be freed.
25961 		 */
25962 		sd_free_inlist_wmap(un, wm);
25963 	}
25964 
25965 	mutex_exit(SD_MUTEX(un));
25966 }
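/*
 * A minimal sketch (for illustration; see sd_mapblocksize_iostart for
 * the real caller) of the locking pattern provided by the two routines
 * above for a read-modify-write span:
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, start_blk, end_blk, SD_WTYPE_RMW);
 *	(... read the span, modify it, write it back ...)
 *	sd_range_unlock(un, wm);
 */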
25967 
25968 
25969 /*
25970  *    Function: sd_read_modify_write_task
25971  *
25972  * Description: Called from a taskq thread to initiate the write phase of
25973  *		a read-modify-write request.  This is used for targets where
25974  *		un->un_sys_blocksize != un->un_tgt_blocksize.
25975  *
25976  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
25977  *
25978  *     Context: Called under taskq thread context.
25979  */
25980 
25981 static void
25982 sd_read_modify_write_task(void *arg)
25983 {
25984 	struct sd_mapblocksize_info	*bsp;
25985 	struct buf	*bp;
25986 	struct sd_xbuf	*xp;
25987 	struct sd_lun	*un;
25988 
25989 	bp = arg;	/* The bp is given in arg */
25990 	ASSERT(bp != NULL);
25991 
25992 	/* Get the pointer to the layer-private data struct */
25993 	xp = SD_GET_XBUF(bp);
25994 	ASSERT(xp != NULL);
25995 	bsp = xp->xb_private;
25996 	ASSERT(bsp != NULL);
25997 
25998 	un = SD_GET_UN(bp);
25999 	ASSERT(un != NULL);
26000 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26001 
26002 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26003 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26004 
26005 	/*
26006 	 * This is the write phase of a read-modify-write request, called
26007 	 * under the context of a taskq thread in response to the read
26008 	 * portion of the rmw request completing under interrupt
26009 	 * context. The write request must be sent from here down the iostart
26010 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
26011 	 * we use the layer index saved in the layer-private data area.
26012 	 */
26013 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26014 
26015 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26016 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26017 }
26018 
26019 
26020 /*
26021  *    Function: sddump_do_read_of_rmw()
26022  *
26023  * Description: This routine is called from sddump(). If sddump is called
26024  *		with an I/O that is not aligned on a device blocksize
26025  *		boundary, the write has to be converted to a read-modify-
26026  *		write; do the read part here to keep sddump simple.
26027  *		Note that sd_mutex is held across the call to this
26028  *		routine.
26029  *
26030  *   Arguments: un	- sd_lun
26031  *		blkno	- block number in terms of media block size.
26032  *		nblk	- number of blocks.
26033  *		bpp	- pointer to pointer to the buf structure. On return
26034  *			from this function, *bpp points to the valid buffer
26035  *			to which the write has to be done.
26036  *
26037  * Return Code: 0 for success or errno-type return code
26038  */
26039 
26040 static int
26041 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26042 	struct buf **bpp)
26043 {
26044 	int err;
26045 	int i;
26046 	int rval;
26047 	struct buf *bp;
26048 	struct scsi_pkt *pkt = NULL;
26049 	uint32_t target_blocksize;
26050 
26051 	ASSERT(un != NULL);
26052 	ASSERT(mutex_owned(SD_MUTEX(un)));
26053 
26054 	target_blocksize = un->un_tgt_blocksize;
26055 
26056 	mutex_exit(SD_MUTEX(un));
26057 
26058 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26059 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26060 	if (bp == NULL) {
26061 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26062 		    "no resources for dumping; giving up");
26063 		err = ENOMEM;
26064 		goto done;
26065 	}
26066 
26067 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26068 	    blkno, nblk);
26069 	if (rval != 0) {
26070 		scsi_free_consistent_buf(bp);
26071 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26072 		    "no resources for dumping; giving up");
26073 		err = ENOMEM;
26074 		goto done;
26075 	}
26076 
26077 	pkt->pkt_flags |= FLAG_NOINTR;
26078 
26079 	err = EIO;
26080 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26081 
26082 		/*
26083 		 * Scsi_poll returns 0 (success) if the command completes and
26084 		 * the status block is STATUS_GOOD.  We should only check
26085 		 * errors if this condition is not true.  Even then we should
26086 		 * send our own request sense packet only if we have a check
26087 		 * condition and auto request sense has not been performed by
26088 		 * the hba.
26089 		 */
26090 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26091 
26092 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26093 			err = 0;
26094 			break;
26095 		}
26096 
26097 		/*
26098 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
26099 		 * no need to read RQS data.
26100 		 */
26101 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26102 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26103 			    "Device is gone\n");
26104 			break;
26105 		}
26106 
26107 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26108 			SD_INFO(SD_LOG_DUMP, un,
26109 			    "sddump: read failed with CHECK, try # %d\n", i);
26110 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26111 				(void) sd_send_polled_RQS(un);
26112 			}
26113 
26114 			continue;
26115 		}
26116 
26117 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26118 			int reset_retval = 0;
26119 
26120 			SD_INFO(SD_LOG_DUMP, un,
26121 			    "sddump: read failed with BUSY, try # %d\n", i);
26122 
26123 			if (un->un_f_lun_reset_enabled == TRUE) {
26124 				reset_retval = scsi_reset(SD_ADDRESS(un),
26125 				    RESET_LUN);
26126 			}
26127 			if (reset_retval == 0) {
26128 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26129 			}
26130 			(void) sd_send_polled_RQS(un);
26131 
26132 		} else {
26133 			SD_INFO(SD_LOG_DUMP, un,
26134 			    "sddump: read failed with 0x%x, try # %d\n",
26135 			    SD_GET_PKT_STATUS(pkt), i);
26136 			mutex_enter(SD_MUTEX(un));
26137 			sd_reset_target(un, pkt);
26138 			mutex_exit(SD_MUTEX(un));
26139 		}
26140 
26141 		/*
26142 		 * If we are not getting anywhere with lun/target resets,
26143 		 * let's reset the bus.
26144 		 */
26145 		if (i > SD_NDUMP_RETRIES/2) {
26146 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26147 			(void) sd_send_polled_RQS(un);
26148 		}
26149 
26150 	}
26151 	scsi_destroy_pkt(pkt);
26152 
26153 	if (err != 0) {
26154 		scsi_free_consistent_buf(bp);
26155 		*bpp = NULL;
26156 	} else {
26157 		*bpp = bp;
26158 	}
26159 
26160 done:
26161 	mutex_enter(SD_MUTEX(un));
26162 	return (err);
26163 }
26164 
26165 
26166 /*
26167  *    Function: sd_failfast_flushq
26168  *
26169  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26170  *		in b_flags and move them onto the failfast queue, then kick
26171  *		off a thread to return all bp's on the failfast queue to
26172  *		their owners with an error set.
26173  *
26174  *   Arguments: un - pointer to the soft state struct for the instance.
26175  *
26176  *     Context: may execute in interrupt context.
26177  */
26178 
26179 static void
26180 sd_failfast_flushq(struct sd_lun *un)
26181 {
26182 	struct buf *bp;
26183 	struct buf *next_waitq_bp;
26184 	struct buf *prev_waitq_bp = NULL;
26185 
26186 	ASSERT(un != NULL);
26187 	ASSERT(mutex_owned(SD_MUTEX(un)));
26188 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26189 	ASSERT(un->un_failfast_bp == NULL);
26190 
26191 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26192 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26193 
26194 	/*
26195 	 * Check if we should flush all bufs when entering failfast state, or
26196 	 * just those with B_FAILFAST set.
26197 	 */
26198 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26199 		/*
26200 		 * Move *all* bp's on the wait queue to the failfast flush
26201 		 * queue, including those that do NOT have B_FAILFAST set.
26202 		 */
26203 		if (un->un_failfast_headp == NULL) {
26204 			ASSERT(un->un_failfast_tailp == NULL);
26205 			un->un_failfast_headp = un->un_waitq_headp;
26206 		} else {
26207 			ASSERT(un->un_failfast_tailp != NULL);
26208 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26209 		}
26210 
26211 		un->un_failfast_tailp = un->un_waitq_tailp;
26212 
26213 		/* update kstat for each bp moved out of the waitq */
26214 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26215 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26216 		}
26217 
26218 		/* empty the waitq */
26219 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26220 
26221 	} else {
26222 		/*
26223 		 * Go through the wait queue, pick off all entries with
26224 		 * B_FAILFAST set, and move these onto the failfast queue.
26225 		 */
26226 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26227 			/*
26228 			 * Save the pointer to the next bp on the wait queue,
26229 			 * so we get to it on the next iteration of this loop.
26230 			 */
26231 			next_waitq_bp = bp->av_forw;
26232 
26233 			/*
26234 			 * If this bp from the wait queue does NOT have
26235 			 * B_FAILFAST set, just move on to the next element
26236 			 * in the wait queue. Note, this is the only place
26237 			 * where it is correct to set prev_waitq_bp.
26238 			 */
26239 			if ((bp->b_flags & B_FAILFAST) == 0) {
26240 				prev_waitq_bp = bp;
26241 				continue;
26242 			}
26243 
26244 			/*
26245 			 * Remove the bp from the wait queue.
26246 			 */
26247 			if (bp == un->un_waitq_headp) {
26248 				/* The bp is the first element of the waitq. */
26249 				un->un_waitq_headp = next_waitq_bp;
26250 				if (un->un_waitq_headp == NULL) {
26251 					/* The wait queue is now empty */
26252 					un->un_waitq_tailp = NULL;
26253 				}
26254 			} else {
26255 				/*
26256 				 * The bp is either somewhere in the middle
26257 				 * or at the end of the wait queue.
26258 				 */
26259 				ASSERT(un->un_waitq_headp != NULL);
26260 				ASSERT(prev_waitq_bp != NULL);
26261 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26262 				    == 0);
26263 				if (bp == un->un_waitq_tailp) {
26264 					/* bp is the last entry on the waitq. */
26265 					ASSERT(next_waitq_bp == NULL);
26266 					un->un_waitq_tailp = prev_waitq_bp;
26267 				}
26268 				prev_waitq_bp->av_forw = next_waitq_bp;
26269 			}
26270 			bp->av_forw = NULL;
26271 
26272 			/*
26273 			 * update kstat since the bp is moved out of
26274 			 * the waitq
26275 			 */
26276 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26277 
26278 			/*
26279 			 * Now put the bp onto the failfast queue.
26280 			 */
26281 			if (un->un_failfast_headp == NULL) {
26282 				/* failfast queue is currently empty */
26283 				ASSERT(un->un_failfast_tailp == NULL);
26284 				un->un_failfast_headp =
26285 				    un->un_failfast_tailp = bp;
26286 			} else {
26287 				/* Add the bp to the end of the failfast q */
26288 				ASSERT(un->un_failfast_tailp != NULL);
26289 				ASSERT(un->un_failfast_tailp->b_flags &
26290 				    B_FAILFAST);
26291 				un->un_failfast_tailp->av_forw = bp;
26292 				un->un_failfast_tailp = bp;
26293 			}
26294 		}
26295 	}
26296 
26297 	/*
26298 	 * Now return all bp's on the failfast queue to their owners.
26299 	 */
26300 	while ((bp = un->un_failfast_headp) != NULL) {
26301 
26302 		un->un_failfast_headp = bp->av_forw;
26303 		if (un->un_failfast_headp == NULL) {
26304 			un->un_failfast_tailp = NULL;
26305 		}
26306 
26307 		/*
26308 		 * We want to return the bp with a failure error code, but
26309 		 * we do not want a call to sd_start_cmds() to occur here,
26310 		 * so use sd_return_failed_command_no_restart() instead of
26311 		 * sd_return_failed_command().
26312 		 */
26313 		sd_return_failed_command_no_restart(un, bp, EIO);
26314 	}
26315 
26316 	/* Flush the xbuf queues if required. */
26317 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26318 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26319 	}
26320 
26321 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26322 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26323 }
26324 
26325 
26326 /*
26327  *    Function: sd_failfast_flushq_callback
26328  *
26329  * Description: Return TRUE if the given bp meets the criteria for failfast
26330  *		flushing. Used with ddi_xbuf_flushq(9F).
26331  *
26332  *   Arguments: bp - ptr to buf struct to be examined.
26333  *
26334  *     Context: Any
26335  */
26336 
26337 static int
26338 sd_failfast_flushq_callback(struct buf *bp)
26339 {
26340 	/*
26341 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26342 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26343 	 */
26344 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26345 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26346 }
26347 
26348 
26349 
26350 #if defined(__i386) || defined(__amd64)
26351 /*
26352  * Function: sd_setup_next_xfer
26353  *
26354  * Description: Prepare next I/O operation using DMA_PARTIAL
26355  *
26356  */
26357 
26358 static int
26359 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26360     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26361 {
26362 	ssize_t	num_blks_not_xfered;
26363 	daddr_t	strt_blk_num;
26364 	ssize_t	bytes_not_xfered;
26365 	int	rval;
26366 
26367 	ASSERT(pkt->pkt_resid == 0);
26368 
26369 	/*
26370 	 * Calculate next block number and amount to be transferred.
26371 	 *
26372 	 * How much data has NOT been transferred to the HBA yet.
26373 	 */
26374 	bytes_not_xfered = xp->xb_dma_resid;
26375 
26376 	/*
26377 	 * Figure how many blocks have NOT been transferred to the HBA yet.
26378 	 */
26379 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26380 
26381 	/*
26382 	 * Set the starting block number to the end of what WAS transferred.
26383 	 */
26384 	strt_blk_num = xp->xb_blkno +
26385 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
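	/*
	 * Worked example (illustrative figures): with 512-byte target
	 * blocks, b_bcount = 64 KB and xb_dma_resid = 16 KB, 96 blocks
	 * have already been sent, so the next pkt starts at
	 * xb_blkno + 96 and covers the remaining 32 blocks.
	 */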
26386 
26387 	/*
26388 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
26389 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
26390 	 * the disk mutex here.
26391 	 */
26392 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26393 	    strt_blk_num, num_blks_not_xfered);
26394 
26395 	if (rval == 0) {
26396 
26397 		/*
26398 		 * Success.
26399 		 *
26400 		 * Adjust things if there are still more blocks to be
26401 		 * transferred.
26402 		 */
26403 		xp->xb_dma_resid = pkt->pkt_resid;
26404 		pkt->pkt_resid = 0;
26405 
26406 		return (1);
26407 	}
26408 
26409 	/*
26410 	 * There's really only one possible return value from
26411 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
26412 	 * returns NULL.
26413 	 */
26414 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26415 
26416 	bp->b_resid = bp->b_bcount;
26417 	bp->b_flags |= B_ERROR;
26418 
26419 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26420 	    "Error setting up next portion of DMA transfer\n");
26421 
26422 	return (0);
26423 }
26424 #endif
26425 
26426 /*
26427  *    Function: sd_panic_for_res_conflict
26428  *
26429  * Description: Call panic with a string formatted with "Reservation Conflict"
26430  *		and a human readable identifier indicating the SD instance
26431  *		that experienced the reservation conflict.
26432  *
26433  *   Arguments: un - pointer to the soft state struct for the instance.
26434  *
26435  *     Context: may execute in interrupt context.
26436  */
26437 
26438 #define	SD_RESV_CONFLICT_FMT_LEN 40
26439 void
26440 sd_panic_for_res_conflict(struct sd_lun *un)
26441 {
26442 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
26443 	char path_str[MAXPATHLEN];
26444 
26445 	(void) snprintf(panic_str, sizeof (panic_str),
26446 	    "Reservation Conflict\nDisk: %s",
26447 	    ddi_pathname(SD_DEVINFO(un), path_str));
26448 
26449 	panic(panic_str);
26450 }
26451 
26452 /*
26453  * Note: The following sd_faultinjection_ioctl( ) routines implement
26454  * driver support for handling fault injection for error analysis
26455  * causing faults in multiple layers of the driver.
26456  *
26457  */
26458 
26459 #ifdef SD_FAULT_INJECTION
26460 static uint_t   sd_fault_injection_on = 0;
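/*
 * A typical injection session, as implied by the ioctl cases below:
 * SDIOCSTART clears the fifo and log, one or more SDIOCINSERT{PKT,XB,
 * UN,ARQ} calls stage faults, SDIOCPUSH commits them onto the fifo,
 * SDIOCRUN arms injection, SDIOCRETRIEVE copies the log back out, and
 * SDIOCSTOP tears the session down.
 */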
26461 
26462 /*
26463  *    Function: sd_faultinjection_ioctl()
26464  *
26465  * Description: This routine is the driver entry point for handling
26466  *              faultinjection ioctls to inject errors into the
26467  *              layer model
26468  *
26469  *   Arguments: cmd	- the ioctl cmd received
26470  *		arg	- the argument from the user, also used for returns
26471  */
26472 
26473 static void
26474 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
26475 {
26476 	uint_t i;
26477 	uint_t rval;
26478 
26479 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26480 
26481 	mutex_enter(SD_MUTEX(un));
26482 
26483 	switch (cmd) {
26484 	case SDIOCRUN:
26485 		/* Allow pushed faults to be injected */
26486 		SD_INFO(SD_LOG_SDTEST, un,
26487 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26488 
26489 		sd_fault_injection_on = 1;
26490 
26491 		SD_INFO(SD_LOG_IOERR, un,
26492 		    "sd_faultinjection_ioctl: run finished\n");
26493 		break;
26494 
26495 	case SDIOCSTART:
26496 		/* Start Injection Session */
26497 		SD_INFO(SD_LOG_SDTEST, un,
26498 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26499 
26500 		sd_fault_injection_on = 0;
26501 		un->sd_injection_mask = 0xFFFFFFFF;
26502 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26503 			un->sd_fi_fifo_pkt[i] = NULL;
26504 			un->sd_fi_fifo_xb[i] = NULL;
26505 			un->sd_fi_fifo_un[i] = NULL;
26506 			un->sd_fi_fifo_arq[i] = NULL;
26507 		}
26508 		un->sd_fi_fifo_start = 0;
26509 		un->sd_fi_fifo_end = 0;
26510 
26511 		mutex_enter(&(un->un_fi_mutex));
26512 		un->sd_fi_log[0] = '\0';
26513 		un->sd_fi_buf_len = 0;
26514 		mutex_exit(&(un->un_fi_mutex));
26515 
26516 		SD_INFO(SD_LOG_IOERR, un,
26517 		    "sd_faultinjection_ioctl: start finished\n");
26518 		break;
26519 
26520 	case SDIOCSTOP:
26521 		/* Stop Injection Session */
26522 		SD_INFO(SD_LOG_SDTEST, un,
26523 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26524 		sd_fault_injection_on = 0;
26525 		un->sd_injection_mask = 0x0;
26526 
26527 		/* Empty stray or unused structs from the fifo */
26528 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26529 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26530 				kmem_free(un->sd_fi_fifo_pkt[i],
26531 				    sizeof (struct sd_fi_pkt));
26532 			}
26533 			if (un->sd_fi_fifo_xb[i] != NULL) {
26534 				kmem_free(un->sd_fi_fifo_xb[i],
26535 				    sizeof (struct sd_fi_xb));
26536 			}
26537 			if (un->sd_fi_fifo_un[i] != NULL) {
26538 				kmem_free(un->sd_fi_fifo_un[i],
26539 				    sizeof (struct sd_fi_un));
26540 			}
26541 			if (un->sd_fi_fifo_arq[i] != NULL) {
26542 				kmem_free(un->sd_fi_fifo_arq[i],
26543 				    sizeof (struct sd_fi_arq));
26544 			}
26545 			un->sd_fi_fifo_pkt[i] = NULL;
26546 			un->sd_fi_fifo_un[i] = NULL;
26547 			un->sd_fi_fifo_xb[i] = NULL;
26548 			un->sd_fi_fifo_arq[i] = NULL;
26549 		}
26550 		un->sd_fi_fifo_start = 0;
26551 		un->sd_fi_fifo_end = 0;
26552 
26553 		SD_INFO(SD_LOG_IOERR, un,
26554 		    "sd_faultinjection_ioctl: stop finished\n");
26555 		break;
26556 
26557 	case SDIOCINSERTPKT:
26558 		/* Store a packet struct to be pushed onto fifo */
26559 		SD_INFO(SD_LOG_SDTEST, un,
26560 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26561 
26562 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26563 
26564 		sd_fault_injection_on = 0;
26565 
26566 		/* No more than SD_FI_MAX_ERROR allowed in the queue */
26567 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26568 			kmem_free(un->sd_fi_fifo_pkt[i],
26569 			    sizeof (struct sd_fi_pkt));
26570 		}
26571 		if (arg != NULL) {
26572 			un->sd_fi_fifo_pkt[i] =
26573 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26574 			if (un->sd_fi_fifo_pkt[i] == NULL) {
26575 				/* Alloc failed don't store anything */
26576 				break;
26577 			}
26578 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26579 			    sizeof (struct sd_fi_pkt), 0);
26580 			if (rval == -1) {
26581 				kmem_free(un->sd_fi_fifo_pkt[i],
26582 				    sizeof (struct sd_fi_pkt));
26583 				un->sd_fi_fifo_pkt[i] = NULL;
26584 			}
26585 		} else {
26586 			SD_INFO(SD_LOG_IOERR, un,
26587 			    "sd_faultinjection_ioctl: pkt null\n");
26588 		}
26589 		break;
26590 
26591 	case SDIOCINSERTXB:
26592 		/* Store a xb struct to be pushed onto fifo */
26593 		SD_INFO(SD_LOG_SDTEST, un,
26594 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
26595 
26596 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26597 
26598 		sd_fault_injection_on = 0;
26599 
26600 		if (un->sd_fi_fifo_xb[i] != NULL) {
26601 			kmem_free(un->sd_fi_fifo_xb[i],
26602 			    sizeof (struct sd_fi_xb));
26603 			un->sd_fi_fifo_xb[i] = NULL;
26604 		}
26605 		if (arg != NULL) {
26606 			un->sd_fi_fifo_xb[i] =
26607 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
26608 			if (un->sd_fi_fifo_xb[i] == NULL) {
26609 				/* Alloc failed don't store anything */
26610 				break;
26611 			}
26612 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
26613 			    sizeof (struct sd_fi_xb), 0);
26614 
26615 			if (rval == -1) {
26616 				kmem_free(un->sd_fi_fifo_xb[i],
26617 				    sizeof (struct sd_fi_xb));
26618 				un->sd_fi_fifo_xb[i] = NULL;
26619 			}
26620 		} else {
26621 			SD_INFO(SD_LOG_IOERR, un,
26622 			    "sd_faultinjection_ioctl: xb null\n");
26623 		}
26624 		break;
26625 
26626 	case SDIOCINSERTUN:
26627 		/* Store a un struct to be pushed onto fifo */
26628 		SD_INFO(SD_LOG_SDTEST, un,
26629 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
26630 
26631 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26632 
26633 		sd_fault_injection_on = 0;
26634 
26635 		if (un->sd_fi_fifo_un[i] != NULL) {
26636 			kmem_free(un->sd_fi_fifo_un[i],
26637 			    sizeof (struct sd_fi_un));
26638 			un->sd_fi_fifo_un[i] = NULL;
26639 		}
26640 		if (arg != NULL) {
26641 			un->sd_fi_fifo_un[i] =
26642 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
26643 			if (un->sd_fi_fifo_un[i] == NULL) {
26644 				/* Alloc failed don't store anything */
26645 				break;
26646 			}
26647 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
26648 			    sizeof (struct sd_fi_un), 0);
26649 			if (rval == -1) {
26650 				kmem_free(un->sd_fi_fifo_un[i],
26651 				    sizeof (struct sd_fi_un));
26652 				un->sd_fi_fifo_un[i] = NULL;
26653 			}
26654 
26655 		} else {
26656 			SD_INFO(SD_LOG_IOERR, un,
26657 			    "sd_faultinjection_ioctl: un null\n");
26658 		}
26659 
26660 		break;
26661 
26662 	case SDIOCINSERTARQ:
26663 		/* Store a arq struct to be pushed onto fifo */
26664 		SD_INFO(SD_LOG_SDTEST, un,
26665 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
26666 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26667 
26668 		sd_fault_injection_on = 0;
26669 
26670 		if (un->sd_fi_fifo_arq[i] != NULL) {
26671 			kmem_free(un->sd_fi_fifo_arq[i],
26672 			    sizeof (struct sd_fi_arq));
26673 			un->sd_fi_fifo_arq[i] = NULL;
26674 		}
26675 		if (arg != NULL) {
26676 			un->sd_fi_fifo_arq[i] =
26677 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
26678 			if (un->sd_fi_fifo_arq[i] == NULL) {
26679 				/* Alloc failed don't store anything */
26680 				break;
26681 			}
26682 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
26683 			    sizeof (struct sd_fi_arq), 0);
26684 			if (rval == -1) {
26685 				kmem_free(un->sd_fi_fifo_arq[i],
26686 				    sizeof (struct sd_fi_arq));
26687 				un->sd_fi_fifo_arq[i] = NULL;
26688 			}
26689 
26690 		} else {
26691 			SD_INFO(SD_LOG_IOERR, un,
26692 			    "sd_faultinjection_ioctl: arq null\n");
26693 		}
26694 
26695 		break;
26696 
26697 	case SDIOCPUSH:
26698 		/* Push stored xb, pkt, un, and arq onto fifo */
26699 		sd_fault_injection_on = 0;
26700 
26701 		if (arg != NULL) {
26702 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
26703 			if (rval != -1 &&
26704 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26705 				un->sd_fi_fifo_end += i;
26706 			}
26707 		} else {
26708 			SD_INFO(SD_LOG_IOERR, un,
26709 			    "sd_faultinjection_ioctl: push arg null\n");
26710 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26711 				un->sd_fi_fifo_end++;
26712 			}
26713 		}
26714 		SD_INFO(SD_LOG_IOERR, un,
26715 		    "sd_faultinjection_ioctl: push to end=%d\n",
26716 		    un->sd_fi_fifo_end);
26717 		break;
26718 
26719 	case SDIOCRETRIEVE:
26720 		/* Return buffer of log from Injection session */
26721 		SD_INFO(SD_LOG_SDTEST, un,
26722 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve\n");
26723 
26724 		sd_fault_injection_on = 0;
26725 
26726 		mutex_enter(&(un->un_fi_mutex));
26727 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
26728 		    un->sd_fi_buf_len+1, 0);
26729 		mutex_exit(&(un->un_fi_mutex));
26730 
26731 		if (rval == -1) {
26732 			/*
26733 			 * arg is possibly invalid; set it
26734 			 * to NULL for the return.
26735 			 */
26736 			arg = NULL;
26737 		}
26738 		break;
26739 	}
26740 
26741 	mutex_exit(SD_MUTEX(un));
26742 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
26744 }
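
/*
 * Illustrative sketch (not part of the driver): when the driver is built
 * with SD_FAULT_INJECTION, a user-level test harness might drive the
 * fault-injection FIFO above roughly as follows.  The device path and the
 * staged field values are hypothetical; only the ioctl names
 * (SDIOCINSERTXB, SDIOCPUSH, SDIOCRETRIEVE), struct sd_fi_xb, and
 * SD_FI_MAX_BUF are taken from the handler above.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);	// hypothetical path
 *
 *	struct sd_fi_xb xb_fault;
 *	bzero(&xb_fault, sizeof (xb_fault));
 *	xb_fault.xb_sense_status = STATUS_CHECK;	// assumed field usage
 *	(void) ioctl(fd, SDIOCINSERTXB, &xb_fault);	// stage one xb entry
 *
 *	uint_t cnt = 1;
 *	(void) ioctl(fd, SDIOCPUSH, &cnt);		// push staged entries
 *
 *	char log[SD_FI_MAX_BUF];
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);		// fetch injection log
 */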
26745 
26746 
26747 /*
26748  *    Function: sd_injection_log()
26749  *
26750  * Description: This routine appends buf to the existing injection log
26751  *              for later retrieval via sd_faultinjection_ioctl(), for
26752  *              use in fault detection and recovery.
26753  *
26754  *   Arguments: buf - the string to add to the log
26755  */
26756 
26757 static void
26758 sd_injection_log(char *buf, struct sd_lun *un)
26759 {
26760 	uint_t len;
26761 
26762 	ASSERT(un != NULL);
26763 	ASSERT(buf != NULL);
26764 
26765 	mutex_enter(&(un->un_fi_mutex));
26766 
26767 	len = min(strlen(buf), 255);
26768 	/* Add logged value to Injection log to be returned later */
26769 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
26770 		uint_t	offset = strlen((char *)un->sd_fi_log);
26771 		char *destp = (char *)un->sd_fi_log + offset;
26772 		int i;
26773 		for (i = 0; i < len; i++) {
26774 			*destp++ = *buf++;
26775 		}
26776 		un->sd_fi_buf_len += len;
26777 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
26778 	}
26779 
26780 	mutex_exit(&(un->un_fi_mutex));
26781 }
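
/*
 * Illustrative call (the message text is hypothetical): while injection is
 * active, driver code can record a note for the test harness with
 *
 *	sd_injection_log("sdintr: injecting CHECK CONDITION\n", un);
 *
 * The text is appended to un->sd_fi_log and later returned to user level
 * via the SDIOCRETRIEVE ioctl above.
 */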
26782 
26783 
26784 /*
26785  *    Function: sd_faultinjection()
26786  *
26787  * Description: This routine takes the pkt and changes its
26788  *		content based on the error injection scenario.
26789  *
26790  *   Arguments: pktp	- packet to be changed
26791  */
26792 
26793 static void
26794 sd_faultinjection(struct scsi_pkt *pktp)
26795 {
26796 	uint_t i;
26797 	struct sd_fi_pkt *fi_pkt;
26798 	struct sd_fi_xb *fi_xb;
26799 	struct sd_fi_un *fi_un;
26800 	struct sd_fi_arq *fi_arq;
26801 	struct buf *bp;
26802 	struct sd_xbuf *xb;
26803 	struct sd_lun *un;
26804 
26805 	ASSERT(pktp != NULL);
26806 
26807 	/* pull bp, xb, and un from pktp */
26808 	bp = (struct buf *)pktp->pkt_private;
26809 	xb = SD_GET_XBUF(bp);
26810 	un = SD_GET_UN(bp);
26811 
26812 	ASSERT(un != NULL);
26813 
26814 	mutex_enter(SD_MUTEX(un));
26815 
26816 	SD_TRACE(SD_LOG_SDTEST, un,
26817 	    "sd_faultinjection: entry Injection from sdintr\n");
26818 
26819 	/* if injection is off return */
26820 	if (sd_fault_injection_on == 0 ||
26821 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
26822 		mutex_exit(SD_MUTEX(un));
26823 		return;
26824 	}
26825 
26826 
26827 	/* take next set off fifo */
26828 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
26829 
26830 	fi_pkt = un->sd_fi_fifo_pkt[i];
26831 	fi_xb = un->sd_fi_fifo_xb[i];
26832 	fi_un = un->sd_fi_fifo_un[i];
26833 	fi_arq = un->sd_fi_fifo_arq[i];
26834 
26835 
26836 	/* set variables accordingly */
26837 	/* set pkt if it was on fifo */
26838 	if (fi_pkt != NULL) {
26839 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
26840 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
26841 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
26842 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
26843 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
26844 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
26845 
26846 	}
26847 
26848 	/* set xb if it was on fifo */
26849 	if (fi_xb != NULL) {
26850 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
26851 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
26852 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
26853 		SD_CONDSET(xb, xb, xb_victim_retry_count,
26854 		    "xb_victim_retry_count");
26855 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
26856 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
26857 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
26858 
26859 		/* copy in block data from sense */
26860 		if (fi_xb->xb_sense_data[0] != -1) {
26861 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
26862 			    SENSE_LENGTH);
26863 		}
26864 
26865 		/* copy in extended sense codes */
26866 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
26867 		    "es_code");
26868 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
26869 		    "es_key");
26870 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
26871 		    "es_add_code");
26872 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
26873 		    es_qual_code, "es_qual_code");
26874 	}
26875 
26876 	/* set un if it was on fifo */
26877 	if (fi_un != NULL) {
26878 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
26879 		SD_CONDSET(un, un, un_ctype, "un_ctype");
26880 		SD_CONDSET(un, un, un_reset_retry_count,
26881 		    "un_reset_retry_count");
26882 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
26883 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
26884 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
26885 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
26886 		    "un_f_allow_bus_device_reset");
26887 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
26888 
26889 	}
26890 
26891 	/* copy in auto request sense if it was on fifo */
26892 	if (fi_arq != NULL) {
26893 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
26894 	}
26895 
26896 	/* free structs */
26897 	if (un->sd_fi_fifo_pkt[i] != NULL) {
26898 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
26899 	}
26900 	if (un->sd_fi_fifo_xb[i] != NULL) {
26901 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
26902 	}
26903 	if (un->sd_fi_fifo_un[i] != NULL) {
26904 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
26905 	}
26906 	if (un->sd_fi_fifo_arq[i] != NULL) {
26907 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
26908 	}
26909 
26910 	/*
26911 	 * kmem_free does not guarantee that the pointer
26912 	 * is set to NULL. Since we use these pointers to
26913 	 * determine whether values were staged, make sure
26914 	 * they are always NULL after the free.
26915 	 */
26916 	un->sd_fi_fifo_pkt[i] = NULL;
26917 	un->sd_fi_fifo_un[i] = NULL;
26918 	un->sd_fi_fifo_xb[i] = NULL;
26919 	un->sd_fi_fifo_arq[i] = NULL;
26920 
26921 	un->sd_fi_fifo_start++;
26922 
26923 	mutex_exit(SD_MUTEX(un));
26924 
26925 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
26926 }
26927 
26928 #endif /* SD_FAULT_INJECTION */
26929 
26930 /*
26931  * This routine is invoked in sd_unit_attach(). Before calling it, the
26932  * properties in the conf file, including the "hotpluggable" property,
26933  * must already have been processed.
26934  *
26935  * The sd driver distinguishes three different types of devices: removable
26936  * media, non-removable media, and hotpluggable. The differences are:
26937  *
26938  * 1. Device ID
26939  *
26940  *     The device ID of a device is used to identify this device. Refer to
26941  *     ddi_devid_register(9F).
26942  *
26943  *     For a non-removable media disk device that can provide the 0x80 or
26944  *     0x83 VPD page (refer to the INQUIRY command of the SCSI SPC spec), a
26945  *     unique device ID is created to identify this device. For other
26946  *     non-removable media devices, a default device ID is created only if
26947  *     the device has at least 2 alternate cylinders; otherwise it has none.
26948  *
26949  *     -------------------------------------------------------
26950  *     removable media   hotpluggable  | Can Have Device ID
26951  *     -------------------------------------------------------
26952  *         false             false     |     Yes
26953  *         false             true      |     Yes
26954  *         true                x       |     No
26955  *     ------------------------------------------------------
26956  *
26957  *
26958  * 2. SCSI group 4 commands
26959  *
26960  *     In the SCSI specs, only some commands in the group 4 command set
26961  *     take 8-byte logical block addresses; the 4-byte addresses used by
26962  *     other commands reach at most 2^32 blocks, i.e. 2TB at 512 bytes per
26963  *     block. Without group 4 support it is therefore impossible to make
26964  *     full use of a disk with a capacity larger than 2TB.
26965  *
26966  *     -----------------------------------------------
26967  *     removable media   hotpluggable   LP64  |  Group
26968  *     -----------------------------------------------
26969  *           false          false       false |   1
26970  *           false          false       true  |   4
26971  *           false          true        false |   1
26972  *           false          true        true  |   4
26973  *           true             x           x   |   5
26974  *     -----------------------------------------------
26975  *
26976  *
26977  * 3. Check for VTOC Label
26978  *
26979  *     If a direct-access disk has no EFI label, sd will check if it has a
26980  *     valid VTOC label. sd now also performs that check for removable
26981  *     media and hotpluggable devices.
26982  *
26983  *     --------------------------------------------------------------
26984  *     Direct-Access   removable media    hotpluggable |  Check Label
26985  *     -------------------------------------------------------------
26986  *         false          false           false        |   No
26987  *         false          false           true         |   No
26988  *         false          true            false        |   Yes
26989  *         false          true            true         |   Yes
26990  *         true            x                x          |   Yes
26991  *     --------------------------------------------------------------
26992  *
26993  *
26994  * 4. Building default VTOC label
26995  *
26996  *     As section 3 says, sd checks whether certain kinds of devices have a
26997  *     VTOC label. If such a device has no valid VTOC label, sd(7d) will
26998  *     attempt to create a default VTOC for it. Currently sd creates a
26999  *     default VTOC label for all devices on the x86 platform (VTOC_16),
27000  *     but only for removable media devices on SPARC (VTOC_8).
27001  *
27002  *     -----------------------------------------------------------
27003  *       removable media hotpluggable platform   |   Default Label
27004  *     -----------------------------------------------------------
27005  *             false          false    sparc     |     No
27006  *             false          true      x86      |     Yes
27007  *             false          true     sparc     |     Yes
27008  *             true             x        x       |     Yes
27009  *     ----------------------------------------------------------
27010  *
27011  *
27012  * 5. Supported blocksizes of target devices
27013  *
27014  *     Sd supports a non-512-byte blocksize for removable media devices
27015  *     only. For other devices, only a 512-byte blocksize is supported.
27016  *     This may change in the near future because some RAID devices
27017  *     require a non-512-byte blocksize.
27018  *
27019  *     -----------------------------------------------------------
27020  *     removable media    hotpluggable    | non-512-byte blocksize
27021  *     -----------------------------------------------------------
27022  *           false          false         |   No
27023  *           false          true          |   No
27024  *           true             x           |   Yes
27025  *     -----------------------------------------------------------
27026  *
27027  *
27028  * 6. Automatic mount & unmount
27029  *
27030  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used
27031  *     to query whether a device is a removable media device. It returns 1
27032  *     for removable media devices, and 0 for others.
27033  *
27034  *     The automatic mounting subsystem should distinguish between the types
27035  *     of devices and apply automounting policies to each.
27036  *
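 *     For illustration only (the file descriptor fd is hypothetical): a
 *     user program might query removability like this, treating a nonzero
 *     result as removable media:
 *
 *         int rm = 0;
 *         if (ioctl(fd, DKIOCREMOVABLE, &rm) == 0 && rm != 0)
 *             apply_removable_policy();    (hypothetical helper)
 *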
27037  *
27038  * 7. fdisk partition management
27039  *
27040  *     Fdisk is the traditional partitioning method on the x86 platform.
27041  *     The sd(7d) driver supports fdisk partitions only on x86; on SPARC,
27042  *     sd doesn't support fdisk partitions at all. Note: pcfs(7fs) can
27043  *     recognize fdisk partitions on both the x86 and SPARC platforms.
27044  *
27045  *     -----------------------------------------------------------
27046  *       platform   removable media  USB/1394  |  fdisk supported
27047  *     -----------------------------------------------------------
27048  *        x86         X               X        |       true
27049  *     ------------------------------------------------------------
27050  *        sparc       X               X        |       false
27051  *     ------------------------------------------------------------
27052  *
27053  *
27054  * 8. MBOOT/MBR
27055  *
27056  *     Although sd(7d) doesn't support fdisk on SPARC, it does support
27057  *     read/write of the mboot for removable media and hotpluggable devices.
27058  *
27059  *     -----------------------------------------------------------
27060  *       platform   removable media  USB/1394  |  mboot supported
27061  *     -----------------------------------------------------------
27062  *        x86         X               X        |       true
27063  *     ------------------------------------------------------------
27064  *        sparc      false           false     |       false
27065  *        sparc      false           true      |       true
27066  *        sparc      true            false     |       true
27067  *        sparc      true            true      |       true
27068  *     ------------------------------------------------------------
27069  *
27070  *
27071  * 9.  error handling during opening device
27072  *
27073  *     If failed to open a disk device, an errno is returned. For some kinds
27074  *     of errors, different errno is returned depending on if this device is
27075  *     a removable media device. This brings USB/1394 hard disks in line with
27076  *     expected hard disk behavior. It is not expected that this breaks any
27077  *     application.
27078  *
27079  *     ------------------------------------------------------
27080  *       removable media    hotpluggable   |  errno
27081  *     ------------------------------------------------------
27082  *             false          false        |   EIO
27083  *             false          true         |   EIO
27084  *             true             x          |   ENXIO
27085  *     ------------------------------------------------------
27086  *
27087  *
27088  * 10. ioctls: DKIOCEJECT, CDROMEJECT
27089  *
27090  *     These IOCTLs are applicable only to removable media devices.
27091  *
27092  *     -----------------------------------------------------------
27093  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27094  *     -----------------------------------------------------------
27095  *             false          false        |     No
27096  *             false          true         |     No
27097  *             true            x           |     Yes
27098  *     -----------------------------------------------------------
27099  *
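 *     For illustration only (the file descriptor fd is hypothetical): a
 *     removable device's media can be ejected with a call such as
 *
 *         (void) ioctl(fd, DKIOCEJECT, 0);
 *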
27100  *
27101  * 11. Kstats for partitions
27102  *
27103  *     sd creates partition kstats for non-removable media devices. USB and
27104  *     Firewire hard disks now have partition kstats.
27105  *
27106  *      ------------------------------------------------------
27107  *       removable media    hotpluggable   |   kstat
27108  *      ------------------------------------------------------
27109  *             false          false        |    Yes
27110  *             false          true         |    Yes
27111  *             true             x          |    No
27112  *       ------------------------------------------------------
27113  *
27114  *
27115  * 12. Removable media & hotpluggable properties
27116  *
27117  *     The sd driver creates a "removable-media" property for removable
27118  *     media devices. A parent nexus driver creates a "hotpluggable"
27119  *     property if it supports hotplugging.
27120  *
27121  *     ---------------------------------------------------------------------
27122  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27123  *     ---------------------------------------------------------------------
27124  *       false            false       |    No                   No
27125  *       false            true        |    No                   Yes
27126  *       true             false       |    Yes                  No
27127  *       true             true        |    Yes                  Yes
27128  *     ---------------------------------------------------------------------
27129  *
27130  *
27131  * 13. Power Management
27132  *
27133  *     sd only power manages removable media devices or devices that support
27134  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
27135  *
27136  *     A parent nexus that supports hotplugging can also set "pm-capable"
27137  *     if the disk can be power managed.
27138  *
27139  *     ------------------------------------------------------------
27140  *       removable media hotpluggable pm-capable  |   power manage
27141  *     ------------------------------------------------------------
27142  *             false          false     false     |     No
27143  *             false          false     true      |     Yes
27144  *             false          true      false     |     No
27145  *             false          true      true      |     Yes
27146  *             true             x        x        |     Yes
27147  *     ------------------------------------------------------------
27148  *
27149  *      USB and FireWire hard disks can now be power managed independently
27150  *      of the framebuffer.
27151  *
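 *      For illustration only (child_dip is hypothetical): a parent nexus
 *      could advertise this capability with something like
 *
 *          (void) ddi_prop_update_int(DDI_DEV_T_NONE, child_dip,
 *              "pm-capable", 1);
 *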
27152  *
27153  * 14. Support for USB disks with capacity larger than 1TB
27154  *
27155  *     Currently, sd doesn't permit a fixed disk device with a capacity
27156  *     larger than 1TB to be used in a 32-bit operating system environment.
27157  *     However, sd doesn't apply that check to removable media devices;
27158  *     instead, it assumes that removable media devices cannot have a
27159  *     capacity larger than 1TB. Using such devices on a 32-bit system is
27160  *     therefore only partially supported, which can cause unexpected results.
27161  *
27162  *     ---------------------------------------------------------------------
27163  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27164  *     ---------------------------------------------------------------------
27165  *             false          false  |   true         |     no
27166  *             false          true   |   true         |     no
27167  *             true           false  |   true         |     Yes
27168  *             true           true   |   true         |     Yes
27169  *     ---------------------------------------------------------------------
27170  *
27171  *
27172  * 15. Check write-protection at open time
27173  *
27174  *     When a removable media device is opened for writing without the
27175  *     NDELAY flag, sd checks whether the device is writable. Opening a
27176  *     write-protected device for writing without NDELAY fails.
27177  *
27178  *     ------------------------------------------------------------
27179  *       removable media    USB/1394   |   WP Check
27180  *     ------------------------------------------------------------
27181  *             false          false    |     No
27182  *             false          true     |     No
27183  *             true           false    |     Yes
27184  *             true           true     |     Yes
27185  *     ------------------------------------------------------------
27186  *
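 *     For illustration only (the device path is hypothetical): an open
 *     such as
 *
 *         fd = open("/dev/rdsk/c0t0d0s2", O_RDWR | O_NDELAY);
 *
 *     bypasses the write-protect check, while the same open without
 *     O_NDELAY fails if the media is write-protected.
 *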
27187  *
27188  * 16. Syslog when a corrupted VTOC is encountered
27189  *
27190  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
27191  *      message only for fixed SCSI disks.
27192  *     ------------------------------------------------------------
27193  *       removable media    USB/1394   |   print syslog
27194  *     ------------------------------------------------------------
27195  *             false          false    |     Yes
27196  *             false          true     |     No
27197  *             true           false    |     No
27198  *             true           true     |     No
27199  *     ------------------------------------------------------------
27200  */
27201 static void
27202 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27203 {
27204 	int	pm_capable_prop;
27205 
27206 	ASSERT(un->un_sd);
27207 	ASSERT(un->un_sd->sd_inq);
27208 
27209 	/*
27210 	 * Enable SYNC CACHE support for all devices.
27211 	 */
27212 	un->un_f_sync_cache_supported = TRUE;
27213 
27214 	if (un->un_sd->sd_inq->inq_rmb) {
27215 		/*
27216 		 * The media of this device is removable, so the medium may
27217 		 * be changed after the device has been opened. Thus we
27218 		 * should support this operation.
27219 		 */
27220 		un->un_f_has_removable_media = TRUE;
27221 
27222 		/*
27223 		 * support non-512-byte blocksize of removable media devices
27224 		 */
27225 		un->un_f_non_devbsize_supported = TRUE;
27226 
27227 		/*
27228 		 * Assume that all removable media devices support DOOR_LOCK
27229 		 */
27230 		un->un_f_doorlock_supported = TRUE;
27231 
27232 		/*
27233 		 * A removable media device may be opened with the NDELAY
27234 		 * flag when there is no media in the drive; in that case we
27235 		 * don't care whether the device is writable. Without the
27236 		 * NDELAY flag, we must check whether the media is write-protected.
27237 		 */
27238 		un->un_f_chk_wp_open = TRUE;
27239 
27240 		/*
27241 		 * Need to start a SCSI watch thread to monitor the media
27242 		 * state; when media is inserted or ejected, notify syseventd.
27243 		 */
27244 		un->un_f_monitor_media_state = TRUE;
27245 
27246 		/*
27247 		 * Some devices don't support START_STOP_UNIT command.
27248 		 * Therefore, we'd better check if a device supports it
27249 		 * before sending it.
27250 		 */
27251 		un->un_f_check_start_stop = TRUE;
27252 
27253 		/*
27254 		 * support eject media ioctl:
27255 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27256 		 */
27257 		un->un_f_eject_media_supported = TRUE;
27258 
27259 		/*
27260 		 * Because many removable-media devices don't support
27261 		 * LOG_SENSE, we can't use that command to check whether
27262 		 * a removable media device supports power management.
27263 		 * We assume that they support power management via the
27264 		 * START_STOP_UNIT command and can be spun up and down
27265 		 * without limitation.
27266 		 */
27267 		un->un_f_pm_supported = TRUE;
27268 
27269 		/*
27270 		 * Need to create a zero length (Boolean) property
27271 		 * removable-media for the removable media devices.
27272 		 * Note that the return value is not checked: if the
27273 		 * property cannot be created, we do not want the attach
27274 		 * to fail altogether. This is consistent with other
27275 		 * property creation in attach.
27276 		 */
27277 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27278 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27279 
27280 	} else {
27281 		/*
27282 		 * create device ID for device
27283 		 */
27284 		un->un_f_devid_supported = TRUE;
27285 
27286 		/*
27287 		 * Spin up non-removable-media devices once it is attached
27288 		 */
27289 		un->un_f_attach_spinup = TRUE;
27290 
27291 		/*
27292 		 * According to the SCSI specification, sense data comes in
27293 		 * two formats: fixed and descriptor. At present, we don't
27294 		 * support descriptor format sense data for removable
27295 		 * media.
27296 		 */
27297 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27298 			un->un_f_descr_format_supported = TRUE;
27299 		}
27300 
27301 		/*
27302 		 * kstats are created only for non-removable media devices.
27303 		 *
27304 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27305 		 * default is 1, so they are enabled by default.
27306 		 */
27307 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27308 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
27309 		    "enable-partition-kstats", 1));
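		/*
		 * Illustrative driver.conf fragment (hypothetical): the
		 * default of 1 above could be overridden in sd.conf with
		 *
		 *	enable-partition-kstats=0;
		 */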
27310 
27311 		/*
27312 		 * Check if HBA has set the "pm-capable" property.
27313 		 * If "pm-capable" exists and is non-zero then we can
27314 		 * power manage the device without checking the start/stop
27315 		 * cycle count log sense page.
27316 		 *
27317 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27318 		 * then we should not power manage the device.
27319 		 *
27320 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27321 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27322 		 * sd will check the start/stop cycle count log sense page
27323 		 * and power manage the device if the cycle count limit has
27324 		 * not been exceeded.
27325 		 */
27326 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27327 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27328 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27329 			un->un_f_log_sense_supported = TRUE;
27330 		} else {
27331 			/*
27332 			 * pm-capable property exists.
27333 			 *
27334 			 * Convert "TRUE" values for pm_capable_prop to
27335 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27336 			 * later. "TRUE" values are any values except
27337 			 * SD_PM_CAPABLE_FALSE (0) and
27338 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27339 			 */
27340 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27341 				un->un_f_log_sense_supported = FALSE;
27342 			} else {
27343 				un->un_f_pm_supported = TRUE;
27344 			}
27345 
27346 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27347 			    "sd_unit_attach: un:0x%p pm-capable "
27348 			    "property set to %d.\n", un, un->un_f_pm_supported);
27349 		}
27350 	}
27351 
27352 	if (un->un_f_is_hotpluggable) {
27353 
27354 		/*
27355 		 * Have to watch hotpluggable devices as well, since
27356 		 * that's the only way for userland applications to
27357 		 * detect hot removal while device is busy/mounted.
27358 		 */
27359 		un->un_f_monitor_media_state = TRUE;
27360 
27361 		un->un_f_check_start_stop = TRUE;
27362 
27363 	}
27364 }
27365 
27366 /*
27367  * sd_tg_rdwr:
27368  * Provides rdwr access for cmlb via sd_tgops. The start_block is
27369  * in units of the system block size; reqlength is in bytes.
27370  *
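 * Worked example (illustrative): assume a 512-byte system block size and
 * a 2048-byte target block size. A request with start_block = 3 and
 * reqlength = 1024 yields first_byte = 1536, real_addr = 0, end_block = 2,
 * and buffer_size = 4096. Since first_byte is not target-block aligned, a
 * bounce buffer (dkl) is allocated and the caller's data occupies byte
 * offset 1536 within it.
 *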
27371  */
27372 static int
27373 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27374     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27375 {
27376 	struct sd_lun *un;
27377 	int path_flag = (int)(uintptr_t)tg_cookie;
27378 	char *dkl = NULL;
27379 	diskaddr_t real_addr = start_block;
27380 	diskaddr_t first_byte, end_block;
27381 
27382 	size_t	buffer_size = reqlength;
27383 	int rval;
27384 	diskaddr_t	cap;
27385 	uint32_t	lbasize;
27386 
27387 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27388 	if (un == NULL)
27389 		return (ENXIO);
27390 
27391 	if (cmd != TG_READ && cmd != TG_WRITE)
27392 		return (EINVAL);
27393 
27394 	mutex_enter(SD_MUTEX(un));
27395 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27396 		mutex_exit(SD_MUTEX(un));
27397 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27398 		    &lbasize, path_flag);
27399 		if (rval != 0)
27400 			return (rval);
27401 		mutex_enter(SD_MUTEX(un));
27402 		sd_update_block_info(un, lbasize, cap);
27403 		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27404 			mutex_exit(SD_MUTEX(un));
27405 			return (EIO);
27406 		}
27407 	}
27408 
27409 	if (NOT_DEVBSIZE(un)) {
27410 		/*
27411 		 * sys_blocksize != tgt_blocksize, need to re-adjust
27412 		 * blkno and save the index to beginning of dk_label
27413 		 */
27414 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27415 		real_addr = first_byte / un->un_tgt_blocksize;
27416 
27417 		end_block = (first_byte + reqlength +
27418 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27419 
27420 		/* round up buffer size to multiple of target block size */
27421 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27422 
27423 		SD_TRACE(SD_LOG_IO_PARTITION, un,
27424 		    "sd_tg_rdwr: label_addr: 0x%x allocation size: 0x%x\n",
27425 		    real_addr, buffer_size);
27426 
27427 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27428 		    (reqlength % un->un_tgt_blocksize) != 0)
27429 			/* the request is not aligned */
27430 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27431 	}
27432 
27433 	/*
27434 	 * The MMC standard allows READ CAPACITY to be
27435 	 * inaccurate by a bounded amount (in the interest of
27436 	 * response latency).  As a result, failed READs are
27437 	 * commonplace (due to the reading of metadata and not
27438 	 * data). Depending on the per-Vendor/drive Sense data,
27439 	 * the failed READ can cause many (unnecessary) retries.
27440 	 */
27441 
27442 	if (ISCD(un) && (cmd == TG_READ) &&
27443 	    (un->un_f_blockcount_is_valid == TRUE) &&
27444 	    ((start_block == (un->un_blockcount - 1)) ||
27445 	    (start_block == (un->un_blockcount - 2)))) {
27446 		path_flag = SD_PATH_DIRECT_PRIORITY;
27447 	}
27448 
27449 	mutex_exit(SD_MUTEX(un));
27450 	if (cmd == TG_READ) {
27451 		rval = sd_send_scsi_READ(un, (dkl != NULL) ? dkl : bufaddr,
27452 		    buffer_size, real_addr, path_flag);
27453 		if (dkl != NULL)
27454 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27455 			    real_addr), bufaddr, reqlength);
27456 	} else {
27457 		if (dkl) {
27458 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27459 			    real_addr, path_flag);
27460 			if (rval) {
27461 				kmem_free(dkl, buffer_size);
27462 				return (rval);
27463 			}
27464 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27465 			    real_addr), reqlength);
27466 		}
27467 		rval = sd_send_scsi_WRITE(un, (dkl != NULL) ? dkl : bufaddr,
27468 		    buffer_size, real_addr, path_flag);
27469 	}
27470 
27471 	if (dkl != NULL)
27472 		kmem_free(dkl, buffer_size);
27473 
27474 	return (rval);
27475 }
27476 
27477 
27478 static int
27479 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
27480 {
27481 
27482 	struct sd_lun *un;
27483 	diskaddr_t	cap;
27484 	uint32_t	lbasize;
27485 	int		path_flag = (int)(uintptr_t)tg_cookie;
27486 	int		ret = 0;
27487 
27488 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27489 	if (un == NULL)
27490 		return (ENXIO);
27491 
27492 	switch (cmd) {
27493 	case TG_GETPHYGEOM:
27494 	case TG_GETVIRTGEOM:
27495 	case TG_GETCAPACITY:
27496 	case TG_GETBLOCKSIZE:
27497 		mutex_enter(SD_MUTEX(un));
27498 
27499 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27500 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27501 			cap = un->un_blockcount;
27502 			lbasize = un->un_tgt_blocksize;
27503 			mutex_exit(SD_MUTEX(un));
27504 		} else {
27505 			mutex_exit(SD_MUTEX(un));
27506 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27507 			    &lbasize, path_flag);
27508 			if (ret != 0)
27509 				return (ret);
27510 			mutex_enter(SD_MUTEX(un));
27511 			sd_update_block_info(un, lbasize, cap);
27512 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27513 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27514 				mutex_exit(SD_MUTEX(un));
27515 				return (EIO);
27516 			}
27517 			mutex_exit(SD_MUTEX(un));
27518 		}
27519 
27520 		if (cmd == TG_GETCAPACITY) {
27521 			*(diskaddr_t *)arg = cap;
27522 			return (0);
27523 		}
27524 
27525 		if (cmd == TG_GETBLOCKSIZE) {
27526 			*(uint32_t *)arg = lbasize;
27527 			return (0);
27528 		}
27529 
27530 		if (cmd == TG_GETPHYGEOM)
27531 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27532 			    cap, lbasize, path_flag);
27533 		else
27534 			/* TG_GETVIRTGEOM */
27535 			ret = sd_get_virtual_geometry(un,
27536 			    (cmlb_geom_t *)arg, cap, lbasize);
27537 
27538 		return (ret);
27539 
27540 	case TG_GETATTR:
27541 		mutex_enter(SD_MUTEX(un));
27542 		((tg_attribute_t *)arg)->media_is_writable =
27543 		    un->un_f_mmc_writable_media;
27544 		mutex_exit(SD_MUTEX(un));
27545 		return (0);
27546 	default:
27547 		return (ENOTTY);
27548 
27549 	}
27550 
27551 }
27552