xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision a5b881a79e40ec2c21d682e676b130a1ee3d2a73)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however, at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * The driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
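
/*
 * For example, both masks can be adjusted without rebuilding the driver by
 * setting them in /etc/system (an illustrative sketch; all-ones simply turns
 * on every component and level, and the individual bit assignments are those
 * in sddef.h -- use the ssd_ names when tuning the ssd module):
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0xffffffff
 */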

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain; each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * to the controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flags to indicate whether the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};
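
/*
 * A sketch of the intended use: each node holds, per HBA (pdip), the most
 * recent scsi_probe() result for every target on that bus, with cache[]
 * indexed by target number (see sd_scsi_probe_with_cache() below).
 */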

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))


/*
 * Vendor-specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
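
/*
 * For example, SD_TOUPPER('a') evaluates to 'A', while any character that
 * is not a lowercase letter passes through unchanged.  Note that, unlike a
 * function, the macro may evaluate its argument more than once, so the
 * argument must not have side effects.
 */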

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps,
 * something else as defined by the flags at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags define which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16-byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device.
 *	 MAM3182FC, MAM3364FC, and MAM3738FC do not appear to have ever been
 *	 made with an FC connection; the entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC", SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC", SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC", SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);



#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd page for "page code" */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)	\
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
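
/*
 * A minimal usage sketch (state values as defined in sddef.h): a routine
 * that temporarily suspends the unit can save the current state and later
 * revert to it, typically with SD_MUTEX(un) held:
 *
 *	New_state(un, SD_STATE_SUSPENDED);
 *	... do the work ...
 *	Restore_state(un);
 */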

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
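
/*
 * Each row above describes one CDB group: the CDB size, the group code,
 * the highest addressable LBA, and the maximum transfer length in blocks.
 * For example, group 0 (6-byte CDBs) can address only a 21-bit LBA
 * (0x1FFFFF) and transfer at most 0xFF blocks, while group 4 (16-byte
 * CDBs) allows a 64-bit LBA and a 32-bit transfer length.
 */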

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
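
/*
 * For example (a sketch; the rcd_flag and wce_flag arguments of
 * sd_cache_control() below map onto the RCD and WCE bits of the caching
 * mode page), write caching can be enabled while leaving the read cache
 * untouched with:
 *
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */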

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
	enum uio_seg dataspace, int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code, clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, int path_flag);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,	\
	path_flag)

static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
	uint16_t buflen, uchar_t page_code, uchar_t page_control,
	uint16_t param_ptr, int path_flag);

static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
static void sd_free_rqs(struct sd_lun *un);

static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
	uchar_t *data, int len, int fmt);
static void sd_panic_for_res_conflict(struct sd_lun *un);

/*
 * Disk Ioctl Function Prototypes
 */
static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);

/*
 * Multi-host Ioctl Prototypes
 */
static int sd_check_mhd(dev_t dev, int interval);
static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
static char *sd_sname(uchar_t status);
static void sd_mhd_resvd_recover(void *arg);
1454 static void sd_resv_reclaim_thread(void);
1455 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1456 static int sd_reserve_release(dev_t dev, int cmd);
1457 static void sd_rmv_resv_reclaim_req(dev_t dev);
1458 static void sd_mhd_reset_notify_cb(caddr_t arg);
1459 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1460 	mhioc_inkeys_t *usrp, int flag);
1461 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1462 	mhioc_inresvs_t *usrp, int flag);
1463 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1465 static int sd_mhdioc_release(dev_t dev);
1466 static int sd_mhdioc_register_devid(dev_t dev);
1467 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1468 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1469 
1470 /*
1471  * SCSI removable prototypes
1472  */
1473 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1476 static int sr_pause_resume(dev_t dev, int mode);
1477 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1478 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1488 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1489 static int sr_eject(dev_t dev);
1490 static void sr_ejected(struct sd_lun *un);
1491 static int sr_check_wp(dev_t dev);
1492 static int sd_check_media(dev_t dev, enum dkio_state state);
1493 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1494 static void sd_delayed_cv_broadcast(void *arg);
1495 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1496 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1497 
1498 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1499 
1500 /*
1501  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1502  */
1503 static void sd_check_for_writable_cd(struct sd_lun *un, int path_flag);
1504 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1505 static void sd_wm_cache_destructor(void *wm, void *un);
1506 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1507 	daddr_t endb, ushort_t typ);
1508 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1509 	daddr_t endb);
1510 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1511 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1512 static void sd_read_modify_write_task(void *arg);
1513 static int sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno,
1514 	uint64_t nblk, struct buf **bpp);
1515 
1516 
1517 
1518 /*
1519  * Function prototypes for failfast support.
1520  */
1521 static void sd_failfast_flushq(struct sd_lun *un);
1522 static int sd_failfast_flushq_callback(struct buf *bp);
1523 
1524 /*
1525  * Function prototypes to check for lsi devices
1526  */
1527 static void sd_is_lsi(struct sd_lun *un);
1528 
1529 /*
1530  * Function prototypes for x86 support
1531  */
1532 #if defined(__i386) || defined(__amd64)
1533 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1534 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1535 #endif
1536 
1537 
1538 /* Function prototypes for cmlb */
1539 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1540     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1541 
1542 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1543 
1544 /*
1545  * Constants for failfast support:
1546  *
1547  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1548  * failfast processing being performed.
1549  *
1550  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1551  * failfast processing on all bufs with B_FAILFAST set.
1552  */
1553 
1554 #define	SD_FAILFAST_INACTIVE		0
1555 #define	SD_FAILFAST_ACTIVE		1
1556 
1557 /*
1558  * Bitmask to control behavior of buf(9S) flushes when a transition to
1559  * the failfast state occurs. Optional bits include:
1560  *
1561  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1562  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1563  * be flushed.
1564  *
1565  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1566  * driver, in addition to the regular wait queue. This includes the xbuf
1567  * queues. When clear, only the driver's wait queue will be flushed.
1568  */
1569 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1570 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1571 
1572 /*
1573  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1574  * to flush all queues within the driver.
1575  */
1576 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
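
/*
 * Tuning sketch (an assumed example, not a shipped default): since
 * sd_failfast_flushctl is a module-global variable, an administrator who
 * wants a failfast transition to flush every queued buf, not just those
 * marked B_FAILFAST, could set both bits from /etc/system:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */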
1577 
1578 
1579 /*
1580  * SD Testing Fault Injection
1581  */
1582 #ifdef SD_FAULT_INJECTION
1583 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1584 static void sd_faultinjection(struct scsi_pkt *pktp);
1585 static void sd_injection_log(char *buf, struct sd_lun *un);
1586 #endif
1587 
1588 /*
1589  * Device driver ops vector
1590  */
1591 static struct cb_ops sd_cb_ops = {
1592 	sdopen,			/* open */
1593 	sdclose,		/* close */
1594 	sdstrategy,		/* strategy */
1595 	nodev,			/* print */
1596 	sddump,			/* dump */
1597 	sdread,			/* read */
1598 	sdwrite,		/* write */
1599 	sdioctl,		/* ioctl */
1600 	nodev,			/* devmap */
1601 	nodev,			/* mmap */
1602 	nodev,			/* segmap */
1603 	nochpoll,		/* poll */
1604 	sd_prop_op,		/* cb_prop_op */
1605 	0,			/* streamtab  */
1606 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1607 	CB_REV,			/* cb_rev */
1608 	sdaread, 		/* async I/O read entry point */
1609 	sdawrite		/* async I/O write entry point */
1610 };
1611 
1612 static struct dev_ops sd_ops = {
1613 	DEVO_REV,		/* devo_rev, */
1614 	0,			/* refcnt  */
1615 	sdinfo,			/* info */
1616 	nulldev,		/* identify */
1617 	sdprobe,		/* probe */
1618 	sdattach,		/* attach */
1619 	sddetach,		/* detach */
1620 	nodev,			/* reset */
1621 	&sd_cb_ops,		/* driver operations */
1622 	NULL,			/* bus operations */
1623 	sdpower			/* power */
1624 };
1625 
1626 
1627 /*
1628  * This is the loadable module wrapper.
1629  */
1630 #include <sys/modctl.h>
1631 
1632 static struct modldrv modldrv = {
1633 	&mod_driverops,		/* Type of module. This one is a driver */
1634 	SD_MODULE_NAME,		/* Module name. */
1635 	&sd_ops			/* driver ops */
1636 };
1637 
1638 
1639 static struct modlinkage modlinkage = {
1640 	MODREV_1,
1641 	&modldrv,
1642 	NULL
1643 };
1644 
1645 static cmlb_tg_ops_t sd_tgops = {
1646 	TG_DK_OPS_VERSION_1,
1647 	sd_tg_rdwr,
1648 	sd_tg_getinfo
1649 };
1650 
1651 static struct scsi_asq_key_strings sd_additional_codes[] = {
1652 	0x81, 0, "Logical Unit is Reserved",
1653 	0x85, 0, "Audio Address Not Valid",
1654 	0xb6, 0, "Media Load Mechanism Failed",
1655 	0xB9, 0, "Audio Play Operation Aborted",
1656 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1657 	0x53, 2, "Medium removal prevented",
1658 	0x6f, 0, "Authentication failed during key exchange",
1659 	0x6f, 1, "Key not present",
1660 	0x6f, 2, "Key not established",
1661 	0x6f, 3, "Read without proper authentication",
1662 	0x6f, 4, "Mismatched region to this logical unit",
1663 	0x6f, 5, "Region reset count error",
1664 	0xffff, 0x0, NULL
1665 };
1666 
1667 
1668 /*
1669  * Struct for passing printing information for sense data messages
1670  */
1671 struct sd_sense_info {
1672 	int	ssi_severity;
1673 	int	ssi_pfa_flag;
1674 };
1675 
1676 /*
1677  * Table of function pointers for iostart-side routines. Separate "chains"
1678  * of layered function calls are formed by placing the function pointers
1679  * sequentially in the desired order. Functions are called according to an
1680  * incrementing table index ordering. The last function in each chain must
1681  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1682  * in the sd_iodone_chain[] array.
1683  *
1684  * Note: It may seem more natural to organize both the iostart and iodone
1685  * functions together, into an array of structures (or some similar
1686  * organization) with a common index, rather than two separate arrays which
1687  * must be maintained in synchronization. The purpose of this division is
1688  * to achieve improved performance: individual arrays allow for more
1689  * effective cache line utilization on certain platforms.
1690  */
1691 
1692 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1693 
1694 
1695 static sd_chain_t sd_iostart_chain[] = {
1696 
1697 	/* Chain for buf IO for disk drive targets (PM enabled) */
1698 	sd_mapblockaddr_iostart,	/* Index: 0 */
1699 	sd_pm_iostart,			/* Index: 1 */
1700 	sd_core_iostart,		/* Index: 2 */
1701 
1702 	/* Chain for buf IO for disk drive targets (PM disabled) */
1703 	sd_mapblockaddr_iostart,	/* Index: 3 */
1704 	sd_core_iostart,		/* Index: 4 */
1705 
1706 	/* Chain for buf IO for removable-media targets (PM enabled) */
1707 	sd_mapblockaddr_iostart,	/* Index: 5 */
1708 	sd_mapblocksize_iostart,	/* Index: 6 */
1709 	sd_pm_iostart,			/* Index: 7 */
1710 	sd_core_iostart,		/* Index: 8 */
1711 
1712 	/* Chain for buf IO for removable-media targets (PM disabled) */
1713 	sd_mapblockaddr_iostart,	/* Index: 9 */
1714 	sd_mapblocksize_iostart,	/* Index: 10 */
1715 	sd_core_iostart,		/* Index: 11 */
1716 
1717 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1718 	sd_mapblockaddr_iostart,	/* Index: 12 */
1719 	sd_checksum_iostart,		/* Index: 13 */
1720 	sd_pm_iostart,			/* Index: 14 */
1721 	sd_core_iostart,		/* Index: 15 */
1722 
1723 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1724 	sd_mapblockaddr_iostart,	/* Index: 16 */
1725 	sd_checksum_iostart,		/* Index: 17 */
1726 	sd_core_iostart,		/* Index: 18 */
1727 
1728 	/* Chain for USCSI commands (all targets) */
1729 	sd_pm_iostart,			/* Index: 19 */
1730 	sd_core_iostart,		/* Index: 20 */
1731 
1732 	/* Chain for checksumming USCSI commands (all targets) */
1733 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1734 	sd_pm_iostart,			/* Index: 22 */
1735 	sd_core_iostart,		/* Index: 23 */
1736 
1737 	/* Chain for "direct" USCSI commands (all targets) */
1738 	sd_core_iostart,		/* Index: 24 */
1739 
1740 	/* Chain for "direct priority" USCSI commands (all targets) */
1741 	sd_core_iostart,		/* Index: 25 */
1742 };
1743 
1744 /*
1745  * Macros to locate the first function of each iostart chain in the
1746  * sd_iostart_chain[] array. These are located by the index in the array.
1747  */
1748 #define	SD_CHAIN_DISK_IOSTART			0
1749 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1750 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1751 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1752 #define	SD_CHAIN_CHKSUM_IOSTART			12
1753 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1754 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1755 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1756 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1757 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1758 
1759 
1760 /*
1761  * Table of function pointers for the iodone-side routines for the driver-
1762  * internal layering mechanism.  The calling sequence for iodone routines
1763  * uses a decrementing table index, so the last routine called in a chain
1764  * must be at the lowest array index location for that chain.  The last
1765  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1766  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1767  * of the functions in an iodone side chain must correspond to the ordering
1768  * of the iostart routines for that chain.  Note that there is no iodone
1769  * side routine that corresponds to sd_core_iostart(), so there is no
1770  * entry in the table for this.
1771  */
1772 
1773 static sd_chain_t sd_iodone_chain[] = {
1774 
1775 	/* Chain for buf IO for disk drive targets (PM enabled) */
1776 	sd_buf_iodone,			/* Index: 0 */
1777 	sd_mapblockaddr_iodone,		/* Index: 1 */
1778 	sd_pm_iodone,			/* Index: 2 */
1779 
1780 	/* Chain for buf IO for disk drive targets (PM disabled) */
1781 	sd_buf_iodone,			/* Index: 3 */
1782 	sd_mapblockaddr_iodone,		/* Index: 4 */
1783 
1784 	/* Chain for buf IO for removable-media targets (PM enabled) */
1785 	sd_buf_iodone,			/* Index: 5 */
1786 	sd_mapblockaddr_iodone,		/* Index: 6 */
1787 	sd_mapblocksize_iodone,		/* Index: 7 */
1788 	sd_pm_iodone,			/* Index: 8 */
1789 
1790 	/* Chain for buf IO for removable-media targets (PM disabled) */
1791 	sd_buf_iodone,			/* Index: 9 */
1792 	sd_mapblockaddr_iodone,		/* Index: 10 */
1793 	sd_mapblocksize_iodone,		/* Index: 11 */
1794 
1795 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1796 	sd_buf_iodone,			/* Index: 12 */
1797 	sd_mapblockaddr_iodone,		/* Index: 13 */
1798 	sd_checksum_iodone,		/* Index: 14 */
1799 	sd_pm_iodone,			/* Index: 15 */
1800 
1801 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1802 	sd_buf_iodone,			/* Index: 16 */
1803 	sd_mapblockaddr_iodone,		/* Index: 17 */
1804 	sd_checksum_iodone,		/* Index: 18 */
1805 
1806 	/* Chain for USCSI commands (non-checksum targets) */
1807 	sd_uscsi_iodone,		/* Index: 19 */
1808 	sd_pm_iodone,			/* Index: 20 */
1809 
1810 	/* Chain for USCSI commands (checksum targets) */
1811 	sd_uscsi_iodone,		/* Index: 21 */
1812 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1813 	sd_pm_iodone,			/* Index: 23 */
1814 
1815 	/* Chain for "direct" USCSI commands (all targets) */
1816 	sd_uscsi_iodone,		/* Index: 24 */
1817 
1818 	/* Chain for "direct priority" USCSI commands (all targets) */
1819 	sd_uscsi_iodone,		/* Index: 25 */
1820 };
1821 
1822 
1823 /*
1824  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1825  * each iodone-side chain. These are located by the array index, but as the
1826  * iodone side functions are called in a decrementing-index order, the
1827  * highest index number in each chain must be specified (as these correspond
1828  * to the first function in the iodone chain that will be called by the core
1829  * at IO completion time).
1830  */
1831 
1832 #define	SD_CHAIN_DISK_IODONE			2
1833 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1834 #define	SD_CHAIN_RMMEDIA_IODONE			8
1835 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1836 #define	SD_CHAIN_CHKSUM_IODONE			15
1837 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1838 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1839 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1840 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1841 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1842 
1843 
1844 
1845 
1846 /*
1847  * Array to map a layering chain index to the appropriate initpkt routine.
1848  * The redundant entries are present so that the index used for accessing
1849  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1850  * with this table as well.
1851  */
1852 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1853 
1854 static sd_initpkt_t	sd_initpkt_map[] = {
1855 
1856 	/* Chain for buf IO for disk drive targets (PM enabled) */
1857 	sd_initpkt_for_buf,		/* Index: 0 */
1858 	sd_initpkt_for_buf,		/* Index: 1 */
1859 	sd_initpkt_for_buf,		/* Index: 2 */
1860 
1861 	/* Chain for buf IO for disk drive targets (PM disabled) */
1862 	sd_initpkt_for_buf,		/* Index: 3 */
1863 	sd_initpkt_for_buf,		/* Index: 4 */
1864 
1865 	/* Chain for buf IO for removable-media targets (PM enabled) */
1866 	sd_initpkt_for_buf,		/* Index: 5 */
1867 	sd_initpkt_for_buf,		/* Index: 6 */
1868 	sd_initpkt_for_buf,		/* Index: 7 */
1869 	sd_initpkt_for_buf,		/* Index: 8 */
1870 
1871 	/* Chain for buf IO for removable-media targets (PM disabled) */
1872 	sd_initpkt_for_buf,		/* Index: 9 */
1873 	sd_initpkt_for_buf,		/* Index: 10 */
1874 	sd_initpkt_for_buf,		/* Index: 11 */
1875 
1876 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1877 	sd_initpkt_for_buf,		/* Index: 12 */
1878 	sd_initpkt_for_buf,		/* Index: 13 */
1879 	sd_initpkt_for_buf,		/* Index: 14 */
1880 	sd_initpkt_for_buf,		/* Index: 15 */
1881 
1882 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1883 	sd_initpkt_for_buf,		/* Index: 16 */
1884 	sd_initpkt_for_buf,		/* Index: 17 */
1885 	sd_initpkt_for_buf,		/* Index: 18 */
1886 
1887 	/* Chain for USCSI commands (non-checksum targets) */
1888 	sd_initpkt_for_uscsi,		/* Index: 19 */
1889 	sd_initpkt_for_uscsi,		/* Index: 20 */
1890 
1891 	/* Chain for USCSI commands (checksum targets) */
1892 	sd_initpkt_for_uscsi,		/* Index: 21 */
1893 	sd_initpkt_for_uscsi,		/* Index: 22 */
1894 	sd_initpkt_for_uscsi,		/* Index: 23 */
1895 
1896 	/* Chain for "direct" USCSI commands (all targets) */
1897 	sd_initpkt_for_uscsi,		/* Index: 24 */
1898 
1899 	/* Chain for "direct priority" USCSI commands (all targets) */
1900 	sd_initpkt_for_uscsi,		/* Index: 25 */
1901 
1902 };
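
/*
 * Illustrative sketch (not a verbatim excerpt of the core code): the maps
 * let the core resolve the correct initpkt routine straight from the xbuf's
 * chain index, and tear the packet down later through the matching entry in
 * sd_destroypkt_map[] below. Assumes the SD_PKT_ALLOC_SUCCESS return
 * convention from sddef.h.
 *
 *	struct sd_xbuf *xp = SD_GET_XBUF(bp);
 *	struct scsi_pkt *pktp = NULL;
 *
 *	if ((*sd_initpkt_map[xp->xb_chain_iostart])(bp, &pktp) ==
 *	    SD_PKT_ALLOC_SUCCESS) {
 *		... transport pktp ...
 *		(*sd_destroypkt_map[xp->xb_chain_iodone])(bp);
 *	}
 *
 * The redundant entries guarantee that any valid chain index is a valid
 * index into both maps.
 */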
1903 
1904 
1905 /*
1906  * Array to map a layering chain index to the appropriate destroypkt routine.
1907  * The redundant entries are present so that the index used for accessing
1908  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1909  * with this table as well.
1910  */
1911 typedef void (*sd_destroypkt_t)(struct buf *);
1912 
1913 static sd_destroypkt_t	sd_destroypkt_map[] = {
1914 
1915 	/* Chain for buf IO for disk drive targets (PM enabled) */
1916 	sd_destroypkt_for_buf,		/* Index: 0 */
1917 	sd_destroypkt_for_buf,		/* Index: 1 */
1918 	sd_destroypkt_for_buf,		/* Index: 2 */
1919 
1920 	/* Chain for buf IO for disk drive targets (PM disabled) */
1921 	sd_destroypkt_for_buf,		/* Index: 3 */
1922 	sd_destroypkt_for_buf,		/* Index: 4 */
1923 
1924 	/* Chain for buf IO for removable-media targets (PM enabled) */
1925 	sd_destroypkt_for_buf,		/* Index: 5 */
1926 	sd_destroypkt_for_buf,		/* Index: 6 */
1927 	sd_destroypkt_for_buf,		/* Index: 7 */
1928 	sd_destroypkt_for_buf,		/* Index: 8 */
1929 
1930 	/* Chain for buf IO for removable-media targets (PM disabled) */
1931 	sd_destroypkt_for_buf,		/* Index: 9 */
1932 	sd_destroypkt_for_buf,		/* Index: 10 */
1933 	sd_destroypkt_for_buf,		/* Index: 11 */
1934 
1935 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1936 	sd_destroypkt_for_buf,		/* Index: 12 */
1937 	sd_destroypkt_for_buf,		/* Index: 13 */
1938 	sd_destroypkt_for_buf,		/* Index: 14 */
1939 	sd_destroypkt_for_buf,		/* Index: 15 */
1940 
1941 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1942 	sd_destroypkt_for_buf,		/* Index: 16 */
1943 	sd_destroypkt_for_buf,		/* Index: 17 */
1944 	sd_destroypkt_for_buf,		/* Index: 18 */
1945 
1946 	/* Chain for USCSI commands (non-checksum targets) */
1947 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1948 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1949 
1950 	/* Chain for USCSI commands (checksum targets) */
1951 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1952 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1953 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1954 
1955 	/* Chain for "direct" USCSI commands (all targets) */
1956 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1957 
1958 	/* Chain for "direct priority" USCSI commands (all targets) */
1959 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1960 
1961 };
1962 
1963 
1964 
1965 /*
1966  * Array to map a layering chain index to the appropriate chain "type".
1967  * The chain type indicates a specific property/usage of the chain.
1968  * The redundant entries are present so that the index used for accessing
1969  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1970  * with this table as well.
1971  */
1972 
1973 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1974 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1975 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1976 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1977 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1978 						/* (for error recovery) */
1979 
1980 static int sd_chain_type_map[] = {
1981 
1982 	/* Chain for buf IO for disk drive targets (PM enabled) */
1983 	SD_CHAIN_BUFIO,			/* Index: 0 */
1984 	SD_CHAIN_BUFIO,			/* Index: 1 */
1985 	SD_CHAIN_BUFIO,			/* Index: 2 */
1986 
1987 	/* Chain for buf IO for disk drive targets (PM disabled) */
1988 	SD_CHAIN_BUFIO,			/* Index: 3 */
1989 	SD_CHAIN_BUFIO,			/* Index: 4 */
1990 
1991 	/* Chain for buf IO for removable-media targets (PM enabled) */
1992 	SD_CHAIN_BUFIO,			/* Index: 5 */
1993 	SD_CHAIN_BUFIO,			/* Index: 6 */
1994 	SD_CHAIN_BUFIO,			/* Index: 7 */
1995 	SD_CHAIN_BUFIO,			/* Index: 8 */
1996 
1997 	/* Chain for buf IO for removable-media targets (PM disabled) */
1998 	SD_CHAIN_BUFIO,			/* Index: 9 */
1999 	SD_CHAIN_BUFIO,			/* Index: 10 */
2000 	SD_CHAIN_BUFIO,			/* Index: 11 */
2001 
2002 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2003 	SD_CHAIN_BUFIO,			/* Index: 12 */
2004 	SD_CHAIN_BUFIO,			/* Index: 13 */
2005 	SD_CHAIN_BUFIO,			/* Index: 14 */
2006 	SD_CHAIN_BUFIO,			/* Index: 15 */
2007 
2008 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2009 	SD_CHAIN_BUFIO,			/* Index: 16 */
2010 	SD_CHAIN_BUFIO,			/* Index: 17 */
2011 	SD_CHAIN_BUFIO,			/* Index: 18 */
2012 
2013 	/* Chain for USCSI commands (non-checksum targets) */
2014 	SD_CHAIN_USCSI,			/* Index: 19 */
2015 	SD_CHAIN_USCSI,			/* Index: 20 */
2016 
2017 	/* Chain for USCSI commands (checksum targets) */
2018 	SD_CHAIN_USCSI,			/* Index: 21 */
2019 	SD_CHAIN_USCSI,			/* Index: 22 */
2020 	SD_CHAIN_USCSI,			/* Index: 23 */
2021 
2022 	/* Chain for "direct" USCSI commands (all targets) */
2023 	SD_CHAIN_DIRECT,		/* Index: 24 */
2024 
2025 	/* Chain for "direct priority" USCSI commands (all targets) */
2026 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2027 };
2028 
2029 
2030 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2031 #define	SD_IS_BUFIO(xp)			\
2032 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2033 
2034 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2035 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2036 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
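
/*
 * Usage sketch: the classification macros let shared completion and retry
 * code branch on the chain an IO arrived through without carrying extra
 * state; e.g. a "direct priority" command (itself part of error recovery)
 * must never be re-queued behind normal buf IO:
 *
 *	if (SD_IS_DIRECT_PRIORITY(xp)) {
 *		... complete or fail immediately ...
 *	} else if (SD_IS_BUFIO(xp)) {
 *		... normal buf(9S) retry path ...
 *	}
 */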
2037 
2038 
2039 
2040 /*
2041  * Struct, array, and macros to map a specific chain to the appropriate
2042  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2043  *
2044  * The sd_chain_index_map[] array is used at attach time to set the various
2045  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2046  * chain to be used with the instance. This allows different instances to use
2047  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2048  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2049  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2050  * dynamically and without the use of locking; and (2) a layer to update the
2051  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2052  * to allow for deferred processing of an IO within the same chain from a
2053  * different execution context.
2054  */
2055 
2056 struct sd_chain_index {
2057 	int	sci_iostart_index;
2058 	int	sci_iodone_index;
2059 };
2060 
2061 static struct sd_chain_index	sd_chain_index_map[] = {
2062 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2063 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2064 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2065 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2066 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2067 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2068 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2069 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2070 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2071 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2072 };
2073 
2074 
2075 /*
2076  * The following are indexes into the sd_chain_index_map[] array.
2077  */
2078 
2079 /* un->un_buf_chain_type must be set to one of these */
2080 #define	SD_CHAIN_INFO_DISK		0
2081 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2082 #define	SD_CHAIN_INFO_RMMEDIA		2
2083 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2084 #define	SD_CHAIN_INFO_CHKSUM		4
2085 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2086 
2087 /* un->un_uscsi_chain_type must be set to one of these */
2088 #define	SD_CHAIN_INFO_USCSI_CMD		6
2089 /* USCSI with PM disabled is the same as DIRECT */
2090 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2091 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2092 
2093 /* un->un_direct_chain_type must be set to one of these */
2094 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2095 
2096 /* un->un_priority_chain_type must be set to one of these */
2097 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
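
/*
 * Attach-time sketch of the selection described above (the real assignments
 * live in the attach path; the un_f_pm_is_enabled test is only an assumed
 * example of how a chain might be picked):
 *
 *	un->un_buf_chain_type = un->un_f_pm_is_enabled ?
 *	    SD_CHAIN_INFO_DISK : SD_CHAIN_INFO_DISK_NO_PM;
 *
 * and later, per IO, the xbuf indexes fall out of sd_chain_index_map[]:
 *
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */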
2098 
2099 /* size for devid inquiries */
2100 #define	MAX_INQUIRY_SIZE		0xF0
2101 
2102 /*
2103  * Macros used by functions to pass a given buf(9S) struct along to the
2104  * next function in the layering chain for further processing.
2105  *
2106  * In the following macros, passing more than three arguments to the called
2107  * routines causes the optimizer for the SPARC compiler to stop doing tail
2108  * call elimination, which results in significant performance degradation.
2109  */
2110 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2111 	((*(sd_iostart_chain[index]))(index, un, bp))
2112 
2113 #define	SD_BEGIN_IODONE(index, un, bp)	\
2114 	((*(sd_iodone_chain[index]))(index, un, bp))
2115 
2116 #define	SD_NEXT_IOSTART(index, un, bp)				\
2117 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2118 
2119 #define	SD_NEXT_IODONE(index, un, bp)				\
2120 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
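
/*
 * Layering sketch: a chain routine does its per-layer work and hands the
 * buf to its neighbor with the SD_NEXT_* macros. sd_example_* below is
 * hypothetical, but the three-argument signature and the handoff are what
 * the tables above prescribe:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer setup of bp ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer teardown of bp ...
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */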
2121 
2122 /*
2123  *    Function: _init
2124  *
2125  * Description: This is the driver _init(9E) entry point.
2126  *
2127  * Return Code: Returns the value from mod_install(9F) or
2128  *		ddi_soft_state_init(9F) as appropriate.
2129  *
2130  *     Context: Called when driver module loaded.
2131  */
2132 
2133 int
2134 _init(void)
2135 {
2136 	int	err;
2137 
2138 	/* establish driver name from module name */
2139 	sd_label = mod_modname(&modlinkage);
2140 
2141 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2142 	    SD_MAXUNIT);
2143 
2144 	if (err != 0) {
2145 		return (err);
2146 	}
2147 
2148 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2149 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2150 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2151 
2152 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2153 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2154 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2155 
2156 	/*
2157 	 * It's OK to init here even for fibre devices.
2158 	 */
2159 	sd_scsi_probe_cache_init();
2160 
2161 	sd_scsi_target_lun_init();
2162 
2163 	/*
2164 	 * Creating taskq before mod_install ensures that all callers (threads)
2165 	 * that enter the module after a successful mod_install encounter
2166 	 * a valid taskq.
2167 	 */
2168 	sd_taskq_create();
2169 
2170 	err = mod_install(&modlinkage);
2171 	if (err != 0) {
2172 		/* delete taskq if install fails */
2173 		sd_taskq_delete();
2174 
2175 		mutex_destroy(&sd_detach_mutex);
2176 		mutex_destroy(&sd_log_mutex);
2177 		mutex_destroy(&sd_label_mutex);
2178 
2179 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2180 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2181 		cv_destroy(&sd_tr.srq_inprocess_cv);
2182 
2183 		sd_scsi_probe_cache_fini();
2184 
2185 		sd_scsi_target_lun_fini();
2186 
2187 		ddi_soft_state_fini(&sd_state);
2188 		return (err);
2189 	}
2190 
2191 	return (err);
2192 }
2193 
2194 
2195 /*
2196  *    Function: _fini
2197  *
2198  * Description: This is the driver _fini(9E) entry point.
2199  *
2200  * Return Code: Returns the value from mod_remove(9F)
2201  *
2202  *     Context: Called when driver module is unloaded.
2203  */
2204 
2205 int
2206 _fini(void)
2207 {
2208 	int err;
2209 
2210 	if ((err = mod_remove(&modlinkage)) != 0) {
2211 		return (err);
2212 	}
2213 
2214 	sd_taskq_delete();
2215 
2216 	mutex_destroy(&sd_detach_mutex);
2217 	mutex_destroy(&sd_log_mutex);
2218 	mutex_destroy(&sd_label_mutex);
2219 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2220 
2221 	sd_scsi_probe_cache_fini();
2222 
2223 	sd_scsi_target_lun_fini();
2224 
2225 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2226 	cv_destroy(&sd_tr.srq_inprocess_cv);
2227 
2228 	ddi_soft_state_fini(&sd_state);
2229 
2230 	return (err);
2231 }
2232 
2233 
2234 /*
2235  *    Function: _info
2236  *
2237  * Description: This is the driver _info(9E) entry point.
2238  *
2239  *   Arguments: modinfop - pointer to the driver modinfo structure
2240  *
2241  * Return Code: Returns the value from mod_info(9F).
2242  *
2243  *     Context: Kernel thread context
2244  */
2245 
2246 int
2247 _info(struct modinfo *modinfop)
2248 {
2249 	return (mod_info(&modlinkage, modinfop));
2250 }
2251 
2252 
2253 /*
2254  * The following routines implement the driver message logging facility.
2255  * They provide component- and level- based debug output filtering.
2256  * Output may also be restricted to messages for a single instance by
2257  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2258  * to NULL, then messages for all instances are printed.
2259  *
2260  * These routines have been cloned from each other due to the language
2261  * constraints of macros and variable argument list processing.
2262  */
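
/*
 * Usage sketch (the mask values below are assumed; see the SD_LOGMASK_*
 * definitions for the authoritative bits): on a DEBUG kernel the globals
 * can be set from /etc/system to enable, say, error- and info-level
 * messages for all components:
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0x3
 *
 * Output can then be narrowed to one instance by pointing sd_debug_un at
 * that instance's soft state (e.g. with mdb -kw).
 */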
2263 
2264 
2265 /*
2266  *    Function: sd_log_err
2267  *
2268  * Description: This routine is called by the SD_ERROR macro for debug
2269  *		logging of error conditions.
2270  *
2271  *   Arguments: comp - driver component being logged
2272  *		un   - pointer to driver soft state (unit) structure
2273  *		fmt  - error string and format to be logged
2274  */
2275 
2276 static void
2277 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2278 {
2279 	va_list		ap;
2280 	dev_info_t	*dev;
2281 
2282 	ASSERT(un != NULL);
2283 	dev = SD_DEVINFO(un);
2284 	ASSERT(dev != NULL);
2285 
2286 	/*
2287 	 * Filter messages based on the global component and level masks.
2288 	 * Also print if un matches the value of sd_debug_un, or if
2289 	 * sd_debug_un is set to NULL.
2290 	 */
2291 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2292 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2293 		mutex_enter(&sd_log_mutex);
2294 		va_start(ap, fmt);
2295 		(void) vsprintf(sd_log_buf, fmt, ap);
2296 		va_end(ap);
2297 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2298 		mutex_exit(&sd_log_mutex);
2299 	}
2300 #ifdef SD_FAULT_INJECTION
2301 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2302 	if (un->sd_injection_mask & comp) {
2303 		mutex_enter(&sd_log_mutex);
2304 		va_start(ap, fmt);
2305 		(void) vsprintf(sd_log_buf, fmt, ap);
2306 		va_end(ap);
2307 		sd_injection_log(sd_log_buf, un);
2308 		mutex_exit(&sd_log_mutex);
2309 	}
2310 #endif
2311 }
2312 
2313 
2314 /*
2315  *    Function: sd_log_info
2316  *
2317  * Description: This routine is called by the SD_INFO macro for debug
2318  *		logging of general purpose informational conditions.
2319  *
2320  *   Arguments: component - driver component being logged
2321  *		un   - pointer to driver soft state (unit) structure
2322  *		fmt  - info string and format to be logged
2323  */
2324 
2325 static void
2326 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2327 {
2328 	va_list		ap;
2329 	dev_info_t	*dev;
2330 
2331 	ASSERT(un != NULL);
2332 	dev = SD_DEVINFO(un);
2333 	ASSERT(dev != NULL);
2334 
2335 	/*
2336 	 * Filter messages based on the global component and level masks.
2337 	 * Also print if un matches the value of sd_debug_un, or if
2338 	 * sd_debug_un is set to NULL.
2339 	 */
2340 	if ((sd_component_mask & component) &&
2341 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2342 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2343 		mutex_enter(&sd_log_mutex);
2344 		va_start(ap, fmt);
2345 		(void) vsprintf(sd_log_buf, fmt, ap);
2346 		va_end(ap);
2347 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2348 		mutex_exit(&sd_log_mutex);
2349 	}
2350 #ifdef SD_FAULT_INJECTION
2351 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2352 	if (un->sd_injection_mask & component) {
2353 		mutex_enter(&sd_log_mutex);
2354 		va_start(ap, fmt);
2355 		(void) vsprintf(sd_log_buf, fmt, ap);
2356 		va_end(ap);
2357 		sd_injection_log(sd_log_buf, un);
2358 		mutex_exit(&sd_log_mutex);
2359 	}
2360 #endif
2361 }
2362 
2363 
2364 /*
2365  *    Function: sd_log_trace
2366  *
2367  * Description: This routine is called by the SD_TRACE macro for debug
2368  *		logging of trace conditions (i.e. function entry/exit).
2369  *
2370  *   Arguments: component - driver component being logged
2371  *		un   - pointer to driver soft state (unit) structure
2372  *		fmt  - trace string and format to be logged
2373  */
2374 
2375 static void
2376 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2377 {
2378 	va_list		ap;
2379 	dev_info_t	*dev;
2380 
2381 	ASSERT(un != NULL);
2382 	dev = SD_DEVINFO(un);
2383 	ASSERT(dev != NULL);
2384 
2385 	/*
2386 	 * Filter messages based on the global component and level masks.
2387 	 * Also print if un matches the value of sd_debug_un, or if
2388 	 * sd_debug_un is set to NULL.
2389 	 */
2390 	if ((sd_component_mask & component) &&
2391 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2392 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2393 		mutex_enter(&sd_log_mutex);
2394 		va_start(ap, fmt);
2395 		(void) vsprintf(sd_log_buf, fmt, ap);
2396 		va_end(ap);
2397 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2398 		mutex_exit(&sd_log_mutex);
2399 	}
2400 #ifdef SD_FAULT_INJECTION
2401 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2402 	if (un->sd_injection_mask & component) {
2403 		mutex_enter(&sd_log_mutex);
2404 		va_start(ap, fmt);
2405 		(void) vsprintf(sd_log_buf, fmt, ap);
2406 		va_end(ap);
2407 		sd_injection_log(sd_log_buf, un);
2408 		mutex_exit(&sd_log_mutex);
2409 	}
2410 #endif
2411 }
2412 
2413 
2414 /*
2415  *    Function: sdprobe
2416  *
2417  * Description: This is the driver probe(9e) entry point function.
2418  *
2419  *   Arguments: devi - opaque device info handle
2420  *
2421  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2422  *              DDI_PROBE_FAILURE: If the probe failed.
2423  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2424  *				   but may be present in the future.
2425  */
2426 
2427 static int
2428 sdprobe(dev_info_t *devi)
2429 {
2430 	struct scsi_device	*devp;
2431 	int			rval;
2432 	int			instance;
2433 
2434 	/*
2435 	 * if it wasn't for pln, sdprobe could actually be nulldev
2436 	 * in the "__fibre" case.
2437 	 */
2438 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2439 		return (DDI_PROBE_DONTCARE);
2440 	}
2441 
2442 	devp = ddi_get_driver_private(devi);
2443 
2444 	if (devp == NULL) {
2445 		/* Oops... nexus driver is misconfigured... */
2446 		return (DDI_PROBE_FAILURE);
2447 	}
2448 
2449 	instance = ddi_get_instance(devi);
2450 
2451 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2452 		return (DDI_PROBE_PARTIAL);
2453 	}
2454 
2455 	/*
2456 	 * Call the SCSA utility probe routine to see if we actually
2457 	 * have a target at this SCSI nexus.
2458 	 */
2459 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2460 	case SCSIPROBE_EXISTS:
2461 		switch (devp->sd_inq->inq_dtype) {
2462 		case DTYPE_DIRECT:
2463 			rval = DDI_PROBE_SUCCESS;
2464 			break;
2465 		case DTYPE_RODIRECT:
2466 			/* CDs etc. Can be removable media */
2467 			rval = DDI_PROBE_SUCCESS;
2468 			break;
2469 		case DTYPE_OPTICAL:
2470 			/*
2471 			 * Rewritable optical drive (e.g. HP115AA).
2472 			 * Can also be removable media
2473 			 */
2474 
2475 			/*
2476 			 * Do not attempt to bind to DTYPE_OPTICAL if
2477 			 * pre-Solaris 9 SPARC sd behavior is required.
2478 			 *
2479 			 * If first time through and sd_dtype_optical_bind
2480 			 * has not been set in /etc/system, check properties.
2481 			 */
2482 
2483 			if (sd_dtype_optical_bind < 0) {
2484 				sd_dtype_optical_bind = ddi_prop_get_int(
2485 				    DDI_DEV_T_ANY, devi, 0,
2486 				    "optical-device-bind", 1);
2487 			}
2488 
2489 			if (sd_dtype_optical_bind == 0) {
2490 				rval = DDI_PROBE_FAILURE;
2491 			} else {
2492 				rval = DDI_PROBE_SUCCESS;
2493 			}
2494 			break;
2495 
2496 		case DTYPE_NOTPRESENT:
2497 		default:
2498 			rval = DDI_PROBE_FAILURE;
2499 			break;
2500 		}
2501 		break;
2502 	default:
2503 		rval = DDI_PROBE_PARTIAL;
2504 		break;
2505 	}
2506 
2507 	/*
2508 	 * This routine checks for resource allocation prior to freeing,
2509 	 * so it will take care of the "smart probing" case where a
2510 	 * scsi_probe() may or may not have been issued and will *not*
2511 	 * free previously-freed resources.
2512 	 */
2513 	scsi_unprobe(devp);
2514 	return (rval);
2515 }
2516 
2517 
2518 /*
2519  *    Function: sdinfo
2520  *
2521  * Description: This is the driver getinfo(9e) entry point function.
2522  * 		Given the device number, return the devinfo pointer from
2523  *		the scsi_device structure or the instance number
2524  *		associated with the dev_t.
2525  *
2526  *   Arguments: dip     - pointer to device info structure
2527  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2528  *			  DDI_INFO_DEVT2INSTANCE)
2529  *		arg     - driver dev_t
2530  *		resultp - user buffer for request response
2531  *
2532  * Return Code: DDI_SUCCESS
2533  *              DDI_FAILURE
2534  */
2535 /* ARGSUSED */
2536 static int
2537 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2538 {
2539 	struct sd_lun	*un;
2540 	dev_t		dev;
2541 	int		instance;
2542 	int		error;
2543 
2544 	switch (infocmd) {
2545 	case DDI_INFO_DEVT2DEVINFO:
2546 		dev = (dev_t)arg;
2547 		instance = SDUNIT(dev);
2548 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2549 			return (DDI_FAILURE);
2550 		}
2551 		*result = (void *) SD_DEVINFO(un);
2552 		error = DDI_SUCCESS;
2553 		break;
2554 	case DDI_INFO_DEVT2INSTANCE:
2555 		dev = (dev_t)arg;
2556 		instance = SDUNIT(dev);
2557 		*result = (void *)(uintptr_t)instance;
2558 		error = DDI_SUCCESS;
2559 		break;
2560 	default:
2561 		error = DDI_FAILURE;
2562 	}
2563 	return (error);
2564 }
2565 
2566 /*
2567  *    Function: sd_prop_op
2568  *
2569  * Description: This is the driver prop_op(9e) entry point function.
2570  *		Return the number of blocks for the partition in question
2571  *		or forward the request to the property facilities.
2572  *
2573  *   Arguments: dev       - device number
2574  *		dip       - pointer to device info structure
2575  *		prop_op   - property operator
2576  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2577  *		name      - pointer to property name
2578  *		valuep    - pointer or address of the user buffer
2579  *		lengthp   - property length
2580  *
2581  * Return Code: DDI_PROP_SUCCESS
2582  *              DDI_PROP_NOT_FOUND
2583  *              DDI_PROP_UNDEFINED
2584  *              DDI_PROP_NO_MEMORY
2585  *              DDI_PROP_BUF_TOO_SMALL
2586  */
2587 
2588 static int
2589 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2590 	char *name, caddr_t valuep, int *lengthp)
2591 {
2592 	int		instance = ddi_get_instance(dip);
2593 	struct sd_lun	*un;
2594 	uint64_t	nblocks64;
2595 
2596 	/*
2597 	 * Our dynamic properties are all device specific and size oriented.
2598 	 * Requests issued under conditions where size is valid are passed
2599 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2600 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2601 	 */
2602 	un = ddi_get_soft_state(sd_state, instance);
2603 	if ((dev == DDI_DEV_T_ANY) || (un == NULL)) {
2604 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2605 		    name, valuep, lengthp));
2606 	} else if (!SD_IS_VALID_LABEL(un)) {
2607 		return (ddi_prop_op(dev, dip, prop_op, mod_flags, name,
2608 		    valuep, lengthp));
2609 	}
2610 
2611 	/* get nblocks value */
2612 	ASSERT(!mutex_owned(SD_MUTEX(un)));
2613 
2614 	(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2615 	    (diskaddr_t *)&nblocks64, NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
2616 
2617 	return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2618 	    name, valuep, lengthp, nblocks64));
2619 }
2620 
2621 /*
2622  * The following functions are for smart probing:
2623  * sd_scsi_probe_cache_init()
2624  * sd_scsi_probe_cache_fini()
2625  * sd_scsi_clear_probe_cache()
2626  * sd_scsi_probe_with_cache()
2627  */
2628 
2629 /*
2630  *    Function: sd_scsi_probe_cache_init
2631  *
2632  * Description: Initializes the probe response cache mutex and head pointer.
2633  *
2634  *     Context: Kernel thread context
2635  */
2636 
2637 static void
2638 sd_scsi_probe_cache_init(void)
2639 {
2640 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2641 	sd_scsi_probe_cache_head = NULL;
2642 }
2643 
2644 
2645 /*
2646  *    Function: sd_scsi_probe_cache_fini
2647  *
2648  * Description: Frees all resources associated with the probe response cache.
2649  *
2650  *     Context: Kernel thread context
2651  */
2652 
2653 static void
2654 sd_scsi_probe_cache_fini(void)
2655 {
2656 	struct sd_scsi_probe_cache *cp;
2657 	struct sd_scsi_probe_cache *ncp;
2658 
2659 	/* Clean up our smart probing linked list */
2660 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2661 		ncp = cp->next;
2662 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2663 	}
2664 	sd_scsi_probe_cache_head = NULL;
2665 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2666 }
2667 
2668 
2669 /*
2670  *    Function: sd_scsi_clear_probe_cache
2671  *
2672  * Description: This routine clears the probe response cache. This is
2673  *		done when open() returns ENXIO so that when deferred
2674  *		attach is attempted (possibly after a device has been
2675  *		turned on) we will retry the probe. Since we don't know
2676  *		which target we failed to open, we just clear the
2677  *		entire cache.
2678  *
2679  *     Context: Kernel thread context
2680  */
2681 
2682 static void
2683 sd_scsi_clear_probe_cache(void)
2684 {
2685 	struct sd_scsi_probe_cache	*cp;
2686 	int				i;
2687 
2688 	mutex_enter(&sd_scsi_probe_cache_mutex);
2689 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2690 		/*
2691 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2692 		 * force probing to be performed the next time
2693 		 * sd_scsi_probe_with_cache is called.
2694 		 */
2695 		for (i = 0; i < NTARGETS_WIDE; i++) {
2696 			cp->cache[i] = SCSIPROBE_EXISTS;
2697 		}
2698 	}
2699 	mutex_exit(&sd_scsi_probe_cache_mutex);
2700 }
2701 
2702 
2703 /*
2704  *    Function: sd_scsi_probe_with_cache
2705  *
2706  * Description: This routine implements support for a scsi device probe
2707  *		with cache. The driver maintains a cache of the target
2708  *		responses to scsi probes. If we get no response from a
2709  *		target during a probe inquiry, we remember that, and we
2710  *		avoid additional calls to scsi_probe on non-zero LUNs
2711  *		on the same target until the cache is cleared. By doing
2712  *		so we avoid the 1/4 sec selection timeout for nonzero
2713  *		LUNs. lun0 of a target is always probed.
2714  *
2715  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2716  *              waitfunc - indicates what the allocator routines should
2717  *			   do when resources are not available. This value
2718  *			   is passed on to scsi_probe() when that routine
2719  *			   is called.
2720  *
2721  * Return Code: SCSIPROBE_NORESP if a NORESP is in the probe response cache;
2722  *		otherwise the value returned by scsi_probe(9F).
2723  *
2724  *     Context: Kernel thread context
2725  */
2726 
2727 static int
2728 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2729 {
2730 	struct sd_scsi_probe_cache	*cp;
2731 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2732 	int		lun, tgt;
2733 
2734 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2735 	    SCSI_ADDR_PROP_LUN, 0);
2736 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2737 	    SCSI_ADDR_PROP_TARGET, -1);
2738 
2739 	/* Make sure caching enabled and target in range */
2740 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2741 		/* do it the old way (no cache) */
2742 		return (scsi_probe(devp, waitfn));
2743 	}
2744 
2745 	mutex_enter(&sd_scsi_probe_cache_mutex);
2746 
2747 	/* Find the cache for this scsi bus instance */
2748 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2749 		if (cp->pdip == pdip) {
2750 			break;
2751 		}
2752 	}
2753 
2754 	/* If we can't find a cache for this pdip, create one */
2755 	if (cp == NULL) {
2756 		int i;
2757 
2758 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2759 		    KM_SLEEP);
2760 		cp->pdip = pdip;
2761 		cp->next = sd_scsi_probe_cache_head;
2762 		sd_scsi_probe_cache_head = cp;
2763 		for (i = 0; i < NTARGETS_WIDE; i++) {
2764 			cp->cache[i] = SCSIPROBE_EXISTS;
2765 		}
2766 	}
2767 
2768 	mutex_exit(&sd_scsi_probe_cache_mutex);
2769 
2770 	/* Recompute the cache for this target if LUN zero */
2771 	if (lun == 0) {
2772 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2773 	}
2774 
2775 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2776 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2777 		return (SCSIPROBE_NORESP);
2778 	}
2779 
2780 	/* Do the actual probe; save & return the result */
2781 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2782 }
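
/*
 * Call-pattern sketch: sdprobe() above is the real caller. Probing LUN 0
 * refreshes the target's cache entry, probes of higher LUNs are then
 * short-circuited if LUN 0 drew no response, and an ENXIO open clears the
 * entire cache (via sd_scsi_clear_probe_cache()) so that a later deferred
 * attach probes again:
 *
 *	rval = sd_scsi_probe_with_cache(devp, NULL_FUNC);
 */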
2783 
2784 
2785 /*
2786  *    Function: sd_scsi_target_lun_init
2787  *
2788  * Description: Initializes the attached lun chain mutex and head pointer.
2789  *
2790  *     Context: Kernel thread context
2791  */
2792 
2793 static void
2794 sd_scsi_target_lun_init(void)
2795 {
2796 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2797 	sd_scsi_target_lun_head = NULL;
2798 }
2799 
2800 
2801 /*
2802  *    Function: sd_scsi_target_lun_fini
2803  *
2804  * Description: Frees all resources associated with the attached lun
2805  *              chain
2806  *
2807  *     Context: Kernel thread context
2808  */
2809 
2810 static void
2811 sd_scsi_target_lun_fini(void)
2812 {
2813 	struct sd_scsi_hba_tgt_lun	*cp;
2814 	struct sd_scsi_hba_tgt_lun	*ncp;
2815 
2816 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2817 		ncp = cp->next;
2818 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2819 	}
2820 	sd_scsi_target_lun_head = NULL;
2821 	mutex_destroy(&sd_scsi_target_lun_mutex);
2822 }
2823 
2824 
2825 /*
2826  *    Function: sd_scsi_get_target_lun_count
2827  *
2828  * Description: This routine will check in the attached lun chain to see
2829  * 		how many luns are attached on the required SCSI controller
2830  * 		and target. Currently, some capabilities like tagged queue
2831  *		are supported per target based by HBA. So all luns in a
2832  *		target have the same capabilities. Based on this assumption,
2833  * 		sd should only set these capabilities once per target. This
2834  *		function is called when sd needs to decide how many luns
2835  *		already attached on a target.
2836  *
2837  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2838  *			  controller device.
2839  *              target	- The target ID on the controller's SCSI bus.
2840  *
2841  * Return Code: The number of luns attached on the required target and
2842  *		controller.
2843  *		-1 if target ID is not in parallel SCSI scope or the given
2844  * 		dip is not in the chain.
2845  *
2846  *     Context: Kernel thread context
2847  */
2848 
2849 static int
2850 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2851 {
2852 	struct sd_scsi_hba_tgt_lun	*cp;
2853 
2854 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2855 		return (-1);
2856 	}
2857 
2858 	mutex_enter(&sd_scsi_target_lun_mutex);
2859 
2860 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2861 		if (cp->pdip == dip) {
2862 			break;
2863 		}
2864 	}
2865 
2866 	mutex_exit(&sd_scsi_target_lun_mutex);
2867 
2868 	if (cp == NULL) {
2869 		return (-1);
2870 	}
2871 
2872 	return (cp->nlun[target]);
2873 }
2874 
2875 
2876 /*
2877  *    Function: sd_scsi_update_lun_on_target
2878  *
2879  * Description: This routine is used to update the attached lun chain when a
2880  *		lun is attached or detached on a target.
2881  *
2882  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2883  *                        controller device.
2884  *              target  - The target ID on the controller's SCSI bus.
2885  *		flag	- Indicate the lun is attached or detached.
2886  *
2887  *     Context: Kernel thread context
2888  */
2889 
2890 static void
2891 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2892 {
2893 	struct sd_scsi_hba_tgt_lun	*cp;
2894 
2895 	mutex_enter(&sd_scsi_target_lun_mutex);
2896 
2897 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2898 		if (cp->pdip == dip) {
2899 			break;
2900 		}
2901 	}
2902 
2903 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2904 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2905 		    KM_SLEEP);
2906 		cp->pdip = dip;
2907 		cp->next = sd_scsi_target_lun_head;
2908 		sd_scsi_target_lun_head = cp;
2909 	}
2910 
2911 	mutex_exit(&sd_scsi_target_lun_mutex);
2912 
2913 	if (cp != NULL) {
2914 		if (flag == SD_SCSI_LUN_ATTACH) {
2915 			cp->nlun[target]++;
2916 		} else {
2917 			cp->nlun[target]--;
2918 		}
2919 	}
2920 }
2921 
2922 
2923 /*
2924  *    Function: sd_spin_up_unit
2925  *
2926  * Description: Issues the following commands to spin-up the device:
2927  *		START STOP UNIT, and INQUIRY.
2928  *
2929  *   Arguments: un - driver soft state (unit) structure
2930  *
2931  * Return Code: 0 - success
2932  *		EIO - failure
2933  *		EACCES - reservation conflict
2934  *
2935  *     Context: Kernel thread context
2936  */
2937 
2938 static int
2939 sd_spin_up_unit(struct sd_lun *un)
2940 {
2941 	size_t	resid		= 0;
2942 	int	has_conflict	= FALSE;
2943 	uchar_t *bufaddr;
2944 
2945 	ASSERT(un != NULL);
2946 
2947 	/*
2948 	 * Send a throwaway START UNIT command.
2949 	 *
2950 	 * If we fail on this, we don't care presently what precisely
2951 	 * is wrong.  EMC's arrays will also fail this with a check
2952 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2953 	 * we don't want to fail the attach because it may become
2954 	 * "active" later.
2955 	 */
2956 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2957 	    == EACCES)
2958 		has_conflict = TRUE;
2959 
2960 	/*
2961 	 * Send another INQUIRY command to the target. This is necessary for
2962 	 * non-removable media direct access devices because their INQUIRY data
2963 	 * may not be fully qualified until they are spun up (perhaps via the
2964 	 * START command above).  Note: This seems to be needed for some
2965 	 * legacy devices only. The INQUIRY command should succeed even if a
2966 	 * Reservation Conflict is present.
2967 	 */
2968 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2969 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2970 		kmem_free(bufaddr, SUN_INQSIZE);
2971 		return (EIO);
2972 	}
2973 
2974 	/*
2975 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2976 	 * Note that this routine does not return a failure here even if the
2977 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2978 	 */
2979 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2980 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2981 	}
2982 
2983 	kmem_free(bufaddr, SUN_INQSIZE);
2984 
2985 	/* If we hit a reservation conflict above, tell the caller. */
2986 	if (has_conflict == TRUE) {
2987 		return (EACCES);
2988 	}
2989 
2990 	return (0);
2991 }
2992 
2993 #ifdef _LP64
2994 /*
2995  *    Function: sd_enable_descr_sense
2996  *
2997  * Description: This routine attempts to select descriptor sense format
2998  *		using the Control mode page.  Devices that support 64 bit
2999  *		LBAs (for >2TB luns) should also implement descriptor
3000  *		sense data so we will call this function whenever we see
3001  *		a lun larger than 2TB.  If for some reason the device
3002  *		supports 64 bit LBAs but doesn't support descriptor sense
3003  *		presumably the mode select will fail.  Everything will
3004  *		continue to work normally except that we will not get
3005  *		complete sense data for commands that fail with an LBA
3006  *		larger than 32 bits.
3007  *
3008  *   Arguments: un - driver soft state (unit) structure
3009  *
3010  *     Context: Kernel thread context only
3011  */
3012 
3013 static void
3014 sd_enable_descr_sense(struct sd_lun *un)
3015 {
3016 	uchar_t			*header;
3017 	struct mode_control_scsi3 *ctrl_bufp;
3018 	size_t			buflen;
3019 	size_t			bd_len;
3020 
3021 	/*
3022 	 * Read MODE SENSE page 0xA, Control Mode Page
3023 	 */
3024 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3025 	    sizeof (struct mode_control_scsi3);
3026 	header = kmem_zalloc(buflen, KM_SLEEP);
3027 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3028 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3029 		SD_ERROR(SD_LOG_COMMON, un,
3030 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3031 		goto eds_exit;
3032 	}
3033 
3034 	/*
3035 	 * Determine size of Block Descriptors in order to locate
3036 	 * the mode page data. ATAPI devices return 0, SCSI devices
3037 	 * should return MODE_BLK_DESC_LENGTH.
3038 	 */
3039 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3040 
3041 	/* Clear the mode data length field for MODE SELECT */
3042 	((struct mode_header *)header)->length = 0;
3043 
3044 	ctrl_bufp = (struct mode_control_scsi3 *)
3045 	    (header + MODE_HEADER_LENGTH + bd_len);
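	/*
	 * (The MODE SENSE data is laid out as the mode header, then bd_len
	 * bytes of block descriptors -- zero for ATAPI -- and then the
	 * Control mode page itself, which is why the page pointer above is
	 * offset by MODE_HEADER_LENGTH + bd_len.)
	 */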
3046 
3047 	/*
3048 	 * If the page length is smaller than the expected value,
3049 	 * the target device doesn't support D_SENSE. Bail out here.
3050 	 */
3051 	if (ctrl_bufp->mode_page.length <
3052 	    sizeof (struct mode_control_scsi3) - 2) {
3053 		SD_ERROR(SD_LOG_COMMON, un,
3054 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3055 		goto eds_exit;
3056 	}
3057 
3058 	/*
3059 	 * Clear PS bit for MODE SELECT
3060 	 */
3061 	ctrl_bufp->mode_page.ps = 0;
3062 
3063 	/*
3064 	 * Set D_SENSE to enable descriptor sense format.
3065 	 */
3066 	ctrl_bufp->d_sense = 1;
3067 
3068 	/*
3069 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3070 	 */
3071 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3072 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3073 		SD_INFO(SD_LOG_COMMON, un,
3074 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3075 		goto eds_exit;
3076 	}
3077 
3078 eds_exit:
3079 	kmem_free(header, buflen);
3080 }
3081 
3082 /*
3083  *    Function: sd_reenable_dsense_task
3084  *
3085  * Description: Re-enable descriptor sense after device or bus reset
3086  *
3087  *     Context: Executes in a taskq() thread context
3088  */
3089 static void
3090 sd_reenable_dsense_task(void *arg)
3091 {
3092 	struct	sd_lun	*un = arg;
3093 
3094 	ASSERT(un != NULL);
3095 	sd_enable_descr_sense(un);
3096 }
3097 #endif /* _LP64 */
3098 
3099 /*
3100  *    Function: sd_set_mmc_caps
3101  *
3102  * Description: This routine determines if the device is MMC compliant and if
3103  *		the device supports CDDA via a mode sense of the CD/DVD
3104  *		capabilities mode page. It also checks whether the device
3105  *		is a DVD-RAM writable device.
3106  *
3107  *   Arguments: un - driver soft state (unit) structure
3108  *
3109  *     Context: Kernel thread context only
3110  */
3111 
3112 static void
3113 sd_set_mmc_caps(struct sd_lun *un)
3114 {
3115 	struct mode_header_grp2		*sense_mhp;
3116 	uchar_t				*sense_page;
3117 	caddr_t				buf;
3118 	int				bd_len;
3119 	int				status;
3120 	struct uscsi_cmd		com;
3121 	int				rtn;
3122 	uchar_t				*out_data_rw, *out_data_hd;
3123 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3124 
3125 	ASSERT(un != NULL);
3126 
3127 	/*
3128 	 * The flags set in this function are: MMC compliant, DVD-RAM
3129 	 * writable device, and CDDA support. Initialize them to FALSE,
3130 	 * and set each one to TRUE as its capability is detected.
3131 	 */
3132 	un->un_f_mmc_cap = FALSE;
3133 	un->un_f_dvdram_writable_device = FALSE;
3134 	un->un_f_cfg_cdda = FALSE;
3135 
3136 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3137 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3138 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3139 
3140 	if (status != 0) {
3141 		/* command failed; just return */
3142 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3143 		return;
3144 	}
3145 	/*
3146 	 * If the mode sense request for the CDROM CAPABILITIES
3147 	 * page (0x2A) succeeds the device is assumed to be MMC.
3148 	 */
3149 	un->un_f_mmc_cap = TRUE;
3150 
3151 	/* Get to the page data */
3152 	sense_mhp = (struct mode_header_grp2 *)buf;
3153 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3154 	    sense_mhp->bdesc_length_lo;
3155 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3156 		/*
3157 		 * We did not get back the expected block descriptor
3158 		 * length so we cannot determine if the device supports
3159 		 * CDDA. However, we still indicate the device is MMC
3160 		 * according to the successful response to the page
3161 		 * 0x2A mode sense request.
3162 		 */
3163 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3164 		    "sd_set_mmc_caps: Mode Sense returned "
3165 		    "invalid block descriptor length\n");
3166 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3167 		return;
3168 	}
3169 
3170 	/* See if read CDDA is supported */
3171 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3172 	    bd_len);
3173 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
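	/*
	 * (Per MMC, byte 5 bit 0 of the CD capabilities page 0x2A indicates
	 * CD-DA command support, and byte 3 bit 5 (0x20), tested below,
	 * indicates DVD-RAM write support.)
	 */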
3174 
3175 	/* See if writing DVD RAM is supported. */
3176 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3177 	if (un->un_f_dvdram_writable_device == TRUE) {
3178 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3179 		return;
3180 	}
3181 
3182 	/*
3183 	 * If the device presents DVD or CD capabilities in the mode
3184 	 * page, we can return here since an RRD will not have
3185 	 * these capabilities.
3186 	 */
3187 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3188 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3189 		return;
3190 	}
3191 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3192 
3193 	/*
3194 	 * If un->un_f_dvdram_writable_device is still FALSE,
3195 	 * check for a Removable Rigid Disk (RRD).  An RRD
3196 	 * device is identified by the features RANDOM_WRITABLE and
3197 	 * HARDWARE_DEFECT_MANAGEMENT.
3198 	 */
3199 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3200 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3201 
3202 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3203 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3204 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3205 	if (rtn != 0) {
3206 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3207 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3208 		return;
3209 	}
3210 
3211 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3212 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3213 
3214 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3215 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3216 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3217 	if (rtn == 0) {
3218 		/*
3219 		 * We have good information, check for random writable
3220 		 * and hardware defect features.
3221 		 */
3222 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3223 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3224 			un->un_f_dvdram_writable_device = TRUE;
3225 		}
3226 	}
3227 
3228 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3229 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3230 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3231 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3232 }
3233 
3234 /*
3235  *    Function: sd_check_for_writable_cd
3236  *
3237  * Description: This routine determines if the media in the device is
3238  *		writable or not. It uses the get configuration command (0x46)
3239  *		to determine if the media is writable.
3240  *
3241  *   Arguments: un - driver soft state (unit) structure
3242  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3243  *                           chain and the normal command waitq, or
3244  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3245  *                           "direct" chain and bypass the normal command
3246  *                           waitq.
3247  *
3248  *     Context: Never called at interrupt context.
3249  */
3250 
3251 static void
3252 sd_check_for_writable_cd(struct sd_lun *un, int path_flag)
3253 {
3254 	struct uscsi_cmd		com;
3255 	uchar_t				*out_data;
3256 	uchar_t				*rqbuf;
3257 	int				rtn;
3258 	uchar_t				*out_data_rw, *out_data_hd;
3259 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3260 	struct mode_header_grp2		*sense_mhp;
3261 	uchar_t				*sense_page;
3262 	caddr_t				buf;
3263 	int				bd_len;
3264 	int				status;
3265 
3266 	ASSERT(un != NULL);
3267 	ASSERT(mutex_owned(SD_MUTEX(un)));
3268 
3269 	/*
3270 	 * Initialize writable media to FALSE; it is set to TRUE only if
3271 	 * the configuration info tells us the media is writable.
3272 	 */
3273 	un->un_f_mmc_writable_media = FALSE;
3274 	mutex_exit(SD_MUTEX(un));
3275 
3276 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3277 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3278 
3279 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3280 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3281 
3282 	mutex_enter(SD_MUTEX(un));
3283 	if (rtn == 0) {
3284 		/*
3285 		 * We have good information, check for writable DVD.
3286 		 */
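		/*
		 * (Bytes 6-7 of the GET CONFIGURATION header hold the
		 * current profile; 0x0012 is the MMC DVD-RAM profile.)
		 */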
3287 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3288 			un->un_f_mmc_writable_media = TRUE;
3289 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3290 			kmem_free(rqbuf, SENSE_LENGTH);
3291 			return;
3292 		}
3293 	}
3294 
3295 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3296 	kmem_free(rqbuf, SENSE_LENGTH);
3297 
3298 	/*
3299 	 * Determine if this is a RRD type device.
3300 	 */
3301 	mutex_exit(SD_MUTEX(un));
3302 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3303 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3304 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3305 	mutex_enter(SD_MUTEX(un));
3306 	if (status != 0) {
3307 		/* command failed; just return */
3308 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3309 		return;
3310 	}
3311 
3312 	/* Get to the page data */
3313 	sense_mhp = (struct mode_header_grp2 *)buf;
3314 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3315 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3316 		/*
3317 		 * We did not get back the expected block descriptor length so
3318 		 * we cannot check the mode page.
3319 		 */
3320 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3321 		    "sd_check_for_writable_cd: Mode Sense returned "
3322 		    "invalid block descriptor length\n");
3323 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3324 		return;
3325 	}
3326 
3327 	/*
3328 	 * If the device presents DVD or CD capabilities in the mode
3329 	 * page, we can return here since an RRD device will not have
3330 	 * these capabilities.
3331 	 */
3332 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3333 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3334 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3335 		return;
3336 	}
3337 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3338 
3339 	/*
3340 	 * If un->un_f_mmc_writable_media is still FALSE,
3341 	 * check for RRD type media.  An RRD device is identified
3342 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3343 	 */
3344 	mutex_exit(SD_MUTEX(un));
3345 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3346 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3347 
3348 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3349 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3350 	    RANDOM_WRITABLE, path_flag);
3351 	if (rtn != 0) {
3352 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3353 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3354 		mutex_enter(SD_MUTEX(un));
3355 		return;
3356 	}
3357 
3358 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3359 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3360 
3361 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3362 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3363 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3364 	mutex_enter(SD_MUTEX(un));
3365 	if (rtn == 0) {
3366 		/*
3367 		 * We have good information, check for random writable
3368 		 * and hardware defect features as current.
3369 		 */
3370 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3371 		    (out_data_rw[10] & 0x1) &&
3372 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3373 		    (out_data_hd[10] & 0x1)) {
3374 			un->un_f_mmc_writable_media = TRUE;
3375 		}
3376 	}
3377 
3378 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3379 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3380 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3381 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3382 }
3383 
3384 /*
3385  *    Function: sd_read_unit_properties
3386  *
3387  * Description: The following implements a property lookup mechanism.
3388  *		Properties for particular disks (keyed on vendor, model
3389  *		and rev numbers) are sought in the sd.conf file via
3390  *		sd_process_sdconf_file(), and if not found there, are
3391  *		looked for in a list hardcoded in this driver via
3392  *		sd_process_sdconf_table(). Once located, the properties
3393  *		are used to update the driver unit structure.
3394  *
3395  *   Arguments: un - driver soft state (unit) structure
3396  */
3397 
3398 static void
3399 sd_read_unit_properties(struct sd_lun *un)
3400 {
3401 	/*
3402 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3403 	 * the "sd-config-list" property (from the sd.conf file) or if
3404 	 * there was not a match for the inquiry vid/pid. If this event
3405 	 * occurs the static driver configuration table is searched for
3406 	 * a match.
3407 	 */
3408 	ASSERT(un != NULL);
3409 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3410 		sd_process_sdconf_table(un);
3411 	}
3412 
3413 	/* check for LSI device */
3414 	sd_is_lsi(un);
3417 }
3418 
3419 
3420 /*
3421  *    Function: sd_process_sdconf_file
3422  *
3423  * Description: Use ddi_getlongprop to obtain the properties from the
3424  *		driver's config file (ie, sd.conf) and update the driver
3425  *		soft state structure accordingly.
3426  *
3427  *   Arguments: un - driver soft state (unit) structure
3428  *
3429  * Return Code: SD_SUCCESS - The properties were successfully set according
3430  *			     to the driver configuration file.
3431  *		SD_FAILURE - The driver config list was not obtained or
3432  *			     there was no vid/pid match. This indicates that
3433  *			     the static config table should be used.
3434  *
3435  * The config file has a property, "sd-config-list", which consists of
3436  * one or more duplets as follows:
3437  *
3438  *  sd-config-list=
3439  *	<duplet>,
3440  *	[<duplet>,]
3441  *	[<duplet>];
3442  *
3443  * The structure of each duplet is as follows:
3444  *
3445  *  <duplet>:= <vid+pid>,<data-property-name_list>
3446  *
3447  * The first entry of the duplet is the device ID string (the concatenated
3448  * vid & pid; not to be confused with a device_id).  This is defined in
3449  * the same way as in the sd_disk_table.
3450  *
3451  * The second part of the duplet is a string that identifies a
3452  * data-property-name-list. The data-property-name-list is defined as
3453  * follows:
3454  *
3455  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3456  *
3457  * The syntax of <data-property-name> depends on the <version> field.
3458  *
3459  * If version = SD_CONF_VERSION_1 we have the following syntax:
3460  *
3461  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3462  *
3463  * where the prop0 value will be used to set prop0 if bit0 set in the
3464  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3465  *
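 * For example, a hypothetical sd.conf fragment (the vid/pid, property
 * name, and values below are illustrative only, not a shipped entry):
 *
 *  sd-config-list = "VENDOR  MODEL1234", "vendor-disk-data";
 *  vendor-disk-data = 1,0x1,10,0,0,0,0,0,0,0,0,0;
 *
 * Here <version> is 1, <flags> is 0x1 (only bit 0 set), so prop0 (10)
 * supplies the property selected by bit 0; the remaining entries are
 * placeholders (the slot count is platform specific).
 *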
3466  */
3467 
3468 static int
3469 sd_process_sdconf_file(struct sd_lun *un)
3470 {
3471 	char	*config_list = NULL;
3472 	int	config_list_len;
3473 	int	len;
3474 	int	dupletlen = 0;
3475 	char	*vidptr;
3476 	int	vidlen;
3477 	char	*dnlist_ptr;
3478 	char	*dataname_ptr;
3479 	int	dnlist_len;
3480 	int	dataname_len;
3481 	int	*data_list;
3482 	int	data_list_len;
3483 	int	rval = SD_FAILURE;
3484 	int	i;
3485 
3486 	ASSERT(un != NULL);
3487 
3488 	/* Obtain the configuration list associated with the .conf file */
3489 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3490 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3491 	    != DDI_PROP_SUCCESS) {
3492 		return (SD_FAILURE);
3493 	}
3494 
3495 	/*
3496 	 * Compare vids in each duplet to the inquiry vid - if a match is
3497 	 * made, get the data value and update the soft state structure
3498 	 * accordingly.
3499 	 *
3500 	 * Note: This algorithm is complex and difficult to maintain. It should
3501 	 * be replaced with a more robust implementation.
3502 	 */
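	/*
	 * (The property value is a sequence of NUL-terminated strings laid
	 * out as "vid+pid\0name-list\0vid+pid\0name-list\0...".  On a vid
	 * match the following string is consumed as the name list, hence
	 * the dnlist_len + 2 below; on a mismatch only the current string
	 * and its NUL are skipped via dupletlen++.)
	 */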
3503 	for (len = config_list_len, vidptr = config_list; len > 0;
3504 	    vidptr += dupletlen, len -= dupletlen) {
3505 		/*
3506 		 * Note: The assumption here is that each vid entry is on
3507 		 * a unique line from its associated duplet.
3508 		 */
3509 		vidlen = dupletlen = (int)strlen(vidptr);
3510 		if ((vidlen == 0) ||
3511 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3512 			dupletlen++;
3513 			continue;
3514 		}
3515 
3516 		/*
3517 		 * dnlist contains 1 or more blank separated
3518 		 * data-property-name entries
3519 		 */
3520 		dnlist_ptr = vidptr + vidlen + 1;
3521 		dnlist_len = (int)strlen(dnlist_ptr);
3522 		dupletlen += dnlist_len + 2;
3523 
3524 		/*
3525 		 * Set a pointer for the first data-property-name
3526 		 * entry in the list
3527 		 */
3528 		dataname_ptr = dnlist_ptr;
3529 		dataname_len = 0;
3530 
3531 		/*
3532 		 * Loop through all data-property-name entries in the
3533 		 * data-property-name-list setting the properties for each.
3534 		 */
3535 		while (dataname_len < dnlist_len) {
3536 			int version;
3537 
3538 			/*
3539 			 * Determine the length of the current
3540 			 * data-property-name entry by indexing until a
3541 			 * blank or NULL is encountered. When the space is
3542 			 * encountered reset it to a NULL for compliance
3543 			 * with ddi_getlongprop().
3544 			 */
3545 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3546 			    (dataname_ptr[i] != '\0')); i++) {
3547 				;
3548 			}
3549 
3550 			dataname_len += i;
3551 			/* If not null terminated, make it so */
3552 			if (dataname_ptr[i] == ' ') {
3553 				dataname_ptr[i] = '\0';
3554 			}
3555 			dataname_len++;
3556 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3557 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3558 			    vidptr, dataname_ptr);
3559 
3560 			/* Get the data list */
3561 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3562 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3563 			    != DDI_PROP_SUCCESS) {
3564 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 				    "sd_process_sdconf_file: data property (%s)"
3566 				    " has no value\n", dataname_ptr);
3567 				dataname_ptr = dnlist_ptr + dataname_len;
3568 				continue;
3569 			}
3570 
3571 			version = data_list[0];
3572 
3573 			if (version == SD_CONF_VERSION_1) {
3574 				sd_tunables values;
3575 
3576 				/* Set the properties */
3577 				if (sd_chk_vers1_data(un, data_list[1],
3578 				    &data_list[2], data_list_len, dataname_ptr)
3579 				    == SD_SUCCESS) {
3580 					sd_get_tunables_from_conf(un,
3581 					    data_list[1], &data_list[2],
3582 					    &values);
3583 					sd_set_vers1_properties(un,
3584 					    data_list[1], &values);
3585 					rval = SD_SUCCESS;
3586 				} else {
3587 					rval = SD_FAILURE;
3588 				}
3589 			} else {
3590 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3591 				    "data property %s version 0x%x is invalid.",
3592 				    dataname_ptr, version);
3593 				rval = SD_FAILURE;
3594 			}
3595 			kmem_free(data_list, data_list_len);
3596 			dataname_ptr = dnlist_ptr + dataname_len;
3597 		}
3598 	}
3599 
3600 	/* free up the memory allocated by ddi_getlongprop */
3601 	if (config_list) {
3602 		kmem_free(config_list, config_list_len);
3603 	}
3604 
3605 	return (rval);
3606 }
3607 
3608 /*
3609  *    Function: sd_get_tunables_from_conf()
3610  *
3611  *
3612  *    This function reads the data list from the sd.conf file and pulls
3613  *    out the values that take numeric arguments, placing each value in
3614  *    the appropriate sd_tunables member.
3615  *    Since the order of the data list members varies across platforms,
3616  *    this function reads them from the data list in a platform-specific
3617  *    order and places them into the correct sd_tunables member, which
3618  *    is consistent across all platforms.
3619  */
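/*
 * For example (illustrative values only): with flags = 0x5 (bits 0 and 2
 * set), data_list[0] supplies the bit-0 property and data_list[2] the
 * bit-2 property; data_list[1] is skipped because bit 1 is clear.
 */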
3620 static void
3621 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3622     sd_tunables *values)
3623 {
3624 	int i;
3625 	int mask;
3626 
3627 	bzero(values, sizeof (sd_tunables));
3628 
3629 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3630 
3631 		mask = 1 << i;
3632 		if (mask > flags) {
3633 			break;
3634 		}
3635 
3636 		switch (mask & flags) {
3637 		case 0:	/* This mask bit not set in flags */
3638 			continue;
3639 		case SD_CONF_BSET_THROTTLE:
3640 			values->sdt_throttle = data_list[i];
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_get_tunables_from_conf: throttle = %d\n",
3643 			    values->sdt_throttle);
3644 			break;
3645 		case SD_CONF_BSET_CTYPE:
3646 			values->sdt_ctype = data_list[i];
3647 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3648 			    "sd_get_tunables_from_conf: ctype = %d\n",
3649 			    values->sdt_ctype);
3650 			break;
3651 		case SD_CONF_BSET_NRR_COUNT:
3652 			values->sdt_not_rdy_retries = data_list[i];
3653 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3654 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3655 			    values->sdt_not_rdy_retries);
3656 			break;
3657 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3658 			values->sdt_busy_retries = data_list[i];
3659 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3660 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3661 			    values->sdt_busy_retries);
3662 			break;
3663 		case SD_CONF_BSET_RST_RETRIES:
3664 			values->sdt_reset_retries = data_list[i];
3665 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3666 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3667 			    values->sdt_reset_retries);
3668 			break;
3669 		case SD_CONF_BSET_RSV_REL_TIME:
3670 			values->sdt_reserv_rel_time = data_list[i];
3671 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3672 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3673 			    values->sdt_reserv_rel_time);
3674 			break;
3675 		case SD_CONF_BSET_MIN_THROTTLE:
3676 			values->sdt_min_throttle = data_list[i];
3677 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3678 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3679 			    values->sdt_min_throttle);
3680 			break;
3681 		case SD_CONF_BSET_DISKSORT_DISABLED:
3682 			values->sdt_disk_sort_dis = data_list[i];
3683 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3684 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3685 			    values->sdt_disk_sort_dis);
3686 			break;
3687 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3688 			values->sdt_lun_reset_enable = data_list[i];
3689 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3690 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3691 			    "\n", values->sdt_lun_reset_enable);
3692 			break;
3693 		}
3694 	}
3695 }
3696 
3697 /*
3698  *    Function: sd_process_sdconf_table
3699  *
3700  * Description: Search the static configuration table for a match on the
3701  *		inquiry vid/pid and update the driver soft state structure
3702  *		according to the table property values for the device.
3703  *
3704  *		The form of a configuration table entry is:
3705  *		  <vid+pid>,<flags>,<property-data>
3706  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3707  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3708  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3709  *
3710  *   Arguments: un - driver soft state (unit) structure
3711  */
3712 
3713 static void
3714 sd_process_sdconf_table(struct sd_lun *un)
3715 {
3716 	char	*id = NULL;
3717 	int	table_index;
3718 	int	idlen;
3719 
3720 	ASSERT(un != NULL);
3721 	for (table_index = 0; table_index < sd_disk_table_size;
3722 	    table_index++) {
3723 		id = sd_disk_table[table_index].device_id;
3724 		idlen = strlen(id);
3725 		if (idlen == 0) {
3726 			continue;
3727 		}
3728 
3729 		/*
3730 		 * The static configuration table currently does not
3731 		 * implement version 10 properties. Additionally,
3732 		 * multiple data-property-name entries are not
3733 		 * implemented in the static configuration table.
3734 		 */
3735 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3736 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3737 			    "sd_process_sdconf_table: disk %s\n", id);
3738 			sd_set_vers1_properties(un,
3739 			    sd_disk_table[table_index].flags,
3740 			    sd_disk_table[table_index].properties);
3741 			break;
3742 		}
3743 	}
3744 }
3745 
3746 
3747 /*
3748  *    Function: sd_sdconf_id_match
3749  *
3750  * Description: This local function implements a case sensitive vid/pid
3751  *		comparison as well as the boundary cases of wild card and
3752  *		multiple blanks.
3753  *
3754  *		Note: An implicit assumption made here is that the scsi
3755  *		inquiry structure will always keep the vid, pid and
3756  *		revision strings in consecutive sequence, so they can be
3757  *		read as a single string. If this assumption is not the
3758  *		case, a separate string, to be used for the check, needs
3759  *		to be built with these strings concatenated.
3760  *
3761  *   Arguments: un - driver soft state (unit) structure
3762  *		id - table or config file vid/pid
3763  *		idlen  - length of the vid/pid (bytes)
3764  *
3765  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3766  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3767  */
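/*
 * For example (hypothetical ids): a table entry of "*ST31200N*" matches
 * any device whose 16-byte inq_pid contains "ST31200N", regardless of
 * vendor, via the strstr()-style scan below, while an entry such as
 * " NEC CDROM " instead goes through the blank-collapsing comparison in
 * sd_blank_cmp().
 */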
3768 
3769 static int
3770 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3771 {
3772 	struct scsi_inquiry	*sd_inq;
3773 	int 			rval = SD_SUCCESS;
3774 
3775 	ASSERT(un != NULL);
3776 	sd_inq = un->un_sd->sd_inq;
3777 	ASSERT(id != NULL);
3778 
3779 	/*
3780 	 * We use the inq_vid as a pointer to a buffer containing the
3781 	 * vid and pid and use the entire vid/pid length of the table
3782 	 * entry for the comparison. This works because the inq_pid
3783 	 * data member follows inq_vid in the scsi_inquiry structure.
3784 	 */
3785 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3786 		/*
3787 		 * The user id string is compared to the inquiry vid/pid
3788 		 * using a case insensitive comparison and ignoring
3789 		 * multiple spaces.
3790 		 */
3791 		rval = sd_blank_cmp(un, id, idlen);
3792 		if (rval != SD_SUCCESS) {
3793 			/*
3794 			 * User id strings that start and end with a "*"
3795 			 * are a special case. These do not have a
3796 			 * specific vendor, and the product string can
3797 			 * appear anywhere in the 16 byte PID portion of
3798 			 * the inquiry data. This is a simple strstr()
3799 			 * type search for the user id in the inquiry data.
3800 			 */
3801 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3802 				char	*pidptr = &id[1];
3803 				int	i;
3804 				int	j;
3805 				int	pidstrlen = idlen - 2;
3806 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3807 				    pidstrlen;
3808 
3809 				if (j < 0) {
3810 					return (SD_FAILURE);
3811 				}
3812 				for (i = 0; i < j; i++) {
3813 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3814 					    pidptr, pidstrlen) == 0) {
3815 						rval = SD_SUCCESS;
3816 						break;
3817 					}
3818 				}
3819 			}
3820 		}
3821 	}
3822 	return (rval);
3823 }
3824 
3825 
3826 /*
3827  *    Function: sd_blank_cmp
3828  *
3829  * Description: If the id string starts and ends with a space, treat
3830  *		multiple consecutive spaces as equivalent to a single
3831  *		space. For example, this causes a sd_disk_table entry
3832  *		of " NEC CDROM " to match a device's id string of
3833  *		"NEC       CDROM".
3834  *
3835  *		Note: The success exit condition for this routine is that
3836  *		the pointer into the table entry reaches '\0' and the count
3837  *		of the remaining inquiry length is zero. This happens if the inquiry
3838  *		string returned by the device is padded with spaces to be
3839  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3840  *		SCSI spec states that the inquiry string is to be padded with
3841  *		spaces.
3842  *
3843  *   Arguments: un - driver soft state (unit) structure
3844  *		id - table or config file vid/pid
3845  *		idlen  - length of the vid/pid (bytes)
3846  *
3847  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3848  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3849  */
3850 
3851 static int
3852 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3853 {
3854 	char		*p1;
3855 	char		*p2;
3856 	int		cnt;
3857 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3858 	    sizeof (SD_INQUIRY(un)->inq_pid);
3859 
3860 	ASSERT(un != NULL);
3861 	p2 = un->un_sd->sd_inq->inq_vid;
3862 	ASSERT(id != NULL);
3863 	p1 = id;
3864 
3865 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3866 		/*
3867 		 * Note: string p1 is terminated by a NUL but string p2
3868 		 * isn't.  The end of p2 is determined by cnt.
3869 		 */
3870 		for (;;) {
3871 			/* skip over any extra blanks in both strings */
3872 			while ((*p1 != '\0') && (*p1 == ' ')) {
3873 				p1++;
3874 			}
3875 			while ((cnt != 0) && (*p2 == ' ')) {
3876 				p2++;
3877 				cnt--;
3878 			}
3879 
3880 			/* compare the two strings */
3881 			if ((cnt == 0) ||
3882 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3883 				break;
3884 			}
3885 			while ((cnt > 0) &&
3886 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3887 				p1++;
3888 				p2++;
3889 				cnt--;
3890 			}
3891 		}
3892 	}
3893 
3894 	/* return SD_SUCCESS if both strings match */
3895 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3896 }
3897 
3898 
3899 /*
3900  *    Function: sd_chk_vers1_data
3901  *
3902  * Description: Verify the version 1 device properties provided by the
3903  *		user via the configuration file
3904  *
3905  *   Arguments: un	     - driver soft state (unit) structure
3906  *		flags	     - integer mask indicating properties to be set
3907  *		prop_list    - integer list of property values
3908  *		list_len     - length of user provided data
3909  *
3910  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3911  *		SD_FAILURE - Indicates the user provided data is invalid
3912  */
3913 
3914 static int
3915 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3916     int list_len, char *dataname_ptr)
3917 {
3918 	int i;
3919 	int mask = 1;
3920 	int index = 0;
3921 
3922 	ASSERT(un != NULL);
3923 
3924 	/* Check for a NULL property name and list */
3925 	if (dataname_ptr == NULL) {
3926 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3927 		    "sd_chk_vers1_data: NULL data property name.");
3928 		return (SD_FAILURE);
3929 	}
3930 	if (prop_list == NULL) {
3931 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3932 		    "sd_chk_vers1_data: %s NULL data property list.",
3933 		    dataname_ptr);
3934 		return (SD_FAILURE);
3935 	}
3936 
3937 	/* Display a warning if undefined bits are set in the flags */
3938 	if (flags & ~SD_CONF_BIT_MASK) {
3939 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3940 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3941 		    "Properties not set.",
3942 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3943 		return (SD_FAILURE);
3944 	}
3945 
3946 	/*
3947 	 * Verify the length of the list by counting the bits set in the
3948 	 * flags and validating that the property list is long enough to
3949 	 * hold the version word, the flags word, and one value per set bit.
3950 	 */
3951 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3952 		if (flags & mask) {
3953 			index++;
3954 		}
3955 		mask <<= 1;	/* advance so each flag bit is checked once */
3956 	}
3957 	if ((list_len / sizeof (int)) < (index + 2)) {
3958 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3959 		    "sd_chk_vers1_data: "
3960 		    "Data property list %s size is incorrect. "
3961 		    "Properties not set.", dataname_ptr);
3962 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3963 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3964 		return (SD_FAILURE);
3965 	}
3966 	return (SD_SUCCESS);
3967 }
3968 
3969 
3970 /*
3971  *    Function: sd_set_vers1_properties
3972  *
3973  * Description: Set version 1 device properties based on a property list
3974  *		retrieved from the driver configuration file or static
3975  *		configuration table. Version 1 properties have the format:
3976  *
3977  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3978  *
3979  *		where the prop0 value will be used to set prop0 if bit0
3980  *		is set in the flags
3981  *
3982  *   Arguments: un	     - driver soft state (unit) structure
3983  *		flags	     - integer mask indicating properties to be set
3984  *		prop_list    - integer list of property values
3985  */
3986 
3987 static void
3988 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3989 {
3990 	ASSERT(un != NULL);
3991 
3992 	/*
3993 	 * Set the flag to indicate cache is to be disabled. An attempt
3994 	 * to disable the cache via sd_cache_control() will be made
3995 	 * later during attach once the basic initialization is complete.
3996 	 */
3997 	if (flags & SD_CONF_BSET_NOCACHE) {
3998 		un->un_f_opt_disable_cache = TRUE;
3999 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4000 		    "sd_set_vers1_properties: caching disabled flag set\n");
4001 	}
4002 
4003 	/* CD-specific configuration parameters */
4004 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4005 		un->un_f_cfg_playmsf_bcd = TRUE;
4006 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4007 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4008 	}
4009 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4010 		un->un_f_cfg_readsub_bcd = TRUE;
4011 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4012 		    "sd_set_vers1_properties: readsub_bcd set\n");
4013 	}
4014 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4015 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4016 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4017 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4018 	}
4019 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4020 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4021 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4022 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4023 	}
4024 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4025 		un->un_f_cfg_no_read_header = TRUE;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 			    "sd_set_vers1_properties: no_read_header set\n");
4028 	}
4029 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4030 		un->un_f_cfg_read_cd_xd4 = TRUE;
4031 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4032 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4033 	}
4034 
4035 	/* Support for devices which do not have valid/unique serial numbers */
4036 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4037 		un->un_f_opt_fab_devid = TRUE;
4038 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4039 		    "sd_set_vers1_properties: fab_devid bit set\n");
4040 	}
4041 
4042 	/* Support for user throttle configuration */
4043 	if (flags & SD_CONF_BSET_THROTTLE) {
4044 		ASSERT(prop_list != NULL);
4045 		un->un_saved_throttle = un->un_throttle =
4046 		    prop_list->sdt_throttle;
4047 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4048 		    "sd_set_vers1_properties: throttle set to %d\n",
4049 		    prop_list->sdt_throttle);
4050 	}
4051 
4052 	/* Set the per disk retry count according to the conf file or table. */
4053 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4054 		ASSERT(prop_list != NULL);
4055 		if (prop_list->sdt_not_rdy_retries) {
4056 			un->un_notready_retry_count =
4057 			    prop_list->sdt_not_rdy_retries;
4058 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4059 			    "sd_set_vers1_properties: not ready retry count"
4060 			    " set to %d\n", un->un_notready_retry_count);
4061 		}
4062 	}
4063 
4064 	/* The controller type is reported for generic disk driver ioctls */
4065 	if (flags & SD_CONF_BSET_CTYPE) {
4066 		ASSERT(prop_list != NULL);
4067 		switch (prop_list->sdt_ctype) {
4068 		case CTYPE_CDROM:
4069 			un->un_ctype = prop_list->sdt_ctype;
4070 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4071 			    "sd_set_vers1_properties: ctype set to "
4072 			    "CTYPE_CDROM\n");
4073 			break;
4074 		case CTYPE_CCS:
4075 			un->un_ctype = prop_list->sdt_ctype;
4076 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4077 				"sd_set_vers1_properties: ctype set to "
4078 				"CTYPE_CCS\n");
4079 			break;
4080 		case CTYPE_ROD:		/* RW optical */
4081 			un->un_ctype = prop_list->sdt_ctype;
4082 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4083 			    "sd_set_vers1_properties: ctype set to "
4084 			    "CTYPE_ROD\n");
4085 			break;
4086 		default:
4087 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4088 			    "sd_set_vers1_properties: Could not set "
4089 			    "invalid ctype value (%d)",
4090 			    prop_list->sdt_ctype);
4091 		}
4092 	}
4093 
4094 	/* Purple failover timeout */
4095 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4096 		ASSERT(prop_list != NULL);
4097 		un->un_busy_retry_count =
4098 		    prop_list->sdt_busy_retries;
4099 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4100 		    "sd_set_vers1_properties: "
4101 		    "busy retry count set to %d\n",
4102 		    un->un_busy_retry_count);
4103 	}
4104 
4105 	/* Purple reset retry count */
4106 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4107 		ASSERT(prop_list != NULL);
4108 		un->un_reset_retry_count =
4109 		    prop_list->sdt_reset_retries;
4110 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4111 		    "sd_set_vers1_properties: "
4112 		    "reset retry count set to %d\n",
4113 		    un->un_reset_retry_count);
4114 	}
4115 
4116 	/* Purple reservation release timeout */
4117 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4118 		ASSERT(prop_list != NULL);
4119 		un->un_reserve_release_time =
4120 		    prop_list->sdt_reserv_rel_time;
4121 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4122 		    "sd_set_vers1_properties: "
4123 		    "reservation release timeout set to %d\n",
4124 		    un->un_reserve_release_time);
4125 	}
4126 
4127 	/*
4128 	 * Flag telling the driver to verify that no commands are pending
4129 	 * for a device before issuing a Test Unit Ready. This is a workaround
4130 	 * for a firmware bug in some Seagate eliteI drives.
4131 	 */
4132 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4133 		un->un_f_cfg_tur_check = TRUE;
4134 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4135 		    "sd_set_vers1_properties: tur queue check set\n");
4136 	}
4137 
4138 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4139 		un->un_min_throttle = prop_list->sdt_min_throttle;
4140 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4141 		    "sd_set_vers1_properties: min throttle set to %d\n",
4142 		    un->un_min_throttle);
4143 	}
4144 
4145 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4146 		un->un_f_disksort_disabled =
4147 		    (prop_list->sdt_disk_sort_dis != 0) ?
4148 		    TRUE : FALSE;
4149 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4150 		    "sd_set_vers1_properties: disksort disabled "
4151 		    "flag set to %d\n",
4152 		    prop_list->sdt_disk_sort_dis);
4153 	}
4154 
4155 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4156 		un->un_f_lun_reset_enabled =
4157 		    (prop_list->sdt_lun_reset_enable != 0) ?
4158 		    TRUE : FALSE;
4159 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4160 		    "sd_set_vers1_properties: lun reset enabled "
4161 		    "flag set to %d\n",
4162 		    prop_list->sdt_lun_reset_enable);
4163 	}
4164 
4165 	/*
4166 	 * Validate the throttle values.
4167 	 * If any of the numbers are invalid, set everything to defaults.
4168 	 */
4169 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4170 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4171 	    (un->un_min_throttle > un->un_throttle)) {
4172 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4173 		un->un_min_throttle = sd_min_throttle;
4174 	}
4175 }
4176 
4177 /*
4178  *   Function: sd_is_lsi()
4179  *
4180  *   Description: Check for LSI devices, stepping through the static device
4181  *	table to match the vid/pid.
4182  *
4183  *   Args: un - ptr to sd_lun
4184  *
4185  *   Notes:  When creating a new LSI property, the new property must also
4186  *		be added to this function.
4187  */
4188 static void
4189 sd_is_lsi(struct sd_lun *un)
4190 {
4191 	char	*id = NULL;
4192 	int	table_index;
4193 	int	idlen;
4194 	void	*prop;
4195 
4196 	ASSERT(un != NULL);
4197 	for (table_index = 0; table_index < sd_disk_table_size;
4198 	    table_index++) {
4199 		id = sd_disk_table[table_index].device_id;
4200 		idlen = strlen(id);
4201 		if (idlen == 0) {
4202 			continue;
4203 		}
4204 
4205 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4206 			prop = sd_disk_table[table_index].properties;
4207 			if (prop == &lsi_properties ||
4208 			    prop == &lsi_oem_properties ||
4209 			    prop == &lsi_properties_scsi ||
4210 			    prop == &symbios_properties) {
4211 				un->un_f_cfg_is_lsi = TRUE;
4212 			}
4213 			break;
4214 		}
4215 	}
4216 }
4217 
4218 /*
4219  *    Function: sd_get_physical_geometry
4220  *
4221  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4222  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4223  *		target, and use this information to initialize the physical
4224  *		geometry cache specified by pgeom_p.
4225  *
4226  *		MODE SENSE is an optional command, so failure in this case
4227  *		does not necessarily denote an error. We want to use the
4228  *		MODE SENSE commands to derive the physical geometry of the
4229  *		device, but if either command fails, the logical geometry is
4230  *		used as the fallback for disk label geometry in cmlb.
4231  *
4232  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4233  *		have already been initialized for the current target and
4234  *		that the current values be passed as args so that we don't
4235  *		end up ever trying to use -1 as a valid value. This could
4236  *		happen if either value is reset while we're not holding
4237  *		the mutex.
4238  *
4239  *   Arguments: un - driver soft state (unit) structure
4240  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4241  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4242  *			to use the USCSI "direct" chain and bypass the normal
4243  *			command waitq.
4244  *
4245  *     Context: Kernel thread only (can sleep).
4246  */
4247 
4248 static int
4249 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4250 	diskaddr_t capacity, int lbasize, int path_flag)
4251 {
4252 	struct	mode_format	*page3p;
4253 	struct	mode_geometry	*page4p;
4254 	struct	mode_header	*headerp;
4255 	int	sector_size;
4256 	int	nsect;
4257 	int	nhead;
4258 	int	ncyl;
4259 	int	intrlv;
4260 	int	spc;
4261 	diskaddr_t	modesense_capacity;
4262 	int	rpm;
4263 	int	bd_len;
4264 	int	mode_header_length;
4265 	uchar_t	*p3bufp;
4266 	uchar_t	*p4bufp;
4267 	int	cdbsize;
4268 	int 	ret = EIO;
4269 
4270 	ASSERT(un != NULL);
4271 
4272 	if (lbasize == 0) {
4273 		if (ISCD(un)) {
4274 			lbasize = 2048;
4275 		} else {
4276 			lbasize = un->un_sys_blocksize;
4277 		}
4278 	}
4279 	pgeom_p->g_secsize = (unsigned short)lbasize;
4280 
4281 	/*
4282 	 * If the unit is a cd/dvd drive MODE SENSE page three
4283 	 * and MODE SENSE page four are reserved (see SBC spec
4284 	 * and MMC spec). To prevent soft errors, just return
4285 	 * using the default LBA size.
4286 	 */
4287 	if (ISCD(un))
4288 		return (ret);
4289 
4290 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4291 
4292 	/*
4293 	 * Retrieve MODE SENSE page 3 - Format Device Page
4294 	 */
4295 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4296 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4297 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4298 	    != 0) {
4299 		SD_ERROR(SD_LOG_COMMON, un,
4300 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4301 		goto page3_exit;
4302 	}
4303 
4304 	/*
4305 	 * Determine size of Block Descriptors in order to locate the mode
4306 	 * page data.  ATAPI devices return 0, SCSI devices should return
4307 	 * MODE_BLK_DESC_LENGTH.
4308 	 */
4309 	headerp = (struct mode_header *)p3bufp;
4310 	if (un->un_f_cfg_is_atapi == TRUE) {
4311 		struct mode_header_grp2 *mhp =
4312 		    (struct mode_header_grp2 *)headerp;
4313 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4314 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4315 	} else {
4316 		mode_header_length = MODE_HEADER_LENGTH;
4317 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4318 	}
4319 
4320 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4321 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4322 		    "received unexpected bd_len of %d, page3\n", bd_len);
4323 		goto page3_exit;
4324 	}
4325 
4326 	page3p = (struct mode_format *)
4327 	    ((caddr_t)headerp + mode_header_length + bd_len);
4328 
4329 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4330 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4331 		    "mode sense pg3 code mismatch %d\n",
4332 		    page3p->mode_page.code);
4333 		goto page3_exit;
4334 	}
4335 
4336 	/*
4337 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4338 	 * complete successfully; otherwise, revert to the logical geometry.
4339 	 * So, we need to save everything in temporary variables.
4340 	 */
4341 	sector_size = BE_16(page3p->data_bytes_sect);
4342 
4343 	/*
4344 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4345 	 */
4346 	if (sector_size == 0) {
4347 		sector_size = un->un_sys_blocksize;
4348 	} else {
4349 		sector_size &= ~(un->un_sys_blocksize - 1);
4350 	}
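	/*
	 * (The mask above rounds sector_size down to a multiple of the
	 * system block size; this assumes un_sys_blocksize is a power of
	 * two.)
	 */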
4351 
4352 	nsect  = BE_16(page3p->sect_track);
4353 	intrlv = BE_16(page3p->interleave);
4354 
4355 	SD_INFO(SD_LOG_COMMON, un,
4356 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4357 	SD_INFO(SD_LOG_COMMON, un,
4358 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4359 	    page3p->mode_page.code, nsect, sector_size);
4360 	SD_INFO(SD_LOG_COMMON, un,
4361 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4362 	    BE_16(page3p->track_skew),
4363 	    BE_16(page3p->cylinder_skew));
4364 
4366 	/*
4367 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4368 	 */
4369 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4370 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4371 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4372 	    != 0) {
4373 		SD_ERROR(SD_LOG_COMMON, un,
4374 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4375 		goto page4_exit;
4376 	}
4377 
4378 	/*
4379 	 * Determine size of Block Descriptors in order to locate the mode
4380 	 * page data.  ATAPI devices return 0, SCSI devices should return
4381 	 * MODE_BLK_DESC_LENGTH.
4382 	 */
4383 	headerp = (struct mode_header *)p4bufp;
4384 	if (un->un_f_cfg_is_atapi == TRUE) {
4385 		struct mode_header_grp2 *mhp =
4386 		    (struct mode_header_grp2 *)headerp;
4387 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4388 	} else {
4389 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4390 	}
4391 
4392 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4393 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4394 		    "received unexpected bd_len of %d, page4\n", bd_len);
4395 		goto page4_exit;
4396 	}
4397 
4398 	page4p = (struct mode_geometry *)
4399 	    ((caddr_t)headerp + mode_header_length + bd_len);
4400 
4401 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4402 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4403 		    "mode sense pg4 code mismatch %d\n",
4404 		    page4p->mode_page.code);
4405 		goto page4_exit;
4406 	}
4407 
4408 	/*
4409 	 * Stash the data now, after we know that both commands completed.
4410 	 */
4411 
4413 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4414 	spc   = nhead * nsect;
4415 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4416 	rpm   = BE_16(page4p->rpm);
4417 
4418 	modesense_capacity = spc * ncyl;
4419 
4420 	SD_INFO(SD_LOG_COMMON, un,
4421 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4422 	SD_INFO(SD_LOG_COMMON, un,
4423 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4424 	SD_INFO(SD_LOG_COMMON, un,
4425 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4426 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4427 	    (void *)pgeom_p, capacity);
4428 
4429 	/*
4430 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4431 	 * the product of C * H * S returned by MODE SENSE >= that returned
4432 	 * by read capacity. This is an idiosyncrasy of the original x86
4433 	 * disk subsystem.
4434 	 */
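	/*
	 * Worked example (hypothetical values): with ncyl = 5000, nhead = 16
	 * and nsect = 128, spc = 2048 and modesense_capacity = 10,240,000.
	 * If READ CAPACITY reported 10,000,000 blocks, then g_acyl =
	 * (240,000 + 2047) / 2048 = 118 and g_ncyl = 5000 - 118 = 4882.
	 */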
4435 	if (modesense_capacity >= capacity) {
4436 		SD_INFO(SD_LOG_COMMON, un,
4437 		    "sd_get_physical_geometry: adjusting acyl; "
4438 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4439 		    (modesense_capacity - capacity + spc - 1) / spc);
4440 		if (sector_size != 0) {
4441 			/* 1243403: NEC D38x7 drives don't support sec size */
4442 			pgeom_p->g_secsize = (unsigned short)sector_size;
4443 		}
4444 		pgeom_p->g_nsect    = (unsigned short)nsect;
4445 		pgeom_p->g_nhead    = (unsigned short)nhead;
4446 		pgeom_p->g_capacity = capacity;
4447 		pgeom_p->g_acyl	    =
4448 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4449 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4450 	}
4451 
4452 	pgeom_p->g_rpm    = (unsigned short)rpm;
4453 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4454 	ret = 0;
4455 
4456 	SD_INFO(SD_LOG_COMMON, un,
4457 	    "sd_get_physical_geometry: mode sense geometry:\n");
4458 	SD_INFO(SD_LOG_COMMON, un,
4459 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4460 	    nsect, sector_size, intrlv);
4461 	SD_INFO(SD_LOG_COMMON, un,
4462 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4463 	    nhead, ncyl, rpm, modesense_capacity);
4464 	SD_INFO(SD_LOG_COMMON, un,
4465 	    "sd_get_physical_geometry: (cached)\n");
4466 	SD_INFO(SD_LOG_COMMON, un,
4467 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4468 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4469 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4470 	SD_INFO(SD_LOG_COMMON, un,
4471 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4472 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4473 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4474 
4475 page4_exit:
4476 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4477 page3_exit:
4478 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4479 
4480 	return (ret);
4481 }
4482 
4483 /*
4484  *    Function: sd_get_virtual_geometry
4485  *
4486  * Description: Ask the controller to tell us about the target device.
4487  *
4488  *   Arguments: un - pointer to softstate
4489  *		capacity - disk capacity in #blocks
4490  *		lbasize - disk block size in bytes
4491  *
4492  *     Context: Kernel thread only
4493  */
4494 
4495 static int
4496 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4497     diskaddr_t capacity, int lbasize)
4498 {
4499 	uint_t	geombuf;
4500 	int	spc;
4501 
4502 	ASSERT(un != NULL);
4503 
4504 	/* Set sector size, and total number of sectors */
4505 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4506 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4507 
4508 	/* Let the HBA tell us its geometry */
4509 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4510 
4511 	/* A value of -1 indicates an undefined "geometry" property */
4512 	if (geombuf == (-1)) {
4513 		return (EINVAL);
4514 	}
4515 
4516 	/* Initialize the logical geometry cache. */
4517 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4518 	lgeom_p->g_nsect   = geombuf & 0xffff;
4519 	lgeom_p->g_secsize = un->un_sys_blocksize;
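	/*
	 * (For example, a hypothetical "geometry" value of 0x00ff003f
	 * decodes to 255 heads and 63 sectors per track.)
	 */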
4520 
4521 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4522 
4523 	/*
4524 	 * Note: The driver originally converted the capacity value from
4525 	 * target blocks to system blocks. However, the capacity value passed
4526 	 * to this routine is already in terms of system blocks (this scaling
4527 	 * is done when the READ CAPACITY command is issued and processed).
4528 	 * This 'error' may have gone undetected because the usage of g_ncyl
4529 	 * (which is based upon g_capacity) is very limited within the driver.
4530 	 */
4531 	lgeom_p->g_capacity = capacity;
4532 
4533 	/*
4534 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
4535 	 * the HBA may return zeroes if the device has been removed.
4536 	 */
4537 	if (spc == 0) {
4538 		lgeom_p->g_ncyl = 0;
4539 	} else {
4540 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4541 	}
4542 	lgeom_p->g_acyl = 0;
4543 
4544 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4545 	return (0);
4546 }
4547 
4548 /*
4549  *    Function: sd_update_block_info
4550  *
4551  * Description: Store the new target sector size and capacity in the
4552  *		soft state, marking each value valid if it is non-zero.
4553  *
4554  *   Arguments: un: unit struct.
4555  *		lbasize: new target sector size
4556  *		capacity: new target capacity, ie. block count
4557  *
4558  *     Context: Kernel thread context
4559  */
4560 
4561 static void
4562 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4563 {
4564 	if (lbasize != 0) {
4565 		un->un_tgt_blocksize = lbasize;
4566 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4567 	}
4568 
4569 	if (capacity != 0) {
4570 		un->un_blockcount		= capacity;
4571 		un->un_f_blockcount_is_valid	= TRUE;
4572 	}
4573 }
4574 
4575 
4576 /*
4577  *    Function: sd_register_devid
4578  *
4579  * Description: This routine will obtain the device id information from the
4580  *		target, obtain the serial number, and register the device
4581  *		id with the ddi framework.
4582  *
4583  *   Arguments: devi - the system's dev_info_t for the device.
4584  *		un - driver soft state (unit) structure
4585  *		reservation_flag - indicates if a reservation conflict
4586  *		occurred during attach
4587  *
4588  *     Context: Kernel Thread
4589  */
4590 static void
4591 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4592 {
4593 	int		rval		= 0;
4594 	uchar_t		*inq80		= NULL;
4595 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4596 	size_t		inq80_resid	= 0;
4597 	uchar_t		*inq83		= NULL;
4598 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4599 	size_t		inq83_resid	= 0;
4600 
4601 	ASSERT(un != NULL);
4602 	ASSERT(mutex_owned(SD_MUTEX(un)));
4603 	ASSERT((SD_DEVINFO(un)) == devi);
4604 
4605 	/*
4606 	 * This is the case of antiquated Sun disk drives that have the
4607 	 * FAB_DEVID property set in the disk_table.  These drives
4608 	 * manage their devids by storing them in the last 2 available sectors
4609 	 * on the drive and have them fabricated by the ddi layer by calling
4610 	 * ddi_devid_init and passing the DEVID_FAB flag.
4611 	 */
4612 	if (un->un_f_opt_fab_devid == TRUE) {
4613 		/*
4614 		 * Depending on EINVAL isn't reliable, since a reserved disk
4615 		 * may result in invalid geometry, so check to make sure a
4616 		 * reservation conflict did not occur during attach.
4617 		 */
4618 		if ((sd_get_devid(un) == EINVAL) &&
4619 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4620 			/*
4621 			 * The devid is invalid AND there is no reservation
4622 			 * conflict.  Fabricate a new devid.
4623 			 */
4624 			(void) sd_create_devid(un);
4625 		}
4626 
4627 		/* Register the devid if it exists */
4628 		if (un->un_devid != NULL) {
4629 			(void) ddi_devid_register(SD_DEVINFO(un),
4630 			    un->un_devid);
4631 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4632 			    "sd_register_devid: Devid Fabricated\n");
4633 		}
4634 		return;
4635 	}
4636 
4637 	/*
4638 	 * We check the availability of the World Wide Name (0x83) and Unit
4639 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
4640 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
4641 	 * 0x83 is available, that is the best choice.  Our next choice is
4642 	 * 0x80.  If neither is available, we munge the devid from the device
4643 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
4644 	 * to fabricate a devid for non-Sun qualified disks.
4645 	 */
4646 	if (sd_check_vpd_page_support(un) == 0) {
4647 		/* collect page 80 data if available */
4648 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4649 
4650 			mutex_exit(SD_MUTEX(un));
4651 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4652 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4653 			    0x01, 0x80, &inq80_resid);
4654 
4655 			if (rval != 0) {
4656 				kmem_free(inq80, inq80_len);
4657 				inq80 = NULL;
4658 				inq80_len = 0;
4659 			}
4660 			mutex_enter(SD_MUTEX(un));
4661 		}
4662 
4663 		/* collect page 83 data if available */
4664 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4665 			mutex_exit(SD_MUTEX(un));
4666 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4667 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4668 			    0x01, 0x83, &inq83_resid);
4669 
4670 			if (rval != 0) {
4671 				kmem_free(inq83, inq83_len);
4672 				inq83 = NULL;
4673 				inq83_len = 0;
4674 			}
4675 			mutex_enter(SD_MUTEX(un));
4676 		}
4677 	}
4678 
4679 	/* encode best devid possible based on data available */
4680 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4681 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4682 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4683 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4684 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4685 
4686 		/* devid successfully encoded, register devid */
4687 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4688 
4689 	} else {
4690 		/*
4691 		 * Unable to encode a devid based on data available.
4692 		 * This is not a Sun qualified disk.  Older Sun disk
4693 		 * drives that have the SD_FAB_DEVID property
4694 		 * set in the disk_table and non-Sun qualified
4695 		 * disks are treated in the same manner.  These
4696 		 * drives manage their devids by storing them in
4697 		 * the last 2 available sectors on the drive and
4698 		 * have them fabricated by the ddi layer by
4699 		 * calling ddi_devid_init and passing the
4700 		 * DEVID_FAB flag.
4701 		 * Create a fabricated devid only if one does
4702 		 * not already exist.
4703 		 */
4704 		if (sd_get_devid(un) == EINVAL) {
4705 			(void) sd_create_devid(un);
4706 		}
4707 		un->un_f_opt_fab_devid = TRUE;
4708 
4709 		/* Register the devid if it exists */
4710 		if (un->un_devid != NULL) {
4711 			(void) ddi_devid_register(SD_DEVINFO(un),
4712 			    un->un_devid);
4713 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4714 			    "sd_register_devid: devid fabricated using "
4715 			    "ddi framework\n");
4716 		}
4717 	}
4718 
4719 	/* clean up resources */
4720 	if (inq80 != NULL) {
4721 		kmem_free(inq80, inq80_len);
4722 	}
4723 	if (inq83 != NULL) {
4724 		kmem_free(inq83, inq83_len);
4725 	}
4726 }
4727 
4728 
4729 
4730 /*
4731  *    Function: sd_get_devid
4732  *
4733  * Description: This routine will return 0 if a valid device id has been
4734  *		obtained from the target and stored in the soft state. If a
4735  *		valid device id has not been previously read and stored, a
4736  *		read attempt will be made.
4737  *
4738  *   Arguments: un - driver soft state (unit) structure
4739  *
4740  * Return Code: 0 if we successfully get the device id
4741  *
4742  *     Context: Kernel Thread
4743  */
4744 
4745 static int
4746 sd_get_devid(struct sd_lun *un)
4747 {
4748 	struct dk_devid		*dkdevid;
4749 	ddi_devid_t		tmpid;
4750 	uint_t			*ip;
4751 	size_t			sz;
4752 	diskaddr_t		blk;
4753 	int			status;
4754 	int			chksum;
4755 	int			i;
4756 	size_t			buffer_size;
4757 
4758 	ASSERT(un != NULL);
4759 	ASSERT(mutex_owned(SD_MUTEX(un)));
4760 
4761 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4762 	    un);
4763 
4764 	if (un->un_devid != NULL) {
4765 		return (0);
4766 	}
4767 
4768 	mutex_exit(SD_MUTEX(un));
4769 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4770 	    (void *)SD_PATH_DIRECT) != 0) {
4771 		mutex_enter(SD_MUTEX(un));
4772 		return (EINVAL);
4773 	}
4774 
4775 	/*
4776 	 * Read and verify the device id, stored in the reserved cylinders
4777 	 * at the end of the disk. The backup label is on the odd sectors
4778 	 * of the last track of the last cylinder. The device id is on a
4779 	 * track of the next-to-last cylinder.
4780 	 */
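	/*
	 * Sketch of what the block read below is assumed to contain, based
	 * on the validation logic that follows (the authoritative layout is
	 * struct dk_devid in the system headers):
	 *
	 *	- a two-byte revision (dkd_rev_hi / dkd_rev_lo)
	 *	- the ddi_devid_t payload (dkd_devid)
	 *	- a stored checksum in the last 32-bit word of the block,
	 *	  the XOR of every preceding 32-bit word (see the loop and
	 *	  the DKD_GETCHKSUM() comparison below)
	 */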
4781 	mutex_enter(SD_MUTEX(un));
4782 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4783 	mutex_exit(SD_MUTEX(un));
4784 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4785 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4786 	    SD_PATH_DIRECT);
4787 	if (status != 0) {
4788 		goto error;
4789 	}
4790 
4791 	/* Validate the revision */
4792 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4793 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4794 		status = EINVAL;
4795 		goto error;
4796 	}
4797 
4798 	/* Calculate the checksum (XOR of every 32-bit word but the last) */
4799 	chksum = 0;
4800 	ip = (uint_t *)dkdevid;
4801 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4802 	    i++) {
4803 		chksum ^= ip[i];
4804 	}
4805 
4806 	/* Compare the checksums */
4807 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4808 		status = EINVAL;
4809 		goto error;
4810 	}
4811 
4812 	/* Validate the device id */
4813 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4814 		status = EINVAL;
4815 		goto error;
4816 	}
4817 
4818 	/*
4819 	 * Store the device id in the driver soft state
4820 	 */
4821 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4822 	tmpid = kmem_alloc(sz, KM_SLEEP);
4823 
4824 	mutex_enter(SD_MUTEX(un));
4825 
4826 	un->un_devid = tmpid;
4827 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4828 
4829 	kmem_free(dkdevid, buffer_size);
4830 
4831 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4832 
4833 	return (status);
4834 error:
4835 	mutex_enter(SD_MUTEX(un));
4836 	kmem_free(dkdevid, buffer_size);
4837 	return (status);
4838 }
4839 
4840 
4841 /*
4842  *    Function: sd_create_devid
4843  *
4844  * Description: This routine will fabricate the device id and write it
4845  *		to the disk.
4846  *
4847  *   Arguments: un - driver soft state (unit) structure
4848  *
4849  * Return Code: the fabricated device id, or NULL on failure
4850  *
4851  *     Context: Kernel Thread
4852  */
4853 
4854 static ddi_devid_t
4855 sd_create_devid(struct sd_lun *un)
4856 {
4857 	ASSERT(un != NULL);
4858 
4859 	/* Fabricate the devid */
4860 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4861 	    == DDI_FAILURE) {
4862 		return (NULL);
4863 	}
4864 
4865 	/* Write the devid to disk */
4866 	if (sd_write_deviceid(un) != 0) {
4867 		ddi_devid_free(un->un_devid);
4868 		un->un_devid = NULL;
4869 	}
4870 
4871 	return (un->un_devid);
4872 }
4873 
4874 
4875 /*
4876  *    Function: sd_write_deviceid
4877  *
4878  * Description: This routine will write the device id to the disk
4879  *		reserved sector.
4880  *
4881  *   Arguments: un - driver soft state (unit) structure
4882  *
4883  * Return Code: -1 if the devid block cannot be located
4884  *		otherwise, the value returned by sd_send_scsi_WRITE
4885  *
4886  *     Context: Kernel Thread
4887  */
4888 
4889 static int
4890 sd_write_deviceid(struct sd_lun *un)
4891 {
4892 	struct dk_devid		*dkdevid;
4893 	diskaddr_t		blk;
4894 	uint_t			*ip, chksum;
4895 	int			status;
4896 	int			i;
4897 
4898 	ASSERT(mutex_owned(SD_MUTEX(un)));
4899 
4900 	mutex_exit(SD_MUTEX(un));
4901 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4902 	    (void *)SD_PATH_DIRECT) != 0) {
4903 		mutex_enter(SD_MUTEX(un));
4904 		return (-1);
4905 	}
4906 
4907 
4908 	/* Allocate the buffer */
4909 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
4910 
4911 	/* Fill in the revision */
4912 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
4913 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
4914 
4915 	/* Copy in the device id */
4916 	mutex_enter(SD_MUTEX(un));
4917 	bcopy(un->un_devid, &dkdevid->dkd_devid,
4918 	    ddi_devid_sizeof(un->un_devid));
4919 	mutex_exit(SD_MUTEX(un));
4920 
4921 	/* Calculate the checksum (XOR of every 32-bit word but the last) */
4922 	chksum = 0;
4923 	ip = (uint_t *)dkdevid;
4924 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4925 	    i++) {
4926 		chksum ^= ip[i];
4927 	}
4928 
4929 	/* Fill-in checksum */
4930 	DKD_FORMCHKSUM(chksum, dkdevid);
4931 
4932 	/* Write the reserved sector */
4933 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
4934 	    SD_PATH_DIRECT);
4935 
4936 	kmem_free(dkdevid, un->un_sys_blocksize);
4937 
4938 	mutex_enter(SD_MUTEX(un));
4939 	return (status);
4940 }
4941 
4942 
4943 /*
4944  *    Function: sd_check_vpd_page_support
4945  *
4946  * Description: This routine sends an inquiry command with the EVPD bit set and
4947  *		a page code of 0x00 to the device. It is used to determine which
4948  *		vital product pages are available to find the devid. We are
4949  *		looking for pages 0x83 or 0x80.  A return value of -1 indicates
4950  *		that the device does not support the command.
4951  *
4952  *   Arguments: un  - driver soft state (unit) structure
4953  *
4954  * Return Code: 0 - success (supported pages recorded in un_vpd_page_mask)
4955  *		-1 - the device does not implement VPD pages
4956  *
4957  *     Context: This routine can sleep.
4958  */
4959 
4960 static int
4961 sd_check_vpd_page_support(struct sd_lun *un)
4962 {
4963 	uchar_t	*page_list	= NULL;
4964 	uchar_t	page_length	= 0xff;	/* Use max possible length */
4965 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
4966 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
4967 	int    	rval		= 0;
4968 	int	counter;
4969 
4970 	ASSERT(un != NULL);
4971 	ASSERT(mutex_owned(SD_MUTEX(un)));
4972 
4973 	mutex_exit(SD_MUTEX(un));
4974 
4975 	/*
4976 	 * We'll set the page length to the maximum to save figuring it out
4977 	 * with an additional call.
4978 	 */
4979 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
4980 
4981 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
4982 	    page_code, NULL);
4983 
4984 	mutex_enter(SD_MUTEX(un));
4985 
4986 	/*
4987 	 * Now we must validate that the device accepted the command, as some
4988 	 * drives do not support it.  If the drive does support it, we will
4989 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
4990 	 * not, we return -1.
4991 	 */
4992 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
4993 		/* Loop to find one of the 2 pages we need */
4994 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
4995 
4996 		/*
4997 		 * Pages are returned in ascending order, and 0x83 is what we
4998 		 * are hoping for.
4999 		 */
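		/*
		 * Illustrative (hypothetical) page 0x00 response from a
		 * disk supporting pages 0x00, 0x80, and 0x83, to show what
		 * the loop below walks:
		 *
		 *	byte 0: peripheral qualifier / device type
		 *	byte 1: page code (0x00)
		 *	byte 2: reserved
		 *	byte 3: page length (0x03 in this example)
		 *	byte 4: 0x00   byte 5: 0x80   byte 6: 0x83
		 */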
5000 		while ((page_list[counter] <= 0x83) &&
5001 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5002 		    VPD_HEAD_OFFSET))) {
5003 			/*
5004 			 * page_list[VPD_PAGE_LENGTH] counts only the page
5005 			 * codes after the header, hence the VPD_HEAD_OFFSET.
5006 			 */
5007 
5008 			switch (page_list[counter]) {
5009 			case 0x00:
5010 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5011 				break;
5012 			case 0x80:
5013 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5014 				break;
5015 			case 0x81:
5016 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5017 				break;
5018 			case 0x82:
5019 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5020 				break;
5021 			case 0x83:
5022 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5023 				break;
5024 			}
5025 			counter++;
5026 		}
5027 
5028 	} else {
5029 		rval = -1;
5030 
5031 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5032 		    "sd_check_vpd_page_support: This drive does not implement "
5033 		    "VPD pages.\n");
5034 	}
5035 
5036 	kmem_free(page_list, page_length);
5037 
5038 	return (rval);
5039 }
5040 
5041 
5042 /*
5043  *    Function: sd_setup_pm
5044  *
5045  * Description: Initialize Power Management on the device
5046  *
5047  *     Context: Kernel Thread
5048  */
5049 
5050 static void
5051 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5052 {
5053 	uint_t	log_page_size;
5054 	uchar_t	*log_page_data;
5055 	int	rval;
5056 
5057 	/*
5058 	 * Since we are called from attach, holding a mutex for
5059 	 * un is unnecessary. Because some of the routines called
5060 	 * from here require SD_MUTEX to not be held, assert this
5061 	 * right up front.
5062 	 */
5063 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5064 	/*
5065 	 * Since the sd device does not have the 'reg' property,
5066 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5067 	 * The following code is to tell cpr that this device
5068 	 * DOES need to be suspended and resumed.
5069 	 */
5070 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5071 	    "pm-hardware-state", "needs-suspend-resume");
5072 
5073 	/*
5074 	 * This complies with the new power management framework
5075 	 * for certain desktop machines. Create the pm_components
5076 	 * property as a string array property.
5077 	 */
5078 	if (un->un_f_pm_supported) {
5079 		/*
5080 		 * Not all devices have a motor; try starting it first.
5081 		 * Some devices may return ILLEGAL REQUEST and some
5082 		 * may hang.
5083 		 * The following START_STOP_UNIT is used to check whether
5084 		 * the target device has a motor.
5085 		 */
5086 		un->un_f_start_stop_supported = TRUE;
5087 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5088 		    SD_PATH_DIRECT) != 0) {
5089 			un->un_f_start_stop_supported = FALSE;
5090 		}
5091 
5092 		/*
5093 		 * Create pm properties anyway; otherwise the parent can't
5094 		 * go to sleep.
5095 		 */
5096 		(void) sd_create_pm_components(devi, un);
5097 		un->un_f_pm_is_enabled = TRUE;
5098 		return;
5099 	}
5100 
5101 	if (!un->un_f_log_sense_supported) {
5102 		un->un_power_level = SD_SPINDLE_ON;
5103 		un->un_f_pm_is_enabled = FALSE;
5104 		return;
5105 	}
5106 
5107 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5108 
5109 #ifdef	SDDEBUG
5110 	if (sd_force_pm_supported) {
5111 		/* Force a successful result */
5112 		rval = 1;
5113 	}
5114 #endif
5115 
5116 	/*
5117 	 * If the start-stop cycle counter log page is not supported
5118 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5119 	 * then we should not create the pm_components property.
5120 	 */
5121 	if (rval == -1) {
5122 		/*
5123 		 * Error.
5124 		 * Reading log sense failed, most likely this is
5125 		 * an older drive that does not support log sense.
5126 		 * If this fails auto-pm is not supported.
5127 		 */
5128 		un->un_power_level = SD_SPINDLE_ON;
5129 		un->un_f_pm_is_enabled = FALSE;
5130 
5131 	} else if (rval == 0) {
5132 		/*
5133 		 * Page not found.
5134 		 * The start stop cycle counter is implemented as page
5135 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
5136 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5137 		 */
5138 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5139 			/*
5140 			 * Page found, use this one.
5141 			 */
5142 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5143 			un->un_f_pm_is_enabled = TRUE;
5144 		} else {
5145 			/*
5146 			 * Error or page not found.
5147 			 * auto-pm is not supported for this device.
5148 			 */
5149 			un->un_power_level = SD_SPINDLE_ON;
5150 			un->un_f_pm_is_enabled = FALSE;
5151 		}
5152 	} else {
5153 		/*
5154 		 * Page found, use it.
5155 		 */
5156 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5157 		un->un_f_pm_is_enabled = TRUE;
5158 	}
5159 
5160 
5161 	if (un->un_f_pm_is_enabled == TRUE) {
5162 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5163 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5164 
5165 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5166 		    log_page_size, un->un_start_stop_cycle_page,
5167 		    0x01, 0, SD_PATH_DIRECT);
5168 #ifdef	SDDEBUG
5169 		if (sd_force_pm_supported) {
5170 			/* Force a successful result */
5171 			rval = 0;
5172 		}
5173 #endif
5174 
5175 		/*
5176 		 * If the log sense for the start/stop cycle counter page
5177 		 * succeeds, then power management is supported and we can
5178 		 * enable auto-pm.
5179 		 */
5180 		if (rval == 0)  {
5181 			(void) sd_create_pm_components(devi, un);
5182 		} else {
5183 			un->un_power_level = SD_SPINDLE_ON;
5184 			un->un_f_pm_is_enabled = FALSE;
5185 		}
5186 
5187 		kmem_free(log_page_data, log_page_size);
5188 	}
5189 }
5190 
5191 
5192 /*
5193  *    Function: sd_create_pm_components
5194  *
5195  * Description: Initialize PM property.
5196  *
5197  *     Context: Kernel thread context
5198  */
5199 
5200 static void
5201 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5202 {
5203 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
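	/*
	 * The strings above become the pm-components(9P) property created
	 * below: a single component named "spindle-motor" with power
	 * level 0 meaning off and level 1 meaning on.
	 */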
5204 
5205 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5206 
5207 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5208 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5209 		/*
5210 		 * When components are initially created they are idle,
5211 		 * power up any non-removables.
5212 		 * Note: the return value of pm_raise_power can't be used
5213 		 * for determining if PM should be enabled for this device.
5214 		 * Even if you check the return values and remove this
5215 		 * property created above, the PM framework will not honor the
5216 		 * change after the first call to pm_raise_power. Hence,
5217 		 * removal of that property does not help if pm_raise_power
5218 		 * fails. In the case of removable media, the start/stop
5219 		 * will fail if the media is not present.
5220 		 */
5221 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5222 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5223 			mutex_enter(SD_MUTEX(un));
5224 			un->un_power_level = SD_SPINDLE_ON;
5225 			mutex_enter(&un->un_pm_mutex);
5226 			/* Set to on and not busy. */
5227 			un->un_pm_count = 0;
5228 		} else {
5229 			mutex_enter(SD_MUTEX(un));
5230 			un->un_power_level = SD_SPINDLE_OFF;
5231 			mutex_enter(&un->un_pm_mutex);
5232 			/* Set to off. */
5233 			un->un_pm_count = -1;
5234 		}
5235 		mutex_exit(&un->un_pm_mutex);
5236 		mutex_exit(SD_MUTEX(un));
5237 	} else {
5238 		un->un_power_level = SD_SPINDLE_ON;
5239 		un->un_f_pm_is_enabled = FALSE;
5240 	}
5241 }
5242 
5243 
5244 /*
5245  *    Function: sd_ddi_suspend
5246  *
5247  * Description: Performs system power-down operations. This includes
5248  *		setting the drive state to indicate it is suspended so
5249  *		that no new commands will be accepted. Also, wait for
5250  *		all commands that are in transport or queued to a timer
5251  *		for retry to complete. All timeout threads are cancelled.
5252  *
5253  * Return Code: DDI_FAILURE or DDI_SUCCESS
5254  *
5255  *     Context: Kernel thread context
5256  */
5257 
5258 static int
5259 sd_ddi_suspend(dev_info_t *devi)
5260 {
5261 	struct	sd_lun	*un;
5262 	clock_t		wait_cmds_complete;
5263 
5264 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5265 	if (un == NULL) {
5266 		return (DDI_FAILURE);
5267 	}
5268 
5269 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5270 
5271 	mutex_enter(SD_MUTEX(un));
5272 
5273 	/* Return success if the device is already suspended. */
5274 	if (un->un_state == SD_STATE_SUSPENDED) {
5275 		mutex_exit(SD_MUTEX(un));
5276 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5277 		    "device already suspended, exiting\n");
5278 		return (DDI_SUCCESS);
5279 	}
5280 
5281 	/* Return failure if the device is being used by HA */
5282 	if (un->un_resvd_status &
5283 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5284 		mutex_exit(SD_MUTEX(un));
5285 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5286 		    "device in use by HA, exiting\n");
5287 		return (DDI_FAILURE);
5288 	}
5289 
5290 	/*
5291 	 * Return failure if the device is in a resource wait
5292 	 * or power changing state.
5293 	 */
5294 	if ((un->un_state == SD_STATE_RWAIT) ||
5295 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5296 		mutex_exit(SD_MUTEX(un));
5297 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5298 		    "device in resource wait state, exiting\n");
5299 		return (DDI_FAILURE);
5300 	}
5301 
5302 
5303 	un->un_save_state = un->un_last_state;
5304 	New_state(un, SD_STATE_SUSPENDED);
5305 
5306 	/*
5307 	 * Wait for all commands that are in transport or queued to a timer
5308 	 * for retry to complete.
5309 	 *
5310 	 * While waiting, no new commands will be accepted or sent because of
5311 	 * the new state we set above.
5312 	 *
5313 	 * Wait till current operation has completed. If we are in the resource
5314 	 * wait state (with an intr outstanding) then we need to wait till the
5315 	 * intr completes and starts the next cmd. We want to wait for
5316 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5317 	 */
5318 	wait_cmds_complete = ddi_get_lbolt() +
5319 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
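	/*
	 * Note: drv_usectohz(1000000) is one second's worth of clock
	 * ticks, so the deadline above is sd_wait_cmds_complete seconds
	 * from now; cv_timedwait() below returns -1 once that absolute
	 * lbolt time has passed.
	 */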
5320 
5321 	while (un->un_ncmds_in_transport != 0) {
5322 		/*
5323 		 * Fail if commands do not finish in the specified time.
5324 		 */
5325 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5326 		    wait_cmds_complete) == -1) {
5327 			/*
5328 			 * Undo the state changes made above. Everything
5329 			 * must go back to its original value.
5330 			 */
5331 			Restore_state(un);
5332 			un->un_last_state = un->un_save_state;
5333 			/* Wake up any threads that might be waiting. */
5334 			cv_broadcast(&un->un_suspend_cv);
5335 			mutex_exit(SD_MUTEX(un));
5336 			SD_ERROR(SD_LOG_IO_PM, un,
5337 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5338 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5339 			return (DDI_FAILURE);
5340 		}
5341 	}
5342 
5343 	/*
5344 	 * Cancel SCSI watch thread and timeouts, if any are active
5345 	 */
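	/*
	 * Note on the pattern used below: each timeout id is captured and
	 * cleared while holding SD_MUTEX, then the mutex is dropped before
	 * calling untimeout(9F). untimeout() may wait for a handler that
	 * is already running, and these handlers take SD_MUTEX, so calling
	 * it with the mutex held could deadlock.
	 */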
5346 
5347 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5348 		opaque_t temp_token = un->un_swr_token;
5349 		mutex_exit(SD_MUTEX(un));
5350 		scsi_watch_suspend(temp_token);
5351 		mutex_enter(SD_MUTEX(un));
5352 	}
5353 
5354 	if (un->un_reset_throttle_timeid != NULL) {
5355 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5356 		un->un_reset_throttle_timeid = NULL;
5357 		mutex_exit(SD_MUTEX(un));
5358 		(void) untimeout(temp_id);
5359 		mutex_enter(SD_MUTEX(un));
5360 	}
5361 
5362 	if (un->un_dcvb_timeid != NULL) {
5363 		timeout_id_t temp_id = un->un_dcvb_timeid;
5364 		un->un_dcvb_timeid = NULL;
5365 		mutex_exit(SD_MUTEX(un));
5366 		(void) untimeout(temp_id);
5367 		mutex_enter(SD_MUTEX(un));
5368 	}
5369 
5370 	mutex_enter(&un->un_pm_mutex);
5371 	if (un->un_pm_timeid != NULL) {
5372 		timeout_id_t temp_id = un->un_pm_timeid;
5373 		un->un_pm_timeid = NULL;
5374 		mutex_exit(&un->un_pm_mutex);
5375 		mutex_exit(SD_MUTEX(un));
5376 		(void) untimeout(temp_id);
5377 		mutex_enter(SD_MUTEX(un));
5378 	} else {
5379 		mutex_exit(&un->un_pm_mutex);
5380 	}
5381 
5382 	if (un->un_retry_timeid != NULL) {
5383 		timeout_id_t temp_id = un->un_retry_timeid;
5384 		un->un_retry_timeid = NULL;
5385 		mutex_exit(SD_MUTEX(un));
5386 		(void) untimeout(temp_id);
5387 		mutex_enter(SD_MUTEX(un));
5388 	}
5389 
5390 	if (un->un_direct_priority_timeid != NULL) {
5391 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5392 		un->un_direct_priority_timeid = NULL;
5393 		mutex_exit(SD_MUTEX(un));
5394 		(void) untimeout(temp_id);
5395 		mutex_enter(SD_MUTEX(un));
5396 	}
5397 
5398 	if (un->un_f_is_fibre == TRUE) {
5399 		/*
5400 		 * Remove callbacks for insert and remove events
5401 		 */
5402 		if (un->un_insert_event != NULL) {
5403 			mutex_exit(SD_MUTEX(un));
5404 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5405 			mutex_enter(SD_MUTEX(un));
5406 			un->un_insert_event = NULL;
5407 		}
5408 
5409 		if (un->un_remove_event != NULL) {
5410 			mutex_exit(SD_MUTEX(un));
5411 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5412 			mutex_enter(SD_MUTEX(un));
5413 			un->un_remove_event = NULL;
5414 		}
5415 	}
5416 
5417 	mutex_exit(SD_MUTEX(un));
5418 
5419 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5420 
5421 	return (DDI_SUCCESS);
5422 }
5423 
5424 
5425 /*
5426  *    Function: sd_ddi_pm_suspend
5427  *
5428  * Description: Set the drive state to low power.
5429  *		Someone else is required to actually change the drive
5430  *		power level.
5431  *
5432  *   Arguments: un - driver soft state (unit) structure
5433  *
5434  * Return Code: DDI_FAILURE or DDI_SUCCESS
5435  *
5436  *     Context: Kernel thread context
5437  */
5438 
5439 static int
5440 sd_ddi_pm_suspend(struct sd_lun *un)
5441 {
5442 	ASSERT(un != NULL);
5443 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5444 
5445 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5446 	mutex_enter(SD_MUTEX(un));
5447 
5448 	/*
5449 	 * Exit if power management is not enabled for this device, or if
5450 	 * the device is being used by HA.
5451 	 */
5452 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5453 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5454 		mutex_exit(SD_MUTEX(un));
5455 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5456 		return (DDI_SUCCESS);
5457 	}
5458 
5459 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5460 	    un->un_ncmds_in_driver);
5461 
5462 	/*
5463 	 * See if the device is not busy, i.e.:
5464 	 *    - we have no commands in the driver for this device
5465 	 *    - not waiting for resources
5466 	 */
5467 	if ((un->un_ncmds_in_driver == 0) &&
5468 	    (un->un_state != SD_STATE_RWAIT)) {
5469 		/*
5470 		 * The device is not busy, so it is OK to go to low power state.
5471 		 * Indicate low power, but rely on someone else to actually
5472 		 * change it.
5473 		 */
5474 		mutex_enter(&un->un_pm_mutex);
5475 		un->un_pm_count = -1;
5476 		mutex_exit(&un->un_pm_mutex);
5477 		un->un_power_level = SD_SPINDLE_OFF;
5478 	}
5479 
5480 	mutex_exit(SD_MUTEX(un));
5481 
5482 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5483 
5484 	return (DDI_SUCCESS);
5485 }
5486 
5487 
5488 /*
5489  *    Function: sd_ddi_resume
5490  *
5491  * Description: Performs system power-up operations.
5492  *
5493  * Return Code: DDI_SUCCESS
5494  *		DDI_FAILURE
5495  *
5496  *     Context: Kernel thread context
5497  */
5498 
5499 static int
5500 sd_ddi_resume(dev_info_t *devi)
5501 {
5502 	struct	sd_lun	*un;
5503 
5504 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5505 	if (un == NULL) {
5506 		return (DDI_FAILURE);
5507 	}
5508 
5509 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5510 
5511 	mutex_enter(SD_MUTEX(un));
5512 	Restore_state(un);
5513 
5514 	/*
5515 	 * Restore the state which was saved, to give
5516 	 * the right state in un_last_state.
5517 	 */
5518 	un->un_last_state = un->un_save_state;
5519 	/*
5520 	 * Note: throttle comes back at full.
5521 	 * Also note: this MUST be done before calling pm_raise_power
5522 	 * otherwise the system can get hung in biowait. The scenario where
5523 	 * this'll happen is under cpr suspend. Writing of the system
5524 	 * state goes through sddump, which writes 0 to un_throttle. If
5525 	 * writing the system state then fails, example if the partition is
5526 	 * too small, then cpr attempts a resume. If throttle isn't restored
5527 	 * from the saved value until after calling pm_raise_power then
5528 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5529 	 * in biowait.
5530 	 */
5531 	un->un_throttle = un->un_saved_throttle;
5532 
5533 	/*
5534 	 * The chance of failure is very rare, as the only command issued
5535 	 * in the power entry point is START, on a 0->1 or unknown->1
5536 	 * transition. Put the device in the SPINDLE ON state regardless of
5537 	 * the state at which suspend was done. Ignore the return value, as
5538 	 * the resume should not be failed. In the case of removable media,
5539 	 * the media need not be inserted, so there is a chance that raise
5540 	 * power will fail with media not present.
5541 	 */
5542 	if (un->un_f_attach_spinup) {
5543 		mutex_exit(SD_MUTEX(un));
5544 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5545 		mutex_enter(SD_MUTEX(un));
5546 	}
5547 
5548 	/*
5549 	 * Don't broadcast to the suspend cv and therefore possibly
5550 	 * start I/O until after power has been restored.
5551 	 */
5552 	cv_broadcast(&un->un_suspend_cv);
5553 	cv_broadcast(&un->un_state_cv);
5554 
5555 	/* restart thread */
5556 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5557 		scsi_watch_resume(un->un_swr_token);
5558 	}
5559 
5560 #if (defined(__fibre))
5561 	if (un->un_f_is_fibre == TRUE) {
5562 		/*
5563 		 * Add callbacks for insert and remove events
5564 		 */
5565 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5566 			sd_init_event_callbacks(un);
5567 		}
5568 	}
5569 #endif
5570 
5571 	/*
5572 	 * Transport any pending commands to the target.
5573 	 *
5574 	 * If this is a low-activity device, commands in queue will have to
5575 	 * wait until new commands come in, which may take a while. Also, we
5576 	 * specifically don't check un_ncmds_in_transport because we know that
5577 	 * there really are no commands in progress after the unit was
5578 	 * suspended and we could have reached the throttle level, been
5579 	 * suspended, and have no new commands coming in for awhile. Highly
5580 	 * unlikely, but so is the low-activity disk scenario.
5581 	 */
5582 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5583 
5584 	sd_start_cmds(un, NULL);
5585 	mutex_exit(SD_MUTEX(un));
5586 
5587 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5588 
5589 	return (DDI_SUCCESS);
5590 }
5591 
5592 
5593 /*
5594  *    Function: sd_ddi_pm_resume
5595  *
5596  * Description: Set the drive state to powered on.
5597  *		Someone else is required to actually change the drive
5598  *		power level.
5599  *
5600  *   Arguments: un - driver soft state (unit) structure
5601  *
5602  * Return Code: DDI_SUCCESS
5603  *
5604  *     Context: Kernel thread context
5605  */
5606 
5607 static int
5608 sd_ddi_pm_resume(struct sd_lun *un)
5609 {
5610 	ASSERT(un != NULL);
5611 
5612 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5613 	mutex_enter(SD_MUTEX(un));
5614 	un->un_power_level = SD_SPINDLE_ON;
5615 
5616 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5617 	mutex_enter(&un->un_pm_mutex);
5618 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
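		/*
		 * un_pm_count is -1 while the device is in low power (set
		 * in sd_ddi_pm_suspend() and sd_create_pm_components()),
		 * so the increment below brings it to 0: on and idle.
		 */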
5619 		un->un_pm_count++;
5620 		ASSERT(un->un_pm_count == 0);
5621 		/*
5622 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5623 		 * un_suspend_cv is for a system resume, not a power management
5624 		 * device resume. (4297749)
5625 		 *	 cv_broadcast(&un->un_suspend_cv);
5626 		 */
5627 	}
5628 	mutex_exit(&un->un_pm_mutex);
5629 	mutex_exit(SD_MUTEX(un));
5630 
5631 	return (DDI_SUCCESS);
5632 }
5633 
5634 
5635 /*
5636  *    Function: sd_pm_idletimeout_handler
5637  *
5638  * Description: A timer routine that's active only while a device is busy.
5639  *		The purpose is to extend slightly the pm framework's busy
5640  *		view of the device to prevent busy/idle thrashing for
5641  *		back-to-back commands. Do this by comparing the current time
5642  *		to the time at which the last command completed and when the
5643  *		difference is greater than sd_pm_idletime, call
5644  *		pm_idle_component. In addition to indicating idle to the pm
5645  *		framework, update the chain type to again use the internal pm
5646  *		layers of the driver.
5647  *
5648  *   Arguments: arg - driver soft state (unit) structure
5649  *
5650  *     Context: Executes in a timeout(9F) thread context
5651  */
5652 
5653 static void
5654 sd_pm_idletimeout_handler(void *arg)
5655 {
5656 	struct sd_lun *un = arg;
5657 
5658 	time_t	now;
5659 
5660 	mutex_enter(&sd_detach_mutex);
5661 	if (un->un_detach_count != 0) {
5662 		/* Abort if the instance is detaching */
5663 		mutex_exit(&sd_detach_mutex);
5664 		return;
5665 	}
5666 	mutex_exit(&sd_detach_mutex);
5667 
5668 	now = ddi_get_time();
5669 	/*
5670 	 * Grab both mutexes, in the proper order, since we're accessing
5671 	 * both PM and softstate variables.
5672 	 */
5673 	mutex_enter(SD_MUTEX(un));
5674 	mutex_enter(&un->un_pm_mutex);
5675 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5676 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5677 		/*
5678 		 * Update the chain types.
5679 		 * This takes effect on the next new command received.
5680 		 */
5681 		if (un->un_f_non_devbsize_supported) {
5682 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5683 		} else {
5684 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5685 		}
5686 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5687 
5688 		SD_TRACE(SD_LOG_IO_PM, un,
5689 		    "sd_pm_idletimeout_handler: idling device\n");
5690 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5691 		un->un_pm_idle_timeid = NULL;
5692 	} else {
5693 		un->un_pm_idle_timeid =
5694 			timeout(sd_pm_idletimeout_handler, un,
5695 			(drv_usectohz((clock_t)300000))); /* 300 ms. */
5696 	}
5697 	mutex_exit(&un->un_pm_mutex);
5698 	mutex_exit(SD_MUTEX(un));
5699 }
5700 
5701 
5702 /*
5703  *    Function: sd_pm_timeout_handler
5704  *
5705  * Description: Callback to tell framework we are idle.
5706  *
5707  *     Context: timeout(9f) thread context.
5708  */
5709 
5710 static void
5711 sd_pm_timeout_handler(void *arg)
5712 {
5713 	struct sd_lun *un = arg;
5714 
5715 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5716 	mutex_enter(&un->un_pm_mutex);
5717 	un->un_pm_timeid = NULL;
5718 	mutex_exit(&un->un_pm_mutex);
5719 }
5720 
5721 
5722 /*
5723  *    Function: sdpower
5724  *
5725  * Description: PM entry point.
5726  *
5727  * Return Code: DDI_SUCCESS
5728  *		DDI_FAILURE
5729  *
5730  *     Context: Kernel thread context
5731  */
5732 
5733 static int
5734 sdpower(dev_info_t *devi, int component, int level)
5735 {
5736 	struct sd_lun	*un;
5737 	int		instance;
5738 	int		rval = DDI_SUCCESS;
5739 	uint_t		i, log_page_size, maxcycles, ncycles;
5740 	uchar_t		*log_page_data;
5741 	int		log_sense_page;
5742 	int		medium_present;
5743 	time_t		intvlp;
5744 	dev_t		dev;
5745 	struct pm_trans_data	sd_pm_tran_data;
5746 	uchar_t		save_state;
5747 	int		sval;
5748 	uchar_t		state_before_pm;
5749 	int		got_semaphore_here;
5750 
5751 	instance = ddi_get_instance(devi);
5752 
5753 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5754 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5755 	    component != 0) {
5756 		return (DDI_FAILURE);
5757 	}
5758 
5759 	dev = sd_make_device(SD_DEVINFO(un));
5760 
5761 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5762 
5763 	/*
5764 	 * Must synchronize power down with close.
5765 	 * Attempt to decrement/acquire the open/close semaphore,
5766 	 * but do NOT wait on it. If it's not greater than zero,
5767 	 * ie. it can't be decremented without waiting, then
5768 	 * someone else, either open or close, already has it
5769 	 * and the try returns 0. Use that knowledge here to determine
5770 	 * if it's OK to change the device power level.
5771 	 * Also, only increment it on exit if it was decremented, ie. gotten,
5772 	 * here.
5773 	 */
5774 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5775 
5776 	mutex_enter(SD_MUTEX(un));
5777 
5778 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5779 	    un->un_ncmds_in_driver);
5780 
5781 	/*
5782 	 * If un_ncmds_in_driver is non-zero, commands are already being
5783 	 * processed in the driver; if the semaphore was not gotten here,
5784 	 * an open or close is being processed. Either way, somebody is
5785 	 * requesting to go to low power, which can't happen now, so we
5786 	 * need to return failure.
5787 	 */
5788 	if ((level == SD_SPINDLE_OFF) &&
5789 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5790 		mutex_exit(SD_MUTEX(un));
5791 
5792 		if (got_semaphore_here != 0) {
5793 			sema_v(&un->un_semoclose);
5794 		}
5795 		SD_TRACE(SD_LOG_IO_PM, un,
5796 		    "sdpower: exit, device has queued cmds.\n");
5797 		return (DDI_FAILURE);
5798 	}
5799 
5800 	/*
5801 	 * If the state is OFFLINE, the disk is completely dead; changing
5802 	 * its power level would require sending it commands, which would
5803 	 * fail anyway, so just return failure here.
5804 	 *
5805 	 * Power changes to a device that's OFFLINE or SUSPENDED
5806 	 * are not allowed.
5807 	 */
5808 	if ((un->un_state == SD_STATE_OFFLINE) ||
5809 	    (un->un_state == SD_STATE_SUSPENDED)) {
5810 		mutex_exit(SD_MUTEX(un));
5811 
5812 		if (got_semaphore_here != 0) {
5813 			sema_v(&un->un_semoclose);
5814 		}
5815 		SD_TRACE(SD_LOG_IO_PM, un,
5816 		    "sdpower: exit, device is off-line.\n");
5817 		return (DDI_FAILURE);
5818 	}
5819 
5820 	/*
5821 	 * Change the device's state to indicate its power level
5822 	 * is being changed. Do this to prevent a power off in the
5823 	 * middle of commands, which is especially bad on devices
5824 	 * that are really powered off instead of just spun down.
5825 	 */
5826 	state_before_pm = un->un_state;
5827 	un->un_state = SD_STATE_PM_CHANGING;
5828 
5829 	mutex_exit(SD_MUTEX(un));
5830 
5831 	/*
5832 	 * If "pm-capable" property is set to TRUE by HBA drivers,
5833 	 * bypass the following checking, otherwise, check the log
5834 	 * sense information for this device
5835 	 */
5836 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5837 		/*
5838 		 * Get the log sense information to understand whether the
5839 		 * power cycle counts have gone beyond the threshold.
5840 		 */
5841 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5842 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5843 
5844 		mutex_enter(SD_MUTEX(un));
5845 		log_sense_page = un->un_start_stop_cycle_page;
5846 		mutex_exit(SD_MUTEX(un));
5847 
5848 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5849 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5850 #ifdef	SDDEBUG
5851 		if (sd_force_pm_supported) {
5852 			/* Force a successful result */
5853 			rval = 0;
5854 		}
5855 #endif
5856 		if (rval != 0) {
5857 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5858 			    "Log Sense Failed\n");
5859 			kmem_free(log_page_data, log_page_size);
5860 			/* Cannot support power management on those drives */
5861 
5862 			if (got_semaphore_here != 0) {
5863 				sema_v(&un->un_semoclose);
5864 			}
5865 			/*
5866 			 * On exit put the state back to its original value
5867 			 * and broadcast to anyone waiting for the power
5868 			 * change completion.
5869 			 */
5870 			mutex_enter(SD_MUTEX(un));
5871 			un->un_state = state_before_pm;
5872 			cv_broadcast(&un->un_suspend_cv);
5873 			mutex_exit(SD_MUTEX(un));
5874 			SD_TRACE(SD_LOG_IO_PM, un,
5875 			    "sdpower: exit, Log Sense Failed.\n");
5876 			return (DDI_FAILURE);
5877 		}
5878 
5879 		/*
5880 		 * From the page data - Convert the essential information to
5881 		 * pm_trans_data
5882 		 */
5883 		maxcycles =
5884 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5885 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5886 
5887 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5888 
5889 		ncycles =
5890 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5891 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5892 
5893 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5894 
5895 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5896 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5897 			    log_page_data[8+i];
5898 		}
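		/*
		 * Offset sketch for the fields parsed above, assuming the
		 * standard start/stop cycle counter log page layout (a
		 * 4-byte page header followed by parameters, each with its
		 * own 4-byte header). The offsets are what this code relies
		 * on; the parameter names are from the SPC standard and are
		 * informational only:
		 *
		 *	0x08...    service/accounting date bytes
		 *	0x1c-0x1f  specified cycle count over device lifetime
		 *	0x24-0x27  accumulated start/stop cycles
		 */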
5899 
5900 		kmem_free(log_page_data, log_page_size);
5901 
5902 		/*
5903 		 * Call pm_trans_check routine to get the Ok from
5904 		 * the global policy
5905 		 */
5906 
5907 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
5908 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
5909 
5910 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
5911 #ifdef	SDDEBUG
5912 		if (sd_force_pm_supported) {
5913 			/* Force a successful result */
5914 			rval = 1;
5915 		}
5916 #endif
5917 		switch (rval) {
5918 		case 0:
5919 			/*
5920 			 * Either it is not OK to power cycle, or there was an
5921 			 * error in the parameters passed; intvlp gives the
5922 			 * advised time before a power cycle should next be
5923 			 * considered. We are supposed to pretend we are busy
5924 			 * so that the pm framework will not call our power
5925 			 * entry point. To do that, install a timeout handler
5926 			 * and wait for the recommended time to elapse so that
5927 			 * power management can be effective again.
5928 			 *
5929 			 * To effect this behavior, call pm_busy_component to
5930 			 * indicate to the framework this device is busy.
5931 			 * By not adjusting un_pm_count, the rest of PM in
5932 			 * the driver will function normally, independently of
5933 			 * this; but because the framework is told the device
5934 			 * is busy, it won't attempt powering down until it
5935 			 * gets a matching idle. The timeout handler sends this.
5936 			 * Note: sd_pm_entry can't be called here to do this
5937 			 * because sdpower may have been called as a result
5938 			 * of a call to pm_raise_power from within sd_pm_entry.
5939 			 *
5940 			 * If a timeout handler is already active then
5941 			 * don't install another.
5942 			 */
5943 			mutex_enter(&un->un_pm_mutex);
5944 			if (un->un_pm_timeid == NULL) {
5945 				un->un_pm_timeid =
5946 				    timeout(sd_pm_timeout_handler,
5947 				    un, intvlp * drv_usectohz(1000000));
5948 				mutex_exit(&un->un_pm_mutex);
5949 				(void) pm_busy_component(SD_DEVINFO(un), 0);
5950 			} else {
5951 				mutex_exit(&un->un_pm_mutex);
5952 			}
5953 			if (got_semaphore_here != 0) {
5954 				sema_v(&un->un_semoclose);
5955 			}
5956 			/*
5957 			 * On exit put the state back to its original value
5958 			 * and broadcast to anyone waiting for the power
5959 			 * change completion.
5960 			 */
5961 			mutex_enter(SD_MUTEX(un));
5962 			un->un_state = state_before_pm;
5963 			cv_broadcast(&un->un_suspend_cv);
5964 			mutex_exit(SD_MUTEX(un));
5965 
5966 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
5967 			    "trans check Failed, not ok to power cycle.\n");
5968 			return (DDI_FAILURE);
5969 
5970 		case -1:
5971 			if (got_semaphore_here != 0) {
5972 				sema_v(&un->un_semoclose);
5973 			}
5974 			/*
5975 			 * On exit put the state back to its original value
5976 			 * and broadcast to anyone waiting for the power
5977 			 * change completion.
5978 			 */
5979 			mutex_enter(SD_MUTEX(un));
5980 			un->un_state = state_before_pm;
5981 			cv_broadcast(&un->un_suspend_cv);
5982 			mutex_exit(SD_MUTEX(un));
5983 			SD_TRACE(SD_LOG_IO_PM, un,
5984 			    "sdpower: exit, trans check command Failed.\n");
5985 			return (DDI_FAILURE);
5986 		}
5987 	}
5988 
5989 	if (level == SD_SPINDLE_OFF) {
5990 		/*
5991 		 * Save the last state... if the STOP FAILS we need it
5992 		 * for restoring
5993 		 */
5994 		mutex_enter(SD_MUTEX(un));
5995 		save_state = un->un_last_state;
5996 		/*
5997 		 * There must not be any cmds getting processed
5998 		 * in the driver when we get here. Power to the
5999 		 * device is potentially going off.
6000 		 */
6001 		ASSERT(un->un_ncmds_in_driver == 0);
6002 		mutex_exit(SD_MUTEX(un));
6003 
6004 		/*
6005 		 * For now suspend the device completely before spindle is
6006 		 * turned off
6007 		 */
6008 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6009 			if (got_semaphore_here != 0) {
6010 				sema_v(&un->un_semoclose);
6011 			}
6012 			/*
6013 			 * On exit put the state back to its original value
6014 			 * and broadcast to anyone waiting for the power
6015 			 * change completion.
6016 			 */
6017 			mutex_enter(SD_MUTEX(un));
6018 			un->un_state = state_before_pm;
6019 			cv_broadcast(&un->un_suspend_cv);
6020 			mutex_exit(SD_MUTEX(un));
6021 			SD_TRACE(SD_LOG_IO_PM, un,
6022 			    "sdpower: exit, PM suspend Failed.\n");
6023 			return (DDI_FAILURE);
6024 		}
6025 	}
6026 
6027 	/*
6028 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6029 	 * close, or strategy. Dump no longer uses this routine; it uses its
6030 	 * own code so it can be done in polled mode.
6031 	 */
6032 
6033 	medium_present = TRUE;
6034 
6035 	/*
6036 	 * When powering up, issue a TUR in case the device is at unit
6037 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6038 	 * a deadlock on un_pm_busy_cv will occur.
6039 	 */
6040 	if (level == SD_SPINDLE_ON) {
6041 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6042 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6043 	}
6044 
6045 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6046 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6047 
6048 	sval = sd_send_scsi_START_STOP_UNIT(un,
6049 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6050 	    SD_PATH_DIRECT);
6051 	/* Command failed, check for media present. */
6052 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6053 		medium_present = FALSE;
6054 	}
6055 
6056 	/*
6057 	 * The conditions of interest here are:
6058 	 *   if a spindle off with media present fails,
6059 	 *	then restore the state and return an error.
6060 	 *   else if a spindle on fails,
6061 	 *	then return an error (there's no state to restore).
6062 	 * In all other cases we setup for the new state
6063 	 * and return success.
6064 	 */
6065 	switch (level) {
6066 	case SD_SPINDLE_OFF:
6067 		if ((medium_present == TRUE) && (sval != 0)) {
6068 			/* The stop command from above failed */
6069 			rval = DDI_FAILURE;
6070 			/*
6071 			 * The stop command failed, and we have media
6072 			 * present. Put the level back by calling
6073 			 * sd_ddi_pm_resume() and set the state back to
6074 			 * its previous value.
6075 			 */
6076 			(void) sd_ddi_pm_resume(un);
6077 			mutex_enter(SD_MUTEX(un));
6078 			un->un_last_state = save_state;
6079 			mutex_exit(SD_MUTEX(un));
6080 			break;
6081 		}
6082 		/*
6083 		 * The stop command from above succeeded.
6084 		 */
6085 		if (un->un_f_monitor_media_state) {
6086 			/*
6087 			 * Terminate watch thread in case of removable media
6088 			 * devices going into low power state. This is as per
6089 			 * the requirements of the pm framework; otherwise commands
6090 			 * will be generated for the device (through watch
6091 			 * thread), even when the device is in low power state.
6092 			 */
6093 			mutex_enter(SD_MUTEX(un));
6094 			un->un_f_watcht_stopped = FALSE;
6095 			if (un->un_swr_token != NULL) {
6096 				opaque_t temp_token = un->un_swr_token;
6097 				un->un_f_watcht_stopped = TRUE;
6098 				un->un_swr_token = NULL;
6099 				mutex_exit(SD_MUTEX(un));
6100 				(void) scsi_watch_request_terminate(temp_token,
6101 				    SCSI_WATCH_TERMINATE_WAIT);
6102 			} else {
6103 				mutex_exit(SD_MUTEX(un));
6104 			}
6105 		}
6106 		break;
6107 
6108 	default:	/* The level requested is spindle on... */
6109 		/*
6110 		 * Legacy behavior: return success on a failed spinup
6111 		 * if there is no media in the drive.
6112 		 * Do this by looking at medium_present here.
6113 		 */
6114 		if ((sval != 0) && medium_present) {
6115 			/* The start command from above failed */
6116 			rval = DDI_FAILURE;
6117 			break;
6118 		}
6119 		/*
6120 		 * The start command from above succeeded.
6121 		 * Resume the device now that we have
6122 		 * started the disk.
6123 		 */
6124 		(void) sd_ddi_pm_resume(un);
6125 
6126 		/*
6127 		 * Resume the watch thread since it was suspended
6128 		 * when the device went into low power mode.
6129 		 */
6130 		if (un->un_f_monitor_media_state) {
6131 			mutex_enter(SD_MUTEX(un));
6132 			if (un->un_f_watcht_stopped == TRUE) {
6133 				opaque_t temp_token;
6134 
6135 				un->un_f_watcht_stopped = FALSE;
6136 				mutex_exit(SD_MUTEX(un));
6137 				temp_token = scsi_watch_request_submit(
6138 				    SD_SCSI_DEVP(un),
6139 				    sd_check_media_time,
6140 				    SENSE_LENGTH, sd_media_watch_cb,
6141 				    (caddr_t)dev);
6142 				mutex_enter(SD_MUTEX(un));
6143 				un->un_swr_token = temp_token;
6144 			}
6145 			mutex_exit(SD_MUTEX(un));
6146 		}
6147 	}
6148 	if (got_semaphore_here != 0) {
6149 		sema_v(&un->un_semoclose);
6150 	}
6151 	/*
6152 	 * On exit put the state back to its original value
6153 	 * and broadcast to anyone waiting for the power
6154 	 * change completion.
6155 	 */
6156 	mutex_enter(SD_MUTEX(un));
6157 	un->un_state = state_before_pm;
6158 	cv_broadcast(&un->un_suspend_cv);
6159 	mutex_exit(SD_MUTEX(un));
6160 
6161 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6162 
6163 	return (rval);
6164 }
6165 
6166 
6167 
6168 /*
6169  *    Function: sdattach
6170  *
6171  * Description: Driver's attach(9e) entry point function.
6172  *
6173  *   Arguments: devi - opaque device info handle
6174  *		cmd  - attach  type
6175  *
6176  * Return Code: DDI_SUCCESS
6177  *		DDI_FAILURE
6178  *
6179  *     Context: Kernel thread context
6180  */
6181 
6182 static int
6183 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6184 {
6185 	switch (cmd) {
6186 	case DDI_ATTACH:
6187 		return (sd_unit_attach(devi));
6188 	case DDI_RESUME:
6189 		return (sd_ddi_resume(devi));
6190 	default:
6191 		break;
6192 	}
6193 	return (DDI_FAILURE);
6194 }
6195 
6196 
6197 /*
6198  *    Function: sddetach
6199  *
6200  * Description: Driver's detach(9E) entry point function.
6201  *
6202  *   Arguments: devi - opaque device info handle
6203  *		cmd  - detach  type
6204  *
6205  * Return Code: DDI_SUCCESS
6206  *		DDI_FAILURE
6207  *
6208  *     Context: Kernel thread context
6209  */
6210 
6211 static int
6212 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6213 {
6214 	switch (cmd) {
6215 	case DDI_DETACH:
6216 		return (sd_unit_detach(devi));
6217 	case DDI_SUSPEND:
6218 		return (sd_ddi_suspend(devi));
6219 	default:
6220 		break;
6221 	}
6222 	return (DDI_FAILURE);
6223 }
6224 
6225 
6226 /*
6227  *     Function: sd_sync_with_callback
6228  *
6229  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6230  *		 state while the callback routine is active.
6231  *
6232  *    Arguments: un: softstate structure for the instance
6233  *
6234  *	Context: Kernel thread context
6235  */
6236 
6237 static void
6238 sd_sync_with_callback(struct sd_lun *un)
6239 {
6240 	ASSERT(un != NULL);
6241 
6242 	mutex_enter(SD_MUTEX(un));
6243 
6244 	ASSERT(un->un_in_callback >= 0);
6245 
6246 	while (un->un_in_callback > 0) {
6247 		mutex_exit(SD_MUTEX(un));
6248 		delay(2);
6249 		mutex_enter(SD_MUTEX(un));
6250 	}
6251 
6252 	mutex_exit(SD_MUTEX(un));
6253 }
6254 
6255 /*
6256  *    Function: sd_unit_attach
6257  *
6258  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6259  *		the soft state structure for the device and performs
6260  *		all necessary structure and device initializations.
6261  *
6262  *   Arguments: devi: the system's dev_info_t for the device.
6263  *
6264  * Return Code: DDI_SUCCESS if attach is successful.
6265  *		DDI_FAILURE if any part of the attach fails.
6266  *
6267  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6268  *		Kernel thread context only.  Can sleep.
6269  */
6270 
6271 static int
6272 sd_unit_attach(dev_info_t *devi)
6273 {
6274 	struct	scsi_device	*devp;
6275 	struct	sd_lun		*un;
6276 	char			*variantp;
6277 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6278 	int	instance;
6279 	int	rval;
6280 	int	wc_enabled;
6281 	int	tgt;
6282 	uint64_t	capacity;
6283 	uint_t		lbasize = 0;
6284 	dev_info_t	*pdip = ddi_get_parent(devi);
6285 	int		offbyone = 0;
6286 	int		geom_label_valid = 0;
6287 
6288 	/*
6289 	 * Retrieve the target driver's private data area. This was set
6290 	 * up by the HBA.
6291 	 */
6292 	devp = ddi_get_driver_private(devi);
6293 
6294 	/*
6295 	 * Retrieve the target ID of the device.
6296 	 */
6297 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6298 	    SCSI_ADDR_PROP_TARGET, -1);
6299 
6300 	/*
6301 	 * Since we have no idea what state things were left in by the last
6302 	 * user of the device, set up some 'default' settings, i.e. turn 'em
6303 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6304 	 * Do this before the scsi_probe, which sends an inquiry.
6305 	 * This is a fix for bug (4430280).
6306 	 * Of special importance is wide-xfer. The drive could have been left
6307 	 * in wide transfer mode by the last driver to communicate with it,
6308 	 * this includes us. If that's the case, and if the following is not
6309 	 * setup properly or we don't re-negotiate with the drive prior to
6310 	 * transferring data to/from the drive, it causes bus parity errors,
6311 	 * data overruns, and unexpected interrupts. This first occurred when
6312 	 * the fix for bug (4378686) was made.
6313 	 */
6314 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6315 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6316 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6317 
6318 	/*
6319 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6320 	 * on a target. Setting it per lun instance actually sets the
6321 	 * capability of this target, which affects those luns already
6322 	 * attached on the same target. So during attach, we can disable
6323 	 * this capability only when no other lun has been attached on this
6324 	 * target. By doing this, we assume a target has the same tagged-qing
6325 	 * capability for every lun. The condition can be removed when HBA
6326 	 * is changed to support per lun based tagged-qing capability.
6327 	 */
6328 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6329 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6330 	}
6331 
6332 	/*
6333 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6334 	 * This call will allocate and fill in the scsi_inquiry structure
6335 	 * and point the sd_inq member of the scsi_device structure to it.
6336 	 * If the attach succeeds, then this memory will not be de-allocated
6337 	 * (via scsi_unprobe()) until the instance is detached.
6338 	 */
6339 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6340 		goto probe_failed;
6341 	}
6342 
6343 	/*
6344 	 * Check the device type as specified in the inquiry data and
6345 	 * claim it if it is of a type that we support.
6346 	 */
6347 	switch (devp->sd_inq->inq_dtype) {
6348 	case DTYPE_DIRECT:
6349 		break;
6350 	case DTYPE_RODIRECT:
6351 		break;
6352 	case DTYPE_OPTICAL:
6353 		break;
6354 	case DTYPE_NOTPRESENT:
6355 	default:
6356 		/* Unsupported device type; fail the attach. */
6357 		goto probe_failed;
6358 	}
6359 
6360 	/*
6361 	 * Allocate the soft state structure for this unit.
6362 	 *
6363 	 * We rely upon this memory being set to all zeroes by
6364 	 * ddi_soft_state_zalloc().  We assume that any member of the
6365 	 * soft state structure that is not explicitly initialized by
6366 	 * this routine will have a value of zero.
6367 	 */
6368 	instance = ddi_get_instance(devp->sd_dev);
6369 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6370 		goto probe_failed;
6371 	}
6372 
6373 	/*
6374 	 * Retrieve a pointer to the newly-allocated soft state.
6375 	 *
6376 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6377 	 * was successful, unless something has gone horribly wrong and the
6378 	 * ddi's soft state internals are corrupt (in which case it is
6379 	 * probably better to halt here than just fail the attach....)
6380 	 */
6381 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6382 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6383 		    instance);
6384 		/*NOTREACHED*/
6385 	}
6386 
6387 	/*
6388 	 * Link the back ptr of the driver soft state to the scsi_device
6389 	 * struct for this lun.
6390 	 * Save a pointer to the softstate in the driver-private area of
6391 	 * the scsi_device struct.
6392 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6393 	 * we first set un->un_sd below.
6394 	 */
6395 	un->un_sd = devp;
6396 	devp->sd_private = (opaque_t)un;
6397 
6398 	/*
6399 	 * The following must be after devp is stored in the soft state struct.
6400 	 */
6401 #ifdef SDDEBUG
6402 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6403 	    "%s_unit_attach: un:0x%p instance:%d\n",
6404 	    ddi_driver_name(devi), un, instance);
6405 #endif
6406 
6407 	/*
6408 	 * Set up the device type and node type (for the minor nodes).
6409 	 * By default we assume that the device can at least support the
6410 	 * Common Command Set. Call it a CD-ROM if it reports itself
6411 	 * as a RODIRECT device.
6412 	 */
6413 	switch (devp->sd_inq->inq_dtype) {
6414 	case DTYPE_RODIRECT:
6415 		un->un_node_type = DDI_NT_CD_CHAN;
6416 		un->un_ctype	 = CTYPE_CDROM;
6417 		break;
6418 	case DTYPE_OPTICAL:
6419 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6420 		un->un_ctype	 = CTYPE_ROD;
6421 		break;
6422 	default:
6423 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6424 		un->un_ctype	 = CTYPE_CCS;
6425 		break;
6426 	}
6427 
6428 	/*
6429 	 * Try to read the interconnect type from the HBA.
6430 	 *
6431 	 * Note: This driver is currently compiled as two binaries, a parallel
6432 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6433 	 * differences are determined at compile time. In the future a single
6434 	 * binary will be provided and the interconnect type will be used to
6435 	 * differentiate between fibre and parallel scsi behaviors. At that time
6436 	 * it will be necessary for all fibre channel HBAs to support this
6437 	 * property.
6438 	 *
6439 	 * Set un_f_is_fibre to TRUE (default fibre).
6440 	 */
6441 	un->un_f_is_fibre = TRUE;
6442 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6443 	case INTERCONNECT_SSA:
6444 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6445 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6446 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6447 		break;
6448 	case INTERCONNECT_PARALLEL:
6449 		un->un_f_is_fibre = FALSE;
6450 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6451 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6452 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6453 		break;
6454 	case INTERCONNECT_SATA:
6455 		un->un_f_is_fibre = FALSE;
6456 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6457 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6458 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6459 		break;
6460 	case INTERCONNECT_FIBRE:
6461 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6462 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6463 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6464 		break;
6465 	case INTERCONNECT_FABRIC:
6466 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6467 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6468 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6469 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6470 		break;
6471 	default:
6472 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6473 		/*
6474 		 * The HBA does not support the "interconnect-type" property
6475 		 * (or did not provide a recognized type).
6476 		 *
6477 		 * Note: This will be obsoleted when a single fibre channel
6478 		 * and parallel scsi driver is delivered. In the meantime the
6479 		 * interconnect type will be set to the platform default.If that
6480 		 * type is not parallel SCSI, it means that we should be
6481 		 * assuming "ssd" semantics. However, here this also means that
6482 		 * the FC HBA is not supporting the "interconnect-type" property
6483 		 * like we expect it to, so log this occurrence.
6484 		 */
6485 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6486 		if (!SD_IS_PARALLEL_SCSI(un)) {
6487 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6488 			    "sd_unit_attach: un:0x%p Assuming "
6489 			    "INTERCONNECT_FIBRE\n", un);
6490 		} else {
6491 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6492 			    "sd_unit_attach: un:0x%p Assuming "
6493 			    "INTERCONNECT_PARALLEL\n", un);
6494 			un->un_f_is_fibre = FALSE;
6495 		}
6496 #else
6497 		/*
6498 		 * Note: This source will be implemented when a single fibre
6499 		 * channel and parallel scsi driver is delivered. The default
6500 		 * will be to assume that if a device does not support the
6501 		 * "interconnect-type" property it is a parallel SCSI HBA and
6502 		 * we will set the interconnect type for parallel scsi.
6503 		 */
6504 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6505 		un->un_f_is_fibre = FALSE;
6506 #endif
6507 		break;
6508 	}
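	/*
	 * For reference: the value tested in the switch above comes from the
	 * HBA driver's tran_getcap(9E) entry point. A minimal sketch, assuming
	 * a hypothetical HBA driver "xx" (not part of this file), of how an
	 * HBA might answer this query:
	 *
	 *	static int
	 *	xx_tran_getcap(struct scsi_address *ap, char *cap, int whom)
	 *	{
	 *		if (strcmp(cap, "interconnect-type") == 0)
	 *			return (INTERCONNECT_FIBRE);
	 *		return (-1);
	 *	}
	 *
	 * Returning -1 means the capability is not supported, which lands us
	 * in the default case above.
	 */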
6509 
6510 	if (un->un_f_is_fibre == TRUE) {
6511 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6512 		    SCSI_VERSION_3) {
6513 			switch (un->un_interconnect_type) {
6514 			case SD_INTERCONNECT_FIBRE:
6515 			case SD_INTERCONNECT_SSA:
6516 				un->un_node_type = DDI_NT_BLOCK_WWN;
6517 				break;
6518 			default:
6519 				break;
6520 			}
6521 		}
6522 	}
6523 
6524 	/*
6525 	 * Initialize the Request Sense command for the target
6526 	 */
6527 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6528 		goto alloc_rqs_failed;
6529 	}
6530 
6531 	/*
6532 	 * Set un_retry_count with SD_RETRY_COUNT; this is OK for Sparc,
6533 	 * which has separate binaries for sd and ssd.
6534 	 *
6535 	 * x86 has one binary, so un_retry_count is set based on the
6536 	 * interconnect type. These hardcoded values will go away when
6537 	 * Sparc also uses one binary for sd and ssd; they need to match
6538 	 * SD_RETRY_COUNT in sddef.h.
6539 	 * The value used is based on interconnect type:
6540 	 * fibre = 3, parallel = 5.
6541 	 */
6542 #if defined(__i386) || defined(__amd64)
6543 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6544 #else
6545 	un->un_retry_count = SD_RETRY_COUNT;
6546 #endif
6547 
6548 	/*
6549 	 * Set the per disk retry count to the default number of retries
6550 	 * for disks and CDROMs. This value can be overridden by the
6551 	 * disk property list or an entry in sd.conf.
6552 	 */
6553 	un->un_notready_retry_count =
6554 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6555 			: DISK_NOT_READY_RETRY_COUNT(un);
6556 
6557 	/*
6558 	 * Set the busy retry count to the default value of un_retry_count.
6559 	 * This can be overridden by entries in sd.conf or the device
6560 	 * config table.
6561 	 */
6562 	un->un_busy_retry_count = un->un_retry_count;
6563 
6564 	/*
6565 	 * Init the reset threshold for retries.  This number determines
6566 	 * how many retries must be performed before a reset can be issued
6567 	 * (for certain error conditions). This can be overridden by entries
6568 	 * in sd.conf or the device config table.
6569 	 */
6570 	un->un_reset_retry_count = (un->un_retry_count / 2);
6571 
6572 	/*
6573 	 * Set the victim_retry_count to twice the default un_retry_count.
6574 	 */
6575 	un->un_victim_retry_count = (2 * un->un_retry_count);
6576 
6577 	/*
6578 	 * Set the reservation release timeout to the default value of
6579 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6580 	 * device config table.
6581 	 */
6582 	un->un_reserve_release_time = 5;
6583 
6584 	/*
6585 	 * Set up the default maximum transfer size. Note that this may
6586 	 * get updated later in the attach, when setting up default wide
6587 	 * operations for disks.
6588 	 */
6589 #if defined(__i386) || defined(__amd64)
6590 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6591 #else
6592 	un->un_max_xfer_size = (uint_t)maxphys;
6593 #endif
6594 
6595 	/*
6596 	 * Get "allow bus device reset" property (defaults to "enabled" if
6597 	 * the property was not defined). This is to disable bus resets for
6598 	 * certain kinds of error recovery. Note: In the future when a run-time
6599 	 * fibre check is available the soft state flag should default to
6600 	 * enabled.
6601 	 */
6602 	if (un->un_f_is_fibre == TRUE) {
6603 		un->un_f_allow_bus_device_reset = TRUE;
6604 	} else {
6605 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6606 			"allow-bus-device-reset", 1) != 0) {
6607 			un->un_f_allow_bus_device_reset = TRUE;
6608 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6609 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
6610 				un);
6611 		} else {
6612 			un->un_f_allow_bus_device_reset = FALSE;
6613 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6614 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
6615 				un);
6616 		}
6617 	}
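	/*
	 * Note: "allow-bus-device-reset" is an ordinary integer property, so
	 * a line such as the following in sd.conf should disable bus device
	 * resets for non-fibre devices (illustrative; the exact delivery
	 * mechanism depends on how the property reaches this node):
	 *
	 *	allow-bus-device-reset=0;
	 */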
6618 
6619 	/*
6620 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6621 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6622 	 *
6623 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6624 	 * property. The new "variant" property with a value of "atapi" has been
6625 	 * introduced so that future 'variants' of standard SCSI behavior (like
6626 	 * atapi) could be specified by the underlying HBA drivers by supplying
6627 	 * a new value for the "variant" property, instead of having to define a
6628 	 * new property.
6629 	 */
6630 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6631 		un->un_f_cfg_is_atapi = TRUE;
6632 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6633 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6634 	}
6635 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6636 	    &variantp) == DDI_PROP_SUCCESS) {
6637 		if (strcmp(variantp, "atapi") == 0) {
6638 			un->un_f_cfg_is_atapi = TRUE;
6639 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6640 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6641 		}
6642 		ddi_prop_free(variantp);
6643 	}
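	/*
	 * For illustration, an HBA nexus driver could advertise such a
	 * variant for one of its child devices roughly as follows, where
	 * child_dip is a hypothetical child dev_info pointer (not a name
	 * used in this file):
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 */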
6644 
6645 	un->un_cmd_timeout	= SD_IO_TIME;
6646 
6647 	/* Info on current states, statuses, etc. (Updated frequently) */
6648 	un->un_state		= SD_STATE_NORMAL;
6649 	un->un_last_state	= SD_STATE_NORMAL;
6650 
6651 	/* Control & status info for command throttling */
6652 	un->un_throttle		= sd_max_throttle;
6653 	un->un_saved_throttle	= sd_max_throttle;
6654 	un->un_min_throttle	= sd_min_throttle;
6655 
6656 	if (un->un_f_is_fibre == TRUE) {
6657 		un->un_f_use_adaptive_throttle = TRUE;
6658 	} else {
6659 		un->un_f_use_adaptive_throttle = FALSE;
6660 	}
6661 
6662 	/* Removable media support. */
6663 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6664 	un->un_mediastate		= DKIO_NONE;
6665 	un->un_specified_mediastate	= DKIO_NONE;
6666 
6667 	/* CVs for suspend/resume (PM or DR) */
6668 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6669 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6670 
6671 	/* Power management support. */
6672 	un->un_power_level = SD_SPINDLE_UNINIT;
6673 
6674 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6675 	un->un_f_wcc_inprog = 0;
6676 
6677 	/*
6678 	 * The open/close semaphore is used to serialize threads executing
6679 	 * in the driver's open & close entry point routines for a given
6680 	 * instance.
6681 	 */
6682 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
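	/*
	 * The open(9E)/close(9E) entry points (sdopen/sdclose) bracket their
	 * work with sema_p()/sema_v() on this semaphore, so at most one such
	 * thread runs at a time for this instance.
	 */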
6683 
6684 	/*
6685 	 * The conf file entry and softstate variable are forceful overrides,
6686 	 * meaning a non-zero value must be entered to change the default.
6687 	 */
6688 	un->un_f_disksort_disabled = FALSE;
6689 
6690 	/*
6691 	 * Retrieve the properties from the static driver table or the driver
6692 	 * configuration file (.conf) for this unit and update the soft state
6693 	 * for the device as needed for the indicated properties.
6694 	 * Note: the property configuration needs to occur here as some of the
6695 	 * following routines may have dependencies on soft state flags set
6696 	 * as part of the driver property configuration.
6697 	 */
6698 	sd_read_unit_properties(un);
6699 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6700 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6701 
6702 	/*
6703 	 * A device is treated as hotpluggable only if it has the
6704 	 * "hotpluggable" property. Otherwise, it is regarded as
6705 	 * non-hotpluggable.
6706 	 */
6707 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6708 	    -1) != -1) {
6709 		un->un_f_is_hotpluggable = TRUE;
6710 	}
6711 
6712 	/*
6713 	 * Set the unit's attributes (flags) according to the "hotpluggable"
6714 	 * property and the RMB bit in the INQUIRY data.
6715 	 */
6716 	sd_set_unit_attributes(un, devi);
6717 
6718 	/*
6719 	 * By default, we mark the capacity, lbasize, and geometry
6720 	 * as invalid. Only if we successfully read a valid capacity
6721 	 * will we update the un_blockcount and un_tgt_blocksize with the
6722 	 * valid values (the geometry will be validated later).
6723 	 */
6724 	un->un_f_blockcount_is_valid	= FALSE;
6725 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6726 
6727 	/*
6728 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
6729 	 * otherwise.
6730 	 */
6731 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6732 	un->un_blockcount = 0;
6733 
6734 	/*
6735 	 * Set up the per-instance info needed to determine the correct
6736 	 * CDBs and other info for issuing commands to the target.
6737 	 */
6738 	sd_init_cdb_limits(un);
6739 
6740 	/*
6741 	 * Set up the IO chains to use, based upon the target type.
6742 	 */
6743 	if (un->un_f_non_devbsize_supported) {
6744 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6745 	} else {
6746 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6747 	}
6748 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6749 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6750 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6751 
6752 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6753 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6754 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6755 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
6756 
6757 
6758 	if (ISCD(un)) {
6759 		un->un_additional_codes = sd_additional_codes;
6760 	} else {
6761 		un->un_additional_codes = NULL;
6762 	}
6763 
6764 	/*
6765 	 * Create the kstats here so they can be available for attach-time
6766 	 * routines that send commands to the unit (either polled or via
6767 	 * sd_send_scsi_cmd).
6768 	 *
6769 	 * Note: This is a critical sequence that needs to be maintained:
6770 	 *	1) Instantiate the kstats here, before any routines using the
6771 	 *	   iopath (i.e. sd_send_scsi_cmd).
6772 	 *	2) Instantiate and initialize the partition stats
6773 	 *	   (sd_set_pstats).
6774 	 *	3) Initialize the error stats (sd_set_errstats), following
6775 	 *	   sd_validate_geometry(),sd_register_devid(),
6776 	 *	   and sd_cache_control().
6777 	 */
6778 
6779 	un->un_stats = kstat_create(sd_label, instance,
6780 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6781 	if (un->un_stats != NULL) {
6782 		un->un_stats->ks_lock = SD_MUTEX(un);
6783 		kstat_install(un->un_stats);
6784 	}
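	/*
	 * The I/O kstat created above is what userland observability tools
	 * read. For example, assuming instance 0 and that a NULL kstat name
	 * defaults to module+instance (per kstat_create(9F)):
	 *
	 *	# kstat -p sd:0:sd0
	 *
	 * or aggregate views such as iostat(1M).
	 */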
6785 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6786 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6787 
6788 	sd_create_errstats(un, instance);
6789 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6790 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6791 
6792 	/*
6793 	 * The following if/else code was relocated here from below as part
6794 	 * of the fix for bug (4430280). However with the default setup added
6795 	 * on entry to this routine, it's no longer absolutely necessary for
6796 	 * this to be before the call to sd_spin_up_unit.
6797 	 */
6798 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6799 		/*
6800 		 * If SCSI-2 tagged queueing is supported by the target
6801 		 * and by the host adapter then we will enable it.
6802 		 */
6803 		un->un_tagflags = 0;
6804 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6805 		    (devp->sd_inq->inq_cmdque) &&
6806 		    (un->un_f_arq_enabled == TRUE)) {
6807 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6808 			    1, 1) == 1) {
6809 				un->un_tagflags = FLAG_STAG;
6810 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6811 				    "sd_unit_attach: un:0x%p tag queueing "
6812 				    "enabled\n", un);
6813 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6814 			    "untagged-qing", 0) == 1) {
6815 				un->un_f_opt_queueing = TRUE;
6816 				un->un_saved_throttle = un->un_throttle =
6817 				    min(un->un_throttle, 3);
6818 			} else {
6819 				un->un_f_opt_queueing = FALSE;
6820 				un->un_saved_throttle = un->un_throttle = 1;
6821 			}
6822 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6823 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6824 			/* The Host Adapter supports internal queueing. */
6825 			un->un_f_opt_queueing = TRUE;
6826 			un->un_saved_throttle = un->un_throttle =
6827 			    min(un->un_throttle, 3);
6828 		} else {
6829 			un->un_f_opt_queueing = FALSE;
6830 			un->un_saved_throttle = un->un_throttle = 1;
6831 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6832 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6833 		}
6834 
6835 		/*
6836 		 * Enable large transfers for SATA/SAS drives
6837 		 */
6838 		if (SD_IS_SERIAL(un)) {
6839 			un->un_max_xfer_size =
6840 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6841 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6842 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6843 			    "sd_unit_attach: un:0x%p max transfer "
6844 			    "size=0x%x\n", un, un->un_max_xfer_size);
6845 
6846 		}
6847 
6848 		/* Setup or tear down default wide operations for disks */
6849 
6850 		/*
6851 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6852 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6853 		 * system and be set to different values. In the future this
6854 		 * code may need to be updated when the ssd module is
6855 		 * obsoleted and removed from the system. (4299588)
6856 		 */
6857 		if (SD_IS_PARALLEL_SCSI(un) &&
6858 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6859 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6860 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6861 			    1, 1) == 1) {
6862 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6863 				    "sd_unit_attach: un:0x%p Wide Transfer "
6864 				    "enabled\n", un);
6865 			}
6866 
6867 			/*
6868 			 * If tagged queuing has also been enabled, then
6869 			 * enable large xfers
6870 			 */
6871 			if (un->un_saved_throttle == sd_max_throttle) {
6872 				un->un_max_xfer_size =
6873 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6874 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6875 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6876 				    "sd_unit_attach: un:0x%p max transfer "
6877 				    "size=0x%x\n", un, un->un_max_xfer_size);
6878 			}
6879 		} else {
6880 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6881 			    0, 1) == 1) {
6882 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6883 				    "sd_unit_attach: un:0x%p "
6884 				    "Wide Transfer disabled\n", un);
6885 			}
6886 		}
6887 	} else {
6888 		un->un_tagflags = FLAG_STAG;
6889 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6890 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6891 	}
6892 
6893 	/*
6894 	 * If this target supports LUN reset, try to enable it.
6895 	 */
6896 	if (un->un_f_lun_reset_enabled) {
6897 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6898 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6899 			    "un:0x%p lun_reset capability set\n", un);
6900 		} else {
6901 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6902 			    "un:0x%p lun-reset capability not set\n", un);
6903 		}
6904 	}
6905 
6906 	/*
6907 	 * At this point in the attach, we have enough info in the
6908 	 * soft state to be able to issue commands to the target.
6909 	 *
6910 	 * All command paths used below MUST issue their commands as
6911 	 * SD_PATH_DIRECT. This is important as intermediate layers
6912 	 * are not all initialized yet (such as PM).
6913 	 */
6914 
6915 	/*
6916 	 * Send a TEST UNIT READY command to the device. This should clear
6917 	 * any outstanding UNIT ATTENTION that may be present.
6918 	 *
6919 	 * Note: Don't check for success, just track if there is a reservation,
6920 	 * this is a throw away command to clear any unit attentions.
6921 	 *
6922 	 * Note: This MUST be the first command issued to the target during
6923 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
6924 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
6925 	 * with attempts at spinning up a device with no media.
6926 	 */
6927 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
6928 		reservation_flag = SD_TARGET_IS_RESERVED;
6929 	}
6930 
6931 	/*
6932 	 * If the device is NOT a removable media device, attempt to spin
6933 	 * it up (using the START_STOP_UNIT command) and read its capacity
6934 	 * (using the READ CAPACITY command).  Note, however, that either
6935 	 * of these could fail and in some cases we would continue with
6936 	 * the attach despite the failure (see below).
6937 	 */
6938 	if (un->un_f_descr_format_supported) {
6939 		switch (sd_spin_up_unit(un)) {
6940 		case 0:
6941 			/*
6942 			 * Spin-up was successful; now try to read the
6943 			 * capacity.  If successful then save the results
6944 			 * and mark the capacity & lbasize as valid.
6945 			 */
6946 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6947 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
6948 
6949 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
6950 			    &lbasize, SD_PATH_DIRECT)) {
6951 			case 0: {
6952 				if (capacity > DK_MAX_BLOCKS) {
6953 #ifdef _LP64
6954 					if (capacity + 1 >
6955 					    SD_GROUP1_MAX_ADDRESS) {
6956 						/*
6957 						 * Enable descriptor format
6958 						 * sense data so that we can
6959 						 * get 64 bit sense data
6960 						 * fields.
6961 						 */
6962 						sd_enable_descr_sense(un);
6963 					}
6964 #else
6965 					/* 32-bit kernels can't handle this */
6966 					scsi_log(SD_DEVINFO(un),
6967 					    sd_label, CE_WARN,
6968 					    "disk has %llu blocks, which "
6969 					    "is too large for a 32-bit "
6970 					    "kernel", capacity);
6971 
6972 #if defined(__i386) || defined(__amd64)
6973 					/*
6974 					 * 1TB disk was treated as (1T - 512)B
6975 					 * in the past, so that it might have
6976 					 * valid VTOC and solaris partitions,
6977 					 * we have to allow it to continue to
6978 					 * work.
6979 					 */
6980 					if (capacity - 1 > DK_MAX_BLOCKS)
6981 #endif
6982 					goto spinup_failed;
6983 #endif
6984 				}
6985 
6986 				/*
6987 				 * Here it's not necessary to check the case:
6988 				 * the capacity of the device is bigger than
6989 				 * what the max hba cdb can support. Because
6990 				 * sd_send_scsi_READ_CAPACITY will retrieve
6991 				 * the capacity by sending USCSI command, which
6992 				 * is constrained by the max hba cdb. Actually,
6993 				 * sd_send_scsi_READ_CAPACITY will return
6994 				 * EINVAL when using bigger cdb than required
6995 				 * cdb length. Will handle this case in
6996 				 * "case EINVAL".
6997 				 */
6998 
6999 				/*
7000 				 * The following relies on
7001 				 * sd_send_scsi_READ_CAPACITY never
7002 				 * returning 0 for capacity and/or lbasize.
7003 				 */
7004 				sd_update_block_info(un, lbasize, capacity);
7005 
7006 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7007 				    "sd_unit_attach: un:0x%p capacity = %ld "
7008 				    "blocks; lbasize= %ld.\n", un,
7009 				    un->un_blockcount, un->un_tgt_blocksize);
7010 
7011 				break;
7012 			}
7013 			case EINVAL:
7014 				/*
7015 				 * In the case where the max-cdb-length property
7016 				 * is smaller than the required CDB length for
7017 				 * a SCSI device, a target driver can fail to
7018 				 * attach to that device.
7019 				 */
7020 				scsi_log(SD_DEVINFO(un),
7021 				    sd_label, CE_WARN,
7022 				    "disk capacity is too large "
7023 				    "for current cdb length");
7024 				goto spinup_failed;
7025 			case EACCES:
7026 				/*
7027 				 * Should never get here if the spin-up
7028 				 * succeeded, but code it in anyway.
7029 				 * From here, just continue with the attach...
7030 				 */
7031 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7032 				    "sd_unit_attach: un:0x%p "
7033 				    "sd_send_scsi_READ_CAPACITY "
7034 				    "returned reservation conflict\n", un);
7035 				reservation_flag = SD_TARGET_IS_RESERVED;
7036 				break;
7037 			default:
7038 				/*
7039 				 * Likewise, should never get here if the
7040 				 * spin-up succeeded. Just continue with
7041 				 * the attach...
7042 				 */
7043 				break;
7044 			}
7045 			break;
7046 		case EACCES:
7047 			/*
7048 			 * Device is reserved by another host.  In this case
7049 			 * we could not spin it up or read the capacity, but
7050 			 * we continue with the attach anyway.
7051 			 */
7052 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7053 			    "sd_unit_attach: un:0x%p spin-up reservation "
7054 			    "conflict.\n", un);
7055 			reservation_flag = SD_TARGET_IS_RESERVED;
7056 			break;
7057 		default:
7058 			/* Fail the attach if the spin-up failed. */
7059 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7060 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7061 			goto spinup_failed;
7062 		}
7063 	}
7064 
7065 	/*
7066 	 * Check to see if this is a MMC drive
7067 	 */
7068 	if (ISCD(un)) {
7069 		sd_set_mmc_caps(un);
7070 	}
7071 
7072 
7073 	/*
7074 	 * Add a zero-length attribute to tell the world we support
7075 	 * kernel ioctls (for layered drivers)
7076 	 */
7077 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7078 	    DDI_KERNEL_IOCTL, NULL, 0);
7079 
7080 	/*
7081 	 * Add a boolean property to tell the world we support
7082 	 * the B_FAILFAST flag (for layered drivers)
7083 	 */
7084 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7085 	    "ddi-failfast-supported", NULL, 0);
7086 
7087 	/*
7088 	 * Initialize power management
7089 	 */
7090 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7091 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7092 	sd_setup_pm(un, devi);
7093 	if (un->un_f_pm_is_enabled == FALSE) {
7094 		/*
7095 		 * For performance, point to a jump table that does
7096 		 * not include pm.
7097 		 * The direct and priority chains don't change with PM.
7098 		 *
7099 		 * Note: this is currently done based on individual device
7100 		 * capabilities. When an interface for determining system
7101 		 * power enabled state becomes available, or when additional
7102 		 * layers are added to the command chain, these values will
7103 		 * have to be re-evaluated for correctness.
7104 		 */
7105 		if (un->un_f_non_devbsize_supported) {
7106 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7107 		} else {
7108 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7109 		}
7110 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7111 	}
7112 
7113 	/*
7114 	 * This property is set to 0 by HA software to avoid retries
7115 	 * on a reserved disk. (The preferred property name is
7116 	 * "retry-on-reservation-conflict") (1189689)
7117 	 *
7118 	 * Note: The use of a global here can have unintended consequences. A
7119 	 * per-instance variable is preferable to match the capabilities of
7120 	 * different underlying HBAs. (4402600)
7121 	 */
7122 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7123 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7124 	    sd_retry_on_reservation_conflict);
7125 	if (sd_retry_on_reservation_conflict != 0) {
7126 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7127 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7128 		    sd_retry_on_reservation_conflict);
7129 	}
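	/*
	 * For example, HA software (or an administrator) could disable these
	 * retries with an /etc/system entry such as the following
	 * (illustrative; the property lookups above serve the same purpose):
	 *
	 *	set sd:sd_retry_on_reservation_conflict=0
	 */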
7130 
7131 	/* Set up options for QFULL handling. */
7132 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7133 	    "qfull-retries", -1)) != -1) {
7134 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7135 		    rval, 1);
7136 	}
7137 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7138 	    "qfull-retry-interval", -1)) != -1) {
7139 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7140 		    rval, 1);
7141 	}
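	/*
	 * Both values are plain integer properties, so lines such as the
	 * following in sd.conf should tune QFULL handling (illustrative
	 * values); they are simply handed to the HBA via scsi_ifsetcap(9F):
	 *
	 *	qfull-retries=3;
	 *	qfull-retry-interval=100;
	 */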
7142 
7143 	/*
7144 	 * This just prints a message that announces the existence of the
7145 	 * device. The message is always printed in the system logfile, but
7146 	 * only appears on the console if the system is booted with the
7147 	 * -v (verbose) argument.
7148 	 */
7149 	ddi_report_dev(devi);
7150 
7151 	un->un_mediastate = DKIO_NONE;
7152 
7153 	cmlb_alloc_handle(&un->un_cmlbhandle);
7154 
7155 #if defined(__i386) || defined(__amd64)
7156 	/*
7157 	 * On x86, compensate for off-by-1 legacy error
7158 	 */
7159 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7160 	    (lbasize == un->un_sys_blocksize))
7161 		offbyone = CMLB_OFF_BY_ONE;
7162 #endif
7163 
7164 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7165 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7166 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7167 	    (void *)SD_PATH_DIRECT) != 0) {
7168 		goto cmlb_attach_failed;
7169 	}
7170 
7171 
7172 	/*
7173 	 * Read and validate the device's geometry (ie, disk label)
7174 	 * A new unformatted drive will not have a valid geometry, but
7175 	 * the driver needs to successfully attach to this device so
7176 	 * the drive can be formatted via ioctls.
7177 	 */
7178 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7179 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7180 
7181 	mutex_enter(SD_MUTEX(un));
7182 
7183 	/*
7184 	 * Read and initialize the devid for the unit.
7185 	 */
7186 	ASSERT(un->un_errstats != NULL);
7187 	if (un->un_f_devid_supported) {
7188 		sd_register_devid(un, devi, reservation_flag);
7189 	}
7190 	mutex_exit(SD_MUTEX(un));
7191 
7192 #if (defined(__fibre))
7193 	/*
7194 	 * Register callbacks for fibre only.  You can't do this solely
7195 	 * on the basis of the devid_type because this is hba specific.
7196 	 * We need to query our hba capabilities to find out whether to
7197 	 * register or not.
7198 	 */
7199 	if (un->un_f_is_fibre) {
7200 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN) != 0) {
7201 			sd_init_event_callbacks(un);
7202 			SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7203 			    "un:0x%p event callbacks inserted", un);
7204 		}
7205 	}
7206 #endif
7207 
7208 	if (un->un_f_opt_disable_cache == TRUE) {
7209 		/*
7210 		 * Disable both read cache and write cache.  This is
7211 		 * the historic behavior of the keywords in the config file.
7212 		 */
7213 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7214 		    0) {
7215 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7216 			    "sd_unit_attach: un:0x%p Could not disable "
7217 			    "caching", un);
7218 			goto devid_failed;
7219 		}
7220 	}
7221 
7222 	/*
7223 	 * Check the value of the WCE bit now and
7224 	 * set un_f_write_cache_enabled accordingly.
7225 	 */
7226 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7227 	mutex_enter(SD_MUTEX(un));
7228 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7229 	mutex_exit(SD_MUTEX(un));
7230 
7231 	/*
7232 	 * Find out what type of reservation this disk supports.
7233 	 */
7234 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7235 	case 0:
7236 		/*
7237 		 * SCSI-3 reservations are supported.
7238 		 */
7239 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7240 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7241 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7242 		break;
7243 	case ENOTSUP:
7244 		/*
7245 		 * The PERSISTENT RESERVE IN command would not be recognized by
7246 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7247 		 */
7248 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7249 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7250 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7251 		break;
7252 	default:
7253 		/*
7254 		 * default to SCSI-3 reservations
7255 		 */
7256 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7257 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7258 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7259 		break;
7260 	}
7261 
7262 	/*
7263 	 * Set the pstat and error stat values here, so data obtained during the
7264 	 * previous attach-time routines is available.
7265 	 *
7266 	 * Note: This is a critical sequence that needs to be maintained:
7267 	 *	1) Instantiate the kstats before any routines using the iopath
7268 	 *	   (i.e. sd_send_scsi_cmd).
7269 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7270 	 *	   stats (sd_set_pstats)here, following
7271 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7272 	 *	   sd_cache_control().
7273 	 */
7274 
7275 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7276 		sd_set_pstats(un);
7277 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7278 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7279 	}
7280 
7281 	sd_set_errstats(un);
7282 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7283 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7284 
7285 
7286 	/*
7287 	 * After successfully attaching an instance, we record the information
7288 	 * of how many luns have been attached on the corresponding target and
7289 	 * controller for parallel SCSI. This information is used when sd tries
7290 	 * to set the tagged queuing capability in HBA.
7291 	 */
7292 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7293 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7294 	}
7295 
7296 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7297 	    "sd_unit_attach: un:0x%p exit success\n", un);
7298 
7299 	return (DDI_SUCCESS);
7300 
7301 	/*
7302 	 * An error occurred during the attach; clean up & return failure.
7303 	 */
7304 
7305 devid_failed:
7306 
7307 setup_pm_failed:
7308 	ddi_remove_minor_node(devi, NULL);
7309 
7310 cmlb_attach_failed:
7311 	/*
7312 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7313 	 */
7314 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7315 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7316 
7317 	/*
7318 	 * Refer to the comments on setting tagged-qing near the beginning of
7319 	 * sd_unit_attach. We can only disable tagged queuing when there is
7320 	 * no lun attached on the target.
7321 	 */
7322 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7323 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7324 	}
7325 
7326 	if (un->un_f_is_fibre == FALSE) {
7327 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7328 	}
7329 
7330 spinup_failed:
7331 
7332 	mutex_enter(SD_MUTEX(un));
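	/*
	 * Each cancellation below follows the same pattern: capture the
	 * timeout id, NULL the soft state field, then drop SD_MUTEX around
	 * the untimeout(9F) call. The mutex cannot be held across
	 * untimeout() because the expired callback may itself be blocked
	 * waiting for SD_MUTEX, which would deadlock.
	 */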
7333 
7334 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd restart */
7335 	if (un->un_direct_priority_timeid != NULL) {
7336 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7337 		un->un_direct_priority_timeid = NULL;
7338 		mutex_exit(SD_MUTEX(un));
7339 		(void) untimeout(temp_id);
7340 		mutex_enter(SD_MUTEX(un));
7341 	}
7342 
7343 	/* Cancel any pending start/stop timeouts */
7344 	if (un->un_startstop_timeid != NULL) {
7345 		timeout_id_t temp_id = un->un_startstop_timeid;
7346 		un->un_startstop_timeid = NULL;
7347 		mutex_exit(SD_MUTEX(un));
7348 		(void) untimeout(temp_id);
7349 		mutex_enter(SD_MUTEX(un));
7350 	}
7351 
7352 	/* Cancel any pending reset-throttle timeouts */
7353 	if (un->un_reset_throttle_timeid != NULL) {
7354 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7355 		un->un_reset_throttle_timeid = NULL;
7356 		mutex_exit(SD_MUTEX(un));
7357 		(void) untimeout(temp_id);
7358 		mutex_enter(SD_MUTEX(un));
7359 	}
7360 
7361 	/* Cancel any pending retry timeouts */
7362 	if (un->un_retry_timeid != NULL) {
7363 		timeout_id_t temp_id = un->un_retry_timeid;
7364 		un->un_retry_timeid = NULL;
7365 		mutex_exit(SD_MUTEX(un));
7366 		(void) untimeout(temp_id);
7367 		mutex_enter(SD_MUTEX(un));
7368 	}
7369 
7370 	/* Cancel any pending delayed cv broadcast timeouts */
7371 	if (un->un_dcvb_timeid != NULL) {
7372 		timeout_id_t temp_id = un->un_dcvb_timeid;
7373 		un->un_dcvb_timeid = NULL;
7374 		mutex_exit(SD_MUTEX(un));
7375 		(void) untimeout(temp_id);
7376 		mutex_enter(SD_MUTEX(un));
7377 	}
7378 
7379 	mutex_exit(SD_MUTEX(un));
7380 
7381 	/* There should not be any in-progress I/O so ASSERT this check */
7382 	ASSERT(un->un_ncmds_in_transport == 0);
7383 	ASSERT(un->un_ncmds_in_driver == 0);
7384 
7385 	/* Do not free the softstate if the callback routine is active */
7386 	sd_sync_with_callback(un);
7387 
7388 	/*
7389 	 * Partition stats apparently are not used with removables. These would
7390 	 * not have been created during attach, so no need to clean them up...
7391 	 */
7392 	if (un->un_stats != NULL) {
7393 		kstat_delete(un->un_stats);
7394 		un->un_stats = NULL;
7395 	}
7396 	if (un->un_errstats != NULL) {
7397 		kstat_delete(un->un_errstats);
7398 		un->un_errstats = NULL;
7399 	}
7400 
7401 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7402 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7403 
7404 	ddi_prop_remove_all(devi);
7405 	sema_destroy(&un->un_semoclose);
7406 	cv_destroy(&un->un_state_cv);
7407 
7408 getrbuf_failed:
7409 
7410 	sd_free_rqs(un);
7411 
7412 alloc_rqs_failed:
7413 
7414 	devp->sd_private = NULL;
7415 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7416 
7417 get_softstate_failed:
7418 	/*
7419 	 * Note: the man pages are unclear as to whether or not doing a
7420 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7421 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7422 	 * ddi_get_soft_state() fails.  The implication seems to be
7423 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7424 	 */
7425 	ddi_soft_state_free(sd_state, instance);
7426 
7427 probe_failed:
7428 	scsi_unprobe(devp);
7429 #ifdef SDDEBUG
7430 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
7431 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
7432 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
7433 		    (void *)un);
7434 	}
7435 #endif
7436 	return (DDI_FAILURE);
7437 }
7438 
7439 
7440 /*
7441  *    Function: sd_unit_detach
7442  *
7443  * Description: Performs DDI_DETACH processing for sddetach().
7444  *
7445  * Return Code: DDI_SUCCESS
7446  *		DDI_FAILURE
7447  *
7448  *     Context: Kernel thread context
7449  */
7450 
7451 static int
7452 sd_unit_detach(dev_info_t *devi)
7453 {
7454 	struct scsi_device	*devp;
7455 	struct sd_lun		*un;
7456 	int			i;
7457 	int			tgt;
7458 	dev_t			dev;
7459 	dev_info_t		*pdip = ddi_get_parent(devi);
7460 	int			instance = ddi_get_instance(devi);
7461 
7462 	mutex_enter(&sd_detach_mutex);
7463 
7464 	/*
7465 	 * Fail the detach for any of the following:
7466 	 *  - Unable to get the sd_lun struct for the instance
7467 	 *  - A layered driver has an outstanding open on the instance
7468 	 *  - Another thread is already detaching this instance
7469 	 *  - Another thread is currently performing an open
7470 	 */
7471 	devp = ddi_get_driver_private(devi);
7472 	if ((devp == NULL) ||
7473 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7474 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7475 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7476 		mutex_exit(&sd_detach_mutex);
7477 		return (DDI_FAILURE);
7478 	}
7479 
7480 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7481 
7482 	/*
7483 	 * Mark this instance as currently in a detach, to inhibit any
7484 	 * opens from a layered driver.
7485 	 */
7486 	un->un_detach_count++;
7487 	mutex_exit(&sd_detach_mutex);
7488 
7489 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7490 	    SCSI_ADDR_PROP_TARGET, -1);
7491 
7492 	dev = sd_make_device(SD_DEVINFO(un));
7493 
7494 #ifndef lint
7495 	_NOTE(COMPETING_THREADS_NOW);
7496 #endif
7497 
7498 	mutex_enter(SD_MUTEX(un));
7499 
7500 	/*
7501 	 * Fail the detach if there are any outstanding layered
7502 	 * opens on this device.
7503 	 */
7504 	for (i = 0; i < NDKMAP; i++) {
7505 		if (un->un_ocmap.lyropen[i] != 0) {
7506 			goto err_notclosed;
7507 		}
7508 	}
7509 
7510 	/*
7511 	 * Verify there are NO outstanding commands issued to this device.
7512 	 * ie, un_ncmds_in_transport == 0.
7513 	 * It's possible to have outstanding commands through the physio
7514 	 * code path, even though everything's closed.
7515 	 */
7516 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7517 	    (un->un_direct_priority_timeid != NULL) ||
7518 	    (un->un_state == SD_STATE_RWAIT)) {
7519 		mutex_exit(SD_MUTEX(un));
7520 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7521 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7522 		goto err_stillbusy;
7523 	}
7524 
7525 	/*
7526 	 * If we have the device reserved, release the reservation.
7527 	 */
7528 	if ((un->un_resvd_status & SD_RESERVE) &&
7529 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7530 		mutex_exit(SD_MUTEX(un));
7531 		/*
7532 		 * Note: sd_reserve_release sends a command to the device
7533 		 * via the sd_ioctlcmd() path, and can sleep.
7534 		 */
7535 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7536 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7537 			    "sd_dr_detach: Cannot release reservation \n");
7538 		}
7539 	} else {
7540 		mutex_exit(SD_MUTEX(un));
7541 	}
7542 
7543 	/*
7544 	 * Untimeout any reserve recover, throttle reset, restart unit
7545 	 * and delayed broadcast timeout threads. Protect the timeout pointer
7546 	 * from getting nulled by their callback functions.
7547 	 */
7548 	mutex_enter(SD_MUTEX(un));
7549 	if (un->un_resvd_timeid != NULL) {
7550 		timeout_id_t temp_id = un->un_resvd_timeid;
7551 		un->un_resvd_timeid = NULL;
7552 		mutex_exit(SD_MUTEX(un));
7553 		(void) untimeout(temp_id);
7554 		mutex_enter(SD_MUTEX(un));
7555 	}
7556 
7557 	if (un->un_reset_throttle_timeid != NULL) {
7558 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7559 		un->un_reset_throttle_timeid = NULL;
7560 		mutex_exit(SD_MUTEX(un));
7561 		(void) untimeout(temp_id);
7562 		mutex_enter(SD_MUTEX(un));
7563 	}
7564 
7565 	if (un->un_startstop_timeid != NULL) {
7566 		timeout_id_t temp_id = un->un_startstop_timeid;
7567 		un->un_startstop_timeid = NULL;
7568 		mutex_exit(SD_MUTEX(un));
7569 		(void) untimeout(temp_id);
7570 		mutex_enter(SD_MUTEX(un));
7571 	}
7572 
7573 	if (un->un_dcvb_timeid != NULL) {
7574 		timeout_id_t temp_id = un->un_dcvb_timeid;
7575 		un->un_dcvb_timeid = NULL;
7576 		mutex_exit(SD_MUTEX(un));
7577 		(void) untimeout(temp_id);
7578 	} else {
7579 		mutex_exit(SD_MUTEX(un));
7580 	}
7581 
7582 	/* Remove any pending reservation reclaim requests for this device */
7583 	sd_rmv_resv_reclaim_req(dev);
7584 
7585 	mutex_enter(SD_MUTEX(un));
7586 
7587 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7588 	if (un->un_direct_priority_timeid != NULL) {
7589 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7590 		un->un_direct_priority_timeid = NULL;
7591 		mutex_exit(SD_MUTEX(un));
7592 		(void) untimeout(temp_id);
7593 		mutex_enter(SD_MUTEX(un));
7594 	}
7595 
7596 	/* Cancel any active multi-host disk watch thread requests */
7597 	if (un->un_mhd_token != NULL) {
7598 		mutex_exit(SD_MUTEX(un));
7599 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7600 		if (scsi_watch_request_terminate(un->un_mhd_token,
7601 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7602 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7603 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7604 			/*
7605 			 * Note: We are returning here after having removed
7606 			 * some driver timeouts above. This is consistent with
7607 			 * the legacy implementation but perhaps the watch
7608 			 * terminate call should be made with the wait flag set.
7609 			 */
7610 			goto err_stillbusy;
7611 		}
7612 		mutex_enter(SD_MUTEX(un));
7613 		un->un_mhd_token = NULL;
7614 	}
7615 
7616 	if (un->un_swr_token != NULL) {
7617 		mutex_exit(SD_MUTEX(un));
7618 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7619 		if (scsi_watch_request_terminate(un->un_swr_token,
7620 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7621 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7622 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7623 			/*
7624 			 * Note: We are returning here after having removed
7625 			 * some driver timeouts above. This is consistent with
7626 			 * the legacy implementation but perhaps the watch
7627 			 * terminate call should be made with the wait flag set.
7628 			 */
7629 			goto err_stillbusy;
7630 		}
7631 		mutex_enter(SD_MUTEX(un));
7632 		un->un_swr_token = NULL;
7633 	}
7634 
7635 	mutex_exit(SD_MUTEX(un));
7636 
7637 	/*
7638 	 * Clear any scsi_reset_notifies. We clear the reset notifies
7639 	 * even if we have not registered one.
7640 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7641 	 */
7642 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7643 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7644 
7645 	/*
7646 	 * protect the timeout pointers from getting nulled by
7647 	 * their callback functions during the cancellation process.
7648 	 * In such a scenario untimeout can be invoked with a null value.
7649 	 */
7650 	_NOTE(NO_COMPETING_THREADS_NOW);
7651 
7652 	mutex_enter(&un->un_pm_mutex);
7653 	if (un->un_pm_idle_timeid != NULL) {
7654 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7655 		un->un_pm_idle_timeid = NULL;
7656 		mutex_exit(&un->un_pm_mutex);
7657 
7658 		/*
7659 		 * Timeout is active; cancel it.
7660 		 * Note that it'll never be active on a device
7661 		 * that does not support PM; therefore we don't
7662 		 * have to check before calling pm_idle_component.
7663 		 */
7664 		(void) untimeout(temp_id);
7665 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7666 		mutex_enter(&un->un_pm_mutex);
7667 	}
7668 
7669 	/*
7670 	 * Check whether there is already a timeout scheduled for power
7671 	 * management. If so, don't lower the power here; that's
7672 	 * the timeout handler's job.
7673 	 */
7674 	if (un->un_pm_timeid != NULL) {
7675 		timeout_id_t temp_id = un->un_pm_timeid;
7676 		un->un_pm_timeid = NULL;
7677 		mutex_exit(&un->un_pm_mutex);
7678 		/*
7679 		 * Timeout is active; cancel it.
7680 		 * Note that it'll never be active on a device
7681 		 * that does not support PM; therefore we don't
7682 		 * have to check before calling pm_idle_component.
7683 		 */
7684 		(void) untimeout(temp_id);
7685 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7686 
7687 	} else {
7688 		mutex_exit(&un->un_pm_mutex);
7689 		if ((un->un_f_pm_is_enabled == TRUE) &&
7690 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7691 		    DDI_SUCCESS)) {
7692 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7693 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
7694 			/*
7695 			 * Fix for bug: 4297749, item # 13
7696 			 * The above test now includes a check to see if PM is
7697 			 * supported by this device before calling
7698 			 * pm_lower_power().
7699 			 * Note, the following is not dead code. The call to
7700 			 * pm_lower_power above will generate a call back into
7701 			 * our sdpower routine which might result in a timeout
7702 			 * handler getting activated. Therefore the following
7703 			 * code is valid and necessary.
7704 			 */
7705 			mutex_enter(&un->un_pm_mutex);
7706 			if (un->un_pm_timeid != NULL) {
7707 				timeout_id_t temp_id = un->un_pm_timeid;
7708 				un->un_pm_timeid = NULL;
7709 				mutex_exit(&un->un_pm_mutex);
7710 				(void) untimeout(temp_id);
7711 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7712 			} else {
7713 				mutex_exit(&un->un_pm_mutex);
7714 			}
7715 		}
7716 	}
7717 
7718 	/*
7719 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7720 	 * Relocated here from above to be after the call to
7721 	 * pm_lower_power, which was getting errors.
7722 	 */
7723 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7724 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7725 
7726 	/*
7727 	 * Currently, tagged queuing is supported per target by the HBA.
7728 	 * Setting it for one lun instance actually sets the capability of
7729 	 * the whole target in the HBA, which affects any luns already
7730 	 * attached on the same target. So during detach, we can only
7731 	 * disable this capability when this is the last lun left on the
7732 	 * target. By doing this, we assume a target has the same tagged
7733 	 * queuing capability for every lun. The condition can be removed
7734 	 * when HBAs support per-lun tagged queuing capability.
7735 	 */
7736 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7737 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7738 	}
7739 
7740 	if (un->un_f_is_fibre == FALSE) {
7741 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7742 	}
7743 
7744 	/*
7745 	 * Remove any event callbacks, fibre only
7746 	 */
7747 	if (un->un_f_is_fibre == TRUE) {
7748 		if ((un->un_insert_event != NULL) &&
7749 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
7750 		    DDI_SUCCESS)) {
7751 			/*
7752 			 * Note: We are returning here after having done
7753 			 * substantial cleanup above. This is consistent
7754 			 * with the legacy implementation but this may not
7755 			 * be the right thing to do.
7756 			 */
7757 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7758 				"sd_dr_detach: Cannot cancel insert event\n");
7759 			goto err_remove_event;
7760 		}
7761 		un->un_insert_event = NULL;
7762 
7763 		if ((un->un_remove_event != NULL) &&
7764 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
7765 		    DDI_SUCCESS)) {
7766 			/*
7767 			 * Note: We are returning here after having done
7768 			 * substantial cleanup above. This is consistent
7769 			 * with the legacy implementation but this may not
7770 			 * be the right thing to do.
7771 			 */
7772 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7773 				"sd_dr_detach: Cannot cancel remove event\n");
7774 			goto err_remove_event;
7775 		}
7776 		un->un_remove_event = NULL;
7777 	}
7778 
7779 	/* Do not free the softstate if the callback routine is active */
7780 	sd_sync_with_callback(un);
7781 
7782 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7783 	cmlb_free_handle(&un->un_cmlbhandle);
7784 
7785 	/*
7786 	 * Hold the detach mutex here, to make sure that no other threads ever
7787 	 * can access a (partially) freed soft state structure.
7788 	 */
7789 	mutex_enter(&sd_detach_mutex);
7790 
7791 	/*
7792 	 * Clean up the soft state struct.
7793 	 * Cleanup is done in reverse order of allocs/inits.
7794 	 * At this point there should be no competing threads anymore.
7795 	 */
7796 
7797 	/* Unregister and free device id. */
7798 	ddi_devid_unregister(devi);
7799 	if (un->un_devid) {
7800 		ddi_devid_free(un->un_devid);
7801 		un->un_devid = NULL;
7802 	}
7803 
7804 	/*
7805 	 * Destroy wmap cache if it exists.
7806 	 */
7807 	if (un->un_wm_cache != NULL) {
7808 		kmem_cache_destroy(un->un_wm_cache);
7809 		un->un_wm_cache = NULL;
7810 	}
7811 
7812 	/*
7813 	 * kstat cleanup is done in detach for all device types (4363169).
7814 	 * We do not want to fail detach if the device kstats are not deleted
7815 	 * since there is a confusion about the devo_refcnt for the device.
7816 	 * We just delete the kstats and let detach complete successfully.
7817 	 */
7818 	if (un->un_stats != NULL) {
7819 		kstat_delete(un->un_stats);
7820 		un->un_stats = NULL;
7821 	}
7822 	if (un->un_errstats != NULL) {
7823 		kstat_delete(un->un_errstats);
7824 		un->un_errstats = NULL;
7825 	}
7826 
7827 	/* Remove partition stats */
7828 	if (un->un_f_pkstats_enabled) {
7829 		for (i = 0; i < NSDMAP; i++) {
7830 			if (un->un_pstats[i] != NULL) {
7831 				kstat_delete(un->un_pstats[i]);
7832 				un->un_pstats[i] = NULL;
7833 			}
7834 		}
7835 	}
7836 
7837 	/* Remove xbuf registration */
7838 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7839 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7840 
7841 	/* Remove driver properties */
7842 	ddi_prop_remove_all(devi);
7843 
7844 	mutex_destroy(&un->un_pm_mutex);
7845 	cv_destroy(&un->un_pm_busy_cv);
7846 
7847 	cv_destroy(&un->un_wcc_cv);
7848 
7849 	/* Open/close semaphore */
7850 	sema_destroy(&un->un_semoclose);
7851 
7852 	/* Removable media condvar. */
7853 	cv_destroy(&un->un_state_cv);
7854 
7855 	/* Suspend/resume condvar. */
7856 	cv_destroy(&un->un_suspend_cv);
7857 	cv_destroy(&un->un_disk_busy_cv);
7858 
7859 	sd_free_rqs(un);
7860 
7861 	/* Free up soft state */
7862 	devp->sd_private = NULL;
7863 
7864 	bzero(un, sizeof (struct sd_lun));
7865 	ddi_soft_state_free(sd_state, instance);
7866 
7867 	mutex_exit(&sd_detach_mutex);
7868 
7869 	/* This frees up the INQUIRY data associated with the device. */
7870 	scsi_unprobe(devp);
7871 
7872 	/*
7873 	 * After successfully detaching an instance, we update the information
7874 	 * of how many luns have been attached on the corresponding target and
7875 	 * controller for parallel SCSI. This information is used when sd tries
7876 	 * to set the tagged queuing capability in HBA.
7877 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7878 	 * check if the device is parallel SCSI. However, we don't need to
7879 	 * check here because we've already checked during attach. No device
7880 	 * that is not parallel SCSI is in the chain.
7881 	 */
7882 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7883 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7884 	}
7885 
7886 	return (DDI_SUCCESS);
7887 
7888 err_notclosed:
7889 	mutex_exit(SD_MUTEX(un));
7890 
7891 err_stillbusy:
7892 	_NOTE(NO_COMPETING_THREADS_NOW);
7893 
7894 err_remove_event:
7895 	mutex_enter(&sd_detach_mutex);
7896 	un->un_detach_count--;
7897 	mutex_exit(&sd_detach_mutex);
7898 
7899 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
7900 	return (DDI_FAILURE);
7901 }
7902 
7903 
7904 /*
7905  *    Function: sd_create_errstats
7906  *
7907  * Description: This routine instantiates the device error stats.
7908  *
7909  *		Note: During attach the stats are instantiated first so they are
7910  *		available for attach-time routines that utilize the driver
7911  *		iopath to send commands to the device. The stats are initialized
7912  *		separately so data obtained during some attach-time routines is
7913  *		available. (4362483)
7914  *
7915  *   Arguments: un - driver soft state (unit) structure
7916  *		instance - driver instance
7917  *
7918  *     Context: Kernel thread context
7919  */
7920 
7921 static void
7922 sd_create_errstats(struct sd_lun *un, int instance)
7923 {
7924 	struct	sd_errstats	*stp;
7925 	char	kstatmodule_err[KSTAT_STRLEN];
7926 	char	kstatname[KSTAT_STRLEN];
7927 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
7928 
7929 	ASSERT(un != NULL);
7930 
7931 	if (un->un_errstats != NULL) {
7932 		return;
7933 	}
7934 
7935 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
7936 	    "%serr", sd_label);
7937 	(void) snprintf(kstatname, sizeof (kstatname),
7938 	    "%s%d,err", sd_label, instance);
7939 
7940 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
7941 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
7942 
7943 	if (un->un_errstats == NULL) {
7944 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7945 		    "sd_create_errstats: Failed kstat_create\n");
7946 		return;
7947 	}
7948 
7949 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7950 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
7951 	    KSTAT_DATA_UINT32);
7952 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
7953 	    KSTAT_DATA_UINT32);
7954 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
7955 	    KSTAT_DATA_UINT32);
7956 	kstat_named_init(&stp->sd_vid,		"Vendor",
7957 	    KSTAT_DATA_CHAR);
7958 	kstat_named_init(&stp->sd_pid,		"Product",
7959 	    KSTAT_DATA_CHAR);
7960 	kstat_named_init(&stp->sd_revision,	"Revision",
7961 	    KSTAT_DATA_CHAR);
7962 	kstat_named_init(&stp->sd_serial,	"Serial No",
7963 	    KSTAT_DATA_CHAR);
7964 	kstat_named_init(&stp->sd_capacity,	"Size",
7965 	    KSTAT_DATA_ULONGLONG);
7966 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
7967 	    KSTAT_DATA_UINT32);
7968 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
7969 	    KSTAT_DATA_UINT32);
7970 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
7971 	    KSTAT_DATA_UINT32);
7972 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
7973 	    KSTAT_DATA_UINT32);
7974 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
7975 	    KSTAT_DATA_UINT32);
7976 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
7977 	    KSTAT_DATA_UINT32);
7978 
7979 	un->un_errstats->ks_private = un;
7980 	un->un_errstats->ks_update  = nulldev;
7981 
7982 	kstat_install(un->un_errstats);
7983 }
7984 
7985 
7986 /*
7987  *    Function: sd_set_errstats
7988  *
7989  * Description: This routine sets the value of the vendor id, product id,
7990  *		revision, serial number, and capacity device error stats.
7991  *
7992  *		Note: During attach the stats are instantiated first so they are
7993  *		available for attach-time routines that utilize the driver
7994  *		iopath to send commands to the device. The stats are initialized
7995  *		separately so data obtained during some attach-time routines is
7996  *		available. (4362483)
7997  *
7998  *   Arguments: un - driver soft state (unit) structure
7999  *
8000  *     Context: Kernel thread context
8001  */
8002 
8003 static void
8004 sd_set_errstats(struct sd_lun *un)
8005 {
8006 	struct	sd_errstats	*stp;
8007 
8008 	ASSERT(un != NULL);
8009 	ASSERT(un->un_errstats != NULL);
8010 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8011 	ASSERT(stp != NULL);
8012 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8013 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8014 	(void) strncpy(stp->sd_revision.value.c,
8015 	    un->un_sd->sd_inq->inq_revision, 4);
8016 
8017 	/*
8018 	 * All the errstats are persistent across detach/attach,
8019 	 * so reset them all here in case a disk drive has been
8020 	 * hot-replaced, except for Sun qualified drives that have
8021 	 * not changed.
8022 	 */
8023 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8024 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8025 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8026 		stp->sd_softerrs.value.ui32 = 0;
8027 		stp->sd_harderrs.value.ui32 = 0;
8028 		stp->sd_transerrs.value.ui32 = 0;
8029 		stp->sd_rq_media_err.value.ui32 = 0;
8030 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8031 		stp->sd_rq_nodev_err.value.ui32 = 0;
8032 		stp->sd_rq_recov_err.value.ui32 = 0;
8033 		stp->sd_rq_illrq_err.value.ui32 = 0;
8034 		stp->sd_rq_pfa_err.value.ui32 = 0;
8035 	}
8036 
8037 	/*
8038 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8039 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8040 	 * (4376302))
8041 	 */
8042 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8043 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8044 		    sizeof (SD_INQUIRY(un)->inq_serial));
8045 	}
8046 
8047 	if (un->un_f_blockcount_is_valid != TRUE) {
8048 		/*
8049 		 * Set capacity error stat to 0 for no media. This ensures
8050 		 * a valid capacity is displayed in response to 'iostat -E'
8051 		 * when no media is present in the device.
8052 		 */
8053 		stp->sd_capacity.value.ui64 = 0;
8054 	} else {
8055 		/*
8056 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8057 		 * capacity.
8058 		 *
8059 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8060 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8061 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8062 		 */
8063 		stp->sd_capacity.value.ui64 = (uint64_t)
8064 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8065 	}
8066 }
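
/*
 * The error stats installed above are KSTAT_TYPE_NAMED kstats and can be
 * read from user land with libkstat(3LIB); iostat -E is built on them.
 * A minimal user-land sketch follows (illustrative only, not part of the
 * driver; the module/name pair "sderr"/"sd0,err" is an assumption based
 * on the names passed to kstat_create() at errstats-creation time):
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		kstat_ctl_t	*kc;
 *		kstat_t		*ksp;
 *		kstat_named_t	*knp;
 *
 *		if ((kc = kstat_open()) == NULL)
 *			return (1);
 *		ksp = kstat_lookup(kc, "sderr", 0, "sd0,err");
 *		if ((ksp != NULL) && (kstat_read(kc, ksp, NULL) != -1)) {
 *			knp = kstat_data_lookup(ksp, "Size");
 *			if (knp != NULL)
 *				(void) printf("capacity: %llu bytes\n",
 *				    (u_longlong_t)knp->value.ui64);
 *		}
 *		(void) kstat_close(kc);
 *		return (0);
 *	}
 */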
8067 
8068 
8069 /*
8070  *    Function: sd_set_pstats
8071  *
8072  * Description: This routine instantiates and initializes the partition
8073  *              stats for each partition with more than zero blocks.
8074  *		(4363169)
8075  *
8076  *   Arguments: un - driver soft state (unit) structure
8077  *
8078  *     Context: Kernel thread context
8079  */
8080 
8081 static void
8082 sd_set_pstats(struct sd_lun *un)
8083 {
8084 	char	kstatname[KSTAT_STRLEN];
8085 	int	instance;
8086 	int	i;
8087 	diskaddr_t	nblks = 0;
8088 	char	*partname = NULL;
8089 
8090 	ASSERT(un != NULL);
8091 
8092 	instance = ddi_get_instance(SD_DEVINFO(un));
8093 
8094 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8095 	for (i = 0; i < NSDMAP; i++) {
8096 
8097 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8098 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8099 			continue;
8100 		mutex_enter(SD_MUTEX(un));
8101 
8102 		if ((un->un_pstats[i] == NULL) &&
8103 		    (nblks != 0)) {
8104 
8105 			(void) snprintf(kstatname, sizeof (kstatname),
8106 			    "%s%d,%s", sd_label, instance,
8107 			    partname);
8108 
8109 			un->un_pstats[i] = kstat_create(sd_label,
8110 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8111 			    1, KSTAT_FLAG_PERSISTENT);
8112 			if (un->un_pstats[i] != NULL) {
8113 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8114 				kstat_install(un->un_pstats[i]);
8115 			}
8116 		}
8117 		mutex_exit(SD_MUTEX(un));
8118 	}
8119 }
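
/*
 * Unlike the named error stats, these partition stats are KSTAT_TYPE_IO,
 * so a user-land reader gets a kstat_io_t back from kstat_read(3KSTAT).
 * A short sketch (illustrative only; "sd0,a" assumes instance 0, slice a,
 * matching the kstatname built above, and kc is an open kstat_ctl_t):
 *
 *	kstat_io_t	kio;
 *	kstat_t		*ksp = kstat_lookup(kc, "sd", 0, "sd0,a");
 *
 *	if ((ksp != NULL) && (kstat_read(kc, ksp, &kio) != -1))
 *		(void) printf("%llu bytes read, %llu written\n",
 *		    (u_longlong_t)kio.nread, (u_longlong_t)kio.nwritten);
 */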
8120 
8121 
8122 #if (defined(__fibre))
8123 /*
8124  *    Function: sd_init_event_callbacks
8125  *
8126  * Description: This routine initializes the insertion and removal event
8127  *		callbacks. (fibre only)
8128  *
8129  *   Arguments: un - driver soft state (unit) structure
8130  *
8131  *     Context: Kernel thread context
8132  */
8133 
8134 static void
8135 sd_init_event_callbacks(struct sd_lun *un)
8136 {
8137 	ASSERT(un != NULL);
8138 
8139 	if ((un->un_insert_event == NULL) &&
8140 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8141 	    &un->un_insert_event) == DDI_SUCCESS)) {
8142 		/*
8143 		 * Add the callback for an insertion event
8144 		 */
8145 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8146 		    un->un_insert_event, sd_event_callback, (void *)un,
8147 		    &(un->un_insert_cb_id));
8148 	}
8149 
8150 	if ((un->un_remove_event == NULL) &&
8151 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8152 	    &un->un_remove_event) == DDI_SUCCESS)) {
8153 		/*
8154 		 * Add the callback for a removal event
8155 		 */
8156 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8157 		    un->un_remove_event, sd_event_callback, (void *)un,
8158 		    &(un->un_remove_cb_id));
8159 	}
8160 }
8161 
8162 
8163 /*
8164  *    Function: sd_event_callback
8165  *
8166  * Description: This routine handles insert/remove events (photon). The
8167  *		state is changed to OFFLINE, which can be used to suppress
8168  *		error messages. (fibre only)
8169  *
8170  *   Arguments: un - driver soft state (unit) structure
8171  *
8172  *     Context: Callout thread context
8173  */
8174 /* ARGSUSED */
8175 static void
8176 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8177     void *bus_impldata)
8178 {
8179 	struct sd_lun *un = (struct sd_lun *)arg;
8180 
8181 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8182 	if (event == un->un_insert_event) {
8183 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8184 		mutex_enter(SD_MUTEX(un));
8185 		if (un->un_state == SD_STATE_OFFLINE) {
8186 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8187 				un->un_state = un->un_last_state;
8188 			} else {
8189 				/*
8190 				 * We have gone through SUSPEND/RESUME while
8191 				 * we were offline. Restore the last state
8192 				 */
8193 				un->un_state = un->un_save_state;
8194 			}
8195 		}
8196 		mutex_exit(SD_MUTEX(un));
8197 
8198 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8199 	} else if (event == un->un_remove_event) {
8200 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8201 		mutex_enter(SD_MUTEX(un));
8202 		/*
8203 		 * We need to handle an event callback that occurs during
8204 		 * the suspend operation, since we don't prevent it.
8205 		 */
8206 		if (un->un_state != SD_STATE_OFFLINE) {
8207 			if (un->un_state != SD_STATE_SUSPENDED) {
8208 				New_state(un, SD_STATE_OFFLINE);
8209 			} else {
8210 				un->un_last_state = SD_STATE_OFFLINE;
8211 			}
8212 		}
8213 		mutex_exit(SD_MUTEX(un));
8214 	} else {
8215 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8216 		    "!Unknown event\n");
8217 	}
8218 
8219 }
8220 #endif
8221 
8222 /*
8223  *    Function: sd_cache_control()
8224  *
8225  * Description: This routine is the driver entry point for setting
8226  *		read and write caching by modifying the WCE (write cache
8227  *		enable) and RCD (read cache disable) bits of mode
8228  *		page 8 (MODEPAGE_CACHING).
8229  *
8230  *   Arguments: un - driver soft state (unit) structure
8231  *		rcd_flag - flag for controlling the read cache
8232  *		wce_flag - flag for controlling the write cache
8233  *
8234  * Return Code: EIO
8235  *		code returned by sd_send_scsi_MODE_SENSE and
8236  *		sd_send_scsi_MODE_SELECT
8237  *
8238  *     Context: Kernel Thread
8239  */
8240 
8241 static int
8242 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8243 {
8244 	struct mode_caching	*mode_caching_page;
8245 	uchar_t			*header;
8246 	size_t			buflen;
8247 	int			hdrlen;
8248 	int			bd_len;
8249 	int			rval = 0;
8250 	struct mode_header_grp2	*mhp;
8251 
8252 	ASSERT(un != NULL);
8253 
8254 	/*
8255 	 * Do a test unit ready, otherwise a mode sense may not work if this
8256 	 * is the first command sent to the device after boot.
8257 	 */
8258 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8259 
8260 	if (un->un_f_cfg_is_atapi == TRUE) {
8261 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8262 	} else {
8263 		hdrlen = MODE_HEADER_LENGTH;
8264 	}
8265 
8266 	/*
8267 	 * Allocate memory for the retrieved mode page and its headers.  Set
8268 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
8269 	 * we get all of the mode sense data; otherwise the mode select
8270 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8271 	 */
8272 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8273 	    sizeof (struct mode_cache_scsi3);
8274 
8275 	header = kmem_zalloc(buflen, KM_SLEEP);
8276 
8277 	/* Get the information from the device. */
8278 	if (un->un_f_cfg_is_atapi == TRUE) {
8279 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8280 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8281 	} else {
8282 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8283 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8284 	}
8285 	if (rval != 0) {
8286 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8287 		    "sd_cache_control: Mode Sense Failed\n");
8288 		kmem_free(header, buflen);
8289 		return (rval);
8290 	}
8291 
8292 	/*
8293 	 * Determine size of Block Descriptors in order to locate
8294 	 * the mode page data. ATAPI devices return 0, SCSI devices
8295 	 * should return MODE_BLK_DESC_LENGTH.
8296 	 */
8297 	if (un->un_f_cfg_is_atapi == TRUE) {
8298 		mhp	= (struct mode_header_grp2 *)header;
8299 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8300 	} else {
8301 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8302 	}
8303 
8304 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8305 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8306 		    "sd_cache_control: Mode Sense returned invalid "
8307 		    "block descriptor length\n");
8308 		kmem_free(header, buflen);
8309 		return (EIO);
8310 	}
8311 
8312 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8313 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8314 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8315 		    " caching page code mismatch %d\n",
8316 		    mode_caching_page->mode_page.code);
8317 		kmem_free(header, buflen);
8318 		return (EIO);
8319 	}
8320 
8321 	/* Check the relevant bits on successful mode sense. */
8322 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8323 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8324 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8325 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8326 
8327 		size_t sbuflen;
8328 		uchar_t save_pg;
8329 
8330 		/*
8331 		 * Construct select buffer length based on the
8332 		 * length of the sense data returned.
8333 		 */
8334 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
8335 		    sizeof (struct mode_page) +
8336 		    (int)mode_caching_page->mode_page.length;
8337 
8338 		/*
8339 		 * Set the caching bits as requested.
8340 		 */
8341 		if (rcd_flag == SD_CACHE_ENABLE)
8342 			mode_caching_page->rcd = 0;
8343 		else if (rcd_flag == SD_CACHE_DISABLE)
8344 			mode_caching_page->rcd = 1;
8345 
8346 		if (wce_flag == SD_CACHE_ENABLE)
8347 			mode_caching_page->wce = 1;
8348 		else if (wce_flag == SD_CACHE_DISABLE)
8349 			mode_caching_page->wce = 0;
8350 
8351 		/*
8352 		 * Save the page if the mode sense says the
8353 		 * drive supports it.
8354 		 */
8355 		save_pg = mode_caching_page->mode_page.ps ?
8356 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8357 
8358 		/* Clear reserved bits before mode select. */
8359 		mode_caching_page->mode_page.ps = 0;
8360 
8361 		/*
8362 		 * Clear out mode header for mode select.
8363 		 * The rest of the retrieved page will be reused.
8364 		 */
8365 		bzero(header, hdrlen);
8366 
8367 		if (un->un_f_cfg_is_atapi == TRUE) {
8368 			mhp = (struct mode_header_grp2 *)header;
8369 			mhp->bdesc_length_hi = bd_len >> 8;
8370 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8371 		} else {
8372 			((struct mode_header *)header)->bdesc_length = bd_len;
8373 		}
8374 
8375 		/* Issue mode select to change the cache settings */
8376 		if (un->un_f_cfg_is_atapi == TRUE) {
8377 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8378 			    sbuflen, save_pg, SD_PATH_DIRECT);
8379 		} else {
8380 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8381 			    sbuflen, save_pg, SD_PATH_DIRECT);
8382 		}
8383 	}
8384 
8385 	kmem_free(header, buflen);
8386 	return (rval);
8387 }
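
/*
 * For reference, the MODE SENSE data parsed above has this layout in the
 * non-ATAPI case (sizes are the common ones; bd_len comes from the header
 * and may be 0):
 *
 *	+----------------------+  offset 0
 *	| mode_header (4)      |
 *	+----------------------+  offset hdrlen (MODE_HEADER_LENGTH == 4)
 *	| block descr (bd_len) |
 *	+----------------------+  offset hdrlen + bd_len (typically 4 + 8)
 *	| mode_caching page    |
 *	+----------------------+
 *
 * which is why mode_caching_page is computed as
 * (struct mode_caching *)(header + hdrlen + bd_len): it lands on the page
 * header, whose first byte carries the page code (MODEPAGE_CACHING, 0x08).
 */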
8388 
8389 
8390 /*
8391  *    Function: sd_get_write_cache_enabled()
8392  *
8393  * Description: This routine is the driver entry point for determining if
8394  *		write caching is enabled.  It examines the WCE (write cache
8395  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8396  *
8397  *   Arguments: un - driver soft state (unit) structure
8398  *		is_enabled - pointer to int where write cache enabled state
8399  *		is returned (non-zero -> write cache enabled)
8400  *
8401  *
8402  * Return Code: EIO
8403  *		code returned by sd_send_scsi_MODE_SENSE
8404  *
8405  *     Context: Kernel Thread
8406  *
8407  * NOTE: If an ioctl to disable the write cache is added, this sequence
8408  * should be followed so that no locking is required for accesses to
8409  * un->un_f_write_cache_enabled:
8410  * 	do mode select to clear wce
8411  * 	do synchronize cache to flush cache
8412  * 	set un->un_f_write_cache_enabled = FALSE
8413  *
8414  * Conversely, an ioctl to enable the write cache should be done
8415  * in this order:
8416  * 	set un->un_f_write_cache_enabled = TRUE
8417  * 	do mode select to set wce
8418  */
8419 
8420 static int
8421 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8422 {
8423 	struct mode_caching	*mode_caching_page;
8424 	uchar_t			*header;
8425 	size_t			buflen;
8426 	int			hdrlen;
8427 	int			bd_len;
8428 	int			rval = 0;
8429 
8430 	ASSERT(un != NULL);
8431 	ASSERT(is_enabled != NULL);
8432 
8433 	/* in case of error, flag as enabled */
8434 	*is_enabled = TRUE;
8435 
8436 	/*
8437 	 * Do a test unit ready, otherwise a mode sense may not work if this
8438 	 * is the first command sent to the device after boot.
8439 	 */
8440 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8441 
8442 	if (un->un_f_cfg_is_atapi == TRUE) {
8443 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8444 	} else {
8445 		hdrlen = MODE_HEADER_LENGTH;
8446 	}
8447 
8448 	/*
8449 	 * Allocate memory for the retrieved mode page and its headers.  Set
8450 	 * a pointer to the page itself.
8451 	 */
8452 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8453 	header = kmem_zalloc(buflen, KM_SLEEP);
8454 
8455 	/* Get the information from the device. */
8456 	if (un->un_f_cfg_is_atapi == TRUE) {
8457 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8458 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8459 	} else {
8460 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8461 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8462 	}
8463 	if (rval != 0) {
8464 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8465 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8466 		kmem_free(header, buflen);
8467 		return (rval);
8468 	}
8469 
8470 	/*
8471 	 * Determine size of Block Descriptors in order to locate
8472 	 * the mode page data. ATAPI devices return 0, SCSI devices
8473 	 * should return MODE_BLK_DESC_LENGTH.
8474 	 */
8475 	if (un->un_f_cfg_is_atapi == TRUE) {
8476 		struct mode_header_grp2	*mhp;
8477 		mhp	= (struct mode_header_grp2 *)header;
8478 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8479 	} else {
8480 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8481 	}
8482 
8483 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8484 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8485 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8486 		    "block descriptor length\n");
8487 		kmem_free(header, buflen);
8488 		return (EIO);
8489 	}
8490 
8491 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8492 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8493 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled: "
8494 		    "Mode Sense caching page code mismatch %d\n",
8495 		    mode_caching_page->mode_page.code);
8496 		kmem_free(header, buflen);
8497 		return (EIO);
8498 	}
8499 	*is_enabled = mode_caching_page->wce;
8500 
8501 	kmem_free(header, buflen);
8502 	return (0);
8503 }
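
/*
 * A sketch of the write-cache-disable ordering called for in the NOTE
 * above (a hypothetical ioctl handler; the helpers are the real routines
 * in this file, and passing an rcd_flag value that is neither
 * SD_CACHE_ENABLE nor SD_CACHE_DISABLE is assumed to leave the read
 * cache untouched, as the flag checks in sd_cache_control() imply):
 *
 *	rval = sd_cache_control(un, -1, SD_CACHE_DISABLE);
 *	if (rval == 0)
 *		rval = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *	if (rval == 0)
 *		un->un_f_write_cache_enabled = FALSE;
 *
 * The flag is cleared last so that a lock-free reader never sees the
 * cache reported as off while dirty data may still sit in the drive.
 */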
8504 
8505 
8506 /*
8507  *    Function: sd_make_device
8508  *
8509  * Description: Utility routine to return the Solaris device number from
8510  *		the data in the device's dev_info structure.
8511  *
8512  * Return Code: The Solaris device number
8513  *
8514  *     Context: Any
8515  */
8516 
8517 static dev_t
8518 sd_make_device(dev_info_t *devi)
8519 {
8520 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8521 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8522 }
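
/*
 * The inverse mapping is used throughout this file: SDUNIT() recovers the
 * instance and SDPART() the partition from a dev_t (the exact macro
 * definitions live in sddef.h).  A worked example, assuming instance 2,
 * partition 5:
 *
 *	dev = makedevice(major, (2 << SDUNIT_SHIFT) | 5);
 *	SDUNIT(dev) == 2	(minor >> SDUNIT_SHIFT)
 *	SDPART(dev) == 5	(minor & the partition mask)
 */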
8523 
8524 
8525 /*
8526  *    Function: sd_pm_entry
8527  *
8528  * Description: Called at the start of a new command to manage power
8529  *		and busy status of a device. This includes determining whether
8530  *		the current power state of the device is sufficient for
8531  *		performing the command or whether it must be changed.
8532  *		The PM framework is notified appropriately.
8533  *		Only with a return status of DDI_SUCCESS will the
8534  *		component be marked busy to the framework.
8535  *
8536  *		All callers of sd_pm_entry must check the return status
8537  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
8538  *		of DDI_FAILURE indicates the device failed to power up.
8539  *		In this case un_pm_count has been adjusted so the result
8540  *		on exit is still powered down, i.e. count is less than 0.
8541  *		Calling sd_pm_exit with this count value hits an ASSERT.
8542  *
8543  * Return Code: DDI_SUCCESS or DDI_FAILURE
8544  *
8545  *     Context: Kernel thread context.
8546  */
8547 
8548 static int
8549 sd_pm_entry(struct sd_lun *un)
8550 {
8551 	int return_status = DDI_SUCCESS;
8552 
8553 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8554 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8555 
8556 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8557 
8558 	if (un->un_f_pm_is_enabled == FALSE) {
8559 		SD_TRACE(SD_LOG_IO_PM, un,
8560 		    "sd_pm_entry: exiting, PM not enabled\n");
8561 		return (return_status);
8562 	}
8563 
8564 	/*
8565 	 * Just increment a counter if PM is enabled. On the transition from
8566 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8567 	 * the count with each IO and mark the device as idle when the count
8568 	 * hits 0.
8569 	 *
8570 	 * If the count is less than 0 the device is powered down. If a powered
8571 	 * down device is successfully powered up then the count must be
8572 	 * incremented to reflect the power up. Note that it'll get incremented
8573 	 * a second time to become busy.
8574 	 *
8575 	 * Because the following has the potential to change the device state
8576 	 * and must release the un_pm_mutex to do so, only one thread can be
8577 	 * allowed through at a time.
8578 	 */
8579 
8580 	mutex_enter(&un->un_pm_mutex);
8581 	while (un->un_pm_busy == TRUE) {
8582 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8583 	}
8584 	un->un_pm_busy = TRUE;
8585 
8586 	if (un->un_pm_count < 1) {
8587 
8588 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8589 
8590 		/*
8591 		 * Indicate we are now busy so the framework won't attempt to
8592 		 * power down the device. This call will only fail if either
8593 		 * we passed a bad component number or the device has no
8594 		 * components. Neither of these should ever happen.
8595 		 */
8596 		mutex_exit(&un->un_pm_mutex);
8597 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8598 		ASSERT(return_status == DDI_SUCCESS);
8599 
8600 		mutex_enter(&un->un_pm_mutex);
8601 
8602 		if (un->un_pm_count < 0) {
8603 			mutex_exit(&un->un_pm_mutex);
8604 
8605 			SD_TRACE(SD_LOG_IO_PM, un,
8606 			    "sd_pm_entry: power up component\n");
8607 
8608 			/*
8609 			 * pm_raise_power will cause sdpower to be called
8610 			 * which brings the device power level to the
8611 			 * desired state, ON in this case. If successful,
8612 			 * un_pm_count and un_power_level will be updated
8613 			 * appropriately.
8614 			 */
8615 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8616 			    SD_SPINDLE_ON);
8617 
8618 			mutex_enter(&un->un_pm_mutex);
8619 
8620 			if (return_status != DDI_SUCCESS) {
8621 				/*
8622 				 * Power up failed.
8623 				 * Idle the device and adjust the count
8624 				 * so the result on exit is that we're
8625 				 * still powered down, i.e. count is less than 0.
8626 				 */
8627 				SD_TRACE(SD_LOG_IO_PM, un,
8628 				    "sd_pm_entry: power up failed,"
8629 				    " idle the component\n");
8630 
8631 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8632 				un->un_pm_count--;
8633 			} else {
8634 				/*
8635 				 * Device is powered up, verify the
8636 				 * count is non-negative.
8637 				 * This is debug only.
8638 				 */
8639 				ASSERT(un->un_pm_count == 0);
8640 			}
8641 		}
8642 
8643 		if (return_status == DDI_SUCCESS) {
8644 			/*
8645 			 * For performance, now that the device has been tagged
8646 			 * as busy, and it's known to be powered up, update the
8647 			 * chain types to use jump tables that do not include
8648 			 * pm. This significantly lowers the overhead and
8649 			 * therefore improves performance.
8650 			 */
8651 
8652 			mutex_exit(&un->un_pm_mutex);
8653 			mutex_enter(SD_MUTEX(un));
8654 			SD_TRACE(SD_LOG_IO_PM, un,
8655 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8656 			    un->un_uscsi_chain_type);
8657 
8658 			if (un->un_f_non_devbsize_supported) {
8659 				un->un_buf_chain_type =
8660 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8661 			} else {
8662 				un->un_buf_chain_type =
8663 				    SD_CHAIN_INFO_DISK_NO_PM;
8664 			}
8665 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8666 
8667 			SD_TRACE(SD_LOG_IO_PM, un,
8668 			    "             changed  uscsi_chain_type to   %d\n",
8669 			    un->un_uscsi_chain_type);
8670 			mutex_exit(SD_MUTEX(un));
8671 			mutex_enter(&un->un_pm_mutex);
8672 
8673 			if (un->un_pm_idle_timeid == NULL) {
8674 				/* 300 ms. */
8675 				un->un_pm_idle_timeid =
8676 				    timeout(sd_pm_idletimeout_handler, un,
8677 				    (drv_usectohz((clock_t)300000)));
8678 				/*
8679 				 * Include an extra call to busy, which keeps
8680 				 * the device busy with respect to the PM layer
8681 				 * until the timer fires, at which time it'll
8682 				 * get the extra idle call.
8683 				 */
8684 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8685 			}
8686 		}
8687 	}
8688 	un->un_pm_busy = FALSE;
8689 	/* Next... */
8690 	cv_signal(&un->un_pm_busy_cv);
8691 
8692 	un->un_pm_count++;
8693 
8694 	SD_TRACE(SD_LOG_IO_PM, un,
8695 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8696 
8697 	mutex_exit(&un->un_pm_mutex);
8698 
8699 	return (return_status);
8700 }
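
/*
 * A worked example of the un_pm_count protocol implemented above, for a
 * device that starts powered down (a count of -1 in the simple case; this
 * restates the code and is not an interface guarantee):
 *
 *	sd_pm_entry() #1:	pm_busy_component(), then pm_raise_power()
 *				brings the count to 0; the final increment
 *				leaves it at 1
 *	sd_pm_entry() #2:	count 1 -> 2 (no power-up work needed)
 *	sd_pm_exit()  #1:	count 2 -> 1, device stays busy
 *	sd_pm_exit()  #2:	count 1 -> 0, pm_idle_component()
 */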
8701 
8702 
8703 /*
8704  *    Function: sd_pm_exit
8705  *
8706  * Description: Called at the completion of a command to manage busy
8707  *		status for the device. If the device becomes idle the
8708  *		PM framework is notified.
8709  *
8710  *     Context: Kernel thread context
8711  */
8712 
8713 static void
8714 sd_pm_exit(struct sd_lun *un)
8715 {
8716 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8717 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8718 
8719 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8720 
8721 	/*
8722 	 * After attach the following flag is only read, so don't
8723 	 * take the penalty of acquiring a mutex for it.
8724 	 */
8725 	if (un->un_f_pm_is_enabled == TRUE) {
8726 
8727 		mutex_enter(&un->un_pm_mutex);
8728 		un->un_pm_count--;
8729 
8730 		SD_TRACE(SD_LOG_IO_PM, un,
8731 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8732 
8733 		ASSERT(un->un_pm_count >= 0);
8734 		if (un->un_pm_count == 0) {
8735 			mutex_exit(&un->un_pm_mutex);
8736 
8737 			SD_TRACE(SD_LOG_IO_PM, un,
8738 			    "sd_pm_exit: idle component\n");
8739 
8740 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8741 
8742 		} else {
8743 			mutex_exit(&un->un_pm_mutex);
8744 		}
8745 	}
8746 
8747 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8748 }
8749 
8750 
8751 /*
8752  *    Function: sdopen
8753  *
8754  * Description: Driver's open(9e) entry point function.
8755  *
8756  *   Arguments: dev_p   - pointer to device number
8757  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8758  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8759  *		cred_p  - user credential pointer
8760  *
8761  * Return Code: EINVAL
8762  *		ENXIO
8763  *		EIO
8764  *		EROFS
8765  *		EBUSY
8766  *
8767  *     Context: Kernel thread context
8768  */
8769 /* ARGSUSED */
8770 static int
8771 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8772 {
8773 	struct sd_lun	*un;
8774 	int		nodelay;
8775 	int		part;
8776 	uint64_t	partmask;
8777 	int		instance;
8778 	dev_t		dev;
8779 	int		rval = EIO;
8780 	diskaddr_t	nblks = 0;
8781 
8782 	/* Validate the open type */
8783 	if (otyp >= OTYPCNT) {
8784 		return (EINVAL);
8785 	}
8786 
8787 	dev = *dev_p;
8788 	instance = SDUNIT(dev);
8789 	mutex_enter(&sd_detach_mutex);
8790 
8791 	/*
8792 	 * Fail the open if there is no softstate for the instance, or
8793 	 * if another thread somewhere is trying to detach the instance.
8794 	 */
8795 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
8796 	    (un->un_detach_count != 0)) {
8797 		mutex_exit(&sd_detach_mutex);
8798 		/*
8799 		 * The probe cache only needs to be cleared when open(9e) fails
8800 		 * with ENXIO (4238046).
8801 		 */
8802 		/*
8803 		 * Unconditionally clearing the probe cache is fine with
8804 		 * separate sd/ssd binaries; on the x86 platform, where both
8805 		 * parallel SCSI and fibre are handled in one binary, it can
8806 		 * be an issue.
8807 		 */
8808 		sd_scsi_clear_probe_cache();
8809 		return (ENXIO);
8810 	}
8811 
8812 	/*
8813 	 * The un_layer_count is to prevent another thread in specfs from
8814 	 * trying to detach the instance, which can happen when we are
8815 	 * called from a higher-layer driver instead of through specfs.
8816 	 * This will not be needed when DDI provides a layered driver
8817 	 * interface that allows specfs to know that an instance is in
8818 	 * use by a layered driver and should not be detached.
8819 	 *
8820 	 * Note: the semantics for layered driver opens are exactly one
8821 	 * close for every open.
8822 	 */
8823 	if (otyp == OTYP_LYR) {
8824 		un->un_layer_count++;
8825 	}
8826 
8827 	/*
8828 	 * Keep a count of the current # of opens in progress. This is because
8829 	 * some layered drivers try to call us as a regular open. This can
8830 	 * cause problems that we cannot prevent; however, by keeping this count
8831 	 * we can at least keep our open and detach routines from racing against
8832 	 * each other under such conditions.
8833 	 */
8834 	un->un_opens_in_progress++;
8835 	mutex_exit(&sd_detach_mutex);
8836 
8837 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
8838 	part	 = SDPART(dev);
8839 	partmask = 1 << part;
8840 
8841 	/*
8842 	 * We use a semaphore here in order to serialize
8843 	 * open and close requests on the device.
8844 	 */
8845 	sema_p(&un->un_semoclose);
8846 
8847 	mutex_enter(SD_MUTEX(un));
8848 
8849 	/*
8850 	 * All device accesses go through sdstrategy(), where we check
8851 	 * the suspend status, but there could be a scsi_poll command,
8852 	 * which bypasses sdstrategy(), so we need to check pm
8853 	 * status.
8854 	 */
8855 
8856 	if (!nodelay) {
8857 		while ((un->un_state == SD_STATE_SUSPENDED) ||
8858 		    (un->un_state == SD_STATE_PM_CHANGING)) {
8859 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
8860 		}
8861 
8862 		mutex_exit(SD_MUTEX(un));
8863 		if (sd_pm_entry(un) != DDI_SUCCESS) {
8864 			rval = EIO;
8865 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
8866 			    "sdopen: sd_pm_entry failed\n");
8867 			goto open_failed_with_pm;
8868 		}
8869 		mutex_enter(SD_MUTEX(un));
8870 	}
8871 
8872 	/* check for previous exclusive open */
8873 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
8874 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
8875 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
8876 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
8877 
8878 	if (un->un_exclopen & (partmask)) {
8879 		goto excl_open_fail;
8880 	}
8881 
8882 	if (flag & FEXCL) {
8883 		int i;
8884 		if (un->un_ocmap.lyropen[part]) {
8885 			goto excl_open_fail;
8886 		}
8887 		for (i = 0; i < (OTYPCNT - 1); i++) {
8888 			if (un->un_ocmap.regopen[i] & (partmask)) {
8889 				goto excl_open_fail;
8890 			}
8891 		}
8892 	}
8893 
8894 	/*
8895 	 * Check the write permission if this is a removable media device,
8896 	 * NDELAY has not been set, and write permission is requested.
8897 	 *
8898 	 * Note: If NDELAY was set and this is write-protected media the WRITE
8899 	 * attempt will fail with EIO as part of the I/O processing. This is a
8900 	 * more permissive implementation that allows the open to succeed and
8901 	 * WRITE attempts to fail when appropriate.
8902 	 */
8903 	if (un->un_f_chk_wp_open) {
8904 		if ((flag & FWRITE) && (!nodelay)) {
8905 			mutex_exit(SD_MUTEX(un));
8906 			/*
8907 			 * For a writable DVD drive, defer the write-permission
8908 			 * check to sdstrategy and do not fail the open even if
8909 			 * FWRITE is set, as the device can be writable
8910 			 * depending upon the media, and the media can change
8911 			 * after the call to open().
8912 			 */
8913 			if (un->un_f_dvdram_writable_device == FALSE) {
8914 				if (ISCD(un) || sr_check_wp(dev)) {
8915 					rval = EROFS;
8916 					mutex_enter(SD_MUTEX(un));
8917 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8918 					    "write to cd or write protected media\n");
8919 					goto open_fail;
8920 				}
8921 			}
8922 			mutex_enter(SD_MUTEX(un));
8923 		}
8924 	}
8925 
8926 	/*
8927 	 * If opening in NDELAY/NONBLOCK mode, just return.
8928 	 * Check if disk is ready and has a valid geometry later.
8929 	 */
8930 	if (!nodelay) {
8931 		mutex_exit(SD_MUTEX(un));
8932 		rval = sd_ready_and_valid(un);
8933 		mutex_enter(SD_MUTEX(un));
8934 		/*
8935 		 * Fail if device is not ready or if the number of disk
8936 		 * blocks is zero or negative for non-CD devices.
8937 		 */
8938 
8939 		nblks = 0;
8940 
8941 		if (rval == SD_READY_VALID && (!ISCD(un))) {
8942 			/* if cmlb_partinfo fails, nblks remains 0 */
8943 			mutex_exit(SD_MUTEX(un));
8944 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
8945 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
8946 			mutex_enter(SD_MUTEX(un));
8947 		}
8948 
8949 		if ((rval != SD_READY_VALID) ||
8950 		    (!ISCD(un) && nblks <= 0)) {
8951 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
8952 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8953 			    "device not ready or invalid disk block value\n");
8954 			goto open_fail;
8955 		}
8956 #if defined(__i386) || defined(__amd64)
8957 	} else {
8958 		uchar_t *cp;
8959 		/*
8960 		 * x86 requires special nodelay handling, so that p0 is
8961 		 * always defined and accessible.
8962 		 * Invalidate geometry only if device is not already open.
8963 		 */
8964 		cp = &un->un_ocmap.chkd[0];
8965 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
8966 			if (*cp != (uchar_t)0) {
8967 				break;
8968 			}
8969 			cp++;
8970 		}
8971 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
8972 			mutex_exit(SD_MUTEX(un));
8973 			cmlb_invalidate(un->un_cmlbhandle,
8974 			    (void *)SD_PATH_DIRECT);
8975 			mutex_enter(SD_MUTEX(un));
8976 		}
8977 
8978 #endif
8979 	}
8980 
8981 	if (otyp == OTYP_LYR) {
8982 		un->un_ocmap.lyropen[part]++;
8983 	} else {
8984 		un->un_ocmap.regopen[otyp] |= partmask;
8985 	}
8986 
8987 	/* Set up open and exclusive open flags */
8988 	if (flag & FEXCL) {
8989 		un->un_exclopen |= (partmask);
8990 	}
8991 
8992 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8993 	    "open of part %d type %d\n", part, otyp);
8994 
8995 	mutex_exit(SD_MUTEX(un));
8996 	if (!nodelay) {
8997 		sd_pm_exit(un);
8998 	}
8999 
9000 	sema_v(&un->un_semoclose);
9001 
9002 	mutex_enter(&sd_detach_mutex);
9003 	un->un_opens_in_progress--;
9004 	mutex_exit(&sd_detach_mutex);
9005 
9006 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9007 	return (DDI_SUCCESS);
9008 
9009 excl_open_fail:
9010 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9011 	rval = EBUSY;
9012 
9013 open_fail:
9014 	mutex_exit(SD_MUTEX(un));
9015 
9016 	/*
9017 	 * On a failed open we must exit the pm management.
9018 	 */
9019 	if (!nodelay) {
9020 		sd_pm_exit(un);
9021 	}
9022 open_failed_with_pm:
9023 	sema_v(&un->un_semoclose);
9024 
9025 	mutex_enter(&sd_detach_mutex);
9026 	un->un_opens_in_progress--;
9027 	if (otyp == OTYP_LYR) {
9028 		un->un_layer_count--;
9029 	}
9030 	mutex_exit(&sd_detach_mutex);
9031 
9032 	return (rval);
9033 }
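
/*
 * The nodelay path above is what lets user-land utilities query a drive
 * that has no media or no valid label: opening with O_NDELAY skips the
 * ready/valid checks.  A user-land sketch (illustrative only; the device
 * path is hypothetical, DKIOCGMEDIAINFO is the standard dkio(7I) ioctl,
 * and error handling is elided):
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct dk_minfo	minfo;
 *	int		fd;
 *
 *	fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *	if ((fd >= 0) && (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0))
 *		(void) printf("%llu blocks of %u bytes\n",
 *		    (u_longlong_t)minfo.dki_capacity, minfo.dki_lbsize);
 */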
9034 
9035 
9036 /*
9037  *    Function: sdclose
9038  *
9039  * Description: Driver's close(9e) entry point function.
9040  *
9041  *   Arguments: dev    - device number
9042  *		flag   - file status flag, informational only
9043  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9044  *		cred_p - user credential pointer
9045  *
9046  * Return Code: ENXIO
9047  *
9048  *     Context: Kernel thread context
9049  */
9050 /* ARGSUSED */
9051 static int
9052 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9053 {
9054 	struct sd_lun	*un;
9055 	uchar_t		*cp;
9056 	int		part;
9057 	int		nodelay;
9058 	int		rval = 0;
9059 
9060 	/* Validate the open type */
9061 	if (otyp >= OTYPCNT) {
9062 		return (ENXIO);
9063 	}
9064 
9065 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9066 		return (ENXIO);
9067 	}
9068 
9069 	part = SDPART(dev);
9070 	nodelay = flag & (FNDELAY | FNONBLOCK);
9071 
9072 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9073 	    "sdclose: close of part %d type %d\n", part, otyp);
9074 
9075 	/*
9076 	 * We use a semaphore here in order to serialize
9077 	 * open and close requests on the device.
9078 	 */
9079 	sema_p(&un->un_semoclose);
9080 
9081 	mutex_enter(SD_MUTEX(un));
9082 
9083 	/* Don't proceed if power is being changed. */
9084 	while (un->un_state == SD_STATE_PM_CHANGING) {
9085 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9086 	}
9087 
9088 	if (un->un_exclopen & (1 << part)) {
9089 		un->un_exclopen &= ~(1 << part);
9090 	}
9091 
9092 	/* Update the open partition map */
9093 	if (otyp == OTYP_LYR) {
9094 		un->un_ocmap.lyropen[part] -= 1;
9095 	} else {
9096 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9097 	}
9098 
9099 	cp = &un->un_ocmap.chkd[0];
9100 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9101 		if (*cp != (uchar_t)0) {
9102 			break;
9103 		}
9104 		cp++;
9105 	}
9106 
9107 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9108 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9109 
9110 		/*
9111 		 * We avoid persistence upon the last close, and set
9112 		 * the throttle back to the maximum.
9113 		 */
9114 		un->un_throttle = un->un_saved_throttle;
9115 
9116 		if (un->un_state == SD_STATE_OFFLINE) {
9117 			if (un->un_f_is_fibre == FALSE) {
9118 				scsi_log(SD_DEVINFO(un), sd_label,
9119 				    CE_WARN, "offline\n");
9120 			}
9121 			mutex_exit(SD_MUTEX(un));
9122 			cmlb_invalidate(un->un_cmlbhandle,
9123 			    (void *)SD_PATH_DIRECT);
9124 			mutex_enter(SD_MUTEX(un));
9125 
9126 		} else {
9127 			/*
9128 			 * Flush any outstanding writes in NVRAM cache.
9129 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9130 			 * command; it may not work for non-Pluto devices.
9131 			 * SYNCHRONIZE CACHE is not required for removables,
9132 			 * except DVD-RAM drives.
9133 			 *
9134 			 * Also note: because SYNCHRONIZE CACHE is currently
9135 			 * the only command issued here that requires the
9136 			 * drive be powered up, only do the power up before
9137 			 * sending the Sync Cache command. If additional
9138 			 * commands are added which require a powered up
9139 			 * drive, the following sequence may have to change.
9140 			 *
9141 			 * And finally, note that parallel SCSI on SPARC
9142 			 * only issues a Sync Cache to DVD-RAM, a newly
9143 			 * supported device.
9144 			 */
9145 #if defined(__i386) || defined(__amd64)
9146 			if (un->un_f_sync_cache_supported ||
9147 			    un->un_f_dvdram_writable_device == TRUE) {
9148 #else
9149 			if (un->un_f_dvdram_writable_device == TRUE) {
9150 #endif
9151 				mutex_exit(SD_MUTEX(un));
9152 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9153 					rval =
9154 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9155 					    NULL);
9156 					/* ignore error if not supported */
9157 					if (rval == ENOTSUP) {
9158 						rval = 0;
9159 					} else if (rval != 0) {
9160 						rval = EIO;
9161 					}
9162 					sd_pm_exit(un);
9163 				} else {
9164 					rval = EIO;
9165 				}
9166 				mutex_enter(SD_MUTEX(un));
9167 			}
9168 
9169 			/*
9170 			 * For devices that support DOOR_LOCK, send an ALLOW
9171 			 * MEDIA REMOVAL command, but don't get upset if it
9172 			 * fails. We need to raise the power of the drive before
9173 			 * we can call sd_send_scsi_DOORLOCK().
9174 			 */
9175 			if (un->un_f_doorlock_supported) {
9176 				mutex_exit(SD_MUTEX(un));
9177 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9178 					rval = sd_send_scsi_DOORLOCK(un,
9179 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9180 
9181 					sd_pm_exit(un);
9182 					if (ISCD(un) && (rval != 0) &&
9183 					    (nodelay != 0)) {
9184 						rval = ENXIO;
9185 					}
9186 				} else {
9187 					rval = EIO;
9188 				}
9189 				mutex_enter(SD_MUTEX(un));
9190 			}
9191 
9192 			/*
9193 			 * If a device has removable media, invalidate all
9194 			 * parameters related to media, such as geometry,
9195 			 * blocksize, and blockcount.
9196 			 */
9197 			if (un->un_f_has_removable_media) {
9198 				sr_ejected(un);
9199 			}
9200 
9201 			/*
9202 			 * Destroy the cache (if it exists) which was
9203 			 * allocated for the write maps since this is
9204 			 * the last close for this media.
9205 			 */
9206 			if (un->un_wm_cache) {
9207 				/*
9208 				 * Check if there are pending commands;
9209 				 * if there are, give a warning and
9210 				 * do not destroy the cache.
9211 				 */
9212 				if (un->un_ncmds_in_driver > 0) {
9213 					scsi_log(SD_DEVINFO(un),
9214 					    sd_label, CE_WARN,
9215 					    "Unable to clean up memory "
9216 					    "because of pending I/O\n");
9217 				} else {
9218 					kmem_cache_destroy(
9219 					    un->un_wm_cache);
9220 					un->un_wm_cache = NULL;
9221 				}
9222 			}
9223 		}
9224 	}
9225 
9226 	mutex_exit(SD_MUTEX(un));
9227 	sema_v(&un->un_semoclose);
9228 
9229 	if (otyp == OTYP_LYR) {
9230 		mutex_enter(&sd_detach_mutex);
9231 		/*
9232 		 * The detach routine may run when the layer count
9233 		 * drops to zero.
9234 		 */
9235 		un->un_layer_count--;
9236 		mutex_exit(&sd_detach_mutex);
9237 	}
9238 
9239 	return (rval);
9240 }
9241 
9242 
9243 /*
9244  *    Function: sd_ready_and_valid
9245  *
9246  * Description: Test if device is ready and has a valid geometry.
9247  *
9248  *   Arguments: un - driver soft state (unit) structure
9250  *
9251  * Return Code: SD_READY_VALID		ready and valid label
9252  *		SD_NOT_READY_VALID	not ready, no label
9253  *		SD_RESERVED_BY_OTHERS	reservation conflict
9254  *
9255  *     Context: Never called at interrupt context.
9256  */
9257 
9258 static int
9259 sd_ready_and_valid(struct sd_lun *un)
9260 {
9261 	struct sd_errstats	*stp;
9262 	uint64_t		capacity;
9263 	uint_t			lbasize;
9264 	int			rval = SD_READY_VALID;
9265 	char			name_str[48];
9266 	int			is_valid;
9267 
9268 	ASSERT(un != NULL);
9269 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9270 
9271 	mutex_enter(SD_MUTEX(un));
9272 	/*
9273 	 * If a device has removable media, we must check if media is
9274 	 * ready when checking if this device is ready and valid.
9275 	 */
9276 	if (un->un_f_has_removable_media) {
9277 		mutex_exit(SD_MUTEX(un));
9278 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9279 			rval = SD_NOT_READY_VALID;
9280 			mutex_enter(SD_MUTEX(un));
9281 			goto done;
9282 		}
9283 
9284 		is_valid = SD_IS_VALID_LABEL(un);
9285 		mutex_enter(SD_MUTEX(un));
9286 		if (!is_valid ||
9287 		    (un->un_f_blockcount_is_valid == FALSE) ||
9288 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9289 
9290 			/* The capacity has to be read on every open. */
9291 			mutex_exit(SD_MUTEX(un));
9292 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9293 			    &lbasize, SD_PATH_DIRECT) != 0) {
9294 				cmlb_invalidate(un->un_cmlbhandle,
9295 				    (void *)SD_PATH_DIRECT);
9296 				mutex_enter(SD_MUTEX(un));
9297 				rval = SD_NOT_READY_VALID;
9298 				goto done;
9299 			} else {
9300 				mutex_enter(SD_MUTEX(un));
9301 				sd_update_block_info(un, lbasize, capacity);
9302 			}
9303 		}
9304 
9305 		/*
9306 		 * Check if the media in the device is writable or not.
9307 		 */
9308 		if (!is_valid && ISCD(un)) {
9309 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
9310 		}
9311 
9312 	} else {
9313 		/*
9314 		 * Do a test unit ready to clear any unit attention from non-cd
9315 		 * devices.
9316 		 */
9317 		mutex_exit(SD_MUTEX(un));
9318 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9319 		mutex_enter(SD_MUTEX(un));
9320 	}
9321 
9322 
9323 	/*
9324 	 * If this is a non-512-byte block device, allocate space for
9325 	 * the wmap cache. This is done here because this routine is
9326 	 * called every time the media is changed, and the block size
9327 	 * is a function of the media rather than the device.
9328 	 */
9329 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9330 		if (!(un->un_wm_cache)) {
9331 			(void) snprintf(name_str, sizeof (name_str),
9332 			    "%s%d_cache",
9333 			    ddi_driver_name(SD_DEVINFO(un)),
9334 			    ddi_get_instance(SD_DEVINFO(un)));
9335 			un->un_wm_cache = kmem_cache_create(
9336 			    name_str, sizeof (struct sd_w_map),
9337 			    8, sd_wm_cache_constructor,
9338 			    sd_wm_cache_destructor, NULL,
9339 			    (void *)un, NULL, 0);
9340 			if (!(un->un_wm_cache)) {
9341 				rval = ENOMEM;
9342 				goto done;
9343 			}
9344 		}
9345 	}
9346 
9347 	if (un->un_state == SD_STATE_NORMAL) {
9348 		/*
9349 		 * If the target is not yet ready here (defined by a TUR
9350 		 * failure), invalidate the geometry and print an 'offline'
9351 		 * message. This is a legacy message, as the state of the
9352 		 * target is not actually changed to SD_STATE_OFFLINE.
9353 		 *
9354 		 * If the TUR fails for EACCES (Reservation Conflict),
9355 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9356 		 * reservation conflict. If the TUR fails for other
9357 		 * reasons, SD_NOT_READY_VALID will be returned.
9358 		 */
9359 		int err;
9360 
9361 		mutex_exit(SD_MUTEX(un));
9362 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9363 		mutex_enter(SD_MUTEX(un));
9364 
9365 		if (err != 0) {
9366 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9367 			    "offline or reservation conflict\n");
9368 			mutex_exit(SD_MUTEX(un));
9369 			cmlb_invalidate(un->un_cmlbhandle,
9370 			    (void *)SD_PATH_DIRECT);
9371 			mutex_enter(SD_MUTEX(un));
9372 			if (err == EACCES) {
9373 				rval = SD_RESERVED_BY_OTHERS;
9374 			} else {
9375 				rval = SD_NOT_READY_VALID;
9376 			}
9377 			goto done;
9378 		}
9379 	}
9380 
9381 	if (un->un_f_format_in_progress == FALSE) {
9382 		mutex_exit(SD_MUTEX(un));
9383 		if (cmlb_validate(un->un_cmlbhandle, 0,
9384 		    (void *)SD_PATH_DIRECT) != 0) {
9385 			rval = SD_NOT_READY_VALID;
9386 			mutex_enter(SD_MUTEX(un));
9387 			goto done;
9388 		}
9389 		if (un->un_f_pkstats_enabled) {
9390 			sd_set_pstats(un);
9391 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9392 			    "sd_ready_and_valid: un:0x%p pstats created and "
9393 			    "set\n", un);
9394 		}
9395 		mutex_enter(SD_MUTEX(un));
9396 	}
9397 
9398 	/*
9399 	 * If this device supports the DOOR_LOCK command, try to send
9400 	 * it to PREVENT MEDIA REMOVAL, but don't get upset
9401 	 * if it fails. For a CD, however, it is an error.
9402 	 */
9403 	if (un->un_f_doorlock_supported) {
9404 		mutex_exit(SD_MUTEX(un));
9405 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9406 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9407 			rval = SD_NOT_READY_VALID;
9408 			mutex_enter(SD_MUTEX(un));
9409 			goto done;
9410 		}
9411 		mutex_enter(SD_MUTEX(un));
9412 	}
9413 
9414 	/* The state has changed, inform the media watch routines */
9415 	un->un_mediastate = DKIO_INSERTED;
9416 	cv_broadcast(&un->un_state_cv);
9417 	rval = SD_READY_VALID;
9418 
9419 done:
9420 
9421 	/*
9422 	 * Initialize the capacity kstat value, if no media previously
9423 	 * (capacity kstat is 0) and a media has been inserted
9424 	 * (un_blockcount > 0).
9425 	 */
9426 	if (un->un_errstats != NULL) {
9427 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9428 		if ((stp->sd_capacity.value.ui64 == 0) &&
9429 		    (un->un_f_blockcount_is_valid == TRUE)) {
9430 			stp->sd_capacity.value.ui64 =
9431 			    (uint64_t)((uint64_t)un->un_blockcount *
9432 			    un->un_sys_blocksize);
9433 		}
9434 	}
9435 
9436 	mutex_exit(SD_MUTEX(un));
9437 	return (rval);
9438 }
9439 
9440 
9441 /*
9442  *    Function: sdmin
9443  *
9444  * Description: Routine to limit the size of a data transfer. Used in
9445  *		conjunction with physio(9F).
9446  *
9447  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9448  *
9449  *     Context: Kernel thread context.
9450  */
9451 
9452 static void
9453 sdmin(struct buf *bp)
9454 {
9455 	struct sd_lun	*un;
9456 	int		instance;
9457 
9458 	instance = SDUNIT(bp->b_edev);
9459 
9460 	un = ddi_get_soft_state(sd_state, instance);
9461 	ASSERT(un != NULL);
9462 
9463 	if (bp->b_bcount > un->un_max_xfer_size) {
9464 		bp->b_bcount = un->un_max_xfer_size;
9465 	}
9466 }
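
/*
 * For example, with an un_max_xfer_size of 1 MB, a single 4 MB read(2)
 * arrives here four times: physio(9F) calls this routine once per
 * transfer, honors the reduced b_bcount, and loops through sdstrategy()
 * until the uio is drained.
 */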
9467 
9468 
9469 /*
9470  *    Function: sdread
9471  *
9472  * Description: Driver's read(9e) entry point function.
9473  *
9474  *   Arguments: dev   - device number
9475  *		uio   - structure pointer describing where data is to be stored
9476  *			in user's space
9477  *		cred_p  - user credential pointer
9478  *
9479  * Return Code: ENXIO
9480  *		EIO
9481  *		EINVAL
9482  *		value returned by physio
9483  *
9484  *     Context: Kernel thread context.
9485  */
9486 /* ARGSUSED */
9487 static int
9488 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9489 {
9490 	struct sd_lun	*un = NULL;
9491 	int		secmask;
9492 	int		err;
9493 
9494 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9495 		return (ENXIO);
9496 	}
9497 
9498 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9499 
9500 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9501 		mutex_enter(SD_MUTEX(un));
9502 		/*
9503 		 * Because the call to sd_ready_and_valid will issue I/O, we
9504 		 * must wait here if either the device is suspended or
9505 		 * its power level is changing.
9506 		 */
9507 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9508 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9509 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9510 		}
9511 		un->un_ncmds_in_driver++;
9512 		mutex_exit(SD_MUTEX(un));
9513 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9514 			mutex_enter(SD_MUTEX(un));
9515 			un->un_ncmds_in_driver--;
9516 			ASSERT(un->un_ncmds_in_driver >= 0);
9517 			mutex_exit(SD_MUTEX(un));
9518 			return (EIO);
9519 		}
9520 		mutex_enter(SD_MUTEX(un));
9521 		un->un_ncmds_in_driver--;
9522 		ASSERT(un->un_ncmds_in_driver >= 0);
9523 		mutex_exit(SD_MUTEX(un));
9524 	}
9525 
9526 	/*
9527 	 * Read requests are restricted to multiples of the system block size.
9528 	 */
9529 	secmask = un->un_sys_blocksize - 1;
9530 
9531 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9532 		SD_ERROR(SD_LOG_READ_WRITE, un,
9533 		    "sdread: file offset not modulo %d\n",
9534 		    un->un_sys_blocksize);
9535 		err = EINVAL;
9536 	} else if (uio->uio_iov->iov_len & (secmask)) {
9537 		SD_ERROR(SD_LOG_READ_WRITE, un,
9538 		    "sdread: transfer length not modulo %d\n",
9539 		    un->un_sys_blocksize);
9540 		err = EINVAL;
9541 	} else {
9542 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9543 	}
9544 	return (err);
9545 }
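
/*
 * A worked example of the alignment checks above, assuming the common
 * 512-byte un_sys_blocksize (so secmask == 0x1ff):
 *
 *	uio_loffset 1024, iov_len 8192:	1024 & 0x1ff == 0 and
 *					8192 & 0x1ff == 0, so the
 *					request goes to physio()
 *	uio_loffset 1000:		1000 & 0x1ff == 488, so EINVAL
 *
 * The same checks appear in sdwrite, sdaread, and sdawrite below.
 */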
9546 
9547 
9548 /*
9549  *    Function: sdwrite
9550  *
9551  * Description: Driver's write(9e) entry point function.
9552  *
9553  *   Arguments: dev   - device number
9554  *		uio   - structure pointer describing where data is stored in
9555  *			user's space
9556  *		cred_p  - user credential pointer
9557  *
9558  * Return Code: ENXIO
9559  *		EIO
9560  *		EINVAL
9561  *		value returned by physio
9562  *
9563  *     Context: Kernel thread context.
9564  */
9565 /* ARGSUSED */
9566 static int
9567 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9568 {
9569 	struct sd_lun	*un = NULL;
9570 	int		secmask;
9571 	int		err;
9572 
9573 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9574 		return (ENXIO);
9575 	}
9576 
9577 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9578 
9579 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9580 		mutex_enter(SD_MUTEX(un));
9581 		/*
9582 		 * Because the call to sd_ready_and_valid will issue I/O, we
9583 		 * must wait here if either the device is suspended or
9584 		 * its power level is changing.
9585 		 */
9586 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9587 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9588 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9589 		}
9590 		un->un_ncmds_in_driver++;
9591 		mutex_exit(SD_MUTEX(un));
9592 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9593 			mutex_enter(SD_MUTEX(un));
9594 			un->un_ncmds_in_driver--;
9595 			ASSERT(un->un_ncmds_in_driver >= 0);
9596 			mutex_exit(SD_MUTEX(un));
9597 			return (EIO);
9598 		}
9599 		mutex_enter(SD_MUTEX(un));
9600 		un->un_ncmds_in_driver--;
9601 		ASSERT(un->un_ncmds_in_driver >= 0);
9602 		mutex_exit(SD_MUTEX(un));
9603 	}
9604 
9605 	/*
9606 	 * Write requests are restricted to multiples of the system block size.
9607 	 */
9608 	secmask = un->un_sys_blocksize - 1;
9609 
9610 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9611 		SD_ERROR(SD_LOG_READ_WRITE, un,
9612 		    "sdwrite: file offset not modulo %d\n",
9613 		    un->un_sys_blocksize);
9614 		err = EINVAL;
9615 	} else if (uio->uio_iov->iov_len & (secmask)) {
9616 		SD_ERROR(SD_LOG_READ_WRITE, un,
9617 		    "sdwrite: transfer length not modulo %d\n",
9618 		    un->un_sys_blocksize);
9619 		err = EINVAL;
9620 	} else {
9621 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9622 	}
9623 	return (err);
9624 }
9625 
9626 
9627 /*
9628  *    Function: sdaread
9629  *
9630  * Description: Driver's aread(9e) entry point function.
9631  *
9632  *   Arguments: dev   - device number
9633  *		aio   - structure pointer describing where data is to be stored
9634  *		cred_p  - user credential pointer
9635  *
9636  * Return Code: ENXIO
9637  *		EIO
9638  *		EINVAL
9639  *		value returned by aphysio
9640  *
9641  *     Context: Kernel thread context.
9642  */
9643 /* ARGSUSED */
9644 static int
9645 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9646 {
9647 	struct sd_lun	*un = NULL;
9648 	struct uio	*uio = aio->aio_uio;
9649 	int		secmask;
9650 	int		err;
9651 
9652 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9653 		return (ENXIO);
9654 	}
9655 
9656 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9657 
9658 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9659 		mutex_enter(SD_MUTEX(un));
9660 		/*
9661 		 * Because the call to sd_ready_and_valid will issue I/O, we
9662 		 * must wait here if either the device is suspended or
9663 		 * its power level is changing.
9664 		 */
9665 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9666 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9667 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9668 		}
9669 		un->un_ncmds_in_driver++;
9670 		mutex_exit(SD_MUTEX(un));
9671 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9672 			mutex_enter(SD_MUTEX(un));
9673 			un->un_ncmds_in_driver--;
9674 			ASSERT(un->un_ncmds_in_driver >= 0);
9675 			mutex_exit(SD_MUTEX(un));
9676 			return (EIO);
9677 		}
9678 		mutex_enter(SD_MUTEX(un));
9679 		un->un_ncmds_in_driver--;
9680 		ASSERT(un->un_ncmds_in_driver >= 0);
9681 		mutex_exit(SD_MUTEX(un));
9682 	}
9683 
9684 	/*
9685 	 * Read requests are restricted to multiples of the system block size.
9686 	 */
9687 	secmask = un->un_sys_blocksize - 1;
9688 
9689 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9690 		SD_ERROR(SD_LOG_READ_WRITE, un,
9691 		    "sdaread: file offset not modulo %d\n",
9692 		    un->un_sys_blocksize);
9693 		err = EINVAL;
9694 	} else if (uio->uio_iov->iov_len & (secmask)) {
9695 		SD_ERROR(SD_LOG_READ_WRITE, un,
9696 		    "sdaread: transfer length not modulo %d\n",
9697 		    un->un_sys_blocksize);
9698 		err = EINVAL;
9699 	} else {
9700 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9701 	}
9702 	return (err);
9703 }
9704 
9705 
9706 /*
9707  *    Function: sdawrite
9708  *
9709  * Description: Driver's awrite(9e) entry point function.
9710  *
9711  *   Arguments: dev   - device number
9712  *		aio   - structure pointer describing where data is stored
9713  *		cred_p  - user credential pointer
9714  *
9715  * Return Code: ENXIO
9716  *		EIO
9717  *		EINVAL
9718  *		value returned by aphysio
9719  *
9720  *     Context: Kernel thread context.
9721  */
9722 /* ARGSUSED */
9723 static int
9724 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9725 {
9726 	struct sd_lun	*un = NULL;
9727 	struct uio	*uio = aio->aio_uio;
9728 	int		secmask;
9729 	int		err;
9730 
9731 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9732 		return (ENXIO);
9733 	}
9734 
9735 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9736 
9737 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9738 		mutex_enter(SD_MUTEX(un));
9739 		/*
9740 		 * Because the call to sd_ready_and_valid will issue I/O, we
9741 		 * must wait here if either the device is suspended or
9742 		 * its power level is changing.
9743 		 */
9744 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9745 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9746 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9747 		}
9748 		un->un_ncmds_in_driver++;
9749 		mutex_exit(SD_MUTEX(un));
9750 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9751 			mutex_enter(SD_MUTEX(un));
9752 			un->un_ncmds_in_driver--;
9753 			ASSERT(un->un_ncmds_in_driver >= 0);
9754 			mutex_exit(SD_MUTEX(un));
9755 			return (EIO);
9756 		}
9757 		mutex_enter(SD_MUTEX(un));
9758 		un->un_ncmds_in_driver--;
9759 		ASSERT(un->un_ncmds_in_driver >= 0);
9760 		mutex_exit(SD_MUTEX(un));
9761 	}
9762 
9763 	/*
9764 	 * Write requests are restricted to multiples of the system block size.
9765 	 */
9766 	secmask = un->un_sys_blocksize - 1;
9767 
9768 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9769 		SD_ERROR(SD_LOG_READ_WRITE, un,
9770 		    "sdawrite: file offset not modulo %d\n",
9771 		    un->un_sys_blocksize);
9772 		err = EINVAL;
9773 	} else if (uio->uio_iov->iov_len & (secmask)) {
9774 		SD_ERROR(SD_LOG_READ_WRITE, un,
9775 		    "sdawrite: transfer length not modulo %d\n",
9776 		    un->un_sys_blocksize);
9777 		err = EINVAL;
9778 	} else {
9779 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
9780 	}
9781 	return (err);
9782 }
9783 
9784 
9785 
9786 
9787 
9788 /*
9789  * Driver IO processing follows the following sequence:
9790  *
9791  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
9792  *         |                |                     ^
9793  *         v                v                     |
9794  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
9795  *         |                |                     |                   |
9796  *         v                |                     |                   |
9797  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
9798  *         |                |                     ^                   ^
9799  *         v                v                     |                   |
9800  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
9801  *         |                |                     |                   |
9802  *     +---+                |                     +------------+      +-------+
9803  *     |                    |                                  |              |
9804  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9805  *     |                    v                                  |              |
9806  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
9807  *     |                    |                                  ^              |
9808  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9809  *     |                    v                                  |              |
9810  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
9811  *     |                    |                                  ^              |
9812  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9813  *     |                    v                                  |              |
9814  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
9815  *     |                    |                                  ^              |
9816  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
9817  *     |                    v                                  |              |
9818  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
9819  *     |                    |                                  ^              |
9820  *     |                    |                                  |              |
9821  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
9822  *                          |                           ^
9823  *                          v                           |
9824  *                   sd_core_iostart()                  |
9825  *                          |                           |
9826  *                          |                           +------>(*destroypkt)()
9827  *                          +-> sd_start_cmds() <-+     |           |
9828  *                          |                     |     |           v
9829  *                          |                     |     |  scsi_destroy_pkt(9F)
9830  *                          |                     |     |
9831  *                          +->(*initpkt)()       +- sdintr()
9832  *                          |  |                        |  |
9833  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
9834  *                          |  +-> scsi_setup_cdb(9F)   |
9835  *                          |                           |
9836  *                          +--> scsi_transport(9F)     |
9837  *                                     |                |
9838  *                                     +----> SCSA ---->+
9839  *
9840  *
9841  * This code is based upon the following presumptions:
9842  *
9843  *   - iostart and iodone functions operate on buf(9S) structures. These
9844  *     functions perform the necessary operations on the buf(9S) and pass
9845  *     them along to the next function in the chain by using the macros
9846  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
9847  *     (for iodone side functions).
9848  *
9849  *   - The iostart side functions may sleep. The iodone side functions
9850  *     are called under interrupt context and may NOT sleep. Therefore
9851  *     iodone side functions also may not call iostart side functions.
9852  *     (NOTE: iostart side functions should NOT sleep for memory, as
9853  *     this could result in deadlock.)
9854  *
9855  *   - An iostart side function may call its corresponding iodone side
9856  *     function directly (if necessary).
9857  *
9858  *   - In the event of an error, an iostart side function can return a buf(9S)
9859  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
9860  *     b_error in the usual way of course).
9861  *
9862  *   - The taskq mechanism may be used by the iodone side functions to dispatch
9863  *     requests to the iostart side functions.  The iostart side functions in
9864  *     this case would be called under the context of a taskq thread, so it's
9865  *     OK for them to block/sleep/spin in this case.
9866  *
9867  *   - iostart side functions may allocate "shadow" buf(9S) structs and
9868  *     pass them along to the next function in the chain.  The corresponding
9869  *     iodone side functions must coalesce the "shadow" bufs and return
9870  *     the "original" buf to the next higher layer.
9871  *
9872  *   - The b_private field of the buf(9S) struct holds a pointer to
9873  *     an sd_xbuf struct, which contains information needed to
9874  *     construct the scsi_pkt for the command.
9875  *
9876  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
9877  *     layer must acquire & release the SD_MUTEX(un) as needed.
9878  */
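
/*
 * Illustrative sketch only (sd_example_iostart is hypothetical, not an
 * actual layer in this driver): a typical iostart-side function follows
 * this pattern:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(per-layer processing of bp goes here)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * On error it instead sets B_ERROR/b_error and returns the buf via
 * SD_BEGIN_IODONE(); sd_pm_iostart() below shows a real instance.
 */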
9879 
9880 
9881 /*
9882  * Create taskq for all targets in the system. This is created at
9883  * _init(9E) and destroyed at _fini(9E).
9884  *
9885  * Note: here we set the minalloc to a reasonably high number to ensure that
9886  * we will have an adequate supply of task entries available at interrupt time.
9887  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
9888  * sd_taskq_create().  Since we do not want to sleep for allocations at
9889  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
9890  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
9891  * requests at any one instant in time.
9892  */
9893 #define	SD_TASKQ_NUMTHREADS	8
9894 #define	SD_TASKQ_MINALLOC	256
9895 #define	SD_TASKQ_MAXALLOC	256
9896 
9897 static taskq_t	*sd_tq = NULL;
9898 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
9899 
9900 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
9901 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
9902 
9903 /*
9904  * The following task queue is created for the write phase of
9905  * read-modify-write requests on devices with a non-512-byte block size.
9906  * Limit the number of threads to 1 for now. This number was chosen
9907  * because the queue currently applies only to DVD-RAM and MO drives,
9908  * for which performance is not the main criterion at this stage.
9909  * Note: it remains to be explored whether a single taskq could be used.
9910  */
9911 #define	SD_WMR_TASKQ_NUMTHREADS	1
9912 static taskq_t	*sd_wmr_tq = NULL;
9913 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
9914 
9915 /*
9916  *    Function: sd_taskq_create
9917  *
9918  * Description: Create taskq thread(s) and preallocate task entries
9919  *
9920  * Return Code: Returns a pointer to the allocated taskq_t.
9921  *
9922  *     Context: Can sleep. Requires blockable context.
9923  *
9924  *       Notes: - The taskq() facility currently is NOT part of the DDI.
9925  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
9926  *		- taskq_create() will block for memory, also it will panic
9927  *		  if it cannot create the requested number of threads.
9928  *		- Currently taskq_create() creates threads that cannot be
9929  *		  swapped.
9930  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
9931  *		  supply of taskq entries at interrupt time (ie, so that we
9932  *		  do not have to sleep for memory)
9933  */
9934 
9935 static void
9936 sd_taskq_create(void)
9937 {
9938 	char	taskq_name[TASKQ_NAMELEN];
9939 
9940 	ASSERT(sd_tq == NULL);
9941 	ASSERT(sd_wmr_tq == NULL);
9942 
9943 	(void) snprintf(taskq_name, sizeof (taskq_name),
9944 	    "%s_drv_taskq", sd_label);
9945 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
9946 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9947 	    TASKQ_PREPOPULATE));
9948 
9949 	(void) snprintf(taskq_name, sizeof (taskq_name),
9950 	    "%s_rmw_taskq", sd_label);
9951 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
9952 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9953 	    TASKQ_PREPOPULATE));
9954 }
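
/*
 * Because maxalloc == minalloc, dispatches to these queues at interrupt
 * time must use KM_NOSLEEP and handle failure rather than block. An
 * illustrative sketch of the expected caller pattern (func and arg are
 * placeholders):
 *
 *	if (taskq_dispatch(sd_tq, func, arg, KM_NOSLEEP) == 0) {
 *		(fail the command; do not sleep for a task entry)
 *	}
 *
 * sd_mapblocksize_iodone() below uses exactly this pattern with sd_wmr_tq.
 */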
9955 
9956 
9957 /*
9958  *    Function: sd_taskq_delete
9959  *
9960  * Description: Complementary cleanup routine for sd_taskq_create().
9961  *
9962  *     Context: Kernel thread context.
9963  */
9964 
9965 static void
9966 sd_taskq_delete(void)
9967 {
9968 	ASSERT(sd_tq != NULL);
9969 	ASSERT(sd_wmr_tq != NULL);
9970 	taskq_destroy(sd_tq);
9971 	taskq_destroy(sd_wmr_tq);
9972 	sd_tq = NULL;
9973 	sd_wmr_tq = NULL;
9974 }
9975 
9976 
9977 /*
9978  *    Function: sdstrategy
9979  *
9980  * Description: Driver's strategy (9E) entry point function.
9981  *
9982  *   Arguments: bp - pointer to buf(9S)
9983  *
9984  * Return Code: Always returns zero
9985  *
9986  *     Context: Kernel thread context.
9987  */
9988 
9989 static int
9990 sdstrategy(struct buf *bp)
9991 {
9992 	struct sd_lun *un;
9993 
9994 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
9995 	if (un == NULL) {
9996 		bioerror(bp, EIO);
9997 		bp->b_resid = bp->b_bcount;
9998 		biodone(bp);
9999 		return (0);
10000 	}
10001 	/* As was done in the past, fail new commands if the state is dumping. */
10002 	if (un->un_state == SD_STATE_DUMPING) {
10003 		bioerror(bp, ENXIO);
10004 		bp->b_resid = bp->b_bcount;
10005 		biodone(bp);
10006 		return (0);
10007 	}
10008 
10009 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10010 
10011 	/*
10012 	 * Commands may sneak in while we released the mutex in
10013 	 * DDI_SUSPEND, we should block new commands. However, old
10014 	 * commands that are still in the driver at this point should
10015 	 * still be allowed to drain.
10016 	 */
10017 	mutex_enter(SD_MUTEX(un));
10018 	/*
10019 	 * Must wait here if either the device is suspended or
10020 	 * if its power level is changing.
10021 	 */
10022 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10023 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10024 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10025 	}
10026 
10027 	un->un_ncmds_in_driver++;
10028 
10029 	/*
10030 	 * atapi: Since we are running the CD for now in PIO mode we need to
10031 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
10032 	 * context from within the HBA's init_pkt routine.
10033 	 */
10034 	if (un->un_f_cfg_is_atapi == TRUE) {
10035 		mutex_exit(SD_MUTEX(un));
10036 		bp_mapin(bp);
10037 		mutex_enter(SD_MUTEX(un));
10038 	}
10039 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10040 	    un->un_ncmds_in_driver);
10041 
10042 	mutex_exit(SD_MUTEX(un));
10043 
10044 	/*
10045 	 * This will (eventually) allocate the sd_xbuf area and
10046 	 * call sd_xbuf_strategy().  We just want to return the
10047 	 * result of ddi_xbuf_qstrategy so that we have an opt-
10048 	 * imized tail call which saves us a stack frame.
10049 	 */
10050 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10051 }
10052 
10053 
10054 /*
10055  *    Function: sd_xbuf_strategy
10056  *
10057  * Description: Function for initiating IO operations via the
10058  *		ddi_xbuf_qstrategy() mechanism.
10059  *
10060  *     Context: Kernel thread context.
10061  */
10062 
10063 static void
10064 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10065 {
10066 	struct sd_lun *un = arg;
10067 
10068 	ASSERT(bp != NULL);
10069 	ASSERT(xp != NULL);
10070 	ASSERT(un != NULL);
10071 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10072 
10073 	/*
10074 	 * Initialize the fields in the xbuf and save a pointer to the
10075 	 * xbuf in bp->b_private.
10076 	 */
10077 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10078 
10079 	/* Send the buf down the iostart chain */
10080 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10081 }
10082 
10083 
10084 /*
10085  *    Function: sd_xbuf_init
10086  *
10087  * Description: Prepare the given sd_xbuf struct for use.
10088  *
10089  *   Arguments: un - ptr to softstate
10090  *		bp - ptr to associated buf(9S)
10091  *		xp - ptr to associated sd_xbuf
10092  *		chain_type - IO chain type to use:
10093  *			SD_CHAIN_NULL
10094  *			SD_CHAIN_BUFIO
10095  *			SD_CHAIN_USCSI
10096  *			SD_CHAIN_DIRECT
10097  *			SD_CHAIN_DIRECT_PRIORITY
10098  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10099  *			initialization; may be NULL if none.
10100  *
10101  *     Context: Kernel thread context
10102  */
10103 
10104 static void
10105 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10106 	uchar_t chain_type, void *pktinfop)
10107 {
10108 	int index;
10109 
10110 	ASSERT(un != NULL);
10111 	ASSERT(bp != NULL);
10112 	ASSERT(xp != NULL);
10113 
10114 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10115 	    bp, chain_type);
10116 
10117 	xp->xb_un	= un;
10118 	xp->xb_pktp	= NULL;
10119 	xp->xb_pktinfo	= pktinfop;
10120 	xp->xb_private	= bp->b_private;
10121 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10122 
10123 	/*
10124 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10125 	 * upon the specified chain type to use.
10126 	 */
10127 	switch (chain_type) {
10128 	case SD_CHAIN_NULL:
10129 		/*
10130 		 * Fall through to just use the values for the buf type, even
10131 		 * though for the NULL chain these values will never be used.
10132 		 */
10133 		/* FALLTHRU */
10134 	case SD_CHAIN_BUFIO:
10135 		index = un->un_buf_chain_type;
10136 		break;
10137 	case SD_CHAIN_USCSI:
10138 		index = un->un_uscsi_chain_type;
10139 		break;
10140 	case SD_CHAIN_DIRECT:
10141 		index = un->un_direct_chain_type;
10142 		break;
10143 	case SD_CHAIN_DIRECT_PRIORITY:
10144 		index = un->un_priority_chain_type;
10145 		break;
10146 	default:
10147 		/* We're really broken if we ever get here... */
10148 		panic("sd_xbuf_init: illegal chain type!");
10149 		/*NOTREACHED*/
10150 	}
10151 
10152 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10153 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10154 
10155 	/*
10156 	 * It might be a bit easier to simply bzero the entire xbuf above,
10157 	 * but it turns out that since we init a fair number of members anyway,
10158 	 * we save a fair number of cycles by doing explicit assignment of zero.
10159 	 */
10160 	xp->xb_pkt_flags	= 0;
10161 	xp->xb_dma_resid	= 0;
10162 	xp->xb_retry_count	= 0;
10163 	xp->xb_victim_retry_count = 0;
10164 	xp->xb_ua_retry_count	= 0;
10165 	xp->xb_sense_bp		= NULL;
10166 	xp->xb_sense_status	= 0;
10167 	xp->xb_sense_state	= 0;
10168 	xp->xb_sense_resid	= 0;
10169 
10170 	bp->b_private	= xp;
10171 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10172 	bp->b_resid	= 0;
10173 	bp->av_forw	= NULL;
10174 	bp->av_back	= NULL;
10175 	bioerror(bp, 0);
10176 
10177 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10178 }
10179 
10180 
10181 /*
10182  *    Function: sd_uscsi_strategy
10183  *
10184  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10185  *
10186  *   Arguments: bp - buf struct ptr
10187  *
10188  * Return Code: Always returns 0
10189  *
10190  *     Context: Kernel thread context
10191  */
10192 
10193 static int
10194 sd_uscsi_strategy(struct buf *bp)
10195 {
10196 	struct sd_lun		*un;
10197 	struct sd_uscsi_info	*uip;
10198 	struct sd_xbuf		*xp;
10199 	uchar_t			chain_type;
10200 
10201 	ASSERT(bp != NULL);
10202 
10203 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10204 	if (un == NULL) {
10205 		bioerror(bp, EIO);
10206 		bp->b_resid = bp->b_bcount;
10207 		biodone(bp);
10208 		return (0);
10209 	}
10210 
10211 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10212 
10213 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10214 
10215 	mutex_enter(SD_MUTEX(un));
10216 	/*
10217 	 * atapi: Since we are running the CD for now in PIO mode we need to
10218 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
10219 	 * context from within the HBA's init_pkt routine.
10220 	 */
10221 	if (un->un_f_cfg_is_atapi == TRUE) {
10222 		mutex_exit(SD_MUTEX(un));
10223 		bp_mapin(bp);
10224 		mutex_enter(SD_MUTEX(un));
10225 	}
10226 	un->un_ncmds_in_driver++;
10227 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10228 	    un->un_ncmds_in_driver);
10229 	mutex_exit(SD_MUTEX(un));
10230 
10231 	/*
10232 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10233 	 */
10234 	ASSERT(bp->b_private != NULL);
10235 	uip = (struct sd_uscsi_info *)bp->b_private;
10236 
10237 	switch (uip->ui_flags) {
10238 	case SD_PATH_DIRECT:
10239 		chain_type = SD_CHAIN_DIRECT;
10240 		break;
10241 	case SD_PATH_DIRECT_PRIORITY:
10242 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10243 		break;
10244 	default:
10245 		chain_type = SD_CHAIN_USCSI;
10246 		break;
10247 	}
10248 
10249 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10250 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10251 
10252 	/* Use the index obtained within xbuf_init */
10253 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10254 
10255 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10256 
10257 	return (0);
10258 }
10259 
10260 /*
10261  *    Function: sd_send_scsi_cmd
10262  *
10263  * Description: Runs a USCSI command for the user (when called thru sdioctl),
10264  *		or for the driver
10265  *
10266  *   Arguments: dev - the dev_t for the device
10267  *		incmd - ptr to a valid uscsi_cmd struct
10268  *		flag - bit flag, indicating open settings, 32/64 bit type
10269  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10270  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10271  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10272  *			to use the USCSI "direct" chain and bypass the normal
10273  *			command waitq.
10274  *
10275  * Return Code: 0 -  successful completion of the given command
10276  *		EIO - scsi_uscsi_handle_command() failed
10277  *		ENXIO  - soft state not found for specified dev
10278  *		EINVAL
10279  *		EFAULT - copyin/copyout error
10280  *		return code of scsi_uscsi_handle_command():
10281  *			EIO
10282  *			ENXIO
10283  *			EACCES
10284  *
10285  *     Context: Waits for command to complete. Can sleep.
10286  */
10287 
10288 static int
10289 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10290 	enum uio_seg dataspace, int path_flag)
10291 {
10292 	struct sd_uscsi_info	*uip;
10293 	struct uscsi_cmd	*uscmd;
10294 	struct sd_lun	*un;
10295 	int	format = 0;
10296 	int	rval;
10297 
10298 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10299 	if (un == NULL) {
10300 		return (ENXIO);
10301 	}
10302 
10303 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10304 
10305 #ifdef SDDEBUG
10306 	switch (dataspace) {
10307 	case UIO_USERSPACE:
10308 		SD_TRACE(SD_LOG_IO, un,
10309 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10310 		break;
10311 	case UIO_SYSSPACE:
10312 		SD_TRACE(SD_LOG_IO, un,
10313 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10314 		break;
10315 	default:
10316 		SD_TRACE(SD_LOG_IO, un,
10317 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10318 		break;
10319 	}
10320 #endif
10321 
10322 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10323 	    SD_ADDRESS(un), &uscmd);
10324 	if (rval != 0) {
10325 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
10326 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
10327 		return (rval);
10328 	}
10329 
10330 	if ((uscmd->uscsi_cdb != NULL) &&
10331 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10332 		mutex_enter(SD_MUTEX(un));
10333 		un->un_f_format_in_progress = TRUE;
10334 		mutex_exit(SD_MUTEX(un));
10335 		format = 1;
10336 	}
10337 
10338 	/*
10339 	 * Allocate an sd_uscsi_info struct and fill it with the info
10340 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10341 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10342 	 * since we allocate the buf here in this function, we do not
10343 	 * need to preserve the prior contents of b_private.
10344 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10345 	 */
10346 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10347 	uip->ui_flags = path_flag;
10348 	uip->ui_cmdp = uscmd;
10349 
10350 	/*
10351 	 * Commands sent with priority are intended for error recovery
10352 	 * situations, and do not have retries performed.
10353 	 */
10354 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10355 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10356 	}
10357 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10358 
10359 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10360 	    sd_uscsi_strategy, NULL, uip);
10361 
10362 #ifdef SDDEBUG
10363 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10364 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10365 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10366 	if (uscmd->uscsi_bufaddr != NULL) {
10367 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10368 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10369 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10370 		if (dataspace == UIO_SYSSPACE) {
10371 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10372 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10373 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10374 		}
10375 	}
10376 #endif
10377 
10378 	if (format == 1) {
10379 		mutex_enter(SD_MUTEX(un));
10380 		un->un_f_format_in_progress = FALSE;
10381 		mutex_exit(SD_MUTEX(un));
10382 	}
10383 
10384 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10385 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10386 
10387 	return (rval);
10388 }
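
/*
 * Illustrative (hypothetical) internal caller of sd_send_scsi_cmd(),
 * sketching how a simple command might be issued from within the driver:
 *
 *	struct uscsi_cmd ucmd;
 *	char cdb[CDB_GROUP0];
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(cdb, sizeof (cdb));
 *	cdb[0] = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb    = cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags  = USCSI_SILENT;
 *	(void) sd_send_scsi_cmd(dev, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 */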
10389 
10390 
10391 /*
10392  *    Function: sd_buf_iodone
10393  *
10394  * Description: Frees the sd_xbuf & returns the buf to its originator.
10395  *
10396  *     Context: May be called from interrupt context.
10397  */
10398 /* ARGSUSED */
10399 static void
10400 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10401 {
10402 	struct sd_xbuf *xp;
10403 
10404 	ASSERT(un != NULL);
10405 	ASSERT(bp != NULL);
10406 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10407 
10408 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10409 
10410 	xp = SD_GET_XBUF(bp);
10411 	ASSERT(xp != NULL);
10412 
10413 	mutex_enter(SD_MUTEX(un));
10414 
10415 	/*
10416 	 * Record the time at which the command completed.
10417 	 * This is used to determine whether the system has been
10418 	 * idle long enough to appear idle to the PM framework.
10419 	 * This lowers overhead and therefore improves performance
10420 	 * per I/O operation.
10421 	 */
10422 	un->un_pm_idle_time = ddi_get_time();
10423 
10424 	un->un_ncmds_in_driver--;
10425 	ASSERT(un->un_ncmds_in_driver >= 0);
10426 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10427 	    un->un_ncmds_in_driver);
10428 
10429 	mutex_exit(SD_MUTEX(un));
10430 
10431 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10432 	biodone(bp);				/* bp is gone after this */
10433 
10434 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10435 }
10436 
10437 
10438 /*
10439  *    Function: sd_uscsi_iodone
10440  *
10441  * Description: Frees the sd_xbuf & returns the buf to its originator.
10442  *
10443  *     Context: May be called from interrupt context.
10444  */
10445 /* ARGSUSED */
10446 static void
10447 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10448 {
10449 	struct sd_xbuf *xp;
10450 
10451 	ASSERT(un != NULL);
10452 	ASSERT(bp != NULL);
10453 
10454 	xp = SD_GET_XBUF(bp);
10455 	ASSERT(xp != NULL);
10456 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10457 
10458 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10459 
10460 	bp->b_private = xp->xb_private;
10461 
10462 	mutex_enter(SD_MUTEX(un));
10463 
10464 	/*
10465 	 * Record the time at which the command completed.
10466 	 * This is used to determine whether the system has been
10467 	 * idle long enough to appear idle to the PM framework.
10468 	 * This lowers overhead and therefore improves performance
10469 	 * per I/O operation.
10470 	 */
10471 	un->un_pm_idle_time = ddi_get_time();
10472 
10473 	un->un_ncmds_in_driver--;
10474 	ASSERT(un->un_ncmds_in_driver >= 0);
10475 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10476 	    un->un_ncmds_in_driver);
10477 
10478 	mutex_exit(SD_MUTEX(un));
10479 
10480 	kmem_free(xp, sizeof (struct sd_xbuf));
10481 	biodone(bp);
10482 
10483 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10484 }
10485 
10486 
10487 /*
10488  *    Function: sd_mapblockaddr_iostart
10489  *
10490  * Description: Verify the request lies within the partition limits for
10491  *		the indicated minor device.  Issue "overrun" buf if
10492  *		request would exceed partition range.  Converts
10493  *		partition-relative block address to absolute.
10494  *
10495  *     Context: Can sleep
10496  *
10497  *      Issues: This follows what the old code did, in terms of accessing
10498  *		some of the partition info in the unit struct without holding
10499 	 *		the mutex.  This is a general issue: if the partition info
10500  *		can be altered while IO is in progress... as soon as we send
10501  *		a buf, its partitioning can be invalid before it gets to the
10502  *		device.  Probably the right fix is to move partitioning out
10503  *		of the driver entirely.
10504  */
10505 
10506 static void
10507 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10508 {
10509 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10510 	daddr_t	blocknum;	/* Block number specified by the buf */
10511 	size_t	requested_nblocks;
10512 	size_t	available_nblocks;
10513 	int	partition;
10514 	diskaddr_t	partition_offset;
10515 	struct sd_xbuf *xp;
10516 
10517 
10518 	ASSERT(un != NULL);
10519 	ASSERT(bp != NULL);
10520 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10521 
10522 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10523 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10524 
10525 	xp = SD_GET_XBUF(bp);
10526 	ASSERT(xp != NULL);
10527 
10528 	/*
10529 	 * If the geometry is not indicated as valid, attempt to access
10530 	 * the unit & verify the geometry/label. This can be the case for
10531 	 * removable-media devices, or if the device was opened in
10532 	 * NDELAY/NONBLOCK mode.
10533 	 */
10534 	if (!SD_IS_VALID_LABEL(un) &&
10535 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10536 		/*
10537 		 * For removable devices it is possible to start an I/O
10538 		 * without a media by opening the device in nodelay mode.
10539 		 * Also for writable CDs there can be many scenarios where
10540 		 * there is no geometry yet but the volume manager is trying to
10541 		 * issue a read() just because it can see the TOC on the CD. So
10542 		 * do not print a message for removables.
10543 		 */
10544 		if (!un->un_f_has_removable_media) {
10545 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10546 			    "i/o to invalid geometry\n");
10547 		}
10548 		bioerror(bp, EIO);
10549 		bp->b_resid = bp->b_bcount;
10550 		SD_BEGIN_IODONE(index, un, bp);
10551 		return;
10552 	}
10553 
10554 	partition = SDPART(bp->b_edev);
10555 
10556 	nblocks = 0;
10557 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10558 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10559 
10560 	/*
10561 	 * blocknum is the starting block number of the request. At this
10562 	 * point it is still relative to the start of the minor device.
10563 	 */
10564 	blocknum = xp->xb_blkno;
10565 
10566 	/*
10567 	 * Legacy: If the starting block number is one past the last block
10568 	 * in the partition, do not set B_ERROR in the buf.
10569 	 */
10570 	if (blocknum == nblocks)  {
10571 		goto error_exit;
10572 	}
10573 
10574 	/*
10575 	 * Confirm that the first block of the request lies within the
10576 	 * partition limits. Also the requested number of bytes must be
10577 	 * a multiple of the system block size.
10578 	 */
10579 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10580 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10581 		bp->b_flags |= B_ERROR;
10582 		goto error_exit;
10583 	}
10584 
10585 	/*
10586 	 * If the requested # blocks exceeds the available # blocks, that
10587 	 * is an overrun of the partition.
10588 	 */
10589 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10590 	available_nblocks = (size_t)(nblocks - blocknum);
10591 	ASSERT(nblocks >= blocknum);
10592 
10593 	if (requested_nblocks > available_nblocks) {
10594 		/*
10595 		 * Allocate an "overrun" buf to allow the request to proceed
10596 		 * for the amount of space available in the partition. The
10597 		 * amount not transferred will be added into the b_resid
10598 		 * when the operation is complete. The overrun buf
10599 		 * replaces the original buf here, and the original buf
10600 		 * is saved inside the overrun buf, for later use.
10601 		 */
10602 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10603 		    (offset_t)(requested_nblocks - available_nblocks));
10604 		size_t count = bp->b_bcount - resid;
10605 		/*
10606 		 * Note: count is unsigned and thus can NEVER be less
10607 		 * than 0, so ASSERT that the original values are
10608 		 * correct.
10609 		 */
10610 		ASSERT(bp->b_bcount >= resid);
10611 
10612 		bp = sd_bioclone_alloc(bp, count, blocknum,
10613 		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
10614 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10615 		ASSERT(xp != NULL);
10616 	}
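
	/*
	 * Worked example (illustrative): with 512-byte system blocks, an
	 * 8192-byte request starting 8 blocks before the end of the
	 * partition asks for 16 blocks where only 8 are available, so
	 * resid == 4096 and the overrun buf carries the first 4096 bytes;
	 * the remaining 4096 are reported via b_resid on completion.
	 */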
10617 
10618 	/* At this point there should be no residual for this buf. */
10619 	ASSERT(bp->b_resid == 0);
10620 
10621 	/* Convert the block number to an absolute address. */
10622 	xp->xb_blkno += partition_offset;
10623 
10624 	SD_NEXT_IOSTART(index, un, bp);
10625 
10626 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10627 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10628 
10629 	return;
10630 
10631 error_exit:
10632 	bp->b_resid = bp->b_bcount;
10633 	SD_BEGIN_IODONE(index, un, bp);
10634 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10635 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10636 }
10637 
10638 
10639 /*
10640  *    Function: sd_mapblockaddr_iodone
10641  *
10642  * Description: Completion-side processing for partition management.
10643  *
10644  *     Context: May be called under interrupt context
10645  */
10646 
10647 static void
10648 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10649 {
10650 	/* int	partition; */	/* Not used, see below. */
10651 	ASSERT(un != NULL);
10652 	ASSERT(bp != NULL);
10653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10654 
10655 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10656 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10657 
10658 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10659 		/*
10660 		 * We have an "overrun" buf to deal with...
10661 		 */
10662 		struct sd_xbuf	*xp;
10663 		struct buf	*obp;	/* ptr to the original buf */
10664 
10665 		xp = SD_GET_XBUF(bp);
10666 		ASSERT(xp != NULL);
10667 
10668 		/* Retrieve the pointer to the original buf */
10669 		obp = (struct buf *)xp->xb_private;
10670 		ASSERT(obp != NULL);
10671 
10672 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10673 		bioerror(obp, bp->b_error);
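		/*
		 * E.g. (continuing the overrun example in
		 * sd_mapblockaddr_iostart()): if obp asked for 8192 bytes
		 * and the 4096-byte overrun buf completed fully
		 * (bp->b_resid == 0), then obp->b_resid becomes 4096.
		 */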
10674 
10675 		sd_bioclone_free(bp);
10676 
10677 		/*
10678 		 * Get back the original buf.
10679 		 * Note that since the restoration of xb_blkno below
10680 		 * was removed, the sd_xbuf is not needed.
10681 		 */
10682 		bp = obp;
10683 		/*
10684 		 * xp = SD_GET_XBUF(bp);
10685 		 * ASSERT(xp != NULL);
10686 		 */
10687 	}
10688 
10689 	/*
10690 	 * Convert xp->xb_blkno back to a minor-device relative value.
10691 	 * Note: this has been commented out, as it is not needed in the
10692 	 * current implementation of the driver (this function is at the
10693 	 * top of the layering chains, so the info would simply be
10694 	 * discarded) and it is in the "hot" IO path.
10695 	 *
10696 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10697 	 * xp->xb_blkno -= un->un_offset[partition];
10698 	 */
10699 
10700 	SD_NEXT_IODONE(index, un, bp);
10701 
10702 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10703 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10704 }
10705 
10706 
10707 /*
10708  *    Function: sd_mapblocksize_iostart
10709  *
10710  * Description: Convert between system block size (un->un_sys_blocksize)
10711  *		and target block size (un->un_tgt_blocksize).
10712  *
10713  *     Context: Can sleep to allocate resources.
10714  *
10715  * Assumptions: A higher layer has already performed any partition validation,
10716  *		and converted the xp->xb_blkno to an absolute value relative
10717  *		to the start of the device.
10718  *
10719  *		It is also assumed that the higher layer has implemented
10720  *		an "overrun" mechanism for the case where the request would
10721  *		read/write beyond the end of a partition.  In this case we
10722  *		assume (and ASSERT) that bp->b_resid == 0.
10723  *
10724  *		Note: The implementation for this routine assumes the target
10725  *		block size remains constant between allocation and transport.
10726  */
10727 
10728 static void
10729 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10730 {
10731 	struct sd_mapblocksize_info	*bsp;
10732 	struct sd_xbuf			*xp;
10733 	offset_t first_byte;
10734 	daddr_t	start_block, end_block;
10735 	daddr_t	request_bytes;
10736 	ushort_t is_aligned = FALSE;
10737 
10738 	ASSERT(un != NULL);
10739 	ASSERT(bp != NULL);
10740 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10741 	ASSERT(bp->b_resid == 0);
10742 
10743 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10744 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10745 
10746 	/*
10747 	 * For a non-writable CD, a write request is an error
10748 	 */
10749 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10750 	    (un->un_f_mmc_writable_media == FALSE)) {
10751 		bioerror(bp, EIO);
10752 		bp->b_resid = bp->b_bcount;
10753 		SD_BEGIN_IODONE(index, un, bp);
10754 		return;
10755 	}
10756 
10757 	/*
10758 	 * We do not need a shadow buf if the device is using
10759 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10760 	 * In this case there is no layer-private data block allocated.
10761 	 */
10762 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10763 	    (bp->b_bcount == 0)) {
10764 		goto done;
10765 	}
10766 
10767 #if defined(__i386) || defined(__amd64)
10768 	/* We do not support non-block-aligned transfers for ROD devices */
10769 	ASSERT(!ISROD(un));
10770 #endif
10771 
10772 	xp = SD_GET_XBUF(bp);
10773 	ASSERT(xp != NULL);
10774 
10775 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10776 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
10777 	    un->un_tgt_blocksize, un->un_sys_blocksize);
10778 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10779 	    "request start block:0x%x\n", xp->xb_blkno);
10780 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10781 	    "request len:0x%x\n", bp->b_bcount);
10782 
10783 	/*
10784 	 * Allocate the layer-private data area for the mapblocksize layer.
10785 	 * Layers are allowed to use the xb_private member of the sd_xbuf
10786 	 * struct to store the pointer to their layer-private data block, but
10787 	 * each layer also has the responsibility of restoring the prior
10788 	 * contents of xb_private before returning the buf/xbuf to the
10789 	 * higher layer that sent it.
10790 	 *
10791 	 * Here we save the prior contents of xp->xb_private into the
10792 	 * bsp->mbs_oprivate field of our layer-private data area. This value
10793 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
10794 	 * the layer-private area and returning the buf/xbuf to the layer
10795 	 * that sent it.
10796 	 *
10797 	 * Note that here we use kmem_zalloc for the allocation as there are
10798 	 * parts of the mapblocksize code that expect certain fields to be
10799 	 * zero unless explicitly set to a required value.
10800 	 */
10801 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10802 	bsp->mbs_oprivate = xp->xb_private;
10803 	xp->xb_private = bsp;
10804 
10805 	/*
10806 	 * This treats the data on the disk (target) as an array of bytes.
10807 	 * first_byte is the byte offset, from the beginning of the device,
10808 	 * to the location of the request. This is converted from a
10809 	 * un->un_sys_blocksize block address to a byte offset, and then back
10810 	 * to a block address based upon a un->un_tgt_blocksize block size.
10811 	 *
10812 	 * xp->xb_blkno should be absolute upon entry into this function,
10813 	 * but it is based upon partitions that use the "system"
10814 	 * block size. It must be adjusted to reflect the block size of
10815 	 * the target.
10816 	 *
10817 	 * Note that end_block is actually the block that follows the last
10818 	 * block of the request, but that's what is needed for the computation.
10819 	 */
10820 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
10821 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
10822 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
10823 	    un->un_tgt_blocksize;
10824 
10825 	/* request_bytes is rounded up to a multiple of the target block size */
10826 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
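
	/*
	 * Worked example (illustrative): with un_sys_blocksize == 512 and
	 * un_tgt_blocksize == 2048, a 1024-byte request at system block 5
	 * gives first_byte == 2560, start_block == 1, end_block == 2, and
	 * request_bytes == 2048; since first_byte % 2048 == 512, the
	 * request is unaligned and needs a shadow buf, with a copy offset
	 * of 512 bytes into the 2048-byte target block.
	 */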
10827 
10828 	/*
10829 	 * See if the starting address of the request and the request
10830 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
10831 	 * then we do not need to allocate a shadow buf to handle the request.
10832 	 */
10833 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
10834 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
10835 		is_aligned = TRUE;
10836 	}
10837 
10838 	if ((bp->b_flags & B_READ) == 0) {
10839 		/*
10840 		 * Lock the range for a write operation. An aligned request is
10841 		 * considered a simple write; otherwise the request must be a
10842 		 * read-modify-write.
10843 		 */
10844 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
10845 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
10846 	}
10847 
10848 	/*
10849 	 * Alloc a shadow buf if the request is not aligned. Also, this is
10850 	 * where the READ command is generated for a read-modify-write. (The
10851 	 * write phase is deferred until after the read completes.)
10852 	 */
10853 	if (is_aligned == FALSE) {
10854 
10855 		struct sd_mapblocksize_info	*shadow_bsp;
10856 		struct sd_xbuf	*shadow_xp;
10857 		struct buf	*shadow_bp;
10858 
10859 		/*
10860 		 * Allocate the shadow buf and its associated xbuf. Note that
10861 		 * after this call the xb_blkno value in both the original
10862 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
10863 		 * same: absolute (relative to the start of the device) and
10864 		 * adjusted for the target block size. The b_blkno in the
10865 		 * shadow buf will also be set to this value. We should never
10866 		 * change b_blkno in the original bp however.
10867 		 *
10868 		 * Note also that the shadow buf will always need to be a
10869 		 * READ command, regardless of whether the incoming command
10870 		 * is a READ or a WRITE.
10871 		 */
10872 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
10873 		    xp->xb_blkno,
10874 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
10875 
10876 		shadow_xp = SD_GET_XBUF(shadow_bp);
10877 
10878 		/*
10879 		 * Allocate the layer-private data for the shadow buf.
10880 		 * (No need to preserve xb_private in the shadow xbuf.)
10881 		 */
10882 		shadow_xp->xb_private = shadow_bsp =
10883 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10884 
10885 		/*
10886 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
10887 		 * to figure out where the start of the user data is (based upon
10888 		 * the system block size) in the data returned by the READ
10889 		 * command (which will be based upon the target blocksize). Note
10890 		 * that this is only really used if the request is unaligned.
10891 		 */
10892 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
10893 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
10894 		ASSERT((bsp->mbs_copy_offset >= 0) &&
10895 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
10896 
10897 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
10898 
10899 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
10900 
10901 		/* Transfer the wmap (if any) to the shadow buf */
10902 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
10903 		bsp->mbs_wmp = NULL;
10904 
10905 		/*
10906 		 * The shadow buf goes on from here in place of the
10907 		 * original buf.
10908 		 */
10909 		shadow_bsp->mbs_orig_bp = bp;
10910 		bp = shadow_bp;
10911 	}
10912 
10913 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10914 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
10915 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10916 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
10917 	    request_bytes);
10918 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10919 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
10920 
10921 done:
10922 	SD_NEXT_IOSTART(index, un, bp);
10923 
10924 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10925 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
10926 }
10927 
10928 
10929 /*
10930  *    Function: sd_mapblocksize_iodone
10931  *
10932  * Description: Completion-side processing for block-size mapping.
10933  *
10934  *     Context: May be called under interrupt context
10935  */
10936 
10937 static void
10938 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
10939 {
10940 	struct sd_mapblocksize_info	*bsp;
10941 	struct sd_xbuf	*xp;
10942 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
10943 	struct buf	*orig_bp;	/* ptr to the original buf */
10944 	offset_t	shadow_end;
10945 	offset_t	request_end;
10946 	offset_t	shadow_start;
10947 	ssize_t		copy_offset;
10948 	size_t		copy_length;
10949 	size_t		shortfall;
10950 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
10951 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
10952 
10953 	ASSERT(un != NULL);
10954 	ASSERT(bp != NULL);
10955 
10956 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10957 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
10958 
10959 	/*
10960 	 * There is no shadow buf or layer-private data if the target is
10961 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
10962 	 */
10963 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10964 	    (bp->b_bcount == 0)) {
10965 		goto exit;
10966 	}
10967 
10968 	xp = SD_GET_XBUF(bp);
10969 	ASSERT(xp != NULL);
10970 
10971 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
10972 	bsp = xp->xb_private;
10973 
10974 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
10975 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
10976 
10977 	if (is_write) {
10978 		/*
10979 		 * For a WRITE request we must free up the block range that
10980 		 * we have locked up.  This holds regardless of whether this is
10981 		 * an aligned write request or a read-modify-write request.
10982 		 */
10983 		sd_range_unlock(un, bsp->mbs_wmp);
10984 		bsp->mbs_wmp = NULL;
10985 	}
10986 
10987 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
10988 		/*
10989 		 * An aligned read or write command will have no shadow buf;
10990 		 * there is not much else to do with it.
10991 		 */
10992 		goto done;
10993 	}
10994 
10995 	orig_bp = bsp->mbs_orig_bp;
10996 	ASSERT(orig_bp != NULL);
10997 	orig_xp = SD_GET_XBUF(orig_bp);
10998 	ASSERT(orig_xp != NULL);
10999 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11000 
11001 	if (!is_write && has_wmap) {
11002 		/*
11003 		 * A READ with a wmap means this is the READ phase of a
11004 		 * read-modify-write. If an error occurred on the READ then
11005 		 * we do not proceed with the WRITE phase or copy any data.
11006 		 * Just release the write maps and return with an error.
11007 		 */
11008 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
11009 			orig_bp->b_resid = orig_bp->b_bcount;
11010 			bioerror(orig_bp, bp->b_error);
11011 			sd_range_unlock(un, bsp->mbs_wmp);
11012 			goto freebuf_done;
11013 		}
11014 	}
11015 
11016 	/*
11017 	 * Here is where we set up to copy the data from the shadow buf
11018 	 * into the space associated with the original buf.
11019 	 *
11020 	 * To deal with the conversion between block sizes, these
11021 	 * computations treat the data as an array of bytes, with the
11022 	 * first byte (byte 0) corresponding to the first byte in the
11023 	 * first block on the disk.
11024 	 */
11025 
11026 	/*
11027 	 * shadow_start and shadow_len indicate the location and size of
11028 	 * the data returned with the shadow IO request.
11029 	 */
11030 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11031 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11032 
11033 	/*
11034 	 * copy_offset gives the offset (in bytes) from the start of the first
11035 	 * block of the READ request to the beginning of the data.  We retrieve
11036 	 * this value from mbs_copy_offset in the shadow buf's layer-private
11037 	 * data, where it was saved by sd_mapblocksize_iostart(). copy_length
11038 	 * gives the amount of data to be copied (in bytes).
11039 	 */
11040 	copy_offset  = bsp->mbs_copy_offset;
11041 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11042 	copy_length  = orig_bp->b_bcount;
11043 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11044 
11045 	/*
11046 	 * Set up the resid and error fields of orig_bp as appropriate.
11047 	 */
11048 	if (shadow_end >= request_end) {
11049 		/* We got all the requested data; set resid to zero */
11050 		orig_bp->b_resid = 0;
11051 	} else {
11052 		/*
11053 		 * We failed to get enough data to fully satisfy the original
11054 		 * request. Just copy back whatever data we got and set
11055 		 * up the residual and error code as required.
11056 		 *
11057 		 * 'shortfall' is the amount by which the data received with the
11058 		 * shadow buf has "fallen short" of the requested amount.
11059 		 */
11060 		shortfall = (size_t)(request_end - shadow_end);
11061 
11062 		if (shortfall > orig_bp->b_bcount) {
11063 			/*
11064 			 * We did not get enough data to even partially
11065 			 * fulfill the original request.  The residual is
11066 			 * equal to the amount requested.
11067 			 */
11068 			orig_bp->b_resid = orig_bp->b_bcount;
11069 		} else {
11070 			/*
11071 			 * We did not get all the data that we requested
11072 			 * from the device, but we will try to return what
11073 			 * portion we did get.
11074 			 */
11075 			orig_bp->b_resid = shortfall;
11076 		}
11077 		ASSERT(copy_length >= orig_bp->b_resid);
11078 		copy_length  -= orig_bp->b_resid;
11079 	}
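
	/*
	 * Worked example (illustrative): if orig_bp->b_bcount is 1024 and
	 * the shadow READ stopped 512 bytes short of request_end, then
	 * shortfall == 512, orig_bp->b_resid is set to 512, and only the
	 * first 512 bytes are copied back to the original buf.
	 */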
11080 
11081 	/* Propagate the error code from the shadow buf to the original buf */
11082 	bioerror(orig_bp, bp->b_error);
11083 
11084 	if (is_write) {
11085 		goto freebuf_done;	/* No data copying for a WRITE */
11086 	}
11087 
11088 	if (has_wmap) {
11089 		/*
11090 		 * This is a READ command from the READ phase of a
11091 		 * read-modify-write request. We have to copy the data given
11092 		 * by the user OVER the data returned by the READ command,
11093 		 * then convert the command from a READ to a WRITE and send
11094 		 * it back to the target.
11095 		 */
11096 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11097 		    copy_length);
11098 
11099 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11100 
11101 		/*
11102 		 * Dispatch the WRITE command to the taskq thread, which
11103 		 * will in turn send the command to the target. When the
11104 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11105 		 * will get called again as part of the iodone chain
11106 		 * processing for it. Note that we will still be dealing
11107 		 * with the shadow buf at that point.
11108 		 */
11109 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11110 		    KM_NOSLEEP) != 0) {
11111 			/*
11112 			 * Dispatch was successful so we are done. Return
11113 			 * without going any higher up the iodone chain. Do
11114 			 * not free up any layer-private data until after the
11115 			 * WRITE completes.
11116 			 */
11117 			return;
11118 		}
11119 
11120 		/*
11121 		 * Dispatch of the WRITE command failed; set up the error
11122 		 * condition and send this IO back up the iodone chain.
11123 		 */
11124 		bioerror(orig_bp, EIO);
11125 		orig_bp->b_resid = orig_bp->b_bcount;
11126 
11127 	} else {
11128 		/*
11129 		 * This is a regular READ request (ie, not a RMW). Copy the
11130 		 * data from the shadow buf into the original buf. The
11131 		 * copy_offset compensates for any "misalignment" between the
11132 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11133 		 * original buf (with its un->un_sys_blocksize blocks).
11134 		 */
11135 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11136 		    copy_length);
11137 	}
11138 
11139 freebuf_done:
11140 
11141 	/*
11142 	 * At this point we still have both the shadow buf AND the original
11143 	 * buf to deal with, as well as the layer-private data area in each.
11144 	 * Local variables are as follows:
11145 	 *
11146 	 * bp -- points to shadow buf
11147 	 * xp -- points to xbuf of shadow buf
11148 	 * bsp -- points to layer-private data area of shadow buf
11149 	 * orig_bp -- points to original buf
11150 	 *
11151 	 * First free the shadow buf and its associated xbuf, then free the
11152 	 * layer-private data area from the shadow buf. There is no need to
11153 	 * restore xb_private in the shadow xbuf.
11154 	 */
11155 	sd_shadow_buf_free(bp);
11156 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11157 
11158 	/*
11159 	 * Now update the local variables to point to the original buf, xbuf,
11160 	 * and layer-private area.
11161 	 */
11162 	bp = orig_bp;
11163 	xp = SD_GET_XBUF(bp);
11164 	ASSERT(xp != NULL);
11165 	ASSERT(xp == orig_xp);
11166 	bsp = xp->xb_private;
11167 	ASSERT(bsp != NULL);
11168 
11169 done:
11170 	/*
11171 	 * Restore xb_private to whatever it was set to by the next higher
11172 	 * layer in the chain, then free the layer-private data area.
11173 	 */
11174 	xp->xb_private = bsp->mbs_oprivate;
11175 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11176 
11177 exit:
11178 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11179 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11180 
11181 	SD_NEXT_IODONE(index, un, bp);
11182 }
11183 
11184 
11185 /*
11186  *    Function: sd_checksum_iostart
11187  *
11188  * Description: A stub function for a layer that's currently not used.
11189  *		For now just a placeholder.
11190  *
11191  *     Context: Kernel thread context
11192  */
11193 
11194 static void
11195 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11196 {
11197 	ASSERT(un != NULL);
11198 	ASSERT(bp != NULL);
11199 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11200 	SD_NEXT_IOSTART(index, un, bp);
11201 }
11202 
11203 
11204 /*
11205  *    Function: sd_checksum_iodone
11206  *
11207  * Description: A stub function for a layer that's currently not used.
11208  *		For now just a placeholder.
11209  *
11210  *     Context: May be called under interrupt context
11211  */
11212 
11213 static void
11214 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11215 {
11216 	ASSERT(un != NULL);
11217 	ASSERT(bp != NULL);
11218 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11219 	SD_NEXT_IODONE(index, un, bp);
11220 }
11221 
11222 
11223 /*
11224  *    Function: sd_checksum_uscsi_iostart
11225  *
11226  * Description: A stub function for a layer that's currently not used.
11227  *		For now just a placeholder.
11228  *
11229  *     Context: Kernel thread context
11230  */
11231 
11232 static void
11233 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11234 {
11235 	ASSERT(un != NULL);
11236 	ASSERT(bp != NULL);
11237 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11238 	SD_NEXT_IOSTART(index, un, bp);
11239 }
11240 
11241 
11242 /*
11243  *    Function: sd_checksum_uscsi_iodone
11244  *
11245  * Description: A stub function for a layer that's currently not used.
11246  *		For now just a placeholder.
11247  *
11248  *     Context: May be called under interrupt context
11249  */
11250 
11251 static void
11252 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11253 {
11254 	ASSERT(un != NULL);
11255 	ASSERT(bp != NULL);
11256 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11257 	SD_NEXT_IODONE(index, un, bp);
11258 }
11259 
11260 
11261 /*
11262  *    Function: sd_pm_iostart
11263  *
11264  * Description: iostart-side routine for power management.
11265  *
11266  *     Context: Kernel thread context
11267  */
11268 
11269 static void
11270 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11271 {
11272 	ASSERT(un != NULL);
11273 	ASSERT(bp != NULL);
11274 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11275 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11276 
11277 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11278 
11279 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11280 		/*
11281 		 * Set up to return the failed buf back up the 'iodone'
11282 		 * side of the calling chain.
11283 		 */
11284 		bioerror(bp, EIO);
11285 		bp->b_resid = bp->b_bcount;
11286 
11287 		SD_BEGIN_IODONE(index, un, bp);
11288 
11289 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11290 		return;
11291 	}
11292 
11293 	SD_NEXT_IOSTART(index, un, bp);
11294 
11295 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11296 }
11297 
11298 
11299 /*
11300  *    Function: sd_pm_iodone
11301  *
11302  * Description: iodone-side routine for power management.
11303  *
11304  *     Context: may be called from interrupt context
11305  */
11306 
11307 static void
11308 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11309 {
11310 	ASSERT(un != NULL);
11311 	ASSERT(bp != NULL);
11312 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11313 
11314 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11315 
11316 	/*
11317 	 * After attach the following flag is only read, so don't
11318 	 * take the penalty of acquiring a mutex for it.
11319 	 */
11320 	if (un->un_f_pm_is_enabled == TRUE) {
11321 		sd_pm_exit(un);
11322 	}
11323 
11324 	SD_NEXT_IODONE(index, un, bp);
11325 
11326 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11327 }
11328 
11329 
11330 /*
11331  *    Function: sd_core_iostart
11332  *
11333  * Description: Primary driver function for enqueuing buf(9S) structs from
11334  *		the system and initiating IO to the target device
11335  *
11336  *     Context: Kernel thread context. Can sleep.
11337  *
11338  * Assumptions:  - The given xp->xb_blkno is absolute
11339  *		   (ie, relative to the start of the device).
11340  *		 - The IO is to be done using the native blocksize of
11341  *		   the device, as specified in un->un_tgt_blocksize.
11342  */
11343 /* ARGSUSED */
11344 static void
11345 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11346 {
11347 	struct sd_xbuf *xp;
11348 
11349 	ASSERT(un != NULL);
11350 	ASSERT(bp != NULL);
11351 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11352 	ASSERT(bp->b_resid == 0);
11353 
11354 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11355 
11356 	xp = SD_GET_XBUF(bp);
11357 	ASSERT(xp != NULL);
11358 
11359 	mutex_enter(SD_MUTEX(un));
11360 
11361 	/*
11362 	 * If we are currently in the failfast state, fail any new IO
11363 	 * that has B_FAILFAST set, then return.
11364 	 */
11365 	if ((bp->b_flags & B_FAILFAST) &&
11366 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11367 		mutex_exit(SD_MUTEX(un));
11368 		bioerror(bp, EIO);
11369 		bp->b_resid = bp->b_bcount;
11370 		SD_BEGIN_IODONE(index, un, bp);
11371 		return;
11372 	}
11373 
11374 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11375 		/*
11376 		 * Priority command -- transport it immediately.
11377 		 *
11378 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11379 		 * because all direct priority commands should be associated
11380 		 * with error recovery actions which we don't want to retry.
11381 		 */
11382 		sd_start_cmds(un, bp);
11383 	} else {
11384 		/*
11385 		 * Normal command -- add it to the wait queue, then start
11386 		 * transporting commands from the wait queue.
11387 		 */
11388 		sd_add_buf_to_waitq(un, bp);
11389 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11390 		sd_start_cmds(un, NULL);
11391 	}
11392 
11393 	mutex_exit(SD_MUTEX(un));
11394 
11395 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11396 }
11397 
11398 
11399 /*
11400  *    Function: sd_init_cdb_limits
11401  *
11402  * Description: This is to handle scsi_pkt initialization differences
11403  *		between the driver platforms.
11404  *
11405  *		Legacy behaviors:
11406  *
11407  *		If the block number or the sector count exceeds the
11408  *		capabilities of a Group 0 command, shift over to a
11409  *		Group 1 command. We don't blindly use Group 1
11410  *		commands because a) some drives (CDC Wren IVs) get a
11411  *		bit confused, and b) there is probably a fair amount
11412  *		of speed difference for a target to receive and decode
11413  *		a 10 byte command instead of a 6 byte command.
11414  *
11415  *		The xfer time difference of 6 vs 10 byte CDBs is
11416  *		still significant so this code is still worthwhile.
11417  *		10 byte CDBs are very inefficient with the fas HBA driver
11418  *		and older disks. Each CDB byte took 1 usec with some
11419  *		popular disks.
11420  *
11421  *     Context: Must be called at attach time
11422  */
11423 
11424 static void
11425 sd_init_cdb_limits(struct sd_lun *un)
11426 {
11427 	int hba_cdb_limit;
11428 
11429 	/*
11430 	 * Use CDB_GROUP1 commands for most devices except for
11431 	 * parallel SCSI fixed drives in which case we get better
11432 	 * performance using CDB_GROUP0 commands (where applicable).
11433 	 */
11434 	un->un_mincdb = SD_CDB_GROUP1;
11435 #if !defined(__fibre)
11436 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11437 	    !un->un_f_has_removable_media) {
11438 		un->un_mincdb = SD_CDB_GROUP0;
11439 	}
11440 #endif
11441 
11442 	/*
11443 	 * Try to read the max-cdb-length supported by HBA.
11444 	 */
11445 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11446 	if (0 >= un->un_max_hba_cdb) {
11447 		un->un_max_hba_cdb = CDB_GROUP4;
11448 		hba_cdb_limit = SD_CDB_GROUP4;
11449 	} else if (0 < un->un_max_hba_cdb &&
11450 	    un->un_max_hba_cdb < CDB_GROUP1) {
11451 		hba_cdb_limit = SD_CDB_GROUP0;
11452 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11453 	    un->un_max_hba_cdb < CDB_GROUP5) {
11454 		hba_cdb_limit = SD_CDB_GROUP1;
11455 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11456 	    un->un_max_hba_cdb < CDB_GROUP4) {
11457 		hba_cdb_limit = SD_CDB_GROUP5;
11458 	} else {
11459 		hba_cdb_limit = SD_CDB_GROUP4;
11460 	}
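
	/*
	 * (For reference: CDB_GROUP0, CDB_GROUP1, CDB_GROUP5, and
	 * CDB_GROUP4 correspond to 6-, 10-, 12-, and 16-byte CDBs
	 * respectively, which is why the comparisons above are not in
	 * strictly increasing numeric group order.)
	 */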
11461 
11462 	/*
11463 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11464 	 * commands for fixed disks unless we are building for a 32 bit
11465 	 * kernel.
11466 	 */
11467 #ifdef _LP64
11468 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11469 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11470 #else
11471 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11472 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11473 #endif
11474 
11475 	/*
11476 	 * x86 systems require the PKT_DMA_PARTIAL flag
11477 	 */
11478 #if defined(__x86)
11479 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11480 #else
11481 	un->un_pkt_flags = 0;
11482 #endif
11483 
11484 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11485 	    ? sizeof (struct scsi_arq_status) : 1);
11486 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11487 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11488 }
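
/*
 * For reference, the standard READ/WRITE CDB groups that the ladder
 * above chooses among (the authoritative runtime limits live in
 * sd_cdbtab; this table is a summary, not a definition):
 *
 *	Group 0:  6-byte CDB, 21-bit LBA,  8-bit count (0 means 256)
 *	Group 1: 10-byte CDB, 32-bit LBA, 16-bit count
 *	Group 5: 12-byte CDB, 32-bit LBA, 32-bit count
 *	Group 4: 16-byte CDB, 64-bit LBA, 32-bit count
 *
 * Since group 5 CDBs (12 bytes) are shorter than group 4 CDBs
 * (16 bytes), CDB_GROUP5 sorts between CDB_GROUP1 and CDB_GROUP4
 * in the comparisons above.
 */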
11489 
11490 
11491 /*
11492  *    Function: sd_initpkt_for_buf
11493  *
11494  * Description: Allocate and initialize for transport a scsi_pkt struct,
11495  *		based upon the info specified in the given buf struct.
11496  *
11497  *		Assumes the xb_blkno in the request is absolute (i.e.,
11498  *		relative to the start of the device, NOT the partition).
11499  *		Also assumes that the request is using the native block
11500  *		size of the device (as returned by the READ CAPACITY
11501  *		command).
11502  *
11503  * Return Code: SD_PKT_ALLOC_SUCCESS
11504  *		SD_PKT_ALLOC_FAILURE
11505  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11506  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11507  *
11508  *     Context: Kernel thread and may be called from software interrupt context
11509  *		as part of a sdrunout callback. This function may not block or
11510  *		call routines that block
11511  */
11512 
11513 static int
11514 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11515 {
11516 	struct sd_xbuf	*xp;
11517 	struct scsi_pkt *pktp = NULL;
11518 	struct sd_lun	*un;
11519 	size_t		blockcount;
11520 	daddr_t		startblock;
11521 	int		rval;
11522 	int		cmd_flags;
11523 
11524 	ASSERT(bp != NULL);
11525 	ASSERT(pktpp != NULL);
11526 	xp = SD_GET_XBUF(bp);
11527 	ASSERT(xp != NULL);
11528 	un = SD_GET_UN(bp);
11529 	ASSERT(un != NULL);
11530 	ASSERT(mutex_owned(SD_MUTEX(un)));
11531 	ASSERT(bp->b_resid == 0);
11532 
11533 	SD_TRACE(SD_LOG_IO_CORE, un,
11534 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11535 
11536 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11537 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11538 		/*
11539 		 * Already have a scsi_pkt -- just need DMA resources.
11540 		 * We must recompute the CDB in case the mapping returns
11541 		 * a nonzero pkt_resid.
11542 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11543 		 * that is being retried, the unmap/remap of the DMA resources
11544 		 * will result in the entire transfer starting over again
11545 		 * from the very first block.
11546 		 */
11547 		ASSERT(xp->xb_pktp != NULL);
11548 		pktp = xp->xb_pktp;
11549 	} else {
11550 		pktp = NULL;
11551 	}
11552 #endif /* __i386 || __amd64 */
11553 
11554 	startblock = xp->xb_blkno;	/* Absolute block num. */
11555 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11556 
11557 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11558 
11559 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11560 
11561 #else
11562 
11563 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11564 
11565 #endif
11566 
11567 	/*
11568 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11569 	 * call scsi_init_pkt, and build the CDB.
11570 	 */
11571 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11572 	    cmd_flags, sdrunout, (caddr_t)un,
11573 	    startblock, blockcount);
11574 
11575 	if (rval == 0) {
11576 		/*
11577 		 * Success.
11578 		 *
11579 		 * If partial DMA is being used and required for this transfer,
11580 		 * set it up here.
11581 		 */
11582 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11583 		    (pktp->pkt_resid != 0)) {
11584 
11585 			/*
11586 			 * Save the CDB length and pkt_resid for the
11587 			 * next xfer
11588 			 */
11589 			xp->xb_dma_resid = pktp->pkt_resid;
11590 
11591 			/* rezero resid */
11592 			pktp->pkt_resid = 0;
11593 
11594 		} else {
11595 			xp->xb_dma_resid = 0;
11596 		}
11597 
11598 		pktp->pkt_flags = un->un_tagflags;
11599 		pktp->pkt_time  = un->un_cmd_timeout;
11600 		pktp->pkt_comp  = sdintr;
11601 
11602 		pktp->pkt_private = bp;
11603 		*pktpp = pktp;
11604 
11605 		SD_TRACE(SD_LOG_IO_CORE, un,
11606 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11607 
11608 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11609 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11610 #endif
11611 
11612 		return (SD_PKT_ALLOC_SUCCESS);
11613 
11614 	}
11615 
11616 	/*
11617 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11618 	 * from sd_setup_rw_pkt.
11619 	 */
11620 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11621 
11622 	if (rval == SD_PKT_ALLOC_FAILURE) {
11623 		*pktpp = NULL;
11624 		/*
11625 		 * Set the driver state to RWAIT to indicate the driver
11626 		 * is waiting on resource allocations. The driver will not
11627 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11628 		 */
11629 		New_state(un, SD_STATE_RWAIT);
11630 
11631 		SD_ERROR(SD_LOG_IO_CORE, un,
11632 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11633 
11634 		if ((bp->b_flags & B_ERROR) != 0) {
11635 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11636 		}
11637 		return (SD_PKT_ALLOC_FAILURE);
11638 	} else {
11639 		/*
11640 		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11641 		 *
11642 		 * This should never happen.  Maybe someone messed with the
11643 		 * kernel's minphys?
11644 		 */
11645 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11646 		    "Request rejected: too large for CDB: "
11647 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11648 		SD_ERROR(SD_LOG_IO_CORE, un,
11649 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11650 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11651 
11652 	}
11653 }
11654 
11655 
11656 /*
11657  *    Function: sd_destroypkt_for_buf
11658  *
11659  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11660  *
11661  *     Context: Kernel thread or interrupt context
11662  */
11663 
11664 static void
11665 sd_destroypkt_for_buf(struct buf *bp)
11666 {
11667 	ASSERT(bp != NULL);
11668 	ASSERT(SD_GET_UN(bp) != NULL);
11669 
11670 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11671 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11672 
11673 	ASSERT(SD_GET_PKTP(bp) != NULL);
11674 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11675 
11676 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11677 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11678 }
11679 
11680 /*
11681  *    Function: sd_setup_rw_pkt
11682  *
11683  * Description: Determines appropriate CDB group for the requested LBA
11684  *		and transfer length, calls scsi_init_pkt, and builds
11685  *		the CDB.  Do not use for partial DMA transfers except
11686  *		for the initial transfer since the CDB size must
11687  *		remain constant.
11688  *
11689  *     Context: Kernel thread and may be called from software interrupt
11690  *		context as part of a sdrunout callback. This function may not
11691  *		block or call routines that block
11692  */
11693 
11694 
11695 int
11696 sd_setup_rw_pkt(struct sd_lun *un,
11697     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11698     int (*callback)(caddr_t), caddr_t callback_arg,
11699     diskaddr_t lba, uint32_t blockcount)
11700 {
11701 	struct scsi_pkt *return_pktp;
11702 	union scsi_cdb *cdbp;
11703 	struct sd_cdbinfo *cp = NULL;
11704 	int i;
11705 
11706 	/*
11707 	 * See which size CDB to use, based upon the request.
11708 	 */
11709 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11710 
11711 		/*
11712 		 * Check lba and block count against sd_cdbtab limits.
11713 		 * In the partial DMA case, we have to use the same size
11714 		 * CDB for all the transfers.  Check lba + blockcount
11715 		 * against the max LBA so we know that segment of the
11716 		 * transfer can use the CDB we select.
11717 		 */
11718 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11719 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11720 
11721 			/*
11722 			 * The command will fit into the CDB type
11723 			 * specified by sd_cdbtab[i].
11724 			 */
11725 			cp = sd_cdbtab + i;
11726 
11727 			/*
11728 			 * Call scsi_init_pkt so we can fill in the
11729 			 * CDB.
11730 			 */
11731 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11732 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11733 			    flags, callback, callback_arg);
11734 
11735 			if (return_pktp != NULL) {
11736 
11737 				/*
11738 				 * Return new value of pkt
11739 				 */
11740 				*pktpp = return_pktp;
11741 
11742 				/*
11743 				 * To be safe, zero the CDB, ensuring there is
11744 				 * no leftover data from a previous command.
11745 				 */
11746 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11747 
11748 				/*
11749 				 * Handle partial DMA mapping
11750 				 */
11751 				if (return_pktp->pkt_resid != 0) {
11752 
11753 					/*
11754 					 * Not going to xfer as many blocks as
11755 					 * originally expected
11756 					 */
11757 					blockcount -=
11758 					    SD_BYTES2TGTBLOCKS(un,
11759 						return_pktp->pkt_resid);
11760 				}
11761 
11762 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11763 
11764 				/*
11765 				 * Set command byte based on the CDB
11766 				 * type we matched.
11767 				 */
11768 				cdbp->scc_cmd = cp->sc_grpmask |
11769 				    ((bp->b_flags & B_READ) ?
11770 					SCMD_READ : SCMD_WRITE);
11771 
11772 				SD_FILL_SCSI1_LUN(un, return_pktp);
11773 
11774 				/*
11775 				 * Fill in LBA and length
11776 				 */
11777 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
11778 				    (cp->sc_grpcode == CDB_GROUP4) ||
11779 				    (cp->sc_grpcode == CDB_GROUP0) ||
11780 				    (cp->sc_grpcode == CDB_GROUP5));
11781 
11782 				if (cp->sc_grpcode == CDB_GROUP1) {
11783 					FORMG1ADDR(cdbp, lba);
11784 					FORMG1COUNT(cdbp, blockcount);
11785 					return (0);
11786 				} else if (cp->sc_grpcode == CDB_GROUP4) {
11787 					FORMG4LONGADDR(cdbp, lba);
11788 					FORMG4COUNT(cdbp, blockcount);
11789 					return (0);
11790 				} else if (cp->sc_grpcode == CDB_GROUP0) {
11791 					FORMG0ADDR(cdbp, lba);
11792 					FORMG0COUNT(cdbp, blockcount);
11793 					return (0);
11794 				} else if (cp->sc_grpcode == CDB_GROUP5) {
11795 					FORMG5ADDR(cdbp, lba);
11796 					FORMG5COUNT(cdbp, blockcount);
11797 					return (0);
11798 				}
11799 
11800 				/*
11801 				 * It should be impossible to not match one
11802 				 * of the CDB types above, so we should never
11803 				 * reach this point.  Set the CDB command byte
11804 				 * to test-unit-ready to avoid writing
11805 				 * to somewhere we don't intend.
11806 				 */
11807 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
11808 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11809 			} else {
11810 				/*
11811 				 * Couldn't get scsi_pkt
11812 				 */
11813 				return (SD_PKT_ALLOC_FAILURE);
11814 			}
11815 		}
11816 	}
11817 
11818 	/*
11819 	 * None of the available CDB types were suitable.  This really
11820 	 * should never happen:  on a 64 bit system we support
11821 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
11822 	 * and on a 32 bit system we will refuse to bind to a device
11823 	 * larger than 2TB so addresses will never be larger than 32 bits.
11824 	 */
11825 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11826 }
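
/*
 * Illustrative example (hypothetical request): with un_mincdb set to
 * SD_CDB_GROUP0, a read of 1024 blocks at lba 0x200000 cannot use a
 * 6-byte group 0 CDB (the LBA needs more than 21 bits and the count
 * exceeds 256), so the loop above settles on group 1:
 *
 *	cdbp->scc_cmd = cp->sc_grpmask | SCMD_READ;
 *	FORMG1ADDR(cdbp, 0x200000);	(32-bit LBA field)
 *	FORMG1COUNT(cdbp, 1024);	(16-bit count field)
 *
 * Only an LBA beyond 32 bits (i.e. past 2TB with 512-byte blocks)
 * would force the 16-byte group 4 form; an oversized count alone is
 * absorbed by the 32-bit count field of a 12-byte group 5 CDB.
 */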
11827 
11828 #if defined(__i386) || defined(__amd64)
11829 /*
11830  *    Function: sd_setup_next_rw_pkt
11831  *
11832  * Description: Setup packet for partial DMA transfers, except for the
11833  * 		initial transfer.  sd_setup_rw_pkt should be used for
11834  *		the initial transfer.
11835  *
11836  *     Context: Kernel thread and may be called from interrupt context.
11837  */
11838 
11839 int
11840 sd_setup_next_rw_pkt(struct sd_lun *un,
11841     struct scsi_pkt *pktp, struct buf *bp,
11842     diskaddr_t lba, uint32_t blockcount)
11843 {
11844 	uchar_t com;
11845 	union scsi_cdb *cdbp;
11846 	uchar_t cdb_group_id;
11847 
11848 	ASSERT(pktp != NULL);
11849 	ASSERT(pktp->pkt_cdbp != NULL);
11850 
11851 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
11852 	com = cdbp->scc_cmd;
11853 	cdb_group_id = CDB_GROUPID(com);
11854 
11855 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
11856 	    (cdb_group_id == CDB_GROUPID_1) ||
11857 	    (cdb_group_id == CDB_GROUPID_4) ||
11858 	    (cdb_group_id == CDB_GROUPID_5));
11859 
11860 	/*
11861 	 * Move pkt to the next portion of the xfer.
11862 	 * func is NULL_FUNC so we do not have to release
11863 	 * the disk mutex here.
11864 	 */
11865 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
11866 	    NULL_FUNC, NULL) == pktp) {
11867 		/* Success.  Handle partial DMA */
11868 		if (pktp->pkt_resid != 0) {
11869 			blockcount -=
11870 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
11871 		}
11872 
11873 		cdbp->scc_cmd = com;
11874 		SD_FILL_SCSI1_LUN(un, pktp);
11875 		if (cdb_group_id == CDB_GROUPID_1) {
11876 			FORMG1ADDR(cdbp, lba);
11877 			FORMG1COUNT(cdbp, blockcount);
11878 			return (0);
11879 		} else if (cdb_group_id == CDB_GROUPID_4) {
11880 			FORMG4LONGADDR(cdbp, lba);
11881 			FORMG4COUNT(cdbp, blockcount);
11882 			return (0);
11883 		} else if (cdb_group_id == CDB_GROUPID_0) {
11884 			FORMG0ADDR(cdbp, lba);
11885 			FORMG0COUNT(cdbp, blockcount);
11886 			return (0);
11887 		} else if (cdb_group_id == CDB_GROUPID_5) {
11888 			FORMG5ADDR(cdbp, lba);
11889 			FORMG5COUNT(cdbp, blockcount);
11890 			return (0);
11891 		}
11892 
11893 		/* Unreachable */
11894 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11895 	}
11896 
11897 	/*
11898 	 * Error setting up next portion of cmd transfer.
11899 	 * Something is definitely very wrong and this
11900 	 * should not happen.
11901 	 */
11902 	return (SD_PKT_ALLOC_FAILURE);
11903 }
11904 #endif /* defined(__i386) || defined(__amd64) */
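
/*
 * A rough sketch of the partial DMA windowing sequence; the actual
 * bookkeeping (the nblks_done accounting below) is done by the
 * callers, e.g. sd_setup_next_xfer():
 *
 *	sd_setup_rw_pkt(un, &pktp, bp, flags, func, arg, lba, nblks);
 *	...transport; sdintr() sees the first window complete...
 *	lba   += nblks_done;		advance past the blocks moved
 *	nblks -= nblks_done;
 *	(void) sd_setup_next_rw_pkt(un, pktp, bp, lba, nblks);
 *
 * Each scsi_init_pkt(9F) call moves the DMA window forward; pkt_resid
 * reports how much of the remaining request did not fit, and the CDB
 * is rebuilt at the same size for the new window.
 */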
11905 
11906 /*
11907  *    Function: sd_initpkt_for_uscsi
11908  *
11909  * Description: Allocate and initialize for transport a scsi_pkt struct,
11910  *		based upon the info specified in the given uscsi_cmd struct.
11911  *
11912  * Return Code: SD_PKT_ALLOC_SUCCESS
11913  *		SD_PKT_ALLOC_FAILURE
11914  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11915  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11916  *
11917  *     Context: Kernel thread and may be called from software interrupt context
11918  *		as part of a sdrunout callback. This function may not block or
11919  *		call routines that block
11920  */
11921 
11922 static int
11923 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
11924 {
11925 	struct uscsi_cmd *uscmd;
11926 	struct sd_xbuf	*xp;
11927 	struct scsi_pkt	*pktp;
11928 	struct sd_lun	*un;
11929 	uint32_t	flags = 0;
11930 
11931 	ASSERT(bp != NULL);
11932 	ASSERT(pktpp != NULL);
11933 	xp = SD_GET_XBUF(bp);
11934 	ASSERT(xp != NULL);
11935 	un = SD_GET_UN(bp);
11936 	ASSERT(un != NULL);
11937 	ASSERT(mutex_owned(SD_MUTEX(un)));
11938 
11939 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
11940 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
11941 	ASSERT(uscmd != NULL);
11942 
11943 	SD_TRACE(SD_LOG_IO_CORE, un,
11944 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
11945 
11946 	/*
11947 	 * Allocate the scsi_pkt for the command.
11948 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
11949 	 *	 during scsi_init_pkt time and will continue to use the
11950 	 *	 same path as long as the same scsi_pkt is used without
11951 	 *	 intervening scsi_dma_free(). Since uscsi command does
11952 	 *	 intervening scsi_dmafree(). Since the uscsi command does
11953 	 *	 not call scsi_dmafree() before retrying a failed command,
11954 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
11955 	 *	 NOT set so that scsi_vhci can use another available path
11956 	 *	 for the retry. Besides, uscsi commands do not allow DMA
11957 	 *	 breakup, so there is no need to set PKT_DMA_PARTIAL.
11958 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
11959 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
11960 	    sizeof (struct scsi_arq_status), 0,
11961 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
11962 	    sdrunout, (caddr_t)un);
11963 
11964 	if (pktp == NULL) {
11965 		*pktpp = NULL;
11966 		/*
11967 		 * Set the driver state to RWAIT to indicate the driver
11968 		 * is waiting on resource allocations. The driver will not
11969 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
11970 		 */
11971 		New_state(un, SD_STATE_RWAIT);
11972 
11973 		SD_ERROR(SD_LOG_IO_CORE, un,
11974 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
11975 
11976 		if ((bp->b_flags & B_ERROR) != 0) {
11977 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11978 		}
11979 		return (SD_PKT_ALLOC_FAILURE);
11980 	}
11981 
11982 	/*
11983 	 * We do not do DMA breakup for USCSI commands, so return failure
11984 	 * here if all the needed DMA resources were not allocated.
11985 	 */
11986 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
11987 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
11988 		scsi_destroy_pkt(pktp);
11989 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
11990 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
11991 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
11992 	}
11993 
11994 	/* Init the cdb from the given uscsi struct */
11995 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
11996 	    uscmd->uscsi_cdb[0], 0, 0, 0);
11997 
11998 	SD_FILL_SCSI1_LUN(un, pktp);
11999 
12000 	/*
12001 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
12002 	 * for listing of the supported flags.
12003 	 */
12004 
12005 	if (uscmd->uscsi_flags & USCSI_SILENT) {
12006 		flags |= FLAG_SILENT;
12007 	}
12008 
12009 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
12010 		flags |= FLAG_DIAGNOSE;
12011 	}
12012 
12013 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
12014 		flags |= FLAG_ISOLATE;
12015 	}
12016 
12017 	if (un->un_f_is_fibre == FALSE) {
12018 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
12019 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
12020 		}
12021 	}
12022 
12023 	/*
12024 	 * Set the pkt flags here so we save time later.
12025 	 * Note: These flags are NOT in the uscsi man page!!!
12026 	 */
12027 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12028 		flags |= FLAG_HEAD;
12029 	}
12030 
12031 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12032 		flags |= FLAG_NOINTR;
12033 	}
12034 
12035 	/*
12036 	 * For tagged queueing, things get a bit complicated.
12037 	 * Check first for head of queue and last for ordered queue.
12038 	 * If neither head nor order, use the default driver tag flags.
12039 	 */
12040 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12041 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12042 			flags |= FLAG_HTAG;
12043 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12044 			flags |= FLAG_OTAG;
12045 		} else {
12046 			flags |= un->un_tagflags & FLAG_TAGMASK;
12047 		}
12048 	}
12049 
12050 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12051 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12052 	}
12053 
12054 	pktp->pkt_flags = flags;
12055 
12056 	/* Copy the caller's CDB into the pkt... */
12057 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12058 
12059 	if (uscmd->uscsi_timeout == 0) {
12060 		pktp->pkt_time = un->un_uscsi_timeout;
12061 	} else {
12062 		pktp->pkt_time = uscmd->uscsi_timeout;
12063 	}
12064 
12065 	/* need it later to identify USCSI request in sdintr */
12066 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12067 
12068 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12069 
12070 	pktp->pkt_private = bp;
12071 	pktp->pkt_comp = sdintr;
12072 	*pktpp = pktp;
12073 
12074 	SD_TRACE(SD_LOG_IO_CORE, un,
12075 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12076 
12077 	return (SD_PKT_ALLOC_SUCCESS);
12078 }
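
/*
 * For illustration only, a caller-built uscsi_cmd for a TEST UNIT
 * READY with sense retrieval enabled might look like this (rqbuf is
 * a hypothetical caller-supplied buffer; see uscsi(7I) for the
 * authoritative interface):
 *
 *	uchar_t cdb[CDB_GROUP0] = { SCMD_TEST_UNIT_READY };
 *	struct uscsi_cmd ucmd;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	ucmd.uscsi_cdb     = (caddr_t)cdb;
 *	ucmd.uscsi_cdblen  = CDB_GROUP0;
 *	ucmd.uscsi_flags   = USCSI_SILENT | USCSI_RQENABLE;
 *	ucmd.uscsi_rqbuf   = rqbuf;
 *	ucmd.uscsi_rqlen   = SENSE_LENGTH;
 *	ucmd.uscsi_timeout = 60;
 *
 * In the function above, USCSI_SILENT becomes FLAG_SILENT in
 * pkt_flags, and the nonzero uscsi_timeout is used directly as
 * pkt_time; a zero timeout would fall back to un_uscsi_timeout.
 */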
12079 
12080 
12081 /*
12082  *    Function: sd_destroypkt_for_uscsi
12083  *
12084  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12085  *		IOs.. Also saves relevant info into the associated uscsi_cmd
12086  *		IOs.  Also saves relevant info into the associated uscsi_cmd
12087  *
12088  *     Context: May be called under interrupt context
12089  */
12090 
12091 static void
12092 sd_destroypkt_for_uscsi(struct buf *bp)
12093 {
12094 	struct uscsi_cmd *uscmd;
12095 	struct sd_xbuf	*xp;
12096 	struct scsi_pkt	*pktp;
12097 	struct sd_lun	*un;
12098 
12099 	ASSERT(bp != NULL);
12100 	xp = SD_GET_XBUF(bp);
12101 	ASSERT(xp != NULL);
12102 	un = SD_GET_UN(bp);
12103 	ASSERT(un != NULL);
12104 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12105 	pktp = SD_GET_PKTP(bp);
12106 	ASSERT(pktp != NULL);
12107 
12108 	SD_TRACE(SD_LOG_IO_CORE, un,
12109 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12110 
12111 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12112 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12113 	ASSERT(uscmd != NULL);
12114 
12115 	/* Save the status and the residual into the uscsi_cmd struct */
12116 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12117 	uscmd->uscsi_resid  = bp->b_resid;
12118 
12119 	/*
12120 	 * If enabled, copy any saved sense data into the area specified
12121 	 * by the uscsi command.
12122 	 */
12123 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12124 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12125 		/*
12126 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12127 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12128 		 */
12129 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12130 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12131 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12132 	}
12133 
12134 	/* We are done with the scsi_pkt; free it now */
12135 	ASSERT(SD_GET_PKTP(bp) != NULL);
12136 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12137 
12138 	SD_TRACE(SD_LOG_IO_CORE, un,
12139 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12140 }
12141 
12142 
12143 /*
12144  *    Function: sd_bioclone_alloc
12145  *
12146  * Description: Allocate a buf(9S) and init it as per the given buf
12147  *		and the various arguments.  The associated sd_xbuf
12148  *		struct is (nearly) duplicated.  The struct buf *bp
12149  *		argument is saved in new_xp->xb_private.
12150  *
12151  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12152  *		datalen - size of data area for the shadow bp
12153  *		blkno - starting LBA
12154  *		func - function pointer for b_iodone in the shadow buf. (May
12155  *			be NULL if none.)
12156  *
12157  * Return Code: Pointer to the allocated buf(9S) struct
12158  *
12159  *     Context: Can sleep.
12160  */
12161 
12162 static struct buf *
12163 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12164 	daddr_t blkno, int (*func)(struct buf *))
12165 {
12166 	struct	sd_lun	*un;
12167 	struct	sd_xbuf	*xp;
12168 	struct	sd_xbuf	*new_xp;
12169 	struct	buf	*new_bp;
12170 
12171 	ASSERT(bp != NULL);
12172 	xp = SD_GET_XBUF(bp);
12173 	ASSERT(xp != NULL);
12174 	un = SD_GET_UN(bp);
12175 	ASSERT(un != NULL);
12176 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12177 
12178 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12179 	    NULL, KM_SLEEP);
12180 
12181 	new_bp->b_lblkno	= blkno;
12182 
12183 	/*
12184 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12185 	 * original xbuf into it.
12186 	 */
12187 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12188 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12189 
12190 	/*
12191 	 * The given bp is automatically saved in the xb_private member
12192 	 * of the new xbuf.  Callers are allowed to depend on this.
12193 	 */
12194 	new_xp->xb_private = bp;
12195 
12196 	new_bp->b_private  = new_xp;
12197 
12198 	return (new_bp);
12199 }
12200 
12201 /*
12202  *    Function: sd_shadow_buf_alloc
12203  *
12204  * Description: Allocate a buf(9S) and init it as per the given buf
12205  *		and the various arguments.  The associated sd_xbuf
12206  *		struct is (nearly) duplicated.  The struct buf *bp
12207  *		argument is saved in new_xp->xb_private.
12208  *
12209  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12210  *		datalen - size of data area for the shadow bp
12211  *		bflags - B_READ or B_WRITE (pseudo flag)
12212  *		blkno - starting LBA
12213  *		func - function pointer for b_iodone in the shadow buf. (May
12214  *			be NULL if none.)
12215  *
12216  * Return Code: Pointer to the allocated buf(9S) struct
12217  *
12218  *     Context: Can sleep.
12219  */
12220 
12221 static struct buf *
12222 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12223 	daddr_t blkno, int (*func)(struct buf *))
12224 {
12225 	struct	sd_lun	*un;
12226 	struct	sd_xbuf	*xp;
12227 	struct	sd_xbuf	*new_xp;
12228 	struct	buf	*new_bp;
12229 
12230 	ASSERT(bp != NULL);
12231 	xp = SD_GET_XBUF(bp);
12232 	ASSERT(xp != NULL);
12233 	un = SD_GET_UN(bp);
12234 	ASSERT(un != NULL);
12235 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12236 
12237 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12238 		bp_mapin(bp);
12239 	}
12240 
12241 	bflags &= (B_READ | B_WRITE);
12242 #if defined(__i386) || defined(__amd64)
12243 	new_bp = getrbuf(KM_SLEEP);
12244 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12245 	new_bp->b_bcount = datalen;
12246 	new_bp->b_flags = bflags |
12247 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12248 #else
12249 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12250 	    datalen, bflags, SLEEP_FUNC, NULL);
12251 #endif
12252 	new_bp->av_forw	= NULL;
12253 	new_bp->av_back	= NULL;
12254 	new_bp->b_dev	= bp->b_dev;
12255 	new_bp->b_blkno	= blkno;
12256 	new_bp->b_iodone = func;
12257 	new_bp->b_edev	= bp->b_edev;
12258 	new_bp->b_resid	= 0;
12259 
12260 	/* We need to preserve the B_FAILFAST flag */
12261 	if (bp->b_flags & B_FAILFAST) {
12262 		new_bp->b_flags |= B_FAILFAST;
12263 	}
12264 
12265 	/*
12266 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12267 	 * original xbuf into it.
12268 	 */
12269 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12270 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12271 
12272 	/* Need later to copy data between the shadow buf & original buf! */
12273 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12274 
12275 	/*
12276 	 * The given bp is automatically saved in the xb_private member
12277 	 * of the new xbuf.  Callers are allowed to depend on this.
12278 	 */
12279 	new_xp->xb_private = bp;
12280 
12281 	new_bp->b_private  = new_xp;
12282 
12283 	return (new_bp);
12284 }
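
/*
 * Lifecycle note: a shadow buf created here owns its own data area.
 * The layer that allocated it is expected to copy data between the
 * original bp and the shadow (before transport for a write, in the
 * b_iodone func for a read) and to release it with
 * sd_shadow_buf_free(); the original bp is always recoverable via
 * xp->xb_private.
 */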
12285 
12286 /*
12287  *    Function: sd_bioclone_free
12288  *
12289  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12290  *		in the larger than partition operation.
12291  *
12292  *     Context: May be called under interrupt context
12293  */
12294 
12295 static void
12296 sd_bioclone_free(struct buf *bp)
12297 {
12298 	struct sd_xbuf	*xp;
12299 
12300 	ASSERT(bp != NULL);
12301 	xp = SD_GET_XBUF(bp);
12302 	ASSERT(xp != NULL);
12303 
12304 	/*
12305 	 * Call bp_mapout() before freeing the buf, in case a lower
12306 	 * layer or HBA had done a bp_mapin().  We must do this here
12307 	 * as we are the "originator" of the shadow buf.
12308 	 */
12309 	bp_mapout(bp);
12310 
12311 	/*
12312 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12313 	 * never gets confused by a stale value in this field. (Just a little
12314 	 * extra defensiveness here.)
12315 	 */
12316 	bp->b_iodone = NULL;
12317 
12318 	freerbuf(bp);
12319 
12320 	kmem_free(xp, sizeof (struct sd_xbuf));
12321 }
12322 
12323 /*
12324  *    Function: sd_shadow_buf_free
12325  *
12326  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12327  *
12328  *     Context: May be called under interrupt context
12329  */
12330 
12331 static void
12332 sd_shadow_buf_free(struct buf *bp)
12333 {
12334 	struct sd_xbuf	*xp;
12335 
12336 	ASSERT(bp != NULL);
12337 	xp = SD_GET_XBUF(bp);
12338 	ASSERT(xp != NULL);
12339 
12340 #if defined(__sparc)
12341 	/*
12342 	 * Call bp_mapout() before freeing the buf, in case a lower
12343 	 * layer or HBA had done a bp_mapin().  We must do this here
12344 	 * as we are the "originator" of the shadow buf.
12345 	 */
12346 	bp_mapout(bp);
12347 #endif
12348 
12349 	/*
12350 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12351 	 * never gets confused by a stale value in this field. (Just a little
12352 	 * extra defensiveness here.)
12353 	 */
12354 	bp->b_iodone = NULL;
12355 
12356 #if defined(__i386) || defined(__amd64)
12357 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12358 	freerbuf(bp);
12359 #else
12360 	scsi_free_consistent_buf(bp);
12361 #endif
12362 
12363 	kmem_free(xp, sizeof (struct sd_xbuf));
12364 }
12365 
12366 
12367 /*
12368  *    Function: sd_print_transport_rejected_message
12369  *
12370  * Description: This implements the ludicrously complex rules for printing
12371  *		a "transport rejected" message.  This is to address the
12372  *		specific problem of having a flood of this error message
12373  *		produced when a failover occurs.
12374  *
12375  *     Context: Any.
12376  */
12377 
12378 static void
12379 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12380 	int code)
12381 {
12382 	ASSERT(un != NULL);
12383 	ASSERT(mutex_owned(SD_MUTEX(un)));
12384 	ASSERT(xp != NULL);
12385 
12386 	/*
12387 	 * Print the "transport rejected" message under the following
12388 	 * conditions:
12389 	 *
12390 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
12391 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12392 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12393 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12394 	 *   scsi_transport(9F) (which indicates that the target might have
12395  *   gone off-line).  This uses un->un_tran_fatal_count,
12396  *   which is incremented whenever a TRAN_FATAL_ERROR is
12397  *   received, and reset to zero whenever a TRAN_ACCEPT is returned
12398 	 *   from scsi_transport().
12399 	 *
12400 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
12401  * the preceding cases in order for the message to be printed.
12402 	 */
12403 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12404 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12405 		    (code != TRAN_FATAL_ERROR) ||
12406 		    (un->un_tran_fatal_count == 1)) {
12407 			switch (code) {
12408 			case TRAN_BADPKT:
12409 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12410 				    "transport rejected bad packet\n");
12411 				break;
12412 			case TRAN_FATAL_ERROR:
12413 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12414 				    "transport rejected fatal error\n");
12415 				break;
12416 			default:
12417 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12418 				    "transport rejected (%d)\n", code);
12419 				break;
12420 			}
12421 		}
12422 	}
12423 }
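
/*
 * In tabular form, the message above is emitted only when FLAG_SILENT
 * is clear AND at least one of the following holds:
 *
 *	SD_LOGMASK_DIAG set in sd_level_mask	(verbose diagnostics)
 *	code != TRAN_FATAL_ERROR		(e.g. TRAN_BADPKT)
 *	un_tran_fatal_count == 1		(first fatal error since
 *						the last TRAN_ACCEPT)
 *
 * so a stream of repeated TRAN_FATAL_ERRORs after a failover produces
 * a single warning instead of a flood.
 */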
12424 
12425 
12426 /*
12427  *    Function: sd_add_buf_to_waitq
12428  *
12429  * Description: Add the given buf(9S) struct to the wait queue for the
12430  *		instance.  If sorting is enabled, then the buf is added
12431  *		to the queue via an elevator sort algorithm (a la
12432  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12433  *		If sorting is not enabled, then the buf is just added
12434  *		to the end of the wait queue.
12435  *
12436  * Return Code: void
12437  *
12438  *     Context: Does not sleep/block, therefore technically can be called
12439  *		from any context.  However if sorting is enabled then the
12440  *		execution time is indeterminate, and may take a long time if
12441  *		the wait queue grows large.
12442  */
12443 
12444 static void
12445 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12446 {
12447 	struct buf *ap;
12448 
12449 	ASSERT(bp != NULL);
12450 	ASSERT(un != NULL);
12451 	ASSERT(mutex_owned(SD_MUTEX(un)));
12452 
12453 	/* If the queue is empty, add the buf as the only entry & return. */
12454 	if (un->un_waitq_headp == NULL) {
12455 		ASSERT(un->un_waitq_tailp == NULL);
12456 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12457 		bp->av_forw = NULL;
12458 		return;
12459 	}
12460 
12461 	ASSERT(un->un_waitq_tailp != NULL);
12462 
12463 	/*
12464 	 * If sorting is disabled, just add the buf to the tail end of
12465 	 * the wait queue and return.
12466 	 */
12467 	if (un->un_f_disksort_disabled) {
12468 		un->un_waitq_tailp->av_forw = bp;
12469 		un->un_waitq_tailp = bp;
12470 		bp->av_forw = NULL;
12471 		return;
12472 	}
12473 
12474 	/*
12475 	 * Sort thru the list of requests currently on the wait queue
12476 	 * and add the new buf request at the appropriate position.
12477 	 *
12478 	 * The un->un_waitq_headp is an activity chain pointer on which
12479 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12480 	 * first queue holds those requests which are positioned after
12481 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12482 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12483 	 * Thus we implement a one way scan, retracting after reaching
12484 	 * the end of the drive to the first request on the second
12485 	 * queue, at which time it becomes the first queue.
12486 	 * A one-way scan is natural because of the way UNIX read-ahead
12487 	 * blocks are allocated.
12488 	 *
12489 	 * If we lie after the first request, then we must locate the
12490 	 * second request list and add ourselves to it.
12491 	 */
12492 	ap = un->un_waitq_headp;
12493 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12494 		while (ap->av_forw != NULL) {
12495 			/*
12496 			 * Look for an "inversion" in the (normally
12497 			 * ascending) block numbers. This indicates
12498 			 * the start of the second request list.
12499 			 */
12500 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12501 				/*
12502 				 * Search the second request list for the
12503 				 * first request at a larger block number.
12504 				 * We go before that; however if there is
12505 				 * no such request, we go at the end.
12506 				 */
12507 				do {
12508 					if (SD_GET_BLKNO(bp) <
12509 					    SD_GET_BLKNO(ap->av_forw)) {
12510 						goto insert;
12511 					}
12512 					ap = ap->av_forw;
12513 				} while (ap->av_forw != NULL);
12514 				goto insert;		/* after last */
12515 			}
12516 			ap = ap->av_forw;
12517 		}
12518 
12519 		/*
12520 		 * No inversions... we will go after the last, and
12521 		 * be the first request in the second request list.
12522 		 */
12523 		goto insert;
12524 	}
12525 
12526 	/*
12527 	 * Request is at/after the current request...
12528 	 * sort in the first request list.
12529 	 */
12530 	while (ap->av_forw != NULL) {
12531 		/*
12532 		 * We want to go after the current request (1) if
12533 		 * there is an inversion after it (i.e. it is the end
12534 		 * of the first request list), or (2) if the next
12535 		 * request is a larger block no. than our request.
12536 		 */
12537 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12538 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12539 			goto insert;
12540 		}
12541 		ap = ap->av_forw;
12542 	}
12543 
12544 	/*
12545 	 * Neither a second list nor a larger request, therefore
12546 	 * we go at the end of the first list (which is the same
12547  * as the end of the whole shebang).
12548 	 */
12549 insert:
12550 	bp->av_forw = ap->av_forw;
12551 	ap->av_forw = bp;
12552 
12553 	/*
12554 	 * If we inserted onto the tail end of the waitq, make sure the
12555 	 * tail pointer is updated.
12556 	 */
12557 	if (ap == un->un_waitq_tailp) {
12558 		un->un_waitq_tailp = bp;
12559 	}
12560 }
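
/*
 * A small worked example of the one-way elevator above: suppose the
 * waitq holds requests for blocks 10 -> 30 -> 50 -> 5 -> 8, where the
 * inversion at 50 -> 5 marks the start of the second (wrapped) list.
 * Then:
 *
 *	inserting blkno 40 joins the first list:   10 30 40 50 5 8
 *	inserting blkno  7 joins the second list:  10 30 50 5 7 8
 *	inserting blkno 60 goes after 50, at the
 *	end of the first sweep:                    10 30 50 60 5 8
 *
 * The head thus services ascending block numbers, wraps once to the
 * lowest outstanding block, and ascends again.
 */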
12561 
12562 
12563 /*
12564  *    Function: sd_start_cmds
12565  *
12566  * Description: Remove and transport cmds from the driver queues.
12567  *
12568  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12569  *
12570  *		immed_bp - ptr to a buf to be transported immediately. Only
12571  *		the immed_bp is transported; bufs on the waitq are not
12572  *		processed and the un_retry_bp is not checked.  If immed_bp is
12573  *		NULL, then normal queue processing is performed.
12574  *
12575  *     Context: May be called from kernel thread context, interrupt context,
12576  *		or runout callback context. This function may not block or
12577  *		call routines that block.
12578  */
12579 
12580 static void
12581 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12582 {
12583 	struct	sd_xbuf	*xp;
12584 	struct	buf	*bp;
12585 	void	(*statp)(kstat_io_t *);
12586 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12587 	void	(*saved_statp)(kstat_io_t *);
12588 #endif
12589 	int	rval;
12590 
12591 	ASSERT(un != NULL);
12592 	ASSERT(mutex_owned(SD_MUTEX(un)));
12593 	ASSERT(un->un_ncmds_in_transport >= 0);
12594 	ASSERT(un->un_throttle >= 0);
12595 
12596 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12597 
12598 	do {
12599 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12600 		saved_statp = NULL;
12601 #endif
12602 
12603 		/*
12604 		 * If we are syncing or dumping, fail the command to
12605 		 * avoid recursively calling back into scsi_transport().
12606 		 * The dump I/O itself uses a separate code path so this
12607 		 * only prevents non-dump I/O from being sent while dumping.
12608 		 * File system sync takes place before dumping begins.
12609 		 * During panic, filesystem I/O is allowed provided
12610 		 * un_in_callback is <= 1.  This is to prevent recursion
12611 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12612 		 * sd_start_cmds and so on.  See panic.c for more information
12613 		 * about the states the system can be in during panic.
12614 		 */
12615 		if ((un->un_state == SD_STATE_DUMPING) ||
12616 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12617 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12618 			    "sd_start_cmds: panicking\n");
12619 			goto exit;
12620 		}
12621 
12622 		if ((bp = immed_bp) != NULL) {
12623 			/*
12624 			 * We have a bp that must be transported immediately.
12625 			 * It's OK to transport the immed_bp here without doing
12626 			 * the throttle limit check because the immed_bp is
12627 			 * always used in a retry/recovery case. This means
12628 			 * that we know we are not at the throttle limit by
12629 			 * virtue of the fact that to get here we must have
12630 			 * already gotten a command back via sdintr(). This also
12631 			 * relies on (1) the command on un_retry_bp preventing
12632 			 * further commands from the waitq from being issued;
12633 			 * and (2) the code in sd_retry_command checking the
12634 			 * throttle limit before issuing a delayed or immediate
12635 			 * retry. This holds even if the throttle limit is
12636 			 * currently ratcheted down from its maximum value.
12637 			 */
12638 			statp = kstat_runq_enter;
12639 			if (bp == un->un_retry_bp) {
12640 				ASSERT((un->un_retry_statp == NULL) ||
12641 				    (un->un_retry_statp == kstat_waitq_enter) ||
12642 				    (un->un_retry_statp ==
12643 				    kstat_runq_back_to_waitq));
12644 				/*
12645 				 * If the waitq kstat was incremented when
12646 				 * sd_set_retry_bp() queued this bp for a retry,
12647 				 * then we must set up statp so that the waitq
12648 				 * count will get decremented correctly below.
12649 				 * Also we must clear un->un_retry_statp to
12650 				 * ensure that we do not act on a stale value
12651 				 * in this field.
12652 				 */
12653 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12654 				    (un->un_retry_statp ==
12655 				    kstat_runq_back_to_waitq)) {
12656 					statp = kstat_waitq_to_runq;
12657 				}
12658 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12659 				saved_statp = un->un_retry_statp;
12660 #endif
12661 				un->un_retry_statp = NULL;
12662 
12663 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12664 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12665 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12666 				    un, un->un_retry_bp, un->un_throttle,
12667 				    un->un_ncmds_in_transport);
12668 			} else {
12669 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12670 				    "processing priority bp:0x%p\n", bp);
12671 			}
12672 
12673 		} else if ((bp = un->un_waitq_headp) != NULL) {
12674 			/*
12675 			 * A command on the waitq is ready to go, but do not
12676 			 * send it if:
12677 			 *
12678 			 * (1) the throttle limit has been reached, or
12679 			 * (2) a retry is pending, or
12680 			 * (3) a START_STOP_UNIT callback is pending, or
12681 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
12682 			 *	command is pending.
12683 			 *
12684 			 * For all of these conditions, IO processing will
12685 			 * restart after the condition is cleared.
12686 			 */
12687 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12688 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12689 				    "sd_start_cmds: exiting, "
12690 				    "throttle limit reached!\n");
12691 				goto exit;
12692 			}
12693 			if (un->un_retry_bp != NULL) {
12694 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12695 				    "sd_start_cmds: exiting, retry pending!\n");
12696 				goto exit;
12697 			}
12698 			if (un->un_startstop_timeid != NULL) {
12699 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12700 				    "sd_start_cmds: exiting, "
12701 				    "START_STOP pending!\n");
12702 				goto exit;
12703 			}
12704 			if (un->un_direct_priority_timeid != NULL) {
12705 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12706 				    "sd_start_cmds: exiting, "
12707 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12708 				goto exit;
12709 			}
12710 
12711 			/* Dequeue the command */
12712 			un->un_waitq_headp = bp->av_forw;
12713 			if (un->un_waitq_headp == NULL) {
12714 				un->un_waitq_tailp = NULL;
12715 			}
12716 			bp->av_forw = NULL;
12717 			statp = kstat_waitq_to_runq;
12718 			SD_TRACE(SD_LOG_IO_CORE, un,
12719 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12720 
12721 		} else {
12722 			/* No work to do so bail out now */
12723 			SD_TRACE(SD_LOG_IO_CORE, un,
12724 			    "sd_start_cmds: no more work, exiting!\n");
12725 			goto exit;
12726 		}
12727 
12728 		/*
12729 		 * Reset the state to normal. This is the mechanism by which
12730 		 * the state transitions from either SD_STATE_RWAIT or
12731 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12732 		 * If state is SD_STATE_PM_CHANGING then this command is
12733 		 * part of the device power control and the state must
12734 		 * not be put back to normal. Doing so would
12735 		 * allow new commands to proceed when they shouldn't,
12736 		 * as the device may be powering off.
12737 		 */
12738 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12739 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12740 			New_state(un, SD_STATE_NORMAL);
12741 		}
12742 
12743 		xp = SD_GET_XBUF(bp);
12744 		ASSERT(xp != NULL);
12745 
12746 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12747 		/*
12748 		 * Allocate the scsi_pkt if we need one, or attach DMA
12749 		 * resources if we have a scsi_pkt that needs them. The
12750 		 * latter should only occur for commands that are being
12751 		 * retried.
12752 		 */
12753 		if ((xp->xb_pktp == NULL) ||
12754 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12755 #else
12756 		if (xp->xb_pktp == NULL) {
12757 #endif
12758 			/*
12759 			 * There is no scsi_pkt allocated for this buf. Call
12760 			 * the initpkt function to allocate & init one.
12761 			 *
12762 			 * The scsi_init_pkt runout callback functionality is
12763 			 * implemented as follows:
12764 			 *
12765 			 * 1) The initpkt function always calls
12766 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12767 			 *    callback routine.
12768 			 * 2) A successful packet allocation is initialized and
12769 			 *    the I/O is transported.
12770 			 * 3) The I/O associated with an allocation resource
12771 			 *    failure is left on its queue to be retried via
12772 			 *    runout or the next I/O.
12773 			 * 4) The I/O associated with a DMA error is removed
12774 			 *    from the queue and failed with EIO. Processing of
12775 			 *    the transport queues is also halted to be
12776 			 *    restarted via runout or the next I/O.
12777 			 * 5) The I/O associated with a CDB size or packet
12778 			 *    size error is removed from the queue and failed
12779 			 *    with EIO. Processing of the transport queues is
12780 			 *    continued.
12781 			 *
12782 			 * Note: there is no interface for canceling a runout
12783 			 * callback. To prevent the driver from detaching or
12784 			 * suspending while a runout is pending the driver
12785 			 * state is set to SD_STATE_RWAIT
12786 			 *
12787 			 * Note: using the scsi_init_pkt callback facility can
12788 			 * result in an I/O request persisting at the head of
12789 			 * the list which cannot be satisfied even after
12790 			 * multiple retries. In the future the driver may
12791 			 * implement some kind of maximum runout count before
12792 			 * failing an I/O.
12793 			 *
12794 			 * Note: the use of funcp below may seem superfluous,
12795 			 * but it helps warlock figure out the correct
12796 			 * initpkt function calls (see [s]sd.wlcmd).
12797 			 */
12798 			struct scsi_pkt	*pktp;
12799 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
12800 
12801 			ASSERT(bp != un->un_rqs_bp);
12802 
12803 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
12804 			switch ((*funcp)(bp, &pktp)) {
12805 			case  SD_PKT_ALLOC_SUCCESS:
12806 				xp->xb_pktp = pktp;
12807 				SD_TRACE(SD_LOG_IO_CORE, un,
12808 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
12809 				    pktp);
12810 				goto got_pkt;
12811 
12812 			case SD_PKT_ALLOC_FAILURE:
12813 				/*
12814 				 * Temporary (hopefully) resource depletion.
12815 				 * Since retries and RQS commands always have a
12816 				 * scsi_pkt allocated, these cases should never
12817 				 * get here. So the only cases this needs to
12818 				 * handle is a bp from the waitq (which we put
12819 				 * back onto the waitq for sdrunout), or a bp
12820 				 * sent as an immed_bp (which we just fail).
12821 				 */
12822 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12823 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
12824 
12825 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12826 
12827 				if (bp == immed_bp) {
12828 					/*
12829 					 * If SD_XB_DMA_FREED is clear, then
12830 					 * this is a failure to allocate a
12831 					 * scsi_pkt, and we must fail the
12832 					 * command.
12833 					 */
12834 					if ((xp->xb_pkt_flags &
12835 					    SD_XB_DMA_FREED) == 0) {
12836 						break;
12837 					}
12838 
12839 					/*
12840 					 * If this immediate command is NOT our
12841 					 * un_retry_bp, then we must fail it.
12842 					 */
12843 					if (bp != un->un_retry_bp) {
12844 						break;
12845 					}
12846 
12847 					/*
12848 					 * We get here if this cmd is our
12849 					 * un_retry_bp that was DMAFREED, but
12850 					 * scsi_init_pkt() failed to reallocate
12851 					 * DMA resources when we attempted to
12852 					 * retry it. This can happen when an
12853 					 * mpxio failover is in progress, but
12854 					 * we don't want to just fail the
12855 					 * command in this case.
12856 					 *
12857 					 * Use timeout(9F) to restart it after
12858 					 * a 100ms delay.  We don't want to
12859 					 * let sdrunout() restart it, because
12860 					 * sdrunout() is just supposed to start
12861 					 * commands that are sitting on the
12862 					 * wait queue.  The un_retry_bp stays
12863 					 * set until the command completes, but
12864 					 * sdrunout can be called many times
12865 					 * before that happens.  Since sdrunout
12866 					 * cannot tell if the un_retry_bp is
12867 					 * already in the transport, it could
12868 					 * end up calling scsi_transport() for
12869 					 * the un_retry_bp multiple times.
12870 					 *
12871 					 * Also: don't schedule the callback
12872 					 * if some other callback is already
12873 					 * pending.
12874 					 */
12875 					if (un->un_retry_statp == NULL) {
12876 						/*
12877 						 * restore the kstat pointer to
12878 						 * keep kstat counts coherent
12879 						 * when we do retry the command.
12880 						 */
12881 						un->un_retry_statp =
12882 						    saved_statp;
12883 					}
12884 
12885 					if ((un->un_startstop_timeid == NULL) &&
12886 					    (un->un_retry_timeid == NULL) &&
12887 					    (un->un_direct_priority_timeid ==
12888 					    NULL)) {
12889 
12890 						un->un_retry_timeid =
12891 						    timeout(
12892 						    sd_start_retry_command,
12893 						    un, SD_RESTART_TIMEOUT);
12894 					}
12895 					goto exit;
12896 				}
12897 
12898 #else
12899 				if (bp == immed_bp) {
12900 					break;	/* Just fail the command */
12901 				}
12902 #endif
12903 
12904 				/* Add the buf back to the head of the waitq */
12905 				bp->av_forw = un->un_waitq_headp;
12906 				un->un_waitq_headp = bp;
12907 				if (un->un_waitq_tailp == NULL) {
12908 					un->un_waitq_tailp = bp;
12909 				}
12910 				goto exit;
12911 
12912 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
12913 				/*
12914 				 * HBA DMA resource failure. Fail the command
12915 				 * and continue processing of the queues.
12916 				 */
12917 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12918 				    "sd_start_cmds: "
12919 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
12920 				break;
12921 
12922 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
12923 				/*
12924 				 * Note:x86: Partial DMA mapping not supported
12925 				 * for USCSI commands, and all the needed DMA
12926 				 * resources were not allocated.
12927 				 */
12928 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12929 				    "sd_start_cmds: "
12930 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
12931 				break;
12932 
12933 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
12934 				/*
12935 				 * Note:x86: Request cannot fit into CDB based
12936 				 * on lba and len.
12937 				 */
12938 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12939 				    "sd_start_cmds: "
12940 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
12941 				break;
12942 
12943 			default:
12944 				/* Should NEVER get here! */
12945 				panic("scsi_initpkt error");
12946 				/*NOTREACHED*/
12947 			}
12948 
12949 			/*
12950 			 * Fatal error in allocating a scsi_pkt for this buf.
12951 			 * Update kstats & return the buf with an error code.
12952 			 * We must use sd_return_failed_command_no_restart() to
12953 			 * avoid a recursive call back into sd_start_cmds().
12954 			 * However this also means that we must keep processing
12955 			 * the waitq here in order to avoid stalling.
12956 			 */
12957 			if (statp == kstat_waitq_to_runq) {
12958 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
12959 			}
12960 			sd_return_failed_command_no_restart(un, bp, EIO);
12961 			if (bp == immed_bp) {
12962 				/* immed_bp is gone by now, so clear this */
12963 				immed_bp = NULL;
12964 			}
12965 			continue;
12966 		}
12967 got_pkt:
12968 		if (bp == immed_bp) {
12969 			/* goto the head of the class.... */
12970 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
12971 		}
12972 
12973 		un->un_ncmds_in_transport++;
12974 		SD_UPDATE_KSTATS(un, statp, bp);
12975 
12976 		/*
12977 		 * Call scsi_transport() to send the command to the target.
12978 		 * According to SCSA architecture, we must drop the mutex here
12979 		 * before calling scsi_transport() in order to avoid deadlock.
12980 		 * Note that the scsi_pkt's completion routine can be executed
12981 		 * (from interrupt context) even before the call to
12982 		 * scsi_transport() returns.
12983 		 */
12984 		SD_TRACE(SD_LOG_IO_CORE, un,
12985 		    "sd_start_cmds: calling scsi_transport()\n");
12986 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
12987 
12988 		mutex_exit(SD_MUTEX(un));
12989 		rval = scsi_transport(xp->xb_pktp);
12990 		mutex_enter(SD_MUTEX(un));
12991 
12992 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12993 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
12994 
12995 		switch (rval) {
12996 		case TRAN_ACCEPT:
12997 			/* Clear this with every pkt accepted by the HBA */
12998 			un->un_tran_fatal_count = 0;
12999 			break;	/* Success; try the next cmd (if any) */
13000 
13001 		case TRAN_BUSY:
13002 			un->un_ncmds_in_transport--;
13003 			ASSERT(un->un_ncmds_in_transport >= 0);
13004 
13005 			/*
13006 			 * Don't retry request sense, the sense data
13007 			 * is lost when another request is sent.
13008 			 * Free up the rqs buf and retry
13009 			 * the original failed cmd.  Update kstat.
13010 			 */
13011 			if (bp == un->un_rqs_bp) {
13012 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13013 				bp = sd_mark_rqs_idle(un, xp);
13014 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
13015 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
13016 					kstat_waitq_enter);
13017 				goto exit;
13018 			}
13019 
13020 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13021 			/*
13022 			 * Free the DMA resources for the  scsi_pkt. This will
13023 			 * allow mpxio to select another path the next time
13024 			 * we call scsi_transport() with this scsi_pkt.
13025 			 * See sdintr() for the rationale behind this.
13026 			 */
13027 			if ((un->un_f_is_fibre == TRUE) &&
13028 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13029 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13030 				scsi_dmafree(xp->xb_pktp);
13031 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13032 			}
13033 #endif
13034 
13035 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13036 				/*
13037 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13038 				 * are for error recovery situations. These do
13039 				 * not use the normal command waitq, so if they
13040 				 * get a TRAN_BUSY we cannot put them back onto
13041 				 * the waitq for later retry. One possible
13042 				 * problem is that there could already be some
13043 				 * other command on un_retry_bp that is waiting
13044 				 * for this one to complete, so we would be
13045 				 * deadlocked if we put this command back onto
13046 				 * the waitq for later retry (since un_retry_bp
13047 				 * must complete before the driver gets back to
13048 				 * commands on the waitq).
13049 				 *
13050 				 * To avoid deadlock we must schedule a callback
13051 				 * that will restart this command after a set
13052 				 * interval.  This should keep retrying for as
13053 				 * long as the underlying transport keeps
13054 				 * returning TRAN_BUSY (just like for other
13055 				 * commands).  Use the same timeout interval as
13056 				 * for the ordinary TRAN_BUSY retry.
13057 				 */
13058 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13059 				    "sd_start_cmds: scsi_transport() returned "
13060 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13061 
13062 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13063 				un->un_direct_priority_timeid =
13064 				    timeout(sd_start_direct_priority_command,
13065 				    bp, SD_BSY_TIMEOUT / 500);
13066 
13067 				goto exit;
13068 			}
13069 
13070 			/*
13071 			 * For TRAN_BUSY, we want to reduce the throttle value,
13072 			 * unless we are retrying a command.
13073 			 */
13074 			if (bp != un->un_retry_bp) {
13075 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13076 			}
13077 
13078 			/*
13079 			 * Set up the bp to be tried again 10 ms later.
13080 			 * Note:x86: Is there a timeout value in the sd_lun
13081 			 * for this condition?
13082 			 */
13083 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13084 				kstat_runq_back_to_waitq);
13085 			goto exit;
13086 
13087 		case TRAN_FATAL_ERROR:
13088 			un->un_tran_fatal_count++;
13089 			/* FALLTHRU */
13090 
13091 		case TRAN_BADPKT:
13092 		default:
13093 			un->un_ncmds_in_transport--;
13094 			ASSERT(un->un_ncmds_in_transport >= 0);
13095 
13096 			/*
13097 			 * If this is our REQUEST SENSE command with a
13098 			 * transport error, we must get back the pointers
13099 			 * to the original buf, and mark the REQUEST
13100 			 * SENSE command as "available".
13101 			 */
13102 			if (bp == un->un_rqs_bp) {
13103 				bp = sd_mark_rqs_idle(un, xp);
13104 				xp = SD_GET_XBUF(bp);
13105 			} else {
13106 				/*
13107 				 * Legacy behavior: do not update transport
13108 				 * error count for request sense commands.
13109 				 */
13110 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13111 			}
13112 
13113 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13114 			sd_print_transport_rejected_message(un, xp, rval);
13115 
13116 			/*
13117 			 * We must use sd_return_failed_command_no_restart() to
13118 			 * avoid a recursive call back into sd_start_cmds().
13119 			 * However this also means that we must keep processing
13120 			 * the waitq here in order to avoid stalling.
13121 			 */
13122 			sd_return_failed_command_no_restart(un, bp, EIO);
13123 
13124 			/*
13125 			 * Notify any threads waiting in sd_ddi_suspend() that
13126 			 * a command completion has occurred.
13127 			 */
13128 			if (un->un_state == SD_STATE_SUSPENDED) {
13129 				cv_broadcast(&un->un_disk_busy_cv);
13130 			}
13131 
13132 			if (bp == immed_bp) {
13133 				/* immed_bp is gone by now, so clear this */
13134 				immed_bp = NULL;
13135 			}
13136 			break;
13137 		}
13138 
13139 	} while (immed_bp == NULL);
13140 
13141 exit:
13142 	ASSERT(mutex_owned(SD_MUTEX(un)));
13143 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13144 }
13145 
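/*
 * Illustration only: the TRAN_BUSY handling above relies on the standard
 * timeout(9F) pattern to re-drive a command once the delay expires.  A
 * minimal sketch of that pattern, using names from this file:
 *
 *	timeout_id_t tid;
 *
 *	tid = timeout(sd_start_direct_priority_command, bp,
 *	    SD_BSY_TIMEOUT / 500);
 *	...
 *	(void) untimeout(tid);		if the callback must be cancelled
 *
 * Assuming SD_BSY_TIMEOUT represents roughly 5 seconds in clock ticks
 * (see sddef.h), SD_BSY_TIMEOUT / 500 works out to about 10 ms, which is
 * where the "tried again 10 ms later" note above comes from.
 */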
13146 
13147 /*
13148  *    Function: sd_return_command
13149  *
13150  * Description: Returns a command to its originator (with or without an
13151  *		error).  Also starts commands waiting to be transported
13152  *		to the target.
13153  *
13154  *     Context: May be called from interrupt, kernel, or timeout context
13155  */
13156 
13157 static void
13158 sd_return_command(struct sd_lun *un, struct buf *bp)
13159 {
13160 	struct sd_xbuf *xp;
13161 #if defined(__i386) || defined(__amd64)
13162 	struct scsi_pkt *pktp;
13163 #endif
13164 
13165 	ASSERT(bp != NULL);
13166 	ASSERT(un != NULL);
13167 	ASSERT(mutex_owned(SD_MUTEX(un)));
13168 	ASSERT(bp != un->un_rqs_bp);
13169 	xp = SD_GET_XBUF(bp);
13170 	ASSERT(xp != NULL);
13171 
13172 #if defined(__i386) || defined(__amd64)
13173 	pktp = SD_GET_PKTP(bp);
13174 #endif
13175 
13176 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13177 
13178 #if defined(__i386) || defined(__amd64)
13179 	/*
13180 	 * Note:x86: check for the "sdrestart failed" case.
13181 	 */
13182 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13183 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13184 	    (xp->xb_pktp->pkt_resid == 0)) {
13185 
13186 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13187 			/*
13188 			 * Successfully set up next portion of cmd
13189 			 * transfer, try sending it
13190 			 */
13191 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13192 			    NULL, NULL, 0, (clock_t)0, NULL);
13193 			sd_start_cmds(un, NULL);
13194 			return;	/* Note:x86: need a return here? */
13195 		}
13196 	}
13197 #endif
13198 
13199 	/*
13200 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13201 	 * can happen if upon being re-tried the failfast bp either
13202 	 * succeeded or encountered another error (possibly even a different
13203 	 * error than the one that precipitated the failfast state, but in
13204 	 * that case it would have had to exhaust retries as well). Regardless,
13205 	 * this should not occur while the instance is in the active
13206 	 * failfast state.
13207 	 */
13208 	if (bp == un->un_failfast_bp) {
13209 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13210 		un->un_failfast_bp = NULL;
13211 	}
13212 
13213 	/*
13214 	 * Clear the failfast state upon successful completion of ANY cmd.
13215 	 */
13216 	if (bp->b_error == 0) {
13217 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13218 	}
13219 
13220 	/*
13221 	 * This is used if the command was retried one or more times. Show that
13222 	 * we are done with it, and allow processing of the waitq to resume.
13223 	 */
13224 	if (bp == un->un_retry_bp) {
13225 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13226 		    "sd_return_command: un:0x%p: "
13227 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13228 		un->un_retry_bp = NULL;
13229 		un->un_retry_statp = NULL;
13230 	}
13231 
13232 	SD_UPDATE_RDWR_STATS(un, bp);
13233 	SD_UPDATE_PARTITION_STATS(un, bp);
13234 
13235 	switch (un->un_state) {
13236 	case SD_STATE_SUSPENDED:
13237 		/*
13238 		 * Notify any threads waiting in sd_ddi_suspend() that
13239 		 * a command completion has occurred.
13240 		 */
13241 		cv_broadcast(&un->un_disk_busy_cv);
13242 		break;
13243 	default:
13244 		sd_start_cmds(un, NULL);
13245 		break;
13246 	}
13247 
13248 	/* Return this command up the iodone chain to its originator. */
13249 	mutex_exit(SD_MUTEX(un));
13250 
13251 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13252 	xp->xb_pktp = NULL;
13253 
13254 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13255 
13256 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13257 	mutex_enter(SD_MUTEX(un));
13258 
13259 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13260 }
13261 
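/*
 * Illustration only: the destroypkt dispatch in sd_return_command() is a
 * function-pointer table indexed by the xbuf's iodone chain index, along
 * the lines of the sketch below (the entries shown are indicative, not a
 * verbatim copy of the real sd_destroypkt_map):
 *
 *	static void (*sd_destroypkt_map[])(struct buf *) = {
 *		sd_destroypkt_for_buf,		buf(9S) commands
 *		sd_destroypkt_for_uscsi,	USCSI commands
 *	};
 *
 *	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
 *
 * This lets different command types share sd_return_command() while
 * still being torn down by a layer-specific routine.
 */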
13262 
13263 /*
13264  *    Function: sd_return_failed_command
13265  *
13266  * Description: Command completion when an error occurred.
13267  *
13268  *     Context: May be called from interrupt context
13269  */
13270 
13271 static void
13272 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13273 {
13274 	ASSERT(bp != NULL);
13275 	ASSERT(un != NULL);
13276 	ASSERT(mutex_owned(SD_MUTEX(un)));
13277 
13278 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13279 	    "sd_return_failed_command: entry\n");
13280 
13281 	/*
13282 	 * b_resid could already be nonzero due to a partial data
13283 	 * transfer, so do not change it here.
13284 	 */
13285 	SD_BIOERROR(bp, errcode);
13286 
13287 	sd_return_command(un, bp);
13288 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13289 	    "sd_return_failed_command: exit\n");
13290 }
13291 
13292 
13293 /*
13294  *    Function: sd_return_failed_command_no_restart
13295  *
13296  * Description: Same as sd_return_failed_command, but ensures that no
13297  *		call back into sd_start_cmds will be issued.
13298  *
13299  *     Context: May be called from interrupt context
13300  */
13301 
13302 static void
13303 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13304 	int errcode)
13305 {
13306 	struct sd_xbuf *xp;
13307 
13308 	ASSERT(bp != NULL);
13309 	ASSERT(un != NULL);
13310 	ASSERT(mutex_owned(SD_MUTEX(un)));
13311 	xp = SD_GET_XBUF(bp);
13312 	ASSERT(xp != NULL);
13313 	ASSERT(errcode != 0);
13314 
13315 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13316 	    "sd_return_failed_command_no_restart: entry\n");
13317 
13318 	/*
13319 	 * b_resid could already be nonzero due to a partial data
13320 	 * transfer, so do not change it here.
13321 	 */
13322 	SD_BIOERROR(bp, errcode);
13323 
13324 	/*
13325 	 * If this is the failfast bp, clear it. This can happen if the
13326 	 * failfast bp encountered a fatal error when we attempted to
13327 	 * re-try it (such as a scsi_transport(9F) failure).  However
13328 	 * we should NOT be in an active failfast state if the failfast
13329 	 * bp is not NULL.
13330 	 */
13331 	if (bp == un->un_failfast_bp) {
13332 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13333 		un->un_failfast_bp = NULL;
13334 	}
13335 
13336 	if (bp == un->un_retry_bp) {
13337 		/*
13338 		 * This command was retried one or more times. Show that we are
13339 		 * done with it, and allow processing of the waitq to resume.
13340 		 */
13341 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13342 		    "sd_return_failed_command_no_restart: "
13343 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13344 		un->un_retry_bp = NULL;
13345 		un->un_retry_statp = NULL;
13346 	}
13347 
13348 	SD_UPDATE_RDWR_STATS(un, bp);
13349 	SD_UPDATE_PARTITION_STATS(un, bp);
13350 
13351 	mutex_exit(SD_MUTEX(un));
13352 
13353 	if (xp->xb_pktp != NULL) {
13354 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13355 		xp->xb_pktp = NULL;
13356 	}
13357 
13358 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13359 
13360 	mutex_enter(SD_MUTEX(un));
13361 
13362 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13363 	    "sd_return_failed_command_no_restart: exit\n");
13364 }
13365 
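/*
 * Why the "no restart" variant exists, sketched as the call cycle it
 * breaks (illustration only):
 *
 *	sd_start_cmds()
 *	  -> scsi_transport() fails (TRAN_FATAL_ERROR / TRAN_BADPKT)
 *	    -> sd_return_failed_command()
 *	      -> sd_return_command()
 *	        -> sd_start_cmds()		recursion!
 *
 * sd_return_failed_command_no_restart() never calls back into
 * sd_start_cmds(), so its caller must keep draining the waitq itself,
 * as the TRAN_BADPKT handling in sd_start_cmds() does.
 */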
13366 
13367 /*
13368  *    Function: sd_retry_command
13369  *
13370  * Description: Queue up a command for retry, or (optionally) fail it
13371  *		if retry counts are exhausted.
13372  *
13373  *   Arguments: un - Pointer to the sd_lun struct for the target.
13374  *
13375  *		bp - Pointer to the buf for the command to be retried.
13376  *
13377  *		retry_check_flag - Flag to see which (if any) of the retry
13378  *		   counts should be decremented/checked. If the indicated
13379  *		   retry count is exhausted, then the command will not be
13380  *		   retried; it will be failed instead. This should use a
13381  *		   value equal to one of the following:
13382  *
13383  *			SD_RETRIES_NOCHECK
13384  *			SD_RETRIES_STANDARD
13385  *			SD_RETRIES_VICTIM
13386  *
13387  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13388  *		   if the check should be made to see if FLAG_ISOLATE is set
13389  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13390  *		   not retried, it is simply failed.
13391  *
13392  *		user_funcp - Ptr to function to call before dispatching the
13393  *		   command. May be NULL if no action needs to be performed.
13394  *		   (Primarily intended for printing messages.)
13395  *
13396  *		user_arg - Optional argument to be passed along to
13397  *		   the user_funcp call.
13398  *
13399  *		failure_code - errno return code to set in the bp if the
13400  *		   command is going to be failed.
13401  *
13402  *		retry_delay - Retry delay interval in (clock_t) units. May
13403  *		   be zero, which indicates that the command should be retried
13404  *		   immediately (ie, without an intervening delay).
13405  *
13406  *		statp - Ptr to kstat function to be updated if the command
13407  *		   is queued for a delayed retry. May be NULL if no kstat
13408  *		   update is desired.
13409  *
13410  *     Context: May be called from interrupt context.
13411  */
13412 
13413 static void
13414 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13415 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
13416 	int code), void *user_arg, int failure_code, clock_t retry_delay,
13417 	void (*statp)(kstat_io_t *))
13418 {
13419 	struct sd_xbuf	*xp;
13420 	struct scsi_pkt	*pktp;
13421 
13422 	ASSERT(un != NULL);
13423 	ASSERT(mutex_owned(SD_MUTEX(un)));
13424 	ASSERT(bp != NULL);
13425 	xp = SD_GET_XBUF(bp);
13426 	ASSERT(xp != NULL);
13427 	pktp = SD_GET_PKTP(bp);
13428 	ASSERT(pktp != NULL);
13429 
13430 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13431 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13432 
13433 	/*
13434 	 * If we are syncing or dumping, fail the command to avoid
13435 	 * recursively calling back into scsi_transport().
13436 	 */
13437 	if (ddi_in_panic()) {
13438 		goto fail_command_no_log;
13439 	}
13440 
13441 	/*
13442 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13443 	 * log an error and fail the command.
13444 	 */
13445 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13446 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13447 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13448 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13449 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13450 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13451 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13452 		goto fail_command;
13453 	}
13454 
13455 	/*
13456 	 * If we are suspended or dumping, put the command at the head of
13457 	 * the wait queue, since we don't want to start any more commands,
13458 	 * and clear un_retry_bp. When we are resumed, the commands in the
13459 	 * wait queue will be handled.
13460 	 */
13461 	switch (un->un_state) {
13462 	case SD_STATE_SUSPENDED:
13463 	case SD_STATE_DUMPING:
13464 		bp->av_forw = un->un_waitq_headp;
13465 		un->un_waitq_headp = bp;
13466 		if (un->un_waitq_tailp == NULL) {
13467 			un->un_waitq_tailp = bp;
13468 		}
13469 		if (bp == un->un_retry_bp) {
13470 			un->un_retry_bp = NULL;
13471 			un->un_retry_statp = NULL;
13472 		}
13473 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13474 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13475 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13476 		return;
13477 	default:
13478 		break;
13479 	}
13480 
13481 	/*
13482 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13483 	 * is set; if it is then we do not want to retry the command.
13484 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13485 	 */
13486 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13487 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13488 			goto fail_command;
13489 		}
13490 	}
13491 
13492 
13493 	/*
13494 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13495 	 * command timeout or a selection timeout has occurred. This means
13496 	 * that we were unable to establish any kind of communication with
13497 	 * the target, and subsequent retries and/or commands are likely
13498 	 * to encounter similar results and take a long time to complete.
13499 	 *
13500 	 * If this is a failfast error condition, we need to update the
13501 	 * failfast state, even if this bp does not have B_FAILFAST set.
13502 	 */
13503 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13504 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13505 			ASSERT(un->un_failfast_bp == NULL);
13506 			/*
13507 			 * If we are already in the active failfast state, and
13508 			 * another failfast error condition has been detected,
13509 			 * then fail this command if it has B_FAILFAST set.
13510 			 * If B_FAILFAST is clear, then maintain the legacy
13511 			 * behavior of retrying heroically, even though this will
13512 			 * take a lot more time to fail the command.
13513 			 */
13514 			if (bp->b_flags & B_FAILFAST) {
13515 				goto fail_command;
13516 			}
13517 		} else {
13518 			/*
13519 			 * We're not in the active failfast state, but we
13520 			 * have a failfast error condition, so we must begin
13521 			 * transition to the next state. We do this regardless
13522 			 * of whether or not this bp has B_FAILFAST set.
13523 			 */
13524 			if (un->un_failfast_bp == NULL) {
13525 				/*
13526 				 * This is the first bp to meet a failfast
13527 				 * condition so save it on un_failfast_bp &
13528 				 * do normal retry processing. Do not enter
13529 				 * active failfast state yet. This marks
13530 				 * entry into the "failfast pending" state.
13531 				 */
13532 				un->un_failfast_bp = bp;
13533 
13534 			} else if (un->un_failfast_bp == bp) {
13535 				/*
13536 				 * This is the second time *this* bp has
13537 				 * encountered a failfast error condition,
13538 				 * so enter active failfast state & flush
13539 				 * queues as appropriate.
13540 				 */
13541 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13542 				un->un_failfast_bp = NULL;
13543 				sd_failfast_flushq(un);
13544 
13545 				/*
13546 				 * Fail this bp now if B_FAILFAST set;
13547 				 * otherwise continue with retries. (It would
13548 				 * be pretty ironic if this bp succeeded on a
13549 				 * subsequent retry after we just flushed all
13550 				 * the queues).
13551 				 */
13552 				if (bp->b_flags & B_FAILFAST) {
13553 					goto fail_command;
13554 				}
13555 
13556 #if !defined(lint) && !defined(__lint)
13557 			} else {
13558 				/*
13559 				 * If neither of the preceding conditionals
13560 				 * was true, it means that there is some
13561 				 * *other* bp that has met an initial failfast
13562 				 * condition and is currently either being
13563 				 * retried or is waiting to be retried. In
13564 				 * that case we should perform normal retry
13565 				 * processing on *this* bp, since there is a
13566 				 * chance that the current failfast condition
13567 				 * is transient and recoverable. If that does
13568 				 * not turn out to be the case, then retries
13569 				 * will be cleared when the wait queue is
13570 				 * flushed anyway.
13571 				 */
13572 #endif
13573 			}
13574 		}
13575 	} else {
13576 		/*
13577 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13578 		 * likely were able to at least establish some level of
13579 		 * communication with the target and subsequent commands
13580 		 * and/or retries are likely to get through to the target,
13581 		 * and/or retries are likely to get through to the target.
13582 		 * the failfast state. Note that this does not affect
13583 		 * the "failfast pending" condition.
13584 		 */
13585 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13586 	}
13587 
13588 
13589 	/*
13590 	 * Check the specified retry count to see if we can still do
13591 	 * any retries with this pkt before we should fail it.
13592 	 */
13593 	switch (retry_check_flag & SD_RETRIES_MASK) {
13594 	case SD_RETRIES_VICTIM:
13595 		/*
13596 		 * Check the victim retry count. If exhausted, then fall
13597 		 * thru & check against the standard retry count.
13598 		 */
13599 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13600 			/* Increment count & proceed with the retry */
13601 			xp->xb_victim_retry_count++;
13602 			break;
13603 		}
13604 		/* Victim retries exhausted, fall back to std. retries... */
13605 		/* FALLTHRU */
13606 
13607 	case SD_RETRIES_STANDARD:
13608 		if (xp->xb_retry_count >= un->un_retry_count) {
13609 			/* Retries exhausted, fail the command */
13610 			SD_TRACE(SD_LOG_IO_CORE, un,
13611 			    "sd_retry_command: retries exhausted!\n");
13612 			/*
13613 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13614 			 * commands with nonzero pkt_resid.
13615 			 */
13616 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13617 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13618 			    (pktp->pkt_resid != 0)) {
13619 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13620 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13621 					SD_UPDATE_B_RESID(bp, pktp);
13622 				}
13623 			}
13624 			goto fail_command;
13625 		}
13626 		xp->xb_retry_count++;
13627 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13628 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13629 		break;
13630 
13631 	case SD_RETRIES_UA:
13632 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13633 			/* Retries exhausted, fail the command */
13634 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13635 			    "Unit Attention retries exhausted. "
13636 			    "Check the target.\n");
13637 			goto fail_command;
13638 		}
13639 		xp->xb_ua_retry_count++;
13640 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13641 		    "sd_retry_command: retry count:%d\n",
13642 		    xp->xb_ua_retry_count);
13643 		break;
13644 
13645 	case SD_RETRIES_BUSY:
13646 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13647 			/* Retries exhausted, fail the command */
13648 			SD_TRACE(SD_LOG_IO_CORE, un,
13649 			    "sd_retry_command: retries exhausted!\n");
13650 			goto fail_command;
13651 		}
13652 		xp->xb_retry_count++;
13653 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13654 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13655 		break;
13656 
13657 	case SD_RETRIES_NOCHECK:
13658 	default:
13659 		/* No retry count to check. Just proceed with the retry */
13660 		break;
13661 	}
13662 
13663 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13664 
13665 	/*
13666 	 * If we were given a zero timeout, we must attempt to retry the
13667 	 * command immediately (ie, without a delay).
13668 	 */
13669 	if (retry_delay == 0) {
13670 		/*
13671 		 * Check some limiting conditions to see if we can actually
13672 		 * do the immediate retry.  If we cannot, then we must
13673 		 * fall back to queueing up a delayed retry.
13674 		 */
13675 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13676 			/*
13677 			 * We are at the throttle limit for the target;
13678 			 * fall back to delayed retry.
13679 			 */
13680 			retry_delay = SD_BSY_TIMEOUT;
13681 			statp = kstat_waitq_enter;
13682 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13683 			    "sd_retry_command: immed. retry hit "
13684 			    "throttle!\n");
13685 		} else {
13686 			/*
13687 			 * We're clear to proceed with the immediate retry.
13688 			 * First call the user-provided function (if any)
13689 			 */
13690 			if (user_funcp != NULL) {
13691 				(*user_funcp)(un, bp, user_arg,
13692 				    SD_IMMEDIATE_RETRY_ISSUED);
13693 #ifdef __lock_lint
13694 				sd_print_incomplete_msg(un, bp, user_arg,
13695 				    SD_IMMEDIATE_RETRY_ISSUED);
13696 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13697 				    SD_IMMEDIATE_RETRY_ISSUED);
13698 				sd_print_sense_failed_msg(un, bp, user_arg,
13699 				    SD_IMMEDIATE_RETRY_ISSUED);
13700 #endif
13701 			}
13702 
13703 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13704 			    "sd_retry_command: issuing immediate retry\n");
13705 
13706 			/*
13707 			 * Call sd_start_cmds() to transport the command to
13708 			 * the target.
13709 			 */
13710 			sd_start_cmds(un, bp);
13711 
13712 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13713 			    "sd_retry_command exit\n");
13714 			return;
13715 		}
13716 	}
13717 
13718 	/*
13719 	 * Set up to retry the command after a delay.
13720 	 * First call the user-provided function (if any)
13721 	 */
13722 	if (user_funcp != NULL) {
13723 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13724 	}
13725 
13726 	sd_set_retry_bp(un, bp, retry_delay, statp);
13727 
13728 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13729 	return;
13730 
13731 fail_command:
13732 
13733 	if (user_funcp != NULL) {
13734 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13735 	}
13736 
13737 fail_command_no_log:
13738 
13739 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13740 	    "sd_retry_command: returning failed command\n");
13741 
13742 	sd_return_failed_command(un, bp, failure_code);
13743 
13744 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13745 }
13746 
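/*
 * Usage sketch for sd_retry_command() (illustration only): a caller that
 * wants a delayed retry with a diagnostic message and waitq accounting
 * might do something like:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
 *	    sd_print_retry_msg, NULL, EIO, SD_BSY_TIMEOUT,
 *	    kstat_waitq_enter);
 *
 * sd_print_retry_msg is a hypothetical user_funcp here; any function
 * matching the user_funcp signature (or NULL) may be supplied.  A
 * retry_delay of 0 requests an immediate retry, subject to the throttle
 * check above.
 */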
13747 
13748 /*
13749  *    Function: sd_set_retry_bp
13750  *
13751  * Description: Set up the given bp for retry.
13752  *
13753  *   Arguments: un - ptr to associated softstate
13754  *		bp - ptr to buf(9S) for the command
13755  *		retry_delay - time interval before issuing retry (may be 0)
13756  *		statp - optional pointer to kstat function
13757  *
13758  *     Context: May be called under interrupt context
13759  */
13760 
13761 static void
13762 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13763 	void (*statp)(kstat_io_t *))
13764 {
13765 	ASSERT(un != NULL);
13766 	ASSERT(mutex_owned(SD_MUTEX(un)));
13767 	ASSERT(bp != NULL);
13768 
13769 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13770 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13771 
13772 	/*
13773 	 * Indicate that the command is being retried. This will not allow any
13774 	 * other commands on the wait queue to be transported to the target
13775 	 * until this command has been completed (success or failure). The
13776 	 * "retry command" is not transported to the target until the given
13777 	 * time delay expires, unless the user specified a 0 retry_delay.
13778 	 *
13779 	 * Note: the timeout(9F) callback routine is what actually calls
13780 	 * sd_start_cmds() to transport the command, with the exception of a
13781 	 * zero retry_delay. The only current implementor of a zero retry delay
13782 	 * zero retry_delay. The only current use of a zero retry delay
13783 	 */
13784 	if (un->un_retry_bp == NULL) {
13785 		ASSERT(un->un_retry_statp == NULL);
13786 		un->un_retry_bp = bp;
13787 
13788 		/*
13789 		 * If the user has not specified a delay the command should
13790 		 * be queued and no timeout should be scheduled.
13791 		 */
13792 		if (retry_delay == 0) {
13793 			/*
13794 			 * Save the kstat pointer that will be used in the
13795 			 * call to SD_UPDATE_KSTATS() below, so that
13796 			 * sd_start_cmds() can correctly decrement the waitq
13797 			 * count when it is time to transport this command.
13798 			 */
13799 			un->un_retry_statp = statp;
13800 			goto done;
13801 		}
13802 	}
13803 
13804 	if (un->un_retry_bp == bp) {
13805 		/*
13806 		 * Save the kstat pointer that will be used in the call to
13807 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
13808 		 * correctly decrement the waitq count when it is time to
13809 		 * transport this command.
13810 		 */
13811 		un->un_retry_statp = statp;
13812 
13813 		/*
13814 		 * Schedule a timeout if:
13815 		 *   1) The user has specified a delay.
13816 		 *   2) There is not a START_STOP_UNIT callback pending.
13817 		 *
13818 		 * If no delay has been specified, then it is up to the caller
13819 		 * to ensure that IO processing continues without stalling.
13820 		 * Effectively, this means that the caller will issue the
13821 		 * required call to sd_start_cmds(). The START_STOP_UNIT
13822 		 * callback does this after the START STOP UNIT command has
13823 		 * completed. In either of these cases we should not schedule
13824 		 * a timeout callback here.  Also don't schedule the timeout if
13825 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
13826 		 */
13827 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
13828 		    (un->un_direct_priority_timeid == NULL)) {
13829 			un->un_retry_timeid =
13830 			    timeout(sd_start_retry_command, un, retry_delay);
13831 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13832 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
13833 			    " bp:0x%p un_retry_timeid:0x%p\n",
13834 			    un, bp, un->un_retry_timeid);
13835 		}
13836 	} else {
13837 		/*
13838 		 * We only get in here if there is already another command
13839 		 * waiting to be retried.  In this case, we just put the
13840 		 * given command onto the wait queue, so it can be transported
13841 		 * after the current retry command has completed.
13842 		 *
13843 		 * Also we have to make sure that if the command at the head
13844 		 * of the wait queue is the un_failfast_bp, that we do not
13845 		 * put ahead of it any other commands that are to be retried.
13846 		 */
13847 		if ((un->un_failfast_bp != NULL) &&
13848 		    (un->un_failfast_bp == un->un_waitq_headp)) {
13849 			/*
13850 			 * Enqueue this command AFTER the first command on
13851 			 * the wait queue (which is also un_failfast_bp).
13852 			 */
13853 			bp->av_forw = un->un_waitq_headp->av_forw;
13854 			un->un_waitq_headp->av_forw = bp;
13855 			if (un->un_waitq_headp == un->un_waitq_tailp) {
13856 				un->un_waitq_tailp = bp;
13857 			}
13858 		} else {
13859 			/* Enqueue this command at the head of the waitq. */
13860 			bp->av_forw = un->un_waitq_headp;
13861 			un->un_waitq_headp = bp;
13862 			if (un->un_waitq_tailp == NULL) {
13863 				un->un_waitq_tailp = bp;
13864 			}
13865 		}
13866 
13867 		if (statp == NULL) {
13868 			statp = kstat_waitq_enter;
13869 		}
13870 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13871 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
13872 	}
13873 
13874 done:
13875 	if (statp != NULL) {
13876 		SD_UPDATE_KSTATS(un, statp, bp);
13877 	}
13878 
13879 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13880 	    "sd_set_retry_bp: exit un:0x%p\n", un);
13881 }
13882 
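/*
 * Illustration only: the wait queue manipulated above is a singly-linked
 * list of bufs chained through av_forw, with head and tail pointers in
 * the softstate.  Head insertion is simply:
 *
 *	bp->av_forw = un->un_waitq_headp;
 *	un->un_waitq_headp = bp;
 *	if (un->un_waitq_tailp == NULL)
 *		un->un_waitq_tailp = bp;
 *
 * The special case in sd_set_retry_bp() that inserts *after* the head
 * entry exists solely to keep un_failfast_bp at the front of the queue.
 */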
13883 
13884 /*
13885  *    Function: sd_start_retry_command
13886  *
13887  * Description: Start the command that has been waiting on the target's
13888  *		retry queue.  Called from timeout(9F) context after the
13889  *		retry delay interval has expired.
13890  *
13891  *   Arguments: arg - pointer to associated softstate for the device.
13892  *
13893  *     Context: timeout(9F) thread context.  May not sleep.
13894  */
13895 
13896 static void
13897 sd_start_retry_command(void *arg)
13898 {
13899 	struct sd_lun *un = arg;
13900 
13901 	ASSERT(un != NULL);
13902 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13903 
13904 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13905 	    "sd_start_retry_command: entry\n");
13906 
13907 	mutex_enter(SD_MUTEX(un));
13908 
13909 	un->un_retry_timeid = NULL;
13910 
13911 	if (un->un_retry_bp != NULL) {
13912 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13913 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
13914 		    un, un->un_retry_bp);
13915 		sd_start_cmds(un, un->un_retry_bp);
13916 	}
13917 
13918 	mutex_exit(SD_MUTEX(un));
13919 
13920 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13921 	    "sd_start_retry_command: exit\n");
13922 }
13923 
13924 
13925 /*
13926  *    Function: sd_start_direct_priority_command
13927  *
13928  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
13929  *		received TRAN_BUSY when we called scsi_transport() to send it
13930  *		to the underlying HBA. This function is called from timeout(9F)
13931  *		context after the delay interval has expired.
13932  *
13933  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
13934  *
13935  *     Context: timeout(9F) thread context.  May not sleep.
13936  */
13937 
13938 static void
13939 sd_start_direct_priority_command(void *arg)
13940 {
13941 	struct buf	*priority_bp = arg;
13942 	struct sd_lun	*un;
13943 
13944 	ASSERT(priority_bp != NULL);
13945 	un = SD_GET_UN(priority_bp);
13946 	ASSERT(un != NULL);
13947 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13948 
13949 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13950 	    "sd_start_direct_priority_command: entry\n");
13951 
13952 	mutex_enter(SD_MUTEX(un));
13953 	un->un_direct_priority_timeid = NULL;
13954 	sd_start_cmds(un, priority_bp);
13955 	mutex_exit(SD_MUTEX(un));
13956 
13957 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13958 	    "sd_start_direct_priority_command: exit\n");
13959 }
13960 
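/*
 * Illustration only: both restart callbacks above follow the usual
 * timeout(9F) callback shape - take the mutex and clear the saved
 * timeout id before doing any work, so teardown paths calling
 * untimeout(9F) can tell whether the callback has already fired:
 *
 *	mutex_enter(SD_MUTEX(un));
 *	un->un_direct_priority_timeid = NULL;	mark callback as fired
 *	sd_start_cmds(un, priority_bp);		re-drive the command
 *	mutex_exit(SD_MUTEX(un));
 */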
13961 
13962 /*
13963  *    Function: sd_send_request_sense_command
13964  *
13965  * Description: Sends a REQUEST SENSE command to the target
13966  *
13967  *     Context: May be called from interrupt context.
13968  */
13969 
13970 static void
13971 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
13972 	struct scsi_pkt *pktp)
13973 {
13974 	ASSERT(bp != NULL);
13975 	ASSERT(un != NULL);
13976 	ASSERT(mutex_owned(SD_MUTEX(un)));
13977 
13978 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
13979 	    "entry: buf:0x%p\n", bp);
13980 
13981 	/*
13982 	 * If we are syncing or dumping, then fail the command to avoid a
13983 	 * recursive callback into scsi_transport(). Also fail the command
13984 	 * if we are suspended (legacy behavior).
13985 	 */
13986 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
13987 	    (un->un_state == SD_STATE_DUMPING)) {
13988 		sd_return_failed_command(un, bp, EIO);
13989 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13990 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
13991 		return;
13992 	}
13993 
13994 	/*
13995 	 * Retry the failed command and don't issue the request sense if:
13996 	 *    1) the sense buf is busy
13997 	 *    2) we have 1 or more outstanding commands on the target
13998 	 *    (the sense data will be cleared or invalidated anyway)
13999 	 *
14000 	 * Note: There could be an issue with not checking a retry limit here;
14001 	 * the problem is determining which retry limit to check.
14002 	 */
14003 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
14004 		/* Don't retry if the command is flagged as non-retryable */
14005 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
14006 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14007 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
14008 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14009 			    "sd_send_request_sense_command: "
14010 			    "at full throttle, retrying exit\n");
14011 		} else {
14012 			sd_return_failed_command(un, bp, EIO);
14013 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14014 			    "sd_send_request_sense_command: "
14015 			    "at full throttle, non-retryable exit\n");
14016 		}
14017 		return;
14018 	}
14019 
14020 	sd_mark_rqs_busy(un, bp);
14021 	sd_start_cmds(un, un->un_rqs_bp);
14022 
14023 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14024 	    "sd_send_request_sense_command: exit\n");
14025 }
14026 
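/*
 * Illustration only: the request-sense handshake pairs a failed command
 * with the instance's single RQS packet roughly as follows:
 *
 *	sd_mark_rqs_busy(un, bp);		FLAG_SENSING set on bp's
 *						pkt; bp saved in xb_sense_bp
 *	sd_start_cmds(un, un->un_rqs_bp);	transport the RQS pkt
 *	... sdintr() sees FLAG_SENSING on completion ...
 *	bp = sd_mark_rqs_idle(un, sense_xp);	original bp recovered
 *
 * Because there is only one RQS buf per instance, un_sense_isbusy
 * serializes access to it; the busy case above falls back to retrying
 * the original command instead.
 */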
14027 
14028 /*
14029  *    Function: sd_mark_rqs_busy
14030  *
14031  * Description: Indicate that the request sense bp for this instance is
14032  *		in use.
14033  *
14034  *     Context: May be called under interrupt context
14035  */
14036 
14037 static void
14038 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14039 {
14040 	struct sd_xbuf	*sense_xp;
14041 
14042 	ASSERT(un != NULL);
14043 	ASSERT(bp != NULL);
14044 	ASSERT(mutex_owned(SD_MUTEX(un)));
14045 	ASSERT(un->un_sense_isbusy == 0);
14046 
14047 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14048 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14049 
14050 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14051 	ASSERT(sense_xp != NULL);
14052 
14053 	SD_INFO(SD_LOG_IO, un,
14054 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14055 
14056 	ASSERT(sense_xp->xb_pktp != NULL);
14057 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14058 	    == (FLAG_SENSING | FLAG_HEAD));
14059 
14060 	un->un_sense_isbusy = 1;
14061 	un->un_rqs_bp->b_resid = 0;
14062 	sense_xp->xb_pktp->pkt_resid  = 0;
14063 	sense_xp->xb_pktp->pkt_reason = 0;
14064 
14065 	/* So we can get back the bp at interrupt time! */
14066 	sense_xp->xb_sense_bp = bp;
14067 
14068 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14069 
14070 	/*
14071 	 * Mark this buf as awaiting sense data. (This is already set in
14072 	 * the pkt_flags for the RQS packet.)
14073 	 */
14074 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14075 
14076 	sense_xp->xb_retry_count	= 0;
14077 	sense_xp->xb_victim_retry_count = 0;
14078 	sense_xp->xb_ua_retry_count	= 0;
14079 	sense_xp->xb_dma_resid  = 0;
14080 
14081 	/* Clean up the fields for auto-request sense */
14082 	sense_xp->xb_sense_status = 0;
14083 	sense_xp->xb_sense_state  = 0;
14084 	sense_xp->xb_sense_resid  = 0;
14085 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14086 
14087 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14088 }
14089 
14090 
14091 /*
14092  *    Function: sd_mark_rqs_idle
14093  *
14094  * Description: SD_MUTEX must be held continuously through this routine
14095  *		to prevent reuse of the rqs struct before the caller can
14096  *		complete its processing.
14097  *
14098  * Return Code: Pointer to the RQS buf
14099  *
14100  *     Context: May be called under interrupt context
14101  */
14102 
14103 static struct buf *
14104 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14105 {
14106 	struct buf *bp;
14107 	ASSERT(un != NULL);
14108 	ASSERT(sense_xp != NULL);
14109 	ASSERT(mutex_owned(SD_MUTEX(un)));
14110 	ASSERT(un->un_sense_isbusy != 0);
14111 
14112 	un->un_sense_isbusy = 0;
14113 	bp = sense_xp->xb_sense_bp;
14114 	sense_xp->xb_sense_bp = NULL;
14115 
14116 	/* This pkt is no longer interested in getting sense data */
14117 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14118 
14119 	return (bp);
14120 }
14121 
14122 
14123 
14124 /*
14125  *    Function: sd_alloc_rqs
14126  *
14127  * Description: Set up the unit to receive auto request sense data
14128  *
14129  * Return Code: DDI_SUCCESS or DDI_FAILURE
14130  *
14131  *     Context: Called under attach(9E) context
14132  */
14133 
14134 static int
14135 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14136 {
14137 	struct sd_xbuf *xp;
14138 
14139 	ASSERT(un != NULL);
14140 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14141 	ASSERT(un->un_rqs_bp == NULL);
14142 	ASSERT(un->un_rqs_pktp == NULL);
14143 
14144 	/*
14145 	 * First allocate the required buf and scsi_pkt structs, then set up
14146 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14147 	 */
14148 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14149 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14150 	if (un->un_rqs_bp == NULL) {
14151 		return (DDI_FAILURE);
14152 	}
14153 
14154 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14155 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14156 
14157 	if (un->un_rqs_pktp == NULL) {
14158 		sd_free_rqs(un);
14159 		return (DDI_FAILURE);
14160 	}
14161 
14162 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14163 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14164 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14165 
14166 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14167 
14168 	/* Set up the other needed members in the ARQ scsi_pkt. */
14169 	un->un_rqs_pktp->pkt_comp   = sdintr;
14170 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14171 	un->un_rqs_pktp->pkt_flags |=
14172 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14173 
14174 	/*
14175 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
14176 	 * provide any initpkt, destroypkt routines as we take care of
14177 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14178 	 */
14179 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14180 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14181 	xp->xb_pktp = un->un_rqs_pktp;
14182 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14183 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14184 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14185 
14186 	/*
14187 	 * Save the pointer to the request sense private bp so it can
14188 	 * be retrieved in sdintr.
14189 	 */
14190 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14191 	ASSERT(un->un_rqs_bp->b_private == xp);
14192 
14193 	/*
14194 	 * See if the HBA supports auto-request sense for the specified
14195 	 * target/lun. If it does, then try to enable it (if not already
14196 	 * enabled).
14197 	 *
14198 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14199 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14200 	 * return success.  However, in both of these cases ARQ is always
14201 	 * enabled and scsi_ifgetcap will always return true. The best approach
14202 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14203 	 *
14204 	 * The third case is an HBA (adp) that always returns enabled on
14205 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
14206 	 * there is to issue a scsi_ifsetcap then a scsi_ifgetcap.
14207 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
14208 	 */
14209 
14210 	if (un->un_f_is_fibre == TRUE) {
14211 		un->un_f_arq_enabled = TRUE;
14212 	} else {
14213 #if defined(__i386) || defined(__amd64)
14214 		/*
14215 		 * Circumvent the Adaptec bug, remove this code when
14216 		 * the bug is fixed
14217 		 */
14218 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14219 #endif
14220 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14221 		case 0:
14222 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14223 				"sd_alloc_rqs: HBA supports ARQ\n");
14224 			/*
14225 			 * ARQ is supported by this HBA but currently is not
14226 			 * enabled. Attempt to enable it and if successful then
14227 			 * mark this instance as ARQ enabled.
14228 			 */
14229 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14230 				== 1) {
14231 				/* Successfully enabled ARQ in the HBA */
14232 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14233 					"sd_alloc_rqs: ARQ enabled\n");
14234 				un->un_f_arq_enabled = TRUE;
14235 			} else {
14236 				/* Could not enable ARQ in the HBA */
14237 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14238 				    "sd_alloc_rqs: failed ARQ enable\n");
14239 				un->un_f_arq_enabled = FALSE;
14240 			}
14241 			break;
14242 		case 1:
14243 			/*
14244 			 * ARQ is supported by this HBA and is already enabled.
14245 			 * Just mark ARQ as enabled for this instance.
14246 			 */
14247 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14248 				"sd_alloc_rqs: ARQ already enabled\n");
14249 			un->un_f_arq_enabled = TRUE;
14250 			break;
14251 		default:
14252 			/*
14253 			 * ARQ is not supported by this HBA; disable it for this
14254 			 * instance.
14255 			 */
14256 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14257 				"sd_alloc_rqs: HBA does not support ARQ\n");
14258 			un->un_f_arq_enabled = FALSE;
14259 			break;
14260 		}
14261 	}
14262 
14263 	return (DDI_SUCCESS);
14264 }
14265 
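/*
 * Illustration only: stripped of the HBA workarounds, the capability
 * negotiation above is the standard scsi_ifgetcap(9F)/scsi_ifsetcap(9F)
 * probe-then-enable idiom ("enabled" stands in for un_f_arq_enabled):
 *
 *	switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
 *	case 0:		supported but currently disabled
 *		enabled = (scsi_ifsetcap(SD_ADDRESS(un),
 *		    "auto-rqsense", 1, 1) == 1);
 *		break;
 *	case 1:		already enabled
 *		enabled = TRUE;
 *		break;
 *	default:	not supported
 *		enabled = FALSE;
 *		break;
 *	}
 *
 * The extra scsi_ifsetcap() issued first on x86 is purely the Adaptec
 * workaround noted above.
 */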
14266 
14267 /*
14268  *    Function: sd_free_rqs
14269  *
14270  * Description: Cleanup for the pre-instance RQS command.
14271  *
14272  *     Context: Kernel thread context
14273  */
14274 
14275 static void
14276 sd_free_rqs(struct sd_lun *un)
14277 {
14278 	ASSERT(un != NULL);
14279 
14280 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14281 
14282 	/*
14283 	 * If consistent memory is bound to a scsi_pkt, the pkt
14284 	 * has to be destroyed *before* freeing the consistent memory.
14285 	 * Don't change the sequence of this operations.
14286 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
14287 	 * after it was freed in scsi_free_consistent_buf().
14288 	 */
14289 	if (un->un_rqs_pktp != NULL) {
14290 		scsi_destroy_pkt(un->un_rqs_pktp);
14291 		un->un_rqs_pktp = NULL;
14292 	}
14293 
14294 	if (un->un_rqs_bp != NULL) {
14295 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14296 		scsi_free_consistent_buf(un->un_rqs_bp);
14297 		un->un_rqs_bp = NULL;
14298 	}
14299 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14300 }
14301 
14302 
14303 
14304 /*
14305  *    Function: sd_reduce_throttle
14306  *
14307  * Description: Reduces the maximum # of outstanding commands on a
14308  *		target to the current number of outstanding commands.
14309  *		Queues a timeout(9F) callback to restore the limit
14310  *		after a specified interval has elapsed.
14311  *		Typically used when we get a TRAN_BUSY return code
14312  *		back from scsi_transport().
14313  *
14314  *   Arguments: un - ptr to the sd_lun softstate struct
14315  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14316  *
14317  *     Context: May be called from interrupt context
14318  */
14319 
14320 static void
14321 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14322 {
14323 	ASSERT(un != NULL);
14324 	ASSERT(mutex_owned(SD_MUTEX(un)));
14325 	ASSERT(un->un_ncmds_in_transport >= 0);
14326 
14327 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14328 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14329 	    un, un->un_throttle, un->un_ncmds_in_transport);
14330 
14331 	if (un->un_throttle > 1) {
14332 		if (un->un_f_use_adaptive_throttle == TRUE) {
14333 			switch (throttle_type) {
14334 			case SD_THROTTLE_TRAN_BUSY:
14335 				if (un->un_busy_throttle == 0) {
14336 					un->un_busy_throttle = un->un_throttle;
14337 				}
14338 				break;
14339 			case SD_THROTTLE_QFULL:
14340 				un->un_busy_throttle = 0;
14341 				break;
14342 			default:
14343 				ASSERT(FALSE);
14344 			}
14345 
14346 			if (un->un_ncmds_in_transport > 0) {
14347 				un->un_throttle = un->un_ncmds_in_transport;
14348 			}
14349 
14350 		} else {
14351 			if (un->un_ncmds_in_transport == 0) {
14352 				un->un_throttle = 1;
14353 			} else {
14354 				un->un_throttle = un->un_ncmds_in_transport;
14355 			}
14356 		}
14357 	}
14358 
14359 	/* Reschedule the timeout if none is currently active */
14360 	if (un->un_reset_throttle_timeid == NULL) {
14361 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14362 		    un, SD_THROTTLE_RESET_INTERVAL);
14363 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14364 		    "sd_reduce_throttle: timeout scheduled!\n");
14365 	}
14366 
14367 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14368 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14369 }
14370 
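/*
 * Worked example for sd_reduce_throttle() (illustration only): with
 * un_throttle == 16 and un_ncmds_in_transport == 5 when a TRAN_BUSY
 * arrives, adaptive throttling does:
 *
 *	un->un_busy_throttle = 16;	remembered for the restore path
 *	un->un_throttle	     = 5;	no new commands until the
 *					in-flight count drops
 *
 * For SD_THROTTLE_QFULL, un_busy_throttle is zeroed instead, so
 * sd_restore_throttle() ramps the limit back up gradually rather than
 * jumping straight back to the saved value.
 */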
14371 
14372 
14373 /*
14374  *    Function: sd_restore_throttle
14375  *
14376  * Description: Callback function for timeout(9F).  Resets the current
14377  *		value of un->un_throttle to its default.
14378  *
14379  *   Arguments: arg - pointer to associated softstate for the device.
14380  *
14381  *     Context: May be called from interrupt context
14382  */
14383 
14384 static void
14385 sd_restore_throttle(void *arg)
14386 {
14387 	struct sd_lun	*un = arg;
14388 
14389 	ASSERT(un != NULL);
14390 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14391 
14392 	mutex_enter(SD_MUTEX(un));
14393 
14394 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14395 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14396 
14397 	un->un_reset_throttle_timeid = NULL;
14398 
14399 	if (un->un_f_use_adaptive_throttle == TRUE) {
14400 		/*
14401 		 * If un_busy_throttle is nonzero, then it contains the
14402 		 * value that un_throttle was when we got a TRAN_BUSY back
14403 		 * from scsi_transport(). We want to revert back to this
14404 		 * value.
14405 		 *
14406 		 * In the QFULL case, the throttle limit will incrementally
14407 		 * increase until it reaches max throttle.
14408 		 */
14409 		if (un->un_busy_throttle > 0) {
14410 			un->un_throttle = un->un_busy_throttle;
14411 			un->un_busy_throttle = 0;
14412 		} else {
14413 			/*
14414 			 * Increase the throttle by 10% to open the gate
14415 			 * slowly, and schedule another restore if the saved
14416 			 * throttle has not yet been reached.
14417 			 */
14418 			short throttle;
14419 			if (sd_qfull_throttle_enable) {
14420 				throttle = un->un_throttle +
14421 				    max((un->un_throttle / 10), 1);
14422 				un->un_throttle =
14423 				    (throttle < un->un_saved_throttle) ?
14424 				    throttle : un->un_saved_throttle;
14425 				if (un->un_throttle < un->un_saved_throttle) {
14426 					un->un_reset_throttle_timeid =
14427 					    timeout(sd_restore_throttle, un,
14428 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
14429 				}
14430 			}
14431 		}
14432 
14433 		/*
14434 		 * If un_throttle has fallen below the low-water mark, we
14435 		 * restore the maximum value here (and allow it to ratchet
14436 		 * down again if necessary).
14437 		 */
14438 		if (un->un_throttle < un->un_min_throttle) {
14439 			un->un_throttle = un->un_saved_throttle;
14440 		}
14441 	} else {
14442 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14443 		    "restoring limit from 0x%x to 0x%x\n",
14444 		    un->un_throttle, un->un_saved_throttle);
14445 		un->un_throttle = un->un_saved_throttle;
14446 	}
14447 
14448 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14449 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14450 
14451 	sd_start_cmds(un, NULL);
14452 
14453 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14454 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14455 	    un, un->un_throttle);
14456 
14457 	mutex_exit(SD_MUTEX(un));
14458 
14459 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14460 }
14461 
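/*
 * Worked example for the QFULL ramp-up above (illustration only): with
 * un_throttle == 5 and un_saved_throttle == 16, each callback adds
 * max(un_throttle / 10, 1) - which is 1 for any limit below 20 - so the
 * throttle climbs one slot per SD_QFULL_THROTTLE_RESET_INTERVAL:
 *
 *	5 -> 6 -> 7 -> ... -> 15 -> 16	(timeouts stop at 16)
 *
 * In the TRAN_BUSY case, un_busy_throttle is nonzero and the old limit
 * is restored in a single step instead.
 */
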
14462 /*
14463  *    Function: sdrunout
14464  *
14465  * Description: Callback routine for scsi_init_pkt when a resource allocation
14466  *		fails.
14467  *
14468  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14469  *		soft state instance.
14470  *
14471  * Return Code: The scsi_init_pkt routine allows for the callback function to
14472  *		return a 0 indicating the callback should be rescheduled or a 1
14473  *		indicating not to reschedule. This routine always returns 1
14474  *		because the driver always provides a callback function to
14475  *		scsi_init_pkt. This results in a callback always being scheduled
14476  *		(via the scsi_init_pkt callback implementation) if a resource
14477  *		failure occurs.
14478  *
14479  *     Context: This callback function may not block or call routines that block
14480  *
14481  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14482  *		request persisting at the head of the list, which cannot be
14483  *		satisfied even after multiple retries. In the future the driver
14484  *		may implement some type of maximum runout count before failing
14485  *		an I/O.
14486  */
14487 
14488 static int
14489 sdrunout(caddr_t arg)
14490 {
14491 	struct sd_lun	*un = (struct sd_lun *)arg;
14492 
14493 	ASSERT(un != NULL);
14494 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14495 
14496 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14497 
14498 	mutex_enter(SD_MUTEX(un));
14499 	sd_start_cmds(un, NULL);
14500 	mutex_exit(SD_MUTEX(un));
14501 	/*
14502 	 * This callback routine always returns 1 (i.e. do not reschedule)
14503 	 * because we always specify sdrunout as the callback handler for
14504 	 * scsi_init_pkt inside the call to sd_start_cmds.
14505 	 */
14506 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14507 	return (1);
14508 }
14509 
14510 
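/*
 * Illustration only: sdrunout() is registered as the resource callback
 * in the scsi_init_pkt(9F) calls made on behalf of sd_start_cmds(),
 * along these general lines (argument names here are generic):
 *
 *	pktp = scsi_init_pkt(ap, NULL, bp, cmdlen, statuslen, privlen,
 *	    flags, sdrunout, (caddr_t)un);
 *
 * Returning 1 tells the framework not to reschedule this particular
 * callback; a fresh callback is registered by the next scsi_init_pkt()
 * attempt when sd_start_cmds() retries the allocation.
 */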
14511 /*
14512  *    Function: sdintr
14513  *
14514  * Description: Completion callback routine for scsi_pkt(9S) structs
14515  *		sent to the HBA driver via scsi_transport(9F).
14516  *
14517  *     Context: Interrupt context
14518  */
14519 
14520 static void
14521 sdintr(struct scsi_pkt *pktp)
14522 {
14523 	struct buf	*bp;
14524 	struct sd_xbuf	*xp;
14525 	struct sd_lun	*un;
14526 
14527 	ASSERT(pktp != NULL);
14528 	bp = (struct buf *)pktp->pkt_private;
14529 	ASSERT(bp != NULL);
14530 	xp = SD_GET_XBUF(bp);
14531 	ASSERT(xp != NULL);
14532 	ASSERT(xp->xb_pktp != NULL);
14533 	un = SD_GET_UN(bp);
14534 	ASSERT(un != NULL);
14535 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14536 
14537 #ifdef SD_FAULT_INJECTION
14538 
14539 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
14540 	SD_INFO(SD_LOG_IOERR, un, "sdintr: calling fault injection\n");
14541 	sd_faultinjection(pktp);
14542 
14543 #endif /* SD_FAULT_INJECTION */
14544 
14545 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14546 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14547 
14548 	mutex_enter(SD_MUTEX(un));
14549 
14550 	/* Reduce the count of the #commands currently in transport */
14551 	un->un_ncmds_in_transport--;
14552 	ASSERT(un->un_ncmds_in_transport >= 0);
14553 
14554 	/* Increment counter to indicate that the callback routine is active */
14555 	un->un_in_callback++;
14556 
14557 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14558 
14559 #ifdef	SDDEBUG
14560 	if (bp == un->un_retry_bp) {
14561 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14562 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14563 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14564 	}
14565 #endif
14566 
14567 	/*
14568 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
14569 	 */
14570 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14571 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14572 		    "Device is gone\n");
14573 		sd_return_failed_command(un, bp, EIO);
14574 		goto exit;
14575 	}
14576 
14577 	/*
14578 	 * First see if the pkt has auto-request sense data with it....
14579 	 * Look at the packet state first so we don't take a performance
14580 	 * hit looking at the arq enabled flag unless absolutely necessary.
14581 	 */
14582 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14583 	    (un->un_f_arq_enabled == TRUE)) {
14584 		/*
14585 		 * The HBA did an auto request sense for this command so check
14586 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14587 		 * driver command that should not be retried.
14588 		 */
14589 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14590 			/*
14591 			 * Save the relevant sense info into the xp for the
14592 			 * original cmd.
14593 			 */
14594 			struct scsi_arq_status *asp;
14595 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14596 			xp->xb_sense_status =
14597 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14598 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14599 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14600 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14601 			    min(sizeof (struct scsi_extended_sense),
14602 			    SENSE_LENGTH));
14603 
14604 			/* fail the command */
14605 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14606 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14607 			sd_return_failed_command(un, bp, EIO);
14608 			goto exit;
14609 		}
14610 
14611 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14612 		/*
14613 		 * We want to either retry or fail this command, so free
14614 		 * the DMA resources here.  If we retry the command then
14615 		 * the DMA resources will be reallocated in sd_start_cmds().
14616 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14617 		 * causes the *entire* transfer to start over again from the
14618 		 * beginning of the request, even for PARTIAL chunks that
14619 		 * have already transferred successfully.
14620 		 */
14621 		if ((un->un_f_is_fibre == TRUE) &&
14622 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14623 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14624 			scsi_dmafree(pktp);
14625 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14626 		}
14627 #endif
14628 
14629 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14630 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14631 
14632 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14633 		goto exit;
14634 	}
14635 
14636 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14637 	if (pktp->pkt_flags & FLAG_SENSING)  {
14638 		/* This pktp is from the unit's REQUEST_SENSE command */
14639 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14640 		    "sdintr: sd_handle_request_sense\n");
14641 		sd_handle_request_sense(un, bp, xp, pktp);
14642 		goto exit;
14643 	}
14644 
14645 	/*
14646 	 * Check to see if the command successfully completed as requested;
14647 	 * this is the most common case (and also the hot performance path).
14648 	 *
14649 	 * Requirements for successful completion are:
14650 	 * pkt_reason is CMD_CMPLT and packet status is status good.
14651 	 * In addition:
14652 	 * - A residual of zero indicates successful completion no matter what
14653 	 *   the command is.
14654 	 * - If the residual is not zero and the command is not a read or
14655 	 *   write, then it's still defined as successful completion. In other
14656 	 *   words, if the command is a read or write the residual must be
14657 	 *   zero for successful completion.
14658 	 * - If the residual is not zero and the command is a read or
14659 	 *   write, and it's a USCSICMD, then it's still defined as
14660 	 *   successful completion.
14661 	 */
14662 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14663 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14664 
14665 		/*
14666 		 * Since this command is returned with a good status, we
14667 		 * can reset the count for Sonoma failover.
14668 		 */
14669 		un->un_sonoma_failure_count = 0;
14670 
14671 		/*
14672 		 * Return all USCSI commands on good status
14673 		 */
14674 		if (pktp->pkt_resid == 0) {
14675 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14676 			    "sdintr: returning command for resid == 0\n");
14677 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14678 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14679 			SD_UPDATE_B_RESID(bp, pktp);
14680 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14681 			    "sdintr: returning command for resid != 0\n");
14682 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14683 			SD_UPDATE_B_RESID(bp, pktp);
14684 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14685 			    "sdintr: returning uscsi command\n");
14686 		} else {
14687 			goto not_successful;
14688 		}
14689 		sd_return_command(un, bp);
14690 
14691 		/*
14692 		 * Decrement counter to indicate that the callback routine
14693 		 * is done.
14694 		 */
14695 		un->un_in_callback--;
14696 		ASSERT(un->un_in_callback >= 0);
14697 		mutex_exit(SD_MUTEX(un));
14698 
14699 		return;
14700 	}
14701 
14702 not_successful:
14703 
14704 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14705 	/*
14706 	 * The following is based upon knowledge of the underlying transport
14707 	 * and its use of DMA resources.  This code should be removed when
14708 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14709 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14710 	 * and sd_start_cmds().
14711 	 *
14712 	 * Free any DMA resources associated with this command if there
14713 	 * is a chance it could be retried or enqueued for later retry.
14714 	 * If we keep the DMA binding then mpxio cannot reissue the
14715 	 * command on another path whenever a path failure occurs.
14716 	 *
14717 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14718 	 * causes the *entire* transfer to start over again from the
14719 	 * beginning of the request, even for PARTIAL chunks that
14720 	 * have already transferred successfully.
14721 	 *
14722 	 * This is only done for non-uscsi commands (and also skipped for the
14723 	 * driver's internal RQS command). Also just do this for Fibre Channel
14724 	 * devices as these are the only ones that support mpxio.
14725 	 */
14726 	if ((un->un_f_is_fibre == TRUE) &&
14727 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14728 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14729 		scsi_dmafree(pktp);
14730 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14731 	}
14732 #endif
14733 
14734 	/*
14735 	 * The command did not successfully complete as requested so check
14736 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14737 	 * driver command that should not be retried so just return. If
14738 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14739 	 */
14740 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14741 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14742 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14743 		/*
14744 		 * Issue a request sense if a check condition caused the error
14745 		 * (we handle the auto request sense case above), otherwise
14746 		 * just fail the command.
14747 		 */
14748 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14749 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14750 			sd_send_request_sense_command(un, bp, pktp);
14751 		} else {
14752 			sd_return_failed_command(un, bp, EIO);
14753 		}
14754 		goto exit;
14755 	}
14756 
14757 	/*
14758 	 * The command did not successfully complete as requested so process
14759 	 * the error, retry, and/or attempt recovery.
14760 	 */
14761 	switch (pktp->pkt_reason) {
14762 	case CMD_CMPLT:
14763 		switch (SD_GET_PKT_STATUS(pktp)) {
14764 		case STATUS_GOOD:
14765 			/*
14766 			 * The command completed successfully with a non-zero
14767 			 * residual
14768 			 */
14769 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14770 			    "sdintr: STATUS_GOOD\n");
14771 			sd_pkt_status_good(un, bp, xp, pktp);
14772 			break;
14773 
14774 		case STATUS_CHECK:
14775 		case STATUS_TERMINATED:
14776 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14777 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
14778 			sd_pkt_status_check_condition(un, bp, xp, pktp);
14779 			break;
14780 
14781 		case STATUS_BUSY:
14782 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14783 			    "sdintr: STATUS_BUSY\n");
14784 			sd_pkt_status_busy(un, bp, xp, pktp);
14785 			break;
14786 
14787 		case STATUS_RESERVATION_CONFLICT:
14788 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14789 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
14790 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
14791 			break;
14792 
14793 		case STATUS_QFULL:
14794 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14795 			    "sdintr: STATUS_QFULL\n");
14796 			sd_pkt_status_qfull(un, bp, xp, pktp);
14797 			break;
14798 
14799 		case STATUS_MET:
14800 		case STATUS_INTERMEDIATE:
14801 		case STATUS_SCSI2:
14802 		case STATUS_INTERMEDIATE_MET:
14803 		case STATUS_ACA_ACTIVE:
14804 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14805 			    "Unexpected SCSI status received: 0x%x\n",
14806 			    SD_GET_PKT_STATUS(pktp));
14807 			sd_return_failed_command(un, bp, EIO);
14808 			break;
14809 
14810 		default:
14811 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14812 			    "Invalid SCSI status received: 0x%x\n",
14813 			    SD_GET_PKT_STATUS(pktp));
14814 			sd_return_failed_command(un, bp, EIO);
14815 			break;
14816 
14817 		}
14818 		break;
14819 
14820 	case CMD_INCOMPLETE:
14821 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14822 		    "sdintr: CMD_INCOMPLETE\n");
14823 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
14824 		break;
14825 	case CMD_TRAN_ERR:
14826 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14827 		    "sdintr: CMD_TRAN_ERR\n");
14828 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
14829 		break;
14830 	case CMD_RESET:
14831 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14832 		    "sdintr: CMD_RESET\n");
14833 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
14834 		break;
14835 	case CMD_ABORTED:
14836 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14837 		    "sdintr: CMD_ABORTED\n");
14838 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
14839 		break;
14840 	case CMD_TIMEOUT:
14841 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14842 		    "sdintr: CMD_TIMEOUT\n");
14843 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
14844 		break;
14845 	case CMD_UNX_BUS_FREE:
14846 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14847 		    "sdintr: CMD_UNX_BUS_FREE\n");
14848 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
14849 		break;
14850 	case CMD_TAG_REJECT:
14851 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14852 		    "sdintr: CMD_TAG_REJECT\n");
14853 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
14854 		break;
14855 	default:
14856 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14857 		    "sdintr: default\n");
14858 		sd_pkt_reason_default(un, bp, xp, pktp);
14859 		break;
14860 	}
14861 
14862 exit:
14863 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
14864 
14865 	/* Decrement counter to indicate that the callback routine is done. */
14866 	un->un_in_callback--;
14867 	ASSERT(un->un_in_callback >= 0);
14868 
14869 	/*
14870 	 * At this point, the pkt has been dispatched, ie, it is either
14871 	 * being re-tried or has been returned to its caller and should
14872 	 * not be referenced.
14873 	 */
14874 
14875 	mutex_exit(SD_MUTEX(un));
14876 }
14877 
14878 
14879 /*
14880  *    Function: sd_print_incomplete_msg
14881  *
14882  * Description: Prints the error message for a CMD_INCOMPLETE error.
14883  *
14884  *   Arguments: un - ptr to associated softstate for the device.
14885  *		bp - ptr to the buf(9S) for the command.
14886  *		arg - message string ptr
14887  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
14888  *			or SD_NO_RETRY_ISSUED.
14889  *
14890  *     Context: May be called under interrupt context
14891  */
14892 
14893 static void
14894 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
14895 {
14896 	struct scsi_pkt	*pktp;
14897 	char	*msgp;
14898 	char	*cmdp = arg;
14899 
14900 	ASSERT(un != NULL);
14901 	ASSERT(mutex_owned(SD_MUTEX(un)));
14902 	ASSERT(bp != NULL);
14903 	ASSERT(arg != NULL);
14904 	pktp = SD_GET_PKTP(bp);
14905 	ASSERT(pktp != NULL);
14906 
14907 	switch (code) {
14908 	case SD_DELAYED_RETRY_ISSUED:
14909 	case SD_IMMEDIATE_RETRY_ISSUED:
14910 		msgp = "retrying";
14911 		break;
14912 	case SD_NO_RETRY_ISSUED:
14913 	default:
14914 		msgp = "giving up";
14915 		break;
14916 	}
14917 
14918 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
14919 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14920 		    "incomplete %s - %s\n", cmdp, msgp);
14921 	}
14922 }
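/*
 * For example, an incomplete buf(9S) read that has been queued for another
 * attempt would log (hypothetical output):
 *
 *	WARNING: ... (sd0): incomplete read - retrying
 */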
14923 
14924 
14925 
14926 /*
14927  *    Function: sd_pkt_status_good
14928  *
14929  * Description: Processing for a STATUS_GOOD code in pkt_status.
14930  *
14931  *     Context: May be called under interrupt context
14932  */
14933 
14934 static void
14935 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
14936 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
14937 {
14938 	char	*cmdp;
14939 
14940 	ASSERT(un != NULL);
14941 	ASSERT(mutex_owned(SD_MUTEX(un)));
14942 	ASSERT(bp != NULL);
14943 	ASSERT(xp != NULL);
14944 	ASSERT(pktp != NULL);
14945 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
14946 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
14947 	ASSERT(pktp->pkt_resid != 0);
14948 
14949 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
14950 
14951 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
14952 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
14953 	case SCMD_READ:
14954 		cmdp = "read";
14955 		break;
14956 	case SCMD_WRITE:
14957 		cmdp = "write";
14958 		break;
14959 	default:
14960 		SD_UPDATE_B_RESID(bp, pktp);
14961 		sd_return_command(un, bp);
14962 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14963 		return;
14964 	}
14965 
14966 	/*
14967 	 * See if we can retry the read/write, preferably immediately.
14968 	 * If retries are exhausted, then sd_retry_command() will update
14969 	 * the b_resid count.
14970 	 */
14971 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
14972 	    cmdp, EIO, (clock_t)0, NULL);
14973 
14974 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14975 }
14976 
14977 
14978 
14979 
14980 
14981 /*
14982  *    Function: sd_handle_request_sense
14983  *
14984  * Description: Processing for non-auto Request Sense command.
14985  *
14986  *   Arguments: un - ptr to associated softstate
14987  *		sense_bp - ptr to buf(9S) for the RQS command
14988  *		sense_xp - ptr to the sd_xbuf for the RQS command
14989  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
14990  *
14991  *     Context: May be called under interrupt context
14992  */
14993 
14994 static void
14995 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
14996 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
14997 {
14998 	struct buf	*cmd_bp;	/* buf for the original command */
14999 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
15000 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
15001 
15002 	ASSERT(un != NULL);
15003 	ASSERT(mutex_owned(SD_MUTEX(un)));
15004 	ASSERT(sense_bp != NULL);
15005 	ASSERT(sense_xp != NULL);
15006 	ASSERT(sense_pktp != NULL);
15007 
15008 	/*
15009 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
15010 	 * RQS command and not the original command.
15011 	 */
15012 	ASSERT(sense_pktp == un->un_rqs_pktp);
15013 	ASSERT(sense_bp   == un->un_rqs_bp);
15014 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
15015 	    (FLAG_SENSING | FLAG_HEAD));
15016 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
15017 	    FLAG_SENSING) == FLAG_SENSING);
15018 
15019 	/* These are the bp, xp, and pktp for the original command */
15020 	cmd_bp = sense_xp->xb_sense_bp;
15021 	cmd_xp = SD_GET_XBUF(cmd_bp);
15022 	cmd_pktp = SD_GET_PKTP(cmd_bp);
15023 
15024 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
15025 		/*
15026 		 * The REQUEST SENSE command failed.  Release the REQUEST
15027 		 * SENSE command for re-use, get back the bp for the original
15028 		 * command, and attempt to re-try the original command if
15029 		 * FLAG_DIAGNOSE is not set in the original packet.
15030 		 */
15031 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15032 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15033 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15034 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15035 			    NULL, NULL, EIO, (clock_t)0, NULL);
15036 			return;
15037 		}
15038 	}
15039 
15040 	/*
15041 	 * Save the relevant sense info into the xp for the original cmd.
15042 	 *
15043 	 * Note: if the request sense failed the state info will be zero
15044 	 * as set in sd_mark_rqs_busy()
15045 	 */
15046 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15047 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15048 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15049 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15050 
15051 	/*
15052 	 *  Free up the RQS command....
15053 	 *  NOTE:
15054 	 *	Must do this BEFORE calling sd_validate_sense_data!
15055 	 *	sd_validate_sense_data may return the original command in
15056 	 *	which case the pkt will be freed and the flags can no
15057 	 *	longer be touched.
15058 	 *	SD_MUTEX is held through this process until the command
15059 	 *	is dispatched based upon the sense data, so there are
15060 	 *	no race conditions.
15061 	 */
15062 	(void) sd_mark_rqs_idle(un, sense_xp);
15063 
15064 	/*
15065 	 * For a retryable command, see if we have valid sense data; if so,
15066 	 * turn it over to sd_decode_sense() to figure out the right course
15067 	 * of action. Just fail a non-retryable command.
15068 	 */
15069 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15070 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15071 		    SD_SENSE_DATA_IS_VALID) {
15072 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15073 		}
15074 	} else {
15075 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15076 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15077 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15078 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15079 		sd_return_failed_command(un, cmd_bp, EIO);
15080 	}
15081 }
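/*
 * A sketch of the bp/xp/pkt linkage assumed above. The RQS xbuf's
 * xb_sense_bp field points back at the buf(9S) for the original command,
 * whose packet carries FLAG_SENSING while the sense exchange is outstanding:
 *
 *	sense_xp->xb_sense_bp	== cmd_bp
 *	SD_GET_XBUF(cmd_bp)	== cmd_xp
 *	SD_GET_PKTP(cmd_bp)	== cmd_pktp	(pkt_flags & FLAG_SENSING)
 */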
15082 
15083 
15084 
15085 
15086 /*
15087  *    Function: sd_handle_auto_request_sense
15088  *
15089  * Description: Processing for auto-request sense information.
15090  *
15091  *   Arguments: un - ptr to associated softstate
15092  *		bp - ptr to buf(9S) for the command
15093  *		xp - ptr to the sd_xbuf for the command
15094  *		pktp - ptr to the scsi_pkt(9S) for the command
15095  *
15096  *     Context: May be called under interrupt context
15097  */
15098 
15099 static void
15100 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15101 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15102 {
15103 	struct scsi_arq_status *asp;
15104 
15105 	ASSERT(un != NULL);
15106 	ASSERT(mutex_owned(SD_MUTEX(un)));
15107 	ASSERT(bp != NULL);
15108 	ASSERT(xp != NULL);
15109 	ASSERT(pktp != NULL);
15110 	ASSERT(pktp != un->un_rqs_pktp);
15111 	ASSERT(bp   != un->un_rqs_bp);
15112 
15113 	/*
15114 	 * For auto-request sense, we get a scsi_arq_status back from
15115 	 * the HBA, with the sense data in the sts_sensedata member.
15116 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15117 	 */
15118 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
15119 
15120 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15121 		/*
15122 		 * The auto REQUEST SENSE failed; see if we can re-try
15123 		 * the original command.
15124 		 */
15125 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15126 		    "auto request sense failed (reason=%s)\n",
15127 		    scsi_rname(asp->sts_rqpkt_reason));
15128 
15129 		sd_reset_target(un, pktp);
15130 
15131 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15132 		    NULL, NULL, EIO, (clock_t)0, NULL);
15133 		return;
15134 	}
15135 
15136 	/* Save the relevant sense info into the xp for the original cmd. */
15137 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15138 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15139 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
15140 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15141 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
15142 
15143 	/*
15144 	 * See if we have valid sense data; if so, turn it over to
15145 	 * sd_decode_sense() to figure out the right course of action.
15146 	 */
15147 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15148 		sd_decode_sense(un, bp, xp, pktp);
15149 	}
15150 }
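/*
 * Minimal sketch of the auto-request-sense access pattern used above;
 * "sensep" is an illustrative local, not a name used by this routine:
 *
 *	struct scsi_arq_status *asp =
 *	    (struct scsi_arq_status *)pktp->pkt_scbp;
 *	uint8_t *sensep = (uint8_t *)&asp->sts_sensedata;
 *
 * The sts_rqpkt_* members describe the (virtual) REQUEST SENSE command
 * itself, while sts_sensedata holds the returned sense bytes.
 */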
15151 
15152 
15153 /*
15154  *    Function: sd_print_sense_failed_msg
15155  *
15156  * Description: Print a log message when RQS has failed.
15157  *
15158  *   Arguments: un - ptr to associated softstate
15159  *		bp - ptr to buf(9S) for the command
15160  *		arg - generic message string ptr
15161  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15162  *			or SD_NO_RETRY_ISSUED
15163  *
15164  *     Context: May be called from interrupt context
15165  */
15166 
15167 static void
15168 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15169 	int code)
15170 {
15171 	char	*msgp = arg;
15172 
15173 	ASSERT(un != NULL);
15174 	ASSERT(mutex_owned(SD_MUTEX(un)));
15175 	ASSERT(bp != NULL);
15176 
15177 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15178 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "%s", msgp);
15179 	}
15180 }
15181 
15182 
15183 /*
15184  *    Function: sd_validate_sense_data
15185  *
15186  * Description: Check the given sense data for validity.
15187  *		If the sense data is not valid, the command will
15188  *		be either failed or retried!
15189  *
15190  * Return Code: SD_SENSE_DATA_IS_INVALID
15191  *		SD_SENSE_DATA_IS_VALID
15192  *
15193  *     Context: May be called from interrupt context
15194  */
15195 
15196 static int
15197 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15198 {
15199 	struct scsi_extended_sense *esp;
15200 	struct	scsi_pkt *pktp;
15201 	size_t	actual_len;
15202 	char	*msgp = NULL;
15203 
15204 	ASSERT(un != NULL);
15205 	ASSERT(mutex_owned(SD_MUTEX(un)));
15206 	ASSERT(bp != NULL);
15207 	ASSERT(bp != un->un_rqs_bp);
15208 	ASSERT(xp != NULL);
15209 
15210 	pktp = SD_GET_PKTP(bp);
15211 	ASSERT(pktp != NULL);
15212 
15213 	/*
15214 	 * Check the status of the RQS command (auto or manual).
15215 	 */
15216 	switch (xp->xb_sense_status & STATUS_MASK) {
15217 	case STATUS_GOOD:
15218 		break;
15219 
15220 	case STATUS_RESERVATION_CONFLICT:
15221 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15222 		return (SD_SENSE_DATA_IS_INVALID);
15223 
15224 	case STATUS_BUSY:
15225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15226 		    "Busy Status on REQUEST SENSE\n");
15227 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15228 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15229 		return (SD_SENSE_DATA_IS_INVALID);
15230 
15231 	case STATUS_QFULL:
15232 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15233 		    "QFULL Status on REQUEST SENSE\n");
15234 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15235 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15236 		return (SD_SENSE_DATA_IS_INVALID);
15237 
15238 	case STATUS_CHECK:
15239 	case STATUS_TERMINATED:
15240 		msgp = "Check Condition on REQUEST SENSE\n";
15241 		goto sense_failed;
15242 
15243 	default:
15244 		msgp = "Not STATUS_GOOD on REQUEST SENSE\n";
15245 		goto sense_failed;
15246 	}
15247 
15248 	/*
15249 	 * See if we got the minimum required amount of sense data.
15250 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
15251 	 * or less.
15252 	 */
15253 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
15254 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15255 	    (actual_len == 0)) {
15256 		msgp = "Request Sense couldn't get sense data\n";
15257 		goto sense_failed;
15258 	}
15259 
15260 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15261 		msgp = "Not enough sense information\n";
15262 		goto sense_failed;
15263 	}
15264 
15265 	/*
15266 	 * We require the extended sense data
15267 	 */
15268 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15269 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15270 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15271 			static char tmp[8];
15272 			static char buf[148];
15273 			char *p = (char *)(xp->xb_sense_data);
15274 			int i;
15275 
15276 			mutex_enter(&sd_sense_mutex);
15277 			(void) strcpy(buf, "undecodable sense information:");
15278 			for (i = 0; i < actual_len; i++) {
15279 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15280 				(void) strcpy(&buf[strlen(buf)], tmp);
15281 			}
15282 			i = strlen(buf);
15283 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15284 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "%s", buf);
15285 			mutex_exit(&sd_sense_mutex);
15286 		}
15287 		/* Note: Legacy behavior, fail the command with no retry */
15288 		sd_return_failed_command(un, bp, EIO);
15289 		return (SD_SENSE_DATA_IS_INVALID);
15290 	}
15291 
15292 	/*
15293 	 * Check that es_code is valid (es_class concatenated with es_code
15294 	 * make up the "response code" field).  es_class will always be 7, so
15295 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
15296 	 * format.
15297 	 */
15298 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15299 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15300 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15301 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15302 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15303 		goto sense_failed;
15304 	}
15305 
15306 	return (SD_SENSE_DATA_IS_VALID);
15307 
15308 sense_failed:
15309 	/*
15310 	 * If the request sense failed (for whatever reason), attempt
15311 	 * to retry the original command.
15312 	 */
15313 #if defined(__i386) || defined(__amd64)
15314 	/*
15315 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
15316 	 * sddef.h for the SPARC platform, while x86 uses one binary
15317 	 * for both SCSI and FC.
15318 	 * The SD_RETRY_DELAY value used here needs to be adjusted
15319 	 * whenever SD_RETRY_DELAY changes in sddef.h.
15320 	 */
15321 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15322 	    sd_print_sense_failed_msg, msgp, EIO,
15323 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
15324 #else
15325 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15326 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15327 #endif
15328 
15329 	return (SD_SENSE_DATA_IS_INVALID);
15330 }
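/*
 * For illustration, a fixed-format sense buffer that passes all of the
 * checks above starts with response code 0x70 (es_class == 7, es_code ==
 * CODE_FMT_FIXED_CURRENT) and carries the sense key in the low nibble of
 * byte 2, e.g. (hypothetical data):
 *
 *	0x70 0x00 0x05 0x00 0x00 0x00 0x00 0x0a 0x00 ...
 *	          ^^^^ KEY_ILLEGAL_REQUEST (sense key 0x5)
 */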
15331 
15332 
15333 
15334 /*
15335  *    Function: sd_decode_sense
15336  *
15337  * Description: Take recovery action(s) when SCSI Sense Data is received.
15338  *
15339  *     Context: Interrupt context.
15340  */
15341 
15342 static void
15343 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15344 	struct scsi_pkt *pktp)
15345 {
15346 	uint8_t sense_key;
15347 
15348 	ASSERT(un != NULL);
15349 	ASSERT(mutex_owned(SD_MUTEX(un)));
15350 	ASSERT(bp != NULL);
15351 	ASSERT(bp != un->un_rqs_bp);
15352 	ASSERT(xp != NULL);
15353 	ASSERT(pktp != NULL);
15354 
15355 	sense_key = scsi_sense_key(xp->xb_sense_data);
15356 
15357 	switch (sense_key) {
15358 	case KEY_NO_SENSE:
15359 		sd_sense_key_no_sense(un, bp, xp, pktp);
15360 		break;
15361 	case KEY_RECOVERABLE_ERROR:
15362 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15363 		    bp, xp, pktp);
15364 		break;
15365 	case KEY_NOT_READY:
15366 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15367 		    bp, xp, pktp);
15368 		break;
15369 	case KEY_MEDIUM_ERROR:
15370 	case KEY_HARDWARE_ERROR:
15371 		sd_sense_key_medium_or_hardware_error(un,
15372 		    xp->xb_sense_data, bp, xp, pktp);
15373 		break;
15374 	case KEY_ILLEGAL_REQUEST:
15375 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15376 		break;
15377 	case KEY_UNIT_ATTENTION:
15378 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15379 		    bp, xp, pktp);
15380 		break;
15381 	case KEY_WRITE_PROTECT:
15382 	case KEY_VOLUME_OVERFLOW:
15383 	case KEY_MISCOMPARE:
15384 		sd_sense_key_fail_command(un, bp, xp, pktp);
15385 		break;
15386 	case KEY_BLANK_CHECK:
15387 		sd_sense_key_blank_check(un, bp, xp, pktp);
15388 		break;
15389 	case KEY_ABORTED_COMMAND:
15390 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15391 		break;
15392 	case KEY_VENDOR_UNIQUE:
15393 	case KEY_COPY_ABORTED:
15394 	case KEY_EQUAL:
15395 	case KEY_RESERVED:
15396 	default:
15397 		sd_sense_key_default(un, xp->xb_sense_data,
15398 		    bp, xp, pktp);
15399 		break;
15400 	}
15401 }
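/*
 * Note: scsi_sense_key() extracts the sense key from either fixed-format
 * or descriptor-format sense data, so the dispatch above does not depend
 * on which format the target returned.
 */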
15402 
15403 
15404 /*
15405  *    Function: sd_dump_memory
15406  *
15407  * Description: Debug logging routine to print the contents of a user provided
15408  *		buffer. The output of the buffer is broken up into 256-byte
15409  *		segments due to a size constraint of the scsi_log
15410  *		implementation.
15411  *
15412  *   Arguments: un - ptr to softstate
15413  *		comp - component mask
15414  *		title - "title" string to precede data when printed
15415  *		data - ptr to data block to be printed
15416  *		len - size of data block to be printed
15417  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15418  *
15419  *     Context: May be called from interrupt context
15420  */
15421 
15422 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15423 
15424 static char *sd_dump_format_string[] = {
15425 		" 0x%02x",
15426 		" %c"
15427 };
15428 
15429 static void
15430 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15431     int len, int fmt)
15432 {
15433 	int	i, j;
15434 	int	avail_count;
15435 	int	start_offset;
15436 	int	end_offset;
15437 	size_t	entry_len;
15438 	char	*bufp;
15439 	char	*local_buf;
15440 	char	*format_string;
15441 
15442 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15443 
15444 	/*
15445 	 * In the debug version of the driver, this function is called from a
15446 	 * number of places which are NOPs in the release driver.
15447 	 * The debug driver therefore has additional methods of filtering
15448 	 * debug output.
15449 	 */
15450 #ifdef SDDEBUG
15451 	/*
15452 	 * In the debug version of the driver we can reduce the amount of debug
15453 	 * messages by setting sd_error_level to something other than
15454 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15455 	 * sd_component_mask.
15456 	 */
15457 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15458 	    (sd_error_level != SCSI_ERR_ALL)) {
15459 		return;
15460 	}
15461 	if (((sd_component_mask & comp) == 0) ||
15462 	    (sd_error_level != SCSI_ERR_ALL)) {
15463 		return;
15464 	}
15465 #else
15466 	if (sd_error_level != SCSI_ERR_ALL) {
15467 		return;
15468 	}
15469 #endif
15470 
15471 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15472 	bufp = local_buf;
15473 	/*
15474 	 * Available length is the length of local_buf[], minus the
15475 	 * length of the title string, minus one for the ":", minus
15476 	 * one for the newline, minus one for the NULL terminator.
15477 	 * This gives the #bytes available for holding the printed
15478 	 * values from the given data buffer.
15479 	 */
15480 	if (fmt == SD_LOG_HEX) {
15481 		format_string = sd_dump_format_string[0];
15482 	} else /* SD_LOG_CHAR */ {
15483 		format_string = sd_dump_format_string[1];
15484 	}
15485 	/*
15486 	 * Available count is the number of elements from the given
15487 	 * data buffer that we can fit into the available length.
15488 	 * This is based upon the size of the format string used.
15489 	 * Make one entry and find it's size.
15490 	 * Make one entry and find its size.
15491 	(void) sprintf(bufp, format_string, data[0]);
15492 	entry_len = strlen(bufp);
15493 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
15494 
15495 	j = 0;
15496 	while (j < len) {
15497 		bufp = local_buf;
15498 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15499 		start_offset = j;
15500 
15501 		end_offset = start_offset + avail_count;
15502 
15503 		(void) sprintf(bufp, "%s:", title);
15504 		bufp += strlen(bufp);
15505 		for (i = start_offset; ((i < end_offset) && (j < len));
15506 		    i++, j++) {
15507 			(void) sprintf(bufp, format_string, data[i]);
15508 			bufp += entry_len;
15509 		}
15510 		(void) sprintf(bufp, "\n");
15511 
15512 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15513 	}
15514 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15515 }
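/*
 * Example usage (hypothetical data): dumping the three bytes
 * { 0x70, 0x00, 0x05 } with fmt == SD_LOG_HEX produces a single
 * scsi_log() line of the form:
 *
 *	Sense Data: 0x70 0x00 0x05
 */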
15516 
15517 /*
15518  *    Function: sd_print_sense_msg
15519  *
15520  * Description: Log a message based upon the given sense data.
15521  *
15522  *   Arguments: un - ptr to associated softstate
15523  *		bp - ptr to buf(9S) for the command
15524  *		arg - ptr to associated sd_sense_info struct
15525  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15526  *			or SD_NO_RETRY_ISSUED
15527  *
15528  *     Context: May be called from interrupt context
15529  */
15530 
15531 static void
15532 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15533 {
15534 	struct sd_xbuf	*xp;
15535 	struct scsi_pkt	*pktp;
15536 	uint8_t *sensep;
15537 	daddr_t request_blkno;
15538 	diskaddr_t err_blkno;
15539 	int severity;
15540 	int pfa_flag;
15541 	extern struct scsi_key_strings scsi_cmds[];
15542 
15543 	ASSERT(un != NULL);
15544 	ASSERT(mutex_owned(SD_MUTEX(un)));
15545 	ASSERT(bp != NULL);
15546 	xp = SD_GET_XBUF(bp);
15547 	ASSERT(xp != NULL);
15548 	pktp = SD_GET_PKTP(bp);
15549 	ASSERT(pktp != NULL);
15550 	ASSERT(arg != NULL);
15551 
15552 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15553 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15554 
15555 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15556 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15557 		severity = SCSI_ERR_RETRYABLE;
15558 	}
15559 
15560 	/* Use absolute block number for the request block number */
15561 	request_blkno = xp->xb_blkno;
15562 
15563 	/*
15564 	 * Now try to get the error block number from the sense data
15565 	 */
15566 	sensep = xp->xb_sense_data;
15567 
15568 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15569 	    (uint64_t *)&err_blkno)) {
15570 		/*
15571 		 * We retrieved the error block number from the information
15572 		 * portion of the sense data.
15573 		 *
15574 		 * For USCSI commands we are better off using the error
15575 		 * block no. as the requested block no. (This is the best
15576 		 * we can estimate.)
15577 		 */
15578 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15579 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15580 			request_blkno = err_blkno;
15581 		}
15582 	} else {
15583 		/*
15584 		 * Without the es_valid bit set (for fixed format) or an
15585 		 * information descriptor (for descriptor format) we cannot
15586 		 * be certain of the error blkno, so just use the
15587 		 * request_blkno.
15588 		 */
15589 		err_blkno = (diskaddr_t)request_blkno;
15590 	}
15591 
15592 	/*
15593 	 * The following will log the buffer contents for the release driver
15594 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15595 	 * level is set to verbose.
15596 	 */
15597 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15598 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15599 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15600 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15601 
15602 	if (pfa_flag == FALSE) {
15603 		/* This is normally only set for USCSI */
15604 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15605 			return;
15606 		}
15607 
15608 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15609 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15610 		    (severity < sd_error_level))) {
15611 			return;
15612 		}
15613 	}
15614 
15615 	/*
15616 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
15617 	 * Check for Sonoma Failover and keep a count of how many I/Os failed
15618 	if ((SD_IS_LSI(un)) &&
15619 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15620 	    (scsi_sense_asc(sensep) == 0x94) &&
15621 	    (scsi_sense_ascq(sensep) == 0x01)) {
15622 		un->un_sonoma_failure_count++;
15623 		if (un->un_sonoma_failure_count > 1) {
15624 			return;
15625 		}
15626 	}
15627 
15628 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15629 	    request_blkno, err_blkno, scsi_cmds,
15630 	    (struct scsi_extended_sense *)sensep,
15631 	    un->un_additional_codes, NULL);
15632 }
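/*
 * A note on the err_blkno recovery above: scsi_sense_info_uint64() succeeds
 * only when the sense data actually carries an information field (es_valid
 * set for fixed format, or an information descriptor for descriptor format);
 * otherwise err_blkno simply falls back to the request block number.
 */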
15633 
15634 /*
15635  *    Function: sd_sense_key_no_sense
15636  *
15637  * Description: Recovery action when sense data was not received.
15638  *
15639  *     Context: May be called from interrupt context
15640  */
15641 
15642 static void
15643 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15644 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15645 {
15646 	struct sd_sense_info	si;
15647 
15648 	ASSERT(un != NULL);
15649 	ASSERT(mutex_owned(SD_MUTEX(un)));
15650 	ASSERT(bp != NULL);
15651 	ASSERT(xp != NULL);
15652 	ASSERT(pktp != NULL);
15653 
15654 	si.ssi_severity = SCSI_ERR_FATAL;
15655 	si.ssi_pfa_flag = FALSE;
15656 
15657 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15658 
15659 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15660 	    &si, EIO, (clock_t)0, NULL);
15661 }
15662 
15663 
15664 /*
15665  *    Function: sd_sense_key_recoverable_error
15666  *
15667  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15668  *
15669  *     Context: May be called from interrupt context
15670  */
15671 
15672 static void
15673 sd_sense_key_recoverable_error(struct sd_lun *un,
15674 	uint8_t *sense_datap,
15675 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15676 {
15677 	struct sd_sense_info	si;
15678 	uint8_t asc = scsi_sense_asc(sense_datap);
15679 
15680 	ASSERT(un != NULL);
15681 	ASSERT(mutex_owned(SD_MUTEX(un)));
15682 	ASSERT(bp != NULL);
15683 	ASSERT(xp != NULL);
15684 	ASSERT(pktp != NULL);
15685 
15686 	/*
15687 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15688 	 */
15689 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15690 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15691 		si.ssi_severity = SCSI_ERR_INFO;
15692 		si.ssi_pfa_flag = TRUE;
15693 	} else {
15694 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15695 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15696 		si.ssi_severity = SCSI_ERR_RECOVERED;
15697 		si.ssi_pfa_flag = FALSE;
15698 	}
15699 
15700 	if (pktp->pkt_resid == 0) {
15701 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15702 		sd_return_command(un, bp);
15703 		return;
15704 	}
15705 
15706 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15707 	    &si, EIO, (clock_t)0, NULL);
15708 }
15709 
15710 
15711 
15712 
15713 /*
15714  *    Function: sd_sense_key_not_ready
15715  *
15716  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15717  *
15718  *     Context: May be called from interrupt context
15719  */
15720 
15721 static void
15722 sd_sense_key_not_ready(struct sd_lun *un,
15723 	uint8_t *sense_datap,
15724 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15725 {
15726 	struct sd_sense_info	si;
15727 	uint8_t asc = scsi_sense_asc(sense_datap);
15728 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15729 
15730 	ASSERT(un != NULL);
15731 	ASSERT(mutex_owned(SD_MUTEX(un)));
15732 	ASSERT(bp != NULL);
15733 	ASSERT(xp != NULL);
15734 	ASSERT(pktp != NULL);
15735 
15736 	si.ssi_severity = SCSI_ERR_FATAL;
15737 	si.ssi_pfa_flag = FALSE;
15738 
15739 	/*
15740 	 * Update error stats after first NOT READY error. Disks may have
15741 	 * been powered down and may need to be restarted.  For CDROMs,
15742 	 * report NOT READY errors only if media is present.
15743 	 */
15744 	if ((ISCD(un) && (asc == 0x3A)) ||
15745 	    (xp->xb_retry_count > 0)) {
15746 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15747 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15748 	}
15749 
15750 	/*
15751 	 * Just fail if the "not ready" retry limit has been reached.
15752 	 */
15753 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
15754 		/* Special check for error message printing for removables. */
15755 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15756 		    (ascq >= 0x04)) {
15757 			si.ssi_severity = SCSI_ERR_ALL;
15758 		}
15759 		goto fail_command;
15760 	}
15761 
15762 	/*
15763 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15764 	 * what to do.
15765 	 */
15766 	switch (asc) {
15767 	case 0x04:	/* LOGICAL UNIT NOT READY */
15768 		/*
15769 		 * Disk drives that don't spin up result in a very long delay
15770 		 * in format without warning messages. We will log a message
15771 		 * if the error level is set to verbose.
15772 		 */
15773 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15774 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15775 			    "logical unit not ready, resetting disk\n");
15776 		}
15777 
15778 		/*
15779 		 * There are different requirements for CDROMs and disks for
15780 		 * the number of retries.  If a CD-ROM is giving this, it is
15781 		 * probably reading TOC and is in the process of getting
15782 		 * ready, so we should keep on trying for a long time to make
15783 		 * sure that all types of media are taken into account (for
15784 		 * some media the drive takes a long time to read TOC).  For
15785 		 * disks we do not want to retry this too many times as this
15786 		 * can cause a long hang in format when the drive refuses to
15787 		 * spin up (a very common failure).
15788 		 */
15789 		switch (ascq) {
15790 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
15791 			/*
15792 			 * Disk drives frequently refuse to spin up which
15793 			 * results in a very long hang in format without
15794 			 * warning messages.
15795 			 *
15796 			 * Note: This code preserves the legacy behavior of
15797 			 * comparing xb_retry_count against zero for fibre
15798 			 * channel targets instead of comparing against the
15799 			 * un_reset_retry_count value.  The reason for this
15800 			 * discrepancy has been so utterly lost beneath the
15801 			 * Sands of Time that even Indiana Jones could not
15802 			 * find it.
15803 			 */
15804 			if (un->un_f_is_fibre == TRUE) {
15805 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15806 				    (xp->xb_retry_count > 0)) &&
15807 				    (un->un_startstop_timeid == NULL)) {
15808 					scsi_log(SD_DEVINFO(un), sd_label,
15809 					    CE_WARN, "logical unit not ready, "
15810 					    "resetting disk\n");
15811 					sd_reset_target(un, pktp);
15812 				}
15813 			} else {
15814 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15815 				    (xp->xb_retry_count >
15816 				    un->un_reset_retry_count)) &&
15817 				    (un->un_startstop_timeid == NULL)) {
15818 					scsi_log(SD_DEVINFO(un), sd_label,
15819 					    CE_WARN, "logical unit not ready, "
15820 					    "resetting disk\n");
15821 					sd_reset_target(un, pktp);
15822 				}
15823 			}
15824 			break;
15825 
15826 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
15827 			/*
15828 			 * If the target is in the process of becoming
15829 			 * ready, just proceed with the retry. This can
15830 			 * happen with CD-ROMs that take a long time to
15831 			 * read TOC after a power cycle or reset.
15832 			 */
15833 			goto do_retry;
15834 
15835 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
15836 			break;
15837 
15838 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
15839 			/*
15840 			 * Retries cannot help here so just fail right away.
15841 			 */
15842 			goto fail_command;
15843 
15844 		case 0x88:
15845 			/*
15846 			 * Vendor-unique code for T3/T4: it indicates a
15847 			 * path problem in a multipathed config, but as far as
15848 			 * the target driver is concerned it equates to a fatal
15849 			 * error, so we should just fail the command right away
15850 			 * (without printing anything to the console). If this
15851 			 * is not a T3/T4, fall thru to the default recovery
15852 			 * action.
15853 			 * T3/T4 is FC only, don't need to check is_fibre
15854 			 */
15855 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
15856 				sd_return_failed_command(un, bp, EIO);
15857 				return;
15858 			}
15859 			/* FALLTHRU */
15860 
15861 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
15862 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
15863 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
15864 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
15865 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
15866 		default:    /* Possible future codes in SCSI spec? */
15867 			/*
15868 			 * For removable-media devices, do not retry if
15869 			 * ASCQ > 2 as these result mostly from USCSI commands
15870 			 * on MMC devices issued to check status of an
15871 			 * operation initiated in immediate mode.  Also for
15872 			 * ASCQ >= 4 do not print console messages as these
15873 			 * mainly represent a user-initiated operation
15874 			 * instead of a system failure.
15875 			 */
15876 			if (un->un_f_has_removable_media) {
15877 				si.ssi_severity = SCSI_ERR_ALL;
15878 				goto fail_command;
15879 			}
15880 			break;
15881 		}
15882 
15883 		/*
15884 		 * As part of our recovery attempt for the NOT READY
15885 		 * condition, we issue a START STOP UNIT command. However
15886 		 * we want to wait for a short delay before attempting this
15887 		 * as there may still be more commands coming back from the
15888 		 * target with the check condition. To do this we use
15889 		 * timeout(9F) to call sd_start_stop_unit_callback() after
15890 		 * the delay interval expires. (sd_start_stop_unit_callback()
15891 		 * dispatches sd_start_stop_unit_task(), which will issue
15892 		 * the actual START STOP UNIT command.) The delay interval
15893 		 * is one-half of the delay that we will use to retry the
15894 		 * command that generated the NOT READY condition.
15895 		 *
15896 		 * Note that we could just dispatch sd_start_stop_unit_task()
15897 		 * from here and allow it to sleep for the delay interval,
15898 		 * but then we would be tying up the taskq thread
15899 		 * unnecessarily for the duration of the delay.
15900 		 *
15901 		 * Do not issue the START STOP UNIT if the current command
15902 		 * is already a START STOP UNIT.
15903 		 */
15904 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
15905 			break;
15906 		}
15907 
15908 		/*
15909 		 * Do not schedule the timeout if one is already pending.
15910 		 */
15911 		if (un->un_startstop_timeid != NULL) {
15912 			SD_INFO(SD_LOG_ERROR, un,
15913 			    "sd_sense_key_not_ready: restart already issued to"
15914 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
15915 			    ddi_get_instance(SD_DEVINFO(un)));
15916 			break;
15917 		}
15918 
15919 		/*
15920 		 * Schedule the START STOP UNIT command, then queue the command
15921 		 * for a retry.
15922 		 *
15923 		 * Note: A timeout is not scheduled for this retry because we
15924 		 * want the retry to be serial with the START_STOP_UNIT. The
15925 		 * retry will be started when the START_STOP_UNIT is completed
15926 		 * in sd_start_stop_unit_task.
15927 		 */
15928 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
15929 		    un, SD_BSY_TIMEOUT / 2);
15930 		xp->xb_retry_count++;
15931 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
15932 		return;
15933 
15934 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
15935 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15936 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15937 			    "unit does not respond to selection\n");
15938 		}
15939 		break;
15940 
15941 	case 0x3A:	/* MEDIUM NOT PRESENT */
15942 		if (sd_error_level >= SCSI_ERR_FATAL) {
15943 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15944 			    "Caddy not inserted in drive\n");
15945 		}
15946 
15947 		sr_ejected(un);
15948 		un->un_mediastate = DKIO_EJECTED;
15949 		/* The state has changed, inform the media watch routines */
15950 		cv_broadcast(&un->un_state_cv);
15951 		/* Just fail if no media is present in the drive. */
15952 		goto fail_command;
15953 
15954 	default:
15955 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15956 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15957 			    "Unit not Ready. Additional sense code 0x%x\n",
15958 			    asc);
15959 		}
15960 		break;
15961 	}
15962 
15963 do_retry:
15964 
15965 	/*
15966 	 * Retry the command, as some targets may report NOT READY for
15967 	 * several seconds after being reset.
15968 	 */
15969 	xp->xb_retry_count++;
15970 	si.ssi_severity = SCSI_ERR_RETRYABLE;
15971 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
15972 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
15973 
15974 	return;
15975 
15976 fail_command:
15977 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15978 	sd_return_failed_command(un, bp, EIO);
15979 }
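/*
 * Timing note for the recovery path above: the START STOP UNIT is scheduled
 * after SD_BSY_TIMEOUT / 2 ticks, one-half of the SD_BSY_TIMEOUT delay used
 * when the NOT READY retry itself is issued through sd_retry_command() at
 * do_retry.
 */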
15980 
15981 
15982 
15983 /*
15984  *    Function: sd_sense_key_medium_or_hardware_error
15985  *
15986  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
15987  *		sense key.
15988  *
15989  *     Context: May be called from interrupt context
15990  */
15991 
15992 static void
15993 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
15994 	uint8_t *sense_datap,
15995 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15996 {
15997 	struct sd_sense_info	si;
15998 	uint8_t sense_key = scsi_sense_key(sense_datap);
15999 	uint8_t asc = scsi_sense_asc(sense_datap);
16000 
16001 	ASSERT(un != NULL);
16002 	ASSERT(mutex_owned(SD_MUTEX(un)));
16003 	ASSERT(bp != NULL);
16004 	ASSERT(xp != NULL);
16005 	ASSERT(pktp != NULL);
16006 
16007 	si.ssi_severity = SCSI_ERR_FATAL;
16008 	si.ssi_pfa_flag = FALSE;
16009 
16010 	if (sense_key == KEY_MEDIUM_ERROR) {
16011 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
16012 	}
16013 
16014 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16015 
16016 	if ((un->un_reset_retry_count != 0) &&
16017 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
16018 		mutex_exit(SD_MUTEX(un));
16019 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
16020 		if (un->un_f_allow_bus_device_reset == TRUE) {
16021 
16022 			boolean_t try_resetting_target = B_TRUE;
16023 
16024 			/*
16025 			 * We need to be able to handle a specific ASC when we
16026 			 * are handling a KEY_HARDWARE_ERROR. In particular,
16027 			 * taking the default action of resetting the target may
16028 			 * not be the appropriate way to attempt recovery.
16029 			 * Resetting a target because of a single LUN failure
16030 			 * victimizes all LUNs on that target.
16031 			 *
16032 			 * This is true for LSI arrays: if an LSI array
16033 			 * controller returns an ASC of 0x84 (LUN Dead), we
16034 			 * should trust it.
16035 			 */
16036 
16037 			if (sense_key == KEY_HARDWARE_ERROR) {
16038 				switch (asc) {
16039 				case 0x84:
16040 					if (SD_IS_LSI(un)) {
16041 						try_resetting_target = B_FALSE;
16042 					}
16043 					break;
16044 				default:
16045 					break;
16046 				}
16047 			}
16048 
16049 			if (try_resetting_target == B_TRUE) {
16050 				int reset_retval = 0;
16051 				if (un->un_f_lun_reset_enabled == TRUE) {
16052 					SD_TRACE(SD_LOG_IO_CORE, un,
16053 					    "sd_sense_key_medium_or_hardware_"
16054 					    "error: issuing RESET_LUN\n");
16055 					reset_retval =
16056 					    scsi_reset(SD_ADDRESS(un),
16057 					    RESET_LUN);
16058 				}
16059 				if (reset_retval == 0) {
16060 					SD_TRACE(SD_LOG_IO_CORE, un,
16061 					    "sd_sense_key_medium_or_hardware_"
16062 					    "error: issuing RESET_TARGET\n");
16063 					(void) scsi_reset(SD_ADDRESS(un),
16064 					    RESET_TARGET);
16065 				}
16066 			}
16067 		}
16068 		mutex_enter(SD_MUTEX(un));
16069 	}
16070 
16071 	/*
16072 	 * This really ought to be a fatal error, but we will retry anyway
16073 	 * as some drives report this as a spurious error.
16074 	 */
16075 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16076 	    &si, EIO, (clock_t)0, NULL);
16077 }
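/*
 * Reset escalation used above, in brief: when the retry count hits
 * un_reset_retry_count and bus/device reset is allowed, try RESET_LUN first
 * (if un_f_lun_reset_enabled); a zero return from scsi_reset() indicates
 * failure, in which case fall back to RESET_TARGET. RESET_ALL is
 * deliberately avoided as too intrusive (see 4112858).
 */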
16078 
16079 
16080 
16081 /*
16082  *    Function: sd_sense_key_illegal_request
16083  *
16084  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16085  *
16086  *     Context: May be called from interrupt context
16087  */
16088 
16089 static void
16090 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16091 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16092 {
16093 	struct sd_sense_info	si;
16094 
16095 	ASSERT(un != NULL);
16096 	ASSERT(mutex_owned(SD_MUTEX(un)));
16097 	ASSERT(bp != NULL);
16098 	ASSERT(xp != NULL);
16099 	ASSERT(pktp != NULL);
16100 
16101 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16102 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16103 
16104 	si.ssi_severity = SCSI_ERR_INFO;
16105 	si.ssi_pfa_flag = FALSE;
16106 
16107 	/* Pointless to retry if the target thinks it's an illegal request */
16108 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16109 	sd_return_failed_command(un, bp, EIO);
16110 }
16111 
16112 
16113 
16114 
16115 /*
16116  *    Function: sd_sense_key_unit_attention
16117  *
16118  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16119  *
16120  *     Context: May be called from interrupt context
16121  */
16122 
16123 static void
16124 sd_sense_key_unit_attention(struct sd_lun *un,
16125 	uint8_t *sense_datap,
16126 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16127 {
16128 	/*
16129 	 * For UNIT ATTENTION we allow retries for one minute. Devices
16130 	 * like Sonoma can return UNIT ATTENTION for close to a minute
16131 	 * under certain conditions.
16132 	 */
16133 	int	retry_check_flag = SD_RETRIES_UA;
16134 	boolean_t	kstat_updated = B_FALSE;
16135 	struct	sd_sense_info		si;
16136 	uint8_t asc = scsi_sense_asc(sense_datap);
16137 
16138 	ASSERT(un != NULL);
16139 	ASSERT(mutex_owned(SD_MUTEX(un)));
16140 	ASSERT(bp != NULL);
16141 	ASSERT(xp != NULL);
16142 	ASSERT(pktp != NULL);
16143 
16144 	si.ssi_severity = SCSI_ERR_INFO;
16145 	si.ssi_pfa_flag = FALSE;
16146 
16147 
16148 	switch (asc) {
16149 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16150 		if (sd_report_pfa != 0) {
16151 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16152 			si.ssi_pfa_flag = TRUE;
16153 			retry_check_flag = SD_RETRIES_STANDARD;
16154 			goto do_retry;
16155 		}
16156 
16157 		break;
16158 
16159 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
16160 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16161 			un->un_resvd_status |=
16162 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16163 		}
16164 #ifdef _LP64
16165 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16166 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16167 			    un, KM_NOSLEEP) == 0) {
16168 				/*
16169 				 * If we can't dispatch the task we'll just
16170 				 * live without descriptor sense.  We can
16171 				 * try again on the next "unit attention"
16172 				 */
16173 				SD_ERROR(SD_LOG_ERROR, un,
16174 				    "sd_sense_key_unit_attention: "
16175 				    "Could not dispatch "
16176 				    "sd_reenable_dsense_task\n");
16177 			}
16178 		}
16179 #endif /* _LP64 */
16180 		/* FALLTHRU */
16181 
16182 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16183 		if (!un->un_f_has_removable_media) {
16184 			break;
16185 		}
16186 
16187 		/*
16188 		 * When we get a unit attention from a removable-media device,
16189 		 * it may be in a state that will take a long time to recover
16190 		 * (e.g., from a reset).  Since we are executing in interrupt
16191 		 * context here, we cannot wait around for the device to come
16192 		 * back. So hand this command off to sd_media_change_task()
16193 		 * for deferred processing under taskq thread context. (Note
16194 		 * that the command still may be failed if a problem is
16195 		 * encountered at a later time.)
16196 		 */
16197 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16198 		    KM_NOSLEEP) == 0) {
16199 			/*
16200 			 * Cannot dispatch the request so fail the command.
16201 			 */
16202 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16203 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16204 			si.ssi_severity = SCSI_ERR_FATAL;
16205 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16206 			sd_return_failed_command(un, bp, EIO);
16207 		}
16208 
16209 		/*
16210 		 * If we failed to dispatch sd_media_change_task(), we already
16211 		 * updated the error stats above. If the dispatch succeeded,
16212 		 * the stats will be updated later if the task encounters an
16213 		 * error. Either way, set the kstat_updated flag here.
16214 		 */
16215 		kstat_updated = B_TRUE;
16216 
16217 		/*
16218 		 * Either the command has been successfully dispatched to a
16219 		 * task Q for retrying, or the dispatch failed. In either case
16220 		 * do NOT retry again by calling sd_retry_command. Doing so
16221 		 * would set up two retries of the same command; when one
16222 		 * completes and frees the resources, the other would access
16223 		 * freed memory, a bad thing.
16224 		 */
16225 		return;
16226 
16227 	default:
16228 		break;
16229 	}
16230 
16231 	/*
16232 	 * Update kstat if we haven't done that.
16233 	 */
16234 	if (!kstat_updated) {
16235 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16236 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16237 	}
16238 
16239 do_retry:
16240 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16241 	    EIO, SD_UA_RETRY_DELAY, NULL);
16242 }
16243 
16244 
16245 
16246 /*
16247  *    Function: sd_sense_key_fail_command
16248  *
16249  * Description: Used to fail a command when we don't like the sense key that
16250  *		was returned.
16251  *
16252  *     Context: May be called from interrupt context
16253  */
16254 
16255 static void
16256 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16257 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16258 {
16259 	struct sd_sense_info	si;
16260 
16261 	ASSERT(un != NULL);
16262 	ASSERT(mutex_owned(SD_MUTEX(un)));
16263 	ASSERT(bp != NULL);
16264 	ASSERT(xp != NULL);
16265 	ASSERT(pktp != NULL);
16266 
16267 	si.ssi_severity = SCSI_ERR_FATAL;
16268 	si.ssi_pfa_flag = FALSE;
16269 
16270 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16271 	sd_return_failed_command(un, bp, EIO);
16272 }
16273 
16274 
16275 
16276 /*
16277  *    Function: sd_sense_key_blank_check
16278  *
16279  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16280  *		Has no monetary connotation.
16281  *
16282  *     Context: May be called from interrupt context
16283  */
16284 
16285 static void
16286 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16287 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16288 {
16289 	struct sd_sense_info	si;
16290 
16291 	ASSERT(un != NULL);
16292 	ASSERT(mutex_owned(SD_MUTEX(un)));
16293 	ASSERT(bp != NULL);
16294 	ASSERT(xp != NULL);
16295 	ASSERT(pktp != NULL);
16296 
16297 	/*
16298 	 * Blank check is not fatal for removable devices; therefore,
16299 	 * it does not require a console message.
16300 	 */
16301 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16302 	    SCSI_ERR_FATAL;
16303 	si.ssi_pfa_flag = FALSE;
16304 
16305 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16306 	sd_return_failed_command(un, bp, EIO);
16307 }
16308 
16309 
16310 
16311 
16312 /*
16313  *    Function: sd_sense_key_aborted_command
16314  *
16315  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16316  *
16317  *     Context: May be called from interrupt context
16318  */
16319 
16320 static void
16321 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16322 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16323 {
16324 	struct sd_sense_info	si;
16325 
16326 	ASSERT(un != NULL);
16327 	ASSERT(mutex_owned(SD_MUTEX(un)));
16328 	ASSERT(bp != NULL);
16329 	ASSERT(xp != NULL);
16330 	ASSERT(pktp != NULL);
16331 
16332 	si.ssi_severity = SCSI_ERR_FATAL;
16333 	si.ssi_pfa_flag = FALSE;
16334 
16335 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16336 
16337 	/*
16338 	 * This really ought to be a fatal error, but we will retry anyway
16339 	 * as some drives report this as a spurious error.
16340 	 */
16341 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16342 	    &si, EIO, (clock_t)0, NULL);
16343 }
16344 
16345 
16346 
16347 /*
16348  *    Function: sd_sense_key_default
16349  *
16350  * Description: Default recovery action for several SCSI sense keys (basically
16351  *		attempts a retry).
16352  *
16353  *     Context: May be called from interrupt context
16354  */
16355 
16356 static void
16357 sd_sense_key_default(struct sd_lun *un,
16358 	uint8_t *sense_datap,
16359 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16360 {
16361 	struct sd_sense_info	si;
16362 	uint8_t sense_key = scsi_sense_key(sense_datap);
16363 
16364 	ASSERT(un != NULL);
16365 	ASSERT(mutex_owned(SD_MUTEX(un)));
16366 	ASSERT(bp != NULL);
16367 	ASSERT(xp != NULL);
16368 	ASSERT(pktp != NULL);
16369 
16370 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16371 
16372 	/*
16373 	 * Undecoded sense key.	Attempt retries and hope that will fix
16374 	 * the problem.  Otherwise, we're dead.
16375 	 */
16376 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16377 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16378 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16379 	}
16380 
16381 	si.ssi_severity = SCSI_ERR_FATAL;
16382 	si.ssi_pfa_flag = FALSE;
16383 
16384 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16385 	    &si, EIO, (clock_t)0, NULL);
16386 }
16387 
16388 
16389 
16390 /*
16391  *    Function: sd_print_retry_msg
16392  *
16393  * Description: Print a message indicating the retry action being taken.
16394  *
16395  *   Arguments: un - ptr to associated softstate
16396  *		bp - ptr to buf(9S) for the command
16397  *		arg - not used.
16398  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16399  *			or SD_NO_RETRY_ISSUED
16400  *
16401  *     Context: May be called from interrupt context
16402  */
16403 /* ARGSUSED */
16404 static void
16405 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16406 {
16407 	struct sd_xbuf	*xp;
16408 	struct scsi_pkt *pktp;
16409 	char *reasonp;
16410 	char *msgp;
16411 
16412 	ASSERT(un != NULL);
16413 	ASSERT(mutex_owned(SD_MUTEX(un)));
16414 	ASSERT(bp != NULL);
16415 	pktp = SD_GET_PKTP(bp);
16416 	ASSERT(pktp != NULL);
16417 	xp = SD_GET_XBUF(bp);
16418 	ASSERT(xp != NULL);
16419 
16420 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16421 	mutex_enter(&un->un_pm_mutex);
16422 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16423 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16424 	    (pktp->pkt_flags & FLAG_SILENT)) {
16425 		mutex_exit(&un->un_pm_mutex);
16426 		goto update_pkt_reason;
16427 	}
16428 	mutex_exit(&un->un_pm_mutex);
16429 
16430 	/*
16431 	 * Suppress messages if they are all the same pkt_reason; with
16432 	 * TQ, many (up to 256) are returned with the same pkt_reason.
16433 	 * If we are in panic, then suppress the retry messages.
16434 	 */
16435 	switch (flag) {
16436 	case SD_NO_RETRY_ISSUED:
16437 		msgp = "giving up";
16438 		break;
16439 	case SD_IMMEDIATE_RETRY_ISSUED:
16440 	case SD_DELAYED_RETRY_ISSUED:
16441 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16442 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16443 		    (sd_error_level != SCSI_ERR_ALL))) {
16444 			return;
16445 		}
16446 		msgp = "retrying command";
16447 		break;
16448 	default:
16449 		goto update_pkt_reason;
16450 	}
16451 
16452 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16453 	    scsi_rname(pktp->pkt_reason));
16454 
16455 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16456 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16457 
16458 update_pkt_reason:
16459 	/*
16460 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16461 	 * This is to prevent multiple console messages for the same failure
16462 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if
16463 	 * and when the command is retried successfully, because there may
16464 	 * still be more commands coming back with the same pktp->pkt_reason.
16465 	 */
16466 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16467 		un->un_last_pkt_reason = pktp->pkt_reason;
16468 	}
16469 }
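
/*
 * Editor's note: a minimal sketch (not driver code) of the duplicate-
 * message suppression idiom used above, reduced to its essentials.
 * The names last_reason and log_once_per_reason are hypothetical.
 */
#if 0	/* illustrative example only */
static int	last_reason = -1;

static void
log_once_per_reason(int reason, const char *msg)
{
	/* Log only when the failure reason differs from the last one. */
	if (reason == last_reason)
		return;
	last_reason = reason;
	cmn_err(CE_WARN, "%s (reason %d)", msg, reason);
}
#endif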
16470 
16471 
16472 /*
16473  *    Function: sd_print_cmd_incomplete_msg
16474  *
16475  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16476  *
16477  *   Arguments: un - ptr to associated softstate
16478  *		bp - ptr to buf(9S) for the command
16479  *		arg - passed to sd_print_retry_msg()
16480  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16481  *			or SD_NO_RETRY_ISSUED
16482  *
16483  *     Context: May be called from interrupt context
16484  */
16485 
16486 static void
16487 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16488 	int code)
16489 {
16490 	dev_info_t	*dip;
16491 
16492 	ASSERT(un != NULL);
16493 	ASSERT(mutex_owned(SD_MUTEX(un)));
16494 	ASSERT(bp != NULL);
16495 
16496 	switch (code) {
16497 	case SD_NO_RETRY_ISSUED:
16498 		/* The command was failed.  Did someone turn off this target? */
16499 		if (un->un_state != SD_STATE_OFFLINE) {
16500 			/*
16501 			 * Suppress the message if we are detaching and
16502 			 * the device has been disconnected.  Note that
16503 			 * DEVI_IS_DEVICE_REMOVED is a consolidation-private
16504 			 * interface and not part of the DDI.
16505 			 */
16506 			dip = un->un_sd->sd_dev;
16507 			if (!(DEVI_IS_DETACHING(dip) &&
16508 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16509 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16510 				    "disk not responding to selection\n");
16511 			}
16512 			New_state(un, SD_STATE_OFFLINE);
16513 		}
16514 		break;
16515 
16516 	case SD_DELAYED_RETRY_ISSUED:
16517 	case SD_IMMEDIATE_RETRY_ISSUED:
16518 	default:
16519 		/* Command was successfully queued for retry */
16520 		sd_print_retry_msg(un, bp, arg, code);
16521 		break;
16522 	}
16523 }
16524 
16525 
16526 /*
16527  *    Function: sd_pkt_reason_cmd_incomplete
16528  *
16529  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16530  *
16531  *     Context: May be called from interrupt context
16532  */
16533 
16534 static void
16535 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16536 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16537 {
16538 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16539 
16540 	ASSERT(un != NULL);
16541 	ASSERT(mutex_owned(SD_MUTEX(un)));
16542 	ASSERT(bp != NULL);
16543 	ASSERT(xp != NULL);
16544 	ASSERT(pktp != NULL);
16545 
16546 	/* Do not do a reset if selection did not complete. */
16547 	/* Note: should this not just test the bit?  (See sketch below.) */
16548 	if (pktp->pkt_state != STATE_GOT_BUS) {
16549 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16550 		sd_reset_target(un, pktp);
16551 	}
16552 
16553 	/*
16554 	 * If the target was not successfully selected, then set
16555 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16556 	 * with the target, and further retries and/or commands are
16557 	 * likely to take a long time.
16558 	 */
16559 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16560 		flag |= SD_RETRIES_FAILFAST;
16561 	}
16562 
16563 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16564 
16565 	sd_retry_command(un, bp, flag,
16566 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16567 }
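
/*
 * Editor's note: a sketch of the alternative that the "test the bit"
 * note above alludes to. pkt_state is a bit mask, so one reading of the
 * note is a bit test rather than comparing the whole word against
 * STATE_GOT_BUS:
 */
#if 0	/* illustrative example only */
	/* Selection completed only if the STATE_GOT_TARGET bit is set. */
	if ((pktp->pkt_state & STATE_GOT_TARGET) != 0) {
		SD_UPDATE_ERRSTATS(un, sd_transerrs);
		sd_reset_target(un, pktp);
	}
#endif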
16568 
16569 
16570 
16571 /*
16572  *    Function: sd_pkt_reason_cmd_tran_err
16573  *
16574  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16575  *
16576  *     Context: May be called from interrupt context
16577  */
16578 
16579 static void
16580 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16581 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16582 {
16583 	ASSERT(un != NULL);
16584 	ASSERT(mutex_owned(SD_MUTEX(un)));
16585 	ASSERT(bp != NULL);
16586 	ASSERT(xp != NULL);
16587 	ASSERT(pktp != NULL);
16588 
16589 	/*
16590 	 * Do not reset if we got a parity error, or if
16591 	 * selection did not complete.
16592 	 */
16593 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16594 	/* Note: should this not just test the bit in pkt_state? */
16595 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16596 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16597 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16598 		sd_reset_target(un, pktp);
16599 	}
16600 
16601 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16602 
16603 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16604 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16605 }
16606 
16607 
16608 
16609 /*
16610  *    Function: sd_pkt_reason_cmd_reset
16611  *
16612  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16613  *
16614  *     Context: May be called from interrupt context
16615  */
16616 
16617 static void
16618 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16619 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16620 {
16621 	ASSERT(un != NULL);
16622 	ASSERT(mutex_owned(SD_MUTEX(un)));
16623 	ASSERT(bp != NULL);
16624 	ASSERT(xp != NULL);
16625 	ASSERT(pktp != NULL);
16626 
16627 	/* The target may still be running the command, so try to reset. */
16628 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16629 	sd_reset_target(un, pktp);
16630 
16631 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16632 
16633 	/*
16634 	 * If pkt_reason is CMD_RESET, chances are that this pkt got
16635 	 * reset because another target on this bus caused it. The target
16636 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16637 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16638 	 */
16639 
16640 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16641 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16642 }
16643 
16644 
16645 
16646 
16647 /*
16648  *    Function: sd_pkt_reason_cmd_aborted
16649  *
16650  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16651  *
16652  *     Context: May be called from interrupt context
16653  */
16654 
16655 static void
16656 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16657 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16658 {
16659 	ASSERT(un != NULL);
16660 	ASSERT(mutex_owned(SD_MUTEX(un)));
16661 	ASSERT(bp != NULL);
16662 	ASSERT(xp != NULL);
16663 	ASSERT(pktp != NULL);
16664 
16665 	/* The target may still be running the command, so try to reset. */
16666 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16667 	sd_reset_target(un, pktp);
16668 
16669 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16670 
16671 	/*
16672 	 * If pkt_reason is CMD_ABORTED, chances are that this pkt got
16673 	 * aborted because another target on this bus caused it. The target
16674 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16675 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16676 	 */
16677 
16678 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16679 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16680 }
16681 
16682 
16683 
16684 /*
16685  *    Function: sd_pkt_reason_cmd_timeout
16686  *
16687  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16688  *
16689  *     Context: May be called from interrupt context
16690  */
16691 
16692 static void
16693 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16694 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16695 {
16696 	ASSERT(un != NULL);
16697 	ASSERT(mutex_owned(SD_MUTEX(un)));
16698 	ASSERT(bp != NULL);
16699 	ASSERT(xp != NULL);
16700 	ASSERT(pktp != NULL);
16701 
16702 
16703 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16704 	sd_reset_target(un, pktp);
16705 
16706 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16707 
16708 	/*
16709 	 * A command timeout indicates that we could not establish
16710 	 * communication with the target, so set SD_RETRIES_FAILFAST
16711 	 * as further retries/commands are likely to take a long time.
16712 	 */
16713 	sd_retry_command(un, bp,
16714 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16715 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16716 }
16717 
16718 
16719 
16720 /*
16721  *    Function: sd_pkt_reason_cmd_unx_bus_free
16722  *
16723  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16724  *
16725  *     Context: May be called from interrupt context
16726  */
16727 
16728 static void
16729 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16730 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16731 {
16732 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16733 
16734 	ASSERT(un != NULL);
16735 	ASSERT(mutex_owned(SD_MUTEX(un)));
16736 	ASSERT(bp != NULL);
16737 	ASSERT(xp != NULL);
16738 	ASSERT(pktp != NULL);
16739 
16740 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16741 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16742 
16743 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
16744 	    sd_print_retry_msg : NULL;
16745 
16746 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16747 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16748 }
16749 
16750 
16751 /*
16752  *    Function: sd_pkt_reason_cmd_tag_reject
16753  *
16754  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16755  *
16756  *     Context: May be called from interrupt context
16757  */
16758 
16759 static void
16760 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16761 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16762 {
16763 	ASSERT(un != NULL);
16764 	ASSERT(mutex_owned(SD_MUTEX(un)));
16765 	ASSERT(bp != NULL);
16766 	ASSERT(xp != NULL);
16767 	ASSERT(pktp != NULL);
16768 
16769 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16770 	pktp->pkt_flags = 0;
16771 	un->un_tagflags = 0;
16772 	if (un->un_f_opt_queueing == TRUE) {
16773 		un->un_throttle = min(un->un_throttle, 3);
16774 	} else {
16775 		un->un_throttle = 1;
16776 	}
16777 	mutex_exit(SD_MUTEX(un));
16778 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
16779 	mutex_enter(SD_MUTEX(un));
16780 
16781 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16782 
16783 	/* Legacy behavior: do not check retry counts here. */
16784 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
16785 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16786 }
16787 
16788 
16789 /*
16790  *    Function: sd_pkt_reason_default
16791  *
16792  * Description: Default recovery actions for SCSA pkt_reason values that
16793  *		do not have more explicit recovery actions.
16794  *
16795  *     Context: May be called from interrupt context
16796  */
16797 
16798 static void
16799 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
16800 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16801 {
16802 	ASSERT(un != NULL);
16803 	ASSERT(mutex_owned(SD_MUTEX(un)));
16804 	ASSERT(bp != NULL);
16805 	ASSERT(xp != NULL);
16806 	ASSERT(pktp != NULL);
16807 
16808 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16809 	sd_reset_target(un, pktp);
16810 
16811 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16812 
16813 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16814 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16815 }
16816 
16817 
16818 
16819 /*
16820  *    Function: sd_pkt_status_check_condition
16821  *
16822  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
16823  *
16824  *     Context: May be called from interrupt context
16825  */
16826 
16827 static void
16828 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
16829 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16830 {
16831 	ASSERT(un != NULL);
16832 	ASSERT(mutex_owned(SD_MUTEX(un)));
16833 	ASSERT(bp != NULL);
16834 	ASSERT(xp != NULL);
16835 	ASSERT(pktp != NULL);
16836 
16837 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
16838 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
16839 
16840 	/*
16841 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
16842 	 * command will be retried after the request sense). Otherwise, retry
16843 	 * the command. Note: we are issuing the request sense even though the
16844 	 * retry limit may have been reached for the failed command.
16845 	 */
16846 	if (un->un_f_arq_enabled == FALSE) {
16847 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16848 		    "no ARQ, sending request sense command\n");
16849 		sd_send_request_sense_command(un, bp, pktp);
16850 	} else {
16851 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16852 		    "ARQ, retrying request sense command\n");
16853 #if defined(__i386) || defined(__amd64)
16854 		/*
16855 		 * The delay value here needs to be adjusted whenever
16856 		 * SD_RETRY_DELAY changes in sddef.h.
16857 		 */
16858 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
16859 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
16860 		    NULL);
16861 #else
16862 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
16863 		    EIO, SD_RETRY_DELAY, NULL);
16864 #endif
16865 	}
16866 
16867 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
16868 }
16869 
16870 
16871 /*
16872  *    Function: sd_pkt_status_busy
16873  *
16874  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
16875  *
16876  *     Context: May be called from interrupt context
16877  */
16878 
16879 static void
16880 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16881 	struct scsi_pkt *pktp)
16882 {
16883 	ASSERT(un != NULL);
16884 	ASSERT(mutex_owned(SD_MUTEX(un)));
16885 	ASSERT(bp != NULL);
16886 	ASSERT(xp != NULL);
16887 	ASSERT(pktp != NULL);
16888 
16889 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16890 	    "sd_pkt_status_busy: entry\n");
16891 
16892 	/* If retries are exhausted, just fail the command. */
16893 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
16894 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16895 		    "device busy too long\n");
16896 		sd_return_failed_command(un, bp, EIO);
16897 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16898 		    "sd_pkt_status_busy: exit\n");
16899 		return;
16900 	}
16901 	xp->xb_retry_count++;
16902 
16903 	/*
16904 	 * Try to reset the target. However, we do not want to perform
16905 	 * more than one reset if the device continues to fail. The reset
16906 	 * will be performed when the retry count reaches the reset
16907 	 * threshold.  This threshold should be set such that at least
16908 	 * one retry is issued before the reset is performed.
16909 	 */
16910 	if (xp->xb_retry_count ==
16911 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
16912 		int rval = 0;
16913 		mutex_exit(SD_MUTEX(un));
16914 		if (un->un_f_allow_bus_device_reset == TRUE) {
16915 			/*
16916 			 * First try to reset the LUN; if we cannot then
16917 			 * try to reset the target.
16918 			 */
16919 			if (un->un_f_lun_reset_enabled == TRUE) {
16920 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16921 				    "sd_pkt_status_busy: RESET_LUN\n");
16922 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
16923 			}
16924 			if (rval == 0) {
16925 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16926 				    "sd_pkt_status_busy: RESET_TARGET\n");
16927 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
16928 			}
16929 		}
16930 		if (rval == 0) {
16931 			/*
16932 			 * If the RESET_LUN and/or RESET_TARGET failed,
16933 			 * try RESET_ALL
16934 			 */
16935 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16936 			    "sd_pkt_status_busy: RESET_ALL\n");
16937 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
16938 		}
16939 		mutex_enter(SD_MUTEX(un));
16940 		if (rval == 0) {
16941 			/*
16942 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
16943 			 * At this point we give up & fail the command.
16944 			 */
16945 			sd_return_failed_command(un, bp, EIO);
16946 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16947 			    "sd_pkt_status_busy: exit (failed cmd)\n");
16948 			return;
16949 		}
16950 	}
16951 
16952 	/*
16953 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
16954 	 * we have already checked the retry counts above.
16955 	 */
16956 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
16957 	    EIO, SD_BSY_TIMEOUT, NULL);
16958 
16959 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16960 	    "sd_pkt_status_busy: exit\n");
16961 }
16962 
16963 
16964 /*
16965  *    Function: sd_pkt_status_reservation_conflict
16966  *
16967  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
16968  *		command status.
16969  *
16970  *     Context: May be called from interrupt context
16971  */
16972 
16973 static void
16974 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
16975 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16976 {
16977 	ASSERT(un != NULL);
16978 	ASSERT(mutex_owned(SD_MUTEX(un)));
16979 	ASSERT(bp != NULL);
16980 	ASSERT(xp != NULL);
16981 	ASSERT(pktp != NULL);
16982 
16983 	/*
16984 	 * If the command was PERSISTENT_RESERVE_[IN|OUT], the reservation
16985 	 * conflict could have various causes: incorrect keys, an initiator
16986 	 * that is not registered, no reservation held, etc. Return EACCES.
16987 	 */
16988 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
16989 		int cmd = SD_GET_PKT_OPCODE(pktp);
16990 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
16991 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
16992 			sd_return_failed_command(un, bp, EACCES);
16993 			return;
16994 		}
16995 	}
16996 
16997 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
16998 
16999 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
17000 		if (sd_failfast_enable != 0) {
17001 			/* By definition, we must panic here.... */
17002 			sd_panic_for_res_conflict(un);
17003 			/*NOTREACHED*/
17004 		}
17005 		SD_ERROR(SD_LOG_IO, un,
17006 		    "sd_handle_resv_conflict: Disk Reserved\n");
17007 		sd_return_failed_command(un, bp, EACCES);
17008 		return;
17009 	}
17010 
17011 	/*
17012 	 * 1147670: retry only if sd_retry_on_reservation_conflict
17013 	 * property is set (default is 1). Retries will not succeed
17014 	 * on a disk reserved by another initiator. HA systems
17015 	 * may reset this via sd.conf to avoid these retries.
17016 	 *
17017 	 * Note: the legacy return code for this failure is EIO; however,
17018 	 * EACCES seems more appropriate for a reservation conflict.
17019 	 */
17020 	if (sd_retry_on_reservation_conflict == 0) {
17021 		SD_ERROR(SD_LOG_IO, un,
17022 		    "sd_handle_resv_conflict: Device Reserved\n");
17023 		sd_return_failed_command(un, bp, EIO);
17024 		return;
17025 	}
17026 
17027 	/*
17028 	 * Retry the command if we can.
17029 	 *
17030 	 * Note: the legacy return code for this failure is EIO; however,
17031 	 * EACCES seems more appropriate for a reservation conflict.
17032 	 */
17033 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17034 	    (clock_t)2, NULL);
17035 }
17036 
17037 
17038 
17039 /*
17040  *    Function: sd_pkt_status_qfull
17041  *
17042  * Description: Handle a QUEUE FULL condition from the target.  This can
17043  *		occur if the HBA does not handle the queue full condition.
17044  *		(Basically this means third-party HBAs, as Sun HBAs will
17045  *		handle the queue full condition.)  Note that if there are
17046  *		some commands already in the transport, then the queue full
17047  *		has occurred because the queue for this nexus is actually
17048  *		full. If there are no commands in the transport, then the
17049  *		queue full is resulting from some other initiator or lun
17050  *		consuming all the resources at the target.
17051  *
17052  *     Context: May be called from interrupt context
17053  */
17054 
17055 static void
17056 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17057 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17058 {
17059 	ASSERT(un != NULL);
17060 	ASSERT(mutex_owned(SD_MUTEX(un)));
17061 	ASSERT(bp != NULL);
17062 	ASSERT(xp != NULL);
17063 	ASSERT(pktp != NULL);
17064 
17065 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17066 	    "sd_pkt_status_qfull: entry\n");
17067 
17068 	/*
17069 	 * Just lower the QFULL throttle and retry the command.  Note that
17070 	 * we do not limit the number of retries here.
17071 	 */
17072 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17073 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17074 	    SD_RESTART_TIMEOUT, NULL);
17075 
17076 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17077 	    "sd_pkt_status_qfull: exit\n");
17078 }
17079 
17080 
17081 /*
17082  *    Function: sd_reset_target
17083  *
17084  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17085  *		RESET_TARGET, or RESET_ALL.
17086  *
17087  *     Context: May be called under interrupt context.
17088  */
17089 
17090 static void
17091 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17092 {
17093 	int rval = 0;
17094 
17095 	ASSERT(un != NULL);
17096 	ASSERT(mutex_owned(SD_MUTEX(un)));
17097 	ASSERT(pktp != NULL);
17098 
17099 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17100 
17101 	/*
17102 	 * No need to reset if the transport layer has already done so.
17103 	 */
17104 	if ((pktp->pkt_statistics &
17105 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17106 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17107 		    "sd_reset_target: no reset\n");
17108 		return;
17109 	}
17110 
17111 	mutex_exit(SD_MUTEX(un));
17112 
17113 	if (un->un_f_allow_bus_device_reset == TRUE) {
17114 		if (un->un_f_lun_reset_enabled == TRUE) {
17115 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17116 			    "sd_reset_target: RESET_LUN\n");
17117 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17118 		}
17119 		if (rval == 0) {
17120 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17121 			    "sd_reset_target: RESET_TARGET\n");
17122 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17123 		}
17124 	}
17125 
17126 	if (rval == 0) {
17127 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17128 		    "sd_reset_target: RESET_ALL\n");
17129 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17130 	}
17131 
17132 	mutex_enter(SD_MUTEX(un));
17133 
17134 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17135 }
17136 
17137 
17138 /*
17139  *    Function: sd_media_change_task
17140  *
17141  * Description: Recovery action for a CDROM to become available.
17142  *
17143  *     Context: Executes in a taskq() thread context
17144  */
17145 
17146 static void
17147 sd_media_change_task(void *arg)
17148 {
17149 	struct	scsi_pkt	*pktp = arg;
17150 	struct	sd_lun		*un;
17151 	struct	buf		*bp;
17152 	struct	sd_xbuf		*xp;
17153 	int	err		= 0;
17154 	int	retry_count	= 0;
17155 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
17156 	struct	sd_sense_info	si;
17157 
17158 	ASSERT(pktp != NULL);
17159 	bp = (struct buf *)pktp->pkt_private;
17160 	ASSERT(bp != NULL);
17161 	xp = SD_GET_XBUF(bp);
17162 	ASSERT(xp != NULL);
17163 	un = SD_GET_UN(bp);
17164 	ASSERT(un != NULL);
17165 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17166 	ASSERT(un->un_f_monitor_media_state);
17167 
17168 	si.ssi_severity = SCSI_ERR_INFO;
17169 	si.ssi_pfa_flag = FALSE;
17170 
17171 	/*
17172 	 * When a reset is issued on a CDROM, it takes a long time to
17173 	 * recover. The first few attempts to read the capacity and other
17174 	 * things related to handling the unit attention fail (with an ASC
17175 	 * of 0x04 and an ASCQ of 0x01). In that case we want to do enough
17176 	 * retries, while limiting the retries in other cases of genuine
17177 	 * failure, such as no media in the drive.
17178 	 */
17179 	while (retry_count++ < retry_limit) {
17180 		if ((err = sd_handle_mchange(un)) == 0) {
17181 			break;
17182 		}
17183 		if (err == EAGAIN) {
17184 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17185 		}
17186 		/* Sleep for 0.5 sec. & try again */
17187 		delay(drv_usectohz(500000));
17188 	}
17189 
17190 	/*
17191 	 * Dispatch (retry or fail) the original command here,
17192 	 * along with appropriate console messages....
17193 	 *
17194 	 * Must grab the mutex before calling sd_retry_command,
17195 	 * sd_print_sense_msg and sd_return_failed_command.
17196 	 */
17197 	mutex_enter(SD_MUTEX(un));
17198 	if (err != SD_CMD_SUCCESS) {
17199 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17200 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17201 		si.ssi_severity = SCSI_ERR_FATAL;
17202 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17203 		sd_return_failed_command(un, bp, EIO);
17204 	} else {
17205 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17206 		    &si, EIO, (clock_t)0, NULL);
17207 	}
17208 	mutex_exit(SD_MUTEX(un));
17209 }
17210 
17211 
17212 
17213 /*
17214  *    Function: sd_handle_mchange
17215  *
17216  * Description: Perform geometry validation and other recovery when the
17217  *		medium in the CDROM drive has been changed.
17218  *
17219  * Return Code: 0 for success
17220  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17221  *		sd_send_scsi_READ_CAPACITY()
17222  *
17223  *     Context: Executes in a taskq() thread context
17224  */
17225 
17226 static int
17227 sd_handle_mchange(struct sd_lun *un)
17228 {
17229 	uint64_t	capacity;
17230 	uint32_t	lbasize;
17231 	int		rval;
17232 
17233 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17234 	ASSERT(un->un_f_monitor_media_state);
17235 
17236 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17237 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17238 		return (rval);
17239 	}
17240 
17241 	mutex_enter(SD_MUTEX(un));
17242 	sd_update_block_info(un, lbasize, capacity);
17243 
17244 	if (un->un_errstats != NULL) {
17245 		struct	sd_errstats *stp =
17246 		    (struct sd_errstats *)un->un_errstats->ks_data;
17247 		stp->sd_capacity.value.ui64 = (uint64_t)
17248 		    ((uint64_t)un->un_blockcount *
17249 		    (uint64_t)un->un_tgt_blocksize);
17250 	}
17251 
17252 
17253 	/*
17254 	 * Check if the media in the device is writable or not
17255 	 */
17256 	if (ISCD(un))
17257 		sd_check_for_writable_cd(un, SD_PATH_DIRECT_PRIORITY);
17258 
17259 	/*
17260 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17261 	 * valid geometry.
17262 	 */
17263 	mutex_exit(SD_MUTEX(un));
17264 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17265 
17266 
17267 	if (cmlb_validate(un->un_cmlbhandle, 0,
17268 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17269 		return (EIO);
17270 	} else {
17271 		if (un->un_f_pkstats_enabled) {
17272 			sd_set_pstats(un);
17273 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17274 			    "sd_handle_mchange: un:0x%p pstats created and "
17275 			    "set\n", un);
17276 		}
17277 	}
17278 
17279 
17280 	/*
17281 	 * Try to lock the door
17282 	 */
17283 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17284 	    SD_PATH_DIRECT_PRIORITY));
17285 }
17286 
17287 
17288 /*
17289  *    Function: sd_send_scsi_DOORLOCK
17290  *
17291  * Description: Issue the scsi DOOR LOCK command
17292  *
17293  *   Arguments: un    - pointer to driver soft state (unit) structure for
17294  *			this target.
17295  *		flag  - SD_REMOVAL_ALLOW
17296  *			SD_REMOVAL_PREVENT
17297  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17298  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17299  *			to use the USCSI "direct" chain and bypass the normal
17300  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17301  *			command is issued as part of an error recovery action.
17302  *
17303  * Return Code: 0   - Success
17304  *		errno return code from sd_send_scsi_cmd()
17305  *
17306  *     Context: Can sleep.
17307  */
17308 
17309 static int
17310 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17311 {
17312 	union scsi_cdb		cdb;
17313 	struct uscsi_cmd	ucmd_buf;
17314 	struct scsi_extended_sense	sense_buf;
17315 	int			status;
17316 
17317 	ASSERT(un != NULL);
17318 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17319 
17320 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17321 
17322 	/* already determined doorlock is not supported, fake success */
17323 	if (un->un_f_doorlock_supported == FALSE) {
17324 		return (0);
17325 	}
17326 
17327 	/*
17328 	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
17329 	 * ignore the command so that we can complete the
17330 	 * eject operation.
17331 	 */
17332 	if (flag == SD_REMOVAL_PREVENT) {
17333 		mutex_enter(SD_MUTEX(un));
17334 		if (un->un_f_ejecting == TRUE) {
17335 			mutex_exit(SD_MUTEX(un));
17336 			return (EAGAIN);
17337 		}
17338 		mutex_exit(SD_MUTEX(un));
17339 	}
17340 
17341 	bzero(&cdb, sizeof (cdb));
17342 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17343 
17344 	cdb.scc_cmd = SCMD_DOORLOCK;
17345 	cdb.cdb_opaque[4] = (uchar_t)flag;
17346 
17347 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17348 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17349 	ucmd_buf.uscsi_bufaddr	= NULL;
17350 	ucmd_buf.uscsi_buflen	= 0;
17351 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17352 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17353 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17354 	ucmd_buf.uscsi_timeout	= 15;
17355 
17356 	SD_TRACE(SD_LOG_IO, un,
17357 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17358 
17359 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17360 	    UIO_SYSSPACE, path_flag);
17361 
17362 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17363 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17364 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17365 		/* fake success and skip subsequent doorlock commands */
17366 		un->un_f_doorlock_supported = FALSE;
17367 		return (0);
17368 	}
17369 
17370 	return (status);
17371 }
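
/*
 * Editor's note: for reference, the 6-byte CDB built above has the SPC
 * PREVENT ALLOW MEDIUM REMOVAL layout sketched below. The assumption
 * that SD_REMOVAL_PREVENT is 1 and SD_REMOVAL_ALLOW is 0 (matching the
 * Prevent bit in CDB byte 4) is per sddef.h.
 */
#if 0	/* illustrative example only */
	uchar_t doorlock_cdb[CDB_GROUP0] = {
		SCMD_DOORLOCK,	/* opcode 0x1E */
		0, 0, 0,	/* reserved */
		(uchar_t)flag,	/* byte 4, bit 0: 1 = prevent, 0 = allow */
		0		/* control byte */
	};
#endif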
17372 
17373 /*
17374  *    Function: sd_send_scsi_READ_CAPACITY
17375  *
17376  * Description: This routine uses the scsi READ CAPACITY command to determine
17377  *		the device capacity in number of blocks and the device native
17378  *		block size. If this function returns a failure, then the
17379  *		values in *capp and *lbap are undefined.  If the capacity
17380  *		returned is 0xffffffff then the lun is too large for a
17381  *		normal READ CAPACITY command and the results of a
17382  *		READ CAPACITY 16 will be used instead.
17383  *
17384  *   Arguments: un   - ptr to soft state struct for the target
17385  *		capp - ptr to unsigned 64-bit variable to receive the
17386  *			capacity value from the command.
17387  *		lbap - ptr to unsigned 32-bit variable to receive the
17388  *			block size value from the command
17389  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17390  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17391  *			to use the USCSI "direct" chain and bypass the normal
17392  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17393  *			command is issued as part of an error recovery action.
17394  *
17395  * Return Code: 0   - Success
17396  *		EIO - IO error
17397  *		EACCES - Reservation conflict detected
17398  *		EAGAIN - Device is becoming ready
17399  *		errno return code from sd_send_scsi_cmd()
17400  *
17401  *     Context: Can sleep.  Blocks until command completes.
17402  */
17403 
17404 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17405 
17406 static int
17407 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17408 	int path_flag)
17409 {
17410 	struct	scsi_extended_sense	sense_buf;
17411 	struct	uscsi_cmd	ucmd_buf;
17412 	union	scsi_cdb	cdb;
17413 	uint32_t		*capacity_buf;
17414 	uint64_t		capacity;
17415 	uint32_t		lbasize;
17416 	int			status;
17417 
17418 	ASSERT(un != NULL);
17419 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17420 	ASSERT(capp != NULL);
17421 	ASSERT(lbap != NULL);
17422 
17423 	SD_TRACE(SD_LOG_IO, un,
17424 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17425 
17426 	/*
17427 	 * First send a READ_CAPACITY command to the target.
17428 	 * (This command is mandatory under SCSI-2.)
17429 	 *
17430 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17431 	 * Medium Indicator bit is cleared.  The address field must be
17432 	 * zero if the PMI bit is zero.
17433 	 */
17434 	bzero(&cdb, sizeof (cdb));
17435 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17436 
17437 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17438 
17439 	cdb.scc_cmd = SCMD_READ_CAPACITY;
17440 
17441 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17442 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17443 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17444 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17445 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17446 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17447 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17448 	ucmd_buf.uscsi_timeout	= 60;
17449 
17450 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17451 	    UIO_SYSSPACE, path_flag);
17452 
17453 	switch (status) {
17454 	case 0:
17455 		/* Return failure if we did not get valid capacity data. */
17456 		if (ucmd_buf.uscsi_resid != 0) {
17457 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17458 			return (EIO);
17459 		}
17460 
17461 		/*
17462 		 * Read capacity and block size from the READ CAPACITY 10 data.
17463 		 * This data may be adjusted later due to device specific
17464 		 * issues.
17465 		 *
17466 		 * According to the SCSI spec, the READ CAPACITY 10
17467 		 * command returns the following:
17468 		 *
17469 		 *  bytes 0-3: Maximum logical block address available.
17470 		 *		(MSB in byte:0 & LSB in byte:3)
17471 		 *
17472 		 *  bytes 4-7: Block length in bytes
17473 		 *		(MSB in byte:4 & LSB in byte:7)
17474 		 *
17475 		 */
17476 		capacity = BE_32(capacity_buf[0]);
17477 		lbasize = BE_32(capacity_buf[1]);
17478 
17479 		/*
17480 		 * Done with capacity_buf
17481 		 */
17482 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17483 
17484 		/*
17485 		 * if the reported capacity is set to all 0xf's, then
17486 		 * this disk is too large and requires SBC-2 commands.
17487 		 * Reissue the request using READ CAPACITY 16.
17488 		 */
17489 		if (capacity == 0xffffffff) {
17490 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17491 			    &lbasize, path_flag);
17492 			if (status != 0) {
17493 				return (status);
17494 			}
17495 		}
17496 		break;	/* Success! */
17497 	case EIO:
17498 		switch (ucmd_buf.uscsi_status) {
17499 		case STATUS_RESERVATION_CONFLICT:
17500 			status = EACCES;
17501 			break;
17502 		case STATUS_CHECK:
17503 			/*
17504 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17505 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17506 			 */
17507 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17508 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17509 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17510 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17511 				return (EAGAIN);
17512 			}
17513 			break;
17514 		default:
17515 			break;
17516 		}
17517 		/* FALLTHRU */
17518 	default:
17519 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17520 		return (status);
17521 	}
17522 
17523 	/*
17524 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17525 	 * (2352 and 0 are common), so for these devices always force the
17526 	 * value to 2048, as required by the ATAPI specs.
17527 	 */
17528 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17529 		lbasize = 2048;
17530 	}
17531 
17532 	/*
17533 	 * Get the maximum LBA value from the READ CAPACITY data.
17534 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17535 	 * was cleared when issuing the command. This means that the LBA
17536 	 * returned from the device is the LBA of the last logical block
17537 	 * on the logical unit.  The actual logical block count will be
17538 	 * this value plus one.
17539 	 *
17540 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17541 	 * so scale the capacity value to reflect this.
17542 	 */
17543 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
17544 
17545 	/*
17546 	 * Copy the values from the READ CAPACITY command into the space
17547 	 * provided by the caller.
17548 	 */
17549 	*capp = capacity;
17550 	*lbap = lbasize;
17551 
17552 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17553 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17554 
17555 	/*
17556 	 * Both the lbasize and capacity from the device must be nonzero,
17557 	 * otherwise we assume that the values are not valid and return
17558 	 * failure to the caller. (4203735)
17559 	 */
17560 	if ((capacity == 0) || (lbasize == 0)) {
17561 		return (EIO);
17562 	}
17563 
17564 	return (0);
17565 }
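
/*
 * Editor's note: an illustrative, self-contained sketch of the READ
 * CAPACITY(10) payload decode performed above: two big-endian 32-bit
 * fields (last LBA, block length), with the block count being the last
 * LBA plus one. Written userland-style; not driver code.
 */
#if 0	/* illustrative example only */
#include <stdint.h>

static uint64_t
decode_read_capacity_10(const uint8_t d[8], uint32_t *lbasizep)
{
	/* Bytes 0-3: last LBA, MSB first. */
	uint32_t last_lba = ((uint32_t)d[0] << 24) | ((uint32_t)d[1] << 16) |
	    ((uint32_t)d[2] << 8) | (uint32_t)d[3];

	/* Bytes 4-7: block length in bytes, MSB first. */
	*lbasizep = ((uint32_t)d[4] << 24) | ((uint32_t)d[5] << 16) |
	    ((uint32_t)d[6] << 8) | (uint32_t)d[7];

	/* A last LBA of 0xffffffff means "use READ CAPACITY(16)". */
	return ((uint64_t)last_lba + 1);
}
#endif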
17566 
17567 /*
17568  *    Function: sd_send_scsi_READ_CAPACITY_16
17569  *
17570  * Description: This routine uses the scsi READ CAPACITY 16 command to
17571  *		determine the device capacity in number of blocks and the
17572  *		device native block size.  If this function returns a failure,
17573  *		then the values in *capp and *lbap are undefined.
17574  *		This routine should always be called by
17575  *		sd_send_scsi_READ_CAPACITY, which will apply any device-
17576  *		specific adjustments to capacity and lbasize.
17577  *
17578  *   Arguments: un   - ptr to soft state struct for the target
17579  *		capp - ptr to unsigned 64-bit variable to receive the
17580  *			capacity value from the command.
17581  *		lbap - ptr to unsigned 32-bit variable to receive the
17582  *			block size value from the command
17583  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17584  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17585  *			to use the USCSI "direct" chain and bypass the normal
17586  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17587  *			this command is issued as part of an error recovery
17588  *			action.
17589  *
17590  * Return Code: 0   - Success
17591  *		EIO - IO error
17592  *		EACCES - Reservation conflict detected
17593  *		EAGAIN - Device is becoming ready
17594  *		errno return code from sd_send_scsi_cmd()
17595  *
17596  *     Context: Can sleep.  Blocks until command completes.
17597  */
17598 
17599 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17600 
17601 static int
17602 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17603 	uint32_t *lbap, int path_flag)
17604 {
17605 	struct	scsi_extended_sense	sense_buf;
17606 	struct	uscsi_cmd	ucmd_buf;
17607 	union	scsi_cdb	cdb;
17608 	uint64_t		*capacity16_buf;
17609 	uint64_t		capacity;
17610 	uint32_t		lbasize;
17611 	int			status;
17612 
17613 	ASSERT(un != NULL);
17614 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17615 	ASSERT(capp != NULL);
17616 	ASSERT(lbap != NULL);
17617 
17618 	SD_TRACE(SD_LOG_IO, un,
17619 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17620 
17621 	/*
17622 	 * First send a READ_CAPACITY_16 command to the target.
17623 	 *
17624 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17625 	 * Medium Indicator bit is cleared.  The address field must be
17626 	 * zero if the PMI bit is zero.
17627 	 */
17628 	bzero(&cdb, sizeof (cdb));
17629 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17630 
17631 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17632 
17633 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17634 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17635 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17636 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17637 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17638 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17639 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17640 	ucmd_buf.uscsi_timeout	= 60;
17641 
17642 	/*
17643 	 * Read Capacity (16) is a Service Action In command.  One
17644 	 * command byte (0x9E) is overloaded for multiple operations,
17645 	 * with the second CDB byte specifying the desired operation.
17646 	 */
17647 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17648 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
17649 
17650 	/*
17651 	 * Fill in allocation length field
17652 	 */
17653 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17654 
17655 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17656 	    UIO_SYSSPACE, path_flag);
17657 
17658 	switch (status) {
17659 	case 0:
17660 		/* Return failure if we did not get valid capacity data. */
17661 		if (ucmd_buf.uscsi_resid > 20) {
17662 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17663 			return (EIO);
17664 		}
17665 
17666 		/*
17667 		 * Read capacity and block size from the READ CAPACITY 16 data.
17668 		 * This data may be adjusted later due to device specific
17669 		 * issues.
17670 		 *
17671 		 * According to the SCSI spec, the READ CAPACITY 16
17672 		 * command returns the following:
17673 		 *
17674 		 *  bytes 0-7: Maximum logical block address available.
17675 		 *		(MSB in byte:0 & LSB in byte:7)
17676 		 *
17677 		 *  bytes 8-11: Block length in bytes
17678 		 *		(MSB in byte:8 & LSB in byte:11)
17679 		 *
17680 		 */
17681 		capacity = BE_64(capacity16_buf[0]);
17682 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
17683 
17684 		/*
17685 		 * Done with capacity16_buf
17686 		 */
17687 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17688 
17689 		/*
17690 		 * if the reported capacity is set to all 0xf's, then
17691 		 * this disk is too large.  This could only happen with
17692 		 * a device that supports LBAs larger than 64 bits, which
17693 		 * are not defined by any current T10 standard.
17694 		 */
17695 		if (capacity == 0xffffffffffffffff) {
17696 			return (EIO);
17697 		}
17698 		break;	/* Success! */
17699 	case EIO:
17700 		switch (ucmd_buf.uscsi_status) {
17701 		case STATUS_RESERVATION_CONFLICT:
17702 			status = EACCES;
17703 			break;
17704 		case STATUS_CHECK:
17705 			/*
17706 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17707 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17708 			 */
17709 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17710 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17711 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17712 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17713 				return (EAGAIN);
17714 			}
17715 			break;
17716 		default:
17717 			break;
17718 		}
17719 		/* FALLTHRU */
17720 	default:
17721 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17722 		return (status);
17723 	}
17724 
17725 	*capp = capacity;
17726 	*lbap = lbasize;
17727 
17728 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17729 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17730 
17731 	return (0);
17732 }
17733 
17734 
17735 /*
17736  *    Function: sd_send_scsi_START_STOP_UNIT
17737  *
17738  * Description: Issue a scsi START STOP UNIT command to the target.
17739  *
17740  *   Arguments: un    - pointer to driver soft state (unit) structure for
17741  *			this target.
17742  *		flag  - SD_TARGET_START
17743  *			SD_TARGET_STOP
17744  *			SD_TARGET_EJECT
17745  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17746  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17747  *			to use the USCSI "direct" chain and bypass the normal
17748  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17749  *			command is issued as part of an error recovery action.
17750  *
17751  * Return Code: 0   - Success
17752  *		EIO - IO error
17753  *		EACCES - Reservation conflict detected
17754  *		ENXIO  - Not Ready, medium not present
17755  *		errno return code from sd_send_scsi_cmd()
17756  *
17757  *     Context: Can sleep.
17758  */
17759 
17760 static int
17761 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17762 {
17763 	struct	scsi_extended_sense	sense_buf;
17764 	union scsi_cdb		cdb;
17765 	struct uscsi_cmd	ucmd_buf;
17766 	int			status;
17767 
17768 	ASSERT(un != NULL);
17769 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17770 
17771 	SD_TRACE(SD_LOG_IO, un,
17772 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
17773 
17774 	if (un->un_f_check_start_stop &&
17775 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
17776 	    (un->un_f_start_stop_supported != TRUE)) {
17777 		return (0);
17778 	}
17779 
17780 	/*
17781 	 * If we are performing an eject operation and
17782 	 * are asked to send any command other than SD_TARGET_EJECT,
17783 	 * we should immediately return.
17784 	 */
17785 	if (flag != SD_TARGET_EJECT) {
17786 		mutex_enter(SD_MUTEX(un));
17787 		if (un->un_f_ejecting == TRUE) {
17788 			mutex_exit(SD_MUTEX(un));
17789 			return (EAGAIN);
17790 		}
17791 		mutex_exit(SD_MUTEX(un));
17792 	}
17793 
17794 	bzero(&cdb, sizeof (cdb));
17795 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17796 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
17797 
17798 	cdb.scc_cmd = SCMD_START_STOP;
17799 	cdb.cdb_opaque[4] = (uchar_t)flag;
17800 
17801 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17802 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17803 	ucmd_buf.uscsi_bufaddr	= NULL;
17804 	ucmd_buf.uscsi_buflen	= 0;
17805 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17806 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
17807 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17808 	ucmd_buf.uscsi_timeout	= 200;
17809 
17810 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17811 	    UIO_SYSSPACE, path_flag);
17812 
17813 	switch (status) {
17814 	case 0:
17815 		break;	/* Success! */
17816 	case EIO:
17817 		switch (ucmd_buf.uscsi_status) {
17818 		case STATUS_RESERVATION_CONFLICT:
17819 			status = EACCES;
17820 			break;
17821 		case STATUS_CHECK:
17822 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
17823 				switch (scsi_sense_key(
17824 				    (uint8_t *)&sense_buf)) {
17825 				case KEY_ILLEGAL_REQUEST:
17826 					status = ENOTSUP;
17827 					break;
17828 				case KEY_NOT_READY:
17829 					if (scsi_sense_asc(
17830 					    (uint8_t *)&sense_buf) ==
17831 					    0x3A) {
17832 						status = ENXIO;
17833 					}
17834 					break;
17835 				default:
17836 					break;
17837 				}
17838 			}
17839 			break;
17840 		default:
17841 			break;
17842 		}
17843 		break;
17844 	default:
17845 		break;
17846 	}
17847 
17848 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
17849 
17850 	return (status);
17851 }
17852 
17853 
17854 /*
17855  *    Function: sd_start_stop_unit_callback
17856  *
17857  * Description: timeout(9F) callback to begin recovery process for a
17858  *		device that has spun down.
17859  *
17860  *   Arguments: arg - pointer to associated softstate struct.
17861  *
17862  *     Context: Executes in a timeout(9F) thread context
17863  */
17864 
17865 static void
17866 sd_start_stop_unit_callback(void *arg)
17867 {
17868 	struct sd_lun	*un = arg;
17869 	ASSERT(un != NULL);
17870 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17871 
17872 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
17873 
17874 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
17875 }
17876 
17877 
17878 /*
17879  *    Function: sd_start_stop_unit_task
17880  *
17881  * Description: Recovery procedure when a drive is spun down.
17882  *
17883  *   Arguments: arg - pointer to associated softstate struct.
17884  *
17885  *     Context: Executes in a taskq() thread context
17886  */
17887 
17888 static void
17889 sd_start_stop_unit_task(void *arg)
17890 {
17891 	struct sd_lun	*un = arg;
17892 
17893 	ASSERT(un != NULL);
17894 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17895 
17896 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
17897 
17898 	/*
17899 	 * Some unformatted drives report a not-ready error; there is no
17900 	 * need to restart if a format has been initiated.
17901 	 */
17902 	mutex_enter(SD_MUTEX(un));
17903 	if (un->un_f_format_in_progress == TRUE) {
17904 		mutex_exit(SD_MUTEX(un));
17905 		return;
17906 	}
17907 	mutex_exit(SD_MUTEX(un));
17908 
17909 	/*
17910 	 * When a START STOP command is issued from here, it is part of a
17911 	 * failure recovery operation and must be issued before any other
17912 	 * commands, including any pending retries. Thus it must be sent
17913 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
17914 	 * succeeds or not; we will start I/O after the attempt.
17915 	 */
17916 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
17917 	    SD_PATH_DIRECT_PRIORITY);
17918 
17919 	/*
17920 	 * The above call blocks until the START_STOP_UNIT command completes.
17921 	 * Now that it has completed, we must re-try the original IO that
17922 	 * received the NOT READY condition in the first place. There are
17923 	 * three possible conditions here:
17924 	 *
17925 	 *  (1) The original IO is on un_retry_bp.
17926 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
17927 	 *	is NULL.
17928 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
17929 	 *	points to some other, unrelated bp.
17930 	 *
17931 	 * For each case, we must call sd_start_cmds() with un_retry_bp
17932 	 * as the argument. If un_retry_bp is NULL, this will initiate
17933 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
17934 	 * then this will process the bp on un_retry_bp. That may or may not
17935 	 * be the original IO, but that does not matter: the important thing
17936 	 * is to keep the IO processing going at this point.
17937 	 *
17938 	 * Note: This is a very specific error recovery sequence associated
17939 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
17940 	 * serialize the I/O with completion of the spin-up.
17941 	 */
17942 	mutex_enter(SD_MUTEX(un));
17943 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17944 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
17945 	    un, un->un_retry_bp);
17946 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
17947 	sd_start_cmds(un, un->un_retry_bp);
17948 	mutex_exit(SD_MUTEX(un));
17949 
17950 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
17951 }
17952 
17953 
17954 /*
17955  *    Function: sd_send_scsi_INQUIRY
17956  *
17957  * Description: Issue the scsi INQUIRY command.
17958  *
17959  *   Arguments: un - ptr to soft state struct for the target
17960  *		bufaddr - buffer to receive the INQUIRY data
17961  *		buflen - size of the buffer at bufaddr
17962  *		evpd - EVPD bit setting for the INQUIRY CDB
17963  *		page_code - VPD page code to request when the EVPD bit is set
17964  *		residp - optional ptr to receive the residual byte count
17965  *
17966  * Return Code: 0   - Success
17967  *		errno return code from sd_send_scsi_cmd()
17968  *
17969  *     Context: Can sleep. Does not return until command is completed.
17970  */
17971 
17972 static int
17973 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
17974 	uchar_t evpd, uchar_t page_code, size_t *residp)
17975 {
17976 	union scsi_cdb		cdb;
17977 	struct uscsi_cmd	ucmd_buf;
17978 	int			status;
17979 
17980 	ASSERT(un != NULL);
17981 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17982 	ASSERT(bufaddr != NULL);
17983 
17984 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
17985 
17986 	bzero(&cdb, sizeof (cdb));
17987 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17988 	bzero(bufaddr, buflen);
17989 
17990 	cdb.scc_cmd = SCMD_INQUIRY;
17991 	cdb.cdb_opaque[1] = evpd;
17992 	cdb.cdb_opaque[2] = page_code;
17993 	FORMG0COUNT(&cdb, buflen);
17994 
17995 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17996 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17997 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
17998 	ucmd_buf.uscsi_buflen	= buflen;
17999 	ucmd_buf.uscsi_rqbuf	= NULL;
18000 	ucmd_buf.uscsi_rqlen	= 0;
18001 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
18002 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
18003 
18004 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18005 	    UIO_SYSSPACE, SD_PATH_DIRECT);
18006 
18007 	if ((status == 0) && (residp != NULL)) {
18008 		*residp = ucmd_buf.uscsi_resid;
18009 	}
18010 
18011 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
18012 
18013 	return (status);
18014 }
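
/*
 * Editor's note: a sketch of decoding a few fixed fields of standard
 * (non-EVPD) INQUIRY data per SPC; illustrative only. The vendor and
 * product identification fields are space-padded ASCII.
 */
#if 0	/* illustrative example only */
	uchar_t	dtype = bufaddr[0] & 0x1F;	/* peripheral device type */
	char	vid[9], pid[17];

	bcopy(&bufaddr[8], vid, 8);	/* bytes 8-15: T10 vendor ident */
	vid[8] = '\0';
	bcopy(&bufaddr[16], pid, 16);	/* bytes 16-31: product ident */
	pid[16] = '\0';
#endif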
18015 
18016 
18017 /*
18018  *    Function: sd_send_scsi_TEST_UNIT_READY
18019  *
18020  * Description: Issue the scsi TEST UNIT READY command.
18021  *		This routine can be told to set the flag USCSI_DIAGNOSE to
18022  *		prevent retrying failed commands. Use this when the intent
18023  *		is either to check for device readiness, to clear a Unit
18024  *		Attention, or to clear any outstanding sense data.
18025  *		However under specific conditions the expected behavior
18026  *		is for retries to bring a device ready, so use the flag
18027  *		with caution.
18028  *
18029  *   Arguments: un
18030  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
18031  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
18032  *			0: don't check for media present, do retries on cmd.
18033  *
18034  * Return Code: 0   - Success
18035  *		EIO - IO error
18036  *		EACCES - Reservation conflict detected
18037  *		ENXIO  - Not Ready, medium not present
18038  *		errno return code from sd_send_scsi_cmd()
18039  *
18040  *     Context: Can sleep. Does not return until command is completed.
18041  */
18042 
18043 static int
18044 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18045 {
18046 	struct	scsi_extended_sense	sense_buf;
18047 	union scsi_cdb		cdb;
18048 	struct uscsi_cmd	ucmd_buf;
18049 	int			status;
18050 
18051 	ASSERT(un != NULL);
18052 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18053 
18054 	SD_TRACE(SD_LOG_IO, un,
18055 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18056 
18057 	/*
18058 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
18059 	 * timeouts when they receive a TUR and the queue is not empty.
18060 	 * Check the configuration flag set during attach (indicating that
18061 	 * the drive has this firmware bug) and un_ncmds_in_transport
18062 	 * before issuing the TUR. If there are pending commands, return
18063 	 * success; this is a bit arbitrary, but it is OK for
18064 	 * non-removables (i.e. the elite1 disks) and for non-clustering
18065 	 * configurations.
18066 	 */
18067 	if (un->un_f_cfg_tur_check == TRUE) {
18068 		mutex_enter(SD_MUTEX(un));
18069 		if (un->un_ncmds_in_transport != 0) {
18070 			mutex_exit(SD_MUTEX(un));
18071 			return (0);
18072 		}
18073 		mutex_exit(SD_MUTEX(un));
18074 	}
18075 
18076 	bzero(&cdb, sizeof (cdb));
18077 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18078 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18079 
18080 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18081 
18082 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18083 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18084 	ucmd_buf.uscsi_bufaddr	= NULL;
18085 	ucmd_buf.uscsi_buflen	= 0;
18086 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18087 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18088 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18089 
18090 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18091 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18092 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18093 	}
18094 	ucmd_buf.uscsi_timeout	= 60;
18095 
18096 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18097 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18098 	    SD_PATH_STANDARD));
18099 
18100 	switch (status) {
18101 	case 0:
18102 		break;	/* Success! */
18103 	case EIO:
18104 		switch (ucmd_buf.uscsi_status) {
18105 		case STATUS_RESERVATION_CONFLICT:
18106 			status = EACCES;
18107 			break;
18108 		case STATUS_CHECK:
18109 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18110 				break;
18111 			}
18112 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18113 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18114 				KEY_NOT_READY) &&
18115 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18116 				status = ENXIO;
18117 			}
18118 			break;
18119 		default:
18120 			break;
18121 		}
18122 		break;
18123 	default:
18124 		break;
18125 	}
18126 
18127 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18128 
18129 	return (status);
18130 }
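
/*
 * Editor's note: for reference, what scsi_sense_key()/scsi_sense_asc()
 * extract in the STATUS_CHECK handling above, for fixed-format sense
 * data: the sense key is in byte 2 (low nibble), the ASC in byte 12,
 * and the ASCQ in byte 13. (The scsi_sense_*() routines also handle
 * descriptor-format sense data.)
 */
#if 0	/* illustrative example only */
	uint8_t	*s = (uint8_t *)&sense_buf;
	uint8_t	key = s[2] & 0x0F;	/* e.g. KEY_NOT_READY (0x2) */
	uint8_t	asc = s[12];		/* e.g. 0x3A: medium not present */
	uint8_t	ascq = s[13];
#endif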
18131 
18132 
18133 /*
18134  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18135  *
18136  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18137  *
18138  *   Arguments: un
18139  *
18140  * Return Code: 0   - Success
18141  *		EACCES
18142  *		ENOTSUP
18143  *		errno return code from sd_send_scsi_cmd()
18144  *
18145  *     Context: Can sleep. Does not return until command is completed.
18146  */
18147 
18148 static int
18149 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18150 	uint16_t data_len, uchar_t *data_bufp)
18151 {
18152 	struct scsi_extended_sense	sense_buf;
18153 	union scsi_cdb		cdb;
18154 	struct uscsi_cmd	ucmd_buf;
18155 	int			status;
18156 	int			no_caller_buf = FALSE;
18157 
18158 	ASSERT(un != NULL);
18159 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18160 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18161 
18162 	SD_TRACE(SD_LOG_IO, un,
18163 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18164 
18165 	bzero(&cdb, sizeof (cdb));
18166 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18167 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18168 	if (data_bufp == NULL) {
18169 		/* Allocate a default buf if the caller did not give one */
18170 		ASSERT(data_len == 0);
18171 		data_len  = MHIOC_RESV_KEY_SIZE;
18172 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18173 		no_caller_buf = TRUE;
18174 	}
18175 
18176 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18177 	cdb.cdb_opaque[1] = usr_cmd;
18178 	FORMG1COUNT(&cdb, data_len);
18179 
18180 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18181 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18182 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18183 	ucmd_buf.uscsi_buflen	= data_len;
18184 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18185 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18186 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18187 	ucmd_buf.uscsi_timeout	= 60;
18188 
18189 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18190 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18191 
18192 	switch (status) {
18193 	case 0:
18194 		break;	/* Success! */
18195 	case EIO:
18196 		switch (ucmd_buf.uscsi_status) {
18197 		case STATUS_RESERVATION_CONFLICT:
18198 			status = EACCES;
18199 			break;
18200 		case STATUS_CHECK:
18201 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18202 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18203 				KEY_ILLEGAL_REQUEST)) {
18204 				status = ENOTSUP;
18205 			}
18206 			break;
18207 		default:
18208 			break;
18209 		}
18210 		break;
18211 	default:
18212 		break;
18213 	}
18214 
18215 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18216 
18217 	if (no_caller_buf == TRUE) {
18218 		kmem_free(data_bufp, data_len);
18219 	}
18220 
18221 	return (status);
18222 }
18223 
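/*
 * Usage sketch (editorial addition, not part of the original source):
 * reading the registered reservation keys. The buffer sizing here is an
 * assumption for illustration; SD_READ_KEYS and the ENOTSUP mapping are
 * taken from the routine above.
 *
 *	uchar_t keys[MHIOC_RESV_KEY_SIZE * 8];
 *	int rc = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    sizeof (keys), keys);
 *	if (rc == ENOTSUP) {
 *		... target does not support persistent reservations ...
 *	}
 */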
18224 
18225 /*
18226  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18227  *
18228  * Description: This routine is the driver entry point for handling CD-ROM
18229  *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
18230  *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT, etc.) by sending
18231  *		the SCSI-3 PROUT commands to the device.
18232  *
18233  *   Arguments: un  -   Pointer to soft state struct for the target.
18234  *		usr_cmd SCSI-3 reservation facility command (one of
18235  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
18236  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
18237  *		usr_bufp - user provided pointer to a register, reserve
18238  *			descriptor, or preempt and abort structure
18239  *			(mhioc_register_t, mhioc_resv_desc_t, mhioc_preemptandabort_t)
18240  *
18241  * Return Code: 0   - Success
18242  *		EACCES
18243  *		ENOTSUP
18244  *		errno return code from sd_send_scsi_cmd()
18245  *
18246  *     Context: Can sleep. Does not return until command is completed.
18247  */
18248 
18249 static int
18250 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18251 	uchar_t	*usr_bufp)
18252 {
18253 	struct scsi_extended_sense	sense_buf;
18254 	union scsi_cdb		cdb;
18255 	struct uscsi_cmd	ucmd_buf;
18256 	int			status;
18257 	uchar_t			data_len = sizeof (sd_prout_t);
18258 	sd_prout_t		*prp;
18259 
18260 	ASSERT(un != NULL);
18261 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18262 	ASSERT(data_len == 24);	/* required by scsi spec */
18263 
18264 	SD_TRACE(SD_LOG_IO, un,
18265 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18266 
18267 	if (usr_bufp == NULL) {
18268 		return (EINVAL);
18269 	}
18270 
18271 	bzero(&cdb, sizeof (cdb));
18272 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18273 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18274 	prp = kmem_zalloc(data_len, KM_SLEEP);
18275 
18276 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18277 	cdb.cdb_opaque[1] = usr_cmd;
18278 	FORMG1COUNT(&cdb, data_len);
18279 
18280 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18281 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18282 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18283 	ucmd_buf.uscsi_buflen	= data_len;
18284 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18285 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18286 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18287 	ucmd_buf.uscsi_timeout	= 60;
18288 
18289 	switch (usr_cmd) {
18290 	case SD_SCSI3_REGISTER: {
18291 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18292 
18293 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18294 		bcopy(ptr->newkey.key, prp->service_key,
18295 		    MHIOC_RESV_KEY_SIZE);
18296 		prp->aptpl = ptr->aptpl;
18297 		break;
18298 	}
18299 	case SD_SCSI3_RESERVE:
18300 	case SD_SCSI3_RELEASE: {
18301 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18302 
18303 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18304 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18305 		cdb.cdb_opaque[2] = ptr->type;
18306 		break;
18307 	}
18308 	case SD_SCSI3_PREEMPTANDABORT: {
18309 		mhioc_preemptandabort_t *ptr =
18310 		    (mhioc_preemptandabort_t *)usr_bufp;
18311 
18312 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18313 		bcopy(ptr->victim_key.key, prp->service_key,
18314 		    MHIOC_RESV_KEY_SIZE);
18315 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18316 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
18317 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18318 		break;
18319 	}
18320 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18321 	{
18322 		mhioc_registerandignorekey_t *ptr;
18323 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18324 		bcopy(ptr->newkey.key,
18325 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18326 		prp->aptpl = ptr->aptpl;
18327 		break;
18328 	}
18329 	default:
18330 		ASSERT(FALSE);
18331 		break;
18332 	}
18333 
18334 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18335 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18336 
18337 	switch (status) {
18338 	case 0:
18339 		break;	/* Success! */
18340 	case EIO:
18341 		switch (ucmd_buf.uscsi_status) {
18342 		case STATUS_RESERVATION_CONFLICT:
18343 			status = EACCES;
18344 			break;
18345 		case STATUS_CHECK:
18346 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18347 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18348 				KEY_ILLEGAL_REQUEST)) {
18349 				status = ENOTSUP;
18350 			}
18351 			break;
18352 		default:
18353 			break;
18354 		}
18355 		break;
18356 	default:
18357 		break;
18358 	}
18359 
18360 	kmem_free(prp, data_len);
18361 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18362 	return (status);
18363 }
18364 
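/*
 * Usage sketch (editorial addition, not part of the original source):
 * registering a reservation key via the SD_SCSI3_REGISTER case above.
 * The key value is arbitrary and for illustration only.
 *
 *	mhioc_register_t reg;
 *	bzero(&reg, sizeof (reg));
 *	bcopy("examplek", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	if (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg) == EACCES) {
 *		... request blocked by a reservation conflict ...
 *	}
 */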
18365 
18366 /*
18367  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18368  *
18369  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18370  *
18371  *   Arguments: un - pointer to the target's soft state struct
18372  *		dkc - NULL for synchronous; else callback invoked on completion
18373  * Return Code: 0 - success
18374  *		errno-type error code
18375  *
18376  *     Context: kernel thread context only.
18377  */
18378 
18379 static int
18380 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18381 {
18382 	struct sd_uscsi_info	*uip;
18383 	struct uscsi_cmd	*uscmd;
18384 	union scsi_cdb		*cdb;
18385 	struct buf		*bp;
18386 	int			rval = 0;
18387 
18388 	SD_TRACE(SD_LOG_IO, un,
18389 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18390 
18391 	ASSERT(un != NULL);
18392 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18393 
18394 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
18395 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
18396 
18397 	/*
18398 	 * First get some memory for the uscsi_cmd struct and cdb
18399 	 * and initialize for SYNCHRONIZE_CACHE cmd.
18400 	 */
18401 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
18402 	uscmd->uscsi_cdblen = CDB_GROUP1;
18403 	uscmd->uscsi_cdb = (caddr_t)cdb;
18404 	uscmd->uscsi_bufaddr = NULL;
18405 	uscmd->uscsi_buflen = 0;
18406 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18407 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18408 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18409 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18410 	uscmd->uscsi_timeout = sd_io_time;
18411 
18412 	/*
18413 	 * Allocate an sd_uscsi_info struct and fill it with the info
18414 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18415 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18416 	 * since we allocate the buf here in this function, we do not
18417 	 * need to preserve the prior contents of b_private.
18418 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
18419 	 */
18420 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18421 	uip->ui_flags = SD_PATH_DIRECT;
18422 	uip->ui_cmdp  = uscmd;
18423 
18424 	bp = getrbuf(KM_SLEEP);
18425 	bp->b_private = uip;
18426 
18427 	/*
18428 	 * Setup buffer to carry uscsi request.
18429 	 * Set up the buffer to carry the uscsi request.
18430 	bp->b_flags  = B_BUSY;
18431 	bp->b_bcount = 0;
18432 	bp->b_blkno  = 0;
18433 
18434 	if (dkc != NULL) {
18435 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18436 		uip->ui_dkc = *dkc;
18437 	}
18438 
18439 	bp->b_edev = SD_GET_DEV(un);
18440 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18441 
18442 	(void) sd_uscsi_strategy(bp);
18443 
18444 	/*
18445 	 * If this is a synchronous request, wait for completion.
18446 	 * If async, just return and let the b_iodone callback
18447 	 * clean up.
18448 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
18449 	 * but it was also incremented in sd_uscsi_strategy(), so
18450 	 * we should be ok.
18451 	 */
18452 	if (dkc == NULL) {
18453 		(void) biowait(bp);
18454 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18455 	}
18456 
18457 	return (rval);
18458 }
18459 
18460 
18461 static int
18462 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18463 {
18464 	struct sd_uscsi_info *uip;
18465 	struct uscsi_cmd *uscmd;
18466 	uint8_t *sense_buf;
18467 	struct sd_lun *un;
18468 	int status;
18469 
18470 	uip = (struct sd_uscsi_info *)(bp->b_private);
18471 	ASSERT(uip != NULL);
18472 
18473 	uscmd = uip->ui_cmdp;
18474 	ASSERT(uscmd != NULL);
18475 
18476 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18477 	ASSERT(sense_buf != NULL);
18478 
18479 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18480 	ASSERT(un != NULL);
18481 
18482 	status = geterror(bp);
18483 	switch (status) {
18484 	case 0:
18485 		break;	/* Success! */
18486 	case EIO:
18487 		switch (uscmd->uscsi_status) {
18488 		case STATUS_RESERVATION_CONFLICT:
18489 			/* Ignore reservation conflict */
18490 			status = 0;
18491 			goto done;
18492 
18493 		case STATUS_CHECK:
18494 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18495 			    (scsi_sense_key(sense_buf) ==
18496 				KEY_ILLEGAL_REQUEST)) {
18497 				/* Ignore Illegal Request error */
18498 				mutex_enter(SD_MUTEX(un));
18499 				un->un_f_sync_cache_supported = FALSE;
18500 				mutex_exit(SD_MUTEX(un));
18501 				status = ENOTSUP;
18502 				goto done;
18503 			}
18504 			break;
18505 		default:
18506 			break;
18507 		}
18508 		/* FALLTHRU */
18509 	default:
18510 		/*
18511 		 * Don't log an error message if this device
18512 		 * has removable media.
18513 		 */
18514 		if (!un->un_f_has_removable_media) {
18515 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18516 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18517 		}
18518 		break;
18519 	}
18520 
18521 done:
18522 	if (uip->ui_dkc.dkc_callback != NULL) {
18523 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18524 	}
18525 
18526 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18527 	freerbuf(bp);
18528 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18529 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18530 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18531 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18532 
18533 	return (status);
18534 }
18535 
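/*
 * Usage sketch (editorial addition, not part of the original source):
 * the two calling modes of sd_send_scsi_SYNCHRONIZE_CACHE(). A NULL dkc
 * runs the flush synchronously via biowait(); a non-NULL dkc makes the
 * request asynchronous, with the callback fired from the biodone handler
 * above. my_flush_done and my_cookie are hypothetical.
 *
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *
 *	struct dk_callback dkc;
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = my_cookie;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */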
18536 
18537 /*
18538  *    Function: sd_send_scsi_GET_CONFIGURATION
18539  *
18540  * Description: Issues the get configuration command to the device.
18541  *		Called from sd_check_for_writable_cd & sd_get_media_info;
18542  *		the caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
18543  *   Arguments: un
18544  *		ucmdbuf
18545  *		rqbuf
18546  *		rqbuflen
18547  *		bufaddr
18548  *		buflen
18549  *		path_flag
18550  *
18551  * Return Code: 0   - Success
18552  *		errno return code from sd_send_scsi_cmd()
18553  *
18554  *     Context: Can sleep. Does not return until command is completed.
18555  *
18556  */
18557 
18558 static int
18559 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18560 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
18561 	int path_flag)
18562 {
18563 	char	cdb[CDB_GROUP1];
18564 	int	status;
18565 
18566 	ASSERT(un != NULL);
18567 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18568 	ASSERT(bufaddr != NULL);
18569 	ASSERT(ucmdbuf != NULL);
18570 	ASSERT(rqbuf != NULL);
18571 
18572 	SD_TRACE(SD_LOG_IO, un,
18573 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18574 
18575 	bzero(cdb, sizeof (cdb));
18576 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18577 	bzero(rqbuf, rqbuflen);
18578 	bzero(bufaddr, buflen);
18579 
18580 	/*
18581 	 * Set up cdb field for the get configuration command.
18582 	 */
18583 	cdb[0] = SCMD_GET_CONFIGURATION;
18584 	cdb[1] = 0x02;  /* Requested Type */
18585 	cdb[8] = SD_PROFILE_HEADER_LEN;
18586 	ucmdbuf->uscsi_cdb = cdb;
18587 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18588 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18589 	ucmdbuf->uscsi_buflen = buflen;
18590 	ucmdbuf->uscsi_timeout = sd_io_time;
18591 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18592 	ucmdbuf->uscsi_rqlen = rqbuflen;
18593 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18594 
18595 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18596 	    UIO_SYSSPACE, path_flag);
18597 
18598 	switch (status) {
18599 	case 0:
18600 		break;  /* Success! */
18601 	case EIO:
18602 		switch (ucmdbuf->uscsi_status) {
18603 		case STATUS_RESERVATION_CONFLICT:
18604 			status = EACCES;
18605 			break;
18606 		default:
18607 			break;
18608 		}
18609 		break;
18610 	default:
18611 		break;
18612 	}
18613 
18614 	if (status == 0) {
18615 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18616 		    "sd_send_scsi_GET_CONFIGURATION: data",
18617 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18618 	}
18619 
18620 	SD_TRACE(SD_LOG_IO, un,
18621 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18622 
18623 	return (status);
18624 }
18625 
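/*
 * Usage sketch (editorial addition, not part of the original source):
 * fetching the feature header. The buffer sizes follow the constraint
 * noted in the header comment; SENSE_LENGTH is the request-sense size
 * used elsewhere in this file.
 *
 *	struct uscsi_cmd com;
 *	uchar_t rqbuf[SENSE_LENGTH];
 *	uchar_t hdr[SD_PROFILE_HEADER_LEN];
 *	int rc = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, hdr, SD_PROFILE_HEADER_LEN, SD_PATH_DIRECT);
 */
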
18626 /*
18627  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18628  *
18629  * Description: Issues the get configuration command to the device to
18630  *              retrieve a specific feature. Called from
18631  *		sd_check_for_writable_cd & sd_set_mmc_caps.
18632  *   Arguments: un
18633  *              ucmdbuf
18634  *              rqbuf
18635  *              rqbuflen
18636  *              bufaddr
18637  *              buflen
18638  *		feature
18639  *
18640  * Return Code: 0   - Success
18641  *              errno return code from sd_send_scsi_cmd()
18642  *
18643  *     Context: Can sleep. Does not return until command is completed.
18644  *
18645  */
18646 static int
18647 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18648 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18649 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
18650 {
18651 	char    cdb[CDB_GROUP1];
18652 	int	status;
18653 
18654 	ASSERT(un != NULL);
18655 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18656 	ASSERT(bufaddr != NULL);
18657 	ASSERT(ucmdbuf != NULL);
18658 	ASSERT(rqbuf != NULL);
18659 
18660 	SD_TRACE(SD_LOG_IO, un,
18661 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18662 
18663 	bzero(cdb, sizeof (cdb));
18664 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18665 	bzero(rqbuf, rqbuflen);
18666 	bzero(bufaddr, buflen);
18667 
18668 	/*
18669 	 * Set up cdb field for the get configuration command.
18670 	 */
18671 	cdb[0] = SCMD_GET_CONFIGURATION;
18672 	cdb[1] = 0x02;  /* Requested Type */
18673 	cdb[3] = feature;
18674 	cdb[8] = buflen;
18675 	ucmdbuf->uscsi_cdb = cdb;
18676 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18677 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18678 	ucmdbuf->uscsi_buflen = buflen;
18679 	ucmdbuf->uscsi_timeout = sd_io_time;
18680 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18681 	ucmdbuf->uscsi_rqlen = rqbuflen;
18682 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18683 
18684 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18685 	    UIO_SYSSPACE, path_flag);
18686 
18687 	switch (status) {
18688 	case 0:
18689 		break;  /* Success! */
18690 	case EIO:
18691 		switch (ucmdbuf->uscsi_status) {
18692 		case STATUS_RESERVATION_CONFLICT:
18693 			status = EACCES;
18694 			break;
18695 		default:
18696 			break;
18697 		}
18698 		break;
18699 	default:
18700 		break;
18701 	}
18702 
18703 	if (status == 0) {
18704 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18705 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
18706 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18707 	}
18708 
18709 	SD_TRACE(SD_LOG_IO, un,
18710 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
18711 
18712 	return (status);
18713 }
18714 
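/*
 * Usage sketch (editorial addition, not part of the original source):
 * probing a single MMC feature. The feature code 0x20 (random writable)
 * is an assumption chosen for illustration.
 *
 *	struct uscsi_cmd com;
 *	uchar_t rqbuf[SENSE_LENGTH];
 *	uchar_t buf[SD_PROFILE_HEADER_LEN];
 *	int rc = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, buf, sizeof (buf), 0x20, SD_PATH_DIRECT);
 */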
18715 
18716 /*
18717  *    Function: sd_send_scsi_MODE_SENSE
18718  *
18719  * Description: Utility function for issuing a scsi MODE SENSE command.
18720  *		Note: This routine uses a consistent implementation for Group0,
18721  *		Group1, and Group2 commands across all platforms. ATAPI devices
18722  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
18723  *
18724  *   Arguments: un - pointer to the softstate struct for the target.
18725  *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte) or
18726  *			  CDB_GROUP[1|2] (10 byte)).
18727  *		bufaddr - buffer for page data retrieved from the target.
18728  *		buflen - size of page to be retrieved.
18729  *		page_code - page code of data to be retrieved from the target.
18730  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18731  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18732  *			to use the USCSI "direct" chain and bypass the normal
18733  *			command waitq.
18734  *
18735  * Return Code: 0   - Success
18736  *		errno return code from sd_send_scsi_cmd()
18737  *
18738  *     Context: Can sleep. Does not return until command is completed.
18739  */
18740 
18741 static int
18742 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18743 	size_t buflen,  uchar_t page_code, int path_flag)
18744 {
18745 	struct	scsi_extended_sense	sense_buf;
18746 	union scsi_cdb		cdb;
18747 	struct uscsi_cmd	ucmd_buf;
18748 	int			status;
18749 	int			headlen;
18750 
18751 	ASSERT(un != NULL);
18752 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18753 	ASSERT(bufaddr != NULL);
18754 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18755 	    (cdbsize == CDB_GROUP2));
18756 
18757 	SD_TRACE(SD_LOG_IO, un,
18758 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
18759 
18760 	bzero(&cdb, sizeof (cdb));
18761 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18762 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18763 	bzero(bufaddr, buflen);
18764 
18765 	if (cdbsize == CDB_GROUP0) {
18766 		cdb.scc_cmd = SCMD_MODE_SENSE;
18767 		cdb.cdb_opaque[2] = page_code;
18768 		FORMG0COUNT(&cdb, buflen);
18769 		headlen = MODE_HEADER_LENGTH;
18770 	} else {
18771 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
18772 		cdb.cdb_opaque[2] = page_code;
18773 		FORMG1COUNT(&cdb, buflen);
18774 		headlen = MODE_HEADER_LENGTH_GRP2;
18775 	}
18776 
18777 	ASSERT(headlen <= buflen);
18778 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18779 
18780 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18781 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18782 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18783 	ucmd_buf.uscsi_buflen	= buflen;
18784 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18785 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18786 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18787 	ucmd_buf.uscsi_timeout	= 60;
18788 
18789 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18790 	    UIO_SYSSPACE, path_flag);
18791 
18792 	switch (status) {
18793 	case 0:
18794 		/*
18795 		 * sr_check_wp() uses the 0x3f page code and checks the header
18796 		 * of the mode page to determine if the target device is
18797 		 * write-protected. But some USB devices return 0 bytes for
18798 		 * the 0x3f page code. For this case, make sure that at least
18799 		 * the mode page header is returned.
18800 		 */
18801 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
18802 			status = EIO;
18803 		break;	/* Success! */
18804 	case EIO:
18805 		switch (ucmd_buf.uscsi_status) {
18806 		case STATUS_RESERVATION_CONFLICT:
18807 			status = EACCES;
18808 			break;
18809 		default:
18810 			break;
18811 		}
18812 		break;
18813 	default:
18814 		break;
18815 	}
18816 
18817 	if (status == 0) {
18818 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
18819 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18820 	}
18821 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
18822 
18823 	return (status);
18824 }
18825 
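/*
 * Usage sketch (editorial addition, not part of the original source):
 * retrieving a mode page. The 0x3F ("return all pages") code appears in
 * the comment above; the 256-byte buffer size is an assumption.
 *
 *	uchar_t page[256];
 *	int rc = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, page,
 *	    sizeof (page), 0x3F, SD_PATH_DIRECT);
 */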
18826 
18827 /*
18828  *    Function: sd_send_scsi_MODE_SELECT
18829  *
18830  * Description: Utility function for issuing a scsi MODE SELECT command.
18831  *		Note: This routine uses a consistent implementation for Group0,
18832  *		Group1, and Group2 commands across all platforms. ATAPI devices
18833  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
18834  *
18835  *   Arguments: un - pointer to the softstate struct for the target.
18836  *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte) or
18837  *			  CDB_GROUP[1|2] (10 byte)).
18838  *		bufaddr - buffer for page data retrieved from the target.
18839  *		buflen - size of page to be retrieved.
18840  *		save_page - boolean to determine if the SP bit should be set.
18841  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18842  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18843  *			to use the USCSI "direct" chain and bypass the normal
18844  *			command waitq.
18845  *
18846  * Return Code: 0   - Success
18847  *		errno return code from sd_send_scsi_cmd()
18848  *
18849  *     Context: Can sleep. Does not return until command is completed.
18850  */
18851 
18852 static int
18853 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18854 	size_t buflen,  uchar_t save_page, int path_flag)
18855 {
18856 	struct	scsi_extended_sense	sense_buf;
18857 	union scsi_cdb		cdb;
18858 	struct uscsi_cmd	ucmd_buf;
18859 	int			status;
18860 
18861 	ASSERT(un != NULL);
18862 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18863 	ASSERT(bufaddr != NULL);
18864 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18865 	    (cdbsize == CDB_GROUP2));
18866 
18867 	SD_TRACE(SD_LOG_IO, un,
18868 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
18869 
18870 	bzero(&cdb, sizeof (cdb));
18871 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18872 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18873 
18874 	/* Set the PF bit for many third party drives */
18875 	cdb.cdb_opaque[1] = 0x10;
18876 
18877 	/* Set the savepage(SP) bit if given */
18878 	if (save_page == SD_SAVE_PAGE) {
18879 		cdb.cdb_opaque[1] |= 0x01;
18880 	}
18881 
18882 	if (cdbsize == CDB_GROUP0) {
18883 		cdb.scc_cmd = SCMD_MODE_SELECT;
18884 		FORMG0COUNT(&cdb, buflen);
18885 	} else {
18886 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
18887 		FORMG1COUNT(&cdb, buflen);
18888 	}
18889 
18890 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18891 
18892 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18893 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18894 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18895 	ucmd_buf.uscsi_buflen	= buflen;
18896 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18897 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18898 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18899 	ucmd_buf.uscsi_timeout	= 60;
18900 
18901 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18902 	    UIO_SYSSPACE, path_flag);
18903 
18904 	switch (status) {
18905 	case 0:
18906 		break;	/* Success! */
18907 	case EIO:
18908 		switch (ucmd_buf.uscsi_status) {
18909 		case STATUS_RESERVATION_CONFLICT:
18910 			status = EACCES;
18911 			break;
18912 		default:
18913 			break;
18914 		}
18915 		break;
18916 	default:
18917 		break;
18918 	}
18919 
18920 	if (status == 0) {
18921 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
18922 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18923 	}
18924 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
18925 
18926 	return (status);
18927 }
18928 
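/*
 * Usage sketch (editorial addition, not part of the original source):
 * writing back a previously sensed (and locally modified) mode page with
 * the SP bit set so the target saves it. page_buf and page_len stand in
 * for data obtained via sd_send_scsi_MODE_SENSE().
 *
 *	int rc = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, page_buf,
 *	    page_len, SD_SAVE_PAGE, SD_PATH_DIRECT);
 */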
18929 
18930 /*
18931  *    Function: sd_send_scsi_RDWR
18932  *
18933  * Description: Issue a scsi READ or WRITE command with the given parameters.
18934  *
18935  *   Arguments: un:      Pointer to the sd_lun struct for the target.
18936  *		cmd:	 SCMD_READ or SCMD_WRITE
18937  *		bufaddr: Address of the caller's buffer that supplies the
18938  *			 data for a WRITE or receives the data for a READ.
18939  *		buflen:  Length of the caller's buffer, in bytes.
18940  *		start_block: Block number for the start of the RDWR operation.
18941  *			 (Assumes target-native block size; byte counts are
18942  *			 converted via SD_BYTES2TGTBLOCKS().)
18943  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18944  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18945  *			to use the USCSI "direct" chain and bypass the normal
18946  *			command waitq.
18947  *
18948  * Return Code: 0   - Success
18949  *		errno return code from sd_send_scsi_cmd()
18950  *
18951  *     Context: Can sleep. Does not return until command is completed.
18952  */
18953 
18954 static int
18955 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
18956 	size_t buflen, daddr_t start_block, int path_flag)
18957 {
18958 	struct	scsi_extended_sense	sense_buf;
18959 	union scsi_cdb		cdb;
18960 	struct uscsi_cmd	ucmd_buf;
18961 	uint32_t		block_count;
18962 	int			status;
18963 	int			cdbsize;
18964 	uchar_t			flag;
18965 
18966 	ASSERT(un != NULL);
18967 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18968 	ASSERT(bufaddr != NULL);
18969 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
18970 
18971 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
18972 
18973 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
18974 		return (EINVAL);
18975 	}
18976 
18977 	mutex_enter(SD_MUTEX(un));
18978 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
18979 	mutex_exit(SD_MUTEX(un));
18980 
18981 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
18982 
18983 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
18984 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
18985 	    bufaddr, buflen, start_block, block_count);
18986 
18987 	bzero(&cdb, sizeof (cdb));
18988 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18989 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18990 
18991 	/* Compute CDB size to use */
18992 	if (start_block > 0xffffffff)
18993 		cdbsize = CDB_GROUP4;
18994 	else if ((start_block & 0xFFE00000) ||
18995 	    (un->un_f_cfg_is_atapi == TRUE))
18996 		cdbsize = CDB_GROUP1;
18997 	else
18998 		cdbsize = CDB_GROUP0;
18999 
19000 	switch (cdbsize) {
19001 	case CDB_GROUP0:	/* 6-byte CDBs */
19002 		cdb.scc_cmd = cmd;
19003 		FORMG0ADDR(&cdb, start_block);
19004 		FORMG0COUNT(&cdb, block_count);
19005 		break;
19006 	case CDB_GROUP1:	/* 10-byte CDBs */
19007 		cdb.scc_cmd = cmd | SCMD_GROUP1;
19008 		FORMG1ADDR(&cdb, start_block);
19009 		FORMG1COUNT(&cdb, block_count);
19010 		break;
19011 	case CDB_GROUP4:	/* 16-byte CDBs */
19012 		cdb.scc_cmd = cmd | SCMD_GROUP4;
19013 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
19014 		FORMG4COUNT(&cdb, block_count);
19015 		break;
19016 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
19017 	default:
19018 		/* All others reserved */
19019 		return (EINVAL);
19020 	}
19021 
19022 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
19023 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19024 
19025 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19026 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19027 	ucmd_buf.uscsi_bufaddr	= bufaddr;
19028 	ucmd_buf.uscsi_buflen	= buflen;
19029 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19030 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19031 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
19032 	ucmd_buf.uscsi_timeout	= 60;
19033 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19034 	    UIO_SYSSPACE, path_flag);
19035 	switch (status) {
19036 	case 0:
19037 		break;	/* Success! */
19038 	case EIO:
19039 		switch (ucmd_buf.uscsi_status) {
19040 		case STATUS_RESERVATION_CONFLICT:
19041 			status = EACCES;
19042 			break;
19043 		default:
19044 			break;
19045 		}
19046 		break;
19047 	default:
19048 		break;
19049 	}
19050 
19051 	if (status == 0) {
19052 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19053 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19054 	}
19055 
19056 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19057 
19058 	return (status);
19059 }
19060 
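/*
 * Usage sketch (editorial addition, not part of the original source):
 * reading a single target-native block. The block number is arbitrary;
 * the byte count is converted to blocks via SD_BYTES2TGTBLOCKS() inside
 * the routine.
 *
 *	uchar_t *blk = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
 *	int rc = sd_send_scsi_RDWR(un, SCMD_READ, blk,
 *	    un->un_tgt_blocksize, (daddr_t)0, SD_PATH_STANDARD);
 *	kmem_free(blk, un->un_tgt_blocksize);
 */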
19061 
19062 /*
19063  *    Function: sd_send_scsi_LOG_SENSE
19064  *
19065  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19066  *
19067  *   Arguments: un:      Pointer to the sd_lun struct for the target.
19068  *   Arguments: un - pointer to the sd_lun struct for the target; also
19069  *		bufaddr, buflen, page_code, page_control, param_ptr, path_flag.
19070  *		errno return code from sd_send_scsi_cmd()
19071  *
19072  *     Context: Can sleep. Does not return until command is completed.
19073  */
19074 
19075 static int
19076 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19077 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19078 	int path_flag)
19079 
19080 {
19081 	struct	scsi_extended_sense	sense_buf;
19082 	union scsi_cdb		cdb;
19083 	struct uscsi_cmd	ucmd_buf;
19084 	int			status;
19085 
19086 	ASSERT(un != NULL);
19087 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19088 
19089 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19090 
19091 	bzero(&cdb, sizeof (cdb));
19092 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19093 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19094 
19095 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19096 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19097 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19098 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19099 	FORMG1COUNT(&cdb, buflen);
19100 
19101 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19102 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19103 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19104 	ucmd_buf.uscsi_buflen	= buflen;
19105 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19106 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19107 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19108 	ucmd_buf.uscsi_timeout	= 60;
19109 
19110 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19111 	    UIO_SYSSPACE, path_flag);
19112 
19113 	switch (status) {
19114 	case 0:
19115 		break;
19116 	case EIO:
19117 		switch (ucmd_buf.uscsi_status) {
19118 		case STATUS_RESERVATION_CONFLICT:
19119 			status = EACCES;
19120 			break;
19121 		case STATUS_CHECK:
19122 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19123 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19124 				KEY_ILLEGAL_REQUEST) &&
19125 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19126 				/*
19127 				 * ASC 0x24: INVALID FIELD IN CDB
19128 				 */
19129 				switch (page_code) {
19130 				case START_STOP_CYCLE_PAGE:
19131 					/*
19132 					 * The start stop cycle counter is
19133 					 * implemented as page 0x31 in earlier
19134 					 * generation disks. In new generation
19135 					 * disks the start stop cycle counter is
19136 					 * implemented as page 0xE. To properly
19137 					 * handle this case, if an attempt for
19138 					 * log page 0xE is made and fails, we
19139 					 * will try again using page 0x31.
19140 					 *
19141 					 * Network storage BU committed to
19142 					 * maintain the page 0x31 for this
19143 					 * purpose and will not have any other
19144 					 * page implemented with page code 0x31
19145 					 * until all disks transition to the
19146 					 * standard page.
19147 					 */
19148 					mutex_enter(SD_MUTEX(un));
19149 					un->un_start_stop_cycle_page =
19150 					    START_STOP_CYCLE_VU_PAGE;
19151 					cdb.cdb_opaque[2] =
19152 					    (char)(page_control << 6) |
19153 					    un->un_start_stop_cycle_page;
19154 					mutex_exit(SD_MUTEX(un));
19155 					status = sd_send_scsi_cmd(
19156 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19157 					    UIO_SYSSPACE, path_flag);
19158 
19159 					break;
19160 				case TEMPERATURE_PAGE:
19161 					status = ENOTTY;
19162 					break;
19163 				default:
19164 					break;
19165 				}
19166 			}
19167 			break;
19168 		default:
19169 			break;
19170 		}
19171 		break;
19172 	default:
19173 		break;
19174 	}
19175 
19176 	if (status == 0) {
19177 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19178 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19179 	}
19180 
19181 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19182 
19183 	return (status);
19184 }
19185 
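/*
 * Usage sketch (editorial addition, not part of the original source):
 * reading the temperature log page. TEMPERATURE_PAGE and the ENOTTY
 * mapping come from the CHECK CONDITION handling above; the page-control
 * value of 0x01 (cumulative values) and the buffer size are assumptions.
 *
 *	uchar_t logp[256];
 *	int rc = sd_send_scsi_LOG_SENSE(un, logp, sizeof (logp),
 *	    TEMPERATURE_PAGE, 0x01, 0, SD_PATH_DIRECT);
 *	if (rc == ENOTTY) {
 *		... temperature page not implemented ...
 *	}
 */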
19186 
19187 /*
19188  *    Function: sdioctl
19189  *
19190  * Description: Driver's ioctl(9e) entry point function.
19191  *
19192  *   Arguments: dev     - device number
19193  *		cmd     - ioctl operation to be performed
19194  *		arg     - user argument, contains data to be set or reference
19195  *			  parameter for get
19196  *		flag    - bit flag, indicating open settings, 32/64 bit type
19197  *		cred_p  - user credential pointer
19198  *		rval_p  - calling process return value (OPT)
19199  *
19200  * Return Code: EINVAL
19201  *		ENOTTY
19202  *		ENXIO
19203  *		EIO
19204  *		EFAULT
19205  *		ENOTSUP
19206  *		EPERM
19207  *
19208  *     Context: Called from the device switch at normal priority.
19209  */
19210 
19211 static int
19212 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19213 {
19214 	struct sd_lun	*un = NULL;
19215 	int		err = 0;
19216 	int		i = 0;
19217 	cred_t		*cr;
19218 	int		tmprval = EINVAL;
19219 	int 		is_valid;
19220 
19221 	/*
19222 	 * All device accesses go through sdstrategy, where we check the
19223 	 * suspend status.
19224 	 */
19225 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19226 		return (ENXIO);
19227 	}
19228 
19229 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19230 
19231 
19232 	is_valid = SD_IS_VALID_LABEL(un);
19233 
19234 	/*
19235 	 * Moved this wait from sd_uscsi_strategy to here for
19236 	 * reasons of deadlock prevention. Internal driver commands,
19237 	 * specifically those to change a device's power level, result
19238 	 * in a call to sd_uscsi_strategy.
19239 	 */
19240 	mutex_enter(SD_MUTEX(un));
19241 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19242 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19243 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19244 	}
19245 	/*
19246 	 * Twiddling the counter here protects commands from now
19247 	 * through to the top of sd_uscsi_strategy. Without the
19248 	 * counter increment, a power down, for example, could get in
19249 	 * after the above check for state is made and before
19250 	 * execution gets to the top of sd_uscsi_strategy.
19251 	 * That would cause problems.
19252 	 */
19253 	un->un_ncmds_in_driver++;
19254 
19255 	if (!is_valid &&
19256 	    (flag & (FNDELAY | FNONBLOCK))) {
19257 		switch (cmd) {
19258 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19259 		case DKIOCGVTOC:
19260 		case DKIOCGAPART:
19261 		case DKIOCPARTINFO:
19262 		case DKIOCSGEOM:
19263 		case DKIOCSAPART:
19264 		case DKIOCGETEFI:
19265 		case DKIOCPARTITION:
19266 		case DKIOCSVTOC:
19267 		case DKIOCSETEFI:
19268 		case DKIOCGMBOOT:
19269 		case DKIOCSMBOOT:
19270 		case DKIOCG_PHYGEOM:
19271 		case DKIOCG_VIRTGEOM:
19272 			/* let cmlb handle it */
19273 			goto skip_ready_valid;
19274 
19275 		case CDROMPAUSE:
19276 		case CDROMRESUME:
19277 		case CDROMPLAYMSF:
19278 		case CDROMPLAYTRKIND:
19279 		case CDROMREADTOCHDR:
19280 		case CDROMREADTOCENTRY:
19281 		case CDROMSTOP:
19282 		case CDROMSTART:
19283 		case CDROMVOLCTRL:
19284 		case CDROMSUBCHNL:
19285 		case CDROMREADMODE2:
19286 		case CDROMREADMODE1:
19287 		case CDROMREADOFFSET:
19288 		case CDROMSBLKMODE:
19289 		case CDROMGBLKMODE:
19290 		case CDROMGDRVSPEED:
19291 		case CDROMSDRVSPEED:
19292 		case CDROMCDDA:
19293 		case CDROMCDXA:
19294 		case CDROMSUBCODE:
19295 			if (!ISCD(un)) {
19296 				un->un_ncmds_in_driver--;
19297 				ASSERT(un->un_ncmds_in_driver >= 0);
19298 				mutex_exit(SD_MUTEX(un));
19299 				return (ENOTTY);
19300 			}
19301 			break;
19302 		case FDEJECT:
19303 		case DKIOCEJECT:
19304 		case CDROMEJECT:
19305 			if (!un->un_f_eject_media_supported) {
19306 				un->un_ncmds_in_driver--;
19307 				ASSERT(un->un_ncmds_in_driver >= 0);
19308 				mutex_exit(SD_MUTEX(un));
19309 				return (ENOTTY);
19310 			}
19311 			break;
19312 		case DKIOCFLUSHWRITECACHE:
19313 			mutex_exit(SD_MUTEX(un));
19314 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19315 			if (err != 0) {
19316 				mutex_enter(SD_MUTEX(un));
19317 				un->un_ncmds_in_driver--;
19318 				ASSERT(un->un_ncmds_in_driver >= 0);
19319 				mutex_exit(SD_MUTEX(un));
19320 				return (EIO);
19321 			}
19322 			mutex_enter(SD_MUTEX(un));
19323 			/* FALLTHROUGH */
19324 		case DKIOCREMOVABLE:
19325 		case DKIOCHOTPLUGGABLE:
19326 		case DKIOCINFO:
19327 		case DKIOCGMEDIAINFO:
19328 		case MHIOCENFAILFAST:
19329 		case MHIOCSTATUS:
19330 		case MHIOCTKOWN:
19331 		case MHIOCRELEASE:
19332 		case MHIOCGRP_INKEYS:
19333 		case MHIOCGRP_INRESV:
19334 		case MHIOCGRP_REGISTER:
19335 		case MHIOCGRP_RESERVE:
19336 		case MHIOCGRP_PREEMPTANDABORT:
19337 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19338 		case CDROMCLOSETRAY:
19339 		case USCSICMD:
19340 			goto skip_ready_valid;
19341 		default:
19342 			break;
19343 		}
19344 
19345 		mutex_exit(SD_MUTEX(un));
19346 		err = sd_ready_and_valid(un);
19347 		mutex_enter(SD_MUTEX(un));
19348 
19349 		if (err != SD_READY_VALID) {
19350 			switch (cmd) {
19351 			case DKIOCSTATE:
19352 			case CDROMGDRVSPEED:
19353 			case CDROMSDRVSPEED:
19354 			case FDEJECT:	/* for eject command */
19355 			case DKIOCEJECT:
19356 			case CDROMEJECT:
19357 			case DKIOCREMOVABLE:
19358 			case DKIOCHOTPLUGGABLE:
19359 				break;
19360 			default:
19361 				if (un->un_f_has_removable_media) {
19362 					err = ENXIO;
19363 				} else {
19364 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19365 					if (err == SD_RESERVED_BY_OTHERS) {
19366 						err = EACCES;
19367 					} else {
19368 						err = EIO;
19369 					}
19370 				}
19371 				un->un_ncmds_in_driver--;
19372 				ASSERT(un->un_ncmds_in_driver >= 0);
19373 				mutex_exit(SD_MUTEX(un));
19374 				return (err);
19375 			}
19376 		}
19377 	}
19378 
19379 skip_ready_valid:
19380 	mutex_exit(SD_MUTEX(un));
19381 
19382 	switch (cmd) {
19383 	case DKIOCINFO:
19384 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19385 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19386 		break;
19387 
19388 	case DKIOCGMEDIAINFO:
19389 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19390 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19391 		break;
19392 
19393 	case DKIOCGGEOM:
19394 	case DKIOCGVTOC:
19395 	case DKIOCGAPART:
19396 	case DKIOCPARTINFO:
19397 	case DKIOCSGEOM:
19398 	case DKIOCSAPART:
19399 	case DKIOCGETEFI:
19400 	case DKIOCPARTITION:
19401 	case DKIOCSVTOC:
19402 	case DKIOCSETEFI:
19403 	case DKIOCGMBOOT:
19404 	case DKIOCSMBOOT:
19405 	case DKIOCG_PHYGEOM:
19406 	case DKIOCG_VIRTGEOM:
19407 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19408 
19409 		/* TUR should spin up */
19410 
19411 		if (un->un_f_has_removable_media)
19412 			err = sd_send_scsi_TEST_UNIT_READY(un,
19413 			    SD_CHECK_FOR_MEDIA);
19414 		else
19415 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19416 
19417 		if (err != 0)
19418 			break;
19419 
19420 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19421 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19422 
19423 		if ((err == 0) &&
19424 		    ((cmd == DKIOCSETEFI) ||
19425 		    ((un->un_f_pkstats_enabled) &&
19426 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC)))) {
19427 
19428 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19429 			    (void *)SD_PATH_DIRECT);
19430 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19431 				sd_set_pstats(un);
19432 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19433 				    "sd_ioctl: un:0x%p pstats created and "
19434 				    "set\n", un);
19435 			}
19436 		}
19437 
19438 		if ((cmd == DKIOCSVTOC) ||
19439 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19440 
19441 			mutex_enter(SD_MUTEX(un));
19442 			if (un->un_f_devid_supported &&
19443 			    (un->un_f_opt_fab_devid == TRUE)) {
19444 				if (un->un_devid == NULL) {
19445 					sd_register_devid(un, SD_DEVINFO(un),
19446 					    SD_TARGET_IS_UNRESERVED);
19447 				} else {
19448 					/*
19449 					 * The device id for this disk
19450 					 * has been fabricated. The
19451 					 * device id must be preserved
19452 					 * by writing it back out to
19453 					 * disk.
19454 					 */
19455 					if (sd_write_deviceid(un) != 0) {
19456 						ddi_devid_free(un->un_devid);
19457 						un->un_devid = NULL;
19458 					}
19459 				}
19460 			}
19461 			mutex_exit(SD_MUTEX(un));
19462 		}
19463 
19464 		break;
19465 
19466 	case DKIOCLOCK:
19467 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19468 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19469 		    SD_PATH_STANDARD);
19470 		break;
19471 
19472 	case DKIOCUNLOCK:
19473 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19474 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19475 		    SD_PATH_STANDARD);
19476 		break;
19477 
19478 	case DKIOCSTATE: {
19479 		enum dkio_state		state;
19480 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19481 
19482 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19483 			err = EFAULT;
19484 		} else {
19485 			err = sd_check_media(dev, state);
19486 			if (err == 0) {
19487 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19488 				    sizeof (int), flag) != 0)
19489 					err = EFAULT;
19490 			}
19491 		}
19492 		break;
19493 	}
19494 
19495 	case DKIOCREMOVABLE:
19496 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19497 		i = un->un_f_has_removable_media ? 1 : 0;
19498 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19499 			err = EFAULT;
19500 		} else {
19501 			err = 0;
19502 		}
19503 		break;
19504 
19505 	case DKIOCHOTPLUGGABLE:
19506 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19507 		i = un->un_f_is_hotpluggable ? 1 : 0;
19508 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19509 			err = EFAULT;
19510 		} else {
19511 			err = 0;
19512 		}
19513 		break;
19514 
19515 	case DKIOCGTEMPERATURE:
19516 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19517 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19518 		break;
19519 
19520 	case MHIOCENFAILFAST:
19521 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19522 		if ((err = drv_priv(cred_p)) == 0) {
19523 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19524 		}
19525 		break;
19526 
19527 	case MHIOCTKOWN:
19528 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19529 		if ((err = drv_priv(cred_p)) == 0) {
19530 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19531 		}
19532 		break;
19533 
19534 	case MHIOCRELEASE:
19535 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19536 		if ((err = drv_priv(cred_p)) == 0) {
19537 			err = sd_mhdioc_release(dev);
19538 		}
19539 		break;
19540 
19541 	case MHIOCSTATUS:
19542 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19543 		if ((err = drv_priv(cred_p)) == 0) {
19544 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19545 			case 0:
19546 				err = 0;
19547 				break;
19548 			case EACCES:
19549 				*rval_p = 1;
19550 				err = 0;
19551 				break;
19552 			default:
19553 				err = EIO;
19554 				break;
19555 			}
19556 		}
19557 		break;
19558 
19559 	case MHIOCQRESERVE:
19560 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19561 		if ((err = drv_priv(cred_p)) == 0) {
19562 			err = sd_reserve_release(dev, SD_RESERVE);
19563 		}
19564 		break;
19565 
19566 	case MHIOCREREGISTERDEVID:
19567 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19568 		if (drv_priv(cred_p) == EPERM) {
19569 			err = EPERM;
19570 		} else if (!un->un_f_devid_supported) {
19571 			err = ENOTTY;
19572 		} else {
19573 			err = sd_mhdioc_register_devid(dev);
19574 		}
19575 		break;
19576 
19577 	case MHIOCGRP_INKEYS:
19578 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19579 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19580 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19581 				err = ENOTSUP;
19582 			} else {
19583 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19584 				    flag);
19585 			}
19586 		}
19587 		break;
19588 
19589 	case MHIOCGRP_INRESV:
19590 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19591 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19592 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19593 				err = ENOTSUP;
19594 			} else {
19595 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19596 			}
19597 		}
19598 		break;
19599 
19600 	case MHIOCGRP_REGISTER:
19601 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19602 		if ((err = drv_priv(cred_p)) != EPERM) {
19603 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19604 				err = ENOTSUP;
19605 			} else if (arg != NULL) {
19606 				mhioc_register_t reg;
19607 				if (ddi_copyin((void *)arg, &reg,
19608 				    sizeof (mhioc_register_t), flag) != 0) {
19609 					err = EFAULT;
19610 				} else {
19611 					err =
19612 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19613 					    un, SD_SCSI3_REGISTER,
19614 					    (uchar_t *)&reg);
19615 				}
19616 			}
19617 		}
19618 		break;
19619 
19620 	case MHIOCGRP_RESERVE:
19621 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19622 		if ((err = drv_priv(cred_p)) != EPERM) {
19623 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19624 				err = ENOTSUP;
19625 			} else if (arg != NULL) {
19626 				mhioc_resv_desc_t resv_desc;
19627 				if (ddi_copyin((void *)arg, &resv_desc,
19628 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19629 					err = EFAULT;
19630 				} else {
19631 					err =
19632 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19633 					    un, SD_SCSI3_RESERVE,
19634 					    (uchar_t *)&resv_desc);
19635 				}
19636 			}
19637 		}
19638 		break;
19639 
19640 	case MHIOCGRP_PREEMPTANDABORT:
19641 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19642 		if ((err = drv_priv(cred_p)) != EPERM) {
19643 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19644 				err = ENOTSUP;
19645 			} else if (arg != NULL) {
19646 				mhioc_preemptandabort_t preempt_abort;
19647 				if (ddi_copyin((void *)arg, &preempt_abort,
19648 				    sizeof (mhioc_preemptandabort_t),
19649 				    flag) != 0) {
19650 					err = EFAULT;
19651 				} else {
19652 					err =
19653 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19654 					    un, SD_SCSI3_PREEMPTANDABORT,
19655 					    (uchar_t *)&preempt_abort);
19656 				}
19657 			}
19658 		}
19659 		break;
19660 
19661 	case MHIOCGRP_REGISTERANDIGNOREKEY:
19662 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19663 		if ((err = drv_priv(cred_p)) != EPERM) {
19664 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19665 				err = ENOTSUP;
19666 			} else if (arg != NULL) {
19667 				mhioc_registerandignorekey_t r_and_i;
19668 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19669 				    sizeof (mhioc_registerandignorekey_t),
19670 				    flag) != 0) {
19671 					err = EFAULT;
19672 				} else {
19673 					err =
19674 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19675 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19676 					    (uchar_t *)&r_and_i);
19677 				}
19678 			}
19679 		}
19680 		break;
19681 
19682 	case USCSICMD:
19683 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
19684 		cr = ddi_get_cred();
19685 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
19686 			err = EPERM;
19687 		} else {
19688 			enum uio_seg	uioseg;
19689 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
19690 			    UIO_USERSPACE;
19691 			if (un->un_f_format_in_progress == TRUE) {
19692 				err = EAGAIN;
19693 				break;
19694 			}
19695 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
19696 			    flag, uioseg, SD_PATH_STANDARD);
19697 		}
19698 		break;
19699 
19700 	case CDROMPAUSE:
19701 	case CDROMRESUME:
19702 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
19703 		if (!ISCD(un)) {
19704 			err = ENOTTY;
19705 		} else {
19706 			err = sr_pause_resume(dev, cmd);
19707 		}
19708 		break;
19709 
19710 	case CDROMPLAYMSF:
19711 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
19712 		if (!ISCD(un)) {
19713 			err = ENOTTY;
19714 		} else {
19715 			err = sr_play_msf(dev, (caddr_t)arg, flag);
19716 		}
19717 		break;
19718 
19719 	case CDROMPLAYTRKIND:
19720 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
19721 #if defined(__i386) || defined(__amd64)
19722 		/*
19723 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
19724 		 */
19725 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19726 #else
19727 		if (!ISCD(un)) {
19728 #endif
19729 			err = ENOTTY;
19730 		} else {
19731 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
19732 		}
19733 		break;
19734 
19735 	case CDROMREADTOCHDR:
19736 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
19737 		if (!ISCD(un)) {
19738 			err = ENOTTY;
19739 		} else {
19740 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
19741 		}
19742 		break;
19743 
19744 	case CDROMREADTOCENTRY:
19745 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
19746 		if (!ISCD(un)) {
19747 			err = ENOTTY;
19748 		} else {
19749 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
19750 		}
19751 		break;
19752 
19753 	case CDROMSTOP:
19754 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
19755 		if (!ISCD(un)) {
19756 			err = ENOTTY;
19757 		} else {
19758 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
19759 			    SD_PATH_STANDARD);
19760 		}
19761 		break;
19762 
19763 	case CDROMSTART:
19764 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
19765 		if (!ISCD(un)) {
19766 			err = ENOTTY;
19767 		} else {
19768 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19769 			    SD_PATH_STANDARD);
19770 		}
19771 		break;
19772 
19773 	case CDROMCLOSETRAY:
19774 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
19775 		if (!ISCD(un)) {
19776 			err = ENOTTY;
19777 		} else {
19778 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
19779 			    SD_PATH_STANDARD);
19780 		}
19781 		break;
19782 
19783 	case FDEJECT:	/* for eject command */
19784 	case DKIOCEJECT:
19785 	case CDROMEJECT:
19786 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
19787 		if (!un->un_f_eject_media_supported) {
19788 			err = ENOTTY;
19789 		} else {
19790 			err = sr_eject(dev);
19791 		}
19792 		break;
19793 
19794 	case CDROMVOLCTRL:
19795 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
19796 		if (!ISCD(un)) {
19797 			err = ENOTTY;
19798 		} else {
19799 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
19800 		}
19801 		break;
19802 
19803 	case CDROMSUBCHNL:
19804 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
19805 		if (!ISCD(un)) {
19806 			err = ENOTTY;
19807 		} else {
19808 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
19809 		}
19810 		break;
19811 
19812 	case CDROMREADMODE2:
19813 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
19814 		if (!ISCD(un)) {
19815 			err = ENOTTY;
19816 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19817 			/*
19818 			 * If the drive supports READ CD, use that instead of
19819 			 * switching the LBA size via a MODE SELECT
19820 			 * Block Descriptor
19821 			 */
19822 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
19823 		} else {
19824 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
19825 		}
19826 		break;
19827 
19828 	case CDROMREADMODE1:
19829 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
19830 		if (!ISCD(un)) {
19831 			err = ENOTTY;
19832 		} else {
19833 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
19834 		}
19835 		break;
19836 
19837 	case CDROMREADOFFSET:
19838 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
19839 		if (!ISCD(un)) {
19840 			err = ENOTTY;
19841 		} else {
19842 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
19843 			    flag);
19844 		}
19845 		break;
19846 
19847 	case CDROMSBLKMODE:
19848 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
19849 		/*
19850 		 * There is no means of changing the block size on ATAPI
19851 		 * drives, so return ENOTTY if the drive is ATAPI.
19852 		 */
19853 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19854 			err = ENOTTY;
19855 		} else if (un->un_f_mmc_cap == TRUE) {
19856 
19857 			/*
19858 			 * MMC devices do not support changing the
19859 			 * logical block size.
19860 			 *
19861 			 * Note: EINVAL is being returned instead of ENOTTY to
19862 			 * maintain consistency with the original mmc
19863 			 * driver update.
19864 			 */
19865 			err = EINVAL;
19866 		} else {
19867 			mutex_enter(SD_MUTEX(un));
19868 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
19869 			    (un->un_ncmds_in_transport > 0)) {
19870 				mutex_exit(SD_MUTEX(un));
19871 				err = EINVAL;
19872 			} else {
19873 				mutex_exit(SD_MUTEX(un));
19874 				err = sr_change_blkmode(dev, cmd, arg, flag);
19875 			}
19876 		}
19877 		break;
19878 
19879 	case CDROMGBLKMODE:
19880 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
19881 		if (!ISCD(un)) {
19882 			err = ENOTTY;
19883 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
19884 		    (un->un_f_blockcount_is_valid != FALSE)) {
19885 			/*
19886 			 * The drive is ATAPI, so return the target block size;
19887 			 * the blocksize cannot be changed on ATAPI drives.
19888 			 * This is used primarily to detect whether an ATAPI
19889 			 * cdrom is present.
19890 			 */
19891 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
19892 			    sizeof (int), flag) != 0) {
19893 				err = EFAULT;
19894 			} else {
19895 				err = 0;
19896 			}
19897 
19898 		} else {
19899 			/*
19900 			 * Drive supports changing block sizes via a Mode
19901 			 * Select.
19902 			 */
19903 			err = sr_change_blkmode(dev, cmd, arg, flag);
19904 		}
19905 		break;
19906 
19907 	case CDROMGDRVSPEED:
19908 	case CDROMSDRVSPEED:
19909 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
19910 		if (!ISCD(un)) {
19911 			err = ENOTTY;
19912 		} else if (un->un_f_mmc_cap == TRUE) {
19913 			/*
19914 			 * Note: In the future the driver implementation
19915 			 * Note: In the future the driver implementation for
19916 			 * getting and setting the cd speed should entail the
19917 			 * following:
19918 			 *    (sr_change_speed)
19919 			 * 2) If mmc but no support for Real Time Streaming try
19920 			 *    the SET CD SPEED (0xBB) command
19921 			 *   (sr_atapi_change_speed)
19922 			 * 3) If mmc and support for Real Time Streaming
19923 			 *    try the GET PERFORMANCE and SET STREAMING
19924 			 *    commands (not yet implemented, 4380808)
19925 			 */
19926 			/*
19927 			 * As per recent MMC spec, CD-ROM speed is variable
19928 			 * As per the recent MMC spec, CD-ROM speed is variable
19929 			 * and changes with the LBA. Since there is no such
19930 			 * thing as a fixed drive speed now, fail this ioctl.
19931 			 *
19932 			 * Note: EINVAL is returned for consistency with the
19933 			 * original implementation, which included support for
19934 			 * getting the drive speed of mmc devices but not
19935 			 * setting it. Thus EINVAL would be returned if a set
19936 			 * request was made for an mmc device. We no longer
19937 			 * support get or set speed for mmc, but we need to
19938 			 * remain consistent with regard to the error code
19939 			 * returned.
19940 			err = EINVAL;
19941 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19942 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
19943 		} else {
19944 			err = sr_change_speed(dev, cmd, arg, flag);
19945 		}
19946 		break;
19947 
19948 	case CDROMCDDA:
19949 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
19950 		if (!ISCD(un)) {
19951 			err = ENOTTY;
19952 		} else {
19953 			err = sr_read_cdda(dev, (void *)arg, flag);
19954 		}
19955 		break;
19956 
19957 	case CDROMCDXA:
19958 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
19959 		if (!ISCD(un)) {
19960 			err = ENOTTY;
19961 		} else {
19962 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
19963 		}
19964 		break;
19965 
19966 	case CDROMSUBCODE:
19967 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
19968 		if (!ISCD(un)) {
19969 			err = ENOTTY;
19970 		} else {
19971 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
19972 		}
19973 		break;
19974 
19975 
19976 #ifdef SDDEBUG
19977 /* RESET/ABORTS testing ioctls */
19978 	case DKIOCRESET: {
19979 		int	reset_level;
19980 
19981 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
19982 			err = EFAULT;
19983 		} else {
19984 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
19985 			    "reset_level = 0x%x\n", reset_level);
19986 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
19987 				err = 0;
19988 			} else {
19989 				err = EIO;
19990 			}
19991 		}
19992 		break;
19993 	}
19994 
19995 	case DKIOCABORT:
19996 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
19997 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
19998 			err = 0;
19999 		} else {
20000 			err = EIO;
20001 		}
20002 		break;
20003 #endif
20004 
20005 #ifdef SD_FAULT_INJECTION
20006 /* SDIOC FaultInjection testing ioctls */
20007 	case SDIOCSTART:
20008 	case SDIOCSTOP:
20009 	case SDIOCINSERTPKT:
20010 	case SDIOCINSERTXB:
20011 	case SDIOCINSERTUN:
20012 	case SDIOCINSERTARQ:
20013 	case SDIOCPUSH:
20014 	case SDIOCRETRIEVE:
20015 	case SDIOCRUN:
20016 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
20017 		    "SDIOC detected cmd:0x%X:\n", cmd);
20018 		/* call error generator */
20019 		sd_faultinjection_ioctl(cmd, arg, un);
20020 		err = 0;
20021 		break;
20022 
20023 #endif /* SD_FAULT_INJECTION */
20024 
20025 	case DKIOCFLUSHWRITECACHE:
20026 		{
20027 			struct dk_callback *dkc = (struct dk_callback *)arg;
20028 
20029 			mutex_enter(SD_MUTEX(un));
20030 			if (!un->un_f_sync_cache_supported ||
20031 			    !un->un_f_write_cache_enabled) {
20032 				err = un->un_f_sync_cache_supported ?
20033 				    0 : ENOTSUP;
20034 				mutex_exit(SD_MUTEX(un));
20035 				if ((flag & FKIOCTL) && dkc != NULL &&
20036 				    dkc->dkc_callback != NULL) {
20037 					(*dkc->dkc_callback)(dkc->dkc_cookie,
20038 					    err);
20039 					/*
20040 					 * Did callback and reported error.
20041 					 * Since we did a callback, ioctl
20042 					 * should return 0.
20043 					 */
20044 					err = 0;
20045 				}
20046 				break;
20047 			}
20048 			mutex_exit(SD_MUTEX(un));
20049 
20050 			if ((flag & FKIOCTL) && dkc != NULL &&
20051 			    dkc->dkc_callback != NULL) {
20052 				/* async SYNC CACHE request */
20053 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20054 			} else {
20055 				/* synchronous SYNC CACHE request */
20056 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20057 			}
20058 		}
20059 		break;
20060 
20061 	case DKIOCGETWCE: {
20062 
20063 		int wce;
20064 
20065 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20066 			break;
20067 		}
20068 
20069 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20070 			err = EFAULT;
20071 		}
20072 		break;
20073 	}
20074 
20075 	case DKIOCSETWCE: {
20076 
20077 		int wce, sync_supported;
20078 
20079 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20080 			err = EFAULT;
20081 			break;
20082 		}
20083 
20084 		/*
20085 		 * Synchronize multiple threads trying to enable
20086 		 * or disable the cache via the un_wcc_cv
20087 		 * condition variable.
20088 		 */
20089 		mutex_enter(SD_MUTEX(un));
20090 
20091 		/*
20092 		 * Don't allow the cache to be enabled if the
20093 		 * config file has it disabled.
20094 		 */
20095 		if (un->un_f_opt_disable_cache && wce) {
20096 			mutex_exit(SD_MUTEX(un));
20097 			err = EINVAL;
20098 			break;
20099 		}
20100 
20101 		/*
20102 		 * Wait for write cache change in progress
20103 		 * bit to be clear before proceeding.
20104 		 */
20105 		while (un->un_f_wcc_inprog)
20106 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20107 
20108 		un->un_f_wcc_inprog = 1;
20109 
20110 		if (un->un_f_write_cache_enabled && wce == 0) {
20111 			/*
20112 			 * Disable the write cache.  Don't clear
20113 			 * un_f_write_cache_enabled until after
20114 			 * the mode select and flush are complete.
20115 			 */
20116 			sync_supported = un->un_f_sync_cache_supported;
20117 			mutex_exit(SD_MUTEX(un));
20118 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20119 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
20120 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20121 			}
20122 
20123 			mutex_enter(SD_MUTEX(un));
20124 			if (err == 0) {
20125 				un->un_f_write_cache_enabled = 0;
20126 			}
20127 
20128 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20129 			/*
20130 			 * Set un_f_write_cache_enabled first, so there is
20131 			 * no window where the cache is enabled, but the
20132 			 * bit says it isn't.
20133 			 */
20134 			un->un_f_write_cache_enabled = 1;
20135 			mutex_exit(SD_MUTEX(un));
20136 
20137 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20138 			    SD_CACHE_ENABLE);
20139 
20140 			mutex_enter(SD_MUTEX(un));
20141 
20142 			if (err) {
20143 				un->un_f_write_cache_enabled = 0;
20144 			}
20145 		}
20146 
20147 		un->un_f_wcc_inprog = 0;
20148 		cv_broadcast(&un->un_wcc_cv);
20149 		mutex_exit(SD_MUTEX(un));
20150 		break;
20151 	}
20152 
20153 	default:
20154 		err = ENOTTY;
20155 		break;
20156 	}
20157 	mutex_enter(SD_MUTEX(un));
20158 	un->un_ncmds_in_driver--;
20159 	ASSERT(un->un_ncmds_in_driver >= 0);
20160 	mutex_exit(SD_MUTEX(un));
20161 
20162 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20163 	return (err);
20164 }
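
/*
 * Illustrative sketch (not part of the driver): one way a userland
 * consumer might exercise the write cache ioctls handled above. The
 * device path is hypothetical and error handling is minimal. Note that
 * the asynchronous dk_callback form of DKIOCFLUSHWRITECACHE is honored
 * only for in-kernel (FKIOCTL) callers, so a userland caller always
 * gets the synchronous flush.
 *
 *	#include <sys/types.h>
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	report_and_flush_wce(const char *path)
 *	{
 *		int	fd, wce;
 *
 *		if ((fd = open(path, O_RDONLY)) < 0)
 *			return (-1);
 *		if (ioctl(fd, DKIOCGETWCE, &wce) == 0)
 *			(void) printf("write cache %s\n",
 *			    wce ? "enabled" : "disabled");
 *		if (ioctl(fd, DKIOCFLUSHWRITECACHE, NULL) != 0)
 *			perror("DKIOCFLUSHWRITECACHE");
 *		(void) close(fd);
 *		return (0);
 *	}
 */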
20165 
20166 
20167 /*
20168  *    Function: sd_dkio_ctrl_info
20169  *
20170  * Description: This routine is the driver entry point for handling controller
20171  *		information ioctl requests (DKIOCINFO).
20172  *
20173  *   Arguments: dev  - the device number
20174  *		arg  - pointer to user provided dk_cinfo structure
20175  *		       specifying the controller type and attributes.
20176  *		flag - this argument is a pass through to ddi_copyxxx()
20177  *		       directly from the mode argument of ioctl().
20178  *
20179  * Return Code: 0
20180  *		EFAULT
20181  *		ENXIO
20182  */
20183 
20184 static int
20185 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20186 {
20187 	struct sd_lun	*un = NULL;
20188 	struct dk_cinfo	*info;
20189 	dev_info_t	*pdip;
20190 	int		lun, tgt;
20191 
20192 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20193 		return (ENXIO);
20194 	}
20195 
20196 	info = (struct dk_cinfo *)
20197 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20198 
20199 	switch (un->un_ctype) {
20200 	case CTYPE_CDROM:
20201 		info->dki_ctype = DKC_CDROM;
20202 		break;
20203 	default:
20204 		info->dki_ctype = DKC_SCSI_CCS;
20205 		break;
20206 	}
20207 	pdip = ddi_get_parent(SD_DEVINFO(un));
20208 	info->dki_cnum = ddi_get_instance(pdip);
20209 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20210 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20211 	} else {
20212 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20213 		    DK_DEVLEN - 1);
20214 	}
20215 
20216 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20217 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20218 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20219 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20220 
20221 	/* Unit Information */
20222 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20223 	info->dki_slave = ((tgt << 3) | lun);
20224 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20225 	    DK_DEVLEN - 1);
20226 	info->dki_flags = DKI_FMTVOL;
20227 	info->dki_partition = SDPART(dev);
20228 
20229 	/* Max Transfer size of this device in blocks */
20230 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20231 	info->dki_addr = 0;
20232 	info->dki_space = 0;
20233 	info->dki_prio = 0;
20234 	info->dki_vec = 0;
20235 
20236 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20237 		kmem_free(info, sizeof (struct dk_cinfo));
20238 		return (EFAULT);
20239 	} else {
20240 		kmem_free(info, sizeof (struct dk_cinfo));
20241 		return (0);
20242 	}
20243 }
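
/*
 * Illustrative sketch (not part of the driver): retrieving the
 * controller information assembled above from userland. The open file
 * descriptor fd is assumed to reference a raw sd device.
 *
 *	#include <sys/dkio.h>
 *	#include <stdio.h>
 *
 *	struct dk_cinfo	cinfo;
 *
 *	if (ioctl(fd, DKIOCINFO, &cinfo) == 0) {
 *		(void) printf("%s%u: ctype %d, partition %d, "
 *		    "maxtransfer %d blocks\n", cinfo.dki_dname,
 *		    cinfo.dki_unit, cinfo.dki_ctype,
 *		    cinfo.dki_partition, cinfo.dki_maxtransfer);
 *	}
 */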
20244 
20245 
20246 /*
20247  *    Function: sd_get_media_info
20248  *
20249  * Description: This routine is the driver entry point for handling ioctl
20250  *		requests for the media type or command set profile used by the
20251  *		drive to operate on the media (DKIOCGMEDIAINFO).
20252  *
20253  *   Arguments: dev	- the device number
20254  *		arg	- pointer to user provided dk_minfo structure
20255  *			  specifying the media type, logical block size and
20256  *			  drive capacity.
20257  *		flag	- this argument is a pass through to ddi_copyxxx()
20258  *			  directly from the mode argument of ioctl().
20259  *
20260  * Return Code: 0
20261  *		EACCES
20262  *		EFAULT
20263  *		ENXIO
20264  *		EIO
20265  */
20266 
20267 static int
20268 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20269 {
20270 	struct sd_lun		*un = NULL;
20271 	struct uscsi_cmd	com;
20272 	struct scsi_inquiry	*sinq;
20273 	struct dk_minfo		media_info;
20274 	u_longlong_t		media_capacity;
20275 	uint64_t		capacity;
20276 	uint_t			lbasize;
20277 	uchar_t			*out_data;
20278 	uchar_t			*rqbuf;
20279 	int			rval = 0;
20280 	int			rtn;
20281 
20282 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20283 	    (un->un_state == SD_STATE_OFFLINE)) {
20284 		return (ENXIO);
20285 	}
20286 
20287 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20288 
20289 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20290 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20291 
20292 	/* Issue a TUR to determine if the drive is ready with media present */
20293 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20294 	if (rval == ENXIO) {
20295 		goto done;
20296 	}
20297 
20298 	/* Now get configuration data */
20299 	if (ISCD(un)) {
20300 		media_info.dki_media_type = DK_CDROM;
20301 
20302 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20303 		if (un->un_f_mmc_cap == TRUE) {
20304 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20305 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
20306 			    SD_PATH_STANDARD);
20307 
20308 			if (rtn) {
20309 				/*
20310 				 * Failed for other than an illegal request
20311 				 * or command not supported
20312 				 */
20313 				if ((com.uscsi_status == STATUS_CHECK) &&
20314 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20315 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20316 					    (rqbuf[12] != 0x20)) {
20317 						rval = EIO;
20318 						goto done;
20319 					}
20320 				}
20321 			} else {
20322 				/*
20323 				 * The GET CONFIGURATION command succeeded
20324 				 * so set the media type according to the
20325 				 * returned data
20326 				 */
20327 				media_info.dki_media_type = out_data[6];
20328 				media_info.dki_media_type <<= 8;
20329 				media_info.dki_media_type |= out_data[7];
20330 			}
20331 		}
20332 	} else {
20333 		/*
20334 		 * The profile list is not available, so we attempt to identify
20335 		 * the media type based on the inquiry data
20336 		 */
20337 		sinq = un->un_sd->sd_inq;
20338 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
20339 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
20340 			/* This is a direct access device or optical disk */
20341 			media_info.dki_media_type = DK_FIXED_DISK;
20342 
20343 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20344 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20345 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20346 					media_info.dki_media_type = DK_ZIP;
20347 				} else if (
20348 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20349 					media_info.dki_media_type = DK_JAZ;
20350 				}
20351 			}
20352 		} else {
20353 			/*
20354 			 * Not a CD, direct access or optical disk so return
20355 			 * unknown media
20356 			 */
20357 			media_info.dki_media_type = DK_UNKNOWN;
20358 		}
20359 	}
20360 
20361 	/* Now read the capacity so we can provide the lbasize and capacity */
20362 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20363 	    SD_PATH_DIRECT)) {
20364 	case 0:
20365 		break;
20366 	case EACCES:
20367 		rval = EACCES;
20368 		goto done;
20369 	default:
20370 		rval = EIO;
20371 		goto done;
20372 	}
20373 
20374 	media_info.dki_lbsize = lbasize;
20375 	media_capacity = capacity;
20376 
20377 	/*
20378 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
20379 	 * un->un_sys_blocksize chunks. So we need to convert it into
20380 	 * lbasize chunks.
20381 	 */
20382 	media_capacity *= un->un_sys_blocksize;
20383 	media_capacity /= lbasize;
20384 	media_info.dki_capacity = media_capacity;
20385 
20386 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20387 		rval = EFAULT;
20388 		/* Keep this goto; code may be added below it in the future */
20389 		goto done;
20390 	}
20391 done:
20392 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20393 	kmem_free(rqbuf, SENSE_LENGTH);
20394 	return (rval);
20395 }
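
/*
 * Illustrative sketch (not part of the driver): reading the media
 * information assembled above from userland. dki_capacity is reported
 * in units of dki_lbsize blocks, matching the conversion performed at
 * the end of sd_get_media_info(). fd is an assumed open descriptor.
 *
 *	#include <sys/dkio.h>
 *	#include <stdio.h>
 *
 *	struct dk_minfo	minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
 *		(void) printf("media type 0x%x, %llu blocks of %u bytes\n",
 *		    minfo.dki_media_type,
 *		    (u_longlong_t)minfo.dki_capacity, minfo.dki_lbsize);
 *	}
 */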
20396 
20397 
20398 /*
20399  *    Function: sd_check_media
20400  *
20401  * Description: This utility routine implements the functionality for the
20402  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20403  *		driver state changes from that specified by the user
20404  *		media state changes from that specified by the user
20405  *		DKIO_EJECTED and the current media state is inserted this
20406  *		routine will immediately return DKIO_INSERTED. However, if the
20407  *		current media state is not inserted the user thread will be
20408  *		blocked until the drive state changes. If DKIO_NONE is specified
20409  *		the user thread will block until a drive state change occurs.
20410  *
20411  *   Arguments: dev  - the device number
20412  *		state  - the media state specified by the user; this routine
20413  *			returns once the drive state differs from it.
20414  *
20415  * Return Code: ENXIO
20416  *		EIO
20417  *		EAGAIN
20418  *		EINTR
20419  */
20420 
20421 static int
20422 sd_check_media(dev_t dev, enum dkio_state state)
20423 {
20424 	struct sd_lun		*un = NULL;
20425 	enum dkio_state		prev_state;
20426 	opaque_t		token = NULL;
20427 	int			rval = 0;
20428 
20429 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20430 		return (ENXIO);
20431 	}
20432 
20433 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20434 
20435 	mutex_enter(SD_MUTEX(un));
20436 
20437 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20438 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20439 
20440 	prev_state = un->un_mediastate;
20441 
20442 	/* is there anything to do? */
20443 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20444 		/*
20445 		 * submit the request to the scsi_watch service;
20446 		 * scsi_media_watch_cb() does the real work
20447 		 * sd_media_watch_cb() does the real work
20448 		mutex_exit(SD_MUTEX(un));
20449 
20450 		/*
20451 		 * This change handles the case where a scsi watch request is
20452 		 * added to a device that is powered down. To accomplish this
20453 		 * we power up the device before adding the scsi watch request,
20454 		 * since the scsi watch sends a TUR directly to the device
20455 		 * which the device cannot handle if it is powered down.
20456 		 */
20457 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20458 			mutex_enter(SD_MUTEX(un));
20459 			goto done;
20460 		}
20461 
20462 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20463 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20464 		    (caddr_t)dev);
20465 
20466 		sd_pm_exit(un);
20467 
20468 		mutex_enter(SD_MUTEX(un));
20469 		if (token == NULL) {
20470 			rval = EAGAIN;
20471 			goto done;
20472 		}
20473 
20474 		/*
20475 		 * This is a special case IOCTL that doesn't return
20476 		 * until the media state changes. Routine sdpower
20477 		 * knows about and handles this so don't count it
20478 		 * as an active cmd in the driver, which would
20479 		 * keep the device busy to the pm framework.
20480 		 * If the count isn't decremented the device can't
20481 		 * be powered down.
20482 		 */
20483 		un->un_ncmds_in_driver--;
20484 		ASSERT(un->un_ncmds_in_driver >= 0);
20485 
20486 		/*
20487 		 * if a prior request had been made, this will be the same
20488 		 * token, as scsi_watch was designed that way.
20489 		 */
20490 		un->un_swr_token = token;
20491 		un->un_specified_mediastate = state;
20492 
20493 		/*
20494 		 * now wait for media change
20495 		 * we will not be signalled until mediastate != state, but it is
20496 		 * still better to test for this condition, since there is a
20497 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
20498 		 */
20499 		SD_TRACE(SD_LOG_COMMON, un,
20500 		    "sd_check_media: waiting for media state change\n");
20501 		while (un->un_mediastate == state) {
20502 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20503 				SD_TRACE(SD_LOG_COMMON, un,
20504 				    "sd_check_media: waiting for media state "
20505 				    "was interrupted\n");
20506 				un->un_ncmds_in_driver++;
20507 				rval = EINTR;
20508 				goto done;
20509 			}
20510 			SD_TRACE(SD_LOG_COMMON, un,
20511 			    "sd_check_media: received signal, state=%x\n",
20512 			    un->un_mediastate);
20513 		}
20514 		/*
20515 		 * Inc the counter to indicate the device once again
20516 		 * has an active outstanding cmd.
20517 		 */
20518 		un->un_ncmds_in_driver++;
20519 	}
20520 
20521 	/* invalidate geometry */
20522 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20523 		sr_ejected(un);
20524 	}
20525 
20526 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20527 		uint64_t	capacity;
20528 		uint_t		lbasize;
20529 
20530 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20531 		mutex_exit(SD_MUTEX(un));
20532 		/*
20533 		 * Since the following routines use SD_PATH_DIRECT, we must
20534 		 * call PM directly before the upcoming disk accesses. This
20535 		 * may cause the disk to be powered up and spun up.
20536 		 */
20537 
20538 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20539 			rval = sd_send_scsi_READ_CAPACITY(un,
20540 			    &capacity,
20541 			    &lbasize, SD_PATH_DIRECT);
20542 			if (rval != 0) {
20543 				sd_pm_exit(un);
20544 				mutex_enter(SD_MUTEX(un));
20545 				goto done;
20546 			}
20547 		} else {
20548 			rval = EIO;
20549 			mutex_enter(SD_MUTEX(un));
20550 			goto done;
20551 		}
20552 		mutex_enter(SD_MUTEX(un));
20553 
20554 		sd_update_block_info(un, lbasize, capacity);
20555 
20556 		/*
20557 		 * Check if the media in the device is writable or not.
20558 		 */
20559 		if (ISCD(un))
20560 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
20561 
20562 		mutex_exit(SD_MUTEX(un));
20563 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20564 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20565 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20566 			sd_set_pstats(un);
20567 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20568 			    "sd_check_media: un:0x%p pstats created and "
20569 			    "set\n", un);
20570 		}
20571 
20572 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20573 		    SD_PATH_DIRECT);
20574 		sd_pm_exit(un);
20575 
20576 		mutex_enter(SD_MUTEX(un));
20577 	}
20578 done:
20579 	un->un_f_watcht_stopped = FALSE;
20580 	if (un->un_swr_token) {
20581 		/*
20582 		 * Use of this local token and the mutex ensures that we avoid
20583 		 * some race conditions associated with terminating the
20584 		 * scsi watch.
20585 		 */
20586 		token = un->un_swr_token;
20587 		un->un_swr_token = (opaque_t)NULL;
20588 		mutex_exit(SD_MUTEX(un));
20589 		(void) scsi_watch_request_terminate(token,
20590 		    SCSI_WATCH_TERMINATE_WAIT);
20591 		mutex_enter(SD_MUTEX(un));
20592 	}
20593 
20594 	/*
20595 	 * Update the capacity kstat value, if no media previously
20596 	 * (capacity kstat is 0) and a media has been inserted
20597 	 * (un_f_blockcount_is_valid == TRUE)
20598 	 */
20599 	if (un->un_errstats) {
20600 		struct sd_errstats	*stp = NULL;
20601 
20602 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20603 		if ((stp->sd_capacity.value.ui64 == 0) &&
20604 		    (un->un_f_blockcount_is_valid == TRUE)) {
20605 			stp->sd_capacity.value.ui64 =
20606 			    (uint64_t)((uint64_t)un->un_blockcount *
20607 			    un->un_sys_blocksize);
20608 		}
20609 	}
20610 	mutex_exit(SD_MUTEX(un));
20611 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20612 	return (rval);
20613 }
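
/*
 * Illustrative sketch (not part of the driver): the classic userland
 * media-watch loop built on the DKIOCSTATE behavior implemented above.
 * Each call blocks until the media state differs from the state passed
 * in; handle_insertion() and handle_ejection() are hypothetical.
 *
 *	#include <sys/dkio.h>
 *
 *	enum dkio_state	state = DKIO_NONE;
 *
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			handle_insertion(fd);
 *		else if (state == DKIO_EJECTED)
 *			handle_ejection(fd);
 *	}
 */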
20614 
20615 
20616 /*
20617  *    Function: sd_delayed_cv_broadcast
20618  *
20619  * Description: Delayed cv_broadcast to allow for target to recover from media
20620  *		insertion.
20621  *
20622  *   Arguments: arg - driver soft state (unit) structure
20623  */
20624 
20625 static void
20626 sd_delayed_cv_broadcast(void *arg)
20627 {
20628 	struct sd_lun *un = arg;
20629 
20630 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20631 
20632 	mutex_enter(SD_MUTEX(un));
20633 	un->un_dcvb_timeid = NULL;
20634 	cv_broadcast(&un->un_state_cv);
20635 	mutex_exit(SD_MUTEX(un));
20636 }
20637 
20638 
20639 /*
20640  *    Function: sd_media_watch_cb
20641  *
20642  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20643  *		routine processes the TUR sense data and updates the driver
20644  *		state if a transition has occurred. The user thread
20645  *		(sd_check_media) is then signalled.
20646  *
20647  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20648  *			among multiple watches that share this callback function
20649  *		resultp - scsi watch facility result packet containing scsi
20650  *			  packet, status byte and sense data
20651  *
20652  * Return Code: 0 for success, -1 for failure
20653  */
20654 
20655 static int
20656 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20657 {
20658 	struct sd_lun			*un;
20659 	struct scsi_status		*statusp = resultp->statusp;
20660 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
20661 	enum dkio_state			state = DKIO_NONE;
20662 	dev_t				dev = (dev_t)arg;
20663 	uchar_t				actual_sense_length;
20664 	uint8_t				skey, asc, ascq;
20665 
20666 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20667 		return (-1);
20668 	}
20669 	actual_sense_length = resultp->actual_sense_length;
20670 
20671 	mutex_enter(SD_MUTEX(un));
20672 	SD_TRACE(SD_LOG_COMMON, un,
20673 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
20674 	    *((char *)statusp), (void *)sensep, actual_sense_length);
20675 
20676 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
20677 		un->un_mediastate = DKIO_DEV_GONE;
20678 		cv_broadcast(&un->un_state_cv);
20679 		mutex_exit(SD_MUTEX(un));
20680 
20681 		return (0);
20682 	}
20683 
20684 	/*
20685 	 * If there was a check condition then sensep points to valid sense data;
20686 	 * if status was not a check condition but a reservation or busy status
20687 	 * then the new state is DKIO_NONE.
20688 	 */
20689 	if (sensep != NULL) {
20690 		skey = scsi_sense_key(sensep);
20691 		asc = scsi_sense_asc(sensep);
20692 		ascq = scsi_sense_ascq(sensep);
20693 
20694 		SD_INFO(SD_LOG_COMMON, un,
20695 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
20696 		    skey, asc, ascq);
20697 		/* This routine only uses up to 13 bytes of sense data. */
20698 		if (actual_sense_length >= 13) {
20699 			if (skey == KEY_UNIT_ATTENTION) {
20700 				if (asc == 0x28) {
20701 					state = DKIO_INSERTED;
20702 				}
20703 			} else if (skey == KEY_NOT_READY) {
20704 				/*
20705 				 * Sense data 02/04/02 means that the host
20706 				 * should send a start command. Explicitly
20707 				 * leave the media state as is
20708 				 * (inserted), since the media is present
20709 				 * and the host has stopped the device
20710 				 * for PM reasons. The next true
20711 				 * read/write to this media will bring
20712 				 * the device back to the right state
20713 				 * for media access.
20714 				 */
20715 				if (asc == 0x3a) {
20716 					state = DKIO_EJECTED;
20717 				} else {
20718 					/*
20719 					 * If the drive is busy with an
20720 					 * operation or long write, keep the
20721 					 * media in an inserted state.
20722 					 */
20723 
20724 					if ((asc == 0x04) &&
20725 					    ((ascq == 0x02) ||
20726 					    (ascq == 0x07) ||
20727 					    (ascq == 0x08))) {
20728 						state = DKIO_INSERTED;
20729 					}
20730 				}
20731 			} else if (skey == KEY_NO_SENSE) {
20732 				if ((asc == 0x00) && (ascq == 0x00)) {
20733 					/*
20734 					 * Sense Data 00/00/00 does not provide
20735 					 * any information about the state of
20736 					 * the media. Ignore it.
20737 					 */
20738 					mutex_exit(SD_MUTEX(un));
20739 					return (0);
20740 				}
20741 			}
20742 		}
20743 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
20744 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
20745 		state = DKIO_INSERTED;
20746 	}
20747 
20748 	SD_TRACE(SD_LOG_COMMON, un,
20749 	    "sd_media_watch_cb: state=%x, specified=%x\n",
20750 	    state, un->un_specified_mediastate);
20751 
20752 	/*
20753 	 * now signal the waiting thread if this is *not* the specified state;
20754 	 * delay the signal if the state is DKIO_INSERTED to allow the target
20755 	 * to recover
20756 	 */
20757 	if (state != un->un_specified_mediastate) {
20758 		un->un_mediastate = state;
20759 		if (state == DKIO_INSERTED) {
20760 			/*
20761 			 * delay the signal to give the drive a chance
20762 			 * to do what it apparently needs to do
20763 			 */
20764 			SD_TRACE(SD_LOG_COMMON, un,
20765 			    "sd_media_watch_cb: delayed cv_broadcast\n");
20766 			if (un->un_dcvb_timeid == NULL) {
20767 				un->un_dcvb_timeid =
20768 				    timeout(sd_delayed_cv_broadcast, un,
20769 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
20770 			}
20771 		} else {
20772 			SD_TRACE(SD_LOG_COMMON, un,
20773 			    "sd_media_watch_cb: immediate cv_broadcast\n");
20774 			cv_broadcast(&un->un_state_cv);
20775 		}
20776 	}
20777 	mutex_exit(SD_MUTEX(un));
20778 	return (0);
20779 }
20780 
20781 
20782 /*
20783  *    Function: sd_dkio_get_temp
20784  *
20785  * Description: This routine is the driver entry point for handling ioctl
20786  *		requests to get the disk temperature.
20787  *
20788  *   Arguments: dev  - the device number
20789  *		arg  - pointer to user provided dk_temperature structure.
20790  *		flag - this argument is a pass through to ddi_copyxxx()
20791  *		       directly from the mode argument of ioctl().
20792  *
20793  * Return Code: 0
20794  *		EFAULT
20795  *		ENXIO
20796  *		EAGAIN
20797  */
20798 
20799 static int
20800 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
20801 {
20802 	struct sd_lun		*un = NULL;
20803 	struct dk_temperature	*dktemp = NULL;
20804 	uchar_t			*temperature_page;
20805 	int			rval = 0;
20806 	int			path_flag = SD_PATH_STANDARD;
20807 
20808 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20809 		return (ENXIO);
20810 	}
20811 
20812 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
20813 
20814 	/* copyin the disk temp argument to get the user flags */
20815 	if (ddi_copyin((void *)arg, dktemp,
20816 	    sizeof (struct dk_temperature), flag) != 0) {
20817 		rval = EFAULT;
20818 		goto done;
20819 	}
20820 
20821 	/* Initialize the temperature to invalid. */
20822 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20823 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20824 
20825 	/*
20826 	 * Note: Investigate removing the "bypass pm" semantic.
20827 	 * Can we just bypass PM always?
20828 	 */
20829 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
20830 		path_flag = SD_PATH_DIRECT;
20831 		ASSERT(!mutex_owned(&un->un_pm_mutex));
20832 		mutex_enter(&un->un_pm_mutex);
20833 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
20834 			/*
20835 			 * If DKT_BYPASS_PM is set, and the drive happens to be
20836 			 * in low power mode, we cannot wake it up; we need to
20837 			 * return EAGAIN.
20838 			 */
20839 			mutex_exit(&un->un_pm_mutex);
20840 			rval = EAGAIN;
20841 			goto done;
20842 		} else {
20843 			/*
20844 			 * Indicate to PM the device is busy. This is required
20845 			 * to avoid a race - i.e. the ioctl is issuing a
20846 			 * command and the pm framework brings down the device
20847 			 * to low power mode (possible power cut-off on some
20848 			 * platforms).
20849 			 */
20850 			mutex_exit(&un->un_pm_mutex);
20851 			if (sd_pm_entry(un) != DDI_SUCCESS) {
20852 				rval = EAGAIN;
20853 				goto done;
20854 			}
20855 		}
20856 	}
20857 
20858 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
20859 
20860 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
20861 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
20862 		goto done2;
20863 	}
20864 
20865 	/*
20866 	 * For the current temperature verify that the parameter length is 0x02
20867 	 * and the parameter code is 0x00
20868 	 */
20869 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
20870 	    (temperature_page[5] == 0x00)) {
20871 		if (temperature_page[9] == 0xFF) {
20872 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20873 		} else {
20874 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
20875 		}
20876 	}
20877 
20878 	/*
20879 	 * For the reference temperature verify that the parameter
20880 	 * length is 0x02 and the parameter code is 0x01
20881 	 */
20882 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
20883 	    (temperature_page[11] == 0x01)) {
20884 		if (temperature_page[15] == 0xFF) {
20885 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20886 		} else {
20887 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
20888 		}
20889 	}
20890 
20891 	/* Do the copyout regardless of the temperature commands status. */
20892 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
20893 	    flag) != 0) {
20894 		rval = EFAULT;
20895 	}
20896 
20897 done2:
20898 	if (path_flag == SD_PATH_DIRECT) {
20899 		sd_pm_exit(un);
20900 	}
20901 
20902 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
20903 done:
20904 	if (dktemp != NULL) {
20905 		kmem_free(dktemp, sizeof (struct dk_temperature));
20906 	}
20907 
20908 	return (rval);
20909 }
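
/*
 * Illustrative sketch (not part of the driver): requesting the disk
 * temperature from userland. Setting DKT_BYPASS_PM mirrors the
 * path_flag handling above: the request fails with EAGAIN rather than
 * spinning up a powered-down drive. fd is an assumed open descriptor.
 *
 *	#include <sys/dkio.h>
 *	#include <strings.h>
 *	#include <stdio.h>
 *
 *	struct dk_temperature	dkt;
 *
 *	bzero(&dkt, sizeof (dkt));
 *	dkt.dkt_flags = DKT_BYPASS_PM;
 *	if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP) {
 *		(void) printf("current %d C, reference %d C\n",
 *		    dkt.dkt_cur_temp, dkt.dkt_ref_temp);
 *	}
 */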
20910 
20911 
20912 /*
20913  *    Function: sd_log_page_supported
20914  *
20915  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
20916  *		supported log pages.
20917  *
20918  *   Arguments: un - driver soft state (unit) structure
20919  *		log_page - the log page code to look for
20920  *
20921  * Return Code: -1 - on error (log sense is optional and may not be supported).
20922  *		0  - log page not found.
20923  *		1  - log page found.
20924  */
20925 
20926 static int
20927 sd_log_page_supported(struct sd_lun *un, int log_page)
20928 {
20929 	uchar_t *log_page_data;
20930 	int	i;
20931 	int	match = 0;
20932 	int	log_size;
20933 
20934 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
20935 
20936 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
20937 	    SD_PATH_DIRECT) != 0) {
20938 		SD_ERROR(SD_LOG_COMMON, un,
20939 		    "sd_log_page_supported: failed log page retrieval\n");
20940 		kmem_free(log_page_data, 0xFF);
20941 		return (-1);
20942 	}
20943 	log_size = log_page_data[3];
20944 
20945 	/*
20946 	 * The list of supported log pages starts at the fourth byte. Check
20947 	 * until we run out of log pages or a match is found.
20948 	 */
20949 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
20950 		if (log_page_data[i] == log_page) {
20951 			match++;
20952 		}
20953 	}
20954 	kmem_free(log_page_data, 0xFF);
20955 	return (match);
20956 }
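
/*
 * For reference, the LOG SENSE page 0x00 data parsed above is laid out
 * with the page code in byte 0, the page length in bytes 2-3 (only the
 * low byte is used here since the allocation is 0xFF), and the list of
 * supported page codes starting at byte 4. A minimal standalone parser
 * over such a buffer might look like this sketch (buf and buflen are
 * hypothetical):
 *
 *	static int
 *	log_page_listed(const uchar_t *buf, size_t buflen, uchar_t page)
 *	{
 *		size_t	i;
 *		size_t	len = buf[3];
 *
 *		for (i = 4; (i < len + 4) && (i < buflen); i++) {
 *			if (buf[i] == page)
 *				return (1);
 *		}
 *		return (0);
 *	}
 */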
20957 
20958 
20959 /*
20960  *    Function: sd_mhdioc_failfast
20961  *
20962  * Description: This routine is the driver entry point for handling ioctl
20963  *		requests to enable/disable the multihost failfast option.
20964  *		(MHIOCENFAILFAST)
20965  *
20966  *   Arguments: dev	- the device number
20967  *		arg	- user specified probing interval.
20968  *		flag	- this argument is a pass through to ddi_copyxxx()
20969  *			  directly from the mode argument of ioctl().
20970  *
20971  * Return Code: 0
20972  *		EFAULT
20973  *		ENXIO
20974  */
20975 
20976 static int
20977 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
20978 {
20979 	struct sd_lun	*un = NULL;
20980 	int		mh_time;
20981 	int		rval = 0;
20982 
20983 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20984 		return (ENXIO);
20985 	}
20986 
20987 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
20988 		return (EFAULT);
20989 
20990 	if (mh_time) {
20991 		mutex_enter(SD_MUTEX(un));
20992 		un->un_resvd_status |= SD_FAILFAST;
20993 		mutex_exit(SD_MUTEX(un));
20994 		/*
20995 		 * If mh_time is INT_MAX, then this ioctl is being used for
20996 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
20997 		 */
20998 		if (mh_time != INT_MAX) {
20999 			rval = sd_check_mhd(dev, mh_time);
21000 		}
21001 	} else {
21002 		(void) sd_check_mhd(dev, 0);
21003 		mutex_enter(SD_MUTEX(un));
21004 		un->un_resvd_status &= ~SD_FAILFAST;
21005 		mutex_exit(SD_MUTEX(un));
21006 	}
21007 	return (rval);
21008 }
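
/*
 * Illustrative sketch (not part of the driver): enabling failfast from
 * userland with a one-second probe interval, then disabling it. The
 * interval is in milliseconds; 0 disables failfast, and INT_MAX enables
 * it without spawning the watch thread (the SCSI-3 PGR usage noted
 * above). fd is an assumed open descriptor.
 *
 *	#include <sys/mhd.h>
 *	#include <stdio.h>
 *
 *	int	mh_time;
 *
 *	mh_time = 1000;
 *	if (ioctl(fd, MHIOCENFAILFAST, &mh_time) != 0)
 *		perror("MHIOCENFAILFAST");
 *	mh_time = 0;
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 */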
21009 
21010 
21011 /*
21012  *    Function: sd_mhdioc_takeown
21013  *
21014  * Description: This routine is the driver entry point for handling ioctl
21015  *		requests to forcefully acquire exclusive access rights to the
21016  *		multihost disk (MHIOCTKOWN).
21017  *
21018  *   Arguments: dev	- the device number
21019  *		arg	- user provided structure specifying the delay
21020  *			  parameters in milliseconds
21021  *		flag	- this argument is a pass through to ddi_copyxxx()
21022  *			  directly from the mode argument of ioctl().
21023  *
21024  * Return Code: 0
21025  *		EFAULT
21026  *		ENXIO
21027  */
21028 
21029 static int
21030 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
21031 {
21032 	struct sd_lun		*un = NULL;
21033 	struct mhioctkown	*tkown = NULL;
21034 	int			rval = 0;
21035 
21036 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21037 		return (ENXIO);
21038 	}
21039 
21040 	if (arg != NULL) {
21041 		tkown = (struct mhioctkown *)
21042 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
21043 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
21044 		if (rval != 0) {
21045 			rval = EFAULT;
21046 			goto error;
21047 		}
21048 	}
21049 
21050 	rval = sd_take_ownership(dev, tkown);
21051 	mutex_enter(SD_MUTEX(un));
21052 	if (rval == 0) {
21053 		un->un_resvd_status |= SD_RESERVE;
21054 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
21055 			sd_reinstate_resv_delay =
21056 			    tkown->reinstate_resv_delay * 1000;
21057 		} else {
21058 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
21059 		}
21060 		/*
21061 		 * Give the scsi_watch routine interval set by
21062 		 * the MHIOCENFAILFAST ioctl precedence here.
21063 		 */
21064 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21065 			mutex_exit(SD_MUTEX(un));
21066 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21067 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21068 			    "sd_mhdioc_takeown : %d\n",
21069 			    sd_reinstate_resv_delay);
21070 		} else {
21071 			mutex_exit(SD_MUTEX(un));
21072 		}
21073 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21074 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21075 	} else {
21076 		un->un_resvd_status &= ~SD_RESERVE;
21077 		mutex_exit(SD_MUTEX(un));
21078 	}
21079 
21080 error:
21081 	if (tkown != NULL) {
21082 		kmem_free(tkown, sizeof (struct mhioctkown));
21083 	}
21084 	return (rval);
21085 }
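
/*
 * Illustrative sketch (not part of the driver): forcefully taking
 * ownership from userland. The delays are in milliseconds, per the
 * description above; passing a NULL arg instead makes the driver fall
 * back to its default reinstate delay. fd is an assumed open
 * descriptor.
 *
 *	#include <sys/mhd.h>
 *	#include <strings.h>
 *	#include <stdio.h>
 *
 *	struct mhioctkown	tk;
 *
 *	bzero(&tk, sizeof (tk));
 *	tk.reinstate_resv_delay = 2000;
 *	if (ioctl(fd, MHIOCTKOWN, &tk) != 0)
 *		perror("MHIOCTKOWN");
 */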
21086 
21087 
21088 /*
21089  *    Function: sd_mhdioc_release
21090  *
21091  * Description: This routine is the driver entry point for handling ioctl
21092  *		requests to release exclusive access rights to the multihost
21093  *		disk (MHIOCRELEASE).
21094  *
21095  *   Arguments: dev	- the device number
21096  *
21097  * Return Code: 0
21098  *		ENXIO
21099  */
21100 
21101 static int
21102 sd_mhdioc_release(dev_t dev)
21103 {
21104 	struct sd_lun		*un = NULL;
21105 	timeout_id_t		resvd_timeid_save;
21106 	int			resvd_status_save;
21107 	int			rval = 0;
21108 
21109 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21110 		return (ENXIO);
21111 	}
21112 
21113 	mutex_enter(SD_MUTEX(un));
21114 	resvd_status_save = un->un_resvd_status;
21115 	un->un_resvd_status &=
21116 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21117 	if (un->un_resvd_timeid) {
21118 		resvd_timeid_save = un->un_resvd_timeid;
21119 		un->un_resvd_timeid = NULL;
21120 		mutex_exit(SD_MUTEX(un));
21121 		(void) untimeout(resvd_timeid_save);
21122 	} else {
21123 		mutex_exit(SD_MUTEX(un));
21124 	}
21125 
21126 	/*
21127 	 * destroy any pending timeout thread that may be attempting to
21128 	 * reinstate reservation on this device.
21129 	 */
21130 	sd_rmv_resv_reclaim_req(dev);
21131 
21132 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21133 		mutex_enter(SD_MUTEX(un));
21134 		if ((un->un_mhd_token) &&
21135 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21136 			mutex_exit(SD_MUTEX(un));
21137 			(void) sd_check_mhd(dev, 0);
21138 		} else {
21139 			mutex_exit(SD_MUTEX(un));
21140 		}
21141 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21142 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21143 	} else {
21144 		/*
21145 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21146 		 */
21147 		mutex_enter(SD_MUTEX(un));
21148 		un->un_resvd_status = resvd_status_save;
21149 		mutex_exit(SD_MUTEX(un));
21150 	}
21151 	return (rval);
21152 }
21153 
21154 
21155 /*
21156  *    Function: sd_mhdioc_register_devid
21157  *
21158  * Description: This routine is the driver entry point for handling ioctl
21159  *		requests to register the device id (MHIOCREREGISTERDEVID).
21160  *
21161  *		Note: The implementation for this ioctl has been updated to
21162  *		be consistent with the original PSARC case (1999/357)
21163  *		(4375899, 4241671, 4220005)
21164  *
21165  *   Arguments: dev	- the device number
21166  *
21167  * Return Code: 0
21168  *		ENXIO
21169  */
21170 
21171 static int
21172 sd_mhdioc_register_devid(dev_t dev)
21173 {
21174 	struct sd_lun	*un = NULL;
21175 	int		rval = 0;
21176 
21177 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21178 		return (ENXIO);
21179 	}
21180 
21181 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21182 
21183 	mutex_enter(SD_MUTEX(un));
21184 
21185 	/* If a devid already exists, de-register it */
21186 	if (un->un_devid != NULL) {
21187 		ddi_devid_unregister(SD_DEVINFO(un));
21188 		/*
21189 		 * After unregistering the devid, free the devid memory
21190 		 */
21191 		ddi_devid_free(un->un_devid);
21192 		un->un_devid = NULL;
21193 	}
21194 
21195 	/* Check for reservation conflict */
21196 	mutex_exit(SD_MUTEX(un));
21197 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21198 	mutex_enter(SD_MUTEX(un));
21199 
21200 	switch (rval) {
21201 	case 0:
21202 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21203 		break;
21204 	case EACCES:
21205 		break;
21206 	default:
21207 		rval = EIO;
21208 	}
21209 
21210 	mutex_exit(SD_MUTEX(un));
21211 	return (rval);
21212 }
21213 
21214 
21215 /*
21216  *    Function: sd_mhdioc_inkeys
21217  *
21218  * Description: This routine is the driver entry point for handling ioctl
21219  *		requests to issue the SCSI-3 Persistent In Read Keys command
21220  *		to the device (MHIOCGRP_INKEYS).
21221  *
21222  *   Arguments: dev	- the device number
21223  *		arg	- user provided in_keys structure
21224  *		flag	- this argument is a pass through to ddi_copyxxx()
21225  *			  directly from the mode argument of ioctl().
21226  *
21227  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21228  *		ENXIO
21229  *		EFAULT
21230  */
21231 
21232 static int
21233 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21234 {
21235 	struct sd_lun		*un;
21236 	mhioc_inkeys_t		inkeys;
21237 	int			rval = 0;
21238 
21239 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21240 		return (ENXIO);
21241 	}
21242 
21243 #ifdef _MULTI_DATAMODEL
21244 	switch (ddi_model_convert_from(flag & FMODELS)) {
21245 	case DDI_MODEL_ILP32: {
21246 		struct mhioc_inkeys32	inkeys32;
21247 
21248 		if (ddi_copyin(arg, &inkeys32,
21249 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21250 			return (EFAULT);
21251 		}
21252 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21253 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21254 		    &inkeys, flag)) != 0) {
21255 			return (rval);
21256 		}
21257 		inkeys32.generation = inkeys.generation;
21258 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21259 		    flag) != 0) {
21260 			return (EFAULT);
21261 		}
21262 		break;
21263 	}
21264 	case DDI_MODEL_NONE:
21265 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21266 		    flag) != 0) {
21267 			return (EFAULT);
21268 		}
21269 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21270 		    &inkeys, flag)) != 0) {
21271 			return (rval);
21272 		}
21273 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21274 		    flag) != 0) {
21275 			return (EFAULT);
21276 		}
21277 		break;
21278 	}
21279 
21280 #else /* ! _MULTI_DATAMODEL */
21281 
21282 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21283 		return (EFAULT);
21284 	}
21285 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21286 	if (rval != 0) {
21287 		return (rval);
21288 	}
21289 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21290 		return (EFAULT);
21291 	}
21292 
21293 #endif /* _MULTI_DATAMODEL */
21294 
21295 	return (rval);
21296 }
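
/*
 * Illustrative sketch (not part of the driver): the usual two-phase
 * MHIOCGRP_INKEYS pattern. The caller sizes the key list up front; if
 * the device reports more registered keys (listlen) than were allocated
 * (listsize), the caller reallocates and reissues the ioctl. fd is an
 * assumed open descriptor.
 *
 *	#include <sys/mhd.h>
 *	#include <stdio.h>
 *
 *	mhioc_resv_key_t	keys[8];
 *	mhioc_key_list_t	kl;
 *	mhioc_inkeys_t		ik;
 *
 *	kl.listsize = 8;
 *	kl.listlen = 0;
 *	kl.list = keys;
 *	ik.li = &kl;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &ik) == 0) {
 *		(void) printf("generation %u, %u keys registered\n",
 *		    ik.generation, kl.listlen);
 *	}
 */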
21297 
21298 
21299 /*
21300  *    Function: sd_mhdioc_inresv
21301  *
21302  * Description: This routine is the driver entry point for handling ioctl
21303  *		requests to issue the SCSI-3 Persistent In Read Reservations
21304  *		command to the device (MHIOCGRP_INRESV).
21305  *
21306  *   Arguments: dev	- the device number
21307  *		arg	- user provided in_resv structure
21308  *		flag	- this argument is a pass through to ddi_copyxxx()
21309  *			  directly from the mode argument of ioctl().
21310  *
21311  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21312  *		ENXIO
21313  *		EFAULT
21314  */
21315 
21316 static int
21317 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21318 {
21319 	struct sd_lun		*un;
21320 	mhioc_inresvs_t		inresvs;
21321 	int			rval = 0;
21322 
21323 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21324 		return (ENXIO);
21325 	}
21326 
21327 #ifdef _MULTI_DATAMODEL
21328 
21329 	switch (ddi_model_convert_from(flag & FMODELS)) {
21330 	case DDI_MODEL_ILP32: {
21331 		struct mhioc_inresvs32	inresvs32;
21332 
21333 		if (ddi_copyin(arg, &inresvs32,
21334 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21335 			return (EFAULT);
21336 		}
21337 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21338 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21339 		    &inresvs, flag)) != 0) {
21340 			return (rval);
21341 		}
21342 		inresvs32.generation = inresvs.generation;
21343 		if (ddi_copyout(&inresvs32, arg,
21344 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21345 			return (EFAULT);
21346 		}
21347 		break;
21348 	}
21349 	case DDI_MODEL_NONE:
21350 		if (ddi_copyin(arg, &inresvs,
21351 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21352 			return (EFAULT);
21353 		}
21354 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21355 		    &inresvs, flag)) != 0) {
21356 			return (rval);
21357 		}
21358 		if (ddi_copyout(&inresvs, arg,
21359 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21360 			return (EFAULT);
21361 		}
21362 		break;
21363 	}
21364 
21365 #else /* ! _MULTI_DATAMODEL */
21366 
21367 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21368 		return (EFAULT);
21369 	}
21370 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21371 	if (rval != 0) {
21372 		return (rval);
21373 	}
21374 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
21375 		return (EFAULT);
21376 	}
21377 
21378 #endif /* ! _MULTI_DATAMODEL */
21379 
21380 	return (rval);
21381 }
21382 
21383 
21384 /*
21385  * The following routines support the clustering functionality described below
21386  * and implement lost reservation reclaim functionality.
21387  *
21388  * Clustering
21389  * ----------
21390  * The clustering code uses two different, independent forms of SCSI
21391  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
21392  * Persistent Group Reservations. For any particular disk, it will use either
21393  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
21394  *
21395  * SCSI-2
21396  * The cluster software takes ownership of a multi-hosted disk by issuing the
21397  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
21398  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a cluster,
21399  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, issues
21400  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
21401  * meaning of failfast is that if the driver (on this host) ever encounters the
21402  * scsi error return code RESERVATION_CONFLICT from the device, it should
21403  * immediately panic the host. The motivation for this ioctl is that if this
21404  * host does encounter reservation conflict, the underlying cause is that some
21405  * other host of the cluster has decided that this host is no longer in the
21406  * cluster and has seized control of the disks for itself. Since this host is no
21407  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
21408  * does two things:
21409  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21410  *      error to panic the host
21411  *      (b) it sets up a periodic timer to test whether this host still has
21412  *      "access" (in that no other host has reserved the device):  if the
21413  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21414  *      purpose of that periodic timer is to handle scenarios where the host is
21415  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21416  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21417  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21418  * the device itself.
21419  *
21420  * SCSI-3 PGR
21421  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21422  * facility is supported through the shared multihost disk ioctls
21423  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21424  * MHIOCGRP_PREEMPTANDABORT)
21425  *
21426  * Reservation Reclaim:
21427  * --------------------
21428  * To support the lost reservation reclaim operations this driver creates a
21429  * single thread to handle reinstating reservations on all devices that have
21430  * lost reservations. sd_resv_reclaim_requests are logged for all devices
21431  * that have LOST RESERVATIONS when the scsi watch facility calls back
21432  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
21433  * requests to regain the lost reservations.
21434  */
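
/*
 * Illustrative sketch (not part of the driver): the SCSI-2 ownership
 * sequence described above, as the body of a hypothetical cluster-node
 * routine might issue it. do_io() stands in for whatever I/O the node
 * performs while it owns the disk.
 *
 *	int	millisecs;
 *
 *	if (ioctl(fd, MHIOCTKOWN, NULL) != 0)
 *		return (-1);
 *	millisecs = 1000;
 *	if (ioctl(fd, MHIOCENFAILFAST, &millisecs) != 0)
 *		return (-1);
 *	do_io(fd);
 *	millisecs = 0;
 *	(void) ioctl(fd, MHIOCENFAILFAST, &millisecs);
 *	(void) ioctl(fd, MHIOCRELEASE, NULL);
 */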
21435 
21436 /*
21437  *    Function: sd_check_mhd()
21438  *
21439  * Description: This function sets up and submits a scsi watch request or
21440  *		terminates an existing watch request. This routine is used in
21441  *		support of reservation reclaim.
21442  *
21443  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21444  *			 among multiple watches that share the callback function
21445  *		interval - the number of milliseconds specifying the watch
21446  *			   interval for issuing TEST UNIT READY commands. If
21447  *			   set to 0 the watch should be terminated. If the
21448  *			   interval is set to 0 and if the device is required
21449  *			   to hold reservation while disabling failfast, the
21450  *			   watch is restarted with an interval of
21451  *			   reinstate_resv_delay.
21452  *
21453  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21454  *		ENXIO      - Indicates an invalid device was specified
21455  *		EAGAIN     - Unable to submit the scsi watch request
21456  */
21457 
21458 static int
21459 sd_check_mhd(dev_t dev, int interval)
21460 {
21461 	struct sd_lun	*un;
21462 	opaque_t	token;
21463 
21464 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21465 		return (ENXIO);
21466 	}
21467 
21468 	/* is this a watch termination request? */
21469 	if (interval == 0) {
21470 		mutex_enter(SD_MUTEX(un));
21471 		/* if there is an existing watch task then terminate it */
21472 		if (un->un_mhd_token) {
21473 			token = un->un_mhd_token;
21474 			un->un_mhd_token = NULL;
21475 			mutex_exit(SD_MUTEX(un));
21476 			(void) scsi_watch_request_terminate(token,
21477 			    SCSI_WATCH_TERMINATE_WAIT);
21478 			mutex_enter(SD_MUTEX(un));
21479 		} else {
21480 			mutex_exit(SD_MUTEX(un));
21481 			/*
21482 			 * Note: If we return here we don't check for the
21483 			 * failfast case. This is the original legacy
21484 			 * implementation but perhaps we should be checking
21485 			 * the failfast case.
21486 			 */
21487 			return (0);
21488 		}
21489 		/*
21490 		 * If the device is required to hold reservation while
21491 		 * disabling failfast, we need to restart the scsi_watch
21492 		 * routine with an interval of reinstate_resv_delay.
21493 		 */
21494 		if (un->un_resvd_status & SD_RESERVE) {
21495 			interval = sd_reinstate_resv_delay/1000;
21496 		} else {
21497 			/* no failfast so bail */
21498 			mutex_exit(SD_MUTEX(un));
21499 			return (0);
21500 		}
21501 		mutex_exit(SD_MUTEX(un));
21502 	}
21503 
21504 	/*
21505 	 * adjust minimum time interval to 1 second,
21506 	 * and convert from msecs to usecs
21507 	 */
21508 	if (interval > 0 && interval < 1000) {
21509 		interval = 1000;
21510 	}
21511 	interval *= 1000;
21512 
21513 	/*
21514 	 * submit the request to the scsi_watch service
21515 	 */
21516 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21517 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21518 	if (token == NULL) {
21519 		return (EAGAIN);
21520 	}
21521 
21522 	/*
21523 	 * save token for termination later on
21524 	 */
21525 	mutex_enter(SD_MUTEX(un));
21526 	un->un_mhd_token = token;
21527 	mutex_exit(SD_MUTEX(un));
21528 	return (0);
21529 }
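
/*
 * Worked example of the interval adjustment above: a caller passing
 * interval = 500 (milliseconds) is first clamped to 1000 ms and then
 * converted to 1000 * 1000 = 1,000,000 microseconds for
 * scsi_watch_request_submit(); a caller passing 2000 ms is submitted
 * unclamped as 2,000,000 microseconds.
 */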
21530 
21531 
21532 /*
21533  *    Function: sd_mhd_watch_cb()
21534  *
21535  * Description: This function is the call back function used by the scsi watch
21536  *		facility. The scsi watch facility sends the "Test Unit Ready"
21537  *		and processes the status. If applicable (i.e. a "Unit Attention"
21538  *		status and automatic "Request Sense" not used) the scsi watch
21539  *		facility will send a "Request Sense" and retrieve the sense data
21540  *		to be passed to this callback function. In either case, whether
21541  *		the "Request Sense" was automatic or submitted by the facility,
21542  *		this callback is passed the status and sense data.
21543  *
21544  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21545  *			among multiple watches that share this callback function
21546  *		resultp - scsi watch facility result packet containing scsi
21547  *			  packet, status byte and sense data
21548  *
21549  * Return Code: 0 - continue the watch task
21550  *		non-zero - terminate the watch task
21551  */
21552 
21553 static int
21554 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21555 {
21556 	struct sd_lun			*un;
21557 	struct scsi_status		*statusp;
21558 	uint8_t				*sensep;
21559 	struct scsi_pkt			*pkt;
21560 	uchar_t				actual_sense_length;
21561 	dev_t  				dev = (dev_t)arg;
21562 
21563 	ASSERT(resultp != NULL);
21564 	statusp			= resultp->statusp;
21565 	sensep			= (uint8_t *)resultp->sensep;
21566 	pkt			= resultp->pkt;
21567 	actual_sense_length	= resultp->actual_sense_length;
21568 
21569 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21570 		return (ENXIO);
21571 	}
21572 
21573 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21574 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21575 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21576 
21577 	/* Begin processing of the status and/or sense data */
21578 	if (pkt->pkt_reason != CMD_CMPLT) {
21579 		/* Handle the incomplete packet */
21580 		sd_mhd_watch_incomplete(un, pkt);
21581 		return (0);
21582 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21583 		if (*((unsigned char *)statusp)
21584 		    == STATUS_RESERVATION_CONFLICT) {
21585 			/*
21586 			 * Handle a reservation conflict by panicking if
21587 			 * configured for failfast or by logging the conflict
21588 			 * and updating the reservation status
21589 			 */
21590 			mutex_enter(SD_MUTEX(un));
21591 			if ((un->un_resvd_status & SD_FAILFAST) &&
21592 			    (sd_failfast_enable)) {
21593 				sd_panic_for_res_conflict(un);
21594 				/*NOTREACHED*/
21595 			}
21596 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21597 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21598 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21599 			mutex_exit(SD_MUTEX(un));
21600 		}
21601 	}
21602 
21603 	if (sensep != NULL) {
21604 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21605 			mutex_enter(SD_MUTEX(un));
21606 			if ((scsi_sense_asc(sensep) ==
21607 			    SD_SCSI_RESET_SENSE_CODE) &&
21608 			    (un->un_resvd_status & SD_RESERVE)) {
21609 				/*
21610 				 * The additional sense code indicates a power
21611 				 * on or bus device reset has occurred; update
21612 				 * the reservation status.
21613 				 */
21614 				un->un_resvd_status |=
21615 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21616 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21617 				    "sd_mhd_watch_cb: Lost Reservation\n");
21618 			}
21619 		} else {
21620 			return (0);
21621 		}
21622 	} else {
21623 		mutex_enter(SD_MUTEX(un));
21624 	}
21625 
21626 	if ((un->un_resvd_status & SD_RESERVE) &&
21627 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21628 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21629 			/*
21630 			 * A reset occurred in between the last probe and this
21631 			 * one so if a timeout is pending cancel it.
21632 			 */
21633 			if (un->un_resvd_timeid) {
21634 				timeout_id_t temp_id = un->un_resvd_timeid;
21635 				un->un_resvd_timeid = NULL;
21636 				mutex_exit(SD_MUTEX(un));
21637 				(void) untimeout(temp_id);
21638 				mutex_enter(SD_MUTEX(un));
21639 			}
21640 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21641 		}
21642 		if (un->un_resvd_timeid == 0) {
21643 			/* Schedule a timeout to handle the lost reservation */
21644 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21645 			    (void *)dev,
21646 			    drv_usectohz(sd_reinstate_resv_delay));
21647 		}
21648 	}
21649 	mutex_exit(SD_MUTEX(un));
21650 	return (0);
21651 }
21652 
21653 
21654 /*
21655  *    Function: sd_mhd_watch_incomplete()
21656  *
21657  * Description: This function is used to find out why a scsi pkt sent by the
21658  *		scsi watch facility was not completed. Under some scenarios this
21659  *		routine will return. Otherwise it will send a bus reset to see
21660  *		if the drive is still online.
21661  *
21662  *   Arguments: un  - driver soft state (unit) structure
21663  *		pkt - incomplete scsi pkt
21664  */
21665 
21666 static void
21667 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
21668 {
21669 	int	be_chatty;
21670 	int	perr;
21671 
21672 	ASSERT(pkt != NULL);
21673 	ASSERT(un != NULL);
21674 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
21675 	perr		= (pkt->pkt_statistics & STAT_PERR);
21676 
21677 	mutex_enter(SD_MUTEX(un));
21678 	if (un->un_state == SD_STATE_DUMPING) {
21679 		mutex_exit(SD_MUTEX(un));
21680 		return;
21681 	}
21682 
21683 	switch (pkt->pkt_reason) {
21684 	case CMD_UNX_BUS_FREE:
21685 		/*
21686 		 * If we had a parity error that caused the target to drop BSY*,
21687 		 * don't be chatty about it.
21688 		 */
21689 		if (perr && be_chatty) {
21690 			be_chatty = 0;
21691 		}
21692 		break;
21693 	case CMD_TAG_REJECT:
21694 		/*
21695 		 * The SCSI-2 spec states that a tag reject will be sent by the
21696 		 * target if tagged queuing is not supported. A tag reject may
21697 		 * also be sent during certain initialization periods or to
21698 		 * control internal resources. For the latter case the target
21699 		 * may also return Queue Full.
21700 		 *
21701 		 * If this driver receives a tag reject from a target that is
21702 		 * going through an init period or controlling internal
21703 		 * resources tagged queuing will be disabled. This is a less
21704 		 * than optimal behavior but the driver is unable to determine
21705 		 * the target state and assumes tagged queueing is not supported.
21706 		 */
21707 		pkt->pkt_flags = 0;
21708 		un->un_tagflags = 0;
21709 
21710 		if (un->un_f_opt_queueing == TRUE) {
21711 			un->un_throttle = min(un->un_throttle, 3);
21712 		} else {
21713 			un->un_throttle = 1;
21714 		}
21715 		mutex_exit(SD_MUTEX(un));
21716 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
21717 		mutex_enter(SD_MUTEX(un));
21718 		break;
21719 	case CMD_INCOMPLETE:
21720 		/*
21721 		 * The transport stopped with an abnormal state, fallthrough and
21722 		 * reset the target and/or bus unless selection did not complete
21723 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
21724 		 * go through a target/bus reset
21725 		 */
21726 		if (pkt->pkt_state == STATE_GOT_BUS) {
21727 			break;
21728 		}
21729 		/*FALLTHROUGH*/
21730 
21731 	case CMD_TIMEOUT:
21732 	default:
21733 		/*
21734 		 * The lun may still be running the command, so a lun reset
21735 		 * should be attempted. If the lun reset fails or cannot be
21736 		 * issued, then try a target reset. Lastly try a bus reset.
21737 		 */
21738 		if ((pkt->pkt_statistics &
21739 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
21740 			int reset_retval = 0;
21741 			mutex_exit(SD_MUTEX(un));
21742 			if (un->un_f_allow_bus_device_reset == TRUE) {
21743 				if (un->un_f_lun_reset_enabled == TRUE) {
21744 					reset_retval =
21745 					    scsi_reset(SD_ADDRESS(un),
21746 					    RESET_LUN);
21747 				}
21748 				if (reset_retval == 0) {
21749 					reset_retval =
21750 					    scsi_reset(SD_ADDRESS(un),
21751 					    RESET_TARGET);
21752 				}
21753 			}
21754 			if (reset_retval == 0) {
21755 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
21756 			}
21757 			mutex_enter(SD_MUTEX(un));
21758 		}
21759 		break;
21760 	}
21761 
21762 	/* A device/bus reset has occurred; update the reservation status. */
21763 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
21764 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
21765 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21766 			un->un_resvd_status |=
21767 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21768 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21769 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
21770 		}
21771 	}
21772 
21773 	/*
21774 	 * The disk has been turned off; Update the device state.
21775 	 *
21776 	 * Note: Should we be offlining the disk here?
21777 	 */
21778 	if (pkt->pkt_state == STATE_GOT_BUS) {
21779 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
21780 		    "Disk not responding to selection\n");
21781 		if (un->un_state != SD_STATE_OFFLINE) {
21782 			New_state(un, SD_STATE_OFFLINE);
21783 		}
21784 	} else if (be_chatty) {
21785 		/*
21786 		 * suppress messages if they are all the same pkt reason;
21787 		 * with TQ, many (up to 256) are returned with the same
21788 		 * pkt_reason
21789 		 */
21790 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
21791 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
21792 			    "sd_mhd_watch_incomplete: "
21793 			    "SCSI transport failed: reason '%s'\n",
21794 			    scsi_rname(pkt->pkt_reason));
21795 		}
21796 	}
21797 	un->un_last_pkt_reason = pkt->pkt_reason;
21798 	mutex_exit(SD_MUTEX(un));
21799 }
21800 
21801 
21802 /*
21803  *    Function: sd_sname()
21804  *
21805  * Description: This is a simple little routine to return a string containing
21806  *		a printable description of command status byte for use in
21807  *		logging.
21808  *
21809  *   Arguments: status - pointer to a status byte
21810  *
21811  * Return Code: char * - string containing status description.
21812  */
21813 
21814 static char *
21815 sd_sname(uchar_t status)
21816 {
21817 	switch (status & STATUS_MASK) {
21818 	case STATUS_GOOD:
21819 		return ("good status");
21820 	case STATUS_CHECK:
21821 		return ("check condition");
21822 	case STATUS_MET:
21823 		return ("condition met");
21824 	case STATUS_BUSY:
21825 		return ("busy");
21826 	case STATUS_INTERMEDIATE:
21827 		return ("intermediate");
21828 	case STATUS_INTERMEDIATE_MET:
21829 		return ("intermediate - condition met");
21830 	case STATUS_RESERVATION_CONFLICT:
21831 		return ("reservation_conflict");
21832 	case STATUS_TERMINATED:
21833 		return ("command terminated");
21834 	case STATUS_QFULL:
21835 		return ("queue full");
21836 	default:
21837 		return ("<unknown status>");
21838 	}
21839 }
21840 
21841 
21842 /*
21843  *    Function: sd_mhd_resvd_recover()
21844  *
21845  * Description: This function adds a reservation entry to the
21846  *		sd_resv_reclaim_request list and signals the reservation
21847  *		reclaim thread that there is work pending. If the reservation
21848  *		reclaim thread has not been previously created this function
21849  *		will kick it off.
21850  *
21851  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21852  *			among multiple watches that share this callback function
21853  *
21854  *     Context: This routine is called by timeout() and is run in interrupt
21855  *		context. It must not sleep or call other functions which may
21856  *		sleep.
21857  */
21858 
21859 static void
21860 sd_mhd_resvd_recover(void *arg)
21861 {
21862 	dev_t			dev = (dev_t)arg;
21863 	struct sd_lun		*un;
21864 	struct sd_thr_request	*sd_treq = NULL;
21865 	struct sd_thr_request	*sd_cur = NULL;
21866 	struct sd_thr_request	*sd_prev = NULL;
21867 	int			already_there = 0;
21868 
21869 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21870 		return;
21871 	}
21872 
21873 	mutex_enter(SD_MUTEX(un));
21874 	un->un_resvd_timeid = NULL;
21875 	if (un->un_resvd_status & SD_WANT_RESERVE) {
21876 		/*
21877 		 * There was a reset so don't issue the reserve, allow the
21878 		 * sd_mhd_watch_cb callback function to notice this and
21879 		 * reschedule the timeout for reservation.
21880 		 */
21881 		mutex_exit(SD_MUTEX(un));
21882 		return;
21883 	}
21884 	mutex_exit(SD_MUTEX(un));
21885 
21886 	/*
21887 	 * Add this device to the sd_resv_reclaim_request list and the
21888 	 * sd_resv_reclaim_thread should take care of the rest.
21889 	 *
21890 	 * Note: We can't sleep in this context so if the memory allocation
21891 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
21892 	 * reschedule the timeout for reservation.  (4378460)
21893 	 */
21894 	sd_treq = (struct sd_thr_request *)
21895 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
21896 	if (sd_treq == NULL) {
21897 		return;
21898 	}
21899 
21900 	sd_treq->sd_thr_req_next = NULL;
21901 	sd_treq->dev = dev;
21902 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21903 	if (sd_tr.srq_thr_req_head == NULL) {
21904 		sd_tr.srq_thr_req_head = sd_treq;
21905 	} else {
21906 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
21907 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
21908 			if (sd_cur->dev == dev) {
21909 				/*
21910 				 * already in Queue so don't log
21911 				 * another request for the device
21912 				 */
21913 				already_there = 1;
21914 				break;
21915 			}
21916 			sd_prev = sd_cur;
21917 		}
21918 		if (!already_there) {
21919 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
21920 			    "logging request for %lx\n", dev);
21921 			sd_prev->sd_thr_req_next = sd_treq;
21922 		} else {
21923 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
21924 		}
21925 	}
21926 
21927 	/*
21928 	 * Create a kernel thread to do the reservation reclaim and free up this
21929 	 * thread. We cannot block this thread while we go away to do the
21930 	 * reservation reclaim.
21931 	 */
21932 	if (sd_tr.srq_resv_reclaim_thread == NULL)
21933 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
21934 		    sd_resv_reclaim_thread, NULL,
21935 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
21936 
21937 	/* Tell the reservation reclaim thread that it has work to do */
21938 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
21939 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21940 }
21941 
21942 /*
21943  *    Function: sd_resv_reclaim_thread()
21944  *
21945  * Description: This function implements the reservation reclaim operations
21946  *
21947  *   Arguments: none; pending requests are taken from the global
21948  *		      sd_tr.srq_thr_req_head queue
21949  */
21950 
21951 static void
21952 sd_resv_reclaim_thread()
21953 {
21954 	struct sd_lun		*un;
21955 	struct sd_thr_request	*sd_mhreq;
21956 
21957 	/* Wait for work */
21958 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21959 	if (sd_tr.srq_thr_req_head == NULL) {
21960 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
21961 		    &sd_tr.srq_resv_reclaim_mutex);
21962 	}
21963 
21964 	/* Loop while we have work */
21965 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
21966 		un = ddi_get_soft_state(sd_state,
21967 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
21968 		if (un == NULL) {
21969 			/*
21970 			 * softstate structure is NULL so just
21971 			 * dequeue the request and continue
21972 			 */
21973 			sd_tr.srq_thr_req_head =
21974 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21975 			kmem_free(sd_tr.srq_thr_cur_req,
21976 			    sizeof (struct sd_thr_request));
21977 			continue;
21978 		}
21979 
21980 		/* dequeue the request */
21981 		sd_mhreq = sd_tr.srq_thr_cur_req;
21982 		sd_tr.srq_thr_req_head =
21983 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21984 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21985 
21986 		/*
21987 		 * Reclaim reservation only if SD_RESERVE is still set. There
21988 		 * may have been a call to MHIOCRELEASE before we got here.
21989 		 */
21990 		mutex_enter(SD_MUTEX(un));
21991 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21992 			/*
21993 			 * Note: The SD_LOST_RESERVE flag is cleared before
21994 			 * reclaiming the reservation. If this is done after the
21995 			 * call to sd_reserve_release a reservation loss in the
21996 			 * window between pkt completion of reserve cmd and
21997 			 * mutex_enter below may not be recognized.
21998 			 */
21999 			un->un_resvd_status &= ~SD_LOST_RESERVE;
22000 			mutex_exit(SD_MUTEX(un));
22001 
22002 			if (sd_reserve_release(sd_mhreq->dev,
22003 			    SD_RESERVE) == 0) {
22004 				mutex_enter(SD_MUTEX(un));
22005 				un->un_resvd_status |= SD_RESERVE;
22006 				mutex_exit(SD_MUTEX(un));
22007 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22008 				    "sd_resv_reclaim_thread: "
22009 				    "Reservation Recovered\n");
22010 			} else {
22011 				mutex_enter(SD_MUTEX(un));
22012 				un->un_resvd_status |= SD_LOST_RESERVE;
22013 				mutex_exit(SD_MUTEX(un));
22014 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22015 				    "sd_resv_reclaim_thread: Failed "
22016 				    "Reservation Recovery\n");
22017 			}
22018 		} else {
22019 			mutex_exit(SD_MUTEX(un));
22020 		}
22021 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22022 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
22023 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22024 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
22025 		/*
22026 		 * wakeup the destroy thread if anyone is waiting on
22027 		 * us to complete.
22028 		 */
22029 		cv_signal(&sd_tr.srq_inprocess_cv);
22030 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
22031 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
22032 	}
22033 
22034 	/*
22035 	 * Clean up the sd_tr structure now that this thread is exiting.
22036 	 */
22037 	ASSERT(sd_tr.srq_thr_req_head == NULL);
22038 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
22039 	sd_tr.srq_resv_reclaim_thread = NULL;
22040 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22041 	thread_exit();
22042 }
22043 
22044 
22045 /*
22046  *    Function: sd_rmv_resv_reclaim_req()
22047  *
22048  * Description: This function removes any pending reservation reclaim requests
22049  *		for the specified device.
22050  *
22051  *   Arguments: dev - the device 'dev_t'
22052  */
22053 
22054 static void
22055 sd_rmv_resv_reclaim_req(dev_t dev)
22056 {
22057 	struct sd_thr_request *sd_mhreq;
22058 	struct sd_thr_request *sd_prev;
22059 
22060 	/* Remove a reservation reclaim request from the list */
22061 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22062 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22063 		/*
22064 		 * We are attempting to reinstate reservation for
22065 		 * this device. We wait for sd_reserve_release()
22066 		 * to return before we return.
22067 		 */
22068 		cv_wait(&sd_tr.srq_inprocess_cv,
22069 		    &sd_tr.srq_resv_reclaim_mutex);
22070 	} else {
22071 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22072 		if (sd_mhreq && sd_mhreq->dev == dev) {
22073 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22074 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22075 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22076 			return;
22077 		}
22078 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22079 			if (sd_mhreq && sd_mhreq->dev == dev) {
22080 				break;
22081 			}
22082 			sd_prev = sd_mhreq;
22083 		}
22084 		if (sd_mhreq != NULL) {
22085 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22086 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22087 		}
22088 	}
22089 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22090 }
22091 
22092 
22093 /*
22094  *    Function: sd_mhd_reset_notify_cb()
22095  *
22096  * Description: This is a call back function for scsi_reset_notify. This
22097  *		function updates the softstate reserved status and logs the
22098  *		reset. The driver scsi watch facility callback function
22099  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22100  *		will reclaim the reservation.
22101  *
22102  *   Arguments: arg  - driver soft state (unit) structure
22103  */
22104 
22105 static void
22106 sd_mhd_reset_notify_cb(caddr_t arg)
22107 {
22108 	struct sd_lun *un = (struct sd_lun *)arg;
22109 
22110 	mutex_enter(SD_MUTEX(un));
22111 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22112 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22113 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22114 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22115 	}
22116 	mutex_exit(SD_MUTEX(un));
22117 }
22118 
22119 
22120 /*
22121  *    Function: sd_take_ownership()
22122  *
22123  * Description: This routine implements an algorithm to achieve a stable
22124  *		reservation on disks which don't implement priority reserve,
22125  *		and makes sure that other hosts lose re-reservation attempts.
22126  *		This algorithm consists of a loop that keeps issuing the RESERVE
22127  *		for some period of time (min_ownership_delay, default 6 seconds).
22128  *		During that loop, it looks to see if there has been a bus device
22129  *		reset or bus reset (both of which cause an existing reservation
22130  *		to be lost). If the reservation is lost issue RESERVE until a
22131  *		period of min_ownership_delay with no resets has gone by, or
22132  *		until max_ownership_delay has expired. This loop ensures that
22133  *		the host really did manage to reserve the device, in spite of
22134  *		resets. The looping for min_ownership_delay (default six
22135  *		seconds) is important to early generation clustering products,
22136  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22137  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22138  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22139  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22140  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22141  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22142  *		no longer "owns" the disk and will have panicked itself.  Thus,
22143  *		the host issuing the MHIOCTKOWN is assured (with timing
22144  *		dependencies) that by the time it actually starts to use the
22145  *		disk for real work, the old owner is no longer accessing it.
22146  *
22147  *		min_ownership_delay is the minimum amount of time for which the
22148  *		disk must be reserved continuously, devoid of resets, before the
22149  *		MHIOCTKOWN ioctl will return success.
22150  *
22151  *		max_ownership_delay indicates the amount of time within which
22152  *		the take ownership must either succeed or time out with an error.
22153  *
22154  *   Arguments: dev - the device 'dev_t'
22155  *		*p  - struct containing timing info.
22156  *
22157  * Return Code: 0 for success or error code
22158  */
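
/*
 * Illustrative userland usage (a hedged sketch, not part of the driver).
 * MHIOCTKOWN takes an optional struct mhioctkown from <sys/mhd.h>; the
 * delay fields are in milliseconds and "fd" is a hypothetical file
 * descriptor for an open disk device.  Zero delay fields fall back to
 * the defaults used below.
 *
 *	struct mhioctkown t;
 *	bzero(&t, sizeof (t));
 *	t.min_ownership_delay = 6000;
 *	t.max_ownership_delay = 30000;
 *	if (ioctl(fd, MHIOCTKOWN, &t) != 0)
 *		perror("MHIOCTKOWN");
 */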
22159 
22160 static int
22161 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22162 {
22163 	struct sd_lun	*un;
22164 	int		rval;
22165 	int		err;
22166 	int		reservation_count   = 0;
22167 	int		min_ownership_delay =  6000000; /* in usec */
22168 	int		max_ownership_delay = 30000000; /* in usec */
22169 	clock_t		start_time;	/* starting time of this algorithm */
22170 	clock_t		end_time;	/* time limit for giving up */
22171 	clock_t		ownership_time;	/* time limit for stable ownership */
22172 	clock_t		current_time;
22173 	clock_t		previous_current_time;
22174 
22175 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22176 		return (ENXIO);
22177 	}
22178 
22179 	/*
22180 	 * Attempt a device reservation. A priority reservation is requested.
22181 	 */
22182 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22183 	    != SD_SUCCESS) {
22184 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22185 		    "sd_take_ownership: return(1)=%d\n", rval);
22186 		return (rval);
22187 	}
22188 
22189 	/* Update the softstate reserved status to indicate the reservation */
22190 	mutex_enter(SD_MUTEX(un));
22191 	un->un_resvd_status |= SD_RESERVE;
22192 	un->un_resvd_status &=
22193 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22194 	mutex_exit(SD_MUTEX(un));
22195 
22196 	if (p != NULL) {
22197 		if (p->min_ownership_delay != 0) {
22198 			min_ownership_delay = p->min_ownership_delay * 1000;
22199 		}
22200 		if (p->max_ownership_delay != 0) {
22201 			max_ownership_delay = p->max_ownership_delay * 1000;
22202 		}
22203 	}
22204 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22205 	    "sd_take_ownership: min, max delays: %d, %d\n",
22206 	    min_ownership_delay, max_ownership_delay);
22207 
22208 	start_time = ddi_get_lbolt();
22209 	current_time	= start_time;
22210 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22211 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22212 
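	/*
	 * With the defaults, the loop below polls every 500 msec; stable
	 * ownership requires at least four consecutive successful
	 * reserves AND that min_ownership_delay (6 sec) elapse without a
	 * reset, and the attempt gives up with EACCES once
	 * max_ownership_delay (30 sec) expires.
	 */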
22213 	while (current_time - end_time < 0) {
22214 		delay(drv_usectohz(500000));
22215 
22216 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22217 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22218 				mutex_enter(SD_MUTEX(un));
22219 				rval = (un->un_resvd_status &
22220 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22221 				mutex_exit(SD_MUTEX(un));
22222 				break;
22223 			}
22224 		}
22225 		previous_current_time = current_time;
22226 		current_time = ddi_get_lbolt();
22227 		mutex_enter(SD_MUTEX(un));
22228 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22229 			ownership_time = ddi_get_lbolt() +
22230 			    drv_usectohz(min_ownership_delay);
22231 			reservation_count = 0;
22232 		} else {
22233 			reservation_count++;
22234 		}
22235 		un->un_resvd_status |= SD_RESERVE;
22236 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22237 		mutex_exit(SD_MUTEX(un));
22238 
22239 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22240 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22241 		    "reservation=%s\n", (current_time - previous_current_time),
22242 		    reservation_count ? "ok" : "reclaimed");
22243 
22244 		if (current_time - ownership_time >= 0 &&
22245 		    reservation_count >= 4) {
22246 			rval = 0; /* Achieved a stable ownership */
22247 			break;
22248 		}
22249 		if (current_time - end_time >= 0) {
22250 			rval = EACCES; /* No ownership in max possible time */
22251 			break;
22252 		}
22253 	}
22254 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22255 	    "sd_take_ownership: return(2)=%d\n", rval);
22256 	return (rval);
22257 }
22258 
22259 
22260 /*
22261  *    Function: sd_reserve_release()
22262  *
22263  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22264  *		PRIORITY RESERVE commands based on a user specified command type
22265  *
22266  *   Arguments: dev - the device 'dev_t'
22267  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22268  *		      SD_RESERVE, SD_RELEASE
22269  *
22270  * Return Code: 0 or Error Code
22271  */
22272 
22273 static int
22274 sd_reserve_release(dev_t dev, int cmd)
22275 {
22276 	struct uscsi_cmd	*com = NULL;
22277 	struct sd_lun		*un = NULL;
22278 	char			cdb[CDB_GROUP0];
22279 	int			rval;
22280 
22281 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22282 	    (cmd == SD_PRIORITY_RESERVE));
22283 
22284 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22285 		return (ENXIO);
22286 	}
22287 
22288 	/* instantiate and initialize the command and cdb */
22289 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22290 	bzero(cdb, CDB_GROUP0);
22291 	com->uscsi_flags   = USCSI_SILENT;
22292 	com->uscsi_timeout = un->un_reserve_release_time;
22293 	com->uscsi_cdblen  = CDB_GROUP0;
22294 	com->uscsi_cdb	   = cdb;
22295 	if (cmd == SD_RELEASE) {
22296 		cdb[0] = SCMD_RELEASE;
22297 	} else {
22298 		cdb[0] = SCMD_RESERVE;
22299 	}
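
	/*
	 * The resulting Group 0 CDB is all zeros except the opcode byte
	 * (RESERVE(6) is 0x16, RELEASE(6) is 0x17); a priority reserve
	 * sends the same RESERVE CDB and differs only in the reset
	 * handling below.
	 */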
22300 
22301 	/* Send the command. */
22302 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22303 	    SD_PATH_STANDARD);
22304 
22305 	/*
22306 	 * "break" a reservation that is held by another host, by issuing a
22307 	 * reset if priority reserve is desired, and we could not get the
22308 	 * device.
22309 	 */
22310 	if ((cmd == SD_PRIORITY_RESERVE) &&
22311 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22312 		/*
22313 		 * First try to reset the LUN. If we cannot, then try a target
22314 		 * reset, followed by a bus reset if the target reset fails.
22315 		 */
22316 		int reset_retval = 0;
22317 		if (un->un_f_lun_reset_enabled == TRUE) {
22318 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22319 		}
22320 		if (reset_retval == 0) {
22321 			/* The LUN reset either failed or was not issued */
22322 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22323 		}
22324 		if ((reset_retval == 0) &&
22325 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22326 			rval = EIO;
22327 			kmem_free(com, sizeof (*com));
22328 			return (rval);
22329 		}
22330 
22331 		bzero(com, sizeof (struct uscsi_cmd));
22332 		com->uscsi_flags   = USCSI_SILENT;
22333 		com->uscsi_cdb	   = cdb;
22334 		com->uscsi_cdblen  = CDB_GROUP0;
22335 		com->uscsi_timeout = 5;
22336 
22337 		/*
22338 		 * Reissue the last reserve command, this time without request
22339 		 * sense.  Assume that it is just a regular reserve command.
22340 		 */
22341 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22342 		    SD_PATH_STANDARD);
22343 	}
22344 
22345 	/* Return an error if still getting a reservation conflict. */
22346 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22347 		rval = EACCES;
22348 	}
22349 
22350 	kmem_free(com, sizeof (*com));
22351 	return (rval);
22352 }
22353 
22354 
22355 #define	SD_NDUMP_RETRIES	12
22356 /*
22357  *	System Crash Dump routine
22358  */
22359 
22360 static int
22361 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22362 {
22363 	int		instance;
22364 	int		partition;
22365 	int		i;
22366 	int		err;
22367 	struct sd_lun	*un;
22368 	struct scsi_pkt *wr_pktp;
22369 	struct buf	*wr_bp;
22370 	struct buf	wr_buf;
22371 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22372 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22373 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22374 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22375 	size_t		io_start_offset;
22376 	int		doing_rmw = FALSE;
22377 	int		rval;
22378 #if defined(__i386) || defined(__amd64)
22379 	ssize_t dma_resid;
22380 	daddr_t oblkno;
22381 #endif
22382 	diskaddr_t	nblks = 0;
22383 	diskaddr_t	start_block;
22384 
22385 	instance = SDUNIT(dev);
22386 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22387 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22388 		return (ENXIO);
22389 	}
22390 
22391 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22392 
22393 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22394 
22395 	partition = SDPART(dev);
22396 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22397 
22398 	/* Validate the blocks to dump against the partition size. */
22399 
22400 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22401 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22402 
22403 	if ((blkno + nblk) > nblks) {
22404 		SD_TRACE(SD_LOG_DUMP, un,
22405 		    "sddump: dump range larger than partition: "
22406 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22407 		    blkno, nblk, nblks);
22408 		return (EINVAL);
22409 	}
22410 
22411 	mutex_enter(&un->un_pm_mutex);
22412 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22413 		struct scsi_pkt *start_pktp;
22414 
22415 		mutex_exit(&un->un_pm_mutex);
22416 
22417 		/*
22418 		 * use pm framework to power on HBA 1st
22419 		 */
22420 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22421 
22422 		/*
22423 		 * Dump no longer uses sdpower to power on a device; it is
22424 		 * done in-line here so it can be performed in polled mode.
22425 		 */
22426 
22427 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22428 
22429 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22430 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22431 
22432 		if (start_pktp == NULL) {
22433 			/* We were not given a SCSI packet, fail. */
22434 			return (EIO);
22435 		}
22436 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22437 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22438 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22439 		start_pktp->pkt_flags = FLAG_NOINTR;
22440 
22441 		mutex_enter(SD_MUTEX(un));
22442 		SD_FILL_SCSI1_LUN(un, start_pktp);
22443 		mutex_exit(SD_MUTEX(un));
22444 		/*
22445 		 * Scsi_poll returns 0 (success) if the command completes and
22446 		 * the status block is STATUS_GOOD.
22447 		 */
22448 		if (sd_scsi_poll(un, start_pktp) != 0) {
22449 			scsi_destroy_pkt(start_pktp);
22450 			return (EIO);
22451 		}
22452 		scsi_destroy_pkt(start_pktp);
22453 		(void) sd_ddi_pm_resume(un);
22454 	} else {
22455 		mutex_exit(&un->un_pm_mutex);
22456 	}
22457 
22458 	mutex_enter(SD_MUTEX(un));
22459 	un->un_throttle = 0;
22460 
22461 	/*
22462 	 * The first time through, reset the specific target device.
22463 	 * However, when cpr calls sddump we know that sd is in a
22464 	 * good state so no bus reset is required.
22465 	 * Clear sense data via Request Sense cmd.
22466 	 * In sddump we don't care about allow_bus_device_reset anymore.
22467 	 */
22468 
22469 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22470 	    (un->un_state != SD_STATE_DUMPING)) {
22471 
22472 		New_state(un, SD_STATE_DUMPING);
22473 
22474 		if (un->un_f_is_fibre == FALSE) {
22475 			mutex_exit(SD_MUTEX(un));
22476 			/*
22477 			 * Attempt a bus reset for parallel scsi.
22478 			 *
22479 			 * Note: A bus reset is required because on some host
22480 			 * systems (i.e. E420R) a bus device reset is
22481 			 * insufficient to reset the state of the target.
22482 			 *
22483 			 * Note: Don't issue the reset for fibre-channel,
22484 			 * because this tends to hang the bus (loop) for
22485 			 * too long while everyone is logging out and in
22486 			 * and the deadman timer for dumping will fire
22487 			 * before the dump is complete.
22488 			 */
22489 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22490 				mutex_enter(SD_MUTEX(un));
22491 				Restore_state(un);
22492 				mutex_exit(SD_MUTEX(un));
22493 				return (EIO);
22494 			}
22495 
22496 			/* Delay to give the device some recovery time. */
22497 			drv_usecwait(10000);
22498 
22499 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22500 				SD_INFO(SD_LOG_DUMP, un,
22501 					"sddump: sd_send_polled_RQS failed\n");
22502 			}
22503 			mutex_enter(SD_MUTEX(un));
22504 		}
22505 	}
22506 
22507 	/*
22508 	 * Convert the partition-relative block number to a
22509 	 * disk physical block number.
22510 	 */
22511 	blkno += start_block;
22512 
22513 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22514 
22515 
22516 	/*
22517 	 * Check if the device has a non-512 block size.
22518 	 */
22519 	wr_bp = NULL;
22520 	if (NOT_DEVBSIZE(un)) {
22521 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22522 		tgt_byte_count = nblk * un->un_sys_blocksize;
22523 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22524 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22525 			doing_rmw = TRUE;
22526 			/*
22527 			 * Calculate the block number and number of block
22528 			 * in terms of the media block size.
22529 			 */
22530 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22531 			tgt_nblk =
22532 			    ((tgt_byte_offset + tgt_byte_count +
22533 			    (un->un_tgt_blocksize - 1)) /
22534 			    un->un_tgt_blocksize) - tgt_blkno;
22535 
22536 			/*
22537 			 * Invoke the routine which is going to do read part
22538 			 * of read-modify-write.
22539 			 * Note that this routine returns a pointer to
22540 			 * a valid bp in wr_bp.
22541 			 */
22542 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22543 			    &wr_bp);
22544 			if (err) {
22545 				mutex_exit(SD_MUTEX(un));
22546 				return (err);
22547 			}
22548 			/*
22549 			 * The offset is calculated as:
22550 			 * (original block # * system block size) -
22551 			 * (new block # * target block size)
22552 			 */
22553 			io_start_offset =
22554 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22555 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22556 
22557 			ASSERT((io_start_offset >= 0) &&
22558 			    (io_start_offset < un->un_tgt_blocksize));
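			/*
			 * Worked example (assumed sizes): with a 512-byte
			 * system blocksize and a 2048-byte target
			 * blocksize, blkno=3 and nblk=2 give
			 * tgt_byte_offset=1536 and tgt_byte_count=1024;
			 * hence tgt_blkno=0, tgt_nblk=2, and the bcopy
			 * below lands at io_start_offset=1536 within the
			 * two-block read buffer.
			 */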
22559 			/*
22560 			 * Do the modify portion of read modify write.
22561 			 */
22562 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22563 			    (size_t)nblk * un->un_sys_blocksize);
22564 		} else {
22565 			doing_rmw = FALSE;
22566 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22567 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22568 		}
22569 
22570 		/* Convert blkno and nblk to target blocks */
22571 		blkno = tgt_blkno;
22572 		nblk = tgt_nblk;
22573 	} else {
22574 		wr_bp = &wr_buf;
22575 		bzero(wr_bp, sizeof (struct buf));
22576 		wr_bp->b_flags		= B_BUSY;
22577 		wr_bp->b_un.b_addr	= addr;
22578 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
22579 		wr_bp->b_resid		= 0;
22580 	}
22581 
22582 	mutex_exit(SD_MUTEX(un));
22583 
22584 	/*
22585 	 * Obtain a SCSI packet for the write command.
22586 	 * It should be safe to call the allocator here without
22587 	 * worrying about being locked for DVMA mapping because
22588 	 * the address we're passed is already a DVMA mapping.
22589 	 *
22590 	 * We are also not going to worry about semaphore ownership
22591 	 * in the dump buffer. Dumping is single threaded at present.
22592 	 */
22593 
22594 	wr_pktp = NULL;
22595 
22596 #if defined(__i386) || defined(__amd64)
22597 	dma_resid = wr_bp->b_bcount;
22598 	oblkno = blkno;
22599 	while (dma_resid != 0) {
22600 #endif
22601 
22602 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22603 		wr_bp->b_flags &= ~B_ERROR;
22604 
22605 #if defined(__i386) || defined(__amd64)
22606 		blkno = oblkno +
22607 		    ((wr_bp->b_bcount - dma_resid) /
22608 		    un->un_tgt_blocksize);
22609 		nblk = dma_resid / un->un_tgt_blocksize;
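
		/*
		 * Example (assumed sizes): if the first transfer moved
		 * all but 64 KB of a 1 MB dump buffer, dma_resid is
		 * 65536; with 512-byte target blocks this retry resumes
		 * at oblkno + 1920 for 128 blocks.
		 */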
22610 
22611 		if (wr_pktp) {
22612 			/* Partial DMA transfers after initial transfer */
22613 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22614 			    blkno, nblk);
22615 		} else {
22616 			/* Initial transfer */
22617 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22618 			    un->un_pkt_flags, NULL_FUNC, NULL,
22619 			    blkno, nblk);
22620 		}
22621 #else
22622 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22623 		    0, NULL_FUNC, NULL, blkno, nblk);
22624 #endif
22625 
22626 		if (rval == 0) {
22627 			/* We were given a SCSI packet, continue. */
22628 			break;
22629 		}
22630 
22631 		if (i == 0) {
22632 			if (wr_bp->b_flags & B_ERROR) {
22633 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22634 				    "no resources for dumping; "
22635 				    "error code: 0x%x, retrying",
22636 				    geterror(wr_bp));
22637 			} else {
22638 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22639 				    "no resources for dumping; retrying");
22640 			}
22641 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22642 			if (wr_bp->b_flags & B_ERROR) {
22643 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22644 				    "no resources for dumping; error code: "
22645 				    "0x%x, retrying\n", geterror(wr_bp));
22646 			}
22647 		} else {
22648 			if (wr_bp->b_flags & B_ERROR) {
22649 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22650 				    "no resources for dumping; "
22651 				    "error code: 0x%x, retries failed, "
22652 				    "giving up.\n", geterror(wr_bp));
22653 			} else {
22654 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22655 				    "no resources for dumping; "
22656 				    "retries failed, giving up.\n");
22657 			}
22658 			mutex_enter(SD_MUTEX(un));
22659 			Restore_state(un);
22660 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
22661 				mutex_exit(SD_MUTEX(un));
22662 				scsi_free_consistent_buf(wr_bp);
22663 			} else {
22664 				mutex_exit(SD_MUTEX(un));
22665 			}
22666 			return (EIO);
22667 		}
22668 		drv_usecwait(10000);
22669 	}
22670 
22671 #if defined(__i386) || defined(__amd64)
22672 	/*
22673 	 * save the resid from PARTIAL_DMA
22674 	 */
22675 	dma_resid = wr_pktp->pkt_resid;
22676 	if (dma_resid != 0)
22677 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
22678 	wr_pktp->pkt_resid = 0;
22679 #endif
22680 
22681 	/* SunBug 1222170 */
22682 	wr_pktp->pkt_flags = FLAG_NOINTR;
22683 
22684 	err = EIO;
22685 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22686 
22687 		/*
22688 		 * Scsi_poll returns 0 (success) if the command completes and
22689 		 * the status block is STATUS_GOOD.  We should only check
22690 		 * errors if this condition is not true.  Even then we should
22691 		 * send our own request sense packet only if we have a check
22692 		 * condition and auto request sense has not been performed by
22693 		 * the hba.
22694 		 */
22695 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
22696 
22697 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
22698 		    (wr_pktp->pkt_resid == 0)) {
22699 			err = SD_SUCCESS;
22700 			break;
22701 		}
22702 
22703 		/*
22704 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
22705 		 */
22706 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
22707 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22708 			    "Device is gone\n");
22709 			break;
22710 		}
22711 
22712 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
22713 			SD_INFO(SD_LOG_DUMP, un,
22714 			    "sddump: write failed with CHECK, try # %d\n", i);
22715 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
22716 				(void) sd_send_polled_RQS(un);
22717 			}
22718 
22719 			continue;
22720 		}
22721 
22722 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
22723 			int reset_retval = 0;
22724 
22725 			SD_INFO(SD_LOG_DUMP, un,
22726 			    "sddump: write failed with BUSY, try # %d\n", i);
22727 
22728 			if (un->un_f_lun_reset_enabled == TRUE) {
22729 				reset_retval = scsi_reset(SD_ADDRESS(un),
22730 				    RESET_LUN);
22731 			}
22732 			if (reset_retval == 0) {
22733 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22734 			}
22735 			(void) sd_send_polled_RQS(un);
22736 
22737 		} else {
22738 			SD_INFO(SD_LOG_DUMP, un,
22739 			    "sddump: write failed with 0x%x, try # %d\n",
22740 			    SD_GET_PKT_STATUS(wr_pktp), i);
22741 			mutex_enter(SD_MUTEX(un));
22742 			sd_reset_target(un, wr_pktp);
22743 			mutex_exit(SD_MUTEX(un));
22744 		}
22745 
22746 		/*
22747 		 * If we are not getting anywhere with lun/target resets,
22748 		 * let's reset the bus.
22749 		 */
22750 		if (i == SD_NDUMP_RETRIES/2) {
22751 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22752 			(void) sd_send_polled_RQS(un);
22753 		}
22754 
22755 	}
22756 #if defined(__i386) || defined(__amd64)
22757 	}	/* dma_resid */
22758 #endif
22759 
22760 	scsi_destroy_pkt(wr_pktp);
22761 	mutex_enter(SD_MUTEX(un));
22762 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
22763 		mutex_exit(SD_MUTEX(un));
22764 		scsi_free_consistent_buf(wr_bp);
22765 	} else {
22766 		mutex_exit(SD_MUTEX(un));
22767 	}
22768 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
22769 	return (err);
22770 }
22771 
22772 /*
22773  *    Function: sd_scsi_poll()
22774  *
22775  * Description: This is a wrapper for the scsi_poll call.
22776  *
22777  *   Arguments: sd_lun - The unit structure
22778  *              scsi_pkt - The scsi packet being sent to the device.
22779  *
22780  * Return Code: 0 - Command completed successfully with good status
22781  *             -1 - Command failed.  This could indicate a check condition
22782  *                  or other status value requiring recovery action.
22783  *
22784  */
22785 
22786 static int
22787 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
22788 {
22789 	int status;
22790 
22791 	ASSERT(un != NULL);
22792 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22793 	ASSERT(pktp != NULL);
22794 
22795 	status = SD_SUCCESS;
22796 
22797 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
22798 		pktp->pkt_flags |= un->un_tagflags;
22799 		pktp->pkt_flags &= ~FLAG_NODISCON;
22800 	}
22801 
22802 	status = sd_ddi_scsi_poll(pktp);
22803 	/*
22804 	 * Scsi_poll returns 0 (success) if the command completes and the
22805 	 * status block is STATUS_GOOD.  We should only check errors if this
22806 	 * condition is not true.  Even then we should send our own request
22807 	 * sense packet only if we have a check condition and auto
22808 	 * request sense has not been performed by the hba.
22809 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
22810 	 */
22811 	if ((status != SD_SUCCESS) &&
22812 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
22813 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
22814 	    (pktp->pkt_reason != CMD_DEV_GONE))
22815 		(void) sd_send_polled_RQS(un);
22816 
22817 	return (status);
22818 }
22819 
22820 /*
22821  *    Function: sd_send_polled_RQS()
22822  *
22823  * Description: This sends the request sense command to a device.
22824  *
22825  *   Arguments: sd_lun - The unit structure
22826  *
22827  * Return Code: 0 - Command completed successfully with good status
22828  *             -1 - Command failed.
22829  *
22830  */
22831 
22832 static int
22833 sd_send_polled_RQS(struct sd_lun *un)
22834 {
22835 	int	ret_val;
22836 	struct	scsi_pkt	*rqs_pktp;
22837 	struct	buf		*rqs_bp;
22838 
22839 	ASSERT(un != NULL);
22840 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22841 
22842 	ret_val = SD_SUCCESS;
22843 
22844 	rqs_pktp = un->un_rqs_pktp;
22845 	rqs_bp	 = un->un_rqs_bp;
22846 
22847 	mutex_enter(SD_MUTEX(un));
22848 
22849 	if (un->un_sense_isbusy) {
22850 		ret_val = SD_FAILURE;
22851 		mutex_exit(SD_MUTEX(un));
22852 		return (ret_val);
22853 	}
22854 
22855 	/*
22856 	 * If the request sense buffer (and packet) is not in use,
22857 	 * let's set the un_sense_isbusy and send our packet
22858 	 */
22859 	un->un_sense_isbusy 	= 1;
22860 	rqs_pktp->pkt_resid  	= 0;
22861 	rqs_pktp->pkt_reason 	= 0;
22862 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
22863 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
22864 
22865 	mutex_exit(SD_MUTEX(un));
22866 
22867 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
22868 	    " 0x%p\n", rqs_bp->b_un.b_addr);
22869 
22870 	/*
22871 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
22872 	 * axle - it has a call into us!
22873 	 */
22874 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
22875 		SD_INFO(SD_LOG_COMMON, un,
22876 		    "sd_send_polled_RQS: RQS failed\n");
22877 	}
22878 
22879 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
22880 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
22881 
22882 	mutex_enter(SD_MUTEX(un));
22883 	un->un_sense_isbusy = 0;
22884 	mutex_exit(SD_MUTEX(un));
22885 
22886 	return (ret_val);
22887 }
22888 
22889 /*
22890  * Defines needed for localized version of the scsi_poll routine.
22891  */
22892 #define	SD_CSEC		10000			/* usecs */
22893 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
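
/*
 * SD_CSEC is the basic polling granularity (10000 usec = 10 msec);
 * SD_SEC_TO_CSEC (100) converts seconds to that unit.  For example, the
 * default 60 second pkt_time yields 60 * 100 = 6000 iterations of the
 * polling loop in sd_ddi_scsi_poll() below.
 */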
22894 
22895 
22896 /*
22897  *    Function: sd_ddi_scsi_poll()
22898  *
22899  * Description: Localized version of the scsi_poll routine.  The purpose is to
22900  *		send a scsi_pkt to a device as a polled command.  This version
22901  *		is to ensure more robust handling of transport errors.
22902  *		Specifically this routine cures not ready, coming ready
22903  *		transition for power up and reset of sonoma's.  This can take
22904  *		up to 45 seconds for power-on and 20 seconds for reset of a
22905  * 		sonoma lun.
22906  *
22907  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
22908  *
22909  * Return Code: 0 - Command completed successfully with good status
22910  *             -1 - Command failed.
22911  *
22912  */
22913 
22914 static int
22915 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
22916 {
22917 	int busy_count;
22918 	int timeout;
22919 	int rval = SD_FAILURE;
22920 	int savef;
22921 	uint8_t *sensep;
22922 	long savet;
22923 	void (*savec)();
22924 	/*
22925 	 * The following is defined in machdep.c and is used in determining if
22926 	 * the scsi transport system will do polled I/O instead of interrupt
22927 	 * I/O when called from xx_dump().
22928 	 */
22929 	extern int do_polled_io;
22930 
22931 	/*
22932 	 * save old flags in pkt, to restore at end
22933 	 */
22934 	savef = pkt->pkt_flags;
22935 	savec = pkt->pkt_comp;
22936 	savet = pkt->pkt_time;
22937 
22938 	pkt->pkt_flags |= FLAG_NOINTR;
22939 
22940 	/*
22941 	 * XXX there is nothing in the SCSA spec that states that we should not
22942 	 * do a callback for polled cmds; however, removing this will break sd
22943 	 * and probably other target drivers
22944 	 */
22945 	pkt->pkt_comp = NULL;
22946 
22947 	/*
22948 	 * we don't like a polled command without timeout.
22949 	 * 60 seconds seems long enough.
22950 	 */
22951 	if (pkt->pkt_time == 0) {
22952 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
22953 	}
22954 
22955 	/*
22956 	 * Send polled cmd.
22957 	 *
22958 	 * We do some error recovery for various errors.  Tran_busy,
22959 	 * queue full, and non-dispatched commands are retried every 10 msec,
22960 	 * as they are typically transient failures.  Busy status and Not
22961 	 * Ready are retried every second as this status takes a while to
22962 	 * change.  Unit attention is retried for pkt_time (60) times
22963 	 * with no delay.
22964 	 */
22965 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
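
	/*
	 * E.g., with the default 60 second pkt_time the budget is 6000
	 * 10-msec slots; each busy or not-ready retry consumes 100 slots
	 * (1 sec), so roughly 60 such retries fit before giving up.
	 */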
22966 
22967 	for (busy_count = 0; busy_count < timeout; busy_count++) {
22968 		int rc;
22969 		int poll_delay;
22970 
22971 		/*
22972 		 * Initialize pkt status variables.
22973 		 */
22974 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
22975 
22976 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
22977 			if (rc != TRAN_BUSY) {
22978 				/* Transport failed - give up. */
22979 				break;
22980 			} else {
22981 				/* Transport busy - try again. */
22982 				poll_delay = 1 * SD_CSEC; /* 10 msec */
22983 			}
22984 		} else {
22985 			/*
22986 			 * Transport accepted - check pkt status.
22987 			 */
22988 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
22989 			if (pkt->pkt_reason == CMD_CMPLT &&
22990 			    rc == STATUS_CHECK &&
22991 			    pkt->pkt_state & STATE_ARQ_DONE) {
22992 				struct scsi_arq_status *arqstat =
22993 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
22994 
22995 				sensep = (uint8_t *)&arqstat->sts_sensedata;
22996 			} else {
22997 				sensep = NULL;
22998 			}
22999 
23000 			if ((pkt->pkt_reason == CMD_CMPLT) &&
23001 			    (rc == STATUS_GOOD)) {
23002 				/* No error - we're done */
23003 				rval = SD_SUCCESS;
23004 				break;
23005 
23006 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
23007 				/* Lost connection - give up */
23008 				break;
23009 
23010 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
23011 			    (pkt->pkt_state == 0)) {
23012 				/* Pkt not dispatched - try again. */
23013 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23014 
23015 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23016 			    (rc == STATUS_QFULL)) {
23017 				/* Queue full - try again. */
23018 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23019 
23020 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23021 			    (rc == STATUS_BUSY)) {
23022 				/* Busy - try again. */
23023 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23024 				busy_count += (SD_SEC_TO_CSEC - 1);
23025 
23026 			} else if ((sensep != NULL) &&
23027 			    (scsi_sense_key(sensep) ==
23028 			    KEY_UNIT_ATTENTION)) {
23029 				/* Unit Attention - try again */
23030 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
23031 				continue;
23032 
23033 			} else if ((sensep != NULL) &&
23034 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
23035 			    (scsi_sense_asc(sensep) == 0x04) &&
23036 			    (scsi_sense_ascq(sensep) == 0x01)) {
23037 				/* Not ready -> ready - try again. */
23038 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23039 				busy_count += (SD_SEC_TO_CSEC - 1);
23040 
23041 			} else {
23042 				/* BAD status - give up. */
23043 				break;
23044 			}
23045 		}
23046 
23047 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
23048 		    !do_polled_io) {
23049 			delay(drv_usectohz(poll_delay));
23050 		} else {
23051 			/* we busy wait during cpr_dump or interrupt threads */
23052 			drv_usecwait(poll_delay);
23053 		}
23054 	}
23055 
23056 	pkt->pkt_flags = savef;
23057 	pkt->pkt_comp = savec;
23058 	pkt->pkt_time = savet;
23059 	return (rval);
23060 }
23061 
23062 
23063 /*
23064  *    Function: sd_persistent_reservation_in_read_keys
23065  *
23066  * Description: This routine is the driver entry point for handling CD-ROM
23067  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23068  *		by sending the SCSI-3 PRIN commands to the device.
23069  *		Processes the read keys command response by copying the
23070  *		reservation key information into the user provided buffer.
23071  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23072  *
23073  *   Arguments: un   -  Pointer to soft state struct for the target.
23074  *		usrp -	user provided pointer to multihost Persistent In Read
23075  *			Keys structure (mhioc_inkeys_t)
23076  *		flag -	this argument is a pass through to ddi_copyxxx()
23077  *			directly from the mode argument of ioctl().
23078  *
23079  * Return Code: 0   - Success
23080  *		EACCES
23081  *		ENOTSUP
23082  *		errno return code from sd_send_scsi_cmd()
23083  *
23084  *     Context: Can sleep. Does not return until command is completed.
23085  */
23086 
23087 static int
23088 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23089     mhioc_inkeys_t *usrp, int flag)
23090 {
23091 #ifdef _MULTI_DATAMODEL
23092 	struct mhioc_key_list32	li32;
23093 #endif
23094 	sd_prin_readkeys_t	*in;
23095 	mhioc_inkeys_t		*ptr;
23096 	mhioc_key_list_t	li;
23097 	uchar_t			*data_bufp;
23098 	int 			data_len;
23099 	int			rval;
23100 	size_t			copysz;
23101 
23102 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
23103 		return (EINVAL);
23104 	}
23105 	bzero(&li, sizeof (mhioc_key_list_t));
23106 
23107 	/*
23108 	 * Get the listsize from user
23109 	 */
23110 #ifdef _MULTI_DATAMODEL
23111 
23112 	switch (ddi_model_convert_from(flag & FMODELS)) {
23113 	case DDI_MODEL_ILP32:
23114 		copysz = sizeof (struct mhioc_key_list32);
23115 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23116 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23117 			    "sd_persistent_reservation_in_read_keys: "
23118 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23119 			rval = EFAULT;
23120 			goto done;
23121 		}
23122 		li.listsize = li32.listsize;
23123 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23124 		break;
23125 
23126 	case DDI_MODEL_NONE:
23127 		copysz = sizeof (mhioc_key_list_t);
23128 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23129 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23130 			    "sd_persistent_reservation_in_read_keys: "
23131 			    "failed ddi_copyin: mhioc_key_list_t\n");
23132 			rval = EFAULT;
23133 			goto done;
23134 		}
23135 		break;
23136 	}
23137 
23138 #else /* ! _MULTI_DATAMODEL */
23139 	copysz = sizeof (mhioc_key_list_t);
23140 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23141 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23142 		    "sd_persistent_reservation_in_read_keys: "
23143 		    "failed ddi_copyin: mhioc_key_list_t\n");
23144 		rval = EFAULT;
23145 		goto done;
23146 	}
23147 #endif
23148 
23149 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23150 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
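	/*
	 * Example: with MHIOC_RESV_KEY_SIZE of 8, a listsize of 4 yields
	 * data_len = 4 * 8 + 8 = 40 bytes: the 8-byte PRIN READ KEYS
	 * header (generation + length) plus room for four 8-byte
	 * reservation keys.
	 */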
23151 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23152 
23153 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23154 	    data_len, data_bufp)) != 0) {
23155 		goto done;
23156 	}
23157 	in = (sd_prin_readkeys_t *)data_bufp;
23158 	ptr->generation = BE_32(in->generation);
23159 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23160 
23161 	/*
23162 	 * Return the min(listsize, listlen) keys
23163 	 */
23164 #ifdef _MULTI_DATAMODEL
23165 
23166 	switch (ddi_model_convert_from(flag & FMODELS)) {
23167 	case DDI_MODEL_ILP32:
23168 		li32.listlen = li.listlen;
23169 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23170 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23171 			    "sd_persistent_reservation_in_read_keys: "
23172 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23173 			rval = EFAULT;
23174 			goto done;
23175 		}
23176 		break;
23177 
23178 	case DDI_MODEL_NONE:
23179 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23180 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23181 			    "sd_persistent_reservation_in_read_keys: "
23182 			    "failed ddi_copyout: mhioc_key_list_t\n");
23183 			rval = EFAULT;
23184 			goto done;
23185 		}
23186 		break;
23187 	}
23188 
23189 #else /* ! _MULTI_DATAMODEL */
23190 
23191 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23192 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23193 		    "sd_persistent_reservation_in_read_keys: "
23194 		    "failed ddi_copyout: mhioc_key_list_t\n");
23195 		rval = EFAULT;
23196 		goto done;
23197 	}
23198 
23199 #endif /* _MULTI_DATAMODEL */
23200 
23201 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23202 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23203 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23204 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23205 		    "sd_persistent_reservation_in_read_keys: "
23206 		    "failed ddi_copyout: keylist\n");
23207 		rval = EFAULT;
23208 	}
23209 done:
23210 	kmem_free(data_bufp, data_len);
23211 	return (rval);
23212 }
23213 
23214 
23215 /*
23216  *    Function: sd_persistent_reservation_in_read_resv
23217  *
23218  * Description: This routine is the driver entry point for handling CD-ROM
23219  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23220  *		by sending the SCSI-3 PRIN commands to the device.
23221  *		Process the read persistent reservations command response by
23222  *		copying the reservation information into the user provided
23223  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
23224  *
23225  *   Arguments: un   -  Pointer to soft state struct for the target.
23226  *		usrp -	user provided pointer to multihost Persistent In Read
23227  *			Reservations structure (mhioc_inresvs_t)
23228  *		flag -	this argument is a pass through to ddi_copyxxx()
23229  *			directly from the mode argument of ioctl().
23230  *
23231  * Return Code: 0   - Success
23232  *		EACCES
23233  *		ENOTSUP
23234  *		errno return code from sd_send_scsi_cmd()
23235  *
23236  *     Context: Can sleep. Does not return until command is completed.
23237  */
23238 
23239 static int
23240 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23241     mhioc_inresvs_t *usrp, int flag)
23242 {
23243 #ifdef _MULTI_DATAMODEL
23244 	struct mhioc_resv_desc_list32 resvlist32;
23245 #endif
23246 	sd_prin_readresv_t	*in;
23247 	mhioc_inresvs_t		*ptr;
23248 	sd_readresv_desc_t	*readresv_ptr;
23249 	mhioc_resv_desc_list_t	resvlist;
23250 	mhioc_resv_desc_t 	resvdesc;
23251 	uchar_t			*data_bufp;
23252 	int 			data_len;
23253 	int			rval;
23254 	int			i;
23255 	size_t			copysz;
23256 	mhioc_resv_desc_t	*bufp;
23257 
23258 	if ((ptr = usrp) == NULL) {
23259 		return (EINVAL);
23260 	}
23261 
23262 	/*
23263 	 * Get the listsize from user
23264 	 */
23265 #ifdef _MULTI_DATAMODEL
23266 	switch (ddi_model_convert_from(flag & FMODELS)) {
23267 	case DDI_MODEL_ILP32:
23268 		copysz = sizeof (struct mhioc_resv_desc_list32);
23269 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23270 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23271 			    "sd_persistent_reservation_in_read_resv: "
23272 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23273 			rval = EFAULT;
23274 			goto done;
23275 		}
23276 		resvlist.listsize = resvlist32.listsize;
23277 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23278 		break;
23279 
23280 	case DDI_MODEL_NONE:
23281 		copysz = sizeof (mhioc_resv_desc_list_t);
23282 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23283 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23284 			    "sd_persistent_reservation_in_read_resv: "
23285 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23286 			rval = EFAULT;
23287 			goto done;
23288 		}
23289 		break;
23290 	}
23291 #else /* ! _MULTI_DATAMODEL */
23292 	copysz = sizeof (mhioc_resv_desc_list_t);
23293 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23294 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23295 		    "sd_persistent_reservation_in_read_resv: "
23296 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23297 		rval = EFAULT;
23298 		goto done;
23299 	}
23300 #endif /* ! _MULTI_DATAMODEL */
23301 
23302 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23303 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23304 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23305 
23306 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23307 	    data_len, data_bufp)) != 0) {
23308 		goto done;
23309 	}
23310 	in = (sd_prin_readresv_t *)data_bufp;
23311 	ptr->generation = BE_32(in->generation);
23312 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23313 
23314 	/*
23315 	 * Return the min(listsize, listlen) keys
23316 	 */
23317 #ifdef _MULTI_DATAMODEL
23318 
23319 	switch (ddi_model_convert_from(flag & FMODELS)) {
23320 	case DDI_MODEL_ILP32:
23321 		resvlist32.listlen = resvlist.listlen;
23322 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23323 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23324 			    "sd_persistent_reservation_in_read_resv: "
23325 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23326 			rval = EFAULT;
23327 			goto done;
23328 		}
23329 		break;
23330 
23331 	case DDI_MODEL_NONE:
23332 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23333 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23334 			    "sd_persistent_reservation_in_read_resv: "
23335 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23336 			rval = EFAULT;
23337 			goto done;
23338 		}
23339 		break;
23340 	}
23341 
23342 #else /* ! _MULTI_DATAMODEL */
23343 
23344 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23345 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23346 		    "sd_persistent_reservation_in_read_resv: "
23347 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23348 		rval = EFAULT;
23349 		goto done;
23350 	}
23351 
23352 #endif /* ! _MULTI_DATAMODEL */
23353 
23354 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23355 	bufp = resvlist.list;
23356 	copysz = sizeof (mhioc_resv_desc_t);
23357 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23358 	    i++, readresv_ptr++, bufp++) {
23359 
23360 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23361 		    MHIOC_RESV_KEY_SIZE);
23362 		resvdesc.type  = readresv_ptr->type;
23363 		resvdesc.scope = readresv_ptr->scope;
23364 		resvdesc.scope_specific_addr =
23365 		    BE_32(readresv_ptr->scope_specific_addr);
23366 
23367 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23368 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23369 			    "sd_persistent_reservation_in_read_resv: "
23370 			    "failed ddi_copyout: resvlist\n");
23371 			rval = EFAULT;
23372 			goto done;
23373 		}
23374 	}
23375 done:
23376 	kmem_free(data_bufp, data_len);
23377 	return (rval);
23378 }
23379 
23380 
23381 /*
23382  *    Function: sr_change_blkmode()
23383  *
23384  * Description: This routine is the driver entry point for handling CD-ROM
23385  *		block mode ioctl requests. Support for returning and changing
23386  *		the current block size in use by the device is implemented. The
23387  *		LBA size is changed via a MODE SELECT Block Descriptor.
23388  *
23389  *		This routine issues a mode sense with an allocation length of
23390  *		12 bytes for the mode page header and a single block descriptor.
23391  *
23392  *   Arguments: dev - the device 'dev_t'
23393  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23394  *		      CDROMSBLKMODE (set)
23395  *		data - current block size or requested block size
23396  *		flag - this argument is a pass through to ddi_copyxxx() directly
23397  *		       from the mode argument of ioctl().
23398  *
23399  * Return Code: the code returned by sd_send_scsi_cmd()
23400  *		EINVAL if invalid arguments are provided
23401  *		EFAULT if ddi_copyxxx() fails
23402  *		ENXIO if ddi_get_soft_state() fails
23403  *		EIO if invalid mode sense block descriptor length
23404  *
23405  */
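
/*
 * Illustrative user-level usage (a sketch, not part of this driver; fd
 * is assumed to be an open descriptor for a raw CD-ROM device):
 *
 *	int bsize;
 *	if (ioctl(fd, CDROMGBLKMODE, &bsize) == 0)
 *		(void) printf("current LBA size: %d\n", bsize);
 *	if (ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048) != 0)
 *		perror("CDROMSBLKMODE");
 */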
23406 
23407 static int
23408 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23409 {
23410 	struct sd_lun			*un = NULL;
23411 	struct mode_header		*sense_mhp, *select_mhp;
23412 	struct block_descriptor		*sense_desc, *select_desc;
23413 	int				current_bsize;
23414 	int				rval = EINVAL;
23415 	uchar_t				*sense = NULL;
23416 	uchar_t				*select = NULL;
23417 
23418 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23419 
23420 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23421 		return (ENXIO);
23422 	}
23423 
23424 	/*
23425 	 * The block length is changed via the Mode Select block descriptor; the
23426 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23427 	 * required as part of this routine. Therefore the mode sense allocation
23428 	 * length is specified to be the length of a mode page header and a
23429 	 * block descriptor.
23430 	 */
23431 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23432 
23433 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23434 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23435 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23436 		    "sr_change_blkmode: Mode Sense Failed\n");
23437 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23438 		return (rval);
23439 	}
23440 
23441 	/* Check the block descriptor len to handle only 1 block descriptor */
23442 	sense_mhp = (struct mode_header *)sense;
23443 	if ((sense_mhp->bdesc_length == 0) ||
23444 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23445 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23446 		    "sr_change_blkmode: Mode Sense returned invalid block"
23447 		    " descriptor length\n");
23448 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23449 		return (EIO);
23450 	}
23451 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23452 	current_bsize = ((sense_desc->blksize_hi << 16) |
23453 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23454 
23455 	/* Process command */
23456 	switch (cmd) {
23457 	case CDROMGBLKMODE:
23458 		/* Return the block size obtained during the mode sense */
23459 		if (ddi_copyout(&current_bsize, (void *)data,
23460 		    sizeof (int), flag) != 0)
23461 			rval = EFAULT;
23462 		break;
23463 	case CDROMSBLKMODE:
23464 		/* Validate the requested block size */
23465 		switch (data) {
23466 		case CDROM_BLK_512:
23467 		case CDROM_BLK_1024:
23468 		case CDROM_BLK_2048:
23469 		case CDROM_BLK_2056:
23470 		case CDROM_BLK_2336:
23471 		case CDROM_BLK_2340:
23472 		case CDROM_BLK_2352:
23473 		case CDROM_BLK_2368:
23474 		case CDROM_BLK_2448:
23475 		case CDROM_BLK_2646:
23476 		case CDROM_BLK_2647:
23477 			break;
23478 		default:
23479 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23480 			    "sr_change_blkmode: "
23481 			    "Block Size '%ld' Not Supported\n", data);
23482 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23483 			return (EINVAL);
23484 		}
23485 
23486 		/*
23487 		 * The current block size matches the requested block size so
23488 		 * there is no need to send the mode select to change the size
23489 		 */
23490 		if (current_bsize == data) {
23491 			break;
23492 		}
23493 
23494 		/* Build the select data for the requested block size */
23495 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23496 		select_mhp = (struct mode_header *)select;
23497 		select_desc =
23498 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23499 		/*
23500 		 * The LBA size is changed via the block descriptor, so the
23501 		 * descriptor is built according to the user data
23502 		 */
23503 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23504 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23505 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23506 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23507 
23508 		/* Send the mode select for the requested block size */
23509 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23510 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23511 		    SD_PATH_STANDARD)) != 0) {
23512 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23513 			    "sr_change_blkmode: Mode Select Failed\n");
23514 			/*
23515 			 * The mode select failed for the requested block size,
23516 			 * so reset the data for the original block size and
23517 			 * send it to the target. The error is indicated by the
23518 			 * return value for the failed mode select.
23519 			 */
23520 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23521 			select_desc->blksize_mid = sense_desc->blksize_mid;
23522 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23523 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23524 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23525 			    SD_PATH_STANDARD);
23526 		} else {
23527 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23528 			mutex_enter(SD_MUTEX(un));
23529 			sd_update_block_info(un, (uint32_t)data, 0);
23530 			mutex_exit(SD_MUTEX(un));
23531 		}
23532 		break;
23533 	default:
23534 		/* should not reach here, but check anyway */
23535 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23536 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23537 		rval = EINVAL;
23538 		break;
23539 	}
23540 
23541 	if (select) {
23542 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23543 	}
23544 	if (sense) {
23545 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23546 	}
23547 	return (rval);
23548 }
23549 
23550 
23551 /*
23552  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23553  * implement driver support for getting and setting the CD speed. The command
23554  * set used will be based on the device type. If the device has not been
23555  * identified as MMC, the Toshiba vendor specific mode page will be used.
23556  * If the device is MMC but does not support the Real Time Streaming
23557  * feature, the SET CD SPEED command will be used to set the speed and
23558  * mode page 0x2A will be used to read it.
23559  */
23560 
23561 /*
23562  *    Function: sr_change_speed()
23563  *
23564  * Description: This routine is the driver entry point for handling CD-ROM
23565  *		drive speed ioctl requests for devices supporting the Toshiba
23566  *		vendor specific drive speed mode page. Support for returning
23567  *		and changing the current drive speed in use by the device is
23568  *		implemented.
23569  *
23570  *   Arguments: dev - the device 'dev_t'
23571  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23572  *		      CDROMSDRVSPEED (set)
23573  *		data - current drive speed or requested drive speed
23574  *		flag - this argument is a pass through to ddi_copyxxx() directly
23575  *		       from the mode argument of ioctl().
23576  *
23577  * Return Code: the code returned by sd_send_scsi_cmd()
23578  *		EINVAL if invalid arguments are provided
23579  *		EFAULT if ddi_copyxxx() fails
23580  *		ENXIO if ddi_get_soft_state() fails
23581  *		EIO if invalid mode sense block descriptor length
23582  */
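
/*
 * Illustrative user-level usage (a sketch, not part of this driver;
 * fd is an assumed open CD-ROM descriptor):
 *
 *	int speed = CDROM_DOUBLE_SPEED;
 *	if (ioctl(fd, CDROMSDRVSPEED, speed) != 0)
 *		perror("CDROMSDRVSPEED");
 *	if (ioctl(fd, CDROMGDRVSPEED, &speed) == 0)
 *		(void) printf("drive speed code: %d\n", speed);
 */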
23583 
23584 static int
23585 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23586 {
23587 	struct sd_lun			*un = NULL;
23588 	struct mode_header		*sense_mhp, *select_mhp;
23589 	struct mode_speed		*sense_page, *select_page;
23590 	int				current_speed;
23591 	int				rval = EINVAL;
23592 	int				bd_len;
23593 	uchar_t				*sense = NULL;
23594 	uchar_t				*select = NULL;
23595 
23596 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23597 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23598 		return (ENXIO);
23599 	}
23600 
23601 	/*
23602 	 * Note: The drive speed is being modified here according to a Toshiba
23603 	 * vendor specific mode page (0x31).
23604 	 */
23605 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23606 
23607 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23608 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23609 	    SD_PATH_STANDARD)) != 0) {
23610 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23611 		    "sr_change_speed: Mode Sense Failed\n");
23612 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23613 		return (rval);
23614 	}
23615 	sense_mhp  = (struct mode_header *)sense;
23616 
23617 	/* Check the block descriptor len to handle only 1 block descriptor */
23618 	bd_len = sense_mhp->bdesc_length;
23619 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23620 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23621 		    "sr_change_speed: Mode Sense returned invalid block "
23622 		    "descriptor length\n");
23623 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23624 		return (EIO);
23625 	}
23626 
23627 	sense_page = (struct mode_speed *)
23628 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23629 	current_speed = sense_page->speed;
23630 
23631 	/* Process command */
23632 	switch (cmd) {
23633 	case CDROMGDRVSPEED:
23634 		/* Return the drive speed obtained during the mode sense */
23635 		if (current_speed == 0x2) {
23636 			current_speed = CDROM_TWELVE_SPEED;
23637 		}
23638 		if (ddi_copyout(&current_speed, (void *)data,
23639 		    sizeof (int), flag) != 0) {
23640 			rval = EFAULT;
23641 		}
23642 		break;
23643 	case CDROMSDRVSPEED:
23644 		/* Validate the requested drive speed */
23645 		switch ((uchar_t)data) {
23646 		case CDROM_TWELVE_SPEED:
23647 			data = 0x2;
23648 			/*FALLTHROUGH*/
23649 		case CDROM_NORMAL_SPEED:
23650 		case CDROM_DOUBLE_SPEED:
23651 		case CDROM_QUAD_SPEED:
23652 		case CDROM_MAXIMUM_SPEED:
23653 			break;
23654 		default:
23655 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23656 			    "sr_change_speed: "
23657 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23658 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23659 			return (EINVAL);
23660 		}
23661 
23662 		/*
23663 		 * The current drive speed matches the requested drive speed so
23664 		 * there is no need to send the mode select to change the speed
23665 		 */
23666 		if (current_speed == data) {
23667 			break;
23668 		}
23669 
23670 		/* Build the select data for the requested drive speed */
23671 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23672 		select_mhp = (struct mode_header *)select;
23673 		select_mhp->bdesc_length = 0;
23674 		select_page =
23675 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23678 		select_page->mode_page.code = CDROM_MODE_SPEED;
23679 		select_page->mode_page.length = 2;
23680 		select_page->speed = (uchar_t)data;
23681 
23682 		/* Send the mode select for the requested drive speed */
23683 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23684 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23685 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
23686 			/*
23687 			 * The mode select failed for the requested drive speed,
23688 			 * so reset the data for the original drive speed and
23689 			 * send it to the target. The error is indicated by the
23690 			 * return value for the failed mode select.
23691 			 */
23692 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23693 			    "sr_drive_speed: Mode Select Failed\n");
23694 			select_page->speed = sense_page->speed;
23695 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23696 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23697 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
23698 		}
23699 		break;
23700 	default:
23701 		/* should not reach here, but check anyway */
23702 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23703 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
23704 		rval = EINVAL;
23705 		break;
23706 	}
23707 
23708 	if (select) {
23709 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
23710 	}
23711 	if (sense) {
23712 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23713 	}
23714 
23715 	return (rval);
23716 }
23717 
23718 
23719 /*
23720  *    Function: sr_atapi_change_speed()
23721  *
23722  * Description: This routine is the driver entry point for handling CD-ROM
23723  *		drive speed ioctl requests for MMC devices that do not support
23724  *		the Real Time Streaming feature (0x107).
23725  *
23726  *		Note: This routine will use the SET SPEED command which may not
23727  *		be supported by all devices.
23728  *
23729  *   Arguments: dev - the device 'dev_t'
23730  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23731  *		      CDROMSDRVSPEED (set)
23732  *		data - current drive speed or requested drive speed
23733  *		flag - this argument is a pass through to ddi_copyxxx() directly
23734  *		       from the mode argument of ioctl().
23735  *
23736  * Return Code: the code returned by sd_send_scsi_cmd()
23737  *		EINVAL if invalid arguments are provided
23738  *		EFAULT if ddi_copyxxx() fails
23739  *		ENXIO if ddi_get_soft_state() fails
23740  *		EIO if invalid mode sense block descriptor length
23741  */
23742 
23743 static int
23744 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23745 {
23746 	struct sd_lun			*un;
23747 	struct uscsi_cmd		*com = NULL;
23748 	struct mode_header_grp2		*sense_mhp;
23749 	uchar_t				*sense_page;
23750 	uchar_t				*sense = NULL;
23751 	char				cdb[CDB_GROUP5];
23752 	int				bd_len;
23753 	int				current_speed = 0;
23754 	int				max_speed = 0;
23755 	int				rval;
23756 
23757 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23758 
23759 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23760 		return (ENXIO);
23761 	}
23762 
23763 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
23764 
23765 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
23766 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
23767 	    SD_PATH_STANDARD)) != 0) {
23768 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23769 		    "sr_atapi_change_speed: Mode Sense Failed\n");
23770 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23771 		return (rval);
23772 	}
23773 
23774 	/* Check the block descriptor len to handle only 1 block descriptor */
23775 	sense_mhp = (struct mode_header_grp2 *)sense;
23776 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
23777 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23778 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23779 		    "sr_atapi_change_speed: Mode Sense returned invalid "
23780 		    "block descriptor length\n");
23781 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23782 		return (EIO);
23783 	}
23784 
23785 	/* Calculate the current and maximum drive speeds */
23786 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
23787 	current_speed = (sense_page[14] << 8) | sense_page[15];
23788 	max_speed = (sense_page[8] << 8) | sense_page[9];
23789 
23790 	/* Process the command */
23791 	switch (cmd) {
23792 	case CDROMGDRVSPEED:
23793 		current_speed /= SD_SPEED_1X;
23794 		if (ddi_copyout(&current_speed, (void *)data,
23795 		    sizeof (int), flag) != 0)
23796 			rval = EFAULT;
23797 		break;
23798 	case CDROMSDRVSPEED:
23799 		/* Convert the speed code to KB/sec */
23800 		switch ((uchar_t)data) {
23801 		case CDROM_NORMAL_SPEED:
23802 			current_speed = SD_SPEED_1X;
23803 			break;
23804 		case CDROM_DOUBLE_SPEED:
23805 			current_speed = 2 * SD_SPEED_1X;
23806 			break;
23807 		case CDROM_QUAD_SPEED:
23808 			current_speed = 4 * SD_SPEED_1X;
23809 			break;
23810 		case CDROM_TWELVE_SPEED:
23811 			current_speed = 12 * SD_SPEED_1X;
23812 			break;
23813 		case CDROM_MAXIMUM_SPEED:
23814 			current_speed = 0xffff;
23815 			break;
23816 		default:
23817 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23818 			    "sr_atapi_change_speed: invalid drive speed %d\n",
23819 			    (uchar_t)data);
23820 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23821 			return (EINVAL);
23822 		}
23823 
23824 		/* Check the request against the drive's max speed. */
23825 		if (current_speed != 0xffff) {
23826 			if (current_speed > max_speed) {
23827 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23828 				return (EINVAL);
23829 			}
23830 		}
23831 
23832 		/*
23833 		 * Build and send the SET SPEED command
23834 		 *
23835 		 * Note: The SET SPEED (0xBB) command used in this routine is
23836 		 * obsolete per the SCSI MMC spec but still supported in the
23837 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
23838 		 * so the command is still implemented in this routine.
23839 		 */
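		/*
		 * Descriptive sketch of the CDB built below: byte 0 is
		 * 0xBB (SCMD_SET_CDROM_SPEED) and bytes 2-3 carry the
		 * requested read speed in KB/sec, big endian; a value
		 * of 0xffff requests the drive maximum.
		 */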
23840 		bzero(cdb, sizeof (cdb));
23841 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
23842 		cdb[2] = (uchar_t)(current_speed >> 8);
23843 		cdb[3] = (uchar_t)current_speed;
23844 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23845 		com->uscsi_cdb	   = (caddr_t)cdb;
23846 		com->uscsi_cdblen  = CDB_GROUP5;
23847 		com->uscsi_bufaddr = NULL;
23848 		com->uscsi_buflen  = 0;
23849 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
23850 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
23851 		break;
23852 	default:
23853 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23854 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
23855 		rval = EINVAL;
23856 	}
23857 
23858 	if (sense) {
23859 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23860 	}
23861 	if (com) {
23862 		kmem_free(com, sizeof (*com));
23863 	}
23864 	return (rval);
23865 }
23866 
23867 
23868 /*
23869  *    Function: sr_pause_resume()
23870  *
23871  * Description: This routine is the driver entry point for handling CD-ROM
23872  *		pause/resume ioctl requests. This only affects the audio play
23873  *		operation.
23874  *
23875  *   Arguments: dev - the device 'dev_t'
23876  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
23877  *		      for setting the resume bit of the cdb.
23878  *
23879  * Return Code: the code returned by sd_send_scsi_cmd()
23880  *		EINVAL if invalid mode specified
23881  *
23882  */
23883 
23884 static int
23885 sr_pause_resume(dev_t dev, int cmd)
23886 {
23887 	struct sd_lun		*un;
23888 	struct uscsi_cmd	*com;
23889 	char			cdb[CDB_GROUP1];
23890 	int			rval;
23891 
23892 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23893 		return (ENXIO);
23894 	}
23895 
23896 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23897 	bzero(cdb, CDB_GROUP1);
23898 	cdb[0] = SCMD_PAUSE_RESUME;
23899 	switch (cmd) {
23900 	case CDROMRESUME:
23901 		cdb[8] = 1;
23902 		break;
23903 	case CDROMPAUSE:
23904 		cdb[8] = 0;
23905 		break;
23906 	default:
23907 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
23908 		    " Command '%x' Not Supported\n", cmd);
23909 		rval = EINVAL;
23910 		goto done;
23911 	}
23912 
23913 	com->uscsi_cdb    = cdb;
23914 	com->uscsi_cdblen = CDB_GROUP1;
23915 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23916 
23917 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23918 	    SD_PATH_STANDARD);
23919 
23920 done:
23921 	kmem_free(com, sizeof (*com));
23922 	return (rval);
23923 }
23924 
23925 
23926 /*
23927  *    Function: sr_play_msf()
23928  *
23929  * Description: This routine is the driver entry point for handling CD-ROM
23930  *		ioctl requests to output the audio signals at the specified
23931  *		starting address and continue the audio play until the specified
23932  *		ending address (CDROMPLAYMSF) The address is in Minute Second
23933  *		Frame (MSF) format.
23934  *
23935  *   Arguments: dev	- the device 'dev_t'
23936  *		data	- pointer to user provided audio msf structure,
23937  *		          specifying start/end addresses.
23938  *		flag	- this argument is a pass through to ddi_copyxxx()
23939  *		          directly from the mode argument of ioctl().
23940  *
23941  * Return Code: the code returned by sd_send_scsi_cmd()
23942  *		EFAULT if ddi_copyxxx() fails
23943  *		ENXIO if ddi_get_soft_state() fails
23944  *		EINVAL if data pointer is NULL
23945  */
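
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 * play audio from 2:00.00 through 5:30.00.
 *
 *	struct cdrom_msf msf;
 *	msf.cdmsf_min0 = 2;   msf.cdmsf_sec0 = 0;  msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;   msf.cdmsf_sec1 = 30; msf.cdmsf_frame1 = 0;
 *	if (ioctl(fd, CDROMPLAYMSF, &msf) != 0)
 *		perror("CDROMPLAYMSF");
 */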
23946 
23947 static int
23948 sr_play_msf(dev_t dev, caddr_t data, int flag)
23949 {
23950 	struct sd_lun		*un;
23951 	struct uscsi_cmd	*com;
23952 	struct cdrom_msf	msf_struct;
23953 	struct cdrom_msf	*msf = &msf_struct;
23954 	char			cdb[CDB_GROUP1];
23955 	int			rval;
23956 
23957 	if (data == NULL) {
23958 		return (EINVAL);
23959 	}
23960 
23961 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23962 		return (ENXIO);
23963 	}
23964 
23965 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
23966 		return (EFAULT);
23967 	}
23968 
23969 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23970 	bzero(cdb, CDB_GROUP1);
23971 	cdb[0] = SCMD_PLAYAUDIO_MSF;
23972 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
23973 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
23974 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
23975 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
23976 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
23977 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
23978 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
23979 	} else {
23980 		cdb[3] = msf->cdmsf_min0;
23981 		cdb[4] = msf->cdmsf_sec0;
23982 		cdb[5] = msf->cdmsf_frame0;
23983 		cdb[6] = msf->cdmsf_min1;
23984 		cdb[7] = msf->cdmsf_sec1;
23985 		cdb[8] = msf->cdmsf_frame1;
23986 	}
23987 	com->uscsi_cdb    = cdb;
23988 	com->uscsi_cdblen = CDB_GROUP1;
23989 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23990 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23991 	    SD_PATH_STANDARD);
23992 	kmem_free(com, sizeof (*com));
23993 	return (rval);
23994 }
23995 
23996 
23997 /*
23998  *    Function: sr_play_trkind()
23999  *
24000  * Description: This routine is the driver entry point for handling CD-ROM
24001  *		ioctl requests to output the audio signals at the specified
24002  *		starting address and continue the audio play until the specified
24003  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
24004  *		format.
24005  *
24006  *   Arguments: dev	- the device 'dev_t'
24007  *		data	- pointer to user provided audio track/index structure,
24008  *		          specifying start/end addresses.
24009  *		flag	- this argument is a pass through to ddi_copyxxx()
24010  *		          directly from the mode argument of ioctl().
24011  *
24012  * Return Code: the code returned by sd_send_scsi_cmd()
24013  *		EFAULT if ddi_copyxxx() fails
24014  *		ENXIO if ddi_get_soft_state() fails
24015  *		EINVAL if data pointer is NULL
24016  */
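
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 * play from track 1 index 1 through track 3 index 1.
 *
 *	struct cdrom_ti ti;
 *	ti.cdti_trk0 = 1;  ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 3;  ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */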
24017 
24018 static int
24019 sr_play_trkind(dev_t dev, caddr_t data, int flag)
24020 {
24021 	struct cdrom_ti		ti_struct;
24022 	struct cdrom_ti		*ti = &ti_struct;
24023 	struct uscsi_cmd	*com = NULL;
24024 	char			cdb[CDB_GROUP1];
24025 	int			rval;
24026 
24027 	if (data == NULL) {
24028 		return (EINVAL);
24029 	}
24030 
24031 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
24032 		return (EFAULT);
24033 	}
24034 
24035 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24036 	bzero(cdb, CDB_GROUP1);
24037 	cdb[0] = SCMD_PLAYAUDIO_TI;
24038 	cdb[4] = ti->cdti_trk0;
24039 	cdb[5] = ti->cdti_ind0;
24040 	cdb[7] = ti->cdti_trk1;
24041 	cdb[8] = ti->cdti_ind1;
24042 	com->uscsi_cdb    = cdb;
24043 	com->uscsi_cdblen = CDB_GROUP1;
24044 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24045 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24046 	    SD_PATH_STANDARD);
24047 	kmem_free(com, sizeof (*com));
24048 	return (rval);
24049 }
24050 
24051 
24052 /*
24053  *    Function: sr_read_all_subcodes()
24054  *
24055  * Description: This routine is the driver entry point for handling CD-ROM
24056  *		ioctl requests to return raw subcode data while the target is
24057  *		playing audio (CDROMSUBCODE).
24058  *
24059  *   Arguments: dev	- the device 'dev_t'
24060  *		data	- pointer to user provided cdrom subcode structure,
24061  *		          specifying the transfer length and address.
24062  *		flag	- this argument is a pass through to ddi_copyxxx()
24063  *		          directly from the mode argument of ioctl().
24064  *
24065  * Return Code: the code returned by sd_send_scsi_cmd()
24066  *		EFAULT if ddi_copyxxx() fails
24067  *		ENXIO if ddi_get_soft_state() fails
24068  *		EINVAL if data pointer is NULL
24069  */
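
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 * read one block (96 bytes) of raw subcode data.
 *
 *	struct cdrom_subcode sub;
 *	uchar_t buf[CDROM_BLK_SUBCODE];
 *	sub.cdsc_length = 1;
 *	sub.cdsc_addr = (caddr_t)buf;
 *	if (ioctl(fd, CDROMSUBCODE, &sub) != 0)
 *		perror("CDROMSUBCODE");
 */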
24070 
24071 static int
24072 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24073 {
24074 	struct sd_lun		*un = NULL;
24075 	struct uscsi_cmd	*com = NULL;
24076 	struct cdrom_subcode	*subcode = NULL;
24077 	int			rval;
24078 	size_t			buflen;
24079 	char			cdb[CDB_GROUP5];
24080 
24081 #ifdef _MULTI_DATAMODEL
24082 	/* To support ILP32 applications in an LP64 world */
24083 	struct cdrom_subcode32		cdrom_subcode32;
24084 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24085 #endif
24086 	if (data == NULL) {
24087 		return (EINVAL);
24088 	}
24089 
24090 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24091 		return (ENXIO);
24092 	}
24093 
24094 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24095 
24096 #ifdef _MULTI_DATAMODEL
24097 	switch (ddi_model_convert_from(flag & FMODELS)) {
24098 	case DDI_MODEL_ILP32:
24099 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24100 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24101 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24102 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24103 			return (EFAULT);
24104 		}
24105 		/* Convert the ILP32 uscsi data from the application to LP64 */
24106 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24107 		break;
24108 	case DDI_MODEL_NONE:
24109 		if (ddi_copyin(data, subcode,
24110 		    sizeof (struct cdrom_subcode), flag)) {
24111 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24112 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24113 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24114 			return (EFAULT);
24115 		}
24116 		break;
24117 	}
24118 #else /* ! _MULTI_DATAMODEL */
24119 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24120 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24121 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24122 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24123 		return (EFAULT);
24124 	}
24125 #endif /* _MULTI_DATAMODEL */
24126 
24127 	/*
24128 	 * Since MMC-2 expects max 3 bytes for length, check if the
24129 	 * length input is greater than 3 bytes
24130 	 */
24131 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24132 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24133 		    "sr_read_all_subcodes: "
24134 		    "cdrom transfer length too large: %d (limit %d)\n",
24135 		    subcode->cdsc_length, 0xFFFFFF);
24136 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24137 		return (EINVAL);
24138 	}
24139 
24140 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24141 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24142 	bzero(cdb, CDB_GROUP5);
24143 
24144 	if (un->un_f_mmc_cap == TRUE) {
24145 		cdb[0] = (char)SCMD_READ_CD;
24146 		cdb[2] = (char)0xff;
24147 		cdb[3] = (char)0xff;
24148 		cdb[4] = (char)0xff;
24149 		cdb[5] = (char)0xff;
24150 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24151 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24152 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24153 		cdb[10] = 1;
24154 	} else {
24155 		/*
24156 		 * Note: A vendor specific command (0xDF) is being used here to
24157 		 * request a read of all subcodes.
24158 		 */
24159 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24160 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24161 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24162 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24163 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24164 	}
24165 	com->uscsi_cdb	   = cdb;
24166 	com->uscsi_cdblen  = CDB_GROUP5;
24167 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24168 	com->uscsi_buflen  = buflen;
24169 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24170 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24171 	    SD_PATH_STANDARD);
24172 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24173 	kmem_free(com, sizeof (*com));
24174 	return (rval);
24175 }
24176 
24177 
24178 /*
24179  *    Function: sr_read_subchannel()
24180  *
24181  * Description: This routine is the driver entry point for handling CD-ROM
24182  *		ioctl requests to return the Q sub-channel data of the CD
24183  *		current position block. (CDROMSUBCHNL) The data includes the
24184  *		track number, index number, absolute CD-ROM address (LBA or MSF
24185  *		format per the user) , track relative CD-ROM address (LBA or MSF
24186  *		format per the user), control data and audio status.
24187  *
24188  *   Arguments: dev	- the device 'dev_t'
24189  *		data	- pointer to user provided cdrom sub-channel structure
24190  *		flag	- this argument is a pass through to ddi_copyxxx()
24191  *		          directly from the mode argument of ioctl().
24192  *
24193  * Return Code: the code returned by sd_send_scsi_cmd()
24194  *		EFAULT if ddi_copyxxx() fails
24195  *		ENXIO if ddi_get_soft_state() fails
24196  *		EINVAL if data pointer is NULL
24197  */
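
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 * fetch the current position in MSF format.
 *
 *	struct cdrom_subchnl sc;
 *	sc.cdsc_format = CDROM_MSF;
 *	if (ioctl(fd, CDROMSUBCHNL, &sc) == 0)
 *		(void) printf("track %d index %d\n",
 *		    sc.cdsc_trk, sc.cdsc_ind);
 */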
24198 
24199 static int
24200 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24201 {
24202 	struct sd_lun		*un;
24203 	struct uscsi_cmd	*com;
24204 	struct cdrom_subchnl	subchannel;
24205 	struct cdrom_subchnl	*subchnl = &subchannel;
24206 	char			cdb[CDB_GROUP1];
24207 	caddr_t			buffer;
24208 	int			rval;
24209 
24210 	if (data == NULL) {
24211 		return (EINVAL);
24212 	}
24213 
24214 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24215 	    (un->un_state == SD_STATE_OFFLINE)) {
24216 		return (ENXIO);
24217 	}
24218 
24219 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24220 		return (EFAULT);
24221 	}
24222 
24223 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24224 	bzero(cdb, CDB_GROUP1);
24225 	cdb[0] = SCMD_READ_SUBCHANNEL;
24226 	/* Set the MSF bit based on the user requested address format */
24227 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24228 	/*
24229 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
24230 	 * returned
24231 	 */
24232 	cdb[2] = 0x40;
24233 	/*
24234 	 * Set byte 3 to specify the return data format. A value of 0x01
24235 	 * indicates that the CD-ROM current position should be returned.
24236 	 */
24237 	cdb[3] = 0x01;
24238 	cdb[8] = 0x10;
24239 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24240 	com->uscsi_cdb	   = cdb;
24241 	com->uscsi_cdblen  = CDB_GROUP1;
24242 	com->uscsi_bufaddr = buffer;
24243 	com->uscsi_buflen  = 16;
24244 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24245 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24246 	    SD_PATH_STANDARD);
24247 	if (rval != 0) {
24248 		kmem_free(buffer, 16);
24249 		kmem_free(com, sizeof (*com));
24250 		return (rval);
24251 	}
24252 
24253 	/* Process the returned Q sub-channel data */
24254 	subchnl->cdsc_audiostatus = buffer[1];
24255 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24256 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24257 	subchnl->cdsc_trk	= buffer[6];
24258 	subchnl->cdsc_ind	= buffer[7];
24259 	if (subchnl->cdsc_format & CDROM_LBA) {
24260 		subchnl->cdsc_absaddr.lba =
24261 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24262 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24263 		subchnl->cdsc_reladdr.lba =
24264 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24265 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24266 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24267 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24268 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24269 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24270 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24271 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24272 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24273 	} else {
24274 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24275 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24276 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24277 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24278 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24279 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24280 	}
24281 	kmem_free(buffer, 16);
24282 	kmem_free(com, sizeof (*com));
24283 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24284 	    != 0) {
24285 		return (EFAULT);
24286 	}
24287 	return (rval);
24288 }
24289 
24290 
24291 /*
24292  *    Function: sr_read_tocentry()
24293  *
24294  * Description: This routine is the driver entry point for handling CD-ROM
24295  *		ioctl requests to read from the Table of Contents (TOC)
24296  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24297  *		fields, the starting address (LBA or MSF format per the user)
24298  *		and the data mode if the user specified track is a data track.
24299  *
24300  *		Note: The READ HEADER (0x44) command used in this routine is
24301  *		obsolete per the SCSI MMC spec but still supported in the
24302  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI, so
24303  *		the command is still implemented in this routine.
24304  *
24305  *   Arguments: dev	- the device 'dev_t'
24306  *		data	- pointer to user provided toc entry structure,
24307  *			  specifying the track # and the address format
24308  *			  (LBA or MSF).
24309  *		flag	- this argument is a pass through to ddi_copyxxx()
24310  *		          directly from the mode argument of ioctl().
24311  *
24312  * Return Code: the code returned by sd_send_scsi_cmd()
24313  *		EFAULT if ddi_copyxxx() fails
24314  *		ENXIO if ddi_get_soft_state() fails
24315  *		EINVAL if data pointer is NULL
24316  */
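
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 * look up the starting LBA of track 1.
 *
 *	struct cdrom_tocentry te;
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_LBA;
 *	if (ioctl(fd, CDROMREADTOCENTRY, &te) == 0)
 *		(void) printf("track 1 starts at LBA %d\n",
 *		    te.cdte_addr.lba);
 */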
24317 
24318 static int
24319 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24320 {
24321 	struct sd_lun		*un = NULL;
24322 	struct uscsi_cmd	*com;
24323 	struct cdrom_tocentry	toc_entry;
24324 	struct cdrom_tocentry	*entry = &toc_entry;
24325 	caddr_t			buffer;
24326 	int			rval;
24327 	char			cdb[CDB_GROUP1];
24328 
24329 	if (data == NULL) {
24330 		return (EINVAL);
24331 	}
24332 
24333 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24334 	    (un->un_state == SD_STATE_OFFLINE)) {
24335 		return (ENXIO);
24336 	}
24337 
24338 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24339 		return (EFAULT);
24340 	}
24341 
24342 	/* Validate the requested track and address format */
24343 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24344 		return (EINVAL);
24345 	}
24346 
24347 	if (entry->cdte_track == 0) {
24348 		return (EINVAL);
24349 	}
24350 
24351 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24352 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24353 	bzero(cdb, CDB_GROUP1);
24354 
24355 	cdb[0] = SCMD_READ_TOC;
24356 	/* Set the MSF bit based on the user requested address format  */
24357 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24358 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24359 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24360 	} else {
24361 		cdb[6] = entry->cdte_track;
24362 	}
24363 
24364 	/*
24365 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24366 	 * (4 byte TOC response header + 8 byte track descriptor)
24367 	 */
24368 	cdb[8] = 12;
24369 	com->uscsi_cdb	   = cdb;
24370 	com->uscsi_cdblen  = CDB_GROUP1;
24371 	com->uscsi_bufaddr = buffer;
24372 	com->uscsi_buflen  = 0x0C;
24373 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24374 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24375 	    SD_PATH_STANDARD);
24376 	if (rval != 0) {
24377 		kmem_free(buffer, 12);
24378 		kmem_free(com, sizeof (*com));
24379 		return (rval);
24380 	}
24381 
24382 	/* Process the toc entry */
24383 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24384 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24385 	if (entry->cdte_format & CDROM_LBA) {
24386 		entry->cdte_addr.lba =
24387 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24388 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24389 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24390 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24391 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24392 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24393 		/*
24394 		 * Send a READ TOC command using the LBA address format to get
24395 		 * the LBA for the track requested so it can be used in the
24396 		 * READ HEADER request
24397 		 *
24398 		 * Note: The MSF bit of the READ HEADER command specifies the
24399 		 * output format. The block address specified in that command
24400 		 * must be in LBA format.
24401 		 */
24402 		cdb[1] = 0;
24403 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24404 		    SD_PATH_STANDARD);
24405 		if (rval != 0) {
24406 			kmem_free(buffer, 12);
24407 			kmem_free(com, sizeof (*com));
24408 			return (rval);
24409 		}
24410 	} else {
24411 		entry->cdte_addr.msf.minute	= buffer[9];
24412 		entry->cdte_addr.msf.second	= buffer[10];
24413 		entry->cdte_addr.msf.frame	= buffer[11];
24414 		/*
24415 		 * Send a READ TOC command using the LBA address format to get
24416 		 * the LBA for the track requested so it can be used in the
24417 		 * READ HEADER request
24418 		 *
24419 		 * Note: The MSF bit of the READ HEADER command specifies the
24420 		 * output format. The block address specified in that command
24421 		 * must be in LBA format.
24422 		 */
24423 		cdb[1] = 0;
24424 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24425 		    SD_PATH_STANDARD);
24426 		if (rval != 0) {
24427 			kmem_free(buffer, 12);
24428 			kmem_free(com, sizeof (*com));
24429 			return (rval);
24430 		}
24431 	}
24432 
24433 	/*
24434 	 * Build and send the READ HEADER command to determine the data mode of
24435 	 * the user specified track.
24436 	 */
24437 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24438 	    (entry->cdte_track != CDROM_LEADOUT)) {
24439 		bzero(cdb, CDB_GROUP1);
24440 		cdb[0] = SCMD_READ_HEADER;
24441 		cdb[2] = buffer[8];
24442 		cdb[3] = buffer[9];
24443 		cdb[4] = buffer[10];
24444 		cdb[5] = buffer[11];
24445 		cdb[8] = 0x08;
24446 		com->uscsi_buflen = 0x08;
24447 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24448 		    SD_PATH_STANDARD);
24449 		if (rval == 0) {
24450 			entry->cdte_datamode = buffer[0];
24451 		} else {
24452 			/*
24453 			 * The READ HEADER command failed. Since it is
24454 			 * obsolete in one spec, it is better to return
24455 			 * -1 for an invalid track so that the rest of
24456 			 * the TOC data can still be received.
24457 			 */
24458 			entry->cdte_datamode = (uchar_t)-1;
24459 		}
24460 	} else {
24461 		entry->cdte_datamode = (uchar_t)-1;
24462 	}
24463 
24464 	kmem_free(buffer, 12);
24465 	kmem_free(com, sizeof (*com));
24466 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
24467 		return (EFAULT);
24468 
24469 	return (rval);
24470 }
24471 
24472 
24473 /*
24474  *    Function: sr_read_tochdr()
24475  *
24476  * Description: This routine is the driver entry point for handling CD-ROM
24477  * 		ioctl requests to read the Table of Contents (TOC) header
24478  *		ioctl requests to read the Table of Contents (TOC) header
24479  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
24480  *		and ending track numbers.
24481  *   Arguments: dev	- the device 'dev_t'
24482  *		data	- pointer to user provided toc header structure,
24483  *			  specifying the starting and ending track numbers.
24484  *		flag	- this argument is a pass through to ddi_copyxxx()
24485  *			  directly from the mode argument of ioctl().
24486  *
24487  * Return Code: the code returned by sd_send_scsi_cmd()
24488  *		EFAULT if ddi_copyxxx() fails
24489  *		ENXIO if ddi_get_soft_state() fails
24490  *		EINVAL if data pointer is NULL
24491  */
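
/*
 * Illustrative user-level usage (a sketch, not part of this driver):
 *
 *	struct cdrom_tochdr th;
 *	if (ioctl(fd, CDROMREADTOCHDR, &th) == 0)
 *		(void) printf("tracks %d through %d\n",
 *		    th.cdth_trk0, th.cdth_trk1);
 */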
24492 
24493 static int
24494 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24495 {
24496 	struct sd_lun		*un;
24497 	struct uscsi_cmd	*com;
24498 	struct cdrom_tochdr	toc_header;
24499 	struct cdrom_tochdr	*hdr = &toc_header;
24500 	char			cdb[CDB_GROUP1];
24501 	int			rval;
24502 	caddr_t			buffer;
24503 
24504 	if (data == NULL) {
24505 		return (EINVAL);
24506 	}
24507 
24508 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24509 	    (un->un_state == SD_STATE_OFFLINE)) {
24510 		return (ENXIO);
24511 	}
24512 
24513 	buffer = kmem_zalloc(4, KM_SLEEP);
24514 	bzero(cdb, CDB_GROUP1);
24515 	cdb[0] = SCMD_READ_TOC;
24516 	/*
24517 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24518 	 * that the TOC header should be returned
24519 	 */
24520 	cdb[6] = 0x00;
24521 	/*
24522 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24523 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24524 	 */
24525 	cdb[8] = 0x04;
24526 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24527 	com->uscsi_cdb	   = cdb;
24528 	com->uscsi_cdblen  = CDB_GROUP1;
24529 	com->uscsi_bufaddr = buffer;
24530 	com->uscsi_buflen  = 0x04;
24531 	com->uscsi_timeout = 300;
24532 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24533 
24534 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24535 	    SD_PATH_STANDARD);
24536 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24537 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24538 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24539 	} else {
24540 		hdr->cdth_trk0 = buffer[2];
24541 		hdr->cdth_trk1 = buffer[3];
24542 	}
24543 	kmem_free(buffer, 4);
24544 	kmem_free(com, sizeof (*com));
24545 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24546 		return (EFAULT);
24547 	}
24548 	return (rval);
24549 }
24550 
24551 
24552 /*
24553  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24554  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
24555  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24556  * digital audio and extended architecture digital audio. These modes are
24557  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24558  * MMC specs.
24559  *
24560  * In addition to support for the various data formats, these routines also
24561  * include support for devices that implement only the direct access READ
24562  * commands (0x08, 0x28), devices that implement the READ_CD commands
24563  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24564  * READ CDXA commands (0xD8, 0xDB).
24565  */
24566 
24567 /*
24568  *    Function: sr_read_mode1()
24569  *
24570  * Description: This routine is the driver entry point for handling CD-ROM
24571  *		ioctl read mode1 requests (CDROMREADMODE1).
24572  *
24573  *   Arguments: dev	- the device 'dev_t'
24574  *		data	- pointer to user provided cd read structure specifying
24575  *			  the lba buffer address and length.
24576  *		flag	- this argument is a pass through to ddi_copyxxx()
24577  *			  directly from the mode argument of ioctl().
24578  *
24579  * Return Code: the code returned by sd_send_scsi_cmd()
24580  *		EFAULT if ddi_copyxxx() fails
24581  *		ENXIO if ddi_get_soft_state() fails
24582  *		EINVAL if data pointer is NULL
24583  */
24584 
24585 static int
24586 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24587 {
24588 	struct sd_lun		*un;
24589 	struct cdrom_read	mode1_struct;
24590 	struct cdrom_read	*mode1 = &mode1_struct;
24591 	int			rval;
24592 #ifdef _MULTI_DATAMODEL
24593 	/* To support ILP32 applications in an LP64 world */
24594 	struct cdrom_read32	cdrom_read32;
24595 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24596 #endif /* _MULTI_DATAMODEL */
24597 
24598 	if (data == NULL) {
24599 		return (EINVAL);
24600 	}
24601 
24602 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24603 	    (un->un_state == SD_STATE_OFFLINE)) {
24604 		return (ENXIO);
24605 	}
24606 
24607 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24608 	    "sd_read_mode1: entry: un:0x%p\n", un);
24609 
24610 #ifdef _MULTI_DATAMODEL
24611 	switch (ddi_model_convert_from(flag & FMODELS)) {
24612 	case DDI_MODEL_ILP32:
24613 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24614 			return (EFAULT);
24615 		}
24616 		/* Convert the ILP32 uscsi data from the application to LP64 */
24617 		cdrom_read32tocdrom_read(cdrd32, mode1);
24618 		break;
24619 	case DDI_MODEL_NONE:
24620 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24621 			return (EFAULT);
24622 		}
24623 	}
24624 #else /* ! _MULTI_DATAMODEL */
24625 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24626 		return (EFAULT);
24627 	}
24628 #endif /* _MULTI_DATAMODEL */
24629 
24630 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24631 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24632 
24633 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24634 	    "sd_read_mode1: exit: un:0x%p\n", un);
24635 
24636 	return (rval);
24637 }
24638 
24639 
24640 /*
24641  *    Function: sr_read_cd_mode2()
24642  *
24643  * Description: This routine is the driver entry point for handling CD-ROM
24644  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24645  *		support the READ CD (0xBE) command or the 1st generation
24646  *		READ CD (0xD4) command.
24647  *
24648  *   Arguments: dev	- the device 'dev_t'
24649  *		data	- pointer to user provided cd read structure specifying
24650  *			  the lba buffer address and length.
24651  *		flag	- this argument is a pass through to ddi_copyxxx()
24652  *			  directly from the mode argument of ioctl().
24653  *
24654  * Return Code: the code returned by sd_send_scsi_cmd()
24655  *		EFAULT if ddi_copyxxx() fails
24656  *		ENXIO if ddi_get_soft_state() fails
24657  *		EINVAL if data pointer is NULL
24658  */
24659 
24660 static int
24661 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
24662 {
24663 	struct sd_lun		*un;
24664 	struct uscsi_cmd	*com;
24665 	struct cdrom_read	mode2_struct;
24666 	struct cdrom_read	*mode2 = &mode2_struct;
24667 	uchar_t			cdb[CDB_GROUP5];
24668 	int			nblocks;
24669 	int			rval;
24670 #ifdef _MULTI_DATAMODEL
24671 	/*  To support ILP32 applications in an LP64 world */
24672 	struct cdrom_read32	cdrom_read32;
24673 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24674 #endif /* _MULTI_DATAMODEL */
24675 
24676 	if (data == NULL) {
24677 		return (EINVAL);
24678 	}
24679 
24680 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24681 	    (un->un_state == SD_STATE_OFFLINE)) {
24682 		return (ENXIO);
24683 	}
24684 
24685 #ifdef _MULTI_DATAMODEL
24686 	switch (ddi_model_convert_from(flag & FMODELS)) {
24687 	case DDI_MODEL_ILP32:
24688 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24689 			return (EFAULT);
24690 		}
24691 		/* Convert the ILP32 uscsi data from the application to LP64 */
24692 		cdrom_read32tocdrom_read(cdrd32, mode2);
24693 		break;
24694 	case DDI_MODEL_NONE:
24695 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24696 			return (EFAULT);
24697 		}
24698 		break;
24699 	}
24700 
24701 #else /* ! _MULTI_DATAMODEL */
24702 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24703 		return (EFAULT);
24704 	}
24705 #endif /* _MULTI_DATAMODEL */
24706 
24707 	bzero(cdb, sizeof (cdb));
24708 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
24709 		/* Read command supported by 1st generation atapi drives */
24710 		cdb[0] = SCMD_READ_CDD4;
24711 	} else {
24712 		/* Universal CD Access Command */
24713 		cdb[0] = SCMD_READ_CD;
24714 	}
24715 
24716 	/*
24717 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
24718 	 */
24719 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
24720 
24721 	/* set the start address */
24722 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
24723 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
24724 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24725 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
24726 
24727 	/* set the transfer length */
24728 	nblocks = mode2->cdread_buflen / 2336;
24729 	cdb[6] = (uchar_t)(nblocks >> 16);
24730 	cdb[7] = (uchar_t)(nblocks >> 8);
24731 	cdb[8] = (uchar_t)nblocks;
24732 
24733 	/* set the filter bits */
24734 	cdb[9] = CDROM_READ_CD_USERDATA;
24735 
24736 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24737 	com->uscsi_cdb = (caddr_t)cdb;
24738 	com->uscsi_cdblen = sizeof (cdb);
24739 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24740 	com->uscsi_buflen = mode2->cdread_buflen;
24741 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24742 
24743 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24744 	    SD_PATH_STANDARD);
24745 	kmem_free(com, sizeof (*com));
24746 	return (rval);
24747 }
24748 
24749 
24750 /*
24751  *    Function: sr_read_mode2()
24752  *
24753  * Description: This routine is the driver entry point for handling CD-ROM
24754  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24755  *		do not support the READ CD (0xBE) command.
24756  *
24757  *   Arguments: dev	- the device 'dev_t'
24758  *		data	- pointer to user provided cd read structure specifying
24759  *			  the lba buffer address and length.
24760  *		flag	- this argument is a pass through to ddi_copyxxx()
24761  *			  directly from the mode argument of ioctl().
24762  *
24763  * Return Code: the code returned by sd_send_scsi_cmd()
24764  *		EFAULT if ddi_copyxxx() fails
24765  *		ENXIO if ddi_get_soft_state() fails
24766  *		EINVAL if data pointer is NULL
24767  *		EIO if fail to reset block size
24768  *		EAGAIN if commands are in progress in the driver
24769  */
24770 
24771 static int
24772 sr_read_mode2(dev_t dev, caddr_t data, int flag)
24773 {
24774 	struct sd_lun		*un;
24775 	struct cdrom_read	mode2_struct;
24776 	struct cdrom_read	*mode2 = &mode2_struct;
24777 	int			rval;
24778 	uint32_t		restore_blksize;
24779 	struct uscsi_cmd	*com;
24780 	uchar_t			cdb[CDB_GROUP0];
24781 	int			nblocks;
24782 
24783 #ifdef _MULTI_DATAMODEL
24784 	/* To support ILP32 applications in an LP64 world */
24785 	struct cdrom_read32	cdrom_read32;
24786 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24787 #endif /* _MULTI_DATAMODEL */
24788 
24789 	if (data == NULL) {
24790 		return (EINVAL);
24791 	}
24792 
24793 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24794 	    (un->un_state == SD_STATE_OFFLINE)) {
24795 		return (ENXIO);
24796 	}
24797 
24798 	/*
24799 	 * Because this routine will update the device and driver block size
24800 	 * being used we want to make sure there are no commands in progress.
24801 	 * If commands are in progress the user will have to try again.
24802 	 *
24803 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
24804 	 * in sdioctl to protect commands from sdioctl through to the top of
24805 	 * sd_uscsi_strategy. See sdioctl for details.
24806 	 */
24807 	mutex_enter(SD_MUTEX(un));
24808 	if (un->un_ncmds_in_driver != 1) {
24809 		mutex_exit(SD_MUTEX(un));
24810 		return (EAGAIN);
24811 	}
24812 	mutex_exit(SD_MUTEX(un));
24813 
24814 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24815 	    "sd_read_mode2: entry: un:0x%p\n", un);
24816 
24817 #ifdef _MULTI_DATAMODEL
24818 	switch (ddi_model_convert_from(flag & FMODELS)) {
24819 	case DDI_MODEL_ILP32:
24820 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24821 			return (EFAULT);
24822 		}
24823 		/* Convert the ILP32 uscsi data from the application to LP64 */
24824 		cdrom_read32tocdrom_read(cdrd32, mode2);
24825 		break;
24826 	case DDI_MODEL_NONE:
24827 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24828 			return (EFAULT);
24829 		}
24830 		break;
24831 	}
24832 #else /* ! _MULTI_DATAMODEL */
24833 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
24834 		return (EFAULT);
24835 	}
24836 #endif /* _MULTI_DATAMODEL */
24837 
24838 	/* Store the current target block size for restoration later */
24839 	restore_blksize = un->un_tgt_blocksize;
24840 
24841 	/* Change the device and soft state target block size to 2336 */
24842 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
24843 		rval = EIO;
24844 		goto done;
24845 	}
24846 
24847 
24848 	bzero(cdb, sizeof (cdb));
24849 
24850 	/* set READ operation */
24851 	cdb[0] = SCMD_READ;
24852 
24853 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
24854 	mode2->cdread_lba >>= 2;
24855 
24856 	/* set the start address */
24857 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
24858 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24859 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
24860 
24861 	/* set the transfer length */
24862 	nblocks = mode2->cdread_buflen / 2336;
24863 	cdb[4] = (uchar_t)nblocks & 0xFF;
24864 
24865 	/* build command */
24866 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24867 	com->uscsi_cdb = (caddr_t)cdb;
24868 	com->uscsi_cdblen = sizeof (cdb);
24869 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24870 	com->uscsi_buflen = mode2->cdread_buflen;
24871 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24872 
24873 	/*
24874 	 * Issue SCSI command with user space address for read buffer.
24875 	 *
24876 	 * This sends the command through main channel in the driver.
24877 	 *
24878 	 * Since this is accessed via an IOCTL call, we go through the
24879 	 * standard path, so that if the device was powered down, then
24880 	 * it would be 'awakened' to handle the command.
24881 	 */
24882 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24883 	    SD_PATH_STANDARD);
24884 
24885 	kmem_free(com, sizeof (*com));
24886 
24887 	/* Restore the device and soft state target block size */
24888 	if (sr_sector_mode(dev, restore_blksize) != 0) {
24889 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24890 		    "can't do switch back to mode 1\n");
24891 		/*
24892 		 * If sd_send_scsi_READ succeeded we still need to report
24893 		 * an error because we failed to reset the block size
24894 		 */
24895 		if (rval == 0) {
24896 			rval = EIO;
24897 		}
24898 	}
24899 
24900 done:
24901 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24902 	    "sd_read_mode2: exit: un:0x%p\n", un);
24903 
24904 	return (rval);
24905 }
24906 
24907 
24908 /*
24909  *    Function: sr_sector_mode()
24910  *
24911  * Description: This utility function is used by sr_read_mode2 to set the target
24912  *		block size based on the user specified size. This is a legacy
24913  *		implementation based upon a vendor specific mode page.
24914  *
24915  *   Arguments: dev	- the device 'dev_t'
24916  *		blksize	- the requested target block size; either 2336
24917  *			  (mode 2) or the original 512 byte size (mode 1).
24918  *
24919  * Return Code: the code returned by sd_send_scsi_cmd()
24920  *		ENXIO if ddi_get_soft_state() fails
24923  */
24924 
24925 static int
24926 sr_sector_mode(dev_t dev, uint32_t blksize)
24927 {
24928 	struct sd_lun	*un;
24929 	uchar_t		*sense;
24930 	uchar_t		*select;
24931 	int		rval;
24932 
24933 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24934 	    (un->un_state == SD_STATE_OFFLINE)) {
24935 		return (ENXIO);
24936 	}
24937 
24938 	sense = kmem_zalloc(20, KM_SLEEP);
24939 
24940 	/* Note: This is a vendor specific mode page (0x81) */
24941 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
24942 	    SD_PATH_STANDARD)) != 0) {
24943 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24944 		    "sr_sector_mode: Mode Sense failed\n");
24945 		kmem_free(sense, 20);
24946 		return (rval);
24947 	}
24948 	select = kmem_zalloc(20, KM_SLEEP);
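	/*
	 * Build the mode select data: a 4 byte mode parameter header, an
	 * 8 byte block descriptor (select[3] = 0x08) whose low two block
	 * length bytes (select[10], select[11]) carry the new size, then
	 * the vendor page header (page code with the PS bit clear and a
	 * page length of 0x06) with byte 14 bit 0 selecting 2336 byte
	 * sectors.
	 */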
24949 	select[3] = 0x08;
24950 	select[10] = ((blksize >> 8) & 0xff);
24951 	select[11] = (blksize & 0xff);
24952 	select[12] = 0x01;
24953 	select[13] = 0x06;
24954 	select[14] = sense[14];
24955 	select[15] = sense[15];
24956 	if (blksize == SD_MODE2_BLKSIZE) {
24957 		select[14] |= 0x01;
24958 	}
24959 
24960 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
24961 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24962 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24963 		    "sr_sector_mode: Mode Select failed\n");
24964 	} else {
24965 		/*
24966 		 * Only update the softstate block size if we successfully
24967 		 * changed the device block mode.
24968 		 */
24969 		mutex_enter(SD_MUTEX(un));
24970 		sd_update_block_info(un, blksize, 0);
24971 		mutex_exit(SD_MUTEX(un));
24972 	}
24973 	kmem_free(sense, 20);
24974 	kmem_free(select, 20);
24975 	return (rval);
24976 }
24977 
24978 
24979 /*
24980  *    Function: sr_read_cdda()
24981  *
24982  * Description: This routine is the driver entry point for handling CD-ROM
24983  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
24984  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
24985  *		the target supports CDDA these requests are handled via a vendor
24986  *		specific command (0xD8); if the target does not support CDDA
24987  *		these requests are handled via the READ CD command (0xBE).
24988  *   Arguments: dev	- the device 'dev_t'
24989  *		data	- pointer to user provided CD-DA structure specifying
24990  *			  the track starting address, transfer length, and
24991  *			  subcode options.
24992  *		flag	- this argument is a pass through to ddi_copyxxx()
24993  *			  directly from the mode argument of ioctl().
24994  *
24995  * Return Code: the code returned by sd_send_scsi_cmd()
24996  *		EFAULT if ddi_copyxxx() fails
24997  *		ENXIO if fail ddi_get_soft_state
24998  *		EINVAL if invalid arguments are provided
24999  *		ENOTTY if the subcode is not supported by the READ CD command
25000  */
25001 
25002 static int
25003 sr_read_cdda(dev_t dev, caddr_t data, int flag)
25004 {
25005 	struct sd_lun			*un;
25006 	struct uscsi_cmd		*com;
25007 	struct cdrom_cdda		*cdda;
25008 	int				rval;
25009 	size_t				buflen;
25010 	char				cdb[CDB_GROUP5];
25011 
25012 #ifdef _MULTI_DATAMODEL
25013 	/* To support ILP32 applications in an LP64 world */
25014 	struct cdrom_cdda32	cdrom_cdda32;
25015 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
25016 #endif /* _MULTI_DATAMODEL */
25017 
25018 	if (data == NULL) {
25019 		return (EINVAL);
25020 	}
25021 
25022 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25023 		return (ENXIO);
25024 	}
25025 
25026 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
25027 
25028 #ifdef _MULTI_DATAMODEL
25029 	switch (ddi_model_convert_from(flag & FMODELS)) {
25030 	case DDI_MODEL_ILP32:
25031 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
25032 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25033 			    "sr_read_cdda: ddi_copyin Failed\n");
25034 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25035 			return (EFAULT);
25036 		}
25037 		/* Convert the ILP32 uscsi data from the application to LP64 */
25038 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
25039 		break;
25040 	case DDI_MODEL_NONE:
25041 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25042 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25043 			    "sr_read_cdda: ddi_copyin Failed\n");
25044 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25045 			return (EFAULT);
25046 		}
25047 		break;
25048 	}
25049 #else /* ! _MULTI_DATAMODEL */
25050 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25051 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25052 		    "sr_read_cdda: ddi_copyin Failed\n");
25053 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25054 		return (EFAULT);
25055 	}
25056 #endif /* _MULTI_DATAMODEL */
25057 
25058 	/*
25059 	 * MMC-2 allows at most a 3-byte (24-bit) transfer length, so
25060 	 * reject any length that does not fit in 3 bytes.
25061 	 */
25062 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25063 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25064 		    "cdrom transfer length too large: %d (limit %d)\n",
25065 		    cdda->cdda_length, 0xFFFFFF);
25066 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25067 		return (EINVAL);
25068 	}
25069 
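	/*
	 * Compute the transfer size: each returned block is the 2352-byte
	 * raw audio frame plus any requested subcode data (16 bytes of Q
	 * subcode or 96 bytes of full subcode), or the subcode data alone.
	 */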
25070 	switch (cdda->cdda_subcode) {
25071 	case CDROM_DA_NO_SUBCODE:
25072 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25073 		break;
25074 	case CDROM_DA_SUBQ:
25075 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25076 		break;
25077 	case CDROM_DA_ALL_SUBCODE:
25078 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25079 		break;
25080 	case CDROM_DA_SUBCODE_ONLY:
25081 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25082 		break;
25083 	default:
25084 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25085 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25086 		    cdda->cdda_subcode);
25087 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25088 		return (EINVAL);
25089 	}
25090 
25091 	/* Build and send the command */
25092 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25093 	bzero(cdb, CDB_GROUP5);
25094 
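	/*
	 * When un_f_cfg_cdda is set, use the MMC READ CD command (0xBE);
	 * otherwise use the vendor specific READ CDDA command (0xD8),
	 * which accepts a full 4-byte transfer length.
	 */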
25095 	if (un->un_f_cfg_cdda == TRUE) {
25096 		cdb[0] = (char)SCMD_READ_CD;
25097 		cdb[1] = 0x04;
25098 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25099 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25100 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25101 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25102 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25103 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25104 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25105 		cdb[9] = 0x10;
25106 		switch (cdda->cdda_subcode) {
25107 		case CDROM_DA_NO_SUBCODE:
25108 			cdb[10] = 0x0;
25109 			break;
25110 		case CDROM_DA_SUBQ:
25111 			cdb[10] = 0x2;
25112 			break;
25113 		case CDROM_DA_ALL_SUBCODE:
25114 			cdb[10] = 0x1;
25115 			break;
25116 		case CDROM_DA_SUBCODE_ONLY:
25117 			/* FALLTHROUGH */
25118 		default:
25119 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25120 			kmem_free(com, sizeof (*com));
25121 			return (ENOTTY);
25122 		}
25123 	} else {
25124 		cdb[0] = (char)SCMD_READ_CDDA;
25125 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25126 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25127 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25128 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25129 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25130 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25131 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25132 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25133 		cdb[10] = cdda->cdda_subcode;
25134 	}
25135 
25136 	com->uscsi_cdb = cdb;
25137 	com->uscsi_cdblen = CDB_GROUP5;
25138 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25139 	com->uscsi_buflen = buflen;
25140 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25141 
25142 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25143 	    SD_PATH_STANDARD);
25144 
25145 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25146 	kmem_free(com, sizeof (*com));
25147 	return (rval);
25148 }
25149 
25150 
25151 /*
25152  *    Function: sr_read_cdxa()
25153  *
25154  * Description: This routine is the driver entry point for handling CD-ROM
25155  *		ioctl requests to return CD-XA (Extended Architecture) data.
25156  *		(CDROMCDXA).
25157  *
25158  *   Arguments: dev	- the device 'dev_t'
25159  *		data	- pointer to user provided CD-XA structure specifying
25160  *			  the data starting address, transfer length, and format
25161  *		flag	- this argument is a pass through to ddi_copyxxx()
25162  *			  directly from the mode argument of ioctl().
25163  *
25164  * Return Code: the code returned by sd_send_scsi_cmd()
25165  *		EFAULT if ddi_copyxxx() fails
25166  *		ENXIO if fail ddi_get_soft_state
25167  *		EINVAL if data pointer is NULL
25168  */
25169 
25170 static int
25171 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25172 {
25173 	struct sd_lun		*un;
25174 	struct uscsi_cmd	*com;
25175 	struct cdrom_cdxa	*cdxa;
25176 	int			rval;
25177 	size_t			buflen;
25178 	char			cdb[CDB_GROUP5];
25179 	uchar_t			read_flags;
25180 
25181 #ifdef _MULTI_DATAMODEL
25182 	/* To support ILP32 applications in an LP64 world */
25183 	struct cdrom_cdxa32		cdrom_cdxa32;
25184 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25185 #endif /* _MULTI_DATAMODEL */
25186 
25187 	if (data == NULL) {
25188 		return (EINVAL);
25189 	}
25190 
25191 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25192 		return (ENXIO);
25193 	}
25194 
25195 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25196 
25197 #ifdef _MULTI_DATAMODEL
25198 	switch (ddi_model_convert_from(flag & FMODELS)) {
25199 	case DDI_MODEL_ILP32:
25200 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25201 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25202 			return (EFAULT);
25203 		}
25204 		/*
25205 		 * Convert the ILP32 uscsi data from the
25206 		 * application to LP64 for internal use.
25207 		 */
25208 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25209 		break;
25210 	case DDI_MODEL_NONE:
25211 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25212 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25213 			return (EFAULT);
25214 		}
25215 		break;
25216 	}
25217 #else /* ! _MULTI_DATAMODEL */
25218 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25219 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25220 		return (EFAULT);
25221 	}
25222 #endif /* _MULTI_DATAMODEL */
25223 
25224 	/*
25225 	 * MMC-2 allows at most a 3-byte (24-bit) transfer length, so
25226 	 * reject any length that does not fit in 3 bytes.
25227 	 */
25228 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25229 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25230 		    "cdrom transfer length too large: %d (limit %d)\n",
25231 		    cdxa->cdxa_length, 0xFFFFFF);
25232 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25233 		return (EINVAL);
25234 	}
25235 
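	/*
	 * Select the transfer size and the READ CD byte 9 flags for the
	 * requested format: user data only (0x10), the full 2352-byte raw
	 * sector (0xf8), or the raw sector plus C2 error data (0xfc).
	 */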
25236 	switch (cdxa->cdxa_format) {
25237 	case CDROM_XA_DATA:
25238 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25239 		read_flags = 0x10;
25240 		break;
25241 	case CDROM_XA_SECTOR_DATA:
25242 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25243 		read_flags = 0xf8;
25244 		break;
25245 	case CDROM_XA_DATA_W_ERROR:
25246 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25247 		read_flags = 0xfc;
25248 		break;
25249 	default:
25250 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25251 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25252 		    cdxa->cdxa_format);
25253 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25254 		return (EINVAL);
25255 	}
25256 
25257 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25258 	bzero(cdb, CDB_GROUP5);
25259 	if (un->un_f_mmc_cap == TRUE) {
25260 		cdb[0] = (char)SCMD_READ_CD;
25261 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25262 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25263 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25264 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25265 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25266 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25267 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25268 		cdb[9] = (char)read_flags;
25269 	} else {
25270 		/*
25271 		 * Note: A vendor specific command (0xDB) is being used here to
25272 		 * request a read of all subcodes.
25273 		 */
25274 		cdb[0] = (char)SCMD_READ_CDXA;
25275 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25276 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25277 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25278 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25279 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25280 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25281 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25282 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25283 		cdb[10] = cdxa->cdxa_format;
25284 	}
25285 	com->uscsi_cdb	   = cdb;
25286 	com->uscsi_cdblen  = CDB_GROUP5;
25287 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25288 	com->uscsi_buflen  = buflen;
25289 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25290 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25291 	    SD_PATH_STANDARD);
25292 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25293 	kmem_free(com, sizeof (*com));
25294 	return (rval);
25295 }
25296 
25297 
25298 /*
25299  *    Function: sr_eject()
25300  *
25301  * Description: This routine is the driver entry point for handling CD-ROM
25302  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25303  *
25304  *   Arguments: dev	- the device 'dev_t'
25305  *
25306  * Return Code: the code returned by sd_send_scsi_cmd()
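 *
 * Example (illustrative userland usage only; the device path is
 * hypothetical and the usual <fcntl.h>, <stdio.h> and <sys/cdio.h>
 * headers are assumed):
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	if (ioctl(fd, CDROMEJECT, 0) != 0)
 *		perror("CDROMEJECT");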
25307  */
25308 
25309 static int
25310 sr_eject(dev_t dev)
25311 {
25312 	struct sd_lun	*un;
25313 	int		rval;
25314 
25315 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25316 	    (un->un_state == SD_STATE_OFFLINE)) {
25317 		return (ENXIO);
25318 	}
25319 
25320 	/*
25321 	 * To prevent race conditions with the eject
25322 	 * command, keep track of an eject command as
25323 	 * it progresses. If we are already handling an
25324 	 * eject command for this unit and another eject
25325 	 * request is received, return EAGAIN immediately
25326 	 * so the caller can retry if the current eject
25327 	 * command fails.
25328 	 */
25329 	mutex_enter(SD_MUTEX(un));
25330 	if (un->un_f_ejecting == TRUE) {
25331 		mutex_exit(SD_MUTEX(un));
25332 		return (EAGAIN);
25333 	}
25334 	un->un_f_ejecting = TRUE;
25335 	mutex_exit(SD_MUTEX(un));
25336 
25337 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25338 	    SD_PATH_STANDARD)) != 0) {
25339 		mutex_enter(SD_MUTEX(un));
25340 		un->un_f_ejecting = FALSE;
25341 		mutex_exit(SD_MUTEX(un));
25342 		return (rval);
25343 	}
25344 
25345 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25346 	    SD_PATH_STANDARD);
25347 
25348 	if (rval == 0) {
25349 		mutex_enter(SD_MUTEX(un));
25350 		sr_ejected(un);
25351 		un->un_mediastate = DKIO_EJECTED;
25352 		un->un_f_ejecting = FALSE;
25353 		cv_broadcast(&un->un_state_cv);
25354 		mutex_exit(SD_MUTEX(un));
25355 	} else {
25356 		mutex_enter(SD_MUTEX(un));
25357 		un->un_f_ejecting = FALSE;
25358 		mutex_exit(SD_MUTEX(un));
25359 	}
25360 	return (rval);
25361 }
25362 
25363 
25364 /*
25365  *    Function: sr_ejected()
25366  *
25367  * Description: This routine updates the soft state structure to invalidate the
25368  *		geometry information after the media has been ejected or a
25369  *		media eject has been detected.
25370  *
25371  *   Arguments: un - driver soft state (unit) structure
25372  */
25373 
25374 static void
25375 sr_ejected(struct sd_lun *un)
25376 {
25377 	struct sd_errstats *stp;
25378 
25379 	ASSERT(un != NULL);
25380 	ASSERT(mutex_owned(SD_MUTEX(un)));
25381 
25382 	un->un_f_blockcount_is_valid	= FALSE;
25383 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25384 	mutex_exit(SD_MUTEX(un));
25385 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25386 	mutex_enter(SD_MUTEX(un));
25387 
25388 	if (un->un_errstats != NULL) {
25389 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25390 		stp->sd_capacity.value.ui64 = 0;
25391 	}
25392 }
25393 
25394 
25395 /*
25396  *    Function: sr_check_wp()
25397  *
25398  * Description: This routine checks the write protection of a removable
25399  *      media disk or hotpluggable device via the write protect bit of
25400  *      the Mode Page Header device specific field. Some devices choke
25401  *      on unsupported mode pages; to work around this issue, this
25402  *      routine uses the 0x3f mode page (request all pages) for all
25403  *      device types.
25404  *
25405  *   Arguments: dev		- the device 'dev_t'
25406  *
25407  * Return Code: int indicating if the device is write protected (1) or not (0)
25408  *
25409  *     Context: Kernel thread.
25410  *
25411  */
25412 
25413 static int
25414 sr_check_wp(dev_t dev)
25415 {
25416 	struct sd_lun	*un;
25417 	uchar_t		device_specific;
25418 	uchar_t		*sense;
25419 	int		hdrlen;
25420 	int		rval = FALSE;
25421 
25422 	/*
25423 	 * Note: The return codes for this routine should be reworked to
25424 	 * properly handle the case of a NULL softstate.
25425 	 */
25426 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25427 		return (FALSE);
25428 	}
25429 
25430 	if (un->un_f_cfg_is_atapi == TRUE) {
25431 		/*
25432 		 * The mode page contents are not required; set the allocation
25433 		 * length for the mode page header only
25434 		 */
25435 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25436 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25437 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25438 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25439 			goto err_exit;
25440 		device_specific =
25441 		    ((struct mode_header_grp2 *)sense)->device_specific;
25442 	} else {
25443 		hdrlen = MODE_HEADER_LENGTH;
25444 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25445 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25446 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25447 			goto err_exit;
25448 		device_specific =
25449 		    ((struct mode_header *)sense)->device_specific;
25450 	}
25451 
25452 	/*
25453 	 * The device is write protected if the write protect bit is set.
25454 	 * (If the mode sense above failed we return FALSE via err_exit;
25455 	 * not all disks understand this query, so they are assumed writable.)
25456 	 */
25457 	if (device_specific & WRITE_PROTECT) {
25458 		rval = TRUE;
25459 	}
25460 
25461 err_exit:
25462 	kmem_free(sense, hdrlen);
25463 	return (rval);
25464 }
25465 
25466 /*
25467  *    Function: sr_volume_ctrl()
25468  *
25469  * Description: This routine is the driver entry point for handling CD-ROM
25470  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25471  *
25472  *   Arguments: dev	- the device 'dev_t'
25473  *		data	- pointer to user audio volume control structure
25474  *		flag	- this argument is a pass through to ddi_copyxxx()
25475  *			  directly from the mode argument of ioctl().
25476  *
25477  * Return Code: the code returned by sd_send_scsi_cmd()
25478  *		EFAULT if ddi_copyxxx() fails
25479  *		ENXIO if fail ddi_get_soft_state
25480  *		EINVAL if data pointer is NULL
25481  *
25482  */
25483 
25484 static int
25485 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25486 {
25487 	struct sd_lun		*un;
25488 	struct cdrom_volctrl    volume;
25489 	struct cdrom_volctrl    *vol = &volume;
25490 	uchar_t			*sense_page;
25491 	uchar_t			*select_page;
25492 	uchar_t			*sense;
25493 	uchar_t			*select;
25494 	int			sense_buflen;
25495 	int			select_buflen;
25496 	int			rval;
25497 
25498 	if (data == NULL) {
25499 		return (EINVAL);
25500 	}
25501 
25502 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25503 	    (un->un_state == SD_STATE_OFFLINE)) {
25504 		return (ENXIO);
25505 	}
25506 
25507 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25508 		return (EFAULT);
25509 	}
25510 
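	/*
	 * ATAPI/MMC targets use the 8-byte group 2 mode parameter header
	 * with 10-byte MODE SENSE/SELECT CDBs (CDB_GROUP1); other targets
	 * use the 4-byte header with 6-byte CDBs (CDB_GROUP0). The audio
	 * control page data itself is the same in both cases.
	 */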
25511 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25512 		struct mode_header_grp2		*sense_mhp;
25513 		struct mode_header_grp2		*select_mhp;
25514 		int				bd_len;
25515 
25516 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25517 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25518 		    MODEPAGE_AUDIO_CTRL_LEN;
25519 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25520 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25521 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25522 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25523 		    SD_PATH_STANDARD)) != 0) {
25524 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25525 			    "sr_volume_ctrl: Mode Sense Failed\n");
25526 			kmem_free(sense, sense_buflen);
25527 			kmem_free(select, select_buflen);
25528 			return (rval);
25529 		}
25530 		sense_mhp = (struct mode_header_grp2 *)sense;
25531 		select_mhp = (struct mode_header_grp2 *)select;
25532 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25533 		    sense_mhp->bdesc_length_lo;
25534 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25535 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25536 			    "sr_volume_ctrl: Mode Sense returned invalid "
25537 			    "block descriptor length\n");
25538 			kmem_free(sense, sense_buflen);
25539 			kmem_free(select, select_buflen);
25540 			return (EIO);
25541 		}
25542 		sense_page = (uchar_t *)
25543 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25544 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25545 		select_mhp->length_msb = 0;
25546 		select_mhp->length_lsb = 0;
25547 		select_mhp->bdesc_length_hi = 0;
25548 		select_mhp->bdesc_length_lo = 0;
25549 	} else {
25550 		struct mode_header		*sense_mhp, *select_mhp;
25551 
25552 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25553 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25554 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25555 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25556 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25557 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25558 		    SD_PATH_STANDARD)) != 0) {
25559 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25560 			    "sr_volume_ctrl: Mode Sense Failed\n");
25561 			kmem_free(sense, sense_buflen);
25562 			kmem_free(select, select_buflen);
25563 			return (rval);
25564 		}
25565 		sense_mhp  = (struct mode_header *)sense;
25566 		select_mhp = (struct mode_header *)select;
25567 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25568 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25569 			    "sr_volume_ctrl: Mode Sense returned invalid "
25570 			    "block descriptor length\n");
25571 			kmem_free(sense, sense_buflen);
25572 			kmem_free(select, select_buflen);
25573 			return (EIO);
25574 		}
25575 		sense_page = (uchar_t *)
25576 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25577 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25578 		select_mhp->length = 0;
25579 		select_mhp->bdesc_length = 0;
25580 	}
25581 	/*
25582 	 * Note: An audio control data structure could be created and overlaid
25583 	 * on the following in place of the array indexing method implemented.
25584 	 */
25585 
25586 	/* Build the select data for the user volume data */
25587 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25588 	select_page[1] = 0xE;
25589 	/* Set the immediate bit */
25590 	select_page[2] = 0x04;
25591 	/* Zero out reserved fields */
25592 	select_page[3] = 0x00;
25593 	select_page[4] = 0x00;
25594 	/* Return sense data for fields not to be modified */
25595 	select_page[5] = sense_page[5];
25596 	select_page[6] = sense_page[6];
25597 	select_page[7] = sense_page[7];
25598 	/* Set the user specified volume levels for channel 0 and 1 */
25599 	select_page[8] = 0x01;
25600 	select_page[9] = vol->channel0;
25601 	select_page[10] = 0x02;
25602 	select_page[11] = vol->channel1;
25603 	/* Channels 2 and 3 are currently unsupported; return the sense data */
25604 	select_page[12] = sense_page[12];
25605 	select_page[13] = sense_page[13];
25606 	select_page[14] = sense_page[14];
25607 	select_page[15] = sense_page[15];
25608 
25609 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25610 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25611 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25612 	} else {
25613 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25614 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25615 	}
25616 
25617 	kmem_free(sense, sense_buflen);
25618 	kmem_free(select, select_buflen);
25619 	return (rval);
25620 }
25621 
25622 
25623 /*
25624  *    Function: sr_read_sony_session_offset()
25625  *
25626  * Description: This routine is the driver entry point for handling CD-ROM
25627  *		ioctl requests for session offset information (CDROMREADOFFSET).
25628  *		The address of the first track in the last session of a
25629  *		multi-session CD-ROM is returned.
25630  *
25631  *		Note: This routine uses a vendor specific key value in the
25632  *		command control field without implementing any vendor check here
25633  *		or in the ioctl routine.
25634  *
25635  *   Arguments: dev	- the device 'dev_t'
25636  *		data	- pointer to an int to hold the requested address
25637  *		flag	- this argument is a pass through to ddi_copyxxx()
25638  *			  directly from the mode argument of ioctl().
25639  *
25640  * Return Code: the code returned by sd_send_scsi_cmd()
25641  *		EFAULT if ddi_copyxxx() fails
25642  *		ENXIO if fail ddi_get_soft_state
25643  *		EINVAL if data pointer is NULL
25644  */
25645 
25646 static int
25647 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25648 {
25649 	struct sd_lun		*un;
25650 	struct uscsi_cmd	*com;
25651 	caddr_t			buffer;
25652 	char			cdb[CDB_GROUP1];
25653 	int			session_offset = 0;
25654 	int			rval;
25655 
25656 	if (data == NULL) {
25657 		return (EINVAL);
25658 	}
25659 
25660 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25661 	    (un->un_state == SD_STATE_OFFLINE)) {
25662 		return (ENXIO);
25663 	}
25664 
25665 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
25666 	bzero(cdb, CDB_GROUP1);
25667 	cdb[0] = SCMD_READ_TOC;
25668 	/*
25669 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
25670 	 * (4 byte TOC response header + 8 byte response data)
25671 	 */
25672 	cdb[8] = SONY_SESSION_OFFSET_LEN;
25673 	/* Byte 9 is the control byte. A vendor specific value is used */
25674 	cdb[9] = SONY_SESSION_OFFSET_KEY;
25675 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25676 	com->uscsi_cdb = cdb;
25677 	com->uscsi_cdblen = CDB_GROUP1;
25678 	com->uscsi_bufaddr = buffer;
25679 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
25680 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25681 
25682 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25683 	    SD_PATH_STANDARD);
25684 	if (rval != 0) {
25685 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25686 		kmem_free(com, sizeof (*com));
25687 		return (rval);
25688 	}
25689 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
25690 		session_offset =
25691 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
25692 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
25693 		/*
25694 		 * The offset is returned in units of the current target block
25695 		 * size. Convert it to 2K blocks before returning it to the user.
25696 		 */
25697 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
25698 			session_offset >>= 2;
25699 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
25700 			session_offset >>= 1;
25701 		}
25702 	}
25703 
25704 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
25705 		rval = EFAULT;
25706 	}
25707 
25708 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25709 	kmem_free(com, sizeof (*com));
25710 	return (rval);
25711 }
25712 
25713 
25714 /*
25715  *    Function: sd_wm_cache_constructor()
25716  *
25717  * Description: Cache Constructor for the wmap cache for the read/modify/write
25718  * 		devices.
25719  *
25720  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25721  *		un	- sd_lun structure for the device.
25722  *		flags	- the km flags passed to the constructor
25723  *
25724  * Return Code: 0 on success.
25725  *		-1 on failure.
25726  */
25727 
25728 /*ARGSUSED*/
25729 static int
25730 sd_wm_cache_constructor(void *wm, void *un, int flags)
25731 {
25732 	bzero(wm, sizeof (struct sd_w_map));
25733 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
25734 	return (0);
25735 }
25736 
25737 
25738 /*
25739  *    Function: sd_wm_cache_destructor()
25740  *
25741  * Description: Cache destructor for the wmap cache for the read/modify/write
25742  * 		devices.
25743  *
25744  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
25745  *		un	- sd_lun structure for the device.
25746  */
25747 /*ARGSUSED*/
25748 static void
25749 sd_wm_cache_destructor(void *wm, void *un)
25750 {
25751 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
25752 }
25753 
25754 
25755 /*
25756  *    Function: sd_range_lock()
25757  *
25758  * Description: Lock the specified range of blocks to ensure that a
25759  *		read-modify-write is atomic and that no other I/O writes
25760  *		to the same location. The range is specified in terms
25761  *		of start and end blocks; block numbers are actual media
25762  *		block numbers, not system block numbers.
25763  *
25764  *   Arguments: un	- sd_lun structure for the device.
25765  *		startb - The starting block number
25766  *		endb - The end block number
25767  *		typ - type of i/o - simple/read_modify_write
25768  *
25769  * Return Code: wm  - pointer to the wmap structure.
25770  *
25771  *     Context: This routine can sleep.
25772  */
25773 
25774 static struct sd_w_map *
25775 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
25776 {
25777 	struct sd_w_map *wmp = NULL;
25778 	struct sd_w_map *sl_wmp = NULL;
25779 	struct sd_w_map *tmp_wmp;
25780 	wm_state state = SD_WM_CHK_LIST;
25781 
25782 
25783 	ASSERT(un != NULL);
25784 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25785 
25786 	mutex_enter(SD_MUTEX(un));
25787 
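	/*
	 * Simple state machine: SD_WM_CHK_LIST looks for an overlapping
	 * busy wmap, SD_WM_LOCK_RANGE allocates and links a wmap for the
	 * range, and SD_WM_WAIT_MAP sleeps on a conflicting wmap before
	 * rechecking the list.
	 */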
25788 	while (state != SD_WM_DONE) {
25789 
25790 		switch (state) {
25791 		case SD_WM_CHK_LIST:
25792 			/*
25793 			 * This is the starting state. Check the wmap list
25794 			 * to see if the range is currently available.
25795 			 */
25796 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
25797 				/*
25798 				 * If this is a simple write and no rmw
25799 				 * i/o is pending then try to lock the
25800 				 * range as the range should be available.
25801 				 */
25802 				state = SD_WM_LOCK_RANGE;
25803 			} else {
25804 				tmp_wmp = sd_get_range(un, startb, endb);
25805 				if (tmp_wmp != NULL) {
25806 					if ((wmp != NULL) && ONLIST(un, wmp)) {
25807 						/*
25808 					 * We should not keep an on-list
25809 					 * wmap while waiting; this macro
25810 					 * also sets wmp = NULL.
25811 						 */
25812 						FREE_ONLIST_WMAP(un, wmp);
25813 					}
25814 					/*
25815 					 * sl_wmp is the wmap we wait on.
25816 					 * Since tmp_wmp points to the in-use
25817 					 * wmap, set sl_wmp to tmp_wmp and
25818 					 * change the state to wait.
25819 					 */
25820 					sl_wmp = tmp_wmp;
25821 					state = SD_WM_WAIT_MAP;
25822 				} else {
25823 					state = SD_WM_LOCK_RANGE;
25824 				}
25825 
25826 			}
25827 			break;
25828 
25829 		case SD_WM_LOCK_RANGE:
25830 			ASSERT(un->un_wm_cache);
25831 			/*
25832 			 * The range needs to be locked, so try to get a wmap.
25833 			 * First attempt the allocation with KM_NOSLEEP; we want
25834 			 * to avoid sleeping if possible, since we would have to
25835 			 * release the sd mutex in order to sleep.
25836 			 */
25837 			if (wmp == NULL)
25838 				wmp = kmem_cache_alloc(un->un_wm_cache,
25839 				    KM_NOSLEEP);
25840 			if (wmp == NULL) {
25841 				mutex_exit(SD_MUTEX(un));
25842 				_NOTE(DATA_READABLE_WITHOUT_LOCK
25843 				    (sd_lun::un_wm_cache))
25844 				wmp = kmem_cache_alloc(un->un_wm_cache,
25845 				    KM_SLEEP);
25846 				mutex_enter(SD_MUTEX(un));
25847 				/*
25848 				 * We released the mutex, so recheck the list
25849 				 * and go back to the check-list state.
25850 				 */
25851 				state = SD_WM_CHK_LIST;
25852 			} else {
25853 				/*
25854 				 * We exit the state machine since we have
25855 				 * the wmap. Do the housekeeping first:
25856 				 * place the wmap on the wmap list if it is
25857 				 * not already there, then set the state to done.
25858 				 */
25859 				wmp->wm_start = startb;
25860 				wmp->wm_end = endb;
25861 				wmp->wm_flags = typ | SD_WM_BUSY;
25862 				if (typ & SD_WTYPE_RMW) {
25863 					un->un_rmw_count++;
25864 				}
25865 				/*
25866 				 * If not already on the list then link
25867 				 */
25868 				if (!ONLIST(un, wmp)) {
25869 					wmp->wm_next = un->un_wm;
25870 					wmp->wm_prev = NULL;
25871 					if (wmp->wm_next)
25872 						wmp->wm_next->wm_prev = wmp;
25873 					un->un_wm = wmp;
25874 				}
25875 				state = SD_WM_DONE;
25876 			}
25877 			break;
25878 
25879 		case SD_WM_WAIT_MAP:
25880 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
25881 			/*
25882 			 * Wait is done on sl_wmp, which is set in the
25883 			 * check_list state.
25884 			 */
25885 			sl_wmp->wm_wanted_count++;
25886 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
25887 			sl_wmp->wm_wanted_count--;
25888 			/*
25889 			 * We can reuse the memory from the completed sl_wmp
25890 			 * lock range for our new lock, but only if no one is
25891 			 * waiting for it.
25892 			 */
25893 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
25894 			if (sl_wmp->wm_wanted_count == 0) {
25895 				if (wmp != NULL)
25896 					CHK_N_FREEWMP(un, wmp);
25897 				wmp = sl_wmp;
25898 			}
25899 			sl_wmp = NULL;
25900 			/*
25901 			 * After waking up, need to recheck for availability of
25902 			 * range.
25903 			 */
25904 			state = SD_WM_CHK_LIST;
25905 			break;
25906 
25907 		default:
25908 			panic("sd_range_lock: "
25909 			    "Unknown state %d in sd_range_lock", state);
25910 			/*NOTREACHED*/
25911 		} /* switch(state) */
25912 
25913 	} /* while(state != SD_WM_DONE) */
25914 
25915 	mutex_exit(SD_MUTEX(un));
25916 
25917 	ASSERT(wmp != NULL);
25918 
25919 	return (wmp);
25920 }
25921 
25922 
25923 /*
25924  *    Function: sd_get_range()
25925  *
25926  * Description: Determine whether any other I/O overlaps this one.
25927  *		Returns the write map of the first such I/O, NULL otherwise.
25928  *
25929  *   Arguments: un	- sd_lun structure for the device.
25930  *		startb - The starting block number
25931  *		endb - The end block number
25932  *
25933  * Return Code: wm  - pointer to the wmap structure.
25934  */
25935 
25936 static struct sd_w_map *
25937 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
25938 {
25939 	struct sd_w_map *wmp;
25940 
25941 	ASSERT(un != NULL);
25942 
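	/*
	 * The requested range overlaps a busy wmap if either of its
	 * endpoints falls within that wmap's [wm_start, wm_end] range.
	 */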
25943 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
25944 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
25945 			continue;
25946 		}
25947 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
25948 			break;
25949 		}
25950 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
25951 			break;
25952 		}
25953 	}
25954 
25955 	return (wmp);
25956 }
25957 
25958 
25959 /*
25960  *    Function: sd_free_inlist_wmap()
25961  *
25962  * Description: Unlink and free a write map struct.
25963  *
25964  *   Arguments: un      - sd_lun structure for the device.
25965  *		wmp	- sd_w_map which needs to be unlinked.
25966  */
25967 
25968 static void
25969 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
25970 {
25971 	ASSERT(un != NULL);
25972 
25973 	if (un->un_wm == wmp) {
25974 		un->un_wm = wmp->wm_next;
25975 	} else {
25976 		wmp->wm_prev->wm_next = wmp->wm_next;
25977 	}
25978 
25979 	if (wmp->wm_next) {
25980 		wmp->wm_next->wm_prev = wmp->wm_prev;
25981 	}
25982 
25983 	wmp->wm_next = wmp->wm_prev = NULL;
25984 
25985 	kmem_cache_free(un->un_wm_cache, wmp);
25986 }
25987 
25988 
25989 /*
25990  *    Function: sd_range_unlock()
25991  *
25992  * Description: Unlock the range locked by wm.
25993  *		Free write map if nobody else is waiting on it.
25994  *
25995  *   Arguments: un      - sd_lun structure for the device.
25996  *              wm      - sd_w_map whose range is to be unlocked.
25997  */
25998 
25999 static void
26000 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
26001 {
26002 	ASSERT(un != NULL);
26003 	ASSERT(wm != NULL);
26004 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26005 
26006 	mutex_enter(SD_MUTEX(un));
26007 
26008 	if (wm->wm_flags & SD_WTYPE_RMW) {
26009 		un->un_rmw_count--;
26010 	}
26011 
26012 	if (wm->wm_wanted_count) {
26013 		wm->wm_flags = 0;
26014 		/*
26015 		 * Broadcast that the wmap is available now.
26016 		 */
26017 		cv_broadcast(&wm->wm_avail);
26018 	} else {
26019 		/*
26020 		 * If no one is waiting on the map, it should be freed.
26021 		 */
26022 		sd_free_inlist_wmap(un, wm);
26023 	}
26024 
26025 	mutex_exit(SD_MUTEX(un));
26026 }
26027 
26028 
26029 /*
26030  *    Function: sd_read_modify_write_task
26031  *
26032  * Description: Called from a taskq thread to initiate the write phase of
26033  *		a read-modify-write request.  This is used for targets where
26034  *		un->un_sys_blocksize != un->un_tgt_blocksize.
26035  *
26036  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
26037  *
26038  *     Context: Called under taskq thread context.
26039  */
26040 
26041 static void
26042 sd_read_modify_write_task(void *arg)
26043 {
26044 	struct sd_mapblocksize_info	*bsp;
26045 	struct buf	*bp;
26046 	struct sd_xbuf	*xp;
26047 	struct sd_lun	*un;
26048 
26049 	bp = arg;	/* The bp is given in arg */
26050 	ASSERT(bp != NULL);
26051 
26052 	/* Get the pointer to the layer-private data struct */
26053 	xp = SD_GET_XBUF(bp);
26054 	ASSERT(xp != NULL);
26055 	bsp = xp->xb_private;
26056 	ASSERT(bsp != NULL);
26057 
26058 	un = SD_GET_UN(bp);
26059 	ASSERT(un != NULL);
26060 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26061 
26062 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26063 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26064 
26065 	/*
26066 	 * This is the write phase of a read-modify-write request, called
26067 	 * under the context of a taskq thread in response to the completion
26068 	 * of the read portion of the rmw request completing under interrupt
26069 	 * context. The write request must be sent from here down the iostart
26070 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
26071 	 * we use the layer index saved in the layer-private data area.
26072 	 */
26073 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26074 
26075 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26076 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26077 }
26078 
26079 
26080 /*
26081  *    Function: sddump_do_read_of_rmw()
26082  *
26083  * Description: This routine is called from sddump. If sddump is called
26084  *		with an I/O that is not aligned on a device blocksize
26085  *		boundary, the write has to be converted to a
26086  *		read-modify-write. Do the read part here in order to keep
26087  *		sddump simple.
26088  *		Note that sd_mutex is held across the call to this routine.
26089  *
26090  *   Arguments: un	- sd_lun
26091  *		blkno	- block number in terms of media block size.
26092  *		nblk	- number of blocks.
26093  *		bpp	- pointer to pointer to the buf structure. On return
26094  *			from this function, *bpp points to the valid buffer
26095  *			to which the write has to be done.
26096  *
26097  * Return Code: 0 for success or errno-type return code
26098  */
26099 
26100 static int
26101 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26102 	struct buf **bpp)
26103 {
26104 	int err;
26105 	int i;
26106 	int rval;
26107 	struct buf *bp;
26108 	struct scsi_pkt *pkt = NULL;
26109 	uint32_t target_blocksize;
26110 
26111 	ASSERT(un != NULL);
26112 	ASSERT(mutex_owned(SD_MUTEX(un)));
26113 
26114 	target_blocksize = un->un_tgt_blocksize;
26115 
26116 	mutex_exit(SD_MUTEX(un));
26117 
26118 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26119 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26120 	if (bp == NULL) {
26121 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26122 		    "no resources for dumping; giving up");
26123 		err = ENOMEM;
26124 		goto done;
26125 	}
26126 
26127 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26128 	    blkno, nblk);
26129 	if (rval != 0) {
26130 		scsi_free_consistent_buf(bp);
26131 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26132 		    "no resources for dumping; giving up");
26133 		err = ENOMEM;
26134 		goto done;
26135 	}
26136 
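	/*
	 * Dump context cannot take interrupts, so the packet is issued
	 * in polled mode (FLAG_NOINTR) and completed via sd_scsi_poll()
	 * below.
	 */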
26137 	pkt->pkt_flags |= FLAG_NOINTR;
26138 
26139 	err = EIO;
26140 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26141 
26142 		/*
26143 		 * sd_scsi_poll() returns 0 (success) if the command completes
26144 		 * and the status block is STATUS_GOOD.  We should only check
26145 		 * for errors if this condition is not true.  Even then we
26146 		 * should send our own request sense packet only if we have a
26147 		 * check condition and auto request sense has not been
26148 		 * performed by the HBA.
26149 		 */
26150 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26151 
26152 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26153 			err = 0;
26154 			break;
26155 		}
26156 
26157 		/*
26158 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
26159 		 * no need to read RQS data.
26160 		 */
26161 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26162 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26163 			    "Device is gone\n");
26164 			break;
26165 		}
26166 
26167 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26168 			SD_INFO(SD_LOG_DUMP, un,
26169 			    "sddump: read failed with CHECK, try # %d\n", i);
26170 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26171 				(void) sd_send_polled_RQS(un);
26172 			}
26173 
26174 			continue;
26175 		}
26176 
26177 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26178 			int reset_retval = 0;
26179 
26180 			SD_INFO(SD_LOG_DUMP, un,
26181 			    "sddump: read failed with BUSY, try # %d\n", i);
26182 
26183 			if (un->un_f_lun_reset_enabled == TRUE) {
26184 				reset_retval = scsi_reset(SD_ADDRESS(un),
26185 				    RESET_LUN);
26186 			}
26187 			if (reset_retval == 0) {
26188 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26189 			}
26190 			(void) sd_send_polled_RQS(un);
26191 
26192 		} else {
26193 			SD_INFO(SD_LOG_DUMP, un,
26194 			    "sddump: read failed with 0x%x, try # %d\n",
26195 			    SD_GET_PKT_STATUS(pkt), i);
26196 			mutex_enter(SD_MUTEX(un));
26197 			sd_reset_target(un, pkt);
26198 			mutex_exit(SD_MUTEX(un));
26199 		}
26200 
26201 		/*
26202 		 * If we are not getting anywhere with lun/target resets,
26203 		 * let's reset the bus.
26204 		 */
26205 		if (i > SD_NDUMP_RETRIES / 2) {
26206 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26207 			(void) sd_send_polled_RQS(un);
26208 		}
26209 
26210 	}
26211 	scsi_destroy_pkt(pkt);
26212 
26213 	if (err != 0) {
26214 		scsi_free_consistent_buf(bp);
26215 		*bpp = NULL;
26216 	} else {
26217 		*bpp = bp;
26218 	}
26219 
26220 done:
26221 	mutex_enter(SD_MUTEX(un));
26222 	return (err);
26223 }
26224 
26225 
26226 /*
26227  *    Function: sd_failfast_flushq
26228  *
26229  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26230  *		in b_flags and move them onto the failfast queue, then kick
26231  *		off a thread to return all bp's on the failfast queue to
26232  *		their owners with an error set.
26233  *
26234  *   Arguments: un - pointer to the soft state struct for the instance.
26235  *
26236  *     Context: may execute in interrupt context.
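 *
 * Note: the scope of the flush is governed by the sd_failfast_flushctl
 * flags: SD_FAILFAST_FLUSH_ALL_BUFS flushes every queued buf rather than
 * only those marked B_FAILFAST, and SD_FAILFAST_FLUSH_ALL_QUEUES also
 * flushes the xbuf queues via ddi_xbuf_flushq().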
26237  */
26238 
26239 static void
26240 sd_failfast_flushq(struct sd_lun *un)
26241 {
26242 	struct buf *bp;
26243 	struct buf *next_waitq_bp;
26244 	struct buf *prev_waitq_bp = NULL;
26245 
26246 	ASSERT(un != NULL);
26247 	ASSERT(mutex_owned(SD_MUTEX(un)));
26248 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26249 	ASSERT(un->un_failfast_bp == NULL);
26250 
26251 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26252 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26253 
26254 	/*
26255 	 * Check if we should flush all bufs when entering failfast state, or
26256 	 * just those with B_FAILFAST set.
26257 	 */
26258 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26259 		/*
26260 		 * Move *all* bp's on the wait queue to the failfast flush
26261 		 * queue, including those that do NOT have B_FAILFAST set.
26262 		 */
26263 		if (un->un_failfast_headp == NULL) {
26264 			ASSERT(un->un_failfast_tailp == NULL);
26265 			un->un_failfast_headp = un->un_waitq_headp;
26266 		} else {
26267 			ASSERT(un->un_failfast_tailp != NULL);
26268 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26269 		}
26270 
26271 		un->un_failfast_tailp = un->un_waitq_tailp;
26272 
26273 		/* update kstat for each bp moved out of the waitq */
26274 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26275 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26276 		}
26277 
26278 		/* empty the waitq */
26279 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26280 
26281 	} else {
26282 		/*
26283 		 * Go through the wait queue, pick off all entries with
26284 		 * B_FAILFAST set, and move these onto the failfast queue.
26285 		 */
26286 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26287 			/*
26288 			 * Save the pointer to the next bp on the wait queue,
26289 			 * so we get to it on the next iteration of this loop.
26290 			 */
26291 			next_waitq_bp = bp->av_forw;
26292 
26293 			/*
26294 			 * If this bp from the wait queue does NOT have
26295 			 * B_FAILFAST set, just move on to the next element
26296 			 * in the wait queue. Note, this is the only place
26297 			 * where it is correct to set prev_waitq_bp.
26298 			 */
26299 			if ((bp->b_flags & B_FAILFAST) == 0) {
26300 				prev_waitq_bp = bp;
26301 				continue;
26302 			}
26303 
26304 			/*
26305 			 * Remove the bp from the wait queue.
26306 			 */
26307 			if (bp == un->un_waitq_headp) {
26308 				/* The bp is the first element of the waitq. */
26309 				un->un_waitq_headp = next_waitq_bp;
26310 				if (un->un_waitq_headp == NULL) {
26311 					/* The wait queue is now empty */
26312 					un->un_waitq_tailp = NULL;
26313 				}
26314 			} else {
26315 				/*
26316 				 * The bp is either somewhere in the middle
26317 				 * or at the end of the wait queue.
26318 				 */
26319 				ASSERT(un->un_waitq_headp != NULL);
26320 				ASSERT(prev_waitq_bp != NULL);
26321 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26322 				    == 0);
26323 				if (bp == un->un_waitq_tailp) {
26324 					/* bp is the last entry on the waitq. */
26325 					ASSERT(next_waitq_bp == NULL);
26326 					un->un_waitq_tailp = prev_waitq_bp;
26327 				}
26328 				prev_waitq_bp->av_forw = next_waitq_bp;
26329 			}
26330 			bp->av_forw = NULL;
26331 
26332 			/*
26333 			 * update kstat since the bp is moved out of
26334 			 * the waitq
26335 			 */
26336 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26337 
26338 			/*
26339 			 * Now put the bp onto the failfast queue.
26340 			 */
26341 			if (un->un_failfast_headp == NULL) {
26342 				/* failfast queue is currently empty */
26343 				ASSERT(un->un_failfast_tailp == NULL);
26344 				un->un_failfast_headp =
26345 				    un->un_failfast_tailp = bp;
26346 			} else {
26347 				/* Add the bp to the end of the failfast q */
26348 				ASSERT(un->un_failfast_tailp != NULL);
26349 				ASSERT(un->un_failfast_tailp->b_flags &
26350 				    B_FAILFAST);
26351 				un->un_failfast_tailp->av_forw = bp;
26352 				un->un_failfast_tailp = bp;
26353 			}
26354 		}
26355 	}
26356 
26357 	/*
26358 	 * Now return all bp's on the failfast queue to their owners.
26359 	 */
26360 	while ((bp = un->un_failfast_headp) != NULL) {
26361 
26362 		un->un_failfast_headp = bp->av_forw;
26363 		if (un->un_failfast_headp == NULL) {
26364 			un->un_failfast_tailp = NULL;
26365 		}
26366 
26367 		/*
26368 		 * We want to return the bp with a failure error code, but
26369 		 * we do not want a call to sd_start_cmds() to occur here,
26370 		 * so use sd_return_failed_command_no_restart() instead of
26371 		 * sd_return_failed_command().
26372 		 */
26373 		sd_return_failed_command_no_restart(un, bp, EIO);
26374 	}
26375 
26376 	/* Flush the xbuf queues if required. */
26377 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26378 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26379 	}
26380 
26381 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26382 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26383 }
26384 
26385 
26386 /*
26387  *    Function: sd_failfast_flushq_callback
26388  *
26389  * Description: Return TRUE if the given bp meets the criteria for failfast
26390  *		flushing. Used with ddi_xbuf_flushq(9F).
26391  *
26392  *   Arguments: bp - ptr to buf struct to be examined.
26393  *
26394  *     Context: Any
26395  */
26396 
26397 static int
26398 sd_failfast_flushq_callback(struct buf *bp)
26399 {
26400 	/*
26401 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26402 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26403 	 */
26404 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26405 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26406 }
26407 
26408 
26409 
26410 #if defined(__i386) || defined(__amd64)
26411 /*
26412  * Function: sd_setup_next_xfer
26413  *
26414  * Description: Prepare next I/O operation using DMA_PARTIAL
26415  *
26416  */
26417 
26418 static int
26419 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26420     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26421 {
26422 	ssize_t	num_blks_not_xfered;
26423 	daddr_t	strt_blk_num;
26424 	ssize_t	bytes_not_xfered;
26425 	int	rval;
26426 
26427 	ASSERT(pkt->pkt_resid == 0);
26428 
26429 	/*
26430 	 * Calculate next block number and amount to be transferred.
26431 	 *
26432 	 * How much data has NOT been transferred to the HBA yet.
26433 	 */
26434 	bytes_not_xfered = xp->xb_dma_resid;
26435 
26436 	/*
26437 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
26438 	 */
26439 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26440 
26441 	/*
26442 	 * Set the starting block number to the end of what WAS transferred.
26443 	 */
26444 	strt_blk_num = xp->xb_blkno +
26445 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
26446 
26447 	/*
26448 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
26449 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
26450 	 * the disk mutex here.
26451 	 */
26452 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26453 	    strt_blk_num, num_blks_not_xfered);
26454 
26455 	if (rval == 0) {
26456 
26457 		/*
26458 		 * Success.
26459 		 *
26460 		 * Adjust things if there are still more blocks to be
26461 		 * transferred.
26462 		 */
26463 		xp->xb_dma_resid = pkt->pkt_resid;
26464 		pkt->pkt_resid = 0;
26465 
26466 		return (1);
26467 	}
26468 
26469 	/*
26470 	 * There's really only one possible return value from
26471 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
26472 	 * returns NULL.
26473 	 */
26474 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26475 
26476 	bp->b_resid = bp->b_bcount;
26477 	bp->b_flags |= B_ERROR;
26478 
26479 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26480 	    "Error setting up next portion of DMA transfer\n");
26481 
26482 	return (0);
26483 }
26484 #endif
26485 
26486 /*
26487  *    Function: sd_panic_for_res_conflict
26488  *
26489  * Description: Call panic with a string formatted with "Reservation Conflict"
26490  *		and a human readable identifier indicating the SD instance
26491  *		that experienced the reservation conflict.
26492  *
26493  *   Arguments: un - pointer to the soft state struct for the instance.
26494  *
26495  *     Context: may execute in interrupt context.
26496  */
26497 
26498 #define	SD_RESV_CONFLICT_FMT_LEN 40
26499 void
26500 sd_panic_for_res_conflict(struct sd_lun *un)
26501 {
26502 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
26503 	char path_str[MAXPATHLEN];
26504 
26505 	(void) snprintf(panic_str, sizeof (panic_str),
26506 	    "Reservation Conflict\nDisk: %s",
26507 	    ddi_pathname(SD_DEVINFO(un), path_str));
26508 
26509 	panic(panic_str);
26510 }
26511 
26512 /*
26513  * Note: The following sd_faultinjection_ioctl() routines implement
26514  * driver support for fault injection, allowing error analysis by
26515  * injecting faults into multiple layers of the driver.
26516  *
26517  */
26518 
26519 #ifdef SD_FAULT_INJECTION
26520 static uint_t   sd_fault_injection_on = 0;
26521 
26522 /*
26523  *    Function: sd_faultinjection_ioctl()
26524  *
26525  * Description: This routine is the driver entry point for handling
26526  *              faultinjection ioctls to inject errors into the
26527  *              layer model
26528  *
26529  *   Arguments: cmd	- the ioctl cmd received
26530  *		arg	- the ioctl argument from the user; also used to
26530  *			  return data
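 *
 * Note: a typical injection session (illustrative ordering, derived from
 * the cases below) is SDIOCSTART, one or more SDIOCINSERT* calls to stage
 * errors, SDIOCPUSH to queue them, SDIOCRUN to arm injection, and finally
 * SDIOCRETRIEVE and/or SDIOCSTOP.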
26531  */
26532 
26533 static void
26534 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
{
26535 
26536 	uint_t i;
26537 	uint_t rval;
26538 
26539 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26540 
26541 	mutex_enter(SD_MUTEX(un));
26542 
26543 	switch (cmd) {
26544 	case SDIOCRUN:
26545 		/* Allow pushed faults to be injected */
26546 		SD_INFO(SD_LOG_SDTEST, un,
26547 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26548 
26549 		sd_fault_injection_on = 1;
26550 
26551 		SD_INFO(SD_LOG_IOERR, un,
26552 		    "sd_faultinjection_ioctl: run finished\n");
26553 		break;
26554 
26555 	case SDIOCSTART:
26556 		/* Start Injection Session */
26557 		SD_INFO(SD_LOG_SDTEST, un,
26558 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26559 
26560 		sd_fault_injection_on = 0;
26561 		un->sd_injection_mask = 0xFFFFFFFF;
26562 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26563 			un->sd_fi_fifo_pkt[i] = NULL;
26564 			un->sd_fi_fifo_xb[i] = NULL;
26565 			un->sd_fi_fifo_un[i] = NULL;
26566 			un->sd_fi_fifo_arq[i] = NULL;
26567 		}
26568 		un->sd_fi_fifo_start = 0;
26569 		un->sd_fi_fifo_end = 0;
26570 
26571 		mutex_enter(&(un->un_fi_mutex));
26572 		un->sd_fi_log[0] = '\0';
26573 		un->sd_fi_buf_len = 0;
26574 		mutex_exit(&(un->un_fi_mutex));
26575 
26576 		SD_INFO(SD_LOG_IOERR, un,
26577 		    "sd_faultinjection_ioctl: start finished\n");
26578 		break;
26579 
26580 	case SDIOCSTOP:
26581 		/* Stop Injection Session */
26582 		SD_INFO(SD_LOG_SDTEST, un,
26583 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26584 		sd_fault_injection_on = 0;
26585 		un->sd_injection_mask = 0x0;
26586 
26587 		/* Empty stray or unused structs from fifo */
26588 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26589 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26590 				kmem_free(un->sd_fi_fifo_pkt[i],
26591 				    sizeof (struct sd_fi_pkt));
26592 			}
26593 			if (un->sd_fi_fifo_xb[i] != NULL) {
26594 				kmem_free(un->sd_fi_fifo_xb[i],
26595 				    sizeof (struct sd_fi_xb));
26596 			}
26597 			if (un->sd_fi_fifo_un[i] != NULL) {
26598 				kmem_free(un->sd_fi_fifo_un[i],
26599 				    sizeof (struct sd_fi_un));
26600 			}
26601 			if (un->sd_fi_fifo_arq[i] != NULL) {
26602 				kmem_free(un->sd_fi_fifo_arq[i],
26603 				    sizeof (struct sd_fi_arq));
26604 			}
26605 			un->sd_fi_fifo_pkt[i] = NULL;
26606 			un->sd_fi_fifo_un[i] = NULL;
26607 			un->sd_fi_fifo_xb[i] = NULL;
26608 			un->sd_fi_fifo_arq[i] = NULL;
26609 		}
26610 		un->sd_fi_fifo_start = 0;
26611 		un->sd_fi_fifo_end = 0;
26612 
26613 		SD_INFO(SD_LOG_IOERR, un,
26614 		    "sd_faultinjection_ioctl: stop finished\n");
26615 		break;
26616 
26617 	case SDIOCINSERTPKT:
26618 		/* Store a packet struct to be pushed onto fifo */
26619 		SD_INFO(SD_LOG_SDTEST, un,
26620 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26621 
26622 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26623 
26624 		sd_fault_injection_on = 0;
26625 
26626 		/* No more than SD_FI_MAX_ERROR allowed in Queue */
26627 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26628 			kmem_free(un->sd_fi_fifo_pkt[i],
26629 			    sizeof (struct sd_fi_pkt));
26630 		}
26631 		if (arg != NULL) {
26632 			un->sd_fi_fifo_pkt[i] =
26633 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26634 			if (un->sd_fi_fifo_pkt[i] == NULL) {
26635 				/* Alloc failed don't store anything */
26636 				break;
26637 			}
26638 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26639 			    sizeof (struct sd_fi_pkt), 0);
26640 			if (rval == -1) {
26641 				kmem_free(un->sd_fi_fifo_pkt[i],
26642 				    sizeof (struct sd_fi_pkt));
26643 				un->sd_fi_fifo_pkt[i] = NULL;
26644 			}
26645 		} else {
26646 			SD_INFO(SD_LOG_IOERR, un,
26647 			    "sd_faultinjection_ioctl: pkt null\n");
26648 		}
26649 		break;
26650 
26651 	case SDIOCINSERTXB:
26652 		/* Store a xb struct to be pushed onto fifo */
26653 		SD_INFO(SD_LOG_SDTEST, un,
26654 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
26655 
26656 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26657 
26658 		sd_fault_injection_on = 0;
26659 
26660 		if (un->sd_fi_fifo_xb[i] != NULL) {
26661 			kmem_free(un->sd_fi_fifo_xb[i],
26662 			    sizeof (struct sd_fi_xb));
26663 			un->sd_fi_fifo_xb[i] = NULL;
26664 		}
26665 		if (arg != NULL) {
26666 			un->sd_fi_fifo_xb[i] =
26667 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
26668 			if (un->sd_fi_fifo_xb[i] == NULL) {
26669 				/* Alloc failed don't store anything */
26670 				break;
26671 			}
26672 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
26673 			    sizeof (struct sd_fi_xb), 0);
26674 
26675 			if (rval == -1) {
26676 				kmem_free(un->sd_fi_fifo_xb[i],
26677 				    sizeof (struct sd_fi_xb));
26678 				un->sd_fi_fifo_xb[i] = NULL;
26679 			}
26680 		} else {
26681 			SD_INFO(SD_LOG_IOERR, un,
26682 			    "sd_faultinjection_ioctl: xb null\n");
26683 		}
26684 		break;
26685 
26686 	case SDIOCINSERTUN:
26687 		/* Store a un struct to be pushed onto fifo */
26688 		SD_INFO(SD_LOG_SDTEST, un,
26689 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
26690 
26691 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26692 
26693 		sd_fault_injection_on = 0;
26694 
26695 		if (un->sd_fi_fifo_un[i] != NULL) {
26696 			kmem_free(un->sd_fi_fifo_un[i],
26697 			    sizeof (struct sd_fi_un));
26698 			un->sd_fi_fifo_un[i] = NULL;
26699 		}
26700 		if (arg != NULL) {
26701 			un->sd_fi_fifo_un[i] =
26702 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
26703 			if (un->sd_fi_fifo_un[i] == NULL) {
				/* Alloc failed; don't store anything */
26705 				break;
26706 			}
26707 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
26708 			    sizeof (struct sd_fi_un), 0);
26709 			if (rval == -1) {
26710 				kmem_free(un->sd_fi_fifo_un[i],
26711 				    sizeof (struct sd_fi_un));
26712 				un->sd_fi_fifo_un[i] = NULL;
26713 			}
26714 
26715 		} else {
26716 			SD_INFO(SD_LOG_IOERR, un,
26717 			    "sd_faultinjection_ioctl: un null\n");
26718 		}
26719 
26720 		break;
26721 
26722 	case SDIOCINSERTARQ:
		/* Store an arq struct to be pushed onto fifo */
26724 		SD_INFO(SD_LOG_SDTEST, un,
26725 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
26726 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26727 
26728 		sd_fault_injection_on = 0;
26729 
26730 		if (un->sd_fi_fifo_arq[i] != NULL) {
26731 			kmem_free(un->sd_fi_fifo_arq[i],
26732 			    sizeof (struct sd_fi_arq));
26733 			un->sd_fi_fifo_arq[i] = NULL;
26734 		}
26735 		if (arg != NULL) {
26736 			un->sd_fi_fifo_arq[i] =
26737 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
26738 			if (un->sd_fi_fifo_arq[i] == NULL) {
				/* Alloc failed; don't store anything */
26740 				break;
26741 			}
26742 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
26743 			    sizeof (struct sd_fi_arq), 0);
26744 			if (rval == -1) {
26745 				kmem_free(un->sd_fi_fifo_arq[i],
26746 				    sizeof (struct sd_fi_arq));
26747 				un->sd_fi_fifo_arq[i] = NULL;
26748 			}
26749 
26750 		} else {
26751 			SD_INFO(SD_LOG_IOERR, un,
26752 			    "sd_faultinjection_ioctl: arq null\n");
26753 		}
26754 
26755 		break;
26756 
26757 	case SDIOCPUSH:
26758 		/* Push stored xb, pkt, un, and arq onto fifo */
26759 		sd_fault_injection_on = 0;
26760 
26761 		if (arg != NULL) {
26762 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
26763 			if (rval != -1 &&
26764 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26765 				un->sd_fi_fifo_end += i;
26766 			}
26767 		} else {
26768 			SD_INFO(SD_LOG_IOERR, un,
26769 			    "sd_faultinjection_ioctl: push arg null\n");
			/*
			 * No count was supplied; push a single entry.
			 * (Using 'i' here would read a stale value.)
			 */
			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
26771 				un->sd_fi_fifo_end++;
26772 			}
26773 		}
26774 		SD_INFO(SD_LOG_IOERR, un,
26775 		    "sd_faultinjection_ioctl: push to end=%d\n",
26776 		    un->sd_fi_fifo_end);
26777 		break;
26778 
26779 	case SDIOCRETRIEVE:
26780 		/* Return buffer of log from Injection session */
26781 		SD_INFO(SD_LOG_SDTEST, un,
		    "sd_faultinjection_ioctl: Injecting Fault Retrieve\n");
26783 
26784 		sd_fault_injection_on = 0;
26785 
26786 		mutex_enter(&(un->un_fi_mutex));
26787 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
26788 		    un->sd_fi_buf_len+1, 0);
26789 		mutex_exit(&(un->un_fi_mutex));
26790 
26791 		if (rval == -1) {
26792 			/*
			 * arg is possibly invalid; set it
			 * to NULL for the return.
26795 			 */
26796 			arg = NULL;
26797 		}
26798 		break;
26799 	}
26800 
26801 	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
26804 }
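
/*
 * A fault injection session is driven from userland through the SDIOC*
 * ioctls handled above, in the order start / insert / push / (issue I/O)
 * / retrieve / stop.  A minimal sketch of such a harness follows; it is
 * illustrative only: the device path is hypothetical, the injected
 * pkt_reason is just an example, and error checking is omitted.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	struct sd_fi_pkt fi_pkt;
 *	uint_t count = 1;
 *	char log[SD_FI_MAX_BUF];
 *
 *	bzero(&fi_pkt, sizeof (fi_pkt));
 *	fi_pkt.pkt_reason = CMD_TRAN_ERR;
 *	(void) ioctl(fd, SDIOCSTART, NULL);		start the session
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	stage pkt changes
 *	(void) ioctl(fd, SDIOCPUSH, &count);		push one fifo entry
 *	(issue I/O here so that sdintr() consumes the entry)
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);		collect the log
 *	(void) ioctl(fd, SDIOCSTOP, NULL);		end the session
 */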
26805 
26806 
26807 /*
26808  *    Function: sd_injection_log()
26809  *
 * Description: This routine appends buf to the existing injection log, for
 *              later retrieval via sd_faultinjection_ioctl(), for use in
 *              fault detection and recovery
 *
 *   Arguments: buf - the string to add to the log
 *              un  - the driver soft state (unit) structure
26815  */
26816 
26817 static void
26818 sd_injection_log(char *buf, struct sd_lun *un)
26819 {
26820 	uint_t len;
26821 
26822 	ASSERT(un != NULL);
26823 	ASSERT(buf != NULL);
26824 
26825 	mutex_enter(&(un->un_fi_mutex));
26826 
26827 	len = min(strlen(buf), 255);
26828 	/* Add logged value to Injection log to be returned later */
26829 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
26830 		uint_t	offset = strlen((char *)un->sd_fi_log);
26831 		char *destp = (char *)un->sd_fi_log + offset;
26832 		int i;
26833 		for (i = 0; i < len; i++) {
26834 			*destp++ = *buf++;
26835 		}
26836 		un->sd_fi_buf_len += len;
26837 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
26838 	}
26839 
26840 	mutex_exit(&(un->un_fi_mutex));
26841 }
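
/*
 * For example (illustrative), two successive calls
 *
 *	sd_injection_log("pkt_reason set\n", un);
 *	sd_injection_log("xb_blkno set\n", un);
 *
 * simply concatenate into un->sd_fi_log (at most 255 bytes per entry and
 * SD_FI_MAX_BUF bytes in total); SDIOCRETRIEVE later copies the whole
 * buffer out with a single ddi_copyout().
 */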
26842 
26843 
26844 /*
26845  *    Function: sd_faultinjection()
26846  *
26847  * Description: This routine takes the pkt and changes its
 *		contents based on the error injection scenario.
26849  *
26850  *   Arguments: pktp	- packet to be changed
26851  */
26852 
26853 static void
26854 sd_faultinjection(struct scsi_pkt *pktp)
26855 {
26856 	uint_t i;
26857 	struct sd_fi_pkt *fi_pkt;
26858 	struct sd_fi_xb *fi_xb;
26859 	struct sd_fi_un *fi_un;
26860 	struct sd_fi_arq *fi_arq;
26861 	struct buf *bp;
26862 	struct sd_xbuf *xb;
26863 	struct sd_lun *un;
26864 
26865 	ASSERT(pktp != NULL);
26866 
	/* pull bp, xb and un from pktp */
26868 	bp = (struct buf *)pktp->pkt_private;
26869 	xb = SD_GET_XBUF(bp);
26870 	un = SD_GET_UN(bp);
26871 
26872 	ASSERT(un != NULL);
26873 
26874 	mutex_enter(SD_MUTEX(un));
26875 
26876 	SD_TRACE(SD_LOG_SDTEST, un,
26877 	    "sd_faultinjection: entry Injection from sdintr\n");
26878 
26879 	/* if injection is off return */
	if (sd_fault_injection_on == 0 ||
	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
26882 		mutex_exit(SD_MUTEX(un));
26883 		return;
26884 	}
26885 
26886 
26887 	/* take next set off fifo */
26888 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
26889 
26890 	fi_pkt = un->sd_fi_fifo_pkt[i];
26891 	fi_xb = un->sd_fi_fifo_xb[i];
26892 	fi_un = un->sd_fi_fifo_un[i];
26893 	fi_arq = un->sd_fi_fifo_arq[i];
26894 
26895 
26896 	/* set variables accordingly */
26897 	/* set pkt if it was on fifo */
26898 	if (fi_pkt != NULL) {
26899 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
26900 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
26901 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
26902 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
26903 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
26904 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
26905 
26906 	}
26907 
26908 	/* set xb if it was on fifo */
26909 	if (fi_xb != NULL) {
26910 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
26911 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
26912 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
26913 		SD_CONDSET(xb, xb, xb_victim_retry_count,
26914 		    "xb_victim_retry_count");
26915 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
26916 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
26917 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
26918 
		/* copy in sense data if it was supplied */
26920 		if (fi_xb->xb_sense_data[0] != -1) {
26921 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
26922 			    SENSE_LENGTH);
26923 		}
26924 
26925 		/* copy in extended sense codes */
26926 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
26927 		    "es_code");
26928 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
26929 		    "es_key");
26930 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
26931 		    "es_add_code");
26932 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
26933 		    es_qual_code, "es_qual_code");
26934 	}
26935 
26936 	/* set un if it was on fifo */
26937 	if (fi_un != NULL) {
26938 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
26939 		SD_CONDSET(un, un, un_ctype, "un_ctype");
26940 		SD_CONDSET(un, un, un_reset_retry_count,
26941 		    "un_reset_retry_count");
26942 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
26943 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
26944 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
26945 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
26946 		    "un_f_allow_bus_device_reset");
26947 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
26948 
26949 	}
26950 
26951 	/* copy in auto request sense if it was on fifo */
26952 	if (fi_arq != NULL) {
26953 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
26954 	}
26955 
26956 	/* free structs */
26957 	if (un->sd_fi_fifo_pkt[i] != NULL) {
26958 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
26959 	}
26960 	if (un->sd_fi_fifo_xb[i] != NULL) {
26961 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
26962 	}
26963 	if (un->sd_fi_fifo_un[i] != NULL) {
26964 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
26965 	}
26966 	if (un->sd_fi_fifo_arq[i] != NULL) {
26967 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
26968 	}
26969 
26970 	/*
	 * kmem_free() does not guarantee to set the pointers to NULL.
	 * Since we use these pointers to determine whether we set
	 * values, confirm that they are always NULL after the free.
26975 	 */
26976 	un->sd_fi_fifo_pkt[i] = NULL;
26977 	un->sd_fi_fifo_un[i] = NULL;
26978 	un->sd_fi_fifo_xb[i] = NULL;
26979 	un->sd_fi_fifo_arq[i] = NULL;
26980 
26981 	un->sd_fi_fifo_start++;
26982 
26983 	mutex_exit(SD_MUTEX(un));
26984 
26985 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
26986 }
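
/*
 * A note on the fifo arithmetic used above: sd_fi_fifo_start and
 * sd_fi_fifo_end are free-running counters, and the slot actually
 * referenced is always (counter % SD_FI_MAX_ERROR).  The SDIOCINSERT*
 * ioctls fill the slot derived from sd_fi_fifo_end, SDIOCPUSH advances
 * sd_fi_fifo_end, and sd_faultinjection() consumes the slot derived
 * from sd_fi_fifo_start and then advances it; the fifo is empty
 * whenever sd_fi_fifo_start == sd_fi_fifo_end.
 */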
26987 
26988 #endif /* SD_FAULT_INJECTION */
26989 
26990 /*
 * This routine is invoked in sd_unit_attach(). Before calling it, the
 * properties in the conf file, including the "hotpluggable" property,
 * should already have been processed.
 *
 * The sd driver distinguishes 3 different types of devices: removable media,
 * non-removable media, and hotpluggable. The differences are defined below:
26997  *
26998  * 1. Device ID
26999  *
27000  *     The device ID of a device is used to identify this device. Refer to
27001  *     ddi_devid_register(9F).
27002  *
27003  *     For a non-removable media disk device which can provide 0x80 or 0x83
27004  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
27005  *     device ID is created to identify this device. For other non-removable
27006  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
27008  *
27009  *     -------------------------------------------------------
27010  *     removable media   hotpluggable  | Can Have Device ID
27011  *     -------------------------------------------------------
27012  *         false             false     |     Yes
27013  *         false             true      |     Yes
27014  *         true                x       |     No
27015  *     ------------------------------------------------------
27016  *
27017  *
27018  * 2. SCSI group 4 commands
27019  *
 *     In the SCSI specs, only some commands in the group 4 command set can
 *     use 8-byte block addresses, which are needed to access >2TB storage
 *     spaces. Other commands have no such capability. Without group 4
 *     support, it is impossible to make full use of the storage space of a
 *     disk with a capacity larger than 2TB.
27025  *
27026  *     -----------------------------------------------
27027  *     removable media   hotpluggable   LP64  |  Group
27028  *     -----------------------------------------------
27029  *           false          false       false |   1
27030  *           false          false       true  |   4
27031  *           false          true        false |   1
27032  *           false          true        true  |   4
27033  *           true             x           x   |   5
27034  *     -----------------------------------------------
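 *
 *     For example, READ(16) (opcode 0x88) is a group 4 command whose CDB
 *     carries a 64-bit logical block address, while READ(10) (a group 1
 *     command) carries only a 32-bit LBA and therefore cannot address
 *     blocks beyond 2TB with 512-byte sectors.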
27035  *
27036  *
27037  * 3. Check for VTOC Label
27038  *
27039  *     If a direct-access disk has no EFI label, sd will check if it has a
27040  *     valid VTOC label. Now, sd also does that check for removable media
27041  *     and hotpluggable devices.
27042  *
27043  *     --------------------------------------------------------------
27044  *     Direct-Access   removable media    hotpluggable |  Check Label
27045  *     -------------------------------------------------------------
27046  *         false          false           false        |   No
27047  *         false          false           true         |   No
27048  *         false          true            false        |   Yes
27049  *         false          true            true         |   Yes
27050  *         true            x                x          |   Yes
27051  *     --------------------------------------------------------------
27052  *
27053  *
27054  * 4. Building default VTOC label
27055  *
 *     As section 3 says, sd checks whether certain kinds of devices have a
 *     VTOC label. If those devices have no valid VTOC label, sd(7d) will
 *     attempt to create a default VTOC for them. Currently sd creates a
 *     default VTOC label for all devices on the x86 platform (VTOC_16), but
 *     only for removable media devices on SPARC (VTOC_8).
27061  *
27062  *     -----------------------------------------------------------
27063  *       removable media hotpluggable platform   |   Default Label
27064  *     -----------------------------------------------------------
27065  *             false          false    sparc     |     No
27066  *             false          true      x86      |     Yes
27067  *             false          true     sparc     |     Yes
27068  *             true             x        x       |     Yes
27069  *     ----------------------------------------------------------
27070  *
27071  *
27072  * 5. Supported blocksizes of target devices
27073  *
 *     Sd supports non-512-byte blocksizes for removable media devices only.
 *     For other devices, only a 512-byte blocksize is supported. This may be
 *     changed in the near future because some RAID devices require a
 *     non-512-byte blocksize.
27078  *
27079  *     -----------------------------------------------------------
27080  *     removable media    hotpluggable    | non-512-byte blocksize
27081  *     -----------------------------------------------------------
27082  *           false          false         |   No
27083  *           false          true          |   No
27084  *           true             x           |   Yes
27085  *     -----------------------------------------------------------
27086  *
27087  *
27088  * 6. Automatic mount & unmount
27089  *
 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices, and 0 for others.
27093  *
27094  *     The automatic mounting subsystem should distinguish between the types
27095  *     of devices and apply automounting policies to each.
27096  *
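 *     As an illustrative sketch (assuming <sys/dkio.h> and <fcntl.h>; the
 *     device path is hypothetical), a userland check might look like:
 *
 *         int rm = 0;
 *         int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *         if (fd >= 0 && ioctl(fd, DKIOCREMOVABLE, &rm) == 0 && rm != 0)
 *                 (void) printf("removable media device\n");
 *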
27097  *
27098  * 7. fdisk partition management
27099  *
 *     Fdisk is the traditional partitioning method on the x86 platform, and
 *     the sd(7d) driver supports fdisk partitions only on x86. On the SPARC
 *     platform, sd doesn't support fdisk partitions at all. Note: pcfs(7fs)
 *     can recognize fdisk partitions on both the x86 and SPARC platforms.
27104  *
27105  *     -----------------------------------------------------------
27106  *       platform   removable media  USB/1394  |  fdisk supported
27107  *     -----------------------------------------------------------
27108  *        x86         X               X        |       true
27109  *     ------------------------------------------------------------
27110  *        sparc       X               X        |       false
27111  *     ------------------------------------------------------------
27112  *
27113  *
27114  * 8. MBOOT/MBR
27115  *
 *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
 *     support read/write of the mboot for removable media devices on SPARC.
27118  *
27119  *     -----------------------------------------------------------
27120  *       platform   removable media  USB/1394  |  mboot supported
27121  *     -----------------------------------------------------------
27122  *        x86         X               X        |       true
27123  *     ------------------------------------------------------------
27124  *        sparc      false           false     |       false
27125  *        sparc      false           true      |       true
27126  *        sparc      true            false     |       true
27127  *        sparc      true            true      |       true
27128  *     ------------------------------------------------------------
27129  *
27130  *
 * 9.  Error handling when opening a device
27132  *
 *     If opening a disk device fails, an errno is returned. For some kinds
 *     of errors, a different errno is returned depending on whether this
 *     device is a removable media device. This brings USB/1394 hard disks
 *     in line with expected hard disk behavior. It is not expected that
 *     this breaks any application.
27138  *
27139  *     ------------------------------------------------------
27140  *       removable media    hotpluggable   |  errno
27141  *     ------------------------------------------------------
27142  *             false          false        |   EIO
27143  *             false          true         |   EIO
27144  *             true             x          |   ENXIO
27145  *     ------------------------------------------------------
27146  *
27147  *
27148  * 11. ioctls: DKIOCEJECT, CDROMEJECT
27149  *
27150  *     These IOCTLs are applicable only to removable media devices.
27151  *
27152  *     -----------------------------------------------------------
27153  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27154  *     -----------------------------------------------------------
27155  *             false          false        |     No
27156  *             false          true         |     No
27157  *             true            x           |     Yes
27158  *     -----------------------------------------------------------
27159  *
27160  *
27161  * 12. Kstats for partitions
27162  *
 *     sd creates partition kstats for non-removable media devices. USB and
 *     Firewire hard disks now have partition kstats as well.
27165  *
27166  *      ------------------------------------------------------
 *       removable media    hotpluggable   |   kstat
27168  *      ------------------------------------------------------
27169  *             false          false        |    Yes
27170  *             false          true         |    Yes
27171  *             true             x          |    No
27172  *       ------------------------------------------------------
27173  *
27174  *
27175  * 13. Removable media & hotpluggable properties
27176  *
 *     The sd driver creates a "removable-media" property for removable
 *     media devices. A parent nexus driver creates a "hotpluggable"
 *     property if it supports hotplugging.
27180  *
27181  *     ---------------------------------------------------------------------
 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27183  *     ---------------------------------------------------------------------
27184  *       false            false       |    No                   No
27185  *       false            true        |    No                   Yes
27186  *       true             false       |    Yes                  No
27187  *       true             true        |    Yes                  Yes
27188  *     ---------------------------------------------------------------------
27189  *
27190  *
27191  * 14. Power Management
27192  *
 *     sd only power manages removable media devices or devices that support
 *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
27195  *
27196  *     A parent nexus that supports hotplugging can also set "pm-capable"
27197  *     if the disk can be power managed.
27198  *
27199  *     ------------------------------------------------------------
27200  *       removable media hotpluggable pm-capable  |   power manage
27201  *     ------------------------------------------------------------
27202  *             false          false     false     |     No
27203  *             false          false     true      |     Yes
27204  *             false          true      false     |     No
27205  *             false          true      true      |     Yes
27206  *             true             x        x        |     Yes
27207  *     ------------------------------------------------------------
27208  *
 *      USB and firewire hard disks can now be power managed independently
 *      of the framebuffer.
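 *
 *      As an illustrative sketch (not actual sd code; child_dip is a
 *      hypothetical dev_info node), a parent nexus might export the
 *      property from its child-init path as:
 *
 *          (void) ndi_prop_update_int(DDI_DEV_T_NONE, child_dip,
 *              "pm-capable", 1);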
27211  *
27212  *
27213  * 15. Support for USB disks with capacity larger than 1TB
27214  *
 *     Currently, sd doesn't permit a fixed disk device with a capacity
 *     larger than 1TB to be used in a 32-bit operating system environment.
 *     However, sd doesn't enforce this for removable media devices. Instead,
 *     it assumes that removable media devices cannot have a capacity larger
 *     than 1TB. Therefore, using those devices on a 32-bit system is only
 *     partially supported, which can cause some unexpected results.
27221  *
27222  *     ---------------------------------------------------------------------
27223  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27224  *     ---------------------------------------------------------------------
27225  *             false          false  |   true         |     no
27226  *             false          true   |   true         |     no
27227  *             true           false  |   true         |     Yes
27228  *             true           true   |   true         |     Yes
27229  *     ---------------------------------------------------------------------
27230  *
27231  *
27232  * 16. Check write-protection at open time
27233  *
 *     When a removable media device is opened for writing without the NDELAY
 *     flag, sd will check whether the device is writable. If a write-protected
 *     device is opened for writing without the NDELAY flag, the open aborts.
27237  *
27238  *     ------------------------------------------------------------
27239  *       removable media    USB/1394   |   WP Check
27240  *     ------------------------------------------------------------
27241  *             false          false    |     No
27242  *             false          true     |     No
27243  *             true           false    |     Yes
27244  *             true           true     |     Yes
27245  *     ------------------------------------------------------------
27246  *
27247  *
27248  * 17. syslog when corrupted VTOC is encountered
27249  *
 *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
 *      message for fixed SCSI disks.
27252  *     ------------------------------------------------------------
27253  *       removable media    USB/1394   |   print syslog
27254  *     ------------------------------------------------------------
27255  *             false          false    |     Yes
27256  *             false          true     |     No
27257  *             true           false    |     No
27258  *             true           true     |     No
27259  *     ------------------------------------------------------------
27260  */
27261 static void
27262 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27263 {
27264 	int	pm_capable_prop;
27265 
27266 	ASSERT(un->un_sd);
27267 	ASSERT(un->un_sd->sd_inq);
27268 
27269 	/*
27270 	 * Enable SYNC CACHE support for all devices.
27271 	 */
27272 	un->un_f_sync_cache_supported = TRUE;
27273 
27274 	if (un->un_sd->sd_inq->inq_rmb) {
27275 		/*
		 * The media of this device is removable, and for this kind
		 * of device it is possible to change the medium after the
		 * device has been opened. Thus we should support this.
27279 		 */
27280 		un->un_f_has_removable_media = TRUE;
27281 
27282 		/*
27283 		 * support non-512-byte blocksize of removable media devices
27284 		 */
27285 		un->un_f_non_devbsize_supported = TRUE;
27286 
27287 		/*
27288 		 * Assume that all removable media devices support DOOR_LOCK
27289 		 */
27290 		un->un_f_doorlock_supported = TRUE;
27291 
27292 		/*
		 * A removable media device may be opened with the NDELAY
		 * flag when there is no media in the drive; in that case
		 * we don't care whether the device is writable. Without
		 * the NDELAY flag, we must check whether the media is
		 * write-protected.
27297 		 */
27298 		un->un_f_chk_wp_open = TRUE;
27299 
27300 		/*
		 * Need to start a SCSI watch thread to monitor media state;
		 * when media is inserted or ejected, notify syseventd.
27303 		 */
27304 		un->un_f_monitor_media_state = TRUE;
27305 
27306 		/*
		 * Some devices don't support the START_STOP_UNIT command.
		 * Therefore, check whether a device supports it before
		 * sending it.
27310 		 */
27311 		un->un_f_check_start_stop = TRUE;
27312 
27313 		/*
27314 		 * support eject media ioctl:
27315 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27316 		 */
27317 		un->un_f_eject_media_supported = TRUE;
27318 
27319 		/*
27320 		 * Because many removable-media devices don't support
		 * LOG_SENSE, we can't use this command to check whether
		 * a removable media device supports power management.
27323 		 * We assume that they support power-management via
27324 		 * START_STOP_UNIT command and can be spun up and down
27325 		 * without limitations.
27326 		 */
27327 		un->un_f_pm_supported = TRUE;
27328 
27329 		/*
27330 		 * Need to create a zero length (Boolean) property
27331 		 * removable-media for the removable media devices.
		 * Note that the return value is not checked: if the
		 * property cannot be created, we do not want the attach
		 * to fail altogether. This is consistent with other
		 * property creation in attach.
27336 		 */
27337 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27338 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27339 
27340 	} else {
27341 		/*
27342 		 * create device ID for device
27343 		 */
27344 		un->un_f_devid_supported = TRUE;
27345 
27346 		/*
		 * Spin up a non-removable-media device once it is attached
27348 		 */
27349 		un->un_f_attach_spinup = TRUE;
27350 
27351 		/*
		 * According to the SCSI specification, sense data has two
		 * formats: fixed format and descriptor format. At present, we
27354 		 * don't support descriptor format sense data for removable
27355 		 * media.
27356 		 */
27357 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27358 			un->un_f_descr_format_supported = TRUE;
27359 		}
27360 
27361 		/*
27362 		 * kstats are created only for non-removable media devices.
27363 		 *
27364 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27365 		 * default is 1, so they are enabled by default.
27366 		 */
27367 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27368 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "enable-partition-kstats", 1));
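
		/*
		 * For reference (illustrative), this is an ordinary
		 * driver.conf property; a line such as
		 *
		 *	enable-partition-kstats=0;
		 *
		 * in sd.conf disables the per-partition kstats.
		 */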
27370 
27371 		/*
27372 		 * Check if HBA has set the "pm-capable" property.
27373 		 * If "pm-capable" exists and is non-zero then we can
27374 		 * power manage the device without checking the start/stop
27375 		 * cycle count log sense page.
27376 		 *
27377 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27378 		 * then we should not power manage the device.
27379 		 *
27380 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27381 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27382 		 * sd will check the start/stop cycle count log sense page
27383 		 * and power manage the device if the cycle count limit has
27384 		 * not been exceeded.
27385 		 */
27386 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27387 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27388 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27389 			un->un_f_log_sense_supported = TRUE;
27390 		} else {
27391 			/*
27392 			 * pm-capable property exists.
27393 			 *
27394 			 * Convert "TRUE" values for pm_capable_prop to
27395 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27396 			 * later. "TRUE" values are any values except
27397 			 * SD_PM_CAPABLE_FALSE (0) and
27398 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27399 			 */
27400 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27401 				un->un_f_log_sense_supported = FALSE;
27402 			} else {
27403 				un->un_f_pm_supported = TRUE;
27404 			}
27405 
27406 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27407 			    "sd_unit_attach: un:0x%p pm-capable "
27408 			    "property set to %d.\n", un, un->un_f_pm_supported);
27409 		}
27410 	}
27411 
27412 	if (un->un_f_is_hotpluggable) {
27413 
27414 		/*
27415 		 * Have to watch hotpluggable devices as well, since
27416 		 * that's the only way for userland applications to
27417 		 * detect hot removal while device is busy/mounted.
27418 		 */
27419 		un->un_f_monitor_media_state = TRUE;
27420 
27421 		un->un_f_check_start_stop = TRUE;
27422 
27423 	}
27424 }
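
/*
 * For reference, the hotpluggable attribute consumed above (via
 * un->un_f_is_hotpluggable) originates from a property created by the
 * parent nexus, not by sd.  An illustrative nexus-side sketch (child_dip
 * is a hypothetical dev_info node):
 *
 *	(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, child_dip,
 *	    "hotpluggable");
 *
 * Both this property and "removable-media" can be inspected from
 * userland with prtconf -v.
 */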
27425 
27426 /*
27427  * sd_tg_rdwr:
 * Provides rdwr access for cmlb via sd_tgops. The start_block is
 * in units of the system block size; reqlength is in bytes.
27430  *
27431  */
27432 static int
27433 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27434     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27435 {
27436 	struct sd_lun *un;
27437 	int path_flag = (int)(uintptr_t)tg_cookie;
27438 	char *dkl = NULL;
27439 	diskaddr_t real_addr = start_block;
27440 	diskaddr_t first_byte, end_block;
27441 
27442 	size_t	buffer_size = reqlength;
27443 	int rval;
27444 	diskaddr_t	cap;
27445 	uint32_t	lbasize;
27446 
27447 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27448 	if (un == NULL)
27449 		return (ENXIO);
27450 
27451 	if (cmd != TG_READ && cmd != TG_WRITE)
27452 		return (EINVAL);
27453 
27454 	mutex_enter(SD_MUTEX(un));
27455 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27456 		mutex_exit(SD_MUTEX(un));
27457 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27458 		    &lbasize, path_flag);
27459 		if (rval != 0)
27460 			return (rval);
27461 		mutex_enter(SD_MUTEX(un));
27462 		sd_update_block_info(un, lbasize, cap);
		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27464 			mutex_exit(SD_MUTEX(un));
27465 			return (EIO);
27466 		}
27467 	}
27468 
27469 	if (NOT_DEVBSIZE(un)) {
27470 		/*
27471 		 * sys_blocksize != tgt_blocksize, need to re-adjust
27472 		 * blkno and save the index to beginning of dk_label
27473 		 */
27474 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27475 		real_addr = first_byte / un->un_tgt_blocksize;
27476 
27477 		end_block = (first_byte + reqlength +
27478 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27479 
27480 		/* round up buffer size to multiple of target block size */
27481 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27482 
		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_tg_rdwr: label_addr: 0x%x allocation size: 0x%x\n",
		    real_addr, buffer_size);
27486 
27487 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27488 		    (reqlength % un->un_tgt_blocksize) != 0)
27489 			/* the request is not aligned */
27490 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27491 	}
27492 
27493 	/*
27494 	 * The MMC standard allows READ CAPACITY to be
27495 	 * inaccurate by a bounded amount (in the interest of
27496 	 * response latency).  As a result, failed READs are
27497 	 * commonplace (due to the reading of metadata and not
27498 	 * data). Depending on the per-Vendor/drive Sense data,
27499 	 * the failed READ can cause many (unnecessary) retries.
27500 	 */
27501 
27502 	if (ISCD(un) && (cmd == TG_READ) &&
27503 	    (un->un_f_blockcount_is_valid == TRUE) &&
	    ((start_block == (un->un_blockcount - 1)) ||
	    (start_block == (un->un_blockcount - 2)))) {
		path_flag = SD_PATH_DIRECT_PRIORITY;
27507 	}
27508 
27509 	mutex_exit(SD_MUTEX(un));
27510 	if (cmd == TG_READ) {
		rval = sd_send_scsi_READ(un, (dkl != NULL) ? dkl : bufaddr,
27512 		    buffer_size, real_addr, path_flag);
27513 		if (dkl != NULL)
27514 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27515 			    real_addr), bufaddr, reqlength);
27516 	} else {
27517 		if (dkl) {
27518 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27519 			    real_addr, path_flag);
27520 			if (rval) {
27521 				kmem_free(dkl, buffer_size);
27522 				return (rval);
27523 			}
27524 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27525 			    real_addr), reqlength);
27526 		}
		rval = sd_send_scsi_WRITE(un, (dkl != NULL) ? dkl : bufaddr,
27528 		    buffer_size, real_addr, path_flag);
27529 	}
27530 
27531 	if (dkl != NULL)
27532 		kmem_free(dkl, buffer_size);
27533 
27534 	return (rval);
27535 }
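
/*
 * Worked example of the unaligned-path arithmetic above (the numbers are
 * illustrative): with a 512-byte system block size and a 2048-byte target
 * block size, a request of start_block = 3, reqlength = 512 gives
 *
 *	first_byte  = 3 * 512 = 1536
 *	real_addr   = 1536 / 2048 = 0
 *	end_block   = (1536 + 512 + 2047) / 2048 = 1
 *	buffer_size = (1 - 0) * 2048 = 2048
 *
 * Since first_byte is not target-block aligned, a 2048-byte bounce buffer
 * (dkl) is allocated: target block 0 is read in full, and for a write the
 * caller's 512 bytes are merged in at byte offset 1536 before the block
 * is written back.
 */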
27536 
27537 
27538 static int
27539 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
{
27542 	struct sd_lun *un;
27543 	diskaddr_t	cap;
27544 	uint32_t	lbasize;
27545 	int		path_flag = (int)(uintptr_t)tg_cookie;
27546 	int		ret = 0;
27547 
27548 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27549 	if (un == NULL)
27550 		return (ENXIO);
27551 
27552 	switch (cmd) {
27553 	case TG_GETPHYGEOM:
27554 	case TG_GETVIRTGEOM:
27555 	case TG_GETCAPACITY:
	case TG_GETBLOCKSIZE:
27557 		mutex_enter(SD_MUTEX(un));
27558 
27559 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27560 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27561 			cap = un->un_blockcount;
27562 			lbasize = un->un_tgt_blocksize;
27563 			mutex_exit(SD_MUTEX(un));
27564 		} else {
27565 			mutex_exit(SD_MUTEX(un));
27566 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27567 			    &lbasize, path_flag);
27568 			if (ret != 0)
27569 				return (ret);
27570 			mutex_enter(SD_MUTEX(un));
27571 			sd_update_block_info(un, lbasize, cap);
27572 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27573 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27574 				mutex_exit(SD_MUTEX(un));
27575 				return (EIO);
27576 			}
27577 			mutex_exit(SD_MUTEX(un));
27578 		}
27579 
27580 		if (cmd == TG_GETCAPACITY) {
27581 			*(diskaddr_t *)arg = cap;
27582 			return (0);
27583 		}
27584 
27585 		if (cmd == TG_GETBLOCKSIZE) {
27586 			*(uint32_t *)arg = lbasize;
27587 			return (0);
27588 		}
27589 
27590 		if (cmd == TG_GETPHYGEOM)
27591 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27592 			    cap, lbasize, path_flag);
27593 		else
27594 			/* TG_GETVIRTGEOM */
27595 			ret = sd_get_virtual_geometry(un,
27596 			    (cmlb_geom_t *)arg, cap, lbasize);
27597 
27598 		return (ret);
27599 
27600 	case TG_GETATTR:
27601 		mutex_enter(SD_MUTEX(un));
27602 		((tg_attribute_t *)arg)->media_is_writable =
27603 		    un->un_f_mmc_writable_media;
27604 		mutex_exit(SD_MUTEX(un));
27605 		return (0);
	default:
		return (ENOTTY);
	}
}
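
/*
 * Both sd_tg_rdwr() and sd_tg_getinfo() are exported to cmlb through the
 * driver's cmlb_tg_ops_t vector, and cmlb passes tg_cookie back through
 * unchanged, which sd uses to carry the SD_PATH_* flag.  An illustrative
 * sketch of a call through the vector (tgops is a hypothetical pointer
 * to the driver's cmlb_tg_ops_t):
 *
 *	diskaddr_t cap;
 *
 *	if (tgops->tg_getinfo(devi, TG_GETCAPACITY, &cap,
 *	    (void *)SD_PATH_DIRECT) == 0) {
 *		(cap now holds the device block count)
 *	}
 */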
27612