xref: /titanic_50/usr/src/uts/common/io/scsi/targets/sd.c (revision 573374d31f6a3da992a4aa77ca650864b49d7a76)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/kstat.h>
41 #include <sys/vtrace.h>
42 #include <sys/note.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/efi_partition.h>
46 #include <sys/var.h>
47 #include <sys/aio_req.h>
48 
49 #ifdef __lock_lint
50 #define	_LP64
51 #define	__amd64
52 #endif
53 
54 #if (defined(__fibre))
55 /* Note: is there a leadville version of the following? */
56 #include <sys/fc4/fcal_linkapp.h>
57 #endif
58 #include <sys/taskq.h>
59 #include <sys/uuid.h>
60 #include <sys/byteorder.h>
61 #include <sys/sdt.h>
62 
63 #include "sd_xbuf.h"
64 
65 #include <sys/scsi/targets/sddef.h>
66 #include <sys/cmlb.h>
67 
68 
69 /*
70  * Loadable module info.
71  */
72 #if (defined(__fibre))
73 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
74 char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
75 #else
76 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
77 char _depends_on[]	= "misc/scsi misc/cmlb";
78 #endif
79 
80 /*
81  * Define the interconnect type, to allow the driver to distinguish
82  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
83  *
84  * This is really for backward compatibility. In the future, the driver
85  * should actually check the "interconnect-type" property as reported by
86  * the HBA; however at present this property is not defined by all HBAs,
87  * so we will use this #define (1) to permit the driver to run in
88  * backward-compatibility mode; and (2) to print a notification message
89  * if an FC HBA does not support the "interconnect-type" property.  The
90  * behavior of the driver will be to assume parallel SCSI behaviors unless
91  * the "interconnect-type" property is defined by the HBA **AND** has a
92  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
93  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
94  * Channel behaviors (as per the old ssd).  (Note that the
95  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
96  * will result in the driver assuming parallel SCSI behaviors.)
97  *
98  * (see common/sys/scsi/impl/services.h)
99  *
100  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
101  * since some FC HBAs may already support that, and there is some code in
102  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
103  * default would confuse that code, and besides things should work fine
104  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
105  * "interconnect-type" property.
106  *
107  */
108 #if (defined(__fibre))
109 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
110 #else
111 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
112 #endif
113 
114 /*
115  * The name of the driver, established from the module name in _init.
116  */
117 static	char *sd_label			= NULL;
118 
119 /*
120  * Driver name is unfortunately prefixed on some driver.conf properties.
121  */
122 #if (defined(__fibre))
123 #define	sd_max_xfer_size		ssd_max_xfer_size
124 #define	sd_config_list			ssd_config_list
125 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
126 static	char *sd_config_list		= "ssd-config-list";
127 #else
128 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
129 static	char *sd_config_list		= "sd-config-list";
130 #endif
131 
132 /*
133  * Driver global variables
134  */
135 
136 #if (defined(__fibre))
137 /*
138  * These #defines are to avoid namespace collisions that occur because this
139  * code is currently used to compile two separate driver modules: sd and ssd.
140  * All global variables need to be treated this way (even if declared static)
141  * in order to allow the debugger to resolve the names properly.
142  * It is anticipated that in the near future the ssd module will be obsoleted,
143  * at which time this namespace issue should go away.
144  */
145 #define	sd_state			ssd_state
146 #define	sd_io_time			ssd_io_time
147 #define	sd_failfast_enable		ssd_failfast_enable
148 #define	sd_ua_retry_count		ssd_ua_retry_count
149 #define	sd_report_pfa			ssd_report_pfa
150 #define	sd_max_throttle			ssd_max_throttle
151 #define	sd_min_throttle			ssd_min_throttle
152 #define	sd_rot_delay			ssd_rot_delay
153 
154 #define	sd_retry_on_reservation_conflict	\
155 					ssd_retry_on_reservation_conflict
156 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
157 #define	sd_resv_conflict_name		ssd_resv_conflict_name
158 
159 #define	sd_component_mask		ssd_component_mask
160 #define	sd_level_mask			ssd_level_mask
161 #define	sd_debug_un			ssd_debug_un
162 #define	sd_error_level			ssd_error_level
163 
164 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
165 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
166 
167 #define	sd_tr				ssd_tr
168 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
169 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
170 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
171 #define	sd_check_media_time		ssd_check_media_time
172 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
173 #define	sd_label_mutex			ssd_label_mutex
174 #define	sd_detach_mutex			ssd_detach_mutex
175 #define	sd_log_buf			ssd_log_buf
176 #define	sd_log_mutex			ssd_log_mutex
177 
178 #define	sd_disk_table			ssd_disk_table
179 #define	sd_disk_table_size		ssd_disk_table_size
180 #define	sd_sense_mutex			ssd_sense_mutex
181 #define	sd_cdbtab			ssd_cdbtab
182 
183 #define	sd_cb_ops			ssd_cb_ops
184 #define	sd_ops				ssd_ops
185 #define	sd_additional_codes		ssd_additional_codes
186 #define	sd_tgops			ssd_tgops
187 
188 #define	sd_minor_data			ssd_minor_data
189 #define	sd_minor_data_efi		ssd_minor_data_efi
190 
191 #define	sd_tq				ssd_tq
192 #define	sd_wmr_tq			ssd_wmr_tq
193 #define	sd_taskq_name			ssd_taskq_name
194 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
195 #define	sd_taskq_minalloc		ssd_taskq_minalloc
196 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
197 
198 #define	sd_dump_format_string		ssd_dump_format_string
199 
200 #define	sd_iostart_chain		ssd_iostart_chain
201 #define	sd_iodone_chain			ssd_iodone_chain
202 
203 #define	sd_pm_idletime			ssd_pm_idletime
204 
205 #define	sd_force_pm_supported		ssd_force_pm_supported
206 
207 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
208 
209 #endif
210 
211 
212 #ifdef	SDDEBUG
213 int	sd_force_pm_supported		= 0;
214 #endif	/* SDDEBUG */
215 
216 void *sd_state				= NULL;
217 int sd_io_time				= SD_IO_TIME;
218 int sd_failfast_enable			= 1;
219 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
220 int sd_report_pfa			= 1;
221 int sd_max_throttle			= SD_MAX_THROTTLE;
222 int sd_min_throttle			= SD_MIN_THROTTLE;
223 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
224 int sd_qfull_throttle_enable		= TRUE;
225 
226 int sd_retry_on_reservation_conflict	= 1;
227 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
228 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
229 
230 static int sd_dtype_optical_bind	= -1;
231 
232 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
233 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
234 
235 /*
236  * Global data for debug logging. To enable debug printing, sd_component_mask
237  * and sd_level_mask should be set to the desired bit patterns as outlined in
238  * sddef.h.
239  */
240 uint_t	sd_component_mask		= 0x0;
241 uint_t	sd_level_mask			= 0x0;
242 struct	sd_lun *sd_debug_un		= NULL;
243 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
244 
245 /* Note: these may go away in the future... */
246 static uint32_t	sd_xbuf_active_limit	= 512;
247 static uint32_t sd_xbuf_reserve_limit	= 16;
248 
249 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
250 
251 /*
252  * Timer value used to reset the throttle after it has been reduced
253  * (typically in response to TRAN_BUSY or STATUS_QFULL)
254  */
255 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
256 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
257 
258 /*
259  * Interval value associated with the media change scsi watch.
260  */
261 static int sd_check_media_time		= 3000000;
262 
263 /*
264  * Wait value used for in progress operations during a DDI_SUSPEND
265  */
266 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
267 
268 /*
269  * sd_label_mutex protects a static buffer used in the disk label
270  * component of the driver
271  */
272 static kmutex_t sd_label_mutex;
273 
274 /*
275  * sd_detach_mutex protects un_layer_count, un_detach_count, and
276  * un_opens_in_progress in the sd_lun structure.
277  */
278 static kmutex_t sd_detach_mutex;
279 
280 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
281 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
282 
283 /*
284  * Global buffer and mutex for debug logging
285  */
286 static char	sd_log_buf[1024];
287 static kmutex_t	sd_log_mutex;
288 
289 /*
290  * Structs and globals for recording attached lun information.
291  * This maintains a chain. Each node in the chain represents a SCSI controller.
292  * The structure records the number of luns attached to each target connected
293  * with the controller.
294  * For parallel scsi device only.
295  */
296 struct sd_scsi_hba_tgt_lun {
297 	struct sd_scsi_hba_tgt_lun	*next;
298 	dev_info_t			*pdip;
299 	int				nlun[NTARGETS_WIDE];
300 };
301 
302 /*
303  * Flag to indicate the lun is attached or detached
304  */
305 #define	SD_SCSI_LUN_ATTACH	0
306 #define	SD_SCSI_LUN_DETACH	1
307 
308 static kmutex_t	sd_scsi_target_lun_mutex;
309 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;
310 
311 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
312     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))
313 
314 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
315     sd_scsi_target_lun_head))
316 
317 /*
318  * "Smart" Probe Caching structs, globals, #defines, etc.
319  * For parallel scsi and non-self-identify device only.
320  */
321 
322 /*
323  * The following resources and routines are implemented to support
324  * "smart" probing, which caches the scsi_probe() results in an array,
325  * in order to help avoid long probe times.
326  */
327 struct sd_scsi_probe_cache {
328 	struct	sd_scsi_probe_cache	*next;
329 	dev_info_t	*pdip;
330 	int		cache[NTARGETS_WIDE];
331 };
332 
333 static kmutex_t	sd_scsi_probe_cache_mutex;
334 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
335 
336 /*
337  * Really we only need protection on the head of the linked list, but
338  * better safe than sorry.
339  */
340 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
341     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
342 
343 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
344     sd_scsi_probe_cache_head))
345 
346 
347 /*
348  * Vendor specific data name property declarations
349  */
350 
351 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
352 
353 static sd_tunables seagate_properties = {
354 	SEAGATE_THROTTLE_VALUE,
355 	0,
356 	0,
357 	0,
358 	0,
359 	0,
360 	0,
361 	0,
362 	0
363 };
364 
365 
366 static sd_tunables fujitsu_properties = {
367 	FUJITSU_THROTTLE_VALUE,
368 	0,
369 	0,
370 	0,
371 	0,
372 	0,
373 	0,
374 	0,
375 	0
376 };
377 
378 static sd_tunables ibm_properties = {
379 	IBM_THROTTLE_VALUE,
380 	0,
381 	0,
382 	0,
383 	0,
384 	0,
385 	0,
386 	0,
387 	0
388 };
389 
390 static sd_tunables purple_properties = {
391 	PURPLE_THROTTLE_VALUE,
392 	0,
393 	0,
394 	PURPLE_BUSY_RETRIES,
395 	PURPLE_RESET_RETRY_COUNT,
396 	PURPLE_RESERVE_RELEASE_TIME,
397 	0,
398 	0,
399 	0
400 };
401 
402 static sd_tunables sve_properties = {
403 	SVE_THROTTLE_VALUE,
404 	0,
405 	0,
406 	SVE_BUSY_RETRIES,
407 	SVE_RESET_RETRY_COUNT,
408 	SVE_RESERVE_RELEASE_TIME,
409 	SVE_MIN_THROTTLE_VALUE,
410 	SVE_DISKSORT_DISABLED_FLAG,
411 	0
412 };
413 
414 static sd_tunables maserati_properties = {
415 	0,
416 	0,
417 	0,
418 	0,
419 	0,
420 	0,
421 	0,
422 	MASERATI_DISKSORT_DISABLED_FLAG,
423 	MASERATI_LUN_RESET_ENABLED_FLAG
424 };
425 
426 static sd_tunables pirus_properties = {
427 	PIRUS_THROTTLE_VALUE,
428 	0,
429 	PIRUS_NRR_COUNT,
430 	PIRUS_BUSY_RETRIES,
431 	PIRUS_RESET_RETRY_COUNT,
432 	0,
433 	PIRUS_MIN_THROTTLE_VALUE,
434 	PIRUS_DISKSORT_DISABLED_FLAG,
435 	PIRUS_LUN_RESET_ENABLED_FLAG
436 };
437 
438 #endif
439 
440 #if (defined(__sparc) && !defined(__fibre)) || \
441 	(defined(__i386) || defined(__amd64))
442 
443 
444 static sd_tunables elite_properties = {
445 	ELITE_THROTTLE_VALUE,
446 	0,
447 	0,
448 	0,
449 	0,
450 	0,
451 	0,
452 	0,
453 	0
454 };
455 
456 static sd_tunables st31200n_properties = {
457 	ST31200N_THROTTLE_VALUE,
458 	0,
459 	0,
460 	0,
461 	0,
462 	0,
463 	0,
464 	0,
465 	0
466 };
467 
468 #endif /* Fibre or not */
469 
470 static sd_tunables lsi_properties_scsi = {
471 	LSI_THROTTLE_VALUE,
472 	0,
473 	LSI_NOTREADY_RETRIES,
474 	0,
475 	0,
476 	0,
477 	0,
478 	0,
479 	0
480 };
481 
482 static sd_tunables symbios_properties = {
483 	SYMBIOS_THROTTLE_VALUE,
484 	0,
485 	SYMBIOS_NOTREADY_RETRIES,
486 	0,
487 	0,
488 	0,
489 	0,
490 	0,
491 	0
492 };
493 
494 static sd_tunables lsi_properties = {
495 	0,
496 	0,
497 	LSI_NOTREADY_RETRIES,
498 	0,
499 	0,
500 	0,
501 	0,
502 	0,
503 	0
504 };
505 
506 static sd_tunables lsi_oem_properties = {
507 	0,
508 	0,
509 	LSI_OEM_NOTREADY_RETRIES,
510 	0,
511 	0,
512 	0,
513 	0,
514 	0,
515 	0
516 };
517 
518 
519 
520 #if (defined(SD_PROP_TST))
521 
522 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
523 #define	SD_TST_THROTTLE_VAL	16
524 #define	SD_TST_NOTREADY_VAL	12
525 #define	SD_TST_BUSY_VAL		60
526 #define	SD_TST_RST_RETRY_VAL	36
527 #define	SD_TST_RSV_REL_TIME	60
528 
529 static sd_tunables tst_properties = {
530 	SD_TST_THROTTLE_VAL,
531 	SD_TST_CTYPE_VAL,
532 	SD_TST_NOTREADY_VAL,
533 	SD_TST_BUSY_VAL,
534 	SD_TST_RST_RETRY_VAL,
535 	SD_TST_RSV_REL_TIME,
536 	0,
537 	0,
538 	0
539 };
540 #endif
541 
/*
 * This is similar to the ANSI toupper implementation: anything outside
 * 'a'..'z' is passed through untouched, a lower case letter is shifted to
 * its upper case counterpart.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define	SD_TOUPPER(C)	(((C) < 'a' || (C) > 'z') ? (C) : (C) - ('a' - 'A'))
544 
545 /*
546  * Static Driver Configuration Table
547  *
548  * This is the table of disks which need throttle adjustment (or, perhaps
549  * something else as defined by the flags at a future time.)  device_id
550  * is a string consisting of concatenated vid (vendor), pid (product/model)
551  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
552  * the parts of the string are as defined by the sizes in the scsi_inquiry
553  * structure.  Device type is searched as far as the device_id string is
554  * defined.  Flags defines which values are to be set in the driver from the
555  * properties list.
556  *
557  * Entries below which begin and end with a "*" are a special case.
558  * These do not have a specific vendor, and the string which follows
559  * can appear anywhere in the 16 byte PID portion of the inquiry data.
560  *
561  * Entries below which begin and end with a " " (blank) are a special
562  * case. The comparison function will treat multiple consecutive blanks
563  * as equivalent to a single blank. For example, this causes a
564  * sd_disk_table entry of " NEC CDROM " to match a device's id string
565  * of  "NEC       CDROM".
566  *
567  * Note: The MD21 controller type has been obsoleted.
568  *	 ST318202F is a Legacy device
569  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
570  *	 made with an FC connection. The entries here are a legacy.
571  */
572 static sd_disk_config_t sd_disk_table[] = {
573 #if defined(__fibre) || defined(__i386) || defined(__amd64)
574 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
575 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
576 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
577 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
578 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
579 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
580 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
581 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
582 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
583 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
584 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
585 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
586 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
589 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
590 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
591 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
592 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
593 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
594 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
595 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
596 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
597 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
598 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
599 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
600 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
601 	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
602 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
603 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
604 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
605 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
606 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
607 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
608 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
609 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
610 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
611 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
612 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
613 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
620 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
621 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
622 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
623 			SD_CONF_BSET_BSY_RETRY_COUNT|
624 			SD_CONF_BSET_RST_RETRIES|
625 			SD_CONF_BSET_RSV_REL_TIME,
626 		&purple_properties },
627 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
628 		SD_CONF_BSET_BSY_RETRY_COUNT|
629 		SD_CONF_BSET_RST_RETRIES|
630 		SD_CONF_BSET_RSV_REL_TIME|
631 		SD_CONF_BSET_MIN_THROTTLE|
632 		SD_CONF_BSET_DISKSORT_DISABLED,
633 		&sve_properties },
634 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
635 			SD_CONF_BSET_BSY_RETRY_COUNT|
636 			SD_CONF_BSET_RST_RETRIES|
637 			SD_CONF_BSET_RSV_REL_TIME,
638 		&purple_properties },
639 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
640 		SD_CONF_BSET_LUN_RESET_ENABLED,
641 		&maserati_properties },
642 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
643 		SD_CONF_BSET_NRR_COUNT|
644 		SD_CONF_BSET_BSY_RETRY_COUNT|
645 		SD_CONF_BSET_RST_RETRIES|
646 		SD_CONF_BSET_MIN_THROTTLE|
647 		SD_CONF_BSET_DISKSORT_DISABLED|
648 		SD_CONF_BSET_LUN_RESET_ENABLED,
649 		&pirus_properties },
650 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
651 		SD_CONF_BSET_NRR_COUNT|
652 		SD_CONF_BSET_BSY_RETRY_COUNT|
653 		SD_CONF_BSET_RST_RETRIES|
654 		SD_CONF_BSET_MIN_THROTTLE|
655 		SD_CONF_BSET_DISKSORT_DISABLED|
656 		SD_CONF_BSET_LUN_RESET_ENABLED,
657 		&pirus_properties },
658 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
659 		SD_CONF_BSET_NRR_COUNT|
660 		SD_CONF_BSET_BSY_RETRY_COUNT|
661 		SD_CONF_BSET_RST_RETRIES|
662 		SD_CONF_BSET_MIN_THROTTLE|
663 		SD_CONF_BSET_DISKSORT_DISABLED|
664 		SD_CONF_BSET_LUN_RESET_ENABLED,
665 		&pirus_properties },
666 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
667 		SD_CONF_BSET_NRR_COUNT|
668 		SD_CONF_BSET_BSY_RETRY_COUNT|
669 		SD_CONF_BSET_RST_RETRIES|
670 		SD_CONF_BSET_MIN_THROTTLE|
671 		SD_CONF_BSET_DISKSORT_DISABLED|
672 		SD_CONF_BSET_LUN_RESET_ENABLED,
673 		&pirus_properties },
674 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
675 		SD_CONF_BSET_NRR_COUNT|
676 		SD_CONF_BSET_BSY_RETRY_COUNT|
677 		SD_CONF_BSET_RST_RETRIES|
678 		SD_CONF_BSET_MIN_THROTTLE|
679 		SD_CONF_BSET_DISKSORT_DISABLED|
680 		SD_CONF_BSET_LUN_RESET_ENABLED,
681 		&pirus_properties },
682 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
683 		SD_CONF_BSET_NRR_COUNT|
684 		SD_CONF_BSET_BSY_RETRY_COUNT|
685 		SD_CONF_BSET_RST_RETRIES|
686 		SD_CONF_BSET_MIN_THROTTLE|
687 		SD_CONF_BSET_DISKSORT_DISABLED|
688 		SD_CONF_BSET_LUN_RESET_ENABLED,
689 		&pirus_properties },
690 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
691 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
692 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
693 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
694 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
695 #endif /* fibre or NON-sparc platforms */
696 #if ((defined(__sparc) && !defined(__fibre)) ||\
697 	(defined(__i386) || defined(__amd64)))
698 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
699 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
700 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
701 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
702 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
703 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
704 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
705 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
706 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
707 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
708 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
709 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
710 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
711 	    &symbios_properties },
712 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
713 	    &lsi_properties_scsi },
714 #if defined(__i386) || defined(__amd64)
715 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
716 				    | SD_CONF_BSET_READSUB_BCD
717 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
718 				    | SD_CONF_BSET_NO_READ_HEADER
719 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
720 
721 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
722 				    | SD_CONF_BSET_READSUB_BCD
723 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
724 				    | SD_CONF_BSET_NO_READ_HEADER
725 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
726 #endif /* __i386 || __amd64 */
727 #endif /* sparc NON-fibre or NON-sparc platforms */
728 
729 #if (defined(SD_PROP_TST))
730 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
731 				| SD_CONF_BSET_CTYPE
732 				| SD_CONF_BSET_NRR_COUNT
733 				| SD_CONF_BSET_FAB_DEVID
734 				| SD_CONF_BSET_NOCACHE
735 				| SD_CONF_BSET_BSY_RETRY_COUNT
736 				| SD_CONF_BSET_PLAYMSF_BCD
737 				| SD_CONF_BSET_READSUB_BCD
738 				| SD_CONF_BSET_READ_TOC_TRK_BCD
739 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
740 				| SD_CONF_BSET_NO_READ_HEADER
741 				| SD_CONF_BSET_READ_CD_XD4
742 				| SD_CONF_BSET_RST_RETRIES
743 				| SD_CONF_BSET_RSV_REL_TIME
744 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
745 #endif
746 };
747 
748 static const int sd_disk_table_size =
749 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
750 
751 
752 
/*
 * Interconnect type codes, as compared against un_interconnect_type, and
 * two predicates built on them.  SD_IS_SERIAL() is true for the SATA code
 * only.
 */
#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4

#define	SD_IS_PARALLEL_SCSI(un)		\
	(SD_INTERCONNECT_PARALLEL == (un)->un_interconnect_type)
#define	SD_IS_SERIAL(un)		\
	(SD_INTERCONNECT_SATA == (un)->un_interconnect_type)
762 
/*
 * Constants used by the device id registration routines when picking
 * apart a vpd page.
 */
#define	VPD_HEAD_OFFSET		3	/* size of the vpd page head */
#define	VPD_PAGE_LENGTH		3	/* offset of the page length data */
#define	VPD_MODE_PAGE		1	/* offset of the "page code" in a vpd page */
769 
770 static kmutex_t sd_sense_mutex = {0};
771 
/*
 * Macros for updates of the driver state
 *
 * New_state(un, s)
 *	Saves the current un_state in un_last_state, then makes s the current
 *	state.  The expansion is a single parenthesized expression whose value
 *	is the new state, so it behaves the same as a statement, inside a
 *	larger expression, or on the right hand side of an assignment.
 *
 * Restore_state(un)
 *	Swaps un_state and un_last_state by feeding the saved state back
 *	through New_state().  It expands to a brace block and is therefore
 *	usable as a statement only.
 *
 * Both macros evaluate un more than once; pass an expression without side
 * effects.
 */
#define	New_state(un, s)        \
	((un)->un_last_state = (un)->un_state, (un)->un_state = (s))
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
779 
780 static struct sd_cdbinfo sd_cdbtab[] = {
781 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
782 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
783 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
784 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
785 };
786 
787 /*
788  * Specifies the number of seconds that must have elapsed since the last
789  * cmd. has completed for a device to be declared idle to the PM framework.
790  */
791 static int sd_pm_idletime = 1;
792 
793 /*
794  * Internal function prototypes
795  */
796 
797 #if (defined(__fibre))
798 /*
799  * These #defines are to avoid namespace collisions that occur because this
800  * code is currently used to compile two separate driver modules: sd and ssd.
801  * All function names need to be treated this way (even if declared static)
802  * in order to allow the debugger to resolve the names properly.
803  * It is anticipated that in the near future the ssd module will be obsoleted,
804  * at which time this ugliness should go away.
805  */
806 #define	sd_log_trace			ssd_log_trace
807 #define	sd_log_info			ssd_log_info
808 #define	sd_log_err			ssd_log_err
809 #define	sdprobe				ssdprobe
810 #define	sdinfo				ssdinfo
811 #define	sd_prop_op			ssd_prop_op
812 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
813 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
814 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
815 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
816 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
817 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
818 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
819 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
820 #define	sd_spin_up_unit			ssd_spin_up_unit
821 #define	sd_enable_descr_sense		ssd_enable_descr_sense
822 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
823 #define	sd_set_mmc_caps			ssd_set_mmc_caps
824 #define	sd_read_unit_properties		ssd_read_unit_properties
825 #define	sd_process_sdconf_file		ssd_process_sdconf_file
826 #define	sd_process_sdconf_table		ssd_process_sdconf_table
827 #define	sd_sdconf_id_match		ssd_sdconf_id_match
828 #define	sd_blank_cmp			ssd_blank_cmp
829 #define	sd_chk_vers1_data		ssd_chk_vers1_data
830 #define	sd_set_vers1_properties		ssd_set_vers1_properties
831 
832 #define	sd_get_physical_geometry	ssd_get_physical_geometry
833 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
834 #define	sd_update_block_info		ssd_update_block_info
835 #define	sd_register_devid		ssd_register_devid
836 #define	sd_get_devid			ssd_get_devid
837 #define	sd_create_devid			ssd_create_devid
838 #define	sd_write_deviceid		ssd_write_deviceid
839 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
840 #define	sd_setup_pm			ssd_setup_pm
841 #define	sd_create_pm_components		ssd_create_pm_components
842 #define	sd_ddi_suspend			ssd_ddi_suspend
843 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
844 #define	sd_ddi_resume			ssd_ddi_resume
845 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
846 #define	sdpower				ssdpower
847 #define	sdattach			ssdattach
848 #define	sddetach			ssddetach
849 #define	sd_unit_attach			ssd_unit_attach
850 #define	sd_unit_detach			ssd_unit_detach
851 #define	sd_set_unit_attributes		ssd_set_unit_attributes
852 #define	sd_create_errstats		ssd_create_errstats
853 #define	sd_set_errstats			ssd_set_errstats
854 #define	sd_set_pstats			ssd_set_pstats
855 #define	sddump				ssddump
856 #define	sd_scsi_poll			ssd_scsi_poll
857 #define	sd_send_polled_RQS		ssd_send_polled_RQS
858 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
859 #define	sd_init_event_callbacks		ssd_init_event_callbacks
860 #define	sd_event_callback		ssd_event_callback
861 #define	sd_cache_control		ssd_cache_control
862 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
863 #define	sd_make_device			ssd_make_device
864 #define	sdopen				ssdopen
865 #define	sdclose				ssdclose
866 #define	sd_ready_and_valid		ssd_ready_and_valid
867 #define	sdmin				ssdmin
868 #define	sdread				ssdread
869 #define	sdwrite				ssdwrite
870 #define	sdaread				ssdaread
871 #define	sdawrite			ssdawrite
872 #define	sdstrategy			ssdstrategy
873 #define	sdioctl				ssdioctl
874 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
875 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
876 #define	sd_checksum_iostart		ssd_checksum_iostart
877 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
878 #define	sd_pm_iostart			ssd_pm_iostart
879 #define	sd_core_iostart			ssd_core_iostart
880 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
881 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
882 #define	sd_checksum_iodone		ssd_checksum_iodone
883 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
884 #define	sd_pm_iodone			ssd_pm_iodone
885 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
886 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
887 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
888 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
889 #define	sd_buf_iodone			ssd_buf_iodone
890 #define	sd_uscsi_strategy		ssd_uscsi_strategy
891 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
892 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
893 #define	sd_uscsi_iodone			ssd_uscsi_iodone
894 #define	sd_xbuf_strategy		ssd_xbuf_strategy
895 #define	sd_xbuf_init			ssd_xbuf_init
896 #define	sd_pm_entry			ssd_pm_entry
897 #define	sd_pm_exit			ssd_pm_exit
898 
899 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
900 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
901 
902 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
903 #define	sdintr				ssdintr
904 #define	sd_start_cmds			ssd_start_cmds
905 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
906 #define	sd_bioclone_alloc		ssd_bioclone_alloc
907 #define	sd_bioclone_free		ssd_bioclone_free
908 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
909 #define	sd_shadow_buf_free		ssd_shadow_buf_free
910 #define	sd_print_transport_rejected_message	\
911 					ssd_print_transport_rejected_message
912 #define	sd_retry_command		ssd_retry_command
913 #define	sd_set_retry_bp			ssd_set_retry_bp
914 #define	sd_send_request_sense_command	ssd_send_request_sense_command
915 #define	sd_start_retry_command		ssd_start_retry_command
916 #define	sd_start_direct_priority_command	\
917 					ssd_start_direct_priority_command
918 #define	sd_return_failed_command	ssd_return_failed_command
919 #define	sd_return_failed_command_no_restart	\
920 					ssd_return_failed_command_no_restart
921 #define	sd_return_command		ssd_return_command
922 #define	sd_sync_with_callback		ssd_sync_with_callback
923 #define	sdrunout			ssdrunout
924 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
925 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
926 #define	sd_reduce_throttle		ssd_reduce_throttle
927 #define	sd_restore_throttle		ssd_restore_throttle
928 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
929 #define	sd_init_cdb_limits		ssd_init_cdb_limits
930 #define	sd_pkt_status_good		ssd_pkt_status_good
931 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
932 #define	sd_pkt_status_busy		ssd_pkt_status_busy
933 #define	sd_pkt_status_reservation_conflict	\
934 					ssd_pkt_status_reservation_conflict
935 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
936 #define	sd_handle_request_sense		ssd_handle_request_sense
937 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
938 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
939 #define	sd_validate_sense_data		ssd_validate_sense_data
940 #define	sd_decode_sense			ssd_decode_sense
941 #define	sd_print_sense_msg		ssd_print_sense_msg
942 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
943 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
944 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
945 #define	sd_sense_key_medium_or_hardware_error	\
946 					ssd_sense_key_medium_or_hardware_error
947 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
948 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
949 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
950 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
951 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
952 #define	sd_sense_key_default		ssd_sense_key_default
953 #define	sd_print_retry_msg		ssd_print_retry_msg
954 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
955 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
956 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
957 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
958 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
959 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
960 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
961 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
962 #define	sd_pkt_reason_default		ssd_pkt_reason_default
963 #define	sd_reset_target			ssd_reset_target
964 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
965 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
966 #define	sd_taskq_create			ssd_taskq_create
967 #define	sd_taskq_delete			ssd_taskq_delete
968 #define	sd_media_change_task		ssd_media_change_task
969 #define	sd_handle_mchange		ssd_handle_mchange
970 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
971 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
972 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
973 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
/* Like every other alias in this __fibre table, map sd_* to ssd_*. */
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
976 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
977 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
978 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
979 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
980 					ssd_send_scsi_PERSISTENT_RESERVE_IN
981 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
982 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
983 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
984 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
985 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
986 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
987 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
988 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
989 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
990 #define	sd_alloc_rqs			ssd_alloc_rqs
991 #define	sd_free_rqs			ssd_free_rqs
992 #define	sd_dump_memory			ssd_dump_memory
993 #define	sd_get_media_info		ssd_get_media_info
994 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
995 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
996 #define	sd_setup_next_xfer		ssd_setup_next_xfer
997 #define	sd_dkio_get_temp		ssd_dkio_get_temp
998 #define	sd_check_mhd			ssd_check_mhd
999 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1000 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1001 #define	sd_sname			ssd_sname
1002 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1003 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1004 #define	sd_take_ownership		ssd_take_ownership
1005 #define	sd_reserve_release		ssd_reserve_release
1006 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1007 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1008 #define	sd_persistent_reservation_in_read_keys	\
1009 					ssd_persistent_reservation_in_read_keys
1010 #define	sd_persistent_reservation_in_read_resv	\
1011 					ssd_persistent_reservation_in_read_resv
1012 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1013 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1014 #define	sd_mhdioc_release		ssd_mhdioc_release
1015 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1016 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1017 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1018 #define	sr_change_blkmode		ssr_change_blkmode
1019 #define	sr_change_speed			ssr_change_speed
1020 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1021 #define	sr_pause_resume			ssr_pause_resume
1022 #define	sr_play_msf			ssr_play_msf
1023 #define	sr_play_trkind			ssr_play_trkind
1024 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1025 #define	sr_read_subchannel		ssr_read_subchannel
1026 #define	sr_read_tocentry		ssr_read_tocentry
1027 #define	sr_read_tochdr			ssr_read_tochdr
1028 #define	sr_read_cdda			ssr_read_cdda
1029 #define	sr_read_cdxa			ssr_read_cdxa
1030 #define	sr_read_mode1			ssr_read_mode1
1031 #define	sr_read_mode2			ssr_read_mode2
1032 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1033 #define	sr_sector_mode			ssr_sector_mode
1034 #define	sr_eject			ssr_eject
1035 #define	sr_ejected			ssr_ejected
1036 #define	sr_check_wp			ssr_check_wp
1037 #define	sd_check_media			ssd_check_media
1038 #define	sd_media_watch_cb		ssd_media_watch_cb
1039 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1040 #define	sr_volume_ctrl			ssr_volume_ctrl
1041 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1042 #define	sd_log_page_supported		ssd_log_page_supported
1043 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1044 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1045 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1046 #define	sd_range_lock			ssd_range_lock
1047 #define	sd_get_range			ssd_get_range
1048 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1049 #define	sd_range_unlock			ssd_range_unlock
1050 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1051 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1052 
1053 #define	sd_iostart_chain		ssd_iostart_chain
1054 #define	sd_iodone_chain			ssd_iodone_chain
1055 #define	sd_initpkt_map			ssd_initpkt_map
1056 #define	sd_destroypkt_map		ssd_destroypkt_map
1057 #define	sd_chain_type_map		ssd_chain_type_map
1058 #define	sd_chain_index_map		ssd_chain_index_map
1059 
1060 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1061 #define	sd_failfast_flushq		ssd_failfast_flushq
1062 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1063 
1064 #define	sd_is_lsi			ssd_is_lsi
1065 #define	sd_tg_rdwr			ssd_tg_rdwr
1066 #define	sd_tg_getinfo			ssd_tg_getinfo
1067 
1068 #endif	/* #if (defined(__fibre)) */
1069 
1070 
1071 int _init(void);
1072 int _fini(void);
1073 int _info(struct modinfo *modinfop);
1074 
1075 /*PRINTFLIKE3*/
1076 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1077 /*PRINTFLIKE3*/
1078 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1079 /*PRINTFLIKE3*/
1080 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1081 
1082 static int sdprobe(dev_info_t *devi);
1083 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1084     void **result);
1085 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1086     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1087 
1088 /*
1089  * Smart probe for parallel scsi
1090  */
1091 static void sd_scsi_probe_cache_init(void);
1092 static void sd_scsi_probe_cache_fini(void);
1093 static void sd_scsi_clear_probe_cache(void);
1094 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1095 
1096 /*
1097  * Attached luns on target for parallel scsi
1098  */
1099 static void sd_scsi_target_lun_init(void);
1100 static void sd_scsi_target_lun_fini(void);
1101 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
1102 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1103 
1104 static int	sd_spin_up_unit(struct sd_lun *un);
1105 #ifdef _LP64
1106 static void	sd_enable_descr_sense(struct sd_lun *un);
1107 static void	sd_reenable_dsense_task(void *arg);
1108 #endif /* _LP64 */
1109 
1110 static void	sd_set_mmc_caps(struct sd_lun *un);
1111 
1112 static void sd_read_unit_properties(struct sd_lun *un);
1113 static int  sd_process_sdconf_file(struct sd_lun *un);
1114 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1115     int *data_list, sd_tunables *values);
1116 static void sd_process_sdconf_table(struct sd_lun *un);
1117 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1118 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1119 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1120 	int list_len, char *dataname_ptr);
1121 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1122     sd_tunables *prop_list);
1123 
1124 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1125     int reservation_flag);
1126 static int  sd_get_devid(struct sd_lun *un);
1127 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1128 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1129 static int  sd_write_deviceid(struct sd_lun *un);
1130 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1131 static int  sd_check_vpd_page_support(struct sd_lun *un);
1132 
1133 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1134 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1135 
1136 static int  sd_ddi_suspend(dev_info_t *devi);
1137 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1138 static int  sd_ddi_resume(dev_info_t *devi);
1139 static int  sd_ddi_pm_resume(struct sd_lun *un);
1140 static int  sdpower(dev_info_t *devi, int component, int level);
1141 
1142 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1143 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1144 static int  sd_unit_attach(dev_info_t *devi);
1145 static int  sd_unit_detach(dev_info_t *devi);
1146 
1147 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1148 static void sd_create_errstats(struct sd_lun *un, int instance);
1149 static void sd_set_errstats(struct sd_lun *un);
1150 static void sd_set_pstats(struct sd_lun *un);
1151 
1152 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1153 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1154 static int  sd_send_polled_RQS(struct sd_lun *un);
1155 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1156 
1157 #if (defined(__fibre))
1158 /*
1159  * Event callbacks (photon)
1160  */
1161 static void sd_init_event_callbacks(struct sd_lun *un);
1162 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1163 #endif
1164 
/*
 * Defines for sd_cache_control
 *
 * Symbolic values for use with sd_cache_control(); presumably these are
 * what callers pass as its rcd_flag and wce_flag arguments (confirm
 * against the function body).
 *
 * The negative value is parenthesized so the macro always expands to a
 * single primary expression, whatever operators surround it.
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	(-1)
1172 
1173 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1174 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1175 static dev_t sd_make_device(dev_info_t *devi);
1176 
1177 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1178 	uint64_t capacity);
1179 
1180 /*
1181  * Driver entry point functions.
1182  */
1183 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1184 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1185 static int  sd_ready_and_valid(struct sd_lun *un);
1186 
1187 static void sdmin(struct buf *bp);
1188 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1189 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1190 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1191 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1192 
1193 static int sdstrategy(struct buf *bp);
1194 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1195 
1196 /*
1197  * Function prototypes for layering functions in the iostart chain.
1198  */
1199 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1200 	struct buf *bp);
1201 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1202 	struct buf *bp);
1203 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1204 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1205 	struct buf *bp);
1206 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1207 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1208 
1209 /*
1210  * Function prototypes for layering functions in the iodone chain.
1211  */
1212 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1213 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1214 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1215 	struct buf *bp);
1216 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1217 	struct buf *bp);
1218 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1219 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1220 	struct buf *bp);
1221 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1222 
1223 /*
1224  * Prototypes for functions to support buf(9S) based IO.
1225  */
1226 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1227 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1228 static void sd_destroypkt_for_buf(struct buf *);
1229 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1230 	struct buf *bp, int flags,
1231 	int (*callback)(caddr_t), caddr_t callback_arg,
1232 	diskaddr_t lba, uint32_t blockcount);
1233 #if defined(__i386) || defined(__amd64)
1234 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1235 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1236 #endif /* defined(__i386) || defined(__amd64) */
1237 
1238 /*
1239  * Prototypes for functions to support USCSI IO.
1240  */
1241 static int sd_uscsi_strategy(struct buf *bp);
1242 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1243 static void sd_destroypkt_for_uscsi(struct buf *);
1244 
1245 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1246 	uchar_t chain_type, void *pktinfop);
1247 
1248 static int  sd_pm_entry(struct sd_lun *un);
1249 static void sd_pm_exit(struct sd_lun *un);
1250 
1251 static void sd_pm_idletimeout_handler(void *arg);
1252 
1253 /*
1254  * sd_core internal functions (used at the sd_core_io layer).
1255  */
1256 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1257 static void sdintr(struct scsi_pkt *pktp);
1258 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1259 
1260 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1261 	enum uio_seg dataspace, int path_flag);
1262 
1263 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1264 	daddr_t blkno, int (*func)(struct buf *));
1265 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1266 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1267 static void sd_bioclone_free(struct buf *bp);
1268 static void sd_shadow_buf_free(struct buf *bp);
1269 
1270 static void sd_print_transport_rejected_message(struct sd_lun *un,
1271 	struct sd_xbuf *xp, int code);
1272 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1273     void *arg, int code);
1274 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1275     void *arg, int code);
1276 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1277     void *arg, int code);
1278 
1279 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1280 	int retry_check_flag,
1281 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1282 		int c),
1283 	void *user_arg, int failure_code,  clock_t retry_delay,
1284 	void (*statp)(kstat_io_t *));
1285 
1286 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1287 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1288 
1289 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1290 	struct scsi_pkt *pktp);
1291 static void sd_start_retry_command(void *arg);
1292 static void sd_start_direct_priority_command(void *arg);
1293 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1294 	int errcode);
1295 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1296 	struct buf *bp, int errcode);
1297 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1298 static void sd_sync_with_callback(struct sd_lun *un);
1299 static int sdrunout(caddr_t arg);
1300 
1301 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1302 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1303 
1304 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1305 static void sd_restore_throttle(void *arg);
1306 
1307 static void sd_init_cdb_limits(struct sd_lun *un);
1308 
1309 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1310 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1311 
1312 /*
1313  * Error handling functions
1314  */
1315 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1316 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1317 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1318 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1319 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1320 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1321 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1322 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1323 
1324 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1325 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1326 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1327 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1328 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1329 	struct sd_xbuf *xp);
1330 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1331 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1332 
1333 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1334 	void *arg, int code);
1335 
1336 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1337 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1338 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1339 	uint8_t *sense_datap,
1340 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1341 static void sd_sense_key_not_ready(struct sd_lun *un,
1342 	uint8_t *sense_datap,
1343 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1344 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1345 	uint8_t *sense_datap,
1346 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1347 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1348 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1349 static void sd_sense_key_unit_attention(struct sd_lun *un,
1350 	uint8_t *sense_datap,
1351 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1352 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1353 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1354 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1355 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1356 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1357 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1358 static void sd_sense_key_default(struct sd_lun *un,
1359 	uint8_t *sense_datap,
1360 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1361 
1362 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1363 	void *arg, int flag);
1364 
1365 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1366 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1367 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1368 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1369 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1370 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1371 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1372 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1373 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1374 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1375 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1376 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1377 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1378 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1379 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1380 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1381 
1382 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1383 
1384 static void sd_start_stop_unit_callback(void *arg);
1385 static void sd_start_stop_unit_task(void *arg);
1386 
1387 static void sd_taskq_create(void);
1388 static void sd_taskq_delete(void);
1389 static void sd_media_change_task(void *arg);
1390 
1391 static int sd_handle_mchange(struct sd_lun *un);
1392 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1393 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1394 	uint32_t *lbap, int path_flag);
1395 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1396 	uint32_t *lbap, int path_flag);
1397 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1398 	int path_flag);
1399 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1400 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1401 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1402 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1403 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1404 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1405 	uchar_t usr_cmd, uchar_t *usr_bufp);
1406 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1407 	struct dk_callback *dkc);
1408 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1409 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1410 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1411 	uchar_t *bufaddr, uint_t buflen, int path_flag);
1412 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1413 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1414 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1415 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1416 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1417 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1418 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1419 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1420 	size_t buflen, daddr_t start_block, int path_flag);
1421 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1422 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1423 	path_flag)
1424 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1425 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1426 	path_flag)
1427 
1428 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1429 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1430 	uint16_t param_ptr, int path_flag);
1431 
1432 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1433 static void sd_free_rqs(struct sd_lun *un);
1434 
1435 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1436 	uchar_t *data, int len, int fmt);
1437 static void sd_panic_for_res_conflict(struct sd_lun *un);
1438 
1439 /*
1440  * Disk Ioctl Function Prototypes
1441  */
1442 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1443 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1444 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1445 
1446 /*
1447  * Multi-host Ioctl Prototypes
1448  */
1449 static int sd_check_mhd(dev_t dev, int interval);
1450 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1451 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1452 static char *sd_sname(uchar_t status);
1453 static void sd_mhd_resvd_recover(void *arg);
/* (void) gives a real prototype; an empty () leaves the arguments unchecked. */
static void sd_resv_reclaim_thread(void);
1455 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1456 static int sd_reserve_release(dev_t dev, int cmd);
1457 static void sd_rmv_resv_reclaim_req(dev_t dev);
1458 static void sd_mhd_reset_notify_cb(caddr_t arg);
1459 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1460 	mhioc_inkeys_t *usrp, int flag);
1461 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1462 	mhioc_inresvs_t *usrp, int flag);
1463 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1465 static int sd_mhdioc_release(dev_t dev);
1466 static int sd_mhdioc_register_devid(dev_t dev);
1467 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1468 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1469 
1470 /*
1471  * SCSI removable prototypes
1472  */
1473 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1476 static int sr_pause_resume(dev_t dev, int mode);
1477 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1478 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1488 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1489 static int sr_eject(dev_t dev);
1490 static void sr_ejected(register struct sd_lun *un);
1491 static int sr_check_wp(dev_t dev);
1492 static int sd_check_media(dev_t dev, enum dkio_state state);
1493 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1494 static void sd_delayed_cv_broadcast(void *arg);
1495 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1496 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1497 
1498 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1499 
1500 /*
1501  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1502  */
1503 static void sd_check_for_writable_cd(struct sd_lun *un, int path_flag);
1504 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1505 static void sd_wm_cache_destructor(void *wm, void *un);
1506 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1507 	daddr_t endb, ushort_t typ);
1508 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1509 	daddr_t endb);
1510 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1511 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1512 static void sd_read_modify_write_task(void * arg);
1513 static int
1514 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1515 	struct buf **bpp);
1516 
1517 
1518 /*
1519  * Function prototypes for failfast support.
1520  */
1521 static void sd_failfast_flushq(struct sd_lun *un);
1522 static int sd_failfast_flushq_callback(struct buf *bp);
1523 
1524 /*
1525  * Function prototypes to check for lsi devices
1526  */
1527 static void sd_is_lsi(struct sd_lun *un);
1528 
1529 /*
1530  * Function prototypes for x86 support
1531  */
1532 #if defined(__i386) || defined(__amd64)
1533 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1534 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1535 #endif
1536 
1537 
1538 /* Function prototypes for cmlb */
1539 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1540     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1541 
1542 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1543 
1544 /*
1545  * Constants for failfast support:
1546  *
1547  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1548  * failfast processing being performed.
1549  *
1550  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1551  * failfast processing on all bufs with B_FAILFAST set.
1552  */
1553 
#define	SD_FAILFAST_INACTIVE		0	/* normal state, no failfast */
#define	SD_FAILFAST_ACTIVE		1	/* failfast processing active */
1556 
1557 /*
1558  * Bitmask to control behavior of buf(9S) flushes when a transition to
1559  * the failfast state occurs. Optional bits include:
1560  *
1561  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1562  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1563  * be flushed.
1564  *
1565  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1566  * driver, in addition to the regular wait queue. This includes the xbuf
1567  * queues. When clear, only the driver's wait queue will be flushed.
1568  */
/* Distinct bits, so both options may be OR-ed into one control word. */
#define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
#define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1571 
1572 /*
1573  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1574  * to flush all queues within the driver.
1575  */
/* Bitmask of SD_FAILFAST_FLUSH_* options; not const, so it can be changed. */
static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1577 
1578 
1579 /*
1580  * SD Testing Fault Injection
1581  */
/* These helpers exist only when the driver is built with SD_FAULT_INJECTION. */
#ifdef SD_FAULT_INJECTION
static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
static void sd_faultinjection(struct scsi_pkt *pktp);
/* Called by the sd_log_*() routines with the already formatted message. */
static void sd_injection_log(char *buf, struct sd_lun *un);
#endif
1587 
1588 /*
1589  * Device driver ops vector
1590  */
/*
 * Character/block entry point vector for the driver.  Slots the driver
 * does not implement are filled with nodev (nochpoll for poll), and the
 * streamtab slot is 0.  sd_ops below points at this structure.  The
 * initializer is positional, so the order of the entries must not change.
 */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread, 		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1611 
/*
 * Device operations vector; modldrv below refers to it.  It supplies the
 * getinfo, probe, attach, detach and power entry points and links to
 * sd_cb_ops.  There are no bus operations (NULL), identify is nulldev and
 * reset is nodev.  The initializer is positional.
 */
static struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	sdpower			/* power */
};
1625 
1626 
1627 /*
1628  * This is the loadable module wrapper.
1629  */
1630 #include <sys/modctl.h>
1631 
/* Driver linkage structure; ties the module name to the sd_ops vector. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops */
};
1637 
1638 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info(), and to mod_modname() by _init().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,	/* the only linkage element: the driver linkage above */
	NULL		/* no further linkage elements */
};
1644 
/*
 * cmlb target operations vector: a version tag followed by the
 * sd_tg_rdwr() and sd_tg_getinfo() callbacks declared earlier.
 */
static cmlb_tg_ops_t sd_tgops = {
	TG_DK_OPS_VERSION_1,
	sd_tg_rdwr,
	sd_tg_getinfo
	};
1650 
/*
 * Additional code-to-message strings specific to this driver.  Each entry
 * is a flattened triple; the first two numbers are presumably the
 * additional sense code and its qualifier (confirm against the definition
 * of struct scsi_asq_key_strings).  The final { 0xffff, 0x0, NULL } entry
 * looks like an end-of-table marker.
 */
static struct scsi_asq_key_strings sd_additional_codes[] = {
	0x81, 0, "Logical Unit is Reserved",
	0x85, 0, "Audio Address Not Valid",
	0xb6, 0, "Media Load Mechanism Failed",
	0xB9, 0, "Audio Play Operation Aborted",
	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
	0x53, 2, "Medium removal prevented",
	0x6f, 0, "Authentication failed during key exchange",
	0x6f, 1, "Key not present",
	0x6f, 2, "Key not established",
	0x6f, 3, "Read without proper authentication",
	0x6f, 4, "Mismatched region to this logical unit",
	0x6f, 5, "Region reset count error",
	0xffff, 0x0, NULL
};
1666 
1667 
1668 /*
1669  * Struct for passing printing information for sense data messages
1670  */
struct sd_sense_info {
	int	ssi_severity;	/* severity of the message (values not shown) */
	int	ssi_pfa_flag;	/* NOTE(review): presumably a predictive */
				/* failure indication -- confirm */
};
1675 
1676 /*
 * Table of function pointers for iostart-side routines. Separate "chains"
1678  * of layered function calls are formed by placing the function pointers
1679  * sequentially in the desired order. Functions are called according to an
1680  * incrementing table index ordering. The last function in each chain must
1681  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1682  * in the sd_iodone_chain[] array.
1683  *
1684  * Note: It may seem more natural to organize both the iostart and iodone
1685  * functions together, into an array of structures (or some similar
 * organization) with a common index, rather than two separate arrays which
 * must be maintained in synchronization. The purpose of this division is
 * to achieve improved performance: individual arrays allow for more
 * effective cache line utilization on certain platforms.
1690  */
1691 
/*
 * Signature shared by every iostart and iodone layering routine: index is
 * the routine's own position in the chain table (the SD_NEXT_* macros pass
 * index +/- 1 to the neighbour), un is the instance soft state and bp is
 * the buf being processed.
 */
typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1693 
1694 
/*
 * iostart-side layering table.  The position of each entry is significant:
 * the SD_CHAIN_*_IOSTART macros name the first index of each chain, and
 * every chain ends with sd_core_iostart.  Adding or removing an entry
 * shifts every later index, so the index macros and the parallel
 * sd_iodone_chain[], sd_initpkt_map[], sd_destroypkt_map[] and
 * sd_chain_type_map[] tables must be updated with it.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */
};
1743 
1744 /*
1745  * Macros to locate the first function of each iostart chain in the
1746  * sd_iostart_chain[] array. These are located by the index in the array.
1747  */
/* Each value is the LOWEST index of its chain in sd_iostart_chain[]. */
#define	SD_CHAIN_DISK_IOSTART			0
#define	SD_CHAIN_DISK_IOSTART_NO_PM		3
#define	SD_CHAIN_RMMEDIA_IOSTART		5
#define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
#define	SD_CHAIN_CHKSUM_IOSTART			12
#define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
#define	SD_CHAIN_USCSI_CMD_IOSTART		19
#define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
#define	SD_CHAIN_DIRECT_CMD_IOSTART		24
#define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1758 
1759 
1760 /*
1761  * Table of function pointers for the iodone-side routines for the driver-
1762  * internal layering mechanism.  The calling sequence for iodone routines
1763  * uses a decrementing table index, so the last routine called in a chain
1764  * must be at the lowest array index location for that chain.  The last
1765  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1766  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1767  * of the functions in an iodone side chain must correspond to the ordering
1768  * of the iostart routines for that chain.  Note that there is no iodone
1769  * side routine that corresponds to sd_core_iostart(), so there is no
1770  * entry in the table for this.
1771  */
1772 
/*
 * iodone-side layering table.  Entries are called in decrementing-index
 * order, so within each chain the routine at the highest index runs first
 * and sd_buf_iodone or sd_uscsi_iodone, at the lowest index, runs last.
 * The table has the same number of entries as sd_iostart_chain[].
 */
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */
};
1821 
1822 
1823 /*
1824  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1825  * each iodone-side chain. These are located by the array index, but as the
1826  * iodone side functions are called in a decrementing-index order, the
1827  * highest index number in each chain must be specified (as these correspond
1828  * to the first function in the iodone chain that will be called by the core
1829  * at IO completion time).
1830  */
1831 
/*
 * Entry index of each iodone-side chain in sd_iodone_chain[].  iodone
 * routines are invoked in decrementing-index order, so each value is the
 * HIGHEST index of its chain.
 *
 * The checksum USCSI chain occupies three slots (21, 22 and 23), so its
 * entry index is 23.  Starting at 22 would bypass the sd_pm_iodone slot
 * that pairs with sd_pm_iostart in the matching iostart chain.
 */
#define	SD_CHAIN_DISK_IODONE			2
#define	SD_CHAIN_DISK_IODONE_NO_PM		4
#define	SD_CHAIN_RMMEDIA_IODONE			8
#define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
#define	SD_CHAIN_CHKSUM_IODONE			15
#define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
#define	SD_CHAIN_USCSI_CMD_IODONE		20
#define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
#define	SD_CHAIN_DIRECT_CMD_IODONE		24
#define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1842 
1843 
1844 
1845 
1846 /*
1847  * Array to map a layering chain index to the appropriate initpkt routine.
1848  * The redundant entries are present so that the index used for accessing
1849  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1850  * with this table as well.
1851  */
/*
 * Packet-initialization routine: takes the buf and a pointer to a
 * scsi_pkt pointer (presumably filled in on success -- confirm) and
 * returns an int status.
 */
typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1853 
/*
 * One entry per slot of sd_iostart_chain[] / sd_iodone_chain[]: buf IO
 * chains (indexes 0-18) use sd_initpkt_for_buf, and all USCSI chains
 * (indexes 19-25) use sd_initpkt_for_uscsi.
 */
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

};
1903 
1904 
1905 /*
 * Array to map a layering chain index to the appropriate destroypkt routine.
1907  * The redundant entries are present so that the index used for accessing
1908  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1909  * with this table as well.
1910  */
/* Packet teardown routine: takes only the buf and returns nothing. */
typedef void (*sd_destroypkt_t)(struct buf *);
1912 
/*
 * One entry per slot of sd_iostart_chain[] / sd_iodone_chain[]: buf IO
 * chains (indexes 0-18) use sd_destroypkt_for_buf, and all USCSI chains
 * (indexes 19-25) use sd_destroypkt_for_uscsi.
 */
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

};
1962 
1963 
1964 
1965 /*
1966  * Array to map a layering chain index to the appropriate chain "type".
1967  * The chain type indicates a specific property/usage of the chain.
1968  * The redundant entries are present so that the index used for accessing
1969  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1970  * with this table as well.
1971  */
1972 
/*
 * Chain type codes.  Values 1-4 are stored in sd_chain_type_map[] and
 * tested by SD_IS_BUFIO() and SD_IS_DIRECT_PRIORITY(); SD_CHAIN_NULL does
 * not appear in that table.
 */
#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
						/* (for error recovery) */
1979 
/*
 * One SD_CHAIN_* type code per slot of sd_iostart_chain[], so that the
 * chain type of an IO can be looked up directly by its chain index (see
 * SD_IS_BUFIO() and SD_IS_DIRECT_PRIORITY()).
 */
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
};
2028 
2029 
/*
 * Macro to return TRUE if the IO has come from the sd_buf_iostart() chain.
 * The test looks up the xbuf's xb_chain_iostart index in
 * sd_chain_type_map[]; xp is a pointer to the xbuf.
 */
#define	SD_IS_BUFIO(xp)			\
	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)

/*
 * Macro to return TRUE if the IO has come from the "direct priority" chain.
 * Uses the same xb_chain_iostart lookup as SD_IS_BUFIO().
 */
#define	SD_IS_DIRECT_PRIORITY(xp)	\
	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2037 
2038 
2039 
2040 /*
2041  * Struct, array, and macros to map a specific chain to the appropriate
2042  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2043  *
2044  * The sd_chain_index_map[] array is used at attach time to set the various
2045  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2046  * chain to be used with the instance. This allows different instances to use
2047  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2048  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2049  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2050  * dynamically & without the use of locking; and (2) a layer may update the
2051  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2052  * to allow for deferred processing of an IO within the same chain from a
2053  * different execution context.
2054  */
2055 
/* Pairs the entry indexes of one chain in the iostart and iodone tables. */
struct sd_chain_index {
	int	sci_iostart_index;	/* an SD_CHAIN_*_IOSTART value */
	int	sci_iodone_index;	/* an SD_CHAIN_*_IODONE value */
};
2060 
/*
 * One { iostart entry, iodone entry } pair per chain.  The row order
 * defines the SD_CHAIN_INFO_* values (rows 0-9), so it must not change.
 */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
};
2073 
2074 
2075 /*
2076  * The following are indexes into the sd_chain_index_map[] array.
2077  */
2078 
/* un->un_buf_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DISK		0
#define	SD_CHAIN_INFO_DISK_NO_PM	1
#define	SD_CHAIN_INFO_RMMEDIA		2
#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
#define	SD_CHAIN_INFO_CHKSUM		4
#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5

/* un->un_uscsi_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_USCSI_CMD		6
/* USCSI with PM disabled is the same as DIRECT */
/* (deliberately the same row, 8, as SD_CHAIN_INFO_DIRECT_CMD below) */
#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
#define	SD_CHAIN_INFO_USCSI_CHKSUM	7

/* un->un_direct_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_DIRECT_CMD	8

/* un->un_priority_chain_type must be set to one of these */
#define	SD_CHAIN_INFO_PRIORITY_CMD	9
2098 
/* size for devid inquiries (0xF0 == 240; presumably bytes -- confirm) */
#define	MAX_INQUIRY_SIZE		0xF0
2101 
2102 /*
2103  * Macros used by functions to pass a given buf(9S) struct along to the
2104  * next function in the layering chain for further processing.
2105  *
2106  * In the following macros, passing more than three arguments to the called
2107  * routines causes the optimizer for the SPARC compiler to stop doing tail
2108  * call elimination which results in significant performance degradation.
2109  */
/*
 * SD_BEGIN_*: call the routine stored at `index' of the given table,
 * passing it that same index.
 * SD_NEXT_IOSTART: call the routine at index + 1 (forward in the chain).
 * SD_NEXT_IODONE: call the routine at index - 1 (backward in the chain).
 *
 * `index' is expanded more than once in each macro, so the argument must
 * be free of side effects.  No bounds checking is performed.
 */
#define	SD_BEGIN_IOSTART(index, un, bp)	\
	((*(sd_iostart_chain[index]))(index, un, bp))

#define	SD_BEGIN_IODONE(index, un, bp)	\
	((*(sd_iodone_chain[index]))(index, un, bp))

#define	SD_NEXT_IOSTART(index, un, bp)				\
	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))

#define	SD_NEXT_IODONE(index, un, bp)				\
	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2121 
2122 /*
2123  *    Function: _init
2124  *
2125  * Description: This is the driver _init(9E) entry point.
2126  *
2127  * Return Code: Returns the value from mod_install(9F) or
2128  *		ddi_soft_state_init(9F) as appropriate.
2129  *
2130  *     Context: Called when driver module loaded.
2131  */
2132 
2133 int
2134 _init(void)
2135 {
2136 	int	err;
2137 
2138 	/* establish driver name from module name */
2139 	sd_label = mod_modname(&modlinkage);
2140 
2141 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2142 		SD_MAXUNIT);
2143 
2144 	if (err != 0) {
2145 		return (err);
2146 	}
2147 
2148 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2149 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2150 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2151 
2152 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2153 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2154 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2155 
2156 	/*
2157 	 * it's ok to init here even for fibre device
2158 	 */
2159 	sd_scsi_probe_cache_init();
2160 
2161 	sd_scsi_target_lun_init();
2162 
2163 	/*
2164 	 * Creating taskq before mod_install ensures that all callers (threads)
2165 	 * that enter the module after a successfull mod_install encounter
2166 	 * a valid taskq.
2167 	 */
2168 	sd_taskq_create();
2169 
2170 	err = mod_install(&modlinkage);
2171 	if (err != 0) {
2172 		/* delete taskq if install fails */
2173 		sd_taskq_delete();
2174 
2175 		mutex_destroy(&sd_detach_mutex);
2176 		mutex_destroy(&sd_log_mutex);
2177 		mutex_destroy(&sd_label_mutex);
2178 
2179 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2180 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2181 		cv_destroy(&sd_tr.srq_inprocess_cv);
2182 
2183 		sd_scsi_probe_cache_fini();
2184 
2185 		sd_scsi_target_lun_fini();
2186 
2187 		ddi_soft_state_fini(&sd_state);
2188 		return (err);
2189 	}
2190 
2191 	return (err);
2192 }
2193 
2194 
2195 /*
2196  *    Function: _fini
2197  *
2198  * Description: This is the driver _fini(9E) entry point.
2199  *
2200  * Return Code: Returns the value from mod_remove(9F)
2201  *
2202  *     Context: Called when driver module is unloaded.
2203  */
2204 
2205 int
2206 _fini(void)
2207 {
2208 	int err;
2209 
2210 	if ((err = mod_remove(&modlinkage)) != 0) {
2211 		return (err);
2212 	}
2213 
2214 	sd_taskq_delete();
2215 
2216 	mutex_destroy(&sd_detach_mutex);
2217 	mutex_destroy(&sd_log_mutex);
2218 	mutex_destroy(&sd_label_mutex);
2219 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2220 
2221 	sd_scsi_probe_cache_fini();
2222 
2223 	sd_scsi_target_lun_fini();
2224 
2225 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2226 	cv_destroy(&sd_tr.srq_inprocess_cv);
2227 
2228 	ddi_soft_state_fini(&sd_state);
2229 
2230 	return (err);
2231 }
2232 
2233 
2234 /*
2235  *    Function: _info
2236  *
2237  * Description: This is the driver _info(9E) entry point.
2238  *
2239  *   Arguments: modinfop - pointer to the driver modinfo structure
2240  *
2241  * Return Code: Returns the value from mod_info(9F).
2242  *
2243  *     Context: Kernel thread context
2244  */
2245 
2246 int
2247 _info(struct modinfo *modinfop)
2248 {
2249 	return (mod_info(&modlinkage, modinfop));
2250 }
2251 
2252 
2253 /*
2254  * The following routines implement the driver message logging facility.
2255  * They provide component- and level- based debug output filtering.
2256  * Output may also be restricted to messages for a single instance by
2257  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2258  * to NULL, then messages for all instances are printed.
2259  *
2260  * These routines have been cloned from each other due to the language
2261  * constraints of macros and variable argument list processing.
2262  */
2263 
2264 
2265 /*
2266  *    Function: sd_log_err
2267  *
2268  * Description: This routine is called by the SD_ERROR macro for debug
2269  *		logging of error conditions.
2270  *
2271  *   Arguments: comp - driver component being logged
2272  *		dev  - pointer to driver info structure
2273  *		fmt  - error string and format to be logged
2274  */
2275 
2276 static void
2277 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2278 {
2279 	va_list		ap;
2280 	dev_info_t	*dev;
2281 
2282 	ASSERT(un != NULL);
2283 	dev = SD_DEVINFO(un);
2284 	ASSERT(dev != NULL);
2285 
2286 	/*
2287 	 * Filter messages based on the global component and level masks.
2288 	 * Also print if un matches the value of sd_debug_un, or if
2289 	 * sd_debug_un is set to NULL.
2290 	 */
2291 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2292 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2293 		mutex_enter(&sd_log_mutex);
2294 		va_start(ap, fmt);
2295 		(void) vsprintf(sd_log_buf, fmt, ap);
2296 		va_end(ap);
2297 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2298 		mutex_exit(&sd_log_mutex);
2299 	}
2300 #ifdef SD_FAULT_INJECTION
2301 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2302 	if (un->sd_injection_mask & comp) {
2303 		mutex_enter(&sd_log_mutex);
2304 		va_start(ap, fmt);
2305 		(void) vsprintf(sd_log_buf, fmt, ap);
2306 		va_end(ap);
2307 		sd_injection_log(sd_log_buf, un);
2308 		mutex_exit(&sd_log_mutex);
2309 	}
2310 #endif
2311 }
2312 
2313 
2314 /*
2315  *    Function: sd_log_info
2316  *
2317  * Description: This routine is called by the SD_INFO macro for debug
2318  *		logging of general purpose informational conditions.
2319  *
2320  *   Arguments: comp - driver component being logged
2321  *		dev  - pointer to driver info structure
2322  *		fmt  - info string and format to be logged
2323  */
2324 
2325 static void
2326 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2327 {
2328 	va_list		ap;
2329 	dev_info_t	*dev;
2330 
2331 	ASSERT(un != NULL);
2332 	dev = SD_DEVINFO(un);
2333 	ASSERT(dev != NULL);
2334 
2335 	/*
2336 	 * Filter messages based on the global component and level masks.
2337 	 * Also print if un matches the value of sd_debug_un, or if
2338 	 * sd_debug_un is set to NULL.
2339 	 */
2340 	if ((sd_component_mask & component) &&
2341 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2342 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2343 		mutex_enter(&sd_log_mutex);
2344 		va_start(ap, fmt);
2345 		(void) vsprintf(sd_log_buf, fmt, ap);
2346 		va_end(ap);
2347 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2348 		mutex_exit(&sd_log_mutex);
2349 	}
2350 #ifdef SD_FAULT_INJECTION
2351 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2352 	if (un->sd_injection_mask & component) {
2353 		mutex_enter(&sd_log_mutex);
2354 		va_start(ap, fmt);
2355 		(void) vsprintf(sd_log_buf, fmt, ap);
2356 		va_end(ap);
2357 		sd_injection_log(sd_log_buf, un);
2358 		mutex_exit(&sd_log_mutex);
2359 	}
2360 #endif
2361 }
2362 
2363 
2364 /*
2365  *    Function: sd_log_trace
2366  *
2367  * Description: This routine is called by the SD_TRACE macro for debug
2368  *		logging of trace conditions (i.e. function entry/exit).
2369  *
2370  *   Arguments: comp - driver component being logged
2371  *		dev  - pointer to driver info structure
2372  *		fmt  - trace string and format to be logged
2373  */
2374 
2375 static void
2376 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2377 {
2378 	va_list		ap;
2379 	dev_info_t	*dev;
2380 
2381 	ASSERT(un != NULL);
2382 	dev = SD_DEVINFO(un);
2383 	ASSERT(dev != NULL);
2384 
2385 	/*
2386 	 * Filter messages based on the global component and level masks.
2387 	 * Also print if un matches the value of sd_debug_un, or if
2388 	 * sd_debug_un is set to NULL.
2389 	 */
2390 	if ((sd_component_mask & component) &&
2391 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2392 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2393 		mutex_enter(&sd_log_mutex);
2394 		va_start(ap, fmt);
2395 		(void) vsprintf(sd_log_buf, fmt, ap);
2396 		va_end(ap);
2397 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2398 		mutex_exit(&sd_log_mutex);
2399 	}
2400 #ifdef SD_FAULT_INJECTION
2401 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2402 	if (un->sd_injection_mask & component) {
2403 		mutex_enter(&sd_log_mutex);
2404 		va_start(ap, fmt);
2405 		(void) vsprintf(sd_log_buf, fmt, ap);
2406 		va_end(ap);
2407 		sd_injection_log(sd_log_buf, un);
2408 		mutex_exit(&sd_log_mutex);
2409 	}
2410 #endif
2411 }
2412 
2413 
2414 /*
2415  *    Function: sdprobe
2416  *
2417  * Description: This is the driver probe(9e) entry point function.
2418  *
2419  *   Arguments: devi - opaque device info handle
2420  *
2421  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2422  *              DDI_PROBE_FAILURE: If the probe failed.
2423  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2424  *				   but may be present in the future.
2425  */
2426 
2427 static int
2428 sdprobe(dev_info_t *devi)
2429 {
2430 	struct scsi_device	*devp;
2431 	int			rval;
2432 	int			instance;
2433 
2434 	/*
2435 	 * if it wasn't for pln, sdprobe could actually be nulldev
2436 	 * in the "__fibre" case.
2437 	 */
2438 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2439 		return (DDI_PROBE_DONTCARE);
2440 	}
2441 
2442 	devp = ddi_get_driver_private(devi);
2443 
2444 	if (devp == NULL) {
2445 		/* Ooops... nexus driver is mis-configured... */
2446 		return (DDI_PROBE_FAILURE);
2447 	}
2448 
2449 	instance = ddi_get_instance(devi);
2450 
2451 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2452 		return (DDI_PROBE_PARTIAL);
2453 	}
2454 
2455 	/*
2456 	 * Call the SCSA utility probe routine to see if we actually
2457 	 * have a target at this SCSI nexus.
2458 	 */
2459 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2460 	case SCSIPROBE_EXISTS:
2461 		switch (devp->sd_inq->inq_dtype) {
2462 		case DTYPE_DIRECT:
2463 			rval = DDI_PROBE_SUCCESS;
2464 			break;
2465 		case DTYPE_RODIRECT:
2466 			/* CDs etc. Can be removable media */
2467 			rval = DDI_PROBE_SUCCESS;
2468 			break;
2469 		case DTYPE_OPTICAL:
2470 			/*
2471 			 * Rewritable optical driver HP115AA
2472 			 * Can also be removable media
2473 			 */
2474 
2475 			/*
2476 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2477 			 * pre solaris 9 sparc sd behavior is required
2478 			 *
2479 			 * If first time through and sd_dtype_optical_bind
2480 			 * has not been set in /etc/system check properties
2481 			 */
2482 
2483 			if (sd_dtype_optical_bind  < 0) {
2484 			    sd_dtype_optical_bind = ddi_prop_get_int
2485 				(DDI_DEV_T_ANY,	devi,	0,
2486 				"optical-device-bind",	1);
2487 			}
2488 
2489 			if (sd_dtype_optical_bind == 0) {
2490 				rval = DDI_PROBE_FAILURE;
2491 			} else {
2492 				rval = DDI_PROBE_SUCCESS;
2493 			}
2494 			break;
2495 
2496 		case DTYPE_NOTPRESENT:
2497 		default:
2498 			rval = DDI_PROBE_FAILURE;
2499 			break;
2500 		}
2501 		break;
2502 	default:
2503 		rval = DDI_PROBE_PARTIAL;
2504 		break;
2505 	}
2506 
2507 	/*
2508 	 * This routine checks for resource allocation prior to freeing,
2509 	 * so it will take care of the "smart probing" case where a
2510 	 * scsi_probe() may or may not have been issued and will *not*
2511 	 * free previously-freed resources.
2512 	 */
2513 	scsi_unprobe(devp);
2514 	return (rval);
2515 }
2516 
2517 
2518 /*
2519  *    Function: sdinfo
2520  *
2521  * Description: This is the driver getinfo(9e) entry point function.
2522  * 		Given the device number, return the devinfo pointer from
2523  *		the scsi_device structure or the instance number
2524  *		associated with the dev_t.
2525  *
2526  *   Arguments: dip     - pointer to device info structure
2527  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2528  *			  DDI_INFO_DEVT2INSTANCE)
2529  *		arg     - driver dev_t
 *		result  - user buffer for request response
2531  *
2532  * Return Code: DDI_SUCCESS
2533  *              DDI_FAILURE
2534  */
2535 /* ARGSUSED */
2536 static int
2537 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2538 {
2539 	struct sd_lun	*un;
2540 	dev_t		dev;
2541 	int		instance;
2542 	int		error;
2543 
2544 	switch (infocmd) {
2545 	case DDI_INFO_DEVT2DEVINFO:
2546 		dev = (dev_t)arg;
2547 		instance = SDUNIT(dev);
2548 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2549 			return (DDI_FAILURE);
2550 		}
2551 		*result = (void *) SD_DEVINFO(un);
2552 		error = DDI_SUCCESS;
2553 		break;
2554 	case DDI_INFO_DEVT2INSTANCE:
2555 		dev = (dev_t)arg;
2556 		instance = SDUNIT(dev);
2557 		*result = (void *)(uintptr_t)instance;
2558 		error = DDI_SUCCESS;
2559 		break;
2560 	default:
2561 		error = DDI_FAILURE;
2562 	}
2563 	return (error);
2564 }
2565 
2566 /*
2567  *    Function: sd_prop_op
2568  *
2569  * Description: This is the driver prop_op(9e) entry point function.
2570  *		Return the number of blocks for the partition in question
2571  *		or forward the request to the property facilities.
2572  *
2573  *   Arguments: dev       - device number
2574  *		dip       - pointer to device info structure
2575  *		prop_op   - property operator
2576  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2577  *		name      - pointer to property name
2578  *		valuep    - pointer or address of the user buffer
2579  *		lengthp   - property length
2580  *
2581  * Return Code: DDI_PROP_SUCCESS
2582  *              DDI_PROP_NOT_FOUND
2583  *              DDI_PROP_UNDEFINED
2584  *              DDI_PROP_NO_MEMORY
2585  *              DDI_PROP_BUF_TOO_SMALL
2586  */
2587 
2588 static int
2589 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2590 	char *name, caddr_t valuep, int *lengthp)
2591 {
2592 	int		instance = ddi_get_instance(dip);
2593 	struct sd_lun	*un;
2594 	uint64_t	nblocks64;
2595 
2596 	/*
2597 	 * Our dynamic properties are all device specific and size oriented.
2598 	 * Requests issued under conditions where size is valid are passed
2599 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2600 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2601 	 */
2602 	un = ddi_get_soft_state(sd_state, instance);
2603 	if ((dev == DDI_DEV_T_ANY) || (un == NULL)) {
2604 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2605 		    name, valuep, lengthp));
2606 	} else if (!SD_IS_VALID_LABEL(un)) {
2607 		return (ddi_prop_op(dev, dip, prop_op, mod_flags, name,
2608 		    valuep, lengthp));
2609 	}
2610 
2611 	/* get nblocks value */
2612 	ASSERT(!mutex_owned(SD_MUTEX(un)));
2613 
2614 	(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2615 	    (diskaddr_t *)&nblocks64, NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
2616 
2617 	return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2618 	    name, valuep, lengthp, nblocks64));
2619 }
2620 
2621 /*
2622  * The following functions are for smart probing:
2623  * sd_scsi_probe_cache_init()
2624  * sd_scsi_probe_cache_fini()
2625  * sd_scsi_clear_probe_cache()
2626  * sd_scsi_probe_with_cache()
2627  */
2628 
2629 /*
2630  *    Function: sd_scsi_probe_cache_init
2631  *
2632  * Description: Initializes the probe response cache mutex and head pointer.
2633  *
2634  *     Context: Kernel thread context
2635  */
2636 
2637 static void
2638 sd_scsi_probe_cache_init(void)
2639 {
2640 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2641 	sd_scsi_probe_cache_head = NULL;
2642 }
2643 
2644 
2645 /*
2646  *    Function: sd_scsi_probe_cache_fini
2647  *
2648  * Description: Frees all resources associated with the probe response cache.
2649  *
2650  *     Context: Kernel thread context
2651  */
2652 
2653 static void
2654 sd_scsi_probe_cache_fini(void)
2655 {
2656 	struct sd_scsi_probe_cache *cp;
2657 	struct sd_scsi_probe_cache *ncp;
2658 
2659 	/* Clean up our smart probing linked list */
2660 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2661 		ncp = cp->next;
2662 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2663 	}
2664 	sd_scsi_probe_cache_head = NULL;
2665 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2666 }
2667 
2668 
2669 /*
2670  *    Function: sd_scsi_clear_probe_cache
2671  *
2672  * Description: This routine clears the probe response cache. This is
2673  *		done when open() returns ENXIO so that when deferred
2674  *		attach is attempted (possibly after a device has been
2675  *		turned on) we will retry the probe. Since we don't know
2676  *		which target we failed to open, we just clear the
2677  *		entire cache.
2678  *
2679  *     Context: Kernel thread context
2680  */
2681 
2682 static void
2683 sd_scsi_clear_probe_cache(void)
2684 {
2685 	struct sd_scsi_probe_cache	*cp;
2686 	int				i;
2687 
2688 	mutex_enter(&sd_scsi_probe_cache_mutex);
2689 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2690 		/*
2691 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2692 		 * force probing to be performed the next time
2693 		 * sd_scsi_probe_with_cache is called.
2694 		 */
2695 		for (i = 0; i < NTARGETS_WIDE; i++) {
2696 			cp->cache[i] = SCSIPROBE_EXISTS;
2697 		}
2698 	}
2699 	mutex_exit(&sd_scsi_probe_cache_mutex);
2700 }
2701 
2702 
2703 /*
2704  *    Function: sd_scsi_probe_with_cache
2705  *
2706  * Description: This routine implements support for a scsi device probe
2707  *		with cache. The driver maintains a cache of the target
2708  *		responses to scsi probes. If we get no response from a
2709  *		target during a probe inquiry, we remember that, and we
2710  *		avoid additional calls to scsi_probe on non-zero LUNs
2711  *		on the same target until the cache is cleared. By doing
2712  *		so we avoid the 1/4 sec selection timeout for nonzero
2713  *		LUNs. lun0 of a target is always probed.
2714  *
2715  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
 *              waitfn   - indicates what the allocator routines should
 *			   do when resources are not available. This value
 *			   is passed on to scsi_probe() when that routine
 *			   is called.
2720  *
2721  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2722  *		otherwise the value returned by scsi_probe(9F).
2723  *
2724  *     Context: Kernel thread context
2725  */
2726 
2727 static int
2728 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2729 {
2730 	struct sd_scsi_probe_cache	*cp;
2731 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2732 	int		lun, tgt;
2733 
2734 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2735 	    SCSI_ADDR_PROP_LUN, 0);
2736 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2737 	    SCSI_ADDR_PROP_TARGET, -1);
2738 
2739 	/* Make sure caching enabled and target in range */
2740 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2741 		/* do it the old way (no cache) */
2742 		return (scsi_probe(devp, waitfn));
2743 	}
2744 
2745 	mutex_enter(&sd_scsi_probe_cache_mutex);
2746 
2747 	/* Find the cache for this scsi bus instance */
2748 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2749 		if (cp->pdip == pdip) {
2750 			break;
2751 		}
2752 	}
2753 
2754 	/* If we can't find a cache for this pdip, create one */
2755 	if (cp == NULL) {
2756 		int i;
2757 
2758 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2759 		    KM_SLEEP);
2760 		cp->pdip = pdip;
2761 		cp->next = sd_scsi_probe_cache_head;
2762 		sd_scsi_probe_cache_head = cp;
2763 		for (i = 0; i < NTARGETS_WIDE; i++) {
2764 			cp->cache[i] = SCSIPROBE_EXISTS;
2765 		}
2766 	}
2767 
2768 	mutex_exit(&sd_scsi_probe_cache_mutex);
2769 
2770 	/* Recompute the cache for this target if LUN zero */
2771 	if (lun == 0) {
2772 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2773 	}
2774 
2775 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2776 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2777 		return (SCSIPROBE_NORESP);
2778 	}
2779 
2780 	/* Do the actual probe; save & return the result */
2781 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2782 }
2783 
2784 
2785 /*
2786  *    Function: sd_scsi_target_lun_init
2787  *
2788  * Description: Initializes the attached lun chain mutex and head pointer.
2789  *
2790  *     Context: Kernel thread context
2791  */
2792 
2793 static void
2794 sd_scsi_target_lun_init(void)
2795 {
2796 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2797 	sd_scsi_target_lun_head = NULL;
2798 }
2799 
2800 
2801 /*
2802  *    Function: sd_scsi_target_lun_fini
2803  *
2804  * Description: Frees all resources associated with the attached lun
2805  *              chain
2806  *
2807  *     Context: Kernel thread context
2808  */
2809 
2810 static void
2811 sd_scsi_target_lun_fini(void)
2812 {
2813 	struct sd_scsi_hba_tgt_lun	*cp;
2814 	struct sd_scsi_hba_tgt_lun	*ncp;
2815 
2816 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2817 		ncp = cp->next;
2818 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2819 	}
2820 	sd_scsi_target_lun_head = NULL;
2821 	mutex_destroy(&sd_scsi_target_lun_mutex);
2822 }
2823 
2824 
2825 /*
2826  *    Function: sd_scsi_get_target_lun_count
2827  *
2828  * Description: This routine will check in the attached lun chain to see
2829  * 		how many luns are attached on the required SCSI controller
2830  * 		and target. Currently, some capabilities like tagged queue
2831  *		are supported per target based by HBA. So all luns in a
2832  *		target have the same capabilities. Based on this assumption,
2833  * 		sd should only set these capabilities once per target. This
2834  *		function is called when sd needs to decide how many luns
2835  *		already attached on a target.
2836  *
2837  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2838  *			  controller device.
2839  *              target	- The target ID on the controller's SCSI bus.
2840  *
2841  * Return Code: The number of luns attached on the required target and
2842  *		controller.
2843  *		-1 if target ID is not in parallel SCSI scope or the given
2844  * 		dip is not in the chain.
2845  *
2846  *     Context: Kernel thread context
2847  */
2848 
2849 static int
2850 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2851 {
2852 	struct sd_scsi_hba_tgt_lun	*cp;
2853 
2854 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2855 		return (-1);
2856 	}
2857 
2858 	mutex_enter(&sd_scsi_target_lun_mutex);
2859 
2860 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2861 		if (cp->pdip == dip) {
2862 			break;
2863 		}
2864 	}
2865 
2866 	mutex_exit(&sd_scsi_target_lun_mutex);
2867 
2868 	if (cp == NULL) {
2869 		return (-1);
2870 	}
2871 
2872 	return (cp->nlun[target]);
2873 }
2874 
2875 
2876 /*
2877  *    Function: sd_scsi_update_lun_on_target
2878  *
2879  * Description: This routine is used to update the attached lun chain when a
2880  *		lun is attached or detached on a target.
2881  *
2882  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2883  *                        controller device.
2884  *              target  - The target ID on the controller's SCSI bus.
2885  *		flag	- Indicate the lun is attached or detached.
2886  *
2887  *     Context: Kernel thread context
2888  */
2889 
2890 static void
2891 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2892 {
2893 	struct sd_scsi_hba_tgt_lun	*cp;
2894 
2895 	mutex_enter(&sd_scsi_target_lun_mutex);
2896 
2897 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2898 		if (cp->pdip == dip) {
2899 			break;
2900 		}
2901 	}
2902 
2903 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2904 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2905 		    KM_SLEEP);
2906 		cp->pdip = dip;
2907 		cp->next = sd_scsi_target_lun_head;
2908 		sd_scsi_target_lun_head = cp;
2909 	}
2910 
2911 	mutex_exit(&sd_scsi_target_lun_mutex);
2912 
2913 	if (cp != NULL) {
2914 		if (flag == SD_SCSI_LUN_ATTACH) {
2915 			cp->nlun[target] ++;
2916 		} else {
2917 			cp->nlun[target] --;
2918 		}
2919 	}
2920 }
2921 
2922 
2923 /*
2924  *    Function: sd_spin_up_unit
2925  *
2926  * Description: Issues the following commands to spin-up the device:
2927  *		START STOP UNIT, and INQUIRY.
2928  *
2929  *   Arguments: un - driver soft state (unit) structure
2930  *
2931  * Return Code: 0 - success
2932  *		EIO - failure
2933  *		EACCES - reservation conflict
2934  *
2935  *     Context: Kernel thread context
2936  */
2937 
2938 static int
2939 sd_spin_up_unit(struct sd_lun *un)
2940 {
2941 	size_t	resid		= 0;
2942 	int	has_conflict	= FALSE;
2943 	uchar_t *bufaddr;
2944 
2945 	ASSERT(un != NULL);
2946 
2947 	/*
2948 	 * Send a throwaway START UNIT command.
2949 	 *
2950 	 * If we fail on this, we don't care presently what precisely
2951 	 * is wrong.  EMC's arrays will also fail this with a check
2952 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2953 	 * we don't want to fail the attach because it may become
2954 	 * "active" later.
2955 	 */
2956 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2957 	    == EACCES)
2958 		has_conflict = TRUE;
2959 
2960 	/*
2961 	 * Send another INQUIRY command to the target. This is necessary for
2962 	 * non-removable media direct access devices because their INQUIRY data
2963 	 * may not be fully qualified until they are spun up (perhaps via the
2964 	 * START command above).  Note: This seems to be needed for some
2965 	 * legacy devices only.) The INQUIRY command should succeed even if a
2966 	 * Reservation Conflict is present.
2967 	 */
2968 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2969 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2970 		kmem_free(bufaddr, SUN_INQSIZE);
2971 		return (EIO);
2972 	}
2973 
2974 	/*
2975 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2976 	 * Note that this routine does not return a failure here even if the
2977 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2978 	 */
2979 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2980 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2981 	}
2982 
2983 	kmem_free(bufaddr, SUN_INQSIZE);
2984 
2985 	/* If we hit a reservation conflict above, tell the caller. */
2986 	if (has_conflict == TRUE) {
2987 		return (EACCES);
2988 	}
2989 
2990 	return (0);
2991 }
2992 
2993 #ifdef _LP64
2994 /*
2995  *    Function: sd_enable_descr_sense
2996  *
2997  * Description: This routine attempts to select descriptor sense format
2998  *		using the Control mode page.  Devices that support 64 bit
2999  *		LBAs (for >2TB luns) should also implement descriptor
3000  *		sense data so we will call this function whenever we see
3001  *		a lun larger than 2TB.  If for some reason the device
3002  *		supports 64 bit LBAs but doesn't support descriptor sense
3003  *		presumably the mode select will fail.  Everything will
3004  *		continue to work normally except that we will not get
3005  *		complete sense data for commands that fail with an LBA
3006  *		larger than 32 bits.
3007  *
3008  *   Arguments: un - driver soft state (unit) structure
3009  *
3010  *     Context: Kernel thread context only
3011  */
3012 
3013 static void
3014 sd_enable_descr_sense(struct sd_lun *un)
3015 {
3016 	uchar_t			*header;
3017 	struct mode_control_scsi3 *ctrl_bufp;
3018 	size_t			buflen;
3019 	size_t			bd_len;
3020 
3021 	/*
3022 	 * Read MODE SENSE page 0xA, Control Mode Page
3023 	 */
3024 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3025 	    sizeof (struct mode_control_scsi3);
3026 	header = kmem_zalloc(buflen, KM_SLEEP);
3027 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3028 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3029 		SD_ERROR(SD_LOG_COMMON, un,
3030 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3031 		goto eds_exit;
3032 	}
3033 
3034 	/*
3035 	 * Determine size of Block Descriptors in order to locate
3036 	 * the mode page data. ATAPI devices return 0, SCSI devices
3037 	 * should return MODE_BLK_DESC_LENGTH.
3038 	 */
3039 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3040 
3041 	/* Clear the mode data length field for MODE SELECT */
3042 	((struct mode_header *)header)->length = 0;
3043 
3044 	ctrl_bufp = (struct mode_control_scsi3 *)
3045 	    (header + MODE_HEADER_LENGTH + bd_len);
3046 
3047 	/*
3048 	 * If the page length is smaller than the expected value,
3049 	 * the target device doesn't support D_SENSE. Bail out here.
3050 	 */
3051 	if (ctrl_bufp->mode_page.length <
3052 	    sizeof (struct mode_control_scsi3) - 2) {
3053 		SD_ERROR(SD_LOG_COMMON, un,
3054 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3055 		goto eds_exit;
3056 	}
3057 
3058 	/*
3059 	 * Clear PS bit for MODE SELECT
3060 	 */
3061 	ctrl_bufp->mode_page.ps = 0;
3062 
3063 	/*
3064 	 * Set D_SENSE to enable descriptor sense format.
3065 	 */
3066 	ctrl_bufp->d_sense = 1;
3067 
3068 	/*
3069 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3070 	 */
3071 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3072 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3073 		SD_INFO(SD_LOG_COMMON, un,
3074 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3075 		goto eds_exit;
3076 	}
3077 
3078 eds_exit:
3079 	kmem_free(header, buflen);
3080 }
3081 
3082 /*
3083  *    Function: sd_reenable_dsense_task
3084  *
3085  * Description: Re-enable descriptor sense after device or bus reset
3086  *
3087  *     Context: Executes in a taskq() thread context
3088  */
3089 static void
3090 sd_reenable_dsense_task(void *arg)
3091 {
3092 	struct	sd_lun	*un = arg;
3093 
3094 	ASSERT(un != NULL);
3095 	sd_enable_descr_sense(un);
3096 }
3097 #endif /* _LP64 */
3098 
3099 /*
3100  *    Function: sd_set_mmc_caps
3101  *
3102  * Description: This routine determines if the device is MMC compliant and if
3103  *		the device supports CDDA via a mode sense of the CDVD
3104  *		capabilities mode page. Also checks if the device is a
3105  *		dvdram writable device.
3106  *
3107  *   Arguments: un - driver soft state (unit) structure
3108  *
3109  *     Context: Kernel thread context only
3110  */
3111 
3112 static void
3113 sd_set_mmc_caps(struct sd_lun *un)
3114 {
3115 	struct mode_header_grp2		*sense_mhp;
3116 	uchar_t				*sense_page;
3117 	caddr_t				buf;
3118 	int				bd_len;
3119 	int				status;
3120 	struct uscsi_cmd		com;
3121 	int				rtn;
3122 	uchar_t				*out_data_rw, *out_data_hd;
3123 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3124 
3125 	ASSERT(un != NULL);
3126 
3127 	/*
3128 	 * The flags which will be set in this function are - mmc compliant,
3129 	 * dvdram writable device, cdda support. Initialize them to FALSE
3130 	 * and if a capability is detected - it will be set to TRUE.
3131 	 */
3132 	un->un_f_mmc_cap = FALSE;
3133 	un->un_f_dvdram_writable_device = FALSE;
3134 	un->un_f_cfg_cdda = FALSE;
3135 
3136 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3137 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3138 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3139 
3140 	if (status != 0) {
3141 		/* command failed; just return */
3142 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3143 		return;
3144 	}
3145 	/*
3146 	 * If the mode sense request for the CDROM CAPABILITIES
3147 	 * page (0x2A) succeeds the device is assumed to be MMC.
3148 	 */
3149 	un->un_f_mmc_cap = TRUE;
3150 
3151 	/* Get to the page data */
3152 	sense_mhp = (struct mode_header_grp2 *)buf;
3153 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3154 	    sense_mhp->bdesc_length_lo;
3155 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3156 		/*
3157 		 * We did not get back the expected block descriptor
3158 		 * length so we cannot determine if the device supports
3159 		 * CDDA. However, we still indicate the device is MMC
3160 		 * according to the successful response to the page
3161 		 * 0x2A mode sense request.
3162 		 */
3163 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3164 		    "sd_set_mmc_caps: Mode Sense returned "
3165 		    "invalid block descriptor length\n");
3166 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3167 		return;
3168 	}
3169 
3170 	/* See if read CDDA is supported */
3171 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3172 	    bd_len);
3173 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3174 
3175 	/* See if writing DVD RAM is supported. */
3176 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3177 	if (un->un_f_dvdram_writable_device == TRUE) {
3178 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3179 		return;
3180 	}
3181 
3182 	/*
3183 	 * If the device presents DVD or CD capabilities in the mode
3184 	 * page, we can return here since a RRD will not have
3185 	 * these capabilities.
3186 	 */
3187 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3188 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3189 		return;
3190 	}
3191 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3192 
3193 	/*
3194 	 * If un->un_f_dvdram_writable_device is still FALSE,
3195 	 * check for a Removable Rigid Disk (RRD).  A RRD
3196 	 * device is identified by the features RANDOM_WRITABLE and
3197 	 * HARDWARE_DEFECT_MANAGEMENT.
3198 	 */
3199 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3200 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3201 
3202 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3203 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3204 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3205 	if (rtn != 0) {
3206 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3207 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3208 		return;
3209 	}
3210 
3211 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3212 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3213 
3214 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3215 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3216 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3217 	if (rtn == 0) {
3218 		/*
3219 		 * We have good information, check for random writable
3220 		 * and hardware defect features.
3221 		 */
3222 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3223 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3224 			un->un_f_dvdram_writable_device = TRUE;
3225 		}
3226 	}
3227 
3228 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3229 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3230 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3231 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3232 }
3233 
3234 /*
3235  *    Function: sd_check_for_writable_cd
3236  *
3237  * Description: This routine determines if the media in the device is
3238  *		writable or not. It uses the get configuration command (0x46)
3239  *		to determine if the media is writable
3240  *
3241  *   Arguments: un - driver soft state (unit) structure
3242  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3243  *                           chain and the normal command waitq, or
3244  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3245  *                           "direct" chain and bypass the normal command
3246  *                           waitq.
3247  *
3248  *     Context: Never called at interrupt context.
3249  */
3250 
3251 static void
3252 sd_check_for_writable_cd(struct sd_lun *un, int path_flag)
3253 {
3254 	struct uscsi_cmd		com;
3255 	uchar_t				*out_data;
3256 	uchar_t				*rqbuf;
3257 	int				rtn;
3258 	uchar_t				*out_data_rw, *out_data_hd;
3259 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3260 	struct mode_header_grp2		*sense_mhp;
3261 	uchar_t				*sense_page;
3262 	caddr_t				buf;
3263 	int				bd_len;
3264 	int				status;
3265 
3266 	ASSERT(un != NULL);
3267 	ASSERT(mutex_owned(SD_MUTEX(un)));
3268 
3269 	/*
3270 	 * Initialize the writable media to false, if configuration info.
3271 	 * tells us otherwise then only we will set it.
3272 	 */
3273 	un->un_f_mmc_writable_media = FALSE;
3274 	mutex_exit(SD_MUTEX(un));
3275 
3276 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3277 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3278 
3279 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3280 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3281 
3282 	mutex_enter(SD_MUTEX(un));
3283 	if (rtn == 0) {
3284 		/*
3285 		 * We have good information, check for writable DVD.
3286 		 */
3287 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3288 			un->un_f_mmc_writable_media = TRUE;
3289 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3290 			kmem_free(rqbuf, SENSE_LENGTH);
3291 			return;
3292 		}
3293 	}
3294 
3295 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3296 	kmem_free(rqbuf, SENSE_LENGTH);
3297 
3298 	/*
3299 	 * Determine if this is a RRD type device.
3300 	 */
3301 	mutex_exit(SD_MUTEX(un));
3302 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3303 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3304 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3305 	mutex_enter(SD_MUTEX(un));
3306 	if (status != 0) {
3307 		/* command failed; just return */
3308 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3309 		return;
3310 	}
3311 
3312 	/* Get to the page data */
3313 	sense_mhp = (struct mode_header_grp2 *)buf;
3314 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3315 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3316 		/*
3317 		 * We did not get back the expected block descriptor length so
3318 		 * we cannot check the mode page.
3319 		 */
3320 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3321 		    "sd_check_for_writable_cd: Mode Sense returned "
3322 		    "invalid block descriptor length\n");
3323 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3324 		return;
3325 	}
3326 
3327 	/*
3328 	 * If the device presents DVD or CD capabilities in the mode
3329 	 * page, we can return here since a RRD device will not have
3330 	 * these capabilities.
3331 	 */
3332 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3333 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3334 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3335 		return;
3336 	}
3337 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3338 
3339 	/*
3340 	 * If un->un_f_mmc_writable_media is still FALSE,
3341 	 * check for RRD type media.  A RRD device is identified
3342 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3343 	 */
3344 	mutex_exit(SD_MUTEX(un));
3345 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3346 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3347 
3348 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3349 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3350 	    RANDOM_WRITABLE, path_flag);
3351 	if (rtn != 0) {
3352 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3353 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3354 		mutex_enter(SD_MUTEX(un));
3355 		return;
3356 	}
3357 
3358 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3359 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3360 
3361 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3362 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3363 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3364 	mutex_enter(SD_MUTEX(un));
3365 	if (rtn == 0) {
3366 		/*
3367 		 * We have good information, check for random writable
3368 		 * and hardware defect features as current.
3369 		 */
3370 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3371 		    (out_data_rw[10] & 0x1) &&
3372 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3373 		    (out_data_hd[10] & 0x1)) {
3374 			un->un_f_mmc_writable_media = TRUE;
3375 		}
3376 	}
3377 
3378 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3379 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3380 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3381 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3382 }
3383 
3384 /*
3385  *    Function: sd_read_unit_properties
3386  *
3387  * Description: The following implements a property lookup mechanism.
3388  *		Properties for particular disks (keyed on vendor, model
3389  *		and rev numbers) are sought in the sd.conf file via
3390  *		sd_process_sdconf_file(), and if not found there, are
3391  *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table(). Once located, the properties
 *		are used to update the driver unit structure.
3394  *
3395  *   Arguments: un - driver soft state (unit) structure
3396  */
3397 
3398 static void
3399 sd_read_unit_properties(struct sd_lun *un)
3400 {
3401 	/*
3402 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3403 	 * the "sd-config-list" property (from the sd.conf file) or if
3404 	 * there was not a match for the inquiry vid/pid. If this event
3405 	 * occurs the static driver configuration table is searched for
3406 	 * a match.
3407 	 */
3408 	ASSERT(un != NULL);
3409 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3410 		sd_process_sdconf_table(un);
3411 	}
3412 
3413 	/* check for LSI device */
3414 	sd_is_lsi(un);
3415 
3416 
3417 }
3418 
3419 
3420 /*
3421  *    Function: sd_process_sdconf_file
3422  *
3423  * Description: Use ddi_getlongprop to obtain the properties from the
3424  *		driver's config file (ie, sd.conf) and update the driver
3425  *		soft state structure accordingly.
3426  *
3427  *   Arguments: un - driver soft state (unit) structure
3428  *
3429  * Return Code: SD_SUCCESS - The properties were successfully set according
3430  *			     to the driver configuration file.
3431  *		SD_FAILURE - The driver config list was not obtained or
3432  *			     there was no vid/pid match. This indicates that
3433  *			     the static config table should be used.
3434  *
3435  * The config file has a property, "sd-config-list", which consists of
3436  * one or more duplets as follows:
3437  *
3438  *  sd-config-list=
3439  *	<duplet>,
3440  *	[<duplet>,]
3441  *	[<duplet>];
3442  *
3443  * The structure of each duplet is as follows:
3444  *
3445  *  <duplet>:= <vid+pid>,<data-property-name_list>
3446  *
3447  * The first entry of the duplet is the device ID string (the concatenated
3448  * vid & pid; not to be confused with a device_id).  This is defined in
3449  * the same way as in the sd_disk_table.
3450  *
3451  * The second part of the duplet is a string that identifies a
3452  * data-property-name-list. The data-property-name-list is defined as
3453  * follows:
3454  *
3455  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3456  *
3457  * The syntax of <data-property-name> depends on the <version> field.
3458  *
3459  * If version = SD_CONF_VERSION_1 we have the following syntax:
3460  *
3461  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3462  *
3463  * where the prop0 value will be used to set prop0 if bit0 set in the
3464  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3465  *
3466  */
3467 
3468 static int
3469 sd_process_sdconf_file(struct sd_lun *un)
3470 {
3471 	char	*config_list = NULL;
3472 	int	config_list_len;
3473 	int	len;
3474 	int	dupletlen = 0;
3475 	char	*vidptr;
3476 	int	vidlen;
3477 	char	*dnlist_ptr;
3478 	char	*dataname_ptr;
3479 	int	dnlist_len;
3480 	int	dataname_len;
3481 	int	*data_list;
3482 	int	data_list_len;
3483 	int	rval = SD_FAILURE;
3484 	int	i;
3485 
3486 	ASSERT(un != NULL);
3487 
3488 	/* Obtain the configuration list associated with the .conf file */
3489 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3490 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3491 	    != DDI_PROP_SUCCESS) {
3492 		return (SD_FAILURE);
3493 	}
3494 
3495 	/*
3496 	 * Compare vids in each duplet to the inquiry vid - if a match is
3497 	 * made, get the data value and update the soft state structure
3498 	 * accordingly.
3499 	 *
3500 	 * Note: This algorithm is complex and difficult to maintain. It should
3501 	 * be replaced with a more robust implementation.
3502 	 */
3503 	for (len = config_list_len, vidptr = config_list; len > 0;
3504 	    vidptr += dupletlen, len -= dupletlen) {
3505 		/*
3506 		 * Note: The assumption here is that each vid entry is on
3507 		 * a unique line from its associated duplet.
3508 		 */
3509 		vidlen = dupletlen = (int)strlen(vidptr);
3510 		if ((vidlen == 0) ||
3511 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3512 			dupletlen++;
3513 			continue;
3514 		}
3515 
3516 		/*
3517 		 * dnlist contains 1 or more blank separated
3518 		 * data-property-name entries
3519 		 */
3520 		dnlist_ptr = vidptr + vidlen + 1;
3521 		dnlist_len = (int)strlen(dnlist_ptr);
3522 		dupletlen += dnlist_len + 2;
3523 
3524 		/*
3525 		 * Set a pointer for the first data-property-name
3526 		 * entry in the list
3527 		 */
3528 		dataname_ptr = dnlist_ptr;
3529 		dataname_len = 0;
3530 
3531 		/*
3532 		 * Loop through all data-property-name entries in the
3533 		 * data-property-name-list setting the properties for each.
3534 		 */
3535 		while (dataname_len < dnlist_len) {
3536 			int version;
3537 
3538 			/*
3539 			 * Determine the length of the current
3540 			 * data-property-name entry by indexing until a
3541 			 * blank or NULL is encountered. When the space is
3542 			 * encountered reset it to a NULL for compliance
3543 			 * with ddi_getlongprop().
3544 			 */
3545 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3546 			    (dataname_ptr[i] != '\0')); i++) {
3547 				;
3548 			}
3549 
3550 			dataname_len += i;
3551 			/* If not null terminated, Make it so */
3552 			if (dataname_ptr[i] == ' ') {
3553 				dataname_ptr[i] = '\0';
3554 			}
3555 			dataname_len++;
3556 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3557 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3558 			    vidptr, dataname_ptr);
3559 
3560 			/* Get the data list */
3561 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3562 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3563 			    != DDI_PROP_SUCCESS) {
3564 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 				    "sd_process_sdconf_file: data property (%s)"
3566 				    " has no value\n", dataname_ptr);
3567 				dataname_ptr = dnlist_ptr + dataname_len;
3568 				continue;
3569 			}
3570 
3571 			version = data_list[0];
3572 
3573 			if (version == SD_CONF_VERSION_1) {
3574 				sd_tunables values;
3575 
3576 				/* Set the properties */
3577 				if (sd_chk_vers1_data(un, data_list[1],
3578 				    &data_list[2], data_list_len, dataname_ptr)
3579 				    == SD_SUCCESS) {
3580 					sd_get_tunables_from_conf(un,
3581 					    data_list[1], &data_list[2],
3582 					    &values);
3583 					sd_set_vers1_properties(un,
3584 					    data_list[1], &values);
3585 					rval = SD_SUCCESS;
3586 				} else {
3587 					rval = SD_FAILURE;
3588 				}
3589 			} else {
3590 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3591 				    "data property %s version 0x%x is invalid.",
3592 				    dataname_ptr, version);
3593 				rval = SD_FAILURE;
3594 			}
3595 			kmem_free(data_list, data_list_len);
3596 			dataname_ptr = dnlist_ptr + dataname_len;
3597 		}
3598 	}
3599 
3600 	/* free up the memory allocated by ddi_getlongprop */
3601 	if (config_list) {
3602 		kmem_free(config_list, config_list_len);
3603 	}
3604 
3605 	return (rval);
3606 }
3607 
3608 /*
3609  *    Function: sd_get_tunables_from_conf()
3610  *
3611  *
3612  *    This function reads the data list from the sd.conf file and pulls
3613  *    the values that can have numeric values as arguments and places
3614  *    the values in the apropriate sd_tunables member.
3615  *    Since the order of the data list members varies across platforms
3616  *    This function reads them from the data list in a platform specific
3617  *    order and places them into the correct sd_tunable member that is
3618  *    a consistant across all platforms.
3619  */
3620 static void
3621 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3622     sd_tunables *values)
3623 {
3624 	int i;
3625 	int mask;
3626 
3627 	bzero(values, sizeof (sd_tunables));
3628 
3629 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3630 
3631 		mask = 1 << i;
3632 		if (mask > flags) {
3633 			break;
3634 		}
3635 
3636 		switch (mask & flags) {
3637 		case 0:	/* This mask bit not set in flags */
3638 			continue;
3639 		case SD_CONF_BSET_THROTTLE:
3640 			values->sdt_throttle = data_list[i];
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_get_tunables_from_conf: throttle = %d\n",
3643 			    values->sdt_throttle);
3644 			break;
3645 		case SD_CONF_BSET_CTYPE:
3646 			values->sdt_ctype = data_list[i];
3647 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3648 			    "sd_get_tunables_from_conf: ctype = %d\n",
3649 			    values->sdt_ctype);
3650 			break;
3651 		case SD_CONF_BSET_NRR_COUNT:
3652 			values->sdt_not_rdy_retries = data_list[i];
3653 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3654 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3655 			    values->sdt_not_rdy_retries);
3656 			break;
3657 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3658 			values->sdt_busy_retries = data_list[i];
3659 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3660 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3661 			    values->sdt_busy_retries);
3662 			break;
3663 		case SD_CONF_BSET_RST_RETRIES:
3664 			values->sdt_reset_retries = data_list[i];
3665 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3666 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3667 			    values->sdt_reset_retries);
3668 			break;
3669 		case SD_CONF_BSET_RSV_REL_TIME:
3670 			values->sdt_reserv_rel_time = data_list[i];
3671 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3672 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3673 			    values->sdt_reserv_rel_time);
3674 			break;
3675 		case SD_CONF_BSET_MIN_THROTTLE:
3676 			values->sdt_min_throttle = data_list[i];
3677 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3678 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3679 			    values->sdt_min_throttle);
3680 			break;
3681 		case SD_CONF_BSET_DISKSORT_DISABLED:
3682 			values->sdt_disk_sort_dis = data_list[i];
3683 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3684 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3685 			    values->sdt_disk_sort_dis);
3686 			break;
3687 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3688 			values->sdt_lun_reset_enable = data_list[i];
3689 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3690 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3691 			    "\n", values->sdt_lun_reset_enable);
3692 			break;
3693 		}
3694 	}
3695 }
3696 
3697 /*
3698  *    Function: sd_process_sdconf_table
3699  *
3700  * Description: Search the static configuration table for a match on the
3701  *		inquiry vid/pid and update the driver soft state structure
3702  *		according to the table property values for the device.
3703  *
3704  *		The form of a configuration table entry is:
3705  *		  <vid+pid>,<flags>,<property-data>
3706  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3707  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3708  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3709  *
3710  *   Arguments: un - driver soft state (unit) structure
3711  */
3712 
3713 static void
3714 sd_process_sdconf_table(struct sd_lun *un)
3715 {
3716 	char	*id = NULL;
3717 	int	table_index;
3718 	int	idlen;
3719 
3720 	ASSERT(un != NULL);
3721 	for (table_index = 0; table_index < sd_disk_table_size;
3722 	    table_index++) {
3723 		id = sd_disk_table[table_index].device_id;
3724 		idlen = strlen(id);
3725 		if (idlen == 0) {
3726 			continue;
3727 		}
3728 
3729 		/*
3730 		 * The static configuration table currently does not
3731 		 * implement version 10 properties. Additionally,
3732 		 * multiple data-property-name entries are not
3733 		 * implemented in the static configuration table.
3734 		 */
3735 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3736 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3737 			    "sd_process_sdconf_table: disk %s\n", id);
3738 			sd_set_vers1_properties(un,
3739 			    sd_disk_table[table_index].flags,
3740 			    sd_disk_table[table_index].properties);
3741 			break;
3742 		}
3743 	}
3744 }
3745 
3746 
3747 /*
3748  *    Function: sd_sdconf_id_match
3749  *
3750  * Description: This local function implements a case sensitive vid/pid
3751  *		comparison as well as the boundary cases of wild card and
3752  *		multiple blanks.
3753  *
3754  *		Note: An implicit assumption made here is that the scsi
3755  *		inquiry structure will always keep the vid, pid and
3756  *		revision strings in consecutive sequence, so they can be
3757  *		read as a single string. If this assumption is not the
3758  *		case, a separate string, to be used for the check, needs
3759  *		to be built with these strings concatenated.
3760  *
3761  *   Arguments: un - driver soft state (unit) structure
3762  *		id - table or config file vid/pid
3763  *		idlen  - length of the vid/pid (bytes)
3764  *
3765  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3766  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3767  */
3768 
3769 static int
3770 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3771 {
3772 	struct scsi_inquiry	*sd_inq;
3773 	int 			rval = SD_SUCCESS;
3774 
3775 	ASSERT(un != NULL);
3776 	sd_inq = un->un_sd->sd_inq;
3777 	ASSERT(id != NULL);
3778 
3779 	/*
3780 	 * We use the inq_vid as a pointer to a buffer containing the
3781 	 * vid and pid and use the entire vid/pid length of the table
3782 	 * entry for the comparison. This works because the inq_pid
3783 	 * data member follows inq_vid in the scsi_inquiry structure.
3784 	 */
3785 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3786 		/*
3787 		 * The user id string is compared to the inquiry vid/pid
3788 		 * using a case insensitive comparison and ignoring
3789 		 * multiple spaces.
3790 		 */
3791 		rval = sd_blank_cmp(un, id, idlen);
3792 		if (rval != SD_SUCCESS) {
3793 			/*
3794 			 * User id strings that start and end with a "*"
3795 			 * are a special case. These do not have a
3796 			 * specific vendor, and the product string can
3797 			 * appear anywhere in the 16 byte PID portion of
3798 			 * the inquiry data. This is a simple strstr()
3799 			 * type search for the user id in the inquiry data.
3800 			 */
3801 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3802 				char	*pidptr = &id[1];
3803 				int	i;
3804 				int	j;
3805 				int	pidstrlen = idlen - 2;
3806 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3807 				    pidstrlen;
3808 
3809 				if (j < 0) {
3810 					return (SD_FAILURE);
3811 				}
3812 				for (i = 0; i < j; i++) {
3813 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3814 					    pidptr, pidstrlen) == 0) {
3815 						rval = SD_SUCCESS;
3816 						break;
3817 					}
3818 				}
3819 			}
3820 		}
3821 	}
3822 	return (rval);
3823 }
3824 
3825 
3826 /*
3827  *    Function: sd_blank_cmp
3828  *
3829  * Description: If the id string starts and ends with a space, treat
3830  *		multiple consecutive spaces as equivalent to a single
3831  *		space. For example, this causes a sd_disk_table entry
3832  *		of " NEC CDROM " to match a device's id string of
3833  *		"NEC       CDROM".
3834  *
3835  *		Note: The success exit condition for this routine is if
3836  *		the pointer to the table entry is '\0' and the cnt of
3837  *		the inquiry length is zero. This will happen if the inquiry
3838  *		string returned by the device is padded with spaces to be
3839  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3840  *		SCSI spec states that the inquiry string is to be padded with
3841  *		spaces.
3842  *
3843  *   Arguments: un - driver soft state (unit) structure
3844  *		id - table or config file vid/pid
3845  *		idlen  - length of the vid/pid (bytes)
3846  *
3847  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3848  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3849  */
3850 
3851 static int
3852 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3853 {
3854 	char		*p1;
3855 	char		*p2;
3856 	int		cnt;
3857 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3858 	    sizeof (SD_INQUIRY(un)->inq_pid);
3859 
3860 	ASSERT(un != NULL);
3861 	p2 = un->un_sd->sd_inq->inq_vid;
3862 	ASSERT(id != NULL);
3863 	p1 = id;
3864 
3865 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3866 		/*
3867 		 * Note: string p1 is terminated by a NUL but string p2
3868 		 * isn't.  The end of p2 is determined by cnt.
3869 		 */
3870 		for (;;) {
3871 			/* skip over any extra blanks in both strings */
3872 			while ((*p1 != '\0') && (*p1 == ' ')) {
3873 				p1++;
3874 			}
3875 			while ((cnt != 0) && (*p2 == ' ')) {
3876 				p2++;
3877 				cnt--;
3878 			}
3879 
3880 			/* compare the two strings */
3881 			if ((cnt == 0) ||
3882 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3883 				break;
3884 			}
3885 			while ((cnt > 0) &&
3886 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3887 				p1++;
3888 				p2++;
3889 				cnt--;
3890 			}
3891 		}
3892 	}
3893 
3894 	/* return SD_SUCCESS if both strings match */
3895 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3896 }
3897 
3898 
3899 /*
3900  *    Function: sd_chk_vers1_data
3901  *
3902  * Description: Verify the version 1 device properties provided by the
3903  *		user via the configuration file
3904  *
3905  *   Arguments: un	     - driver soft state (unit) structure
3906  *		flags	     - integer mask indicating properties to be set
3907  *		prop_list    - integer list of property values
3908  *		list_len     - length of user provided data
3909  *
3910  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3911  *		SD_FAILURE - Indicates the user provided data is invalid
3912  */
3913 
3914 static int
3915 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3916     int list_len, char *dataname_ptr)
3917 {
3918 	int i;
3919 	int mask = 1;
3920 	int index = 0;
3921 
3922 	ASSERT(un != NULL);
3923 
3924 	/* Check for a NULL property name and list */
3925 	if (dataname_ptr == NULL) {
3926 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3927 		    "sd_chk_vers1_data: NULL data property name.");
3928 		return (SD_FAILURE);
3929 	}
3930 	if (prop_list == NULL) {
3931 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3932 		    "sd_chk_vers1_data: %s NULL data property list.",
3933 		    dataname_ptr);
3934 		return (SD_FAILURE);
3935 	}
3936 
3937 	/* Display a warning if undefined bits are set in the flags */
3938 	if (flags & ~SD_CONF_BIT_MASK) {
3939 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3940 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3941 		    "Properties not set.",
3942 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3943 		return (SD_FAILURE);
3944 	}
3945 
3946 	/*
3947 	 * Verify the length of the list by identifying the highest bit set
3948 	 * in the flags and validating that the property list has a length
3949 	 * up to the index of this bit.
3950 	 */
3951 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3952 		if (flags & mask) {
3953 			index++;
3954 		}
3955 		mask = 1 << i;
3956 	}
3957 	if ((list_len / sizeof (int)) < (index + 2)) {
3958 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3959 		    "sd_chk_vers1_data: "
3960 		    "Data property list %s size is incorrect. "
3961 		    "Properties not set.", dataname_ptr);
3962 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3963 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3964 		return (SD_FAILURE);
3965 	}
3966 	return (SD_SUCCESS);
3967 }
3968 
3969 
3970 /*
3971  *    Function: sd_set_vers1_properties
3972  *
3973  * Description: Set version 1 device properties based on a property list
3974  *		retrieved from the driver configuration file or static
3975  *		configuration table. Version 1 properties have the format:
3976  *
3977  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3978  *
3979  *		where the prop0 value will be used to set prop0 if bit0
3980  *		is set in the flags
3981  *
3982  *   Arguments: un	     - driver soft state (unit) structure
3983  *		flags	     - integer mask indicating properties to be set
3984  *		prop_list    - integer list of property values
3985  */
3986 
3987 static void
3988 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3989 {
3990 	ASSERT(un != NULL);
3991 
3992 	/*
3993 	 * Set the flag to indicate cache is to be disabled. An attempt
3994 	 * to disable the cache via sd_cache_control() will be made
3995 	 * later during attach once the basic initialization is complete.
3996 	 */
3997 	if (flags & SD_CONF_BSET_NOCACHE) {
3998 		un->un_f_opt_disable_cache = TRUE;
3999 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4000 		    "sd_set_vers1_properties: caching disabled flag set\n");
4001 	}
4002 
4003 	/* CD-specific configuration parameters */
4004 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4005 		un->un_f_cfg_playmsf_bcd = TRUE;
4006 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4007 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4008 	}
4009 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4010 		un->un_f_cfg_readsub_bcd = TRUE;
4011 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4012 		    "sd_set_vers1_properties: readsub_bcd set\n");
4013 	}
4014 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4015 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4016 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4017 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4018 	}
4019 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4020 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4021 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4022 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4023 	}
4024 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4025 		un->un_f_cfg_no_read_header = TRUE;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 			    "sd_set_vers1_properties: no_read_header set\n");
4028 	}
4029 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4030 		un->un_f_cfg_read_cd_xd4 = TRUE;
4031 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4032 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4033 	}
4034 
4035 	/* Support for devices which do not have valid/unique serial numbers */
4036 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4037 		un->un_f_opt_fab_devid = TRUE;
4038 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4039 		    "sd_set_vers1_properties: fab_devid bit set\n");
4040 	}
4041 
4042 	/* Support for user throttle configuration */
4043 	if (flags & SD_CONF_BSET_THROTTLE) {
4044 		ASSERT(prop_list != NULL);
4045 		un->un_saved_throttle = un->un_throttle =
4046 		    prop_list->sdt_throttle;
4047 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4048 		    "sd_set_vers1_properties: throttle set to %d\n",
4049 		    prop_list->sdt_throttle);
4050 	}
4051 
4052 	/* Set the per disk retry count according to the conf file or table. */
4053 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4054 		ASSERT(prop_list != NULL);
4055 		if (prop_list->sdt_not_rdy_retries) {
4056 			un->un_notready_retry_count =
4057 				prop_list->sdt_not_rdy_retries;
4058 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4059 			    "sd_set_vers1_properties: not ready retry count"
4060 			    " set to %d\n", un->un_notready_retry_count);
4061 		}
4062 	}
4063 
4064 	/* The controller type is reported for generic disk driver ioctls */
4065 	if (flags & SD_CONF_BSET_CTYPE) {
4066 		ASSERT(prop_list != NULL);
4067 		switch (prop_list->sdt_ctype) {
4068 		case CTYPE_CDROM:
4069 			un->un_ctype = prop_list->sdt_ctype;
4070 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4071 			    "sd_set_vers1_properties: ctype set to "
4072 			    "CTYPE_CDROM\n");
4073 			break;
4074 		case CTYPE_CCS:
4075 			un->un_ctype = prop_list->sdt_ctype;
4076 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4077 				"sd_set_vers1_properties: ctype set to "
4078 				"CTYPE_CCS\n");
4079 			break;
4080 		case CTYPE_ROD:		/* RW optical */
4081 			un->un_ctype = prop_list->sdt_ctype;
4082 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4083 			    "sd_set_vers1_properties: ctype set to "
4084 			    "CTYPE_ROD\n");
4085 			break;
4086 		default:
4087 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4088 			    "sd_set_vers1_properties: Could not set "
4089 			    "invalid ctype value (%d)",
4090 			    prop_list->sdt_ctype);
4091 		}
4092 	}
4093 
4094 	/* Purple failover timeout */
4095 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4096 		ASSERT(prop_list != NULL);
4097 		un->un_busy_retry_count =
4098 			prop_list->sdt_busy_retries;
4099 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4100 		    "sd_set_vers1_properties: "
4101 		    "busy retry count set to %d\n",
4102 		    un->un_busy_retry_count);
4103 	}
4104 
4105 	/* Purple reset retry count */
4106 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4107 		ASSERT(prop_list != NULL);
4108 		un->un_reset_retry_count =
4109 			prop_list->sdt_reset_retries;
4110 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4111 		    "sd_set_vers1_properties: "
4112 		    "reset retry count set to %d\n",
4113 		    un->un_reset_retry_count);
4114 	}
4115 
4116 	/* Purple reservation release timeout */
4117 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4118 		ASSERT(prop_list != NULL);
4119 		un->un_reserve_release_time =
4120 			prop_list->sdt_reserv_rel_time;
4121 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4122 		    "sd_set_vers1_properties: "
4123 		    "reservation release timeout set to %d\n",
4124 		    un->un_reserve_release_time);
4125 	}
4126 
4127 	/*
4128 	 * Driver flag telling the driver to verify that no commands are pending
4129 	 * for a device before issuing a Test Unit Ready. This is a workaround
4130 	 * for a firmware bug in some Seagate eliteI drives.
4131 	 */
4132 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4133 		un->un_f_cfg_tur_check = TRUE;
4134 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4135 		    "sd_set_vers1_properties: tur queue check set\n");
4136 	}
4137 
4138 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4139 		un->un_min_throttle = prop_list->sdt_min_throttle;
4140 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4141 		    "sd_set_vers1_properties: min throttle set to %d\n",
4142 		    un->un_min_throttle);
4143 	}
4144 
4145 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4146 		un->un_f_disksort_disabled =
4147 		    (prop_list->sdt_disk_sort_dis != 0) ?
4148 		    TRUE : FALSE;
4149 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4150 		    "sd_set_vers1_properties: disksort disabled "
4151 		    "flag set to %d\n",
4152 		    prop_list->sdt_disk_sort_dis);
4153 	}
4154 
4155 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4156 		un->un_f_lun_reset_enabled =
4157 		    (prop_list->sdt_lun_reset_enable != 0) ?
4158 		    TRUE : FALSE;
4159 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4160 		    "sd_set_vers1_properties: lun reset enabled "
4161 		    "flag set to %d\n",
4162 		    prop_list->sdt_lun_reset_enable);
4163 	}
4164 
4165 	/*
4166 	 * Validate the throttle values.
4167 	 * If any of the numbers are invalid, set everything to defaults.
4168 	 */
4169 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4170 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4171 	    (un->un_min_throttle > un->un_throttle)) {
4172 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4173 		un->un_min_throttle = sd_min_throttle;
4174 	}
4175 }
4176 
4177 /*
4178  *   Function: sd_is_lsi()
4179  *
4180  *   Description: Check for lsi devices, step throught the static device
4181  *	table to match vid/pid.
4182  *
4183  *   Args: un - ptr to sd_lun
4184  *
4185  *   Notes:  When creating new LSI property, need to add the new LSI property
4186  *		to this function.
4187  */
4188 static void
4189 sd_is_lsi(struct sd_lun *un)
4190 {
4191 	char	*id = NULL;
4192 	int	table_index;
4193 	int	idlen;
4194 	void	*prop;
4195 
4196 	ASSERT(un != NULL);
4197 	for (table_index = 0; table_index < sd_disk_table_size;
4198 	    table_index++) {
4199 		id = sd_disk_table[table_index].device_id;
4200 		idlen = strlen(id);
4201 		if (idlen == 0) {
4202 			continue;
4203 		}
4204 
4205 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4206 			prop = sd_disk_table[table_index].properties;
4207 			if (prop == &lsi_properties ||
4208 			    prop == &lsi_oem_properties ||
4209 			    prop == &lsi_properties_scsi ||
4210 			    prop == &symbios_properties) {
4211 				un->un_f_cfg_is_lsi = TRUE;
4212 			}
4213 			break;
4214 		}
4215 	}
4216 }
4217 
4218 /*
4219  *    Function: sd_get_physical_geometry
4220  *
4221  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4222  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4223  *		target, and use this information to initialize the physical
4224  *		geometry cache specified by pgeom_p.
4225  *
4226  *		MODE SENSE is an optional command, so failure in this case
4227  *		does not necessarily denote an error. We want to use the
4228  *		MODE SENSE commands to derive the physical geometry of the
4229  *		device, but if either command fails, the logical geometry is
4230  *		used as the fallback for disk label geometry in cmlb.
4231  *
4232  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4233  *		have already been initialized for the current target and
4234  *		that the current values be passed as args so that we don't
4235  *		end up ever trying to use -1 as a valid value. This could
4236  *		happen if either value is reset while we're not holding
4237  *		the mutex.
4238  *
4239  *   Arguments: un - driver soft state (unit) structure
4240  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4241  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4242  *			to use the USCSI "direct" chain and bypass the normal
4243  *			command waitq.
4244  *
4245  *     Context: Kernel thread only (can sleep).
4246  */
4247 
4248 static int
4249 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4250 	diskaddr_t capacity, int lbasize, int path_flag)
4251 {
4252 	struct	mode_format	*page3p;
4253 	struct	mode_geometry	*page4p;
4254 	struct	mode_header	*headerp;
4255 	int	sector_size;
4256 	int	nsect;
4257 	int	nhead;
4258 	int	ncyl;
4259 	int	intrlv;
4260 	int	spc;
4261 	diskaddr_t	modesense_capacity;
4262 	int	rpm;
4263 	int	bd_len;
4264 	int	mode_header_length;
4265 	uchar_t	*p3bufp;
4266 	uchar_t	*p4bufp;
4267 	int	cdbsize;
4268 	int 	ret = EIO;
4269 
4270 	ASSERT(un != NULL);
4271 
4272 	if (lbasize == 0) {
4273 		if (ISCD(un)) {
4274 			lbasize = 2048;
4275 		} else {
4276 			lbasize = un->un_sys_blocksize;
4277 		}
4278 	}
4279 	pgeom_p->g_secsize = (unsigned short)lbasize;
4280 
4281 	/*
4282 	 * If the unit is a cd/dvd drive MODE SENSE page three
4283 	 * and MODE SENSE page four are reserved (see SBC spec
4284 	 * and MMC spec). To prevent soft errors just return
4285 	 * using the default LBA size.
4286 	 */
4287 	if (ISCD(un))
4288 		return (ret);
4289 
4290 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4291 
4292 	/*
4293 	 * Retrieve MODE SENSE page 3 - Format Device Page
4294 	 */
4295 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4296 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4297 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4298 	    != 0) {
4299 		SD_ERROR(SD_LOG_COMMON, un,
4300 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4301 		goto page3_exit;
4302 	}
4303 
4304 	/*
4305 	 * Determine size of Block Descriptors in order to locate the mode
4306 	 * page data.  ATAPI devices return 0, SCSI devices should return
4307 	 * MODE_BLK_DESC_LENGTH.
4308 	 */
4309 	headerp = (struct mode_header *)p3bufp;
4310 	if (un->un_f_cfg_is_atapi == TRUE) {
4311 		struct mode_header_grp2 *mhp =
4312 		    (struct mode_header_grp2 *)headerp;
4313 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4314 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4315 	} else {
4316 		mode_header_length = MODE_HEADER_LENGTH;
4317 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4318 	}
4319 
4320 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4321 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4322 		    "received unexpected bd_len of %d, page3\n", bd_len);
4323 		goto page3_exit;
4324 	}
4325 
4326 	page3p = (struct mode_format *)
4327 	    ((caddr_t)headerp + mode_header_length + bd_len);
4328 
4329 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4330 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4331 		    "mode sense pg3 code mismatch %d\n",
4332 		    page3p->mode_page.code);
4333 		goto page3_exit;
4334 	}
4335 
4336 	/*
4337 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4338 	 * complete successfully; otherwise, revert to the logical geometry.
4339 	 * So, we need to save everything in temporary variables.
4340 	 */
4341 	sector_size = BE_16(page3p->data_bytes_sect);
4342 
4343 	/*
4344 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4345 	 */
4346 	if (sector_size == 0) {
4347 		sector_size = un->un_sys_blocksize;
4348 	} else {
4349 		sector_size &= ~(un->un_sys_blocksize - 1);
4350 	}
4351 
4352 	nsect  = BE_16(page3p->sect_track);
4353 	intrlv = BE_16(page3p->interleave);
4354 
4355 	SD_INFO(SD_LOG_COMMON, un,
4356 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4357 	SD_INFO(SD_LOG_COMMON, un,
4358 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4359 	    page3p->mode_page.code, nsect, sector_size);
4360 	SD_INFO(SD_LOG_COMMON, un,
4361 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4362 	    BE_16(page3p->track_skew),
4363 	    BE_16(page3p->cylinder_skew));
4364 
4365 
4366 	/*
4367 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4368 	 */
4369 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4370 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4371 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4372 	    != 0) {
4373 		SD_ERROR(SD_LOG_COMMON, un,
4374 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4375 		goto page4_exit;
4376 	}
4377 
4378 	/*
4379 	 * Determine size of Block Descriptors in order to locate the mode
4380 	 * page data.  ATAPI devices return 0, SCSI devices should return
4381 	 * MODE_BLK_DESC_LENGTH.
4382 	 */
4383 	headerp = (struct mode_header *)p4bufp;
4384 	if (un->un_f_cfg_is_atapi == TRUE) {
4385 		struct mode_header_grp2 *mhp =
4386 		    (struct mode_header_grp2 *)headerp;
4387 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4388 	} else {
4389 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4390 	}
4391 
4392 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4393 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4394 		    "received unexpected bd_len of %d, page4\n", bd_len);
4395 		goto page4_exit;
4396 	}
4397 
4398 	page4p = (struct mode_geometry *)
4399 	    ((caddr_t)headerp + mode_header_length + bd_len);
4400 
4401 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4402 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4403 		    "mode sense pg4 code mismatch %d\n",
4404 		    page4p->mode_page.code);
4405 		goto page4_exit;
4406 	}
4407 
4408 	/*
4409 	 * Stash the data now, after we know that both commands completed.
4410 	 */
4411 
4412 
4413 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4414 	spc   = nhead * nsect;
4415 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4416 	rpm   = BE_16(page4p->rpm);
4417 
4418 	modesense_capacity = spc * ncyl;
4419 
4420 	SD_INFO(SD_LOG_COMMON, un,
4421 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4422 	SD_INFO(SD_LOG_COMMON, un,
4423 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4424 	SD_INFO(SD_LOG_COMMON, un,
4425 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4426 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4427 	    (void *)pgeom_p, capacity);
4428 
4429 	/*
4430 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4431 	 * the product of C * H * S returned by MODE SENSE >= that returned
4432 	 * by read capacity. This is an idiosyncrasy of the original x86
4433 	 * disk subsystem.
4434 	 */
4435 	if (modesense_capacity >= capacity) {
4436 		SD_INFO(SD_LOG_COMMON, un,
4437 		    "sd_get_physical_geometry: adjusting acyl; "
4438 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4439 		    (modesense_capacity - capacity + spc - 1) / spc);
4440 		if (sector_size != 0) {
4441 			/* 1243403: NEC D38x7 drives don't support sec size */
4442 			pgeom_p->g_secsize = (unsigned short)sector_size;
4443 		}
4444 		pgeom_p->g_nsect    = (unsigned short)nsect;
4445 		pgeom_p->g_nhead    = (unsigned short)nhead;
4446 		pgeom_p->g_capacity = capacity;
4447 		pgeom_p->g_acyl	    =
4448 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4449 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4450 	}
4451 
4452 	pgeom_p->g_rpm    = (unsigned short)rpm;
4453 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4454 	ret = 0;
4455 
4456 	SD_INFO(SD_LOG_COMMON, un,
4457 	    "sd_get_physical_geometry: mode sense geometry:\n");
4458 	SD_INFO(SD_LOG_COMMON, un,
4459 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4460 	    nsect, sector_size, intrlv);
4461 	SD_INFO(SD_LOG_COMMON, un,
4462 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4463 	    nhead, ncyl, rpm, modesense_capacity);
4464 	SD_INFO(SD_LOG_COMMON, un,
4465 	    "sd_get_physical_geometry: (cached)\n");
4466 	SD_INFO(SD_LOG_COMMON, un,
4467 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4468 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4469 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4470 	SD_INFO(SD_LOG_COMMON, un,
4471 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4472 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4473 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4474 
4475 page4_exit:
4476 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4477 page3_exit:
4478 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4479 
4480 	return (ret);
4481 }
4482 
4483 /*
4484  *    Function: sd_get_virtual_geometry
4485  *
4486  * Description: Ask the controller to tell us about the target device.
4487  *
4488  *   Arguments: un - pointer to softstate
4489  *		capacity - disk capacity in #blocks
4490  *		lbasize - disk block size in bytes
4491  *
4492  *     Context: Kernel thread only
4493  */
4494 
4495 static int
4496 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4497     diskaddr_t capacity, int lbasize)
4498 {
4499 	uint_t	geombuf;
4500 	int	spc;
4501 
4502 	ASSERT(un != NULL);
4503 
4504 	/* Set sector size, and total number of sectors */
4505 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4506 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4507 
4508 	/* Let the HBA tell us its geometry */
4509 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4510 
4511 	/* A value of -1 indicates an undefined "geometry" property */
4512 	if (geombuf == (-1)) {
4513 		return (EINVAL);
4514 	}
4515 
4516 	/* Initialize the logical geometry cache. */
4517 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4518 	lgeom_p->g_nsect   = geombuf & 0xffff;
4519 	lgeom_p->g_secsize = un->un_sys_blocksize;
4520 
4521 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4522 
4523 	/*
4524 	 * Note: The driver originally converted the capacity value from
4525 	 * target blocks to system blocks. However, the capacity value passed
4526 	 * to this routine is already in terms of system blocks (this scaling
4527 	 * is done when the READ CAPACITY command is issued and processed).
4528 	 * This 'error' may have gone undetected because the usage of g_ncyl
4529 	 * (which is based upon g_capacity) is very limited within the driver
4530 	 */
4531 	lgeom_p->g_capacity = capacity;
4532 
4533 	/*
4534 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
4535 	 * hba may return zero values if the device has been removed.
4536 	 */
4537 	if (spc == 0) {
4538 		lgeom_p->g_ncyl = 0;
4539 	} else {
4540 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4541 	}
4542 	lgeom_p->g_acyl = 0;
4543 
4544 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4545 	return (0);
4546 
4547 }
4548 /*
4549  *    Function: sd_update_block_info
4550  *
4551  * Description: Calculate a byte count to sector count bitshift value
4552  *		from sector size.
4553  *
4554  *   Arguments: un: unit struct.
4555  *		lbasize: new target sector size
4556  *		capacity: new target capacity, ie. block count
4557  *
4558  *     Context: Kernel thread context
4559  */
4560 
4561 static void
4562 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4563 {
4564 	if (lbasize != 0) {
4565 		un->un_tgt_blocksize = lbasize;
4566 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4567 	}
4568 
4569 	if (capacity != 0) {
4570 		un->un_blockcount		= capacity;
4571 		un->un_f_blockcount_is_valid	= TRUE;
4572 	}
4573 }
4574 
4575 
4576 /*
4577  *    Function: sd_register_devid
4578  *
4579  * Description: This routine will obtain the device id information from the
4580  *		target, obtain the serial number, and register the device
4581  *		id with the ddi framework.
4582  *
4583  *   Arguments: devi - the system's dev_info_t for the device.
4584  *		un - driver soft state (unit) structure
4585  *		reservation_flag - indicates if a reservation conflict
4586  *		occurred during attach
4587  *
4588  *     Context: Kernel Thread
4589  */
4590 static void
4591 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4592 {
4593 	int		rval		= 0;
4594 	uchar_t		*inq80		= NULL;
4595 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4596 	size_t		inq80_resid	= 0;
4597 	uchar_t		*inq83		= NULL;
4598 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4599 	size_t		inq83_resid	= 0;
4600 
4601 	ASSERT(un != NULL);
4602 	ASSERT(mutex_owned(SD_MUTEX(un)));
4603 	ASSERT((SD_DEVINFO(un)) == devi);
4604 
4605 	/*
4606 	 * This is the case of antiquated Sun disk drives that have the
4607 	 * FAB_DEVID property set in the disk_table.  These drives
4608 	 * manage the devid's by storing them in last 2 available sectors
4609 	 * on the drive and have them fabricated by the ddi layer by calling
4610 	 * ddi_devid_init and passing the DEVID_FAB flag.
4611 	 */
4612 	if (un->un_f_opt_fab_devid == TRUE) {
4613 		/*
4614 		 * Depending on EINVAL isn't reliable, since a reserved disk
4615 		 * may result in invalid geometry, so check to make sure a
4616 		 * reservation conflict did not occur during attach.
4617 		 */
4618 		if ((sd_get_devid(un) == EINVAL) &&
4619 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4620 			/*
4621 			 * The devid is invalid AND there is no reservation
4622 			 * conflict.  Fabricate a new devid.
4623 			 */
4624 			(void) sd_create_devid(un);
4625 		}
4626 
4627 		/* Register the devid if it exists */
4628 		if (un->un_devid != NULL) {
4629 			(void) ddi_devid_register(SD_DEVINFO(un),
4630 			    un->un_devid);
4631 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4632 			    "sd_register_devid: Devid Fabricated\n");
4633 		}
4634 		return;
4635 	}
4636 
4637 	/*
4638 	 * We check the availibility of the World Wide Name (0x83) and Unit
4639 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
4640 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
4641 	 * 0x83 is availible, that is the best choice.  Our next choice is
4642 	 * 0x80.  If neither are availible, we munge the devid from the device
4643 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
4644 	 * to fabricate a devid for non-Sun qualified disks.
4645 	 */
4646 	if (sd_check_vpd_page_support(un) == 0) {
4647 		/* collect page 80 data if available */
4648 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4649 
4650 			mutex_exit(SD_MUTEX(un));
4651 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4652 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4653 			    0x01, 0x80, &inq80_resid);
4654 
4655 			if (rval != 0) {
4656 				kmem_free(inq80, inq80_len);
4657 				inq80 = NULL;
4658 				inq80_len = 0;
4659 			}
4660 			mutex_enter(SD_MUTEX(un));
4661 		}
4662 
4663 		/* collect page 83 data if available */
4664 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4665 			mutex_exit(SD_MUTEX(un));
4666 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4667 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4668 			    0x01, 0x83, &inq83_resid);
4669 
4670 			if (rval != 0) {
4671 				kmem_free(inq83, inq83_len);
4672 				inq83 = NULL;
4673 				inq83_len = 0;
4674 			}
4675 			mutex_enter(SD_MUTEX(un));
4676 		}
4677 	}
4678 
4679 	/* encode best devid possible based on data available */
4680 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4681 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4682 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4683 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4684 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4685 
4686 		/* devid successfully encoded, register devid */
4687 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4688 
4689 	} else {
4690 		/*
4691 		 * Unable to encode a devid based on data available.
4692 		 * This is not a Sun qualified disk.  Older Sun disk
4693 		 * drives that have the SD_FAB_DEVID property
4694 		 * set in the disk_table and non Sun qualified
4695 		 * disks are treated in the same manner.  These
4696 		 * drives manage the devid's by storing them in
4697 		 * last 2 available sectors on the drive and
4698 		 * have them fabricated by the ddi layer by
4699 		 * calling ddi_devid_init and passing the
4700 		 * DEVID_FAB flag.
4701 		 * Create a fabricate devid only if there's no
4702 		 * fabricate devid existed.
4703 		 */
4704 		if (sd_get_devid(un) == EINVAL) {
4705 			(void) sd_create_devid(un);
4706 		}
4707 		un->un_f_opt_fab_devid = TRUE;
4708 
4709 		/* Register the devid if it exists */
4710 		if (un->un_devid != NULL) {
4711 			(void) ddi_devid_register(SD_DEVINFO(un),
4712 			    un->un_devid);
4713 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4714 			    "sd_register_devid: devid fabricated using "
4715 			    "ddi framework\n");
4716 		}
4717 	}
4718 
4719 	/* clean up resources */
4720 	if (inq80 != NULL) {
4721 		kmem_free(inq80, inq80_len);
4722 	}
4723 	if (inq83 != NULL) {
4724 		kmem_free(inq83, inq83_len);
4725 	}
4726 }
4727 
4728 
4729 
4730 /*
4731  *    Function: sd_get_devid
4732  *
4733  * Description: This routine will return 0 if a valid device id has been
4734  *		obtained from the target and stored in the soft state. If a
4735  *		valid device id has not been previously read and stored, a
4736  *		read attempt will be made.
4737  *
4738  *   Arguments: un - driver soft state (unit) structure
4739  *
4740  * Return Code: 0 if we successfully get the device id
4741  *
4742  *     Context: Kernel Thread
4743  */
4744 
4745 static int
4746 sd_get_devid(struct sd_lun *un)
4747 {
4748 	struct dk_devid		*dkdevid;
4749 	ddi_devid_t		tmpid;
4750 	uint_t			*ip;
4751 	size_t			sz;
4752 	diskaddr_t		blk;
4753 	int			status;
4754 	int			chksum;
4755 	int			i;
4756 	size_t			buffer_size;
4757 
4758 	ASSERT(un != NULL);
4759 	ASSERT(mutex_owned(SD_MUTEX(un)));
4760 
4761 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4762 	    un);
4763 
4764 	if (un->un_devid != NULL) {
4765 		return (0);
4766 	}
4767 
4768 	mutex_exit(SD_MUTEX(un));
4769 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4770 	    (void *)SD_PATH_DIRECT) != 0) {
4771 		mutex_enter(SD_MUTEX(un));
4772 		return (EINVAL);
4773 	}
4774 
4775 	/*
4776 	 * Read and verify device id, stored in the reserved cylinders at the
4777 	 * end of the disk. Backup label is on the odd sectors of the last
4778 	 * track of the last cylinder. Device id will be on track of the next
4779 	 * to last cylinder.
4780 	 */
4781 	mutex_enter(SD_MUTEX(un));
4782 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4783 	mutex_exit(SD_MUTEX(un));
4784 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4785 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4786 	    SD_PATH_DIRECT);
4787 	if (status != 0) {
4788 		goto error;
4789 	}
4790 
4791 	/* Validate the revision */
4792 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4793 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4794 		status = EINVAL;
4795 		goto error;
4796 	}
4797 
4798 	/* Calculate the checksum */
4799 	chksum = 0;
4800 	ip = (uint_t *)dkdevid;
4801 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4802 	    i++) {
4803 		chksum ^= ip[i];
4804 	}
4805 
4806 	/* Compare the checksums */
4807 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4808 		status = EINVAL;
4809 		goto error;
4810 	}
4811 
4812 	/* Validate the device id */
4813 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4814 		status = EINVAL;
4815 		goto error;
4816 	}
4817 
4818 	/*
4819 	 * Store the device id in the driver soft state
4820 	 */
4821 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4822 	tmpid = kmem_alloc(sz, KM_SLEEP);
4823 
4824 	mutex_enter(SD_MUTEX(un));
4825 
4826 	un->un_devid = tmpid;
4827 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4828 
4829 	kmem_free(dkdevid, buffer_size);
4830 
4831 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4832 
4833 	return (status);
4834 error:
4835 	mutex_enter(SD_MUTEX(un));
4836 	kmem_free(dkdevid, buffer_size);
4837 	return (status);
4838 }
4839 
4840 
4841 /*
4842  *    Function: sd_create_devid
4843  *
4844  * Description: This routine will fabricate the device id and write it
4845  *		to the disk.
4846  *
4847  *   Arguments: un - driver soft state (unit) structure
4848  *
4849  * Return Code: value of the fabricated device id
4850  *
4851  *     Context: Kernel Thread
4852  */
4853 
4854 static ddi_devid_t
4855 sd_create_devid(struct sd_lun *un)
4856 {
4857 	ASSERT(un != NULL);
4858 
4859 	/* Fabricate the devid */
4860 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4861 	    == DDI_FAILURE) {
4862 		return (NULL);
4863 	}
4864 
4865 	/* Write the devid to disk */
4866 	if (sd_write_deviceid(un) != 0) {
4867 		ddi_devid_free(un->un_devid);
4868 		un->un_devid = NULL;
4869 	}
4870 
4871 	return (un->un_devid);
4872 }
4873 
4874 
4875 /*
4876  *    Function: sd_write_deviceid
4877  *
4878  * Description: This routine will write the device id to the disk
4879  *		reserved sector.
4880  *
4881  *   Arguments: un - driver soft state (unit) structure
4882  *
4883  * Return Code: EINVAL
4884  *		value returned by sd_send_scsi_cmd
4885  *
4886  *     Context: Kernel Thread
4887  */
4888 
4889 static int
4890 sd_write_deviceid(struct sd_lun *un)
4891 {
4892 	struct dk_devid		*dkdevid;
4893 	diskaddr_t		blk;
4894 	uint_t			*ip, chksum;
4895 	int			status;
4896 	int			i;
4897 
4898 	ASSERT(mutex_owned(SD_MUTEX(un)));
4899 
4900 	mutex_exit(SD_MUTEX(un));
4901 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4902 	    (void *)SD_PATH_DIRECT) != 0) {
4903 		mutex_enter(SD_MUTEX(un));
4904 		return (-1);
4905 	}
4906 
4907 
4908 	/* Allocate the buffer */
4909 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
4910 
4911 	/* Fill in the revision */
4912 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
4913 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
4914 
4915 	/* Copy in the device id */
4916 	mutex_enter(SD_MUTEX(un));
4917 	bcopy(un->un_devid, &dkdevid->dkd_devid,
4918 	    ddi_devid_sizeof(un->un_devid));
4919 	mutex_exit(SD_MUTEX(un));
4920 
4921 	/* Calculate the checksum */
4922 	chksum = 0;
4923 	ip = (uint_t *)dkdevid;
4924 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4925 	    i++) {
4926 		chksum ^= ip[i];
4927 	}
4928 
4929 	/* Fill-in checksum */
4930 	DKD_FORMCHKSUM(chksum, dkdevid);
4931 
4932 	/* Write the reserved sector */
4933 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
4934 	    SD_PATH_DIRECT);
4935 
4936 	kmem_free(dkdevid, un->un_sys_blocksize);
4937 
4938 	mutex_enter(SD_MUTEX(un));
4939 	return (status);
4940 }
4941 
4942 
4943 /*
4944  *    Function: sd_check_vpd_page_support
4945  *
4946  * Description: This routine sends an inquiry command with the EVPD bit set and
4947  *		a page code of 0x00 to the device. It is used to determine which
4948  *		vital product pages are availible to find the devid. We are
4949  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
4950  *		device does not support that command.
4951  *
4952  *   Arguments: un  - driver soft state (unit) structure
4953  *
4954  * Return Code: 0 - success
4955  *		1 - check condition
4956  *
4957  *     Context: This routine can sleep.
4958  */
4959 
4960 static int
4961 sd_check_vpd_page_support(struct sd_lun *un)
4962 {
4963 	uchar_t	*page_list	= NULL;
4964 	uchar_t	page_length	= 0xff;	/* Use max possible length */
4965 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
4966 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
4967 	int    	rval		= 0;
4968 	int	counter;
4969 
4970 	ASSERT(un != NULL);
4971 	ASSERT(mutex_owned(SD_MUTEX(un)));
4972 
4973 	mutex_exit(SD_MUTEX(un));
4974 
4975 	/*
4976 	 * We'll set the page length to the maximum to save figuring it out
4977 	 * with an additional call.
4978 	 */
4979 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
4980 
4981 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
4982 	    page_code, NULL);
4983 
4984 	mutex_enter(SD_MUTEX(un));
4985 
4986 	/*
4987 	 * Now we must validate that the device accepted the command, as some
4988 	 * drives do not support it.  If the drive does support it, we will
4989 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
4990 	 * not, we return -1.
4991 	 */
4992 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
4993 		/* Loop to find one of the 2 pages we need */
4994 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
4995 
4996 		/*
4997 		 * Pages are returned in ascending order, and 0x83 is what we
4998 		 * are hoping for.
4999 		 */
5000 		while ((page_list[counter] <= 0x83) &&
5001 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5002 		    VPD_HEAD_OFFSET))) {
5003 			/*
5004 			 * Add 3 because page_list[3] is the number of
5005 			 * pages minus 3
5006 			 */
5007 
5008 			switch (page_list[counter]) {
5009 			case 0x00:
5010 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5011 				break;
5012 			case 0x80:
5013 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5014 				break;
5015 			case 0x81:
5016 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5017 				break;
5018 			case 0x82:
5019 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5020 				break;
5021 			case 0x83:
5022 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5023 				break;
5024 			}
5025 			counter++;
5026 		}
5027 
5028 	} else {
5029 		rval = -1;
5030 
5031 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5032 		    "sd_check_vpd_page_support: This drive does not implement "
5033 		    "VPD pages.\n");
5034 	}
5035 
5036 	kmem_free(page_list, page_length);
5037 
5038 	return (rval);
5039 }
5040 
5041 
5042 /*
5043  *    Function: sd_setup_pm
5044  *
5045  * Description: Initialize Power Management on the device
5046  *
5047  *     Context: Kernel Thread
5048  */
5049 
5050 static void
5051 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5052 {
5053 	uint_t	log_page_size;
5054 	uchar_t	*log_page_data;
5055 	int	rval;
5056 
5057 	/*
5058 	 * Since we are called from attach, holding a mutex for
5059 	 * un is unnecessary. Because some of the routines called
5060 	 * from here require SD_MUTEX to not be held, assert this
5061 	 * right up front.
5062 	 */
5063 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5064 	/*
5065 	 * Since the sd device does not have the 'reg' property,
5066 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5067 	 * The following code is to tell cpr that this device
5068 	 * DOES need to be suspended and resumed.
5069 	 */
5070 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5071 	    "pm-hardware-state", "needs-suspend-resume");
5072 
5073 	/*
5074 	 * This complies with the new power management framework
5075 	 * for certain desktop machines. Create the pm_components
5076 	 * property as a string array property.
5077 	 */
5078 	if (un->un_f_pm_supported) {
5079 		/*
5080 		 * not all devices have a motor, try it first.
5081 		 * some devices may return ILLEGAL REQUEST, some
5082 		 * will hang
5083 		 * The following START_STOP_UNIT is used to check if target
5084 		 * device has a motor.
5085 		 */
5086 		un->un_f_start_stop_supported = TRUE;
5087 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5088 		    SD_PATH_DIRECT) != 0) {
5089 			un->un_f_start_stop_supported = FALSE;
5090 		}
5091 
5092 		/*
5093 		 * create pm properties anyways otherwise the parent can't
5094 		 * go to sleep
5095 		 */
5096 		(void) sd_create_pm_components(devi, un);
5097 		un->un_f_pm_is_enabled = TRUE;
5098 		return;
5099 	}
5100 
5101 	if (!un->un_f_log_sense_supported) {
5102 		un->un_power_level = SD_SPINDLE_ON;
5103 		un->un_f_pm_is_enabled = FALSE;
5104 		return;
5105 	}
5106 
5107 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5108 
5109 #ifdef	SDDEBUG
5110 	if (sd_force_pm_supported) {
5111 		/* Force a successful result */
5112 		rval = 1;
5113 	}
5114 #endif
5115 
5116 	/*
5117 	 * If the start-stop cycle counter log page is not supported
5118 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5119 	 * then we should not create the pm_components property.
5120 	 */
5121 	if (rval == -1) {
5122 		/*
5123 		 * Error.
5124 		 * Reading log sense failed, most likely this is
5125 		 * an older drive that does not support log sense.
5126 		 * If this fails auto-pm is not supported.
5127 		 */
5128 		un->un_power_level = SD_SPINDLE_ON;
5129 		un->un_f_pm_is_enabled = FALSE;
5130 
5131 	} else if (rval == 0) {
5132 		/*
5133 		 * Page not found.
5134 		 * The start stop cycle counter is implemented as page
5135 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
5136 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5137 		 */
5138 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5139 			/*
5140 			 * Page found, use this one.
5141 			 */
5142 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5143 			un->un_f_pm_is_enabled = TRUE;
5144 		} else {
5145 			/*
5146 			 * Error or page not found.
5147 			 * auto-pm is not supported for this device.
5148 			 */
5149 			un->un_power_level = SD_SPINDLE_ON;
5150 			un->un_f_pm_is_enabled = FALSE;
5151 		}
5152 	} else {
5153 		/*
5154 		 * Page found, use it.
5155 		 */
5156 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5157 		un->un_f_pm_is_enabled = TRUE;
5158 	}
5159 
5160 
5161 	if (un->un_f_pm_is_enabled == TRUE) {
5162 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5163 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5164 
5165 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5166 		    log_page_size, un->un_start_stop_cycle_page,
5167 		    0x01, 0, SD_PATH_DIRECT);
5168 #ifdef	SDDEBUG
5169 		if (sd_force_pm_supported) {
5170 			/* Force a successful result */
5171 			rval = 0;
5172 		}
5173 #endif
5174 
5175 		/*
5176 		 * If the Log sense for Page( Start/stop cycle counter page)
5177 		 * succeeds, then power managment is supported and we can
5178 		 * enable auto-pm.
5179 		 */
5180 		if (rval == 0)  {
5181 			(void) sd_create_pm_components(devi, un);
5182 		} else {
5183 			un->un_power_level = SD_SPINDLE_ON;
5184 			un->un_f_pm_is_enabled = FALSE;
5185 		}
5186 
5187 		kmem_free(log_page_data, log_page_size);
5188 	}
5189 }
5190 
5191 
5192 /*
5193  *    Function: sd_create_pm_components
5194  *
5195  * Description: Initialize PM property.
5196  *
5197  *     Context: Kernel thread context
5198  */
5199 
5200 static void
5201 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5202 {
5203 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5204 
5205 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5206 
5207 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5208 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5209 		/*
5210 		 * When components are initially created they are idle,
5211 		 * power up any non-removables.
5212 		 * Note: the return value of pm_raise_power can't be used
5213 		 * for determining if PM should be enabled for this device.
5214 		 * Even if you check the return values and remove this
5215 		 * property created above, the PM framework will not honor the
5216 		 * change after the first call to pm_raise_power. Hence,
5217 		 * removal of that property does not help if pm_raise_power
5218 		 * fails. In the case of removable media, the start/stop
5219 		 * will fail if the media is not present.
5220 		 */
5221 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5222 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5223 			mutex_enter(SD_MUTEX(un));
5224 			un->un_power_level = SD_SPINDLE_ON;
5225 			mutex_enter(&un->un_pm_mutex);
5226 			/* Set to on and not busy. */
5227 			un->un_pm_count = 0;
5228 		} else {
5229 			mutex_enter(SD_MUTEX(un));
5230 			un->un_power_level = SD_SPINDLE_OFF;
5231 			mutex_enter(&un->un_pm_mutex);
5232 			/* Set to off. */
5233 			un->un_pm_count = -1;
5234 		}
5235 		mutex_exit(&un->un_pm_mutex);
5236 		mutex_exit(SD_MUTEX(un));
5237 	} else {
5238 		un->un_power_level = SD_SPINDLE_ON;
5239 		un->un_f_pm_is_enabled = FALSE;
5240 	}
5241 }
5242 
5243 
5244 /*
5245  *    Function: sd_ddi_suspend
5246  *
5247  * Description: Performs system power-down operations. This includes
5248  *		setting the drive state to indicate its suspended so
5249  *		that no new commands will be accepted. Also, wait for
5250  *		all commands that are in transport or queued to a timer
5251  *		for retry to complete. All timeout threads are cancelled.
5252  *
5253  * Return Code: DDI_FAILURE or DDI_SUCCESS
5254  *
5255  *     Context: Kernel thread context
5256  */
5257 
5258 static int
5259 sd_ddi_suspend(dev_info_t *devi)
5260 {
5261 	struct	sd_lun	*un;
5262 	clock_t		wait_cmds_complete;
5263 
5264 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5265 	if (un == NULL) {
5266 		return (DDI_FAILURE);
5267 	}
5268 
5269 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5270 
5271 	mutex_enter(SD_MUTEX(un));
5272 
5273 	/* Return success if the device is already suspended. */
5274 	if (un->un_state == SD_STATE_SUSPENDED) {
5275 		mutex_exit(SD_MUTEX(un));
5276 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5277 		    "device already suspended, exiting\n");
5278 		return (DDI_SUCCESS);
5279 	}
5280 
5281 	/* Return failure if the device is being used by HA */
5282 	if (un->un_resvd_status &
5283 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5284 		mutex_exit(SD_MUTEX(un));
5285 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5286 		    "device in use by HA, exiting\n");
5287 		return (DDI_FAILURE);
5288 	}
5289 
5290 	/*
5291 	 * Return failure if the device is in a resource wait
5292 	 * or power changing state.
5293 	 */
5294 	if ((un->un_state == SD_STATE_RWAIT) ||
5295 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5296 		mutex_exit(SD_MUTEX(un));
5297 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5298 		    "device in resource wait state, exiting\n");
5299 		return (DDI_FAILURE);
5300 	}
5301 
5302 
5303 	un->un_save_state = un->un_last_state;
5304 	New_state(un, SD_STATE_SUSPENDED);
5305 
5306 	/*
5307 	 * Wait for all commands that are in transport or queued to a timer
5308 	 * for retry to complete.
5309 	 *
5310 	 * While waiting, no new commands will be accepted or sent because of
5311 	 * the new state we set above.
5312 	 *
5313 	 * Wait till current operation has completed. If we are in the resource
5314 	 * wait state (with an intr outstanding) then we need to wait till the
5315 	 * intr completes and starts the next cmd. We want to wait for
5316 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5317 	 */
5318 	wait_cmds_complete = ddi_get_lbolt() +
5319 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5320 
5321 	while (un->un_ncmds_in_transport != 0) {
5322 		/*
5323 		 * Fail if commands do not finish in the specified time.
5324 		 */
5325 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5326 		    wait_cmds_complete) == -1) {
5327 			/*
5328 			 * Undo the state changes made above. Everything
5329 			 * must go back to it's original value.
5330 			 */
5331 			Restore_state(un);
5332 			un->un_last_state = un->un_save_state;
5333 			/* Wake up any threads that might be waiting. */
5334 			cv_broadcast(&un->un_suspend_cv);
5335 			mutex_exit(SD_MUTEX(un));
5336 			SD_ERROR(SD_LOG_IO_PM, un,
5337 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5338 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5339 			return (DDI_FAILURE);
5340 		}
5341 	}
5342 
5343 	/*
5344 	 * Cancel SCSI watch thread and timeouts, if any are active
5345 	 */
5346 
5347 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5348 		opaque_t temp_token = un->un_swr_token;
5349 		mutex_exit(SD_MUTEX(un));
5350 		scsi_watch_suspend(temp_token);
5351 		mutex_enter(SD_MUTEX(un));
5352 	}
5353 
5354 	if (un->un_reset_throttle_timeid != NULL) {
5355 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5356 		un->un_reset_throttle_timeid = NULL;
5357 		mutex_exit(SD_MUTEX(un));
5358 		(void) untimeout(temp_id);
5359 		mutex_enter(SD_MUTEX(un));
5360 	}
5361 
5362 	if (un->un_dcvb_timeid != NULL) {
5363 		timeout_id_t temp_id = un->un_dcvb_timeid;
5364 		un->un_dcvb_timeid = NULL;
5365 		mutex_exit(SD_MUTEX(un));
5366 		(void) untimeout(temp_id);
5367 		mutex_enter(SD_MUTEX(un));
5368 	}
5369 
5370 	mutex_enter(&un->un_pm_mutex);
5371 	if (un->un_pm_timeid != NULL) {
5372 		timeout_id_t temp_id = un->un_pm_timeid;
5373 		un->un_pm_timeid = NULL;
5374 		mutex_exit(&un->un_pm_mutex);
5375 		mutex_exit(SD_MUTEX(un));
5376 		(void) untimeout(temp_id);
5377 		mutex_enter(SD_MUTEX(un));
5378 	} else {
5379 		mutex_exit(&un->un_pm_mutex);
5380 	}
5381 
5382 	if (un->un_retry_timeid != NULL) {
5383 		timeout_id_t temp_id = un->un_retry_timeid;
5384 		un->un_retry_timeid = NULL;
5385 		mutex_exit(SD_MUTEX(un));
5386 		(void) untimeout(temp_id);
5387 		mutex_enter(SD_MUTEX(un));
5388 	}
5389 
5390 	if (un->un_direct_priority_timeid != NULL) {
5391 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5392 		un->un_direct_priority_timeid = NULL;
5393 		mutex_exit(SD_MUTEX(un));
5394 		(void) untimeout(temp_id);
5395 		mutex_enter(SD_MUTEX(un));
5396 	}
5397 
5398 	if (un->un_f_is_fibre == TRUE) {
5399 		/*
5400 		 * Remove callbacks for insert and remove events
5401 		 */
5402 		if (un->un_insert_event != NULL) {
5403 			mutex_exit(SD_MUTEX(un));
5404 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5405 			mutex_enter(SD_MUTEX(un));
5406 			un->un_insert_event = NULL;
5407 		}
5408 
5409 		if (un->un_remove_event != NULL) {
5410 			mutex_exit(SD_MUTEX(un));
5411 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5412 			mutex_enter(SD_MUTEX(un));
5413 			un->un_remove_event = NULL;
5414 		}
5415 	}
5416 
5417 	mutex_exit(SD_MUTEX(un));
5418 
5419 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5420 
5421 	return (DDI_SUCCESS);
5422 }
5423 
5424 
5425 /*
5426  *    Function: sd_ddi_pm_suspend
5427  *
5428  * Description: Set the drive state to low power.
5429  *		Someone else is required to actually change the drive
5430  *		power level.
5431  *
5432  *   Arguments: un - driver soft state (unit) structure
5433  *
5434  * Return Code: DDI_FAILURE or DDI_SUCCESS
5435  *
5436  *     Context: Kernel thread context
5437  */
5438 
5439 static int
5440 sd_ddi_pm_suspend(struct sd_lun *un)
5441 {
5442 	ASSERT(un != NULL);
5443 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5444 
5445 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5446 	mutex_enter(SD_MUTEX(un));
5447 
5448 	/*
5449 	 * Exit if power management is not enabled for this device, or if
5450 	 * the device is being used by HA.
5451 	 */
5452 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5453 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5454 		mutex_exit(SD_MUTEX(un));
5455 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5456 		return (DDI_SUCCESS);
5457 	}
5458 
5459 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5460 	    un->un_ncmds_in_driver);
5461 
5462 	/*
5463 	 * See if the device is not busy, ie.:
5464 	 *    - we have no commands in the driver for this device
5465 	 *    - not waiting for resources
5466 	 */
5467 	if ((un->un_ncmds_in_driver == 0) &&
5468 	    (un->un_state != SD_STATE_RWAIT)) {
5469 		/*
5470 		 * The device is not busy, so it is OK to go to low power state.
5471 		 * Indicate low power, but rely on someone else to actually
5472 		 * change it.
5473 		 */
5474 		mutex_enter(&un->un_pm_mutex);
5475 		un->un_pm_count = -1;
5476 		mutex_exit(&un->un_pm_mutex);
5477 		un->un_power_level = SD_SPINDLE_OFF;
5478 	}
5479 
5480 	mutex_exit(SD_MUTEX(un));
5481 
5482 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5483 
5484 	return (DDI_SUCCESS);
5485 }
5486 
5487 
5488 /*
5489  *    Function: sd_ddi_resume
5490  *
5491  * Description: Performs system power-up operations..
5492  *
5493  * Return Code: DDI_SUCCESS
5494  *		DDI_FAILURE
5495  *
5496  *     Context: Kernel thread context
5497  */
5498 
5499 static int
5500 sd_ddi_resume(dev_info_t *devi)
5501 {
5502 	struct	sd_lun	*un;
5503 
5504 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5505 	if (un == NULL) {
5506 		return (DDI_FAILURE);
5507 	}
5508 
5509 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5510 
5511 	mutex_enter(SD_MUTEX(un));
5512 	Restore_state(un);
5513 
5514 	/*
5515 	 * Restore the state which was saved to give the
5516 	 * the right state in un_last_state
5517 	 */
5518 	un->un_last_state = un->un_save_state;
5519 	/*
5520 	 * Note: throttle comes back at full.
5521 	 * Also note: this MUST be done before calling pm_raise_power
5522 	 * otherwise the system can get hung in biowait. The scenario where
5523 	 * this'll happen is under cpr suspend. Writing of the system
5524 	 * state goes through sddump, which writes 0 to un_throttle. If
5525 	 * writing the system state then fails, example if the partition is
5526 	 * too small, then cpr attempts a resume. If throttle isn't restored
5527 	 * from the saved value until after calling pm_raise_power then
5528 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5529 	 * in biowait.
5530 	 */
5531 	un->un_throttle = un->un_saved_throttle;
5532 
5533 	/*
5534 	 * The chance of failure is very rare as the only command done in power
5535 	 * entry point is START command when you transition from 0->1 or
5536 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
5537 	 * which suspend was done. Ignore the return value as the resume should
5538 	 * not be failed. In the case of removable media the media need not be
5539 	 * inserted and hence there is a chance that raise power will fail with
5540 	 * media not present.
5541 	 */
5542 	if (un->un_f_attach_spinup) {
5543 		mutex_exit(SD_MUTEX(un));
5544 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5545 		mutex_enter(SD_MUTEX(un));
5546 	}
5547 
5548 	/*
5549 	 * Don't broadcast to the suspend cv and therefore possibly
5550 	 * start I/O until after power has been restored.
5551 	 */
5552 	cv_broadcast(&un->un_suspend_cv);
5553 	cv_broadcast(&un->un_state_cv);
5554 
5555 	/* restart thread */
5556 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5557 		scsi_watch_resume(un->un_swr_token);
5558 	}
5559 
5560 #if (defined(__fibre))
5561 	if (un->un_f_is_fibre == TRUE) {
5562 		/*
5563 		 * Add callbacks for insert and remove events
5564 		 */
5565 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5566 			sd_init_event_callbacks(un);
5567 		}
5568 	}
5569 #endif
5570 
5571 	/*
5572 	 * Transport any pending commands to the target.
5573 	 *
5574 	 * If this is a low-activity device commands in queue will have to wait
5575 	 * until new commands come in, which may take awhile. Also, we
5576 	 * specifically don't check un_ncmds_in_transport because we know that
5577 	 * there really are no commands in progress after the unit was
5578 	 * suspended and we could have reached the throttle level, been
5579 	 * suspended, and have no new commands coming in for awhile. Highly
5580 	 * unlikely, but so is the low-activity disk scenario.
5581 	 */
5582 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5583 
5584 	sd_start_cmds(un, NULL);
5585 	mutex_exit(SD_MUTEX(un));
5586 
5587 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5588 
5589 	return (DDI_SUCCESS);
5590 }
5591 
5592 
5593 /*
5594  *    Function: sd_ddi_pm_resume
5595  *
5596  * Description: Set the drive state to powered on.
5597  *		Someone else is required to actually change the drive
5598  *		power level.
5599  *
5600  *   Arguments: un - driver soft state (unit) structure
5601  *
5602  * Return Code: DDI_SUCCESS
5603  *
5604  *     Context: Kernel thread context
5605  */
5606 
5607 static int
5608 sd_ddi_pm_resume(struct sd_lun *un)
5609 {
5610 	ASSERT(un != NULL);
5611 
5612 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5613 	mutex_enter(SD_MUTEX(un));
5614 	un->un_power_level = SD_SPINDLE_ON;
5615 
5616 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5617 	mutex_enter(&un->un_pm_mutex);
5618 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5619 		un->un_pm_count++;
5620 		ASSERT(un->un_pm_count == 0);
5621 		/*
5622 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5623 		 * un_suspend_cv is for a system resume, not a power management
5624 		 * device resume. (4297749)
5625 		 *	 cv_broadcast(&un->un_suspend_cv);
5626 		 */
5627 	}
5628 	mutex_exit(&un->un_pm_mutex);
5629 	mutex_exit(SD_MUTEX(un));
5630 
5631 	return (DDI_SUCCESS);
5632 }
5633 
5634 
5635 /*
5636  *    Function: sd_pm_idletimeout_handler
5637  *
5638  * Description: A timer routine that's active only while a device is busy.
5639  *		The purpose is to extend slightly the pm framework's busy
5640  *		view of the device to prevent busy/idle thrashing for
5641  *		back-to-back commands. Do this by comparing the current time
5642  *		to the time at which the last command completed and when the
5643  *		difference is greater than sd_pm_idletime, call
5644  *		pm_idle_component. In addition to indicating idle to the pm
5645  *		framework, update the chain type to again use the internal pm
5646  *		layers of the driver.
5647  *
5648  *   Arguments: arg - driver soft state (unit) structure
5649  *
5650  *     Context: Executes in a timeout(9F) thread context
5651  */
5652 
5653 static void
5654 sd_pm_idletimeout_handler(void *arg)
5655 {
5656 	struct sd_lun *un = arg;
5657 
5658 	time_t	now;
5659 
5660 	mutex_enter(&sd_detach_mutex);
5661 	if (un->un_detach_count != 0) {
5662 		/* Abort if the instance is detaching */
5663 		mutex_exit(&sd_detach_mutex);
5664 		return;
5665 	}
5666 	mutex_exit(&sd_detach_mutex);
5667 
5668 	now = ddi_get_time();
5669 	/*
5670 	 * Grab both mutexes, in the proper order, since we're accessing
5671 	 * both PM and softstate variables.
5672 	 */
5673 	mutex_enter(SD_MUTEX(un));
5674 	mutex_enter(&un->un_pm_mutex);
5675 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5676 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5677 		/*
5678 		 * Update the chain types.
5679 		 * This takes affect on the next new command received.
5680 		 */
5681 		if (un->un_f_non_devbsize_supported) {
5682 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5683 		} else {
5684 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5685 		}
5686 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5687 
5688 		SD_TRACE(SD_LOG_IO_PM, un,
5689 		    "sd_pm_idletimeout_handler: idling device\n");
5690 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5691 		un->un_pm_idle_timeid = NULL;
5692 	} else {
5693 		un->un_pm_idle_timeid =
5694 			timeout(sd_pm_idletimeout_handler, un,
5695 			(drv_usectohz((clock_t)300000))); /* 300 ms. */
5696 	}
5697 	mutex_exit(&un->un_pm_mutex);
5698 	mutex_exit(SD_MUTEX(un));
5699 }
5700 
5701 
5702 /*
5703  *    Function: sd_pm_timeout_handler
5704  *
5705  * Description: Callback to tell framework we are idle.
5706  *
5707  *     Context: timeout(9f) thread context.
5708  */
5709 
5710 static void
5711 sd_pm_timeout_handler(void *arg)
5712 {
5713 	struct sd_lun *un = arg;
5714 
5715 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5716 	mutex_enter(&un->un_pm_mutex);
5717 	un->un_pm_timeid = NULL;
5718 	mutex_exit(&un->un_pm_mutex);
5719 }
5720 
5721 
5722 /*
5723  *    Function: sdpower
5724  *
5725  * Description: PM entry point.
5726  *
5727  * Return Code: DDI_SUCCESS
5728  *		DDI_FAILURE
5729  *
5730  *     Context: Kernel thread context
5731  */
5732 
5733 static int
5734 sdpower(dev_info_t *devi, int component, int level)
5735 {
5736 	struct sd_lun	*un;
5737 	int		instance;
5738 	int		rval = DDI_SUCCESS;
5739 	uint_t		i, log_page_size, maxcycles, ncycles;
5740 	uchar_t		*log_page_data;
5741 	int		log_sense_page;
5742 	int		medium_present;
5743 	time_t		intvlp;
5744 	dev_t		dev;
5745 	struct pm_trans_data	sd_pm_tran_data;
5746 	uchar_t		save_state;
5747 	int		sval;
5748 	uchar_t		state_before_pm;
5749 	int		got_semaphore_here;
5750 
5751 	instance = ddi_get_instance(devi);
5752 
5753 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5754 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5755 	    component != 0) {
5756 		return (DDI_FAILURE);
5757 	}
5758 
5759 	dev = sd_make_device(SD_DEVINFO(un));
5760 
5761 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5762 
5763 	/*
5764 	 * Must synchronize power down with close.
5765 	 * Attempt to decrement/acquire the open/close semaphore,
5766 	 * but do NOT wait on it. If it's not greater than zero,
5767 	 * ie. it can't be decremented without waiting, then
5768 	 * someone else, either open or close, already has it
5769 	 * and the try returns 0. Use that knowledge here to determine
5770 	 * if it's OK to change the device power level.
5771 	 * Also, only increment it on exit if it was decremented, ie. gotten,
5772 	 * here.
5773 	 */
5774 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5775 
5776 	mutex_enter(SD_MUTEX(un));
5777 
5778 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5779 	    un->un_ncmds_in_driver);
5780 
5781 	/*
5782 	 * If un_ncmds_in_driver is non-zero it indicates commands are
5783 	 * already being processed in the driver, or if the semaphore was
5784 	 * not gotten here it indicates an open or close is being processed.
5785 	 * At the same time somebody is requesting to go low power which
5786 	 * can't happen, therefore we need to return failure.
5787 	 */
5788 	if ((level == SD_SPINDLE_OFF) &&
5789 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5790 		mutex_exit(SD_MUTEX(un));
5791 
5792 		if (got_semaphore_here != 0) {
5793 			sema_v(&un->un_semoclose);
5794 		}
5795 		SD_TRACE(SD_LOG_IO_PM, un,
5796 		    "sdpower: exit, device has queued cmds.\n");
5797 		return (DDI_FAILURE);
5798 	}
5799 
5800 	/*
5801 	 * if it is OFFLINE that means the disk is completely dead
5802 	 * in our case we have to put the disk in on or off by sending commands
5803 	 * Of course that will fail anyway so return back here.
5804 	 *
5805 	 * Power changes to a device that's OFFLINE or SUSPENDED
5806 	 * are not allowed.
5807 	 */
5808 	if ((un->un_state == SD_STATE_OFFLINE) ||
5809 	    (un->un_state == SD_STATE_SUSPENDED)) {
5810 		mutex_exit(SD_MUTEX(un));
5811 
5812 		if (got_semaphore_here != 0) {
5813 			sema_v(&un->un_semoclose);
5814 		}
5815 		SD_TRACE(SD_LOG_IO_PM, un,
5816 		    "sdpower: exit, device is off-line.\n");
5817 		return (DDI_FAILURE);
5818 	}
5819 
5820 	/*
5821 	 * Change the device's state to indicate it's power level
5822 	 * is being changed. Do this to prevent a power off in the
5823 	 * middle of commands, which is especially bad on devices
5824 	 * that are really powered off instead of just spun down.
5825 	 */
5826 	state_before_pm = un->un_state;
5827 	un->un_state = SD_STATE_PM_CHANGING;
5828 
5829 	mutex_exit(SD_MUTEX(un));
5830 
5831 	/*
5832 	 * If "pm-capable" property is set to TRUE by HBA drivers,
5833 	 * bypass the following checking, otherwise, check the log
5834 	 * sense information for this device
5835 	 */
5836 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5837 		/*
5838 		 * Get the log sense information to understand whether the
5839 		 * the powercycle counts have gone beyond the threshhold.
5840 		 */
5841 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5842 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5843 
5844 		mutex_enter(SD_MUTEX(un));
5845 		log_sense_page = un->un_start_stop_cycle_page;
5846 		mutex_exit(SD_MUTEX(un));
5847 
5848 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5849 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5850 #ifdef	SDDEBUG
5851 		if (sd_force_pm_supported) {
5852 			/* Force a successful result */
5853 			rval = 0;
5854 		}
5855 #endif
5856 		if (rval != 0) {
5857 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5858 			    "Log Sense Failed\n");
5859 			kmem_free(log_page_data, log_page_size);
5860 			/* Cannot support power management on those drives */
5861 
5862 			if (got_semaphore_here != 0) {
5863 				sema_v(&un->un_semoclose);
5864 			}
5865 			/*
5866 			 * On exit put the state back to it's original value
5867 			 * and broadcast to anyone waiting for the power
5868 			 * change completion.
5869 			 */
5870 			mutex_enter(SD_MUTEX(un));
5871 			un->un_state = state_before_pm;
5872 			cv_broadcast(&un->un_suspend_cv);
5873 			mutex_exit(SD_MUTEX(un));
5874 			SD_TRACE(SD_LOG_IO_PM, un,
5875 			    "sdpower: exit, Log Sense Failed.\n");
5876 			return (DDI_FAILURE);
5877 		}
5878 
5879 		/*
5880 		 * From the page data - Convert the essential information to
5881 		 * pm_trans_data
5882 		 */
5883 		maxcycles =
5884 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5885 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5886 
5887 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5888 
5889 		ncycles =
5890 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5891 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5892 
5893 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5894 
5895 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5896 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5897 			    log_page_data[8+i];
5898 		}
5899 
5900 		kmem_free(log_page_data, log_page_size);
5901 
5902 		/*
5903 		 * Call pm_trans_check routine to get the Ok from
5904 		 * the global policy
5905 		 */
5906 
5907 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
5908 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
5909 
5910 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
5911 #ifdef	SDDEBUG
5912 		if (sd_force_pm_supported) {
5913 			/* Force a successful result */
5914 			rval = 1;
5915 		}
5916 #endif
5917 		switch (rval) {
5918 		case 0:
5919 			/*
5920 			 * Not Ok to Power cycle or error in parameters passed
5921 			 * Would have given the advised time to consider power
5922 			 * cycle. Based on the new intvlp parameter we are
5923 			 * supposed to pretend we are busy so that pm framework
5924 			 * will never call our power entry point. Because of
5925 			 * that install a timeout handler and wait for the
5926 			 * recommended time to elapse so that power management
5927 			 * can be effective again.
5928 			 *
5929 			 * To effect this behavior, call pm_busy_component to
5930 			 * indicate to the framework this device is busy.
5931 			 * By not adjusting un_pm_count the rest of PM in
5932 			 * the driver will function normally, and independant
5933 			 * of this but because the framework is told the device
5934 			 * is busy it won't attempt powering down until it gets
5935 			 * a matching idle. The timeout handler sends this.
5936 			 * Note: sd_pm_entry can't be called here to do this
5937 			 * because sdpower may have been called as a result
5938 			 * of a call to pm_raise_power from within sd_pm_entry.
5939 			 *
5940 			 * If a timeout handler is already active then
5941 			 * don't install another.
5942 			 */
5943 			mutex_enter(&un->un_pm_mutex);
5944 			if (un->un_pm_timeid == NULL) {
5945 				un->un_pm_timeid =
5946 				    timeout(sd_pm_timeout_handler,
5947 				    un, intvlp * drv_usectohz(1000000));
5948 				mutex_exit(&un->un_pm_mutex);
5949 				(void) pm_busy_component(SD_DEVINFO(un), 0);
5950 			} else {
5951 				mutex_exit(&un->un_pm_mutex);
5952 			}
5953 			if (got_semaphore_here != 0) {
5954 				sema_v(&un->un_semoclose);
5955 			}
5956 			/*
5957 			 * On exit put the state back to it's original value
5958 			 * and broadcast to anyone waiting for the power
5959 			 * change completion.
5960 			 */
5961 			mutex_enter(SD_MUTEX(un));
5962 			un->un_state = state_before_pm;
5963 			cv_broadcast(&un->un_suspend_cv);
5964 			mutex_exit(SD_MUTEX(un));
5965 
5966 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
5967 			    "trans check Failed, not ok to power cycle.\n");
5968 			return (DDI_FAILURE);
5969 
5970 		case -1:
5971 			if (got_semaphore_here != 0) {
5972 				sema_v(&un->un_semoclose);
5973 			}
5974 			/*
5975 			 * On exit put the state back to it's original value
5976 			 * and broadcast to anyone waiting for the power
5977 			 * change completion.
5978 			 */
5979 			mutex_enter(SD_MUTEX(un));
5980 			un->un_state = state_before_pm;
5981 			cv_broadcast(&un->un_suspend_cv);
5982 			mutex_exit(SD_MUTEX(un));
5983 			SD_TRACE(SD_LOG_IO_PM, un,
5984 			    "sdpower: exit, trans check command Failed.\n");
5985 			return (DDI_FAILURE);
5986 		}
5987 	}
5988 
5989 	if (level == SD_SPINDLE_OFF) {
5990 		/*
5991 		 * Save the last state... if the STOP FAILS we need it
5992 		 * for restoring
5993 		 */
5994 		mutex_enter(SD_MUTEX(un));
5995 		save_state = un->un_last_state;
5996 		/*
5997 		 * There must not be any cmds. getting processed
5998 		 * in the driver when we get here. Power to the
5999 		 * device is potentially going off.
6000 		 */
6001 		ASSERT(un->un_ncmds_in_driver == 0);
6002 		mutex_exit(SD_MUTEX(un));
6003 
6004 		/*
6005 		 * For now suspend the device completely before spindle is
6006 		 * turned off
6007 		 */
6008 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6009 			if (got_semaphore_here != 0) {
6010 				sema_v(&un->un_semoclose);
6011 			}
6012 			/*
6013 			 * On exit put the state back to it's original value
6014 			 * and broadcast to anyone waiting for the power
6015 			 * change completion.
6016 			 */
6017 			mutex_enter(SD_MUTEX(un));
6018 			un->un_state = state_before_pm;
6019 			cv_broadcast(&un->un_suspend_cv);
6020 			mutex_exit(SD_MUTEX(un));
6021 			SD_TRACE(SD_LOG_IO_PM, un,
6022 			    "sdpower: exit, PM suspend Failed.\n");
6023 			return (DDI_FAILURE);
6024 		}
6025 	}
6026 
6027 	/*
6028 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6029 	 * close, or strategy. Dump no long uses this routine, it uses it's
6030 	 * own code so it can be done in polled mode.
6031 	 */
6032 
6033 	medium_present = TRUE;
6034 
6035 	/*
6036 	 * When powering up, issue a TUR in case the device is at unit
6037 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6038 	 * a deadlock on un_pm_busy_cv will occur.
6039 	 */
6040 	if (level == SD_SPINDLE_ON) {
6041 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6042 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6043 	}
6044 
6045 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6046 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6047 
6048 	sval = sd_send_scsi_START_STOP_UNIT(un,
6049 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6050 	    SD_PATH_DIRECT);
6051 	/* Command failed, check for media present. */
6052 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6053 		medium_present = FALSE;
6054 	}
6055 
6056 	/*
6057 	 * The conditions of interest here are:
6058 	 *   if a spindle off with media present fails,
6059 	 *	then restore the state and return an error.
6060 	 *   else if a spindle on fails,
6061 	 *	then return an error (there's no state to restore).
6062 	 * In all other cases we setup for the new state
6063 	 * and return success.
6064 	 */
6065 	switch (level) {
6066 	case SD_SPINDLE_OFF:
6067 		if ((medium_present == TRUE) && (sval != 0)) {
6068 			/* The stop command from above failed */
6069 			rval = DDI_FAILURE;
6070 			/*
6071 			 * The stop command failed, and we have media
6072 			 * present. Put the level back by calling the
6073 			 * sd_pm_resume() and set the state back to
6074 			 * it's previous value.
6075 			 */
6076 			(void) sd_ddi_pm_resume(un);
6077 			mutex_enter(SD_MUTEX(un));
6078 			un->un_last_state = save_state;
6079 			mutex_exit(SD_MUTEX(un));
6080 			break;
6081 		}
6082 		/*
6083 		 * The stop command from above succeeded.
6084 		 */
6085 		if (un->un_f_monitor_media_state) {
6086 			/*
6087 			 * Terminate watch thread in case of removable media
6088 			 * devices going into low power state. This is as per
6089 			 * the requirements of pm framework, otherwise commands
6090 			 * will be generated for the device (through watch
6091 			 * thread), even when the device is in low power state.
6092 			 */
6093 			mutex_enter(SD_MUTEX(un));
6094 			un->un_f_watcht_stopped = FALSE;
6095 			if (un->un_swr_token != NULL) {
6096 				opaque_t temp_token = un->un_swr_token;
6097 				un->un_f_watcht_stopped = TRUE;
6098 				un->un_swr_token = NULL;
6099 				mutex_exit(SD_MUTEX(un));
6100 				(void) scsi_watch_request_terminate(temp_token,
6101 				    SCSI_WATCH_TERMINATE_WAIT);
6102 			} else {
6103 				mutex_exit(SD_MUTEX(un));
6104 			}
6105 		}
6106 		break;
6107 
6108 	default:	/* The level requested is spindle on... */
6109 		/*
6110 		 * Legacy behavior: return success on a failed spinup
6111 		 * if there is no media in the drive.
6112 		 * Do this by looking at medium_present here.
6113 		 */
6114 		if ((sval != 0) && medium_present) {
6115 			/* The start command from above failed */
6116 			rval = DDI_FAILURE;
6117 			break;
6118 		}
6119 		/*
6120 		 * The start command from above succeeded
6121 		 * Resume the devices now that we have
6122 		 * started the disks
6123 		 */
6124 		(void) sd_ddi_pm_resume(un);
6125 
6126 		/*
6127 		 * Resume the watch thread since it was suspended
6128 		 * when the device went into low power mode.
6129 		 */
6130 		if (un->un_f_monitor_media_state) {
6131 			mutex_enter(SD_MUTEX(un));
6132 			if (un->un_f_watcht_stopped == TRUE) {
6133 				opaque_t temp_token;
6134 
6135 				un->un_f_watcht_stopped = FALSE;
6136 				mutex_exit(SD_MUTEX(un));
6137 				temp_token = scsi_watch_request_submit(
6138 				    SD_SCSI_DEVP(un),
6139 				    sd_check_media_time,
6140 				    SENSE_LENGTH, sd_media_watch_cb,
6141 				    (caddr_t)dev);
6142 				mutex_enter(SD_MUTEX(un));
6143 				un->un_swr_token = temp_token;
6144 			}
6145 			mutex_exit(SD_MUTEX(un));
6146 		}
6147 	}
6148 	if (got_semaphore_here != 0) {
6149 		sema_v(&un->un_semoclose);
6150 	}
6151 	/*
6152 	 * On exit put the state back to it's original value
6153 	 * and broadcast to anyone waiting for the power
6154 	 * change completion.
6155 	 */
6156 	mutex_enter(SD_MUTEX(un));
6157 	un->un_state = state_before_pm;
6158 	cv_broadcast(&un->un_suspend_cv);
6159 	mutex_exit(SD_MUTEX(un));
6160 
6161 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6162 
6163 	return (rval);
6164 }
6165 
6166 
6167 
6168 /*
6169  *    Function: sdattach
6170  *
6171  * Description: Driver's attach(9e) entry point function.
6172  *
6173  *   Arguments: devi - opaque device info handle
6174  *		cmd  - attach  type
6175  *
6176  * Return Code: DDI_SUCCESS
6177  *		DDI_FAILURE
6178  *
6179  *     Context: Kernel thread context
6180  */
6181 
6182 static int
6183 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6184 {
6185 	switch (cmd) {
6186 	case DDI_ATTACH:
6187 		return (sd_unit_attach(devi));
6188 	case DDI_RESUME:
6189 		return (sd_ddi_resume(devi));
6190 	default:
6191 		break;
6192 	}
6193 	return (DDI_FAILURE);
6194 }
6195 
6196 
6197 /*
6198  *    Function: sddetach
6199  *
6200  * Description: Driver's detach(9E) entry point function.
6201  *
6202  *   Arguments: devi - opaque device info handle
6203  *		cmd  - detach  type
6204  *
6205  * Return Code: DDI_SUCCESS
6206  *		DDI_FAILURE
6207  *
6208  *     Context: Kernel thread context
6209  */
6210 
6211 static int
6212 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6213 {
6214 	switch (cmd) {
6215 	case DDI_DETACH:
6216 		return (sd_unit_detach(devi));
6217 	case DDI_SUSPEND:
6218 		return (sd_ddi_suspend(devi));
6219 	default:
6220 		break;
6221 	}
6222 	return (DDI_FAILURE);
6223 }
6224 
6225 
6226 /*
6227  *     Function: sd_sync_with_callback
6228  *
6229  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6230  *		 state while the callback routine is active.
6231  *
6232  *    Arguments: un: softstate structure for the instance
6233  *
6234  *	Context: Kernel thread context
6235  */
6236 
6237 static void
6238 sd_sync_with_callback(struct sd_lun *un)
6239 {
6240 	ASSERT(un != NULL);
6241 
6242 	mutex_enter(SD_MUTEX(un));
6243 
6244 	ASSERT(un->un_in_callback >= 0);
6245 
6246 	while (un->un_in_callback > 0) {
6247 		mutex_exit(SD_MUTEX(un));
6248 		delay(2);
6249 		mutex_enter(SD_MUTEX(un));
6250 	}
6251 
6252 	mutex_exit(SD_MUTEX(un));
6253 }
6254 
6255 /*
6256  *    Function: sd_unit_attach
6257  *
6258  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6259  *		the soft state structure for the device and performs
6260  *		all necessary structure and device initializations.
6261  *
6262  *   Arguments: devi: the system's dev_info_t for the device.
6263  *
6264  * Return Code: DDI_SUCCESS if attach is successful.
6265  *		DDI_FAILURE if any part of the attach fails.
6266  *
6267  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6268  *		Kernel thread context only.  Can sleep.
6269  */
6270 
6271 static int
6272 sd_unit_attach(dev_info_t *devi)
6273 {
6274 	struct	scsi_device	*devp;
6275 	struct	sd_lun		*un;
6276 	char			*variantp;
6277 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6278 	int	instance;
6279 	int	rval;
6280 	int	wc_enabled;
6281 	int	tgt;
6282 	uint64_t	capacity;
6283 	uint_t		lbasize = 0;
6284 	dev_info_t	*pdip = ddi_get_parent(devi);
6285 	int		offbyone = 0;
6286 	int		geom_label_valid = 0;
6287 
6288 	/*
6289 	 * Retrieve the target driver's private data area. This was set
6290 	 * up by the HBA.
6291 	 */
6292 	devp = ddi_get_driver_private(devi);
6293 
6294 	/*
6295 	 * Retrieve the target ID of the device.
6296 	 */
6297 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6298 	    SCSI_ADDR_PROP_TARGET, -1);
6299 
6300 	/*
6301 	 * Since we have no idea what state things were left in by the last
6302 	 * user of the device, set up some 'default' settings, ie. turn 'em
6303 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6304 	 * Do this before the scsi_probe, which sends an inquiry.
6305 	 * This is a fix for bug (4430280).
6306 	 * Of special importance is wide-xfer. The drive could have been left
6307 	 * in wide transfer mode by the last driver to communicate with it,
6308 	 * this includes us. If that's the case, and if the following is not
6309 	 * setup properly or we don't re-negotiate with the drive prior to
6310 	 * transferring data to/from the drive, it causes bus parity errors,
6311 	 * data overruns, and unexpected interrupts. This first occurred when
6312 	 * the fix for bug (4378686) was made.
6313 	 */
6314 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6315 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6316 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6317 
6318 	/*
6319 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6320 	 * on a target. Setting it per lun instance actually sets the
6321 	 * capability of this target, which affects those luns already
6322 	 * attached on the same target. So during attach, we can only disable
6323 	 * this capability only when no other lun has been attached on this
6324 	 * target. By doing this, we assume a target has the same tagged-qing
6325 	 * capability for every lun. The condition can be removed when HBA
6326 	 * is changed to support per lun based tagged-qing capability.
6327 	 */
6328 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6329 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6330 	}
6331 
6332 	/*
6333 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6334 	 * This call will allocate and fill in the scsi_inquiry structure
6335 	 * and point the sd_inq member of the scsi_device structure to it.
6336 	 * If the attach succeeds, then this memory will not be de-allocated
6337 	 * (via scsi_unprobe()) until the instance is detached.
6338 	 */
6339 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6340 		goto probe_failed;
6341 	}
6342 
6343 	/*
6344 	 * Check the device type as specified in the inquiry data and
6345 	 * claim it if it is of a type that we support.
6346 	 */
6347 	switch (devp->sd_inq->inq_dtype) {
6348 	case DTYPE_DIRECT:
6349 		break;
6350 	case DTYPE_RODIRECT:
6351 		break;
6352 	case DTYPE_OPTICAL:
6353 		break;
6354 	case DTYPE_NOTPRESENT:
6355 	default:
6356 		/* Unsupported device type; fail the attach. */
6357 		goto probe_failed;
6358 	}
6359 
6360 	/*
6361 	 * Allocate the soft state structure for this unit.
6362 	 *
6363 	 * We rely upon this memory being set to all zeroes by
6364 	 * ddi_soft_state_zalloc().  We assume that any member of the
6365 	 * soft state structure that is not explicitly initialized by
6366 	 * this routine will have a value of zero.
6367 	 */
6368 	instance = ddi_get_instance(devp->sd_dev);
6369 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6370 		goto probe_failed;
6371 	}
6372 
6373 	/*
6374 	 * Retrieve a pointer to the newly-allocated soft state.
6375 	 *
6376 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6377 	 * was successful, unless something has gone horribly wrong and the
6378 	 * ddi's soft state internals are corrupt (in which case it is
6379 	 * probably better to halt here than just fail the attach....)
6380 	 */
6381 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6382 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6383 		    instance);
6384 		/*NOTREACHED*/
6385 	}
6386 
6387 	/*
6388 	 * Link the back ptr of the driver soft state to the scsi_device
6389 	 * struct for this lun.
6390 	 * Save a pointer to the softstate in the driver-private area of
6391 	 * the scsi_device struct.
6392 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6393 	 * we first set un->un_sd below.
6394 	 */
6395 	un->un_sd = devp;
6396 	devp->sd_private = (opaque_t)un;
6397 
6398 	/*
6399 	 * The following must be after devp is stored in the soft state struct.
6400 	 */
6401 #ifdef SDDEBUG
6402 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6403 	    "%s_unit_attach: un:0x%p instance:%d\n",
6404 	    ddi_driver_name(devi), un, instance);
6405 #endif
6406 
6407 	/*
6408 	 * Set up the device type and node type (for the minor nodes).
6409 	 * By default we assume that the device can at least support the
6410 	 * Common Command Set. Call it a CD-ROM if it reports itself
6411 	 * as a RODIRECT device.
6412 	 */
6413 	switch (devp->sd_inq->inq_dtype) {
6414 	case DTYPE_RODIRECT:
6415 		un->un_node_type = DDI_NT_CD_CHAN;
6416 		un->un_ctype	 = CTYPE_CDROM;
6417 		break;
6418 	case DTYPE_OPTICAL:
6419 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6420 		un->un_ctype	 = CTYPE_ROD;
6421 		break;
6422 	default:
6423 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6424 		un->un_ctype	 = CTYPE_CCS;
6425 		break;
6426 	}
6427 
6428 	/*
6429 	 * Try to read the interconnect type from the HBA.
6430 	 *
6431 	 * Note: This driver is currently compiled as two binaries, a parallel
6432 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6433 	 * differences are determined at compile time. In the future a single
6434 	 * binary will be provided and the interconnect type will be used to
6435 	 * differentiate between fibre and parallel scsi behaviors. At that time
6436 	 * it will be necessary for all fibre channel HBAs to support this
6437 	 * property.
6438 	 *
6439 	 * Set un_f_is_fibre to TRUE (default to fibre)
6440 	 */
6441 	un->un_f_is_fibre = TRUE;
6442 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6443 	case INTERCONNECT_SSA:
6444 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6445 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6446 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6447 		break;
6448 	case INTERCONNECT_PARALLEL:
6449 		un->un_f_is_fibre = FALSE;
6450 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6451 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6452 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6453 		break;
6454 	case INTERCONNECT_SATA:
6455 		un->un_f_is_fibre = FALSE;
6456 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6457 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6458 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6459 		break;
6460 	case INTERCONNECT_FIBRE:
6461 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6462 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6463 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6464 		break;
6465 	case INTERCONNECT_FABRIC:
6466 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6467 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6468 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6469 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6470 		break;
6471 	default:
6472 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6473 		/*
6474 		 * The HBA does not support the "interconnect-type" property
6475 		 * (or did not provide a recognized type).
6476 		 *
6477 		 * Note: This will be obsoleted when a single fibre channel
6478 		 * and parallel scsi driver is delivered. In the meantime the
6479 		 * interconnect type will be set to the platform default. If
6480 		 * that type is not parallel SCSI, it means that we should be
6481 		 * assuming "ssd" semantics. However, here this also means that
6482 		 * the FC HBA is not supporting the "interconnect-type" property
6483 		 * like we expect it to, so log this occurrence.
6484 		 */
6485 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6486 		if (!SD_IS_PARALLEL_SCSI(un)) {
6487 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6488 			    "sd_unit_attach: un:0x%p Assuming "
6489 			    "INTERCONNECT_FIBRE\n", un);
6490 		} else {
6491 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6492 			    "sd_unit_attach: un:0x%p Assuming "
6493 			    "INTERCONNECT_PARALLEL\n", un);
6494 			un->un_f_is_fibre = FALSE;
6495 		}
6496 #else
6497 		/*
6498 		 * Note: This source will be implemented when a single fibre
6499 		 * channel and parallel scsi driver is delivered. The default
6500 		 * will be to assume that if a device does not support the
6501 		 * "interconnect-type" property it is a parallel SCSI HBA and
6502 		 * we will set the interconnect type for parallel scsi.
6503 		 */
6504 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6505 		un->un_f_is_fibre = FALSE;
6506 #endif
6507 		break;
6508 	}
6509 
6510 	if (un->un_f_is_fibre == TRUE) {
6511 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6512 			SCSI_VERSION_3) {
6513 			switch (un->un_interconnect_type) {
6514 			case SD_INTERCONNECT_FIBRE:
6515 			case SD_INTERCONNECT_SSA:
6516 				un->un_node_type = DDI_NT_BLOCK_WWN;
6517 				break;
6518 			default:
6519 				break;
6520 			}
6521 		}
6522 	}
6523 
6524 	/*
6525 	 * Initialize the Request Sense command for the target
6526 	 */
6527 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6528 		goto alloc_rqs_failed;
6529 	}
6530 
6531 	/*
6532 	 * Set un_retry_count to SD_RETRY_COUNT; this is ok for Sparc,
6533 	 * which has separate binaries for sd and ssd.
6534 	 *
6535 	 * x86 has 1 binary, so un_retry_count is set based on connection type.
6536 	 * The hardcoded values will go away when Sparc uses 1 binary
6537 	 * for sd and ssd.  These hardcoded values need to match
6538 	 * SD_RETRY_COUNT in sddef.h.
6539 	 * The value used is based on the interconnect type:
6540 	 * fibre = 3, parallel = 5
6541 	 */
6542 #if defined(__i386) || defined(__amd64)
6543 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6544 #else
6545 	un->un_retry_count = SD_RETRY_COUNT;
6546 #endif
6547 
6548 	/*
6549 	 * Set the per disk retry count to the default number of retries
6550 	 * for disks and CDROMs. This value can be overridden by the
6551 	 * disk property list or an entry in sd.conf.
6552 	 */
6553 	un->un_notready_retry_count =
6554 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6555 			: DISK_NOT_READY_RETRY_COUNT(un);
6556 
6557 	/*
6558 	 * Set the busy retry count to the default value of un_retry_count.
6559 	 * This can be overridden by entries in sd.conf or the device
6560 	 * config table.
6561 	 */
6562 	un->un_busy_retry_count = un->un_retry_count;
6563 
6564 	/*
6565 	 * Init the reset threshold for retries.  This number determines
6566 	 * how many retries must be performed before a reset can be issued
6567 	 * (for certain error conditions). This can be overridden by entries
6568 	 * in sd.conf or the device config table.
6569 	 */
6570 	un->un_reset_retry_count = (un->un_retry_count / 2);
6571 
6572 	/*
6573 	 * Set the victim_retry_count to the default un_retry_count
6574 	 */
6575 	un->un_victim_retry_count = (2 * un->un_retry_count);
6576 
6577 	/*
6578 	 * Set the reservation release timeout to the default value of
6579 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6580 	 * device config table.
6581 	 */
6582 	un->un_reserve_release_time = 5;
6583 
6584 	/*
6585 	 * Set up the default maximum transfer size. Note that this may
6586 	 * get updated later in the attach, when setting up default wide
6587 	 * operations for disks.
6588 	 */
6589 #if defined(__i386) || defined(__amd64)
6590 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6591 #else
6592 	un->un_max_xfer_size = (uint_t)maxphys;
6593 #endif
6594 
6595 	/*
6596 	 * Get "allow bus device reset" property (defaults to "enabled" if
6597 	 * the property was not defined). This is to disable bus resets for
6598 	 * certain kinds of error recovery. Note: In the future when a run-time
6599 	 * fibre check is available the soft state flag should default to
6600 	 * enabled.
6601 	 */
6602 	if (un->un_f_is_fibre == TRUE) {
6603 		un->un_f_allow_bus_device_reset = TRUE;
6604 	} else {
6605 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6606 			"allow-bus-device-reset", 1) != 0) {
6607 			un->un_f_allow_bus_device_reset = TRUE;
6608 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6609 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
6610 				un);
6611 		} else {
6612 			un->un_f_allow_bus_device_reset = FALSE;
6613 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6614 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
6615 				un);
6616 		}
6617 	}
6618 
6619 	/*
6620 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6621 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6622 	 *
6623 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6624 	 * property. The new "variant" property with a value of "atapi" has been
6625 	 * introduced so that future 'variants' of standard SCSI behavior (like
6626 	 * atapi) could be specified by the underlying HBA drivers by supplying
6627 	 * a new value for the "variant" property, instead of having to define a
6628 	 * new property.
6629 	 */
6630 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6631 		un->un_f_cfg_is_atapi = TRUE;
6632 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6633 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6634 	}
6635 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6636 	    &variantp) == DDI_PROP_SUCCESS) {
6637 		if (strcmp(variantp, "atapi") == 0) {
6638 			un->un_f_cfg_is_atapi = TRUE;
6639 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6640 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6641 		}
6642 		ddi_prop_free(variantp);
6643 	}
6644 
6645 	un->un_cmd_timeout	= SD_IO_TIME;
6646 
6647 	/* Info on current states, statuses, etc. (Updated frequently) */
6648 	un->un_state		= SD_STATE_NORMAL;
6649 	un->un_last_state	= SD_STATE_NORMAL;
6650 
6651 	/* Control & status info for command throttling */
6652 	un->un_throttle		= sd_max_throttle;
6653 	un->un_saved_throttle	= sd_max_throttle;
6654 	un->un_min_throttle	= sd_min_throttle;
6655 
6656 	if (un->un_f_is_fibre == TRUE) {
6657 		un->un_f_use_adaptive_throttle = TRUE;
6658 	} else {
6659 		un->un_f_use_adaptive_throttle = FALSE;
6660 	}
6661 
6662 	/* Removable media support. */
6663 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6664 	un->un_mediastate		= DKIO_NONE;
6665 	un->un_specified_mediastate	= DKIO_NONE;
6666 
6667 	/* CVs for suspend/resume (PM or DR) */
6668 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6669 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6670 
6671 	/* Power management support. */
6672 	un->un_power_level = SD_SPINDLE_UNINIT;
6673 
6674 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6675 	un->un_f_wcc_inprog = 0;
6676 
6677 	/*
6678 	 * The open/close semaphore is used to serialize threads executing
6679 	 * in the driver's open & close entry point routines for a given
6680 	 * instance.
6681 	 */
6682 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
6683 
6684 	/*
6685 	 * The conf file entry and softstate variable is a forceful override,
6686 	 * meaning a non-zero value must be entered to change the default.
6687 	 */
6688 	un->un_f_disksort_disabled = FALSE;
6689 
6690 	/*
6691 	 * Retrieve the properties from the static driver table or the driver
6692 	 * configuration file (.conf) for this unit and update the soft state
6693 	 * for the device as needed for the indicated properties.
6694 	 * Note: the property configuration needs to occur here as some of the
6695 	 * following routines may have dependencies on soft state flags set
6696 	 * as part of the driver property configuration.
6697 	 */
6698 	sd_read_unit_properties(un);
6699 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6700 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6701 
6702 	/*
6703 	 * Only if a device has "hotpluggable" property, it is
6704 	 * treated as hotpluggable device. Otherwise, it is
6705 	 * regarded as non-hotpluggable one.
6706 	 */
6707 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6708 	    -1) != -1) {
6709 		un->un_f_is_hotpluggable = TRUE;
6710 	}
6711 
6712 	/*
6713 	 * set unit's attributes(flags) according to "hotpluggable" and
6714 	 * RMB bit in INQUIRY data.
6715 	 */
6716 	sd_set_unit_attributes(un, devi);
6717 
6718 	/*
6719 	 * By default, we mark the capacity, lbasize, and geometry
6720 	 * as invalid. Only if we successfully read a valid capacity
6721 	 * will we update the un_blockcount and un_tgt_blocksize with the
6722 	 * valid values (the geometry will be validated later).
6723 	 */
6724 	un->un_f_blockcount_is_valid	= FALSE;
6725 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6726 
6727 	/*
6728 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
6729 	 * otherwise.
6730 	 */
6731 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6732 	un->un_blockcount = 0;
6733 
6734 	/*
6735 	 * Set up the per-instance info needed to determine the correct
6736 	 * CDBs and other info for issuing commands to the target.
6737 	 */
6738 	sd_init_cdb_limits(un);
6739 
6740 	/*
6741 	 * Set up the IO chains to use, based upon the target type.
6742 	 */
6743 	if (un->un_f_non_devbsize_supported) {
6744 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6745 	} else {
6746 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6747 	}
6748 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6749 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6750 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6751 
6752 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6753 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6754 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6755 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
6756 
6757 
6758 	if (ISCD(un)) {
6759 		un->un_additional_codes = sd_additional_codes;
6760 	} else {
6761 		un->un_additional_codes = NULL;
6762 	}
6763 
6764 	/*
6765 	 * Create the kstats here so they can be available for attach-time
6766 	 * routines that send commands to the unit (either polled or via
6767 	 * sd_send_scsi_cmd).
6768 	 *
6769 	 * Note: This is a critical sequence that needs to be maintained:
6770 	 *	1) Instantiate the kstats here, before any routines using the
6771 	 *	   iopath (i.e. sd_send_scsi_cmd).
6772 	 *	2) Instantiate and initialize the partition stats
6773 	 *	   (sd_set_pstats).
6774 	 *	3) Initialize the error stats (sd_set_errstats), following
6775 	 *	   sd_validate_geometry(),sd_register_devid(),
6776 	 *	   and sd_cache_control().
6777 	 */
6778 
6779 	un->un_stats = kstat_create(sd_label, instance,
6780 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6781 	if (un->un_stats != NULL) {
6782 		un->un_stats->ks_lock = SD_MUTEX(un);
6783 		kstat_install(un->un_stats);
6784 	}
6785 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6786 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6787 
6788 	sd_create_errstats(un, instance);
6789 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6790 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6791 
6792 	/*
6793 	 * The following if/else code was relocated here from below as part
6794 	 * of the fix for bug (4430280). However with the default setup added
6795 	 * on entry to this routine, it's no longer absolutely necessary for
6796 	 * this to be before the call to sd_spin_up_unit.
6797 	 */
6798 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6799 		/*
6800 		 * If SCSI-2 tagged queueing is supported by the target
6801 		 * and by the host adapter then we will enable it.
6802 		 */
6803 		un->un_tagflags = 0;
6804 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6805 		    (devp->sd_inq->inq_cmdque) &&
6806 		    (un->un_f_arq_enabled == TRUE)) {
6807 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6808 			    1, 1) == 1) {
6809 				un->un_tagflags = FLAG_STAG;
6810 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6811 				    "sd_unit_attach: un:0x%p tag queueing "
6812 				    "enabled\n", un);
6813 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6814 			    "untagged-qing", 0) == 1) {
6815 				un->un_f_opt_queueing = TRUE;
6816 				un->un_saved_throttle = un->un_throttle =
6817 				    min(un->un_throttle, 3);
6818 			} else {
6819 				un->un_f_opt_queueing = FALSE;
6820 				un->un_saved_throttle = un->un_throttle = 1;
6821 			}
6822 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6823 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6824 			/* The Host Adapter supports internal queueing. */
6825 			un->un_f_opt_queueing = TRUE;
6826 			un->un_saved_throttle = un->un_throttle =
6827 			    min(un->un_throttle, 3);
6828 		} else {
6829 			un->un_f_opt_queueing = FALSE;
6830 			un->un_saved_throttle = un->un_throttle = 1;
6831 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6832 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6833 		}
6834 
6835 		/*
6836 		 * Enable large transfers for SATA/SAS drives
6837 		 */
6838 		if (SD_IS_SERIAL(un)) {
6839 			un->un_max_xfer_size =
6840 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6841 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6842 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6843 			    "sd_unit_attach: un:0x%p max transfer "
6844 			    "size=0x%x\n", un, un->un_max_xfer_size);
6845 
6846 		}
6847 
6848 		/* Setup or tear down default wide operations for disks */
6849 
6850 		/*
6851 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6852 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6853 		 * system and be set to different values. In the future this
6854 		 * code may need to be updated when the ssd module is
6855 		 * obsoleted and removed from the system. (4299588)
6856 		 */
6857 		if (SD_IS_PARALLEL_SCSI(un) &&
6858 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6859 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6860 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6861 			    1, 1) == 1) {
6862 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6863 				    "sd_unit_attach: un:0x%p Wide Transfer "
6864 				    "enabled\n", un);
6865 			}
6866 
6867 			/*
6868 			 * If tagged queuing has also been enabled, then
6869 			 * enable large xfers
6870 			 */
6871 			if (un->un_saved_throttle == sd_max_throttle) {
6872 				un->un_max_xfer_size =
6873 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6874 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6875 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6876 				    "sd_unit_attach: un:0x%p max transfer "
6877 				    "size=0x%x\n", un, un->un_max_xfer_size);
6878 			}
6879 		} else {
6880 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6881 			    0, 1) == 1) {
6882 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6883 				    "sd_unit_attach: un:0x%p "
6884 				    "Wide Transfer disabled\n", un);
6885 			}
6886 		}
6887 	} else {
6888 		un->un_tagflags = FLAG_STAG;
6889 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6890 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6891 	}
6892 
6893 	/*
6894 	 * If this target supports LUN reset, try to enable it.
6895 	 */
6896 	if (un->un_f_lun_reset_enabled) {
6897 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6898 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6899 			    "un:0x%p lun_reset capability set\n", un);
6900 		} else {
6901 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6902 			    "un:0x%p lun-reset capability not set\n", un);
6903 		}
6904 	}
6905 
6906 	/*
6907 	 * At this point in the attach, we have enough info in the
6908 	 * soft state to be able to issue commands to the target.
6909 	 *
6910 	 * All command paths used below MUST issue their commands as
6911 	 * SD_PATH_DIRECT. This is important as intermediate layers
6912 	 * are not all initialized yet (such as PM).
6913 	 */
6914 
6915 	/*
6916 	 * Send a TEST UNIT READY command to the device. This should clear
6917 	 * any outstanding UNIT ATTENTION that may be present.
6918 	 *
6919 	 * Note: Don't check for success, just track if there is a reservation,
6920 	 * this is a throw away command to clear any unit attentions.
6921 	 *
6922 	 * Note: This MUST be the first command issued to the target during
6923 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
6924 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
6925 	 * with attempts at spinning up a device with no media.
6926 	 */
6927 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
6928 		reservation_flag = SD_TARGET_IS_RESERVED;
6929 	}
6930 
6931 	/*
6932 	 * If the device is NOT a removable media device, attempt to spin
6933 	 * it up (using the START_STOP_UNIT command) and read its capacity
6934 	 * (using the READ CAPACITY command).  Note, however, that either
6935 	 * of these could fail and in some cases we would continue with
6936 	 * the attach despite the failure (see below).
6937 	 */
6938 	if (un->un_f_descr_format_supported) {
6939 		switch (sd_spin_up_unit(un)) {
6940 		case 0:
6941 			/*
6942 			 * Spin-up was successful; now try to read the
6943 			 * capacity.  If successful then save the results
6944 			 * and mark the capacity & lbasize as valid.
6945 			 */
6946 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6947 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
6948 
6949 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
6950 			    &lbasize, SD_PATH_DIRECT)) {
6951 			case 0: {
6952 				if (capacity > DK_MAX_BLOCKS) {
6953 #ifdef _LP64
6954 					if (capacity + 1 >
6955 					    SD_GROUP1_MAX_ADDRESS) {
6956 						/*
6957 						 * Enable descriptor format
6958 						 * sense data so that we can
6959 						 * get 64 bit sense data
6960 						 * fields.
6961 						 */
6962 						sd_enable_descr_sense(un);
6963 					}
6964 #else
6965 					/* 32-bit kernels can't handle this */
6966 					scsi_log(SD_DEVINFO(un),
6967 					    sd_label, CE_WARN,
6968 					    "disk has %llu blocks, which "
6969 					    "is too large for a 32-bit "
6970 					    "kernel", capacity);
6971 
6972 #if defined(__i386) || defined(__amd64)
6973 					/*
6974 					 * 1TB disk was treated as (1T - 512)B
6975 					 * in the past, so that it might have
6976 					 * valid VTOC and solaris partitions,
6977 					 * we have to allow it to continue to
6978 					 * work.
6979 					 */
6980 					if (capacity -1 > DK_MAX_BLOCKS)
6981 #endif
6982 					goto spinup_failed;
6983 #endif
6984 				}
6985 
6986 				/*
6987 				 * Here it's not necessary to check the case:
6988 				 * the capacity of the device is bigger than
6989 				 * what the max hba cdb can support. Because
6990 				 * sd_send_scsi_READ_CAPACITY will retrieve
6991 				 * the capacity by sending USCSI command, which
6992 				 * is constrained by the max hba cdb. Actually,
6993 				 * sd_send_scsi_READ_CAPACITY will return
6994 				 * EINVAL when using bigger cdb than required
6995 				 * cdb length. Will handle this case in
6996 				 * "case EINVAL".
6997 				 */
6998 
6999 				/*
7000 				 * The following relies on
7001 				 * sd_send_scsi_READ_CAPACITY never
7002 				 * returning 0 for capacity and/or lbasize.
7003 				 */
7004 				sd_update_block_info(un, lbasize, capacity);
7005 
7006 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7007 				    "sd_unit_attach: un:0x%p capacity = %ld "
7008 				    "blocks; lbasize= %ld.\n", un,
7009 				    un->un_blockcount, un->un_tgt_blocksize);
7010 
7011 				break;
7012 			}
7013 			case EINVAL:
7014 				/*
7015 				 * In the case where the max-cdb-length property
7016 				 * is smaller than the required CDB length for
7017 				 * a SCSI device, a target driver can fail to
7018 				 * attach to that device.
7019 				 */
7020 				scsi_log(SD_DEVINFO(un),
7021 				    sd_label, CE_WARN,
7022 				    "disk capacity is too large "
7023 				    "for current cdb length");
7024 				goto spinup_failed;
7025 			case EACCES:
7026 				/*
7027 				 * Should never get here if the spin-up
7028 				 * succeeded, but code it in anyway.
7029 				 * From here, just continue with the attach...
7030 				 */
7031 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7032 				    "sd_unit_attach: un:0x%p "
7033 				    "sd_send_scsi_READ_CAPACITY "
7034 				    "returned reservation conflict\n", un);
7035 				reservation_flag = SD_TARGET_IS_RESERVED;
7036 				break;
7037 			default:
7038 				/*
7039 				 * Likewise, should never get here if the
7040 				 * spin-up succeeded. Just continue with
7041 				 * the attach...
7042 				 */
7043 				break;
7044 			}
7045 			break;
7046 		case EACCES:
7047 			/*
7048 			 * Device is reserved by another host.  In this case
7049 			 * we could not spin it up or read the capacity, but
7050 			 * we continue with the attach anyway.
7051 			 */
7052 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7053 			    "sd_unit_attach: un:0x%p spin-up reservation "
7054 			    "conflict.\n", un);
7055 			reservation_flag = SD_TARGET_IS_RESERVED;
7056 			break;
7057 		default:
7058 			/* Fail the attach if the spin-up failed. */
7059 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7060 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7061 			goto spinup_failed;
7062 		}
7063 	}
7064 
7065 	/*
7066 	 * Check to see if this is a MMC drive
7067 	 */
7068 	if (ISCD(un)) {
7069 		sd_set_mmc_caps(un);
7070 	}
7071 
7072 
7073 	/*
7074 	 * Add a zero-length attribute to tell the world we support
7075 	 * kernel ioctls (for layered drivers)
7076 	 */
7077 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7078 	    DDI_KERNEL_IOCTL, NULL, 0);
7079 
7080 	/*
7081 	 * Add a boolean property to tell the world we support
7082 	 * the B_FAILFAST flag (for layered drivers)
7083 	 */
7084 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7085 	    "ddi-failfast-supported", NULL, 0);
7086 
7087 	/*
7088 	 * Initialize power management
7089 	 */
7090 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7091 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7092 	sd_setup_pm(un, devi);
7093 	if (un->un_f_pm_is_enabled == FALSE) {
7094 		/*
7095 		 * For performance, point to a jump table that does
7096 		 * not include pm.
7097 		 * The direct and priority chains don't change with PM.
7098 		 *
7099 		 * Note: this is currently done based on individual device
7100 		 * capabilities. When an interface for determining system
7101 		 * power enabled state becomes available, or when additional
7102 		 * layers are added to the command chain, these values will
7103 		 * have to be re-evaluated for correctness.
7104 		 */
7105 		if (un->un_f_non_devbsize_supported) {
7106 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7107 		} else {
7108 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7109 		}
7110 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7111 	}
7112 
7113 	/*
7114 	 * This property is set to 0 by HA software to avoid retries
7115 	 * on a reserved disk. (The preferred property name is
7116 	 * "retry-on-reservation-conflict") (1189689)
7117 	 *
7118 	 * Note: The use of a global here can have unintended consequences. A
7119 	 * per instance variable is preferable to match the capabilities of
7120 	 * different underlying hba's (4402600)
7121 	 */
7122 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7123 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7124 	    sd_retry_on_reservation_conflict);
7125 	if (sd_retry_on_reservation_conflict != 0) {
7126 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7127 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7128 		    sd_retry_on_reservation_conflict);
7129 	}
7130 
7131 	/* Set up options for QFULL handling. */
7132 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7133 	    "qfull-retries", -1)) != -1) {
7134 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7135 		    rval, 1);
7136 	}
7137 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7138 	    "qfull-retry-interval", -1)) != -1) {
7139 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7140 		    rval, 1);
7141 	}
7142 
7143 	/*
7144 	 * This just prints a message that announces the existence of the
7145 	 * device. The message is always printed in the system logfile, but
7146 	 * only appears on the console if the system is booted with the
7147 	 * -v (verbose) argument.
7148 	 */
7149 	ddi_report_dev(devi);
7150 
7151 	un->un_mediastate = DKIO_NONE;
7152 
7153 	cmlb_alloc_handle(&un->un_cmlbhandle);
7154 
7155 #if defined(__i386) || defined(__amd64)
7156 	/*
7157 	 * On x86, compensate for off-by-1 legacy error
7158 	 */
7159 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7160 	    (lbasize == un->un_sys_blocksize))
7161 		offbyone = CMLB_OFF_BY_ONE;
7162 #endif
7163 
7164 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7165 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7166 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7167 	    (void *)SD_PATH_DIRECT) != 0) {
7168 		goto cmlb_attach_failed;
7169 	}
7170 
7171 
7172 	/*
7173 	 * Read and validate the device's geometry (ie, disk label)
7174 	 * A new unformatted drive will not have a valid geometry, but
7175 	 * the driver needs to successfully attach to this device so
7176 	 * the drive can be formatted via ioctls.
7177 	 */
7178 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7179 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7180 
7181 	mutex_enter(SD_MUTEX(un));
7182 
7183 	/*
7184 	 * Read and initialize the devid for the unit.
7185 	 */
7186 	ASSERT(un->un_errstats != NULL);
7187 	if (un->un_f_devid_supported) {
7188 		sd_register_devid(un, devi, reservation_flag);
7189 	}
7190 	mutex_exit(SD_MUTEX(un));
7191 
7192 #if (defined(__fibre))
7193 	/*
7194 	 * Register callbacks for fibre only.  You can't do this solely
7195 	 * on the basis of the devid_type because this is hba specific.
7196 	 * We need to query our hba capabilities to find out whether to
7197 	 * register or not.
7198 	 */
7199 	if (un->un_f_is_fibre) {
7200 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7201 		sd_init_event_callbacks(un);
7202 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7203 		    "sd_unit_attach: un:0x%p event callbacks inserted", un);
7204 	    }
7205 	}
7206 #endif
7207 
7208 	if (un->un_f_opt_disable_cache == TRUE) {
7209 		/*
7210 		 * Disable both read cache and write cache.  This is
7211 		 * the historic behavior of the keywords in the config file.
7212 		 */
7213 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7214 		    0) {
7215 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7216 			    "sd_unit_attach: un:0x%p Could not disable "
7217 			    "caching", un);
7218 			goto devid_failed;
7219 		}
7220 	}
7221 
7222 	/*
7223 	 * Check the value of the WCE bit now and
7224 	 * set un_f_write_cache_enabled accordingly.
7225 	 */
7226 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7227 	mutex_enter(SD_MUTEX(un));
7228 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7229 	mutex_exit(SD_MUTEX(un));
7230 
7231 	/*
7232 	 * Find out what type of reservation this disk supports.
7233 	 */
7234 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7235 	case 0:
7236 		/*
7237 		 * SCSI-3 reservations are supported.
7238 		 */
7239 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7240 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7241 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7242 		break;
7243 	case ENOTSUP:
7244 		/*
7245 		 * The PERSISTENT RESERVE IN command would not be recognized by
7246 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7247 		 */
7248 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7249 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7250 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7251 		break;
7252 	default:
7253 		/*
7254 		 * default to SCSI-3 reservations
7255 		 */
7256 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7257 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7258 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7259 		break;
7260 	}
7261 
7262 	/*
7263 	 * Set the pstat and error stat values here, so data obtained during the
7264 	 * previous attach-time routines is available.
7265 	 *
7266 	 * Note: This is a critical sequence that needs to be maintained:
7267 	 *	1) Instantiate the kstats before any routines using the iopath
7268 	 *	   (i.e. sd_send_scsi_cmd).
7269 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7270 	 *	   stats (sd_set_pstats)here, following
7271 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7272 	 *	   sd_cache_control().
7273 	 */
7274 
7275 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7276 		sd_set_pstats(un);
7277 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7278 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7279 	}
7280 
7281 	sd_set_errstats(un);
7282 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7283 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7284 
7285 
7286 	/*
7287 	 * After successfully attaching an instance, we record the information
7288 	 * of how many luns have been attached on the relative target and
7289 	 * controller for parallel SCSI. This information is used when sd tries
7290 	 * to set the tagged queuing capability in HBA.
7291 	 */
7292 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7293 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7294 	}
7295 
7296 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7297 	    "sd_unit_attach: un:0x%p exit success\n", un);
7298 
7299 	return (DDI_SUCCESS);
7300 
7301 	/*
7302 	 * An error occurred during the attach; clean up & return failure.
7303 	 */
7304 
7305 devid_failed:
7306 
7307 setup_pm_failed:
7308 	ddi_remove_minor_node(devi, NULL);
7309 
7310 cmlb_attach_failed:
7311 	/*
7312 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7313 	 */
7314 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7315 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7316 
7317 	/*
7318 	 * Refer to the comments of setting tagged-qing in the beginning of
7319 	 * sd_unit_attach. We can only disable tagged queuing when there is
7320 	 * no lun attached on the target.
7321 	 */
7322 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7323 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7324 	}
7325 
7326 	if (un->un_f_is_fibre == FALSE) {
7327 	    (void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7328 	}
7329 
7330 spinup_failed:
7331 
7332 	mutex_enter(SD_MUTEX(un));
7333 
7334 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
7335 	if (un->un_direct_priority_timeid != NULL) {
7336 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7337 		un->un_direct_priority_timeid = NULL;
7338 		mutex_exit(SD_MUTEX(un));
7339 		(void) untimeout(temp_id);
7340 		mutex_enter(SD_MUTEX(un));
7341 	}
7342 
7343 	/* Cancel any pending start/stop timeouts */
7344 	if (un->un_startstop_timeid != NULL) {
7345 		timeout_id_t temp_id = un->un_startstop_timeid;
7346 		un->un_startstop_timeid = NULL;
7347 		mutex_exit(SD_MUTEX(un));
7348 		(void) untimeout(temp_id);
7349 		mutex_enter(SD_MUTEX(un));
7350 	}
7351 
7352 	/* Cancel any pending reset-throttle timeouts */
7353 	if (un->un_reset_throttle_timeid != NULL) {
7354 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7355 		un->un_reset_throttle_timeid = NULL;
7356 		mutex_exit(SD_MUTEX(un));
7357 		(void) untimeout(temp_id);
7358 		mutex_enter(SD_MUTEX(un));
7359 	}
7360 
7361 	/* Cancel any pending retry timeouts */
7362 	if (un->un_retry_timeid != NULL) {
7363 		timeout_id_t temp_id = un->un_retry_timeid;
7364 		un->un_retry_timeid = NULL;
7365 		mutex_exit(SD_MUTEX(un));
7366 		(void) untimeout(temp_id);
7367 		mutex_enter(SD_MUTEX(un));
7368 	}
7369 
7370 	/* Cancel any pending delayed cv broadcast timeouts */
7371 	if (un->un_dcvb_timeid != NULL) {
7372 		timeout_id_t temp_id = un->un_dcvb_timeid;
7373 		un->un_dcvb_timeid = NULL;
7374 		mutex_exit(SD_MUTEX(un));
7375 		(void) untimeout(temp_id);
7376 		mutex_enter(SD_MUTEX(un));
7377 	}
7378 
7379 	mutex_exit(SD_MUTEX(un));
7380 
7381 	/* There should not be any in-progress I/O so ASSERT this check */
7382 	ASSERT(un->un_ncmds_in_transport == 0);
7383 	ASSERT(un->un_ncmds_in_driver == 0);
7384 
7385 	/* Do not free the softstate if the callback routine is active */
7386 	sd_sync_with_callback(un);
7387 
7388 	/*
7389 	 * Partition stats apparently are not used with removables. These would
7390 	 * not have been created during attach, so no need to clean them up...
7391 	 */
7392 	if (un->un_stats != NULL) {
7393 		kstat_delete(un->un_stats);
7394 		un->un_stats = NULL;
7395 	}
7396 	if (un->un_errstats != NULL) {
7397 		kstat_delete(un->un_errstats);
7398 		un->un_errstats = NULL;
7399 	}
7400 
7401 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7402 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7403 
7404 	ddi_prop_remove_all(devi);
7405 	sema_destroy(&un->un_semoclose);
7406 	cv_destroy(&un->un_state_cv);
7407 
7408 getrbuf_failed:
7409 
7410 	sd_free_rqs(un);
7411 
7412 alloc_rqs_failed:
7413 
7414 	devp->sd_private = NULL;
7415 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7416 
7417 get_softstate_failed:
7418 	/*
7419 	 * Note: the man pages are unclear as to whether or not doing a
7420 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7421 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7422 	 * ddi_get_soft_state() fails.  The implication seems to be
7423 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7424 	 */
7425 	ddi_soft_state_free(sd_state, instance);
7426 
7427 probe_failed:
7428 	scsi_unprobe(devp);
7429 #ifdef SDDEBUG
7430 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
7431 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
7432 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
7433 		    (void *)un);
7434 	}
7435 #endif
7436 	return (DDI_FAILURE);
7437 }
7438 
7439 
7440 /*
7441  *    Function: sd_unit_detach
7442  *
7443  * Description: Performs DDI_DETACH processing for sddetach().
7444  *
7445  * Return Code: DDI_SUCCESS
7446  *		DDI_FAILURE
7447  *
7448  *     Context: Kernel thread context
7449  */
7450 
7451 static int
7452 sd_unit_detach(dev_info_t *devi)
7453 {
7454 	struct scsi_device	*devp;
7455 	struct sd_lun		*un;
7456 	int			i;
7457 	int			tgt;
7458 	dev_t			dev;
7459 	dev_info_t		*pdip = ddi_get_parent(devi);
7460 	int			instance = ddi_get_instance(devi);
7461 
7462 	mutex_enter(&sd_detach_mutex);
7463 
7464 	/*
7465 	 * Fail the detach for any of the following:
7466 	 *  - Unable to get the sd_lun struct for the instance
7467 	 *  - A layered driver has an outstanding open on the instance
7468 	 *  - Another thread is already detaching this instance
7469 	 *  - Another thread is currently performing an open
7470 	 */
7471 	devp = ddi_get_driver_private(devi);
7472 	if ((devp == NULL) ||
7473 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7474 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7475 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7476 		mutex_exit(&sd_detach_mutex);
7477 		return (DDI_FAILURE);
7478 	}
7479 
7480 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7481 
7482 	/*
7483 	 * Mark this instance as currently in a detach, to inhibit any
7484 	 * opens from a layered driver.
7485 	 */
7486 	un->un_detach_count++;
7487 	mutex_exit(&sd_detach_mutex);
7488 
7489 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7490 	    SCSI_ADDR_PROP_TARGET, -1);
7491 
7492 	dev = sd_make_device(SD_DEVINFO(un));
7493 
7494 #ifndef lint
7495 	_NOTE(COMPETING_THREADS_NOW);
7496 #endif
7497 
7498 	mutex_enter(SD_MUTEX(un));
7499 
7500 	/*
7501 	 * Fail the detach if there are any outstanding layered
7502 	 * opens on this device.
7503 	 */
7504 	for (i = 0; i < NDKMAP; i++) {
7505 		if (un->un_ocmap.lyropen[i] != 0) {
7506 			goto err_notclosed;
7507 		}
7508 	}
7509 
7510 	/*
7511 	 * Verify there are NO outstanding commands issued to this device.
7512 	 * ie, un_ncmds_in_transport == 0.
7513 	 * It's possible to have outstanding commands through the physio
7514 	 * code path, even though everything's closed.
7515 	 */
7516 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7517 	    (un->un_direct_priority_timeid != NULL) ||
7518 	    (un->un_state == SD_STATE_RWAIT)) {
7519 		mutex_exit(SD_MUTEX(un));
7520 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7521 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7522 		goto err_stillbusy;
7523 	}
7524 
7525 	/*
7526 	 * If we have the device reserved, release the reservation.
7527 	 */
7528 	if ((un->un_resvd_status & SD_RESERVE) &&
7529 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7530 		mutex_exit(SD_MUTEX(un));
7531 		/*
7532 		 * Note: sd_reserve_release sends a command to the device
7533 		 * via the sd_ioctlcmd() path, and can sleep.
7534 		 */
7535 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7536 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7537 			    "sd_dr_detach: Cannot release reservation \n");
7538 		}
7539 	} else {
7540 		mutex_exit(SD_MUTEX(un));
7541 	}
7542 
7543 	/*
7544 	 * Untimeout any reserve recover, throttle reset, restart unit
7545 	 * and delayed broadcast timeout threads. Protect the timeout pointer
7546 	 * from getting nulled by their callback functions.
7547 	 */
7548 	mutex_enter(SD_MUTEX(un));
7549 	if (un->un_resvd_timeid != NULL) {
7550 		timeout_id_t temp_id = un->un_resvd_timeid;
7551 		un->un_resvd_timeid = NULL;
7552 		mutex_exit(SD_MUTEX(un));
7553 		(void) untimeout(temp_id);
7554 		mutex_enter(SD_MUTEX(un));
7555 	}
7556 
7557 	if (un->un_reset_throttle_timeid != NULL) {
7558 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7559 		un->un_reset_throttle_timeid = NULL;
7560 		mutex_exit(SD_MUTEX(un));
7561 		(void) untimeout(temp_id);
7562 		mutex_enter(SD_MUTEX(un));
7563 	}
7564 
7565 	if (un->un_startstop_timeid != NULL) {
7566 		timeout_id_t temp_id = un->un_startstop_timeid;
7567 		un->un_startstop_timeid = NULL;
7568 		mutex_exit(SD_MUTEX(un));
7569 		(void) untimeout(temp_id);
7570 		mutex_enter(SD_MUTEX(un));
7571 	}
7572 
7573 	if (un->un_dcvb_timeid != NULL) {
7574 		timeout_id_t temp_id = un->un_dcvb_timeid;
7575 		un->un_dcvb_timeid = NULL;
7576 		mutex_exit(SD_MUTEX(un));
7577 		(void) untimeout(temp_id);
7578 	} else {
7579 		mutex_exit(SD_MUTEX(un));
7580 	}
7581 
7582 	/* Remove any pending reservation reclaim requests for this device */
7583 	sd_rmv_resv_reclaim_req(dev);
7584 
7585 	mutex_enter(SD_MUTEX(un));
7586 
7587 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7588 	if (un->un_direct_priority_timeid != NULL) {
7589 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7590 		un->un_direct_priority_timeid = NULL;
7591 		mutex_exit(SD_MUTEX(un));
7592 		(void) untimeout(temp_id);
7593 		mutex_enter(SD_MUTEX(un));
7594 	}
7595 
7596 	/* Cancel any active multi-host disk watch thread requests */
7597 	if (un->un_mhd_token != NULL) {
7598 		mutex_exit(SD_MUTEX(un));
7599 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7600 		if (scsi_watch_request_terminate(un->un_mhd_token,
7601 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7602 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7603 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7604 			/*
7605 			 * Note: We are returning here after having removed
7606 			 * some driver timeouts above. This is consistent with
7607 			 * the legacy implementation but perhaps the watch
7608 			 * terminate call should be made with the wait flag set.
7609 			 */
7610 			goto err_stillbusy;
7611 		}
7612 		mutex_enter(SD_MUTEX(un));
7613 		un->un_mhd_token = NULL;
7614 	}
7615 
7616 	if (un->un_swr_token != NULL) {
7617 		mutex_exit(SD_MUTEX(un));
7618 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7619 		if (scsi_watch_request_terminate(un->un_swr_token,
7620 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7621 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7622 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7623 			/*
7624 			 * Note: We are returning here after having removed
7625 			 * some driver timeouts above. This is consistent with
7626 			 * the legacy implementation but perhaps the watch
7627 			 * terminate call should be made with the wait flag set.
7628 			 */
7629 			goto err_stillbusy;
7630 		}
7631 		mutex_enter(SD_MUTEX(un));
7632 		un->un_swr_token = NULL;
7633 	}
7634 
7635 	mutex_exit(SD_MUTEX(un));
7636 
7637 	/*
7638 	 * Clear any scsi_reset_notifies. We clear the reset notifies
7639 	 * if we have not registered one.
7640 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7641 	 */
7642 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7643 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7644 
7645 	/*
7646 	 * protect the timeout pointers from getting nulled by
7647 	 * their callback functions during the cancellation process.
7648 	 * In such a scenario untimeout can be invoked with a null value.
7649 	 */
7650 	_NOTE(NO_COMPETING_THREADS_NOW);
7651 
7652 	mutex_enter(&un->un_pm_mutex);
7653 	if (un->un_pm_idle_timeid != NULL) {
7654 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7655 		un->un_pm_idle_timeid = NULL;
7656 		mutex_exit(&un->un_pm_mutex);
7657 
7658 		/*
7659 		 * Timeout is active; cancel it.
7660 		 * Note that it'll never be active on a device
7661 		 * that does not support PM therefore we don't
7662 		 * have to check before calling pm_idle_component.
7663 		 */
7664 		(void) untimeout(temp_id);
7665 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7666 		mutex_enter(&un->un_pm_mutex);
7667 	}
7668 
7669 	/*
7670 	 * Check whether there is already a timeout scheduled for power
7671 	 * management. If yes then don't lower the power here, that's.
7672 	 * the timeout handler's job.
7673 	 */
7674 	if (un->un_pm_timeid != NULL) {
7675 		timeout_id_t temp_id = un->un_pm_timeid;
7676 		un->un_pm_timeid = NULL;
7677 		mutex_exit(&un->un_pm_mutex);
7678 		/*
7679 		 * Timeout is active; cancel it.
7680 		 * Note that it'll never be active on a device
7681 		 * that does not support PM therefore we don't
7682 		 * have to check before calling pm_idle_component.
7683 		 */
7684 		(void) untimeout(temp_id);
7685 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7686 
7687 	} else {
7688 		mutex_exit(&un->un_pm_mutex);
7689 		if ((un->un_f_pm_is_enabled == TRUE) &&
7690 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7691 		    DDI_SUCCESS)) {
7692 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7693 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
7694 			/*
7695 			 * Fix for bug: 4297749, item # 13
7696 			 * The above test now includes a check to see if PM is
7697 			 * supported by this device before call
7698 			 * pm_lower_power().
7699 			 * Note, the following is not dead code. The call to
7700 			 * pm_lower_power above will generate a call back into
7701 			 * our sdpower routine which might result in a timeout
7702 			 * handler getting activated. Therefore the following
7703 			 * code is valid and necessary.
7704 			 */
7705 			mutex_enter(&un->un_pm_mutex);
7706 			if (un->un_pm_timeid != NULL) {
7707 				timeout_id_t temp_id = un->un_pm_timeid;
7708 				un->un_pm_timeid = NULL;
7709 				mutex_exit(&un->un_pm_mutex);
7710 				(void) untimeout(temp_id);
7711 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7712 			} else {
7713 				mutex_exit(&un->un_pm_mutex);
7714 			}
7715 		}
7716 	}
7717 
7718 	/*
7719 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7720 	 * Relocated here from above to be after the call to
7721 	 * pm_lower_power, which was getting errors.
7722 	 */
7723 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7724 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7725 
7726 	/*
7727 	 * Currently, tagged queuing is supported per target based by HBA.
7728 	 * Setting this per lun instance actually sets the capability of this
7729 	 * target in HBA, which affects those luns already attached on the
7730 	 * same target. So during detach, we can only disable this capability
7731 	 * only when this is the only lun left on this target. By doing
7732 	 * this, we assume a target has the same tagged queuing capability
7733 	 * for every lun. The condition can be removed when HBA is changed to
7734 	 * support per lun based tagged queuing capability.
7735 	 */
7736 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7737 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7738 	}
7739 
7740 	if (un->un_f_is_fibre == FALSE) {
7741 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7742 	}
7743 
7744 	/*
7745 	 * Remove any event callbacks, fibre only
7746 	 */
7747 	if (un->un_f_is_fibre == TRUE) {
7748 		if ((un->un_insert_event != NULL) &&
7749 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
7750 				DDI_SUCCESS)) {
7751 			/*
7752 			 * Note: We are returning here after having done
7753 			 * substantial cleanup above. This is consistent
7754 			 * with the legacy implementation but this may not
7755 			 * be the right thing to do.
7756 			 */
7757 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7758 				"sd_dr_detach: Cannot cancel insert event\n");
7759 			goto err_remove_event;
7760 		}
7761 		un->un_insert_event = NULL;
7762 
7763 		if ((un->un_remove_event != NULL) &&
7764 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
7765 				DDI_SUCCESS)) {
7766 			/*
7767 			 * Note: We are returning here after having done
7768 			 * substantial cleanup above. This is consistent
7769 			 * with the legacy implementation but this may not
7770 			 * be the right thing to do.
7771 			 */
7772 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7773 				"sd_dr_detach: Cannot cancel remove event\n");
7774 			goto err_remove_event;
7775 		}
7776 		un->un_remove_event = NULL;
7777 	}
7778 
7779 	/* Do not free the softstate if the callback routine is active */
7780 	sd_sync_with_callback(un);
7781 
7782 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7783 	cmlb_free_handle(&un->un_cmlbhandle);
7784 
7785 	/*
7786 	 * Hold the detach mutex here, to make sure that no other threads ever
7787 	 * can access a (partially) freed soft state structure.
7788 	 */
7789 	mutex_enter(&sd_detach_mutex);
7790 
7791 	/*
7792 	 * Clean up the soft state struct.
7793 	 * Cleanup is done in reverse order of allocs/inits.
7794 	 * At this point there should be no competing threads anymore.
7795 	 */
7796 
7797 	/* Unregister and free device id. */
7798 	ddi_devid_unregister(devi);
7799 	if (un->un_devid) {
7800 		ddi_devid_free(un->un_devid);
7801 		un->un_devid = NULL;
7802 	}
7803 
7804 	/*
7805 	 * Destroy wmap cache if it exists.
7806 	 */
7807 	if (un->un_wm_cache != NULL) {
7808 		kmem_cache_destroy(un->un_wm_cache);
7809 		un->un_wm_cache = NULL;
7810 	}
7811 
7812 	/*
7813 	 * kstat cleanup is done in detach for all device types (4363169).
7814 	 * We do not want to fail detach if the device kstats are not deleted
7815 	 * since there is a confusion about the devo_refcnt for the device.
7816 	 * We just delete the kstats and let detach complete successfully.
7817 	 */
7818 	if (un->un_stats != NULL) {
7819 		kstat_delete(un->un_stats);
7820 		un->un_stats = NULL;
7821 	}
7822 	if (un->un_errstats != NULL) {
7823 		kstat_delete(un->un_errstats);
7824 		un->un_errstats = NULL;
7825 	}
7826 
7827 	/* Remove partition stats */
7828 	if (un->un_f_pkstats_enabled) {
7829 		for (i = 0; i < NSDMAP; i++) {
7830 			if (un->un_pstats[i] != NULL) {
7831 				kstat_delete(un->un_pstats[i]);
7832 				un->un_pstats[i] = NULL;
7833 			}
7834 		}
7835 	}
7836 
7837 	/* Remove xbuf registration */
7838 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7839 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7840 
7841 	/* Remove driver properties */
7842 	ddi_prop_remove_all(devi);
7843 
7844 	mutex_destroy(&un->un_pm_mutex);
7845 	cv_destroy(&un->un_pm_busy_cv);
7846 
7847 	cv_destroy(&un->un_wcc_cv);
7848 
7849 	/* Open/close semaphore */
7850 	sema_destroy(&un->un_semoclose);
7851 
7852 	/* Removable media condvar. */
7853 	cv_destroy(&un->un_state_cv);
7854 
7855 	/* Suspend/resume condvar. */
7856 	cv_destroy(&un->un_suspend_cv);
7857 	cv_destroy(&un->un_disk_busy_cv);
7858 
7859 	sd_free_rqs(un);
7860 
7861 	/* Free up soft state */
7862 	devp->sd_private = NULL;
7863 
7864 	bzero(un, sizeof (struct sd_lun));
7865 	ddi_soft_state_free(sd_state, instance);
7866 
7867 	mutex_exit(&sd_detach_mutex);
7868 
7869 	/* This frees up the INQUIRY data associated with the device. */
7870 	scsi_unprobe(devp);
7871 
7872 	/*
7873 	 * After successfully detaching an instance, we update the information
7874 	 * of how many luns have been attached in the relative target and
7875 	 * controller for parallel SCSI. This information is used when sd tries
7876 	 * to set the tagged queuing capability in HBA.
7877 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7878 	 * check if the device is parallel SCSI. However, we don't need to
7879 	 * check here because we've already checked during attach. No device
7880 	 * that is not parallel SCSI is in the chain.
7881 	 */
7882 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7883 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7884 	}
7885 
7886 	return (DDI_SUCCESS);
7887 
7888 err_notclosed:
7889 	mutex_exit(SD_MUTEX(un));
7890 
7891 err_stillbusy:
7892 	_NOTE(NO_COMPETING_THREADS_NOW);
7893 
7894 err_remove_event:
7895 	mutex_enter(&sd_detach_mutex);
7896 	un->un_detach_count--;
7897 	mutex_exit(&sd_detach_mutex);
7898 
7899 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
7900 	return (DDI_FAILURE);
7901 }
7902 
7903 
7904 /*
7905  *    Function: sd_create_errstats
7906  *
7907  * Description: This routine instantiates the device error stats.
7908  *
7909  *		Note: During attach the stats are instantiated first so they are
7910  *		available for attach-time routines that utilize the driver
7911  *		iopath to send commands to the device. The stats are initialized
7912  *		separately so data obtained during some attach-time routines is
7913  *		available. (4362483)
7914  *
7915  *   Arguments: un - driver soft state (unit) structure
7916  *		instance - driver instance
7917  *
7918  *     Context: Kernel thread context
7919  */
7920 
7921 static void
7922 sd_create_errstats(struct sd_lun *un, int instance)
7923 {
7924 	struct	sd_errstats	*stp;
7925 	char	kstatmodule_err[KSTAT_STRLEN];
7926 	char	kstatname[KSTAT_STRLEN];
7927 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
7928 
7929 	ASSERT(un != NULL);
7930 
7931 	if (un->un_errstats != NULL) {
7932 		return;
7933 	}
7934 
7935 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
7936 	    "%serr", sd_label);
7937 	(void) snprintf(kstatname, sizeof (kstatname),
7938 	    "%s%d,err", sd_label, instance);
7939 
7940 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
7941 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
7942 
7943 	if (un->un_errstats == NULL) {
7944 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7945 		    "sd_create_errstats: Failed kstat_create\n");
7946 		return;
7947 	}
7948 
7949 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7950 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
7951 	    KSTAT_DATA_UINT32);
7952 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
7953 	    KSTAT_DATA_UINT32);
7954 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
7955 	    KSTAT_DATA_UINT32);
7956 	kstat_named_init(&stp->sd_vid,		"Vendor",
7957 	    KSTAT_DATA_CHAR);
7958 	kstat_named_init(&stp->sd_pid,		"Product",
7959 	    KSTAT_DATA_CHAR);
7960 	kstat_named_init(&stp->sd_revision,	"Revision",
7961 	    KSTAT_DATA_CHAR);
7962 	kstat_named_init(&stp->sd_serial,	"Serial No",
7963 	    KSTAT_DATA_CHAR);
7964 	kstat_named_init(&stp->sd_capacity,	"Size",
7965 	    KSTAT_DATA_ULONGLONG);
7966 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
7967 	    KSTAT_DATA_UINT32);
7968 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
7969 	    KSTAT_DATA_UINT32);
7970 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
7971 	    KSTAT_DATA_UINT32);
7972 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
7973 	    KSTAT_DATA_UINT32);
7974 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
7975 	    KSTAT_DATA_UINT32);
7976 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
7977 	    KSTAT_DATA_UINT32);
7978 
7979 	un->un_errstats->ks_private = un;
7980 	un->un_errstats->ks_update  = nulldev;
7981 
7982 	kstat_install(un->un_errstats);
7983 }
7984 
7985 
7986 /*
7987  *    Function: sd_set_errstats
7988  *
7989  * Description: This routine sets the value of the vendor id, product id,
7990  *		revision, serial number, and capacity device error stats.
7991  *
7992  *		Note: During attach the stats are instantiated first so they are
7993  *		available for attach-time routines that utilize the driver
7994  *		iopath to send commands to the device. The stats are initialized
7995  *		separately so data obtained during some attach-time routines is
7996  *		available. (4362483)
7997  *
7998  *   Arguments: un - driver soft state (unit) structure
7999  *
8000  *     Context: Kernel thread context
8001  */
8002 
8003 static void
8004 sd_set_errstats(struct sd_lun *un)
8005 {
8006 	struct	sd_errstats	*stp;
8007 
8008 	ASSERT(un != NULL);
8009 	ASSERT(un->un_errstats != NULL);
8010 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8011 	ASSERT(stp != NULL);
8012 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8013 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8014 	(void) strncpy(stp->sd_revision.value.c,
8015 	    un->un_sd->sd_inq->inq_revision, 4);
8016 
8017 	/*
8018 	 * All the errstats are persistent across detach/attach,
8019 	 * so reset all the errstats here in case of the hot
8020 	 * replacement of disk drives, except for not changed
8021 	 * Sun qualified drives.
8022 	 */
8023 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8024 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8025 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8026 		stp->sd_softerrs.value.ui32 = 0;
8027 		stp->sd_harderrs.value.ui32 = 0;
8028 		stp->sd_transerrs.value.ui32 = 0;
8029 		stp->sd_rq_media_err.value.ui32 = 0;
8030 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8031 		stp->sd_rq_nodev_err.value.ui32 = 0;
8032 		stp->sd_rq_recov_err.value.ui32 = 0;
8033 		stp->sd_rq_illrq_err.value.ui32 = 0;
8034 		stp->sd_rq_pfa_err.value.ui32 = 0;
8035 	}
8036 
8037 	/*
8038 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8039 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8040 	 * (4376302))
8041 	 */
8042 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8043 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8044 		    sizeof (SD_INQUIRY(un)->inq_serial));
8045 	}
8046 
8047 	if (un->un_f_blockcount_is_valid != TRUE) {
8048 		/*
8049 		 * Set capacity error stat to 0 for no media. This ensures
8050 		 * a valid capacity is displayed in response to 'iostat -E'
8051 		 * when no media is present in the device.
8052 		 */
8053 		stp->sd_capacity.value.ui64 = 0;
8054 	} else {
8055 		/*
8056 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8057 		 * capacity.
8058 		 *
8059 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8060 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8061 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8062 		 */
8063 		stp->sd_capacity.value.ui64 = (uint64_t)
8064 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8065 	}
8066 }
8067 
8068 
8069 /*
8070  *    Function: sd_set_pstats
8071  *
8072  * Description: This routine instantiates and initializes the partition
8073  *              stats for each partition with more than zero blocks.
8074  *		(4363169)
8075  *
8076  *   Arguments: un - driver soft state (unit) structure
8077  *
8078  *     Context: Kernel thread context
8079  */
8080 
8081 static void
8082 sd_set_pstats(struct sd_lun *un)
8083 {
8084 	char	kstatname[KSTAT_STRLEN];
8085 	int	instance;
8086 	int	i;
8087 	diskaddr_t	nblks = 0;
8088 	char	*partname = NULL;
8089 
8090 	ASSERT(un != NULL);
8091 
8092 	instance = ddi_get_instance(SD_DEVINFO(un));
8093 
8094 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8095 	for (i = 0; i < NSDMAP; i++) {
8096 
8097 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8098 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8099 			continue;
8100 		mutex_enter(SD_MUTEX(un));
8101 
8102 		if ((un->un_pstats[i] == NULL) &&
8103 		    (nblks != 0)) {
8104 
8105 			(void) snprintf(kstatname, sizeof (kstatname),
8106 			    "%s%d,%s", sd_label, instance,
8107 			    partname);
8108 
8109 			un->un_pstats[i] = kstat_create(sd_label,
8110 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8111 			    1, KSTAT_FLAG_PERSISTENT);
8112 			if (un->un_pstats[i] != NULL) {
8113 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8114 				kstat_install(un->un_pstats[i]);
8115 			}
8116 		}
8117 		mutex_exit(SD_MUTEX(un));
8118 	}
8119 }
8120 
8121 
8122 #if (defined(__fibre))
8123 /*
8124  *    Function: sd_init_event_callbacks
8125  *
8126  * Description: This routine initializes the insertion and removal event
8127  *		callbacks. (fibre only)
8128  *
8129  *   Arguments: un - driver soft state (unit) structure
8130  *
8131  *     Context: Kernel thread context
8132  */
8133 
8134 static void
8135 sd_init_event_callbacks(struct sd_lun *un)
8136 {
8137 	ASSERT(un != NULL);
8138 
8139 	if ((un->un_insert_event == NULL) &&
8140 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8141 	    &un->un_insert_event) == DDI_SUCCESS)) {
8142 		/*
8143 		 * Add the callback for an insertion event
8144 		 */
8145 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8146 		    un->un_insert_event, sd_event_callback, (void *)un,
8147 		    &(un->un_insert_cb_id));
8148 	}
8149 
8150 	if ((un->un_remove_event == NULL) &&
8151 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8152 	    &un->un_remove_event) == DDI_SUCCESS)) {
8153 		/*
8154 		 * Add the callback for a removal event
8155 		 */
8156 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8157 		    un->un_remove_event, sd_event_callback, (void *)un,
8158 		    &(un->un_remove_cb_id));
8159 	}
8160 }
8161 
8162 
8163 /*
8164  *    Function: sd_event_callback
8165  *
8166  * Description: This routine handles insert/remove events (photon). The
8167  *		state is changed to OFFLINE which can be used to supress
8168  *		error msgs. (fibre only)
8169  *
8170  *   Arguments: un - driver soft state (unit) structure
8171  *
8172  *     Context: Callout thread context
8173  */
8174 /* ARGSUSED */
8175 static void
8176 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8177     void *bus_impldata)
8178 {
8179 	struct sd_lun *un = (struct sd_lun *)arg;
8180 
8181 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8182 	if (event == un->un_insert_event) {
8183 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8184 		mutex_enter(SD_MUTEX(un));
8185 		if (un->un_state == SD_STATE_OFFLINE) {
8186 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8187 				un->un_state = un->un_last_state;
8188 			} else {
8189 				/*
8190 				 * We have gone through SUSPEND/RESUME while
8191 				 * we were offline. Restore the last state
8192 				 */
8193 				un->un_state = un->un_save_state;
8194 			}
8195 		}
8196 		mutex_exit(SD_MUTEX(un));
8197 
8198 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8199 	} else if (event == un->un_remove_event) {
8200 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8201 		mutex_enter(SD_MUTEX(un));
8202 		/*
8203 		 * We need to handle an event callback that occurs during
8204 		 * the suspend operation, since we don't prevent it.
8205 		 */
8206 		if (un->un_state != SD_STATE_OFFLINE) {
8207 			if (un->un_state != SD_STATE_SUSPENDED) {
8208 				New_state(un, SD_STATE_OFFLINE);
8209 			} else {
8210 				un->un_last_state = SD_STATE_OFFLINE;
8211 			}
8212 		}
8213 		mutex_exit(SD_MUTEX(un));
8214 	} else {
8215 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8216 		    "!Unknown event\n");
8217 	}
8218 
8219 }
8220 #endif
8221 
8222 /*
8223  *    Function: sd_cache_control()
8224  *
8225  * Description: This routine is the driver entry point for setting
8226  *		read and write caching by modifying the WCE (write cache
8227  *		enable) and RCD (read cache disable) bits of mode
8228  *		page 8 (MODEPAGE_CACHING).
8229  *
8230  *   Arguments: un - driver soft state (unit) structure
8231  *		rcd_flag - flag for controlling the read cache
8232  *		wce_flag - flag for controlling the write cache
8233  *
8234  * Return Code: EIO
8235  *		code returned by sd_send_scsi_MODE_SENSE and
8236  *		sd_send_scsi_MODE_SELECT
8237  *
8238  *     Context: Kernel Thread
8239  */
8240 
8241 static int
8242 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8243 {
8244 	struct mode_caching	*mode_caching_page;
8245 	uchar_t			*header;
8246 	size_t			buflen;
8247 	int			hdrlen;
8248 	int			bd_len;
8249 	int			rval = 0;
8250 	struct mode_header_grp2	*mhp;
8251 
8252 	ASSERT(un != NULL);
8253 
8254 	/*
8255 	 * Do a test unit ready, otherwise a mode sense may not work if this
8256 	 * is the first command sent to the device after boot.
8257 	 */
8258 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8259 
8260 	if (un->un_f_cfg_is_atapi == TRUE) {
8261 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8262 	} else {
8263 		hdrlen = MODE_HEADER_LENGTH;
8264 	}
8265 
8266 	/*
8267 	 * Allocate memory for the retrieved mode page and its headers.  Set
8268 	 * a pointer to the page itself.  Use mode_cache_scsi3 to insure
8269 	 * we get all of the mode sense data otherwise, the mode select
8270 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8271 	 */
8272 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8273 		sizeof (struct mode_cache_scsi3);
8274 
8275 	header = kmem_zalloc(buflen, KM_SLEEP);
8276 
8277 	/* Get the information from the device. */
8278 	if (un->un_f_cfg_is_atapi == TRUE) {
8279 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8280 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8281 	} else {
8282 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8283 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8284 	}
8285 	if (rval != 0) {
8286 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8287 		    "sd_cache_control: Mode Sense Failed\n");
8288 		kmem_free(header, buflen);
8289 		return (rval);
8290 	}
8291 
8292 	/*
8293 	 * Determine size of Block Descriptors in order to locate
8294 	 * the mode page data. ATAPI devices return 0, SCSI devices
8295 	 * should return MODE_BLK_DESC_LENGTH.
8296 	 */
8297 	if (un->un_f_cfg_is_atapi == TRUE) {
8298 		mhp	= (struct mode_header_grp2 *)header;
8299 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8300 	} else {
8301 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8302 	}
8303 
8304 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8305 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8306 		    "sd_cache_control: Mode Sense returned invalid "
8307 		    "block descriptor length\n");
8308 		kmem_free(header, buflen);
8309 		return (EIO);
8310 	}
8311 
8312 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8313 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8314 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8315 		    " caching page code mismatch %d\n",
8316 		    mode_caching_page->mode_page.code);
8317 		kmem_free(header, buflen);
8318 		return (EIO);
8319 	}
8320 
8321 	/* Check the relevant bits on successful mode sense. */
8322 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8323 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8324 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8325 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8326 
8327 		size_t sbuflen;
8328 		uchar_t save_pg;
8329 
8330 		/*
8331 		 * Construct select buffer length based on the
8332 		 * length of the sense data returned.
8333 		 */
8334 		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
8335 				sizeof (struct mode_page) +
8336 				(int)mode_caching_page->mode_page.length;
8337 
8338 		/*
8339 		 * Set the caching bits as requested.
8340 		 */
8341 		if (rcd_flag == SD_CACHE_ENABLE)
8342 			mode_caching_page->rcd = 0;
8343 		else if (rcd_flag == SD_CACHE_DISABLE)
8344 			mode_caching_page->rcd = 1;
8345 
8346 		if (wce_flag == SD_CACHE_ENABLE)
8347 			mode_caching_page->wce = 1;
8348 		else if (wce_flag == SD_CACHE_DISABLE)
8349 			mode_caching_page->wce = 0;
8350 
8351 		/*
8352 		 * Save the page if the mode sense says the
8353 		 * drive supports it.
8354 		 */
8355 		save_pg = mode_caching_page->mode_page.ps ?
8356 				SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8357 
8358 		/* Clear reserved bits before mode select. */
8359 		mode_caching_page->mode_page.ps = 0;
8360 
8361 		/*
8362 		 * Clear out mode header for mode select.
8363 		 * The rest of the retrieved page will be reused.
8364 		 */
8365 		bzero(header, hdrlen);
8366 
8367 		if (un->un_f_cfg_is_atapi == TRUE) {
8368 			mhp = (struct mode_header_grp2 *)header;
8369 			mhp->bdesc_length_hi = bd_len >> 8;
8370 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8371 		} else {
8372 			((struct mode_header *)header)->bdesc_length = bd_len;
8373 		}
8374 
8375 		/* Issue mode select to change the cache settings */
8376 		if (un->un_f_cfg_is_atapi == TRUE) {
8377 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8378 			    sbuflen, save_pg, SD_PATH_DIRECT);
8379 		} else {
8380 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8381 			    sbuflen, save_pg, SD_PATH_DIRECT);
8382 		}
8383 	}
8384 
8385 	kmem_free(header, buflen);
8386 	return (rval);
8387 }
8388 
8389 
8390 /*
8391  *    Function: sd_get_write_cache_enabled()
8392  *
8393  * Description: This routine is the driver entry point for determining if
8394  *		write caching is enabled.  It examines the WCE (write cache
8395  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8396  *
8397  *   Arguments: un - driver soft state (unit) structure
8398  *		is_enabled - pointer to int where write cache enabled state
8399  *		is returned (non-zero -> write cache enabled)
8400  *
8401  *
8402  * Return Code: EIO
8403  *		code returned by sd_send_scsi_MODE_SENSE
8404  *
8405  *     Context: Kernel Thread
8406  *
8407  * NOTE: If ioctl is added to disable write cache, this sequence should
8408  * be followed so that no locking is required for accesses to
8409  * un->un_f_write_cache_enabled:
8410  * 	do mode select to clear wce
8411  * 	do synchronize cache to flush cache
8412  * 	set un->un_f_write_cache_enabled = FALSE
8413  *
8414  * Conversely, an ioctl to enable the write cache should be done
8415  * in this order:
8416  * 	set un->un_f_write_cache_enabled = TRUE
8417  * 	do mode select to set wce
8418  */
8419 
8420 static int
8421 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8422 {
8423 	struct mode_caching	*mode_caching_page;
8424 	uchar_t			*header;
8425 	size_t			buflen;
8426 	int			hdrlen;
8427 	int			bd_len;
8428 	int			rval = 0;
8429 
8430 	ASSERT(un != NULL);
8431 	ASSERT(is_enabled != NULL);
8432 
8433 	/* in case of error, flag as enabled */
8434 	*is_enabled = TRUE;
8435 
8436 	/*
8437 	 * Do a test unit ready, otherwise a mode sense may not work if this
8438 	 * is the first command sent to the device after boot.
8439 	 */
8440 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8441 
8442 	if (un->un_f_cfg_is_atapi == TRUE) {
8443 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8444 	} else {
8445 		hdrlen = MODE_HEADER_LENGTH;
8446 	}
8447 
8448 	/*
8449 	 * Allocate memory for the retrieved mode page and its headers.  Set
8450 	 * a pointer to the page itself.
8451 	 */
8452 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8453 	header = kmem_zalloc(buflen, KM_SLEEP);
8454 
8455 	/* Get the information from the device. */
8456 	if (un->un_f_cfg_is_atapi == TRUE) {
8457 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8458 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8459 	} else {
8460 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8461 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8462 	}
8463 	if (rval != 0) {
8464 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8465 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8466 		kmem_free(header, buflen);
8467 		return (rval);
8468 	}
8469 
8470 	/*
8471 	 * Determine size of Block Descriptors in order to locate
8472 	 * the mode page data. ATAPI devices return 0, SCSI devices
8473 	 * should return MODE_BLK_DESC_LENGTH.
8474 	 */
8475 	if (un->un_f_cfg_is_atapi == TRUE) {
8476 		struct mode_header_grp2	*mhp;
8477 		mhp	= (struct mode_header_grp2 *)header;
8478 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8479 	} else {
8480 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8481 	}
8482 
8483 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8484 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8485 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8486 		    "block descriptor length\n");
8487 		kmem_free(header, buflen);
8488 		return (EIO);
8489 	}
8490 
8491 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8492 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8493 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8494 		    " caching page code mismatch %d\n",
8495 		    mode_caching_page->mode_page.code);
8496 		kmem_free(header, buflen);
8497 		return (EIO);
8498 	}
8499 	*is_enabled = mode_caching_page->wce;
8500 
8501 	kmem_free(header, buflen);
8502 	return (0);
8503 }
8504 
8505 
8506 /*
8507  *    Function: sd_make_device
8508  *
8509  * Description: Utility routine to return the Solaris device number from
8510  *		the data in the device's dev_info structure.
8511  *
8512  * Return Code: The Solaris device number
8513  *
8514  *     Context: Any
8515  */
8516 
8517 static dev_t
8518 sd_make_device(dev_info_t *devi)
8519 {
8520 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8521 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8522 }
8523 
8524 
8525 /*
8526  *    Function: sd_pm_entry
8527  *
8528  * Description: Called at the start of a new command to manage power
8529  *		and busy status of a device. This includes determining whether
8530  *		the current power state of the device is sufficient for
8531  *		performing the command or whether it must be changed.
8532  *		The PM framework is notified appropriately.
8533  *		Only with a return status of DDI_SUCCESS will the
8534  *		component be busy to the framework.
8535  *
8536  *		All callers of sd_pm_entry must check the return status
8537  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
8538  *		of DDI_FAILURE indicates the device failed to power up.
8539  *		In this case un_pm_count has been adjusted so the result
8540  *		on exit is still powered down, ie. count is less than 0.
8541  *		Calling sd_pm_exit with this count value hits an ASSERT.
8542  *
8543  * Return Code: DDI_SUCCESS or DDI_FAILURE
8544  *
8545  *     Context: Kernel thread context.
8546  */
8547 
8548 static int
8549 sd_pm_entry(struct sd_lun *un)
8550 {
8551 	int return_status = DDI_SUCCESS;
8552 
8553 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8554 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8555 
8556 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8557 
8558 	if (un->un_f_pm_is_enabled == FALSE) {
8559 		SD_TRACE(SD_LOG_IO_PM, un,
8560 		    "sd_pm_entry: exiting, PM not enabled\n");
8561 		return (return_status);
8562 	}
8563 
8564 	/*
8565 	 * Just increment a counter if PM is enabled. On the transition from
8566 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8567 	 * the count with each IO and mark the device as idle when the count
8568 	 * hits 0.
8569 	 *
8570 	 * If the count is less than 0 the device is powered down. If a powered
8571 	 * down device is successfully powered up then the count must be
8572 	 * incremented to reflect the power up. Note that it'll get incremented
8573 	 * a second time to become busy.
8574 	 *
8575 	 * Because the following has the potential to change the device state
8576 	 * and must release the un_pm_mutex to do so, only one thread can be
8577 	 * allowed through at a time.
8578 	 */
8579 
8580 	mutex_enter(&un->un_pm_mutex);
8581 	while (un->un_pm_busy == TRUE) {
8582 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8583 	}
8584 	un->un_pm_busy = TRUE;
8585 
8586 	if (un->un_pm_count < 1) {
8587 
8588 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8589 
8590 		/*
8591 		 * Indicate we are now busy so the framework won't attempt to
8592 		 * power down the device. This call will only fail if either
8593 		 * we passed a bad component number or the device has no
8594 		 * components. Neither of these should ever happen.
8595 		 */
8596 		mutex_exit(&un->un_pm_mutex);
8597 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8598 		ASSERT(return_status == DDI_SUCCESS);
8599 
8600 		mutex_enter(&un->un_pm_mutex);
8601 
8602 		if (un->un_pm_count < 0) {
8603 			mutex_exit(&un->un_pm_mutex);
8604 
8605 			SD_TRACE(SD_LOG_IO_PM, un,
8606 			    "sd_pm_entry: power up component\n");
8607 
8608 			/*
8609 			 * pm_raise_power will cause sdpower to be called
8610 			 * which brings the device power level to the
8611 			 * desired state, ON in this case. If successful,
8612 			 * un_pm_count and un_power_level will be updated
8613 			 * appropriately.
8614 			 */
8615 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8616 			    SD_SPINDLE_ON);
8617 
8618 			mutex_enter(&un->un_pm_mutex);
8619 
8620 			if (return_status != DDI_SUCCESS) {
8621 				/*
8622 				 * Power up failed.
8623 				 * Idle the device and adjust the count
8624 				 * so the result on exit is that we're
8625 				 * still powered down, ie. count is less than 0.
8626 				 */
8627 				SD_TRACE(SD_LOG_IO_PM, un,
8628 				    "sd_pm_entry: power up failed,"
8629 				    " idle the component\n");
8630 
8631 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8632 				un->un_pm_count--;
8633 			} else {
8634 				/*
8635 				 * Device is powered up, verify the
8636 				 * count is non-negative.
8637 				 * This is debug only.
8638 				 */
8639 				ASSERT(un->un_pm_count == 0);
8640 			}
8641 		}
8642 
8643 		if (return_status == DDI_SUCCESS) {
8644 			/*
8645 			 * For performance, now that the device has been tagged
8646 			 * as busy, and it's known to be powered up, update the
8647 			 * chain types to use jump tables that do not include
8648 			 * pm. This significantly lowers the overhead and
8649 			 * therefore improves performance.
8650 			 */
8651 
8652 			mutex_exit(&un->un_pm_mutex);
8653 			mutex_enter(SD_MUTEX(un));
8654 			SD_TRACE(SD_LOG_IO_PM, un,
8655 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8656 			    un->un_uscsi_chain_type);
8657 
8658 			if (un->un_f_non_devbsize_supported) {
8659 				un->un_buf_chain_type =
8660 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8661 			} else {
8662 				un->un_buf_chain_type =
8663 				    SD_CHAIN_INFO_DISK_NO_PM;
8664 			}
8665 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8666 
8667 			SD_TRACE(SD_LOG_IO_PM, un,
8668 			    "             changed  uscsi_chain_type to   %d\n",
8669 			    un->un_uscsi_chain_type);
8670 			mutex_exit(SD_MUTEX(un));
8671 			mutex_enter(&un->un_pm_mutex);
8672 
8673 			if (un->un_pm_idle_timeid == NULL) {
8674 				/* 300 ms. */
8675 				un->un_pm_idle_timeid =
8676 				    timeout(sd_pm_idletimeout_handler, un,
8677 				    (drv_usectohz((clock_t)300000)));
8678 				/*
8679 				 * Include an extra call to busy which keeps the
8680 				 * device busy with-respect-to the PM layer
8681 				 * until the timer fires, at which time it'll
8682 				 * get the extra idle call.
8683 				 */
8684 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8685 			}
8686 		}
8687 	}
8688 	un->un_pm_busy = FALSE;
8689 	/* Next... */
8690 	cv_signal(&un->un_pm_busy_cv);
8691 
8692 	un->un_pm_count++;
8693 
8694 	SD_TRACE(SD_LOG_IO_PM, un,
8695 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8696 
8697 	mutex_exit(&un->un_pm_mutex);
8698 
8699 	return (return_status);
8700 }
8701 
8702 
8703 /*
8704  *    Function: sd_pm_exit
8705  *
8706  * Description: Called at the completion of a command to manage busy
8707  *		status for the device. If the device becomes idle the
8708  *		PM framework is notified.
8709  *
8710  *     Context: Kernel thread context
8711  */
8712 
8713 static void
8714 sd_pm_exit(struct sd_lun *un)
8715 {
8716 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8717 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8718 
8719 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8720 
8721 	/*
8722 	 * After attach the following flag is only read, so don't
8723 	 * take the penalty of acquiring a mutex for it.
8724 	 */
8725 	if (un->un_f_pm_is_enabled == TRUE) {
8726 
8727 		mutex_enter(&un->un_pm_mutex);
8728 		un->un_pm_count--;
8729 
8730 		SD_TRACE(SD_LOG_IO_PM, un,
8731 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8732 
8733 		ASSERT(un->un_pm_count >= 0);
8734 		if (un->un_pm_count == 0) {
8735 			mutex_exit(&un->un_pm_mutex);
8736 
8737 			SD_TRACE(SD_LOG_IO_PM, un,
8738 			    "sd_pm_exit: idle component\n");
8739 
8740 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8741 
8742 		} else {
8743 			mutex_exit(&un->un_pm_mutex);
8744 		}
8745 	}
8746 
8747 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8748 }
8749 
8750 
8751 /*
8752  *    Function: sdopen
8753  *
8754  * Description: Driver's open(9e) entry point function.
8755  *
8756  *   Arguments: dev_i   - pointer to device number
8757  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8758  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8759  *		cred_p  - user credential pointer
8760  *
8761  * Return Code: EINVAL
8762  *		ENXIO
8763  *		EIO
8764  *		EROFS
8765  *		EBUSY
8766  *
8767  *     Context: Kernel thread context
8768  */
8769 /* ARGSUSED */
8770 static int
8771 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8772 {
8773 	struct sd_lun	*un;
8774 	int		nodelay;
8775 	int		part;
8776 	uint64_t	partmask;
8777 	int		instance;
8778 	dev_t		dev;
8779 	int		rval = EIO;
8780 	diskaddr_t	nblks = 0;
8781 
8782 	/* Validate the open type */
8783 	if (otyp >= OTYPCNT) {
8784 		return (EINVAL);
8785 	}
8786 
8787 	dev = *dev_p;
8788 	instance = SDUNIT(dev);
8789 	mutex_enter(&sd_detach_mutex);
8790 
8791 	/*
8792 	 * Fail the open if there is no softstate for the instance, or
8793 	 * if another thread somewhere is trying to detach the instance.
8794 	 */
8795 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
8796 	    (un->un_detach_count != 0)) {
8797 		mutex_exit(&sd_detach_mutex);
8798 		/*
8799 		 * The probe cache only needs to be cleared when open (9e) fails
8800 		 * with ENXIO (4238046).
8801 		 */
8802 		/*
8803 		 * un-conditionally clearing probe cache is ok with
8804 		 * separate sd/ssd binaries
8805 		 * x86 platform can be an issue with both parallel
8806 		 * and fibre in 1 binary
8807 		 */
8808 		sd_scsi_clear_probe_cache();
8809 		return (ENXIO);
8810 	}
8811 
8812 	/*
8813 	 * The un_layer_count is to prevent another thread in specfs from
8814 	 * trying to detach the instance, which can happen when we are
8815 	 * called from a higher-layer driver instead of thru specfs.
8816 	 * This will not be needed when DDI provides a layered driver
8817 	 * interface that allows specfs to know that an instance is in
8818 	 * use by a layered driver & should not be detached.
8819 	 *
8820 	 * Note: the semantics for layered driver opens are exactly one
8821 	 * close for every open.
8822 	 */
8823 	if (otyp == OTYP_LYR) {
8824 		un->un_layer_count++;
8825 	}
8826 
8827 	/*
8828 	 * Keep a count of the current # of opens in progress. This is because
8829 	 * some layered drivers try to call us as a regular open. This can
8830 	 * cause problems that we cannot prevent, however by keeping this count
8831 	 * we can at least keep our open and detach routines from racing against
8832 	 * each other under such conditions.
8833 	 */
8834 	un->un_opens_in_progress++;
8835 	mutex_exit(&sd_detach_mutex);
8836 
8837 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
8838 	part	 = SDPART(dev);
8839 	partmask = 1 << part;
8840 
8841 	/*
8842 	 * We use a semaphore here in order to serialize
8843 	 * open and close requests on the device.
8844 	 */
8845 	sema_p(&un->un_semoclose);
8846 
8847 	mutex_enter(SD_MUTEX(un));
8848 
8849 	/*
8850 	 * All device accesses go thru sdstrategy() where we check
8851 	 * on suspend status but there could be a scsi_poll command,
8852 	 * which bypasses sdstrategy(), so we need to check pm
8853 	 * status.
8854 	 */
8855 
8856 	if (!nodelay) {
8857 		while ((un->un_state == SD_STATE_SUSPENDED) ||
8858 		    (un->un_state == SD_STATE_PM_CHANGING)) {
8859 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
8860 		}
8861 
8862 		mutex_exit(SD_MUTEX(un));
8863 		if (sd_pm_entry(un) != DDI_SUCCESS) {
8864 			rval = EIO;
8865 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
8866 			    "sdopen: sd_pm_entry failed\n");
8867 			goto open_failed_with_pm;
8868 		}
8869 		mutex_enter(SD_MUTEX(un));
8870 	}
8871 
8872 	/* check for previous exclusive open */
8873 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
8874 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
8875 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
8876 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
8877 
8878 	if (un->un_exclopen & (partmask)) {
8879 		goto excl_open_fail;
8880 	}
8881 
8882 	if (flag & FEXCL) {
8883 		int i;
8884 		if (un->un_ocmap.lyropen[part]) {
8885 			goto excl_open_fail;
8886 		}
8887 		for (i = 0; i < (OTYPCNT - 1); i++) {
8888 			if (un->un_ocmap.regopen[i] & (partmask)) {
8889 				goto excl_open_fail;
8890 			}
8891 		}
8892 	}
8893 
8894 	/*
8895 	 * Check the write permission if this is a removable media device,
8896 	 * NDELAY has not been set, and writable permission is requested.
8897 	 *
8898 	 * Note: If NDELAY was set and this is write-protected media the WRITE
8899 	 * attempt will fail with EIO as part of the I/O processing. This is a
8900 	 * more permissive implementation that allows the open to succeed and
8901 	 * WRITE attempts to fail when appropriate.
8902 	 */
8903 	if (un->un_f_chk_wp_open) {
8904 		if ((flag & FWRITE) && (!nodelay)) {
8905 			mutex_exit(SD_MUTEX(un));
8906 			/*
8907 			 * Defer the check for write permission on writable
8908 			 * DVD drive till sdstrategy and will not fail open even
8909 			 * if FWRITE is set as the device can be writable
8910 			 * depending upon the media and the media can change
8911 			 * after the call to open().
8912 			 */
8913 			if (un->un_f_dvdram_writable_device == FALSE) {
8914 				if (ISCD(un) || sr_check_wp(dev)) {
8915 				rval = EROFS;
8916 				mutex_enter(SD_MUTEX(un));
8917 				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8918 				    "write to cd or write protected media\n");
8919 				goto open_fail;
8920 				}
8921 			}
8922 			mutex_enter(SD_MUTEX(un));
8923 		}
8924 	}
8925 
8926 	/*
8927 	 * If opening in NDELAY/NONBLOCK mode, just return.
8928 	 * Check if disk is ready and has a valid geometry later.
8929 	 */
8930 	if (!nodelay) {
8931 		mutex_exit(SD_MUTEX(un));
8932 		rval = sd_ready_and_valid(un);
8933 		mutex_enter(SD_MUTEX(un));
8934 		/*
8935 		 * Fail if device is not ready or if the number of disk
8936 		 * blocks is zero or negative for non CD devices.
8937 		 */
8938 
8939 		nblks = 0;
8940 
8941 		if (rval == SD_READY_VALID && (!ISCD(un))) {
8942 			/* if cmlb_partinfo fails, nblks remains 0 */
8943 			mutex_exit(SD_MUTEX(un));
8944 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
8945 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
8946 			mutex_enter(SD_MUTEX(un));
8947 		}
8948 
8949 		if ((rval != SD_READY_VALID) ||
8950 		    (!ISCD(un) && nblks <= 0)) {
8951 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
8952 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8953 			    "device not ready or invalid disk block value\n");
8954 			goto open_fail;
8955 		}
8956 #if defined(__i386) || defined(__amd64)
8957 	} else {
8958 		uchar_t *cp;
8959 		/*
8960 		 * x86 requires special nodelay handling, so that p0 is
8961 		 * always defined and accessible.
8962 		 * Invalidate geometry only if device is not already open.
8963 		 */
8964 		cp = &un->un_ocmap.chkd[0];
8965 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
8966 			if (*cp != (uchar_t)0) {
8967 			    break;
8968 			}
8969 			cp++;
8970 		}
8971 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
8972 			mutex_exit(SD_MUTEX(un));
8973 			cmlb_invalidate(un->un_cmlbhandle,
8974 			    (void *)SD_PATH_DIRECT);
8975 			mutex_enter(SD_MUTEX(un));
8976 		}
8977 
8978 #endif
8979 	}
8980 
8981 	if (otyp == OTYP_LYR) {
8982 		un->un_ocmap.lyropen[part]++;
8983 	} else {
8984 		un->un_ocmap.regopen[otyp] |= partmask;
8985 	}
8986 
8987 	/* Set up open and exclusive open flags */
8988 	if (flag & FEXCL) {
8989 		un->un_exclopen |= (partmask);
8990 	}
8991 
8992 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8993 	    "open of part %d type %d\n", part, otyp);
8994 
8995 	mutex_exit(SD_MUTEX(un));
8996 	if (!nodelay) {
8997 		sd_pm_exit(un);
8998 	}
8999 
9000 	sema_v(&un->un_semoclose);
9001 
9002 	mutex_enter(&sd_detach_mutex);
9003 	un->un_opens_in_progress--;
9004 	mutex_exit(&sd_detach_mutex);
9005 
9006 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9007 	return (DDI_SUCCESS);
9008 
9009 excl_open_fail:
9010 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9011 	rval = EBUSY;
9012 
9013 open_fail:
9014 	mutex_exit(SD_MUTEX(un));
9015 
9016 	/*
9017 	 * On a failed open we must exit the pm management.
9018 	 */
9019 	if (!nodelay) {
9020 		sd_pm_exit(un);
9021 	}
9022 open_failed_with_pm:
9023 	sema_v(&un->un_semoclose);
9024 
9025 	mutex_enter(&sd_detach_mutex);
9026 	un->un_opens_in_progress--;
9027 	if (otyp == OTYP_LYR) {
9028 		un->un_layer_count--;
9029 	}
9030 	mutex_exit(&sd_detach_mutex);
9031 
9032 	return (rval);
9033 }
9034 
9035 
9036 /*
9037  *    Function: sdclose
9038  *
9039  * Description: Driver's close(9e) entry point function.
9040  *
9041  *   Arguments: dev    - device number
9042  *		flag   - file status flag, informational only
9043  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9044  *		cred_p - user credential pointer
9045  *
9046  * Return Code: ENXIO
9047  *
9048  *     Context: Kernel thread context
9049  */
9050 /* ARGSUSED */
9051 static int
9052 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9053 {
9054 	struct sd_lun	*un;
9055 	uchar_t		*cp;
9056 	int		part;
9057 	int		nodelay;
9058 	int		rval = 0;
9059 
9060 	/* Validate the open type */
9061 	if (otyp >= OTYPCNT) {
9062 		return (ENXIO);
9063 	}
9064 
9065 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9066 		return (ENXIO);
9067 	}
9068 
9069 	part = SDPART(dev);
9070 	nodelay = flag & (FNDELAY | FNONBLOCK);
9071 
9072 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9073 	    "sdclose: close of part %d type %d\n", part, otyp);
9074 
9075 	/*
9076 	 * We use a semaphore here in order to serialize
9077 	 * open and close requests on the device.
9078 	 */
9079 	sema_p(&un->un_semoclose);
9080 
9081 	mutex_enter(SD_MUTEX(un));
9082 
9083 	/* Don't proceed if power is being changed. */
9084 	while (un->un_state == SD_STATE_PM_CHANGING) {
9085 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9086 	}
9087 
9088 	if (un->un_exclopen & (1 << part)) {
9089 		un->un_exclopen &= ~(1 << part);
9090 	}
9091 
9092 	/* Update the open partition map */
9093 	if (otyp == OTYP_LYR) {
9094 		un->un_ocmap.lyropen[part] -= 1;
9095 	} else {
9096 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9097 	}
9098 
9099 	cp = &un->un_ocmap.chkd[0];
9100 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9101 		if (*cp != NULL) {
9102 			break;
9103 		}
9104 		cp++;
9105 	}
9106 
9107 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9108 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9109 
9110 		/*
9111 		 * We avoid persistance upon the last close, and set
9112 		 * the throttle back to the maximum.
9113 		 */
9114 		un->un_throttle = un->un_saved_throttle;
9115 
9116 		if (un->un_state == SD_STATE_OFFLINE) {
9117 			if (un->un_f_is_fibre == FALSE) {
9118 				scsi_log(SD_DEVINFO(un), sd_label,
9119 					CE_WARN, "offline\n");
9120 			}
9121 			mutex_exit(SD_MUTEX(un));
9122 			cmlb_invalidate(un->un_cmlbhandle,
9123 			    (void *)SD_PATH_DIRECT);
9124 			mutex_enter(SD_MUTEX(un));
9125 
9126 		} else {
9127 			/*
9128 			 * Flush any outstanding writes in NVRAM cache.
9129 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9130 			 * cmd, it may not work for non-Pluto devices.
9131 			 * SYNCHRONIZE CACHE is not required for removables,
9132 			 * except DVD-RAM drives.
9133 			 *
9134 			 * Also note: because SYNCHRONIZE CACHE is currently
9135 			 * the only command issued here that requires the
9136 			 * drive be powered up, only do the power up before
9137 			 * sending the Sync Cache command. If additional
9138 			 * commands are added which require a powered up
9139 			 * drive, the following sequence may have to change.
9140 			 *
9141 			 * And finally, note that parallel SCSI on SPARC
9142 			 * only issues a Sync Cache to DVD-RAM, a newly
9143 			 * supported device.
9144 			 */
9145 #if defined(__i386) || defined(__amd64)
9146 			if (un->un_f_sync_cache_supported ||
9147 			    un->un_f_dvdram_writable_device == TRUE) {
9148 #else
9149 			if (un->un_f_dvdram_writable_device == TRUE) {
9150 #endif
9151 				mutex_exit(SD_MUTEX(un));
9152 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9153 					rval =
9154 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9155 					    NULL);
9156 					/* ignore error if not supported */
9157 					if (rval == ENOTSUP) {
9158 						rval = 0;
9159 					} else if (rval != 0) {
9160 						rval = EIO;
9161 					}
9162 					sd_pm_exit(un);
9163 				} else {
9164 					rval = EIO;
9165 				}
9166 				mutex_enter(SD_MUTEX(un));
9167 			}
9168 
9169 			/*
9170 			 * For devices which supports DOOR_LOCK, send an ALLOW
9171 			 * MEDIA REMOVAL command, but don't get upset if it
9172 			 * fails. We need to raise the power of the drive before
9173 			 * we can call sd_send_scsi_DOORLOCK()
9174 			 */
9175 			if (un->un_f_doorlock_supported) {
9176 				mutex_exit(SD_MUTEX(un));
9177 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9178 					rval = sd_send_scsi_DOORLOCK(un,
9179 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9180 
9181 					sd_pm_exit(un);
9182 					if (ISCD(un) && (rval != 0) &&
9183 					    (nodelay != 0)) {
9184 						rval = ENXIO;
9185 					}
9186 				} else {
9187 					rval = EIO;
9188 				}
9189 				mutex_enter(SD_MUTEX(un));
9190 			}
9191 
9192 			/*
9193 			 * If a device has removable media, invalidate all
9194 			 * parameters related to media, such as geometry,
9195 			 * blocksize, and blockcount.
9196 			 */
9197 			if (un->un_f_has_removable_media) {
9198 				sr_ejected(un);
9199 			}
9200 
9201 			/*
9202 			 * Destroy the cache (if it exists) which was
9203 			 * allocated for the write maps since this is
9204 			 * the last close for this media.
9205 			 */
9206 			if (un->un_wm_cache) {
9207 				/*
9208 				 * Check if there are pending commands.
9209 				 * and if there are give a warning and
9210 				 * do not destroy the cache.
9211 				 */
9212 				if (un->un_ncmds_in_driver > 0) {
9213 					scsi_log(SD_DEVINFO(un),
9214 					    sd_label, CE_WARN,
9215 					    "Unable to clean up memory "
9216 					    "because of pending I/O\n");
9217 				} else {
9218 					kmem_cache_destroy(
9219 					    un->un_wm_cache);
9220 					un->un_wm_cache = NULL;
9221 				}
9222 			}
9223 		}
9224 	}
9225 
9226 	mutex_exit(SD_MUTEX(un));
9227 	sema_v(&un->un_semoclose);
9228 
9229 	if (otyp == OTYP_LYR) {
9230 		mutex_enter(&sd_detach_mutex);
9231 		/*
9232 		 * The detach routine may run when the layer count
9233 		 * drops to zero.
9234 		 */
9235 		un->un_layer_count--;
9236 		mutex_exit(&sd_detach_mutex);
9237 	}
9238 
9239 	return (rval);
9240 }
9241 
9242 
9243 /*
9244  *    Function: sd_ready_and_valid
9245  *
9246  * Description: Test if device is ready and has a valid geometry.
      *		On the success path the media state is set to
      *		DKIO_INSERTED and waiters on un_state_cv are woken.
      *		SD_MUTEX(un) is taken here and dropped around every
      *		SCSI command and cmlb call issued below.
9247  *
9248  *   Arguments: un  - driver soft state (unit) structure; the caller
9249  *		      must not hold SD_MUTEX(un)
9250  *
9251  * Return Code: SD_READY_VALID		ready and valid label
9252  *		SD_NOT_READY_VALID	not ready, no label
9253  *		SD_RESERVED_BY_OTHERS	reservation conflict
      *		ENOMEM			un_wm_cache could not be created
9254  *
9255  *     Context: Never called at interrupt context.
9256  */
9257 
9258 static int
9259 sd_ready_and_valid(struct sd_lun *un)
9260 {
9261 	struct sd_errstats	*stp;
9262 	uint64_t		capacity;
9263 	uint_t			lbasize;
9264 	int			rval = SD_READY_VALID;
9265 	char			name_str[48];
9266 	int			is_valid;
9267 
9268 	ASSERT(un != NULL);
9269 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9270 
9271 	mutex_enter(SD_MUTEX(un));
9272 	/*
9273 	 * If a device has removable media, we must check if media is
9274 	 * ready when checking if this device is ready and valid.
9275 	 */
9276 	if (un->un_f_has_removable_media) {
9277 		mutex_exit(SD_MUTEX(un));
9278 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9279 			rval = SD_NOT_READY_VALID;
9280 			mutex_enter(SD_MUTEX(un));
9281 			goto done;
9282 		}
9283 
		/* Label validity is sampled before SD_MUTEX is re-acquired. */
9284 		is_valid = SD_IS_VALID_LABEL(un);
9285 		mutex_enter(SD_MUTEX(un));
9286 		if (!is_valid ||
9287 		    (un->un_f_blockcount_is_valid == FALSE) ||
9288 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9289 
9290 			/* capacity has to be read every open. */
9291 			mutex_exit(SD_MUTEX(un));
9292 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9293 			    &lbasize, SD_PATH_DIRECT) != 0) {
9294 				cmlb_invalidate(un->un_cmlbhandle,
9295 				    (void *)SD_PATH_DIRECT);
9296 				mutex_enter(SD_MUTEX(un));
9297 				rval = SD_NOT_READY_VALID;
9298 				goto done;
9299 			} else {
9300 				mutex_enter(SD_MUTEX(un));
9301 				sd_update_block_info(un, lbasize, capacity);
9302 			}
9303 		}
9304 
9305 		/*
9306 		 * Check if the media in the device is writable or not.
		 * Only done for CD devices whose label was not valid when
		 * it was sampled above.
9307 		 */
9308 		if (!is_valid && ISCD(un)) {
9309 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
9310 		}
9311 
9312 	} else {
9313 		/*
9314 		 * Do a test unit ready to clear any unit attention from non-cd
9315 		 * devices.
		 * The result is deliberately ignored here.
9316 		 */
9317 		mutex_exit(SD_MUTEX(un));
9318 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9319 		mutex_enter(SD_MUTEX(un));
9320 	}
9321 
9322 
9323 	/*
9324 	 * If this is a non 512 block device, allocate space for
9325 	 * the wmap cache. This is being done here since every time
9326 	 * a media is changed this routine will be called and the
9327 	 * block size is a function of media rather than device.
	 * The cache is named "<driver><instance>_cache" and is only
	 * created when un_wm_cache is not already set.
9328 	 */
9329 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9330 		if (!(un->un_wm_cache)) {
9331 			(void) snprintf(name_str, sizeof (name_str),
9332 			    "%s%d_cache",
9333 			    ddi_driver_name(SD_DEVINFO(un)),
9334 			    ddi_get_instance(SD_DEVINFO(un)));
9335 			un->un_wm_cache = kmem_cache_create(
9336 			    name_str, sizeof (struct sd_w_map),
9337 			    8, sd_wm_cache_constructor,
9338 			    sd_wm_cache_destructor, NULL,
9339 			    (void *)un, NULL, 0);
9340 			if (!(un->un_wm_cache)) {
					/*
					 * NOTE(review): ENOMEM is not one of
					 * the SD_* codes this routine otherwise
					 * returns; confirm that callers only
					 * compare against SD_READY_VALID.
					 */
9341 					rval = ENOMEM;
9342 					goto done;
9343 			}
9344 		}
9345 	}
9346 
9347 	if (un->un_state == SD_STATE_NORMAL) {
9348 		/*
9349 		 * If the target is not yet ready here (defined by a TUR
9350 		 * failure), invalidate the geometry and print an 'offline'
9351 		 * message. This is a legacy message, as the state of the
9352 		 * target is not actually changed to SD_STATE_OFFLINE.
9353 		 *
9354 		 * If the TUR fails for EACCES (Reservation Conflict),
9355 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9356 		 * reservation conflict. If the TUR fails for other
9357 		 * reasons, SD_NOT_READY_VALID will be returned.
9358 		 */
9359 		int err;
9360 
9361 		mutex_exit(SD_MUTEX(un));
9362 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9363 		mutex_enter(SD_MUTEX(un));
9364 
9365 		if (err != 0) {
9366 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9367 			    "offline or reservation conflict\n");
9368 			mutex_exit(SD_MUTEX(un));
9369 			cmlb_invalidate(un->un_cmlbhandle,
9370 			    (void *)SD_PATH_DIRECT);
9371 			mutex_enter(SD_MUTEX(un));
9372 			if (err == EACCES) {
9373 				rval = SD_RESERVED_BY_OTHERS;
9374 			} else {
9375 				rval = SD_NOT_READY_VALID;
9376 			}
9377 			goto done;
9378 		}
9379 	}
9380 
	/* Label validation is skipped while a format is in progress. */
9381 	if (un->un_f_format_in_progress == FALSE) {
9382 		mutex_exit(SD_MUTEX(un));
9383 		if (cmlb_validate(un->un_cmlbhandle, 0,
9384 		    (void *)SD_PATH_DIRECT) != 0) {
9385 			rval = SD_NOT_READY_VALID;
9386 			mutex_enter(SD_MUTEX(un));
9387 			goto done;
9388 		}
		/* SD_MUTEX is not held while sd_set_pstats() runs. */
9389 		if (un->un_f_pkstats_enabled) {
9390 			sd_set_pstats(un);
9391 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9392 			    "sd_ready_and_valid: un:0x%p pstats created and "
9393 			    "set\n", un);
9394 		}
9395 		mutex_enter(SD_MUTEX(un));
9396 	}
9397 
9398 	/*
9399 	 * If this device supports DOOR_LOCK command, try and send
9400 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
9401 	 * if it fails. For a CD, however, it is an error
9402 	 */
9403 	if (un->un_f_doorlock_supported) {
9404 		mutex_exit(SD_MUTEX(un));
9405 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9406 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9407 			rval = SD_NOT_READY_VALID;
9408 			mutex_enter(SD_MUTEX(un));
9409 			goto done;
9410 		}
9411 		mutex_enter(SD_MUTEX(un));
9412 	}
9413 
9414 	/* The state has changed, inform the media watch routines */
9415 	un->un_mediastate = DKIO_INSERTED;
9416 	cv_broadcast(&un->un_state_cv);
9417 	rval = SD_READY_VALID;
9418 
9419 done:
9420 
9421 	/*
9422 	 * Initialize the capacity kstat value, if no media previously
9423 	 * (capacity kstat is 0) and a media has been inserted
9424 	 * (un_blockcount > 0).
	 *
	 * Every path, success or failure, arrives here holding
	 * SD_MUTEX(un); it is released just before returning.
9425 	 */
9426 	if (un->un_errstats != NULL) {
9427 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9428 		if ((stp->sd_capacity.value.ui64 == 0) &&
9429 		    (un->un_f_blockcount_is_valid == TRUE)) {
9430 			stp->sd_capacity.value.ui64 =
9431 			    (uint64_t)((uint64_t)un->un_blockcount *
9432 			    un->un_sys_blocksize);
9433 		}
9434 	}
9435 
9436 	mutex_exit(SD_MUTEX(un));
9437 	return (rval);
9438 }
9439 
9440 
9441 /*
9442  *    Function: sdmin
9443  *
9444  * Description: Routine to limit the size of a data transfer. Used in
9445  *		conjunction with physio(9F).
9446  *
9447  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9448  *
9449  *     Context: Kernel thread context.
9450  */
9451 
9452 static void
9453 sdmin(struct buf *bp)
9454 {
9455 	struct sd_lun	*un;
9456 	int		instance;
9457 
9458 	instance = SDUNIT(bp->b_edev);
9459 
9460 	un = ddi_get_soft_state(sd_state, instance);
9461 	ASSERT(un != NULL);
9462 
9463 	if (bp->b_bcount > un->un_max_xfer_size) {
9464 		bp->b_bcount = un->un_max_xfer_size;
9465 	}
9466 }
9467 
9468 
9469 /*
9470  *    Function: sdread
9471  *
9472  * Description: Driver's read(9e) entry point function.
9473  *
9474  *   Arguments: dev   - device number
9475  *		uio   - structure pointer describing where data is to be stored
9476  *			in user's space
9477  *		cred_p  - user credential pointer
9478  *
9479  * Return Code: ENXIO
9480  *		EIO
9481  *		EINVAL
9482  *		value returned by physio
9483  *
9484  *     Context: Kernel thread context.
9485  */
9486 /* ARGSUSED */
9487 static int
9488 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9489 {
9490 	struct sd_lun	*un = NULL;
9491 	int		secmask;
9492 	int		err;
9493 
9494 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9495 		return (ENXIO);
9496 	}
9497 
9498 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9499 
9500 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9501 		mutex_enter(SD_MUTEX(un));
9502 		/*
9503 		 * Because the call to sd_ready_and_valid will issue I/O we
9504 		 * must wait here if either the device is suspended or
9505 		 * if it's power level is changing.
9506 		 */
9507 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9508 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9509 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9510 		}
9511 		un->un_ncmds_in_driver++;
9512 		mutex_exit(SD_MUTEX(un));
9513 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9514 			mutex_enter(SD_MUTEX(un));
9515 			un->un_ncmds_in_driver--;
9516 			ASSERT(un->un_ncmds_in_driver >= 0);
9517 			mutex_exit(SD_MUTEX(un));
9518 			return (EIO);
9519 		}
9520 		mutex_enter(SD_MUTEX(un));
9521 		un->un_ncmds_in_driver--;
9522 		ASSERT(un->un_ncmds_in_driver >= 0);
9523 		mutex_exit(SD_MUTEX(un));
9524 	}
9525 
9526 	/*
9527 	 * Read requests are restricted to multiples of the system block size.
9528 	 */
9529 	secmask = un->un_sys_blocksize - 1;
9530 
9531 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9532 		SD_ERROR(SD_LOG_READ_WRITE, un,
9533 		    "sdread: file offset not modulo %d\n",
9534 		    un->un_sys_blocksize);
9535 		err = EINVAL;
9536 	} else if (uio->uio_iov->iov_len & (secmask)) {
9537 		SD_ERROR(SD_LOG_READ_WRITE, un,
9538 		    "sdread: transfer length not modulo %d\n",
9539 		    un->un_sys_blocksize);
9540 		err = EINVAL;
9541 	} else {
9542 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9543 	}
9544 	return (err);
9545 }
9546 
9547 
9548 /*
9549  *    Function: sdwrite
9550  *
9551  * Description: Driver's write(9e) entry point function.
9552  *
9553  *   Arguments: dev   - device number
9554  *		uio   - structure pointer describing where data is stored in
9555  *			user's space
9556  *		cred_p  - user credential pointer
9557  *
9558  * Return Code: ENXIO
9559  *		EIO
9560  *		EINVAL
9561  *		value returned by physio
9562  *
9563  *     Context: Kernel thread context.
9564  */
9565 /* ARGSUSED */
9566 static int
9567 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9568 {
9569 	struct sd_lun	*un = NULL;
9570 	int		secmask;
9571 	int		err;
9572 
9573 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9574 		return (ENXIO);
9575 	}
9576 
9577 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9578 
9579 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9580 		mutex_enter(SD_MUTEX(un));
9581 		/*
9582 		 * Because the call to sd_ready_and_valid will issue I/O we
9583 		 * must wait here if either the device is suspended or
9584 		 * if it's power level is changing.
9585 		 */
9586 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9587 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9588 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9589 		}
9590 		un->un_ncmds_in_driver++;
9591 		mutex_exit(SD_MUTEX(un));
9592 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9593 			mutex_enter(SD_MUTEX(un));
9594 			un->un_ncmds_in_driver--;
9595 			ASSERT(un->un_ncmds_in_driver >= 0);
9596 			mutex_exit(SD_MUTEX(un));
9597 			return (EIO);
9598 		}
9599 		mutex_enter(SD_MUTEX(un));
9600 		un->un_ncmds_in_driver--;
9601 		ASSERT(un->un_ncmds_in_driver >= 0);
9602 		mutex_exit(SD_MUTEX(un));
9603 	}
9604 
9605 	/*
9606 	 * Write requests are restricted to multiples of the system block size.
9607 	 */
9608 	secmask = un->un_sys_blocksize - 1;
9609 
9610 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9611 		SD_ERROR(SD_LOG_READ_WRITE, un,
9612 		    "sdwrite: file offset not modulo %d\n",
9613 		    un->un_sys_blocksize);
9614 		err = EINVAL;
9615 	} else if (uio->uio_iov->iov_len & (secmask)) {
9616 		SD_ERROR(SD_LOG_READ_WRITE, un,
9617 		    "sdwrite: transfer length not modulo %d\n",
9618 		    un->un_sys_blocksize);
9619 		err = EINVAL;
9620 	} else {
9621 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9622 	}
9623 	return (err);
9624 }
9625 
9626 
9627 /*
9628  *    Function: sdaread
9629  *
9630  * Description: Driver's aread(9e) entry point function.
9631  *
9632  *   Arguments: dev   - device number
9633  *		aio   - structure pointer describing where data is to be stored
9634  *		cred_p  - user credential pointer
9635  *
9636  * Return Code: ENXIO
9637  *		EIO
9638  *		EINVAL
9639  *		value returned by aphysio
9640  *
9641  *     Context: Kernel thread context.
9642  */
9643 /* ARGSUSED */
9644 static int
9645 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9646 {
9647 	struct sd_lun	*un = NULL;
9648 	struct uio	*uio = aio->aio_uio;
9649 	int		secmask;
9650 	int		err;
9651 
9652 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9653 		return (ENXIO);
9654 	}
9655 
9656 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9657 
9658 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9659 		mutex_enter(SD_MUTEX(un));
9660 		/*
9661 		 * Because the call to sd_ready_and_valid will issue I/O we
9662 		 * must wait here if either the device is suspended or
9663 		 * if it's power level is changing.
9664 		 */
9665 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9666 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9667 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9668 		}
9669 		un->un_ncmds_in_driver++;
9670 		mutex_exit(SD_MUTEX(un));
9671 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9672 			mutex_enter(SD_MUTEX(un));
9673 			un->un_ncmds_in_driver--;
9674 			ASSERT(un->un_ncmds_in_driver >= 0);
9675 			mutex_exit(SD_MUTEX(un));
9676 			return (EIO);
9677 		}
9678 		mutex_enter(SD_MUTEX(un));
9679 		un->un_ncmds_in_driver--;
9680 		ASSERT(un->un_ncmds_in_driver >= 0);
9681 		mutex_exit(SD_MUTEX(un));
9682 	}
9683 
9684 	/*
9685 	 * Read requests are restricted to multiples of the system block size.
9686 	 */
9687 	secmask = un->un_sys_blocksize - 1;
9688 
9689 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9690 		SD_ERROR(SD_LOG_READ_WRITE, un,
9691 		    "sdaread: file offset not modulo %d\n",
9692 		    un->un_sys_blocksize);
9693 		err = EINVAL;
9694 	} else if (uio->uio_iov->iov_len & (secmask)) {
9695 		SD_ERROR(SD_LOG_READ_WRITE, un,
9696 		    "sdaread: transfer length not modulo %d\n",
9697 		    un->un_sys_blocksize);
9698 		err = EINVAL;
9699 	} else {
9700 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9701 	}
9702 	return (err);
9703 }
9704 
9705 
9706 /*
9707  *    Function: sdawrite
9708  *
9709  * Description: Driver's awrite(9e) entry point function.
9710  *
9711  *   Arguments: dev   - device number
9712  *		aio   - structure pointer describing where data is stored
9713  *		cred_p  - user credential pointer
9714  *
9715  * Return Code: ENXIO
9716  *		EIO
9717  *		EINVAL
9718  *		value returned by aphysio
9719  *
9720  *     Context: Kernel thread context.
9721  */
9722 /* ARGSUSED */
9723 static int
9724 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9725 {
9726 	struct sd_lun	*un = NULL;
9727 	struct uio	*uio = aio->aio_uio;
9728 	int		secmask;
9729 	int		err;
9730 
9731 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9732 		return (ENXIO);
9733 	}
9734 
9735 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9736 
9737 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9738 		mutex_enter(SD_MUTEX(un));
9739 		/*
9740 		 * Because the call to sd_ready_and_valid will issue I/O we
9741 		 * must wait here if either the device is suspended or
9742 		 * if it's power level is changing.
9743 		 */
9744 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9745 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9746 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9747 		}
9748 		un->un_ncmds_in_driver++;
9749 		mutex_exit(SD_MUTEX(un));
9750 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9751 			mutex_enter(SD_MUTEX(un));
9752 			un->un_ncmds_in_driver--;
9753 			ASSERT(un->un_ncmds_in_driver >= 0);
9754 			mutex_exit(SD_MUTEX(un));
9755 			return (EIO);
9756 		}
9757 		mutex_enter(SD_MUTEX(un));
9758 		un->un_ncmds_in_driver--;
9759 		ASSERT(un->un_ncmds_in_driver >= 0);
9760 		mutex_exit(SD_MUTEX(un));
9761 	}
9762 
9763 	/*
9764 	 * Write requests are restricted to multiples of the system block size.
9765 	 */
9766 	secmask = un->un_sys_blocksize - 1;
9767 
9768 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9769 		SD_ERROR(SD_LOG_READ_WRITE, un,
9770 		    "sdawrite: file offset not modulo %d\n",
9771 		    un->un_sys_blocksize);
9772 		err = EINVAL;
9773 	} else if (uio->uio_iov->iov_len & (secmask)) {
9774 		SD_ERROR(SD_LOG_READ_WRITE, un,
9775 		    "sdawrite: transfer length not modulo %d\n",
9776 		    un->un_sys_blocksize);
9777 		err = EINVAL;
9778 	} else {
9779 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
9780 	}
9781 	return (err);
9782 }
9783 
9784 
9785 
9786 
9787 
9788 /*
9789  * Driver IO processing follows the following sequence:
9790  *
9791  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
9792  *         |                |                     ^
9793  *         v                v                     |
9794  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
9795  *         |                |                     |                   |
9796  *         v                |                     |                   |
9797  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
9798  *         |                |                     ^                   ^
9799  *         v                v                     |                   |
9800  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
9801  *         |                |                     |                   |
9802  *     +---+                |                     +------------+      +-------+
9803  *     |                    |                                  |              |
9804  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9805  *     |                    v                                  |              |
9806  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
9807  *     |                    |                                  ^              |
9808  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9809  *     |                    v                                  |              |
9810  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
9811  *     |                    |                                  ^              |
9812  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9813  *     |                    v                                  |              |
9814  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
9815  *     |                    |                                  ^              |
9816  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
9817  *     |                    v                                  |              |
9818  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
9819  *     |                    |                                  ^              |
9820  *     |                    |                                  |              |
9821  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
9822  *                          |                           ^
9823  *                          v                           |
9824  *                   sd_core_iostart()                  |
9825  *                          |                           |
9826  *                          |                           +------>(*destroypkt)()
9827  *                          +-> sd_start_cmds() <-+     |           |
9828  *                          |                     |     |           v
9829  *                          |                     |     |  scsi_destroy_pkt(9F)
9830  *                          |                     |     |
9831  *                          +->(*initpkt)()       +- sdintr()
9832  *                          |  |                        |  |
9833  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
9834  *                          |  +-> scsi_setup_cdb(9F)   |
9835  *                          |                           |
9836  *                          +--> scsi_transport(9F)     |
9837  *                                     |                |
9838  *                                     +----> SCSA ---->+
9839  *
9840  *
9841  * This code is based upon the following presumptions:
9842  *
9843  *   - iostart and iodone functions operate on buf(9S) structures. These
9844  *     functions perform the necessary operations on the buf(9S) and pass
9845  *     them along to the next function in the chain by using the macros
9846  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
9847  *     (for iodone side functions).
9848  *
9849  *   - The iostart side functions may sleep. The iodone side functions
9850  *     are called under interrupt context and may NOT sleep. Therefore
9851  *     iodone side functions also may not call iostart side functions.
9852  *     (NOTE: iostart side functions should NOT sleep for memory, as
9853  *     this could result in deadlock.)
9854  *
9855  *   - An iostart side function may call its corresponding iodone side
9856  *     function directly (if necessary).
9857  *
9858  *   - In the event of an error, an iostart side function can return a buf(9S)
9859  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
9860  *     b_error in the usual way of course).
9861  *
9862  *   - The taskq mechanism may be used by the iodone side functions to dispatch
9863  *     requests to the iostart side functions.  The iostart side functions in
9864  *     this case would be called under the context of a taskq thread, so it's
9865  *     OK for them to block/sleep/spin in this case.
9866  *
9867  *   - iostart side functions may allocate "shadow" buf(9S) structs and
9868  *     pass them along to the next function in the chain.  The corresponding
9869  *     iodone side functions must coalesce the "shadow" bufs and return
9870  *     the "original" buf to the next higher layer.
9871  *
9872  *   - The b_private field of the buf(9S) struct holds a pointer to
9873  *     an sd_xbuf struct, which contains information needed to
9874  *     construct the scsi_pkt for the command.
9875  *
9876  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
9877  *     layer must acquire & release the SD_MUTEX(un) as needed.
9878  */
9879 
9880 
9881 /*
9882  * Create taskq for all targets in the system. This is created at
9883  * _init(9E) and destroyed at _fini(9E).
9884  *
9885  * Note: here we set the minalloc to a reasonably high number to ensure that
9886  * we will have an adequate supply of task entries available at interrupt time.
9887  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
9888  * sd_taskq_create().  Since we do not want to sleep for allocations at
9889  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
9890  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
9891  * requests at any one instant in time.
9892  */
9893 #define	SD_TASKQ_NUMTHREADS	8
9894 #define	SD_TASKQ_MINALLOC	256
9895 #define	SD_TASKQ_MAXALLOC	256
9896 
     /* Driver-wide taskq; set by sd_taskq_create(), cleared by sd_taskq_delete(). */
9897 static taskq_t	*sd_tq = NULL;
9898 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
9899 
     /*
      * minalloc/maxalloc values that sd_taskq_create() passes to taskq_create()
      * for both taskqs; they default to the SD_TASKQ_* macros above.
      */
9900 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
9901 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
9902 
9903 /*
9904  * The following task queue is being created for the write part of
9905  * read-modify-write of non-512 block size devices.
9906  * Limit the number of threads to 1 for now. This number has been chosen
9907  * considering the fact that it applies only to dvd ram drives/MO drives
9908  * currently. Performance for which is not main criteria at this stage.
9909  * Note: It needs to be explored if we can use a single taskq in future
9910  */
9911 #define	SD_WMR_TASKQ_NUMTHREADS	1
9912 static taskq_t	*sd_wmr_tq = NULL;
9913 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
9914 
9915 /*
9916  *    Function: sd_taskq_create
9917  *
9918  * Description: Create taskq thread(s) and preallocate task entries
9919  *
9920  * Return Code: Returns a pointer to the allocated taskq_t.
9921  *
9922  *     Context: Can sleep. Requires blockable context.
9923  *
9924  *       Notes: - The taskq() facility currently is NOT part of the DDI.
9925  *		  (definitely NOT recommeded for 3rd-party drivers!) :-)
9926  *		- taskq_create() will block for memory, also it will panic
9927  *		  if it cannot create the requested number of threads.
9928  *		- Currently taskq_create() creates threads that cannot be
9929  *		  swapped.
9930  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
9931  *		  supply of taskq entries at interrupt time (ie, so that we
9932  *		  do not have to sleep for memory)
9933  */
9934 
9935 static void
9936 sd_taskq_create(void)
9937 {
9938 	char	taskq_name[TASKQ_NAMELEN];
9939 
9940 	ASSERT(sd_tq == NULL);
9941 	ASSERT(sd_wmr_tq == NULL);
9942 
9943 	(void) snprintf(taskq_name, sizeof (taskq_name),
9944 	    "%s_drv_taskq", sd_label);
9945 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
9946 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9947 	    TASKQ_PREPOPULATE));
9948 
9949 	(void) snprintf(taskq_name, sizeof (taskq_name),
9950 	    "%s_rmw_taskq", sd_label);
9951 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
9952 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9953 	    TASKQ_PREPOPULATE));
9954 }
9955 
9956 
9957 /*
9958  *    Function: sd_taskq_delete
9959  *
9960  * Description: Complementary cleanup routine for sd_taskq_create().
9961  *
9962  *     Context: Kernel thread context.
9963  */
9964 
9965 static void
9966 sd_taskq_delete(void)
9967 {
9968 	ASSERT(sd_tq != NULL);
9969 	ASSERT(sd_wmr_tq != NULL);
9970 	taskq_destroy(sd_tq);
9971 	taskq_destroy(sd_wmr_tq);
9972 	sd_tq = NULL;
9973 	sd_wmr_tq = NULL;
9974 }
9975 
9976 
9977 /*
9978  *    Function: sdstrategy
9979  *
9980  * Description: Driver's strategy (9E) entry point function.
9981  *
9982  *   Arguments: bp - pointer to buf(9S)
9983  *
9984  * Return Code: Always returns zero
9985  *
9986  *     Context: Kernel thread context.
9987  */
9988 
9989 static int
9990 sdstrategy(struct buf *bp)
9991 {
9992 	struct sd_lun *un;
9993 
9994 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
9995 	if (un == NULL) {
9996 		bioerror(bp, EIO);
9997 		bp->b_resid = bp->b_bcount;
9998 		biodone(bp);
9999 		return (0);
10000 	}
10001 	/* As was done in the past, fail new cmds. if state is dumping. */
10002 	if (un->un_state == SD_STATE_DUMPING) {
10003 		bioerror(bp, ENXIO);
10004 		bp->b_resid = bp->b_bcount;
10005 		biodone(bp);
10006 		return (0);
10007 	}
10008 
10009 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10010 
10011 	/*
10012 	 * Commands may sneak in while we released the mutex in
10013 	 * DDI_SUSPEND, we should block new commands. However, old
10014 	 * commands that are still in the driver at this point should
10015 	 * still be allowed to drain.
10016 	 */
10017 	mutex_enter(SD_MUTEX(un));
10018 	/*
10019 	 * Must wait here if either the device is suspended or
10020 	 * if it's power level is changing.
10021 	 */
10022 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10023 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10024 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10025 	}
10026 
10027 	un->un_ncmds_in_driver++;
10028 
10029 	/*
10030 	 * atapi: Since we are running the CD for now in PIO mode we need to
10031 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
10032 	 * the HBA's init_pkt routine.
10033 	 */
10034 	if (un->un_f_cfg_is_atapi == TRUE) {
10035 		mutex_exit(SD_MUTEX(un));
10036 		bp_mapin(bp);
10037 		mutex_enter(SD_MUTEX(un));
10038 	}
10039 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10040 	    un->un_ncmds_in_driver);
10041 
10042 	mutex_exit(SD_MUTEX(un));
10043 
10044 	/*
10045 	 * This will (eventually) allocate the sd_xbuf area and
10046 	 * call sd_xbuf_strategy().  We just want to return the
10047 	 * result of ddi_xbuf_qstrategy so that we have an opt-
10048 	 * imized tail call which saves us a stack frame.
10049 	 */
10050 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10051 }
10052 
10053 
10054 /*
10055  *    Function: sd_xbuf_strategy
10056  *
10057  * Description: Function for initiating IO operations via the
10058  *		ddi_xbuf_qstrategy() mechanism.
10059  *
10060  *     Context: Kernel thread context.
10061  */
10062 
10063 static void
10064 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10065 {
10066 	struct sd_lun *un = arg;
10067 
10068 	ASSERT(bp != NULL);
10069 	ASSERT(xp != NULL);
10070 	ASSERT(un != NULL);
10071 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10072 
10073 	/*
10074 	 * Initialize the fields in the xbuf and save a pointer to the
10075 	 * xbuf in bp->b_private.
10076 	 */
10077 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10078 
10079 	/* Send the buf down the iostart chain */
10080 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10081 }
10082 
10083 
10084 /*
10085  *    Function: sd_xbuf_init
10086  *
10087  * Description: Prepare the given sd_xbuf struct for use.
10088  *
10089  *   Arguments: un - ptr to softstate
10090  *		bp - ptr to associated buf(9S)
10091  *		xp - ptr to associated sd_xbuf
10092  *		chain_type - IO chain type to use:
10093  *			SD_CHAIN_NULL
10094  *			SD_CHAIN_BUFIO
10095  *			SD_CHAIN_USCSI
10096  *			SD_CHAIN_DIRECT
10097  *			SD_CHAIN_DIRECT_PRIORITY
10098  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10099  *			initialization; may be NULL if none.
10100  *
10101  *     Context: Kernel thread context
10102  */
10103 
10104 static void
10105 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10106 	uchar_t chain_type, void *pktinfop)
10107 {
10108 	int index;
10109 
10110 	ASSERT(un != NULL);
10111 	ASSERT(bp != NULL);
10112 	ASSERT(xp != NULL);
10113 
10114 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10115 	    bp, chain_type);
10116 
10117 	xp->xb_un	= un;
10118 	xp->xb_pktp	= NULL;
10119 	xp->xb_pktinfo	= pktinfop;
10120 	xp->xb_private	= bp->b_private;
10121 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10122 
10123 	/*
10124 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10125 	 * upon the specified chain type to use.
10126 	 */
10127 	switch (chain_type) {
10128 	case SD_CHAIN_NULL:
10129 		/*
10130 		 * Fall thru to just use the values for the buf type, even
10131 		 * tho for the NULL chain these values will never be used.
10132 		 */
10133 		/* FALLTHRU */
10134 	case SD_CHAIN_BUFIO:
10135 		index = un->un_buf_chain_type;
10136 		break;
10137 	case SD_CHAIN_USCSI:
10138 		index = un->un_uscsi_chain_type;
10139 		break;
10140 	case SD_CHAIN_DIRECT:
10141 		index = un->un_direct_chain_type;
10142 		break;
10143 	case SD_CHAIN_DIRECT_PRIORITY:
10144 		index = un->un_priority_chain_type;
10145 		break;
10146 	default:
10147 		/* We're really broken if we ever get here... */
10148 		panic("sd_xbuf_init: illegal chain type!");
10149 		/*NOTREACHED*/
10150 	}
10151 
10152 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10153 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10154 
10155 	/*
10156 	 * It might be a bit easier to simply bzero the entire xbuf above,
10157 	 * but it turns out that since we init a fair number of members anyway,
10158 	 * we save a fair number cycles by doing explicit assignment of zero.
10159 	 */
10160 	xp->xb_pkt_flags	= 0;
10161 	xp->xb_dma_resid	= 0;
10162 	xp->xb_retry_count	= 0;
10163 	xp->xb_victim_retry_count = 0;
10164 	xp->xb_ua_retry_count	= 0;
10165 	xp->xb_sense_bp		= NULL;
10166 	xp->xb_sense_status	= 0;
10167 	xp->xb_sense_state	= 0;
10168 	xp->xb_sense_resid	= 0;
10169 
10170 	bp->b_private	= xp;
10171 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10172 	bp->b_resid	= 0;
10173 	bp->av_forw	= NULL;
10174 	bp->av_back	= NULL;
10175 	bioerror(bp, 0);
10176 
10177 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10178 }
10179 
10180 
10181 /*
10182  *    Function: sd_uscsi_strategy
10183  *
10184  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10185  *
10186  *   Arguments: bp - buf struct ptr
10187  *
10188  * Return Code: Always returns 0
10189  *
10190  *     Context: Kernel thread context
10191  */
10192 
10193 static int
10194 sd_uscsi_strategy(struct buf *bp)
10195 {
10196 	struct sd_lun		*un;
10197 	struct sd_uscsi_info	*uip;
10198 	struct sd_xbuf		*xp;
10199 	uchar_t			chain_type;
10200 
10201 	ASSERT(bp != NULL);
10202 
10203 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10204 	if (un == NULL) {
10205 		bioerror(bp, EIO);
10206 		bp->b_resid = bp->b_bcount;
10207 		biodone(bp);
10208 		return (0);
10209 	}
10210 
10211 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10212 
10213 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10214 
10215 	mutex_enter(SD_MUTEX(un));
10216 	/*
10217 	 * atapi: Since we are running the CD for now in PIO mode we need to
10218 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
10219 	 * the HBA's init_pkt routine.
10220 	 */
10221 	if (un->un_f_cfg_is_atapi == TRUE) {
10222 		mutex_exit(SD_MUTEX(un));
10223 		bp_mapin(bp);
10224 		mutex_enter(SD_MUTEX(un));
10225 	}
10226 	un->un_ncmds_in_driver++;
10227 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10228 	    un->un_ncmds_in_driver);
10229 	mutex_exit(SD_MUTEX(un));
10230 
10231 	/*
10232 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10233 	 */
10234 	ASSERT(bp->b_private != NULL);
10235 	uip = (struct sd_uscsi_info *)bp->b_private;
10236 
10237 	switch (uip->ui_flags) {
10238 	case SD_PATH_DIRECT:
10239 		chain_type = SD_CHAIN_DIRECT;
10240 		break;
10241 	case SD_PATH_DIRECT_PRIORITY:
10242 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10243 		break;
10244 	default:
10245 		chain_type = SD_CHAIN_USCSI;
10246 		break;
10247 	}
10248 
10249 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10250 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10251 
10252 	/* Use the index obtained within xbuf_init */
10253 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10254 
10255 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10256 
10257 	return (0);
10258 }
10259 
10260 /*
10261  *    Function: sd_send_scsi_cmd
10262  *
10263  * Description: Runs a USCSI command for user (when called thru sdioctl),
10264  *		or for the driver
10265  *
10266  *   Arguments: dev - the dev_t for the device
10267  *		incmd - ptr to a valid uscsi_cmd struct
10268  *		flag - bit flag, indicating open settings, 32/64 bit type
10269  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10270  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10271  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10272  *			to use the USCSI "direct" chain and bypass the normal
10273  *			command waitq.
10274  *
10275  * Return Code: 0 -  successful completion of the given command
10276  *		EIO - scsi_uscsi_handle_command() failed
10277  *		ENXIO  - soft state not found for specified dev
10278  *		EINVAL
10279  *		EFAULT - copyin/copyout error
10280  *		return code of scsi_uscsi_handle_command():
10281  *			EIO
10282  *			ENXIO
10283  *			EACCES
10284  *
10285  *     Context: Waits for command to complete. Can sleep.
10286  */
10287 
10288 static int
10289 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10290 	enum uio_seg dataspace, int path_flag)
10291 {
10292 	struct sd_uscsi_info	*uip;
10293 	struct uscsi_cmd	*uscmd;
10294 	struct sd_lun	*un;
10295 	int	format = 0;
10296 	int	rval;
10297 
10298 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10299 	if (un == NULL) {
10300 		return (ENXIO);
10301 	}
10302 
10303 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10304 
10305 #ifdef SDDEBUG
10306 	switch (dataspace) {
10307 	case UIO_USERSPACE:
10308 		SD_TRACE(SD_LOG_IO, un,
10309 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10310 		break;
10311 	case UIO_SYSSPACE:
10312 		SD_TRACE(SD_LOG_IO, un,
10313 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10314 		break;
10315 	default:
10316 		SD_TRACE(SD_LOG_IO, un,
10317 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10318 		break;
10319 	}
10320 #endif
10321 
10322 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10323 	    SD_ADDRESS(un), &uscmd);
10324 	if (rval != 0) {
10325 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
10326 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
10327 		return (rval);
10328 	}
10329 
10330 	if ((uscmd->uscsi_cdb != NULL) &&
10331 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10332 		mutex_enter(SD_MUTEX(un));
10333 		un->un_f_format_in_progress = TRUE;
10334 		mutex_exit(SD_MUTEX(un));
10335 		format = 1;
10336 	}
10337 
10338 	/*
10339 	 * Allocate an sd_uscsi_info struct and fill it with the info
10340 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10341 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10342 	 * since we allocate the buf here in this function, we do not
10343 	 * need to preserve the prior contents of b_private.
10344 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10345 	 */
10346 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10347 	uip->ui_flags = path_flag;
10348 	uip->ui_cmdp = uscmd;
10349 
10350 	/*
10351 	 * Commands sent with priority are intended for error recovery
10352 	 * situations, and do not have retries performed.
10353 	 */
10354 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10355 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10356 	}
10357 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10358 
10359 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10360 	    sd_uscsi_strategy, NULL, uip);
10361 
10362 #ifdef SDDEBUG
10363 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10364 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10365 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10366 	if (uscmd->uscsi_bufaddr != NULL) {
10367 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10368 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10369 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10370 		if (dataspace == UIO_SYSSPACE) {
10371 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10372 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10373 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10374 		}
10375 	}
10376 #endif
10377 
10378 	if (format == 1) {
10379 		mutex_enter(SD_MUTEX(un));
10380 		un->un_f_format_in_progress = FALSE;
10381 		mutex_exit(SD_MUTEX(un));
10382 	}
10383 
10384 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10385 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10386 
10387 	return (rval);
10388 }
10389 
10390 
10391 /*
10392  *    Function: sd_buf_iodone
10393  *
10394  * Description: Frees the sd_xbuf & returns the buf to its originator.
10395  *
10396  *     Context: May be called from interrupt context.
10397  */
10398 /* ARGSUSED */
10399 static void
10400 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10401 {
10402 	struct sd_xbuf *xp;
10403 
10404 	ASSERT(un != NULL);
10405 	ASSERT(bp != NULL);
10406 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10407 
10408 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10409 
10410 	xp = SD_GET_XBUF(bp);
10411 	ASSERT(xp != NULL);
10412 
10413 	mutex_enter(SD_MUTEX(un));
10414 
10415 	/*
10416 	 * Grab time when the cmd completed.
10417 	 * This is used for determining if the system has been
10418 	 * idle long enough to make it idle to the PM framework.
10419 	 * This is for lowering the overhead, and therefore improving
10420 	 * performance per I/O operation.
10421 	 */
10422 	un->un_pm_idle_time = ddi_get_time();
10423 
10424 	un->un_ncmds_in_driver--;
10425 	ASSERT(un->un_ncmds_in_driver >= 0);
10426 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10427 	    un->un_ncmds_in_driver);
10428 
10429 	mutex_exit(SD_MUTEX(un));
10430 
10431 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10432 	biodone(bp);				/* bp is gone after this */
10433 
10434 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10435 }
10436 
10437 
10438 /*
10439  *    Function: sd_uscsi_iodone
10440  *
10441  * Description: Frees the sd_xbuf & returns the buf to its originator.
10442  *
10443  *     Context: May be called from interrupt context.
10444  */
10445 /* ARGSUSED */
10446 static void
10447 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10448 {
10449 	struct sd_xbuf *xp;
10450 
10451 	ASSERT(un != NULL);
10452 	ASSERT(bp != NULL);
10453 
10454 	xp = SD_GET_XBUF(bp);
10455 	ASSERT(xp != NULL);
10456 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10457 
10458 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10459 
10460 	bp->b_private = xp->xb_private;
10461 
10462 	mutex_enter(SD_MUTEX(un));
10463 
10464 	/*
10465 	 * Grab time when the cmd completed.
10466 	 * This is used for determining if the system has been
10467 	 * idle long enough to make it idle to the PM framework.
10468 	 * This is for lowering the overhead, and therefore improving
10469 	 * performance per I/O operation.
10470 	 */
10471 	un->un_pm_idle_time = ddi_get_time();
10472 
10473 	un->un_ncmds_in_driver--;
10474 	ASSERT(un->un_ncmds_in_driver >= 0);
10475 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10476 	    un->un_ncmds_in_driver);
10477 
10478 	mutex_exit(SD_MUTEX(un));
10479 
10480 	kmem_free(xp, sizeof (struct sd_xbuf));
10481 	biodone(bp);
10482 
10483 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10484 }
10485 
10486 
10487 /*
10488  *    Function: sd_mapblockaddr_iostart
10489  *
10490  * Description: Verify request lies withing the partition limits for
10491  *		the indicated minor device.  Issue "overrun" buf if
10492  *		request would exceed partition range.  Converts
10493  *		partition-relative block address to absolute.
10494  *
10495  *     Context: Can sleep
10496  *
10497  *      Issues: This follows what the old code did, in terms of accessing
10498  *		some of the partition info in the unit struct without holding
10499  *		the mutext.  This is a general issue, if the partition info
10500  *		can be altered while IO is in progress... as soon as we send
10501  *		a buf, its partitioning can be invalid before it gets to the
10502  *		device.  Probably the right fix is to move partitioning out
10503  *		of the driver entirely.
10504  */
10505 
10506 static void
10507 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10508 {
10509 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10510 	daddr_t	blocknum;	/* Block number specified by the buf */
10511 	size_t	requested_nblocks;
10512 	size_t	available_nblocks;
10513 	int	partition;
10514 	diskaddr_t	partition_offset;
10515 	struct sd_xbuf *xp;
10516 
10517 
10518 	ASSERT(un != NULL);
10519 	ASSERT(bp != NULL);
10520 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10521 
10522 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10523 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10524 
10525 	xp = SD_GET_XBUF(bp);
10526 	ASSERT(xp != NULL);
10527 
10528 	/*
10529 	 * If the geometry is not indicated as valid, attempt to access
10530 	 * the unit & verify the geometry/label. This can be the case for
10531 	 * removable-media devices, of if the device was opened in
10532 	 * NDELAY/NONBLOCK mode.
10533 	 */
10534 	if (!SD_IS_VALID_LABEL(un) &&
10535 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10536 		/*
10537 		 * For removable devices it is possible to start an I/O
10538 		 * without a media by opening the device in nodelay mode.
10539 		 * Also for writable CDs there can be many scenarios where
10540 		 * there is no geometry yet but volume manager is trying to
10541 		 * issue a read() just because it can see TOC on the CD. So
10542 		 * do not print a message for removables.
10543 		 */
10544 		if (!un->un_f_has_removable_media) {
10545 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10546 			    "i/o to invalid geometry\n");
10547 		}
10548 		bioerror(bp, EIO);
10549 		bp->b_resid = bp->b_bcount;
10550 		SD_BEGIN_IODONE(index, un, bp);
10551 		return;
10552 	}
10553 
10554 	partition = SDPART(bp->b_edev);
10555 
10556 	nblocks = 0;
10557 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10558 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10559 
10560 	/*
10561 	 * blocknum is the starting block number of the request. At this
10562 	 * point it is still relative to the start of the minor device.
10563 	 */
10564 	blocknum = xp->xb_blkno;
10565 
10566 	/*
10567 	 * Legacy: If the starting block number is one past the last block
10568 	 * in the partition, do not set B_ERROR in the buf.
10569 	 */
10570 	if (blocknum == nblocks)  {
10571 		goto error_exit;
10572 	}
10573 
10574 	/*
10575 	 * Confirm that the first block of the request lies within the
10576 	 * partition limits. Also the requested number of bytes must be
10577 	 * a multiple of the system block size.
10578 	 */
10579 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10580 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10581 		bp->b_flags |= B_ERROR;
10582 		goto error_exit;
10583 	}
10584 
10585 	/*
10586 	 * If the requsted # blocks exceeds the available # blocks, that
10587 	 * is an overrun of the partition.
10588 	 */
10589 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10590 	available_nblocks = (size_t)(nblocks - blocknum);
10591 	ASSERT(nblocks >= blocknum);
10592 
10593 	if (requested_nblocks > available_nblocks) {
10594 		/*
10595 		 * Allocate an "overrun" buf to allow the request to proceed
10596 		 * for the amount of space available in the partition. The
10597 		 * amount not transferred will be added into the b_resid
10598 		 * when the operation is complete. The overrun buf
10599 		 * replaces the original buf here, and the original buf
10600 		 * is saved inside the overrun buf, for later use.
10601 		 */
10602 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10603 		    (offset_t)(requested_nblocks - available_nblocks));
10604 		size_t count = bp->b_bcount - resid;
10605 		/*
10606 		 * Note: count is an unsigned entity thus it'll NEVER
10607 		 * be less than 0 so ASSERT the original values are
10608 		 * correct.
10609 		 */
10610 		ASSERT(bp->b_bcount >= resid);
10611 
10612 		bp = sd_bioclone_alloc(bp, count, blocknum,
10613 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
10614 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10615 		ASSERT(xp != NULL);
10616 	}
10617 
10618 	/* At this point there should be no residual for this buf. */
10619 	ASSERT(bp->b_resid == 0);
10620 
10621 	/* Convert the block number to an absolute address. */
10622 	xp->xb_blkno += partition_offset;
10623 
10624 	SD_NEXT_IOSTART(index, un, bp);
10625 
10626 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10627 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10628 
10629 	return;
10630 
10631 error_exit:
10632 	bp->b_resid = bp->b_bcount;
10633 	SD_BEGIN_IODONE(index, un, bp);
10634 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10635 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10636 }
10637 
10638 
10639 /*
10640  *    Function: sd_mapblockaddr_iodone
10641  *
10642  * Description: Completion-side processing for partition management.
10643  *
10644  *     Context: May be called under interrupt context
10645  */
10646 
10647 static void
10648 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10649 {
10650 	/* int	partition; */	/* Not used, see below. */
10651 	ASSERT(un != NULL);
10652 	ASSERT(bp != NULL);
10653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10654 
10655 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10656 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10657 
10658 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10659 		/*
10660 		 * We have an "overrun" buf to deal with...
10661 		 */
10662 		struct sd_xbuf	*xp;
10663 		struct buf	*obp;	/* ptr to the original buf */
10664 
10665 		xp = SD_GET_XBUF(bp);
10666 		ASSERT(xp != NULL);
10667 
10668 		/* Retrieve the pointer to the original buf */
10669 		obp = (struct buf *)xp->xb_private;
10670 		ASSERT(obp != NULL);
10671 
10672 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10673 		bioerror(obp, bp->b_error);
10674 
10675 		sd_bioclone_free(bp);
10676 
10677 		/*
10678 		 * Get back the original buf.
10679 		 * Note that since the restoration of xb_blkno below
10680 		 * was removed, the sd_xbuf is not needed.
10681 		 */
10682 		bp = obp;
10683 		/*
10684 		 * xp = SD_GET_XBUF(bp);
10685 		 * ASSERT(xp != NULL);
10686 		 */
10687 	}
10688 
10689 	/*
10690 	 * Convert sd->xb_blkno back to a minor-device relative value.
10691 	 * Note: this has been commented out, as it is not needed in the
10692 	 * current implementation of the driver (ie, since this function
10693 	 * is at the top of the layering chains, so the info will be
10694 	 * discarded) and it is in the "hot" IO path.
10695 	 *
10696 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10697 	 * xp->xb_blkno -= un->un_offset[partition];
10698 	 */
10699 
10700 	SD_NEXT_IODONE(index, un, bp);
10701 
10702 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10703 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10704 }
10705 
10706 
10707 /*
10708  *    Function: sd_mapblocksize_iostart
10709  *
10710  * Description: Convert between system block size (un->un_sys_blocksize)
10711  *		and target block size (un->un_tgt_blocksize).
10712  *
10713  *     Context: Can sleep to allocate resources.
10714  *
10715  * Assumptions: A higher layer has already performed any partition validation,
10716  *		and converted the xp->xb_blkno to an absolute value relative
10717  *		to the start of the device.
10718  *
10719  *		It is also assumed that the higher layer has implemented
10720  *		an "overrun" mechanism for the case where the request would
10721  *		read/write beyond the end of a partition.  In this case we
10722  *		assume (and ASSERT) that bp->b_resid == 0.
10723  *
10724  *		Note: The implementation for this routine assumes the target
10725  *		block size remains constant between allocation and transport.
10726  */
10727 
10728 static void
10729 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10730 {
10731 	struct sd_mapblocksize_info	*bsp;
10732 	struct sd_xbuf			*xp;
10733 	offset_t first_byte;
10734 	daddr_t	start_block, end_block;
10735 	daddr_t	request_bytes;
10736 	ushort_t is_aligned = FALSE;
10737 
10738 	ASSERT(un != NULL);
10739 	ASSERT(bp != NULL);
10740 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10741 	ASSERT(bp->b_resid == 0);
10742 
10743 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10744 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10745 
10746 	/*
10747 	 * For a non-writable CD, a write request is an error
10748 	 */
10749 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10750 	    (un->un_f_mmc_writable_media == FALSE)) {
10751 		bioerror(bp, EIO);
10752 		bp->b_resid = bp->b_bcount;
10753 		SD_BEGIN_IODONE(index, un, bp);
10754 		return;
10755 	}
10756 
10757 	/*
10758 	 * We do not need a shadow buf if the device is using
10759 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10760 	 * In this case there is no layer-private data block allocated.
10761 	 */
10762 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10763 	    (bp->b_bcount == 0)) {
10764 		goto done;
10765 	}
10766 
10767 #if defined(__i386) || defined(__amd64)
10768 	/* We do not support non-block-aligned transfers for ROD devices */
10769 	ASSERT(!ISROD(un));
10770 #endif
10771 
10772 	xp = SD_GET_XBUF(bp);
10773 	ASSERT(xp != NULL);
10774 
10775 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10776 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
10777 	    un->un_tgt_blocksize, un->un_sys_blocksize);
10778 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10779 	    "request start block:0x%x\n", xp->xb_blkno);
10780 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10781 	    "request len:0x%x\n", bp->b_bcount);
10782 
10783 	/*
10784 	 * Allocate the layer-private data area for the mapblocksize layer.
10785 	 * Layers are allowed to use the xp_private member of the sd_xbuf
10786 	 * struct to store the pointer to their layer-private data block, but
10787 	 * each layer also has the responsibility of restoring the prior
10788 	 * contents of xb_private before returning the buf/xbuf to the
10789 	 * higher layer that sent it.
10790 	 *
10791 	 * Here we save the prior contents of xp->xb_private into the
10792 	 * bsp->mbs_oprivate field of our layer-private data area. This value
10793 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
10794 	 * the layer-private area and returning the buf/xbuf to the layer
10795 	 * that sent it.
10796 	 *
10797 	 * Note that here we use kmem_zalloc for the allocation as there are
10798 	 * parts of the mapblocksize code that expect certain fields to be
10799 	 * zero unless explicitly set to a required value.
10800 	 */
10801 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10802 	bsp->mbs_oprivate = xp->xb_private;
10803 	xp->xb_private = bsp;
10804 
10805 	/*
10806 	 * This treats the data on the disk (target) as an array of bytes.
10807 	 * first_byte is the byte offset, from the beginning of the device,
10808 	 * to the location of the request. This is converted from a
10809 	 * un->un_sys_blocksize block address to a byte offset, and then back
10810 	 * to a block address based upon a un->un_tgt_blocksize block size.
10811 	 *
10812 	 * xp->xb_blkno should be absolute upon entry into this function,
10813 	 * but, but it is based upon partitions that use the "system"
10814 	 * block size. It must be adjusted to reflect the block size of
10815 	 * the target.
10816 	 *
10817 	 * Note that end_block is actually the block that follows the last
10818 	 * block of the request, but that's what is needed for the computation.
10819 	 */
10820 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
10821 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
10822 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
10823 	    un->un_tgt_blocksize;
10824 
10825 	/* request_bytes is rounded up to a multiple of the target block size */
10826 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
10827 
10828 	/*
10829 	 * See if the starting address of the request and the request
10830 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
10831 	 * then we do not need to allocate a shadow buf to handle the request.
10832 	 */
10833 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
10834 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
10835 		is_aligned = TRUE;
10836 	}
10837 
10838 	if ((bp->b_flags & B_READ) == 0) {
10839 		/*
10840 		 * Lock the range for a write operation. An aligned request is
10841 		 * considered a simple write; otherwise the request must be a
10842 		 * read-modify-write.
10843 		 */
10844 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
10845 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
10846 	}
10847 
10848 	/*
10849 	 * Alloc a shadow buf if the request is not aligned. Also, this is
10850 	 * where the READ command is generated for a read-modify-write. (The
10851 	 * write phase is deferred until after the read completes.)
10852 	 */
10853 	if (is_aligned == FALSE) {
10854 
10855 		struct sd_mapblocksize_info	*shadow_bsp;
10856 		struct sd_xbuf	*shadow_xp;
10857 		struct buf	*shadow_bp;
10858 
10859 		/*
10860 		 * Allocate the shadow buf and it associated xbuf. Note that
10861 		 * after this call the xb_blkno value in both the original
10862 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
10863 		 * same: absolute relative to the start of the device, and
10864 		 * adjusted for the target block size. The b_blkno in the
10865 		 * shadow buf will also be set to this value. We should never
10866 		 * change b_blkno in the original bp however.
10867 		 *
10868 		 * Note also that the shadow buf will always need to be a
10869 		 * READ command, regardless of whether the incoming command
10870 		 * is a READ or a WRITE.
10871 		 */
10872 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
10873 		    xp->xb_blkno,
10874 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
10875 
10876 		shadow_xp = SD_GET_XBUF(shadow_bp);
10877 
10878 		/*
10879 		 * Allocate the layer-private data for the shadow buf.
10880 		 * (No need to preserve xb_private in the shadow xbuf.)
10881 		 */
10882 		shadow_xp->xb_private = shadow_bsp =
10883 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10884 
10885 		/*
10886 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
10887 		 * to figure out where the start of the user data is (based upon
10888 		 * the system block size) in the data returned by the READ
10889 		 * command (which will be based upon the target blocksize). Note
10890 		 * that this is only really used if the request is unaligned.
10891 		 */
10892 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
10893 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
10894 		ASSERT((bsp->mbs_copy_offset >= 0) &&
10895 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
10896 
10897 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
10898 
10899 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
10900 
10901 		/* Transfer the wmap (if any) to the shadow buf */
10902 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
10903 		bsp->mbs_wmp = NULL;
10904 
10905 		/*
10906 		 * The shadow buf goes on from here in place of the
10907 		 * original buf.
10908 		 */
10909 		shadow_bsp->mbs_orig_bp = bp;
10910 		bp = shadow_bp;
10911 	}
10912 
10913 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10914 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
10915 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10916 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
10917 	    request_bytes);
10918 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10919 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
10920 
10921 done:
10922 	SD_NEXT_IOSTART(index, un, bp);
10923 
10924 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10925 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
10926 }
10927 
10928 
10929 /*
10930  *    Function: sd_mapblocksize_iodone
10931  *
10932  * Description: Completion side processing for block-size mapping.
10933  *
10934  *     Context: May be called under interrupt context
10935  */
10936 
10937 static void
10938 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
10939 {
10940 	struct sd_mapblocksize_info	*bsp;
10941 	struct sd_xbuf	*xp;
10942 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
10943 	struct buf	*orig_bp;	/* ptr to the original buf */
10944 	offset_t	shadow_end;
10945 	offset_t	request_end;
10946 	offset_t	shadow_start;
10947 	ssize_t		copy_offset;
10948 	size_t		copy_length;
10949 	size_t		shortfall;
10950 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
10951 	uint_t		has_wmap;	/* TRUE is this bp has a wmap */
10952 
10953 	ASSERT(un != NULL);
10954 	ASSERT(bp != NULL);
10955 
10956 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10957 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
10958 
10959 	/*
10960 	 * There is no shadow buf or layer-private data if the target is
10961 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
10962 	 */
10963 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10964 	    (bp->b_bcount == 0)) {
10965 		goto exit;
10966 	}
10967 
10968 	xp = SD_GET_XBUF(bp);
10969 	ASSERT(xp != NULL);
10970 
10971 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
10972 	bsp = xp->xb_private;
10973 
10974 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
10975 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
10976 
10977 	if (is_write) {
10978 		/*
10979 		 * For a WRITE request we must free up the block range that
10980 		 * we have locked up.  This holds regardless of whether this is
10981 		 * an aligned write request or a read-modify-write request.
10982 		 */
10983 		sd_range_unlock(un, bsp->mbs_wmp);
10984 		bsp->mbs_wmp = NULL;
10985 	}
10986 
10987 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
10988 		/*
10989 		 * An aligned read or write command will have no shadow buf;
10990 		 * there is not much else to do with it.
10991 		 */
10992 		goto done;
10993 	}
10994 
10995 	orig_bp = bsp->mbs_orig_bp;
10996 	ASSERT(orig_bp != NULL);
10997 	orig_xp = SD_GET_XBUF(orig_bp);
10998 	ASSERT(orig_xp != NULL);
10999 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11000 
11001 	if (!is_write && has_wmap) {
11002 		/*
11003 		 * A READ with a wmap means this is the READ phase of a
11004 		 * read-modify-write. If an error occurred on the READ then
11005 		 * we do not proceed with the WRITE phase or copy any data.
11006 		 * Just release the write maps and return with an error.
11007 		 */
11008 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
11009 			orig_bp->b_resid = orig_bp->b_bcount;
11010 			bioerror(orig_bp, bp->b_error);
11011 			sd_range_unlock(un, bsp->mbs_wmp);
11012 			goto freebuf_done;
11013 		}
11014 	}
11015 
11016 	/*
11017 	 * Here is where we set up to copy the data from the shadow buf
11018 	 * into the space associated with the original buf.
11019 	 *
11020 	 * To deal with the conversion between block sizes, these
11021 	 * computations treat the data as an array of bytes, with the
11022 	 * first byte (byte 0) corresponding to the first byte in the
11023 	 * first block on the disk.
11024 	 */
11025 
	/*
	 * shadow_start and shadow_end indicate the byte offsets at which
	 * the data returned with the shadow IO request begins and ends.
	 */
11030 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11031 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11032 
	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from the mbs_copy_offset field of the layer-private data
	 * area, as it has been saved there by sd_mapblocksize_iostart().
	 * copy_length gives the amount of data to be copied (in bytes).
	 */
11040 	copy_offset  = bsp->mbs_copy_offset;
11041 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11042 	copy_length  = orig_bp->b_bcount;
11043 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11044 
11045 	/*
11046 	 * Set up the resid and error fields of orig_bp as appropriate.
11047 	 */
11048 	if (shadow_end >= request_end) {
11049 		/* We got all the requested data; set resid to zero */
11050 		orig_bp->b_resid = 0;
11051 	} else {
11052 		/*
11053 		 * We failed to get enough data to fully satisfy the original
11054 		 * request. Just copy back whatever data we got and set
11055 		 * up the residual and error code as required.
11056 		 *
11057 		 * 'shortfall' is the amount by which the data received with the
11058 		 * shadow buf has "fallen short" of the requested amount.
11059 		 */
11060 		shortfall = (size_t)(request_end - shadow_end);
11061 
11062 		if (shortfall > orig_bp->b_bcount) {
11063 			/*
11064 			 * We did not get enough data to even partially
11065 			 * fulfill the original request.  The residual is
11066 			 * equal to the amount requested.
11067 			 */
11068 			orig_bp->b_resid = orig_bp->b_bcount;
11069 		} else {
11070 			/*
11071 			 * We did not get all the data that we requested
11072 			 * from the device, but we will try to return what
11073 			 * portion we did get.
11074 			 */
11075 			orig_bp->b_resid = shortfall;
11076 		}
11077 		ASSERT(copy_length >= orig_bp->b_resid);
11078 		copy_length  -= orig_bp->b_resid;
11079 	}
11080 
11081 	/* Propagate the error code from the shadow buf to the original buf */
11082 	bioerror(orig_bp, bp->b_error);
11083 
11084 	if (is_write) {
11085 		goto freebuf_done;	/* No data copying for a WRITE */
11086 	}
11087 
11088 	if (has_wmap) {
11089 		/*
11090 		 * This is a READ command from the READ phase of a
11091 		 * read-modify-write request. We have to copy the data given
11092 		 * by the user OVER the data returned by the READ command,
11093 		 * then convert the command from a READ to a WRITE and send
11094 		 * it back to the target.
11095 		 */
11096 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11097 		    copy_length);
11098 
11099 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11100 
11101 		/*
11102 		 * Dispatch the WRITE command to the taskq thread, which
11103 		 * will in turn send the command to the target. When the
11104 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11105 		 * will get called again as part of the iodone chain
11106 		 * processing for it. Note that we will still be dealing
11107 		 * with the shadow buf at that point.
11108 		 */
11109 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11110 		    KM_NOSLEEP) != 0) {
11111 			/*
11112 			 * Dispatch was successful so we are done. Return
11113 			 * without going any higher up the iodone chain. Do
11114 			 * not free up any layer-private data until after the
11115 			 * WRITE completes.
11116 			 */
11117 			return;
11118 		}
11119 
11120 		/*
11121 		 * Dispatch of the WRITE command failed; set up the error
11122 		 * condition and send this IO back up the iodone chain.
11123 		 */
11124 		bioerror(orig_bp, EIO);
11125 		orig_bp->b_resid = orig_bp->b_bcount;
11126 
11127 	} else {
11128 		/*
11129 		 * This is a regular READ request (ie, not a RMW). Copy the
11130 		 * data from the shadow buf into the original buf. The
11131 		 * copy_offset compensates for any "misalignment" between the
11132 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11133 		 * original buf (with its un->un_sys_blocksize blocks).
11134 		 */
11135 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11136 		    copy_length);
11137 	}
11138 
11139 freebuf_done:
11140 
11141 	/*
11142 	 * At this point we still have both the shadow buf AND the original
11143 	 * buf to deal with, as well as the layer-private data area in each.
11144 	 * Local variables are as follows:
11145 	 *
11146 	 * bp -- points to shadow buf
11147 	 * xp -- points to xbuf of shadow buf
11148 	 * bsp -- points to layer-private data area of shadow buf
11149 	 * orig_bp -- points to original buf
11150 	 *
11151 	 * First free the shadow buf and its associated xbuf, then free the
11152 	 * layer-private data area from the shadow buf. There is no need to
11153 	 * restore xb_private in the shadow xbuf.
11154 	 */
11155 	sd_shadow_buf_free(bp);
11156 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11157 
11158 	/*
11159 	 * Now update the local variables to point to the original buf, xbuf,
11160 	 * and layer-private area.
11161 	 */
11162 	bp = orig_bp;
11163 	xp = SD_GET_XBUF(bp);
11164 	ASSERT(xp != NULL);
11165 	ASSERT(xp == orig_xp);
11166 	bsp = xp->xb_private;
11167 	ASSERT(bsp != NULL);
11168 
11169 done:
11170 	/*
11171 	 * Restore xb_private to whatever it was set to by the next higher
11172 	 * layer in the chain, then free the layer-private data area.
11173 	 */
11174 	xp->xb_private = bsp->mbs_oprivate;
11175 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11176 
11177 exit:
11178 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11179 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11180 
11181 	SD_NEXT_IODONE(index, un, bp);
11182 }
11183 
11184 
11185 /*
11186  *    Function: sd_checksum_iostart
11187  *
11188  * Description: A stub function for a layer that's currently not used.
11189  *		For now just a placeholder.
11190  *
11191  *     Context: Kernel thread context
11192  */
11193 
11194 static void
11195 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11196 {
11197 	ASSERT(un != NULL);
11198 	ASSERT(bp != NULL);
11199 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11200 	SD_NEXT_IOSTART(index, un, bp);
11201 }
11202 
11203 
11204 /*
11205  *    Function: sd_checksum_iodone
11206  *
11207  * Description: A stub function for a layer that's currently not used.
11208  *		For now just a placeholder.
11209  *
11210  *     Context: May be called under interrupt context
11211  */
11212 
11213 static void
11214 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11215 {
11216 	ASSERT(un != NULL);
11217 	ASSERT(bp != NULL);
11218 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11219 	SD_NEXT_IODONE(index, un, bp);
11220 }
11221 
11222 
11223 /*
11224  *    Function: sd_checksum_uscsi_iostart
11225  *
11226  * Description: A stub function for a layer that's currently not used.
11227  *		For now just a placeholder.
11228  *
11229  *     Context: Kernel thread context
11230  */
11231 
11232 static void
11233 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11234 {
11235 	ASSERT(un != NULL);
11236 	ASSERT(bp != NULL);
11237 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11238 	SD_NEXT_IOSTART(index, un, bp);
11239 }
11240 
11241 
11242 /*
11243  *    Function: sd_checksum_uscsi_iodone
11244  *
11245  * Description: A stub function for a layer that's currently not used.
11246  *		For now just a placeholder.
11247  *
11248  *     Context: May be called under interrupt context
11249  */
11250 
11251 static void
11252 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11253 {
11254 	ASSERT(un != NULL);
11255 	ASSERT(bp != NULL);
11256 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11257 	SD_NEXT_IODONE(index, un, bp);
11258 }
11259 
11260 
11261 /*
11262  *    Function: sd_pm_iostart
11263  *
11264  * Description: iostart-side routine for Power mangement.
11265  *
11266  *     Context: Kernel thread context
11267  */
11268 
11269 static void
11270 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11271 {
11272 	ASSERT(un != NULL);
11273 	ASSERT(bp != NULL);
11274 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11275 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11276 
11277 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11278 
11279 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11280 		/*
11281 		 * Set up to return the failed buf back up the 'iodone'
11282 		 * side of the calling chain.
11283 		 */
11284 		bioerror(bp, EIO);
11285 		bp->b_resid = bp->b_bcount;
11286 
11287 		SD_BEGIN_IODONE(index, un, bp);
11288 
11289 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11290 		return;
11291 	}
11292 
11293 	SD_NEXT_IOSTART(index, un, bp);
11294 
11295 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11296 }
11297 
11298 
11299 /*
11300  *    Function: sd_pm_iodone
11301  *
11302  * Description: iodone-side routine for power mangement.
11303  *
11304  *     Context: may be called from interrupt context
11305  */
11306 
11307 static void
11308 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11309 {
11310 	ASSERT(un != NULL);
11311 	ASSERT(bp != NULL);
11312 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11313 
11314 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11315 
11316 	/*
11317 	 * After attach the following flag is only read, so don't
11318 	 * take the penalty of acquiring a mutex for it.
11319 	 */
11320 	if (un->un_f_pm_is_enabled == TRUE) {
11321 		sd_pm_exit(un);
11322 	}
11323 
11324 	SD_NEXT_IODONE(index, un, bp);
11325 
11326 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11327 }
11328 
11329 
11330 /*
11331  *    Function: sd_core_iostart
11332  *
11333  * Description: Primary driver function for enqueuing buf(9S) structs from
11334  *		the system and initiating IO to the target device
11335  *
11336  *     Context: Kernel thread context. Can sleep.
11337  *
11338  * Assumptions:  - The given xp->xb_blkno is absolute
11339  *		   (ie, relative to the start of the device).
11340  *		 - The IO is to be done using the native blocksize of
11341  *		   the device, as specified in un->un_tgt_blocksize.
11342  */
11343 /* ARGSUSED */
11344 static void
11345 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11346 {
11347 	struct sd_xbuf *xp;
11348 
11349 	ASSERT(un != NULL);
11350 	ASSERT(bp != NULL);
11351 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11352 	ASSERT(bp->b_resid == 0);
11353 
11354 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11355 
11356 	xp = SD_GET_XBUF(bp);
11357 	ASSERT(xp != NULL);
11358 
11359 	mutex_enter(SD_MUTEX(un));
11360 
11361 	/*
11362 	 * If we are currently in the failfast state, fail any new IO
11363 	 * that has B_FAILFAST set, then return.
11364 	 */
11365 	if ((bp->b_flags & B_FAILFAST) &&
11366 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11367 		mutex_exit(SD_MUTEX(un));
11368 		bioerror(bp, EIO);
11369 		bp->b_resid = bp->b_bcount;
11370 		SD_BEGIN_IODONE(index, un, bp);
11371 		return;
11372 	}
11373 
11374 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11375 		/*
11376 		 * Priority command -- transport it immediately.
11377 		 *
11378 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11379 		 * because all direct priority commands should be associated
11380 		 * with error recovery actions which we don't want to retry.
11381 		 */
11382 		sd_start_cmds(un, bp);
11383 	} else {
11384 		/*
11385 		 * Normal command -- add it to the wait queue, then start
11386 		 * transporting commands from the wait queue.
11387 		 */
11388 		sd_add_buf_to_waitq(un, bp);
11389 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11390 		sd_start_cmds(un, NULL);
11391 	}
11392 
11393 	mutex_exit(SD_MUTEX(un));
11394 
11395 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11396 }
11397 
11398 
11399 /*
11400  *    Function: sd_init_cdb_limits
11401  *
11402  * Description: This is to handle scsi_pkt initialization differences
11403  *		between the driver platforms.
11404  *
11405  *		Legacy behaviors:
11406  *
11407  *		If the block number or the sector count exceeds the
11408  *		capabilities of a Group 0 command, shift over to a
11409  *		Group 1 command. We don't blindly use Group 1
11410  *		commands because a) some drives (CDC Wren IVs) get a
11411  *		bit confused, and b) there is probably a fair amount
11412  *		of speed difference for a target to receive and decode
11413  *		a 10 byte command instead of a 6 byte command.
11414  *
11415  *		The xfer time difference of 6 vs 10 byte CDBs is
11416  *		still significant so this code is still worthwhile.
11417  *		10 byte CDBs are very inefficient with the fas HBA driver
11418  *		and older disks. Each CDB byte took 1 usec with some
11419  *		popular disks.
11420  *
11421  *     Context: Must be called at attach time
11422  */
11423 
11424 static void
11425 sd_init_cdb_limits(struct sd_lun *un)
11426 {
11427 	int hba_cdb_limit;
11428 
11429 	/*
11430 	 * Use CDB_GROUP1 commands for most devices except for
11431 	 * parallel SCSI fixed drives in which case we get better
11432 	 * performance using CDB_GROUP0 commands (where applicable).
11433 	 */
11434 	un->un_mincdb = SD_CDB_GROUP1;
11435 #if !defined(__fibre)
11436 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11437 	    !un->un_f_has_removable_media) {
11438 		un->un_mincdb = SD_CDB_GROUP0;
11439 	}
11440 #endif
11441 
11442 	/*
11443 	 * Try to read the max-cdb-length supported by HBA.
11444 	 */
11445 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11446 	if (0 >= un->un_max_hba_cdb) {
11447 		un->un_max_hba_cdb = CDB_GROUP4;
11448 		hba_cdb_limit = SD_CDB_GROUP4;
11449 	} else if (0 < un->un_max_hba_cdb &&
11450 	    un->un_max_hba_cdb < CDB_GROUP1) {
11451 		hba_cdb_limit = SD_CDB_GROUP0;
11452 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11453 	    un->un_max_hba_cdb < CDB_GROUP5) {
11454 		hba_cdb_limit = SD_CDB_GROUP1;
11455 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11456 	    un->un_max_hba_cdb < CDB_GROUP4) {
11457 		hba_cdb_limit = SD_CDB_GROUP5;
11458 	} else {
11459 		hba_cdb_limit = SD_CDB_GROUP4;
11460 	}
11461 
11462 	/*
11463 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11464 	 * commands for fixed disks unless we are building for a 32 bit
11465 	 * kernel.
11466 	 */
11467 #ifdef _LP64
11468 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11469 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11470 #else
11471 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11472 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11473 #endif
11474 
11475 	/*
11476 	 * x86 systems require the PKT_DMA_PARTIAL flag
11477 	 */
11478 #if defined(__x86)
11479 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11480 #else
11481 	un->un_pkt_flags = 0;
11482 #endif
11483 
11484 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11485 	    ? sizeof (struct scsi_arq_status) : 1);
11486 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11487 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11488 }
11489 
11490 
11491 /*
11492  *    Function: sd_initpkt_for_buf
11493  *
11494  * Description: Allocate and initialize for transport a scsi_pkt struct,
11495  *		based upon the info specified in the given buf struct.
11496  *
11497  *		Assumes the xb_blkno in the request is absolute (ie,
11498  *		relative to the start of the device (NOT partition!).
11499  *		Also assumes that the request is using the native block
11500  *		size of the device (as returned by the READ CAPACITY
11501  *		command).
11502  *
11503  * Return Code: SD_PKT_ALLOC_SUCCESS
11504  *		SD_PKT_ALLOC_FAILURE
11505  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11506  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11507  *
11508  *     Context: Kernel thread and may be called from software interrupt context
11509  *		as part of a sdrunout callback. This function may not block or
11510  *		call routines that block
11511  */
11512 
11513 static int
11514 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11515 {
11516 	struct sd_xbuf	*xp;
11517 	struct scsi_pkt *pktp = NULL;
11518 	struct sd_lun	*un;
11519 	size_t		blockcount;
11520 	daddr_t		startblock;
11521 	int		rval;
11522 	int		cmd_flags;
11523 
11524 	ASSERT(bp != NULL);
11525 	ASSERT(pktpp != NULL);
11526 	xp = SD_GET_XBUF(bp);
11527 	ASSERT(xp != NULL);
11528 	un = SD_GET_UN(bp);
11529 	ASSERT(un != NULL);
11530 	ASSERT(mutex_owned(SD_MUTEX(un)));
11531 	ASSERT(bp->b_resid == 0);
11532 
11533 	SD_TRACE(SD_LOG_IO_CORE, un,
11534 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11535 
11536 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11537 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11538 		/*
11539 		 * Already have a scsi_pkt -- just need DMA resources.
11540 		 * We must recompute the CDB in case the mapping returns
11541 		 * a nonzero pkt_resid.
11542 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11543 		 * that is being retried, the unmap/remap of the DMA resouces
11544 		 * will result in the entire transfer starting over again
11545 		 * from the very first block.
11546 		 */
11547 		ASSERT(xp->xb_pktp != NULL);
11548 		pktp = xp->xb_pktp;
11549 	} else {
11550 		pktp = NULL;
11551 	}
11552 #endif /* __i386 || __amd64 */
11553 
11554 	startblock = xp->xb_blkno;	/* Absolute block num. */
11555 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11556 
11557 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11558 
11559 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11560 
11561 #else
11562 
11563 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11564 
11565 #endif
11566 
11567 	/*
11568 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11569 	 * call scsi_init_pkt, and build the CDB.
11570 	 */
11571 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11572 	    cmd_flags, sdrunout, (caddr_t)un,
11573 	    startblock, blockcount);
11574 
11575 	if (rval == 0) {
11576 		/*
11577 		 * Success.
11578 		 *
11579 		 * If partial DMA is being used and required for this transfer.
11580 		 * set it up here.
11581 		 */
11582 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11583 		    (pktp->pkt_resid != 0)) {
11584 
11585 			/*
11586 			 * Save the CDB length and pkt_resid for the
11587 			 * next xfer
11588 			 */
11589 			xp->xb_dma_resid = pktp->pkt_resid;
11590 
11591 			/* rezero resid */
11592 			pktp->pkt_resid = 0;
11593 
11594 		} else {
11595 			xp->xb_dma_resid = 0;
11596 		}
11597 
11598 		pktp->pkt_flags = un->un_tagflags;
11599 		pktp->pkt_time  = un->un_cmd_timeout;
11600 		pktp->pkt_comp  = sdintr;
11601 
11602 		pktp->pkt_private = bp;
11603 		*pktpp = pktp;
11604 
11605 		SD_TRACE(SD_LOG_IO_CORE, un,
11606 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11607 
11608 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11609 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11610 #endif
11611 
11612 		return (SD_PKT_ALLOC_SUCCESS);
11613 
11614 	}
11615 
11616 	/*
11617 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11618 	 * from sd_setup_rw_pkt.
11619 	 */
11620 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11621 
11622 	if (rval == SD_PKT_ALLOC_FAILURE) {
11623 		*pktpp = NULL;
11624 		/*
11625 		 * Set the driver state to RWAIT to indicate the driver
11626 		 * is waiting on resource allocations. The driver will not
11627 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
11628 		 */
11629 		New_state(un, SD_STATE_RWAIT);
11630 
11631 		SD_ERROR(SD_LOG_IO_CORE, un,
11632 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11633 
11634 		if ((bp->b_flags & B_ERROR) != 0) {
11635 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11636 		}
11637 		return (SD_PKT_ALLOC_FAILURE);
11638 	} else {
11639 		/*
11640 		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11641 		 *
11642 		 * This should never happen.  Maybe someone messed with the
11643 		 * kernel's minphys?
11644 		 */
11645 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11646 		    "Request rejected: too large for CDB: "
11647 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11648 		SD_ERROR(SD_LOG_IO_CORE, un,
11649 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11650 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11651 
11652 	}
11653 }
11654 
11655 
11656 /*
11657  *    Function: sd_destroypkt_for_buf
11658  *
11659  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11660  *
11661  *     Context: Kernel thread or interrupt context
11662  */
11663 
11664 static void
11665 sd_destroypkt_for_buf(struct buf *bp)
11666 {
11667 	ASSERT(bp != NULL);
11668 	ASSERT(SD_GET_UN(bp) != NULL);
11669 
11670 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11671 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11672 
11673 	ASSERT(SD_GET_PKTP(bp) != NULL);
11674 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11675 
11676 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11677 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11678 }
11679 
11680 /*
11681  *    Function: sd_setup_rw_pkt
11682  *
11683  * Description: Determines appropriate CDB group for the requested LBA
11684  *		and transfer length, calls scsi_init_pkt, and builds
11685  *		the CDB.  Do not use for partial DMA transfers except
11686  *		for the initial transfer since the CDB size must
11687  *		remain constant.
11688  *
11689  *     Context: Kernel thread and may be called from software interrupt
11690  *		context as part of a sdrunout callback. This function may not
11691  *		block or call routines that block
11692  */
11693 
11694 
11695 int
11696 sd_setup_rw_pkt(struct sd_lun *un,
11697     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11698     int (*callback)(caddr_t), caddr_t callback_arg,
11699     diskaddr_t lba, uint32_t blockcount)
11700 {
11701 	struct scsi_pkt *return_pktp;
11702 	union scsi_cdb *cdbp;
11703 	struct sd_cdbinfo *cp = NULL;
11704 	int i;
11705 
11706 	/*
11707 	 * See which size CDB to use, based upon the request.
11708 	 */
11709 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11710 
11711 		/*
11712 		 * Check lba and block count against sd_cdbtab limits.
11713 		 * In the partial DMA case, we have to use the same size
11714 		 * CDB for all the transfers.  Check lba + blockcount
11715 		 * against the max LBA so we know that segment of the
11716 		 * transfer can use the CDB we select.
11717 		 */
11718 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11719 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11720 
11721 			/*
11722 			 * The command will fit into the CDB type
11723 			 * specified by sd_cdbtab[i].
11724 			 */
11725 			cp = sd_cdbtab + i;
11726 
11727 			/*
11728 			 * Call scsi_init_pkt so we can fill in the
11729 			 * CDB.
11730 			 */
11731 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11732 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11733 			    flags, callback, callback_arg);
11734 
11735 			if (return_pktp != NULL) {
11736 
11737 				/*
11738 				 * Return new value of pkt
11739 				 */
11740 				*pktpp = return_pktp;
11741 
11742 				/*
11743 				 * To be safe, zero the CDB insuring there is
11744 				 * no leftover data from a previous command.
11745 				 */
11746 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11747 
11748 				/*
11749 				 * Handle partial DMA mapping
11750 				 */
11751 				if (return_pktp->pkt_resid != 0) {
11752 
11753 					/*
11754 					 * Not going to xfer as many blocks as
11755 					 * originally expected
11756 					 */
11757 					blockcount -=
11758 					    SD_BYTES2TGTBLOCKS(un,
11759 						return_pktp->pkt_resid);
11760 				}
11761 
11762 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11763 
11764 				/*
11765 				 * Set command byte based on the CDB
11766 				 * type we matched.
11767 				 */
11768 				cdbp->scc_cmd = cp->sc_grpmask |
11769 				    ((bp->b_flags & B_READ) ?
11770 					SCMD_READ : SCMD_WRITE);
11771 
11772 				SD_FILL_SCSI1_LUN(un, return_pktp);
11773 
11774 				/*
11775 				 * Fill in LBA and length
11776 				 */
11777 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
11778 				    (cp->sc_grpcode == CDB_GROUP4) ||
11779 				    (cp->sc_grpcode == CDB_GROUP0) ||
11780 				    (cp->sc_grpcode == CDB_GROUP5));
11781 
11782 				if (cp->sc_grpcode == CDB_GROUP1) {
11783 					FORMG1ADDR(cdbp, lba);
11784 					FORMG1COUNT(cdbp, blockcount);
11785 					return (0);
11786 				} else if (cp->sc_grpcode == CDB_GROUP4) {
11787 					FORMG4LONGADDR(cdbp, lba);
11788 					FORMG4COUNT(cdbp, blockcount);
11789 					return (0);
11790 				} else if (cp->sc_grpcode == CDB_GROUP0) {
11791 					FORMG0ADDR(cdbp, lba);
11792 					FORMG0COUNT(cdbp, blockcount);
11793 					return (0);
11794 				} else if (cp->sc_grpcode == CDB_GROUP5) {
11795 					FORMG5ADDR(cdbp, lba);
11796 					FORMG5COUNT(cdbp, blockcount);
11797 					return (0);
11798 				}
11799 
11800 				/*
11801 				 * It should be impossible to not match one
11802 				 * of the CDB types above, so we should never
11803 				 * reach this point.  Set the CDB command byte
11804 				 * to test-unit-ready to avoid writing
11805 				 * to somewhere we don't intend.
11806 				 */
11807 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
11808 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11809 			} else {
11810 				/*
11811 				 * Couldn't get scsi_pkt
11812 				 */
11813 				return (SD_PKT_ALLOC_FAILURE);
11814 			}
11815 		}
11816 	}
11817 
11818 	/*
11819 	 * None of the available CDB types were suitable.  This really
11820 	 * should never happen:  on a 64 bit system we support
11821 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
11822 	 * and on a 32 bit system we will refuse to bind to a device
11823 	 * larger than 2TB so addresses will never be larger than 32 bits.
11824 	 */
11825 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11826 }
11827 
11828 #if defined(__i386) || defined(__amd64)
11829 /*
11830  *    Function: sd_setup_next_rw_pkt
11831  *
11832  * Description: Setup packet for partial DMA transfers, except for the
11833  * 		initial transfer.  sd_setup_rw_pkt should be used for
11834  *		the initial transfer.
11835  *
11836  *     Context: Kernel thread and may be called from interrupt context.
11837  */
11838 
11839 int
11840 sd_setup_next_rw_pkt(struct sd_lun *un,
11841     struct scsi_pkt *pktp, struct buf *bp,
11842     diskaddr_t lba, uint32_t blockcount)
11843 {
11844 	uchar_t com;
11845 	union scsi_cdb *cdbp;
11846 	uchar_t cdb_group_id;
11847 
11848 	ASSERT(pktp != NULL);
11849 	ASSERT(pktp->pkt_cdbp != NULL);
11850 
11851 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
11852 	com = cdbp->scc_cmd;
11853 	cdb_group_id = CDB_GROUPID(com);
11854 
11855 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
11856 	    (cdb_group_id == CDB_GROUPID_1) ||
11857 	    (cdb_group_id == CDB_GROUPID_4) ||
11858 	    (cdb_group_id == CDB_GROUPID_5));
11859 
11860 	/*
11861 	 * Move pkt to the next portion of the xfer.
11862 	 * func is NULL_FUNC so we do not have to release
11863 	 * the disk mutex here.
11864 	 */
11865 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
11866 	    NULL_FUNC, NULL) == pktp) {
11867 		/* Success.  Handle partial DMA */
11868 		if (pktp->pkt_resid != 0) {
11869 			blockcount -=
11870 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
11871 		}
11872 
11873 		cdbp->scc_cmd = com;
11874 		SD_FILL_SCSI1_LUN(un, pktp);
11875 		if (cdb_group_id == CDB_GROUPID_1) {
11876 			FORMG1ADDR(cdbp, lba);
11877 			FORMG1COUNT(cdbp, blockcount);
11878 			return (0);
11879 		} else if (cdb_group_id == CDB_GROUPID_4) {
11880 			FORMG4LONGADDR(cdbp, lba);
11881 			FORMG4COUNT(cdbp, blockcount);
11882 			return (0);
11883 		} else if (cdb_group_id == CDB_GROUPID_0) {
11884 			FORMG0ADDR(cdbp, lba);
11885 			FORMG0COUNT(cdbp, blockcount);
11886 			return (0);
11887 		} else if (cdb_group_id == CDB_GROUPID_5) {
11888 			FORMG5ADDR(cdbp, lba);
11889 			FORMG5COUNT(cdbp, blockcount);
11890 			return (0);
11891 		}
11892 
11893 		/* Unreachable */
11894 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11895 	}
11896 
11897 	/*
11898 	 * Error setting up next portion of cmd transfer.
11899 	 * Something is definitely very wrong and this
11900 	 * should not happen.
11901 	 */
11902 	return (SD_PKT_ALLOC_FAILURE);
11903 }
11904 #endif /* defined(__i386) || defined(__amd64) */
11905 
11906 /*
11907  *    Function: sd_initpkt_for_uscsi
11908  *
11909  * Description: Allocate and initialize for transport a scsi_pkt struct,
11910  *		based upon the info specified in the given uscsi_cmd struct.
11911  *
11912  * Return Code: SD_PKT_ALLOC_SUCCESS
11913  *		SD_PKT_ALLOC_FAILURE
11914  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11915  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11916  *
11917  *     Context: Kernel thread and may be called from software interrupt context
11918  *		as part of a sdrunout callback. This function may not block or
11919  *		call routines that block
11920  */
11921 
11922 static int
11923 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
11924 {
11925 	struct uscsi_cmd *uscmd;
11926 	struct sd_xbuf	*xp;
11927 	struct scsi_pkt	*pktp;
11928 	struct sd_lun	*un;
11929 	uint32_t	flags = 0;
11930 
11931 	ASSERT(bp != NULL);
11932 	ASSERT(pktpp != NULL);
11933 	xp = SD_GET_XBUF(bp);
11934 	ASSERT(xp != NULL);
11935 	un = SD_GET_UN(bp);
11936 	ASSERT(un != NULL);
11937 	ASSERT(mutex_owned(SD_MUTEX(un)));
11938 
11939 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
11940 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
11941 	ASSERT(uscmd != NULL);
11942 
11943 	SD_TRACE(SD_LOG_IO_CORE, un,
11944 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
11945 
11946 	/*
11947 	 * Allocate the scsi_pkt for the command.
11948 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
11949 	 *	 during scsi_init_pkt time and will continue to use the
11950 	 *	 same path as long as the same scsi_pkt is used without
11951 	 *	 intervening scsi_dma_free(). Since uscsi command does
11952 	 *	 not call scsi_dmafree() before retry failed command, it
11953 	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
11954 	 *	 set such that scsi_vhci can use other available path for
11955 	 *	 retry. Besides, ucsci command does not allow DMA breakup,
11956 	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
11957 	 */
11958 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
11959 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
11960 	    sizeof (struct scsi_arq_status), 0,
11961 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
11962 	    sdrunout, (caddr_t)un);
11963 
11964 	if (pktp == NULL) {
11965 		*pktpp = NULL;
11966 		/*
11967 		 * Set the driver state to RWAIT to indicate the driver
11968 		 * is waiting on resource allocations. The driver will not
11969 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
11970 		 */
11971 		New_state(un, SD_STATE_RWAIT);
11972 
11973 		SD_ERROR(SD_LOG_IO_CORE, un,
11974 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
11975 
11976 		if ((bp->b_flags & B_ERROR) != 0) {
11977 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11978 		}
11979 		return (SD_PKT_ALLOC_FAILURE);
11980 	}
11981 
11982 	/*
11983 	 * We do not do DMA breakup for USCSI commands, so return failure
11984 	 * here if all the needed DMA resources were not allocated.
11985 	 */
11986 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
11987 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
11988 		scsi_destroy_pkt(pktp);
11989 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
11990 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
11991 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
11992 	}
11993 
11994 	/* Init the cdb from the given uscsi struct */
11995 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
11996 	    uscmd->uscsi_cdb[0], 0, 0, 0);
11997 
11998 	SD_FILL_SCSI1_LUN(un, pktp);
11999 
12000 	/*
12001 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
12002 	 * for listing of the supported flags.
12003 	 */
12004 
12005 	if (uscmd->uscsi_flags & USCSI_SILENT) {
12006 		flags |= FLAG_SILENT;
12007 	}
12008 
12009 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
12010 		flags |= FLAG_DIAGNOSE;
12011 	}
12012 
12013 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
12014 		flags |= FLAG_ISOLATE;
12015 	}
12016 
12017 	if (un->un_f_is_fibre == FALSE) {
12018 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
12019 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
12020 		}
12021 	}
12022 
12023 	/*
12024 	 * Set the pkt flags here so we save time later.
12025 	 * Note: These flags are NOT in the uscsi man page!!!
12026 	 */
12027 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12028 		flags |= FLAG_HEAD;
12029 	}
12030 
12031 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12032 		flags |= FLAG_NOINTR;
12033 	}
12034 
12035 	/*
12036 	 * For tagged queueing, things get a bit complicated.
12037 	 * Check first for head of queue and last for ordered queue.
12038 	 * If neither head nor order, use the default driver tag flags.
12039 	 */
12040 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12041 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12042 			flags |= FLAG_HTAG;
12043 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12044 			flags |= FLAG_OTAG;
12045 		} else {
12046 			flags |= un->un_tagflags & FLAG_TAGMASK;
12047 		}
12048 	}
12049 
12050 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12051 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12052 	}
12053 
12054 	pktp->pkt_flags = flags;
12055 
12056 	/* Copy the caller's CDB into the pkt... */
12057 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12058 
12059 	if (uscmd->uscsi_timeout == 0) {
12060 		pktp->pkt_time = un->un_uscsi_timeout;
12061 	} else {
12062 		pktp->pkt_time = uscmd->uscsi_timeout;
12063 	}
12064 
12065 	/* need it later to identify USCSI request in sdintr */
12066 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12067 
12068 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12069 
12070 	pktp->pkt_private = bp;
12071 	pktp->pkt_comp = sdintr;
12072 	*pktpp = pktp;
12073 
12074 	SD_TRACE(SD_LOG_IO_CORE, un,
12075 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12076 
12077 	return (SD_PKT_ALLOC_SUCCESS);
12078 }
12079 
12080 
12081 /*
12082  *    Function: sd_destroypkt_for_uscsi
12083  *
12084  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12085  *		IOs.. Also saves relevant info into the associated uscsi_cmd
12086  *		struct.
12087  *
12088  *     Context: May be called under interrupt context
12089  */
12090 
12091 static void
12092 sd_destroypkt_for_uscsi(struct buf *bp)
12093 {
12094 	struct uscsi_cmd *uscmd;
12095 	struct sd_xbuf	*xp;
12096 	struct scsi_pkt	*pktp;
12097 	struct sd_lun	*un;
12098 
12099 	ASSERT(bp != NULL);
12100 	xp = SD_GET_XBUF(bp);
12101 	ASSERT(xp != NULL);
12102 	un = SD_GET_UN(bp);
12103 	ASSERT(un != NULL);
12104 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12105 	pktp = SD_GET_PKTP(bp);
12106 	ASSERT(pktp != NULL);
12107 
12108 	SD_TRACE(SD_LOG_IO_CORE, un,
12109 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12110 
12111 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12112 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12113 	ASSERT(uscmd != NULL);
12114 
12115 	/* Save the status and the residual into the uscsi_cmd struct */
12116 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12117 	uscmd->uscsi_resid  = bp->b_resid;
12118 
12119 	/*
12120 	 * If enabled, copy any saved sense data into the area specified
12121 	 * by the uscsi command.
12122 	 */
12123 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12124 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12125 		/*
12126 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12127 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12128 		 */
12129 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12130 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12131 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12132 	}
12133 
12134 	/* We are done with the scsi_pkt; free it now */
12135 	ASSERT(SD_GET_PKTP(bp) != NULL);
12136 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12137 
12138 	SD_TRACE(SD_LOG_IO_CORE, un,
12139 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12140 }
12141 
12142 
12143 /*
12144  *    Function: sd_bioclone_alloc
12145  *
12146  * Description: Allocate a buf(9S) and init it as per the given buf
12147  *		and the various arguments.  The associated sd_xbuf
12148  *		struct is (nearly) duplicated.  The struct buf *bp
12149  *		argument is saved in new_xp->xb_private.
12150  *
12151  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
12152  *		datalen - size of data area for the shadow bp
12153  *		blkno - starting LBA
12154  *		func - function pointer for b_iodone in the shadow buf. (May
12155  *			be NULL if none.)
12156  *
12157  * Return Code: Pointer to allocates buf(9S) struct
12158  *
12159  *     Context: Can sleep.
12160  */
12161 
12162 static struct buf *
12163 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12164 	daddr_t blkno, int (*func)(struct buf *))
12165 {
12166 	struct	sd_lun	*un;
12167 	struct	sd_xbuf	*xp;
12168 	struct	sd_xbuf	*new_xp;
12169 	struct	buf	*new_bp;
12170 
12171 	ASSERT(bp != NULL);
12172 	xp = SD_GET_XBUF(bp);
12173 	ASSERT(xp != NULL);
12174 	un = SD_GET_UN(bp);
12175 	ASSERT(un != NULL);
12176 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12177 
12178 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12179 	    NULL, KM_SLEEP);
12180 
12181 	new_bp->b_lblkno	= blkno;
12182 
12183 	/*
12184 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12185 	 * original xbuf into it.
12186 	 */
12187 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12188 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12189 
12190 	/*
12191 	 * The given bp is automatically saved in the xb_private member
12192 	 * of the new xbuf.  Callers are allowed to depend on this.
12193 	 */
12194 	new_xp->xb_private = bp;
12195 
12196 	new_bp->b_private  = new_xp;
12197 
12198 	return (new_bp);
12199 }
12200 
12201 /*
12202  *    Function: sd_shadow_buf_alloc
12203  *
12204  * Description: Allocate a buf(9S) and init it as per the given buf
12205  *		and the various arguments.  The associated sd_xbuf
12206  *		struct is (nearly) duplicated.  The struct buf *bp
12207  *		argument is saved in new_xp->xb_private.
12208  *
12209  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
12210  *		datalen - size of data area for the shadow bp
12211  *		bflags - B_READ or B_WRITE (pseudo flag)
12212  *		blkno - starting LBA
12213  *		func - function pointer for b_iodone in the shadow buf. (May
12214  *			be NULL if none.)
12215  *
12216  * Return Code: Pointer to allocates buf(9S) struct
12217  *
12218  *     Context: Can sleep.
12219  */
12220 
12221 static struct buf *
12222 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12223 	daddr_t blkno, int (*func)(struct buf *))
12224 {
12225 	struct	sd_lun	*un;
12226 	struct	sd_xbuf	*xp;
12227 	struct	sd_xbuf	*new_xp;
12228 	struct	buf	*new_bp;
12229 
12230 	ASSERT(bp != NULL);
12231 	xp = SD_GET_XBUF(bp);
12232 	ASSERT(xp != NULL);
12233 	un = SD_GET_UN(bp);
12234 	ASSERT(un != NULL);
12235 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12236 
12237 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12238 		bp_mapin(bp);
12239 	}
12240 
12241 	bflags &= (B_READ | B_WRITE);
12242 #if defined(__i386) || defined(__amd64)
12243 	new_bp = getrbuf(KM_SLEEP);
12244 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12245 	new_bp->b_bcount = datalen;
12246 	new_bp->b_flags = bflags |
12247 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12248 #else
12249 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12250 	    datalen, bflags, SLEEP_FUNC, NULL);
12251 #endif
12252 	new_bp->av_forw	= NULL;
12253 	new_bp->av_back	= NULL;
12254 	new_bp->b_dev	= bp->b_dev;
12255 	new_bp->b_blkno	= blkno;
12256 	new_bp->b_iodone = func;
12257 	new_bp->b_edev	= bp->b_edev;
12258 	new_bp->b_resid	= 0;
12259 
12260 	/* We need to preserve the B_FAILFAST flag */
12261 	if (bp->b_flags & B_FAILFAST) {
12262 		new_bp->b_flags |= B_FAILFAST;
12263 	}
12264 
12265 	/*
12266 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12267 	 * original xbuf into it.
12268 	 */
12269 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12270 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12271 
12272 	/* Need later to copy data between the shadow buf & original buf! */
12273 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12274 
12275 	/*
12276 	 * The given bp is automatically saved in the xb_private member
12277 	 * of the new xbuf.  Callers are allowed to depend on this.
12278 	 */
12279 	new_xp->xb_private = bp;
12280 
12281 	new_bp->b_private  = new_xp;
12282 
12283 	return (new_bp);
12284 }
12285 
12286 /*
12287  *    Function: sd_bioclone_free
12288  *
12289  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12290  *		in the larger than partition operation.
12291  *
12292  *     Context: May be called under interrupt context
12293  */
12294 
12295 static void
12296 sd_bioclone_free(struct buf *bp)
12297 {
12298 	struct sd_xbuf	*xp;
12299 
12300 	ASSERT(bp != NULL);
12301 	xp = SD_GET_XBUF(bp);
12302 	ASSERT(xp != NULL);
12303 
12304 	/*
12305 	 * Call bp_mapout() before freeing the buf,  in case a lower
12306 	 * layer or HBA  had done a bp_mapin().  we must do this here
12307 	 * as we are the "originator" of the shadow buf.
12308 	 */
12309 	bp_mapout(bp);
12310 
12311 	/*
12312 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12313 	 * never gets confused by a stale value in this field. (Just a little
12314 	 * extra defensiveness here.)
12315 	 */
12316 	bp->b_iodone = NULL;
12317 
12318 	freerbuf(bp);
12319 
12320 	kmem_free(xp, sizeof (struct sd_xbuf));
12321 }
12322 
12323 /*
12324  *    Function: sd_shadow_buf_free
12325  *
12326  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12327  *
12328  *     Context: May be called under interrupt context
12329  */
12330 
12331 static void
12332 sd_shadow_buf_free(struct buf *bp)
12333 {
12334 	struct sd_xbuf	*xp;
12335 
12336 	ASSERT(bp != NULL);
12337 	xp = SD_GET_XBUF(bp);
12338 	ASSERT(xp != NULL);
12339 
12340 #if defined(__sparc)
12341 	/*
12342 	 * Call bp_mapout() before freeing the buf,  in case a lower
12343 	 * layer or HBA  had done a bp_mapin().  we must do this here
12344 	 * as we are the "originator" of the shadow buf.
12345 	 */
12346 	bp_mapout(bp);
12347 #endif
12348 
12349 	/*
12350 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12351 	 * never gets confused by a stale value in this field. (Just a little
12352 	 * extra defensiveness here.)
12353 	 */
12354 	bp->b_iodone = NULL;
12355 
12356 #if defined(__i386) || defined(__amd64)
12357 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12358 	freerbuf(bp);
12359 #else
12360 	scsi_free_consistent_buf(bp);
12361 #endif
12362 
12363 	kmem_free(xp, sizeof (struct sd_xbuf));
12364 }
12365 
12366 
12367 /*
12368  *    Function: sd_print_transport_rejected_message
12369  *
12370  * Description: This implements the ludicrously complex rules for printing
12371  *		a "transport rejected" message.  This is to address the
12372  *		specific problem of having a flood of this error message
12373  *		produced when a failover occurs.
12374  *
12375  *     Context: Any.
12376  */
12377 
12378 static void
12379 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12380 	int code)
12381 {
12382 	ASSERT(un != NULL);
12383 	ASSERT(mutex_owned(SD_MUTEX(un)));
12384 	ASSERT(xp != NULL);
12385 
12386 	/*
12387 	 * Print the "transport rejected" message under the following
12388 	 * conditions:
12389 	 *
12390 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
12391 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12392 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12393 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12394 	 *   scsi_transport(9F) (which indicates that the target might have
12395 	 *   gone off-line).  This uses the un->un_tran_fatal_count
12396 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
12397 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
12398 	 *   from scsi_transport().
12399 	 *
12400 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
12401 	 * the preceeding cases in order for the message to be printed.
12402 	 */
12403 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12404 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12405 		    (code != TRAN_FATAL_ERROR) ||
12406 		    (un->un_tran_fatal_count == 1)) {
12407 			switch (code) {
12408 			case TRAN_BADPKT:
12409 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12410 				    "transport rejected bad packet\n");
12411 				break;
12412 			case TRAN_FATAL_ERROR:
12413 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12414 				    "transport rejected fatal error\n");
12415 				break;
12416 			default:
12417 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12418 				    "transport rejected (%d)\n", code);
12419 				break;
12420 			}
12421 		}
12422 	}
12423 }
12424 
12425 
12426 /*
12427  *    Function: sd_add_buf_to_waitq
12428  *
12429  * Description: Add the given buf(9S) struct to the wait queue for the
12430  *		instance.  If sorting is enabled, then the buf is added
12431  *		to the queue via an elevator sort algorithm (a la
12432  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12433  *		If sorting is not enabled, then the buf is just added
12434  *		to the end of the wait queue.
12435  *
12436  * Return Code: void
12437  *
12438  *     Context: Does not sleep/block, therefore technically can be called
12439  *		from any context.  However if sorting is enabled then the
12440  *		execution time is indeterminate, and may take long if
12441  *		the wait queue grows large.
12442  */
12443 
12444 static void
12445 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12446 {
12447 	struct buf *ap;
12448 
12449 	ASSERT(bp != NULL);
12450 	ASSERT(un != NULL);
12451 	ASSERT(mutex_owned(SD_MUTEX(un)));
12452 
12453 	/* If the queue is empty, add the buf as the only entry & return. */
12454 	if (un->un_waitq_headp == NULL) {
12455 		ASSERT(un->un_waitq_tailp == NULL);
12456 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12457 		bp->av_forw = NULL;
12458 		return;
12459 	}
12460 
12461 	ASSERT(un->un_waitq_tailp != NULL);
12462 
12463 	/*
12464 	 * If sorting is disabled, just add the buf to the tail end of
12465 	 * the wait queue and return.
12466 	 */
12467 	if (un->un_f_disksort_disabled) {
12468 		un->un_waitq_tailp->av_forw = bp;
12469 		un->un_waitq_tailp = bp;
12470 		bp->av_forw = NULL;
12471 		return;
12472 	}
12473 
12474 	/*
12475 	 * Sort thru the list of requests currently on the wait queue
12476 	 * and add the new buf request at the appropriate position.
12477 	 *
12478 	 * The un->un_waitq_headp is an activity chain pointer on which
12479 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12480 	 * first queue holds those requests which are positioned after
12481 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12482 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12483 	 * Thus we implement a one way scan, retracting after reaching
12484 	 * the end of the drive to the first request on the second
12485 	 * queue, at which time it becomes the first queue.
12486 	 * A one-way scan is natural because of the way UNIX read-ahead
12487 	 * blocks are allocated.
12488 	 *
12489 	 * If we lie after the first request, then we must locate the
12490 	 * second request list and add ourselves to it.
12491 	 */
12492 	ap = un->un_waitq_headp;
12493 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12494 		while (ap->av_forw != NULL) {
12495 			/*
12496 			 * Look for an "inversion" in the (normally
12497 			 * ascending) block numbers. This indicates
12498 			 * the start of the second request list.
12499 			 */
12500 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12501 				/*
12502 				 * Search the second request list for the
12503 				 * first request at a larger block number.
12504 				 * We go before that; however if there is
12505 				 * no such request, we go at the end.
12506 				 */
12507 				do {
12508 					if (SD_GET_BLKNO(bp) <
12509 					    SD_GET_BLKNO(ap->av_forw)) {
12510 						goto insert;
12511 					}
12512 					ap = ap->av_forw;
12513 				} while (ap->av_forw != NULL);
12514 				goto insert;		/* after last */
12515 			}
12516 			ap = ap->av_forw;
12517 		}
12518 
12519 		/*
12520 		 * No inversions... we will go after the last, and
12521 		 * be the first request in the second request list.
12522 		 */
12523 		goto insert;
12524 	}
12525 
12526 	/*
12527 	 * Request is at/after the current request...
12528 	 * sort in the first request list.
12529 	 */
12530 	while (ap->av_forw != NULL) {
12531 		/*
12532 		 * We want to go after the current request (1) if
12533 		 * there is an inversion after it (i.e. it is the end
12534 		 * of the first request list), or (2) if the next
12535 		 * request is a larger block no. than our request.
12536 		 */
12537 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12538 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12539 			goto insert;
12540 		}
12541 		ap = ap->av_forw;
12542 	}
12543 
12544 	/*
12545 	 * Neither a second list nor a larger request, therefore
12546 	 * we go at the end of the first list (which is the same
12547 	 * as the end of the whole schebang).
12548 	 */
12549 insert:
12550 	bp->av_forw = ap->av_forw;
12551 	ap->av_forw = bp;
12552 
12553 	/*
12554 	 * If we inserted onto the tail end of the waitq, make sure the
12555 	 * tail pointer is updated.
12556 	 */
12557 	if (ap == un->un_waitq_tailp) {
12558 		un->un_waitq_tailp = bp;
12559 	}
12560 }
12561 
12562 
12563 /*
12564  *    Function: sd_start_cmds
12565  *
12566  * Description: Remove and transport cmds from the driver queues.
12567  *
12568  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12569  *
12570  *		immed_bp - ptr to a buf to be transported immediately. Only
12571  *		the immed_bp is transported; bufs on the waitq are not
12572  *		processed and the un_retry_bp is not checked.  If immed_bp is
12573  *		NULL, then normal queue processing is performed.
12574  *
12575  *     Context: May be called from kernel thread context, interrupt context,
12576  *		or runout callback context. This function may not block or
12577  *		call routines that block.
12578  */
12579 
12580 static void
12581 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12582 {
12583 	struct	sd_xbuf	*xp;
12584 	struct	buf	*bp;
12585 	void	(*statp)(kstat_io_t *);
12586 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12587 	void	(*saved_statp)(kstat_io_t *);
12588 #endif
12589 	int	rval;
12590 
12591 	ASSERT(un != NULL);
12592 	ASSERT(mutex_owned(SD_MUTEX(un)));
12593 	ASSERT(un->un_ncmds_in_transport >= 0);
12594 	ASSERT(un->un_throttle >= 0);
12595 
12596 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12597 
12598 	do {
12599 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12600 		saved_statp = NULL;
12601 #endif
12602 
12603 		/*
12604 		 * If we are syncing or dumping, fail the command to
12605 		 * avoid recursively calling back into scsi_transport().
12606 		 * The dump I/O itself uses a separate code path so this
12607 		 * only prevents non-dump I/O from being sent while dumping.
12608 		 * File system sync takes place before dumping begins.
12609 		 * During panic, filesystem I/O is allowed provided
12610 		 * un_in_callback is <= 1.  This is to prevent recursion
12611 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12612 		 * sd_start_cmds and so on.  See panic.c for more information
12613 		 * about the states the system can be in during panic.
12614 		 */
12615 		if ((un->un_state == SD_STATE_DUMPING) ||
12616 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12617 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12618 			    "sd_start_cmds: panicking\n");
12619 			goto exit;
12620 		}
12621 
12622 		if ((bp = immed_bp) != NULL) {
12623 			/*
12624 			 * We have a bp that must be transported immediately.
12625 			 * It's OK to transport the immed_bp here without doing
12626 			 * the throttle limit check because the immed_bp is
12627 			 * always used in a retry/recovery case. This means
12628 			 * that we know we are not at the throttle limit by
12629 			 * virtue of the fact that to get here we must have
12630 			 * already gotten a command back via sdintr(). This also
12631 			 * relies on (1) the command on un_retry_bp preventing
12632 			 * further commands from the waitq from being issued;
12633 			 * and (2) the code in sd_retry_command checking the
12634 			 * throttle limit before issuing a delayed or immediate
12635 			 * retry. This holds even if the throttle limit is
12636 			 * currently ratcheted down from its maximum value.
12637 			 */
12638 			statp = kstat_runq_enter;
12639 			if (bp == un->un_retry_bp) {
12640 				ASSERT((un->un_retry_statp == NULL) ||
12641 				    (un->un_retry_statp == kstat_waitq_enter) ||
12642 				    (un->un_retry_statp ==
12643 				    kstat_runq_back_to_waitq));
12644 				/*
12645 				 * If the waitq kstat was incremented when
12646 				 * sd_set_retry_bp() queued this bp for a retry,
12647 				 * then we must set up statp so that the waitq
12648 				 * count will get decremented correctly below.
12649 				 * Also we must clear un->un_retry_statp to
12650 				 * ensure that we do not act on a stale value
12651 				 * in this field.
12652 				 */
12653 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12654 				    (un->un_retry_statp ==
12655 				    kstat_runq_back_to_waitq)) {
12656 					statp = kstat_waitq_to_runq;
12657 				}
12658 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12659 				saved_statp = un->un_retry_statp;
12660 #endif
12661 				un->un_retry_statp = NULL;
12662 
12663 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12664 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12665 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12666 				    un, un->un_retry_bp, un->un_throttle,
12667 				    un->un_ncmds_in_transport);
12668 			} else {
12669 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12670 				    "processing priority bp:0x%p\n", bp);
12671 			}
12672 
12673 		} else if ((bp = un->un_waitq_headp) != NULL) {
12674 			/*
12675 			 * A command on the waitq is ready to go, but do not
12676 			 * send it if:
12677 			 *
12678 			 * (1) the throttle limit has been reached, or
12679 			 * (2) a retry is pending, or
12680 			 * (3) a START_STOP_UNIT callback pending, or
12681 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
12682 			 *	command is pending.
12683 			 *
12684 			 * For all of these conditions, IO processing will
12685 			 * restart after the condition is cleared.
12686 			 */
12687 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12688 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12689 				    "sd_start_cmds: exiting, "
12690 				    "throttle limit reached!\n");
12691 				goto exit;
12692 			}
12693 			if (un->un_retry_bp != NULL) {
12694 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12695 				    "sd_start_cmds: exiting, retry pending!\n");
12696 				goto exit;
12697 			}
12698 			if (un->un_startstop_timeid != NULL) {
12699 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12700 				    "sd_start_cmds: exiting, "
12701 				    "START_STOP pending!\n");
12702 				goto exit;
12703 			}
12704 			if (un->un_direct_priority_timeid != NULL) {
12705 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12706 				    "sd_start_cmds: exiting, "
12707 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12708 				goto exit;
12709 			}
12710 
12711 			/* Dequeue the command */
12712 			un->un_waitq_headp = bp->av_forw;
12713 			if (un->un_waitq_headp == NULL) {
12714 				un->un_waitq_tailp = NULL;
12715 			}
12716 			bp->av_forw = NULL;
12717 			statp = kstat_waitq_to_runq;
12718 			SD_TRACE(SD_LOG_IO_CORE, un,
12719 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12720 
12721 		} else {
12722 			/* No work to do so bail out now */
12723 			SD_TRACE(SD_LOG_IO_CORE, un,
12724 			    "sd_start_cmds: no more work, exiting!\n");
12725 			goto exit;
12726 		}
12727 
12728 		/*
12729 		 * Reset the state to normal. This is the mechanism by which
12730 		 * the state transitions from either SD_STATE_RWAIT or
12731 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12732 		 * If state is SD_STATE_PM_CHANGING then this command is
12733 		 * part of the device power control and the state must
12734 		 * not be put back to normal. Doing so would
12735 		 * allow new commands to proceed when they shouldn't,
12736 		 * the device may be going off.
12737 		 */
12738 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12739 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12740 			New_state(un, SD_STATE_NORMAL);
12741 		    }
12742 
12743 		xp = SD_GET_XBUF(bp);
12744 		ASSERT(xp != NULL);
12745 
12746 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12747 		/*
12748 		 * Allocate the scsi_pkt if we need one, or attach DMA
12749 		 * resources if we have a scsi_pkt that needs them. The
12750 		 * latter should only occur for commands that are being
12751 		 * retried.
12752 		 */
12753 		if ((xp->xb_pktp == NULL) ||
12754 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12755 #else
12756 		if (xp->xb_pktp == NULL) {
12757 #endif
12758 			/*
12759 			 * There is no scsi_pkt allocated for this buf. Call
12760 			 * the initpkt function to allocate & init one.
12761 			 *
12762 			 * The scsi_init_pkt runout callback functionality is
12763 			 * implemented as follows:
12764 			 *
12765 			 * 1) The initpkt function always calls
12766 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12767 			 *    callback routine.
12768 			 * 2) A successful packet allocation is initialized and
12769 			 *    the I/O is transported.
12770 			 * 3) The I/O associated with an allocation resource
12771 			 *    failure is left on its queue to be retried via
12772 			 *    runout or the next I/O.
12773 			 * 4) The I/O associated with a DMA error is removed
12774 			 *    from the queue and failed with EIO. Processing of
12775 			 *    the transport queues is also halted to be
12776 			 *    restarted via runout or the next I/O.
12777 			 * 5) The I/O associated with a CDB size or packet
12778 			 *    size error is removed from the queue and failed
12779 			 *    with EIO. Processing of the transport queues is
12780 			 *    continued.
12781 			 *
12782 			 * Note: there is no interface for canceling a runout
12783 			 * callback. To prevent the driver from detaching or
12784 			 * suspending while a runout is pending the driver
12785 			 * state is set to SD_STATE_RWAIT
12786 			 *
12787 			 * Note: using the scsi_init_pkt callback facility can
12788 			 * result in an I/O request persisting at the head of
12789 			 * the list which cannot be satisfied even after
12790 			 * multiple retries. In the future the driver may
12791 			 * implement some kind of maximum runout count before
12792 			 * failing an I/O.
12793 			 *
12794 			 * Note: the use of funcp below may seem superfluous,
12795 			 * but it helps warlock figure out the correct
12796 			 * initpkt function calls (see [s]sd.wlcmd).
12797 			 */
12798 			struct scsi_pkt	*pktp;
12799 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
12800 
12801 			ASSERT(bp != un->un_rqs_bp);
12802 
12803 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
12804 			switch ((*funcp)(bp, &pktp)) {
12805 			case  SD_PKT_ALLOC_SUCCESS:
12806 				xp->xb_pktp = pktp;
12807 				SD_TRACE(SD_LOG_IO_CORE, un,
12808 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
12809 				    pktp);
12810 				goto got_pkt;
12811 
12812 			case SD_PKT_ALLOC_FAILURE:
12813 				/*
12814 				 * Temporary (hopefully) resource depletion.
12815 				 * Since retries and RQS commands always have a
12816 				 * scsi_pkt allocated, these cases should never
12817 				 * get here. So the only cases this needs to
12818 				 * handle is a bp from the waitq (which we put
12819 				 * back onto the waitq for sdrunout), or a bp
12820 				 * sent as an immed_bp (which we just fail).
12821 				 */
12822 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12823 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
12824 
12825 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12826 
12827 				if (bp == immed_bp) {
12828 					/*
12829 					 * If SD_XB_DMA_FREED is clear, then
12830 					 * this is a failure to allocate a
12831 					 * scsi_pkt, and we must fail the
12832 					 * command.
12833 					 */
12834 					if ((xp->xb_pkt_flags &
12835 					    SD_XB_DMA_FREED) == 0) {
12836 						break;
12837 					}
12838 
12839 					/*
12840 					 * If this immediate command is NOT our
12841 					 * un_retry_bp, then we must fail it.
12842 					 */
12843 					if (bp != un->un_retry_bp) {
12844 						break;
12845 					}
12846 
12847 					/*
12848 					 * We get here if this cmd is our
12849 					 * un_retry_bp that was DMAFREED, but
12850 					 * scsi_init_pkt() failed to reallocate
12851 					 * DMA resources when we attempted to
12852 					 * retry it. This can happen when an
12853 					 * mpxio failover is in progress, but
12854 					 * we don't want to just fail the
12855 					 * command in this case.
12856 					 *
12857 					 * Use timeout(9F) to restart it after
12858 					 * a 100ms delay.  We don't want to
12859 					 * let sdrunout() restart it, because
12860 					 * sdrunout() is just supposed to start
12861 					 * commands that are sitting on the
12862 					 * wait queue.  The un_retry_bp stays
12863 					 * set until the command completes, but
12864 					 * sdrunout can be called many times
12865 					 * before that happens.  Since sdrunout
12866 					 * cannot tell if the un_retry_bp is
12867 					 * already in the transport, it could
12868 					 * end up calling scsi_transport() for
12869 					 * the un_retry_bp multiple times.
12870 					 *
12871 					 * Also: don't schedule the callback
12872 					 * if some other callback is already
12873 					 * pending.
12874 					 */
12875 					if (un->un_retry_statp == NULL) {
12876 						/*
12877 						 * restore the kstat pointer to
12878 						 * keep kstat counts coherent
12879 						 * when we do retry the command.
12880 						 */
12881 						un->un_retry_statp =
12882 						    saved_statp;
12883 					}
12884 
12885 					if ((un->un_startstop_timeid == NULL) &&
12886 					    (un->un_retry_timeid == NULL) &&
12887 					    (un->un_direct_priority_timeid ==
12888 					    NULL)) {
12889 
12890 						un->un_retry_timeid =
12891 						    timeout(
12892 						    sd_start_retry_command,
12893 						    un, SD_RESTART_TIMEOUT);
12894 					}
12895 					goto exit;
12896 				}
12897 
12898 #else
12899 				if (bp == immed_bp) {
12900 					break;	/* Just fail the command */
12901 				}
12902 #endif
12903 
12904 				/* Add the buf back to the head of the waitq */
12905 				bp->av_forw = un->un_waitq_headp;
12906 				un->un_waitq_headp = bp;
12907 				if (un->un_waitq_tailp == NULL) {
12908 					un->un_waitq_tailp = bp;
12909 				}
12910 				goto exit;
12911 
12912 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
12913 				/*
12914 				 * HBA DMA resource failure. Fail the command
12915 				 * and continue processing of the queues.
12916 				 */
12917 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12918 				    "sd_start_cmds: "
12919 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
12920 				break;
12921 
12922 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
12923 				/*
12924 				 * Note:x86: Partial DMA mapping not supported
12925 				 * for USCSI commands, and all the needed DMA
12926 				 * resources were not allocated.
12927 				 */
12928 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12929 				    "sd_start_cmds: "
12930 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
12931 				break;
12932 
12933 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
12934 				/*
12935 				 * Note:x86: Request cannot fit into CDB based
12936 				 * on lba and len.
12937 				 */
12938 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12939 				    "sd_start_cmds: "
12940 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
12941 				break;
12942 
12943 			default:
12944 				/* Should NEVER get here! */
12945 				panic("scsi_initpkt error");
12946 				/*NOTREACHED*/
12947 			}
12948 
12949 			/*
12950 			 * Fatal error in allocating a scsi_pkt for this buf.
12951 			 * Update kstats & return the buf with an error code.
12952 			 * We must use sd_return_failed_command_no_restart() to
12953 			 * avoid a recursive call back into sd_start_cmds().
12954 			 * However this also means that we must keep processing
12955 			 * the waitq here in order to avoid stalling.
12956 			 */
12957 			if (statp == kstat_waitq_to_runq) {
12958 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
12959 			}
12960 			sd_return_failed_command_no_restart(un, bp, EIO);
12961 			if (bp == immed_bp) {
12962 				/* immed_bp is gone by now, so clear this */
12963 				immed_bp = NULL;
12964 			}
12965 			continue;
12966 		}
12967 got_pkt:
12968 		if (bp == immed_bp) {
12969 			/* goto the head of the class.... */
12970 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
12971 		}
12972 
12973 		un->un_ncmds_in_transport++;
12974 		SD_UPDATE_KSTATS(un, statp, bp);
12975 
12976 		/*
12977 		 * Call scsi_transport() to send the command to the target.
12978 		 * According to SCSA architecture, we must drop the mutex here
12979 		 * before calling scsi_transport() in order to avoid deadlock.
12980 		 * Note that the scsi_pkt's completion routine can be executed
12981 		 * (from interrupt context) even before the call to
12982 		 * scsi_transport() returns.
12983 		 */
12984 		SD_TRACE(SD_LOG_IO_CORE, un,
12985 		    "sd_start_cmds: calling scsi_transport()\n");
12986 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
12987 
12988 		mutex_exit(SD_MUTEX(un));
12989 		rval = scsi_transport(xp->xb_pktp);
12990 		mutex_enter(SD_MUTEX(un));
12991 
12992 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12993 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
12994 
12995 		switch (rval) {
12996 		case TRAN_ACCEPT:
12997 			/* Clear this with every pkt accepted by the HBA */
12998 			un->un_tran_fatal_count = 0;
12999 			break;	/* Success; try the next cmd (if any) */
13000 
13001 		case TRAN_BUSY:
13002 			un->un_ncmds_in_transport--;
13003 			ASSERT(un->un_ncmds_in_transport >= 0);
13004 
13005 			/*
13006 			 * Don't retry request sense, the sense data
13007 			 * is lost when another request is sent.
13008 			 * Free up the rqs buf and retry
13009 			 * the original failed cmd.  Update kstat.
13010 			 */
13011 			if (bp == un->un_rqs_bp) {
13012 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13013 				bp = sd_mark_rqs_idle(un, xp);
13014 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
13015 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
13016 					kstat_waitq_enter);
13017 				goto exit;
13018 			}
13019 
13020 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13021 			/*
13022 			 * Free the DMA resources for the  scsi_pkt. This will
13023 			 * allow mpxio to select another path the next time
13024 			 * we call scsi_transport() with this scsi_pkt.
13025 			 * See sdintr() for the rationalization behind this.
13026 			 */
13027 			if ((un->un_f_is_fibre == TRUE) &&
13028 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13029 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13030 				scsi_dmafree(xp->xb_pktp);
13031 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13032 			}
13033 #endif
13034 
13035 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13036 				/*
13037 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13038 				 * are for error recovery situations. These do
13039 				 * not use the normal command waitq, so if they
13040 				 * get a TRAN_BUSY we cannot put them back onto
13041 				 * the waitq for later retry. One possible
13042 				 * problem is that there could already be some
13043 				 * other command on un_retry_bp that is waiting
13044 				 * for this one to complete, so we would be
13045 				 * deadlocked if we put this command back onto
13046 				 * the waitq for later retry (since un_retry_bp
13047 				 * must complete before the driver gets back to
13048 				 * commands on the waitq).
13049 				 *
13050 				 * To avoid deadlock we must schedule a callback
13051 				 * that will restart this command after a set
13052 				 * interval.  This should keep retrying for as
13053 				 * long as the underlying transport keeps
13054 				 * returning TRAN_BUSY (just like for other
13055 				 * commands).  Use the same timeout interval as
13056 				 * for the ordinary TRAN_BUSY retry.
13057 				 */
13058 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13059 				    "sd_start_cmds: scsi_transport() returned "
13060 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13061 
13062 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13063 				un->un_direct_priority_timeid =
13064 				    timeout(sd_start_direct_priority_command,
13065 				    bp, SD_BSY_TIMEOUT / 500);
13066 
13067 				goto exit;
13068 			}
13069 
13070 			/*
13071 			 * For TRAN_BUSY, we want to reduce the throttle value,
13072 			 * unless we are retrying a command.
13073 			 */
13074 			if (bp != un->un_retry_bp) {
13075 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13076 			}
13077 
13078 			/*
13079 			 * Set up the bp to be tried again 10 ms later.
13080 			 * Note:x86: Is there a timeout value in the sd_lun
13081 			 * for this condition?
13082 			 */
13083 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13084 				kstat_runq_back_to_waitq);
13085 			goto exit;
13086 
13087 		case TRAN_FATAL_ERROR:
13088 			un->un_tran_fatal_count++;
13089 			/* FALLTHRU */
13090 
13091 		case TRAN_BADPKT:
13092 		default:
13093 			un->un_ncmds_in_transport--;
13094 			ASSERT(un->un_ncmds_in_transport >= 0);
13095 
13096 			/*
13097 			 * If this is our REQUEST SENSE command with a
13098 			 * transport error, we must get back the pointers
13099 			 * to the original buf, and mark the REQUEST
13100 			 * SENSE command as "available".
13101 			 */
13102 			if (bp == un->un_rqs_bp) {
13103 				bp = sd_mark_rqs_idle(un, xp);
13104 				xp = SD_GET_XBUF(bp);
13105 			} else {
13106 				/*
13107 				 * Legacy behavior: do not update transport
13108 				 * error count for request sense commands.
13109 				 */
13110 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13111 			}
13112 
13113 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13114 			sd_print_transport_rejected_message(un, xp, rval);
13115 
13116 			/*
13117 			 * We must use sd_return_failed_command_no_restart() to
13118 			 * avoid a recursive call back into sd_start_cmds().
13119 			 * However this also means that we must keep processing
13120 			 * the waitq here in order to avoid stalling.
13121 			 */
13122 			sd_return_failed_command_no_restart(un, bp, EIO);
13123 
13124 			/*
13125 			 * Notify any threads waiting in sd_ddi_suspend() that
13126 			 * a command completion has occurred.
13127 			 */
13128 			if (un->un_state == SD_STATE_SUSPENDED) {
13129 				cv_broadcast(&un->un_disk_busy_cv);
13130 			}
13131 
13132 			if (bp == immed_bp) {
13133 				/* immed_bp is gone by now, so clear this */
13134 				immed_bp = NULL;
13135 			}
13136 			break;
13137 		}
13138 
13139 	} while (immed_bp == NULL);
13140 
13141 exit:
13142 	ASSERT(mutex_owned(SD_MUTEX(un)));
13143 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13144 }
13145 
13146 
13147 /*
13148  *    Function: sd_return_command
13149  *
13150  * Description: Returns a command to its originator (with or without an
13151  *		error).  Also starts commands waiting to be transported
13152  *		to the target.
13153  *
 *   Arguments: un - Pointer to the sd_lun struct for the target.
 *		bp - Pointer to the buf for the command being returned.
 *		   Must not be un->un_rqs_bp (asserted).
 *
 *       Notes: SD_MUTEX(un) must be held by the caller (asserted). The
 *		mutex is dropped around the destroypkt and iodone calls
 *		and is re-acquired before returning.
 *
13154  *     Context: May be called from interrupt, kernel, or timeout context
13155  */
13156 
13157 static void
13158 sd_return_command(struct sd_lun *un, struct buf *bp)
13159 {
13160 	struct sd_xbuf *xp;
13161 #if defined(__i386) || defined(__amd64)
13162 	struct scsi_pkt *pktp;
13163 #endif
13164 
13165 	ASSERT(bp != NULL);
13166 	ASSERT(un != NULL);
13167 	ASSERT(mutex_owned(SD_MUTEX(un)));
13168 	ASSERT(bp != un->un_rqs_bp);
13169 	xp = SD_GET_XBUF(bp);
13170 	ASSERT(xp != NULL);
13171 
13172 #if defined(__i386) || defined(__amd64)
13173 	pktp = SD_GET_PKTP(bp);
13174 #endif
13175 
13176 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13177 
13178 #if defined(__i386) || defined(__amd64)
13179 	/*
13180 	 * Note:x86: check for the "sdrestart failed" case.
	 *
	 * The test below selects non-USCSI commands that finished without
	 * error (no bp error, zero pkt_resid) but still have a nonzero
	 * xb_dma_resid. For those, sd_setup_next_xfer() is asked to set up
	 * the next portion of the transfer.
13181 	 */
13182 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13183 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13184 		(xp->xb_pktp->pkt_resid == 0)) {
13185 
13186 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13187 			/*
13188 			 * Successfully set up next portion of cmd
13189 			 * transfer, try sending it
13190 			 */
13191 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13192 			    NULL, NULL, 0, (clock_t)0, NULL);
13193 			sd_start_cmds(un, NULL);
13194 			return;	/* bp was handed to sd_retry_command() */
13195 		}
13196 	}
13197 #endif
13198 
13199 	/*
13200 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13201 	 * can happen if upon being re-tried the failfast bp either
13202 	 * succeeded or encountered another error (possibly even a different
13203 	 * error than the one that precipitated the failfast state, but in
13204 	 * that case it would have had to exhaust retries as well). Regardless,
13205 	 * this should not occur whenever the instance is in the active
13206 	 * failfast state.
13207 	 */
13208 	if (bp == un->un_failfast_bp) {
13209 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13210 		un->un_failfast_bp = NULL;
13211 	}
13212 
13213 	/*
13214 	 * Clear the failfast state upon successful completion of ANY cmd.
13215 	 */
13216 	if (bp->b_error == 0) {
13217 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13218 	}
13219 
13220 	/*
13221 	 * This is used if the command was retried one or more times. Show that
13222 	 * we are done with it, and allow processing of the waitq to resume.
13223 	 */
13224 	if (bp == un->un_retry_bp) {
13225 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13226 		    "sd_return_command: un:0x%p: "
13227 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13228 		un->un_retry_bp = NULL;
13229 		un->un_retry_statp = NULL;
13230 	}
13231 
13232 	SD_UPDATE_RDWR_STATS(un, bp);
13233 	SD_UPDATE_PARTITION_STATS(un, bp);
13234 
13235 	switch (un->un_state) {
13236 	case SD_STATE_SUSPENDED:
13237 		/*
13238 		 * Notify any threads waiting in sd_ddi_suspend() that
13239 		 * a command completion has occurred.
13240 		 */
13241 		cv_broadcast(&un->un_disk_busy_cv);
13242 		break;
13243 	default:
		/* Not suspended: start any commands waiting for transport. */
13244 		sd_start_cmds(un, NULL);
13245 		break;
13246 	}
13247 
13248 	/* Return this command up the iodone chain to its originator. */
13249 	mutex_exit(SD_MUTEX(un));
13250 
	/*
	 * Call the destroypkt routine selected by this xbuf's iodone chain
	 * index, then clear the now-stale packet pointer.
	 */
13251 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13252 	xp->xb_pktp = NULL;
13253 
13254 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13255 
13256 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13257 	mutex_enter(SD_MUTEX(un));
13258 
13259 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13260 }
13261 
13262 
13263 /*
13264  *    Function: sd_return_failed_command
13265  *
13266  * Description: Command completion when an error occurred.
 *		Sets errcode in the bp via SD_BIOERROR() and then completes
 *		the command through sd_return_command().
13267  *
 *   Arguments: un - Pointer to the sd_lun struct for the target.
 *		bp - Pointer to the buf for the failed command.
 *		errcode - errno value to set in the bp.
 *
 *       Notes: SD_MUTEX(un) must be held by the caller (asserted).
 *
13268  *     Context: May be called from interrupt context
13269  */
13270 
13271 static void
13272 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13273 {
13274 	ASSERT(bp != NULL);
13275 	ASSERT(un != NULL);
13276 	ASSERT(mutex_owned(SD_MUTEX(un)));
13277 
13278 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13279 	    "sd_return_failed_command: entry\n");
13280 
13281 	/*
13282 	 * b_resid could already be nonzero due to a partial data
13283 	 * transfer, so do not change it here.
13284 	 */
13285 	SD_BIOERROR(bp, errcode);
13286 
13287 	sd_return_command(un, bp);
13288 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13289 	    "sd_return_failed_command: exit\n");
13290 }
13291 
13292 
13293 /*
13294  *    Function: sd_return_failed_command_no_restart
13295  *
13296  * Description: Same as sd_return_failed_command, but ensures that no
13297  *		call back into sd_start_cmds will be issued.
13298  *
 *   Arguments: un - Pointer to the sd_lun struct for the target.
 *		bp - Pointer to the buf for the failed command.
 *		errcode - errno value to set in the bp; must be nonzero
 *		   (asserted).
 *
 *       Notes: SD_MUTEX(un) must be held by the caller (asserted). The
 *		mutex is dropped around the destroypkt and iodone calls
 *		and is re-acquired before returning.
 *
13299  *     Context: May be called from interrupt context
13300  */
13301 
13302 static void
13303 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13304 	int errcode)
13305 {
13306 	struct sd_xbuf *xp;
13307 
13308 	ASSERT(bp != NULL);
13309 	ASSERT(un != NULL);
13310 	ASSERT(mutex_owned(SD_MUTEX(un)));
13311 	xp = SD_GET_XBUF(bp);
13312 	ASSERT(xp != NULL);
13313 	ASSERT(errcode != 0);
13314 
13315 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13316 	    "sd_return_failed_command_no_restart: entry\n");
13317 
13318 	/*
13319 	 * b_resid could already be nonzero due to a partial data
13320 	 * transfer, so do not change it here.
13321 	 */
13322 	SD_BIOERROR(bp, errcode);
13323 
13324 	/*
13325 	 * If this is the failfast bp, clear it. This can happen if the
13326 	 * failfast bp encountered a fatal error when we attempted to
13327 	 * re-try it (such as a scsi_transport(9F) failure).  However
13328 	 * we should NOT be in an active failfast state if the failfast
13329 	 * bp is not NULL.
13330 	 */
13331 	if (bp == un->un_failfast_bp) {
13332 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13333 		un->un_failfast_bp = NULL;
13334 	}
13335 
13336 	if (bp == un->un_retry_bp) {
13337 		/*
13338 		 * This command was retried one or more times. Show that we are
13339 		 * done with it, and allow processing of the waitq to resume.
13340 		 */
13341 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13342 		    "sd_return_failed_command_no_restart: "
13343 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13344 		un->un_retry_bp = NULL;
13345 		un->un_retry_statp = NULL;
13346 	}
13347 
13348 	SD_UPDATE_RDWR_STATS(un, bp);
13349 	SD_UPDATE_PARTITION_STATS(un, bp);
13350 
13351 	mutex_exit(SD_MUTEX(un));
13352 
	/*
	 * sd_start_cmds() also uses this routine when allocating a scsi_pkt
	 * for the buf failed, so there may be no packet to destroy.
	 */
13353 	if (xp->xb_pktp != NULL) {
13354 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13355 		xp->xb_pktp = NULL;
13356 	}
13357 
13358 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13359 
13360 	mutex_enter(SD_MUTEX(un));
13361 
13362 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13363 	    "sd_return_failed_command_no_restart: exit\n");
13364 }
13365 
13366 
13367 /*
13368  *    Function: sd_retry_command
13369  *
13370  * Description: queue up a command for retry, or (optionally) fail it
13371  *		if retry counts are exhausted.
13372  *
13373  *   Arguments: un - Pointer to the sd_lun struct for the target.
13374  *
13375  *		bp - Pointer to the buf for the command to be retried.
13376  *
13377  *		retry_check_flag - Flag to see which (if any) of the retry
13378  *		   counts should be decremented/checked. If the indicated
13379  *		   retry count is exhausted, then the command will not be
13380  *		   retried; it will be failed instead. This should use a
13381  *		   value equal to one of the following:
13382  *
13383  *			SD_RETRIES_NOCHECK
13384  *			SD_RETRIES_STANDARD
13385  *			SD_RETRIES_VICTIM
 *			SD_RETRIES_UA
 *			SD_RETRIES_BUSY
13386  *
13387  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13388  *		   if the check should be made to see if FLAG_ISOLATE is set
13389  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13390  *		   not retried, it is simply failed.
13391  *
 *		   SD_RETRIES_FAILFAST may also be set to indicate a
 *		   failfast error condition; see the failfast handling in
 *		   the function body.
 *
13392  *		user_funcp - Ptr to function to call before dispatching the
13393  *		   command. May be NULL if no action needs to be performed.
13394  *		   (Primarily intended for printing messages.)
13395  *
13396  *		user_arg - Optional argument to be passed along to
13397  *		   the user_funcp call.
13398  *
13399  *		failure_code - errno return code to set in the bp if the
13400  *		   command is going to be failed.
13401  *
13402  *		retry_delay - Retry delay interval in (clock_t) units. May
13403  *		   be zero which indicates that the retry should be retried
13404  *		   immediately (ie, without an intervening delay).
 *		   If the target is already at its throttle limit, a zero
 *		   delay is replaced by SD_BSY_TIMEOUT.
13405  *
13406  *		statp - Ptr to kstat function to be updated if the command
13407  *		   is queued for a delayed retry. May be NULL if no kstat
13408  *		   update is desired.
13409  *
 *       Notes: SD_MUTEX(un) must be held by the caller, and the bp must
 *		already have a scsi_pkt attached (both asserted).
 *
13410  *     Context: May be called from interrupt context.
13411  */
13412 
13413 static void
13414 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13415 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
13416 	code), void *user_arg, int failure_code,  clock_t retry_delay,
13417 	void (*statp)(kstat_io_t *))
13418 {
13419 	struct sd_xbuf	*xp;
13420 	struct scsi_pkt	*pktp;
13421 
13422 	ASSERT(un != NULL);
13423 	ASSERT(mutex_owned(SD_MUTEX(un)));
13424 	ASSERT(bp != NULL);
13425 	xp = SD_GET_XBUF(bp);
13426 	ASSERT(xp != NULL);
13427 	pktp = SD_GET_PKTP(bp);
13428 	ASSERT(pktp != NULL);
13429 
13430 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13431 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13432 
13433 	/*
13434 	 * If we are syncing or dumping, fail the command to avoid
13435 	 * recursively calling back into scsi_transport().
13436 	 */
13437 	if (ddi_in_panic()) {
13438 		goto fail_command_no_log;
13439 	}
13440 
13441 	/*
13442 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13443 	 * log an error and fail the command.
13444 	 */
13445 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13446 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13447 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13448 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13449 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13450 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13451 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13452 		goto fail_command;
13453 	}
13454 
13455 	/*
13456 	 * If we are suspended or dumping, then put the command onto head
13457 	 * of the wait queue since we don't want to start more commands.
13458 	 */
13459 	switch (un->un_state) {
13460 	case SD_STATE_SUSPENDED:
13461 	case SD_STATE_DUMPING:
13462 		bp->av_forw = un->un_waitq_headp;
13463 		un->un_waitq_headp = bp;
13464 		if (un->un_waitq_tailp == NULL) {
13465 			un->un_waitq_tailp = bp;
13466 		}
13467 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13468 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13469 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13470 		return;
13471 	default:
13472 		break;
13473 	}
13474 
13475 	/*
13476 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13477 	 * is set; if it is then we do not want to retry the command.
13478 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13479 	 */
13480 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13481 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13482 			goto fail_command;
13483 		}
13484 	}
13485 
13486 
13487 	/*
13488 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13489 	 * command timeout or a selection timeout has occurred. This means
13490 	 * that we were unable to establish any kind of communication with
13491 	 * the target, and subsequent retries and/or commands are likely
13492 	 * to encounter similar results and take a long time to complete.
13493 	 *
13494 	 * If this is a failfast error condition, we need to update the
13495 	 * failfast state, even if this bp does not have B_FAILFAST set.
13496 	 */
13497 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13498 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13499 			ASSERT(un->un_failfast_bp == NULL);
13500 			/*
13501 			 * If we are already in the active failfast state, and
13502 			 * another failfast error condition has been detected,
13503 			 * then fail this command if it has B_FAILFAST set.
13504 			 * If B_FAILFAST is clear, then maintain the legacy
13505 			 * behavior of retrying heroically, even tho this will
13506 			 * take a lot more time to fail the command.
13507 			 */
13508 			if (bp->b_flags & B_FAILFAST) {
13509 				goto fail_command;
13510 			}
13511 		} else {
13512 			/*
13513 			 * We're not in the active failfast state, but we
13514 			 * have a failfast error condition, so we must begin
13515 			 * transition to the next state. We do this regardless
13516 			 * of whether or not this bp has B_FAILFAST set.
13517 			 */
13518 			if (un->un_failfast_bp == NULL) {
13519 				/*
13520 				 * This is the first bp to meet a failfast
13521 				 * condition so save it on un_failfast_bp &
13522 				 * do normal retry processing. Do not enter
13523 				 * active failfast state yet. This marks
13524 				 * entry into the "failfast pending" state.
13525 				 */
13526 				un->un_failfast_bp = bp;
13527 
13528 			} else if (un->un_failfast_bp == bp) {
13529 				/*
13530 				 * This is the second time *this* bp has
13531 				 * encountered a failfast error condition,
13532 				 * so enter active failfast state & flush
13533 				 * queues as appropriate.
13534 				 */
13535 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13536 				un->un_failfast_bp = NULL;
13537 				sd_failfast_flushq(un);
13538 
13539 				/*
13540 				 * Fail this bp now if B_FAILFAST set;
13541 				 * otherwise continue with retries. (It would
13542 				 * be pretty ironic if this bp succeeded on a
13543 				 * subsequent retry after we just flushed all
13544 				 * the queues).
13545 				 */
13546 				if (bp->b_flags & B_FAILFAST) {
13547 					goto fail_command;
13548 				}
13549 
13550 #if !defined(lint) && !defined(__lint)
13551 			} else {
13552 				/*
13553 				 * If neither of the preceding conditionals
13554 				 * was true, it means that there is some
13555 				 * *other* bp that has met an initial failfast
13556 				 * condition and is currently either being
13557 				 * retried or is waiting to be retried. In
13558 				 * that case we should perform normal retry
13559 				 * processing on *this* bp, since there is a
13560 				 * chance that the current failfast condition
13561 				 * is transient and recoverable. If that does
13562 				 * not turn out to be the case, then retries
13563 				 * will be cleared when the wait queue is
13564 				 * flushed anyway.
13565 				 */
13566 #endif
13567 			}
13568 		}
13569 	} else {
13570 		/*
13571 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13572 		 * likely were able to at least establish some level of
13573 		 * communication with the target and subsequent commands
13574 		 * and/or retries are likely to get through to the target,
13575 		 * In this case we want to be aggressive about clearing
13576 		 * the failfast state. Note that this does not affect
13577 		 * the "failfast pending" condition.
13578 		 */
13579 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13580 	}
13581 
13582 
13583 	/*
13584 	 * Check the specified retry count to see if we can still do
13585 	 * any retries with this pkt before we should fail it.
13586 	 */
13587 	switch (retry_check_flag & SD_RETRIES_MASK) {
13588 	case SD_RETRIES_VICTIM:
13589 		/*
13590 		 * Check the victim retry count. If exhausted, then fall
13591 		 * thru & check against the standard retry count.
13592 		 */
13593 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13594 			/* Increment count & proceed with the retry */
13595 			xp->xb_victim_retry_count++;
13596 			break;
13597 		}
13598 		/* Victim retries exhausted, fall back to std. retries... */
13599 		/* FALLTHRU */
13600 
13601 	case SD_RETRIES_STANDARD:
13602 		if (xp->xb_retry_count >= un->un_retry_count) {
13603 			/* Retries exhausted, fail the command */
13604 			SD_TRACE(SD_LOG_IO_CORE, un,
13605 			    "sd_retry_command: retries exhausted!\n");
13606 			/*
13607 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13608 			 * commands with nonzero pkt_resid.
13609 			 */
13610 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13611 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13612 			    (pktp->pkt_resid != 0)) {
13613 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13614 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13615 					SD_UPDATE_B_RESID(bp, pktp);
13616 				}
13617 			}
13618 			goto fail_command;
13619 		}
13620 		xp->xb_retry_count++;
13621 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13622 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13623 		break;
13624 
13625 	case SD_RETRIES_UA:
13626 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13627 			/* Retries exhausted, fail the command */
13628 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13629 			    "Unit Attention retries exhausted. "
13630 			    "Check the target.\n");
13631 			goto fail_command;
13632 		}
13633 		xp->xb_ua_retry_count++;
13634 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13635 		    "sd_retry_command: retry count:%d\n",
13636 			xp->xb_ua_retry_count);
13637 		break;
13638 
13639 	case SD_RETRIES_BUSY:
		/* Uses xb_retry_count, but against the busy retry limit. */
13640 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13641 			/* Retries exhausted, fail the command */
13642 			SD_TRACE(SD_LOG_IO_CORE, un,
13643 			    "sd_retry_command: retries exhausted!\n");
13644 			goto fail_command;
13645 		}
13646 		xp->xb_retry_count++;
13647 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13648 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13649 		break;
13650 
13651 	case SD_RETRIES_NOCHECK:
13652 	default:
13653 		/* No retry count to check. Just proceed with the retry */
13654 		break;
13655 	}
13656 
	/* Every retry is issued with FLAG_HEAD set in the pkt. */
13657 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13658 
13659 	/*
13660 	 * If we were given a zero timeout, we must attempt to retry the
13661 	 * command immediately (ie, without a delay).
13662 	 */
13663 	if (retry_delay == 0) {
13664 		/*
13665 		 * Check some limiting conditions to see if we can actually
13666 		 * do the immediate retry.  If we cannot, then we must
13667 		 * fall back to queueing up a delayed retry.
13668 		 */
13669 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13670 			/*
13671 			 * We are at the throttle limit for the target,
13672 			 * fall back to delayed retry.
			 * The caller's statp is overridden as well.
13673 			 */
13674 			retry_delay = SD_BSY_TIMEOUT;
13675 			statp = kstat_waitq_enter;
13676 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13677 			    "sd_retry_command: immed. retry hit "
13678 			    "throttle!\n");
13679 		} else {
13680 			/*
13681 			 * We're clear to proceed with the immediate retry.
13682 			 * First call the user-provided function (if any)
13683 			 */
13684 			if (user_funcp != NULL) {
13685 				(*user_funcp)(un, bp, user_arg,
13686 				    SD_IMMEDIATE_RETRY_ISSUED);
13687 #ifdef __lock_lint
13688 				sd_print_incomplete_msg(un, bp, user_arg,
13689 				    SD_IMMEDIATE_RETRY_ISSUED);
13690 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13691 				    SD_IMMEDIATE_RETRY_ISSUED);
13692 				sd_print_sense_failed_msg(un, bp, user_arg,
13693 				    SD_IMMEDIATE_RETRY_ISSUED);
13694 #endif
13695 			}
13696 
13697 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13698 			    "sd_retry_command: issuing immediate retry\n");
13699 
13700 			/*
13701 			 * Call sd_start_cmds() to transport the command to
13702 			 * the target.
13703 			 */
13704 			sd_start_cmds(un, bp);
13705 
13706 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13707 			    "sd_retry_command exit\n");
13708 			return;
13709 		}
13710 	}
13711 
13712 	/*
13713 	 * Set up to retry the command after a delay.
13714 	 * First call the user-provided function (if any)
13715 	 */
13716 	if (user_funcp != NULL) {
13717 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13718 	}
13719 
13720 	sd_set_retry_bp(un, bp, retry_delay, statp);
13721 
13722 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13723 	return;
13724 
13725 fail_command:
13726 
	/* Tell the caller's function (if any) that no retry is issued. */
13727 	if (user_funcp != NULL) {
13728 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13729 	}
13730 
13731 fail_command_no_log:
13732 
	/* The ddi_in_panic() case jumps here, skipping user_funcp. */
13733 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13734 	    "sd_retry_command: returning failed command\n");
13735 
13736 	sd_return_failed_command(un, bp, failure_code);
13737 
13738 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13739 }
13740 
13741 
13742 /*
13743  *    Function: sd_set_retry_bp
13744  *
13745  * Description: Set up the given bp for retry.
 *		If no other command is awaiting retry, the bp becomes
 *		un_retry_bp; otherwise it is placed at (or near) the head
 *		of the wait queue.
13746  *
13747  *   Arguments: un - ptr to associated softstate
13748  *		bp - ptr to buf(9S) for the command
13749  *		retry_delay - time interval before issuing retry (may be 0)
13750  *		statp - optional pointer to kstat function
13751  *
 *       Notes: SD_MUTEX(un) must be held by the caller (asserted).
 *
13752  *     Context: May be called under interrupt context
13753  */
13754 
13755 static void
13756 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13757 	void (*statp)(kstat_io_t *))
13758 {
13759 	ASSERT(un != NULL);
13760 	ASSERT(mutex_owned(SD_MUTEX(un)));
13761 	ASSERT(bp != NULL);
13762 
13763 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13764 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13765 
13766 	/*
13767 	 * Indicate that the command is being retried. This will not allow any
13768 	 * other commands on the wait queue to be transported to the target
13769 	 * until this command has been completed (success or failure). The
13770 	 * "retry command" is not transported to the target until the given
13771 	 * time delay expires, unless the user specified a 0 retry_delay.
13772 	 *
13773 	 * Note: the timeout(9F) callback routine is what actually calls
13774 	 * sd_start_cmds() to transport the command, with the exception of a
13775 	 * zero retry_delay. The only current implementor of a zero retry delay
13776 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
13777 	 */
13778 	if (un->un_retry_bp == NULL) {
13779 		ASSERT(un->un_retry_statp == NULL);
13780 		un->un_retry_bp = bp;
13781 
13782 		/*
13783 		 * If the user has not specified a delay the command should
13784 		 * be queued and no timeout should be scheduled.
13785 		 */
13786 		if (retry_delay == 0) {
13787 			/*
13788 			 * Save the kstat pointer that will be used in the
13789 			 * call to SD_UPDATE_KSTATS() below, so that
13790 			 * sd_start_cmds() can correctly decrement the waitq
13791 			 * count when it is time to transport this command.
13792 			 */
13793 			un->un_retry_statp = statp;
13794 			goto done;
13795 		}
13796 	}
13797 
	/*
	 * When un_retry_bp was just set above with a nonzero retry_delay,
	 * control falls into the (un_retry_bp == bp) case below.
	 */
13798 	if (un->un_retry_bp == bp) {
13799 		/*
13800 		 * Save the kstat pointer that will be used in the call to
13801 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
13802 		 * correctly decrement the waitq count when it is time to
13803 		 * transport this command.
13804 		 */
13805 		un->un_retry_statp = statp;
13806 
13807 		/*
13808 		 * Schedule a timeout if:
13809 		 *   1) The user has specified a delay.
13810 		 *   2) There is not a START_STOP_UNIT callback pending.
13811 		 *
13812 		 * If no delay has been specified, then it is up to the caller
13813 		 * to ensure that IO processing continues without stalling.
13814 		 * Effectively, this means that the caller will issue the
13815 		 * required call to sd_start_cmds(). The START_STOP_UNIT
13816 		 * callback does this after the START STOP UNIT command has
13817 		 * completed. In either of these cases we should not schedule
13818 		 * a timeout callback here.  Also don't schedule the timeout if
13819 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
13820 		 */
13821 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
13822 		    (un->un_direct_priority_timeid == NULL)) {
13823 			un->un_retry_timeid =
13824 			    timeout(sd_start_retry_command, un, retry_delay);
13825 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13826 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
13827 			    " bp:0x%p un_retry_timeid:0x%p\n",
13828 			    un, bp, un->un_retry_timeid);
13829 		}
13830 	} else {
13831 		/*
13832 		 * We only get in here if there is already another command
13833 		 * waiting to be retried.  In this case, we just put the
13834 		 * given command onto the wait queue, so it can be transported
13835 		 * after the current retry command has completed.
13836 		 *
13837 		 * Also we have to make sure that if the command at the head
13838 		 * of the wait queue is the un_failfast_bp, that we do not
13839 		 * put ahead of it any other commands that are to be retried.
13840 		 */
13841 		if ((un->un_failfast_bp != NULL) &&
13842 		    (un->un_failfast_bp == un->un_waitq_headp)) {
13843 			/*
13844 			 * Enqueue this command AFTER the first command on
13845 			 * the wait queue (which is also un_failfast_bp).
13846 			 */
13847 			bp->av_forw = un->un_waitq_headp->av_forw;
13848 			un->un_waitq_headp->av_forw = bp;
13849 			if (un->un_waitq_headp == un->un_waitq_tailp) {
13850 				un->un_waitq_tailp = bp;
13851 			}
13852 		} else {
13853 			/* Enqueue this command at the head of the waitq. */
13854 			bp->av_forw = un->un_waitq_headp;
13855 			un->un_waitq_headp = bp;
13856 			if (un->un_waitq_tailp == NULL) {
13857 				un->un_waitq_tailp = bp;
13858 			}
13859 		}
13860 
		/*
		 * The bp is now on the waitq; if the caller supplied no
		 * kstat function, account for it with kstat_waitq_enter.
		 */
13861 		if (statp == NULL) {
13862 			statp = kstat_waitq_enter;
13863 		}
13864 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13865 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
13866 	}
13867 
13868 done:
	/* Apply the kstat update (if any) for this bp on every path. */
13869 	if (statp != NULL) {
13870 		SD_UPDATE_KSTATS(un, statp, bp);
13871 	}
13872 
13873 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13874 	    "sd_set_retry_bp: exit un:0x%p\n", un);
13875 }
13876 
13877 
13878 /*
13879  *    Function: sd_start_retry_command
13880  *
13881  * Description: Start the command that has been waiting on the target's
13882  *		retry queue.  Called from timeout(9F) context after the
13883  *		retry delay interval has expired.
13884  *
13885  *   Arguments: arg - pointer to associated softstate for the device.
13886  *
13887  *     Context: timeout(9F) thread context.  May not sleep.
13888  */
13889 
13890 static void
13891 sd_start_retry_command(void *arg)
13892 {
13893 	struct sd_lun *un = arg;
13894 
13895 	ASSERT(un != NULL);
13896 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13897 
13898 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13899 	    "sd_start_retry_command: entry\n");
13900 
13901 	mutex_enter(SD_MUTEX(un));
13902 
13903 	un->un_retry_timeid = NULL;
13904 
13905 	if (un->un_retry_bp != NULL) {
13906 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13907 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
13908 		    un, un->un_retry_bp);
13909 		sd_start_cmds(un, un->un_retry_bp);
13910 	}
13911 
13912 	mutex_exit(SD_MUTEX(un));
13913 
13914 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13915 	    "sd_start_retry_command: exit\n");
13916 }
13917 
13918 
13919 /*
13920  *    Function: sd_start_direct_priority_command
13921  *
13922  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
13923  *		received TRAN_BUSY when we called scsi_transport() to send it
13924  *		to the underlying HBA. This function is called from timeout(9F)
13925  *		context after the delay interval has expired.
13926  *
13927  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
13928  *
13929  *     Context: timeout(9F) thread context.  May not sleep.
13930  */
13931 
13932 static void
13933 sd_start_direct_priority_command(void *arg)
13934 {
13935 	struct buf	*priority_bp = arg;
13936 	struct sd_lun	*un;
13937 
13938 	ASSERT(priority_bp != NULL);
13939 	un = SD_GET_UN(priority_bp);
13940 	ASSERT(un != NULL);
13941 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13942 
13943 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13944 	    "sd_start_direct_priority_command: entry\n");
13945 
13946 	mutex_enter(SD_MUTEX(un));
13947 	un->un_direct_priority_timeid = NULL;
13948 	sd_start_cmds(un, priority_bp);
13949 	mutex_exit(SD_MUTEX(un));
13950 
13951 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13952 	    "sd_start_direct_priority_command: exit\n");
13953 }
13954 
13955 
13956 /*
13957  *    Function: sd_send_request_sense_command
13958  *
13959  * Description: Sends a REQUEST SENSE command to the target
13960  *
13961  *     Context: May be called from interrupt context.
13962  */
13963 
13964 static void
13965 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
13966 	struct scsi_pkt *pktp)
13967 {
13968 	ASSERT(bp != NULL);
13969 	ASSERT(un != NULL);
13970 	ASSERT(mutex_owned(SD_MUTEX(un)));
13971 
13972 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
13973 	    "entry: buf:0x%p\n", bp);
13974 
13975 	/*
13976 	 * If we are syncing or dumping, then fail the command to avoid a
13977 	 * recursive callback into scsi_transport(). Also fail the command
13978 	 * if we are suspended (legacy behavior).
13979 	 */
13980 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
13981 	    (un->un_state == SD_STATE_DUMPING)) {
13982 		sd_return_failed_command(un, bp, EIO);
13983 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13984 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
13985 		return;
13986 	}
13987 
13988 	/*
13989 	 * Retry the failed command and don't issue the request sense if:
13990 	 *    1) the sense buf is busy
13991 	 *    2) we have 1 or more outstanding commands on the target
13992 	 *    (the sense data will be cleared or invalidated any way)
13993 	 *
13994 	 * Note: There could be an issue with not checking a retry limit here,
13995 	 * the problem is determining which retry limit to check.
13996 	 */
13997 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
13998 		/* Don't retry if the command is flagged as non-retryable */
13999 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
14000 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14001 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
14002 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14003 			    "sd_send_request_sense_command: "
14004 			    "at full throttle, retrying exit\n");
14005 		} else {
14006 			sd_return_failed_command(un, bp, EIO);
14007 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14008 			    "sd_send_request_sense_command: "
14009 			    "at full throttle, non-retryable exit\n");
14010 		}
14011 		return;
14012 	}
14013 
14014 	sd_mark_rqs_busy(un, bp);
14015 	sd_start_cmds(un, un->un_rqs_bp);
14016 
14017 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14018 	    "sd_send_request_sense_command: exit\n");
14019 }
14020 
14021 
14022 /*
14023  *    Function: sd_mark_rqs_busy
14024  *
14025  * Description: Indicate that the request sense bp for this instance is
14026  *		in use.
14027  *
14028  *     Context: May be called under interrupt context
14029  */
14030 
14031 static void
14032 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14033 {
14034 	struct sd_xbuf	*sense_xp;
14035 
14036 	ASSERT(un != NULL);
14037 	ASSERT(bp != NULL);
14038 	ASSERT(mutex_owned(SD_MUTEX(un)));
14039 	ASSERT(un->un_sense_isbusy == 0);
14040 
14041 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14042 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14043 
14044 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14045 	ASSERT(sense_xp != NULL);
14046 
14047 	SD_INFO(SD_LOG_IO, un,
14048 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14049 
14050 	ASSERT(sense_xp->xb_pktp != NULL);
14051 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14052 	    == (FLAG_SENSING | FLAG_HEAD));
14053 
14054 	un->un_sense_isbusy = 1;
14055 	un->un_rqs_bp->b_resid = 0;
14056 	sense_xp->xb_pktp->pkt_resid  = 0;
14057 	sense_xp->xb_pktp->pkt_reason = 0;
14058 
14059 	/* So we can get back the bp at interrupt time! */
14060 	sense_xp->xb_sense_bp = bp;
14061 
14062 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14063 
14064 	/*
14065 	 * Mark this buf as awaiting sense data. (This is already set in
14066 	 * the pkt_flags for the RQS packet.)
14067 	 */
14068 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14069 
14070 	sense_xp->xb_retry_count	= 0;
14071 	sense_xp->xb_victim_retry_count = 0;
14072 	sense_xp->xb_ua_retry_count	= 0;
14073 	sense_xp->xb_dma_resid  = 0;
14074 
14075 	/* Clean up the fields for auto-request sense */
14076 	sense_xp->xb_sense_status = 0;
14077 	sense_xp->xb_sense_state  = 0;
14078 	sense_xp->xb_sense_resid  = 0;
14079 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14080 
14081 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14082 }
14083 
14084 
14085 /*
14086  *    Function: sd_mark_rqs_idle
14087  *
14088  * Description: SD_MUTEX must be held continuously through this routine
14089  *		to prevent reuse of the rqs struct before the caller can
14090  *		complete it's processing.
14091  *
14092  * Return Code: Pointer to the RQS buf
14093  *
14094  *     Context: May be called under interrupt context
14095  */
14096 
14097 static struct buf *
14098 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14099 {
14100 	struct buf *bp;
14101 	ASSERT(un != NULL);
14102 	ASSERT(sense_xp != NULL);
14103 	ASSERT(mutex_owned(SD_MUTEX(un)));
14104 	ASSERT(un->un_sense_isbusy != 0);
14105 
14106 	un->un_sense_isbusy = 0;
14107 	bp = sense_xp->xb_sense_bp;
14108 	sense_xp->xb_sense_bp = NULL;
14109 
14110 	/* This pkt is no longer interested in getting sense data */
14111 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14112 
14113 	return (bp);
14114 }
14115 
14116 
14117 
14118 /*
14119  *    Function: sd_alloc_rqs
14120  *
14121  * Description: Set up the unit to receive auto request sense data
14122  *
14123  * Return Code: DDI_SUCCESS or DDI_FAILURE
14124  *
14125  *     Context: Called under attach(9E) context
14126  */
14127 
14128 static int
14129 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14130 {
14131 	struct sd_xbuf *xp;
14132 
14133 	ASSERT(un != NULL);
14134 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14135 	ASSERT(un->un_rqs_bp == NULL);
14136 	ASSERT(un->un_rqs_pktp == NULL);
14137 
14138 	/*
14139 	 * First allocate the required buf and scsi_pkt structs, then set up
14140 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14141 	 */
14142 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14143 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14144 	if (un->un_rqs_bp == NULL) {
14145 		return (DDI_FAILURE);
14146 	}
14147 
14148 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14149 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14150 
14151 	if (un->un_rqs_pktp == NULL) {
14152 		sd_free_rqs(un);
14153 		return (DDI_FAILURE);
14154 	}
14155 
14156 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14157 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14158 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14159 
14160 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14161 
14162 	/* Set up the other needed members in the ARQ scsi_pkt. */
14163 	un->un_rqs_pktp->pkt_comp   = sdintr;
14164 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14165 	un->un_rqs_pktp->pkt_flags |=
14166 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14167 
14168 	/*
14169 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
14170 	 * provide any intpkt, destroypkt routines as we take care of
14171 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14172 	 */
14173 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14174 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14175 	xp->xb_pktp = un->un_rqs_pktp;
14176 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14177 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14178 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14179 
14180 	/*
14181 	 * Save the pointer to the request sense private bp so it can
14182 	 * be retrieved in sdintr.
14183 	 */
14184 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14185 	ASSERT(un->un_rqs_bp->b_private == xp);
14186 
14187 	/*
14188 	 * See if the HBA supports auto-request sense for the specified
14189 	 * target/lun. If it does, then try to enable it (if not already
14190 	 * enabled).
14191 	 *
14192 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14193 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14194 	 * return success.  However, in both of these cases ARQ is always
14195 	 * enabled and scsi_ifgetcap will always return true. The best approach
14196 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14197 	 *
14198 	 * The 3rd case is the HBA (adp) always return enabled on
14199 	 * scsi_ifgetgetcap even when it's not enable, the best approach
14200 	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
14201 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
14202 	 */
14203 
14204 	if (un->un_f_is_fibre == TRUE) {
14205 		un->un_f_arq_enabled = TRUE;
14206 	} else {
14207 #if defined(__i386) || defined(__amd64)
14208 		/*
14209 		 * Circumvent the Adaptec bug, remove this code when
14210 		 * the bug is fixed
14211 		 */
14212 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14213 #endif
14214 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14215 		case 0:
14216 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14217 				"sd_alloc_rqs: HBA supports ARQ\n");
14218 			/*
14219 			 * ARQ is supported by this HBA but currently is not
14220 			 * enabled. Attempt to enable it and if successful then
14221 			 * mark this instance as ARQ enabled.
14222 			 */
14223 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14224 				== 1) {
14225 				/* Successfully enabled ARQ in the HBA */
14226 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14227 					"sd_alloc_rqs: ARQ enabled\n");
14228 				un->un_f_arq_enabled = TRUE;
14229 			} else {
14230 				/* Could not enable ARQ in the HBA */
14231 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14232 				"sd_alloc_rqs: failed ARQ enable\n");
14233 				un->un_f_arq_enabled = FALSE;
14234 			}
14235 			break;
14236 		case 1:
14237 			/*
14238 			 * ARQ is supported by this HBA and is already enabled.
14239 			 * Just mark ARQ as enabled for this instance.
14240 			 */
14241 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14242 				"sd_alloc_rqs: ARQ already enabled\n");
14243 			un->un_f_arq_enabled = TRUE;
14244 			break;
14245 		default:
14246 			/*
14247 			 * ARQ is not supported by this HBA; disable it for this
14248 			 * instance.
14249 			 */
14250 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14251 				"sd_alloc_rqs: HBA does not support ARQ\n");
14252 			un->un_f_arq_enabled = FALSE;
14253 			break;
14254 		}
14255 	}
14256 
14257 	return (DDI_SUCCESS);
14258 }
14259 
14260 
14261 /*
14262  *    Function: sd_free_rqs
14263  *
14264  * Description: Cleanup for the pre-instance RQS command.
14265  *
14266  *     Context: Kernel thread context
14267  */
14268 
14269 static void
14270 sd_free_rqs(struct sd_lun *un)
14271 {
14272 	ASSERT(un != NULL);
14273 
14274 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14275 
14276 	/*
14277 	 * If consistent memory is bound to a scsi_pkt, the pkt
14278 	 * has to be destroyed *before* freeing the consistent memory.
14279 	 * Don't change the sequence of this operations.
14280 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
14281 	 * after it was freed in scsi_free_consistent_buf().
14282 	 */
14283 	if (un->un_rqs_pktp != NULL) {
14284 		scsi_destroy_pkt(un->un_rqs_pktp);
14285 		un->un_rqs_pktp = NULL;
14286 	}
14287 
14288 	if (un->un_rqs_bp != NULL) {
14289 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14290 		scsi_free_consistent_buf(un->un_rqs_bp);
14291 		un->un_rqs_bp = NULL;
14292 	}
14293 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14294 }
14295 
14296 
14297 
14298 /*
14299  *    Function: sd_reduce_throttle
14300  *
14301  * Description: Reduces the maximun # of outstanding commands on a
14302  *		target to the current number of outstanding commands.
14303  *		Queues a tiemout(9F) callback to restore the limit
14304  *		after a specified interval has elapsed.
14305  *		Typically used when we get a TRAN_BUSY return code
14306  *		back from scsi_transport().
14307  *
14308  *   Arguments: un - ptr to the sd_lun softstate struct
14309  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14310  *
14311  *     Context: May be called from interrupt context
14312  */
14313 
14314 static void
14315 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14316 {
14317 	ASSERT(un != NULL);
14318 	ASSERT(mutex_owned(SD_MUTEX(un)));
14319 	ASSERT(un->un_ncmds_in_transport >= 0);
14320 
14321 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14322 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14323 	    un, un->un_throttle, un->un_ncmds_in_transport);
14324 
14325 	if (un->un_throttle > 1) {
14326 		if (un->un_f_use_adaptive_throttle == TRUE) {
14327 			switch (throttle_type) {
14328 			case SD_THROTTLE_TRAN_BUSY:
14329 				if (un->un_busy_throttle == 0) {
14330 					un->un_busy_throttle = un->un_throttle;
14331 				}
14332 				break;
14333 			case SD_THROTTLE_QFULL:
14334 				un->un_busy_throttle = 0;
14335 				break;
14336 			default:
14337 				ASSERT(FALSE);
14338 			}
14339 
14340 			if (un->un_ncmds_in_transport > 0) {
14341 			    un->un_throttle = un->un_ncmds_in_transport;
14342 			}
14343 
14344 		} else {
14345 			if (un->un_ncmds_in_transport == 0) {
14346 				un->un_throttle = 1;
14347 			} else {
14348 				un->un_throttle = un->un_ncmds_in_transport;
14349 			}
14350 		}
14351 	}
14352 
14353 	/* Reschedule the timeout if none is currently active */
14354 	if (un->un_reset_throttle_timeid == NULL) {
14355 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14356 		    un, SD_THROTTLE_RESET_INTERVAL);
14357 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14358 		    "sd_reduce_throttle: timeout scheduled!\n");
14359 	}
14360 
14361 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14362 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14363 }
14364 
14365 
14366 
14367 /*
14368  *    Function: sd_restore_throttle
14369  *
14370  * Description: Callback function for timeout(9F).  Resets the current
14371  *		value of un->un_throttle to its default.
14372  *
14373  *   Arguments: arg - pointer to associated softstate for the device.
14374  *
14375  *     Context: May be called from interrupt context
14376  */
14377 
14378 static void
14379 sd_restore_throttle(void *arg)
14380 {
14381 	struct sd_lun	*un = arg;
14382 
14383 	ASSERT(un != NULL);
14384 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14385 
14386 	mutex_enter(SD_MUTEX(un));
14387 
14388 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14389 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14390 
14391 	un->un_reset_throttle_timeid = NULL;
14392 
14393 	if (un->un_f_use_adaptive_throttle == TRUE) {
14394 		/*
14395 		 * If un_busy_throttle is nonzero, then it contains the
14396 		 * value that un_throttle was when we got a TRAN_BUSY back
14397 		 * from scsi_transport(). We want to revert back to this
14398 		 * value.
14399 		 *
14400 		 * In the QFULL case, the throttle limit will incrementally
14401 		 * increase until it reaches max throttle.
14402 		 */
14403 		if (un->un_busy_throttle > 0) {
14404 			un->un_throttle = un->un_busy_throttle;
14405 			un->un_busy_throttle = 0;
14406 		} else {
14407 			/*
14408 			 * increase throttle by 10% open gate slowly, schedule
14409 			 * another restore if saved throttle has not been
14410 			 * reached
14411 			 */
14412 			short throttle;
14413 			if (sd_qfull_throttle_enable) {
14414 				throttle = un->un_throttle +
14415 				    max((un->un_throttle / 10), 1);
14416 				un->un_throttle =
14417 				    (throttle < un->un_saved_throttle) ?
14418 				    throttle : un->un_saved_throttle;
14419 				if (un->un_throttle < un->un_saved_throttle) {
14420 				    un->un_reset_throttle_timeid =
14421 					timeout(sd_restore_throttle,
14422 					un, SD_QFULL_THROTTLE_RESET_INTERVAL);
14423 				}
14424 			}
14425 		}
14426 
14427 		/*
14428 		 * If un_throttle has fallen below the low-water mark, we
14429 		 * restore the maximum value here (and allow it to ratchet
14430 		 * down again if necessary).
14431 		 */
14432 		if (un->un_throttle < un->un_min_throttle) {
14433 			un->un_throttle = un->un_saved_throttle;
14434 		}
14435 	} else {
14436 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14437 		    "restoring limit from 0x%x to 0x%x\n",
14438 		    un->un_throttle, un->un_saved_throttle);
14439 		un->un_throttle = un->un_saved_throttle;
14440 	}
14441 
14442 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14443 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14444 
14445 	sd_start_cmds(un, NULL);
14446 
14447 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14448 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14449 	    un, un->un_throttle);
14450 
14451 	mutex_exit(SD_MUTEX(un));
14452 
14453 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14454 }
14455 
14456 /*
14457  *    Function: sdrunout
14458  *
14459  * Description: Callback routine for scsi_init_pkt when a resource allocation
14460  *		fails.
14461  *
14462  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14463  *		soft state instance.
14464  *
14465  * Return Code: The scsi_init_pkt routine allows for the callback function to
14466  *		return a 0 indicating the callback should be rescheduled or a 1
14467  *		indicating not to reschedule. This routine always returns 1
14468  *		because the driver always provides a callback function to
14469  *		scsi_init_pkt. This results in a callback always being scheduled
14470  *		(via the scsi_init_pkt callback implementation) if a resource
14471  *		failure occurs.
14472  *
14473  *     Context: This callback function may not block or call routines that block
14474  *
14475  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14476  *		request persisting at the head of the list which cannot be
14477  *		satisfied even after multiple retries. In the future the driver
14478  *		may implement some time of maximum runout count before failing
14479  *		an I/O.
14480  */
14481 
14482 static int
14483 sdrunout(caddr_t arg)
14484 {
14485 	struct sd_lun	*un = (struct sd_lun *)arg;
14486 
14487 	ASSERT(un != NULL);
14488 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14489 
14490 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14491 
14492 	mutex_enter(SD_MUTEX(un));
14493 	sd_start_cmds(un, NULL);
14494 	mutex_exit(SD_MUTEX(un));
14495 	/*
14496 	 * This callback routine always returns 1 (i.e. do not reschedule)
14497 	 * because we always specify sdrunout as the callback handler for
14498 	 * scsi_init_pkt inside the call to sd_start_cmds.
14499 	 */
14500 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14501 	return (1);
14502 }
14503 
14504 
14505 /*
14506  *    Function: sdintr
14507  *
14508  * Description: Completion callback routine for scsi_pkt(9S) structs
14509  *		sent to the HBA driver via scsi_transport(9F).
14510  *
14511  *     Context: Interrupt context
14512  */
14513 
14514 static void
14515 sdintr(struct scsi_pkt *pktp)
14516 {
14517 	struct buf	*bp;
14518 	struct sd_xbuf	*xp;
14519 	struct sd_lun	*un;
14520 
14521 	ASSERT(pktp != NULL);
14522 	bp = (struct buf *)pktp->pkt_private;
14523 	ASSERT(bp != NULL);
14524 	xp = SD_GET_XBUF(bp);
14525 	ASSERT(xp != NULL);
14526 	ASSERT(xp->xb_pktp != NULL);
14527 	un = SD_GET_UN(bp);
14528 	ASSERT(un != NULL);
14529 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14530 
14531 #ifdef SD_FAULT_INJECTION
14532 
14533 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
14534 	/* SD FaultInjection */
14535 	sd_faultinjection(pktp);
14536 
14537 #endif /* SD_FAULT_INJECTION */
14538 
14539 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14540 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14541 
14542 	mutex_enter(SD_MUTEX(un));
14543 
14544 	/* Reduce the count of the #commands currently in transport */
14545 	un->un_ncmds_in_transport--;
14546 	ASSERT(un->un_ncmds_in_transport >= 0);
14547 
14548 	/* Increment counter to indicate that the callback routine is active */
14549 	un->un_in_callback++;
14550 
14551 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14552 
14553 #ifdef	SDDEBUG
14554 	if (bp == un->un_retry_bp) {
14555 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14556 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14557 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14558 	}
14559 #endif
14560 
14561 	/*
14562 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
14563 	 */
14564 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14565 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14566 			    "Device is gone\n");
14567 		sd_return_failed_command(un, bp, EIO);
14568 		goto exit;
14569 	}
14570 
14571 	/*
14572 	 * First see if the pkt has auto-request sense data with it....
14573 	 * Look at the packet state first so we don't take a performance
14574 	 * hit looking at the arq enabled flag unless absolutely necessary.
14575 	 */
14576 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14577 	    (un->un_f_arq_enabled == TRUE)) {
14578 		/*
14579 		 * The HBA did an auto request sense for this command so check
14580 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14581 		 * driver command that should not be retried.
14582 		 */
14583 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14584 			/*
14585 			 * Save the relevant sense info into the xp for the
14586 			 * original cmd.
14587 			 */
14588 			struct scsi_arq_status *asp;
14589 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14590 			xp->xb_sense_status =
14591 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14592 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14593 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14594 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14595 			    min(sizeof (struct scsi_extended_sense),
14596 			    SENSE_LENGTH));
14597 
14598 			/* fail the command */
14599 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14600 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14601 			sd_return_failed_command(un, bp, EIO);
14602 			goto exit;
14603 		}
14604 
14605 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14606 		/*
14607 		 * We want to either retry or fail this command, so free
14608 		 * the DMA resources here.  If we retry the command then
14609 		 * the DMA resources will be reallocated in sd_start_cmds().
14610 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14611 		 * causes the *entire* transfer to start over again from the
14612 		 * beginning of the request, even for PARTIAL chunks that
14613 		 * have already transferred successfully.
14614 		 */
14615 		if ((un->un_f_is_fibre == TRUE) &&
14616 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14617 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14618 			scsi_dmafree(pktp);
14619 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14620 		}
14621 #endif
14622 
14623 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14624 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14625 
14626 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14627 		goto exit;
14628 	}
14629 
14630 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14631 	if (pktp->pkt_flags & FLAG_SENSING)  {
14632 		/* This pktp is from the unit's REQUEST_SENSE command */
14633 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14634 		    "sdintr: sd_handle_request_sense\n");
14635 		sd_handle_request_sense(un, bp, xp, pktp);
14636 		goto exit;
14637 	}
14638 
14639 	/*
14640 	 * Check to see if the command successfully completed as requested;
14641 	 * this is the most common case (and also the hot performance path).
14642 	 *
14643 	 * Requirements for successful completion are:
14644 	 * pkt_reason is CMD_CMPLT and packet status is status good.
14645 	 * In addition:
14646 	 * - A residual of zero indicates successful completion no matter what
14647 	 *   the command is.
14648 	 * - If the residual is not zero and the command is not a read or
14649 	 *   write, then it's still defined as successful completion. In other
14650 	 *   words, if the command is a read or write the residual must be
14651 	 *   zero for successful completion.
14652 	 * - If the residual is not zero and the command is a read or
14653 	 *   write, and it's a USCSICMD, then it's still defined as
14654 	 *   successful completion.
14655 	 */
14656 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14657 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14658 
14659 		/*
14660 		 * Since this command is returned with a good status, we
14661 		 * can reset the count for Sonoma failover.
14662 		 */
14663 		un->un_sonoma_failure_count = 0;
14664 
14665 		/*
14666 		 * Return all USCSI commands on good status
14667 		 */
14668 		if (pktp->pkt_resid == 0) {
14669 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14670 			    "sdintr: returning command for resid == 0\n");
14671 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14672 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14673 			SD_UPDATE_B_RESID(bp, pktp);
14674 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14675 			    "sdintr: returning command for resid != 0\n");
14676 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14677 			SD_UPDATE_B_RESID(bp, pktp);
14678 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14679 				"sdintr: returning uscsi command\n");
14680 		} else {
14681 			goto not_successful;
14682 		}
14683 		sd_return_command(un, bp);
14684 
14685 		/*
14686 		 * Decrement counter to indicate that the callback routine
14687 		 * is done.
14688 		 */
14689 		un->un_in_callback--;
14690 		ASSERT(un->un_in_callback >= 0);
14691 		mutex_exit(SD_MUTEX(un));
14692 
14693 		return;
14694 	}
14695 
14696 not_successful:
14697 
14698 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14699 	/*
14700 	 * The following is based upon knowledge of the underlying transport
14701 	 * and its use of DMA resources.  This code should be removed when
14702 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14703 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14704 	 * and sd_start_cmds().
14705 	 *
14706 	 * Free any DMA resources associated with this command if there
14707 	 * is a chance it could be retried or enqueued for later retry.
14708 	 * If we keep the DMA binding then mpxio cannot reissue the
14709 	 * command on another path whenever a path failure occurs.
14710 	 *
14711 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14712 	 * causes the *entire* transfer to start over again from the
14713 	 * beginning of the request, even for PARTIAL chunks that
14714 	 * have already transferred successfully.
14715 	 *
14716 	 * This is only done for non-uscsi commands (and also skipped for the
14717 	 * driver's internal RQS command). Also just do this for Fibre Channel
14718 	 * devices as these are the only ones that support mpxio.
14719 	 */
14720 	if ((un->un_f_is_fibre == TRUE) &&
14721 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14722 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14723 		scsi_dmafree(pktp);
14724 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14725 	}
14726 #endif
14727 
14728 	/*
14729 	 * The command did not successfully complete as requested so check
14730 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14731 	 * driver command that should not be retried so just return. If
14732 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14733 	 */
14734 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14735 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14736 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14737 		/*
14738 		 * Issue a request sense if a check condition caused the error
14739 		 * (we handle the auto request sense case above), otherwise
14740 		 * just fail the command.
14741 		 */
14742 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14743 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14744 			sd_send_request_sense_command(un, bp, pktp);
14745 		} else {
14746 			sd_return_failed_command(un, bp, EIO);
14747 		}
14748 		goto exit;
14749 	}
14750 
14751 	/*
14752 	 * The command did not successfully complete as requested so process
14753 	 * the error, retry, and/or attempt recovery.
14754 	 */
14755 	switch (pktp->pkt_reason) {
14756 	case CMD_CMPLT:
14757 		switch (SD_GET_PKT_STATUS(pktp)) {
14758 		case STATUS_GOOD:
14759 			/*
14760 			 * The command completed successfully with a non-zero
14761 			 * residual
14762 			 */
14763 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14764 			    "sdintr: STATUS_GOOD \n");
14765 			sd_pkt_status_good(un, bp, xp, pktp);
14766 			break;
14767 
14768 		case STATUS_CHECK:
14769 		case STATUS_TERMINATED:
14770 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14771 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
14772 			sd_pkt_status_check_condition(un, bp, xp, pktp);
14773 			break;
14774 
14775 		case STATUS_BUSY:
14776 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14777 			    "sdintr: STATUS_BUSY\n");
14778 			sd_pkt_status_busy(un, bp, xp, pktp);
14779 			break;
14780 
14781 		case STATUS_RESERVATION_CONFLICT:
14782 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14783 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
14784 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
14785 			break;
14786 
14787 		case STATUS_QFULL:
14788 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14789 			    "sdintr: STATUS_QFULL\n");
14790 			sd_pkt_status_qfull(un, bp, xp, pktp);
14791 			break;
14792 
14793 		case STATUS_MET:
14794 		case STATUS_INTERMEDIATE:
14795 		case STATUS_SCSI2:
14796 		case STATUS_INTERMEDIATE_MET:
14797 		case STATUS_ACA_ACTIVE:
14798 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14799 			    "Unexpected SCSI status received: 0x%x\n",
14800 			    SD_GET_PKT_STATUS(pktp));
14801 			sd_return_failed_command(un, bp, EIO);
14802 			break;
14803 
14804 		default:
14805 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14806 			    "Invalid SCSI status received: 0x%x\n",
14807 			    SD_GET_PKT_STATUS(pktp));
14808 			sd_return_failed_command(un, bp, EIO);
14809 			break;
14810 
14811 		}
14812 		break;
14813 
14814 	case CMD_INCOMPLETE:
14815 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14816 		    "sdintr:  CMD_INCOMPLETE\n");
14817 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
14818 		break;
14819 	case CMD_TRAN_ERR:
14820 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14821 		    "sdintr: CMD_TRAN_ERR\n");
14822 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
14823 		break;
14824 	case CMD_RESET:
14825 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14826 		    "sdintr: CMD_RESET \n");
14827 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
14828 		break;
14829 	case CMD_ABORTED:
14830 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14831 		    "sdintr: CMD_ABORTED \n");
14832 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
14833 		break;
14834 	case CMD_TIMEOUT:
14835 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14836 		    "sdintr: CMD_TIMEOUT\n");
14837 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
14838 		break;
14839 	case CMD_UNX_BUS_FREE:
14840 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14841 		    "sdintr: CMD_UNX_BUS_FREE \n");
14842 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
14843 		break;
14844 	case CMD_TAG_REJECT:
14845 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14846 		    "sdintr: CMD_TAG_REJECT\n");
14847 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
14848 		break;
14849 	default:
14850 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14851 		    "sdintr: default\n");
14852 		sd_pkt_reason_default(un, bp, xp, pktp);
14853 		break;
14854 	}
14855 
14856 exit:
14857 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
14858 
14859 	/* Decrement counter to indicate that the callback routine is done. */
14860 	un->un_in_callback--;
14861 	ASSERT(un->un_in_callback >= 0);
14862 
14863 	/*
14864 	 * At this point, the pkt has been dispatched, ie, it is either
14865 	 * being re-tried or has been returned to its caller and should
14866 	 * not be referenced.
14867 	 */
14868 
14869 	mutex_exit(SD_MUTEX(un));
14870 }
14871 
14872 
14873 /*
14874  *    Function: sd_print_incomplete_msg
14875  *
14876  * Description: Prints the error message for a CMD_INCOMPLETE error.
14877  *
14878  *   Arguments: un - ptr to associated softstate for the device.
14879  *		bp - ptr to the buf(9S) for the command.
14880  *		arg - message string ptr
14881  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
14882  *			or SD_NO_RETRY_ISSUED.
14883  *
14884  *     Context: May be called under interrupt context
14885  */
14886 
14887 static void
14888 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
14889 {
14890 	struct scsi_pkt	*pktp;
14891 	char	*msgp;
14892 	char	*cmdp = arg;
14893 
14894 	ASSERT(un != NULL);
14895 	ASSERT(mutex_owned(SD_MUTEX(un)));
14896 	ASSERT(bp != NULL);
14897 	ASSERT(arg != NULL);
14898 	pktp = SD_GET_PKTP(bp);
14899 	ASSERT(pktp != NULL);
14900 
14901 	switch (code) {
14902 	case SD_DELAYED_RETRY_ISSUED:
14903 	case SD_IMMEDIATE_RETRY_ISSUED:
14904 		msgp = "retrying";
14905 		break;
14906 	case SD_NO_RETRY_ISSUED:
14907 	default:
14908 		msgp = "giving up";
14909 		break;
14910 	}
14911 
14912 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
14913 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14914 		    "incomplete %s- %s\n", cmdp, msgp);
14915 	}
14916 }
14917 
14918 
14919 
14920 /*
14921  *    Function: sd_pkt_status_good
14922  *
14923  * Description: Processing for a STATUS_GOOD code in pkt_status.
14924  *
14925  *     Context: May be called under interrupt context
14926  */
14927 
14928 static void
14929 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
14930 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
14931 {
14932 	char	*cmdp;
14933 
14934 	ASSERT(un != NULL);
14935 	ASSERT(mutex_owned(SD_MUTEX(un)));
14936 	ASSERT(bp != NULL);
14937 	ASSERT(xp != NULL);
14938 	ASSERT(pktp != NULL);
14939 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
14940 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
14941 	ASSERT(pktp->pkt_resid != 0);
14942 
14943 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
14944 
14945 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
14946 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
14947 	case SCMD_READ:
14948 		cmdp = "read";
14949 		break;
14950 	case SCMD_WRITE:
14951 		cmdp = "write";
14952 		break;
14953 	default:
14954 		SD_UPDATE_B_RESID(bp, pktp);
14955 		sd_return_command(un, bp);
14956 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14957 		return;
14958 	}
14959 
14960 	/*
14961 	 * See if we can retry the read/write, preferrably immediately.
14962 	 * If retries are exhaused, then sd_retry_command() will update
14963 	 * the b_resid count.
14964 	 */
14965 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
14966 	    cmdp, EIO, (clock_t)0, NULL);
14967 
14968 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14969 }
14970 
14971 
14972 
14973 
14974 
14975 /*
14976  *    Function: sd_handle_request_sense
14977  *
14978  * Description: Processing for non-auto Request Sense command.
14979  *
14980  *   Arguments: un - ptr to associated softstate
14981  *		sense_bp - ptr to buf(9S) for the RQS command
14982  *		sense_xp - ptr to the sd_xbuf for the RQS command
14983  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
14984  *
14985  *     Context: May be called under interrupt context
14986  */
14987 
14988 static void
14989 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
14990 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
14991 {
14992 	struct buf	*cmd_bp;	/* buf for the original command */
14993 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
14994 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
14995 
14996 	ASSERT(un != NULL);
14997 	ASSERT(mutex_owned(SD_MUTEX(un)));
14998 	ASSERT(sense_bp != NULL);
14999 	ASSERT(sense_xp != NULL);
15000 	ASSERT(sense_pktp != NULL);
15001 
15002 	/*
15003 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
15004 	 * RQS command and not the original command.
15005 	 */
15006 	ASSERT(sense_pktp == un->un_rqs_pktp);
15007 	ASSERT(sense_bp   == un->un_rqs_bp);
15008 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
15009 	    (FLAG_SENSING | FLAG_HEAD));
15010 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
15011 	    FLAG_SENSING) == FLAG_SENSING);
15012 
15013 	/* These are the bp, xp, and pktp for the original command */
15014 	cmd_bp = sense_xp->xb_sense_bp;
15015 	cmd_xp = SD_GET_XBUF(cmd_bp);
15016 	cmd_pktp = SD_GET_PKTP(cmd_bp);
15017 
15018 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
15019 		/*
15020 		 * The REQUEST SENSE command failed.  Release the REQUEST
15021 		 * SENSE command for re-use, get back the bp for the original
15022 		 * command, and attempt to re-try the original command if
15023 		 * FLAG_DIAGNOSE is not set in the original packet.
15024 		 */
15025 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15026 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15027 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15028 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15029 			    NULL, NULL, EIO, (clock_t)0, NULL);
15030 			return;
15031 		}
15032 	}
15033 
15034 	/*
15035 	 * Save the relevant sense info into the xp for the original cmd.
15036 	 *
15037 	 * Note: if the request sense failed the state info will be zero
15038 	 * as set in sd_mark_rqs_busy()
15039 	 */
15040 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15041 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15042 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15043 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15044 
15045 	/*
15046 	 *  Free up the RQS command....
15047 	 *  NOTE:
15048 	 *	Must do this BEFORE calling sd_validate_sense_data!
15049 	 *	sd_validate_sense_data may return the original command in
15050 	 *	which case the pkt will be freed and the flags can no
15051 	 *	longer be touched.
15052 	 *	SD_MUTEX is held through this process until the command
15053 	 *	is dispatched based upon the sense data, so there are
15054 	 *	no race conditions.
15055 	 */
15056 	(void) sd_mark_rqs_idle(un, sense_xp);
15057 
15058 	/*
15059 	 * For a retryable command see if we have valid sense data, if so then
15060 	 * turn it over to sd_decode_sense() to figure out the right course of
15061 	 * action. Just fail a non-retryable command.
15062 	 */
15063 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15064 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15065 		    SD_SENSE_DATA_IS_VALID) {
15066 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15067 		}
15068 	} else {
15069 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15070 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15071 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15072 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15073 		sd_return_failed_command(un, cmd_bp, EIO);
15074 	}
15075 }
15076 
15077 
15078 
15079 
15080 /*
15081  *    Function: sd_handle_auto_request_sense
15082  *
15083  * Description: Processing for auto-request sense information.
15084  *
15085  *   Arguments: un - ptr to associated softstate
15086  *		bp - ptr to buf(9S) for the command
15087  *		xp - ptr to the sd_xbuf for the command
15088  *		pktp - ptr to the scsi_pkt(9S) for the command
15089  *
15090  *     Context: May be called under interrupt context
15091  */
15092 
15093 static void
15094 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15095 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15096 {
15097 	struct scsi_arq_status *asp;
15098 
15099 	ASSERT(un != NULL);
15100 	ASSERT(mutex_owned(SD_MUTEX(un)));
15101 	ASSERT(bp != NULL);
15102 	ASSERT(xp != NULL);
15103 	ASSERT(pktp != NULL);
15104 	ASSERT(pktp != un->un_rqs_pktp);
15105 	ASSERT(bp   != un->un_rqs_bp);
15106 
15107 	/*
15108 	 * For auto-request sense, we get a scsi_arq_status back from
15109 	 * the HBA, with the sense data in the sts_sensedata member.
15110 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15111 	 */
15112 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
15113 
15114 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15115 		/*
15116 		 * The auto REQUEST SENSE failed; see if we can re-try
15117 		 * the original command.
15118 		 */
15119 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15120 		    "auto request sense failed (reason=%s)\n",
15121 		    scsi_rname(asp->sts_rqpkt_reason));
15122 
15123 		sd_reset_target(un, pktp);
15124 
15125 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15126 		    NULL, NULL, EIO, (clock_t)0, NULL);
15127 		return;
15128 	}
15129 
15130 	/* Save the relevant sense info into the xp for the original cmd. */
15131 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15132 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15133 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
15134 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15135 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
15136 
15137 	/*
15138 	 * See if we have valid sense data, if so then turn it over to
15139 	 * sd_decode_sense() to figure out the right course of action.
15140 	 */
15141 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15142 		sd_decode_sense(un, bp, xp, pktp);
15143 	}
15144 }
15145 
15146 
15147 /*
15148  *    Function: sd_print_sense_failed_msg
15149  *
15150  * Description: Print log message when RQS has failed.
15151  *
15152  *   Arguments: un - ptr to associated softstate
15153  *		bp - ptr to buf(9S) for the command
15154  *		arg - generic message string ptr
15155  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15156  *			or SD_NO_RETRY_ISSUED
15157  *
15158  *     Context: May be called from interrupt context
15159  */
15160 
15161 static void
15162 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15163 	int code)
15164 {
15165 	char	*msgp = arg;
15166 
15167 	ASSERT(un != NULL);
15168 	ASSERT(mutex_owned(SD_MUTEX(un)));
15169 	ASSERT(bp != NULL);
15170 
15171 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15172 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
15173 	}
15174 }
15175 
15176 
15177 /*
15178  *    Function: sd_validate_sense_data
15179  *
15180  * Description: Check the given sense data for validity.
15181  *		If the sense data is not valid, the command will
15182  *		be either failed or retried!
15183  *
15184  * Return Code: SD_SENSE_DATA_IS_INVALID
15185  *		SD_SENSE_DATA_IS_VALID
15186  *
15187  *     Context: May be called from interrupt context
15188  */
15189 
15190 static int
15191 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15192 {
15193 	struct scsi_extended_sense *esp;
15194 	struct	scsi_pkt *pktp;
15195 	size_t	actual_len;
15196 	char	*msgp = NULL;
15197 
15198 	ASSERT(un != NULL);
15199 	ASSERT(mutex_owned(SD_MUTEX(un)));
15200 	ASSERT(bp != NULL);
15201 	ASSERT(bp != un->un_rqs_bp);
15202 	ASSERT(xp != NULL);
15203 
15204 	pktp = SD_GET_PKTP(bp);
15205 	ASSERT(pktp != NULL);
15206 
15207 	/*
15208 	 * Check the status of the RQS command (auto or manual).
15209 	 */
15210 	switch (xp->xb_sense_status & STATUS_MASK) {
15211 	case STATUS_GOOD:
15212 		break;
15213 
15214 	case STATUS_RESERVATION_CONFLICT:
15215 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15216 		return (SD_SENSE_DATA_IS_INVALID);
15217 
15218 	case STATUS_BUSY:
15219 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15220 		    "Busy Status on REQUEST SENSE\n");
15221 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15222 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15223 		return (SD_SENSE_DATA_IS_INVALID);
15224 
15225 	case STATUS_QFULL:
15226 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15227 		    "QFULL Status on REQUEST SENSE\n");
15228 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15229 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15230 		return (SD_SENSE_DATA_IS_INVALID);
15231 
15232 	case STATUS_CHECK:
15233 	case STATUS_TERMINATED:
15234 		msgp = "Check Condition on REQUEST SENSE\n";
15235 		goto sense_failed;
15236 
15237 	default:
15238 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
15239 		goto sense_failed;
15240 	}
15241 
15242 	/*
15243 	 * See if we got the minimum required amount of sense data.
15244 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
15245 	 * or less.
15246 	 */
15247 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
15248 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15249 	    (actual_len == 0)) {
15250 		msgp = "Request Sense couldn't get sense data\n";
15251 		goto sense_failed;
15252 	}
15253 
15254 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15255 		msgp = "Not enough sense information\n";
15256 		goto sense_failed;
15257 	}
15258 
15259 	/*
15260 	 * We require the extended sense data
15261 	 */
15262 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15263 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15264 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15265 			static char tmp[8];
15266 			static char buf[148];
15267 			char *p = (char *)(xp->xb_sense_data);
15268 			int i;
15269 
15270 			mutex_enter(&sd_sense_mutex);
15271 			(void) strcpy(buf, "undecodable sense information:");
15272 			for (i = 0; i < actual_len; i++) {
15273 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15274 				(void) strcpy(&buf[strlen(buf)], tmp);
15275 			}
15276 			i = strlen(buf);
15277 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15278 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
15279 			mutex_exit(&sd_sense_mutex);
15280 		}
15281 		/* Note: Legacy behavior, fail the command with no retry */
15282 		sd_return_failed_command(un, bp, EIO);
15283 		return (SD_SENSE_DATA_IS_INVALID);
15284 	}
15285 
15286 	/*
15287 	 * Check that es_code is valid (es_class concatenated with es_code
15288 	 * make up the "response code" field.  es_class will always be 7, so
15289 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
15290 	 * format.
15291 	 */
15292 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15293 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15294 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15295 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15296 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15297 		goto sense_failed;
15298 	}
15299 
15300 	return (SD_SENSE_DATA_IS_VALID);
15301 
15302 sense_failed:
15303 	/*
15304 	 * If the request sense failed (for whatever reason), attempt
15305 	 * to retry the original command.
15306 	 */
15307 #if defined(__i386) || defined(__amd64)
15308 	/*
15309 	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
15310 	 * sddef.h for Sparc platform, and x86 uses 1 binary
15311 	 * for both SCSI/FC.
15312 	 * The SD_RETRY_DELAY value need to be adjusted here
15313 	 * when SD_RETRY_DELAY change in sddef.h
15314 	 */
15315 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15316 	    sd_print_sense_failed_msg, msgp, EIO,
15317 		un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
15318 #else
15319 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15320 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15321 #endif
15322 
15323 	return (SD_SENSE_DATA_IS_INVALID);
15324 }
15325 
15326 
15327 
15328 /*
15329  *    Function: sd_decode_sense
15330  *
15331  * Description: Take recovery action(s) when SCSI Sense Data is received.
15332  *
15333  *     Context: Interrupt context.
15334  */
15335 
15336 static void
15337 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15338 	struct scsi_pkt *pktp)
15339 {
15340 	uint8_t sense_key;
15341 
15342 	ASSERT(un != NULL);
15343 	ASSERT(mutex_owned(SD_MUTEX(un)));
15344 	ASSERT(bp != NULL);
15345 	ASSERT(bp != un->un_rqs_bp);
15346 	ASSERT(xp != NULL);
15347 	ASSERT(pktp != NULL);
15348 
15349 	sense_key = scsi_sense_key(xp->xb_sense_data);
15350 
15351 	switch (sense_key) {
15352 	case KEY_NO_SENSE:
15353 		sd_sense_key_no_sense(un, bp, xp, pktp);
15354 		break;
15355 	case KEY_RECOVERABLE_ERROR:
15356 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15357 		    bp, xp, pktp);
15358 		break;
15359 	case KEY_NOT_READY:
15360 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15361 		    bp, xp, pktp);
15362 		break;
15363 	case KEY_MEDIUM_ERROR:
15364 	case KEY_HARDWARE_ERROR:
15365 		sd_sense_key_medium_or_hardware_error(un,
15366 		    xp->xb_sense_data, bp, xp, pktp);
15367 		break;
15368 	case KEY_ILLEGAL_REQUEST:
15369 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15370 		break;
15371 	case KEY_UNIT_ATTENTION:
15372 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15373 		    bp, xp, pktp);
15374 		break;
15375 	case KEY_WRITE_PROTECT:
15376 	case KEY_VOLUME_OVERFLOW:
15377 	case KEY_MISCOMPARE:
15378 		sd_sense_key_fail_command(un, bp, xp, pktp);
15379 		break;
15380 	case KEY_BLANK_CHECK:
15381 		sd_sense_key_blank_check(un, bp, xp, pktp);
15382 		break;
15383 	case KEY_ABORTED_COMMAND:
15384 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15385 		break;
15386 	case KEY_VENDOR_UNIQUE:
15387 	case KEY_COPY_ABORTED:
15388 	case KEY_EQUAL:
15389 	case KEY_RESERVED:
15390 	default:
15391 		sd_sense_key_default(un, xp->xb_sense_data,
15392 		    bp, xp, pktp);
15393 		break;
15394 	}
15395 }
15396 
15397 
15398 /*
15399  *    Function: sd_dump_memory
15400  *
15401  * Description: Debug logging routine to print the contents of a user provided
15402  *		buffer. The output of the buffer is broken up into 256 byte
15403  *		segments due to a size constraint of the scsi_log.
15404  *		implementation.
15405  *
15406  *   Arguments: un - ptr to softstate
15407  *		comp - component mask
15408  *		title - "title" string to preceed data when printed
15409  *		data - ptr to data block to be printed
15410  *		len - size of data block to be printed
15411  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15412  *
15413  *     Context: May be called from interrupt context
15414  */
15415 
15416 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15417 
15418 static char *sd_dump_format_string[] = {
15419 		" 0x%02x",
15420 		" %c"
15421 };
15422 
15423 static void
15424 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15425     int len, int fmt)
15426 {
15427 	int	i, j;
15428 	int	avail_count;
15429 	int	start_offset;
15430 	int	end_offset;
15431 	size_t	entry_len;
15432 	char	*bufp;
15433 	char	*local_buf;
15434 	char	*format_string;
15435 
15436 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15437 
15438 	/*
15439 	 * In the debug version of the driver, this function is called from a
15440 	 * number of places which are NOPs in the release driver.
15441 	 * The debug driver therefore has additional methods of filtering
15442 	 * debug output.
15443 	 */
15444 #ifdef SDDEBUG
15445 	/*
15446 	 * In the debug version of the driver we can reduce the amount of debug
15447 	 * messages by setting sd_error_level to something other than
15448 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15449 	 * sd_component_mask.
15450 	 */
15451 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15452 	    (sd_error_level != SCSI_ERR_ALL)) {
15453 		return;
15454 	}
15455 	if (((sd_component_mask & comp) == 0) ||
15456 	    (sd_error_level != SCSI_ERR_ALL)) {
15457 		return;
15458 	}
15459 #else
15460 	if (sd_error_level != SCSI_ERR_ALL) {
15461 		return;
15462 	}
15463 #endif
15464 
15465 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15466 	bufp = local_buf;
15467 	/*
15468 	 * Available length is the length of local_buf[], minus the
15469 	 * length of the title string, minus one for the ":", minus
15470 	 * one for the newline, minus one for the NULL terminator.
15471 	 * This gives the #bytes available for holding the printed
15472 	 * values from the given data buffer.
15473 	 */
15474 	if (fmt == SD_LOG_HEX) {
15475 		format_string = sd_dump_format_string[0];
15476 	} else /* SD_LOG_CHAR */ {
15477 		format_string = sd_dump_format_string[1];
15478 	}
15479 	/*
15480 	 * Available count is the number of elements from the given
15481 	 * data buffer that we can fit into the available length.
15482 	 * This is based upon the size of the format string used.
15483 	 * Make one entry and find it's size.
15484 	 */
15485 	(void) sprintf(bufp, format_string, data[0]);
15486 	entry_len = strlen(bufp);
15487 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
15488 
15489 	j = 0;
15490 	while (j < len) {
15491 		bufp = local_buf;
15492 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15493 		start_offset = j;
15494 
15495 		end_offset = start_offset + avail_count;
15496 
15497 		(void) sprintf(bufp, "%s:", title);
15498 		bufp += strlen(bufp);
15499 		for (i = start_offset; ((i < end_offset) && (j < len));
15500 		    i++, j++) {
15501 			(void) sprintf(bufp, format_string, data[i]);
15502 			bufp += entry_len;
15503 		}
15504 		(void) sprintf(bufp, "\n");
15505 
15506 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15507 	}
15508 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15509 }
15510 
15511 /*
15512  *    Function: sd_print_sense_msg
15513  *
15514  * Description: Log a message based upon the given sense data.
15515  *
15516  *   Arguments: un - ptr to associated softstate
15517  *		bp - ptr to buf(9S) for the command
15518  *		arg - ptr to associate sd_sense_info struct
15519  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15520  *			or SD_NO_RETRY_ISSUED
15521  *
15522  *     Context: May be called from interrupt context
15523  */
15524 
15525 static void
15526 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15527 {
15528 	struct sd_xbuf	*xp;
15529 	struct scsi_pkt	*pktp;
15530 	uint8_t *sensep;
15531 	daddr_t request_blkno;
15532 	diskaddr_t err_blkno;
15533 	int severity;
15534 	int pfa_flag;
15535 	extern struct scsi_key_strings scsi_cmds[];
15536 
15537 	ASSERT(un != NULL);
15538 	ASSERT(mutex_owned(SD_MUTEX(un)));
15539 	ASSERT(bp != NULL);
15540 	xp = SD_GET_XBUF(bp);
15541 	ASSERT(xp != NULL);
15542 	pktp = SD_GET_PKTP(bp);
15543 	ASSERT(pktp != NULL);
15544 	ASSERT(arg != NULL);
15545 
15546 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15547 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15548 
15549 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15550 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15551 		severity = SCSI_ERR_RETRYABLE;
15552 	}
15553 
15554 	/* Use absolute block number for the request block number */
15555 	request_blkno = xp->xb_blkno;
15556 
15557 	/*
15558 	 * Now try to get the error block number from the sense data
15559 	 */
15560 	sensep = xp->xb_sense_data;
15561 
15562 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15563 		(uint64_t *)&err_blkno)) {
15564 		/*
15565 		 * We retrieved the error block number from the information
15566 		 * portion of the sense data.
15567 		 *
15568 		 * For USCSI commands we are better off using the error
15569 		 * block no. as the requested block no. (This is the best
15570 		 * we can estimate.)
15571 		 */
15572 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15573 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15574 			request_blkno = err_blkno;
15575 		}
15576 	} else {
15577 		/*
15578 		 * Without the es_valid bit set (for fixed format) or an
15579 		 * information descriptor (for descriptor format) we cannot
15580 		 * be certain of the error blkno, so just use the
15581 		 * request_blkno.
15582 		 */
15583 		err_blkno = (diskaddr_t)request_blkno;
15584 	}
15585 
15586 	/*
15587 	 * The following will log the buffer contents for the release driver
15588 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15589 	 * level is set to verbose.
15590 	 */
15591 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15592 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15593 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15594 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15595 
15596 	if (pfa_flag == FALSE) {
15597 		/* This is normally only set for USCSI */
15598 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15599 			return;
15600 		}
15601 
15602 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15603 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15604 		    (severity < sd_error_level))) {
15605 			return;
15606 		}
15607 	}
15608 
15609 	/*
15610 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
15611 	 */
15612 	if ((SD_IS_LSI(un)) &&
15613 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15614 	    (scsi_sense_asc(sensep) == 0x94) &&
15615 	    (scsi_sense_ascq(sensep) == 0x01)) {
15616 		un->un_sonoma_failure_count++;
15617 		if (un->un_sonoma_failure_count > 1) {
15618 			return;
15619 		}
15620 	}
15621 
15622 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15623 	    request_blkno, err_blkno, scsi_cmds,
15624 	    (struct scsi_extended_sense *)sensep,
15625 	    un->un_additional_codes, NULL);
15626 }
15627 
15628 /*
15629  *    Function: sd_sense_key_no_sense
15630  *
15631  * Description: Recovery action when sense data was not received.
15632  *
15633  *     Context: May be called from interrupt context
15634  */
15635 
15636 static void
15637 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15638 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15639 {
15640 	struct sd_sense_info	si;
15641 
15642 	ASSERT(un != NULL);
15643 	ASSERT(mutex_owned(SD_MUTEX(un)));
15644 	ASSERT(bp != NULL);
15645 	ASSERT(xp != NULL);
15646 	ASSERT(pktp != NULL);
15647 
15648 	si.ssi_severity = SCSI_ERR_FATAL;
15649 	si.ssi_pfa_flag = FALSE;
15650 
15651 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15652 
15653 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15654 		&si, EIO, (clock_t)0, NULL);
15655 }
15656 
15657 
15658 /*
15659  *    Function: sd_sense_key_recoverable_error
15660  *
15661  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15662  *
15663  *     Context: May be called from interrupt context
15664  */
15665 
15666 static void
15667 sd_sense_key_recoverable_error(struct sd_lun *un,
15668 	uint8_t *sense_datap,
15669 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15670 {
15671 	struct sd_sense_info	si;
15672 	uint8_t asc = scsi_sense_asc(sense_datap);
15673 
15674 	ASSERT(un != NULL);
15675 	ASSERT(mutex_owned(SD_MUTEX(un)));
15676 	ASSERT(bp != NULL);
15677 	ASSERT(xp != NULL);
15678 	ASSERT(pktp != NULL);
15679 
15680 	/*
15681 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15682 	 */
15683 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15684 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15685 		si.ssi_severity = SCSI_ERR_INFO;
15686 		si.ssi_pfa_flag = TRUE;
15687 	} else {
15688 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15689 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15690 		si.ssi_severity = SCSI_ERR_RECOVERED;
15691 		si.ssi_pfa_flag = FALSE;
15692 	}
15693 
15694 	if (pktp->pkt_resid == 0) {
15695 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15696 		sd_return_command(un, bp);
15697 		return;
15698 	}
15699 
15700 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15701 	    &si, EIO, (clock_t)0, NULL);
15702 }
15703 
15704 
15705 
15706 
15707 /*
15708  *    Function: sd_sense_key_not_ready
15709  *
15710  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15711  *
15712  *     Context: May be called from interrupt context
15713  */
15714 
15715 static void
15716 sd_sense_key_not_ready(struct sd_lun *un,
15717 	uint8_t *sense_datap,
15718 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15719 {
15720 	struct sd_sense_info	si;
15721 	uint8_t asc = scsi_sense_asc(sense_datap);
15722 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15723 
15724 	ASSERT(un != NULL);
15725 	ASSERT(mutex_owned(SD_MUTEX(un)));
15726 	ASSERT(bp != NULL);
15727 	ASSERT(xp != NULL);
15728 	ASSERT(pktp != NULL);
15729 
15730 	si.ssi_severity = SCSI_ERR_FATAL;
15731 	si.ssi_pfa_flag = FALSE;
15732 
15733 	/*
15734 	 * Update error stats after first NOT READY error. Disks may have
15735 	 * been powered down and may need to be restarted.  For CDROMs,
15736 	 * report NOT READY errors only if media is present.
15737 	 */
15738 	if ((ISCD(un) && (asc == 0x3A)) ||
15739 	    (xp->xb_retry_count > 0)) {
15740 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15741 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15742 	}
15743 
15744 	/*
15745 	 * Just fail if the "not ready" retry limit has been reached.
15746 	 */
15747 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
15748 		/* Special check for error message printing for removables. */
15749 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15750 		    (ascq >= 0x04)) {
15751 			si.ssi_severity = SCSI_ERR_ALL;
15752 		}
15753 		goto fail_command;
15754 	}
15755 
15756 	/*
15757 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15758 	 * what to do.
15759 	 */
15760 	switch (asc) {
15761 	case 0x04:	/* LOGICAL UNIT NOT READY */
15762 		/*
15763 		 * disk drives that don't spin up result in a very long delay
15764 		 * in format without warning messages. We will log a message
15765 		 * if the error level is set to verbose.
15766 		 */
15767 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15768 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15769 			    "logical unit not ready, resetting disk\n");
15770 		}
15771 
15772 		/*
15773 		 * There are different requirements for CDROMs and disks for
15774 		 * the number of retries.  If a CD-ROM is giving this, it is
15775 		 * probably reading TOC and is in the process of getting
15776 		 * ready, so we should keep on trying for a long time to make
15777 		 * sure that all types of media are taken in account (for
15778 		 * some media the drive takes a long time to read TOC).  For
15779 		 * disks we do not want to retry this too many times as this
15780 		 * can cause a long hang in format when the drive refuses to
15781 		 * spin up (a very common failure).
15782 		 */
15783 		switch (ascq) {
15784 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
15785 			/*
15786 			 * Disk drives frequently refuse to spin up which
15787 			 * results in a very long hang in format without
15788 			 * warning messages.
15789 			 *
15790 			 * Note: This code preserves the legacy behavior of
15791 			 * comparing xb_retry_count against zero for fibre
15792 			 * channel targets instead of comparing against the
15793 			 * un_reset_retry_count value.  The reason for this
15794 			 * discrepancy has been so utterly lost beneath the
15795 			 * Sands of Time that even Indiana Jones could not
15796 			 * find it.
15797 			 */
15798 			if (un->un_f_is_fibre == TRUE) {
15799 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15800 					(xp->xb_retry_count > 0)) &&
15801 					(un->un_startstop_timeid == NULL)) {
15802 					scsi_log(SD_DEVINFO(un), sd_label,
15803 					CE_WARN, "logical unit not ready, "
15804 					"resetting disk\n");
15805 					sd_reset_target(un, pktp);
15806 				}
15807 			} else {
15808 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15809 					(xp->xb_retry_count >
15810 					un->un_reset_retry_count)) &&
15811 					(un->un_startstop_timeid == NULL)) {
15812 					scsi_log(SD_DEVINFO(un), sd_label,
15813 					CE_WARN, "logical unit not ready, "
15814 					"resetting disk\n");
15815 					sd_reset_target(un, pktp);
15816 				}
15817 			}
15818 			break;
15819 
15820 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
15821 			/*
15822 			 * If the target is in the process of becoming
15823 			 * ready, just proceed with the retry. This can
15824 			 * happen with CD-ROMs that take a long time to
15825 			 * read TOC after a power cycle or reset.
15826 			 */
15827 			goto do_retry;
15828 
15829 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
15830 			break;
15831 
15832 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
15833 			/*
15834 			 * Retries cannot help here so just fail right away.
15835 			 */
15836 			goto fail_command;
15837 
15838 		case 0x88:
15839 			/*
15840 			 * Vendor-unique code for T3/T4: it indicates a
15841 			 * path problem in a mutipathed config, but as far as
15842 			 * the target driver is concerned it equates to a fatal
15843 			 * error, so we should just fail the command right away
15844 			 * (without printing anything to the console). If this
15845 			 * is not a T3/T4, fall thru to the default recovery
15846 			 * action.
15847 			 * T3/T4 is FC only, don't need to check is_fibre
15848 			 */
15849 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
15850 				sd_return_failed_command(un, bp, EIO);
15851 				return;
15852 			}
15853 			/* FALLTHRU */
15854 
15855 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
15856 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
15857 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
15858 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
15859 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
15860 		default:    /* Possible future codes in SCSI spec? */
15861 			/*
15862 			 * For removable-media devices, do not retry if
15863 			 * ASCQ > 2 as these result mostly from USCSI commands
15864 			 * on MMC devices issued to check status of an
15865 			 * operation initiated in immediate mode.  Also for
15866 			 * ASCQ >= 4 do not print console messages as these
15867 			 * mainly represent a user-initiated operation
15868 			 * instead of a system failure.
15869 			 */
15870 			if (un->un_f_has_removable_media) {
15871 				si.ssi_severity = SCSI_ERR_ALL;
15872 				goto fail_command;
15873 			}
15874 			break;
15875 		}
15876 
15877 		/*
15878 		 * As part of our recovery attempt for the NOT READY
15879 		 * condition, we issue a START STOP UNIT command. However
15880 		 * we want to wait for a short delay before attempting this
15881 		 * as there may still be more commands coming back from the
15882 		 * target with the check condition. To do this we use
15883 		 * timeout(9F) to call sd_start_stop_unit_callback() after
15884 		 * the delay interval expires. (sd_start_stop_unit_callback()
15885 		 * dispatches sd_start_stop_unit_task(), which will issue
15886 		 * the actual START STOP UNIT command. The delay interval
15887 		 * is one-half of the delay that we will use to retry the
15888 		 * command that generated the NOT READY condition.
15889 		 *
15890 		 * Note that we could just dispatch sd_start_stop_unit_task()
15891 		 * from here and allow it to sleep for the delay interval,
15892 		 * but then we would be tying up the taskq thread
15893 		 * uncesessarily for the duration of the delay.
15894 		 *
15895 		 * Do not issue the START STOP UNIT if the current command
15896 		 * is already a START STOP UNIT.
15897 		 */
15898 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
15899 			break;
15900 		}
15901 
15902 		/*
15903 		 * Do not schedule the timeout if one is already pending.
15904 		 */
15905 		if (un->un_startstop_timeid != NULL) {
15906 			SD_INFO(SD_LOG_ERROR, un,
15907 			    "sd_sense_key_not_ready: restart already issued to"
15908 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
15909 			    ddi_get_instance(SD_DEVINFO(un)));
15910 			break;
15911 		}
15912 
15913 		/*
15914 		 * Schedule the START STOP UNIT command, then queue the command
15915 		 * for a retry.
15916 		 *
15917 		 * Note: A timeout is not scheduled for this retry because we
15918 		 * want the retry to be serial with the START_STOP_UNIT. The
15919 		 * retry will be started when the START_STOP_UNIT is completed
15920 		 * in sd_start_stop_unit_task.
15921 		 */
15922 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
15923 		    un, SD_BSY_TIMEOUT / 2);
15924 		xp->xb_retry_count++;
15925 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
15926 		return;
15927 
15928 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
15929 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15930 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15931 			    "unit does not respond to selection\n");
15932 		}
15933 		break;
15934 
15935 	case 0x3A:	/* MEDIUM NOT PRESENT */
15936 		if (sd_error_level >= SCSI_ERR_FATAL) {
15937 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15938 			    "Caddy not inserted in drive\n");
15939 		}
15940 
15941 		sr_ejected(un);
15942 		un->un_mediastate = DKIO_EJECTED;
15943 		/* The state has changed, inform the media watch routines */
15944 		cv_broadcast(&un->un_state_cv);
15945 		/* Just fail if no media is present in the drive. */
15946 		goto fail_command;
15947 
15948 	default:
15949 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15950 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15951 			    "Unit not Ready. Additional sense code 0x%x\n",
15952 			    asc);
15953 		}
15954 		break;
15955 	}
15956 
15957 do_retry:
15958 
15959 	/*
15960 	 * Retry the command, as some targets may report NOT READY for
15961 	 * several seconds after being reset.
15962 	 */
15963 	xp->xb_retry_count++;
15964 	si.ssi_severity = SCSI_ERR_RETRYABLE;
15965 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
15966 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
15967 
15968 	return;
15969 
15970 fail_command:
15971 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15972 	sd_return_failed_command(un, bp, EIO);
15973 }
15974 
15975 
15976 
15977 /*
15978  *    Function: sd_sense_key_medium_or_hardware_error
15979  *
15980  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
15981  *		sense key.
15982  *
15983  *     Context: May be called from interrupt context
15984  */
15985 
15986 static void
15987 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
15988 	uint8_t *sense_datap,
15989 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15990 {
15991 	struct sd_sense_info	si;
15992 	uint8_t sense_key = scsi_sense_key(sense_datap);
15993 	uint8_t asc = scsi_sense_asc(sense_datap);
15994 
15995 	ASSERT(un != NULL);
15996 	ASSERT(mutex_owned(SD_MUTEX(un)));
15997 	ASSERT(bp != NULL);
15998 	ASSERT(xp != NULL);
15999 	ASSERT(pktp != NULL);
16000 
16001 	si.ssi_severity = SCSI_ERR_FATAL;
16002 	si.ssi_pfa_flag = FALSE;
16003 
16004 	if (sense_key == KEY_MEDIUM_ERROR) {
16005 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
16006 	}
16007 
16008 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16009 
16010 	if ((un->un_reset_retry_count != 0) &&
16011 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
16012 		mutex_exit(SD_MUTEX(un));
16013 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
16014 		if (un->un_f_allow_bus_device_reset == TRUE) {
16015 
16016 			boolean_t try_resetting_target = B_TRUE;
16017 
16018 			/*
16019 			 * We need to be able to handle specific ASC when we are
16020 			 * handling a KEY_HARDWARE_ERROR. In particular
16021 			 * taking the default action of resetting the target may
16022 			 * not be the appropriate way to attempt recovery.
16023 			 * Resetting a target because of a single LUN failure
16024 			 * victimizes all LUNs on that target.
16025 			 *
16026 			 * This is true for the LSI arrays, if an LSI
16027 			 * array controller returns an ASC of 0x84 (LUN Dead) we
16028 			 * should trust it.
16029 			 */
16030 
16031 			if (sense_key == KEY_HARDWARE_ERROR) {
16032 				switch (asc) {
16033 				case 0x84:
16034 					if (SD_IS_LSI(un)) {
16035 						try_resetting_target = B_FALSE;
16036 					}
16037 					break;
16038 				default:
16039 					break;
16040 				}
16041 			}
16042 
16043 			if (try_resetting_target == B_TRUE) {
16044 				int reset_retval = 0;
16045 				if (un->un_f_lun_reset_enabled == TRUE) {
16046 					SD_TRACE(SD_LOG_IO_CORE, un,
16047 					    "sd_sense_key_medium_or_hardware_"
16048 					    "error: issuing RESET_LUN\n");
16049 					reset_retval =
16050 					    scsi_reset(SD_ADDRESS(un),
16051 					    RESET_LUN);
16052 				}
16053 				if (reset_retval == 0) {
16054 					SD_TRACE(SD_LOG_IO_CORE, un,
16055 					    "sd_sense_key_medium_or_hardware_"
16056 					    "error: issuing RESET_TARGET\n");
16057 					(void) scsi_reset(SD_ADDRESS(un),
16058 					    RESET_TARGET);
16059 				}
16060 			}
16061 		}
16062 		mutex_enter(SD_MUTEX(un));
16063 	}
16064 
16065 	/*
16066 	 * This really ought to be a fatal error, but we will retry anyway
16067 	 * as some drives report this as a spurious error.
16068 	 */
16069 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16070 	    &si, EIO, (clock_t)0, NULL);
16071 }
16072 
16073 
16074 
16075 /*
16076  *    Function: sd_sense_key_illegal_request
16077  *
16078  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16079  *
16080  *     Context: May be called from interrupt context
16081  */
16082 
16083 static void
16084 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16085 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16086 {
16087 	struct sd_sense_info	si;
16088 
16089 	ASSERT(un != NULL);
16090 	ASSERT(mutex_owned(SD_MUTEX(un)));
16091 	ASSERT(bp != NULL);
16092 	ASSERT(xp != NULL);
16093 	ASSERT(pktp != NULL);
16094 
16095 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16096 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16097 
16098 	si.ssi_severity = SCSI_ERR_INFO;
16099 	si.ssi_pfa_flag = FALSE;
16100 
16101 	/* Pointless to retry if the target thinks it's an illegal request */
16102 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16103 	sd_return_failed_command(un, bp, EIO);
16104 }
16105 
16106 
16107 
16108 
16109 /*
16110  *    Function: sd_sense_key_unit_attention
16111  *
16112  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16113  *
16114  *     Context: May be called from interrupt context
16115  */
16116 
16117 static void
16118 sd_sense_key_unit_attention(struct sd_lun *un,
16119 	uint8_t *sense_datap,
16120 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16121 {
16122 	/*
16123 	 * For UNIT ATTENTION we allow retries for one minute. Devices
16124 	 * like Sonoma can return UNIT ATTENTION close to a minute
16125 	 * under certain conditions.
16126 	 */
16127 	int	retry_check_flag = SD_RETRIES_UA;
16128 	boolean_t	kstat_updated = B_FALSE;
16129 	struct	sd_sense_info		si;
16130 	uint8_t asc = scsi_sense_asc(sense_datap);
16131 
16132 	ASSERT(un != NULL);
16133 	ASSERT(mutex_owned(SD_MUTEX(un)));
16134 	ASSERT(bp != NULL);
16135 	ASSERT(xp != NULL);
16136 	ASSERT(pktp != NULL);
16137 
16138 	si.ssi_severity = SCSI_ERR_INFO;
16139 	si.ssi_pfa_flag = FALSE;
16140 
16141 
16142 	switch (asc) {
16143 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16144 		if (sd_report_pfa != 0) {
16145 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16146 			si.ssi_pfa_flag = TRUE;
16147 			retry_check_flag = SD_RETRIES_STANDARD;
16148 			goto do_retry;
16149 		}
16150 
16151 		break;
16152 
16153 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
16154 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16155 			un->un_resvd_status |=
16156 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16157 		}
16158 #ifdef _LP64
16159 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16160 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16161 			    un, KM_NOSLEEP) == 0) {
16162 				/*
16163 				 * If we can't dispatch the task we'll just
16164 				 * live without descriptor sense.  We can
16165 				 * try again on the next "unit attention"
16166 				 */
16167 				SD_ERROR(SD_LOG_ERROR, un,
16168 				    "sd_sense_key_unit_attention: "
16169 				    "Could not dispatch "
16170 				    "sd_reenable_dsense_task\n");
16171 			}
16172 		}
16173 #endif /* _LP64 */
16174 		/* FALLTHRU */
16175 
16176 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16177 		if (!un->un_f_has_removable_media) {
16178 			break;
16179 		}
16180 
16181 		/*
16182 		 * When we get a unit attention from a removable-media device,
16183 		 * it may be in a state that will take a long time to recover
16184 		 * (e.g., from a reset).  Since we are executing in interrupt
16185 		 * context here, we cannot wait around for the device to come
16186 		 * back. So hand this command off to sd_media_change_task()
16187 		 * for deferred processing under taskq thread context. (Note
16188 		 * that the command still may be failed if a problem is
16189 		 * encountered at a later time.)
16190 		 */
16191 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16192 		    KM_NOSLEEP) == 0) {
16193 			/*
16194 			 * Cannot dispatch the request so fail the command.
16195 			 */
16196 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16197 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16198 			si.ssi_severity = SCSI_ERR_FATAL;
16199 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16200 			sd_return_failed_command(un, bp, EIO);
16201 		}
16202 
16203 		/*
16204 		 * If failed to dispatch sd_media_change_task(), we already
16205 		 * updated kstat. If succeed to dispatch sd_media_change_task(),
16206 		 * we should update kstat later if it encounters an error. So,
16207 		 * we update kstat_updated flag here.
16208 		 */
16209 		kstat_updated = B_TRUE;
16210 
16211 		/*
16212 		 * Either the command has been successfully dispatched to a
16213 		 * task Q for retrying, or the dispatch failed. In either case
16214 		 * do NOT retry again by calling sd_retry_command. This sets up
16215 		 * two retries of the same command and when one completes and
16216 		 * frees the resources the other will access freed memory,
16217 		 * a bad thing.
16218 		 */
16219 		return;
16220 
16221 	default:
16222 		break;
16223 	}
16224 
16225 	/*
16226 	 * Update kstat if we haven't done that.
16227 	 */
16228 	if (!kstat_updated) {
16229 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16230 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16231 	}
16232 
16233 do_retry:
16234 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16235 	    EIO, SD_UA_RETRY_DELAY, NULL);
16236 }
16237 
16238 
16239 
16240 /*
16241  *    Function: sd_sense_key_fail_command
16242  *
16243  * Description: Use to fail a command when we don't like the sense key that
16244  *		was returned.
16245  *
16246  *     Context: May be called from interrupt context
16247  */
16248 
16249 static void
16250 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16251 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16252 {
16253 	struct sd_sense_info	si;
16254 
16255 	ASSERT(un != NULL);
16256 	ASSERT(mutex_owned(SD_MUTEX(un)));
16257 	ASSERT(bp != NULL);
16258 	ASSERT(xp != NULL);
16259 	ASSERT(pktp != NULL);
16260 
16261 	si.ssi_severity = SCSI_ERR_FATAL;
16262 	si.ssi_pfa_flag = FALSE;
16263 
16264 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16265 	sd_return_failed_command(un, bp, EIO);
16266 }
16267 
16268 
16269 
16270 /*
16271  *    Function: sd_sense_key_blank_check
16272  *
16273  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16274  *		Has no monetary connotation.
16275  *
16276  *     Context: May be called from interrupt context
16277  */
16278 
16279 static void
16280 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16281 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16282 {
16283 	struct sd_sense_info	si;
16284 
16285 	ASSERT(un != NULL);
16286 	ASSERT(mutex_owned(SD_MUTEX(un)));
16287 	ASSERT(bp != NULL);
16288 	ASSERT(xp != NULL);
16289 	ASSERT(pktp != NULL);
16290 
16291 	/*
16292 	 * Blank check is not fatal for removable devices, therefore
16293 	 * it does not require a console message.
16294 	 */
16295 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16296 	    SCSI_ERR_FATAL;
16297 	si.ssi_pfa_flag = FALSE;
16298 
16299 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16300 	sd_return_failed_command(un, bp, EIO);
16301 }
16302 
16303 
16304 
16305 
16306 /*
16307  *    Function: sd_sense_key_aborted_command
16308  *
16309  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16310  *
16311  *     Context: May be called from interrupt context
16312  */
16313 
16314 static void
16315 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16316 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16317 {
16318 	struct sd_sense_info	si;
16319 
16320 	ASSERT(un != NULL);
16321 	ASSERT(mutex_owned(SD_MUTEX(un)));
16322 	ASSERT(bp != NULL);
16323 	ASSERT(xp != NULL);
16324 	ASSERT(pktp != NULL);
16325 
16326 	si.ssi_severity = SCSI_ERR_FATAL;
16327 	si.ssi_pfa_flag = FALSE;
16328 
16329 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16330 
16331 	/*
16332 	 * This really ought to be a fatal error, but we will retry anyway
16333 	 * as some drives report this as a spurious error.
16334 	 */
16335 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16336 	    &si, EIO, (clock_t)0, NULL);
16337 }
16338 
16339 
16340 
16341 /*
16342  *    Function: sd_sense_key_default
16343  *
16344  * Description: Default recovery action for several SCSI sense keys (basically
16345  *		attempts a retry).
16346  *
16347  *     Context: May be called from interrupt context
16348  */
16349 
16350 static void
16351 sd_sense_key_default(struct sd_lun *un,
16352 	uint8_t *sense_datap,
16353 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16354 {
16355 	struct sd_sense_info	si;
16356 	uint8_t sense_key = scsi_sense_key(sense_datap);
16357 
16358 	ASSERT(un != NULL);
16359 	ASSERT(mutex_owned(SD_MUTEX(un)));
16360 	ASSERT(bp != NULL);
16361 	ASSERT(xp != NULL);
16362 	ASSERT(pktp != NULL);
16363 
16364 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16365 
16366 	/*
16367 	 * Undecoded sense key.	Attempt retries and hope that will fix
16368 	 * the problem.  Otherwise, we're dead.
16369 	 */
16370 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16371 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16372 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16373 	}
16374 
16375 	si.ssi_severity = SCSI_ERR_FATAL;
16376 	si.ssi_pfa_flag = FALSE;
16377 
16378 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16379 	    &si, EIO, (clock_t)0, NULL);
16380 }
16381 
16382 
16383 
16384 /*
16385  *    Function: sd_print_retry_msg
16386  *
16387  * Description: Print a message indicating the retry action being taken.
16388  *
16389  *   Arguments: un - ptr to associated softstate
16390  *		bp - ptr to buf(9S) for the command
16391  *		arg - not used.
16392  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16393  *			or SD_NO_RETRY_ISSUED
16394  *
16395  *     Context: May be called from interrupt context
16396  */
16397 /* ARGSUSED */
16398 static void
16399 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16400 {
16401 	struct sd_xbuf	*xp;
16402 	struct scsi_pkt *pktp;
16403 	char *reasonp;
16404 	char *msgp;
16405 
16406 	ASSERT(un != NULL);
16407 	ASSERT(mutex_owned(SD_MUTEX(un)));
16408 	ASSERT(bp != NULL);
16409 	pktp = SD_GET_PKTP(bp);
16410 	ASSERT(pktp != NULL);
16411 	xp = SD_GET_XBUF(bp);
16412 	ASSERT(xp != NULL);
16413 
16414 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16415 	mutex_enter(&un->un_pm_mutex);
16416 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16417 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16418 	    (pktp->pkt_flags & FLAG_SILENT)) {
16419 		mutex_exit(&un->un_pm_mutex);
16420 		goto update_pkt_reason;
16421 	}
16422 	mutex_exit(&un->un_pm_mutex);
16423 
16424 	/*
16425 	 * Suppress messages if they are all the same pkt_reason; with
16426 	 * TQ, many (up to 256) are returned with the same pkt_reason.
16427 	 * If we are in panic, then suppress the retry messages.
16428 	 */
16429 	switch (flag) {
16430 	case SD_NO_RETRY_ISSUED:
16431 		msgp = "giving up";
16432 		break;
16433 	case SD_IMMEDIATE_RETRY_ISSUED:
16434 	case SD_DELAYED_RETRY_ISSUED:
16435 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16436 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16437 		    (sd_error_level != SCSI_ERR_ALL))) {
16438 			return;
16439 		}
16440 		msgp = "retrying command";
16441 		break;
16442 	default:
16443 		goto update_pkt_reason;
16444 	}
16445 
16446 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16447 	    scsi_rname(pktp->pkt_reason));
16448 
16449 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16450 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16451 
16452 update_pkt_reason:
16453 	/*
16454 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16455 	 * This is to prevent multiple console messages for the same failure
16456 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
16457 	 * when the command is retried successfully because there still may be
16458 	 * more commands coming back with the same value of pktp->pkt_reason.
16459 	 */
16460 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16461 		un->un_last_pkt_reason = pktp->pkt_reason;
16462 	}
16463 }
16464 
16465 
16466 /*
16467  *    Function: sd_print_cmd_incomplete_msg
16468  *
16469  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16470  *
16471  *   Arguments: un - ptr to associated softstate
16472  *		bp - ptr to buf(9S) for the command
16473  *		arg - passed to sd_print_retry_msg()
16474  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16475  *			or SD_NO_RETRY_ISSUED
16476  *
16477  *     Context: May be called from interrupt context
16478  */
16479 
16480 static void
16481 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16482 	int code)
16483 {
16484 	dev_info_t	*dip;
16485 
16486 	ASSERT(un != NULL);
16487 	ASSERT(mutex_owned(SD_MUTEX(un)));
16488 	ASSERT(bp != NULL);
16489 
16490 	switch (code) {
16491 	case SD_NO_RETRY_ISSUED:
16492 		/* Command was failed. Someone turned off this target? */
16493 		if (un->un_state != SD_STATE_OFFLINE) {
16494 			/*
16495 			 * Suppress message if we are detaching and
16496 			 * device has been disconnected
16497 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
16498 			 * private interface and not part of the DDI
16499 			 */
16500 			dip = un->un_sd->sd_dev;
16501 			if (!(DEVI_IS_DETACHING(dip) &&
16502 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16503 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16504 				"disk not responding to selection\n");
16505 			}
16506 			New_state(un, SD_STATE_OFFLINE);
16507 		}
16508 		break;
16509 
16510 	case SD_DELAYED_RETRY_ISSUED:
16511 	case SD_IMMEDIATE_RETRY_ISSUED:
16512 	default:
16513 		/* Command was successfully queued for retry */
16514 		sd_print_retry_msg(un, bp, arg, code);
16515 		break;
16516 	}
16517 }
16518 
16519 
16520 /*
16521  *    Function: sd_pkt_reason_cmd_incomplete
16522  *
16523  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16524  *
16525  *     Context: May be called from interrupt context
16526  */
16527 
16528 static void
16529 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16530 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16531 {
16532 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16533 
16534 	ASSERT(un != NULL);
16535 	ASSERT(mutex_owned(SD_MUTEX(un)));
16536 	ASSERT(bp != NULL);
16537 	ASSERT(xp != NULL);
16538 	ASSERT(pktp != NULL);
16539 
16540 	/* Do not do a reset if selection did not complete */
16541 	/* Note: Should this not just check the bit? */
16542 	if (pktp->pkt_state != STATE_GOT_BUS) {
16543 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16544 		sd_reset_target(un, pktp);
16545 	}
16546 
16547 	/*
16548 	 * If the target was not successfully selected, then set
16549 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16550 	 * with the target, and further retries and/or commands are
16551 	 * likely to take a long time.
16552 	 */
16553 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16554 		flag |= SD_RETRIES_FAILFAST;
16555 	}
16556 
16557 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16558 
16559 	sd_retry_command(un, bp, flag,
16560 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16561 }
16562 
16563 
16564 
16565 /*
16566  *    Function: sd_pkt_reason_cmd_tran_err
16567  *
16568  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16569  *
16570  *     Context: May be called from interrupt context
16571  */
16572 
16573 static void
16574 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16575 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16576 {
16577 	ASSERT(un != NULL);
16578 	ASSERT(mutex_owned(SD_MUTEX(un)));
16579 	ASSERT(bp != NULL);
16580 	ASSERT(xp != NULL);
16581 	ASSERT(pktp != NULL);
16582 
16583 	/*
16584 	 * Do not reset if we got a parity error, or if
16585 	 * selection did not complete.
16586 	 */
16587 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16588 	/* Note: Should this not just check the bit for pkt_state? */
16589 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16590 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16591 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16592 		sd_reset_target(un, pktp);
16593 	}
16594 
16595 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16596 
16597 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16598 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16599 }
16600 
16601 
16602 
16603 /*
16604  *    Function: sd_pkt_reason_cmd_reset
16605  *
16606  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16607  *
16608  *     Context: May be called from interrupt context
16609  */
16610 
16611 static void
16612 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16613 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16614 {
16615 	ASSERT(un != NULL);
16616 	ASSERT(mutex_owned(SD_MUTEX(un)));
16617 	ASSERT(bp != NULL);
16618 	ASSERT(xp != NULL);
16619 	ASSERT(pktp != NULL);
16620 
16621 	/* The target may still be running the command, so try to reset. */
16622 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16623 	sd_reset_target(un, pktp);
16624 
16625 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16626 
16627 	/*
16628 	 * If pkt_reason is CMD_RESET chances are that this pkt got
16629 	 * reset because another target on this bus caused it. The target
16630 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16631 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16632 	 */
16633 
16634 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16635 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16636 }
16637 
16638 
16639 
16640 
16641 /*
16642  *    Function: sd_pkt_reason_cmd_aborted
16643  *
16644  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16645  *
16646  *     Context: May be called from interrupt context
16647  */
16648 
16649 static void
16650 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16651 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16652 {
16653 	ASSERT(un != NULL);
16654 	ASSERT(mutex_owned(SD_MUTEX(un)));
16655 	ASSERT(bp != NULL);
16656 	ASSERT(xp != NULL);
16657 	ASSERT(pktp != NULL);
16658 
16659 	/* The target may still be running the command, so try to reset. */
16660 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16661 	sd_reset_target(un, pktp);
16662 
16663 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16664 
16665 	/*
16666 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
16667 	 * aborted because another target on this bus caused it. The target
16668 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16669 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16670 	 */
16671 
16672 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16673 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16674 }
16675 
16676 
16677 
16678 /*
16679  *    Function: sd_pkt_reason_cmd_timeout
16680  *
16681  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16682  *
16683  *     Context: May be called from interrupt context
16684  */
16685 
16686 static void
16687 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16688 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16689 {
16690 	ASSERT(un != NULL);
16691 	ASSERT(mutex_owned(SD_MUTEX(un)));
16692 	ASSERT(bp != NULL);
16693 	ASSERT(xp != NULL);
16694 	ASSERT(pktp != NULL);
16695 
16696 
16697 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16698 	sd_reset_target(un, pktp);
16699 
16700 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16701 
16702 	/*
16703 	 * A command timeout indicates that we could not establish
16704 	 * communication with the target, so set SD_RETRIES_FAILFAST
16705 	 * as further retries/commands are likely to take a long time.
16706 	 */
16707 	sd_retry_command(un, bp,
16708 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16709 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16710 }
16711 
16712 
16713 
16714 /*
16715  *    Function: sd_pkt_reason_cmd_unx_bus_free
16716  *
16717  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16718  *
16719  *     Context: May be called from interrupt context
16720  */
16721 
16722 static void
16723 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16724 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16725 {
16726 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16727 
16728 	ASSERT(un != NULL);
16729 	ASSERT(mutex_owned(SD_MUTEX(un)));
16730 	ASSERT(bp != NULL);
16731 	ASSERT(xp != NULL);
16732 	ASSERT(pktp != NULL);
16733 
16734 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16735 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16736 
16737 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
16738 	    sd_print_retry_msg : NULL;
16739 
16740 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16741 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16742 }
16743 
16744 
16745 /*
16746  *    Function: sd_pkt_reason_cmd_tag_reject
16747  *
16748  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16749  *
16750  *     Context: May be called from interrupt context
16751  */
16752 
16753 static void
16754 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16755 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16756 {
16757 	ASSERT(un != NULL);
16758 	ASSERT(mutex_owned(SD_MUTEX(un)));
16759 	ASSERT(bp != NULL);
16760 	ASSERT(xp != NULL);
16761 	ASSERT(pktp != NULL);
16762 
16763 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16764 	pktp->pkt_flags = 0;
16765 	un->un_tagflags = 0;
16766 	if (un->un_f_opt_queueing == TRUE) {
16767 		un->un_throttle = min(un->un_throttle, 3);
16768 	} else {
16769 		un->un_throttle = 1;
16770 	}
16771 	mutex_exit(SD_MUTEX(un));
16772 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
16773 	mutex_enter(SD_MUTEX(un));
16774 
16775 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16776 
16777 	/* Legacy behavior not to check retry counts here. */
16778 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
16779 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16780 }
16781 
16782 
16783 /*
16784  *    Function: sd_pkt_reason_default
16785  *
16786  * Description: Default recovery actions for SCSA pkt_reason values that
16787  *		do not have more explicit recovery actions.
16788  *
16789  *     Context: May be called from interrupt context
16790  */
16791 
16792 static void
16793 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
16794 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16795 {
16796 	ASSERT(un != NULL);
16797 	ASSERT(mutex_owned(SD_MUTEX(un)));
16798 	ASSERT(bp != NULL);
16799 	ASSERT(xp != NULL);
16800 	ASSERT(pktp != NULL);
16801 
16802 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16803 	sd_reset_target(un, pktp);
16804 
16805 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16806 
16807 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16808 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16809 }
16810 
16811 
16812 
16813 /*
16814  *    Function: sd_pkt_status_check_condition
16815  *
16816  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
16817  *
16818  *     Context: May be called from interrupt context
16819  */
16820 
16821 static void
16822 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
16823 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16824 {
16825 	ASSERT(un != NULL);
16826 	ASSERT(mutex_owned(SD_MUTEX(un)));
16827 	ASSERT(bp != NULL);
16828 	ASSERT(xp != NULL);
16829 	ASSERT(pktp != NULL);
16830 
16831 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
16832 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
16833 
16834 	/*
16835 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
16836 	 * command will be retried after the request sense). Otherwise, retry
16837 	 * the command. Note: we are issuing the request sense even though the
16838 	 * retry limit may have been reached for the failed command.
16839 	 */
16840 	if (un->un_f_arq_enabled == FALSE) {
16841 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16842 		    "no ARQ, sending request sense command\n");
16843 		sd_send_request_sense_command(un, bp, pktp);
16844 	} else {
16845 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16846 		    "ARQ,retrying request sense command\n");
16847 #if defined(__i386) || defined(__amd64)
16848 		/*
16849 		 * The SD_RETRY_DELAY value need to be adjusted here
16850 		 * when SD_RETRY_DELAY change in sddef.h
16851 		 */
16852 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
16853 			un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
16854 			NULL);
16855 #else
16856 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
16857 		    EIO, SD_RETRY_DELAY, NULL);
16858 #endif
16859 	}
16860 
16861 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
16862 }
16863 
16864 
16865 /*
16866  *    Function: sd_pkt_status_busy
16867  *
16868  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
16869  *
16870  *     Context: May be called from interrupt context
16871  */
16872 
16873 static void
16874 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16875 	struct scsi_pkt *pktp)
16876 {
16877 	ASSERT(un != NULL);
16878 	ASSERT(mutex_owned(SD_MUTEX(un)));
16879 	ASSERT(bp != NULL);
16880 	ASSERT(xp != NULL);
16881 	ASSERT(pktp != NULL);
16882 
16883 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16884 	    "sd_pkt_status_busy: entry\n");
16885 
16886 	/* If retries are exhausted, just fail the command. */
16887 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
16888 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16889 		    "device busy too long\n");
16890 		sd_return_failed_command(un, bp, EIO);
16891 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16892 		    "sd_pkt_status_busy: exit\n");
16893 		return;
16894 	}
16895 	xp->xb_retry_count++;
16896 
16897 	/*
16898 	 * Try to reset the target. However, we do not want to perform
16899 	 * more than one reset if the device continues to fail. The reset
16900 	 * will be performed when the retry count reaches the reset
16901 	 * threshold.  This threshold should be set such that at least
16902 	 * one retry is issued before the reset is performed.
16903 	 */
16904 	if (xp->xb_retry_count ==
16905 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
16906 		int rval = 0;
16907 		mutex_exit(SD_MUTEX(un));
16908 		if (un->un_f_allow_bus_device_reset == TRUE) {
16909 			/*
16910 			 * First try to reset the LUN; if we cannot then
16911 			 * try to reset the target.
16912 			 */
16913 			if (un->un_f_lun_reset_enabled == TRUE) {
16914 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16915 				    "sd_pkt_status_busy: RESET_LUN\n");
16916 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
16917 			}
16918 			if (rval == 0) {
16919 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16920 				    "sd_pkt_status_busy: RESET_TARGET\n");
16921 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
16922 			}
16923 		}
16924 		if (rval == 0) {
16925 			/*
16926 			 * If the RESET_LUN and/or RESET_TARGET failed,
16927 			 * try RESET_ALL
16928 			 */
16929 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16930 			    "sd_pkt_status_busy: RESET_ALL\n");
16931 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
16932 		}
16933 		mutex_enter(SD_MUTEX(un));
16934 		if (rval == 0) {
16935 			/*
16936 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
16937 			 * At this point we give up & fail the command.
16938 			 */
16939 			sd_return_failed_command(un, bp, EIO);
16940 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16941 			    "sd_pkt_status_busy: exit (failed cmd)\n");
16942 			return;
16943 		}
16944 	}
16945 
16946 	/*
16947 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
16948 	 * we have already checked the retry counts above.
16949 	 */
16950 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
16951 	    EIO, SD_BSY_TIMEOUT, NULL);
16952 
16953 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16954 	    "sd_pkt_status_busy: exit\n");
16955 }
16956 
16957 
16958 /*
16959  *    Function: sd_pkt_status_reservation_conflict
16960  *
16961  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
16962  *		command status.
16963  *
16964  *     Context: May be called from interrupt context
16965  */
16966 
16967 static void
16968 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
16969 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16970 {
16971 	ASSERT(un != NULL);
16972 	ASSERT(mutex_owned(SD_MUTEX(un)));
16973 	ASSERT(bp != NULL);
16974 	ASSERT(xp != NULL);
16975 	ASSERT(pktp != NULL);
16976 
16977 	/*
16978 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
16979 	 * conflict could be due to various reasons like incorrect keys, not
16980 	 * registered or not reserved etc. So, we return EACCES to the caller.
16981 	 */
16982 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
16983 		int cmd = SD_GET_PKT_OPCODE(pktp);
16984 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
16985 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
16986 			sd_return_failed_command(un, bp, EACCES);
16987 			return;
16988 		}
16989 	}
16990 
16991 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
16992 
16993 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
16994 		if (sd_failfast_enable != 0) {
16995 			/* By definition, we must panic here.... */
16996 			sd_panic_for_res_conflict(un);
16997 			/*NOTREACHED*/
16998 		}
16999 		SD_ERROR(SD_LOG_IO, un,
17000 		    "sd_handle_resv_conflict: Disk Reserved\n");
17001 		sd_return_failed_command(un, bp, EACCES);
17002 		return;
17003 	}
17004 
17005 	/*
17006 	 * 1147670: retry only if sd_retry_on_reservation_conflict
17007 	 * property is set (default is 1). Retries will not succeed
17008 	 * on a disk reserved by another initiator. HA systems
17009 	 * may reset this via sd.conf to avoid these retries.
17010 	 *
17011 	 * Note: The legacy return code for this failure is EIO, however EACCES
17012 	 * seems more appropriate for a reservation conflict.
17013 	 */
17014 	if (sd_retry_on_reservation_conflict == 0) {
17015 		SD_ERROR(SD_LOG_IO, un,
17016 		    "sd_handle_resv_conflict: Device Reserved\n");
17017 		sd_return_failed_command(un, bp, EIO);
17018 		return;
17019 	}
17020 
17021 	/*
17022 	 * Retry the command if we can.
17023 	 *
17024 	 * Note: The legacy return code for this failure is EIO, however EACCES
17025 	 * seems more appropriate for a reservation conflict.
17026 	 */
17027 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17028 	    (clock_t)2, NULL);
17029 }
17030 
17031 
17032 
17033 /*
17034  *    Function: sd_pkt_status_qfull
17035  *
17036  * Description: Handle a QUEUE FULL condition from the target.  This can
17037  *		occur if the HBA does not handle the queue full condition.
17038  *		(Basically this means third-party HBAs as Sun HBAs will
17039  *		handle the queue full condition.)  Note that if there are
17040  *		some commands already in the transport, then the queue full
17041  *		has occurred because the queue for this nexus is actually
17042  *		full. If there are no commands in the transport, then the
17043  *		queue full is resulting from some other initiator or lun
17044  *		consuming all the resources at the target.
17045  *
17046  *     Context: May be called from interrupt context
17047  */
17048 
17049 static void
17050 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17051 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17052 {
17053 	ASSERT(un != NULL);
17054 	ASSERT(mutex_owned(SD_MUTEX(un)));
17055 	ASSERT(bp != NULL);
17056 	ASSERT(xp != NULL);
17057 	ASSERT(pktp != NULL);
17058 
17059 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17060 	    "sd_pkt_status_qfull: entry\n");
17061 
17062 	/*
17063 	 * Just lower the QFULL throttle and retry the command.  Note that
17064 	 * we do not limit the number of retries here.
17065 	 */
17066 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17067 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17068 	    SD_RESTART_TIMEOUT, NULL);
17069 
17070 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17071 	    "sd_pkt_status_qfull: exit\n");
17072 }
17073 
17074 
17075 /*
17076  *    Function: sd_reset_target
17077  *
17078  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17079  *		RESET_TARGET, or RESET_ALL.
17080  *
17081  *     Context: May be called under interrupt context.
17082  */
17083 
17084 static void
17085 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17086 {
17087 	int rval = 0;
17088 
17089 	ASSERT(un != NULL);
17090 	ASSERT(mutex_owned(SD_MUTEX(un)));
17091 	ASSERT(pktp != NULL);
17092 
17093 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17094 
17095 	/*
17096 	 * No need to reset if the transport layer has already done so.
17097 	 */
17098 	if ((pktp->pkt_statistics &
17099 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17100 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17101 		    "sd_reset_target: no reset\n");
17102 		return;
17103 	}
17104 
17105 	mutex_exit(SD_MUTEX(un));
17106 
17107 	if (un->un_f_allow_bus_device_reset == TRUE) {
17108 		if (un->un_f_lun_reset_enabled == TRUE) {
17109 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17110 			    "sd_reset_target: RESET_LUN\n");
17111 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17112 		}
17113 		if (rval == 0) {
17114 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17115 			    "sd_reset_target: RESET_TARGET\n");
17116 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17117 		}
17118 	}
17119 
17120 	if (rval == 0) {
17121 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17122 		    "sd_reset_target: RESET_ALL\n");
17123 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17124 	}
17125 
17126 	mutex_enter(SD_MUTEX(un));
17127 
17128 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17129 }
17130 
17131 
17132 /*
17133  *    Function: sd_media_change_task
17134  *
17135  * Description: Recovery action for CDROM to become available.
17136  *
17137  *     Context: Executes in a taskq() thread context
17138  */
17139 
17140 static void
17141 sd_media_change_task(void *arg)
17142 {
17143 	struct	scsi_pkt	*pktp = arg;
17144 	struct	sd_lun		*un;
17145 	struct	buf		*bp;
17146 	struct	sd_xbuf		*xp;
17147 	int	err		= 0;
17148 	int	retry_count	= 0;
17149 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
17150 	struct	sd_sense_info	si;
17151 
17152 	ASSERT(pktp != NULL);
17153 	bp = (struct buf *)pktp->pkt_private;
17154 	ASSERT(bp != NULL);
17155 	xp = SD_GET_XBUF(bp);
17156 	ASSERT(xp != NULL);
17157 	un = SD_GET_UN(bp);
17158 	ASSERT(un != NULL);
17159 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17160 	ASSERT(un->un_f_monitor_media_state);
17161 
17162 	si.ssi_severity = SCSI_ERR_INFO;
17163 	si.ssi_pfa_flag = FALSE;
17164 
17165 	/*
17166 	 * When a reset is issued on a CDROM, it takes a long time to
17167 	 * recover. First few attempts to read capacity and other things
17168 	 * related to handling unit attention fail (with a ASC 0x4 and
17169 	 * ASCQ 0x1). In that case we want to do enough retries and we want
17170 	 * to limit the retries in other cases of genuine failures like
17171 	 * no media in drive.
17172 	 */
17173 	while (retry_count++ < retry_limit) {
17174 		if ((err = sd_handle_mchange(un)) == 0) {
17175 			break;
17176 		}
17177 		if (err == EAGAIN) {
17178 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17179 		}
17180 		/* Sleep for 0.5 sec. & try again */
17181 		delay(drv_usectohz(500000));
17182 	}
17183 
17184 	/*
17185 	 * Dispatch (retry or fail) the original command here,
17186 	 * along with appropriate console messages....
17187 	 *
17188 	 * Must grab the mutex before calling sd_retry_command,
17189 	 * sd_print_sense_msg and sd_return_failed_command.
17190 	 */
17191 	mutex_enter(SD_MUTEX(un));
17192 	if (err != SD_CMD_SUCCESS) {
17193 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17194 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17195 		si.ssi_severity = SCSI_ERR_FATAL;
17196 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17197 		sd_return_failed_command(un, bp, EIO);
17198 	} else {
17199 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17200 		    &si, EIO, (clock_t)0, NULL);
17201 	}
17202 	mutex_exit(SD_MUTEX(un));
17203 }
17204 
17205 
17206 
17207 /*
17208  *    Function: sd_handle_mchange
17209  *
17210  * Description: Perform geometry validation & other recovery when CDROM
17211  *		has been removed from drive.
17212  *
17213  * Return Code: 0 for success
17214  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17215  *		sd_send_scsi_READ_CAPACITY()
17216  *
17217  *     Context: Executes in a taskq() thread context
17218  */
17219 
17220 static int
17221 sd_handle_mchange(struct sd_lun *un)
17222 {
17223 	uint64_t	capacity;
17224 	uint32_t	lbasize;
17225 	int		rval;
17226 
17227 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17228 	ASSERT(un->un_f_monitor_media_state);
17229 
17230 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17231 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17232 		return (rval);
17233 	}
17234 
17235 	mutex_enter(SD_MUTEX(un));
17236 	sd_update_block_info(un, lbasize, capacity);
17237 
17238 	if (un->un_errstats != NULL) {
17239 		struct	sd_errstats *stp =
17240 		    (struct sd_errstats *)un->un_errstats->ks_data;
17241 		stp->sd_capacity.value.ui64 = (uint64_t)
17242 		    ((uint64_t)un->un_blockcount *
17243 		    (uint64_t)un->un_tgt_blocksize);
17244 	}
17245 
17246 
17247 	/*
17248 	 * Check if the media in the device is writable or not
17249 	 */
17250 	if (ISCD(un))
17251 		sd_check_for_writable_cd(un, SD_PATH_DIRECT_PRIORITY);
17252 
17253 	/*
17254 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17255 	 * valid geometry.
17256 	 */
17257 	mutex_exit(SD_MUTEX(un));
17258 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17259 
17260 
17261 	if (cmlb_validate(un->un_cmlbhandle, 0,
17262 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17263 		return (EIO);
17264 	} else {
17265 		if (un->un_f_pkstats_enabled) {
17266 			sd_set_pstats(un);
17267 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17268 			    "sd_handle_mchange: un:0x%p pstats created and "
17269 			    "set\n", un);
17270 		}
17271 	}
17272 
17273 
17274 	/*
17275 	 * Try to lock the door
17276 	 */
17277 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17278 	    SD_PATH_DIRECT_PRIORITY));
17279 }
17280 
17281 
17282 /*
17283  *    Function: sd_send_scsi_DOORLOCK
17284  *
17285  * Description: Issue the scsi DOOR LOCK command
17286  *
17287  *   Arguments: un    - pointer to driver soft state (unit) structure for
17288  *			this target.
17289  *		flag  - SD_REMOVAL_ALLOW
17290  *			SD_REMOVAL_PREVENT
17291  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17292  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17293  *			to use the USCSI "direct" chain and bypass the normal
17294  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17295  *			command is issued as part of an error recovery action.
17296  *
17297  * Return Code: 0   - Success
17298  *		errno return code from sd_send_scsi_cmd()
17299  *
17300  *     Context: Can sleep.
17301  */
17302 
17303 static int
17304 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17305 {
17306 	union scsi_cdb		cdb;
17307 	struct uscsi_cmd	ucmd_buf;
17308 	struct scsi_extended_sense	sense_buf;
17309 	int			status;
17310 
17311 	ASSERT(un != NULL);
17312 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17313 
17314 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17315 
17316 	/* already determined doorlock is not supported, fake success */
17317 	if (un->un_f_doorlock_supported == FALSE) {
17318 		return (0);
17319 	}
17320 
17321 	/*
17322 	 * If we are ejecting and see an SD_REMOVAL_PREVENT
17323 	 * ignore the command so we can complete the eject
17324 	 * operation.
17325 	 */
17326 	if (flag == SD_REMOVAL_PREVENT) {
17327 		mutex_enter(SD_MUTEX(un));
17328 		if (un->un_f_ejecting == TRUE) {
17329 			mutex_exit(SD_MUTEX(un));
17330 			return (EAGAIN);
17331 		}
17332 		mutex_exit(SD_MUTEX(un));
17333 	}
17334 
17335 	bzero(&cdb, sizeof (cdb));
17336 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17337 
17338 	cdb.scc_cmd = SCMD_DOORLOCK;
17339 	cdb.cdb_opaque[4] = (uchar_t)flag;
17340 
17341 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17342 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17343 	ucmd_buf.uscsi_bufaddr	= NULL;
17344 	ucmd_buf.uscsi_buflen	= 0;
17345 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17346 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17347 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17348 	ucmd_buf.uscsi_timeout	= 15;
17349 
17350 	SD_TRACE(SD_LOG_IO, un,
17351 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17352 
17353 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17354 	    UIO_SYSSPACE, path_flag);
17355 
17356 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17357 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17358 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17359 		/* fake success and skip subsequent doorlock commands */
17360 		un->un_f_doorlock_supported = FALSE;
17361 		return (0);
17362 	}
17363 
17364 	return (status);
17365 }
17366 
17367 /*
17368  *    Function: sd_send_scsi_READ_CAPACITY
17369  *
17370  * Description: This routine uses the scsi READ CAPACITY command to determine
17371  *		the device capacity in number of blocks and the device native
17372  *		block size. If this function returns a failure, then the
17373  *		values in *capp and *lbap are undefined.  If the capacity
17374  *		returned is 0xffffffff then the lun is too large for a
17375  *		normal READ CAPACITY command and the results of a
17376  *		READ CAPACITY 16 will be used instead.
17377  *
17378  *   Arguments: un   - ptr to soft state struct for the target
17379  *		capp - ptr to unsigned 64-bit variable to receive the
17380  *			capacity value from the command.
17381  *		lbap - ptr to unsigned 32-bit varaible to receive the
17382  *			block size value from the command
17383  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17384  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17385  *			to use the USCSI "direct" chain and bypass the normal
17386  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17387  *			command is issued as part of an error recovery action.
17388  *
17389  * Return Code: 0   - Success
17390  *		EIO - IO error
17391  *		EACCES - Reservation conflict detected
17392  *		EAGAIN - Device is becoming ready
17393  *		errno return code from sd_send_scsi_cmd()
17394  *
17395  *     Context: Can sleep.  Blocks until command completes.
17396  */
17397 
17398 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17399 
17400 static int
17401 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17402 	int path_flag)
17403 {
17404 	struct	scsi_extended_sense	sense_buf;
17405 	struct	uscsi_cmd	ucmd_buf;
17406 	union	scsi_cdb	cdb;
17407 	uint32_t		*capacity_buf;
17408 	uint64_t		capacity;
17409 	uint32_t		lbasize;
17410 	int			status;
17411 
17412 	ASSERT(un != NULL);
17413 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17414 	ASSERT(capp != NULL);
17415 	ASSERT(lbap != NULL);
17416 
17417 	SD_TRACE(SD_LOG_IO, un,
17418 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17419 
17420 	/*
17421 	 * First send a READ_CAPACITY command to the target.
17422 	 * (This command is mandatory under SCSI-2.)
17423 	 *
17424 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17425 	 * Medium Indicator bit is cleared.  The address field must be
17426 	 * zero if the PMI bit is zero.
17427 	 */
17428 	bzero(&cdb, sizeof (cdb));
17429 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17430 
17431 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17432 
17433 	cdb.scc_cmd = SCMD_READ_CAPACITY;
17434 
17435 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17436 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17437 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17438 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17439 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17440 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17441 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17442 	ucmd_buf.uscsi_timeout	= 60;
17443 
17444 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17445 	    UIO_SYSSPACE, path_flag);
17446 
17447 	switch (status) {
17448 	case 0:
17449 		/* Return failure if we did not get valid capacity data. */
17450 		if (ucmd_buf.uscsi_resid != 0) {
17451 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17452 			return (EIO);
17453 		}
17454 
17455 		/*
17456 		 * Read capacity and block size from the READ CAPACITY 10 data.
17457 		 * This data may be adjusted later due to device specific
17458 		 * issues.
17459 		 *
17460 		 * According to the SCSI spec, the READ CAPACITY 10
17461 		 * command returns the following:
17462 		 *
17463 		 *  bytes 0-3: Maximum logical block address available.
17464 		 *		(MSB in byte:0 & LSB in byte:3)
17465 		 *
17466 		 *  bytes 4-7: Block length in bytes
17467 		 *		(MSB in byte:4 & LSB in byte:7)
17468 		 *
17469 		 */
17470 		capacity = BE_32(capacity_buf[0]);
17471 		lbasize = BE_32(capacity_buf[1]);
17472 
17473 		/*
17474 		 * Done with capacity_buf
17475 		 */
17476 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17477 
17478 		/*
17479 		 * if the reported capacity is set to all 0xf's, then
17480 		 * this disk is too large and requires SBC-2 commands.
17481 		 * Reissue the request using READ CAPACITY 16.
17482 		 */
17483 		if (capacity == 0xffffffff) {
17484 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17485 			    &lbasize, path_flag);
17486 			if (status != 0) {
17487 				return (status);
17488 			}
17489 		}
17490 		break;	/* Success! */
17491 	case EIO:
17492 		switch (ucmd_buf.uscsi_status) {
17493 		case STATUS_RESERVATION_CONFLICT:
17494 			status = EACCES;
17495 			break;
17496 		case STATUS_CHECK:
17497 			/*
17498 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17499 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17500 			 */
17501 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17502 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17503 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17504 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17505 				return (EAGAIN);
17506 			}
17507 			break;
17508 		default:
17509 			break;
17510 		}
17511 		/* FALLTHRU */
17512 	default:
17513 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17514 		return (status);
17515 	}
17516 
17517 	/*
17518 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17519 	 * (2352 and 0 are common) so for these devices always force the value
17520 	 * to 2048 as required by the ATAPI specs.
17521 	 */
17522 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17523 		lbasize = 2048;
17524 	}
17525 
17526 	/*
17527 	 * Get the maximum LBA value from the READ CAPACITY data.
17528 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17529 	 * was cleared when issuing the command. This means that the LBA
17530 	 * returned from the device is the LBA of the last logical block
17531 	 * on the logical unit.  The actual logical block count will be
17532 	 * this value plus one.
17533 	 *
17534 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17535 	 * so scale the capacity value to reflect this.
17536 	 */
17537 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
17538 
17539 	/*
17540 	 * Copy the values from the READ CAPACITY command into the space
17541 	 * provided by the caller.
17542 	 */
17543 	*capp = capacity;
17544 	*lbap = lbasize;
17545 
17546 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17547 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17548 
17549 	/*
17550 	 * Both the lbasize and capacity from the device must be nonzero,
17551 	 * otherwise we assume that the values are not valid and return
17552 	 * failure to the caller. (4203735)
17553 	 */
17554 	if ((capacity == 0) || (lbasize == 0)) {
17555 		return (EIO);
17556 	}
17557 
17558 	return (0);
17559 }
17560 
17561 /*
17562  *    Function: sd_send_scsi_READ_CAPACITY_16
17563  *
17564  * Description: This routine uses the scsi READ CAPACITY 16 command to
17565  *		determine the device capacity in number of blocks and the
17566  *		device native block size.  If this function returns a failure,
17567  *		then the values in *capp and *lbap are undefined.
17568  *		This routine should always be called by
17569  *		sd_send_scsi_READ_CAPACITY which will appy any device
17570  *		sd_send_scsi_READ_CAPACITY which will apply any device
17571  *
17572  *   Arguments: un   - ptr to soft state struct for the target
17573  *		capp - ptr to unsigned 64-bit variable to receive the
17574  *			capacity value from the command.
17575  *		lbap - ptr to unsigned 32-bit variable to receive the
17576  *			block size value from the command
17577  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17578  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17579  *			to use the USCSI "direct" chain and bypass the normal
17580  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17581  *			this command is issued as part of an error recovery
17582  *			action.
17583  *
17584  * Return Code: 0   - Success
17585  *		EIO - IO error
17586  *		EACCES - Reservation conflict detected
17587  *		EAGAIN - Device is becoming ready
17588  *		errno return code from sd_send_scsi_cmd()
17589  *
17590  *     Context: Can sleep.  Blocks until command completes.
17591  */
17592 
17593 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17594 
17595 static int
17596 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17597 	uint32_t *lbap, int path_flag)
17598 {
17599 	struct	scsi_extended_sense	sense_buf;
17600 	struct	uscsi_cmd	ucmd_buf;
17601 	union	scsi_cdb	cdb;
17602 	uint64_t		*capacity16_buf;
17603 	uint64_t		capacity;
17604 	uint32_t		lbasize;
17605 	int			status;
17606 
17607 	ASSERT(un != NULL);
17608 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17609 	ASSERT(capp != NULL);
17610 	ASSERT(lbap != NULL);
17611 
17612 	SD_TRACE(SD_LOG_IO, un,
17613 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17614 
17615 	/*
17616 	 * First send a READ_CAPACITY_16 command to the target.
17617 	 *
17618 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17619 	 * Medium Indicator bit is cleared.  The address field must be
17620 	 * zero if the PMI bit is zero.
17621 	 */
17622 	bzero(&cdb, sizeof (cdb));
17623 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17624 
17625 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17626 
17627 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17628 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17629 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17630 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17631 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17632 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17633 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17634 	ucmd_buf.uscsi_timeout	= 60;
17635 
17636 	/*
17637 	 * Read Capacity (16) is a Service Action In command.  One
17638 	 * command byte (0x9E) is overloaded for multiple operations,
17639 	 * with the second CDB byte specifying the desired operation
17640 	 */
17641 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17642 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
17643 
17644 	/*
17645 	 * Fill in allocation length field
17646 	 */
17647 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17648 
17649 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17650 	    UIO_SYSSPACE, path_flag);
17651 
17652 	switch (status) {
17653 	case 0:
17654 		/* Return failure if we did not get valid capacity data. */
17655 		if (ucmd_buf.uscsi_resid > 20) {
17656 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17657 			return (EIO);
17658 		}
17659 
17660 		/*
17661 		 * Read capacity and block size from the READ CAPACITY 10 data.
17662 		 * This data may be adjusted later due to device specific
17663 		 * issues.
17664 		 *
17665 		 * According to the SCSI spec, the READ CAPACITY 10
17666 		 * command returns the following:
17667 		 *
17668 		 *  bytes 0-7: Maximum logical block address available.
17669 		 *		(MSB in byte:0 & LSB in byte:7)
17670 		 *
17671 		 *  bytes 8-11: Block length in bytes
17672 		 *		(MSB in byte:8 & LSB in byte:11)
17673 		 *
17674 		 */
17675 		capacity = BE_64(capacity16_buf[0]);
17676 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
17677 
17678 		/*
17679 		 * Done with capacity16_buf
17680 		 */
17681 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17682 
17683 		/*
17684 		 * if the reported capacity is set to all 0xf's, then
17685 		 * this disk is too large.  This could only happen with
17686 		 * a device that supports LBAs larger than 64 bits which
17687 		 * are not defined by any current T10 standards.
17688 		 */
17689 		if (capacity == 0xffffffffffffffff) {
17690 			return (EIO);
17691 		}
17692 		break;	/* Success! */
17693 	case EIO:
17694 		switch (ucmd_buf.uscsi_status) {
17695 		case STATUS_RESERVATION_CONFLICT:
17696 			status = EACCES;
17697 			break;
17698 		case STATUS_CHECK:
17699 			/*
17700 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17701 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17702 			 */
17703 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17704 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17705 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17706 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17707 				return (EAGAIN);
17708 			}
17709 			break;
17710 		default:
17711 			break;
17712 		}
17713 		/* FALLTHRU */
17714 	default:
17715 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17716 		return (status);
17717 	}
17718 
17719 	*capp = capacity;
17720 	*lbap = lbasize;
17721 
17722 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17723 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17724 
17725 	return (0);
17726 }
17727 
17728 
17729 /*
17730  *    Function: sd_send_scsi_START_STOP_UNIT
17731  *
17732  * Description: Issue a scsi START STOP UNIT command to the target.
17733  *
17734  *   Arguments: un    - pointer to driver soft state (unit) structure for
17735  *			this target.
17736  *		flag  - SD_TARGET_START
17737  *			SD_TARGET_STOP
17738  *			SD_TARGET_EJECT
17739  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17740  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17741  *			to use the USCSI "direct" chain and bypass the normal
17742  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17743  *			command is issued as part of an error recovery action.
17744  *
17745  * Return Code: 0   - Success
17746  *		EIO - IO error
17747  *		EACCES - Reservation conflict detected
17748  *		ENXIO  - Not Ready, medium not present
17749  *		errno return code from sd_send_scsi_cmd()
17750  *
17751  *     Context: Can sleep.
17752  */
17753 
17754 static int
17755 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17756 {
17757 	struct	scsi_extended_sense	sense_buf;
17758 	union scsi_cdb		cdb;
17759 	struct uscsi_cmd	ucmd_buf;
17760 	int			status;
17761 
17762 	ASSERT(un != NULL);
17763 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17764 
17765 	SD_TRACE(SD_LOG_IO, un,
17766 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
17767 
17768 	if (un->un_f_check_start_stop &&
17769 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
17770 	    (un->un_f_start_stop_supported != TRUE)) {
17771 		return (0);
17772 	}
17773 
17774 	/*
17775 	 * If we are performing an eject operation and
17776 	 * we receive any command other than SD_TARGET_EJECT
17777 	 * we should immediately return.
17778 	 */
17779 	if (flag != SD_TARGET_EJECT) {
17780 		mutex_enter(SD_MUTEX(un));
17781 		if (un->un_f_ejecting == TRUE) {
17782 			mutex_exit(SD_MUTEX(un));
17783 			return (EAGAIN);
17784 		}
17785 		mutex_exit(SD_MUTEX(un));
17786 	}
17787 
17788 	bzero(&cdb, sizeof (cdb));
17789 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17790 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
17791 
17792 	cdb.scc_cmd = SCMD_START_STOP;
17793 	cdb.cdb_opaque[4] = (uchar_t)flag;
17794 
17795 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17796 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17797 	ucmd_buf.uscsi_bufaddr	= NULL;
17798 	ucmd_buf.uscsi_buflen	= 0;
17799 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17800 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
17801 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17802 	ucmd_buf.uscsi_timeout	= 200;
17803 
17804 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17805 	    UIO_SYSSPACE, path_flag);
17806 
17807 	switch (status) {
17808 	case 0:
17809 		break;	/* Success! */
17810 	case EIO:
17811 		switch (ucmd_buf.uscsi_status) {
17812 		case STATUS_RESERVATION_CONFLICT:
17813 			status = EACCES;
17814 			break;
17815 		case STATUS_CHECK:
17816 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
17817 				switch (scsi_sense_key(
17818 						(uint8_t *)&sense_buf)) {
17819 				case KEY_ILLEGAL_REQUEST:
17820 					status = ENOTSUP;
17821 					break;
17822 				case KEY_NOT_READY:
17823 					if (scsi_sense_asc(
17824 						    (uint8_t *)&sense_buf)
17825 					    == 0x3A) {
17826 						status = ENXIO;
17827 					}
17828 					break;
17829 				default:
17830 					break;
17831 				}
17832 			}
17833 			break;
17834 		default:
17835 			break;
17836 		}
17837 		break;
17838 	default:
17839 		break;
17840 	}
17841 
17842 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
17843 
17844 	return (status);
17845 }
17846 
17847 
17848 /*
17849  *    Function: sd_start_stop_unit_callback
17850  *
17851  * Description: timeout(9F) callback to begin recovery process for a
17852  *		device that has spun down.
17853  *
17854  *   Arguments: arg - pointer to associated softstate struct.
17855  *
17856  *     Context: Executes in a timeout(9F) thread context
17857  */
17858 
17859 static void
17860 sd_start_stop_unit_callback(void *arg)
17861 {
17862 	struct sd_lun	*un = arg;
17863 	ASSERT(un != NULL);
17864 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17865 
17866 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
17867 
17868 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
17869 }
17870 
17871 
17872 /*
17873  *    Function: sd_start_stop_unit_task
17874  *
17875  * Description: Recovery procedure when a drive is spun down.
17876  *
17877  *   Arguments: arg - pointer to associated softstate struct.
17878  *
17879  *     Context: Executes in a taskq() thread context
17880  */
17881 
17882 static void
17883 sd_start_stop_unit_task(void *arg)
17884 {
17885 	struct sd_lun	*un = arg;
17886 
17887 	ASSERT(un != NULL);
17888 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17889 
17890 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
17891 
17892 	/*
17893 	 * Some unformatted drives report not ready error, no need to
17894 	 * restart if format has been initiated.
17895 	 */
17896 	mutex_enter(SD_MUTEX(un));
17897 	if (un->un_f_format_in_progress == TRUE) {
17898 		mutex_exit(SD_MUTEX(un));
17899 		return;
17900 	}
17901 	mutex_exit(SD_MUTEX(un));
17902 
17903 	/*
17904 	 * When a START STOP command is issued from here, it is part of a
17905 	 * failure recovery operation and must be issued before any other
17906 	 * commands, including any pending retries. Thus it must be sent
17907 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
17908 	 * succeeds or not, we will start I/O after the attempt.
17909 	 */
17910 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
17911 	    SD_PATH_DIRECT_PRIORITY);
17912 
17913 	/*
17914 	 * The above call blocks until the START_STOP_UNIT command completes.
17915 	 * Now that it has completed, we must re-try the original IO that
17916 	 * received the NOT READY condition in the first place. There are
17917 	 * three possible conditions here:
17918 	 *
17919 	 *  (1) The original IO is on un_retry_bp.
17920 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
17921 	 *	is NULL.
17922 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
17923 	 *	points to some other, unrelated bp.
17924 	 *
17925 	 * For each case, we must call sd_start_cmds() with un_retry_bp
17926 	 * as the argument. If un_retry_bp is NULL, this will initiate
17927 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
17928 	 * then this will process the bp on un_retry_bp. That may or may not
17929 	 * be the original IO, but that does not matter: the important thing
17930 	 * is to keep the IO processing going at this point.
17931 	 *
17932 	 * Note: This is a very specific error recovery sequence associated
17933 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
17934 	 * serialize the I/O with completion of the spin-up.
17935 	 */
17936 	mutex_enter(SD_MUTEX(un));
17937 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17938 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
17939 	    un, un->un_retry_bp);
17940 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
17941 	sd_start_cmds(un, un->un_retry_bp);
17942 	mutex_exit(SD_MUTEX(un));
17943 
17944 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
17945 }
17946 
17947 
17948 /*
17949  *    Function: sd_send_scsi_INQUIRY
17950  *
17951  * Description: Issue the scsi INQUIRY command.
17952  *
17953  *   Arguments: un
17954  *		bufaddr
17955  *		buflen
17956  *		evpd
17957  *		page_code
17958  *		page_length
17959  *
17960  * Return Code: 0   - Success
17961  *		errno return code from sd_send_scsi_cmd()
17962  *
17963  *     Context: Can sleep. Does not return until command is completed.
17964  */
17965 
17966 static int
17967 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
17968 	uchar_t evpd, uchar_t page_code, size_t *residp)
17969 {
17970 	union scsi_cdb		cdb;
17971 	struct uscsi_cmd	ucmd_buf;
17972 	int			status;
17973 
17974 	ASSERT(un != NULL);
17975 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17976 	ASSERT(bufaddr != NULL);
17977 
17978 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
17979 
17980 	bzero(&cdb, sizeof (cdb));
17981 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17982 	bzero(bufaddr, buflen);
17983 
17984 	cdb.scc_cmd = SCMD_INQUIRY;
17985 	cdb.cdb_opaque[1] = evpd;
17986 	cdb.cdb_opaque[2] = page_code;
17987 	FORMG0COUNT(&cdb, buflen);
17988 
17989 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17990 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17991 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
17992 	ucmd_buf.uscsi_buflen	= buflen;
17993 	ucmd_buf.uscsi_rqbuf	= NULL;
17994 	ucmd_buf.uscsi_rqlen	= 0;
17995 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
17996 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
17997 
17998 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17999 	    UIO_SYSSPACE, SD_PATH_DIRECT);
18000 
18001 	if ((status == 0) && (residp != NULL)) {
18002 		*residp = ucmd_buf.uscsi_resid;
18003 	}
18004 
18005 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
18006 
18007 	return (status);
18008 }
18009 
18010 
18011 /*
18012  *    Function: sd_send_scsi_TEST_UNIT_READY
18013  *
18014  * Description: Issue the scsi TEST UNIT READY command.
18015  *		This routine can be told to set the flag USCSI_DIAGNOSE to
18016  *		prevent retrying failed commands. Use this when the intent
18017  *		is either to check for device readiness, to clear a Unit
18018  *		Attention, or to clear any outstanding sense data.
18019  *		However under specific conditions the expected behavior
18020  *		is for retries to bring a device ready, so use the flag
18021  *		with caution.
18022  *
18023  *   Arguments: un
18024  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
18025  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
18026  *			0: dont check for media present, do retries on cmd.
18027  *
18028  * Return Code: 0   - Success
18029  *		EIO - IO error
18030  *		EACCES - Reservation conflict detected
18031  *		ENXIO  - Not Ready, medium not present
18032  *		errno return code from sd_send_scsi_cmd()
18033  *
18034  *     Context: Can sleep. Does not return until command is completed.
18035  */
18036 
18037 static int
18038 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18039 {
18040 	struct	scsi_extended_sense	sense_buf;
18041 	union scsi_cdb		cdb;
18042 	struct uscsi_cmd	ucmd_buf;
18043 	int			status;
18044 
18045 	ASSERT(un != NULL);
18046 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18047 
18048 	SD_TRACE(SD_LOG_IO, un,
18049 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18050 
18051 	/*
18052 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
18053 	 * timeouts when they receive a TUR and the queue is not empty. Check
18054 	 * the configuration flag set during attach (indicating the drive has
18055 	 * this firmware bug) and un_ncmds_in_transport before issuing the
18056 	 * TUR. If there are
18057 	 * pending commands return success, this is a bit arbitrary but is ok
18058 	 * for non-removables (i.e. the eliteI disks) and non-clustering
18059 	 * configurations.
18060 	 */
18061 	if (un->un_f_cfg_tur_check == TRUE) {
18062 		mutex_enter(SD_MUTEX(un));
18063 		if (un->un_ncmds_in_transport != 0) {
18064 			mutex_exit(SD_MUTEX(un));
18065 			return (0);
18066 		}
18067 		mutex_exit(SD_MUTEX(un));
18068 	}
18069 
18070 	bzero(&cdb, sizeof (cdb));
18071 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18072 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18073 
18074 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18075 
18076 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18077 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18078 	ucmd_buf.uscsi_bufaddr	= NULL;
18079 	ucmd_buf.uscsi_buflen	= 0;
18080 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18081 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18082 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18083 
18084 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18085 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18086 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18087 	}
18088 	ucmd_buf.uscsi_timeout	= 60;
18089 
18090 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18091 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18092 	    SD_PATH_STANDARD));
18093 
18094 	switch (status) {
18095 	case 0:
18096 		break;	/* Success! */
18097 	case EIO:
18098 		switch (ucmd_buf.uscsi_status) {
18099 		case STATUS_RESERVATION_CONFLICT:
18100 			status = EACCES;
18101 			break;
18102 		case STATUS_CHECK:
18103 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18104 				break;
18105 			}
18106 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18107 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18108 				KEY_NOT_READY) &&
18109 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18110 				status = ENXIO;
18111 			}
18112 			break;
18113 		default:
18114 			break;
18115 		}
18116 		break;
18117 	default:
18118 		break;
18119 	}
18120 
18121 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18122 
18123 	return (status);
18124 }
18125 
18126 
18127 /*
18128  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18129  *
18130  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18131  *
18132  *   Arguments: un
18133  *
18134  * Return Code: 0   - Success
18135  *		EACCES
18136  *		ENOTSUP
18137  *		errno return code from sd_send_scsi_cmd()
18138  *
18139  *     Context: Can sleep. Does not return until command is completed.
18140  */
18141 
18142 static int
18143 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18144 	uint16_t data_len, uchar_t *data_bufp)
18145 {
18146 	struct scsi_extended_sense	sense_buf;
18147 	union scsi_cdb		cdb;
18148 	struct uscsi_cmd	ucmd_buf;
18149 	int			status;
18150 	int			no_caller_buf = FALSE;
18151 
18152 	ASSERT(un != NULL);
18153 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18154 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18155 
18156 	SD_TRACE(SD_LOG_IO, un,
18157 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18158 
18159 	bzero(&cdb, sizeof (cdb));
18160 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18161 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18162 	if (data_bufp == NULL) {
18163 		/* Allocate a default buf if the caller did not give one */
18164 		ASSERT(data_len == 0);
18165 		data_len  = MHIOC_RESV_KEY_SIZE;
18166 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18167 		no_caller_buf = TRUE;
18168 	}
18169 
18170 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18171 	cdb.cdb_opaque[1] = usr_cmd;
18172 	FORMG1COUNT(&cdb, data_len);
18173 
18174 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18175 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18176 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18177 	ucmd_buf.uscsi_buflen	= data_len;
18178 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18179 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18180 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18181 	ucmd_buf.uscsi_timeout	= 60;
18182 
18183 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18184 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18185 
18186 	switch (status) {
18187 	case 0:
18188 		break;	/* Success! */
18189 	case EIO:
18190 		switch (ucmd_buf.uscsi_status) {
18191 		case STATUS_RESERVATION_CONFLICT:
18192 			status = EACCES;
18193 			break;
18194 		case STATUS_CHECK:
18195 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18196 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18197 				KEY_ILLEGAL_REQUEST)) {
18198 				status = ENOTSUP;
18199 			}
18200 			break;
18201 		default:
18202 			break;
18203 		}
18204 		break;
18205 	default:
18206 		break;
18207 	}
18208 
18209 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18210 
18211 	if (no_caller_buf == TRUE) {
18212 		kmem_free(data_bufp, data_len);
18213 	}
18214 
18215 	return (status);
18216 }
18217 
18218 
18219 /*
18220  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18221  *
18222  * Description: This routine is the driver entry point for handling CD-ROM
18223  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
18224  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
18225  *		device.
18226  *
18227  *   Arguments: un  -   Pointer to soft state struct for the target.
18228  *		usr_cmd SCSI-3 reservation facility command (one of
18229  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
18230  *			SD_SCSI3_PREEMPTANDABORT)
18231  *		usr_bufp - user provided pointer register, reserve descriptor or
18232  *			preempt and abort structure (mhioc_register_t,
18233  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
18234  *
18235  * Return Code: 0   - Success
18236  *		EACCES
18237  *		ENOTSUP
18238  *		errno return code from sd_send_scsi_cmd()
18239  *
18240  *     Context: Can sleep. Does not return until command is completed.
18241  */
18242 
18243 static int
18244 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18245 	uchar_t	*usr_bufp)
18246 {
18247 	struct scsi_extended_sense	sense_buf;
18248 	union scsi_cdb		cdb;
18249 	struct uscsi_cmd	ucmd_buf;
18250 	int			status;
18251 	uchar_t			data_len = sizeof (sd_prout_t);
18252 	sd_prout_t		*prp;
18253 
18254 	ASSERT(un != NULL);
18255 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18256 	ASSERT(data_len == 24);	/* required by scsi spec */
18257 
18258 	SD_TRACE(SD_LOG_IO, un,
18259 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18260 
18261 	if (usr_bufp == NULL) {
18262 		return (EINVAL);
18263 	}
18264 
18265 	bzero(&cdb, sizeof (cdb));
18266 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18267 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18268 	prp = kmem_zalloc(data_len, KM_SLEEP);
18269 
18270 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18271 	cdb.cdb_opaque[1] = usr_cmd;
18272 	FORMG1COUNT(&cdb, data_len);
18273 
18274 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18275 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18276 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18277 	ucmd_buf.uscsi_buflen	= data_len;
18278 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18279 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18280 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18281 	ucmd_buf.uscsi_timeout	= 60;
18282 
18283 	switch (usr_cmd) {
18284 	case SD_SCSI3_REGISTER: {
18285 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18286 
18287 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18288 		bcopy(ptr->newkey.key, prp->service_key,
18289 		    MHIOC_RESV_KEY_SIZE);
18290 		prp->aptpl = ptr->aptpl;
18291 		break;
18292 	}
18293 	case SD_SCSI3_RESERVE:
18294 	case SD_SCSI3_RELEASE: {
18295 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18296 
18297 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18298 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18299 		cdb.cdb_opaque[2] = ptr->type;
18300 		break;
18301 	}
18302 	case SD_SCSI3_PREEMPTANDABORT: {
18303 		mhioc_preemptandabort_t *ptr =
18304 		    (mhioc_preemptandabort_t *)usr_bufp;
18305 
18306 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18307 		bcopy(ptr->victim_key.key, prp->service_key,
18308 		    MHIOC_RESV_KEY_SIZE);
18309 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18310 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
18311 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18312 		break;
18313 	}
18314 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18315 	{
18316 		mhioc_registerandignorekey_t *ptr;
18317 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18318 		bcopy(ptr->newkey.key,
18319 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18320 		prp->aptpl = ptr->aptpl;
18321 		break;
18322 	}
18323 	default:
18324 		ASSERT(FALSE);
18325 		break;
18326 	}
18327 
18328 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18329 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18330 
18331 	switch (status) {
18332 	case 0:
18333 		break;	/* Success! */
18334 	case EIO:
18335 		switch (ucmd_buf.uscsi_status) {
18336 		case STATUS_RESERVATION_CONFLICT:
18337 			status = EACCES;
18338 			break;
18339 		case STATUS_CHECK:
18340 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18341 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18342 				KEY_ILLEGAL_REQUEST)) {
18343 				status = ENOTSUP;
18344 			}
18345 			break;
18346 		default:
18347 			break;
18348 		}
18349 		break;
18350 	default:
18351 		break;
18352 	}
18353 
18354 	kmem_free(prp, data_len);
18355 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18356 	return (status);
18357 }
18358 
18359 
18360 /*
18361  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18362  *
18363  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18364  *
18365  *   Arguments: un - pointer to the target's soft state struct
18366  *
18367  * Return Code: 0 - success
18368  *		errno-type error code
18369  *
18370  *     Context: kernel thread context only.
18371  */
18372 
18373 static int
18374 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18375 {
18376 	struct sd_uscsi_info	*uip;
18377 	struct uscsi_cmd	*uscmd;
18378 	union scsi_cdb		*cdb;
18379 	struct buf		*bp;
18380 	int			rval = 0;
18381 
18382 	SD_TRACE(SD_LOG_IO, un,
18383 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18384 
18385 	ASSERT(un != NULL);
18386 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18387 
18388 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
18389 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
18390 
18391 	/*
18392 	 * First get some memory for the uscsi_cmd struct and cdb
18393 	 * and initialize for SYNCHRONIZE_CACHE cmd.
18394 	 */
18395 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
18396 	uscmd->uscsi_cdblen = CDB_GROUP1;
18397 	uscmd->uscsi_cdb = (caddr_t)cdb;
18398 	uscmd->uscsi_bufaddr = NULL;
18399 	uscmd->uscsi_buflen = 0;
18400 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18401 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18402 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18403 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18404 	uscmd->uscsi_timeout = sd_io_time;
18405 
18406 	/*
18407 	 * Allocate an sd_uscsi_info struct and fill it with the info
18408 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18409 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18410 	 * since we allocate the buf here in this function, we do not
18411 	 * need to preserve the prior contents of b_private.
18412 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
18413 	 */
18414 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18415 	uip->ui_flags = SD_PATH_DIRECT;
18416 	uip->ui_cmdp  = uscmd;
18417 
18418 	bp = getrbuf(KM_SLEEP);
18419 	bp->b_private = uip;
18420 
18421 	/*
18422 	 * Setup buffer to carry uscsi request.
18423 	 */
18424 	bp->b_flags  = B_BUSY;
18425 	bp->b_bcount = 0;
18426 	bp->b_blkno  = 0;
18427 
18428 	if (dkc != NULL) {
18429 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18430 		uip->ui_dkc = *dkc;
18431 	}
18432 
18433 	bp->b_edev = SD_GET_DEV(un);
18434 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18435 
18436 	(void) sd_uscsi_strategy(bp);
18437 
18438 	/*
18439 	 * If synchronous request, wait for completion
18440 	 * If async just return and let b_iodone callback
18441 	 * cleanup.
18442 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
18443 	 * but it was also incremented in sd_uscsi_strategy(), so
18444 	 * we should be ok.
18445 	 */
18446 	if (dkc == NULL) {
18447 		(void) biowait(bp);
18448 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18449 	}
18450 
18451 	return (rval);
18452 }
18453 
18454 
18455 static int
18456 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18457 {
18458 	struct sd_uscsi_info *uip;
18459 	struct uscsi_cmd *uscmd;
18460 	uint8_t *sense_buf;
18461 	struct sd_lun *un;
18462 	int status;
18463 
18464 	uip = (struct sd_uscsi_info *)(bp->b_private);
18465 	ASSERT(uip != NULL);
18466 
18467 	uscmd = uip->ui_cmdp;
18468 	ASSERT(uscmd != NULL);
18469 
18470 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18471 	ASSERT(sense_buf != NULL);
18472 
18473 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18474 	ASSERT(un != NULL);
18475 
18476 	status = geterror(bp);
18477 	switch (status) {
18478 	case 0:
18479 		break;	/* Success! */
18480 	case EIO:
18481 		switch (uscmd->uscsi_status) {
18482 		case STATUS_RESERVATION_CONFLICT:
18483 			/* Ignore reservation conflict */
18484 			status = 0;
18485 			goto done;
18486 
18487 		case STATUS_CHECK:
18488 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18489 			    (scsi_sense_key(sense_buf) ==
18490 				KEY_ILLEGAL_REQUEST)) {
18491 				/* Ignore Illegal Request error */
18492 				mutex_enter(SD_MUTEX(un));
18493 				un->un_f_sync_cache_supported = FALSE;
18494 				mutex_exit(SD_MUTEX(un));
18495 				status = ENOTSUP;
18496 				goto done;
18497 			}
18498 			break;
18499 		default:
18500 			break;
18501 		}
18502 		/* FALLTHRU */
18503 	default:
18504 		/*
18505 		 * Don't log an error message if this device
18506 		 * has removable media.
18507 		 */
18508 		if (!un->un_f_has_removable_media) {
18509 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18510 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18511 		}
18512 		break;
18513 	}
18514 
18515 done:
18516 	if (uip->ui_dkc.dkc_callback != NULL) {
18517 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18518 	}
18519 
18520 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18521 	freerbuf(bp);
18522 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18523 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18524 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18525 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18526 
18527 	return (status);
18528 }
18529 
18530 
18531 /*
18532  *    Function: sd_send_scsi_GET_CONFIGURATION
18533  *
18534  * Description: Issues the get configuration command to the device.
18535  *		Called from sd_check_for_writable_cd & sd_get_media_info
18536  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
18537  *   Arguments: un
18538  *		ucmdbuf
18539  *		rqbuf
18540  *		rqbuflen
18541  *		bufaddr
18542  *		buflen
18543  *		path_flag
18544  *
18545  * Return Code: 0   - Success
18546  *		errno return code from sd_send_scsi_cmd()
18547  *
18548  *     Context: Can sleep. Does not return until command is completed.
18549  *
18550  */
18551 
18552 static int
18553 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18554 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
18555 	int path_flag)
18556 {
18557 	char	cdb[CDB_GROUP1];
18558 	int	status;
18559 
18560 	ASSERT(un != NULL);
18561 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18562 	ASSERT(bufaddr != NULL);
18563 	ASSERT(ucmdbuf != NULL);
18564 	ASSERT(rqbuf != NULL);
18565 
18566 	SD_TRACE(SD_LOG_IO, un,
18567 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18568 
18569 	bzero(cdb, sizeof (cdb));
18570 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18571 	bzero(rqbuf, rqbuflen);
18572 	bzero(bufaddr, buflen);
18573 
18574 	/*
18575 	 * Set up cdb field for the get configuration command.
18576 	 */
18577 	cdb[0] = SCMD_GET_CONFIGURATION;
18578 	cdb[1] = 0x02;  /* Requested Type */
18579 	cdb[8] = SD_PROFILE_HEADER_LEN;
18580 	ucmdbuf->uscsi_cdb = cdb;
18581 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18582 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18583 	ucmdbuf->uscsi_buflen = buflen;
18584 	ucmdbuf->uscsi_timeout = sd_io_time;
18585 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18586 	ucmdbuf->uscsi_rqlen = rqbuflen;
18587 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18588 
18589 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18590 	    UIO_SYSSPACE, path_flag);
18591 
18592 	switch (status) {
18593 	case 0:
18594 		break;  /* Success! */
18595 	case EIO:
18596 		switch (ucmdbuf->uscsi_status) {
18597 		case STATUS_RESERVATION_CONFLICT:
18598 			status = EACCES;
18599 			break;
18600 		default:
18601 			break;
18602 		}
18603 		break;
18604 	default:
18605 		break;
18606 	}
18607 
18608 	if (status == 0) {
18609 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18610 		    "sd_send_scsi_GET_CONFIGURATION: data",
18611 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18612 	}
18613 
18614 	SD_TRACE(SD_LOG_IO, un,
18615 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18616 
18617 	return (status);
18618 }
18619 
18620 /*
18621  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18622  *
18623  * Description: Issues the get configuration command to the device to
18624  *              retrieve a specfic feature. Called from
18625  *		sd_check_for_writable_cd & sd_set_mmc_caps.
18626  *   Arguments: un
18627  *              ucmdbuf
18628  *              rqbuf
18629  *              rqbuflen
18630  *              bufaddr
18631  *              buflen
18632  *		feature
18633  *
18634  * Return Code: 0   - Success
18635  *              errno return code from sd_send_scsi_cmd()
18636  *
18637  *     Context: Can sleep. Does not return until command is completed.
18638  *
18639  */
18640 static int
18641 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18642 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18643 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
18644 {
18645 	char    cdb[CDB_GROUP1];
18646 	int	status;
18647 
18648 	ASSERT(un != NULL);
18649 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18650 	ASSERT(bufaddr != NULL);
18651 	ASSERT(ucmdbuf != NULL);
18652 	ASSERT(rqbuf != NULL);
18653 
18654 	SD_TRACE(SD_LOG_IO, un,
18655 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18656 
18657 	bzero(cdb, sizeof (cdb));
18658 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18659 	bzero(rqbuf, rqbuflen);
18660 	bzero(bufaddr, buflen);
18661 
18662 	/*
18663 	 * Set up cdb field for the get configuration command.
18664 	 */
18665 	cdb[0] = SCMD_GET_CONFIGURATION;
18666 	cdb[1] = 0x02;  /* Requested Type */
18667 	cdb[3] = feature;
18668 	cdb[8] = buflen;
18669 	ucmdbuf->uscsi_cdb = cdb;
18670 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18671 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18672 	ucmdbuf->uscsi_buflen = buflen;
18673 	ucmdbuf->uscsi_timeout = sd_io_time;
18674 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18675 	ucmdbuf->uscsi_rqlen = rqbuflen;
18676 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18677 
18678 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18679 	    UIO_SYSSPACE, path_flag);
18680 
18681 	switch (status) {
18682 	case 0:
18683 		break;  /* Success! */
18684 	case EIO:
18685 		switch (ucmdbuf->uscsi_status) {
18686 		case STATUS_RESERVATION_CONFLICT:
18687 			status = EACCES;
18688 			break;
18689 		default:
18690 			break;
18691 		}
18692 		break;
18693 	default:
18694 		break;
18695 	}
18696 
18697 	if (status == 0) {
18698 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18699 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
18700 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18701 	}
18702 
18703 	SD_TRACE(SD_LOG_IO, un,
18704 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
18705 
18706 	return (status);
18707 }
18708 
18709 
18710 /*
18711  *    Function: sd_send_scsi_MODE_SENSE
18712  *
18713  * Description: Utility function for issuing a scsi MODE SENSE command.
18714  *		Note: This routine uses a consistent implementation for Group0,
18715  *		Group1, and Group2 commands across all platforms. ATAPI devices
18716  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
18717  *
18718  *   Arguments: un - pointer to the softstate struct for the target.
18719  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
18720  *			  CDB_GROUP[1|2] (10 byte).
18721  *		bufaddr - buffer for page data retrieved from the target.
18722  *		buflen - size of page to be retrieved.
18723  *		page_code - page code of data to be retrieved from the target.
18724  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18725  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18726  *			to use the USCSI "direct" chain and bypass the normal
18727  *			command waitq.
18728  *
18729  * Return Code: 0   - Success
18730  *		errno return code from sd_send_scsi_cmd()
18731  *
18732  *     Context: Can sleep. Does not return until command is completed.
18733  */
18734 
18735 static int
18736 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18737 	size_t buflen,  uchar_t page_code, int path_flag)
18738 {
18739 	struct	scsi_extended_sense	sense_buf;
18740 	union scsi_cdb		cdb;
18741 	struct uscsi_cmd	ucmd_buf;
18742 	int			status;
18743 	int			headlen;
18744 
18745 	ASSERT(un != NULL);
18746 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18747 	ASSERT(bufaddr != NULL);
18748 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18749 	    (cdbsize == CDB_GROUP2));
18750 
18751 	SD_TRACE(SD_LOG_IO, un,
18752 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
18753 
18754 	bzero(&cdb, sizeof (cdb));
18755 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18756 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18757 	bzero(bufaddr, buflen);
18758 
18759 	if (cdbsize == CDB_GROUP0) {
18760 		cdb.scc_cmd = SCMD_MODE_SENSE;
18761 		cdb.cdb_opaque[2] = page_code;
18762 		FORMG0COUNT(&cdb, buflen);
18763 		headlen = MODE_HEADER_LENGTH;
18764 	} else {
18765 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
18766 		cdb.cdb_opaque[2] = page_code;
18767 		FORMG1COUNT(&cdb, buflen);
18768 		headlen = MODE_HEADER_LENGTH_GRP2;
18769 	}
18770 
18771 	ASSERT(headlen <= buflen);
18772 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18773 
18774 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18775 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18776 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18777 	ucmd_buf.uscsi_buflen	= buflen;
18778 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18779 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18780 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18781 	ucmd_buf.uscsi_timeout	= 60;
18782 
18783 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18784 	    UIO_SYSSPACE, path_flag);
18785 
18786 	switch (status) {
18787 	case 0:
18788 		/*
18789 		 * sr_check_wp() uses 0x3f page code and check the header of
18790 		 * mode page to determine if target device is write-protected.
18791 		 * But some USB devices return 0 bytes for 0x3f page code. For
18792 		 * this case, make sure that mode page header is returned at
18793 		 * least.
18794 		 */
18795 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
18796 			status = EIO;
18797 		break;	/* Success! */
18798 	case EIO:
18799 		switch (ucmd_buf.uscsi_status) {
18800 		case STATUS_RESERVATION_CONFLICT:
18801 			status = EACCES;
18802 			break;
18803 		default:
18804 			break;
18805 		}
18806 		break;
18807 	default:
18808 		break;
18809 	}
18810 
18811 	if (status == 0) {
18812 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
18813 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18814 	}
18815 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
18816 
18817 	return (status);
18818 }
18819 
18820 
18821 /*
18822  *    Function: sd_send_scsi_MODE_SELECT
18823  *
18824  * Description: Utility function for issuing a scsi MODE SELECT command.
18825  *		Note: This routine uses a consistent implementation for Group0,
18826  *		Group1, and Group2 commands across all platforms. ATAPI devices
18827  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
18828  *
18829  *   Arguments: un - pointer to the softstate struct for the target.
18830  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
18831  *			  CDB_GROUP[1|2] (10 byte).
18832  *		bufaddr - buffer for page data retrieved from the target.
18833  *		buflen - size of page to be retrieved.
18834  *		save_page - boolean to determin if SP bit should be set.
18835  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18836  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18837  *			to use the USCSI "direct" chain and bypass the normal
18838  *			command waitq.
18839  *
18840  * Return Code: 0   - Success
18841  *		errno return code from sd_send_scsi_cmd()
18842  *
18843  *     Context: Can sleep. Does not return until command is completed.
18844  */
18845 
18846 static int
18847 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18848 	size_t buflen,  uchar_t save_page, int path_flag)
18849 {
18850 	struct	scsi_extended_sense	sense_buf;
18851 	union scsi_cdb		cdb;
18852 	struct uscsi_cmd	ucmd_buf;
18853 	int			status;
18854 
18855 	ASSERT(un != NULL);
18856 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18857 	ASSERT(bufaddr != NULL);
18858 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18859 	    (cdbsize == CDB_GROUP2));
18860 
18861 	SD_TRACE(SD_LOG_IO, un,
18862 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
18863 
18864 	bzero(&cdb, sizeof (cdb));
18865 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18866 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18867 
18868 	/* Set the PF bit for many third party drives */
18869 	cdb.cdb_opaque[1] = 0x10;
18870 
18871 	/* Set the savepage(SP) bit if given */
18872 	if (save_page == SD_SAVE_PAGE) {
18873 		cdb.cdb_opaque[1] |= 0x01;
18874 	}
18875 
18876 	if (cdbsize == CDB_GROUP0) {
18877 		cdb.scc_cmd = SCMD_MODE_SELECT;
18878 		FORMG0COUNT(&cdb, buflen);
18879 	} else {
18880 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
18881 		FORMG1COUNT(&cdb, buflen);
18882 	}
18883 
18884 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18885 
18886 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18887 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18888 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18889 	ucmd_buf.uscsi_buflen	= buflen;
18890 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18891 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18892 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18893 	ucmd_buf.uscsi_timeout	= 60;
18894 
18895 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18896 	    UIO_SYSSPACE, path_flag);
18897 
18898 	switch (status) {
18899 	case 0:
18900 		break;	/* Success! */
18901 	case EIO:
18902 		switch (ucmd_buf.uscsi_status) {
18903 		case STATUS_RESERVATION_CONFLICT:
18904 			status = EACCES;
18905 			break;
18906 		default:
18907 			break;
18908 		}
18909 		break;
18910 	default:
18911 		break;
18912 	}
18913 
18914 	if (status == 0) {
18915 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
18916 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18917 	}
18918 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
18919 
18920 	return (status);
18921 }
18922 
18923 
18924 /*
18925  *    Function: sd_send_scsi_RDWR
18926  *
18927  * Description: Issue a scsi READ or WRITE command with the given parameters.
18928  *
18929  *   Arguments: un:      Pointer to the sd_lun struct for the target.
18930  *		cmd:	 SCMD_READ or SCMD_WRITE
18931  *		bufaddr: Address of caller's buffer to receive the RDWR data
18932  *		buflen:  Length of caller's buffer receive the RDWR data.
18933  *		start_block: Block number for the start of the RDWR operation.
18934  *			 (Assumes target-native block size.)
18935  *		residp:  Pointer to variable to receive the redisual of the
18936  *			 RDWR operation (may be NULL of no residual requested).
18937  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18938  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18939  *			to use the USCSI "direct" chain and bypass the normal
18940  *			command waitq.
18941  *
18942  * Return Code: 0   - Success
18943  *		errno return code from sd_send_scsi_cmd()
18944  *
18945  *     Context: Can sleep. Does not return until command is completed.
18946  */
18947 
18948 static int
18949 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
18950 	size_t buflen, daddr_t start_block, int path_flag)
18951 {
18952 	struct	scsi_extended_sense	sense_buf;
18953 	union scsi_cdb		cdb;
18954 	struct uscsi_cmd	ucmd_buf;
18955 	uint32_t		block_count;
18956 	int			status;
18957 	int			cdbsize;
18958 	uchar_t			flag;
18959 
18960 	ASSERT(un != NULL);
18961 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18962 	ASSERT(bufaddr != NULL);
18963 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
18964 
18965 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
18966 
18967 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
18968 		return (EINVAL);
18969 	}
18970 
18971 	mutex_enter(SD_MUTEX(un));
18972 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
18973 	mutex_exit(SD_MUTEX(un));
18974 
18975 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
18976 
18977 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
18978 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
18979 	    bufaddr, buflen, start_block, block_count);
18980 
18981 	bzero(&cdb, sizeof (cdb));
18982 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18983 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18984 
18985 	/* Compute CDB size to use */
18986 	if (start_block > 0xffffffff)
18987 		cdbsize = CDB_GROUP4;
18988 	else if ((start_block & 0xFFE00000) ||
18989 	    (un->un_f_cfg_is_atapi == TRUE))
18990 		cdbsize = CDB_GROUP1;
18991 	else
18992 		cdbsize = CDB_GROUP0;
18993 
18994 	switch (cdbsize) {
18995 	case CDB_GROUP0:	/* 6-byte CDBs */
18996 		cdb.scc_cmd = cmd;
18997 		FORMG0ADDR(&cdb, start_block);
18998 		FORMG0COUNT(&cdb, block_count);
18999 		break;
19000 	case CDB_GROUP1:	/* 10-byte CDBs */
19001 		cdb.scc_cmd = cmd | SCMD_GROUP1;
19002 		FORMG1ADDR(&cdb, start_block);
19003 		FORMG1COUNT(&cdb, block_count);
19004 		break;
19005 	case CDB_GROUP4:	/* 16-byte CDBs */
19006 		cdb.scc_cmd = cmd | SCMD_GROUP4;
19007 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
19008 		FORMG4COUNT(&cdb, block_count);
19009 		break;
19010 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
19011 	default:
19012 		/* All others reserved */
19013 		return (EINVAL);
19014 	}
19015 
19016 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
19017 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19018 
19019 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19020 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19021 	ucmd_buf.uscsi_bufaddr	= bufaddr;
19022 	ucmd_buf.uscsi_buflen	= buflen;
19023 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19024 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19025 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
19026 	ucmd_buf.uscsi_timeout	= 60;
19027 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19028 	    UIO_SYSSPACE, path_flag);
19029 	switch (status) {
19030 	case 0:
19031 		break;	/* Success! */
19032 	case EIO:
19033 		switch (ucmd_buf.uscsi_status) {
19034 		case STATUS_RESERVATION_CONFLICT:
19035 			status = EACCES;
19036 			break;
19037 		default:
19038 			break;
19039 		}
19040 		break;
19041 	default:
19042 		break;
19043 	}
19044 
19045 	if (status == 0) {
19046 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19047 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19048 	}
19049 
19050 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19051 
19052 	return (status);
19053 }
19054 
19055 
19056 /*
19057  *    Function: sd_send_scsi_LOG_SENSE
19058  *
19059  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19060  *
19061  *   Arguments: un:      Pointer to the sd_lun struct for the target.
19062  *
19063  * Return Code: 0   - Success
19064  *		errno return code from sd_send_scsi_cmd()
19065  *
19066  *     Context: Can sleep. Does not return until command is completed.
19067  */
19068 
19069 static int
19070 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19071 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19072 	int path_flag)
19073 
19074 {
19075 	struct	scsi_extended_sense	sense_buf;
19076 	union scsi_cdb		cdb;
19077 	struct uscsi_cmd	ucmd_buf;
19078 	int			status;
19079 
19080 	ASSERT(un != NULL);
19081 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19082 
19083 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19084 
19085 	bzero(&cdb, sizeof (cdb));
19086 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19087 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19088 
19089 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19090 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19091 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19092 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19093 	FORMG1COUNT(&cdb, buflen);
19094 
19095 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19096 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19097 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19098 	ucmd_buf.uscsi_buflen	= buflen;
19099 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19100 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19101 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19102 	ucmd_buf.uscsi_timeout	= 60;
19103 
19104 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19105 	    UIO_SYSSPACE, path_flag);
19106 
19107 	switch (status) {
19108 	case 0:
19109 		break;
19110 	case EIO:
19111 		switch (ucmd_buf.uscsi_status) {
19112 		case STATUS_RESERVATION_CONFLICT:
19113 			status = EACCES;
19114 			break;
19115 		case STATUS_CHECK:
19116 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19117 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19118 				KEY_ILLEGAL_REQUEST) &&
19119 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19120 				/*
19121 				 * ASC 0x24: INVALID FIELD IN CDB
19122 				 */
19123 				switch (page_code) {
19124 				case START_STOP_CYCLE_PAGE:
19125 					/*
19126 					 * The start stop cycle counter is
19127 					 * implemented as page 0x31 in earlier
19128 					 * generation disks. In new generation
19129 					 * disks the start stop cycle counter is
19130 					 * implemented as page 0xE. To properly
19131 					 * handle this case if an attempt for
19132 					 * log page 0xE is made and fails we
19133 					 * will try again using page 0x31.
19134 					 *
19135 					 * Network storage BU committed to
19136 					 * maintain the page 0x31 for this
19137 					 * purpose and will not have any other
19138 					 * page implemented with page code 0x31
19139 					 * until all disks transition to the
19140 					 * standard page.
19141 					 */
19142 					mutex_enter(SD_MUTEX(un));
19143 					un->un_start_stop_cycle_page =
19144 					    START_STOP_CYCLE_VU_PAGE;
19145 					cdb.cdb_opaque[2] =
19146 					    (char)(page_control << 6) |
19147 					    un->un_start_stop_cycle_page;
19148 					mutex_exit(SD_MUTEX(un));
19149 					status = sd_send_scsi_cmd(
19150 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19151 					    UIO_SYSSPACE, path_flag);
19152 
19153 					break;
19154 				case TEMPERATURE_PAGE:
19155 					status = ENOTTY;
19156 					break;
19157 				default:
19158 					break;
19159 				}
19160 			}
19161 			break;
19162 		default:
19163 			break;
19164 		}
19165 		break;
19166 	default:
19167 		break;
19168 	}
19169 
19170 	if (status == 0) {
19171 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19172 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19173 	}
19174 
19175 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19176 
19177 	return (status);
19178 }
19179 
19180 
19181 /*
19182  *    Function: sdioctl
19183  *
19184  * Description: Driver's ioctl(9e) entry point function.
19185  *
19186  *   Arguments: dev     - device number
19187  *		cmd     - ioctl operation to be performed
19188  *		arg     - user argument, contains data to be set or reference
19189  *			  parameter for get
19190  *		flag    - bit flag, indicating open settings, 32/64 bit type
19191  *		cred_p  - user credential pointer
19192  *		rval_p  - calling process return value (OPT)
19193  *
19194  * Return Code: EINVAL
19195  *		ENOTTY
19196  *		ENXIO
19197  *		EIO
19198  *		EFAULT
19199  *		ENOTSUP
19200  *		EPERM
19201  *
19202  *     Context: Called from the device switch at normal priority.
19203  */
19204 
19205 static int
19206 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19207 {
19208 	struct sd_lun	*un = NULL;
19209 	int		err = 0;
19210 	int		i = 0;
19211 	cred_t		*cr;
19212 	int		tmprval = EINVAL;
19213 	int 		is_valid;
19214 
19215 	/*
19216 	 * All device accesses go thru sdstrategy where we check on suspend
19217 	 * status
19218 	 */
19219 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19220 		return (ENXIO);
19221 	}
19222 
19223 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19224 
19225 
19226 	is_valid = SD_IS_VALID_LABEL(un);
19227 
19228 	/*
19229 	 * Moved this wait from sd_uscsi_strategy to here for
19230 	 * reasons of deadlock prevention. Internal driver commands,
19231 	 * specifically those to change a devices power level, result
19232 	 * in a call to sd_uscsi_strategy.
19233 	 */
19234 	mutex_enter(SD_MUTEX(un));
19235 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19236 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19237 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19238 	}
19239 	/*
19240 	 * Twiddling the counter here protects commands from now
19241 	 * through to the top of sd_uscsi_strategy. Without the
19242 	 * counter inc. a power down, for example, could get in
19243 	 * after the above check for state is made and before
19244 	 * execution gets to the top of sd_uscsi_strategy.
19245 	 * That would cause problems.
19246 	 */
19247 	un->un_ncmds_in_driver++;
19248 
19249 	if (!is_valid &&
19250 	    (flag & (FNDELAY | FNONBLOCK))) {
19251 		switch (cmd) {
19252 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19253 		case DKIOCGVTOC:
19254 		case DKIOCGAPART:
19255 		case DKIOCPARTINFO:
19256 		case DKIOCSGEOM:
19257 		case DKIOCSAPART:
19258 		case DKIOCGETEFI:
19259 		case DKIOCPARTITION:
19260 		case DKIOCSVTOC:
19261 		case DKIOCSETEFI:
19262 		case DKIOCGMBOOT:
19263 		case DKIOCSMBOOT:
19264 		case DKIOCG_PHYGEOM:
19265 		case DKIOCG_VIRTGEOM:
19266 			/* let cmlb handle it */
19267 			goto skip_ready_valid;
19268 
19269 		case CDROMPAUSE:
19270 		case CDROMRESUME:
19271 		case CDROMPLAYMSF:
19272 		case CDROMPLAYTRKIND:
19273 		case CDROMREADTOCHDR:
19274 		case CDROMREADTOCENTRY:
19275 		case CDROMSTOP:
19276 		case CDROMSTART:
19277 		case CDROMVOLCTRL:
19278 		case CDROMSUBCHNL:
19279 		case CDROMREADMODE2:
19280 		case CDROMREADMODE1:
19281 		case CDROMREADOFFSET:
19282 		case CDROMSBLKMODE:
19283 		case CDROMGBLKMODE:
19284 		case CDROMGDRVSPEED:
19285 		case CDROMSDRVSPEED:
19286 		case CDROMCDDA:
19287 		case CDROMCDXA:
19288 		case CDROMSUBCODE:
19289 			if (!ISCD(un)) {
19290 				un->un_ncmds_in_driver--;
19291 				ASSERT(un->un_ncmds_in_driver >= 0);
19292 				mutex_exit(SD_MUTEX(un));
19293 				return (ENOTTY);
19294 			}
19295 			break;
19296 		case FDEJECT:
19297 		case DKIOCEJECT:
19298 		case CDROMEJECT:
19299 			if (!un->un_f_eject_media_supported) {
19300 				un->un_ncmds_in_driver--;
19301 				ASSERT(un->un_ncmds_in_driver >= 0);
19302 				mutex_exit(SD_MUTEX(un));
19303 				return (ENOTTY);
19304 			}
19305 			break;
19306 		case DKIOCFLUSHWRITECACHE:
19307 			mutex_exit(SD_MUTEX(un));
19308 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19309 			if (err != 0) {
19310 				mutex_enter(SD_MUTEX(un));
19311 				un->un_ncmds_in_driver--;
19312 				ASSERT(un->un_ncmds_in_driver >= 0);
19313 				mutex_exit(SD_MUTEX(un));
19314 				return (EIO);
19315 			}
19316 			mutex_enter(SD_MUTEX(un));
19317 			/* FALLTHROUGH */
19318 		case DKIOCREMOVABLE:
19319 		case DKIOCHOTPLUGGABLE:
19320 		case DKIOCINFO:
19321 		case DKIOCGMEDIAINFO:
19322 		case MHIOCENFAILFAST:
19323 		case MHIOCSTATUS:
19324 		case MHIOCTKOWN:
19325 		case MHIOCRELEASE:
19326 		case MHIOCGRP_INKEYS:
19327 		case MHIOCGRP_INRESV:
19328 		case MHIOCGRP_REGISTER:
19329 		case MHIOCGRP_RESERVE:
19330 		case MHIOCGRP_PREEMPTANDABORT:
19331 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19332 		case CDROMCLOSETRAY:
19333 		case USCSICMD:
19334 			goto skip_ready_valid;
19335 		default:
19336 			break;
19337 		}
19338 
19339 		mutex_exit(SD_MUTEX(un));
19340 		err = sd_ready_and_valid(un);
19341 		mutex_enter(SD_MUTEX(un));
19342 
19343 		if (err != SD_READY_VALID) {
19344 			switch (cmd) {
19345 			case DKIOCSTATE:
19346 			case CDROMGDRVSPEED:
19347 			case CDROMSDRVSPEED:
19348 			case FDEJECT:	/* for eject command */
19349 			case DKIOCEJECT:
19350 			case CDROMEJECT:
19351 			case DKIOCREMOVABLE:
19352 			case DKIOCHOTPLUGGABLE:
19353 				break;
19354 			default:
19355 				if (un->un_f_has_removable_media) {
19356 					err = ENXIO;
19357 				} else {
19358 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19359 					if (err == SD_RESERVED_BY_OTHERS) {
19360 						err = EACCES;
19361 					} else {
19362 						err = EIO;
19363 					}
19364 				}
19365 				un->un_ncmds_in_driver--;
19366 				ASSERT(un->un_ncmds_in_driver >= 0);
19367 				mutex_exit(SD_MUTEX(un));
19368 				return (err);
19369 			}
19370 		}
19371 	}
19372 
19373 skip_ready_valid:
19374 	mutex_exit(SD_MUTEX(un));
19375 
19376 	switch (cmd) {
19377 	case DKIOCINFO:
19378 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19379 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19380 		break;
19381 
19382 	case DKIOCGMEDIAINFO:
19383 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19384 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19385 		break;
19386 
19387 	case DKIOCGGEOM:
19388 	case DKIOCGVTOC:
19389 	case DKIOCGAPART:
19390 	case DKIOCPARTINFO:
19391 	case DKIOCSGEOM:
19392 	case DKIOCSAPART:
19393 	case DKIOCGETEFI:
19394 	case DKIOCPARTITION:
19395 	case DKIOCSVTOC:
19396 	case DKIOCSETEFI:
19397 	case DKIOCGMBOOT:
19398 	case DKIOCSMBOOT:
19399 	case DKIOCG_PHYGEOM:
19400 	case DKIOCG_VIRTGEOM:
19401 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19402 
19403 		/* TUR should spin up */
19404 
19405 		if (un->un_f_has_removable_media)
19406 			err = sd_send_scsi_TEST_UNIT_READY(un,
19407 			    SD_CHECK_FOR_MEDIA);
19408 		else
19409 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19410 
19411 		if (err != 0)
19412 			break;
19413 
19414 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19415 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19416 
19417 		if ((err == 0) &&
19418 		    ((cmd == DKIOCSETEFI) ||
19419 		    (un->un_f_pkstats_enabled) &&
19420 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC))) {
19421 
19422 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19423 			    (void *)SD_PATH_DIRECT);
19424 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19425 				sd_set_pstats(un);
19426 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19427 				    "sd_ioctl: un:0x%p pstats created and "
19428 				    "set\n", un);
19429 			}
19430 		}
19431 
19432 		if ((cmd == DKIOCSVTOC) ||
19433 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19434 
19435 			mutex_enter(SD_MUTEX(un));
19436 			if (un->un_f_devid_supported &&
19437 			    (un->un_f_opt_fab_devid == TRUE)) {
19438 				if (un->un_devid == NULL) {
19439 					sd_register_devid(un, SD_DEVINFO(un),
19440 					    SD_TARGET_IS_UNRESERVED);
19441 				} else {
19442 					/*
19443 					 * The device id for this disk
19444 					 * has been fabricated. The
19445 					 * device id must be preserved
19446 					 * by writing it back out to
19447 					 * disk.
19448 					 */
19449 					if (sd_write_deviceid(un) != 0) {
19450 						ddi_devid_free(un->un_devid);
19451 						un->un_devid = NULL;
19452 					}
19453 				}
19454 			}
19455 			mutex_exit(SD_MUTEX(un));
19456 		}
19457 
19458 		break;
19459 
19460 	case DKIOCLOCK:
19461 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19462 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19463 		    SD_PATH_STANDARD);
19464 		break;
19465 
19466 	case DKIOCUNLOCK:
19467 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19468 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19469 		    SD_PATH_STANDARD);
19470 		break;
19471 
19472 	case DKIOCSTATE: {
19473 		enum dkio_state		state;
19474 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19475 
19476 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19477 			err = EFAULT;
19478 		} else {
19479 			err = sd_check_media(dev, state);
19480 			if (err == 0) {
19481 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19482 				    sizeof (int), flag) != 0)
19483 					err = EFAULT;
19484 			}
19485 		}
19486 		break;
19487 	}
19488 
19489 	case DKIOCREMOVABLE:
19490 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19491 		i = un->un_f_has_removable_media ? 1 : 0;
19492 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19493 			err = EFAULT;
19494 		} else {
19495 			err = 0;
19496 		}
19497 		break;
19498 
19499 	case DKIOCHOTPLUGGABLE:
19500 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19501 		i = un->un_f_is_hotpluggable ? 1 : 0;
19502 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19503 			err = EFAULT;
19504 		} else {
19505 			err = 0;
19506 		}
19507 		break;
19508 
19509 	case DKIOCGTEMPERATURE:
19510 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19511 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19512 		break;
19513 
19514 	case MHIOCENFAILFAST:
19515 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19516 		if ((err = drv_priv(cred_p)) == 0) {
19517 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19518 		}
19519 		break;
19520 
19521 	case MHIOCTKOWN:
19522 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19523 		if ((err = drv_priv(cred_p)) == 0) {
19524 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19525 		}
19526 		break;
19527 
19528 	case MHIOCRELEASE:
19529 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19530 		if ((err = drv_priv(cred_p)) == 0) {
19531 			err = sd_mhdioc_release(dev);
19532 		}
19533 		break;
19534 
19535 	case MHIOCSTATUS:
19536 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19537 		if ((err = drv_priv(cred_p)) == 0) {
19538 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19539 			case 0:
19540 				err = 0;
19541 				break;
19542 			case EACCES:
19543 				*rval_p = 1;
19544 				err = 0;
19545 				break;
19546 			default:
19547 				err = EIO;
19548 				break;
19549 			}
19550 		}
19551 		break;
19552 
19553 	case MHIOCQRESERVE:
19554 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19555 		if ((err = drv_priv(cred_p)) == 0) {
19556 			err = sd_reserve_release(dev, SD_RESERVE);
19557 		}
19558 		break;
19559 
19560 	case MHIOCREREGISTERDEVID:
19561 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19562 		if (drv_priv(cred_p) == EPERM) {
19563 			err = EPERM;
19564 		} else if (!un->un_f_devid_supported) {
19565 			err = ENOTTY;
19566 		} else {
19567 			err = sd_mhdioc_register_devid(dev);
19568 		}
19569 		break;
19570 
19571 	case MHIOCGRP_INKEYS:
19572 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19573 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19574 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19575 				err = ENOTSUP;
19576 			} else {
19577 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19578 				    flag);
19579 			}
19580 		}
19581 		break;
19582 
19583 	case MHIOCGRP_INRESV:
19584 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19585 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19586 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19587 				err = ENOTSUP;
19588 			} else {
19589 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19590 			}
19591 		}
19592 		break;
19593 
19594 	case MHIOCGRP_REGISTER:
19595 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19596 		if ((err = drv_priv(cred_p)) != EPERM) {
19597 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19598 				err = ENOTSUP;
19599 			} else if (arg != NULL) {
19600 				mhioc_register_t reg;
19601 				if (ddi_copyin((void *)arg, &reg,
19602 				    sizeof (mhioc_register_t), flag) != 0) {
19603 					err = EFAULT;
19604 				} else {
19605 					err =
19606 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19607 					    un, SD_SCSI3_REGISTER,
19608 					    (uchar_t *)&reg);
19609 				}
19610 			}
19611 		}
19612 		break;
19613 
19614 	case MHIOCGRP_RESERVE:
19615 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19616 		if ((err = drv_priv(cred_p)) != EPERM) {
19617 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19618 				err = ENOTSUP;
19619 			} else if (arg != NULL) {
19620 				mhioc_resv_desc_t resv_desc;
19621 				if (ddi_copyin((void *)arg, &resv_desc,
19622 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19623 					err = EFAULT;
19624 				} else {
19625 					err =
19626 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19627 					    un, SD_SCSI3_RESERVE,
19628 					    (uchar_t *)&resv_desc);
19629 				}
19630 			}
19631 		}
19632 		break;
19633 
19634 	case MHIOCGRP_PREEMPTANDABORT:
19635 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19636 		if ((err = drv_priv(cred_p)) != EPERM) {
19637 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19638 				err = ENOTSUP;
19639 			} else if (arg != NULL) {
19640 				mhioc_preemptandabort_t preempt_abort;
19641 				if (ddi_copyin((void *)arg, &preempt_abort,
19642 				    sizeof (mhioc_preemptandabort_t),
19643 				    flag) != 0) {
19644 					err = EFAULT;
19645 				} else {
19646 					err =
19647 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19648 					    un, SD_SCSI3_PREEMPTANDABORT,
19649 					    (uchar_t *)&preempt_abort);
19650 				}
19651 			}
19652 		}
19653 		break;
19654 
19655 	case MHIOCGRP_REGISTERANDIGNOREKEY:
19656 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19657 		if ((err = drv_priv(cred_p)) != EPERM) {
19658 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19659 				err = ENOTSUP;
19660 			} else if (arg != NULL) {
19661 				mhioc_registerandignorekey_t r_and_i;
19662 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19663 				    sizeof (mhioc_registerandignorekey_t),
19664 				    flag) != 0) {
19665 					err = EFAULT;
19666 				} else {
19667 					err =
19668 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19669 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19670 					    (uchar_t *)&r_and_i);
19671 				}
19672 			}
19673 		}
19674 		break;
19675 
19676 	case USCSICMD:
19677 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
19678 		cr = ddi_get_cred();
19679 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
19680 			err = EPERM;
19681 		} else {
19682 			enum uio_seg	uioseg;
19683 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
19684 			    UIO_USERSPACE;
19685 			if (un->un_f_format_in_progress == TRUE) {
19686 				err = EAGAIN;
19687 				break;
19688 			}
19689 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
19690 			    flag, uioseg, SD_PATH_STANDARD);
19691 		}
19692 		break;
19693 
19694 	case CDROMPAUSE:
19695 	case CDROMRESUME:
19696 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
19697 		if (!ISCD(un)) {
19698 			err = ENOTTY;
19699 		} else {
19700 			err = sr_pause_resume(dev, cmd);
19701 		}
19702 		break;
19703 
19704 	case CDROMPLAYMSF:
19705 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
19706 		if (!ISCD(un)) {
19707 			err = ENOTTY;
19708 		} else {
19709 			err = sr_play_msf(dev, (caddr_t)arg, flag);
19710 		}
19711 		break;
19712 
19713 	case CDROMPLAYTRKIND:
19714 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
19715 #if defined(__i386) || defined(__amd64)
19716 		/*
19717 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
19718 		 */
19719 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19720 #else
19721 		if (!ISCD(un)) {
19722 #endif
19723 			err = ENOTTY;
19724 		} else {
19725 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
19726 		}
19727 		break;
19728 
19729 	case CDROMREADTOCHDR:
19730 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
19731 		if (!ISCD(un)) {
19732 			err = ENOTTY;
19733 		} else {
19734 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
19735 		}
19736 		break;
19737 
19738 	case CDROMREADTOCENTRY:
19739 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
19740 		if (!ISCD(un)) {
19741 			err = ENOTTY;
19742 		} else {
19743 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
19744 		}
19745 		break;
19746 
19747 	case CDROMSTOP:
19748 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
19749 		if (!ISCD(un)) {
19750 			err = ENOTTY;
19751 		} else {
19752 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
19753 			    SD_PATH_STANDARD);
19754 		}
19755 		break;
19756 
19757 	case CDROMSTART:
19758 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
19759 		if (!ISCD(un)) {
19760 			err = ENOTTY;
19761 		} else {
19762 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19763 			    SD_PATH_STANDARD);
19764 		}
19765 		break;
19766 
19767 	case CDROMCLOSETRAY:
19768 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
19769 		if (!ISCD(un)) {
19770 			err = ENOTTY;
19771 		} else {
19772 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
19773 			    SD_PATH_STANDARD);
19774 		}
19775 		break;
19776 
19777 	case FDEJECT:	/* for eject command */
19778 	case DKIOCEJECT:
19779 	case CDROMEJECT:
19780 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
19781 		if (!un->un_f_eject_media_supported) {
19782 			err = ENOTTY;
19783 		} else {
19784 			err = sr_eject(dev);
19785 		}
19786 		break;
19787 
19788 	case CDROMVOLCTRL:
19789 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
19790 		if (!ISCD(un)) {
19791 			err = ENOTTY;
19792 		} else {
19793 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
19794 		}
19795 		break;
19796 
19797 	case CDROMSUBCHNL:
19798 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
19799 		if (!ISCD(un)) {
19800 			err = ENOTTY;
19801 		} else {
19802 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
19803 		}
19804 		break;
19805 
19806 	case CDROMREADMODE2:
19807 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
19808 		if (!ISCD(un)) {
19809 			err = ENOTTY;
19810 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19811 			/*
19812 			 * If the drive supports READ CD, use that instead of
19813 			 * switching the LBA size via a MODE SELECT
19814 			 * Block Descriptor
19815 			 */
19816 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
19817 		} else {
19818 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
19819 		}
19820 		break;
19821 
19822 	case CDROMREADMODE1:
19823 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
19824 		if (!ISCD(un)) {
19825 			err = ENOTTY;
19826 		} else {
19827 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
19828 		}
19829 		break;
19830 
19831 	case CDROMREADOFFSET:
19832 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
19833 		if (!ISCD(un)) {
19834 			err = ENOTTY;
19835 		} else {
19836 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
19837 			    flag);
19838 		}
19839 		break;
19840 
19841 	case CDROMSBLKMODE:
19842 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
19843 		/*
19844 		 * There is no means of changing block size in case of atapi
19845 		 * drives, thus return ENOTTY if drive type is atapi
19846 		 */
19847 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19848 			err = ENOTTY;
19849 		} else if (un->un_f_mmc_cap == TRUE) {
19850 
19851 			/*
19852 			 * MMC Devices do not support changing the
19853 			 * logical block size
19854 			 *
19855 			 * Note: EINVAL is being returned instead of ENOTTY to
19856 			 * maintain consistency with the original mmc
19857 			 * driver update.
19858 			 */
19859 			err = EINVAL;
19860 		} else {
19861 			mutex_enter(SD_MUTEX(un));
19862 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
19863 			    (un->un_ncmds_in_transport > 0)) {
19864 				mutex_exit(SD_MUTEX(un));
19865 				err = EINVAL;
19866 			} else {
19867 				mutex_exit(SD_MUTEX(un));
19868 				err = sr_change_blkmode(dev, cmd, arg, flag);
19869 			}
19870 		}
19871 		break;
19872 
19873 	case CDROMGBLKMODE:
19874 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
19875 		if (!ISCD(un)) {
19876 			err = ENOTTY;
19877 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
19878 		    (un->un_f_blockcount_is_valid != FALSE)) {
19879 			/*
19880 			 * Drive is an ATAPI drive so return target block
19881 			 * size for ATAPI drives since we cannot change the
19882 			 * blocksize on ATAPI drives. Used primarily to detect
19883 			 * if an ATAPI cdrom is present.
19884 			 */
19885 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
19886 			    sizeof (int), flag) != 0) {
19887 				err = EFAULT;
19888 			} else {
19889 				err = 0;
19890 			}
19891 
19892 		} else {
19893 			/*
19894 			 * Drive supports changing block sizes via a Mode
19895 			 * Select.
19896 			 */
19897 			err = sr_change_blkmode(dev, cmd, arg, flag);
19898 		}
19899 		break;
19900 
19901 	case CDROMGDRVSPEED:
19902 	case CDROMSDRVSPEED:
19903 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
19904 		if (!ISCD(un)) {
19905 			err = ENOTTY;
19906 		} else if (un->un_f_mmc_cap == TRUE) {
19907 			/*
19908 			 * Note: In the future the driver implementation
19909 			 * for getting and
19910 			 * setting cd speed should entail:
19911 			 * 1) If non-mmc try the Toshiba mode page
19912 			 *    (sr_change_speed)
19913 			 * 2) If mmc but no support for Real Time Streaming try
19914 			 *    the SET CD SPEED (0xBB) command
19915 			 *   (sr_atapi_change_speed)
19916 			 * 3) If mmc and support for Real Time Streaming
19917 			 *    try the GET PERFORMANCE and SET STREAMING
19918 			 *    commands (not yet implemented, 4380808)
19919 			 */
19920 			/*
19921 			 * As per recent MMC spec, CD-ROM speed is variable
19922 			 * and changes with LBA. Since there is no such
19923 			 * things as drive speed now, fail this ioctl.
19924 			 *
19925 			 * Note: EINVAL is returned for consistency with the original
19926 			 * implementation which included support for getting
19927 			 * the drive speed of mmc devices but not setting
19928 			 * the drive speed. Thus EINVAL would be returned
19929 			 * if a set request was made for an mmc device.
19930 			 * We no longer support get or set speed for
19931 			 * mmc but need to remain consistent with regard
19932 			 * to the error code returned.
19933 			 */
19934 			err = EINVAL;
19935 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19936 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
19937 		} else {
19938 			err = sr_change_speed(dev, cmd, arg, flag);
19939 		}
19940 		break;
19941 
19942 	case CDROMCDDA:
19943 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
19944 		if (!ISCD(un)) {
19945 			err = ENOTTY;
19946 		} else {
19947 			err = sr_read_cdda(dev, (void *)arg, flag);
19948 		}
19949 		break;
19950 
19951 	case CDROMCDXA:
19952 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
19953 		if (!ISCD(un)) {
19954 			err = ENOTTY;
19955 		} else {
19956 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
19957 		}
19958 		break;
19959 
19960 	case CDROMSUBCODE:
19961 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
19962 		if (!ISCD(un)) {
19963 			err = ENOTTY;
19964 		} else {
19965 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
19966 		}
19967 		break;
19968 
19969 
19970 #ifdef SDDEBUG
19971 /* RESET/ABORTS testing ioctls */
19972 	case DKIOCRESET: {
19973 		int	reset_level;
19974 
19975 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
19976 			err = EFAULT;
19977 		} else {
19978 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
19979 			    "reset_level = 0x%lx\n", reset_level);
19980 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
19981 				err = 0;
19982 			} else {
19983 				err = EIO;
19984 			}
19985 		}
19986 		break;
19987 	}
19988 
19989 	case DKIOCABORT:
19990 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
19991 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
19992 			err = 0;
19993 		} else {
19994 			err = EIO;
19995 		}
19996 		break;
19997 #endif
19998 
19999 #ifdef SD_FAULT_INJECTION
20000 /* SDIOC FaultInjection testing ioctls */
20001 	case SDIOCSTART:
20002 	case SDIOCSTOP:
20003 	case SDIOCINSERTPKT:
20004 	case SDIOCINSERTXB:
20005 	case SDIOCINSERTUN:
20006 	case SDIOCINSERTARQ:
20007 	case SDIOCPUSH:
20008 	case SDIOCRETRIEVE:
20009 	case SDIOCRUN:
20010 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
20011 		    "SDIOC detected cmd:0x%X:\n", cmd);
20012 		/* call error generator */
20013 		sd_faultinjection_ioctl(cmd, arg, un);
20014 		err = 0;
20015 		break;
20016 
20017 #endif /* SD_FAULT_INJECTION */
20018 
20019 	case DKIOCFLUSHWRITECACHE:
20020 		{
20021 			struct dk_callback *dkc = (struct dk_callback *)arg;
20022 
20023 			mutex_enter(SD_MUTEX(un));
20024 			if (!un->un_f_sync_cache_supported ||
20025 			    !un->un_f_write_cache_enabled) {
20026 				err = un->un_f_sync_cache_supported ?
20027 					0 : ENOTSUP;
20028 				mutex_exit(SD_MUTEX(un));
20029 				if ((flag & FKIOCTL) && dkc != NULL &&
20030 				    dkc->dkc_callback != NULL) {
20031 					(*dkc->dkc_callback)(dkc->dkc_cookie,
20032 					    err);
20033 					/*
20034 					 * Did callback and reported error.
20035 					 * Since we did a callback, ioctl
20036 					 * should return 0.
20037 					 */
20038 					err = 0;
20039 				}
20040 				break;
20041 			}
20042 			mutex_exit(SD_MUTEX(un));
20043 
20044 			if ((flag & FKIOCTL) && dkc != NULL &&
20045 			    dkc->dkc_callback != NULL) {
20046 				/* async SYNC CACHE request */
20047 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20048 			} else {
20049 				/* synchronous SYNC CACHE request */
20050 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20051 			}
20052 		}
20053 		break;
20054 
20055 	case DKIOCGETWCE: {
20056 
20057 		int wce;
20058 
20059 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20060 			break;
20061 		}
20062 
20063 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20064 			err = EFAULT;
20065 		}
20066 		break;
20067 	}
20068 
20069 	case DKIOCSETWCE: {
20070 
20071 		int wce, sync_supported;
20072 
20073 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20074 			err = EFAULT;
20075 			break;
20076 		}
20077 
20078 		/*
20079 		 * Synchronize multiple threads trying to enable
20080 		 * or disable the cache via the un_f_wcc_cv
20081 		 * condition variable.
20082 		 */
20083 		mutex_enter(SD_MUTEX(un));
20084 
20085 		/*
20086 		 * Don't allow the cache to be enabled if the
20087 		 * config file has it disabled.
20088 		 */
20089 		if (un->un_f_opt_disable_cache && wce) {
20090 			mutex_exit(SD_MUTEX(un));
20091 			err = EINVAL;
20092 			break;
20093 		}
20094 
20095 		/*
20096 		 * Wait for write cache change in progress
20097 		 * bit to be clear before proceeding.
20098 		 */
20099 		while (un->un_f_wcc_inprog)
20100 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20101 
20102 		un->un_f_wcc_inprog = 1;
20103 
20104 		if (un->un_f_write_cache_enabled && wce == 0) {
20105 			/*
20106 			 * Disable the write cache.  Don't clear
20107 			 * un_f_write_cache_enabled until after
20108 			 * the mode select and flush are complete.
20109 			 */
20110 			sync_supported = un->un_f_sync_cache_supported;
20111 			mutex_exit(SD_MUTEX(un));
20112 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20113 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
20114 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20115 			}
20116 
20117 			mutex_enter(SD_MUTEX(un));
20118 			if (err == 0) {
20119 				un->un_f_write_cache_enabled = 0;
20120 			}
20121 
20122 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20123 			/*
20124 			 * Set un_f_write_cache_enabled first, so there is
20125 			 * no window where the cache is enabled, but the
20126 			 * bit says it isn't.
20127 			 */
20128 			un->un_f_write_cache_enabled = 1;
20129 			mutex_exit(SD_MUTEX(un));
20130 
20131 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20132 				SD_CACHE_ENABLE);
20133 
20134 			mutex_enter(SD_MUTEX(un));
20135 
20136 			if (err) {
20137 				un->un_f_write_cache_enabled = 0;
20138 			}
20139 		}
20140 
20141 		un->un_f_wcc_inprog = 0;
20142 		cv_broadcast(&un->un_wcc_cv);
20143 		mutex_exit(SD_MUTEX(un));
20144 		break;
20145 	}
20146 
20147 	default:
20148 		err = ENOTTY;
20149 		break;
20150 	}
20151 	mutex_enter(SD_MUTEX(un));
20152 	un->un_ncmds_in_driver--;
20153 	ASSERT(un->un_ncmds_in_driver >= 0);
20154 	mutex_exit(SD_MUTEX(un));
20155 
20156 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20157 	return (err);
20158 }
20159 
20160 
20161 /*
20162  *    Function: sd_dkio_ctrl_info
20163  *
20164  * Description: This routine is the driver entry point for handling controller
20165  *		information ioctl requests (DKIOCINFO).
20166  *
20167  *   Arguments: dev  - the device number
20168  *		arg  - pointer to user provided dk_cinfo structure
20169  *		       specifying the controller type and attributes.
20170  *		flag - this argument is a pass through to ddi_copyxxx()
20171  *		       directly from the mode argument of ioctl().
20172  *
20173  * Return Code: 0
20174  *		EFAULT
20175  *		ENXIO
20176  */
20177 
20178 static int
20179 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20180 {
20181 	struct sd_lun	*un = NULL;
20182 	struct dk_cinfo	*info;
20183 	dev_info_t	*pdip;
20184 	int		lun, tgt;
20185 
20186 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20187 		return (ENXIO);
20188 	}
20189 
20190 	info = (struct dk_cinfo *)
20191 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20192 
20193 	switch (un->un_ctype) {
20194 	case CTYPE_CDROM:
20195 		info->dki_ctype = DKC_CDROM;
20196 		break;
20197 	default:
20198 		info->dki_ctype = DKC_SCSI_CCS;
20199 		break;
20200 	}
20201 	pdip = ddi_get_parent(SD_DEVINFO(un));
20202 	info->dki_cnum = ddi_get_instance(pdip);
20203 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20204 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20205 	} else {
20206 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20207 		    DK_DEVLEN - 1);
20208 	}
20209 
20210 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20211 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20212 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20213 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20214 
20215 	/* Unit Information */
20216 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20217 	info->dki_slave = ((tgt << 3) | lun);
20218 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20219 	    DK_DEVLEN - 1);
20220 	info->dki_flags = DKI_FMTVOL;
20221 	info->dki_partition = SDPART(dev);
20222 
20223 	/* Max Transfer size of this device in blocks */
20224 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20225 	info->dki_addr = 0;
20226 	info->dki_space = 0;
20227 	info->dki_prio = 0;
20228 	info->dki_vec = 0;
20229 
20230 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20231 		kmem_free(info, sizeof (struct dk_cinfo));
20232 		return (EFAULT);
20233 	} else {
20234 		kmem_free(info, sizeof (struct dk_cinfo));
20235 		return (0);
20236 	}
20237 }
20238 
20239 
20240 /*
20241  *    Function: sd_get_media_info
20242  *
20243  * Description: This routine is the driver entry point for handling ioctl
20244  *		requests for the media type or command set profile used by the
20245  *		drive to operate on the media (DKIOCGMEDIAINFO).
20246  *
20247  *   Arguments: dev	- the device number
20248  *		arg	- pointer to user provided dk_minfo structure
20249  *			  specifying the media type, logical block size and
20250  *			  drive capacity.
20251  *		flag	- this argument is a pass through to ddi_copyxxx()
20252  *			  directly from the mode argument of ioctl().
20253  *
20254  * Return Code: 0
20255  *		EACCESS
20256  *		EFAULT
20257  *		ENXIO
20258  *		EIO
20259  */
20260 
20261 static int
20262 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20263 {
20264 	struct sd_lun		*un = NULL;
20265 	struct uscsi_cmd	com;
20266 	struct scsi_inquiry	*sinq;
20267 	struct dk_minfo		media_info;
20268 	u_longlong_t		media_capacity;
20269 	uint64_t		capacity;
20270 	uint_t			lbasize;
20271 	uchar_t			*out_data;
20272 	uchar_t			*rqbuf;
20273 	int			rval = 0;
20274 	int			rtn;
20275 
20276 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20277 	    (un->un_state == SD_STATE_OFFLINE)) {
20278 		return (ENXIO);
20279 	}
20280 
20281 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20282 
20283 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20284 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20285 
20286 	/* Issue a TUR to determine if the drive is ready with media present */
20287 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20288 	if (rval == ENXIO) {
20289 		goto done;
20290 	}
20291 
20292 	/* Now get configuration data */
20293 	if (ISCD(un)) {
20294 		media_info.dki_media_type = DK_CDROM;
20295 
20296 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20297 		if (un->un_f_mmc_cap == TRUE) {
20298 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20299 				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
20300 				SD_PATH_STANDARD);
20301 
20302 			if (rtn) {
20303 				/*
20304 				 * Failed for other than an illegal request
20305 				 * or command not supported
20306 				 */
20307 				if ((com.uscsi_status == STATUS_CHECK) &&
20308 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20309 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20310 					    (rqbuf[12] != 0x20)) {
20311 						rval = EIO;
20312 						goto done;
20313 					}
20314 				}
20315 			} else {
20316 				/*
20317 				 * The GET CONFIGURATION command succeeded
20318 				 * so set the media type according to the
20319 				 * returned data
20320 				 */
20321 				media_info.dki_media_type = out_data[6];
20322 				media_info.dki_media_type <<= 8;
20323 				media_info.dki_media_type |= out_data[7];
20324 			}
20325 		}
20326 	} else {
20327 		/*
20328 		 * The profile list is not available, so we attempt to identify
20329 		 * the media type based on the inquiry data
20330 		 */
20331 		sinq = un->un_sd->sd_inq;
20332 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
20333 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
20334 			/* This is a direct access device  or optical disk */
20335 			media_info.dki_media_type = DK_FIXED_DISK;
20336 
20337 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20338 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20339 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20340 					media_info.dki_media_type = DK_ZIP;
20341 				} else if (
20342 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20343 					media_info.dki_media_type = DK_JAZ;
20344 				}
20345 			}
20346 		} else {
20347 			/*
20348 			 * Not a CD, direct access or optical disk so return
20349 			 * unknown media
20350 			 */
20351 			media_info.dki_media_type = DK_UNKNOWN;
20352 		}
20353 	}
20354 
20355 	/* Now read the capacity so we can provide the lbasize and capacity */
20356 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20357 	    SD_PATH_DIRECT)) {
20358 	case 0:
20359 		break;
20360 	case EACCES:
20361 		rval = EACCES;
20362 		goto done;
20363 	default:
20364 		rval = EIO;
20365 		goto done;
20366 	}
20367 
20368 	media_info.dki_lbsize = lbasize;
20369 	media_capacity = capacity;
20370 
20371 	/*
20372 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
20373 	 * un->un_sys_blocksize chunks. So we need to convert it into
20374 	 * cap.lbasize chunks.
20375 	 */
20376 	media_capacity *= un->un_sys_blocksize;
20377 	media_capacity /= lbasize;
20378 	media_info.dki_capacity = media_capacity;
20379 
20380 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20381 		rval = EFAULT;
20382 		/* Put goto. Anybody might add some code below in future */
20383 		goto done;
20384 	}
20385 done:
20386 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20387 	kmem_free(rqbuf, SENSE_LENGTH);
20388 	return (rval);
20389 }
20390 
20391 
20392 /*
20393  *    Function: sd_check_media
20394  *
20395  * Description: This utility routine implements the functionality for the
20396  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20397  *		driver state changes from that specified by the user
20398  *		(inserted or ejected). For example, if the user specifies
20399  *		DKIO_EJECTED and the current media state is inserted this
20400  *		routine will immediately return DKIO_INSERTED. However, if the
20401  *		current media state is not inserted the user thread will be
20402  *		blocked until the drive state changes. If DKIO_NONE is specified
20403  *		the user thread will block until a drive state change occurs.
20404  *
20405  *   Arguments: dev  - the device number
20406  *		state  - user pointer to a dkio_state, updated with the current
20407  *			drive state at return.
20408  *
20409  * Return Code: ENXIO
20410  *		EIO
20411  *		EAGAIN
20412  *		EINTR
20413  */
20414 
20415 static int
20416 sd_check_media(dev_t dev, enum dkio_state state)
20417 {
20418 	struct sd_lun		*un = NULL;
20419 	enum dkio_state		prev_state;
20420 	opaque_t		token = NULL;
20421 	int			rval = 0;
20422 
20423 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20424 		return (ENXIO);
20425 	}
20426 
20427 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20428 
20429 	mutex_enter(SD_MUTEX(un));
20430 
20431 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20432 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20433 
20434 	prev_state = un->un_mediastate;
20435 
20436 	/* is there anything to do? */
20437 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20438 		/*
20439 		 * submit the request to the scsi_watch service;
20440 		 * scsi_media_watch_cb() does the real work
20441 		 */
20442 		mutex_exit(SD_MUTEX(un));
20443 
20444 		/*
20445 		 * This change handles the case where a scsi watch request is
20446 		 * added to a device that is powered down. To accomplish this
20447 		 * we power up the device before adding the scsi watch request,
20448 		 * since the scsi watch sends a TUR directly to the device
20449 		 * which the device cannot handle if it is powered down.
20450 		 */
20451 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20452 			mutex_enter(SD_MUTEX(un));
20453 			goto done;
20454 		}
20455 
20456 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20457 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20458 		    (caddr_t)dev);
20459 
20460 		sd_pm_exit(un);
20461 
20462 		mutex_enter(SD_MUTEX(un));
20463 		if (token == NULL) {
20464 			rval = EAGAIN;
20465 			goto done;
20466 		}
20467 
20468 		/*
20469 		 * This is a special case IOCTL that doesn't return
20470 		 * until the media state changes. Routine sdpower
20471 		 * knows about and handles this so don't count it
20472 		 * as an active cmd in the driver, which would
20473 		 * keep the device busy to the pm framework.
20474 		 * If the count isn't decremented the device can't
20475 		 * be powered down.
20476 		 */
20477 		un->un_ncmds_in_driver--;
20478 		ASSERT(un->un_ncmds_in_driver >= 0);
20479 
20480 		/*
20481 		 * if a prior request had been made, this will be the same
20482 		 * token, as scsi_watch was designed that way.
20483 		 */
20484 		un->un_swr_token = token;
20485 		un->un_specified_mediastate = state;
20486 
20487 		/*
20488 		 * now wait for media change
20489 		 * we will not be signalled unless mediastate == state but it is
20490 		 * still better to test for this condition, since there is a
20491 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
20492 		 */
20493 		SD_TRACE(SD_LOG_COMMON, un,
20494 		    "sd_check_media: waiting for media state change\n");
20495 		while (un->un_mediastate == state) {
20496 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20497 				SD_TRACE(SD_LOG_COMMON, un,
20498 				    "sd_check_media: waiting for media state "
20499 				    "was interrupted\n");
20500 				un->un_ncmds_in_driver++;
20501 				rval = EINTR;
20502 				goto done;
20503 			}
20504 			SD_TRACE(SD_LOG_COMMON, un,
20505 			    "sd_check_media: received signal, state=%x\n",
20506 			    un->un_mediastate);
20507 		}
20508 		/*
20509 		 * Inc the counter to indicate the device once again
20510 		 * has an active outstanding cmd.
20511 		 */
20512 		un->un_ncmds_in_driver++;
20513 	}
20514 
20515 	/* invalidate geometry */
20516 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20517 		sr_ejected(un);
20518 	}
20519 
20520 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20521 		uint64_t	capacity;
20522 		uint_t		lbasize;
20523 
20524 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20525 		mutex_exit(SD_MUTEX(un));
20526 		/*
20527 		 * Since the following routines use SD_PATH_DIRECT, we must
20528 		 * call PM directly before the upcoming disk accesses. This
20529 		 * may cause the disk to be power/spin up.
20530 		 */
20531 
20532 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20533 			rval = sd_send_scsi_READ_CAPACITY(un,
20534 			    &capacity,
20535 			    &lbasize, SD_PATH_DIRECT);
20536 			if (rval != 0) {
20537 				sd_pm_exit(un);
20538 				mutex_enter(SD_MUTEX(un));
20539 				goto done;
20540 			}
20541 		} else {
20542 			rval = EIO;
20543 			mutex_enter(SD_MUTEX(un));
20544 			goto done;
20545 		}
20546 		mutex_enter(SD_MUTEX(un));
20547 
20548 		sd_update_block_info(un, lbasize, capacity);
20549 
20550 		/*
20551 		 *  Check if the media in the device is writable or not
20552 		 */
20553 		if (ISCD(un))
20554 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
20555 
20556 		mutex_exit(SD_MUTEX(un));
20557 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20558 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20559 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20560 			sd_set_pstats(un);
20561 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20562 			    "sd_check_media: un:0x%p pstats created and "
20563 			    "set\n", un);
20564 		}
20565 
20566 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20567 		    SD_PATH_DIRECT);
20568 		sd_pm_exit(un);
20569 
20570 		mutex_enter(SD_MUTEX(un));
20571 	}
20572 done:
20573 	un->un_f_watcht_stopped = FALSE;
20574 	if (un->un_swr_token) {
20575 		/*
20576 		 * Use of this local token and the mutex ensures that we avoid
20577 		 * some race conditions associated with terminating the
20578 		 * scsi watch.
20579 		 */
20580 		token = un->un_swr_token;
20581 		un->un_swr_token = (opaque_t)NULL;
20582 		mutex_exit(SD_MUTEX(un));
20583 		(void) scsi_watch_request_terminate(token,
20584 		    SCSI_WATCH_TERMINATE_WAIT);
20585 		mutex_enter(SD_MUTEX(un));
20586 	}
20587 
20588 	/*
20589 	 * Update the capacity kstat value, if no media previously
20590 	 * (capacity kstat is 0) and a media has been inserted
20591 	 * (un_f_blockcount_is_valid == TRUE)
20592 	 */
20593 	if (un->un_errstats) {
20594 		struct sd_errstats	*stp = NULL;
20595 
20596 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20597 		if ((stp->sd_capacity.value.ui64 == 0) &&
20598 		    (un->un_f_blockcount_is_valid == TRUE)) {
20599 			stp->sd_capacity.value.ui64 =
20600 			    (uint64_t)((uint64_t)un->un_blockcount *
20601 			    un->un_sys_blocksize);
20602 		}
20603 	}
20604 	mutex_exit(SD_MUTEX(un));
20605 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20606 	return (rval);
20607 }
20608 
20609 
20610 /*
20611  *    Function: sd_delayed_cv_broadcast
20612  *
20613  * Description: Delayed cv_broadcast to allow for target to recover from media
20614  *		insertion.
20615  *
20616  *   Arguments: arg - driver soft state (unit) structure
20617  */
20618 
20619 static void
20620 sd_delayed_cv_broadcast(void *arg)
20621 {
20622 	struct sd_lun *un = arg;
20623 
20624 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20625 
20626 	mutex_enter(SD_MUTEX(un));
20627 	un->un_dcvb_timeid = NULL;
20628 	cv_broadcast(&un->un_state_cv);
20629 	mutex_exit(SD_MUTEX(un));
20630 }
20631 
20632 
20633 /*
20634  *    Function: sd_media_watch_cb
20635  *
20636  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20637  *		routine processes the TUR sense data and updates the driver
20638  *		state if a transition has occurred. The user thread
20639  *		(sd_check_media) is then signalled.
20640  *
20641  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20642  *			among multiple watches that share this callback function
20643  *		resultp - scsi watch facility result packet containing scsi
20644  *			  packet, status byte and sense data
20645  *
20646  * Return Code: 0 for success, -1 for failure
20647  */
20648 
20649 static int
20650 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20651 {
20652 	struct sd_lun			*un;
20653 	struct scsi_status		*statusp = resultp->statusp;
20654 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
20655 	enum dkio_state			state = DKIO_NONE;
20656 	dev_t				dev = (dev_t)arg;
20657 	uchar_t				actual_sense_length;
20658 	uint8_t				skey, asc, ascq;
20659 
20660 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20661 		return (-1);
20662 	}
20663 	actual_sense_length = resultp->actual_sense_length;
20664 
20665 	mutex_enter(SD_MUTEX(un));
20666 	SD_TRACE(SD_LOG_COMMON, un,
20667 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
20668 	    *((char *)statusp), (void *)sensep, actual_sense_length);
20669 
20670 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
20671 		un->un_mediastate = DKIO_DEV_GONE;
20672 		cv_broadcast(&un->un_state_cv);
20673 		mutex_exit(SD_MUTEX(un));
20674 
20675 		return (0);
20676 	}
20677 
20678 	/*
20679 	 * If there was a check condition then sensep points to valid sense data
20680 	 * If status was not a check condition but a reservation or busy status
20681 	 * then the new state is DKIO_NONE
20682 	 */
20683 	if (sensep != NULL) {
20684 		skey = scsi_sense_key(sensep);
20685 		asc = scsi_sense_asc(sensep);
20686 		ascq = scsi_sense_ascq(sensep);
20687 
20688 		SD_INFO(SD_LOG_COMMON, un,
20689 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
20690 		    skey, asc, ascq);
20691 		/* This routine only uses up to 13 bytes of sense data. */
20692 		if (actual_sense_length >= 13) {
20693 			if (skey == KEY_UNIT_ATTENTION) {
20694 				if (asc == 0x28) {
20695 					state = DKIO_INSERTED;
20696 				}
20697 			} else if (skey == KEY_NOT_READY) {
20698 				/*
20699 				 * if 02/04/02  means that the host
20700 				 * should send start command. Explicitly
20701 				 * leave the media state as is
20702 				 * (inserted) as the media is inserted
20703 				 * and host has stopped device for PM
20704 				 * reasons. Upon next true read/write
20705 				 * to this media will bring the
20706 				 * device to the right state good for
20707 				 * media access.
20708 				 */
20709 				if (asc == 0x3a) {
20710 					state = DKIO_EJECTED;
20711 				} else {
20712 					/*
20713 					 * If the drive is busy with an
20714 					 * operation or long write, keep the
20715 					 * media in an inserted state.
20716 					 */
20717 
20718 					if ((asc == 0x04) &&
20719 					    ((ascq == 0x02) ||
20720 					    (ascq == 0x07) ||
20721 					    (ascq == 0x08))) {
20722 						state = DKIO_INSERTED;
20723 					}
20724 				}
20725 			} else if (skey == KEY_NO_SENSE) {
20726 				if ((asc == 0x00) && (ascq == 0x00)) {
20727 					/*
20728 					 * Sense Data 00/00/00 does not provide
20729 					 * any information about the state of
20730 					 * the media. Ignore it.
20731 					 */
20732 					mutex_exit(SD_MUTEX(un));
20733 					return (0);
20734 				}
20735 			}
20736 		}
20737 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
20738 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
20739 		state = DKIO_INSERTED;
20740 	}
20741 
20742 	SD_TRACE(SD_LOG_COMMON, un,
20743 	    "sd_media_watch_cb: state=%x, specified=%x\n",
20744 	    state, un->un_specified_mediastate);
20745 
20746 	/*
20747 	 * now signal the waiting thread if this is *not* the specified state;
20748 	 * delay the signal if the state is DKIO_INSERTED to allow the target
20749 	 * to recover
20750 	 */
20751 	if (state != un->un_specified_mediastate) {
20752 		un->un_mediastate = state;
20753 		if (state == DKIO_INSERTED) {
20754 			/*
20755 			 * delay the signal to give the drive a chance
20756 			 * to do what it apparently needs to do
20757 			 */
20758 			SD_TRACE(SD_LOG_COMMON, un,
20759 			    "sd_media_watch_cb: delayed cv_broadcast\n");
20760 			if (un->un_dcvb_timeid == NULL) {
20761 				un->un_dcvb_timeid =
20762 				    timeout(sd_delayed_cv_broadcast, un,
20763 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
20764 			}
20765 		} else {
20766 			SD_TRACE(SD_LOG_COMMON, un,
20767 			    "sd_media_watch_cb: immediate cv_broadcast\n");
20768 			cv_broadcast(&un->un_state_cv);
20769 		}
20770 	}
20771 	mutex_exit(SD_MUTEX(un));
20772 	return (0);
20773 }
20774 
20775 
20776 /*
20777  *    Function: sd_dkio_get_temp
20778  *
20779  * Description: This routine is the driver entry point for handling ioctl
20780  *		requests to get the disk temperature.
20781  *
20782  *   Arguments: dev  - the device number
20783  *		arg  - pointer to user provided dk_temperature structure.
20784  *		flag - this argument is a pass through to ddi_copyxxx()
20785  *		       directly from the mode argument of ioctl().
20786  *
20787  * Return Code: 0
20788  *		EFAULT
20789  *		ENXIO
20790  *		EAGAIN
20791  */
20792 
20793 static int
20794 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
20795 {
20796 	struct sd_lun		*un = NULL;
20797 	struct dk_temperature	*dktemp = NULL;
20798 	uchar_t			*temperature_page;
20799 	int			rval = 0;
20800 	int			path_flag = SD_PATH_STANDARD;
20801 
20802 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20803 		return (ENXIO);
20804 	}
20805 
20806 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
20807 
20808 	/* copyin the disk temp argument to get the user flags */
20809 	if (ddi_copyin((void *)arg, dktemp,
20810 	    sizeof (struct dk_temperature), flag) != 0) {
20811 		rval = EFAULT;
20812 		goto done;
20813 	}
20814 
20815 	/* Initialize the temperature to invalid. */
20816 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20817 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20818 
20819 	/*
20820 	 * Note: Investigate removing the "bypass pm" semantic.
20821 	 * Can we just bypass PM always?
20822 	 */
20823 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
20824 		path_flag = SD_PATH_DIRECT;
20825 		ASSERT(!mutex_owned(&un->un_pm_mutex));
20826 		mutex_enter(&un->un_pm_mutex);
20827 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
20828 			/*
20829 			 * If DKT_BYPASS_PM is set, and the drive happens to be
20830 			 * in low power mode, we can not wake it up, Need to
20831 			 * return EAGAIN.
20832 			 */
20833 			mutex_exit(&un->un_pm_mutex);
20834 			rval = EAGAIN;
20835 			goto done;
20836 		} else {
20837 			/*
20838 			 * Indicate to PM the device is busy. This is required
20839 			 * to avoid a race - i.e. the ioctl is issuing a
20840 			 * command and the pm framework brings down the device
20841 			 * to low power mode (possible power cut-off on some
20842 			 * platforms).
20843 			 */
20844 			mutex_exit(&un->un_pm_mutex);
20845 			if (sd_pm_entry(un) != DDI_SUCCESS) {
20846 				rval = EAGAIN;
20847 				goto done;
20848 			}
20849 		}
20850 	}
20851 
20852 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
20853 
20854 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
20855 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
20856 		goto done2;
20857 	}
20858 
20859 	/*
20860 	 * For the current temperature verify that the parameter length is 0x02
20861 	 * and the parameter code is 0x00
20862 	 */
20863 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
20864 	    (temperature_page[5] == 0x00)) {
20865 		if (temperature_page[9] == 0xFF) {
20866 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20867 		} else {
20868 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
20869 		}
20870 	}
20871 
20872 	/*
20873 	 * For the reference temperature verify that the parameter
20874 	 * length is 0x02 and the parameter code is 0x01
20875 	 */
20876 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
20877 	    (temperature_page[11] == 0x01)) {
20878 		if (temperature_page[15] == 0xFF) {
20879 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20880 		} else {
20881 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
20882 		}
20883 	}
20884 
20885 	/* Do the copyout regardless of the temperature commands status. */
20886 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
20887 	    flag) != 0) {
20888 		rval = EFAULT;
20889 	}
20890 
20891 done2:
20892 	if (path_flag == SD_PATH_DIRECT) {
20893 		sd_pm_exit(un);
20894 	}
20895 
20896 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
20897 done:
20898 	if (dktemp != NULL) {
20899 		kmem_free(dktemp, sizeof (struct dk_temperature));
20900 	}
20901 
20902 	return (rval);
20903 }
20904 
20905 
20906 /*
20907  *    Function: sd_log_page_supported
20908  *
20909  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
20910  *		supported log pages.
20911  *
20912  *   Arguments: un -
20913  *		log_page -
20914  *
20915  * Return Code: -1 - on error (log sense is optional and may not be supported).
20916  *		0  - log page not found.
20917  *  		1  - log page found.
20918  */
20919 
20920 static int
20921 sd_log_page_supported(struct sd_lun *un, int log_page)
20922 {
20923 	uchar_t *log_page_data;
20924 	int	i;
20925 	int	match = 0;
20926 	int	log_size;
20927 
20928 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
20929 
20930 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
20931 	    SD_PATH_DIRECT) != 0) {
20932 		SD_ERROR(SD_LOG_COMMON, un,
20933 		    "sd_log_page_supported: failed log page retrieval\n");
20934 		kmem_free(log_page_data, 0xFF);
20935 		return (-1);
20936 	}
20937 	log_size = log_page_data[3];
20938 
20939 	/*
20940 	 * The list of supported log pages start from the fourth byte. Check
20941 	 * until we run out of log pages or a match is found.
20942 	 */
20943 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
20944 		if (log_page_data[i] == log_page) {
20945 			match++;
20946 		}
20947 	}
20948 	kmem_free(log_page_data, 0xFF);
20949 	return (match);
20950 }
20951 
20952 
20953 /*
20954  *    Function: sd_mhdioc_failfast
20955  *
20956  * Description: This routine is the driver entry point for handling ioctl
20957  *		requests to enable/disable the multihost failfast option.
20958  *		(MHIOCENFAILFAST)
20959  *
20960  *   Arguments: dev	- the device number
20961  *		arg	- user specified probing interval.
20962  *		flag	- this argument is a pass through to ddi_copyxxx()
20963  *			  directly from the mode argument of ioctl().
20964  *
20965  * Return Code: 0
20966  *		EFAULT
20967  *		ENXIO
20968  */
20969 
20970 static int
20971 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
20972 {
20973 	struct sd_lun	*un = NULL;
20974 	int		mh_time;
20975 	int		rval = 0;
20976 
20977 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20978 		return (ENXIO);
20979 	}
20980 
20981 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
20982 		return (EFAULT);
20983 
20984 	if (mh_time) {
20985 		mutex_enter(SD_MUTEX(un));
20986 		un->un_resvd_status |= SD_FAILFAST;
20987 		mutex_exit(SD_MUTEX(un));
20988 		/*
20989 		 * If mh_time is INT_MAX, then this ioctl is being used for
20990 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
20991 		 */
20992 		if (mh_time != INT_MAX) {
20993 			rval = sd_check_mhd(dev, mh_time);
20994 		}
20995 	} else {
20996 		(void) sd_check_mhd(dev, 0);
20997 		mutex_enter(SD_MUTEX(un));
20998 		un->un_resvd_status &= ~SD_FAILFAST;
20999 		mutex_exit(SD_MUTEX(un));
21000 	}
21001 	return (rval);
21002 }
21003 
21004 
21005 /*
21006  *    Function: sd_mhdioc_takeown
21007  *
21008  * Description: This routine is the driver entry point for handling ioctl
21009  *		requests to forcefully acquire exclusive access rights to the
21010  *		multihost disk (MHIOCTKOWN).
21011  *
21012  *   Arguments: dev	- the device number
21013  *		arg	- user provided structure specifying the delay
21014  *			  parameters in milliseconds
21015  *		flag	- this argument is a pass through to ddi_copyxxx()
21016  *			  directly from the mode argument of ioctl().
21017  *
21018  * Return Code: 0
21019  *		EFAULT
21020  *		ENXIO
21021  */
21022 
21023 static int
21024 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
21025 {
21026 	struct sd_lun		*un = NULL;
21027 	struct mhioctkown	*tkown = NULL;
21028 	int			rval = 0;
21029 
21030 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21031 		return (ENXIO);
21032 	}
21033 
21034 	if (arg != NULL) {
21035 		tkown = (struct mhioctkown *)
21036 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
21037 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
21038 		if (rval != 0) {
21039 			rval = EFAULT;
21040 			goto error;
21041 		}
21042 	}
21043 
21044 	rval = sd_take_ownership(dev, tkown);
21045 	mutex_enter(SD_MUTEX(un));
21046 	if (rval == 0) {
21047 		un->un_resvd_status |= SD_RESERVE;
21048 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
21049 			sd_reinstate_resv_delay =
21050 			    tkown->reinstate_resv_delay * 1000;
21051 		} else {
21052 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
21053 		}
21054 		/*
21055 		 * Give the scsi_watch routine interval set by
21056 		 * the MHIOCENFAILFAST ioctl precedence here.
21057 		 */
21058 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21059 			mutex_exit(SD_MUTEX(un));
21060 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21061 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21062 			    "sd_mhdioc_takeown : %d\n",
21063 			    sd_reinstate_resv_delay);
21064 		} else {
21065 			mutex_exit(SD_MUTEX(un));
21066 		}
21067 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21068 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21069 	} else {
21070 		un->un_resvd_status &= ~SD_RESERVE;
21071 		mutex_exit(SD_MUTEX(un));
21072 	}
21073 
21074 error:
21075 	if (tkown != NULL) {
21076 		kmem_free(tkown, sizeof (struct mhioctkown));
21077 	}
21078 	return (rval);
21079 }
21080 
21081 
21082 /*
21083  *    Function: sd_mhdioc_release
21084  *
21085  * Description: This routine is the driver entry point for handling ioctl
21086  *		requests to release exclusive access rights to the multihost
21087  *		disk (MHIOCRELEASE).
21088  *
21089  *   Arguments: dev	- the device number
21090  *
21091  * Return Code: 0
21092  *		ENXIO
21093  */
21094 
21095 static int
21096 sd_mhdioc_release(dev_t dev)
21097 {
21098 	struct sd_lun		*un = NULL;
21099 	timeout_id_t		resvd_timeid_save;
21100 	int			resvd_status_save;
21101 	int			rval = 0;
21102 
21103 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21104 		return (ENXIO);
21105 	}
21106 
21107 	mutex_enter(SD_MUTEX(un));
21108 	resvd_status_save = un->un_resvd_status;
21109 	un->un_resvd_status &=
21110 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21111 	if (un->un_resvd_timeid) {
21112 		resvd_timeid_save = un->un_resvd_timeid;
21113 		un->un_resvd_timeid = NULL;
21114 		mutex_exit(SD_MUTEX(un));
21115 		(void) untimeout(resvd_timeid_save);
21116 	} else {
21117 		mutex_exit(SD_MUTEX(un));
21118 	}
21119 
21120 	/*
21121 	 * destroy any pending timeout thread that may be attempting to
21122 	 * reinstate reservation on this device.
21123 	 */
21124 	sd_rmv_resv_reclaim_req(dev);
21125 
21126 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21127 		mutex_enter(SD_MUTEX(un));
21128 		if ((un->un_mhd_token) &&
21129 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21130 			mutex_exit(SD_MUTEX(un));
21131 			(void) sd_check_mhd(dev, 0);
21132 		} else {
21133 			mutex_exit(SD_MUTEX(un));
21134 		}
21135 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21136 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21137 	} else {
21138 		/*
21139 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21140 		 */
21141 		mutex_enter(SD_MUTEX(un));
21142 		un->un_resvd_status = resvd_status_save;
21143 		mutex_exit(SD_MUTEX(un));
21144 	}
21145 	return (rval);
21146 }
21147 
21148 
21149 /*
21150  *    Function: sd_mhdioc_register_devid
21151  *
21152  * Description: This routine is the driver entry point for handling ioctl
21153  *		requests to register the device id (MHIOCREREGISTERDEVID).
21154  *
21155  *		Note: The implementation for this ioctl has been updated to
21156  *		be consistent with the original PSARC case (1999/357)
21157  *		(4375899, 4241671, 4220005)
21158  *
21159  *   Arguments: dev	- the device number
21160  *
21161  * Return Code: 0
21162  *		ENXIO
21163  */
21164 
21165 static int
21166 sd_mhdioc_register_devid(dev_t dev)
21167 {
21168 	struct sd_lun	*un = NULL;
21169 	int		rval = 0;
21170 
21171 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21172 		return (ENXIO);
21173 	}
21174 
21175 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21176 
21177 	mutex_enter(SD_MUTEX(un));
21178 
21179 	/* If a devid already exists, de-register it */
21180 	if (un->un_devid != NULL) {
21181 		ddi_devid_unregister(SD_DEVINFO(un));
21182 		/*
21183 		 * After unregister devid, needs to free devid memory
21184 		 */
21185 		ddi_devid_free(un->un_devid);
21186 		un->un_devid = NULL;
21187 	}
21188 
21189 	/* Check for reservation conflict */
21190 	mutex_exit(SD_MUTEX(un));
21191 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21192 	mutex_enter(SD_MUTEX(un));
21193 
21194 	switch (rval) {
21195 	case 0:
21196 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21197 		break;
21198 	case EACCES:
21199 		break;
21200 	default:
21201 		rval = EIO;
21202 	}
21203 
21204 	mutex_exit(SD_MUTEX(un));
21205 	return (rval);
21206 }
21207 
21208 
21209 /*
21210  *    Function: sd_mhdioc_inkeys
21211  *
21212  * Description: This routine is the driver entry point for handling ioctl
21213  *		requests to issue the SCSI-3 Persistent In Read Keys command
21214  *		to the device (MHIOCGRP_INKEYS).
21215  *
21216  *   Arguments: dev	- the device number
21217  *		arg	- user provided in_keys structure
21218  *		flag	- this argument is a pass through to ddi_copyxxx()
21219  *			  directly from the mode argument of ioctl().
21220  *
21221  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21222  *		ENXIO
21223  *		EFAULT
21224  */
21225 
21226 static int
21227 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21228 {
21229 	struct sd_lun		*un;
21230 	mhioc_inkeys_t		inkeys;
21231 	int			rval = 0;
21232 
21233 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21234 		return (ENXIO);
21235 	}
21236 
21237 #ifdef _MULTI_DATAMODEL
21238 	switch (ddi_model_convert_from(flag & FMODELS)) {
21239 	case DDI_MODEL_ILP32: {
21240 		struct mhioc_inkeys32	inkeys32;
21241 
21242 		if (ddi_copyin(arg, &inkeys32,
21243 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21244 			return (EFAULT);
21245 		}
21246 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21247 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21248 		    &inkeys, flag)) != 0) {
21249 			return (rval);
21250 		}
21251 		inkeys32.generation = inkeys.generation;
21252 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21253 		    flag) != 0) {
21254 			return (EFAULT);
21255 		}
21256 		break;
21257 	}
21258 	case DDI_MODEL_NONE:
21259 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21260 		    flag) != 0) {
21261 			return (EFAULT);
21262 		}
21263 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21264 		    &inkeys, flag)) != 0) {
21265 			return (rval);
21266 		}
21267 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21268 		    flag) != 0) {
21269 			return (EFAULT);
21270 		}
21271 		break;
21272 	}
21273 
21274 #else /* ! _MULTI_DATAMODEL */
21275 
21276 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21277 		return (EFAULT);
21278 	}
21279 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21280 	if (rval != 0) {
21281 		return (rval);
21282 	}
21283 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21284 		return (EFAULT);
21285 	}
21286 
21287 #endif /* _MULTI_DATAMODEL */
21288 
21289 	return (rval);
21290 }
21291 
21292 
21293 /*
21294  *    Function: sd_mhdioc_inresv
21295  *
21296  * Description: This routine is the driver entry point for handling ioctl
21297  *		requests to issue the SCSI-3 Persistent In Read Reservations
21298  *		command to the device (MHIOCGRP_INKEYS).
21299  *
21300  *   Arguments: dev	- the device number
21301  *		arg	- user provided in_resv structure
21302  *		flag	- this argument is a pass through to ddi_copyxxx()
21303  *			  directly from the mode argument of ioctl().
21304  *
21305  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21306  *		ENXIO
21307  *		EFAULT
21308  */
21309 
21310 static int
21311 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21312 {
21313 	struct sd_lun		*un;
21314 	mhioc_inresvs_t		inresvs;
21315 	int			rval = 0;
21316 
21317 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21318 		return (ENXIO);
21319 	}
21320 
21321 #ifdef _MULTI_DATAMODEL
21322 
21323 	switch (ddi_model_convert_from(flag & FMODELS)) {
21324 	case DDI_MODEL_ILP32: {
21325 		struct mhioc_inresvs32	inresvs32;
21326 
21327 		if (ddi_copyin(arg, &inresvs32,
21328 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21329 			return (EFAULT);
21330 		}
21331 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21332 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21333 		    &inresvs, flag)) != 0) {
21334 			return (rval);
21335 		}
21336 		inresvs32.generation = inresvs.generation;
21337 		if (ddi_copyout(&inresvs32, arg,
21338 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21339 			return (EFAULT);
21340 		}
21341 		break;
21342 	}
21343 	case DDI_MODEL_NONE:
21344 		if (ddi_copyin(arg, &inresvs,
21345 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21346 			return (EFAULT);
21347 		}
21348 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21349 		    &inresvs, flag)) != 0) {
21350 			return (rval);
21351 		}
21352 		if (ddi_copyout(&inresvs, arg,
21353 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21354 			return (EFAULT);
21355 		}
21356 		break;
21357 	}
21358 
21359 #else /* ! _MULTI_DATAMODEL */
21360 
21361 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21362 		return (EFAULT);
21363 	}
21364 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21365 	if (rval != 0) {
21366 		return (rval);
21367 	}
21368 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
21369 		return (EFAULT);
21370 	}
21371 
21372 #endif /* ! _MULTI_DATAMODEL */
21373 
21374 	return (rval);
21375 }
21376 
21377 
21378 /*
21379  * The following routines support the clustering functionality described below
21380  * and implement lost reservation reclaim functionality.
21381  *
21382  * Clustering
21383  * ----------
21384  * The clustering code uses two different, independent forms of SCSI
21385  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
21386  * Persistent Group Reservations. For any particular disk, it will use either
21387  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
21388  *
21389  * SCSI-2
21390  * The cluster software takes ownership of a multi-hosted disk by issuing the
21391  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
 * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a cluster,
21393  * just after taking ownership of the disk with the MHIOCTKOWN ioctl then issues
21394  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
21395  * meaning of failfast is that if the driver (on this host) ever encounters the
21396  * scsi error return code RESERVATION_CONFLICT from the device, it should
21397  * immediately panic the host. The motivation for this ioctl is that if this
21398  * host does encounter reservation conflict, the underlying cause is that some
21399  * other host of the cluster has decided that this host is no longer in the
21400  * cluster and has seized control of the disks for itself. Since this host is no
21401  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
21402  * does two things:
21403  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21404  *      error to panic the host
21405  *      (b) it sets up a periodic timer to test whether this host still has
21406  *      "access" (in that no other host has reserved the device):  if the
21407  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21408  *      purpose of that periodic timer is to handle scenarios where the host is
21409  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21410  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21411  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21412  * the device itself.
21413  *
21414  * SCSI-3 PGR
21415  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21416  * facility is supported through the shared multihost disk ioctls
21417  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21418  * MHIOCGRP_PREEMPTANDABORT)
21419  *
21420  * Reservation Reclaim:
21421  * --------------------
 * To support the lost reservation reclaim operations, this driver creates a
 * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
 * requests to regain the lost reservations.
21428  */
21429 
21430 /*
21431  *    Function: sd_check_mhd()
21432  *
21433  * Description: This function sets up and submits a scsi watch request or
21434  *		terminates an existing watch request. This routine is used in
21435  *		support of reservation reclaim.
21436  *
21437  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21438  *			 among multiple watches that share the callback function
21439  *		interval - the number of microseconds specifying the watch
21440  *			   interval for issuing TEST UNIT READY commands. If
21441  *			   set to 0 the watch should be terminated. If the
21442  *			   interval is set to 0 and if the device is required
21443  *			   to hold reservation while disabling failfast, the
21444  *			   watch is restarted with an interval of
21445  *			   reinstate_resv_delay.
21446  *
21447  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21448  *		ENXIO      - Indicates an invalid device was specified
21449  *		EAGAIN     - Unable to submit the scsi watch request
21450  */
21451 
21452 static int
21453 sd_check_mhd(dev_t dev, int interval)
21454 {
21455 	struct sd_lun	*un;
21456 	opaque_t	token;
21457 
21458 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21459 		return (ENXIO);
21460 	}
21461 
21462 	/* is this a watch termination request? */
21463 	if (interval == 0) {
21464 		mutex_enter(SD_MUTEX(un));
21465 		/* if there is an existing watch task then terminate it */
21466 		if (un->un_mhd_token) {
21467 			token = un->un_mhd_token;
21468 			un->un_mhd_token = NULL;
21469 			mutex_exit(SD_MUTEX(un));
21470 			(void) scsi_watch_request_terminate(token,
21471 			    SCSI_WATCH_TERMINATE_WAIT);
21472 			mutex_enter(SD_MUTEX(un));
21473 		} else {
21474 			mutex_exit(SD_MUTEX(un));
21475 			/*
21476 			 * Note: If we return here we don't check for the
21477 			 * failfast case. This is the original legacy
21478 			 * implementation but perhaps we should be checking
21479 			 * the failfast case.
21480 			 */
21481 			return (0);
21482 		}
21483 		/*
21484 		 * If the device is required to hold reservation while
21485 		 * disabling failfast, we need to restart the scsi_watch
21486 		 * routine with an interval of reinstate_resv_delay.
21487 		 */
21488 		if (un->un_resvd_status & SD_RESERVE) {
21489 			interval = sd_reinstate_resv_delay/1000;
21490 		} else {
21491 			/* no failfast so bail */
21492 			mutex_exit(SD_MUTEX(un));
21493 			return (0);
21494 		}
21495 		mutex_exit(SD_MUTEX(un));
21496 	}
21497 
21498 	/*
21499 	 * adjust minimum time interval to 1 second,
21500 	 * and convert from msecs to usecs
21501 	 */
21502 	if (interval > 0 && interval < 1000) {
21503 		interval = 1000;
21504 	}
21505 	interval *= 1000;
21506 
21507 	/*
21508 	 * submit the request to the scsi_watch service
21509 	 */
21510 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21511 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21512 	if (token == NULL) {
21513 		return (EAGAIN);
21514 	}
21515 
21516 	/*
21517 	 * save token for termination later on
21518 	 */
21519 	mutex_enter(SD_MUTEX(un));
21520 	un->un_mhd_token = token;
21521 	mutex_exit(SD_MUTEX(un));
21522 	return (0);
21523 }
21524 
21525 
21526 /*
21527  *    Function: sd_mhd_watch_cb()
21528  *
21529  * Description: This function is the call back function used by the scsi watch
21530  *		facility. The scsi watch facility sends the "Test Unit Ready"
21531  *		and processes the status. If applicable (i.e. a "Unit Attention"
21532  *		status and automatic "Request Sense" not used) the scsi watch
21533  *		facility will send a "Request Sense" and retrieve the sense data
21534  *		to be passed to this callback function. In either case the
21535  *		automatic "Request Sense" or the facility submitting one, this
21536  *		callback is passed the status and sense data.
21537  *
21538  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21539  *			among multiple watches that share this callback function
21540  *		resultp - scsi watch facility result packet containing scsi
21541  *			  packet, status byte and sense data
21542  *
21543  * Return Code: 0 - continue the watch task
21544  *		non-zero - terminate the watch task
21545  */
21546 
21547 static int
21548 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21549 {
21550 	struct sd_lun			*un;
21551 	struct scsi_status		*statusp;
21552 	uint8_t				*sensep;
21553 	struct scsi_pkt			*pkt;
21554 	uchar_t				actual_sense_length;
21555 	dev_t  				dev = (dev_t)arg;
21556 
21557 	ASSERT(resultp != NULL);
21558 	statusp			= resultp->statusp;
21559 	sensep			= (uint8_t *)resultp->sensep;
21560 	pkt			= resultp->pkt;
21561 	actual_sense_length	= resultp->actual_sense_length;
21562 
21563 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21564 		return (ENXIO);
21565 	}
21566 
21567 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21568 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21569 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21570 
21571 	/* Begin processing of the status and/or sense data */
21572 	if (pkt->pkt_reason != CMD_CMPLT) {
21573 		/* Handle the incomplete packet */
21574 		sd_mhd_watch_incomplete(un, pkt);
21575 		return (0);
21576 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21577 		if (*((unsigned char *)statusp)
21578 		    == STATUS_RESERVATION_CONFLICT) {
21579 			/*
21580 			 * Handle a reservation conflict by panicking if
21581 			 * configured for failfast or by logging the conflict
21582 			 * and updating the reservation status
21583 			 */
21584 			mutex_enter(SD_MUTEX(un));
21585 			if ((un->un_resvd_status & SD_FAILFAST) &&
21586 			    (sd_failfast_enable)) {
21587 				sd_panic_for_res_conflict(un);
21588 				/*NOTREACHED*/
21589 			}
21590 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21591 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21592 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21593 			mutex_exit(SD_MUTEX(un));
21594 		}
21595 	}
21596 
21597 	if (sensep != NULL) {
21598 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21599 			mutex_enter(SD_MUTEX(un));
21600 			if ((scsi_sense_asc(sensep) ==
21601 			    SD_SCSI_RESET_SENSE_CODE) &&
21602 			    (un->un_resvd_status & SD_RESERVE)) {
21603 				/*
21604 				 * The additional sense code indicates a power
21605 				 * on or bus device reset has occurred; update
21606 				 * the reservation status.
21607 				 */
21608 				un->un_resvd_status |=
21609 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21610 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21611 				    "sd_mhd_watch_cb: Lost Reservation\n");
21612 			}
21613 		} else {
21614 			return (0);
21615 		}
21616 	} else {
21617 		mutex_enter(SD_MUTEX(un));
21618 	}
21619 
21620 	if ((un->un_resvd_status & SD_RESERVE) &&
21621 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21622 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21623 			/*
21624 			 * A reset occurred in between the last probe and this
21625 			 * one so if a timeout is pending cancel it.
21626 			 */
21627 			if (un->un_resvd_timeid) {
21628 				timeout_id_t temp_id = un->un_resvd_timeid;
21629 				un->un_resvd_timeid = NULL;
21630 				mutex_exit(SD_MUTEX(un));
21631 				(void) untimeout(temp_id);
21632 				mutex_enter(SD_MUTEX(un));
21633 			}
21634 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21635 		}
21636 		if (un->un_resvd_timeid == 0) {
21637 			/* Schedule a timeout to handle the lost reservation */
21638 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21639 			    (void *)dev,
21640 			    drv_usectohz(sd_reinstate_resv_delay));
21641 		}
21642 	}
21643 	mutex_exit(SD_MUTEX(un));
21644 	return (0);
21645 }
21646 
21647 
21648 /*
21649  *    Function: sd_mhd_watch_incomplete()
21650  *
21651  * Description: This function is used to find out why a scsi pkt sent by the
21652  *		scsi watch facility was not completed. Under some scenarios this
21653  *		routine will return. Otherwise it will send a bus reset to see
21654  *		if the drive is still online.
21655  *
21656  *   Arguments: un  - driver soft state (unit) structure
21657  *		pkt - incomplete scsi pkt
21658  */
21659 
21660 static void
21661 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
21662 {
21663 	int	be_chatty;
21664 	int	perr;
21665 
21666 	ASSERT(pkt != NULL);
21667 	ASSERT(un != NULL);
21668 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
21669 	perr		= (pkt->pkt_statistics & STAT_PERR);
21670 
21671 	mutex_enter(SD_MUTEX(un));
21672 	if (un->un_state == SD_STATE_DUMPING) {
21673 		mutex_exit(SD_MUTEX(un));
21674 		return;
21675 	}
21676 
21677 	switch (pkt->pkt_reason) {
21678 	case CMD_UNX_BUS_FREE:
21679 		/*
21680 		 * If we had a parity error that caused the target to drop BSY*,
21681 		 * don't be chatty about it.
21682 		 */
21683 		if (perr && be_chatty) {
21684 			be_chatty = 0;
21685 		}
21686 		break;
21687 	case CMD_TAG_REJECT:
21688 		/*
21689 		 * The SCSI-2 spec states that a tag reject will be sent by the
21690 		 * target if tagged queuing is not supported. A tag reject may
21691 		 * also be sent during certain initialization periods or to
21692 		 * control internal resources. For the latter case the target
21693 		 * may also return Queue Full.
21694 		 *
21695 		 * If this driver receives a tag reject from a target that is
21696 		 * going through an init period or controlling internal
21697 		 * resources tagged queuing will be disabled. This is a less
21698 		 * than optimal behavior but the driver is unable to determine
21699 		 * the target state and assumes tagged queueing is not supported
21700 		 */
21701 		pkt->pkt_flags = 0;
21702 		un->un_tagflags = 0;
21703 
21704 		if (un->un_f_opt_queueing == TRUE) {
21705 			un->un_throttle = min(un->un_throttle, 3);
21706 		} else {
21707 			un->un_throttle = 1;
21708 		}
21709 		mutex_exit(SD_MUTEX(un));
21710 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
21711 		mutex_enter(SD_MUTEX(un));
21712 		break;
21713 	case CMD_INCOMPLETE:
21714 		/*
21715 		 * The transport stopped with an abnormal state, fallthrough and
21716 		 * reset the target and/or bus unless selection did not complete
21717 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
21718 		 * go through a target/bus reset
21719 		 */
21720 		if (pkt->pkt_state == STATE_GOT_BUS) {
21721 			break;
21722 		}
21723 		/*FALLTHROUGH*/
21724 
21725 	case CMD_TIMEOUT:
21726 	default:
21727 		/*
21728 		 * The lun may still be running the command, so a lun reset
21729 		 * should be attempted. If the lun reset fails or cannot be
21730 		 * issued, than try a target reset. Lastly try a bus reset.
21731 		 */
21732 		if ((pkt->pkt_statistics &
21733 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
21734 			int reset_retval = 0;
21735 			mutex_exit(SD_MUTEX(un));
21736 			if (un->un_f_allow_bus_device_reset == TRUE) {
21737 				if (un->un_f_lun_reset_enabled == TRUE) {
21738 					reset_retval =
21739 					    scsi_reset(SD_ADDRESS(un),
21740 					    RESET_LUN);
21741 				}
21742 				if (reset_retval == 0) {
21743 					reset_retval =
21744 					    scsi_reset(SD_ADDRESS(un),
21745 					    RESET_TARGET);
21746 				}
21747 			}
21748 			if (reset_retval == 0) {
21749 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
21750 			}
21751 			mutex_enter(SD_MUTEX(un));
21752 		}
21753 		break;
21754 	}
21755 
21756 	/* A device/bus reset has occurred; update the reservation status. */
21757 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
21758 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
21759 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21760 			un->un_resvd_status |=
21761 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21762 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21763 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
21764 		}
21765 	}
21766 
21767 	/*
21768 	 * The disk has been turned off; Update the device state.
21769 	 *
21770 	 * Note: Should we be offlining the disk here?
21771 	 */
21772 	if (pkt->pkt_state == STATE_GOT_BUS) {
21773 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
21774 		    "Disk not responding to selection\n");
21775 		if (un->un_state != SD_STATE_OFFLINE) {
21776 			New_state(un, SD_STATE_OFFLINE);
21777 		}
21778 	} else if (be_chatty) {
21779 		/*
21780 		 * suppress messages if they are all the same pkt reason;
21781 		 * with TQ, many (up to 256) are returned with the same
21782 		 * pkt_reason
21783 		 */
21784 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
21785 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
21786 			    "sd_mhd_watch_incomplete: "
21787 			    "SCSI transport failed: reason '%s'\n",
21788 			    scsi_rname(pkt->pkt_reason));
21789 		}
21790 	}
21791 	un->un_last_pkt_reason = pkt->pkt_reason;
21792 	mutex_exit(SD_MUTEX(un));
21793 }
21794 
21795 
21796 /*
21797  *    Function: sd_sname()
21798  *
21799  * Description: This is a simple little routine to return a string containing
21800  *		a printable description of command status byte for use in
21801  *		logging.
21802  *
21803  *   Arguments: status - pointer to a status byte
21804  *
21805  * Return Code: char * - string containing status description.
21806  */
21807 
21808 static char *
21809 sd_sname(uchar_t status)
21810 {
21811 	switch (status & STATUS_MASK) {
21812 	case STATUS_GOOD:
21813 		return ("good status");
21814 	case STATUS_CHECK:
21815 		return ("check condition");
21816 	case STATUS_MET:
21817 		return ("condition met");
21818 	case STATUS_BUSY:
21819 		return ("busy");
21820 	case STATUS_INTERMEDIATE:
21821 		return ("intermediate");
21822 	case STATUS_INTERMEDIATE_MET:
21823 		return ("intermediate - condition met");
21824 	case STATUS_RESERVATION_CONFLICT:
21825 		return ("reservation_conflict");
21826 	case STATUS_TERMINATED:
21827 		return ("command terminated");
21828 	case STATUS_QFULL:
21829 		return ("queue full");
21830 	default:
21831 		return ("<unknown status>");
21832 	}
21833 }
21834 
21835 
21836 /*
21837  *    Function: sd_mhd_resvd_recover()
21838  *
21839  * Description: This function adds a reservation entry to the
21840  *		sd_resv_reclaim_request list and signals the reservation
21841  *		reclaim thread that there is work pending. If the reservation
21842  *		reclaim thread has not been previously created this function
21843  *		will kick it off.
21844  *
21845  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21846  *			among multiple watches that share this callback function
21847  *
21848  *     Context: This routine is called by timeout() and is run in interrupt
21849  *		context. It must not sleep or call other functions which may
21850  *		sleep.
21851  */
21852 
21853 static void
21854 sd_mhd_resvd_recover(void *arg)
21855 {
21856 	dev_t			dev = (dev_t)arg;
21857 	struct sd_lun		*un;
21858 	struct sd_thr_request	*sd_treq = NULL;
21859 	struct sd_thr_request	*sd_cur = NULL;
21860 	struct sd_thr_request	*sd_prev = NULL;
21861 	int			already_there = 0;
21862 
21863 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21864 		return;
21865 	}
21866 
21867 	mutex_enter(SD_MUTEX(un));
21868 	un->un_resvd_timeid = NULL;
21869 	if (un->un_resvd_status & SD_WANT_RESERVE) {
21870 		/*
21871 		 * There was a reset so don't issue the reserve, allow the
21872 		 * sd_mhd_watch_cb callback function to notice this and
21873 		 * reschedule the timeout for reservation.
21874 		 */
21875 		mutex_exit(SD_MUTEX(un));
21876 		return;
21877 	}
21878 	mutex_exit(SD_MUTEX(un));
21879 
21880 	/*
21881 	 * Add this device to the sd_resv_reclaim_request list and the
21882 	 * sd_resv_reclaim_thread should take care of the rest.
21883 	 *
21884 	 * Note: We can't sleep in this context so if the memory allocation
21885 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
21886 	 * reschedule the timeout for reservation.  (4378460)
21887 	 */
21888 	sd_treq = (struct sd_thr_request *)
21889 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
21890 	if (sd_treq == NULL) {
21891 		return;
21892 	}
21893 
21894 	sd_treq->sd_thr_req_next = NULL;
21895 	sd_treq->dev = dev;
21896 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21897 	if (sd_tr.srq_thr_req_head == NULL) {
21898 		sd_tr.srq_thr_req_head = sd_treq;
21899 	} else {
21900 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
21901 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
21902 			if (sd_cur->dev == dev) {
21903 				/*
21904 				 * already in Queue so don't log
21905 				 * another request for the device
21906 				 */
21907 				already_there = 1;
21908 				break;
21909 			}
21910 			sd_prev = sd_cur;
21911 		}
21912 		if (!already_there) {
21913 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
21914 			    "logging request for %lx\n", dev);
21915 			sd_prev->sd_thr_req_next = sd_treq;
21916 		} else {
21917 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
21918 		}
21919 	}
21920 
21921 	/*
21922 	 * Create a kernel thread to do the reservation reclaim and free up this
21923 	 * thread. We cannot block this thread while we go away to do the
21924 	 * reservation reclaim
21925 	 */
21926 	if (sd_tr.srq_resv_reclaim_thread == NULL)
21927 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
21928 		    sd_resv_reclaim_thread, NULL,
21929 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
21930 
21931 	/* Tell the reservation reclaim thread that it has work to do */
21932 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
21933 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21934 }
21935 
21936 /*
21937  *    Function: sd_resv_reclaim_thread()
21938  *
21939  * Description: This function implements the reservation reclaim operations
21940  *
21941  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
21942  *		      among multiple watches that share this callback function
21943  */
21944 
21945 static void
21946 sd_resv_reclaim_thread()
21947 {
21948 	struct sd_lun		*un;
21949 	struct sd_thr_request	*sd_mhreq;
21950 
21951 	/* Wait for work */
21952 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21953 	if (sd_tr.srq_thr_req_head == NULL) {
21954 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
21955 		    &sd_tr.srq_resv_reclaim_mutex);
21956 	}
21957 
21958 	/* Loop while we have work */
21959 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
21960 		un = ddi_get_soft_state(sd_state,
21961 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
21962 		if (un == NULL) {
21963 			/*
21964 			 * softstate structure is NULL so just
21965 			 * dequeue the request and continue
21966 			 */
21967 			sd_tr.srq_thr_req_head =
21968 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21969 			kmem_free(sd_tr.srq_thr_cur_req,
21970 			    sizeof (struct sd_thr_request));
21971 			continue;
21972 		}
21973 
21974 		/* dequeue the request */
21975 		sd_mhreq = sd_tr.srq_thr_cur_req;
21976 		sd_tr.srq_thr_req_head =
21977 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21978 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21979 
21980 		/*
21981 		 * Reclaim reservation only if SD_RESERVE is still set. There
21982 		 * may have been a call to MHIOCRELEASE before we got here.
21983 		 */
21984 		mutex_enter(SD_MUTEX(un));
21985 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21986 			/*
21987 			 * Note: The SD_LOST_RESERVE flag is cleared before
21988 			 * reclaiming the reservation. If this is done after the
21989 			 * call to sd_reserve_release a reservation loss in the
21990 			 * window between pkt completion of reserve cmd and
21991 			 * mutex_enter below may not be recognized
21992 			 */
21993 			un->un_resvd_status &= ~SD_LOST_RESERVE;
21994 			mutex_exit(SD_MUTEX(un));
21995 
21996 			if (sd_reserve_release(sd_mhreq->dev,
21997 			    SD_RESERVE) == 0) {
21998 				mutex_enter(SD_MUTEX(un));
21999 				un->un_resvd_status |= SD_RESERVE;
22000 				mutex_exit(SD_MUTEX(un));
22001 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22002 				    "sd_resv_reclaim_thread: "
22003 				    "Reservation Recovered\n");
22004 			} else {
22005 				mutex_enter(SD_MUTEX(un));
22006 				un->un_resvd_status |= SD_LOST_RESERVE;
22007 				mutex_exit(SD_MUTEX(un));
22008 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22009 				    "sd_resv_reclaim_thread: Failed "
22010 				    "Reservation Recovery\n");
22011 			}
22012 		} else {
22013 			mutex_exit(SD_MUTEX(un));
22014 		}
22015 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22016 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
22017 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22018 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
22019 		/*
22020 		 * wakeup the destroy thread if anyone is waiting on
22021 		 * us to complete.
22022 		 */
22023 		cv_signal(&sd_tr.srq_inprocess_cv);
22024 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
22025 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
22026 	}
22027 
22028 	/*
22029 	 * cleanup the sd_tr structure now that this thread will not exist
22030 	 */
22031 	ASSERT(sd_tr.srq_thr_req_head == NULL);
22032 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
22033 	sd_tr.srq_resv_reclaim_thread = NULL;
22034 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22035 	thread_exit();
22036 }
22037 
22038 
22039 /*
22040  *    Function: sd_rmv_resv_reclaim_req()
22041  *
22042  * Description: This function removes any pending reservation reclaim requests
22043  *		for the specified device.
22044  *
22045  *   Arguments: dev - the device 'dev_t'
22046  */
22047 
22048 static void
22049 sd_rmv_resv_reclaim_req(dev_t dev)
22050 {
22051 	struct sd_thr_request *sd_mhreq;
22052 	struct sd_thr_request *sd_prev;
22053 
22054 	/* Remove a reservation reclaim request from the list */
22055 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22056 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22057 		/*
22058 		 * We are attempting to reinstate reservation for
22059 		 * this device. We wait for sd_reserve_release()
22060 		 * to return before we return.
22061 		 */
22062 		cv_wait(&sd_tr.srq_inprocess_cv,
22063 		    &sd_tr.srq_resv_reclaim_mutex);
22064 	} else {
22065 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22066 		if (sd_mhreq && sd_mhreq->dev == dev) {
22067 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22068 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22069 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22070 			return;
22071 		}
22072 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22073 			if (sd_mhreq && sd_mhreq->dev == dev) {
22074 				break;
22075 			}
22076 			sd_prev = sd_mhreq;
22077 		}
22078 		if (sd_mhreq != NULL) {
22079 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22080 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22081 		}
22082 	}
22083 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22084 }
22085 
22086 
22087 /*
22088  *    Function: sd_mhd_reset_notify_cb()
22089  *
22090  * Description: This is a call back function for scsi_reset_notify. This
22091  *		function updates the softstate reserved status and logs the
22092  *		reset. The driver scsi watch facility callback function
22093  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22094  *		will reclaim the reservation.
22095  *
22096  *   Arguments: arg  - driver soft state (unit) structure
22097  */
22098 
22099 static void
22100 sd_mhd_reset_notify_cb(caddr_t arg)
22101 {
22102 	struct sd_lun *un = (struct sd_lun *)arg;
22103 
22104 	mutex_enter(SD_MUTEX(un));
22105 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22106 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22107 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22108 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22109 	}
22110 	mutex_exit(SD_MUTEX(un));
22111 }
22112 
22113 
22114 /*
22115  *    Function: sd_take_ownership()
22116  *
22117  * Description: This routine implements an algorithm to achieve a stable
22118  *		reservation on disks which don't implement priority reserve,
22119  *		and makes sure that other host lose re-reservation attempts.
22120  *		This algorithm contains of a loop that keeps issuing the RESERVE
22121  *		for some period of time (min_ownership_delay, default 6 seconds)
22122  *		During that loop, it looks to see if there has been a bus device
22123  *		reset or bus reset (both of which cause an existing reservation
22124  *		to be lost). If the reservation is lost issue RESERVE until a
22125  *		period of min_ownership_delay with no resets has gone by, or
22126  *		until max_ownership_delay has expired. This loop ensures that
22127  *		the host really did manage to reserve the device, in spite of
22128  *		resets. The looping for min_ownership_delay (default six
22129  *		seconds) is important to early generation clustering products,
22130  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22131  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22132  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22133  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22134  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22135  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22136  *		no longer "owns" the disk and will have panicked itself.  Thus,
22137  *		the host issuing the MHIOCTKOWN is assured (with timing
22138  *		dependencies) that by the time it actually starts to use the
22139  *		disk for real work, the old owner is no longer accessing it.
22140  *
22141  *		min_ownership_delay is the minimum amount of time for which the
22142  *		disk must be reserved continuously devoid of resets before the
22143  *		MHIOCTKOWN ioctl will return success.
22144  *
22145  *		max_ownership_delay indicates the amount of time by which the
22146  *		take ownership should succeed or timeout with an error.
22147  *
22148  *   Arguments: dev - the device 'dev_t'
22149  *		*p  - struct containing timing info.
22150  *
22151  * Return Code: 0 for success or error code
22152  */
22153 
22154 static int
22155 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22156 {
22157 	struct sd_lun	*un;
22158 	int		rval;
22159 	int		err;
22160 	int		reservation_count   = 0;
22161 	int		min_ownership_delay =  6000000; /* in usec */
22162 	int		max_ownership_delay = 30000000; /* in usec */
22163 	clock_t		start_time;	/* starting time of this algorithm */
22164 	clock_t		end_time;	/* time limit for giving up */
22165 	clock_t		ownership_time;	/* time limit for stable ownership */
22166 	clock_t		current_time;
22167 	clock_t		previous_current_time;
22168 
22169 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22170 		return (ENXIO);
22171 	}
22172 
22173 	/*
22174 	 * Attempt a device reservation. A priority reservation is requested.
22175 	 */
22176 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22177 	    != SD_SUCCESS) {
22178 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22179 		    "sd_take_ownership: return(1)=%d\n", rval);
22180 		return (rval);
22181 	}
22182 
22183 	/* Update the softstate reserved status to indicate the reservation */
22184 	mutex_enter(SD_MUTEX(un));
22185 	un->un_resvd_status |= SD_RESERVE;
22186 	un->un_resvd_status &=
22187 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22188 	mutex_exit(SD_MUTEX(un));
22189 
22190 	if (p != NULL) {
22191 		if (p->min_ownership_delay != 0) {
22192 			min_ownership_delay = p->min_ownership_delay * 1000;
22193 		}
22194 		if (p->max_ownership_delay != 0) {
22195 			max_ownership_delay = p->max_ownership_delay * 1000;
22196 		}
22197 	}
22198 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22199 	    "sd_take_ownership: min, max delays: %d, %d\n",
22200 	    min_ownership_delay, max_ownership_delay);
22201 
22202 	start_time = ddi_get_lbolt();
22203 	current_time	= start_time;
22204 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22205 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22206 
22207 	while (current_time - end_time < 0) {
22208 		delay(drv_usectohz(500000));
22209 
22210 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22211 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22212 				mutex_enter(SD_MUTEX(un));
22213 				rval = (un->un_resvd_status &
22214 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22215 				mutex_exit(SD_MUTEX(un));
22216 				break;
22217 			}
22218 		}
22219 		previous_current_time = current_time;
22220 		current_time = ddi_get_lbolt();
22221 		mutex_enter(SD_MUTEX(un));
22222 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22223 			ownership_time = ddi_get_lbolt() +
22224 			    drv_usectohz(min_ownership_delay);
22225 			reservation_count = 0;
22226 		} else {
22227 			reservation_count++;
22228 		}
22229 		un->un_resvd_status |= SD_RESERVE;
22230 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22231 		mutex_exit(SD_MUTEX(un));
22232 
22233 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22234 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22235 		    "reservation=%s\n", (current_time - previous_current_time),
22236 		    reservation_count ? "ok" : "reclaimed");
22237 
22238 		if (current_time - ownership_time >= 0 &&
22239 		    reservation_count >= 4) {
22240 			rval = 0; /* Achieved a stable ownership */
22241 			break;
22242 		}
22243 		if (current_time - end_time >= 0) {
22244 			rval = EACCES; /* No ownership in max possible time */
22245 			break;
22246 		}
22247 	}
22248 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22249 	    "sd_take_ownership: return(2)=%d\n", rval);
22250 	return (rval);
22251 }
22252 
22253 
22254 /*
22255  *    Function: sd_reserve_release()
22256  *
22257  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22258  *		PRIORITY RESERVE commands based on a user specified command type
22259  *
22260  *   Arguments: dev - the device 'dev_t'
22261  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22262  *		      SD_RESERVE, SD_RELEASE
22263  *
22264  * Return Code: 0 or Error Code
22265  */
22266 
22267 static int
22268 sd_reserve_release(dev_t dev, int cmd)
22269 {
22270 	struct uscsi_cmd	*com = NULL;
22271 	struct sd_lun		*un = NULL;
22272 	char			cdb[CDB_GROUP0];
22273 	int			rval;
22274 
22275 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22276 	    (cmd == SD_PRIORITY_RESERVE));
22277 
22278 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22279 		return (ENXIO);
22280 	}
22281 
22282 	/* instantiate and initialize the command and cdb */
22283 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22284 	bzero(cdb, CDB_GROUP0);
22285 	com->uscsi_flags   = USCSI_SILENT;
22286 	com->uscsi_timeout = un->un_reserve_release_time;
22287 	com->uscsi_cdblen  = CDB_GROUP0;
22288 	com->uscsi_cdb	   = cdb;
22289 	if (cmd == SD_RELEASE) {
22290 		cdb[0] = SCMD_RELEASE;
22291 	} else {
22292 		cdb[0] = SCMD_RESERVE;
22293 	}
22294 
22295 	/* Send the command. */
22296 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22297 	    SD_PATH_STANDARD);
22298 
22299 	/*
22300 	 * "break" a reservation that is held by another host, by issuing a
22301 	 * reset if priority reserve is desired, and we could not get the
22302 	 * device.
22303 	 */
22304 	if ((cmd == SD_PRIORITY_RESERVE) &&
22305 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22306 		/*
22307 		 * First try to reset the LUN. If we cannot, then try a target
22308 		 * reset, followed by a bus reset if the target reset fails.
22309 		 */
22310 		int reset_retval = 0;
22311 		if (un->un_f_lun_reset_enabled == TRUE) {
22312 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22313 		}
22314 		if (reset_retval == 0) {
22315 			/* The LUN reset either failed or was not issued */
22316 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22317 		}
22318 		if ((reset_retval == 0) &&
22319 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22320 			rval = EIO;
22321 			kmem_free(com, sizeof (*com));
22322 			return (rval);
22323 		}
22324 
22325 		bzero(com, sizeof (struct uscsi_cmd));
22326 		com->uscsi_flags   = USCSI_SILENT;
22327 		com->uscsi_cdb	   = cdb;
22328 		com->uscsi_cdblen  = CDB_GROUP0;
22329 		com->uscsi_timeout = 5;
22330 
22331 		/*
22332 		 * Reissue the last reserve command, this time without request
22333 		 * sense.  Assume that it is just a regular reserve command.
22334 		 */
22335 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22336 		    SD_PATH_STANDARD);
22337 	}
22338 
22339 	/* Return an error if still getting a reservation conflict. */
22340 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22341 		rval = EACCES;
22342 	}
22343 
22344 	kmem_free(com, sizeof (*com));
22345 	return (rval);
22346 }
22347 
22348 
22349 #define	SD_NDUMP_RETRIES	12
22350 /*
22351  *	System Crash Dump routine
22352  */
22353 
22354 static int
22355 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22356 {
22357 	int		instance;
22358 	int		partition;
22359 	int		i;
22360 	int		err;
22361 	struct sd_lun	*un;
22362 	struct scsi_pkt *wr_pktp;
22363 	struct buf	*wr_bp;
22364 	struct buf	wr_buf;
22365 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22366 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22367 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22368 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22369 	size_t		io_start_offset;
22370 	int		doing_rmw = FALSE;
22371 	int		rval;
22372 #if defined(__i386) || defined(__amd64)
22373 	ssize_t dma_resid;
22374 	daddr_t oblkno;
22375 #endif
22376 	diskaddr_t	nblks = 0;
22377 	diskaddr_t	start_block;
22378 
22379 	instance = SDUNIT(dev);
22380 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22381 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22382 		return (ENXIO);
22383 	}
22384 
22385 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22386 
22387 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22388 
22389 	partition = SDPART(dev);
22390 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22391 
	/* Validate the blocks to dump against the partition size. */
22393 
22394 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22395 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22396 
22397 	if ((blkno + nblk) > nblks) {
22398 		SD_TRACE(SD_LOG_DUMP, un,
22399 		    "sddump: dump range larger than partition: "
22400 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22401 		    blkno, nblk, nblks);
22402 		return (EINVAL);
22403 	}
22404 
22405 	mutex_enter(&un->un_pm_mutex);
22406 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22407 		struct scsi_pkt *start_pktp;
22408 
22409 		mutex_exit(&un->un_pm_mutex);
22410 
22411 		/*
22412 		 * use pm framework to power on HBA 1st
22413 		 */
22414 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22415 
22416 		/*
		 * Dump no longer uses sdpower to power on a device; it's
		 * in-line here so it can be done in polled mode.
22419 		 */
22420 
22421 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22422 
22423 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22424 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22425 
22426 		if (start_pktp == NULL) {
22427 			/* We were not given a SCSI packet, fail. */
22428 			return (EIO);
22429 		}
22430 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22431 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22432 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22433 		start_pktp->pkt_flags = FLAG_NOINTR;
22434 
22435 		mutex_enter(SD_MUTEX(un));
22436 		SD_FILL_SCSI1_LUN(un, start_pktp);
22437 		mutex_exit(SD_MUTEX(un));
22438 		/*
22439 		 * Scsi_poll returns 0 (success) if the command completes and
22440 		 * the status block is STATUS_GOOD.
22441 		 */
22442 		if (sd_scsi_poll(un, start_pktp) != 0) {
22443 			scsi_destroy_pkt(start_pktp);
22444 			return (EIO);
22445 		}
22446 		scsi_destroy_pkt(start_pktp);
22447 		(void) sd_ddi_pm_resume(un);
22448 	} else {
22449 		mutex_exit(&un->un_pm_mutex);
22450 	}
22451 
22452 	mutex_enter(SD_MUTEX(un));
22453 	un->un_throttle = 0;
22454 
22455 	/*
22456 	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in
	 * a good state so no bus reset is required.
22459 	 * Clear sense data via Request Sense cmd.
22460 	 * In sddump we don't care about allow_bus_device_reset anymore
22461 	 */
22462 
22463 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22464 	    (un->un_state != SD_STATE_DUMPING)) {
22465 
22466 		New_state(un, SD_STATE_DUMPING);
22467 
22468 		if (un->un_f_is_fibre == FALSE) {
22469 			mutex_exit(SD_MUTEX(un));
22470 			/*
22471 			 * Attempt a bus reset for parallel scsi.
22472 			 *
22473 			 * Note: A bus reset is required because on some host
22474 			 * systems (i.e. E420R) a bus device reset is
22475 			 * insufficient to reset the state of the target.
22476 			 *
22477 			 * Note: Don't issue the reset for fibre-channel,
22478 			 * because this tends to hang the bus (loop) for
22479 			 * too long while everyone is logging out and in
22480 			 * and the deadman timer for dumping will fire
22481 			 * before the dump is complete.
22482 			 */
22483 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22484 				mutex_enter(SD_MUTEX(un));
22485 				Restore_state(un);
22486 				mutex_exit(SD_MUTEX(un));
22487 				return (EIO);
22488 			}
22489 
22490 			/* Delay to give the device some recovery time. */
22491 			drv_usecwait(10000);
22492 
22493 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22494 				SD_INFO(SD_LOG_DUMP, un,
22495 					"sddump: sd_send_polled_RQS failed\n");
22496 			}
22497 			mutex_enter(SD_MUTEX(un));
22498 		}
22499 	}
22500 
22501 	/*
22502 	 * Convert the partition-relative block number to a
22503 	 * disk physical block number.
22504 	 */
22505 	blkno += start_block;
22506 
22507 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22508 
22509 
22510 	/*
22511 	 * Check if the device has a non-512 block size.
22512 	 */
22513 	wr_bp = NULL;
22514 	if (NOT_DEVBSIZE(un)) {
22515 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22516 		tgt_byte_count = nblk * un->un_sys_blocksize;
22517 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22518 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22519 			doing_rmw = TRUE;
22520 			/*
22521 			 * Calculate the block number and number of block
22522 			 * in terms of the media block size.
22523 			 */
22524 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22525 			tgt_nblk =
22526 			    ((tgt_byte_offset + tgt_byte_count +
22527 				(un->un_tgt_blocksize - 1)) /
22528 				un->un_tgt_blocksize) - tgt_blkno;
22529 
22530 			/*
22531 			 * Invoke the routine which is going to do read part
22532 			 * of read-modify-write.
22533 			 * Note that this routine returns a pointer to
22534 			 * a valid bp in wr_bp.
22535 			 */
22536 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22537 			    &wr_bp);
22538 			if (err) {
22539 				mutex_exit(SD_MUTEX(un));
22540 				return (err);
22541 			}
22542 			/*
22543 			 * Offset is being calculated as -
22544 			 * (original block # * system block size) -
22545 			 * (new block # * target block size)
22546 			 */
22547 			io_start_offset =
22548 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22549 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22550 
22551 			ASSERT((io_start_offset >= 0) &&
22552 			    (io_start_offset < un->un_tgt_blocksize));
22553 			/*
22554 			 * Do the modify portion of read modify write.
22555 			 */
22556 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22557 			    (size_t)nblk * un->un_sys_blocksize);
22558 		} else {
22559 			doing_rmw = FALSE;
22560 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22561 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22562 		}
22563 
22564 		/* Convert blkno and nblk to target blocks */
22565 		blkno = tgt_blkno;
22566 		nblk = tgt_nblk;
22567 	} else {
22568 		wr_bp = &wr_buf;
22569 		bzero(wr_bp, sizeof (struct buf));
22570 		wr_bp->b_flags		= B_BUSY;
22571 		wr_bp->b_un.b_addr	= addr;
22572 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
22573 		wr_bp->b_resid		= 0;
22574 	}
22575 
22576 	mutex_exit(SD_MUTEX(un));
22577 
22578 	/*
22579 	 * Obtain a SCSI packet for the write command.
22580 	 * It should be safe to call the allocator here without
22581 	 * worrying about being locked for DVMA mapping because
22582 	 * the address we're passed is already a DVMA mapping
22583 	 *
22584 	 * We are also not going to worry about semaphore ownership
22585 	 * in the dump buffer. Dumping is single threaded at present.
22586 	 */
22587 
22588 	wr_pktp = NULL;
22589 
22590 #if defined(__i386) || defined(__amd64)
22591 	dma_resid = wr_bp->b_bcount;
22592 	oblkno = blkno;
22593 	while (dma_resid != 0) {
22594 #endif
22595 
22596 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22597 		wr_bp->b_flags &= ~B_ERROR;
22598 
22599 #if defined(__i386) || defined(__amd64)
22600 		blkno = oblkno +
22601 			((wr_bp->b_bcount - dma_resid) /
22602 			    un->un_tgt_blocksize);
22603 		nblk = dma_resid / un->un_tgt_blocksize;
22604 
22605 		if (wr_pktp) {
22606 			/* Partial DMA transfers after initial transfer */
22607 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22608 			    blkno, nblk);
22609 		} else {
22610 			/* Initial transfer */
22611 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22612 			    un->un_pkt_flags, NULL_FUNC, NULL,
22613 			    blkno, nblk);
22614 		}
22615 #else
22616 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22617 		    0, NULL_FUNC, NULL, blkno, nblk);
22618 #endif
22619 
22620 		if (rval == 0) {
22621 			/* We were given a SCSI packet, continue. */
22622 			break;
22623 		}
22624 
22625 		if (i == 0) {
22626 			if (wr_bp->b_flags & B_ERROR) {
22627 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22628 				    "no resources for dumping; "
22629 				    "error code: 0x%x, retrying",
22630 				    geterror(wr_bp));
22631 			} else {
22632 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22633 				    "no resources for dumping; retrying");
22634 			}
22635 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22636 			if (wr_bp->b_flags & B_ERROR) {
22637 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22638 				    "no resources for dumping; error code: "
22639 				    "0x%x, retrying\n", geterror(wr_bp));
22640 			}
22641 		} else {
22642 			if (wr_bp->b_flags & B_ERROR) {
22643 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22644 				    "no resources for dumping; "
22645 				    "error code: 0x%x, retries failed, "
22646 				    "giving up.\n", geterror(wr_bp));
22647 			} else {
22648 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22649 				    "no resources for dumping; "
22650 				    "retries failed, giving up.\n");
22651 			}
22652 			mutex_enter(SD_MUTEX(un));
22653 			Restore_state(un);
22654 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
22655 				mutex_exit(SD_MUTEX(un));
22656 				scsi_free_consistent_buf(wr_bp);
22657 			} else {
22658 				mutex_exit(SD_MUTEX(un));
22659 			}
22660 			return (EIO);
22661 		}
22662 		drv_usecwait(10000);
22663 	}
22664 
22665 #if defined(__i386) || defined(__amd64)
22666 	/*
22667 	 * save the resid from PARTIAL_DMA
22668 	 */
22669 	dma_resid = wr_pktp->pkt_resid;
22670 	if (dma_resid != 0)
22671 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
22672 	wr_pktp->pkt_resid = 0;
22673 #endif
22674 
22675 	/* SunBug 1222170 */
22676 	wr_pktp->pkt_flags = FLAG_NOINTR;
22677 
22678 	err = EIO;
22679 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22680 
22681 		/*
22682 		 * Scsi_poll returns 0 (success) if the command completes and
22683 		 * the status block is STATUS_GOOD.  We should only check
22684 		 * errors if this condition is not true.  Even then we should
22685 		 * send our own request sense packet only if we have a check
22686 		 * condition and auto request sense has not been performed by
22687 		 * the hba.
22688 		 */
22689 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
22690 
22691 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
22692 		    (wr_pktp->pkt_resid == 0)) {
22693 			err = SD_SUCCESS;
22694 			break;
22695 		}
22696 
22697 		/*
22698 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
22699 		 */
22700 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
22701 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22702 			    "Device is gone\n");
22703 			break;
22704 		}
22705 
22706 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
22707 			SD_INFO(SD_LOG_DUMP, un,
22708 			    "sddump: write failed with CHECK, try # %d\n", i);
22709 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
22710 				(void) sd_send_polled_RQS(un);
22711 			}
22712 
22713 			continue;
22714 		}
22715 
22716 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
22717 			int reset_retval = 0;
22718 
22719 			SD_INFO(SD_LOG_DUMP, un,
22720 			    "sddump: write failed with BUSY, try # %d\n", i);
22721 
22722 			if (un->un_f_lun_reset_enabled == TRUE) {
22723 				reset_retval = scsi_reset(SD_ADDRESS(un),
22724 				    RESET_LUN);
22725 			}
22726 			if (reset_retval == 0) {
22727 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22728 			}
22729 			(void) sd_send_polled_RQS(un);
22730 
22731 		} else {
22732 			SD_INFO(SD_LOG_DUMP, un,
22733 			    "sddump: write failed with 0x%x, try # %d\n",
22734 			    SD_GET_PKT_STATUS(wr_pktp), i);
22735 			mutex_enter(SD_MUTEX(un));
22736 			sd_reset_target(un, wr_pktp);
22737 			mutex_exit(SD_MUTEX(un));
22738 		}
22739 
22740 		/*
22741 		 * If we are not getting anywhere with lun/target resets,
22742 		 * let's reset the bus.
22743 		 */
22744 		if (i == SD_NDUMP_RETRIES/2) {
22745 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22746 			(void) sd_send_polled_RQS(un);
22747 		}
22748 
22749 	}
22750 #if defined(__i386) || defined(__amd64)
22751 	}	/* dma_resid */
22752 #endif
22753 
22754 	scsi_destroy_pkt(wr_pktp);
22755 	mutex_enter(SD_MUTEX(un));
22756 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
22757 		mutex_exit(SD_MUTEX(un));
22758 		scsi_free_consistent_buf(wr_bp);
22759 	} else {
22760 		mutex_exit(SD_MUTEX(un));
22761 	}
22762 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
22763 	return (err);
22764 }
22765 
22766 /*
22767  *    Function: sd_scsi_poll()
22768  *
22769  * Description: This is a wrapper for the scsi_poll call.
22770  *
22771  *   Arguments: sd_lun - The unit structure
22772  *              scsi_pkt - The scsi packet being sent to the device.
22773  *
22774  * Return Code: 0 - Command completed successfully with good status
22775  *             -1 - Command failed.  This could indicate a check condition
22776  *                  or other status value requiring recovery action.
22777  *
22778  */
22779 
22780 static int
22781 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
22782 {
22783 	int status;
22784 
22785 	ASSERT(un != NULL);
22786 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22787 	ASSERT(pktp != NULL);
22788 
22789 	status = SD_SUCCESS;
22790 
22791 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
22792 		pktp->pkt_flags |= un->un_tagflags;
22793 		pktp->pkt_flags &= ~FLAG_NODISCON;
22794 	}
22795 
22796 	status = sd_ddi_scsi_poll(pktp);
22797 	/*
22798 	 * Scsi_poll returns 0 (success) if the command completes and the
22799 	 * status block is STATUS_GOOD.  We should only check errors if this
22800 	 * condition is not true.  Even then we should send our own request
22801 	 * sense packet only if we have a check condition and auto
22802 	 * request sense has not been performed by the hba.
22803 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
22804 	 */
22805 	if ((status != SD_SUCCESS) &&
22806 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
22807 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
22808 	    (pktp->pkt_reason != CMD_DEV_GONE))
22809 		(void) sd_send_polled_RQS(un);
22810 
22811 	return (status);
22812 }
22813 
22814 /*
22815  *    Function: sd_send_polled_RQS()
22816  *
22817  * Description: This sends the request sense command to a device.
22818  *
22819  *   Arguments: sd_lun - The unit structure
22820  *
22821  * Return Code: 0 - Command completed successfully with good status
22822  *             -1 - Command failed.
22823  *
22824  */
22825 
22826 static int
22827 sd_send_polled_RQS(struct sd_lun *un)
22828 {
22829 	int	ret_val;
22830 	struct	scsi_pkt	*rqs_pktp;
22831 	struct	buf		*rqs_bp;
22832 
22833 	ASSERT(un != NULL);
22834 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22835 
22836 	ret_val = SD_SUCCESS;
22837 
22838 	rqs_pktp = un->un_rqs_pktp;
22839 	rqs_bp	 = un->un_rqs_bp;
22840 
22841 	mutex_enter(SD_MUTEX(un));
22842 
22843 	if (un->un_sense_isbusy) {
22844 		ret_val = SD_FAILURE;
22845 		mutex_exit(SD_MUTEX(un));
22846 		return (ret_val);
22847 	}
22848 
22849 	/*
22850 	 * If the request sense buffer (and packet) is not in use,
22851 	 * let's set the un_sense_isbusy and send our packet
22852 	 */
22853 	un->un_sense_isbusy 	= 1;
22854 	rqs_pktp->pkt_resid  	= 0;
22855 	rqs_pktp->pkt_reason 	= 0;
22856 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
22857 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
22858 
22859 	mutex_exit(SD_MUTEX(un));
22860 
22861 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
22862 	    " 0x%p\n", rqs_bp->b_un.b_addr);
22863 
22864 	/*
22865 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
22866 	 * axle - it has a call into us!
22867 	 */
22868 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
22869 		SD_INFO(SD_LOG_COMMON, un,
22870 		    "sd_send_polled_RQS: RQS failed\n");
22871 	}
22872 
22873 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
22874 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
22875 
22876 	mutex_enter(SD_MUTEX(un));
22877 	un->un_sense_isbusy = 0;
22878 	mutex_exit(SD_MUTEX(un));
22879 
22880 	return (ret_val);
22881 }
22882 
22883 /*
22884  * Defines needed for localized version of the scsi_poll routine.
22885  */
22886 #define	SD_CSEC		10000			/* usecs */
22887 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
22888 
22889 
22890 /*
22891  *    Function: sd_ddi_scsi_poll()
22892  *
22893  * Description: Localized version of the scsi_poll routine.  The purpose is to
22894  *		send a scsi_pkt to a device as a polled command.  This version
22895  *		is to ensure more robust handling of transport errors.
22896  *		Specifically this routine cures not ready, coming ready
22897  *		transition for power up and reset of sonoma's.  This can take
22898  *		up to 45 seconds for power-on and 20 seconds for reset of a
22899  * 		sonoma lun.
22900  *
22901  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
22902  *
22903  * Return Code: 0 - Command completed successfully with good status
22904  *             -1 - Command failed.
22905  *
22906  */
22907 
22908 static int
22909 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
22910 {
22911 	int busy_count;
22912 	int timeout;
22913 	int rval = SD_FAILURE;
22914 	int savef;
22915 	uint8_t *sensep;
22916 	long savet;
22917 	void (*savec)();
22918 	/*
22919 	 * The following is defined in machdep.c and is used in determining if
22920 	 * the scsi transport system will do polled I/O instead of interrupt
22921 	 * I/O when called from xx_dump().
22922 	 */
22923 	extern int do_polled_io;
22924 
22925 	/*
22926 	 * save old flags in pkt, to restore at end
22927 	 */
22928 	savef = pkt->pkt_flags;
22929 	savec = pkt->pkt_comp;
22930 	savet = pkt->pkt_time;
22931 
22932 	pkt->pkt_flags |= FLAG_NOINTR;
22933 
22934 	/*
22935 	 * XXX there is nothing in the SCSA spec that states that we should not
22936 	 * do a callback for polled cmds; however, removing this will break sd
22937 	 * and probably other target drivers
22938 	 */
22939 	pkt->pkt_comp = NULL;
22940 
22941 	/*
22942 	 * we don't like a polled command without timeout.
22943 	 * 60 seconds seems long enough.
22944 	 */
22945 	if (pkt->pkt_time == 0) {
22946 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
22947 	}
22948 
22949 	/*
22950 	 * Send polled cmd.
22951 	 *
22952 	 * We do some error recovery for various errors.  Tran_busy,
22953 	 * queue full, and non-dispatched commands are retried every 10 msec.
22954 	 * as they are typically transient failures.  Busy status and Not
22955 	 * Ready are retried every second as this status takes a while to
22956 	 * change.  Unit attention is retried for pkt_time (60) times
22957 	 * with no delay.
22958 	 */
22959 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
22960 
22961 	for (busy_count = 0; busy_count < timeout; busy_count++) {
22962 		int rc;
22963 		int poll_delay;
22964 
22965 		/*
22966 		 * Initialize pkt status variables.
22967 		 */
22968 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
22969 
22970 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
22971 			if (rc != TRAN_BUSY) {
22972 				/* Transport failed - give up. */
22973 				break;
22974 			} else {
22975 				/* Transport busy - try again. */
22976 				poll_delay = 1 * SD_CSEC; /* 10 msec */
22977 			}
22978 		} else {
22979 			/*
22980 			 * Transport accepted - check pkt status.
22981 			 */
22982 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
22983 			if (pkt->pkt_reason == CMD_CMPLT &&
22984 			    rc == STATUS_CHECK &&
22985 			    pkt->pkt_state & STATE_ARQ_DONE) {
22986 				struct scsi_arq_status *arqstat =
22987 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
22988 
22989 				sensep = (uint8_t *)&arqstat->sts_sensedata;
22990 			} else {
22991 				sensep = NULL;
22992 			}
22993 
22994 			if ((pkt->pkt_reason == CMD_CMPLT) &&
22995 			    (rc == STATUS_GOOD)) {
22996 				/* No error - we're done */
22997 				rval = SD_SUCCESS;
22998 				break;
22999 
23000 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
23001 				/* Lost connection - give up */
23002 				break;
23003 
23004 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
23005 			    (pkt->pkt_state == 0)) {
23006 				/* Pkt not dispatched - try again. */
23007 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23008 
23009 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23010 			    (rc == STATUS_QFULL)) {
23011 				/* Queue full - try again. */
23012 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23013 
23014 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23015 			    (rc == STATUS_BUSY)) {
23016 				/* Busy - try again. */
23017 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23018 				busy_count += (SD_SEC_TO_CSEC - 1);
23019 
23020 			} else if ((sensep != NULL) &&
23021 			    (scsi_sense_key(sensep) ==
23022 				KEY_UNIT_ATTENTION)) {
23023 				/* Unit Attention - try again */
23024 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
23025 				continue;
23026 
23027 			} else if ((sensep != NULL) &&
23028 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
23029 			    (scsi_sense_asc(sensep) == 0x04) &&
23030 			    (scsi_sense_ascq(sensep) == 0x01)) {
23031 				/* Not ready -> ready - try again. */
23032 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23033 				busy_count += (SD_SEC_TO_CSEC - 1);
23034 
23035 			} else {
23036 				/* BAD status - give up. */
23037 				break;
23038 			}
23039 		}
23040 
23041 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
23042 		    !do_polled_io) {
23043 			delay(drv_usectohz(poll_delay));
23044 		} else {
23045 			/* we busy wait during cpr_dump or interrupt threads */
23046 			drv_usecwait(poll_delay);
23047 		}
23048 	}
23049 
23050 	pkt->pkt_flags = savef;
23051 	pkt->pkt_comp = savec;
23052 	pkt->pkt_time = savet;
23053 	return (rval);
23054 }
23055 
23056 
23057 /*
23058  *    Function: sd_persistent_reservation_in_read_keys
23059  *
23060  * Description: This routine is the driver entry point for handling CD-ROM
23061  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23062  *		by sending the SCSI-3 PRIN commands to the device.
23063  *		Processes the read keys command response by copying the
23064  *		reservation key information into the user provided buffer.
23065  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23066  *
23067  *   Arguments: un   -  Pointer to soft state struct for the target.
23068  *		usrp -	user provided pointer to multihost Persistent In Read
23069  *			Keys structure (mhioc_inkeys_t)
23070  *		flag -	this argument is a pass through to ddi_copyxxx()
23071  *			directly from the mode argument of ioctl().
23072  *
23073  * Return Code: 0   - Success
23074  *		EACCES
23075  *		ENOTSUP
23076  *		errno return code from sd_send_scsi_cmd()
23077  *
23078  *     Context: Can sleep. Does not return until command is completed.
23079  */
23080 
23081 static int
23082 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23083     mhioc_inkeys_t *usrp, int flag)
23084 {
23085 #ifdef _MULTI_DATAMODEL
23086 	struct mhioc_key_list32	li32;
23087 #endif
23088 	sd_prin_readkeys_t	*in;
23089 	mhioc_inkeys_t		*ptr;
23090 	mhioc_key_list_t	li;
23091 	uchar_t			*data_bufp;
23092 	int 			data_len;
23093 	int			rval;
23094 	size_t			copysz;
23095 
23096 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
23097 		return (EINVAL);
23098 	}
23099 	bzero(&li, sizeof (mhioc_key_list_t));
23100 
23101 	/*
23102 	 * Get the listsize from user
23103 	 */
23104 #ifdef _MULTI_DATAMODEL
23105 
23106 	switch (ddi_model_convert_from(flag & FMODELS)) {
23107 	case DDI_MODEL_ILP32:
23108 		copysz = sizeof (struct mhioc_key_list32);
23109 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23110 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23111 			    "sd_persistent_reservation_in_read_keys: "
23112 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23113 			rval = EFAULT;
23114 			goto done;
23115 		}
23116 		li.listsize = li32.listsize;
23117 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23118 		break;
23119 
23120 	case DDI_MODEL_NONE:
23121 		copysz = sizeof (mhioc_key_list_t);
23122 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23123 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23124 			    "sd_persistent_reservation_in_read_keys: "
23125 			    "failed ddi_copyin: mhioc_key_list_t\n");
23126 			rval = EFAULT;
23127 			goto done;
23128 		}
23129 		break;
23130 	}
23131 
23132 #else /* ! _MULTI_DATAMODEL */
23133 	copysz = sizeof (mhioc_key_list_t);
23134 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23135 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23136 		    "sd_persistent_reservation_in_read_keys: "
23137 		    "failed ddi_copyin: mhioc_key_list_t\n");
23138 		rval = EFAULT;
23139 		goto done;
23140 	}
23141 #endif
23142 
23143 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23144 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
23145 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23146 
23147 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23148 	    data_len, data_bufp)) != 0) {
23149 		goto done;
23150 	}
23151 	in = (sd_prin_readkeys_t *)data_bufp;
23152 	ptr->generation = BE_32(in->generation);
23153 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23154 
23155 	/*
23156 	 * Return the min(listsize, listlen) keys
23157 	 */
23158 #ifdef _MULTI_DATAMODEL
23159 
23160 	switch (ddi_model_convert_from(flag & FMODELS)) {
23161 	case DDI_MODEL_ILP32:
23162 		li32.listlen = li.listlen;
23163 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23164 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23165 			    "sd_persistent_reservation_in_read_keys: "
23166 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23167 			rval = EFAULT;
23168 			goto done;
23169 		}
23170 		break;
23171 
23172 	case DDI_MODEL_NONE:
23173 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23174 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23175 			    "sd_persistent_reservation_in_read_keys: "
23176 			    "failed ddi_copyout: mhioc_key_list_t\n");
23177 			rval = EFAULT;
23178 			goto done;
23179 		}
23180 		break;
23181 	}
23182 
23183 #else /* ! _MULTI_DATAMODEL */
23184 
23185 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23186 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23187 		    "sd_persistent_reservation_in_read_keys: "
23188 		    "failed ddi_copyout: mhioc_key_list_t\n");
23189 		rval = EFAULT;
23190 		goto done;
23191 	}
23192 
23193 #endif /* _MULTI_DATAMODEL */
23194 
23195 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23196 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23197 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23198 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23199 		    "sd_persistent_reservation_in_read_keys: "
23200 		    "failed ddi_copyout: keylist\n");
23201 		rval = EFAULT;
23202 	}
23203 done:
23204 	kmem_free(data_bufp, data_len);
23205 	return (rval);
23206 }
23207 
23208 
23209 /*
23210  *    Function: sd_persistent_reservation_in_read_resv
23211  *
23212  * Description: This routine is the driver entry point for handling CD-ROM
23213  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23214  *		by sending the SCSI-3 PRIN commands to the device.
23215  *		Process the read persistent reservations command response by
23216  *		copying the reservation information into the user provided
23217  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
23218  *
23219  *   Arguments: un   -  Pointer to soft state struct for the target.
23220  *		usrp -	user provided pointer to multihost Persistent In Read
23221  *			Keys structure (mhioc_inkeys_t)
23222  *		flag -	this argument is a pass through to ddi_copyxxx()
23223  *			directly from the mode argument of ioctl().
23224  *
23225  * Return Code: 0   - Success
23226  *		EACCES
23227  *		ENOTSUP
23228  *		errno return code from sd_send_scsi_cmd()
23229  *
23230  *     Context: Can sleep. Does not return until command is completed.
23231  */
23232 
23233 static int
23234 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23235     mhioc_inresvs_t *usrp, int flag)
23236 {
23237 #ifdef _MULTI_DATAMODEL
23238 	struct mhioc_resv_desc_list32 resvlist32;
23239 #endif
23240 	sd_prin_readresv_t	*in;
23241 	mhioc_inresvs_t		*ptr;
23242 	sd_readresv_desc_t	*readresv_ptr;
23243 	mhioc_resv_desc_list_t	resvlist;
23244 	mhioc_resv_desc_t 	resvdesc;
23245 	uchar_t			*data_bufp;
23246 	int 			data_len;
23247 	int			rval;
23248 	int			i;
23249 	size_t			copysz;
23250 	mhioc_resv_desc_t	*bufp;
23251 
23252 	if ((ptr = usrp) == NULL) {
23253 		return (EINVAL);
23254 	}
23255 
23256 	/*
23257 	 * Get the listsize from user
23258 	 */
23259 #ifdef _MULTI_DATAMODEL
23260 	switch (ddi_model_convert_from(flag & FMODELS)) {
23261 	case DDI_MODEL_ILP32:
23262 		copysz = sizeof (struct mhioc_resv_desc_list32);
23263 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23264 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23265 			    "sd_persistent_reservation_in_read_resv: "
23266 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23267 			rval = EFAULT;
23268 			goto done;
23269 		}
23270 		resvlist.listsize = resvlist32.listsize;
23271 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23272 		break;
23273 
23274 	case DDI_MODEL_NONE:
23275 		copysz = sizeof (mhioc_resv_desc_list_t);
23276 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23277 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23278 			    "sd_persistent_reservation_in_read_resv: "
23279 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23280 			rval = EFAULT;
23281 			goto done;
23282 		}
23283 		break;
23284 	}
23285 #else /* ! _MULTI_DATAMODEL */
23286 	copysz = sizeof (mhioc_resv_desc_list_t);
23287 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23288 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23289 		    "sd_persistent_reservation_in_read_resv: "
23290 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23291 		rval = EFAULT;
23292 		goto done;
23293 	}
23294 #endif /* ! _MULTI_DATAMODEL */
23295 
23296 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23297 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23298 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23299 
23300 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23301 	    data_len, data_bufp)) != 0) {
23302 		goto done;
23303 	}
23304 	in = (sd_prin_readresv_t *)data_bufp;
23305 	ptr->generation = BE_32(in->generation);
23306 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23307 
23308 	/*
23309 	 * Return the min(listsize, listlen( keys
23310 	 */
23311 #ifdef _MULTI_DATAMODEL
23312 
23313 	switch (ddi_model_convert_from(flag & FMODELS)) {
23314 	case DDI_MODEL_ILP32:
23315 		resvlist32.listlen = resvlist.listlen;
23316 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23317 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23318 			    "sd_persistent_reservation_in_read_resv: "
23319 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23320 			rval = EFAULT;
23321 			goto done;
23322 		}
23323 		break;
23324 
23325 	case DDI_MODEL_NONE:
23326 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23327 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23328 			    "sd_persistent_reservation_in_read_resv: "
23329 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23330 			rval = EFAULT;
23331 			goto done;
23332 		}
23333 		break;
23334 	}
23335 
23336 #else /* ! _MULTI_DATAMODEL */
23337 
23338 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23339 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23340 		    "sd_persistent_reservation_in_read_resv: "
23341 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23342 		rval = EFAULT;
23343 		goto done;
23344 	}
23345 
23346 #endif /* ! _MULTI_DATAMODEL */
23347 
23348 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23349 	bufp = resvlist.list;
23350 	copysz = sizeof (mhioc_resv_desc_t);
23351 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23352 	    i++, readresv_ptr++, bufp++) {
23353 
23354 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23355 		    MHIOC_RESV_KEY_SIZE);
23356 		resvdesc.type  = readresv_ptr->type;
23357 		resvdesc.scope = readresv_ptr->scope;
23358 		resvdesc.scope_specific_addr =
23359 		    BE_32(readresv_ptr->scope_specific_addr);
23360 
23361 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23362 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23363 			    "sd_persistent_reservation_in_read_resv: "
23364 			    "failed ddi_copyout: resvlist\n");
23365 			rval = EFAULT;
23366 			goto done;
23367 		}
23368 	}
23369 done:
23370 	kmem_free(data_bufp, data_len);
23371 	return (rval);
23372 }
23373 
23374 
23375 /*
23376  *    Function: sr_change_blkmode()
23377  *
23378  * Description: This routine is the driver entry point for handling CD-ROM
23379  *		block mode ioctl requests. Support for returning and changing
23380  *		the current block size in use by the device is implemented. The
23381  *		LBA size is changed via a MODE SELECT Block Descriptor.
23382  *
23383  *		This routine issues a mode sense with an allocation length of
23384  *		12 bytes for the mode page header and a single block descriptor.
23385  *
23386  *   Arguments: dev - the device 'dev_t'
23387  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23388  *		      CDROMSBLKMODE (set)
23389  *		data - current block size or requested block size
23390  *		flag - this argument is a pass through to ddi_copyxxx() directly
23391  *		       from the mode argument of ioctl().
23392  *
23393  * Return Code: the code returned by sd_send_scsi_cmd()
23394  *		EINVAL if invalid arguments are provided
23395  *		EFAULT if ddi_copyxxx() fails
23396  *		ENXIO if fail ddi_get_soft_state
23397  *		EIO if invalid mode sense block descriptor length
23398  *
23399  */
23400 
23401 static int
23402 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23403 {
23404 	struct sd_lun			*un = NULL;
23405 	struct mode_header		*sense_mhp, *select_mhp;
23406 	struct block_descriptor		*sense_desc, *select_desc;
23407 	int				current_bsize;
23408 	int				rval = EINVAL;
23409 	uchar_t				*sense = NULL;
23410 	uchar_t				*select = NULL;
23411 
23412 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23413 
23414 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23415 		return (ENXIO);
23416 	}
23417 
23418 	/*
23419 	 * The block length is changed via the Mode Select block descriptor, the
23420 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23421 	 * required as part of this routine. Therefore the mode sense allocation
23422 	 * length is specified to be the length of a mode page header and a
23423 	 * block descriptor.
23424 	 */
23425 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23426 
23427 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23428 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23429 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23430 		    "sr_change_blkmode: Mode Sense Failed\n");
23431 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23432 		return (rval);
23433 	}
23434 
23435 	/* Check the block descriptor len to handle only 1 block descriptor */
23436 	sense_mhp = (struct mode_header *)sense;
23437 	if ((sense_mhp->bdesc_length == 0) ||
23438 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23439 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23440 		    "sr_change_blkmode: Mode Sense returned invalid block"
23441 		    " descriptor length\n");
23442 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23443 		return (EIO);
23444 	}
23445 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23446 	current_bsize = ((sense_desc->blksize_hi << 16) |
23447 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23448 
23449 	/* Process command */
23450 	switch (cmd) {
23451 	case CDROMGBLKMODE:
23452 		/* Return the block size obtained during the mode sense */
23453 		if (ddi_copyout(&current_bsize, (void *)data,
23454 		    sizeof (int), flag) != 0)
23455 			rval = EFAULT;
23456 		break;
23457 	case CDROMSBLKMODE:
23458 		/* Validate the requested block size */
23459 		switch (data) {
23460 		case CDROM_BLK_512:
23461 		case CDROM_BLK_1024:
23462 		case CDROM_BLK_2048:
23463 		case CDROM_BLK_2056:
23464 		case CDROM_BLK_2336:
23465 		case CDROM_BLK_2340:
23466 		case CDROM_BLK_2352:
23467 		case CDROM_BLK_2368:
23468 		case CDROM_BLK_2448:
23469 		case CDROM_BLK_2646:
23470 		case CDROM_BLK_2647:
23471 			break;
23472 		default:
23473 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23474 			    "sr_change_blkmode: "
23475 			    "Block Size '%ld' Not Supported\n", data);
23476 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23477 			return (EINVAL);
23478 		}
23479 
23480 		/*
23481 		 * The current block size matches the requested block size so
23482 		 * there is no need to send the mode select to change the size
23483 		 */
23484 		if (current_bsize == data) {
23485 			break;
23486 		}
23487 
23488 		/* Build the select data for the requested block size */
23489 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23490 		select_mhp = (struct mode_header *)select;
23491 		select_desc =
23492 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23493 		/*
23494 		 * The LBA size is changed via the block descriptor, so the
23495 		 * descriptor is built according to the user data
23496 		 */
23497 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23498 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23499 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23500 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23501 
23502 		/* Send the mode select for the requested block size */
23503 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23504 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23505 		    SD_PATH_STANDARD)) != 0) {
23506 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23507 			    "sr_change_blkmode: Mode Select Failed\n");
23508 			/*
23509 			 * The mode select failed for the requested block size,
23510 			 * so reset the data for the original block size and
23511 			 * send it to the target. The error is indicated by the
23512 			 * return value for the failed mode select.
23513 			 */
23514 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23515 			select_desc->blksize_mid = sense_desc->blksize_mid;
23516 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23517 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23518 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23519 			    SD_PATH_STANDARD);
23520 		} else {
23521 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23522 			mutex_enter(SD_MUTEX(un));
23523 			sd_update_block_info(un, (uint32_t)data, 0);
23524 			mutex_exit(SD_MUTEX(un));
23525 		}
23526 		break;
23527 	default:
23528 		/* should not reach here, but check anyway */
23529 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23530 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23531 		rval = EINVAL;
23532 		break;
23533 	}
23534 
23535 	if (select) {
23536 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23537 	}
23538 	if (sense) {
23539 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23540 	}
23541 	return (rval);
23542 }
23543 
23544 
23545 /*
23546  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23547  * implement driver support for getting and setting the CD speed. The command
23548  * set used will be based on the device type. If the device has not been
23549  * identified as MMC the Toshiba vendor specific mode page will be used. If
23550  * the device is MMC but does not support the Real Time Streaming feature
23551  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
23552  * be used to read the speed.
23553  */
23554 
23555 /*
23556  *    Function: sr_change_speed()
23557  *
23558  * Description: This routine is the driver entry point for handling CD-ROM
23559  *		drive speed ioctl requests for devices supporting the Toshiba
23560  *		vendor specific drive speed mode page. Support for returning
23561  *		and changing the current drive speed in use by the device is
23562  *		implemented.
23563  *
23564  *   Arguments: dev - the device 'dev_t'
23565  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23566  *		      CDROMSDRVSPEED (set)
23567  *		data - current drive speed or requested drive speed
23568  *		flag - this argument is a pass through to ddi_copyxxx() directly
23569  *		       from the mode argument of ioctl().
23570  *
23571  * Return Code: the code returned by sd_send_scsi_cmd()
23572  *		EINVAL if invalid arguments are provided
23573  *		EFAULT if ddi_copyxxx() fails
23574  *		ENXIO if fail ddi_get_soft_state
23575  *		EIO if invalid mode sense block descriptor length
23576  */
23577 
23578 static int
23579 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23580 {
23581 	struct sd_lun			*un = NULL;
23582 	struct mode_header		*sense_mhp, *select_mhp;
23583 	struct mode_speed		*sense_page, *select_page;
23584 	int				current_speed;
23585 	int				rval = EINVAL;
23586 	int				bd_len;
23587 	uchar_t				*sense = NULL;
23588 	uchar_t				*select = NULL;
23589 
23590 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23591 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23592 		return (ENXIO);
23593 	}
23594 
23595 	/*
23596 	 * Note: The drive speed is being modified here according to a Toshiba
23597 	 * vendor specific mode page (0x31).
23598 	 */
23599 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23600 
23601 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23602 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23603 	    SD_PATH_STANDARD)) != 0) {
23604 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23605 		    "sr_change_speed: Mode Sense Failed\n");
23606 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23607 		return (rval);
23608 	}
23609 	sense_mhp  = (struct mode_header *)sense;
23610 
23611 	/* Check the block descriptor len to handle only 1 block descriptor */
23612 	bd_len = sense_mhp->bdesc_length;
23613 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23614 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23615 		    "sr_change_speed: Mode Sense returned invalid block "
23616 		    "descriptor length\n");
23617 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23618 		return (EIO);
23619 	}
23620 
23621 	sense_page = (struct mode_speed *)
23622 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23623 	current_speed = sense_page->speed;
23624 
23625 	/* Process command */
23626 	switch (cmd) {
23627 	case CDROMGDRVSPEED:
23628 		/* Return the drive speed obtained during the mode sense */
23629 		if (current_speed == 0x2) {
23630 			current_speed = CDROM_TWELVE_SPEED;
23631 		}
23632 		if (ddi_copyout(&current_speed, (void *)data,
23633 		    sizeof (int), flag) != 0) {
23634 			rval = EFAULT;
23635 		}
23636 		break;
23637 	case CDROMSDRVSPEED:
23638 		/* Validate the requested drive speed */
23639 		switch ((uchar_t)data) {
23640 		case CDROM_TWELVE_SPEED:
23641 			data = 0x2;
23642 			/*FALLTHROUGH*/
23643 		case CDROM_NORMAL_SPEED:
23644 		case CDROM_DOUBLE_SPEED:
23645 		case CDROM_QUAD_SPEED:
23646 		case CDROM_MAXIMUM_SPEED:
23647 			break;
23648 		default:
23649 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23650 			    "sr_change_speed: "
23651 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23652 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23653 			return (EINVAL);
23654 		}
23655 
23656 		/*
23657 		 * The current drive speed matches the requested drive speed so
23658 		 * there is no need to send the mode select to change the speed
23659 		 */
23660 		if (current_speed == data) {
23661 			break;
23662 		}
23663 
23664 		/* Build the select data for the requested drive speed */
23665 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23666 		select_mhp = (struct mode_header *)select;
23667 		select_mhp->bdesc_length = 0;
23668 		select_page =
23669 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23670 		select_page =
23671 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23672 		select_page->mode_page.code = CDROM_MODE_SPEED;
23673 		select_page->mode_page.length = 2;
23674 		select_page->speed = (uchar_t)data;
23675 
23676 		/* Send the mode select for the requested block size */
23677 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23678 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23679 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
23680 			/*
23681 			 * The mode select failed for the requested drive speed,
23682 			 * so reset the data for the original drive speed and
23683 			 * send it to the target. The error is indicated by the
23684 			 * return value for the failed mode select.
23685 			 */
23686 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23687 			    "sr_drive_speed: Mode Select Failed\n");
23688 			select_page->speed = sense_page->speed;
23689 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23690 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23691 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
23692 		}
23693 		break;
23694 	default:
23695 		/* should not reach here, but check anyway */
23696 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23697 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
23698 		rval = EINVAL;
23699 		break;
23700 	}
23701 
23702 	if (select) {
23703 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
23704 	}
23705 	if (sense) {
23706 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23707 	}
23708 
23709 	return (rval);
23710 }
23711 
23712 
23713 /*
23714  *    Function: sr_atapi_change_speed()
23715  *
23716  * Description: This routine is the driver entry point for handling CD-ROM
23717  *		drive speed ioctl requests for MMC devices that do not support
23718  *		the Real Time Streaming feature (0x107).
23719  *
23720  *		Note: This routine will use the SET SPEED command which may not
23721  *		be supported by all devices.
23722  *
23723  *   Arguments: dev- the device 'dev_t'
23724  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
23725  *		     CDROMSDRVSPEED (set)
23726  *		data- current drive speed or requested drive speed
23727  *		flag- this argument is a pass through to ddi_copyxxx() directly
23728  *		      from the mode argument of ioctl().
23729  *
23730  * Return Code: the code returned by sd_send_scsi_cmd()
23731  *		EINVAL if invalid arguments are provided
23732  *		EFAULT if ddi_copyxxx() fails
23733  *		ENXIO if fail ddi_get_soft_state
23734  *		EIO if invalid mode sense block descriptor length
23735  */
23736 
23737 static int
23738 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23739 {
23740 	struct sd_lun			*un;
23741 	struct uscsi_cmd		*com = NULL;
23742 	struct mode_header_grp2		*sense_mhp;
23743 	uchar_t				*sense_page;
23744 	uchar_t				*sense = NULL;
23745 	char				cdb[CDB_GROUP5];
23746 	int				bd_len;
23747 	int				current_speed = 0;
23748 	int				max_speed = 0;
23749 	int				rval;
23750 
23751 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23752 
23753 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23754 		return (ENXIO);
23755 	}
23756 
23757 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
23758 
23759 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
23760 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
23761 	    SD_PATH_STANDARD)) != 0) {
23762 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23763 		    "sr_atapi_change_speed: Mode Sense Failed\n");
23764 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23765 		return (rval);
23766 	}
23767 
23768 	/* Check the block descriptor len to handle only 1 block descriptor */
23769 	sense_mhp = (struct mode_header_grp2 *)sense;
23770 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
23771 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23772 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23773 		    "sr_atapi_change_speed: Mode Sense returned invalid "
23774 		    "block descriptor length\n");
23775 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23776 		return (EIO);
23777 	}
23778 
23779 	/* Calculate the current and maximum drive speeds */
23780 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
23781 	current_speed = (sense_page[14] << 8) | sense_page[15];
23782 	max_speed = (sense_page[8] << 8) | sense_page[9];
23783 
23784 	/* Process the command */
23785 	switch (cmd) {
23786 	case CDROMGDRVSPEED:
23787 		current_speed /= SD_SPEED_1X;
23788 		if (ddi_copyout(&current_speed, (void *)data,
23789 		    sizeof (int), flag) != 0)
23790 			rval = EFAULT;
23791 		break;
23792 	case CDROMSDRVSPEED:
23793 		/* Convert the speed code to KB/sec */
23794 		switch ((uchar_t)data) {
23795 		case CDROM_NORMAL_SPEED:
23796 			current_speed = SD_SPEED_1X;
23797 			break;
23798 		case CDROM_DOUBLE_SPEED:
23799 			current_speed = 2 * SD_SPEED_1X;
23800 			break;
23801 		case CDROM_QUAD_SPEED:
23802 			current_speed = 4 * SD_SPEED_1X;
23803 			break;
23804 		case CDROM_TWELVE_SPEED:
23805 			current_speed = 12 * SD_SPEED_1X;
23806 			break;
23807 		case CDROM_MAXIMUM_SPEED:
23808 			current_speed = 0xffff;
23809 			break;
23810 		default:
23811 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23812 			    "sr_atapi_change_speed: invalid drive speed %d\n",
23813 			    (uchar_t)data);
23814 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23815 			return (EINVAL);
23816 		}
23817 
23818 		/* Check the request against the drive's max speed. */
23819 		if (current_speed != 0xffff) {
23820 			if (current_speed > max_speed) {
23821 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23822 				return (EINVAL);
23823 			}
23824 		}
23825 
23826 		/*
23827 		 * Build and send the SET SPEED command
23828 		 *
23829 		 * Note: The SET SPEED (0xBB) command used in this routine is
23830 		 * obsolete per the SCSI MMC spec but still supported in the
23831 		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
23832 		 * therefore the command is still implemented in this routine.
23833 		 */
23834 		bzero(cdb, sizeof (cdb));
23835 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
23836 		cdb[2] = (uchar_t)(current_speed >> 8);
23837 		cdb[3] = (uchar_t)current_speed;
23838 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23839 		com->uscsi_cdb	   = (caddr_t)cdb;
23840 		com->uscsi_cdblen  = CDB_GROUP5;
23841 		com->uscsi_bufaddr = NULL;
23842 		com->uscsi_buflen  = 0;
23843 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
23844 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
23845 		break;
23846 	default:
23847 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23848 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
23849 		rval = EINVAL;
23850 	}
23851 
23852 	if (sense) {
23853 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23854 	}
23855 	if (com) {
23856 		kmem_free(com, sizeof (*com));
23857 	}
23858 	return (rval);
23859 }
23860 
23861 
23862 /*
23863  *    Function: sr_pause_resume()
23864  *
23865  * Description: This routine is the driver entry point for handling CD-ROM
23866  *		pause/resume ioctl requests. This only affects the audio play
23867  *		operation.
23868  *
23869  *   Arguments: dev - the device 'dev_t'
23870  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
23871  *		      for setting the resume bit of the cdb.
23872  *
23873  * Return Code: the code returned by sd_send_scsi_cmd()
23874  *		EINVAL if invalid mode specified
23875  *
23876  */
23877 
23878 static int
23879 sr_pause_resume(dev_t dev, int cmd)
23880 {
23881 	struct sd_lun		*un;
23882 	struct uscsi_cmd	*com;
23883 	char			cdb[CDB_GROUP1];
23884 	int			rval;
23885 
23886 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23887 		return (ENXIO);
23888 	}
23889 
23890 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23891 	bzero(cdb, CDB_GROUP1);
23892 	cdb[0] = SCMD_PAUSE_RESUME;
23893 	switch (cmd) {
23894 	case CDROMRESUME:
23895 		cdb[8] = 1;
23896 		break;
23897 	case CDROMPAUSE:
23898 		cdb[8] = 0;
23899 		break;
23900 	default:
23901 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
23902 		    " Command '%x' Not Supported\n", cmd);
23903 		rval = EINVAL;
23904 		goto done;
23905 	}
23906 
23907 	com->uscsi_cdb    = cdb;
23908 	com->uscsi_cdblen = CDB_GROUP1;
23909 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23910 
23911 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23912 	    SD_PATH_STANDARD);
23913 
23914 done:
23915 	kmem_free(com, sizeof (*com));
23916 	return (rval);
23917 }
23918 
23919 
23920 /*
23921  *    Function: sr_play_msf()
23922  *
23923  * Description: This routine is the driver entry point for handling CD-ROM
23924  *		ioctl requests to output the audio signals at the specified
23925  *		starting address and continue the audio play until the specified
23926  *		ending address (CDROMPLAYMSF) The address is in Minute Second
23927  *		Frame (MSF) format.
23928  *
23929  *   Arguments: dev	- the device 'dev_t'
23930  *		data	- pointer to user provided audio msf structure,
23931  *		          specifying start/end addresses.
23932  *		flag	- this argument is a pass through to ddi_copyxxx()
23933  *		          directly from the mode argument of ioctl().
23934  *
23935  * Return Code: the code returned by sd_send_scsi_cmd()
23936  *		EFAULT if ddi_copyxxx() fails
23937  *		ENXIO if fail ddi_get_soft_state
23938  *		EINVAL if data pointer is NULL
23939  */
23940 
23941 static int
23942 sr_play_msf(dev_t dev, caddr_t data, int flag)
23943 {
23944 	struct sd_lun		*un;
23945 	struct uscsi_cmd	*com;
23946 	struct cdrom_msf	msf_struct;
23947 	struct cdrom_msf	*msf = &msf_struct;
23948 	char			cdb[CDB_GROUP1];
23949 	int			rval;
23950 
23951 	if (data == NULL) {
23952 		return (EINVAL);
23953 	}
23954 
23955 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23956 		return (ENXIO);
23957 	}
23958 
23959 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
23960 		return (EFAULT);
23961 	}
23962 
23963 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23964 	bzero(cdb, CDB_GROUP1);
23965 	cdb[0] = SCMD_PLAYAUDIO_MSF;
23966 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
23967 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
23968 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
23969 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
23970 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
23971 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
23972 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
23973 	} else {
23974 		cdb[3] = msf->cdmsf_min0;
23975 		cdb[4] = msf->cdmsf_sec0;
23976 		cdb[5] = msf->cdmsf_frame0;
23977 		cdb[6] = msf->cdmsf_min1;
23978 		cdb[7] = msf->cdmsf_sec1;
23979 		cdb[8] = msf->cdmsf_frame1;
23980 	}
23981 	com->uscsi_cdb    = cdb;
23982 	com->uscsi_cdblen = CDB_GROUP1;
23983 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23984 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23985 	    SD_PATH_STANDARD);
23986 	kmem_free(com, sizeof (*com));
23987 	return (rval);
23988 }
23989 
23990 
23991 /*
23992  *    Function: sr_play_trkind()
23993  *
23994  * Description: This routine is the driver entry point for handling CD-ROM
23995  *		ioctl requests to output the audio signals at the specified
23996  *		starting address and continue the audio play until the specified
23997  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
23998  *		format.
23999  *
24000  *   Arguments: dev	- the device 'dev_t'
24001  *		data	- pointer to user provided audio track/index structure,
24002  *		          specifying start/end addresses.
24003  *		flag	- this argument is a pass through to ddi_copyxxx()
24004  *		          directly from the mode argument of ioctl().
24005  *
24006  * Return Code: the code returned by sd_send_scsi_cmd()
24007  *		EFAULT if ddi_copyxxx() fails
24008  *		ENXIO if fail ddi_get_soft_state
24009  *		EINVAL if data pointer is NULL
24010  */
24011 
24012 static int
24013 sr_play_trkind(dev_t dev, caddr_t data, int flag)
24014 {
24015 	struct cdrom_ti		ti_struct;
24016 	struct cdrom_ti		*ti = &ti_struct;
24017 	struct uscsi_cmd	*com = NULL;
24018 	char			cdb[CDB_GROUP1];
24019 	int			rval;
24020 
24021 	if (data == NULL) {
24022 		return (EINVAL);
24023 	}
24024 
24025 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
24026 		return (EFAULT);
24027 	}
24028 
24029 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24030 	bzero(cdb, CDB_GROUP1);
24031 	cdb[0] = SCMD_PLAYAUDIO_TI;
24032 	cdb[4] = ti->cdti_trk0;
24033 	cdb[5] = ti->cdti_ind0;
24034 	cdb[7] = ti->cdti_trk1;
24035 	cdb[8] = ti->cdti_ind1;
24036 	com->uscsi_cdb    = cdb;
24037 	com->uscsi_cdblen = CDB_GROUP1;
24038 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24039 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24040 	    SD_PATH_STANDARD);
24041 	kmem_free(com, sizeof (*com));
24042 	return (rval);
24043 }
24044 
24045 
24046 /*
24047  *    Function: sr_read_all_subcodes()
24048  *
24049  * Description: This routine is the driver entry point for handling CD-ROM
24050  *		ioctl requests to return raw subcode data while the target is
24051  *		playing audio (CDROMSUBCODE).
24052  *
24053  *   Arguments: dev	- the device 'dev_t'
24054  *		data	- pointer to user provided cdrom subcode structure,
24055  *		          specifying the transfer length and address.
24056  *		flag	- this argument is a pass through to ddi_copyxxx()
24057  *		          directly from the mode argument of ioctl().
24058  *
24059  * Return Code: the code returned by sd_send_scsi_cmd()
24060  *		EFAULT if ddi_copyxxx() fails
24061  *		ENXIO if fail ddi_get_soft_state
24062  *		EINVAL if data pointer is NULL
24063  */
24064 
24065 static int
24066 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24067 {
24068 	struct sd_lun		*un = NULL;
24069 	struct uscsi_cmd	*com = NULL;
24070 	struct cdrom_subcode	*subcode = NULL;
24071 	int			rval;
24072 	size_t			buflen;
24073 	char			cdb[CDB_GROUP5];
24074 
24075 #ifdef _MULTI_DATAMODEL
24076 	/* To support ILP32 applications in an LP64 world */
24077 	struct cdrom_subcode32		cdrom_subcode32;
24078 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24079 #endif
24080 	if (data == NULL) {
24081 		return (EINVAL);
24082 	}
24083 
24084 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24085 		return (ENXIO);
24086 	}
24087 
24088 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24089 
24090 #ifdef _MULTI_DATAMODEL
24091 	switch (ddi_model_convert_from(flag & FMODELS)) {
24092 	case DDI_MODEL_ILP32:
24093 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24094 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24095 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24096 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24097 			return (EFAULT);
24098 		}
24099 		/* Convert the ILP32 uscsi data from the application to LP64 */
24100 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24101 		break;
24102 	case DDI_MODEL_NONE:
24103 		if (ddi_copyin(data, subcode,
24104 		    sizeof (struct cdrom_subcode), flag)) {
24105 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24106 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24107 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24108 			return (EFAULT);
24109 		}
24110 		break;
24111 	}
24112 #else /* ! _MULTI_DATAMODEL */
24113 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24114 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24115 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24116 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24117 		return (EFAULT);
24118 	}
24119 #endif /* _MULTI_DATAMODEL */
24120 
24121 	/*
24122 	 * Since MMC-2 expects max 3 bytes for length, check if the
24123 	 * length input is greater than 3 bytes
24124 	 */
24125 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24126 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24127 		    "sr_read_all_subcodes: "
24128 		    "cdrom transfer length too large: %d (limit %d)\n",
24129 		    subcode->cdsc_length, 0xFFFFFF);
24130 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24131 		return (EINVAL);
24132 	}
24133 
24134 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24135 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24136 	bzero(cdb, CDB_GROUP5);
24137 
24138 	if (un->un_f_mmc_cap == TRUE) {
24139 		cdb[0] = (char)SCMD_READ_CD;
24140 		cdb[2] = (char)0xff;
24141 		cdb[3] = (char)0xff;
24142 		cdb[4] = (char)0xff;
24143 		cdb[5] = (char)0xff;
24144 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24145 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24146 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24147 		cdb[10] = 1;
24148 	} else {
24149 		/*
24150 		 * Note: A vendor specific command (0xDF) is being used her to
24151 		 * request a read of all subcodes.
24152 		 */
24153 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24154 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24155 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24156 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24157 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24158 	}
24159 	com->uscsi_cdb	   = cdb;
24160 	com->uscsi_cdblen  = CDB_GROUP5;
24161 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24162 	com->uscsi_buflen  = buflen;
24163 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24164 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24165 	    SD_PATH_STANDARD);
24166 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24167 	kmem_free(com, sizeof (*com));
24168 	return (rval);
24169 }
24170 
24171 
24172 /*
24173  *    Function: sr_read_subchannel()
24174  *
24175  * Description: This routine is the driver entry point for handling CD-ROM
24176  *		ioctl requests to return the Q sub-channel data of the CD
24177  *		current position block. (CDROMSUBCHNL) The data includes the
24178  *		track number, index number, absolute CD-ROM address (LBA or MSF
24179  *		format per the user) , track relative CD-ROM address (LBA or MSF
24180  *		format per the user), control data and audio status.
24181  *
24182  *   Arguments: dev	- the device 'dev_t'
24183  *		data	- pointer to user provided cdrom sub-channel structure
24184  *		flag	- this argument is a pass through to ddi_copyxxx()
24185  *		          directly from the mode argument of ioctl().
24186  *
24187  * Return Code: the code returned by sd_send_scsi_cmd()
24188  *		EFAULT if ddi_copyxxx() fails
24189  *		ENXIO if fail ddi_get_soft_state
24190  *		EINVAL if data pointer is NULL
24191  */
24192 
24193 static int
24194 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24195 {
24196 	struct sd_lun		*un;
24197 	struct uscsi_cmd	*com;
24198 	struct cdrom_subchnl	subchanel;
24199 	struct cdrom_subchnl	*subchnl = &subchanel;
24200 	char			cdb[CDB_GROUP1];
24201 	caddr_t			buffer;
24202 	int			rval;
24203 
24204 	if (data == NULL) {
24205 		return (EINVAL);
24206 	}
24207 
24208 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24209 	    (un->un_state == SD_STATE_OFFLINE)) {
24210 		return (ENXIO);
24211 	}
24212 
24213 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24214 		return (EFAULT);
24215 	}
24216 
24217 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24218 	bzero(cdb, CDB_GROUP1);
24219 	cdb[0] = SCMD_READ_SUBCHANNEL;
24220 	/* Set the MSF bit based on the user requested address format */
24221 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24222 	/*
24223 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
24224 	 * returned
24225 	 */
24226 	cdb[2] = 0x40;
24227 	/*
24228 	 * Set byte 3 to specify the return data format. A value of 0x01
24229 	 * indicates that the CD-ROM current position should be returned.
24230 	 */
24231 	cdb[3] = 0x01;
24232 	cdb[8] = 0x10;
24233 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24234 	com->uscsi_cdb	   = cdb;
24235 	com->uscsi_cdblen  = CDB_GROUP1;
24236 	com->uscsi_bufaddr = buffer;
24237 	com->uscsi_buflen  = 16;
24238 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24239 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24240 	    SD_PATH_STANDARD);
24241 	if (rval != 0) {
24242 		kmem_free(buffer, 16);
24243 		kmem_free(com, sizeof (*com));
24244 		return (rval);
24245 	}
24246 
24247 	/* Process the returned Q sub-channel data */
24248 	subchnl->cdsc_audiostatus = buffer[1];
24249 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24250 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24251 	subchnl->cdsc_trk	= buffer[6];
24252 	subchnl->cdsc_ind	= buffer[7];
24253 	if (subchnl->cdsc_format & CDROM_LBA) {
24254 		subchnl->cdsc_absaddr.lba =
24255 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24256 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24257 		subchnl->cdsc_reladdr.lba =
24258 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24259 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24260 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24261 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24262 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24263 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24264 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24265 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24266 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24267 	} else {
24268 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24269 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24270 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24271 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24272 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24273 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24274 	}
24275 	kmem_free(buffer, 16);
24276 	kmem_free(com, sizeof (*com));
24277 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24278 	    != 0) {
24279 		return (EFAULT);
24280 	}
24281 	return (rval);
24282 }
24283 
24284 
24285 /*
24286  *    Function: sr_read_tocentry()
24287  *
24288  * Description: This routine is the driver entry point for handling CD-ROM
24289  *		ioctl requests to read from the Table of Contents (TOC)
24290  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24291  *		fields, the starting address (LBA or MSF format per the user)
24292  *		and the data mode if the user specified track is a data track.
24293  *
24294  *		Note: The READ HEADER (0x44) command used in this routine is
24295  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
24297  *		therefore the command is still implemented in this routine.
24298  *
24299  *   Arguments: dev	- the device 'dev_t'
24300  *		data	- pointer to user provided toc entry structure,
24301  *			  specifying the track # and the address format
24302  *			  (LBA or MSF).
24303  *		flag	- this argument is a pass through to ddi_copyxxx()
24304  *		          directly from the mode argument of ioctl().
24305  *
24306  * Return Code: the code returned by sd_send_scsi_cmd()
24307  *		EFAULT if ddi_copyxxx() fails
24308  *		ENXIO if fail ddi_get_soft_state
24309  *		EINVAL if data pointer is NULL
24310  */
24311 
24312 static int
24313 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24314 {
24315 	struct sd_lun		*un = NULL;
24316 	struct uscsi_cmd	*com;
24317 	struct cdrom_tocentry	toc_entry;
24318 	struct cdrom_tocentry	*entry = &toc_entry;
24319 	caddr_t			buffer;
24320 	int			rval;
24321 	char			cdb[CDB_GROUP1];
24322 
24323 	if (data == NULL) {
24324 		return (EINVAL);
24325 	}
24326 
24327 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24328 	    (un->un_state == SD_STATE_OFFLINE)) {
24329 		return (ENXIO);
24330 	}
24331 
24332 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24333 		return (EFAULT);
24334 	}
24335 
24336 	/* Validate the requested track and address format */
24337 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24338 		return (EINVAL);
24339 	}
24340 
24341 	if (entry->cdte_track == 0) {
24342 		return (EINVAL);
24343 	}
24344 
24345 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24346 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24347 	bzero(cdb, CDB_GROUP1);
24348 
24349 	cdb[0] = SCMD_READ_TOC;
24350 	/* Set the MSF bit based on the user requested address format  */
24351 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24352 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24353 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24354 	} else {
24355 		cdb[6] = entry->cdte_track;
24356 	}
24357 
24358 	/*
24359 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24360 	 * (4 byte TOC response header + 8 byte track descriptor)
24361 	 */
24362 	cdb[8] = 12;
24363 	com->uscsi_cdb	   = cdb;
24364 	com->uscsi_cdblen  = CDB_GROUP1;
24365 	com->uscsi_bufaddr = buffer;
24366 	com->uscsi_buflen  = 0x0C;
24367 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24368 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24369 	    SD_PATH_STANDARD);
24370 	if (rval != 0) {
24371 		kmem_free(buffer, 12);
24372 		kmem_free(com, sizeof (*com));
24373 		return (rval);
24374 	}
24375 
24376 	/* Process the toc entry */
24377 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24378 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24379 	if (entry->cdte_format & CDROM_LBA) {
24380 		entry->cdte_addr.lba =
24381 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24382 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24383 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24384 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24385 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24386 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24387 		/*
24388 		 * Send a READ TOC command using the LBA address format to get
24389 		 * the LBA for the track requested so it can be used in the
24390 		 * READ HEADER request
24391 		 *
24392 		 * Note: The MSF bit of the READ HEADER command specifies the
24393 		 * output format. The block address specified in that command
24394 		 * must be in LBA format.
24395 		 */
24396 		cdb[1] = 0;
24397 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24398 		    SD_PATH_STANDARD);
24399 		if (rval != 0) {
24400 			kmem_free(buffer, 12);
24401 			kmem_free(com, sizeof (*com));
24402 			return (rval);
24403 		}
24404 	} else {
24405 		entry->cdte_addr.msf.minute	= buffer[9];
24406 		entry->cdte_addr.msf.second	= buffer[10];
24407 		entry->cdte_addr.msf.frame	= buffer[11];
24408 		/*
24409 		 * Send a READ TOC command using the LBA address format to get
24410 		 * the LBA for the track requested so it can be used in the
24411 		 * READ HEADER request
24412 		 *
24413 		 * Note: The MSF bit of the READ HEADER command specifies the
24414 		 * output format. The block address specified in that command
24415 		 * must be in LBA format.
24416 		 */
24417 		cdb[1] = 0;
24418 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24419 		    SD_PATH_STANDARD);
24420 		if (rval != 0) {
24421 			kmem_free(buffer, 12);
24422 			kmem_free(com, sizeof (*com));
24423 			return (rval);
24424 		}
24425 	}
24426 
24427 	/*
24428 	 * Build and send the READ HEADER command to determine the data mode of
24429 	 * the user specified track.
24430 	 */
24431 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24432 	    (entry->cdte_track != CDROM_LEADOUT)) {
24433 		bzero(cdb, CDB_GROUP1);
24434 		cdb[0] = SCMD_READ_HEADER;
24435 		cdb[2] = buffer[8];
24436 		cdb[3] = buffer[9];
24437 		cdb[4] = buffer[10];
24438 		cdb[5] = buffer[11];
24439 		cdb[8] = 0x08;
24440 		com->uscsi_buflen = 0x08;
24441 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24442 		    SD_PATH_STANDARD);
24443 		if (rval == 0) {
24444 			entry->cdte_datamode = buffer[0];
24445 		} else {
24446 			/*
24447 			 * READ HEADER command failed, since this is
24448 			 * obsoleted in one spec, its better to return
24449 			 * -1 for an invlid track so that we can still
24450 			 * recieve the rest of the TOC data.
24451 			 */
24452 			entry->cdte_datamode = (uchar_t)-1;
24453 		}
24454 	} else {
24455 		entry->cdte_datamode = (uchar_t)-1;
24456 	}
24457 
24458 	kmem_free(buffer, 12);
24459 	kmem_free(com, sizeof (*com));
24460 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
24461 		return (EFAULT);
24462 
24463 	return (rval);
24464 }
24465 
24466 
24467 /*
24468  *    Function: sr_read_tochdr()
24469  *
24470  * Description: This routine is the driver entry point for handling CD-ROM
24471  * 		ioctl requests to read the Table of Contents (TOC) header
24472  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
24473  *		and ending track numbers
24474  *
24475  *   Arguments: dev	- the device 'dev_t'
24476  *		data	- pointer to user provided toc header structure,
24477  *			  specifying the starting and ending track numbers.
24478  *		flag	- this argument is a pass through to ddi_copyxxx()
24479  *			  directly from the mode argument of ioctl().
24480  *
24481  * Return Code: the code returned by sd_send_scsi_cmd()
24482  *		EFAULT if ddi_copyxxx() fails
24483  *		ENXIO if fail ddi_get_soft_state
24484  *		EINVAL if data pointer is NULL
24485  */
24486 
24487 static int
24488 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24489 {
24490 	struct sd_lun		*un;
24491 	struct uscsi_cmd	*com;
24492 	struct cdrom_tochdr	toc_header;
24493 	struct cdrom_tochdr	*hdr = &toc_header;
24494 	char			cdb[CDB_GROUP1];
24495 	int			rval;
24496 	caddr_t			buffer;
24497 
24498 	if (data == NULL) {
24499 		return (EINVAL);
24500 	}
24501 
24502 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24503 	    (un->un_state == SD_STATE_OFFLINE)) {
24504 		return (ENXIO);
24505 	}
24506 
24507 	buffer = kmem_zalloc(4, KM_SLEEP);
24508 	bzero(cdb, CDB_GROUP1);
24509 	cdb[0] = SCMD_READ_TOC;
24510 	/*
24511 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24512 	 * that the TOC header should be returned
24513 	 */
24514 	cdb[6] = 0x00;
24515 	/*
24516 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24517 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24518 	 */
24519 	cdb[8] = 0x04;
24520 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24521 	com->uscsi_cdb	   = cdb;
24522 	com->uscsi_cdblen  = CDB_GROUP1;
24523 	com->uscsi_bufaddr = buffer;
24524 	com->uscsi_buflen  = 0x04;
24525 	com->uscsi_timeout = 300;
24526 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24527 
24528 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24529 	    SD_PATH_STANDARD);
24530 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24531 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24532 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24533 	} else {
24534 		hdr->cdth_trk0 = buffer[2];
24535 		hdr->cdth_trk1 = buffer[3];
24536 	}
24537 	kmem_free(buffer, 4);
24538 	kmem_free(com, sizeof (*com));
24539 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24540 		return (EFAULT);
24541 	}
24542 	return (rval);
24543 }
24544 
24545 
24546 /*
24547  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24548  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
24549  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24550  * digital audio and extended architecture digital audio. These modes are
24551  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24552  * MMC specs.
24553  *
24554  * In addition to support for the various data formats these routines also
24555  * include support for devices that implement only the direct access READ
24556  * commands (0x08, 0x28), devices that implement the READ_CD commands
24557  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24558  * READ CDXA commands (0xD8, 0xDB)
24559  */
24560 
24561 /*
24562  *    Function: sr_read_mode1()
24563  *
24564  * Description: This routine is the driver entry point for handling CD-ROM
24565  *		ioctl read mode1 requests (CDROMREADMODE1).
24566  *
24567  *   Arguments: dev	- the device 'dev_t'
24568  *		data	- pointer to user provided cd read structure specifying
24569  *			  the lba buffer address and length.
24570  *		flag	- this argument is a pass through to ddi_copyxxx()
24571  *			  directly from the mode argument of ioctl().
24572  *
24573  * Return Code: the code returned by sd_send_scsi_cmd()
24574  *		EFAULT if ddi_copyxxx() fails
24575  *		ENXIO if fail ddi_get_soft_state
24576  *		EINVAL if data pointer is NULL
24577  */
24578 
24579 static int
24580 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24581 {
24582 	struct sd_lun		*un;
24583 	struct cdrom_read	mode1_struct;
24584 	struct cdrom_read	*mode1 = &mode1_struct;
24585 	int			rval;
24586 #ifdef _MULTI_DATAMODEL
24587 	/* To support ILP32 applications in an LP64 world */
24588 	struct cdrom_read32	cdrom_read32;
24589 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24590 #endif /* _MULTI_DATAMODEL */
24591 
24592 	if (data == NULL) {
24593 		return (EINVAL);
24594 	}
24595 
24596 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24597 	    (un->un_state == SD_STATE_OFFLINE)) {
24598 		return (ENXIO);
24599 	}
24600 
24601 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24602 	    "sd_read_mode1: entry: un:0x%p\n", un);
24603 
24604 #ifdef _MULTI_DATAMODEL
24605 	switch (ddi_model_convert_from(flag & FMODELS)) {
24606 	case DDI_MODEL_ILP32:
24607 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24608 			return (EFAULT);
24609 		}
24610 		/* Convert the ILP32 uscsi data from the application to LP64 */
24611 		cdrom_read32tocdrom_read(cdrd32, mode1);
24612 		break;
24613 	case DDI_MODEL_NONE:
24614 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24615 			return (EFAULT);
24616 		}
24617 	}
24618 #else /* ! _MULTI_DATAMODEL */
24619 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24620 		return (EFAULT);
24621 	}
24622 #endif /* _MULTI_DATAMODEL */
24623 
24624 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24625 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24626 
24627 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24628 	    "sd_read_mode1: exit: un:0x%p\n", un);
24629 
24630 	return (rval);
24631 }
24632 
24633 
24634 /*
24635  *    Function: sr_read_cd_mode2()
24636  *
24637  * Description: This routine is the driver entry point for handling CD-ROM
24638  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24639  *		support the READ CD (0xBE) command or the 1st generation
24640  *		READ CD (0xD4) command.
24641  *
24642  *   Arguments: dev	- the device 'dev_t'
24643  *		data	- pointer to user provided cd read structure specifying
24644  *			  the lba buffer address and length.
24645  *		flag	- this argument is a pass through to ddi_copyxxx()
24646  *			  directly from the mode argument of ioctl().
24647  *
24648  * Return Code: the code returned by sd_send_scsi_cmd()
24649  *		EFAULT if ddi_copyxxx() fails
24650  *		ENXIO if fail ddi_get_soft_state
24651  *		EINVAL if data pointer is NULL
24652  */
24653 
24654 static int
24655 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
24656 {
24657 	struct sd_lun		*un;
24658 	struct uscsi_cmd	*com;
24659 	struct cdrom_read	mode2_struct;
24660 	struct cdrom_read	*mode2 = &mode2_struct;
24661 	uchar_t			cdb[CDB_GROUP5];
24662 	int			nblocks;
24663 	int			rval;
24664 #ifdef _MULTI_DATAMODEL
24665 	/*  To support ILP32 applications in an LP64 world */
24666 	struct cdrom_read32	cdrom_read32;
24667 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24668 #endif /* _MULTI_DATAMODEL */
24669 
24670 	if (data == NULL) {
24671 		return (EINVAL);
24672 	}
24673 
24674 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24675 	    (un->un_state == SD_STATE_OFFLINE)) {
24676 		return (ENXIO);
24677 	}
24678 
24679 #ifdef _MULTI_DATAMODEL
24680 	switch (ddi_model_convert_from(flag & FMODELS)) {
24681 	case DDI_MODEL_ILP32:
24682 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24683 			return (EFAULT);
24684 		}
24685 		/* Convert the ILP32 uscsi data from the application to LP64 */
24686 		cdrom_read32tocdrom_read(cdrd32, mode2);
24687 		break;
24688 	case DDI_MODEL_NONE:
24689 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24690 			return (EFAULT);
24691 		}
24692 		break;
24693 	}
24694 
24695 #else /* ! _MULTI_DATAMODEL */
24696 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24697 		return (EFAULT);
24698 	}
24699 #endif /* _MULTI_DATAMODEL */
24700 
24701 	bzero(cdb, sizeof (cdb));
24702 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
24703 		/* Read command supported by 1st generation atapi drives */
24704 		cdb[0] = SCMD_READ_CDD4;
24705 	} else {
24706 		/* Universal CD Access Command */
24707 		cdb[0] = SCMD_READ_CD;
24708 	}
24709 
24710 	/*
24711 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
24712 	 */
24713 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
24714 
24715 	/* set the start address */
24716 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
24717 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
24718 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24719 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
24720 
24721 	/* set the transfer length */
24722 	nblocks = mode2->cdread_buflen / 2336;
24723 	cdb[6] = (uchar_t)(nblocks >> 16);
24724 	cdb[7] = (uchar_t)(nblocks >> 8);
24725 	cdb[8] = (uchar_t)nblocks;
24726 
24727 	/* set the filter bits */
24728 	cdb[9] = CDROM_READ_CD_USERDATA;
24729 
24730 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24731 	com->uscsi_cdb = (caddr_t)cdb;
24732 	com->uscsi_cdblen = sizeof (cdb);
24733 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24734 	com->uscsi_buflen = mode2->cdread_buflen;
24735 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24736 
24737 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24738 	    SD_PATH_STANDARD);
24739 	kmem_free(com, sizeof (*com));
24740 	return (rval);
24741 }
24742 
24743 
24744 /*
24745  *    Function: sr_read_mode2()
24746  *
24747  * Description: This routine is the driver entry point for handling CD-ROM
24748  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24749  *		do not support the READ CD (0xBE) command.
24750  *
24751  *   Arguments: dev	- the device 'dev_t'
24752  *		data	- pointer to user provided cd read structure specifying
24753  *			  the lba buffer address and length.
24754  *		flag	- this argument is a pass through to ddi_copyxxx()
24755  *			  directly from the mode argument of ioctl().
24756  *
24757  * Return Code: the code returned by sd_send_scsi_cmd()
24758  *		EFAULT if ddi_copyxxx() fails
24759  *		ENXIO if fail ddi_get_soft_state
24760  *		EINVAL if data pointer is NULL
24761  *		EIO if fail to reset block size
24762  *		EAGAIN if commands are in progress in the driver
24763  */
24764 
24765 static int
24766 sr_read_mode2(dev_t dev, caddr_t data, int flag)
24767 {
24768 	struct sd_lun		*un;
24769 	struct cdrom_read	mode2_struct;
24770 	struct cdrom_read	*mode2 = &mode2_struct;
24771 	int			rval;
24772 	uint32_t		restore_blksize;
24773 	struct uscsi_cmd	*com;
24774 	uchar_t			cdb[CDB_GROUP0];
24775 	int			nblocks;
24776 
24777 #ifdef _MULTI_DATAMODEL
24778 	/* To support ILP32 applications in an LP64 world */
24779 	struct cdrom_read32	cdrom_read32;
24780 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24781 #endif /* _MULTI_DATAMODEL */
24782 
24783 	if (data == NULL) {
24784 		return (EINVAL);
24785 	}
24786 
24787 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24788 	    (un->un_state == SD_STATE_OFFLINE)) {
24789 		return (ENXIO);
24790 	}
24791 
24792 	/*
24793 	 * Because this routine will update the device and driver block size
24794 	 * being used we want to make sure there are no commands in progress.
24795 	 * If commands are in progress the user will have to try again.
24796 	 *
24797 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
24798 	 * in sdioctl to protect commands from sdioctl through to the top of
24799 	 * sd_uscsi_strategy. See sdioctl for details.
24800 	 */
24801 	mutex_enter(SD_MUTEX(un));
24802 	if (un->un_ncmds_in_driver != 1) {
24803 		mutex_exit(SD_MUTEX(un));
24804 		return (EAGAIN);
24805 	}
24806 	mutex_exit(SD_MUTEX(un));
24807 
24808 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24809 	    "sd_read_mode2: entry: un:0x%p\n", un);
24810 
24811 #ifdef _MULTI_DATAMODEL
24812 	switch (ddi_model_convert_from(flag & FMODELS)) {
24813 	case DDI_MODEL_ILP32:
24814 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24815 			return (EFAULT);
24816 		}
24817 		/* Convert the ILP32 uscsi data from the application to LP64 */
24818 		cdrom_read32tocdrom_read(cdrd32, mode2);
24819 		break;
24820 	case DDI_MODEL_NONE:
24821 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24822 			return (EFAULT);
24823 		}
24824 		break;
24825 	}
24826 #else /* ! _MULTI_DATAMODEL */
24827 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
24828 		return (EFAULT);
24829 	}
24830 #endif /* _MULTI_DATAMODEL */
24831 
24832 	/* Store the current target block size for restoration later */
24833 	restore_blksize = un->un_tgt_blocksize;
24834 
24835 	/* Change the device and soft state target block size to 2336 */
24836 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
24837 		rval = EIO;
24838 		goto done;
24839 	}
24840 
24841 
24842 	bzero(cdb, sizeof (cdb));
24843 
24844 	/* set READ operation */
24845 	cdb[0] = SCMD_READ;
24846 
24847 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
24848 	mode2->cdread_lba >>= 2;
24849 
24850 	/* set the start address */
24851 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
24852 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24853 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
24854 
24855 	/* set the transfer length */
24856 	nblocks = mode2->cdread_buflen / 2336;
24857 	cdb[4] = (uchar_t)nblocks & 0xFF;
24858 
24859 	/* build command */
24860 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24861 	com->uscsi_cdb = (caddr_t)cdb;
24862 	com->uscsi_cdblen = sizeof (cdb);
24863 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24864 	com->uscsi_buflen = mode2->cdread_buflen;
24865 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24866 
24867 	/*
24868 	 * Issue SCSI command with user space address for read buffer.
24869 	 *
24870 	 * This sends the command through main channel in the driver.
24871 	 *
24872 	 * Since this is accessed via an IOCTL call, we go through the
24873 	 * standard path, so that if the device was powered down, then
24874 	 * it would be 'awakened' to handle the command.
24875 	 */
24876 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24877 	    SD_PATH_STANDARD);
24878 
24879 	kmem_free(com, sizeof (*com));
24880 
24881 	/* Restore the device and soft state target block size */
24882 	if (sr_sector_mode(dev, restore_blksize) != 0) {
24883 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24884 		    "can't do switch back to mode 1\n");
24885 		/*
24886 		 * If sd_send_scsi_READ succeeded we still need to report
24887 		 * an error because we failed to reset the block size
24888 		 */
24889 		if (rval == 0) {
24890 			rval = EIO;
24891 		}
24892 	}
24893 
24894 done:
24895 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24896 	    "sd_read_mode2: exit: un:0x%p\n", un);
24897 
24898 	return (rval);
24899 }
24900 
24901 
24902 /*
24903  *    Function: sr_sector_mode()
24904  *
24905  * Description: This utility function is used by sr_read_mode2 to set the target
24906  *		block size based on the user specified size. This is a legacy
24907  *		implementation based upon a vendor specific mode page
24908  *
24909  *   Arguments: dev	- the device 'dev_t'
24910  *		data	- flag indicating if block size is being set to 2336 or
24911  *			  512.
24912  *
24913  * Return Code: the code returned by sd_send_scsi_cmd()
24914  *		EFAULT if ddi_copyxxx() fails
24915  *		ENXIO if fail ddi_get_soft_state
24916  *		EINVAL if data pointer is NULL
24917  */
24918 
24919 static int
24920 sr_sector_mode(dev_t dev, uint32_t blksize)
24921 {
24922 	struct sd_lun	*un;
24923 	uchar_t		*sense;
24924 	uchar_t		*select;
24925 	int		rval;
24926 
24927 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24928 	    (un->un_state == SD_STATE_OFFLINE)) {
24929 		return (ENXIO);
24930 	}
24931 
24932 	sense = kmem_zalloc(20, KM_SLEEP);
24933 
24934 	/* Note: This is a vendor specific mode page (0x81) */
24935 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
24936 	    SD_PATH_STANDARD)) != 0) {
24937 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24938 		    "sr_sector_mode: Mode Sense failed\n");
24939 		kmem_free(sense, 20);
24940 		return (rval);
24941 	}
24942 	select = kmem_zalloc(20, KM_SLEEP);
24943 	select[3] = 0x08;
24944 	select[10] = ((blksize >> 8) & 0xff);
24945 	select[11] = (blksize & 0xff);
24946 	select[12] = 0x01;
24947 	select[13] = 0x06;
24948 	select[14] = sense[14];
24949 	select[15] = sense[15];
24950 	if (blksize == SD_MODE2_BLKSIZE) {
24951 		select[14] |= 0x01;
24952 	}
24953 
24954 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
24955 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24956 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24957 		    "sr_sector_mode: Mode Select failed\n");
24958 	} else {
24959 		/*
24960 		 * Only update the softstate block size if we successfully
24961 		 * changed the device block mode.
24962 		 */
24963 		mutex_enter(SD_MUTEX(un));
24964 		sd_update_block_info(un, blksize, 0);
24965 		mutex_exit(SD_MUTEX(un));
24966 	}
24967 	kmem_free(sense, 20);
24968 	kmem_free(select, 20);
24969 	return (rval);
24970 }
24971 
24972 
24973 /*
24974  *    Function: sr_read_cdda()
24975  *
24976  * Description: This routine is the driver entry point for handling CD-ROM
24977  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
24978  *		the target supports CDDA these requests are handled via a vendor
24979  *		specific command (0xD8) If the target does not support CDDA
24980  *		these requests are handled via the READ CD command (0xBE).
24981  *
24982  *   Arguments: dev	- the device 'dev_t'
24983  *		data	- pointer to user provided CD-DA structure specifying
24984  *			  the track starting address, transfer length, and
24985  *			  subcode options.
24986  *		flag	- this argument is a pass through to ddi_copyxxx()
24987  *			  directly from the mode argument of ioctl().
24988  *
24989  * Return Code: the code returned by sd_send_scsi_cmd()
24990  *		EFAULT if ddi_copyxxx() fails
24991  *		ENXIO if fail ddi_get_soft_state
24992  *		EINVAL if invalid arguments are provided
24993  *		ENOTTY
24994  */
24995 
24996 static int
24997 sr_read_cdda(dev_t dev, caddr_t data, int flag)
24998 {
24999 	struct sd_lun			*un;
25000 	struct uscsi_cmd		*com;
25001 	struct cdrom_cdda		*cdda;
25002 	int				rval;
25003 	size_t				buflen;
25004 	char				cdb[CDB_GROUP5];
25005 
25006 #ifdef _MULTI_DATAMODEL
25007 	/* To support ILP32 applications in an LP64 world */
25008 	struct cdrom_cdda32	cdrom_cdda32;
25009 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
25010 #endif /* _MULTI_DATAMODEL */
25011 
25012 	if (data == NULL) {
25013 		return (EINVAL);
25014 	}
25015 
25016 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25017 		return (ENXIO);
25018 	}
25019 
25020 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
25021 
25022 #ifdef _MULTI_DATAMODEL
25023 	switch (ddi_model_convert_from(flag & FMODELS)) {
25024 	case DDI_MODEL_ILP32:
25025 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
25026 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25027 			    "sr_read_cdda: ddi_copyin Failed\n");
25028 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25029 			return (EFAULT);
25030 		}
25031 		/* Convert the ILP32 uscsi data from the application to LP64 */
25032 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
25033 		break;
25034 	case DDI_MODEL_NONE:
25035 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25036 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25037 			    "sr_read_cdda: ddi_copyin Failed\n");
25038 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25039 			return (EFAULT);
25040 		}
25041 		break;
25042 	}
25043 #else /* ! _MULTI_DATAMODEL */
25044 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25045 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25046 		    "sr_read_cdda: ddi_copyin Failed\n");
25047 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25048 		return (EFAULT);
25049 	}
25050 #endif /* _MULTI_DATAMODEL */
25051 
25052 	/*
25053 	 * Since MMC-2 expects max 3 bytes for length, check if the
25054 	 * length input is greater than 3 bytes
25055 	 */
25056 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25057 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25058 		    "cdrom transfer length too large: %d (limit %d)\n",
25059 		    cdda->cdda_length, 0xFFFFFF);
25060 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25061 		return (EINVAL);
25062 	}
25063 
25064 	switch (cdda->cdda_subcode) {
25065 	case CDROM_DA_NO_SUBCODE:
25066 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25067 		break;
25068 	case CDROM_DA_SUBQ:
25069 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25070 		break;
25071 	case CDROM_DA_ALL_SUBCODE:
25072 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25073 		break;
25074 	case CDROM_DA_SUBCODE_ONLY:
25075 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25076 		break;
25077 	default:
25078 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25079 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25080 		    cdda->cdda_subcode);
25081 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25082 		return (EINVAL);
25083 	}
25084 
25085 	/* Build and send the command */
25086 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25087 	bzero(cdb, CDB_GROUP5);
25088 
25089 	if (un->un_f_cfg_cdda == TRUE) {
25090 		cdb[0] = (char)SCMD_READ_CD;
25091 		cdb[1] = 0x04;
25092 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25093 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25094 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25095 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25096 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25097 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25098 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25099 		cdb[9] = 0x10;
25100 		switch (cdda->cdda_subcode) {
25101 		case CDROM_DA_NO_SUBCODE :
25102 			cdb[10] = 0x0;
25103 			break;
25104 		case CDROM_DA_SUBQ :
25105 			cdb[10] = 0x2;
25106 			break;
25107 		case CDROM_DA_ALL_SUBCODE :
25108 			cdb[10] = 0x1;
25109 			break;
25110 		case CDROM_DA_SUBCODE_ONLY :
25111 			/* FALLTHROUGH */
25112 		default :
25113 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25114 			kmem_free(com, sizeof (*com));
25115 			return (ENOTTY);
25116 		}
25117 	} else {
25118 		cdb[0] = (char)SCMD_READ_CDDA;
25119 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25120 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25121 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25122 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25123 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25124 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25125 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25126 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25127 		cdb[10] = cdda->cdda_subcode;
25128 	}
25129 
25130 	com->uscsi_cdb = cdb;
25131 	com->uscsi_cdblen = CDB_GROUP5;
25132 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25133 	com->uscsi_buflen = buflen;
25134 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25135 
25136 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25137 	    SD_PATH_STANDARD);
25138 
25139 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25140 	kmem_free(com, sizeof (*com));
25141 	return (rval);
25142 }
25143 
25144 
25145 /*
25146  *    Function: sr_read_cdxa()
25147  *
25148  * Description: This routine is the driver entry point for handling CD-ROM
25149  *		ioctl requests to return CD-XA (Extended Architecture) data.
25150  *		(CDROMCDXA).
25151  *
25152  *   Arguments: dev	- the device 'dev_t'
25153  *		data	- pointer to user provided CD-XA structure specifying
25154  *			  the data starting address, transfer length, and format
25155  *		flag	- this argument is a pass through to ddi_copyxxx()
25156  *			  directly from the mode argument of ioctl().
25157  *
25158  * Return Code: the code returned by sd_send_scsi_cmd()
25159  *		EFAULT if ddi_copyxxx() fails
25160  *		ENXIO if fail ddi_get_soft_state
25161  *		EINVAL if data pointer is NULL
25162  */
25163 
25164 static int
25165 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25166 {
25167 	struct sd_lun		*un;
25168 	struct uscsi_cmd	*com;
25169 	struct cdrom_cdxa	*cdxa;
25170 	int			rval;
25171 	size_t			buflen;
25172 	char			cdb[CDB_GROUP5];
25173 	uchar_t			read_flags;
25174 
25175 #ifdef _MULTI_DATAMODEL
25176 	/* To support ILP32 applications in an LP64 world */
25177 	struct cdrom_cdxa32		cdrom_cdxa32;
25178 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25179 #endif /* _MULTI_DATAMODEL */
25180 
25181 	if (data == NULL) {
25182 		return (EINVAL);
25183 	}
25184 
25185 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25186 		return (ENXIO);
25187 	}
25188 
25189 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25190 
25191 #ifdef _MULTI_DATAMODEL
25192 	switch (ddi_model_convert_from(flag & FMODELS)) {
25193 	case DDI_MODEL_ILP32:
25194 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25195 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25196 			return (EFAULT);
25197 		}
25198 		/*
25199 		 * Convert the ILP32 uscsi data from the
25200 		 * application to LP64 for internal use.
25201 		 */
25202 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25203 		break;
25204 	case DDI_MODEL_NONE:
25205 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25206 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25207 			return (EFAULT);
25208 		}
25209 		break;
25210 	}
25211 #else /* ! _MULTI_DATAMODEL */
25212 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25213 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25214 		return (EFAULT);
25215 	}
25216 #endif /* _MULTI_DATAMODEL */
25217 
25218 	/*
25219 	 * Since MMC-2 expects max 3 bytes for length, check if the
25220 	 * length input is greater than 3 bytes
25221 	 */
25222 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25223 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25224 		    "cdrom transfer length too large: %d (limit %d)\n",
25225 		    cdxa->cdxa_length, 0xFFFFFF);
25226 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25227 		return (EINVAL);
25228 	}
25229 
25230 	switch (cdxa->cdxa_format) {
25231 	case CDROM_XA_DATA:
25232 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25233 		read_flags = 0x10;
25234 		break;
25235 	case CDROM_XA_SECTOR_DATA:
25236 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25237 		read_flags = 0xf8;
25238 		break;
25239 	case CDROM_XA_DATA_W_ERROR:
25240 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25241 		read_flags = 0xfc;
25242 		break;
25243 	default:
25244 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25245 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25246 		    cdxa->cdxa_format);
25247 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25248 		return (EINVAL);
25249 	}
25250 
25251 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25252 	bzero(cdb, CDB_GROUP5);
25253 	if (un->un_f_mmc_cap == TRUE) {
25254 		cdb[0] = (char)SCMD_READ_CD;
25255 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25256 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25257 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25258 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25259 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25260 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25261 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25262 		cdb[9] = (char)read_flags;
25263 	} else {
25264 		/*
25265 		 * Note: A vendor specific command (0xDB) is being used her to
25266 		 * request a read of all subcodes.
25267 		 */
25268 		cdb[0] = (char)SCMD_READ_CDXA;
25269 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25270 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25271 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25272 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25273 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25274 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25275 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25276 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25277 		cdb[10] = cdxa->cdxa_format;
25278 	}
25279 	com->uscsi_cdb	   = cdb;
25280 	com->uscsi_cdblen  = CDB_GROUP5;
25281 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25282 	com->uscsi_buflen  = buflen;
25283 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25284 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25285 	    SD_PATH_STANDARD);
25286 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25287 	kmem_free(com, sizeof (*com));
25288 	return (rval);
25289 }
25290 
25291 
25292 /*
25293  *    Function: sr_eject()
25294  *
25295  * Description: This routine is the driver entry point for handling CD-ROM
25296  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25297  *
25298  *   Arguments: dev	- the device 'dev_t'
25299  *
25300  * Return Code: the code returned by sd_send_scsi_cmd()
25301  */
25302 
25303 static int
25304 sr_eject(dev_t dev)
25305 {
25306 	struct sd_lun	*un;
25307 	int		rval;
25308 
25309 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25310 	    (un->un_state == SD_STATE_OFFLINE)) {
25311 		return (ENXIO);
25312 	}
25313 
25314 	/*
25315 	 * To prevent race conditions with the eject
25316 	 * command, keep track of an eject command as
25317 	 * it progresses. If we are already handling
25318 	 * an eject command in the driver for the given
25319 	 * unit and another request to eject is received
25320 	 * immediately return EAGAIN so we don't lose
25321 	 * the command if the current eject command fails.
25322 	 */
25323 	mutex_enter(SD_MUTEX(un));
25324 	if (un->un_f_ejecting == TRUE) {
25325 		mutex_exit(SD_MUTEX(un));
25326 		return (EAGAIN);
25327 	}
25328 	un->un_f_ejecting = TRUE;
25329 	mutex_exit(SD_MUTEX(un));
25330 
25331 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25332 	    SD_PATH_STANDARD)) != 0) {
25333 		mutex_enter(SD_MUTEX(un));
25334 		un->un_f_ejecting = FALSE;
25335 		mutex_exit(SD_MUTEX(un));
25336 		return (rval);
25337 	}
25338 
25339 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25340 	    SD_PATH_STANDARD);
25341 
25342 	if (rval == 0) {
25343 		mutex_enter(SD_MUTEX(un));
25344 		sr_ejected(un);
25345 		un->un_mediastate = DKIO_EJECTED;
25346 		un->un_f_ejecting = FALSE;
25347 		cv_broadcast(&un->un_state_cv);
25348 		mutex_exit(SD_MUTEX(un));
25349 	} else {
25350 		mutex_enter(SD_MUTEX(un));
25351 		un->un_f_ejecting = FALSE;
25352 		mutex_exit(SD_MUTEX(un));
25353 	}
25354 	return (rval);
25355 }
25356 
25357 
25358 /*
25359  *    Function: sr_ejected()
25360  *
25361  * Description: This routine updates the soft state structure to invalidate the
25362  *		geometry information after the media has been ejected or a
25363  *		media eject has been detected.
25364  *
25365  *   Arguments: un - driver soft state (unit) structure
25366  */
25367 
25368 static void
25369 sr_ejected(struct sd_lun *un)
25370 {
25371 	struct sd_errstats *stp;
25372 
25373 	ASSERT(un != NULL);
25374 	ASSERT(mutex_owned(SD_MUTEX(un)));
25375 
25376 	un->un_f_blockcount_is_valid	= FALSE;
25377 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25378 	mutex_exit(SD_MUTEX(un));
25379 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25380 	mutex_enter(SD_MUTEX(un));
25381 
25382 	if (un->un_errstats != NULL) {
25383 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25384 		stp->sd_capacity.value.ui64 = 0;
25385 	}
25386 }
25387 
25388 
25389 /*
25390  *    Function: sr_check_wp()
25391  *
25392  * Description: This routine checks the write protection of a removable
25393  *      media disk and hotpluggable devices via the write protect bit of
25394  *      the Mode Page Header device specific field. Some devices choke
25395  *      on unsupported mode page. In order to workaround this issue,
25396  *      this routine has been implemented to use 0x3f mode page(request
25397  *      for all pages) for all device types.
25398  *
25399  *   Arguments: dev		- the device 'dev_t'
25400  *
25401  * Return Code: int indicating if the device is write protected (1) or not (0)
25402  *
25403  *     Context: Kernel thread.
25404  *
25405  */
25406 
25407 static int
25408 sr_check_wp(dev_t dev)
25409 {
25410 	struct sd_lun	*un;
25411 	uchar_t		device_specific;
25412 	uchar_t		*sense;
25413 	int		hdrlen;
25414 	int		rval = FALSE;
25415 
25416 	/*
25417 	 * Note: The return codes for this routine should be reworked to
25418 	 * properly handle the case of a NULL softstate.
25419 	 */
25420 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25421 		return (FALSE);
25422 	}
25423 
25424 	if (un->un_f_cfg_is_atapi == TRUE) {
25425 		/*
25426 		 * The mode page contents are not required; set the allocation
25427 		 * length for the mode page header only
25428 		 */
25429 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25430 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25431 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25432 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25433 			goto err_exit;
25434 		device_specific =
25435 		    ((struct mode_header_grp2 *)sense)->device_specific;
25436 	} else {
25437 		hdrlen = MODE_HEADER_LENGTH;
25438 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25439 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25440 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25441 			goto err_exit;
25442 		device_specific =
25443 		    ((struct mode_header *)sense)->device_specific;
25444 	}
25445 
25446 	/*
25447 	 * Write protect mode sense failed; not all disks
25448 	 * understand this query. Return FALSE assuming that
25449 	 * these devices are not writable.
25450 	 */
25451 	if (device_specific & WRITE_PROTECT) {
25452 		rval = TRUE;
25453 	}
25454 
25455 err_exit:
25456 	kmem_free(sense, hdrlen);
25457 	return (rval);
25458 }
25459 
25460 /*
25461  *    Function: sr_volume_ctrl()
25462  *
25463  * Description: This routine is the driver entry point for handling CD-ROM
25464  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25465  *
25466  *   Arguments: dev	- the device 'dev_t'
25467  *		data	- pointer to user audio volume control structure
25468  *		flag	- this argument is a pass through to ddi_copyxxx()
25469  *			  directly from the mode argument of ioctl().
25470  *
25471  * Return Code: the code returned by sd_send_scsi_cmd()
25472  *		EFAULT if ddi_copyxxx() fails
25473  *		ENXIO if fail ddi_get_soft_state
25474  *		EINVAL if data pointer is NULL
25475  *
25476  */
25477 
25478 static int
25479 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25480 {
25481 	struct sd_lun		*un;
25482 	struct cdrom_volctrl    volume;
25483 	struct cdrom_volctrl    *vol = &volume;
25484 	uchar_t			*sense_page;
25485 	uchar_t			*select_page;
25486 	uchar_t			*sense;
25487 	uchar_t			*select;
25488 	int			sense_buflen;
25489 	int			select_buflen;
25490 	int			rval;
25491 
25492 	if (data == NULL) {
25493 		return (EINVAL);
25494 	}
25495 
25496 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25497 	    (un->un_state == SD_STATE_OFFLINE)) {
25498 		return (ENXIO);
25499 	}
25500 
25501 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25502 		return (EFAULT);
25503 	}
25504 
25505 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25506 		struct mode_header_grp2		*sense_mhp;
25507 		struct mode_header_grp2		*select_mhp;
25508 		int				bd_len;
25509 
25510 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25511 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25512 		    MODEPAGE_AUDIO_CTRL_LEN;
25513 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25514 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25515 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25516 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25517 		    SD_PATH_STANDARD)) != 0) {
25518 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25519 			    "sr_volume_ctrl: Mode Sense Failed\n");
25520 			kmem_free(sense, sense_buflen);
25521 			kmem_free(select, select_buflen);
25522 			return (rval);
25523 		}
25524 		sense_mhp = (struct mode_header_grp2 *)sense;
25525 		select_mhp = (struct mode_header_grp2 *)select;
25526 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25527 		    sense_mhp->bdesc_length_lo;
25528 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25529 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25530 			    "sr_volume_ctrl: Mode Sense returned invalid "
25531 			    "block descriptor length\n");
25532 			kmem_free(sense, sense_buflen);
25533 			kmem_free(select, select_buflen);
25534 			return (EIO);
25535 		}
25536 		sense_page = (uchar_t *)
25537 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25538 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25539 		select_mhp->length_msb = 0;
25540 		select_mhp->length_lsb = 0;
25541 		select_mhp->bdesc_length_hi = 0;
25542 		select_mhp->bdesc_length_lo = 0;
25543 	} else {
25544 		struct mode_header		*sense_mhp, *select_mhp;
25545 
25546 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25547 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25548 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25549 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25550 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25551 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25552 		    SD_PATH_STANDARD)) != 0) {
25553 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25554 			    "sr_volume_ctrl: Mode Sense Failed\n");
25555 			kmem_free(sense, sense_buflen);
25556 			kmem_free(select, select_buflen);
25557 			return (rval);
25558 		}
25559 		sense_mhp  = (struct mode_header *)sense;
25560 		select_mhp = (struct mode_header *)select;
25561 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25562 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25563 			    "sr_volume_ctrl: Mode Sense returned invalid "
25564 			    "block descriptor length\n");
25565 			kmem_free(sense, sense_buflen);
25566 			kmem_free(select, select_buflen);
25567 			return (EIO);
25568 		}
25569 		sense_page = (uchar_t *)
25570 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25571 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25572 		select_mhp->length = 0;
25573 		select_mhp->bdesc_length = 0;
25574 	}
25575 	/*
25576 	 * Note: An audio control data structure could be created and overlayed
25577 	 * on the following in place of the array indexing method implemented.
25578 	 */
25579 
25580 	/* Build the select data for the user volume data */
25581 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25582 	select_page[1] = 0xE;
25583 	/* Set the immediate bit */
25584 	select_page[2] = 0x04;
25585 	/* Zero out reserved fields */
25586 	select_page[3] = 0x00;
25587 	select_page[4] = 0x00;
25588 	/* Return sense data for fields not to be modified */
25589 	select_page[5] = sense_page[5];
25590 	select_page[6] = sense_page[6];
25591 	select_page[7] = sense_page[7];
25592 	/* Set the user specified volume levels for channel 0 and 1 */
25593 	select_page[8] = 0x01;
25594 	select_page[9] = vol->channel0;
25595 	select_page[10] = 0x02;
25596 	select_page[11] = vol->channel1;
25597 	/* Channel 2 and 3 are currently unsupported so return the sense data */
25598 	select_page[12] = sense_page[12];
25599 	select_page[13] = sense_page[13];
25600 	select_page[14] = sense_page[14];
25601 	select_page[15] = sense_page[15];
25602 
25603 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25604 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25605 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25606 	} else {
25607 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25608 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25609 	}
25610 
25611 	kmem_free(sense, sense_buflen);
25612 	kmem_free(select, select_buflen);
25613 	return (rval);
25614 }
25615 
25616 
25617 /*
25618  *    Function: sr_read_sony_session_offset()
25619  *
25620  * Description: This routine is the driver entry point for handling CD-ROM
25621  *		ioctl requests for session offset information. (CDROMREADOFFSET)
25622  *		The address of the first track in the last session of a
25623  *		multi-session CD-ROM is returned
25624  *
25625  *		Note: This routine uses a vendor specific key value in the
25626  *		command control field without implementing any vendor check here
25627  *		or in the ioctl routine.
25628  *
25629  *   Arguments: dev	- the device 'dev_t'
25630  *		data	- pointer to an int to hold the requested address
25631  *		flag	- this argument is a pass through to ddi_copyxxx()
25632  *			  directly from the mode argument of ioctl().
25633  *
25634  * Return Code: the code returned by sd_send_scsi_cmd()
25635  *		EFAULT if ddi_copyxxx() fails
25636  *		ENXIO if fail ddi_get_soft_state
25637  *		EINVAL if data pointer is NULL
25638  */
25639 
25640 static int
25641 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25642 {
25643 	struct sd_lun		*un;
25644 	struct uscsi_cmd	*com;
25645 	caddr_t			buffer;
25646 	char			cdb[CDB_GROUP1];
25647 	int			session_offset = 0;
25648 	int			rval;
25649 
25650 	if (data == NULL) {
25651 		return (EINVAL);
25652 	}
25653 
25654 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25655 	    (un->un_state == SD_STATE_OFFLINE)) {
25656 		return (ENXIO);
25657 	}
25658 
25659 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
25660 	bzero(cdb, CDB_GROUP1);
25661 	cdb[0] = SCMD_READ_TOC;
25662 	/*
25663 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
25664 	 * (4 byte TOC response header + 8 byte response data)
25665 	 */
25666 	cdb[8] = SONY_SESSION_OFFSET_LEN;
25667 	/* Byte 9 is the control byte. A vendor specific value is used */
25668 	cdb[9] = SONY_SESSION_OFFSET_KEY;
25669 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25670 	com->uscsi_cdb = cdb;
25671 	com->uscsi_cdblen = CDB_GROUP1;
25672 	com->uscsi_bufaddr = buffer;
25673 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
25674 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25675 
25676 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25677 	    SD_PATH_STANDARD);
25678 	if (rval != 0) {
25679 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25680 		kmem_free(com, sizeof (*com));
25681 		return (rval);
25682 	}
25683 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
25684 		session_offset =
25685 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
25686 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
25687 		/*
25688 		 * Offset returned offset in current lbasize block's. Convert to
25689 		 * 2k block's to return to the user
25690 		 */
25691 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
25692 			session_offset >>= 2;
25693 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
25694 			session_offset >>= 1;
25695 		}
25696 	}
25697 
25698 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
25699 		rval = EFAULT;
25700 	}
25701 
25702 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25703 	kmem_free(com, sizeof (*com));
25704 	return (rval);
25705 }
25706 
25707 
25708 /*
25709  *    Function: sd_wm_cache_constructor()
25710  *
25711  * Description: Cache Constructor for the wmap cache for the read/modify/write
25712  * 		devices.
25713  *
25714  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25715  *		un	- sd_lun structure for the device.
25716  *		flag	- the km flags passed to constructor
25717  *
25718  * Return Code: 0 on success.
25719  *		-1 on failure.
25720  */
25721 
25722 /*ARGSUSED*/
25723 static int
25724 sd_wm_cache_constructor(void *wm, void *un, int flags)
25725 {
25726 	bzero(wm, sizeof (struct sd_w_map));
25727 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
25728 	return (0);
25729 }
25730 
25731 
25732 /*
25733  *    Function: sd_wm_cache_destructor()
25734  *
25735  * Description: Cache destructor for the wmap cache for the read/modify/write
25736  * 		devices.
25737  *
25738  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25739  *		un	- sd_lun structure for the device.
25740  */
25741 /*ARGSUSED*/
25742 static void
25743 sd_wm_cache_destructor(void *wm, void *un)
25744 {
25745 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
25746 }
25747 
25748 
25749 /*
25750  *    Function: sd_range_lock()
25751  *
25752  * Description: Lock the range of blocks specified as parameter to ensure
25753  *		that read, modify write is atomic and no other i/o writes
25754  *		to the same location. The range is specified in terms
25755  *		of start and end blocks. Block numbers are the actual
25756  *		media block numbers and not system.
25757  *
25758  *   Arguments: un	- sd_lun structure for the device.
25759  *		startb - The starting block number
25760  *		endb - The end block number
25761  *		typ - type of i/o - simple/read_modify_write
25762  *
25763  * Return Code: wm  - pointer to the wmap structure.
25764  *
25765  *     Context: This routine can sleep.
25766  */
25767 
25768 static struct sd_w_map *
25769 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
25770 {
25771 	struct sd_w_map *wmp = NULL;
25772 	struct sd_w_map *sl_wmp = NULL;
25773 	struct sd_w_map *tmp_wmp;
25774 	wm_state state = SD_WM_CHK_LIST;
25775 
25776 
25777 	ASSERT(un != NULL);
25778 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25779 
25780 	mutex_enter(SD_MUTEX(un));
25781 
25782 	while (state != SD_WM_DONE) {
25783 
25784 		switch (state) {
25785 		case SD_WM_CHK_LIST:
25786 			/*
25787 			 * This is the starting state. Check the wmap list
25788 			 * to see if the range is currently available.
25789 			 */
25790 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
25791 				/*
25792 				 * If this is a simple write and no rmw
25793 				 * i/o is pending then try to lock the
25794 				 * range as the range should be available.
25795 				 */
25796 				state = SD_WM_LOCK_RANGE;
25797 			} else {
25798 				tmp_wmp = sd_get_range(un, startb, endb);
25799 				if (tmp_wmp != NULL) {
25800 					if ((wmp != NULL) && ONLIST(un, wmp)) {
25801 						/*
25802 						 * Should not keep onlist wmps
25803 						 * while waiting this macro
25804 						 * will also do wmp = NULL;
25805 						 */
25806 						FREE_ONLIST_WMAP(un, wmp);
25807 					}
25808 					/*
25809 					 * sl_wmp is the wmap on which wait
25810 					 * is done, since the tmp_wmp points
25811 					 * to the inuse wmap, set sl_wmp to
25812 					 * tmp_wmp and change the state to sleep
25813 					 */
25814 					sl_wmp = tmp_wmp;
25815 					state = SD_WM_WAIT_MAP;
25816 				} else {
25817 					state = SD_WM_LOCK_RANGE;
25818 				}
25819 
25820 			}
25821 			break;
25822 
25823 		case SD_WM_LOCK_RANGE:
25824 			ASSERT(un->un_wm_cache);
25825 			/*
25826 			 * The range need to be locked, try to get a wmap.
25827 			 * First attempt it with NO_SLEEP, want to avoid a sleep
25828 			 * if possible as we will have to release the sd mutex
25829 			 * if we have to sleep.
25830 			 */
25831 			if (wmp == NULL)
25832 				wmp = kmem_cache_alloc(un->un_wm_cache,
25833 				    KM_NOSLEEP);
25834 			if (wmp == NULL) {
25835 				mutex_exit(SD_MUTEX(un));
25836 				_NOTE(DATA_READABLE_WITHOUT_LOCK
25837 				    (sd_lun::un_wm_cache))
25838 				wmp = kmem_cache_alloc(un->un_wm_cache,
25839 				    KM_SLEEP);
25840 				mutex_enter(SD_MUTEX(un));
25841 				/*
25842 				 * we released the mutex so recheck and go to
25843 				 * check list state.
25844 				 */
25845 				state = SD_WM_CHK_LIST;
25846 			} else {
25847 				/*
25848 				 * We exit out of state machine since we
25849 				 * have the wmap. Do the housekeeping first.
25850 				 * place the wmap on the wmap list if it is not
25851 				 * on it already and then set the state to done.
25852 				 */
25853 				wmp->wm_start = startb;
25854 				wmp->wm_end = endb;
25855 				wmp->wm_flags = typ | SD_WM_BUSY;
25856 				if (typ & SD_WTYPE_RMW) {
25857 					un->un_rmw_count++;
25858 				}
25859 				/*
25860 				 * If not already on the list then link
25861 				 */
25862 				if (!ONLIST(un, wmp)) {
25863 					wmp->wm_next = un->un_wm;
25864 					wmp->wm_prev = NULL;
25865 					if (wmp->wm_next)
25866 						wmp->wm_next->wm_prev = wmp;
25867 					un->un_wm = wmp;
25868 				}
25869 				state = SD_WM_DONE;
25870 			}
25871 			break;
25872 
25873 		case SD_WM_WAIT_MAP:
25874 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
25875 			/*
25876 			 * Wait is done on sl_wmp, which is set in the
25877 			 * check_list state.
25878 			 */
25879 			sl_wmp->wm_wanted_count++;
25880 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
25881 			sl_wmp->wm_wanted_count--;
25882 			/*
25883 			 * We can reuse the memory from the completed sl_wmp
25884 			 * lock range for our new lock, but only if noone is
25885 			 * waiting for it.
25886 			 */
25887 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
25888 			if (sl_wmp->wm_wanted_count == 0) {
25889 				if (wmp != NULL)
25890 					CHK_N_FREEWMP(un, wmp);
25891 				wmp = sl_wmp;
25892 			}
25893 			sl_wmp = NULL;
25894 			/*
25895 			 * After waking up, need to recheck for availability of
25896 			 * range.
25897 			 */
25898 			state = SD_WM_CHK_LIST;
25899 			break;
25900 
25901 		default:
25902 			panic("sd_range_lock: "
25903 			    "Unknown state %d in sd_range_lock", state);
25904 			/*NOTREACHED*/
25905 		} /* switch(state) */
25906 
25907 	} /* while(state != SD_WM_DONE) */
25908 
25909 	mutex_exit(SD_MUTEX(un));
25910 
25911 	ASSERT(wmp != NULL);
25912 
25913 	return (wmp);
25914 }
25915 
25916 
25917 /*
25918  *    Function: sd_get_range()
25919  *
25920  * Description: Find if there any overlapping I/O to this one
25921  *		Returns the write-map of 1st such I/O, NULL otherwise.
25922  *
25923  *   Arguments: un	- sd_lun structure for the device.
25924  *		startb - The starting block number
25925  *		endb - The end block number
25926  *
25927  * Return Code: wm  - pointer to the wmap structure.
25928  */
25929 
25930 static struct sd_w_map *
25931 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
25932 {
25933 	struct sd_w_map *wmp;
25934 
25935 	ASSERT(un != NULL);
25936 
25937 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
25938 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
25939 			continue;
25940 		}
25941 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
25942 			break;
25943 		}
25944 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
25945 			break;
25946 		}
25947 	}
25948 
25949 	return (wmp);
25950 }
25951 
25952 
25953 /*
25954  *    Function: sd_free_inlist_wmap()
25955  *
25956  * Description: Unlink and free a write map struct.
25957  *
25958  *   Arguments: un      - sd_lun structure for the device.
25959  *		wmp	- sd_w_map which needs to be unlinked.
25960  */
25961 
25962 static void
25963 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
25964 {
25965 	ASSERT(un != NULL);
25966 
25967 	if (un->un_wm == wmp) {
25968 		un->un_wm = wmp->wm_next;
25969 	} else {
25970 		wmp->wm_prev->wm_next = wmp->wm_next;
25971 	}
25972 
25973 	if (wmp->wm_next) {
25974 		wmp->wm_next->wm_prev = wmp->wm_prev;
25975 	}
25976 
25977 	wmp->wm_next = wmp->wm_prev = NULL;
25978 
25979 	kmem_cache_free(un->un_wm_cache, wmp);
25980 }
25981 
25982 
25983 /*
25984  *    Function: sd_range_unlock()
25985  *
25986  * Description: Unlock the range locked by wm.
25987  *		Free write map if nobody else is waiting on it.
25988  *
25989  *   Arguments: un      - sd_lun structure for the device.
25990  *              wmp     - sd_w_map which needs to be unlinked.
25991  */
25992 
25993 static void
25994 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
25995 {
25996 	ASSERT(un != NULL);
25997 	ASSERT(wm != NULL);
25998 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25999 
26000 	mutex_enter(SD_MUTEX(un));
26001 
26002 	if (wm->wm_flags & SD_WTYPE_RMW) {
26003 		un->un_rmw_count--;
26004 	}
26005 
26006 	if (wm->wm_wanted_count) {
26007 		wm->wm_flags = 0;
26008 		/*
26009 		 * Broadcast that the wmap is available now.
26010 		 */
26011 		cv_broadcast(&wm->wm_avail);
26012 	} else {
26013 		/*
26014 		 * If no one is waiting on the map, it should be free'ed.
26015 		 */
26016 		sd_free_inlist_wmap(un, wm);
26017 	}
26018 
26019 	mutex_exit(SD_MUTEX(un));
26020 }
26021 
26022 
26023 /*
26024  *    Function: sd_read_modify_write_task
26025  *
26026  * Description: Called from a taskq thread to initiate the write phase of
26027  *		a read-modify-write request.  This is used for targets where
26028  *		un->un_sys_blocksize != un->un_tgt_blocksize.
26029  *
26030  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
26031  *
26032  *     Context: Called under taskq thread context.
26033  */
26034 
26035 static void
26036 sd_read_modify_write_task(void *arg)
26037 {
26038 	struct sd_mapblocksize_info	*bsp;
26039 	struct buf	*bp;
26040 	struct sd_xbuf	*xp;
26041 	struct sd_lun	*un;
26042 
26043 	bp = arg;	/* The bp is given in arg */
26044 	ASSERT(bp != NULL);
26045 
26046 	/* Get the pointer to the layer-private data struct */
26047 	xp = SD_GET_XBUF(bp);
26048 	ASSERT(xp != NULL);
26049 	bsp = xp->xb_private;
26050 	ASSERT(bsp != NULL);
26051 
26052 	un = SD_GET_UN(bp);
26053 	ASSERT(un != NULL);
26054 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26055 
26056 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26057 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26058 
26059 	/*
26060 	 * This is the write phase of a read-modify-write request, called
26061 	 * under the context of a taskq thread in response to the completion
26062 	 * of the read portion of the rmw request completing under interrupt
26063 	 * context. The write request must be sent from here down the iostart
26064 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
26065 	 * we use the layer index saved in the layer-private data area.
26066 	 */
26067 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26068 
26069 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26070 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26071 }
26072 
26073 
26074 /*
26075  *    Function: sddump_do_read_of_rmw()
26076  *
26077  * Description: This routine will be called from sddump, If sddump is called
26078  *		with an I/O which not aligned on device blocksize boundary
26079  *		then the write has to be converted to read-modify-write.
26080  *		Do the read part here in order to keep sddump simple.
26081  *		Note - That the sd_mutex is held across the call to this
26082  *		routine.
26083  *
26084  *   Arguments: un	- sd_lun
26085  *		blkno	- block number in terms of media block size.
26086  *		nblk	- number of blocks.
26087  *		bpp	- pointer to pointer to the buf structure. On return
26088  *			from this function, *bpp points to the valid buffer
26089  *			to which the write has to be done.
26090  *
26091  * Return Code: 0 for success or errno-type return code
26092  */
26093 
26094 static int
26095 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26096 	struct buf **bpp)
26097 {
26098 	int err;
26099 	int i;
26100 	int rval;
26101 	struct buf *bp;
26102 	struct scsi_pkt *pkt = NULL;
26103 	uint32_t target_blocksize;
26104 
26105 	ASSERT(un != NULL);
26106 	ASSERT(mutex_owned(SD_MUTEX(un)));
26107 
26108 	target_blocksize = un->un_tgt_blocksize;
26109 
26110 	mutex_exit(SD_MUTEX(un));
26111 
26112 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26113 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26114 	if (bp == NULL) {
26115 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26116 		    "no resources for dumping; giving up");
26117 		err = ENOMEM;
26118 		goto done;
26119 	}
26120 
26121 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26122 	    blkno, nblk);
26123 	if (rval != 0) {
26124 		scsi_free_consistent_buf(bp);
26125 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26126 		    "no resources for dumping; giving up");
26127 		err = ENOMEM;
26128 		goto done;
26129 	}
26130 
26131 	pkt->pkt_flags |= FLAG_NOINTR;
26132 
26133 	err = EIO;
26134 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26135 
26136 		/*
26137 		 * Scsi_poll returns 0 (success) if the command completes and
26138 		 * the status block is STATUS_GOOD.  We should only check
26139 		 * errors if this condition is not true.  Even then we should
26140 		 * send our own request sense packet only if we have a check
26141 		 * condition and auto request sense has not been performed by
26142 		 * the hba.
26143 		 */
26144 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26145 
26146 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26147 			err = 0;
26148 			break;
26149 		}
26150 
26151 		/*
26152 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
26153 		 * no need to read RQS data.
26154 		 */
26155 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26156 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26157 			    "Device is gone\n");
26158 			break;
26159 		}
26160 
26161 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26162 			SD_INFO(SD_LOG_DUMP, un,
26163 			    "sddump: read failed with CHECK, try # %d\n", i);
26164 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26165 				(void) sd_send_polled_RQS(un);
26166 			}
26167 
26168 			continue;
26169 		}
26170 
26171 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26172 			int reset_retval = 0;
26173 
26174 			SD_INFO(SD_LOG_DUMP, un,
26175 			    "sddump: read failed with BUSY, try # %d\n", i);
26176 
26177 			if (un->un_f_lun_reset_enabled == TRUE) {
26178 				reset_retval = scsi_reset(SD_ADDRESS(un),
26179 				    RESET_LUN);
26180 			}
26181 			if (reset_retval == 0) {
26182 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26183 			}
26184 			(void) sd_send_polled_RQS(un);
26185 
26186 		} else {
26187 			SD_INFO(SD_LOG_DUMP, un,
26188 			    "sddump: read failed with 0x%x, try # %d\n",
26189 			    SD_GET_PKT_STATUS(pkt), i);
26190 			mutex_enter(SD_MUTEX(un));
26191 			sd_reset_target(un, pkt);
26192 			mutex_exit(SD_MUTEX(un));
26193 		}
26194 
26195 		/*
26196 		 * If we are not getting anywhere with lun/target resets,
26197 		 * let's reset the bus.
26198 		 */
26199 		if (i > SD_NDUMP_RETRIES/2) {
26200 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26201 			(void) sd_send_polled_RQS(un);
26202 		}
26203 
26204 	}
26205 	scsi_destroy_pkt(pkt);
26206 
26207 	if (err != 0) {
26208 		scsi_free_consistent_buf(bp);
26209 		*bpp = NULL;
26210 	} else {
26211 		*bpp = bp;
26212 	}
26213 
26214 done:
26215 	mutex_enter(SD_MUTEX(un));
26216 	return (err);
26217 }
26218 
26219 
26220 /*
26221  *    Function: sd_failfast_flushq
26222  *
26223  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26224  *		in b_flags and move them onto the failfast queue, then kick
26225  *		off a thread to return all bp's on the failfast queue to
26226  *		their owners with an error set.
26227  *
26228  *   Arguments: un - pointer to the soft state struct for the instance.
26229  *
26230  *     Context: may execute in interrupt context.
26231  */
26232 
26233 static void
26234 sd_failfast_flushq(struct sd_lun *un)
26235 {
26236 	struct buf *bp;
26237 	struct buf *next_waitq_bp;
26238 	struct buf *prev_waitq_bp = NULL;
26239 
26240 	ASSERT(un != NULL);
26241 	ASSERT(mutex_owned(SD_MUTEX(un)));
26242 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26243 	ASSERT(un->un_failfast_bp == NULL);
26244 
26245 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26246 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26247 
26248 	/*
26249 	 * Check if we should flush all bufs when entering failfast state, or
26250 	 * just those with B_FAILFAST set.
26251 	 */
26252 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26253 		/*
26254 		 * Move *all* bp's on the wait queue to the failfast flush
26255 		 * queue, including those that do NOT have B_FAILFAST set.
26256 		 */
26257 		if (un->un_failfast_headp == NULL) {
26258 			ASSERT(un->un_failfast_tailp == NULL);
26259 			un->un_failfast_headp = un->un_waitq_headp;
26260 		} else {
26261 			ASSERT(un->un_failfast_tailp != NULL);
26262 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26263 		}
26264 
26265 		un->un_failfast_tailp = un->un_waitq_tailp;
26266 
26267 		/* update kstat for each bp moved out of the waitq */
26268 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26269 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26270 		}
26271 
26272 		/* empty the waitq */
26273 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26274 
26275 	} else {
26276 		/*
26277 		 * Go thru the wait queue, pick off all entries with
26278 		 * B_FAILFAST set, and move these onto the failfast queue.
26279 		 */
26280 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26281 			/*
26282 			 * Save the pointer to the next bp on the wait queue,
26283 			 * so we get to it on the next iteration of this loop.
26284 			 */
26285 			next_waitq_bp = bp->av_forw;
26286 
26287 			/*
26288 			 * If this bp from the wait queue does NOT have
26289 			 * B_FAILFAST set, just move on to the next element
26290 			 * in the wait queue. Note, this is the only place
26291 			 * where it is correct to set prev_waitq_bp.
26292 			 */
26293 			if ((bp->b_flags & B_FAILFAST) == 0) {
26294 				prev_waitq_bp = bp;
26295 				continue;
26296 			}
26297 
26298 			/*
26299 			 * Remove the bp from the wait queue.
26300 			 */
26301 			if (bp == un->un_waitq_headp) {
26302 				/* The bp is the first element of the waitq. */
26303 				un->un_waitq_headp = next_waitq_bp;
26304 				if (un->un_waitq_headp == NULL) {
26305 					/* The wait queue is now empty */
26306 					un->un_waitq_tailp = NULL;
26307 				}
26308 			} else {
26309 				/*
26310 				 * The bp is either somewhere in the middle
26311 				 * or at the end of the wait queue.
26312 				 */
26313 				ASSERT(un->un_waitq_headp != NULL);
26314 				ASSERT(prev_waitq_bp != NULL);
26315 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26316 				    == 0);
26317 				if (bp == un->un_waitq_tailp) {
26318 					/* bp is the last entry on the waitq. */
26319 					ASSERT(next_waitq_bp == NULL);
26320 					un->un_waitq_tailp = prev_waitq_bp;
26321 				}
26322 				prev_waitq_bp->av_forw = next_waitq_bp;
26323 			}
26324 			bp->av_forw = NULL;
26325 
26326 			/*
26327 			 * update kstat since the bp is moved out of
26328 			 * the waitq
26329 			 */
26330 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26331 
26332 			/*
26333 			 * Now put the bp onto the failfast queue.
26334 			 */
26335 			if (un->un_failfast_headp == NULL) {
26336 				/* failfast queue is currently empty */
26337 				ASSERT(un->un_failfast_tailp == NULL);
26338 				un->un_failfast_headp =
26339 				    un->un_failfast_tailp = bp;
26340 			} else {
26341 				/* Add the bp to the end of the failfast q */
26342 				ASSERT(un->un_failfast_tailp != NULL);
26343 				ASSERT(un->un_failfast_tailp->b_flags &
26344 				    B_FAILFAST);
26345 				un->un_failfast_tailp->av_forw = bp;
26346 				un->un_failfast_tailp = bp;
26347 			}
26348 		}
26349 	}
26350 
26351 	/*
26352 	 * Now return all bp's on the failfast queue to their owners.
26353 	 */
26354 	while ((bp = un->un_failfast_headp) != NULL) {
26355 
26356 		un->un_failfast_headp = bp->av_forw;
26357 		if (un->un_failfast_headp == NULL) {
26358 			un->un_failfast_tailp = NULL;
26359 		}
26360 
26361 		/*
26362 		 * We want to return the bp with a failure error code, but
26363 		 * we do not want a call to sd_start_cmds() to occur here,
26364 		 * so use sd_return_failed_command_no_restart() instead of
26365 		 * sd_return_failed_command().
26366 		 */
26367 		sd_return_failed_command_no_restart(un, bp, EIO);
26368 	}
26369 
26370 	/* Flush the xbuf queues if required. */
26371 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26372 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26373 	}
26374 
26375 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26376 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26377 }
26378 
26379 
26380 /*
26381  *    Function: sd_failfast_flushq_callback
26382  *
26383  * Description: Return TRUE if the given bp meets the criteria for failfast
26384  *		flushing. Used with ddi_xbuf_flushq(9F).
26385  *
26386  *   Arguments: bp - ptr to buf struct to be examined.
26387  *
26388  *     Context: Any
26389  */
26390 
26391 static int
26392 sd_failfast_flushq_callback(struct buf *bp)
26393 {
26394 	/*
26395 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26396 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26397 	 */
26398 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26399 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26400 }
26401 
26402 
26403 
26404 #if defined(__i386) || defined(__amd64)
26405 /*
26406  * Function: sd_setup_next_xfer
26407  *
26408  * Description: Prepare next I/O operation using DMA_PARTIAL
26409  *
26410  */
26411 
26412 static int
26413 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26414     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26415 {
26416 	ssize_t	num_blks_not_xfered;
26417 	daddr_t	strt_blk_num;
26418 	ssize_t	bytes_not_xfered;
26419 	int	rval;
26420 
26421 	ASSERT(pkt->pkt_resid == 0);
26422 
26423 	/*
26424 	 * Calculate next block number and amount to be transferred.
26425 	 *
26426 	 * How much data NOT transfered to the HBA yet.
26427 	 */
26428 	bytes_not_xfered = xp->xb_dma_resid;
26429 
26430 	/*
26431 	 * figure how many blocks NOT transfered to the HBA yet.
26432 	 */
26433 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26434 
26435 	/*
26436 	 * set starting block number to the end of what WAS transfered.
26437 	 */
26438 	strt_blk_num = xp->xb_blkno +
26439 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
26440 
26441 	/*
26442 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
26443 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
26444 	 * the disk mutex here.
26445 	 */
26446 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26447 	    strt_blk_num, num_blks_not_xfered);
26448 
26449 	if (rval == 0) {
26450 
26451 		/*
26452 		 * Success.
26453 		 *
26454 		 * Adjust things if there are still more blocks to be
26455 		 * transfered.
26456 		 */
26457 		xp->xb_dma_resid = pkt->pkt_resid;
26458 		pkt->pkt_resid = 0;
26459 
26460 		return (1);
26461 	}
26462 
26463 	/*
26464 	 * There's really only one possible return value from
26465 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
26466 	 * returns NULL.
26467 	 */
26468 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26469 
26470 	bp->b_resid = bp->b_bcount;
26471 	bp->b_flags |= B_ERROR;
26472 
26473 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26474 	    "Error setting up next portion of DMA transfer\n");
26475 
26476 	return (0);
26477 }
26478 #endif
26479 
26480 /*
26481  *    Function: sd_panic_for_res_conflict
26482  *
26483  * Description: Call panic with a string formated with "Reservation Conflict"
26484  *		and a human readable identifier indicating the SD instance
26485  *		that experienced the reservation conflict.
26486  *
26487  *   Arguments: un - pointer to the soft state struct for the instance.
26488  *
26489  *     Context: may execute in interrupt context.
26490  */
26491 
26492 #define	SD_RESV_CONFLICT_FMT_LEN 40
26493 void
26494 sd_panic_for_res_conflict(struct sd_lun *un)
26495 {
26496 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
26497 	char path_str[MAXPATHLEN];
26498 
26499 	(void) snprintf(panic_str, sizeof (panic_str),
26500 	    "Reservation Conflict\nDisk: %s",
26501 	    ddi_pathname(SD_DEVINFO(un), path_str));
26502 
26503 	panic(panic_str);
26504 }
26505 
/*
 * Note: The following sd_faultinjection_ioctl() routines implement
 * driver support for fault injection. They allow faults to be introduced
 * at multiple layers of the driver so that its error handling can be
 * analyzed.
 *
 */
26512 
26513 #ifdef SD_FAULT_INJECTION
26514 static uint_t   sd_fault_injection_on = 0;
26515 
26516 /*
26517  *    Function: sd_faultinjection_ioctl()
26518  *
26519  * Description: This routine is the driver entry point for handling
26520  *              faultinjection ioctls to inject errors into the
26521  *              layer model
26522  *
26523  *   Arguments: cmd	- the ioctl cmd recieved
26524  *		arg	- the arguments from user and returns
26525  */
26526 
26527 static void
26528 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
26529 
26530 	uint_t i;
26531 	uint_t rval;
26532 
26533 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26534 
26535 	mutex_enter(SD_MUTEX(un));
26536 
26537 	switch (cmd) {
26538 	case SDIOCRUN:
26539 		/* Allow pushed faults to be injected */
26540 		SD_INFO(SD_LOG_SDTEST, un,
26541 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26542 
26543 		sd_fault_injection_on = 1;
26544 
26545 		SD_INFO(SD_LOG_IOERR, un,
26546 		    "sd_faultinjection_ioctl: run finished\n");
26547 		break;
26548 
26549 	case SDIOCSTART:
26550 		/* Start Injection Session */
26551 		SD_INFO(SD_LOG_SDTEST, un,
26552 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26553 
26554 		sd_fault_injection_on = 0;
26555 		un->sd_injection_mask = 0xFFFFFFFF;
26556 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26557 			un->sd_fi_fifo_pkt[i] = NULL;
26558 			un->sd_fi_fifo_xb[i] = NULL;
26559 			un->sd_fi_fifo_un[i] = NULL;
26560 			un->sd_fi_fifo_arq[i] = NULL;
26561 		}
26562 		un->sd_fi_fifo_start = 0;
26563 		un->sd_fi_fifo_end = 0;
26564 
26565 		mutex_enter(&(un->un_fi_mutex));
26566 		un->sd_fi_log[0] = '\0';
26567 		un->sd_fi_buf_len = 0;
26568 		mutex_exit(&(un->un_fi_mutex));
26569 
26570 		SD_INFO(SD_LOG_IOERR, un,
26571 		    "sd_faultinjection_ioctl: start finished\n");
26572 		break;
26573 
26574 	case SDIOCSTOP:
26575 		/* Stop Injection Session */
26576 		SD_INFO(SD_LOG_SDTEST, un,
26577 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26578 		sd_fault_injection_on = 0;
26579 		un->sd_injection_mask = 0x0;
26580 
26581 		/* Empty stray or unuseds structs from fifo */
26582 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26583 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26584 				kmem_free(un->sd_fi_fifo_pkt[i],
26585 				    sizeof (struct sd_fi_pkt));
26586 			}
26587 			if (un->sd_fi_fifo_xb[i] != NULL) {
26588 				kmem_free(un->sd_fi_fifo_xb[i],
26589 				    sizeof (struct sd_fi_xb));
26590 			}
26591 			if (un->sd_fi_fifo_un[i] != NULL) {
26592 				kmem_free(un->sd_fi_fifo_un[i],
26593 				    sizeof (struct sd_fi_un));
26594 			}
26595 			if (un->sd_fi_fifo_arq[i] != NULL) {
26596 				kmem_free(un->sd_fi_fifo_arq[i],
26597 				    sizeof (struct sd_fi_arq));
26598 			}
26599 			un->sd_fi_fifo_pkt[i] = NULL;
26600 			un->sd_fi_fifo_un[i] = NULL;
26601 			un->sd_fi_fifo_xb[i] = NULL;
26602 			un->sd_fi_fifo_arq[i] = NULL;
26603 		}
26604 		un->sd_fi_fifo_start = 0;
26605 		un->sd_fi_fifo_end = 0;
26606 
26607 		SD_INFO(SD_LOG_IOERR, un,
26608 		    "sd_faultinjection_ioctl: stop finished\n");
26609 		break;
26610 
26611 	case SDIOCINSERTPKT:
26612 		/* Store a packet struct to be pushed onto fifo */
26613 		SD_INFO(SD_LOG_SDTEST, un,
26614 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26615 
26616 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26617 
26618 		sd_fault_injection_on = 0;
26619 
26620 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
26621 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26622 			kmem_free(un->sd_fi_fifo_pkt[i],
26623 			    sizeof (struct sd_fi_pkt));
26624 		}
26625 		if (arg != NULL) {
26626 			un->sd_fi_fifo_pkt[i] =
26627 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26628 			if (un->sd_fi_fifo_pkt[i] == NULL) {
26629 				/* Alloc failed don't store anything */
26630 				break;
26631 			}
26632 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26633 			    sizeof (struct sd_fi_pkt), 0);
26634 			if (rval == -1) {
26635 				kmem_free(un->sd_fi_fifo_pkt[i],
26636 				    sizeof (struct sd_fi_pkt));
26637 				un->sd_fi_fifo_pkt[i] = NULL;
26638 			}
26639 		} else {
26640 			SD_INFO(SD_LOG_IOERR, un,
26641 			    "sd_faultinjection_ioctl: pkt null\n");
26642 		}
26643 		break;
26644 
26645 	case SDIOCINSERTXB:
26646 		/* Store a xb struct to be pushed onto fifo */
26647 		SD_INFO(SD_LOG_SDTEST, un,
26648 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
26649 
26650 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26651 
26652 		sd_fault_injection_on = 0;
26653 
26654 		if (un->sd_fi_fifo_xb[i] != NULL) {
26655 			kmem_free(un->sd_fi_fifo_xb[i],
26656 			    sizeof (struct sd_fi_xb));
26657 			un->sd_fi_fifo_xb[i] = NULL;
26658 		}
26659 		if (arg != NULL) {
26660 			un->sd_fi_fifo_xb[i] =
26661 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
26662 			if (un->sd_fi_fifo_xb[i] == NULL) {
26663 				/* Alloc failed don't store anything */
26664 				break;
26665 			}
26666 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
26667 			    sizeof (struct sd_fi_xb), 0);
26668 
26669 			if (rval == -1) {
26670 				kmem_free(un->sd_fi_fifo_xb[i],
26671 				    sizeof (struct sd_fi_xb));
26672 				un->sd_fi_fifo_xb[i] = NULL;
26673 			}
26674 		} else {
26675 			SD_INFO(SD_LOG_IOERR, un,
26676 			    "sd_faultinjection_ioctl: xb null\n");
26677 		}
26678 		break;
26679 
26680 	case SDIOCINSERTUN:
26681 		/* Store a un struct to be pushed onto fifo */
26682 		SD_INFO(SD_LOG_SDTEST, un,
26683 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
26684 
26685 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26686 
26687 		sd_fault_injection_on = 0;
26688 
26689 		if (un->sd_fi_fifo_un[i] != NULL) {
26690 			kmem_free(un->sd_fi_fifo_un[i],
26691 			    sizeof (struct sd_fi_un));
26692 			un->sd_fi_fifo_un[i] = NULL;
26693 		}
26694 		if (arg != NULL) {
26695 			un->sd_fi_fifo_un[i] =
26696 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
26697 			if (un->sd_fi_fifo_un[i] == NULL) {
26698 				/* Alloc failed don't store anything */
26699 				break;
26700 			}
26701 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
26702 			    sizeof (struct sd_fi_un), 0);
26703 			if (rval == -1) {
26704 				kmem_free(un->sd_fi_fifo_un[i],
26705 				    sizeof (struct sd_fi_un));
26706 				un->sd_fi_fifo_un[i] = NULL;
26707 			}
26708 
26709 		} else {
26710 			SD_INFO(SD_LOG_IOERR, un,
26711 			    "sd_faultinjection_ioctl: un null\n");
26712 		}
26713 
26714 		break;
26715 
26716 	case SDIOCINSERTARQ:
26717 		/* Store a arq struct to be pushed onto fifo */
26718 		SD_INFO(SD_LOG_SDTEST, un,
26719 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
26720 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26721 
26722 		sd_fault_injection_on = 0;
26723 
26724 		if (un->sd_fi_fifo_arq[i] != NULL) {
26725 			kmem_free(un->sd_fi_fifo_arq[i],
26726 			    sizeof (struct sd_fi_arq));
26727 			un->sd_fi_fifo_arq[i] = NULL;
26728 		}
26729 		if (arg != NULL) {
26730 			un->sd_fi_fifo_arq[i] =
26731 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
26732 			if (un->sd_fi_fifo_arq[i] == NULL) {
26733 				/* Alloc failed don't store anything */
26734 				break;
26735 			}
26736 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
26737 			    sizeof (struct sd_fi_arq), 0);
26738 			if (rval == -1) {
26739 				kmem_free(un->sd_fi_fifo_arq[i],
26740 				    sizeof (struct sd_fi_arq));
26741 				un->sd_fi_fifo_arq[i] = NULL;
26742 			}
26743 
26744 		} else {
26745 			SD_INFO(SD_LOG_IOERR, un,
26746 			    "sd_faultinjection_ioctl: arq null\n");
26747 		}
26748 
26749 		break;
26750 
26751 	case SDIOCPUSH:
26752 		/* Push stored xb, pkt, un, and arq onto fifo */
26753 		sd_fault_injection_on = 0;
26754 
26755 		if (arg != NULL) {
26756 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
26757 			if (rval != -1 &&
26758 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26759 				un->sd_fi_fifo_end += i;
26760 			}
26761 		} else {
26762 			SD_INFO(SD_LOG_IOERR, un,
26763 			    "sd_faultinjection_ioctl: push arg null\n");
26764 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26765 				un->sd_fi_fifo_end++;
26766 			}
26767 		}
26768 		SD_INFO(SD_LOG_IOERR, un,
26769 		    "sd_faultinjection_ioctl: push to end=%d\n",
26770 		    un->sd_fi_fifo_end);
26771 		break;
26772 
26773 	case SDIOCRETRIEVE:
26774 		/* Return buffer of log from Injection session */
26775 		SD_INFO(SD_LOG_SDTEST, un,
26776 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
26777 
26778 		sd_fault_injection_on = 0;
26779 
26780 		mutex_enter(&(un->un_fi_mutex));
26781 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
26782 		    un->sd_fi_buf_len+1, 0);
26783 		mutex_exit(&(un->un_fi_mutex));
26784 
26785 		if (rval == -1) {
26786 			/*
26787 			 * arg is possibly invalid setting
26788 			 * it to NULL for return
26789 			 */
26790 			arg = NULL;
26791 		}
26792 		break;
26793 	}
26794 
26795 	mutex_exit(SD_MUTEX(un));
26796 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
26797 			    " exit\n");
26798 }
26799 
26800 
26801 /*
26802  *    Function: sd_injection_log()
26803  *
26804  * Description: This routine adds buff to the already existing injection log
26805  *              for retrieval via faultinjection_ioctl for use in fault
26806  *              detection and recovery
26807  *
26808  *   Arguments: buf - the string to add to the log
26809  */
26810 
26811 static void
26812 sd_injection_log(char *buf, struct sd_lun *un)
26813 {
26814 	uint_t len;
26815 
26816 	ASSERT(un != NULL);
26817 	ASSERT(buf != NULL);
26818 
26819 	mutex_enter(&(un->un_fi_mutex));
26820 
26821 	len = min(strlen(buf), 255);
26822 	/* Add logged value to Injection log to be returned later */
26823 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
26824 		uint_t	offset = strlen((char *)un->sd_fi_log);
26825 		char *destp = (char *)un->sd_fi_log + offset;
26826 		int i;
26827 		for (i = 0; i < len; i++) {
26828 			*destp++ = *buf++;
26829 		}
26830 		un->sd_fi_buf_len += len;
26831 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
26832 	}
26833 
26834 	mutex_exit(&(un->un_fi_mutex));
26835 }
26836 
26837 
26838 /*
26839  *    Function: sd_faultinjection()
26840  *
26841  * Description: This routine takes the pkt and changes its
26842  *		content based on error injection scenerio.
26843  *
26844  *   Arguments: pktp	- packet to be changed
26845  */
26846 
26847 static void
26848 sd_faultinjection(struct scsi_pkt *pktp)
26849 {
26850 	uint_t i;
26851 	struct sd_fi_pkt *fi_pkt;
26852 	struct sd_fi_xb *fi_xb;
26853 	struct sd_fi_un *fi_un;
26854 	struct sd_fi_arq *fi_arq;
26855 	struct buf *bp;
26856 	struct sd_xbuf *xb;
26857 	struct sd_lun *un;
26858 
26859 	ASSERT(pktp != NULL);
26860 
26861 	/* pull bp xb and un from pktp */
26862 	bp = (struct buf *)pktp->pkt_private;
26863 	xb = SD_GET_XBUF(bp);
26864 	un = SD_GET_UN(bp);
26865 
26866 	ASSERT(un != NULL);
26867 
26868 	mutex_enter(SD_MUTEX(un));
26869 
26870 	SD_TRACE(SD_LOG_SDTEST, un,
26871 	    "sd_faultinjection: entry Injection from sdintr\n");
26872 
26873 	/* if injection is off return */
26874 	if (sd_fault_injection_on == 0 ||
26875 		un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
26876 		mutex_exit(SD_MUTEX(un));
26877 		return;
26878 	}
26879 
26880 
26881 	/* take next set off fifo */
26882 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
26883 
26884 	fi_pkt = un->sd_fi_fifo_pkt[i];
26885 	fi_xb = un->sd_fi_fifo_xb[i];
26886 	fi_un = un->sd_fi_fifo_un[i];
26887 	fi_arq = un->sd_fi_fifo_arq[i];
26888 
26889 
26890 	/* set variables accordingly */
26891 	/* set pkt if it was on fifo */
26892 	if (fi_pkt != NULL) {
26893 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
26894 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
26895 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
26896 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
26897 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
26898 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
26899 
26900 	}
26901 
26902 	/* set xb if it was on fifo */
26903 	if (fi_xb != NULL) {
26904 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
26905 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
26906 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
26907 		SD_CONDSET(xb, xb, xb_victim_retry_count,
26908 		    "xb_victim_retry_count");
26909 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
26910 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
26911 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
26912 
26913 		/* copy in block data from sense */
26914 		if (fi_xb->xb_sense_data[0] != -1) {
26915 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
26916 			    SENSE_LENGTH);
26917 		}
26918 
26919 		/* copy in extended sense codes */
26920 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
26921 		    "es_code");
26922 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
26923 		    "es_key");
26924 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
26925 		    "es_add_code");
26926 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
26927 		    es_qual_code, "es_qual_code");
26928 	}
26929 
26930 	/* set un if it was on fifo */
26931 	if (fi_un != NULL) {
26932 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
26933 		SD_CONDSET(un, un, un_ctype, "un_ctype");
26934 		SD_CONDSET(un, un, un_reset_retry_count,
26935 		    "un_reset_retry_count");
26936 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
26937 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
26938 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
26939 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
26940 		    "un_f_allow_bus_device_reset");
26941 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
26942 
26943 	}
26944 
26945 	/* copy in auto request sense if it was on fifo */
26946 	if (fi_arq != NULL) {
26947 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
26948 	}
26949 
26950 	/* free structs */
26951 	if (un->sd_fi_fifo_pkt[i] != NULL) {
26952 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
26953 	}
26954 	if (un->sd_fi_fifo_xb[i] != NULL) {
26955 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
26956 	}
26957 	if (un->sd_fi_fifo_un[i] != NULL) {
26958 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
26959 	}
26960 	if (un->sd_fi_fifo_arq[i] != NULL) {
26961 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
26962 	}
26963 
26964 	/*
26965 	 * kmem_free does not gurantee to set to NULL
26966 	 * since we uses these to determine if we set
26967 	 * values or not lets confirm they are always
26968 	 * NULL after free
26969 	 */
26970 	un->sd_fi_fifo_pkt[i] = NULL;
26971 	un->sd_fi_fifo_un[i] = NULL;
26972 	un->sd_fi_fifo_xb[i] = NULL;
26973 	un->sd_fi_fifo_arq[i] = NULL;
26974 
26975 	un->sd_fi_fifo_start++;
26976 
26977 	mutex_exit(SD_MUTEX(un));
26978 
26979 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
26980 }
26981 
26982 #endif /* SD_FAULT_INJECTION */
26983 
26984 /*
26985  * This routine is invoked in sd_unit_attach(). Before calling it, the
26986  * properties in conf file should be processed already, and "hotpluggable"
26987  * property was processed also.
26988  *
26989  * The sd driver distinguishes 3 different type of devices: removable media,
26990  * non-removable media, and hotpluggable. Below the differences are defined:
26991  *
26992  * 1. Device ID
26993  *
26994  *     The device ID of a device is used to identify this device. Refer to
26995  *     ddi_devid_register(9F).
26996  *
26997  *     For a non-removable media disk device which can provide 0x80 or 0x83
26998  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
26999  *     device ID is created to identify this device. For other non-removable
27000  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
27002  *
27003  *     -------------------------------------------------------
27004  *     removable media   hotpluggable  | Can Have Device ID
27005  *     -------------------------------------------------------
27006  *         false             false     |     Yes
27007  *         false             true      |     Yes
27008  *         true                x       |     No
27009  *     ------------------------------------------------------
27010  *
27011  *
27012  * 2. SCSI group 4 commands
27013  *
27014  *     In SCSI specs, only some commands in group 4 command set can use
27015  *     8-byte addresses that can be used to access >2TB storage spaces.
27016  *     Other commands have no such capability. Without supporting group4,
27017  *     it is impossible to make full use of storage spaces of a disk with
27018  *     capacity larger than 2TB.
27019  *
27020  *     -----------------------------------------------
27021  *     removable media   hotpluggable   LP64  |  Group
27022  *     -----------------------------------------------
27023  *           false          false       false |   1
27024  *           false          false       true  |   4
27025  *           false          true        false |   1
27026  *           false          true        true  |   4
27027  *           true             x           x   |   5
27028  *     -----------------------------------------------
27029  *
27030  *
27031  * 3. Check for VTOC Label
27032  *
27033  *     If a direct-access disk has no EFI label, sd will check if it has a
27034  *     valid VTOC label. Now, sd also does that check for removable media
27035  *     and hotpluggable devices.
27036  *
27037  *     --------------------------------------------------------------
27038  *     Direct-Access   removable media    hotpluggable |  Check Label
27039  *     -------------------------------------------------------------
27040  *         false          false           false        |   No
27041  *         false          false           true         |   No
27042  *         false          true            false        |   Yes
27043  *         false          true            true         |   Yes
27044  *         true            x                x          |   Yes
27045  *     --------------------------------------------------------------
27046  *
27047  *
27048  * 4. Building default VTOC label
27049  *
27050  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
27051  *     If those devices have no valid VTOC label, sd(7d) will attempt to
27052  *     create default VTOC for them. Currently sd creates default VTOC label
27053  *     for all devices on x86 platform (VTOC_16), but only for removable
27054  *     media devices on SPARC (VTOC_8).
27055  *
27056  *     -----------------------------------------------------------
27057  *       removable media hotpluggable platform   |   Default Label
27058  *     -----------------------------------------------------------
27059  *             false          false    sparc     |     No
27060  *             false          true      x86      |     Yes
27061  *             false          true     sparc     |     Yes
27062  *             true             x        x       |     Yes
27063  *     ----------------------------------------------------------
27064  *
27065  *
27066  * 5. Supported blocksizes of target devices
27067  *
27068  *     Sd supports non-512-byte blocksize for removable media devices only.
27069  *     For other devices, only 512-byte blocksize is supported. This may be
27070  *     changed in near future because some RAID devices require non-512-byte
27071  *     blocksize
27072  *
27073  *     -----------------------------------------------------------
27074  *     removable media    hotpluggable    | non-512-byte blocksize
27075  *     -----------------------------------------------------------
27076  *           false          false         |   No
27077  *           false          true          |   No
27078  *           true             x           |   Yes
27079  *     -----------------------------------------------------------
27080  *
27081  *
27082  * 6. Automatic mount & unmount
27083  *
 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices, and 0 for others.
27087  *
27088  *     The automatic mounting subsystem should distinguish between the types
27089  *     of devices and apply automounting policies to each.
27090  *
27091  *
27092  * 7. fdisk partition management
27093  *
27094  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
27095  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
27096  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
27097  *     fdisk partitions on both x86 and SPARC platform.
27098  *
27099  *     -----------------------------------------------------------
27100  *       platform   removable media  USB/1394  |  fdisk supported
27101  *     -----------------------------------------------------------
27102  *        x86         X               X        |       true
27103  *     ------------------------------------------------------------
27104  *        sparc       X               X        |       false
27105  *     ------------------------------------------------------------
27106  *
27107  *
27108  * 8. MBOOT/MBR
27109  *
27110  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
27111  *     read/write mboot for removable media devices on sparc platform.
27112  *
27113  *     -----------------------------------------------------------
27114  *       platform   removable media  USB/1394  |  mboot supported
27115  *     -----------------------------------------------------------
27116  *        x86         X               X        |       true
27117  *     ------------------------------------------------------------
27118  *        sparc      false           false     |       false
27119  *        sparc      false           true      |       true
27120  *        sparc      true            false     |       true
27121  *        sparc      true            true      |       true
27122  *     ------------------------------------------------------------
27123  *
27124  *
27125  * 9.  error handling during opening device
27126  *
27127  *     If failed to open a disk device, an errno is returned. For some kinds
27128  *     of errors, different errno is returned depending on if this device is
27129  *     a removable media device. This brings USB/1394 hard disks in line with
27130  *     expected hard disk behavior. It is not expected that this breaks any
27131  *     application.
27132  *
27133  *     ------------------------------------------------------
27134  *       removable media    hotpluggable   |  errno
27135  *     ------------------------------------------------------
27136  *             false          false        |   EIO
27137  *             false          true         |   EIO
27138  *             true             x          |   ENXIO
27139  *     ------------------------------------------------------
27140  *
27141  *
27142  * 11. ioctls: DKIOCEJECT, CDROMEJECT
27143  *
27144  *     These IOCTLs are applicable only to removable media devices.
27145  *
27146  *     -----------------------------------------------------------
27147  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27148  *     -----------------------------------------------------------
27149  *             false          false        |     No
27150  *             false          true         |     No
27151  *             true            x           |     Yes
27152  *     -----------------------------------------------------------
27153  *
27154  *
27155  * 12. Kstats for partitions
27156  *
27157  *     sd creates partition kstats for non-removable media devices. USB and
27158  *     Firewire hard disks now have partition kstats.
27159  *
27160  *      ------------------------------------------------------
27161  *       removable media    hotpluggable   |   kstat
27162  *      ------------------------------------------------------
27163  *             false          false        |    Yes
27164  *             false          true         |    Yes
27165  *             true             x          |    No
27166  *       ------------------------------------------------------
27167  *
27168  *
27169  * 13. Removable media & hotpluggable properties
27170  *
27171  *     The sd driver creates a "removable-media" property for removable media
27172  *     devices. Parent nexus drivers create a "hotpluggable" property if
27173  *     they support hotplugging.
27174  *
27175  *     ---------------------------------------------------------------------
27176  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27177  *     ---------------------------------------------------------------------
27178  *       false            false       |    No                   No
27179  *       false            true        |    No                   Yes
27180  *       true             false       |    Yes                  No
27181  *       true             true        |    Yes                  Yes
27182  *     ---------------------------------------------------------------------
27183  *
27184  *
27185  * 14. Power Management
27186  *
27187  *     sd only power manages removable media devices or devices that support
27188  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
27189  *
27190  *     A parent nexus that supports hotplugging can also set "pm-capable"
27191  *     if the disk can be power managed.
27192  *
27193  *     ------------------------------------------------------------
27194  *       removable media hotpluggable pm-capable  |   power manage
27195  *     ------------------------------------------------------------
27196  *             false          false     false     |     No
27197  *             false          false     true      |     Yes
27198  *             false          true      false     |     No
27199  *             false          true      true      |     Yes
27200  *             true             x        x        |     Yes
27201  *     ------------------------------------------------------------
27202  *
27203  *      USB and firewire hard disks can now be power managed independently
27204  *      of the framebuffer
27205  *
27206  *
27207  * 15. Support for USB disks with capacity larger than 1TB
27208  *
27209  *     Currently, sd doesn't permit a fixed disk device with capacity
27210  *     larger than 1TB to be used in a 32-bit operating system environment.
27211  *     However, sd doesn't do that for removable media devices. Instead, it
27212  *     assumes that removable media devices cannot have a capacity larger
27213  *     than 1TB. Therefore, using those devices on 32-bit system is partially
27214  *     supported, which can cause some unexpected results.
27215  *
27216  *     ---------------------------------------------------------------------
27217  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27218  *     ---------------------------------------------------------------------
27219  *             false          false  |   true         |     no
27220  *             false          true   |   true         |     no
27221  *             true           false  |   true         |     Yes
27222  *             true           true   |   true         |     Yes
27223  *     ---------------------------------------------------------------------
27224  *
27225  *
27226  * 16. Check write-protection at open time
27227  *
27228  *     When a removable media device is opened for writing without the NDELAY
27229  *     flag, sd checks whether the device is writable. If the device turns
27230  *     out to be write-protected, the open operation is aborted.
27231  *
27232  *     ------------------------------------------------------------
27233  *       removable media    USB/1394   |   WP Check
27234  *     ------------------------------------------------------------
27235  *             false          false    |     No
27236  *             false          true     |     No
27237  *             true           false    |     Yes
27238  *             true           true     |     Yes
27239  *     ------------------------------------------------------------
27240  *
27241  *
27242  * 17. syslog when corrupted VTOC is encountered
27243  *
27244  *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
27245  *      message for fixed SCSI disks.
27246  *     ------------------------------------------------------------
27247  *       removable media    USB/1394   |   print syslog
27248  *     ------------------------------------------------------------
27249  *             false          false    |     Yes
27250  *             false          true     |     No
27251  *             true           false    |     No
27252  *             true           true     |     No
27253  *     ------------------------------------------------------------
27254  */
27255 static void
27256 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27257 {
27258 	int	pm_capable_prop;
27259 
27260 	ASSERT(un->un_sd);
27261 	ASSERT(un->un_sd->sd_inq);
27262 
27263 	/*
27264 	 * Enable SYNC CACHE support for all devices.
27265 	 */
27266 	un->un_f_sync_cache_supported = TRUE;
27267 
27268 	if (un->un_sd->sd_inq->inq_rmb) {
27269 		/*
27270 		 * The media of this device is removable. And for this kind
27271 		 * of devices, it is possible to change medium after opening
27272 		 * devices. Thus we should support this operation.
27273 		 */
27274 		un->un_f_has_removable_media = TRUE;
27275 
27276 		/*
27277 		 * support non-512-byte blocksize of removable media devices
27278 		 */
27279 		un->un_f_non_devbsize_supported = TRUE;
27280 
27281 		/*
27282 		 * Assume that all removable media devices support DOOR_LOCK
27283 		 */
27284 		un->un_f_doorlock_supported = TRUE;
27285 
27286 		/*
27287 		 * For a removable media device, it is possible to be opened
27288 		 * with NDELAY flag when there is no media in drive, in this
27289 		 * case we don't care if device is writable. But if without
27290 		 * NDELAY flag, we need to check if media is write-protected.
27291 		 */
27292 		un->un_f_chk_wp_open = TRUE;
27293 
27294 		/*
27295 		 * need to start a SCSI watch thread to monitor media state,
27296 		 * when media is being inserted or ejected, notify syseventd.
27297 		 */
27298 		un->un_f_monitor_media_state = TRUE;
27299 
27300 		/*
27301 		 * Some devices don't support START_STOP_UNIT command.
27302 		 * Therefore, we'd better check if a device supports it
27303 		 * before sending it.
27304 		 */
27305 		un->un_f_check_start_stop = TRUE;
27306 
27307 		/*
27308 		 * support eject media ioctl:
27309 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27310 		 */
27311 		un->un_f_eject_media_supported = TRUE;
27312 
27313 		/*
27314 		 * Because many removable-media devices don't support
27315 		 * LOG_SENSE, we couldn't use this command to check if
27316 		 * a removable media device support power-management.
27317 		 * We assume that they support power-management via
27318 		 * START_STOP_UNIT command and can be spun up and down
27319 		 * without limitations.
27320 		 */
27321 		un->un_f_pm_supported = TRUE;
27322 
27323 		/*
27324 		 * Need to create a zero length (Boolean) property
27325 		 * removable-media for the removable media devices.
27326 		 * Note that the return value of the property is not being
27327 		 * checked, since if unable to create the property
27328 		 * then do not want the attach to fail altogether. Consistent
27329 		 * with other property creation in attach.
27330 		 */
27331 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27332 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27333 
27334 	} else {
27335 		/*
27336 		 * create device ID for device
27337 		 */
27338 		un->un_f_devid_supported = TRUE;
27339 
27340 		/*
27341 		 * Spin up non-removable-media devices once it is attached
27342 		 */
27343 		un->un_f_attach_spinup = TRUE;
27344 
27345 		/*
27346 		 * According to SCSI specification, Sense data has two kinds of
27347 		 * format: fixed format, and descriptor format. At present, we
27348 		 * don't support descriptor format sense data for removable
27349 		 * media.
27350 		 */
27351 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27352 			un->un_f_descr_format_supported = TRUE;
27353 		}
27354 
27355 		/*
27356 		 * kstats are created only for non-removable media devices.
27357 		 *
27358 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27359 		 * default is 1, so they are enabled by default.
27360 		 */
27361 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27362 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
27363 			"enable-partition-kstats", 1));
27364 
27365 		/*
27366 		 * Check if HBA has set the "pm-capable" property.
27367 		 * If "pm-capable" exists and is non-zero then we can
27368 		 * power manage the device without checking the start/stop
27369 		 * cycle count log sense page.
27370 		 *
27371 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27372 		 * then we should not power manage the device.
27373 		 *
27374 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27375 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27376 		 * sd will check the start/stop cycle count log sense page
27377 		 * and power manage the device if the cycle count limit has
27378 		 * not been exceeded.
27379 		 */
27380 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27381 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27382 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27383 			un->un_f_log_sense_supported = TRUE;
27384 		} else {
27385 			/*
27386 			 * pm-capable property exists.
27387 			 *
27388 			 * Convert "TRUE" values for pm_capable_prop to
27389 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27390 			 * later. "TRUE" values are any values except
27391 			 * SD_PM_CAPABLE_FALSE (0) and
27392 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27393 			 */
27394 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27395 				un->un_f_log_sense_supported = FALSE;
27396 			} else {
27397 				un->un_f_pm_supported = TRUE;
27398 			}
27399 
27400 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27401 			    "sd_unit_attach: un:0x%p pm-capable "
27402 			    "property set to %d.\n", un, un->un_f_pm_supported);
27403 		}
27404 	}
27405 
27406 	if (un->un_f_is_hotpluggable) {
27407 
27408 		/*
27409 		 * Have to watch hotpluggable devices as well, since
27410 		 * that's the only way for userland applications to
27411 		 * detect hot removal while device is busy/mounted.
27412 		 */
27413 		un->un_f_monitor_media_state = TRUE;
27414 
27415 		un->un_f_check_start_stop = TRUE;
27416 
27417 	}
27418 }
27419 
27420 /*
27421  * sd_tg_rdwr:
27422  * Provides rdwr access for cmlb via sd_tgops. The start_block is
27423  * in sys block size, req_length in bytes.
27424  *
27425  */
27426 static int
27427 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27428     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27429 {
27430 	struct sd_lun *un;
27431 	int path_flag = (int)(uintptr_t)tg_cookie;
27432 	char *dkl = NULL;
27433 	diskaddr_t real_addr = start_block;
27434 	diskaddr_t first_byte, end_block;
27435 
27436 	size_t	buffer_size = reqlength;
27437 	int rval;
27438 	diskaddr_t	cap;
27439 	uint32_t	lbasize;
27440 
27441 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27442 	if (un == NULL)
27443 		return (ENXIO);
27444 
27445 	if (cmd != TG_READ && cmd != TG_WRITE)
27446 		return (EINVAL);
27447 
27448 	mutex_enter(SD_MUTEX(un));
27449 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27450 		mutex_exit(SD_MUTEX(un));
27451 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27452 		    &lbasize, path_flag);
27453 		if (rval != 0)
27454 			return (rval);
27455 		mutex_enter(SD_MUTEX(un));
27456 		sd_update_block_info(un, lbasize, cap);
27457 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
27458 			mutex_exit(SD_MUTEX(un));
27459 			return (EIO);
27460 		}
27461 	}
27462 
27463 	if (NOT_DEVBSIZE(un)) {
27464 		/*
27465 		 * sys_blocksize != tgt_blocksize, need to re-adjust
27466 		 * blkno and save the index to beginning of dk_label
27467 		 */
27468 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27469 		real_addr = first_byte / un->un_tgt_blocksize;
27470 
27471 		end_block = (first_byte + reqlength +
27472 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27473 
27474 		/* round up buffer size to multiple of target block size */
27475 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27476 
27477 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
27478 		    "label_addr: 0x%x allocation size: 0x%x\n",
27479 		    real_addr, buffer_size);
27480 
27481 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27482 		    (reqlength % un->un_tgt_blocksize) != 0)
27483 			/* the request is not aligned */
27484 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27485 	}
27486 
27487 	/*
27488 	 * The MMC standard allows READ CAPACITY to be
27489 	 * inaccurate by a bounded amount (in the interest of
27490 	 * response latency).  As a result, failed READs are
27491 	 * commonplace (due to the reading of metadata and not
27492 	 * data). Depending on the per-Vendor/drive Sense data,
27493 	 * the failed READ can cause many (unnecessary) retries.
27494 	 */
27495 
27496 	if (ISCD(un) && (cmd == TG_READ) &&
27497 	    (un->un_f_blockcount_is_valid == TRUE) &&
27498 	    ((start_block == (un->un_blockcount - 1))||
27499 	    (start_block == (un->un_blockcount - 2)))) {
27500 			path_flag = SD_PATH_DIRECT_PRIORITY;
27501 	}
27502 
27503 	mutex_exit(SD_MUTEX(un));
27504 	if (cmd == TG_READ) {
27505 		rval = sd_send_scsi_READ(un, (dkl != NULL)? dkl: bufaddr,
27506 		    buffer_size, real_addr, path_flag);
27507 		if (dkl != NULL)
27508 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27509 			    real_addr), bufaddr, reqlength);
27510 	} else {
27511 		if (dkl) {
27512 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27513 			    real_addr, path_flag);
27514 			if (rval) {
27515 				kmem_free(dkl, buffer_size);
27516 				return (rval);
27517 			}
27518 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27519 			    real_addr), reqlength);
27520 		}
27521 		rval = sd_send_scsi_WRITE(un, (dkl != NULL)? dkl: bufaddr,
27522 		    buffer_size, real_addr, path_flag);
27523 	}
27524 
27525 	if (dkl != NULL)
27526 		kmem_free(dkl, buffer_size);
27527 
27528 	return (rval);
27529 }
27530 
27531 
27532 static int
27533 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
27534 {
27535 
27536 	struct sd_lun *un;
27537 	diskaddr_t	cap;
27538 	uint32_t	lbasize;
27539 	int		path_flag = (int)(uintptr_t)tg_cookie;
27540 	int		ret = 0;
27541 
27542 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27543 	if (un == NULL)
27544 		return (ENXIO);
27545 
27546 	switch (cmd) {
27547 	case TG_GETPHYGEOM:
27548 	case TG_GETVIRTGEOM:
27549 	case TG_GETCAPACITY:
27550 	case  TG_GETBLOCKSIZE:
27551 		mutex_enter(SD_MUTEX(un));
27552 
27553 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27554 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27555 			cap = un->un_blockcount;
27556 			lbasize = un->un_tgt_blocksize;
27557 			mutex_exit(SD_MUTEX(un));
27558 		} else {
27559 			mutex_exit(SD_MUTEX(un));
27560 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27561 			    &lbasize, path_flag);
27562 			if (ret != 0)
27563 				return (ret);
27564 			mutex_enter(SD_MUTEX(un));
27565 			sd_update_block_info(un, lbasize, cap);
27566 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27567 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27568 				mutex_exit(SD_MUTEX(un));
27569 				return (EIO);
27570 			}
27571 			mutex_exit(SD_MUTEX(un));
27572 		}
27573 
27574 		if (cmd == TG_GETCAPACITY) {
27575 			*(diskaddr_t *)arg = cap;
27576 			return (0);
27577 		}
27578 
27579 		if (cmd == TG_GETBLOCKSIZE) {
27580 			*(uint32_t *)arg = lbasize;
27581 			return (0);
27582 		}
27583 
27584 		if (cmd == TG_GETPHYGEOM)
27585 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27586 			    cap, lbasize, path_flag);
27587 		else
27588 			/* TG_GETVIRTGEOM */
27589 			ret = sd_get_virtual_geometry(un,
27590 			    (cmlb_geom_t *)arg, cap, lbasize);
27591 
27592 		return (ret);
27593 
27594 	case TG_GETATTR:
27595 		mutex_enter(SD_MUTEX(un));
27596 		((tg_attribute_t *)arg)->media_is_writable =
27597 		    un->un_f_mmc_writable_media;
27598 		mutex_exit(SD_MUTEX(un));
27599 		return (0);
27600 	default:
27601 		return (ENOTTY);
27602 
27603 	}
27604 
27605 }
27606