xref: /titanic_41/usr/src/uts/common/io/scsi/targets/sd.c (revision 4bd2082ff2d009263265d7de938de336894b6009)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/kstat.h>
41 #include <sys/vtrace.h>
42 #include <sys/note.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/efi_partition.h>
46 #include <sys/var.h>
47 #include <sys/aio_req.h>
48 
49 #ifdef __lock_lint
50 #define	_LP64
51 #define	__amd64
52 #endif
53 
54 #if (defined(__fibre))
55 /* Note: is there a leadville version of the following? */
56 #include <sys/fc4/fcal_linkapp.h>
57 #endif
58 #include <sys/taskq.h>
59 #include <sys/uuid.h>
60 #include <sys/byteorder.h>
61 #include <sys/sdt.h>
62 
63 #include "sd_xbuf.h"
64 
65 #include <sys/scsi/targets/sddef.h>
66 #include <sys/cmlb.h>
67 
68 
69 /*
70  * Loadable module info.
71  */
72 #if (defined(__fibre))
73 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
74 char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
75 #else
76 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
77 char _depends_on[]	= "misc/scsi misc/cmlb";
78 #endif
79 
80 /*
81  * Define the interconnect type, to allow the driver to distinguish
82  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
83  *
84  * This is really for backward compatibility. In the future, the driver
85  * should actually check the "interconnect-type" property as reported by
86  * the HBA; however at present this property is not defined by all HBAs,
87  * so we will use this #define (1) to permit the driver to run in
88  * backward-compatibility mode; and (2) to print a notification message
89  * if an FC HBA does not support the "interconnect-type" property.  The
90  * behavior of the driver will be to assume parallel SCSI behaviors unless
91  * the "interconnect-type" property is defined by the HBA **AND** has a
92  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
93  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
94  * Channel behaviors (as per the old ssd).  (Note that the
95  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
96  * will result in the driver assuming parallel SCSI behaviors.)
97  *
98  * (see common/sys/scsi/impl/services.h)
99  *
100  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
101  * since some FC HBAs may already support that, and there is some code in
102  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
103  * default would confuse that code, and besides, things should work fine
104  * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
105  * "interconnect-type" property.
106  *
107  */
108 #if (defined(__fibre))
109 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
110 #else
111 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
112 #endif
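
/*
 * Illustrative sketch (not compiled) of the selection policy described
 * above, assuming the HBA exports "interconnect-type" as an integer
 * property (the actual lookup in sd_unit_attach may differ in detail;
 * the default case also covers an undefined property):
 *
 *	int itype = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0,
 *	    "interconnect-type", -1);
 *	switch (itype) {
 *	case INTERCONNECT_FIBRE:
 *	case INTERCONNECT_SSA:
 *	case INTERCONNECT_FABRIC:
 *		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
 *		break;
 *	default:
 *		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
 *		break;
 *	}
 */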
113 
114 /*
115  * The name of the driver, established from the module name in _init.
116  */
117 static	char *sd_label			= NULL;
118 
119 /*
120  * The driver name is unfortunately prefixed to some driver.conf property names.
121  */
122 #if (defined(__fibre))
123 #define	sd_max_xfer_size		ssd_max_xfer_size
124 #define	sd_config_list			ssd_config_list
125 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
126 static	char *sd_config_list		= "ssd-config-list";
127 #else
128 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
129 static	char *sd_config_list		= "sd-config-list";
130 #endif
131 
132 /*
133  * Driver global variables
134  */
135 
136 #if (defined(__fibre))
137 /*
138  * These #defines are to avoid namespace collisions that occur because this
139  * code is currently used to compile two separate driver modules: sd and ssd.
140  * All global variables need to be treated this way (even if declared static)
141  * in order to allow the debugger to resolve the names properly.
142  * It is anticipated that in the near future the ssd module will be obsoleted,
143  * at which time this namespace issue should go away.
144  */
145 #define	sd_state			ssd_state
146 #define	sd_io_time			ssd_io_time
147 #define	sd_failfast_enable		ssd_failfast_enable
148 #define	sd_ua_retry_count		ssd_ua_retry_count
149 #define	sd_report_pfa			ssd_report_pfa
150 #define	sd_max_throttle			ssd_max_throttle
151 #define	sd_min_throttle			ssd_min_throttle
152 #define	sd_rot_delay			ssd_rot_delay
153 
154 #define	sd_retry_on_reservation_conflict	\
155 					ssd_retry_on_reservation_conflict
156 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
157 #define	sd_resv_conflict_name		ssd_resv_conflict_name
158 
159 #define	sd_component_mask		ssd_component_mask
160 #define	sd_level_mask			ssd_level_mask
161 #define	sd_debug_un			ssd_debug_un
162 #define	sd_error_level			ssd_error_level
163 
164 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
165 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
166 
167 #define	sd_tr				ssd_tr
168 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
169 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
170 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
171 #define	sd_check_media_time		ssd_check_media_time
172 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
173 #define	sd_label_mutex			ssd_label_mutex
174 #define	sd_detach_mutex			ssd_detach_mutex
175 #define	sd_log_buf			ssd_log_buf
176 #define	sd_log_mutex			ssd_log_mutex
177 
178 #define	sd_disk_table			ssd_disk_table
179 #define	sd_disk_table_size		ssd_disk_table_size
180 #define	sd_sense_mutex			ssd_sense_mutex
181 #define	sd_cdbtab			ssd_cdbtab
182 
183 #define	sd_cb_ops			ssd_cb_ops
184 #define	sd_ops				ssd_ops
185 #define	sd_additional_codes		ssd_additional_codes
186 #define	sd_tgops			ssd_tgops
187 
188 #define	sd_minor_data			ssd_minor_data
189 #define	sd_minor_data_efi		ssd_minor_data_efi
190 
191 #define	sd_tq				ssd_tq
192 #define	sd_wmr_tq			ssd_wmr_tq
193 #define	sd_taskq_name			ssd_taskq_name
194 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
195 #define	sd_taskq_minalloc		ssd_taskq_minalloc
196 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
197 
198 #define	sd_dump_format_string		ssd_dump_format_string
199 
200 #define	sd_iostart_chain		ssd_iostart_chain
201 #define	sd_iodone_chain			ssd_iodone_chain
202 
203 #define	sd_pm_idletime			ssd_pm_idletime
204 
205 #define	sd_force_pm_supported		ssd_force_pm_supported
206 
207 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
208 
209 #endif
210 
211 
212 #ifdef	SDDEBUG
213 int	sd_force_pm_supported		= 0;
214 #endif	/* SDDEBUG */
215 
216 void *sd_state				= NULL;
217 int sd_io_time				= SD_IO_TIME;
218 int sd_failfast_enable			= 1;
219 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
220 int sd_report_pfa			= 1;
221 int sd_max_throttle			= SD_MAX_THROTTLE;
222 int sd_min_throttle			= SD_MIN_THROTTLE;
223 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
224 int sd_qfull_throttle_enable		= TRUE;
225 
226 int sd_retry_on_reservation_conflict	= 1;
227 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
228 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
229 
230 static int sd_dtype_optical_bind	= -1;
231 
232 /* Note: the following is not a bug; it really is "sd_" and not "ssd_" */
233 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
234 
235 /*
236  * Global data for debug logging. To enable debug printing, sd_component_mask
237  * and sd_level_mask should be set to the desired bit patterns as outlined in
238  * sddef.h.
239  */
240 uint_t	sd_component_mask		= 0x0;
241 uint_t	sd_level_mask			= 0x0;
242 struct	sd_lun *sd_debug_un		= NULL;
243 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
244 
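/*
 * e.g. (illustrative) the masks can be set in /etc/system with
 *	set sd:sd_component_mask=0xffffffff
 *	set sd:sd_level_mask=0xffffffff
 * or patched on a live kernel via "mdb -kw".
 */
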
245 /* Note: these may go away in the future... */
246 static uint32_t	sd_xbuf_active_limit	= 512;
247 static uint32_t sd_xbuf_reserve_limit	= 16;
248 
249 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
250 
251 /*
252  * Timer value used to reset the throttle after it has been reduced
253  * (typically in response to TRAN_BUSY or STATUS_QFULL)
254  */
255 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
256 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
257 
258 /*
259  * Interval value associated with the media change scsi watch.
260  */
261 static int sd_check_media_time		= 3000000;
262 
263 /*
264  * Wait value used for in progress operations during a DDI_SUSPEND
265  */
266 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
267 
268 /*
269  * sd_label_mutex protects a static buffer used in the disk label
270  * component of the driver
271  */
272 static kmutex_t sd_label_mutex;
273 
274 /*
275  * sd_detach_mutex protects un_layer_count, un_detach_count, and
276  * un_opens_in_progress in the sd_lun structure.
277  */
278 static kmutex_t sd_detach_mutex;
279 
280 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
281 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
282 
283 /*
284  * Global buffer and mutex for debug logging
285  */
286 static char	sd_log_buf[1024];
287 static kmutex_t	sd_log_mutex;
288 
289 /*
290  * Structs and globals for recording attached lun information.
291  * This maintains a chain. Each node in the chain represents a SCSI controller.
292  * The structure records the number of luns attached to each target connected
293  * to the controller.
294  * For parallel SCSI devices only.
295  */
296 struct sd_scsi_hba_tgt_lun {
297 	struct sd_scsi_hba_tgt_lun	*next;
298 	dev_info_t			*pdip;
299 	int				nlun[NTARGETS_WIDE];
300 };
301 
302 /*
303  * Flag to indicate the lun is attached or detached
304  */
305 #define	SD_SCSI_LUN_ATTACH	0
306 #define	SD_SCSI_LUN_DETACH	1
307 
308 static kmutex_t	sd_scsi_target_lun_mutex;
309 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;
310 
311 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
312     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))
313 
314 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
315     sd_scsi_target_lun_head))
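
/*
 * Illustrative sketch (not compiled): counting the luns attached to a
 * target amounts to walking this chain under sd_scsi_target_lun_mutex;
 * sd_scsi_get_target_lun_count() below follows roughly this pattern:
 *
 *	struct sd_scsi_hba_tgt_lun *cp;
 *	mutex_enter(&sd_scsi_target_lun_mutex);
 *	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == dip)
 *			break;
 *	}
 *	count = (cp != NULL) ? cp->nlun[target] : -1;
 *	mutex_exit(&sd_scsi_target_lun_mutex);
 */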
316 
317 /*
318  * "Smart" Probe Caching structs, globals, #defines, etc.
319  * For parallel scsi and non-self-identify device only.
320  */
321 
322 /*
323  * The following resources and routines are implemented to support
324  * "smart" probing, which caches the scsi_probe() results in an array,
325  * in order to help avoid long probe times.
326  */
327 struct sd_scsi_probe_cache {
328 	struct	sd_scsi_probe_cache	*next;
329 	dev_info_t	*pdip;
330 	int		cache[NTARGETS_WIDE];
331 };
332 
333 static kmutex_t	sd_scsi_probe_cache_mutex;
334 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
335 
336 /*
337  * Really we only need protection on the head of the linked list, but
338  * better safe than sorry.
339  */
340 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
341     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
342 
343 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
344     sd_scsi_probe_cache_head))
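
/*
 * Illustrative sketch (not compiled): conceptually,
 * sd_scsi_probe_with_cache() consults the per-target cache and skips
 * the (slow) real probe when the target is already known not to
 * respond; the actual routine refines this:
 *
 *	if (cache[tgt] == SCSIPROBE_NORESP)
 *		return (SCSIPROBE_NORESP);
 *	rval = scsi_probe(devp, fn);
 *	cache[tgt] = rval;
 *	return (rval);
 */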
345 
346 
347 /*
348  * Vendor specific data name property declarations
349  */
350 
351 #if defined(__fibre) || defined(__i386) || defined(__amd64)
352 
353 static sd_tunables seagate_properties = {
354 	SEAGATE_THROTTLE_VALUE,
355 	0,
356 	0,
357 	0,
358 	0,
359 	0,
360 	0,
361 	0,
362 	0
363 };
364 
365 
366 static sd_tunables fujitsu_properties = {
367 	FUJITSU_THROTTLE_VALUE,
368 	0,
369 	0,
370 	0,
371 	0,
372 	0,
373 	0,
374 	0,
375 	0
376 };
377 
378 static sd_tunables ibm_properties = {
379 	IBM_THROTTLE_VALUE,
380 	0,
381 	0,
382 	0,
383 	0,
384 	0,
385 	0,
386 	0,
387 	0
388 };
389 
390 static sd_tunables purple_properties = {
391 	PURPLE_THROTTLE_VALUE,
392 	0,
393 	0,
394 	PURPLE_BUSY_RETRIES,
395 	PURPLE_RESET_RETRY_COUNT,
396 	PURPLE_RESERVE_RELEASE_TIME,
397 	0,
398 	0,
399 	0
400 };
401 
402 static sd_tunables sve_properties = {
403 	SVE_THROTTLE_VALUE,
404 	0,
405 	0,
406 	SVE_BUSY_RETRIES,
407 	SVE_RESET_RETRY_COUNT,
408 	SVE_RESERVE_RELEASE_TIME,
409 	SVE_MIN_THROTTLE_VALUE,
410 	SVE_DISKSORT_DISABLED_FLAG,
411 	0
412 };
413 
414 static sd_tunables maserati_properties = {
415 	0,
416 	0,
417 	0,
418 	0,
419 	0,
420 	0,
421 	0,
422 	MASERATI_DISKSORT_DISABLED_FLAG,
423 	MASERATI_LUN_RESET_ENABLED_FLAG
424 };
425 
426 static sd_tunables pirus_properties = {
427 	PIRUS_THROTTLE_VALUE,
428 	0,
429 	PIRUS_NRR_COUNT,
430 	PIRUS_BUSY_RETRIES,
431 	PIRUS_RESET_RETRY_COUNT,
432 	0,
433 	PIRUS_MIN_THROTTLE_VALUE,
434 	PIRUS_DISKSORT_DISABLED_FLAG,
435 	PIRUS_LUN_RESET_ENABLED_FLAG
436 };
437 
438 #endif
439 
440 #if (defined(__sparc) && !defined(__fibre)) || \
441 	(defined(__i386) || defined(__amd64))
442 
443 
444 static sd_tunables elite_properties = {
445 	ELITE_THROTTLE_VALUE,
446 	0,
447 	0,
448 	0,
449 	0,
450 	0,
451 	0,
452 	0,
453 	0
454 };
455 
456 static sd_tunables st31200n_properties = {
457 	ST31200N_THROTTLE_VALUE,
458 	0,
459 	0,
460 	0,
461 	0,
462 	0,
463 	0,
464 	0,
465 	0
466 };
467 
468 #endif /* sparc non-fibre or x86 */
469 
470 static sd_tunables lsi_properties_scsi = {
471 	LSI_THROTTLE_VALUE,
472 	0,
473 	LSI_NOTREADY_RETRIES,
474 	0,
475 	0,
476 	0,
477 	0,
478 	0,
479 	0
480 };
481 
482 static sd_tunables symbios_properties = {
483 	SYMBIOS_THROTTLE_VALUE,
484 	0,
485 	SYMBIOS_NOTREADY_RETRIES,
486 	0,
487 	0,
488 	0,
489 	0,
490 	0,
491 	0
492 };
493 
494 static sd_tunables lsi_properties = {
495 	0,
496 	0,
497 	LSI_NOTREADY_RETRIES,
498 	0,
499 	0,
500 	0,
501 	0,
502 	0,
503 	0
504 };
505 
506 static sd_tunables lsi_oem_properties = {
507 	0,
508 	0,
509 	LSI_OEM_NOTREADY_RETRIES,
510 	0,
511 	0,
512 	0,
513 	0,
514 	0,
515 	0
516 };
517 
518 
519 
520 #if (defined(SD_PROP_TST))
521 
522 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
523 #define	SD_TST_THROTTLE_VAL	16
524 #define	SD_TST_NOTREADY_VAL	12
525 #define	SD_TST_BUSY_VAL		60
526 #define	SD_TST_RST_RETRY_VAL	36
527 #define	SD_TST_RSV_REL_TIME	60
528 
529 static sd_tunables tst_properties = {
530 	SD_TST_THROTTLE_VAL,
531 	SD_TST_CTYPE_VAL,
532 	SD_TST_NOTREADY_VAL,
533 	SD_TST_BUSY_VAL,
534 	SD_TST_RST_RETRY_VAL,
535 	SD_TST_RSV_REL_TIME,
536 	0,
537 	0,
538 	0
539 };
540 #endif
541 
542 /* This is similar to the ANSI toupper implementation */
543 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
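
/*
 * e.g. SD_TOUPPER('c') == 'C' and SD_TOUPPER('C') == 'C'.  Unlike
 * toupper(3C), the macro evaluates its argument more than once, so
 * arguments must not have side effects (no SD_TOUPPER(*p++)).
 */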
544 
545 /*
546  * Static Driver Configuration Table
547  *
548  * This is the table of disks which need throttle adjustment (or, perhaps,
549  * other adjustments as defined by the flags at a future time).  device_id
550  * is a string consisting of the concatenated vid (vendor), pid (product/model)
551  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
552  * the parts of the string are as defined by the sizes in the scsi_inquiry
553  * structure.  A device is matched against only as much of the device_id
554  * string as is defined.  Flags define which values are to be set in the
555  * driver from the properties list.
556  *
557  * Entries below which begin and end with a "*" are a special case.
558  * These do not have a specific vendor, and the string which follows
559  * can appear anywhere in the 16 byte PID portion of the inquiry data.
560  *
561  * Entries below which begin and end with a " " (blank) are a special
562  * case. The comparison function will treat multiple consecutive blanks
563  * as equivalent to a single blank. For example, this causes a
564  * sd_disk_table entry of " NEC CDROM " to match a device's id string
565  * of  "NEC       CDROM".
566  *
567  * Note: The MD21 controller type has been obsoleted.
568  *	 ST318202F is a Legacy device
569  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
570  *	 made with an FC connection. The entries here are a legacy.
571  */
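
/*
 * Illustrative sketch (not compiled) of the blank-compaction rule
 * described above, as implemented by sd_blank_cmp(): both strings are
 * scanned in step, with each run of consecutive blanks treated as a
 * single blank:
 *
 *	while (*id != '\0' && *inq != '\0') {
 *		if (*id == ' ' && *inq == ' ') {
 *			while (*id == ' ')
 *				id++;
 *			while (*inq == ' ')
 *				inq++;
 *		} else if (*id++ != *inq++) {
 *			return (SD_FAILURE);
 *		}
 *	}
 */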
572 static sd_disk_config_t sd_disk_table[] = {
573 #if defined(__fibre) || defined(__i386) || defined(__amd64)
574 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
575 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
576 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
577 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
578 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
579 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
580 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
581 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
582 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
583 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
584 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
585 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
586 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
589 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
590 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
591 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
592 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
593 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
594 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
595 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
596 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
597 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
598 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
599 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
600 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
601 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
602 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
603 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
604 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
605 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
606 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
607 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
608 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
609 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
610 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
611 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
612 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
613 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
620 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
621 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
622 			SD_CONF_BSET_BSY_RETRY_COUNT|
623 			SD_CONF_BSET_RST_RETRIES|
624 			SD_CONF_BSET_RSV_REL_TIME,
625 		&purple_properties },
626 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
627 		SD_CONF_BSET_BSY_RETRY_COUNT|
628 		SD_CONF_BSET_RST_RETRIES|
629 		SD_CONF_BSET_RSV_REL_TIME|
630 		SD_CONF_BSET_MIN_THROTTLE|
631 		SD_CONF_BSET_DISKSORT_DISABLED,
632 		&sve_properties },
633 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
634 			SD_CONF_BSET_BSY_RETRY_COUNT|
635 			SD_CONF_BSET_RST_RETRIES|
636 			SD_CONF_BSET_RSV_REL_TIME,
637 		&purple_properties },
638 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
639 		SD_CONF_BSET_LUN_RESET_ENABLED,
640 		&maserati_properties },
641 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
642 		SD_CONF_BSET_NRR_COUNT|
643 		SD_CONF_BSET_BSY_RETRY_COUNT|
644 		SD_CONF_BSET_RST_RETRIES|
645 		SD_CONF_BSET_MIN_THROTTLE|
646 		SD_CONF_BSET_DISKSORT_DISABLED|
647 		SD_CONF_BSET_LUN_RESET_ENABLED,
648 		&pirus_properties },
649 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
650 		SD_CONF_BSET_NRR_COUNT|
651 		SD_CONF_BSET_BSY_RETRY_COUNT|
652 		SD_CONF_BSET_RST_RETRIES|
653 		SD_CONF_BSET_MIN_THROTTLE|
654 		SD_CONF_BSET_DISKSORT_DISABLED|
655 		SD_CONF_BSET_LUN_RESET_ENABLED,
656 		&pirus_properties },
657 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
658 		SD_CONF_BSET_NRR_COUNT|
659 		SD_CONF_BSET_BSY_RETRY_COUNT|
660 		SD_CONF_BSET_RST_RETRIES|
661 		SD_CONF_BSET_MIN_THROTTLE|
662 		SD_CONF_BSET_DISKSORT_DISABLED|
663 		SD_CONF_BSET_LUN_RESET_ENABLED,
664 		&pirus_properties },
665 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
666 		SD_CONF_BSET_NRR_COUNT|
667 		SD_CONF_BSET_BSY_RETRY_COUNT|
668 		SD_CONF_BSET_RST_RETRIES|
669 		SD_CONF_BSET_MIN_THROTTLE|
670 		SD_CONF_BSET_DISKSORT_DISABLED|
671 		SD_CONF_BSET_LUN_RESET_ENABLED,
672 		&pirus_properties },
673 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
674 		SD_CONF_BSET_NRR_COUNT|
675 		SD_CONF_BSET_BSY_RETRY_COUNT|
676 		SD_CONF_BSET_RST_RETRIES|
677 		SD_CONF_BSET_MIN_THROTTLE|
678 		SD_CONF_BSET_DISKSORT_DISABLED|
679 		SD_CONF_BSET_LUN_RESET_ENABLED,
680 		&pirus_properties },
681 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
682 		SD_CONF_BSET_NRR_COUNT|
683 		SD_CONF_BSET_BSY_RETRY_COUNT|
684 		SD_CONF_BSET_RST_RETRIES|
685 		SD_CONF_BSET_MIN_THROTTLE|
686 		SD_CONF_BSET_DISKSORT_DISABLED|
687 		SD_CONF_BSET_LUN_RESET_ENABLED,
688 		&pirus_properties },
689 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
690 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
691 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
692 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
693 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
694 #endif /* fibre or NON-sparc platforms */
695 #if ((defined(__sparc) && !defined(__fibre)) || \
696 	(defined(__i386) || defined(__amd64)))
697 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
698 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
699 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
700 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
701 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
702 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
703 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
704 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
705 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
706 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
707 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
708 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
709 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
710 	    &symbios_properties },
711 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
712 	    &lsi_properties_scsi },
713 #if defined(__i386) || defined(__amd64)
714 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
715 				    | SD_CONF_BSET_READSUB_BCD
716 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
717 				    | SD_CONF_BSET_NO_READ_HEADER
718 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
719 
720 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
721 				    | SD_CONF_BSET_READSUB_BCD
722 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
723 				    | SD_CONF_BSET_NO_READ_HEADER
724 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
725 #endif /* __i386 || __amd64 */
726 #endif /* sparc NON-fibre or NON-sparc platforms */
727 
728 #if (defined(SD_PROP_TST))
729 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
730 				| SD_CONF_BSET_CTYPE
731 				| SD_CONF_BSET_NRR_COUNT
732 				| SD_CONF_BSET_FAB_DEVID
733 				| SD_CONF_BSET_NOCACHE
734 				| SD_CONF_BSET_BSY_RETRY_COUNT
735 				| SD_CONF_BSET_PLAYMSF_BCD
736 				| SD_CONF_BSET_READSUB_BCD
737 				| SD_CONF_BSET_READ_TOC_TRK_BCD
738 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
739 				| SD_CONF_BSET_NO_READ_HEADER
740 				| SD_CONF_BSET_READ_CD_XD4
741 				| SD_CONF_BSET_RST_RETRIES
742 				| SD_CONF_BSET_RSV_REL_TIME
743 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
744 #endif
745 };
746 
747 static const int sd_disk_table_size =
748 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
749 
750 
751 
752 #define	SD_INTERCONNECT_PARALLEL	0
753 #define	SD_INTERCONNECT_FABRIC		1
754 #define	SD_INTERCONNECT_FIBRE		2
755 #define	SD_INTERCONNECT_SSA		3
756 #define	SD_INTERCONNECT_SATA		4
757 #define	SD_IS_PARALLEL_SCSI(un)		\
758 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
759 #define	SD_IS_SERIAL(un)		\
760 	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)
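
/*
 * e.g. SD_IS_PARALLEL_SCSI(un) gates behaviors, such as the "smart"
 * probe caching above, that only make sense on a parallel SCSI bus.
 */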
761 
762 /*
763  * Definitions used by device id registration routines
764  */
765 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
766 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
767 #define	VPD_MODE_PAGE		1	/* offset into vpd page for "page code" */
768 
769 static kmutex_t sd_sense_mutex = {0};
770 
771 /*
772  * Macros for updates of the driver state
773  */
774 #define	New_state(un, s)        \
775 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
776 #define	Restore_state(un)	\
777 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
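
/*
 * Illustrative usage: a suspend path might do
 *	New_state(un, SD_STATE_SUSPENDED);
 * and the matching resume path simply
 *	Restore_state(un);
 * to return to whatever state preceded the suspend.  Callers hold
 * SD_MUTEX(un) at the actual call sites.
 */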
778 
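/*
 * Per-CDB-group limits (see struct sd_cdbinfo in sddef.h): each entry
 * gives the CDB group length, the group code, the largest addressable
 * LBA, and the largest transfer length in blocks.  For example, group 0
 * (6-byte) CDBs carry a 21-bit LBA and an 8-bit count, while group 4
 * (16-byte) CDBs carry a 64-bit LBA and a 32-bit count.
 */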
779 static struct sd_cdbinfo sd_cdbtab[] = {
780 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
781 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
782 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
783 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
784 };
785 
786 /*
787  * Specifies the number of seconds that must have elapsed since the last
788  * command completed for a device to be declared idle to the PM framework.
789  */
790 static int sd_pm_idletime = 1;
791 
792 /*
793  * Internal function prototypes
794  */
795 
796 #if (defined(__fibre))
797 /*
798  * These #defines are to avoid namespace collisions that occur because this
799  * code is currently used to compile two separate driver modules: sd and ssd.
800  * All function names need to be treated this way (even if declared static)
801  * in order to allow the debugger to resolve the names properly.
802  * It is anticipated that in the near future the ssd module will be obsoleted,
803  * at which time this ugliness should go away.
804  */
805 #define	sd_log_trace			ssd_log_trace
806 #define	sd_log_info			ssd_log_info
807 #define	sd_log_err			ssd_log_err
808 #define	sdprobe				ssdprobe
809 #define	sdinfo				ssdinfo
810 #define	sd_prop_op			ssd_prop_op
811 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
812 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
813 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
814 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
815 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
816 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
817 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
818 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
819 #define	sd_spin_up_unit			ssd_spin_up_unit
820 #define	sd_enable_descr_sense		ssd_enable_descr_sense
821 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
822 #define	sd_set_mmc_caps			ssd_set_mmc_caps
823 #define	sd_read_unit_properties		ssd_read_unit_properties
824 #define	sd_process_sdconf_file		ssd_process_sdconf_file
825 #define	sd_process_sdconf_table		ssd_process_sdconf_table
826 #define	sd_sdconf_id_match		ssd_sdconf_id_match
827 #define	sd_blank_cmp			ssd_blank_cmp
828 #define	sd_chk_vers1_data		ssd_chk_vers1_data
829 #define	sd_set_vers1_properties		ssd_set_vers1_properties
830 
831 #define	sd_get_physical_geometry	ssd_get_physical_geometry
832 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
833 #define	sd_update_block_info		ssd_update_block_info
834 #define	sd_register_devid		ssd_register_devid
835 #define	sd_get_devid			ssd_get_devid
836 #define	sd_create_devid			ssd_create_devid
837 #define	sd_write_deviceid		ssd_write_deviceid
838 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
839 #define	sd_setup_pm			ssd_setup_pm
840 #define	sd_create_pm_components		ssd_create_pm_components
841 #define	sd_ddi_suspend			ssd_ddi_suspend
842 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
843 #define	sd_ddi_resume			ssd_ddi_resume
844 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
845 #define	sdpower				ssdpower
846 #define	sdattach			ssdattach
847 #define	sddetach			ssddetach
848 #define	sd_unit_attach			ssd_unit_attach
849 #define	sd_unit_detach			ssd_unit_detach
850 #define	sd_set_unit_attributes		ssd_set_unit_attributes
851 #define	sd_create_errstats		ssd_create_errstats
852 #define	sd_set_errstats			ssd_set_errstats
853 #define	sd_set_pstats			ssd_set_pstats
854 #define	sddump				ssddump
855 #define	sd_scsi_poll			ssd_scsi_poll
856 #define	sd_send_polled_RQS		ssd_send_polled_RQS
857 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
858 #define	sd_init_event_callbacks		ssd_init_event_callbacks
859 #define	sd_event_callback		ssd_event_callback
860 #define	sd_cache_control		ssd_cache_control
861 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
862 #define	sd_make_device			ssd_make_device
863 #define	sdopen				ssdopen
864 #define	sdclose				ssdclose
865 #define	sd_ready_and_valid		ssd_ready_and_valid
866 #define	sdmin				ssdmin
867 #define	sdread				ssdread
868 #define	sdwrite				ssdwrite
869 #define	sdaread				ssdaread
870 #define	sdawrite			ssdawrite
871 #define	sdstrategy			ssdstrategy
872 #define	sdioctl				ssdioctl
873 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
874 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
875 #define	sd_checksum_iostart		ssd_checksum_iostart
876 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
877 #define	sd_pm_iostart			ssd_pm_iostart
878 #define	sd_core_iostart			ssd_core_iostart
879 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
880 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
881 #define	sd_checksum_iodone		ssd_checksum_iodone
882 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
883 #define	sd_pm_iodone			ssd_pm_iodone
884 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
885 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
886 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
887 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
888 #define	sd_buf_iodone			ssd_buf_iodone
889 #define	sd_uscsi_strategy		ssd_uscsi_strategy
890 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
891 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
892 #define	sd_uscsi_iodone			ssd_uscsi_iodone
893 #define	sd_xbuf_strategy		ssd_xbuf_strategy
894 #define	sd_xbuf_init			ssd_xbuf_init
895 #define	sd_pm_entry			ssd_pm_entry
896 #define	sd_pm_exit			ssd_pm_exit
897 
898 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
899 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
900 
901 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
902 #define	sdintr				ssdintr
903 #define	sd_start_cmds			ssd_start_cmds
904 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
905 #define	sd_bioclone_alloc		ssd_bioclone_alloc
906 #define	sd_bioclone_free		ssd_bioclone_free
907 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
908 #define	sd_shadow_buf_free		ssd_shadow_buf_free
909 #define	sd_print_transport_rejected_message	\
910 					ssd_print_transport_rejected_message
911 #define	sd_retry_command		ssd_retry_command
912 #define	sd_set_retry_bp			ssd_set_retry_bp
913 #define	sd_send_request_sense_command	ssd_send_request_sense_command
914 #define	sd_start_retry_command		ssd_start_retry_command
915 #define	sd_start_direct_priority_command	\
916 					ssd_start_direct_priority_command
917 #define	sd_return_failed_command	ssd_return_failed_command
918 #define	sd_return_failed_command_no_restart	\
919 					ssd_return_failed_command_no_restart
920 #define	sd_return_command		ssd_return_command
921 #define	sd_sync_with_callback		ssd_sync_with_callback
922 #define	sdrunout			ssdrunout
923 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
924 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
925 #define	sd_reduce_throttle		ssd_reduce_throttle
926 #define	sd_restore_throttle		ssd_restore_throttle
927 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
928 #define	sd_init_cdb_limits		ssd_init_cdb_limits
929 #define	sd_pkt_status_good		ssd_pkt_status_good
930 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
931 #define	sd_pkt_status_busy		ssd_pkt_status_busy
932 #define	sd_pkt_status_reservation_conflict	\
933 					ssd_pkt_status_reservation_conflict
934 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
935 #define	sd_handle_request_sense		ssd_handle_request_sense
936 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
937 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
938 #define	sd_validate_sense_data		ssd_validate_sense_data
939 #define	sd_decode_sense			ssd_decode_sense
940 #define	sd_print_sense_msg		ssd_print_sense_msg
941 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
942 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
943 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
944 #define	sd_sense_key_medium_or_hardware_error	\
945 					ssd_sense_key_medium_or_hardware_error
946 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
947 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
948 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
949 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
950 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
951 #define	sd_sense_key_default		ssd_sense_key_default
952 #define	sd_print_retry_msg		ssd_print_retry_msg
953 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
954 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
955 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
956 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
957 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
958 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
959 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
960 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
961 #define	sd_pkt_reason_default		ssd_pkt_reason_default
962 #define	sd_reset_target			ssd_reset_target
963 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
964 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
965 #define	sd_taskq_create			ssd_taskq_create
966 #define	sd_taskq_delete			ssd_taskq_delete
967 #define	sd_media_change_task		ssd_media_change_task
968 #define	sd_handle_mchange		ssd_handle_mchange
969 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
970 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
971 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
972 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
973 #define	sd_send_scsi_feature_GET_CONFIGURATION	\
974 					ssd_send_scsi_feature_GET_CONFIGURATION
975 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
976 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
977 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
978 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
979 					ssd_send_scsi_PERSISTENT_RESERVE_IN
980 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
981 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
982 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
983 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
984 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
985 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
986 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
987 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
988 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
989 #define	sd_alloc_rqs			ssd_alloc_rqs
990 #define	sd_free_rqs			ssd_free_rqs
991 #define	sd_dump_memory			ssd_dump_memory
992 #define	sd_get_media_info		ssd_get_media_info
993 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
994 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
995 #define	sd_setup_next_xfer		ssd_setup_next_xfer
996 #define	sd_dkio_get_temp		ssd_dkio_get_temp
997 #define	sd_check_mhd			ssd_check_mhd
998 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
999 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1000 #define	sd_sname			ssd_sname
1001 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1002 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1003 #define	sd_take_ownership		ssd_take_ownership
1004 #define	sd_reserve_release		ssd_reserve_release
1005 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1006 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1007 #define	sd_persistent_reservation_in_read_keys	\
1008 					ssd_persistent_reservation_in_read_keys
1009 #define	sd_persistent_reservation_in_read_resv	\
1010 					ssd_persistent_reservation_in_read_resv
1011 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1012 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1013 #define	sd_mhdioc_release		ssd_mhdioc_release
1014 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1015 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1016 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1017 #define	sr_change_blkmode		ssr_change_blkmode
1018 #define	sr_change_speed			ssr_change_speed
1019 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1020 #define	sr_pause_resume			ssr_pause_resume
1021 #define	sr_play_msf			ssr_play_msf
1022 #define	sr_play_trkind			ssr_play_trkind
1023 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1024 #define	sr_read_subchannel		ssr_read_subchannel
1025 #define	sr_read_tocentry		ssr_read_tocentry
1026 #define	sr_read_tochdr			ssr_read_tochdr
1027 #define	sr_read_cdda			ssr_read_cdda
1028 #define	sr_read_cdxa			ssr_read_cdxa
1029 #define	sr_read_mode1			ssr_read_mode1
1030 #define	sr_read_mode2			ssr_read_mode2
1031 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1032 #define	sr_sector_mode			ssr_sector_mode
1033 #define	sr_eject			ssr_eject
1034 #define	sr_ejected			ssr_ejected
1035 #define	sr_check_wp			ssr_check_wp
1036 #define	sd_check_media			ssd_check_media
1037 #define	sd_media_watch_cb		ssd_media_watch_cb
1038 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1039 #define	sr_volume_ctrl			ssr_volume_ctrl
1040 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1041 #define	sd_log_page_supported		ssd_log_page_supported
1042 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1043 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1044 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1045 #define	sd_range_lock			ssd_range_lock
1046 #define	sd_get_range			ssd_get_range
1047 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1048 #define	sd_range_unlock			ssd_range_unlock
1049 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1050 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1051 
1052 #define	sd_iostart_chain		ssd_iostart_chain
1053 #define	sd_iodone_chain			ssd_iodone_chain
1054 #define	sd_initpkt_map			ssd_initpkt_map
1055 #define	sd_destroypkt_map		ssd_destroypkt_map
1056 #define	sd_chain_type_map		ssd_chain_type_map
1057 #define	sd_chain_index_map		ssd_chain_index_map
1058 
1059 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1060 #define	sd_failfast_flushq		ssd_failfast_flushq
1061 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1062 
1063 #define	sd_is_lsi			ssd_is_lsi
1064 #define	sd_tg_rdwr			ssd_tg_rdwr
1065 #define	sd_tg_getinfo			ssd_tg_getinfo
1066 
1067 #endif	/* #if (defined(__fibre)) */
1068 
1069 
1070 int _init(void);
1071 int _fini(void);
1072 int _info(struct modinfo *modinfop);
1073 
1074 /*PRINTFLIKE3*/
1075 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1076 /*PRINTFLIKE3*/
1077 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1078 /*PRINTFLIKE3*/
1079 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1080 
1081 static int sdprobe(dev_info_t *devi);
1082 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1083     void **result);
1084 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1085     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1086 
1087 /*
1088  * Smart probe for parallel scsi
1089  */
1090 static void sd_scsi_probe_cache_init(void);
1091 static void sd_scsi_probe_cache_fini(void);
1092 static void sd_scsi_clear_probe_cache(void);
1093 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1094 
1095 /*
1096  * Attached luns on target for parallel scsi
1097  */
1098 static void sd_scsi_target_lun_init(void);
1099 static void sd_scsi_target_lun_fini(void);
1100 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
1101 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1102 
1103 static int	sd_spin_up_unit(struct sd_lun *un);
1104 #ifdef _LP64
1105 static void	sd_enable_descr_sense(struct sd_lun *un);
1106 static void	sd_reenable_dsense_task(void *arg);
1107 #endif /* _LP64 */
1108 
1109 static void	sd_set_mmc_caps(struct sd_lun *un);
1110 
1111 static void sd_read_unit_properties(struct sd_lun *un);
1112 static int  sd_process_sdconf_file(struct sd_lun *un);
1113 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1114     int *data_list, sd_tunables *values);
1115 static void sd_process_sdconf_table(struct sd_lun *un);
1116 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1117 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1118 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1119 	int list_len, char *dataname_ptr);
1120 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1121     sd_tunables *prop_list);
1122 
1123 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1124     int reservation_flag);
1125 static int  sd_get_devid(struct sd_lun *un);
1126 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1127 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1128 static int  sd_write_deviceid(struct sd_lun *un);
1129 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1130 static int  sd_check_vpd_page_support(struct sd_lun *un);
1131 
1132 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1133 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1134 
1135 static int  sd_ddi_suspend(dev_info_t *devi);
1136 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1137 static int  sd_ddi_resume(dev_info_t *devi);
1138 static int  sd_ddi_pm_resume(struct sd_lun *un);
1139 static int  sdpower(dev_info_t *devi, int component, int level);
1140 
1141 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1142 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1143 static int  sd_unit_attach(dev_info_t *devi);
1144 static int  sd_unit_detach(dev_info_t *devi);
1145 
1146 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1147 static void sd_create_errstats(struct sd_lun *un, int instance);
1148 static void sd_set_errstats(struct sd_lun *un);
1149 static void sd_set_pstats(struct sd_lun *un);
1150 
1151 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1152 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1153 static int  sd_send_polled_RQS(struct sd_lun *un);
1154 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1155 
1156 #if (defined(__fibre))
1157 /*
1158  * Event callbacks (photon)
1159  */
1160 static void sd_init_event_callbacks(struct sd_lun *un);
1161 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1162 #endif
1163 
1164 /*
1165  * Defines for sd_cache_control
1166  */
1167 
1168 #define	SD_CACHE_ENABLE		1
1169 #define	SD_CACHE_DISABLE	0
1170 #define	SD_CACHE_NOCHANGE	-1
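
/*
 * rcd_flag controls the read cache and wce_flag the write cache; e.g.
 * (illustrative) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE)
 * enables the write cache and leaves the read cache alone.  The flags
 * map onto the RCD and WCE bits of the caching mode page.
 */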
1171 
1172 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1173 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1174 static dev_t sd_make_device(dev_info_t *devi);
1175 
1176 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1177 	uint64_t capacity);
1178 
1179 /*
1180  * Driver entry point functions.
1181  */
1182 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1183 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1184 static int  sd_ready_and_valid(struct sd_lun *un);
1185 
1186 static void sdmin(struct buf *bp);
1187 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1188 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1189 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1190 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1191 
1192 static int sdstrategy(struct buf *bp);
1193 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1194 
1195 /*
1196  * Function prototypes for layering functions in the iostart chain.
1197  */
1198 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1199 	struct buf *bp);
1200 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1201 	struct buf *bp);
1202 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1203 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1204 	struct buf *bp);
1205 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1206 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1207 
1208 /*
1209  * Function prototypes for layering functions in the iodone chain.
1210  */
1211 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1212 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1213 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1214 	struct buf *bp);
1215 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1216 	struct buf *bp);
1217 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1218 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1219 	struct buf *bp);
1220 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1221 
1222 /*
1223  * Prototypes for functions to support buf(9S) based IO.
1224  */
1225 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1226 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1227 static void sd_destroypkt_for_buf(struct buf *);
1228 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1229 	struct buf *bp, int flags,
1230 	int (*callback)(caddr_t), caddr_t callback_arg,
1231 	diskaddr_t lba, uint32_t blockcount);
1232 #if defined(__i386) || defined(__amd64)
1233 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1234 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1235 #endif /* defined(__i386) || defined(__amd64) */
1236 
1237 /*
1238  * Prototypes for functions to support USCSI IO.
1239  */
1240 static int sd_uscsi_strategy(struct buf *bp);
1241 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1242 static void sd_destroypkt_for_uscsi(struct buf *);
1243 
1244 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1245 	uchar_t chain_type, void *pktinfop);
1246 
1247 static int  sd_pm_entry(struct sd_lun *un);
1248 static void sd_pm_exit(struct sd_lun *un);
1249 
1250 static void sd_pm_idletimeout_handler(void *arg);
1251 
1252 /*
1253  * sd_core internal functions (used at the sd_core_io layer).
1254  */
1255 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1256 static void sdintr(struct scsi_pkt *pktp);
1257 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1258 
1259 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1260 	enum uio_seg dataspace, int path_flag);
1261 
1262 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1263 	daddr_t blkno, int (*func)(struct buf *));
1264 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1265 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1266 static void sd_bioclone_free(struct buf *bp);
1267 static void sd_shadow_buf_free(struct buf *bp);
1268 
1269 static void sd_print_transport_rejected_message(struct sd_lun *un,
1270 	struct sd_xbuf *xp, int code);
1271 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1272     void *arg, int code);
1273 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1274     void *arg, int code);
1275 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1276     void *arg, int code);
1277 
1278 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1279 	int retry_check_flag,
1280 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1281 		int c),
1282 	void *user_arg, int failure_code,  clock_t retry_delay,
1283 	void (*statp)(kstat_io_t *));
1284 
1285 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1286 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1287 
1288 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1289 	struct scsi_pkt *pktp);
1290 static void sd_start_retry_command(void *arg);
1291 static void sd_start_direct_priority_command(void *arg);
1292 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1293 	int errcode);
1294 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1295 	struct buf *bp, int errcode);
1296 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1297 static void sd_sync_with_callback(struct sd_lun *un);
1298 static int sdrunout(caddr_t arg);
1299 
1300 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1301 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1302 
1303 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1304 static void sd_restore_throttle(void *arg);
1305 
1306 static void sd_init_cdb_limits(struct sd_lun *un);
1307 
1308 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1309 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1310 
1311 /*
1312  * Error handling functions
1313  */
1314 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1315 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1316 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1317 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1318 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1319 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1320 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1321 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1322 
1323 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1324 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1325 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1326 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1327 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1328 	struct sd_xbuf *xp);
1329 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1330 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1331 
1332 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1333 	void *arg, int code);
1334 
1335 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1336 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1337 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1338 	uint8_t *sense_datap,
1339 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1340 static void sd_sense_key_not_ready(struct sd_lun *un,
1341 	uint8_t *sense_datap,
1342 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1343 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1344 	uint8_t *sense_datap,
1345 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1346 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1347 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1348 static void sd_sense_key_unit_attention(struct sd_lun *un,
1349 	uint8_t *sense_datap,
1350 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1351 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1352 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1353 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1354 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1355 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1356 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1357 static void sd_sense_key_default(struct sd_lun *un,
1358 	uint8_t *sense_datap,
1359 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1360 
1361 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1362 	void *arg, int flag);
1363 
1364 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1365 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1366 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1369 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1370 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1371 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1372 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1377 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1382 
1383 static void sd_start_stop_unit_callback(void *arg);
1384 static void sd_start_stop_unit_task(void *arg);
1385 
1386 static void sd_taskq_create(void);
1387 static void sd_taskq_delete(void);
1388 static void sd_media_change_task(void *arg);
1389 
1390 static int sd_handle_mchange(struct sd_lun *un);
1391 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1392 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1393 	uint32_t *lbap, int path_flag);
1394 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1395 	uint32_t *lbap, int path_flag);
1396 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1397 	int path_flag);
1398 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1399 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1400 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1401 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1402 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1403 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1404 	uchar_t usr_cmd, uchar_t *usr_bufp);
1405 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1406 	struct dk_callback *dkc);
1407 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1408 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1409 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1410 	uchar_t *bufaddr, uint_t buflen, int path_flag);
1411 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1412 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1413 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1414 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1415 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1416 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1417 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1418 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1419 	size_t buflen, daddr_t start_block, int path_flag);
1420 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1421 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1422 	path_flag)
1423 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1424 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block, \
1425 	path_flag)
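
/*
 * Editor's note (illustrative, not part of the original source): a call
 * such as
 *
 *	sd_send_scsi_READ(un, bufaddr, buflen, start_block, SD_PATH_DIRECT);
 *
 * expands to
 *
 *	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block,
 *	    SD_PATH_DIRECT);
 *
 * so both the READ and WRITE wrappers share the single sd_send_scsi_RDWR()
 * implementation.
 */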
1426 
1427 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1428 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1429 	uint16_t param_ptr, int path_flag);
1430 
1431 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1432 static void sd_free_rqs(struct sd_lun *un);
1433 
1434 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1435 	uchar_t *data, int len, int fmt);
1436 static void sd_panic_for_res_conflict(struct sd_lun *un);
1437 
1438 /*
1439  * Disk Ioctl Function Prototypes
1440  */
1441 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1442 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1443 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1444 
1445 /*
1446  * Multi-host Ioctl Prototypes
1447  */
1448 static int sd_check_mhd(dev_t dev, int interval);
1449 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1450 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1451 static char *sd_sname(uchar_t status);
1452 static void sd_mhd_resvd_recover(void *arg);
1453 static void sd_resv_reclaim_thread(void);
1454 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1455 static int sd_reserve_release(dev_t dev, int cmd);
1456 static void sd_rmv_resv_reclaim_req(dev_t dev);
1457 static void sd_mhd_reset_notify_cb(caddr_t arg);
1458 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1459 	mhioc_inkeys_t *usrp, int flag);
1460 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1461 	mhioc_inresvs_t *usrp, int flag);
1462 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1463 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_release(dev_t dev);
1465 static int sd_mhdioc_register_devid(dev_t dev);
1466 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1467 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1468 
1469 /*
1470  * SCSI removable prototypes
1471  */
1472 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1473 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_pause_resume(dev_t dev, int mode);
1476 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1477 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1478 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1488 static int sr_eject(dev_t dev);
1489 static void sr_ejected(struct sd_lun *un);
1490 static int sr_check_wp(dev_t dev);
1491 static int sd_check_media(dev_t dev, enum dkio_state state);
1492 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1493 static void sd_delayed_cv_broadcast(void *arg);
1494 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1495 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1496 
1497 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1498 
1499 /*
1500  * Function prototypes for the non-512 support (DVDRAM, MO, etc.) functions.
1501  */
1502 static void sd_check_for_writable_cd(struct sd_lun *un, int path_flag);
1503 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1504 static void sd_wm_cache_destructor(void *wm, void *un);
1505 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1506 	daddr_t endb, ushort_t typ);
1507 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1508 	daddr_t endb);
1509 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1510 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1511 static void sd_read_modify_write_task(void *arg);
1512 static int
1513 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1514 	struct buf **bpp);
1515 
1516 
1517 /*
1518  * Function prototypes for failfast support.
1519  */
1520 static void sd_failfast_flushq(struct sd_lun *un);
1521 static int sd_failfast_flushq_callback(struct buf *bp);
1522 
1523 /*
1524  * Function prototypes to check for lsi devices
1525  */
1526 static void sd_is_lsi(struct sd_lun *un);
1527 
1528 /*
1529  * Function prototypes for x86 support
1530  */
1531 #if defined(__i386) || defined(__amd64)
1532 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1533 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1534 #endif
1535 
1536 
1537 /* Function prototypes for cmlb */
1538 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1539     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1540 
1541 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1542 
1543 /*
1544  * Constants for failfast support:
1545  *
1546  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1547  * failfast processing being performed.
1548  *
1549  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1550  * failfast processing on all bufs with B_FAILFAST set.
1551  */
1552 
1553 #define	SD_FAILFAST_INACTIVE		0
1554 #define	SD_FAILFAST_ACTIVE		1
1555 
1556 /*
1557  * Bitmask to control behavior of buf(9S) flushes when a transition to
1558  * the failfast state occurs. Optional bits include:
1559  *
1560  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1561  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1562  * be flushed.
1563  *
1564  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1565  * driver, in addition to the regular wait queue. This includes the xbuf
1566  * queues. When clear, only the driver's wait queue will be flushed.
1567  */
1568 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1569 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1570 
1571 /*
1572  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1573  * to flush all queues within the driver.
1574  */
1575 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
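
/*
 * Editor's sketch (hypothetical, for illustration only): a flush routine is
 * expected to test these bits roughly as follows; sd_failfast_flushq(),
 * declared above, is the real consumer.
 *
 *	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
 *		flush every queued buf, not just those with B_FAILFAST;
 *	}
 *	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
 *		also flush the xbuf queues, not just the wait queue;
 *	}
 */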
1576 
1577 
1578 /*
1579  * SD Testing Fault Injection
1580  */
1581 #ifdef SD_FAULT_INJECTION
1582 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1583 static void sd_faultinjection(struct scsi_pkt *pktp);
1584 static void sd_injection_log(char *buf, struct sd_lun *un);
1585 #endif
1586 
1587 /*
1588  * Device driver ops vector
1589  */
1590 static struct cb_ops sd_cb_ops = {
1591 	sdopen,			/* open */
1592 	sdclose,		/* close */
1593 	sdstrategy,		/* strategy */
1594 	nodev,			/* print */
1595 	sddump,			/* dump */
1596 	sdread,			/* read */
1597 	sdwrite,		/* write */
1598 	sdioctl,		/* ioctl */
1599 	nodev,			/* devmap */
1600 	nodev,			/* mmap */
1601 	nodev,			/* segmap */
1602 	nochpoll,		/* poll */
1603 	sd_prop_op,		/* cb_prop_op */
1604 	0,			/* streamtab  */
1605 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1606 	CB_REV,			/* cb_rev */
1607 	sdaread, 		/* async I/O read entry point */
1608 	sdawrite		/* async I/O write entry point */
1609 };
1610 
1611 static struct dev_ops sd_ops = {
1612 	DEVO_REV,		/* devo_rev, */
1613 	0,			/* refcnt  */
1614 	sdinfo,			/* info */
1615 	nulldev,		/* identify */
1616 	sdprobe,		/* probe */
1617 	sdattach,		/* attach */
1618 	sddetach,		/* detach */
1619 	nodev,			/* reset */
1620 	&sd_cb_ops,		/* driver operations */
1621 	NULL,			/* bus operations */
1622 	sdpower			/* power */
1623 };
1624 
1625 
1626 /*
1627  * This is the loadable module wrapper.
1628  */
1629 #include <sys/modctl.h>
1630 
1631 static struct modldrv modldrv = {
1632 	&mod_driverops,		/* Type of module. This one is a driver */
1633 	SD_MODULE_NAME,		/* Module name. */
1634 	&sd_ops			/* driver ops */
1635 };
1636 
1637 
1638 static struct modlinkage modlinkage = {
1639 	MODREV_1,
1640 	&modldrv,
1641 	NULL
1642 };
1643 
1644 static cmlb_tg_ops_t sd_tgops = {
1645 	TG_DK_OPS_VERSION_1,
1646 	sd_tg_rdwr,
1647 	sd_tg_getinfo
1648 };
1649 
1650 static struct scsi_asq_key_strings sd_additional_codes[] = {
1651 	0x81, 0, "Logical Unit is Reserved",
1652 	0x85, 0, "Audio Address Not Valid",
1653 	0xb6, 0, "Media Load Mechanism Failed",
1654 	0xB9, 0, "Audio Play Operation Aborted",
1655 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1656 	0x53, 2, "Medium removal prevented",
1657 	0x6f, 0, "Authentication failed during key exchange",
1658 	0x6f, 1, "Key not present",
1659 	0x6f, 2, "Key not established",
1660 	0x6f, 3, "Read without proper authentication",
1661 	0x6f, 4, "Mismatched region to this logical unit",
1662 	0x6f, 5, "Region reset count error",
1663 	0xffff, 0x0, NULL
1664 };
1665 
1666 
1667 /*
1668  * Struct for passing printing information for sense data messages
1669  */
1670 struct sd_sense_info {
1671 	int	ssi_severity;
1672 	int	ssi_pfa_flag;
1673 };
1674 
1675 /*
1676  * Table of function pointers for iostart-side routines. Separate "chains"
1677  * of layered function calls are formed by placing the function pointers
1678  * sequentially in the desired order. Functions are called according to an
1679  * incrementing table index ordering. The last function in each chain must
1680  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1681  * in the sd_iodone_chain[] array.
1682  *
1683  * Note: It may seem more natural to organize both the iostart and iodone
1684  * functions together, into an array of structures (or some similar
1685  * organization) with a common index, rather than two separate arrays which
1686  * must be maintained in synchronization. The purpose of this division is
1687  * to achieve improved performance: individual arrays allow for more
1688  * effective cache line utilization on certain platforms.
1689  */
1690 
1691 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1692 
1693 
1694 static sd_chain_t sd_iostart_chain[] = {
1695 
1696 	/* Chain for buf IO for disk drive targets (PM enabled) */
1697 	sd_mapblockaddr_iostart,	/* Index: 0 */
1698 	sd_pm_iostart,			/* Index: 1 */
1699 	sd_core_iostart,		/* Index: 2 */
1700 
1701 	/* Chain for buf IO for disk drive targets (PM disabled) */
1702 	sd_mapblockaddr_iostart,	/* Index: 3 */
1703 	sd_core_iostart,		/* Index: 4 */
1704 
1705 	/* Chain for buf IO for removable-media targets (PM enabled) */
1706 	sd_mapblockaddr_iostart,	/* Index: 5 */
1707 	sd_mapblocksize_iostart,	/* Index: 6 */
1708 	sd_pm_iostart,			/* Index: 7 */
1709 	sd_core_iostart,		/* Index: 8 */
1710 
1711 	/* Chain for buf IO for removable-media targets (PM disabled) */
1712 	sd_mapblockaddr_iostart,	/* Index: 9 */
1713 	sd_mapblocksize_iostart,	/* Index: 10 */
1714 	sd_core_iostart,		/* Index: 11 */
1715 
1716 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1717 	sd_mapblockaddr_iostart,	/* Index: 12 */
1718 	sd_checksum_iostart,		/* Index: 13 */
1719 	sd_pm_iostart,			/* Index: 14 */
1720 	sd_core_iostart,		/* Index: 15 */
1721 
1722 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1723 	sd_mapblockaddr_iostart,	/* Index: 16 */
1724 	sd_checksum_iostart,		/* Index: 17 */
1725 	sd_core_iostart,		/* Index: 18 */
1726 
1727 	/* Chain for USCSI commands (all targets) */
1728 	sd_pm_iostart,			/* Index: 19 */
1729 	sd_core_iostart,		/* Index: 20 */
1730 
1731 	/* Chain for checksumming USCSI commands (all targets) */
1732 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1733 	sd_pm_iostart,			/* Index: 22 */
1734 	sd_core_iostart,		/* Index: 23 */
1735 
1736 	/* Chain for "direct" USCSI commands (all targets) */
1737 	sd_core_iostart,		/* Index: 24 */
1738 
1739 	/* Chain for "direct priority" USCSI commands (all targets) */
1740 	sd_core_iostart,		/* Index: 25 */
1741 };
1742 
1743 /*
1744  * Macros to locate the first function of each iostart chain in the
1745  * sd_iostart_chain[] array. These are located by the index in the array.
1746  */
1747 #define	SD_CHAIN_DISK_IOSTART			0
1748 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1749 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1750 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1751 #define	SD_CHAIN_CHKSUM_IOSTART			12
1752 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1753 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1754 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1755 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1756 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
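
/*
 * Editor's note (illustrative): for a disk target with PM enabled, iostart
 * processing begins at SD_CHAIN_DISK_IOSTART (index 0) and each layer
 * advances the index, yielding the call sequence:
 *
 *	sd_mapblockaddr_iostart(0, un, bp);
 *	sd_pm_iostart(1, un, bp);
 *	sd_core_iostart(2, un, bp);
 */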
1757 
1758 
1759 /*
1760  * Table of function pointers for the iodone-side routines for the driver-
1761  * internal layering mechanism.  The calling sequence for iodone routines
1762  * uses a decrementing table index, so the last routine called in a chain
1763  * must be at the lowest array index location for that chain.  The last
1764  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1765  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1766  * of the functions in an iodone side chain must correspond to the ordering
1767  * of the iostart routines for that chain.  Note that there is no iodone
1768  * side routine that corresponds to sd_core_iostart(), so there is no
1769  * entry in the table for this.
1770  */
1771 
1772 static sd_chain_t sd_iodone_chain[] = {
1773 
1774 	/* Chain for buf IO for disk drive targets (PM enabled) */
1775 	sd_buf_iodone,			/* Index: 0 */
1776 	sd_mapblockaddr_iodone,		/* Index: 1 */
1777 	sd_pm_iodone,			/* Index: 2 */
1778 
1779 	/* Chain for buf IO for disk drive targets (PM disabled) */
1780 	sd_buf_iodone,			/* Index: 3 */
1781 	sd_mapblockaddr_iodone,		/* Index: 4 */
1782 
1783 	/* Chain for buf IO for removable-media targets (PM enabled) */
1784 	sd_buf_iodone,			/* Index: 5 */
1785 	sd_mapblockaddr_iodone,		/* Index: 6 */
1786 	sd_mapblocksize_iodone,		/* Index: 7 */
1787 	sd_pm_iodone,			/* Index: 8 */
1788 
1789 	/* Chain for buf IO for removable-media targets (PM disabled) */
1790 	sd_buf_iodone,			/* Index: 9 */
1791 	sd_mapblockaddr_iodone,		/* Index: 10 */
1792 	sd_mapblocksize_iodone,		/* Index: 11 */
1793 
1794 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1795 	sd_buf_iodone,			/* Index: 12 */
1796 	sd_mapblockaddr_iodone,		/* Index: 13 */
1797 	sd_checksum_iodone,		/* Index: 14 */
1798 	sd_pm_iodone,			/* Index: 15 */
1799 
1800 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1801 	sd_buf_iodone,			/* Index: 16 */
1802 	sd_mapblockaddr_iodone,		/* Index: 17 */
1803 	sd_checksum_iodone,		/* Index: 18 */
1804 
1805 	/* Chain for USCSI commands (non-checksum targets) */
1806 	sd_uscsi_iodone,		/* Index: 19 */
1807 	sd_pm_iodone,			/* Index: 20 */
1808 
1809 	/* Chain for USCSI commands (checksum targets) */
1810 	sd_uscsi_iodone,		/* Index: 21 */
1811 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1812 	sd_pm_iodone,			/* Index: 23 */
1813 
1814 	/* Chain for "direct" USCSI commands (all targets) */
1815 	sd_uscsi_iodone,		/* Index: 24 */
1816 
1817 	/* Chain for "direct priority" USCSI commands (all targets) */
1818 	sd_uscsi_iodone,		/* Index: 25 */
1819 };
1820 
1821 
1822 /*
1823  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1824  * each iodone-side chain. These are located by the array index, but as the
1825  * iodone side functions are called in a decrementing-index order, the
1826  * highest index number in each chain must be specified (as these correspond
1827  * to the first function in the iodone chain that will be called by the core
1828  * at IO completion time).
1829  */
1830 
1831 #define	SD_CHAIN_DISK_IODONE			2
1832 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1833 #define	SD_CHAIN_RMMEDIA_IODONE			8
1834 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1835 #define	SD_CHAIN_CHKSUM_IODONE			15
1836 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1837 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1838 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1839 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1840 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
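
/*
 * Editor's note (illustrative): for the disk chain above, iodone processing
 * begins at SD_CHAIN_DISK_IODONE (index 2) and decrements, so completion
 * runs sd_pm_iodone(2), then sd_mapblockaddr_iodone(1), and finally
 * sd_buf_iodone(0) to return the buf(9S) to its originator.
 */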
1841 
1842 
1843 
1844 
1845 /*
1846  * Array to map a layering chain index to the appropriate initpkt routine.
1847  * The redundant entries are present so that the index used for accessing
1848  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1849  * with this table as well.
1850  */
1851 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1852 
1853 static sd_initpkt_t	sd_initpkt_map[] = {
1854 
1855 	/* Chain for buf IO for disk drive targets (PM enabled) */
1856 	sd_initpkt_for_buf,		/* Index: 0 */
1857 	sd_initpkt_for_buf,		/* Index: 1 */
1858 	sd_initpkt_for_buf,		/* Index: 2 */
1859 
1860 	/* Chain for buf IO for disk drive targets (PM disabled) */
1861 	sd_initpkt_for_buf,		/* Index: 3 */
1862 	sd_initpkt_for_buf,		/* Index: 4 */
1863 
1864 	/* Chain for buf IO for removable-media targets (PM enabled) */
1865 	sd_initpkt_for_buf,		/* Index: 5 */
1866 	sd_initpkt_for_buf,		/* Index: 6 */
1867 	sd_initpkt_for_buf,		/* Index: 7 */
1868 	sd_initpkt_for_buf,		/* Index: 8 */
1869 
1870 	/* Chain for buf IO for removable-media targets (PM disabled) */
1871 	sd_initpkt_for_buf,		/* Index: 9 */
1872 	sd_initpkt_for_buf,		/* Index: 10 */
1873 	sd_initpkt_for_buf,		/* Index: 11 */
1874 
1875 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1876 	sd_initpkt_for_buf,		/* Index: 12 */
1877 	sd_initpkt_for_buf,		/* Index: 13 */
1878 	sd_initpkt_for_buf,		/* Index: 14 */
1879 	sd_initpkt_for_buf,		/* Index: 15 */
1880 
1881 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1882 	sd_initpkt_for_buf,		/* Index: 16 */
1883 	sd_initpkt_for_buf,		/* Index: 17 */
1884 	sd_initpkt_for_buf,		/* Index: 18 */
1885 
1886 	/* Chain for USCSI commands (non-checksum targets) */
1887 	sd_initpkt_for_uscsi,		/* Index: 19 */
1888 	sd_initpkt_for_uscsi,		/* Index: 20 */
1889 
1890 	/* Chain for USCSI commands (checksum targets) */
1891 	sd_initpkt_for_uscsi,		/* Index: 21 */
1892 	sd_initpkt_for_uscsi,		/* Index: 22 */
1893 	sd_initpkt_for_uscsi,		/* Index: 23 */
1894 
1895 	/* Chain for "direct" USCSI commands (all targets) */
1896 	sd_initpkt_for_uscsi,		/* Index: 24 */
1897 
1898 	/* Chain for "direct priority" USCSI commands (all targets) */
1899 	sd_initpkt_for_uscsi,		/* Index: 25 */
1900 
1901 };
1902 
1903 
1904 /*
1905  * Array to map a layering chain index to the appropriate destroypkt routine.
1906  * The redundant entries are present so that the index used for accessing
1907  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1908  * with this table as well.
1909  */
1910 typedef void (*sd_destroypkt_t)(struct buf *);
1911 
1912 static sd_destroypkt_t	sd_destroypkt_map[] = {
1913 
1914 	/* Chain for buf IO for disk drive targets (PM enabled) */
1915 	sd_destroypkt_for_buf,		/* Index: 0 */
1916 	sd_destroypkt_for_buf,		/* Index: 1 */
1917 	sd_destroypkt_for_buf,		/* Index: 2 */
1918 
1919 	/* Chain for buf IO for disk drive targets (PM disabled) */
1920 	sd_destroypkt_for_buf,		/* Index: 3 */
1921 	sd_destroypkt_for_buf,		/* Index: 4 */
1922 
1923 	/* Chain for buf IO for removable-media targets (PM enabled) */
1924 	sd_destroypkt_for_buf,		/* Index: 5 */
1925 	sd_destroypkt_for_buf,		/* Index: 6 */
1926 	sd_destroypkt_for_buf,		/* Index: 7 */
1927 	sd_destroypkt_for_buf,		/* Index: 8 */
1928 
1929 	/* Chain for buf IO for removable-media targets (PM disabled) */
1930 	sd_destroypkt_for_buf,		/* Index: 9 */
1931 	sd_destroypkt_for_buf,		/* Index: 10 */
1932 	sd_destroypkt_for_buf,		/* Index: 11 */
1933 
1934 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1935 	sd_destroypkt_for_buf,		/* Index: 12 */
1936 	sd_destroypkt_for_buf,		/* Index: 13 */
1937 	sd_destroypkt_for_buf,		/* Index: 14 */
1938 	sd_destroypkt_for_buf,		/* Index: 15 */
1939 
1940 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1941 	sd_destroypkt_for_buf,		/* Index: 16 */
1942 	sd_destroypkt_for_buf,		/* Index: 17 */
1943 	sd_destroypkt_for_buf,		/* Index: 18 */
1944 
1945 	/* Chain for USCSI commands (non-checksum targets) */
1946 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1947 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1948 
1949 	/* Chain for USCSI commands (checksum targets) */
1950 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1951 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1952 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1953 
1954 	/* Chain for "direct" USCSI commands (all targets) */
1955 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1956 
1957 	/* Chain for "direct priority" USCSI commands (all targets) */
1958 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1959 
1960 };
1961 
1962 
1963 
1964 /*
1965  * Array to map a layering chain index to the appropriate chain "type".
1966  * The chain type indicates a specific property/usage of the chain.
1967  * The redundant entries are present so that the index used for accessing
1968  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1969  * with this table as well.
1970  */
1971 
1972 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1973 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1974 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1975 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1976 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1977 						/* (for error recovery) */
1978 
1979 static int sd_chain_type_map[] = {
1980 
1981 	/* Chain for buf IO for disk drive targets (PM enabled) */
1982 	SD_CHAIN_BUFIO,			/* Index: 0 */
1983 	SD_CHAIN_BUFIO,			/* Index: 1 */
1984 	SD_CHAIN_BUFIO,			/* Index: 2 */
1985 
1986 	/* Chain for buf IO for disk drive targets (PM disabled) */
1987 	SD_CHAIN_BUFIO,			/* Index: 3 */
1988 	SD_CHAIN_BUFIO,			/* Index: 4 */
1989 
1990 	/* Chain for buf IO for removable-media targets (PM enabled) */
1991 	SD_CHAIN_BUFIO,			/* Index: 5 */
1992 	SD_CHAIN_BUFIO,			/* Index: 6 */
1993 	SD_CHAIN_BUFIO,			/* Index: 7 */
1994 	SD_CHAIN_BUFIO,			/* Index: 8 */
1995 
1996 	/* Chain for buf IO for removable-media targets (PM disabled) */
1997 	SD_CHAIN_BUFIO,			/* Index: 9 */
1998 	SD_CHAIN_BUFIO,			/* Index: 10 */
1999 	SD_CHAIN_BUFIO,			/* Index: 11 */
2000 
2001 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2002 	SD_CHAIN_BUFIO,			/* Index: 12 */
2003 	SD_CHAIN_BUFIO,			/* Index: 13 */
2004 	SD_CHAIN_BUFIO,			/* Index: 14 */
2005 	SD_CHAIN_BUFIO,			/* Index: 15 */
2006 
2007 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2008 	SD_CHAIN_BUFIO,			/* Index: 16 */
2009 	SD_CHAIN_BUFIO,			/* Index: 17 */
2010 	SD_CHAIN_BUFIO,			/* Index: 18 */
2011 
2012 	/* Chain for USCSI commands (non-checksum targets) */
2013 	SD_CHAIN_USCSI,			/* Index: 19 */
2014 	SD_CHAIN_USCSI,			/* Index: 20 */
2015 
2016 	/* Chain for USCSI commands (checksum targets) */
2017 	SD_CHAIN_USCSI,			/* Index: 21 */
2018 	SD_CHAIN_USCSI,			/* Index: 22 */
2019 	SD_CHAIN_USCSI,			/* Index: 23 */
2020 
2021 	/* Chain for "direct" USCSI commands (all targets) */
2022 	SD_CHAIN_DIRECT,		/* Index: 24 */
2023 
2024 	/* Chain for "direct priority" USCSI commands (all targets) */
2025 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2026 };
2027 
2028 
2029 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2030 #define	SD_IS_BUFIO(xp)			\
2031 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2032 
2033 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2034 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2035 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
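
/*
 * Editor's note (illustrative): error-handling code can branch on the chain
 * that originated an IO, e.g.:
 *
 *	if (SD_IS_BUFIO(xp)) {
 *		treat bp as a regular buf(9S) IO;
 *	} else if (SD_IS_DIRECT_PRIORITY(xp)) {
 *		treat bp as a recovery command that bypasses power mgt;
 *	}
 */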
2036 
2037 
2038 
2039 /*
2040  * Struct, array, and macros to map a specific chain to the appropriate
2041  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2042  *
2043  * The sd_chain_index_map[] array is used at attach time to set the various
2044  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2045  * chain to be used with the instance. This allows different instances to use
2046  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2047  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2048  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2049  * dynamically & without the use of locking; and (2) a layer may update the
2050  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2051  * to allow for deferred processing of an IO within the same chain from a
2052  * different execution context.
2053  */
2054 
2055 struct sd_chain_index {
2056 	int	sci_iostart_index;
2057 	int	sci_iodone_index;
2058 };
2059 
2060 static struct sd_chain_index	sd_chain_index_map[] = {
2061 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2062 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2063 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2064 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2065 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2066 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2067 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2068 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2069 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2070 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2071 };
2072 
2073 
2074 /*
2075  * The following are indexes into the sd_chain_index_map[] array.
2076  */
2077 
2078 /* un->un_buf_chain_type must be set to one of these */
2079 #define	SD_CHAIN_INFO_DISK		0
2080 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2081 #define	SD_CHAIN_INFO_RMMEDIA		2
2082 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2083 #define	SD_CHAIN_INFO_CHKSUM		4
2084 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2085 
2086 /* un->un_uscsi_chain_type must be set to one of these */
2087 #define	SD_CHAIN_INFO_USCSI_CMD		6
2088 /* USCSI with PM disabled is the same as DIRECT */
2089 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2090 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2091 
2092 /* un->un_direct_chain_type must be set to one of these */
2093 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2094 
2095 /* un->un_priority_chain_type must be set to one of these */
2096 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
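
/*
 * Editor's sketch (hypothetical): at attach time the driver selects a row
 * of sd_chain_index_map[] per IO class, e.g. for a removable-media device
 * with PM disabled:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
 *
 * and at sd_xbuf init time the two layering indexes are taken from that row:
 *
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */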
2097 
2098 /* size for devid inquiries */
2099 #define	MAX_INQUIRY_SIZE		0xF0
2100 
2101 /*
2102  * Macros used by functions to pass a given buf(9S) struct along to the
2103  * next function in the layering chain for further processing.
2104  *
2105  * In the following macros, passing more than three arguments to the called
2106  * routines causes the optimizer for the SPARC compiler to stop doing tail
2107  * call elimination, which results in significant performance degradation.
2108  */
2109 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2110 	((*(sd_iostart_chain[index]))(index, un, bp))
2111 
2112 #define	SD_BEGIN_IODONE(index, un, bp)	\
2113 	((*(sd_iodone_chain[index]))(index, un, bp))
2114 
2115 #define	SD_NEXT_IOSTART(index, un, bp)				\
2116 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2117 
2118 #define	SD_NEXT_IODONE(index, un, bp)				\
2119 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
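
/*
 * Editor's sketch (hypothetical routine, for illustration only): a layered
 * iostart function does its per-layer work and then passes the buf along
 * with SD_NEXT_IOSTART(); its iodone counterpart unwinds the chain with
 * SD_NEXT_IODONE(). Keeping the argument count at three preserves the
 * tail-call elimination described above.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		per-layer processing of bp goes here;
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */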
2120 
2121 /*
2122  *    Function: _init
2123  *
2124  * Description: This is the driver _init(9E) entry point.
2125  *
2126  * Return Code: Returns the value from mod_install(9F) or
2127  *		ddi_soft_state_init(9F) as appropriate.
2128  *
2129  *     Context: Called when driver module loaded.
2130  */
2131 
2132 int
2133 _init(void)
2134 {
2135 	int	err;
2136 
2137 	/* establish driver name from module name */
2138 	sd_label = mod_modname(&modlinkage);
2139 
2140 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2141 	    SD_MAXUNIT);
2142 
2143 	if (err != 0) {
2144 		return (err);
2145 	}
2146 
2147 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2148 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2149 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2150 
2151 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2152 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2153 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2154 
2155 	/*
2156 	 * It is ok to init here even for fibre devices.
2157 	 */
2158 	sd_scsi_probe_cache_init();
2159 
2160 	sd_scsi_target_lun_init();
2161 
2162 	/*
2163 	 * Creating taskq before mod_install ensures that all callers (threads)
2164 	 * that enter the module after a successful mod_install encounter
2165 	 * a valid taskq.
2166 	 */
2167 	sd_taskq_create();
2168 
2169 	err = mod_install(&modlinkage);
2170 	if (err != 0) {
2171 		/* delete taskq if install fails */
2172 		sd_taskq_delete();
2173 
2174 		mutex_destroy(&sd_detach_mutex);
2175 		mutex_destroy(&sd_log_mutex);
2176 		mutex_destroy(&sd_label_mutex);
2177 
2178 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2179 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2180 		cv_destroy(&sd_tr.srq_inprocess_cv);
2181 
2182 		sd_scsi_probe_cache_fini();
2183 
2184 		sd_scsi_target_lun_fini();
2185 
2186 		ddi_soft_state_fini(&sd_state);
2187 		return (err);
2188 	}
2189 
2190 	return (err);
2191 }
2192 
2193 
2194 /*
2195  *    Function: _fini
2196  *
2197  * Description: This is the driver _fini(9E) entry point.
2198  *
2199  * Return Code: Returns the value from mod_remove(9F)
2200  *
2201  *     Context: Called when driver module is unloaded.
2202  */
2203 
2204 int
2205 _fini(void)
2206 {
2207 	int err;
2208 
2209 	if ((err = mod_remove(&modlinkage)) != 0) {
2210 		return (err);
2211 	}
2212 
2213 	sd_taskq_delete();
2214 
2215 	mutex_destroy(&sd_detach_mutex);
2216 	mutex_destroy(&sd_log_mutex);
2217 	mutex_destroy(&sd_label_mutex);
2218 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2219 
2220 	sd_scsi_probe_cache_fini();
2221 
2222 	sd_scsi_target_lun_fini();
2223 
2224 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2225 	cv_destroy(&sd_tr.srq_inprocess_cv);
2226 
2227 	ddi_soft_state_fini(&sd_state);
2228 
2229 	return (err);
2230 }
2231 
2232 
2233 /*
2234  *    Function: _info
2235  *
2236  * Description: This is the driver _info(9E) entry point.
2237  *
2238  *   Arguments: modinfop - pointer to the driver modinfo structure
2239  *
2240  * Return Code: Returns the value from mod_info(9F).
2241  *
2242  *     Context: Kernel thread context
2243  */
2244 
2245 int
2246 _info(struct modinfo *modinfop)
2247 {
2248 	return (mod_info(&modlinkage, modinfop));
2249 }
2250 
2251 
2252 /*
2253  * The following routines implement the driver message logging facility.
2254  * They provide component- and level- based debug output filtering.
2255  * Output may also be restricted to messages for a single instance by
2256  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2257  * to NULL, then messages for all instances are printed.
2258  *
2259  * These routines have been cloned from each other due to the language
2260  * constraints of macros and variable argument list processing.
2261  */
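
/*
 * Editor's sketch (illustrative): a typical debug setup and call site.
 * Output can be restricted to one instance by pointing sd_debug_un at that
 * instance's soft state (e.g. from a debugger); NULL means all instances.
 *
 *	sd_component_mask |= SD_LOG_COMMON;
 *	sd_level_mask |= SD_LOGMASK_ERROR;
 *	sd_debug_un = un;
 *
 *	SD_ERROR(SD_LOG_COMMON, un, "sd_foo: bad status 0x%x\n", status);
 */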
2262 
2263 
2264 /*
2265  *    Function: sd_log_err
2266  *
2267  * Description: This routine is called by the SD_ERROR macro for debug
2268  *		logging of error conditions.
2269  *
2270  *   Arguments: comp - driver component being logged
2271  *		un   - pointer to driver soft state (unit) structure
2272  *		fmt  - error string and format to be logged
2273  */
2274 
2275 static void
2276 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2277 {
2278 	va_list		ap;
2279 	dev_info_t	*dev;
2280 
2281 	ASSERT(un != NULL);
2282 	dev = SD_DEVINFO(un);
2283 	ASSERT(dev != NULL);
2284 
2285 	/*
2286 	 * Filter messages based on the global component and level masks.
2287 	 * Also print if un matches the value of sd_debug_un, or if
2288 	 * sd_debug_un is set to NULL.
2289 	 */
2290 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2291 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2292 		mutex_enter(&sd_log_mutex);
2293 		va_start(ap, fmt);
2294 		(void) vsprintf(sd_log_buf, fmt, ap);
2295 		va_end(ap);
2296 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2297 		mutex_exit(&sd_log_mutex);
2298 	}
2299 #ifdef SD_FAULT_INJECTION
2300 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2301 	if (un->sd_injection_mask & comp) {
2302 		mutex_enter(&sd_log_mutex);
2303 		va_start(ap, fmt);
2304 		(void) vsprintf(sd_log_buf, fmt, ap);
2305 		va_end(ap);
2306 		sd_injection_log(sd_log_buf, un);
2307 		mutex_exit(&sd_log_mutex);
2308 	}
2309 #endif
2310 }
2311 
2312 
2313 /*
2314  *    Function: sd_log_info
2315  *
2316  * Description: This routine is called by the SD_INFO macro for debug
2317  *		logging of general purpose informational conditions.
2318  *
2319  *   Arguments: comp - driver component being logged
2320  *		dev  - pointer to driver info structure
2321  *		fmt  - info string and format to be logged
2322  */
2323 
2324 static void
2325 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2326 {
2327 	va_list		ap;
2328 	dev_info_t	*dev;
2329 
2330 	ASSERT(un != NULL);
2331 	dev = SD_DEVINFO(un);
2332 	ASSERT(dev != NULL);
2333 
2334 	/*
2335 	 * Filter messages based on the global component and level masks.
2336 	 * Also print if un matches the value of sd_debug_un, or if
2337 	 * sd_debug_un is set to NULL.
2338 	 */
2339 	if ((sd_component_mask & component) &&
2340 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2341 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2342 		mutex_enter(&sd_log_mutex);
2343 		va_start(ap, fmt);
2344 		(void) vsprintf(sd_log_buf, fmt, ap);
2345 		va_end(ap);
2346 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2347 		mutex_exit(&sd_log_mutex);
2348 	}
2349 #ifdef SD_FAULT_INJECTION
2350 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2351 	if (un->sd_injection_mask & component) {
2352 		mutex_enter(&sd_log_mutex);
2353 		va_start(ap, fmt);
2354 		(void) vsprintf(sd_log_buf, fmt, ap);
2355 		va_end(ap);
2356 		sd_injection_log(sd_log_buf, un);
2357 		mutex_exit(&sd_log_mutex);
2358 	}
2359 #endif
2360 }
2361 
2362 
2363 /*
2364  *    Function: sd_log_trace
2365  *
2366  * Description: This routine is called by the SD_TRACE macro for debug
2367  *		logging of trace conditions (i.e. function entry/exit).
2368  *
2369  *   Arguments: comp - driver component being logged
2370  *		dev  - pointer to driver info structure
2371  *		fmt  - trace string and format to be logged
2372  */
2373 
2374 static void
2375 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2376 {
2377 	va_list		ap;
2378 	dev_info_t	*dev;
2379 
2380 	ASSERT(un != NULL);
2381 	dev = SD_DEVINFO(un);
2382 	ASSERT(dev != NULL);
2383 
2384 	/*
2385 	 * Filter messages based on the global component and level masks.
2386 	 * Also print if un matches the value of sd_debug_un, or if
2387 	 * sd_debug_un is set to NULL.
2388 	 */
2389 	if ((sd_component_mask & component) &&
2390 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2391 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2392 		mutex_enter(&sd_log_mutex);
2393 		va_start(ap, fmt);
2394 		(void) vsprintf(sd_log_buf, fmt, ap);
2395 		va_end(ap);
2396 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2397 		mutex_exit(&sd_log_mutex);
2398 	}
2399 #ifdef SD_FAULT_INJECTION
2400 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2401 	if (un->sd_injection_mask & component) {
2402 		mutex_enter(&sd_log_mutex);
2403 		va_start(ap, fmt);
2404 		(void) vsprintf(sd_log_buf, fmt, ap);
2405 		va_end(ap);
2406 		sd_injection_log(sd_log_buf, un);
2407 		mutex_exit(&sd_log_mutex);
2408 	}
2409 #endif
2410 }
2411 
2412 
2413 /*
2414  *    Function: sdprobe
2415  *
2416  * Description: This is the driver probe(9E) entry point function.
2417  *
2418  *   Arguments: devi - opaque device info handle
2419  *
2420  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2421  *              DDI_PROBE_FAILURE: If the probe failed.
2422  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2423  *				   but may be present in the future.
2424  */
2425 
2426 static int
2427 sdprobe(dev_info_t *devi)
2428 {
2429 	struct scsi_device	*devp;
2430 	int			rval;
2431 	int			instance;
2432 
2433 	/*
2434 	 * if it wasn't for pln, sdprobe could actually be nulldev
2435 	 * in the "__fibre" case.
2436 	 */
2437 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2438 		return (DDI_PROBE_DONTCARE);
2439 	}
2440 
2441 	devp = ddi_get_driver_private(devi);
2442 
2443 	if (devp == NULL) {
2444 		/* Oops... nexus driver is misconfigured... */
2445 		return (DDI_PROBE_FAILURE);
2446 	}
2447 
2448 	instance = ddi_get_instance(devi);
2449 
2450 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2451 		return (DDI_PROBE_PARTIAL);
2452 	}
2453 
2454 	/*
2455 	 * Call the SCSA utility probe routine to see if we actually
2456 	 * have a target at this SCSI nexus.
2457 	 */
2458 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2459 	case SCSIPROBE_EXISTS:
2460 		switch (devp->sd_inq->inq_dtype) {
2461 		case DTYPE_DIRECT:
2462 			rval = DDI_PROBE_SUCCESS;
2463 			break;
2464 		case DTYPE_RODIRECT:
2465 			/* CDs etc. Can be removable media */
2466 			rval = DDI_PROBE_SUCCESS;
2467 			break;
2468 		case DTYPE_OPTICAL:
2469 			/*
2470 			 * Rewritable optical drive (e.g. HP115AA);
2471 			 * can also be removable media.
2472 			 */
2473 
2474 			/*
2475 			 * Do not attempt to bind to DTYPE_OPTICAL if
2476 			 * pre-Solaris 9 SPARC sd behavior is required.
2477 			 *
2478 			 * If this is the first time through and sd_dtype_optical_bind
2479 			 * has not been set in /etc/system, check the properties.
2480 			 */
2481 
2482 			if (sd_dtype_optical_bind < 0) {
2483 				sd_dtype_optical_bind =
2484 				    ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0,
2485 				    "optical-device-bind", 1);
2486 			}
2487 
2488 			if (sd_dtype_optical_bind == 0) {
2489 				rval = DDI_PROBE_FAILURE;
2490 			} else {
2491 				rval = DDI_PROBE_SUCCESS;
2492 			}
2493 			break;
2494 
2495 		case DTYPE_NOTPRESENT:
2496 		default:
2497 			rval = DDI_PROBE_FAILURE;
2498 			break;
2499 		}
2500 		break;
2501 	default:
2502 		rval = DDI_PROBE_PARTIAL;
2503 		break;
2504 	}
2505 
2506 	/*
2507 	 * This routine checks for resource allocation prior to freeing,
2508 	 * so it will take care of the "smart probing" case where a
2509 	 * scsi_probe() may or may not have been issued and will *not*
2510 	 * free previously-freed resources.
2511 	 */
2512 	scsi_unprobe(devp);
2513 	return (rval);
2514 }
2515 
2516 
2517 /*
2518  *    Function: sdinfo
2519  *
2520  * Description: This is the driver getinfo(9E) entry point function.
2521  * 		Given the device number, return the devinfo pointer from
2522  *		the scsi_device structure or the instance number
2523  *		associated with the dev_t.
2524  *
2525  *   Arguments: dip     - pointer to device info structure
2526  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2527  *			  DDI_INFO_DEVT2INSTANCE)
2528  *		arg     - driver dev_t
2529  *		resultp - user buffer for request response
2530  *
2531  * Return Code: DDI_SUCCESS
2532  *              DDI_FAILURE
2533  */
2534 /* ARGSUSED */
2535 static int
2536 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2537 {
2538 	struct sd_lun	*un;
2539 	dev_t		dev;
2540 	int		instance;
2541 	int		error;
2542 
2543 	switch (infocmd) {
2544 	case DDI_INFO_DEVT2DEVINFO:
2545 		dev = (dev_t)arg;
2546 		instance = SDUNIT(dev);
2547 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2548 			return (DDI_FAILURE);
2549 		}
2550 		*result = (void *) SD_DEVINFO(un);
2551 		error = DDI_SUCCESS;
2552 		break;
2553 	case DDI_INFO_DEVT2INSTANCE:
2554 		dev = (dev_t)arg;
2555 		instance = SDUNIT(dev);
2556 		*result = (void *)(uintptr_t)instance;
2557 		error = DDI_SUCCESS;
2558 		break;
2559 	default:
2560 		error = DDI_FAILURE;
2561 	}
2562 	return (error);
2563 }
2564 
2565 /*
2566  *    Function: sd_prop_op
2567  *
2568  * Description: This is the driver prop_op(9E) entry point function.
2569  *		Return the number of blocks for the partition in question
2570  *		or forward the request to the property facilities.
2571  *
2572  *   Arguments: dev       - device number
2573  *		dip       - pointer to device info structure
2574  *		prop_op   - property operator
2575  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2576  *		name      - pointer to property name
2577  *		valuep    - pointer or address of the user buffer
2578  *		lengthp   - property length
2579  *
2580  * Return Code: DDI_PROP_SUCCESS
2581  *              DDI_PROP_NOT_FOUND
2582  *              DDI_PROP_UNDEFINED
2583  *              DDI_PROP_NO_MEMORY
2584  *              DDI_PROP_BUF_TOO_SMALL
2585  */
2586 
2587 static int
2588 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2589 	char *name, caddr_t valuep, int *lengthp)
2590 {
2591 	int		instance = ddi_get_instance(dip);
2592 	struct sd_lun	*un;
2593 	uint64_t	nblocks64;
2594 
2595 	/*
2596 	 * Our dynamic properties are all device specific and size oriented.
2597 	 * Requests issued under conditions where size is valid are passed
2598 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2599 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2600 	 */
2601 	un = ddi_get_soft_state(sd_state, instance);
2602 	if ((dev == DDI_DEV_T_ANY) || (un == NULL)) {
2603 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2604 		    name, valuep, lengthp));
2605 	} else if (!SD_IS_VALID_LABEL(un)) {
2606 		(void) cmlb_validate(un->un_cmlbhandle, 0,
2607 		    (void *)SD_PATH_DIRECT);
2608 		if (!SD_IS_VALID_LABEL(un))
2609 			return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2610 			    name, valuep, lengthp));
2611 	}
2612 
2613 	/* get nblocks value */
2614 	ASSERT(!mutex_owned(SD_MUTEX(un)));
2615 
2616 	(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2617 	    (diskaddr_t *)&nblocks64, NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
2618 
2619 	return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2620 	    name, valuep, lengthp, nblocks64));
2621 }
2622 
2623 /*
2624  * The following functions are for smart probing:
2625  * sd_scsi_probe_cache_init()
2626  * sd_scsi_probe_cache_fini()
2627  * sd_scsi_clear_probe_cache()
2628  * sd_scsi_probe_with_cache()
2629  */
2630 
2631 /*
2632  *    Function: sd_scsi_probe_cache_init
2633  *
2634  * Description: Initializes the probe response cache mutex and head pointer.
2635  *
2636  *     Context: Kernel thread context
2637  */
2638 
2639 static void
2640 sd_scsi_probe_cache_init(void)
2641 {
2642 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2643 	sd_scsi_probe_cache_head = NULL;
2644 }
2645 
2646 
2647 /*
2648  *    Function: sd_scsi_probe_cache_fini
2649  *
2650  * Description: Frees all resources associated with the probe response cache.
2651  *
2652  *     Context: Kernel thread context
2653  */
2654 
2655 static void
2656 sd_scsi_probe_cache_fini(void)
2657 {
2658 	struct sd_scsi_probe_cache *cp;
2659 	struct sd_scsi_probe_cache *ncp;
2660 
2661 	/* Clean up our smart probing linked list */
2662 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2663 		ncp = cp->next;
2664 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2665 	}
2666 	sd_scsi_probe_cache_head = NULL;
2667 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2668 }
2669 
2670 
2671 /*
2672  *    Function: sd_scsi_clear_probe_cache
2673  *
2674  * Description: This routine clears the probe response cache. This is
2675  *		done when open() returns ENXIO so that when deferred
2676  *		attach is attempted (possibly after a device has been
2677  *		turned on) we will retry the probe. Since we don't know
2678  *		which target we failed to open, we just clear the
2679  *		entire cache.
2680  *
2681  *     Context: Kernel thread context
2682  */
2683 
2684 static void
2685 sd_scsi_clear_probe_cache(void)
2686 {
2687 	struct sd_scsi_probe_cache	*cp;
2688 	int				i;
2689 
2690 	mutex_enter(&sd_scsi_probe_cache_mutex);
2691 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2692 		/*
2693 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2694 		 * force probing to be performed the next time
2695 		 * sd_scsi_probe_with_cache is called.
2696 		 */
2697 		for (i = 0; i < NTARGETS_WIDE; i++) {
2698 			cp->cache[i] = SCSIPROBE_EXISTS;
2699 		}
2700 	}
2701 	mutex_exit(&sd_scsi_probe_cache_mutex);
2702 }
2703 
2704 
2705 /*
2706  *    Function: sd_scsi_probe_with_cache
2707  *
2708  * Description: This routine implements support for a scsi device probe
2709  *		with cache. The driver maintains a cache of the target
2710  *		responses to scsi probes. If we get no response from a
2711  *		target during a probe inquiry, we remember that, and we
2712  *		avoid additional calls to scsi_probe on non-zero LUNs
2713  *		on the same target until the cache is cleared. By doing
2714  *		so we avoid the 1/4 sec selection timeout for nonzero
2715  *		LUNs.  LUN 0 of a target is always probed.
2716  *
2717  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2718  *              waitfn   - indicates what the allocator routines should
2719  *			   do when resources are not available. This value
2720  *			   is passed on to scsi_probe() when that routine
2721  *			   is called.
2722  *
2723  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2724  *		otherwise the value returned by scsi_probe(9F).
2725  *
2726  *     Context: Kernel thread context
2727  */
2728 
2729 static int
2730 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2731 {
2732 	struct sd_scsi_probe_cache	*cp;
2733 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2734 	int		lun, tgt;
2735 
2736 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2737 	    SCSI_ADDR_PROP_LUN, 0);
2738 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2739 	    SCSI_ADDR_PROP_TARGET, -1);
2740 
2741 	/* Make sure caching is enabled and the target is in range */
2742 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2743 		/* do it the old way (no cache) */
2744 		return (scsi_probe(devp, waitfn));
2745 	}
2746 
2747 	mutex_enter(&sd_scsi_probe_cache_mutex);
2748 
2749 	/* Find the cache for this scsi bus instance */
2750 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2751 		if (cp->pdip == pdip) {
2752 			break;
2753 		}
2754 	}
2755 
2756 	/* If we can't find a cache for this pdip, create one */
2757 	if (cp == NULL) {
2758 		int i;
2759 
2760 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2761 		    KM_SLEEP);
2762 		cp->pdip = pdip;
2763 		cp->next = sd_scsi_probe_cache_head;
2764 		sd_scsi_probe_cache_head = cp;
2765 		for (i = 0; i < NTARGETS_WIDE; i++) {
2766 			cp->cache[i] = SCSIPROBE_EXISTS;
2767 		}
2768 	}
2769 
2770 	mutex_exit(&sd_scsi_probe_cache_mutex);
2771 
2772 	/* Recompute the cache for this target if LUN zero */
2773 	if (lun == 0) {
2774 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2775 	}
2776 
2777 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2778 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2779 		return (SCSIPROBE_NORESP);
2780 	}
2781 
2782 	/* Do the actual probe; save & return the result */
2783 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2784 }
2785 
2786 
2787 /*
2788  *    Function: sd_scsi_target_lun_init
2789  *
2790  * Description: Initializes the attached lun chain mutex and head pointer.
2791  *
2792  *     Context: Kernel thread context
2793  */
2794 
2795 static void
2796 sd_scsi_target_lun_init(void)
2797 {
2798 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2799 	sd_scsi_target_lun_head = NULL;
2800 }
2801 
2802 
2803 /*
2804  *    Function: sd_scsi_target_lun_fini
2805  *
2806  * Description: Frees all resources associated with the attached lun
2807  *              chain
2808  *
2809  *     Context: Kernel thread context
2810  */
2811 
2812 static void
2813 sd_scsi_target_lun_fini(void)
2814 {
2815 	struct sd_scsi_hba_tgt_lun	*cp;
2816 	struct sd_scsi_hba_tgt_lun	*ncp;
2817 
2818 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2819 		ncp = cp->next;
2820 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2821 	}
2822 	sd_scsi_target_lun_head = NULL;
2823 	mutex_destroy(&sd_scsi_target_lun_mutex);
2824 }
2825 
2826 
2827 /*
2828  *    Function: sd_scsi_get_target_lun_count
2829  *
2830  * Description: This routine will check in the attached lun chain to see
2831  * 		how many luns are attached on the required SCSI controller
2832  * 		and target. Currently, some capabilities like tagged queue
2833  *		are supported per target based by HBA. So all luns in a
2834  *		target have the same capabilities. Based on this assumption,
2835  * 		sd should only set these capabilities once per target. This
2836  *		function is called when sd needs to decide how many luns
2837  *		already attached on a target.
2838  *
2839  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2840  *			  controller device.
2841  *              target	- The target ID on the controller's SCSI bus.
2842  *
2843  * Return Code: The number of luns attached on the required target and
2844  *		controller.
2845  *		-1 if target ID is not in parallel SCSI scope or the given
2846  * 		dip is not in the chain.
2847  *
2848  *     Context: Kernel thread context
2849  */
2850 
2851 static int
2852 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2853 {
2854 	struct sd_scsi_hba_tgt_lun	*cp;
2855 
2856 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2857 		return (-1);
2858 	}
2859 
2860 	mutex_enter(&sd_scsi_target_lun_mutex);
2861 
2862 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2863 		if (cp->pdip == dip) {
2864 			break;
2865 		}
2866 	}
2867 
2868 	mutex_exit(&sd_scsi_target_lun_mutex);
2869 
2870 	if (cp == NULL) {
2871 		return (-1);
2872 	}
2873 
2874 	return (cp->nlun[target]);
2875 }
2876 
2877 
2878 /*
2879  *    Function: sd_scsi_update_lun_on_target
2880  *
2881  * Description: This routine is used to update the attached lun chain when a
2882  *		lun is attached or detached on a target.
2883  *
2884  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2885  *                        controller device.
2886  *              target  - The target ID on the controller's SCSI bus.
2887  *		flag	- Indicates whether the lun is attached or detached.
2888  *
2889  *     Context: Kernel thread context
2890  */
2891 
2892 static void
2893 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2894 {
2895 	struct sd_scsi_hba_tgt_lun	*cp;
2896 
2897 	mutex_enter(&sd_scsi_target_lun_mutex);
2898 
2899 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2900 		if (cp->pdip == dip) {
2901 			break;
2902 		}
2903 	}
2904 
2905 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2906 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2907 		    KM_SLEEP);
2908 		cp->pdip = dip;
2909 		cp->next = sd_scsi_target_lun_head;
2910 		sd_scsi_target_lun_head = cp;
2911 	}
2912 
2913 	mutex_exit(&sd_scsi_target_lun_mutex);
2914 
2915 	if (cp != NULL) {
2916 		if (flag == SD_SCSI_LUN_ATTACH) {
2917 			cp->nlun[target]++;
2918 		} else {
2919 			cp->nlun[target]--;
2920 		}
2921 	}
2922 }
2923 
2924 
2925 /*
2926  *    Function: sd_spin_up_unit
2927  *
2928  * Description: Issues the following commands to spin-up the device:
2929  *		START STOP UNIT, and INQUIRY.
2930  *
2931  *   Arguments: un - driver soft state (unit) structure
2932  *
2933  * Return Code: 0 - success
2934  *		EIO - failure
2935  *		EACCES - reservation conflict
2936  *
2937  *     Context: Kernel thread context
2938  */
2939 
2940 static int
2941 sd_spin_up_unit(struct sd_lun *un)
2942 {
2943 	size_t	resid		= 0;
2944 	int	has_conflict	= FALSE;
2945 	uchar_t *bufaddr;
2946 
2947 	ASSERT(un != NULL);
2948 
2949 	/*
2950 	 * Send a throwaway START UNIT command.
2951 	 *
2952 	 * If we fail on this, we don't care presently what precisely
2953 	 * is wrong.  EMC's arrays will also fail this with a check
2954 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2955 	 * we don't want to fail the attach because it may become
2956 	 * "active" later.
2957 	 */
2958 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2959 	    == EACCES)
2960 		has_conflict = TRUE;
2961 
2962 	/*
2963 	 * Send another INQUIRY command to the target. This is necessary for
2964 	 * non-removable media direct access devices because their INQUIRY data
2965 	 * may not be fully qualified until they are spun up (perhaps via the
2966 	 * START command above).  Note: This seems to be needed for some
2967 	 * legacy devices only.) The INQUIRY command should succeed even if a
2968 	 * Reservation Conflict is present.
2969 	 */
2970 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2971 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2972 		kmem_free(bufaddr, SUN_INQSIZE);
2973 		return (EIO);
2974 	}
2975 
2976 	/*
2977 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2978 	 * Note that this routine does not return a failure here even if the
2979 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2980 	 */
2981 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2982 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2983 	}
2984 
2985 	kmem_free(bufaddr, SUN_INQSIZE);
2986 
2987 	/* If we hit a reservation conflict above, tell the caller. */
2988 	if (has_conflict == TRUE) {
2989 		return (EACCES);
2990 	}
2991 
2992 	return (0);
2993 }
2994 
2995 #ifdef _LP64
2996 /*
2997  *    Function: sd_enable_descr_sense
2998  *
2999  * Description: This routine attempts to select descriptor sense format
3000  *		using the Control mode page.  Devices that support 64 bit
3001  *		LBAs (for >2TB luns) should also implement descriptor
3002  *		sense data so we will call this function whenever we see
3003  *		a lun larger than 2TB.  If for some reason the device
3004  *		supports 64 bit LBAs but doesn't support descriptor sense
3005  *		presumably the mode select will fail.  Everything will
3006  *		continue to work normally except that we will not get
3007  *		complete sense data for commands that fail with an LBA
3008  *		larger than 32 bits.
3009  *
3010  *   Arguments: un - driver soft state (unit) structure
3011  *
3012  *     Context: Kernel thread context only
3013  */
3014 
3015 static void
3016 sd_enable_descr_sense(struct sd_lun *un)
3017 {
3018 	uchar_t			*header;
3019 	struct mode_control_scsi3 *ctrl_bufp;
3020 	size_t			buflen;
3021 	size_t			bd_len;
3022 
3023 	/*
3024 	 * Read MODE SENSE page 0xA, Control Mode Page
3025 	 */
3026 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3027 	    sizeof (struct mode_control_scsi3);
3028 	header = kmem_zalloc(buflen, KM_SLEEP);
3029 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3030 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3031 		SD_ERROR(SD_LOG_COMMON, un,
3032 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3033 		goto eds_exit;
3034 	}
3035 
3036 	/*
3037 	 * Determine size of Block Descriptors in order to locate
3038 	 * the mode page data. ATAPI devices return 0, SCSI devices
3039 	 * should return MODE_BLK_DESC_LENGTH.
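	 *
	 * A rough sketch of the returned buffer layout (assuming a
	 * single block descriptor):
	 *
	 *   | mode header | block descriptor(s) | control mode page |
	 *   0             MODE_HEADER_LENGTH    MODE_HEADER_LENGTH + bd_len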
3040 	 */
3041 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3042 
3043 	ctrl_bufp = (struct mode_control_scsi3 *)
3044 	    (header + MODE_HEADER_LENGTH + bd_len);
3045 
3046 	/*
3047 	 * Clear PS bit for MODE SELECT
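	 * (The PS bit is reserved in MODE SELECT parameter data, so it
	 * must be zeroed before the page is sent back to the device.)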
3048 	 */
3049 	ctrl_bufp->mode_page.ps = 0;
3050 
3051 	/*
3052 	 * Set D_SENSE to enable descriptor sense format.
3053 	 */
3054 	ctrl_bufp->d_sense = 1;
3055 
3056 	/*
3057 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3058 	 */
3059 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3060 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3061 		SD_INFO(SD_LOG_COMMON, un,
3062 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3063 		goto eds_exit;
3064 	}
3065 
3066 eds_exit:
3067 	kmem_free(header, buflen);
3068 }
3069 
3070 /*
3071  *    Function: sd_reenable_dsense_task
3072  *
3073  * Description: Re-enable descriptor sense after device or bus reset
3074  *
3075  *     Context: Executes in a taskq() thread context
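 *
 *		As a sketch (assuming the driver's sd_tq taskq), a caller
 *		would typically dispatch this routine as:
 *
 *		(void) taskq_dispatch(sd_tq, sd_reenable_dsense_task,
 *		    un, TQ_NOSLEEP);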
3076  */
3077 static void
3078 sd_reenable_dsense_task(void *arg)
3079 {
3080 	struct	sd_lun	*un = arg;
3081 
3082 	ASSERT(un != NULL);
3083 	sd_enable_descr_sense(un);
3084 }
3085 #endif /* _LP64 */
3086 
3087 /*
3088  *    Function: sd_set_mmc_caps
3089  *
 * Description: This routine determines if the device is MMC compliant and
 *		if the device supports CDDA, via a mode sense of the CD/DVD
 *		capabilities mode page. It also checks if the device is a
 *		DVD-RAM writable device.
3094  *
3095  *   Arguments: un - driver soft state (unit) structure
3096  *
3097  *     Context: Kernel thread context only
3098  */
3099 
3100 static void
3101 sd_set_mmc_caps(struct sd_lun *un)
3102 {
3103 	struct mode_header_grp2		*sense_mhp;
3104 	uchar_t				*sense_page;
3105 	caddr_t				buf;
3106 	int				bd_len;
3107 	int				status;
3108 	struct uscsi_cmd		com;
3109 	int				rtn;
3110 	uchar_t				*out_data_rw, *out_data_hd;
3111 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3112 
3113 	ASSERT(un != NULL);
3114 
3115 	/*
	 * The flags set in this function are: MMC compliant, DVD-RAM
	 * writable device, and CDDA support. Initialize them to FALSE;
	 * if a capability is detected, the corresponding flag is set to TRUE.
3119 	 */
3120 	un->un_f_mmc_cap = FALSE;
3121 	un->un_f_dvdram_writable_device = FALSE;
3122 	un->un_f_cfg_cdda = FALSE;
3123 
3124 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3125 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3126 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3127 
3128 	if (status != 0) {
3129 		/* command failed; just return */
3130 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3131 		return;
3132 	}
3133 	/*
3134 	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds, the device is assumed to be MMC.
3136 	 */
3137 	un->un_f_mmc_cap = TRUE;
3138 
3139 	/* Get to the page data */
3140 	sense_mhp = (struct mode_header_grp2 *)buf;
3141 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3142 	    sense_mhp->bdesc_length_lo;
3143 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3144 		/*
3145 		 * We did not get back the expected block descriptor
3146 		 * length so we cannot determine if the device supports
3147 		 * CDDA. However, we still indicate the device is MMC
3148 		 * according to the successful response to the page
3149 		 * 0x2A mode sense request.
3150 		 */
3151 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3152 		    "sd_set_mmc_caps: Mode Sense returned "
3153 		    "invalid block descriptor length\n");
3154 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3155 		return;
3156 	}
3157 
3158 	/* See if read CDDA is supported */
3159 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3160 	    bd_len);
3161 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3162 
3163 	/* See if writing DVD RAM is supported. */
3164 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3165 	if (un->un_f_dvdram_writable_device == TRUE) {
3166 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3167 		return;
3168 	}
3169 
3170 	/*
3171 	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD will not have
3173 	 * these capabilities.
3174 	 */
3175 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3176 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3177 		return;
3178 	}
3179 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3180 
3181 	/*
3182 	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  An RRD
3184 	 * device is identified by the features RANDOM_WRITABLE and
3185 	 * HARDWARE_DEFECT_MANAGEMENT.
3186 	 */
3187 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3188 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3189 
3190 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3191 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3192 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3193 	if (rtn != 0) {
3194 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3195 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3196 		return;
3197 	}
3198 
3199 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3200 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3201 
3202 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3203 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3204 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3205 	if (rtn == 0) {
3206 		/*
3207 		 * We have good information, check for random writable
3208 		 * and hardware defect features.
3209 		 */
3210 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3211 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3212 			un->un_f_dvdram_writable_device = TRUE;
3213 		}
3214 	}
3215 
3216 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3217 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3218 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3219 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3220 }
3221 
3222 /*
3223  *    Function: sd_check_for_writable_cd
3224  *
3225  * Description: This routine determines if the media in the device is
 *		writable or not. It uses the GET CONFIGURATION command (0x46)
 *		to determine if the media is writable.
3228  *
3229  *   Arguments: un - driver soft state (unit) structure
3230  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3231  *                           chain and the normal command waitq, or
3232  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3233  *                           "direct" chain and bypass the normal command
3234  *                           waitq.
3235  *
 *     Context: Never called from interrupt context.
3237  */
3238 
3239 static void
3240 sd_check_for_writable_cd(struct sd_lun *un, int path_flag)
3241 {
3242 	struct uscsi_cmd		com;
3243 	uchar_t				*out_data;
3244 	uchar_t				*rqbuf;
3245 	int				rtn;
3246 	uchar_t				*out_data_rw, *out_data_hd;
3247 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3248 	struct mode_header_grp2		*sense_mhp;
3249 	uchar_t				*sense_page;
3250 	caddr_t				buf;
3251 	int				bd_len;
3252 	int				status;
3253 
3254 	ASSERT(un != NULL);
3255 	ASSERT(mutex_owned(SD_MUTEX(un)));
3256 
3257 	/*
	 * Initialize the writable media flag to FALSE; it is set to TRUE
	 * only if the configuration info tells us the media is writable.
3260 	 */
3261 	un->un_f_mmc_writable_media = FALSE;
3262 	mutex_exit(SD_MUTEX(un));
3263 
3264 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3265 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3266 
3267 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3268 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3269 
3270 	mutex_enter(SD_MUTEX(un));
3271 	if (rtn == 0) {
		/*
		 * We have good information; check for writable DVD
		 * (a current profile of 0x12 indicates DVD-RAM).
		 */
3275 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3276 			un->un_f_mmc_writable_media = TRUE;
3277 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3278 			kmem_free(rqbuf, SENSE_LENGTH);
3279 			return;
3280 		}
3281 	}
3282 
3283 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3284 	kmem_free(rqbuf, SENSE_LENGTH);
3285 
3286 	/*
	 * Determine if this is an RRD type device.
3288 	 */
3289 	mutex_exit(SD_MUTEX(un));
3290 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3291 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3292 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3293 	mutex_enter(SD_MUTEX(un));
3294 	if (status != 0) {
3295 		/* command failed; just return */
3296 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3297 		return;
3298 	}
3299 
3300 	/* Get to the page data */
3301 	sense_mhp = (struct mode_header_grp2 *)buf;
3302 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3303 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3304 		/*
3305 		 * We did not get back the expected block descriptor length so
3306 		 * we cannot check the mode page.
3307 		 */
3308 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3309 		    "sd_check_for_writable_cd: Mode Sense returned "
3310 		    "invalid block descriptor length\n");
3311 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3312 		return;
3313 	}
3314 
3315 	/*
3316 	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD device will not have
3318 	 * these capabilities.
3319 	 */
3320 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3321 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3322 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3323 		return;
3324 	}
3325 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3326 
3327 	/*
3328 	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  An RRD device is identified
3330 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3331 	 */
3332 	mutex_exit(SD_MUTEX(un));
3333 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3334 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3335 
3336 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3337 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3338 	    RANDOM_WRITABLE, path_flag);
3339 	if (rtn != 0) {
3340 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3341 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3342 		mutex_enter(SD_MUTEX(un));
3343 		return;
3344 	}
3345 
3346 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3347 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3348 
3349 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3350 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3351 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3352 	mutex_enter(SD_MUTEX(un));
3353 	if (rtn == 0) {
3354 		/*
		 * We have good information; check that the random writable
		 * and hardware defect management features are current.
3357 		 */
3358 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3359 		    (out_data_rw[10] & 0x1) &&
3360 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3361 		    (out_data_hd[10] & 0x1)) {
3362 			un->un_f_mmc_writable_media = TRUE;
3363 		}
3364 	}
3365 
3366 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3367 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3368 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3369 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3370 }
3371 
3372 /*
3373  *    Function: sd_read_unit_properties
3374  *
 * Description: The following implements a property lookup mechanism.
 *		Properties for particular disks (keyed on vendor, model,
 *		and rev numbers) are sought in the sd.conf file via
 *		sd_process_sdconf_file(), and if not found there, are
 *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table().  Once located, the properties
 *		are used to update the driver unit structure.
3382  *
3383  *   Arguments: un - driver soft state (unit) structure
3384  */
3385 
3386 static void
3387 sd_read_unit_properties(struct sd_lun *un)
3388 {
3389 	/*
3390 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3391 	 * the "sd-config-list" property (from the sd.conf file) or if
3392 	 * there was not a match for the inquiry vid/pid. If this event
3393 	 * occurs the static driver configuration table is searched for
3394 	 * a match.
3395 	 */
3396 	ASSERT(un != NULL);
3397 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3398 		sd_process_sdconf_table(un);
3399 	}
3400 
3401 	/* check for LSI device */
3402 	sd_is_lsi(un);
}

3408 /*
3409  *    Function: sd_process_sdconf_file
3410  *
3411  * Description: Use ddi_getlongprop to obtain the properties from the
 *		driver's config file (i.e., sd.conf) and update the driver
3413  *		soft state structure accordingly.
3414  *
3415  *   Arguments: un - driver soft state (unit) structure
3416  *
3417  * Return Code: SD_SUCCESS - The properties were successfully set according
3418  *			     to the driver configuration file.
3419  *		SD_FAILURE - The driver config list was not obtained or
3420  *			     there was no vid/pid match. This indicates that
3421  *			     the static config table should be used.
3422  *
3423  * The config file has a property, "sd-config-list", which consists of
3424  * one or more duplets as follows:
3425  *
3426  *  sd-config-list=
3427  *	<duplet>,
3428  *	[<duplet>,]
3429  *	[<duplet>];
3430  *
3431  * The structure of each duplet is as follows:
3432  *
3433  *  <duplet>:= <vid+pid>,<data-property-name_list>
3434  *
3435  * The first entry of the duplet is the device ID string (the concatenated
3436  * vid & pid; not to be confused with a device_id).  This is defined in
3437  * the same way as in the sd_disk_table.
3438  *
3439  * The second part of the duplet is a string that identifies a
3440  * data-property-name-list. The data-property-name-list is defined as
3441  * follows:
3442  *
3443  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3444  *
3445  * The syntax of <data-property-name> depends on the <version> field.
3446  *
3447  * If version = SD_CONF_VERSION_1 we have the following syntax:
3448  *
3449  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3450  *
 * where the prop0 value will be used to set prop0 if bit0 is set in the
 * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3453  *
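 * For example, a hypothetical sd.conf fragment (vid/pid and values
 * invented for illustration) might look like:
 *
 *	sd-config-list = "ACME    EXAMPLEDISK", "acme-data";
 *	acme-data = 1,0x1,32;
 *
 * which, assuming SD_CONF_BSET_THROTTLE is bit 0 of the flags word,
 * would set the throttle property to 32 for matching devices.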
3454  */
3455 
3456 static int
3457 sd_process_sdconf_file(struct sd_lun *un)
3458 {
3459 	char	*config_list = NULL;
3460 	int	config_list_len;
3461 	int	len;
3462 	int	dupletlen = 0;
3463 	char	*vidptr;
3464 	int	vidlen;
3465 	char	*dnlist_ptr;
3466 	char	*dataname_ptr;
3467 	int	dnlist_len;
3468 	int	dataname_len;
3469 	int	*data_list;
3470 	int	data_list_len;
3471 	int	rval = SD_FAILURE;
3472 	int	i;
3473 
3474 	ASSERT(un != NULL);
3475 
3476 	/* Obtain the configuration list associated with the .conf file */
3477 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3478 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3479 	    != DDI_PROP_SUCCESS) {
3480 		return (SD_FAILURE);
3481 	}
3482 
3483 	/*
3484 	 * Compare vids in each duplet to the inquiry vid - if a match is
3485 	 * made, get the data value and update the soft state structure
3486 	 * accordingly.
3487 	 *
3488 	 * Note: This algorithm is complex and difficult to maintain. It should
3489 	 * be replaced with a more robust implementation.
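	 *
	 * In memory, the property value is a sequence of NUL-separated
	 * strings, e.g. (hypothetical):
	 *
	 *   "ACME    EXAMPLEDISK\0acme-data\0VNDR    OTHERDISK\0vndr-data\0"
	 *
	 * i.e., each duplet is a vid/pid string followed by its
	 * data-property-name list.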
3490 	 */
3491 	for (len = config_list_len, vidptr = config_list; len > 0;
3492 	    vidptr += dupletlen, len -= dupletlen) {
3493 		/*
		 * Note: The assumption here is that each vid entry and its
		 * associated data-property-name list are separate strings.
3496 		 */
3497 		vidlen = dupletlen = (int)strlen(vidptr);
3498 		if ((vidlen == 0) ||
3499 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3500 			dupletlen++;
3501 			continue;
3502 		}
3503 
3504 		/*
		 * dnlist contains 1 or more blank-separated
		 * data-property-name entries
3507 		 */
3508 		dnlist_ptr = vidptr + vidlen + 1;
3509 		dnlist_len = (int)strlen(dnlist_ptr);
3510 		dupletlen += dnlist_len + 2;
3511 
3512 		/*
3513 		 * Set a pointer for the first data-property-name
3514 		 * entry in the list
3515 		 */
3516 		dataname_ptr = dnlist_ptr;
3517 		dataname_len = 0;
3518 
3519 		/*
3520 		 * Loop through all data-property-name entries in the
3521 		 * data-property-name-list setting the properties for each.
3522 		 */
3523 		while (dataname_len < dnlist_len) {
3524 			int version;
3525 
3526 			/*
			 * Determine the length of the current
			 * data-property-name entry by indexing until a
			 * blank or NUL is encountered. When a space is
			 * encountered, reset it to NUL for compliance
			 * with ddi_getlongprop().
3532 			 */
3533 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3534 			    (dataname_ptr[i] != '\0')); i++) {
3535 				;
3536 			}
3537 
3538 			dataname_len += i;
			/* If not NUL-terminated, make it so */
3540 			if (dataname_ptr[i] == ' ') {
3541 				dataname_ptr[i] = '\0';
3542 			}
3543 			dataname_len++;
3544 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3545 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3546 			    vidptr, dataname_ptr);
3547 
3548 			/* Get the data list */
3549 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3550 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3551 			    != DDI_PROP_SUCCESS) {
3552 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 				    "sd_process_sdconf_file: data property (%s)"
3554 				    " has no value\n", dataname_ptr);
3555 				dataname_ptr = dnlist_ptr + dataname_len;
3556 				continue;
3557 			}
3558 
3559 			version = data_list[0];
3560 
3561 			if (version == SD_CONF_VERSION_1) {
3562 				sd_tunables values;
3563 
3564 				/* Set the properties */
3565 				if (sd_chk_vers1_data(un, data_list[1],
3566 				    &data_list[2], data_list_len, dataname_ptr)
3567 				    == SD_SUCCESS) {
3568 					sd_get_tunables_from_conf(un,
3569 					    data_list[1], &data_list[2],
3570 					    &values);
3571 					sd_set_vers1_properties(un,
3572 					    data_list[1], &values);
3573 					rval = SD_SUCCESS;
3574 				} else {
3575 					rval = SD_FAILURE;
3576 				}
3577 			} else {
3578 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3579 				    "data property %s version 0x%x is invalid.",
3580 				    dataname_ptr, version);
3581 				rval = SD_FAILURE;
3582 			}
3583 			kmem_free(data_list, data_list_len);
3584 			dataname_ptr = dnlist_ptr + dataname_len;
3585 		}
3586 	}
3587 
3588 	/* free up the memory allocated by ddi_getlongprop */
3589 	if (config_list) {
3590 		kmem_free(config_list, config_list_len);
3591 	}
3592 
3593 	return (rval);
3594 }
3595 
/*
 *    Function: sd_get_tunables_from_conf()
 *
 * Description: Read the data list from the sd.conf file and pull the
 *		values that can take numeric arguments, placing each in
 *		the appropriate sd_tunables member.  Since the order of
 *		the data list members varies across platforms, this
 *		function reads them from the data list in a
 *		platform-specific order and places them into the correct
 *		sd_tunables member, which is consistent across all
 *		platforms.
 */
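/*
 * For example, with flags = 0x1 and a (hypothetical) data list of
 * "1,0x1,32", i = 0 matches SD_CONF_BSET_THROTTLE (assuming that flag
 * is bit 0), so sdt_throttle is set to 32 from data_list[0] and the
 * remaining members stay zeroed.
 */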
3608 static void
3609 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3610     sd_tunables *values)
3611 {
3612 	int i;
3613 	int mask;
3614 
3615 	bzero(values, sizeof (sd_tunables));
3616 
3617 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3618 
3619 		mask = 1 << i;
3620 		if (mask > flags) {
3621 			break;
3622 		}
3623 
3624 		switch (mask & flags) {
3625 		case 0:	/* This mask bit not set in flags */
3626 			continue;
3627 		case SD_CONF_BSET_THROTTLE:
3628 			values->sdt_throttle = data_list[i];
3629 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3630 			    "sd_get_tunables_from_conf: throttle = %d\n",
3631 			    values->sdt_throttle);
3632 			break;
3633 		case SD_CONF_BSET_CTYPE:
3634 			values->sdt_ctype = data_list[i];
3635 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3636 			    "sd_get_tunables_from_conf: ctype = %d\n",
3637 			    values->sdt_ctype);
3638 			break;
3639 		case SD_CONF_BSET_NRR_COUNT:
3640 			values->sdt_not_rdy_retries = data_list[i];
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3643 			    values->sdt_not_rdy_retries);
3644 			break;
3645 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3646 			values->sdt_busy_retries = data_list[i];
3647 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3648 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3649 			    values->sdt_busy_retries);
3650 			break;
3651 		case SD_CONF_BSET_RST_RETRIES:
3652 			values->sdt_reset_retries = data_list[i];
3653 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3654 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3655 			    values->sdt_reset_retries);
3656 			break;
3657 		case SD_CONF_BSET_RSV_REL_TIME:
3658 			values->sdt_reserv_rel_time = data_list[i];
3659 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3660 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3661 			    values->sdt_reserv_rel_time);
3662 			break;
3663 		case SD_CONF_BSET_MIN_THROTTLE:
3664 			values->sdt_min_throttle = data_list[i];
3665 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3666 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3667 			    values->sdt_min_throttle);
3668 			break;
3669 		case SD_CONF_BSET_DISKSORT_DISABLED:
3670 			values->sdt_disk_sort_dis = data_list[i];
3671 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3672 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3673 			    values->sdt_disk_sort_dis);
3674 			break;
3675 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3676 			values->sdt_lun_reset_enable = data_list[i];
3677 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3678 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3679 			    "\n", values->sdt_lun_reset_enable);
3680 			break;
3681 		}
3682 	}
3683 }
3684 
3685 /*
3686  *    Function: sd_process_sdconf_table
3687  *
3688  * Description: Search the static configuration table for a match on the
3689  *		inquiry vid/pid and update the driver soft state structure
3690  *		according to the table property values for the device.
3691  *
3692  *		The form of a configuration table entry is:
3693  *		  <vid+pid>,<flags>,<property-data>
3694  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3695  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3696  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3697  *
3698  *   Arguments: un - driver soft state (unit) structure
3699  */
3700 
3701 static void
3702 sd_process_sdconf_table(struct sd_lun *un)
3703 {
3704 	char	*id = NULL;
3705 	int	table_index;
3706 	int	idlen;
3707 
3708 	ASSERT(un != NULL);
3709 	for (table_index = 0; table_index < sd_disk_table_size;
3710 	    table_index++) {
3711 		id = sd_disk_table[table_index].device_id;
3712 		idlen = strlen(id);
3713 		if (idlen == 0) {
3714 			continue;
3715 		}
3716 
3717 		/*
		 * The static configuration table currently supports only
		 * version 1 properties. Additionally,
3720 		 * multiple data-property-name entries are not
3721 		 * implemented in the static configuration table.
3722 		 */
3723 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3724 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3725 			    "sd_process_sdconf_table: disk %s\n", id);
3726 			sd_set_vers1_properties(un,
3727 			    sd_disk_table[table_index].flags,
3728 			    sd_disk_table[table_index].properties);
3729 			break;
3730 		}
3731 	}
3732 }
3733 
3734 
3735 /*
3736  *    Function: sd_sdconf_id_match
3737  *
 * Description: This local function implements a case-insensitive vid/pid
 *		comparison as well as the boundary cases of wildcards and
 *		multiple blanks.
3741  *
3742  *		Note: An implicit assumption made here is that the scsi
3743  *		inquiry structure will always keep the vid, pid and
3744  *		revision strings in consecutive sequence, so they can be
3745  *		read as a single string. If this assumption is not the
3746  *		case, a separate string, to be used for the check, needs
3747  *		to be built with these strings concatenated.
3748  *
3749  *   Arguments: un - driver soft state (unit) structure
3750  *		id - table or config file vid/pid
3751  *		idlen  - length of the vid/pid (bytes)
3752  *
3753  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3754  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3755  */
3756 
3757 static int
3758 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3759 {
3760 	struct scsi_inquiry	*sd_inq;
3761 	int 			rval = SD_SUCCESS;
3762 
3763 	ASSERT(un != NULL);
3764 	sd_inq = un->un_sd->sd_inq;
3765 	ASSERT(id != NULL);
3766 
3767 	/*
3768 	 * We use the inq_vid as a pointer to a buffer containing the
3769 	 * vid and pid and use the entire vid/pid length of the table
3770 	 * entry for the comparison. This works because the inq_pid
3771 	 * data member follows inq_vid in the scsi_inquiry structure.
3772 	 */
3773 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3774 		/*
3775 		 * The user id string is compared to the inquiry vid/pid
3776 		 * using a case insensitive comparison and ignoring
3777 		 * multiple spaces.
3778 		 */
3779 		rval = sd_blank_cmp(un, id, idlen);
3780 		if (rval != SD_SUCCESS) {
3781 			/*
3782 			 * User id strings that start and end with a "*"
3783 			 * are a special case. These do not have a
3784 			 * specific vendor, and the product string can
3785 			 * appear anywhere in the 16 byte PID portion of
3786 			 * the inquiry data. This is a simple strstr()
3787 			 * type search for the user id in the inquiry data.
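			 * For example, a hypothetical id of "*ODEL*"
			 * would match any device whose inq_pid contains
			 * the substring "ODEL".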
3788 			 */
3789 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3790 				char	*pidptr = &id[1];
3791 				int	i;
3792 				int	j;
3793 				int	pidstrlen = idlen - 2;
3794 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3795 				    pidstrlen;
3796 
3797 				if (j < 0) {
3798 					return (SD_FAILURE);
3799 				}
3800 				for (i = 0; i < j; i++) {
3801 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3802 					    pidptr, pidstrlen) == 0) {
3803 						rval = SD_SUCCESS;
3804 						break;
3805 					}
3806 				}
3807 			}
3808 		}
3809 	}
3810 	return (rval);
3811 }
3812 
3813 
3814 /*
3815  *    Function: sd_blank_cmp
3816  *
3817  * Description: If the id string starts and ends with a space, treat
3818  *		multiple consecutive spaces as equivalent to a single
3819  *		space. For example, this causes a sd_disk_table entry
3820  *		of " NEC CDROM " to match a device's id string of
3821  *		"NEC       CDROM".
3822  *
 *		Note: The success exit condition for this routine is that
 *		the table entry pointer has reached '\0' and the count of
 *		remaining inquiry bytes is zero. This will happen if the inquiry
3826  *		string returned by the device is padded with spaces to be
3827  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3828  *		SCSI spec states that the inquiry string is to be padded with
3829  *		spaces.
3830  *
3831  *   Arguments: un - driver soft state (unit) structure
3832  *		id - table or config file vid/pid
3833  *		idlen  - length of the vid/pid (bytes)
3834  *
3835  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3836  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3837  */
3838 
3839 static int
3840 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3841 {
	char		*p1;
	char		*p2;
	int		cnt;

	ASSERT(un != NULL);
	ASSERT(id != NULL);

	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);
	p1 = id;
	p2 = un->un_sd->sd_inq->inq_vid;
3852 
3853 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3854 		/*
3855 		 * Note: string p1 is terminated by a NUL but string p2
3856 		 * isn't.  The end of p2 is determined by cnt.
3857 		 */
3858 		for (;;) {
3859 			/* skip over any extra blanks in both strings */
3860 			while ((*p1 != '\0') && (*p1 == ' ')) {
3861 				p1++;
3862 			}
3863 			while ((cnt != 0) && (*p2 == ' ')) {
3864 				p2++;
3865 				cnt--;
3866 			}
3867 
3868 			/* compare the two strings */
3869 			if ((cnt == 0) ||
3870 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3871 				break;
3872 			}
3873 			while ((cnt > 0) &&
3874 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3875 				p1++;
3876 				p2++;
3877 				cnt--;
3878 			}
3879 		}
3880 	}
3881 
3882 	/* return SD_SUCCESS if both strings match */
3883 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3884 }
3885 
3886 
3887 /*
3888  *    Function: sd_chk_vers1_data
3889  *
3890  * Description: Verify the version 1 device properties provided by the
3891  *		user via the configuration file
3892  *
3893  *   Arguments: un	     - driver soft state (unit) structure
3894  *		flags	     - integer mask indicating properties to be set
3895  *		prop_list    - integer list of property values
3896  *		list_len     - length of user provided data
3897  *
3898  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3899  *		SD_FAILURE - Indicates the user provided data is invalid
3900  */
3901 
3902 static int
3903 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3904     int list_len, char *dataname_ptr)
3905 {
3906 	int i;
3907 	int mask = 1;
3908 	int index = 0;
3909 
3910 	ASSERT(un != NULL);
3911 
3912 	/* Check for a NULL property name and list */
3913 	if (dataname_ptr == NULL) {
3914 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3915 		    "sd_chk_vers1_data: NULL data property name.");
3916 		return (SD_FAILURE);
3917 	}
3918 	if (prop_list == NULL) {
3919 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3920 		    "sd_chk_vers1_data: %s NULL data property list.",
3921 		    dataname_ptr);
3922 		return (SD_FAILURE);
3923 	}
3924 
3925 	/* Display a warning if undefined bits are set in the flags */
3926 	if (flags & ~SD_CONF_BIT_MASK) {
3927 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3928 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3929 		    "Properties not set.",
3930 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3931 		return (SD_FAILURE);
3932 	}
3933 
3934 	/*
3935 	 * Verify the length of the list by identifying the highest bit set
3936 	 * in the flags and validating that the property list has a length
3937 	 * up to the index of this bit.
3938 	 */
	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
		if (flags & mask) {
			index = i + 1;
		}
		mask <<= 1;
3944 	}
3945 	if ((list_len / sizeof (int)) < (index + 2)) {
3946 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3947 		    "sd_chk_vers1_data: "
3948 		    "Data property list %s size is incorrect. "
3949 		    "Properties not set.", dataname_ptr);
3950 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3951 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3952 		return (SD_FAILURE);
3953 	}
3954 	return (SD_SUCCESS);
3955 }
3956 
3957 
3958 /*
3959  *    Function: sd_set_vers1_properties
3960  *
3961  * Description: Set version 1 device properties based on a property list
3962  *		retrieved from the driver configuration file or static
3963  *		configuration table. Version 1 properties have the format:
3964  *
3965  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3966  *
3967  *		where the prop0 value will be used to set prop0 if bit0
3968  *		is set in the flags
3969  *
3970  *   Arguments: un	     - driver soft state (unit) structure
3971  *		flags	     - integer mask indicating properties to be set
3972  *		prop_list    - integer list of property values
3973  */
3974 
3975 static void
3976 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3977 {
3978 	ASSERT(un != NULL);
3979 
3980 	/*
3981 	 * Set the flag to indicate cache is to be disabled. An attempt
3982 	 * to disable the cache via sd_cache_control() will be made
3983 	 * later during attach once the basic initialization is complete.
3984 	 */
3985 	if (flags & SD_CONF_BSET_NOCACHE) {
3986 		un->un_f_opt_disable_cache = TRUE;
3987 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 		    "sd_set_vers1_properties: caching disabled flag set\n");
3989 	}
3990 
3991 	/* CD-specific configuration parameters */
3992 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3993 		un->un_f_cfg_playmsf_bcd = TRUE;
3994 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3995 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3996 	}
3997 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3998 		un->un_f_cfg_readsub_bcd = TRUE;
3999 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4000 		    "sd_set_vers1_properties: readsub_bcd set\n");
4001 	}
4002 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4003 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4006 	}
4007 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4008 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4009 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4010 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4011 	}
4012 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4013 		un->un_f_cfg_no_read_header = TRUE;
4014 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4015 			    "sd_set_vers1_properties: no_read_header set\n");
4016 	}
4017 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4018 		un->un_f_cfg_read_cd_xd4 = TRUE;
4019 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4020 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4021 	}
4022 
4023 	/* Support for devices which do not have valid/unique serial numbers */
4024 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4025 		un->un_f_opt_fab_devid = TRUE;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: fab_devid bit set\n");
4028 	}
4029 
4030 	/* Support for user throttle configuration */
4031 	if (flags & SD_CONF_BSET_THROTTLE) {
4032 		ASSERT(prop_list != NULL);
4033 		un->un_saved_throttle = un->un_throttle =
4034 		    prop_list->sdt_throttle;
4035 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4036 		    "sd_set_vers1_properties: throttle set to %d\n",
4037 		    prop_list->sdt_throttle);
4038 	}
4039 
4040 	/* Set the per disk retry count according to the conf file or table. */
4041 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4042 		ASSERT(prop_list != NULL);
4043 		if (prop_list->sdt_not_rdy_retries) {
4044 			un->un_notready_retry_count =
4045 				prop_list->sdt_not_rdy_retries;
4046 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4047 			    "sd_set_vers1_properties: not ready retry count"
4048 			    " set to %d\n", un->un_notready_retry_count);
4049 		}
4050 	}
4051 
4052 	/* The controller type is reported for generic disk driver ioctls */
4053 	if (flags & SD_CONF_BSET_CTYPE) {
4054 		ASSERT(prop_list != NULL);
4055 		switch (prop_list->sdt_ctype) {
4056 		case CTYPE_CDROM:
4057 			un->un_ctype = prop_list->sdt_ctype;
4058 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4059 			    "sd_set_vers1_properties: ctype set to "
4060 			    "CTYPE_CDROM\n");
4061 			break;
4062 		case CTYPE_CCS:
4063 			un->un_ctype = prop_list->sdt_ctype;
4064 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 				"sd_set_vers1_properties: ctype set to "
4066 				"CTYPE_CCS\n");
4067 			break;
4068 		case CTYPE_ROD:		/* RW optical */
4069 			un->un_ctype = prop_list->sdt_ctype;
4070 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4071 			    "sd_set_vers1_properties: ctype set to "
4072 			    "CTYPE_ROD\n");
4073 			break;
4074 		default:
4075 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4076 			    "sd_set_vers1_properties: Could not set "
4077 			    "invalid ctype value (%d)",
4078 			    prop_list->sdt_ctype);
4079 		}
4080 	}
4081 
4082 	/* Purple failover timeout */
4083 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4084 		ASSERT(prop_list != NULL);
4085 		un->un_busy_retry_count =
4086 			prop_list->sdt_busy_retries;
4087 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4088 		    "sd_set_vers1_properties: "
4089 		    "busy retry count set to %d\n",
4090 		    un->un_busy_retry_count);
4091 	}
4092 
4093 	/* Purple reset retry count */
4094 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4095 		ASSERT(prop_list != NULL);
4096 		un->un_reset_retry_count =
4097 			prop_list->sdt_reset_retries;
4098 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4099 		    "sd_set_vers1_properties: "
4100 		    "reset retry count set to %d\n",
4101 		    un->un_reset_retry_count);
4102 	}
4103 
4104 	/* Purple reservation release timeout */
4105 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4106 		ASSERT(prop_list != NULL);
4107 		un->un_reserve_release_time =
4108 			prop_list->sdt_reserv_rel_time;
4109 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4110 		    "sd_set_vers1_properties: "
4111 		    "reservation release timeout set to %d\n",
4112 		    un->un_reserve_release_time);
4113 	}
4114 
4115 	/*
	 * Set the flag telling the driver to verify that no commands are
	 * pending for a device before issuing a Test Unit Ready. This is
	 * a workaround for a firmware bug in some Seagate eliteI drives.
4119 	 */
4120 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4121 		un->un_f_cfg_tur_check = TRUE;
4122 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4123 		    "sd_set_vers1_properties: tur queue check set\n");
4124 	}
4125 
4126 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4127 		un->un_min_throttle = prop_list->sdt_min_throttle;
4128 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4129 		    "sd_set_vers1_properties: min throttle set to %d\n",
4130 		    un->un_min_throttle);
4131 	}
4132 
4133 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4134 		un->un_f_disksort_disabled =
4135 		    (prop_list->sdt_disk_sort_dis != 0) ?
4136 		    TRUE : FALSE;
4137 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4138 		    "sd_set_vers1_properties: disksort disabled "
4139 		    "flag set to %d\n",
4140 		    prop_list->sdt_disk_sort_dis);
4141 	}
4142 
4143 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4144 		un->un_f_lun_reset_enabled =
4145 		    (prop_list->sdt_lun_reset_enable != 0) ?
4146 		    TRUE : FALSE;
4147 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4148 		    "sd_set_vers1_properties: lun reset enabled "
4149 		    "flag set to %d\n",
4150 		    prop_list->sdt_lun_reset_enable);
4151 	}
4152 
4153 	/*
4154 	 * Validate the throttle values.
4155 	 * If any of the numbers are invalid, set everything to defaults.
4156 	 */
4157 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4158 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4159 	    (un->un_min_throttle > un->un_throttle)) {
4160 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4161 		un->un_min_throttle = sd_min_throttle;
4162 	}
4163 }
4164 
4165 /*
4166  *   Function: sd_is_lsi()
4167  *
 *   Description: Check for LSI devices, stepping through the static
 *	device table to match the vid/pid.
 *
 *   Args: un - ptr to sd_lun
 *
 *   Notes:  When creating a new LSI property, the new property must also
 *		be added to this function.
4175  */
4176 static void
4177 sd_is_lsi(struct sd_lun *un)
4178 {
4179 	char	*id = NULL;
4180 	int	table_index;
4181 	int	idlen;
4182 	void	*prop;
4183 
4184 	ASSERT(un != NULL);
4185 	for (table_index = 0; table_index < sd_disk_table_size;
4186 	    table_index++) {
4187 		id = sd_disk_table[table_index].device_id;
4188 		idlen = strlen(id);
4189 		if (idlen == 0) {
4190 			continue;
4191 		}
4192 
4193 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4194 			prop = sd_disk_table[table_index].properties;
4195 			if (prop == &lsi_properties ||
4196 			    prop == &lsi_oem_properties ||
4197 			    prop == &lsi_properties_scsi ||
4198 			    prop == &symbios_properties) {
4199 				un->un_f_cfg_is_lsi = TRUE;
4200 			}
4201 			break;
4202 		}
4203 	}
4204 }
4205 
4206 /*
4207  *    Function: sd_get_physical_geometry
4208  *
4209  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4210  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4211  *		target, and use this information to initialize the physical
4212  *		geometry cache specified by pgeom_p.
4213  *
4214  *		MODE SENSE is an optional command, so failure in this case
4215  *		does not necessarily denote an error. We want to use the
4216  *		MODE SENSE commands to derive the physical geometry of the
4217  *		device, but if either command fails, the logical geometry is
4218  *		used as the fallback for disk label geometry in cmlb.
4219  *
4220  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4221  *		have already been initialized for the current target and
4222  *		that the current values be passed as args so that we don't
4223  *		end up ever trying to use -1 as a valid value. This could
4224  *		happen if either value is reset while we're not holding
4225  *		the mutex.
4226  *
4227  *   Arguments: un - driver soft state (unit) structure
4228  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4229  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4230  *			to use the USCSI "direct" chain and bypass the normal
4231  *			command waitq.
4232  *
4233  *     Context: Kernel thread only (can sleep).
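 *
 *		As a worked example (invented numbers): if page 3 reports
 *		nsect = 128 and page 4 reports nhead = 16 and ncyl = 4096,
 *		then spc = 2048 and modesense_capacity = 8388608 blocks.
 *		If READ CAPACITY reported 8386560 blocks, the code computes
 *		g_acyl = (8388608 - 8386560 + 2047) / 2048 = 1 and
 *		g_ncyl = 4096 - 1 = 4095.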
4234  */
4235 
4236 static int
4237 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4238 	diskaddr_t capacity, int lbasize, int path_flag)
4239 {
4240 	struct	mode_format	*page3p;
4241 	struct	mode_geometry	*page4p;
4242 	struct	mode_header	*headerp;
4243 	int	sector_size;
4244 	int	nsect;
4245 	int	nhead;
4246 	int	ncyl;
4247 	int	intrlv;
4248 	int	spc;
4249 	diskaddr_t	modesense_capacity;
4250 	int	rpm;
4251 	int	bd_len;
4252 	int	mode_header_length;
4253 	uchar_t	*p3bufp;
4254 	uchar_t	*p4bufp;
4255 	int	cdbsize;
4256 	int 	ret = EIO;
4257 
4258 	ASSERT(un != NULL);
4259 
4260 	if (lbasize == 0) {
4261 		if (ISCD(un)) {
4262 			lbasize = 2048;
4263 		} else {
4264 			lbasize = un->un_sys_blocksize;
4265 		}
4266 	}
4267 	pgeom_p->g_secsize = (unsigned short)lbasize;
4268 
4269 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4270 
4271 	/*
4272 	 * Retrieve MODE SENSE page 3 - Format Device Page
4273 	 */
4274 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4275 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4276 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4277 	    != 0) {
4278 		SD_ERROR(SD_LOG_COMMON, un,
4279 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4280 		goto page3_exit;
4281 	}
4282 
4283 	/*
4284 	 * Determine size of Block Descriptors in order to locate the mode
4285 	 * page data.  ATAPI devices return 0, SCSI devices should return
4286 	 * MODE_BLK_DESC_LENGTH.
4287 	 */
4288 	headerp = (struct mode_header *)p3bufp;
4289 	if (un->un_f_cfg_is_atapi == TRUE) {
4290 		struct mode_header_grp2 *mhp =
4291 		    (struct mode_header_grp2 *)headerp;
4292 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4293 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4294 	} else {
4295 		mode_header_length = MODE_HEADER_LENGTH;
4296 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4297 	}
4298 
4299 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4300 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4301 		    "received unexpected bd_len of %d, page3\n", bd_len);
4302 		goto page3_exit;
4303 	}
4304 
4305 	page3p = (struct mode_format *)
4306 	    ((caddr_t)headerp + mode_header_length + bd_len);
4307 
4308 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4309 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4310 		    "mode sense pg3 code mismatch %d\n",
4311 		    page3p->mode_page.code);
4312 		goto page3_exit;
4313 	}
4314 
4315 	/*
4316 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4317 	 * complete successfully; otherwise, revert to the logical geometry.
4318 	 * So, we need to save everything in temporary variables.
4319 	 */
4320 	sector_size = BE_16(page3p->data_bytes_sect);
4321 
4322 	/*
4323 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4324 	 */
4325 	if (sector_size == 0) {
4326 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
4327 	} else {
4328 		sector_size &= ~(un->un_sys_blocksize - 1);
4329 	}
4330 
4331 	nsect  = BE_16(page3p->sect_track);
4332 	intrlv = BE_16(page3p->interleave);
4333 
4334 	SD_INFO(SD_LOG_COMMON, un,
4335 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4336 	SD_INFO(SD_LOG_COMMON, un,
4337 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4338 	    page3p->mode_page.code, nsect, sector_size);
4339 	SD_INFO(SD_LOG_COMMON, un,
4340 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4341 	    BE_16(page3p->track_skew),
4342 	    BE_16(page3p->cylinder_skew));
4343 
4344 
4345 	/*
4346 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4347 	 */
4348 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4349 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4350 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4351 	    != 0) {
4352 		SD_ERROR(SD_LOG_COMMON, un,
4353 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4354 		goto page4_exit;
4355 	}
4356 
4357 	/*
4358 	 * Determine size of Block Descriptors in order to locate the mode
4359 	 * page data.  ATAPI devices return 0, SCSI devices should return
4360 	 * MODE_BLK_DESC_LENGTH.
4361 	 */
4362 	headerp = (struct mode_header *)p4bufp;
4363 	if (un->un_f_cfg_is_atapi == TRUE) {
4364 		struct mode_header_grp2 *mhp =
4365 		    (struct mode_header_grp2 *)headerp;
4366 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4367 	} else {
4368 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4369 	}
4370 
4371 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4372 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4373 		    "received unexpected bd_len of %d, page4\n", bd_len);
4374 		goto page4_exit;
4375 	}
4376 
4377 	page4p = (struct mode_geometry *)
4378 	    ((caddr_t)headerp + mode_header_length + bd_len);
4379 
4380 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4381 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4382 		    "mode sense pg4 code mismatch %d\n",
4383 		    page4p->mode_page.code);
4384 		goto page4_exit;
4385 	}
4386 
4387 	/*
4388 	 * Stash the data now, after we know that both commands completed.
	 */

4392 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4393 	spc   = nhead * nsect;
4394 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4395 	rpm   = BE_16(page4p->rpm);
4396 
4397 	modesense_capacity = spc * ncyl;
4398 
4399 	SD_INFO(SD_LOG_COMMON, un,
4400 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4401 	SD_INFO(SD_LOG_COMMON, un,
4402 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4403 	SD_INFO(SD_LOG_COMMON, un,
4404 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4405 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4406 	    (void *)pgeom_p, capacity);
4407 
4408 	/*
4409 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4410 	 * the product of C * H * S returned by MODE SENSE >= that returned
4411 	 * by read capacity. This is an idiosyncrasy of the original x86
4412 	 * disk subsystem.
4413 	 */
4414 	if (modesense_capacity >= capacity) {
4415 		SD_INFO(SD_LOG_COMMON, un,
4416 		    "sd_get_physical_geometry: adjusting acyl; "
4417 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4418 		    (modesense_capacity - capacity + spc - 1) / spc);
4419 		if (sector_size != 0) {
4420 			/* 1243403: NEC D38x7 drives don't support sec size */
4421 			pgeom_p->g_secsize = (unsigned short)sector_size;
4422 		}
4423 		pgeom_p->g_nsect    = (unsigned short)nsect;
4424 		pgeom_p->g_nhead    = (unsigned short)nhead;
4425 		pgeom_p->g_capacity = capacity;
4426 		pgeom_p->g_acyl	    =
4427 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4428 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4429 	}
4430 
4431 	pgeom_p->g_rpm    = (unsigned short)rpm;
4432 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4433 	ret = 0;
4434 
4435 	SD_INFO(SD_LOG_COMMON, un,
4436 	    "sd_get_physical_geometry: mode sense geometry:\n");
4437 	SD_INFO(SD_LOG_COMMON, un,
4438 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4439 	    nsect, sector_size, intrlv);
4440 	SD_INFO(SD_LOG_COMMON, un,
4441 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4442 	    nhead, ncyl, rpm, modesense_capacity);
4443 	SD_INFO(SD_LOG_COMMON, un,
4444 	    "sd_get_physical_geometry: (cached)\n");
4445 	SD_INFO(SD_LOG_COMMON, un,
4446 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4447 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4448 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4449 	SD_INFO(SD_LOG_COMMON, un,
4450 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4451 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4452 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4453 
4454 page4_exit:
4455 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4456 page3_exit:
4457 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4458 
4459 	return (ret);
4460 }
4461 
4462 /*
4463  *    Function: sd_get_virtual_geometry
4464  *
4465  * Description: Ask the controller to tell us about the target device.
4466  *
4467  *   Arguments: un - pointer to softstate
4468  *		capacity - disk capacity in #blocks
4469  *		lbasize - disk block size in bytes
4470  *
4471  *     Context: Kernel thread only
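 *
 *		The "geometry" capability packs nhead in the upper 16 bits
 *		of the result and nsect in the lower 16 bits.  As a sketch
 *		with invented numbers: geombuf = 0x00FF003F decodes to
 *		nhead = 255 and nsect = 63, so a capacity of 16065000
 *		blocks yields g_ncyl = 16065000 / (255 * 63) = 1000.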
4472  */
4473 
4474 static int
4475 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4476     diskaddr_t capacity, int lbasize)
4477 {
4478 	uint_t	geombuf;
4479 	int	spc;
4480 
4481 	ASSERT(un != NULL);
4482 
4483 	/* Set sector size, and total number of sectors */
4484 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4485 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4486 
4487 	/* Let the HBA tell us its geometry */
4488 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4489 
4490 	/* A value of -1 indicates an undefined "geometry" property */
4491 	if (geombuf == (-1)) {
4492 		return (EINVAL);
4493 	}
4494 
4495 	/* Initialize the logical geometry cache. */
4496 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4497 	lgeom_p->g_nsect   = geombuf & 0xffff;
4498 	lgeom_p->g_secsize = un->un_sys_blocksize;
4499 
4500 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4501 
4502 	/*
4503 	 * Note: The driver originally converted the capacity value from
4504 	 * target blocks to system blocks. However, the capacity value passed
4505 	 * to this routine is already in terms of system blocks (this scaling
4506 	 * is done when the READ CAPACITY command is issued and processed).
4507 	 * This 'error' may have gone undetected because the usage of g_ncyl
	 * (which is based upon g_capacity) is very limited within the driver.
	 */
4510 	lgeom_p->g_capacity = capacity;
4511 
4512 	/*
	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
	 * the HBA may return zero values if the device has been removed.
4515 	 */
4516 	if (spc == 0) {
4517 		lgeom_p->g_ncyl = 0;
4518 	} else {
4519 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4520 	}
4521 	lgeom_p->g_acyl = 0;
4522 
4523 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
	return (0);
}

/*
4528  *    Function: sd_update_block_info
4529  *
 * Description: Update the soft state with the new target block size
 *		(lbasize) and capacity, marking each value valid when
 *		nonzero.
4532  *
4533  *   Arguments: un: unit struct.
4534  *		lbasize: new target sector size
4535  *		capacity: new target capacity, ie. block count
4536  *
4537  *     Context: Kernel thread context
4538  */
4539 
4540 static void
4541 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4542 {
4543 	if (lbasize != 0) {
4544 		un->un_tgt_blocksize = lbasize;
4545 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4546 	}
4547 
4548 	if (capacity != 0) {
4549 		un->un_blockcount		= capacity;
4550 		un->un_f_blockcount_is_valid	= TRUE;
4551 	}
4552 }
4553 
4554 
4555 /*
4556  *    Function: sd_register_devid
4557  *
4558  * Description: This routine will obtain the device id information from the
4559  *		target, obtain the serial number, and register the device
4560  *		id with the ddi framework.
4561  *
4562  *   Arguments: devi - the system's dev_info_t for the device.
4563  *		un - driver soft state (unit) structure
4564  *		reservation_flag - indicates if a reservation conflict
4565  *		occurred during attach
4566  *
4567  *     Context: Kernel Thread
4568  */
4569 static void
4570 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4571 {
4572 	int		rval		= 0;
4573 	uchar_t		*inq80		= NULL;
4574 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4575 	size_t		inq80_resid	= 0;
4576 	uchar_t		*inq83		= NULL;
4577 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4578 	size_t		inq83_resid	= 0;
4579 
4580 	ASSERT(un != NULL);
4581 	ASSERT(mutex_owned(SD_MUTEX(un)));
4582 	ASSERT((SD_DEVINFO(un)) == devi);
4583 
4584 	/*
4585 	 * This is the case of antiquated Sun disk drives that have the
4586 	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage their devids by storing them in the last two available
	 * sectors on the drive and have them fabricated by the ddi layer
	 * by calling ddi_devid_init and passing the DEVID_FAB flag.
4590 	 */
4591 	if (un->un_f_opt_fab_devid == TRUE) {
4592 		/*
4593 		 * Depending on EINVAL isn't reliable, since a reserved disk
4594 		 * may result in invalid geometry, so check to make sure a
4595 		 * reservation conflict did not occur during attach.
4596 		 */
4597 		if ((sd_get_devid(un) == EINVAL) &&
4598 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4599 			/*
4600 			 * The devid is invalid AND there is no reservation
4601 			 * conflict.  Fabricate a new devid.
4602 			 */
4603 			(void) sd_create_devid(un);
4604 		}
4605 
4606 		/* Register the devid if it exists */
4607 		if (un->un_devid != NULL) {
4608 			(void) ddi_devid_register(SD_DEVINFO(un),
4609 			    un->un_devid);
4610 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4611 			    "sd_register_devid: Devid Fabricated\n");
4612 		}
4613 		return;
4614 	}
4615 
4616 	/*
	 * We check the availability of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and
	 * using un_vpd_page_mask from them, we decide which way to get the
	 * WWN.  If 0x83 is available, that is the best choice.  Our next
	 * choice is 0x80.  If neither is available, we munge the devid from
	 * the device vid/pid/serial # for Sun qualified disks, or use the
	 * ddi framework to fabricate a devid for non-Sun qualified disks.
4624 	 */
4625 	if (sd_check_vpd_page_support(un) == 0) {
4626 		/* collect page 80 data if available */
4627 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4628 
4629 			mutex_exit(SD_MUTEX(un));
4630 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4631 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4632 			    0x01, 0x80, &inq80_resid);
4633 
4634 			if (rval != 0) {
4635 				kmem_free(inq80, inq80_len);
4636 				inq80 = NULL;
4637 				inq80_len = 0;
4638 			}
4639 			mutex_enter(SD_MUTEX(un));
4640 		}
4641 
4642 		/* collect page 83 data if available */
4643 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4644 			mutex_exit(SD_MUTEX(un));
4645 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4646 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4647 			    0x01, 0x83, &inq83_resid);
4648 
4649 			if (rval != 0) {
4650 				kmem_free(inq83, inq83_len);
4651 				inq83 = NULL;
4652 				inq83_len = 0;
4653 			}
4654 			mutex_enter(SD_MUTEX(un));
4655 		}
4656 	}
4657 
4658 	/* encode best devid possible based on data available */
4659 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4660 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4661 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4662 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4663 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4664 
4665 		/* devid successfully encoded, register devid */
4666 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4667 
4668 	} else {
4669 		/*
		 * Unable to encode a devid based on available data.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non-Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage their devids by storing them in
		 * the last two available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricated devid only if one does not
		 * already exist.
4682 		 */
4683 		if (sd_get_devid(un) == EINVAL) {
4684 			(void) sd_create_devid(un);
4685 		}
4686 		un->un_f_opt_fab_devid = TRUE;
4687 
4688 		/* Register the devid if it exists */
4689 		if (un->un_devid != NULL) {
4690 			(void) ddi_devid_register(SD_DEVINFO(un),
4691 			    un->un_devid);
4692 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4693 			    "sd_register_devid: devid fabricated using "
4694 			    "ddi framework\n");
4695 		}
4696 	}
4697 
4698 	/* clean up resources */
4699 	if (inq80 != NULL) {
4700 		kmem_free(inq80, inq80_len);
4701 	}
4702 	if (inq83 != NULL) {
4703 		kmem_free(inq83, inq83_len);
4704 	}
4705 }
4706 
4707 
4708 
4709 /*
4710  *    Function: sd_get_devid
4711  *
4712  * Description: This routine will return 0 if a valid device id has been
4713  *		obtained from the target and stored in the soft state. If a
4714  *		valid device id has not been previously read and stored, a
4715  *		read attempt will be made.
4716  *
4717  *   Arguments: un - driver soft state (unit) structure
4718  *
4719  * Return Code: 0 if we successfully get the device id
4720  * Return Code: 0 on success; EINVAL or a read error code otherwise
4721  *     Context: Kernel Thread
4722  */
4723 
4724 static int
4725 sd_get_devid(struct sd_lun *un)
4726 {
4727 	struct dk_devid		*dkdevid;
4728 	ddi_devid_t		tmpid;
4729 	uint_t			*ip;
4730 	size_t			sz;
4731 	diskaddr_t		blk;
4732 	int			status;
4733 	int			chksum;
4734 	int			i;
4735 	size_t			buffer_size;
4736 
4737 	ASSERT(un != NULL);
4738 	ASSERT(mutex_owned(SD_MUTEX(un)));
4739 
4740 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4741 	    un);
4742 
4743 	if (un->un_devid != NULL) {
4744 		return (0);
4745 	}
4746 
4747 	mutex_exit(SD_MUTEX(un));
4748 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4749 	    (void *)SD_PATH_DIRECT) != 0) {
4750 		mutex_enter(SD_MUTEX(un));
4751 		return (EINVAL);
4752 	}
4753 
4754 	/*
4755 	 * Read and verify device id, stored in the reserved cylinders at the
4756 	 * end of the disk. Backup label is on the odd sectors of the last
4757 	 * track of the last cylinder.  The device id is on a track of the
4758 	 * next-to-last cylinder.
4759 	 */
4760 	mutex_enter(SD_MUTEX(un));
4761 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4762 	mutex_exit(SD_MUTEX(un));
4763 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4764 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4765 	    SD_PATH_DIRECT);
4766 	if (status != 0) {
4767 		goto error;
4768 	}
4769 
4770 	/* Validate the revision */
4771 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4772 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4773 		status = EINVAL;
4774 		goto error;
4775 	}
4776 
4777 	/* Calculate the checksum */
4778 	chksum = 0;
4779 	ip = (uint_t *)dkdevid;
4780 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4781 	    i++) {
4782 		chksum ^= ip[i];
4783 	}
4784 
4785 	/* Compare the checksums */
4786 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4787 		status = EINVAL;
4788 		goto error;
4789 	}
4790 
4791 	/* Validate the device id */
4792 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4793 		status = EINVAL;
4794 		goto error;
4795 	}
4796 
4797 	/*
4798 	 * Store the device id in the driver soft state
4799 	 */
4800 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4801 	tmpid = kmem_alloc(sz, KM_SLEEP);
4802 
4803 	mutex_enter(SD_MUTEX(un));
4804 
4805 	un->un_devid = tmpid;
4806 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4807 
4808 	kmem_free(dkdevid, buffer_size);
4809 
4810 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4811 
4812 	return (status);
4813 error:
4814 	mutex_enter(SD_MUTEX(un));
4815 	kmem_free(dkdevid, buffer_size);
4816 	return (status);
4817 }
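
/*
 * Illustrative sketch (not part of the driver): the dk_devid integrity
 * check used by sd_get_devid() above, and by sd_write_deviceid() below,
 * is a plain XOR over every 32-bit word of the block except the last
 * word, which holds the stored checksum.  The helper name and its
 * arguments are hypothetical.
 */
static uint_t
example_dkdevid_chksum(const uint_t *blk, size_t blocksize)
{
	uint_t	chksum = 0;
	size_t	i;

	/* XOR all words except the trailing checksum word */
	for (i = 0; i < (blocksize - sizeof (uint_t)) / sizeof (uint_t); i++) {
		chksum ^= blk[i];
	}
	return (chksum);
}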
4818 
4819 
4820 /*
4821  *    Function: sd_create_devid
4822  *
4823  * Description: This routine will fabricate the device id and write it
4824  *		to the disk.
4825  *
4826  *   Arguments: un - driver soft state (unit) structure
4827  *
4828  * Return Code: value of the fabricated device id
4829  * Return Code: the fabricated device id, or NULL on failure
4830  *     Context: Kernel Thread
4831  */
4832 
4833 static ddi_devid_t
4834 sd_create_devid(struct sd_lun *un)
4835 {
4836 	ASSERT(un != NULL);
4837 
4838 	/* Fabricate the devid */
4839 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4840 	    == DDI_FAILURE) {
4841 		return (NULL);
4842 	}
4843 
4844 	/* Write the devid to disk */
4845 	if (sd_write_deviceid(un) != 0) {
4846 		ddi_devid_free(un->un_devid);
4847 		un->un_devid = NULL;
4848 	}
4849 
4850 	return (un->un_devid);
4851 }
4852 
4853 
4854 /*
4855  *    Function: sd_write_deviceid
4856  *
4857  * Description: This routine will write the device id to the disk
4858  *		reserved sector.
4859  *
4860  *   Arguments: un - driver soft state (unit) structure
4861  *
4862  * Return Code: EINVAL
4863  * Return Code: -1 if the devid block cannot be located
4864  *		value returned by sd_send_scsi_WRITE()
4865  *     Context: Kernel Thread
4866  */
4867 
4868 static int
4869 sd_write_deviceid(struct sd_lun *un)
4870 {
4871 	struct dk_devid		*dkdevid;
4872 	diskaddr_t		blk;
4873 	uint_t			*ip, chksum;
4874 	int			status;
4875 	int			i;
4876 
4877 	ASSERT(mutex_owned(SD_MUTEX(un)));
4878 
4879 	mutex_exit(SD_MUTEX(un));
4880 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4881 	    (void *)SD_PATH_DIRECT) != 0) {
4882 		mutex_enter(SD_MUTEX(un));
4883 		return (-1);
4884 	}
4885 
4886 
4887 	/* Allocate the buffer */
4888 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
4889 
4890 	/* Fill in the revision */
4891 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
4892 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
4893 
4894 	/* Copy in the device id */
4895 	mutex_enter(SD_MUTEX(un));
4896 	bcopy(un->un_devid, &dkdevid->dkd_devid,
4897 	    ddi_devid_sizeof(un->un_devid));
4898 	mutex_exit(SD_MUTEX(un));
4899 
4900 	/* Calculate the checksum */
4901 	chksum = 0;
4902 	ip = (uint_t *)dkdevid;
4903 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4904 	    i++) {
4905 		chksum ^= ip[i];
4906 	}
4907 
4908 	/* Fill-in checksum */
4909 	DKD_FORMCHKSUM(chksum, dkdevid);
4910 
4911 	/* Write the reserved sector */
4912 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
4913 	    SD_PATH_DIRECT);
4914 
4915 	kmem_free(dkdevid, un->un_sys_blocksize);
4916 
4917 	mutex_enter(SD_MUTEX(un));
4918 	return (status);
4919 }
4920 
4921 
4922 /*
4923  *    Function: sd_check_vpd_page_support
4924  *
4925  * Description: This routine sends an inquiry command with the EVPD bit set and
4926  *		a page code of 0x00 to the device. It is used to determine which
4927  *		vital product pages are available to find the devid. We are
4928  *		looking for pages 0x83 or 0x80.  If we return -1, the
4929  *		device does not support VPD pages.
4930  *
4931  *   Arguments: un  - driver soft state (unit) structure
4932  *
4933  * Return Code:  0 - success
4934  *		-1 - VPD pages not supported by the device
4935  *
4936  *     Context: This routine can sleep.
4937  */
4938 
4939 static int
4940 sd_check_vpd_page_support(struct sd_lun *un)
4941 {
4942 	uchar_t	*page_list	= NULL;
4943 	uchar_t	page_length	= 0xff;	/* Use max possible length */
4944 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
4945 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
4946 	int    	rval		= 0;
4947 	int	counter;
4948 
4949 	ASSERT(un != NULL);
4950 	ASSERT(mutex_owned(SD_MUTEX(un)));
4951 
4952 	mutex_exit(SD_MUTEX(un));
4953 
4954 	/*
4955 	 * We'll set the page length to the maximum to save figuring it out
4956 	 * with an additional call.
4957 	 */
4958 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
4959 
4960 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
4961 	    page_code, NULL);
4962 
4963 	mutex_enter(SD_MUTEX(un));
4964 
4965 	/*
4966 	 * Now we must validate that the device accepted the command, as some
4967 	 * drives do not support it.  If the drive does support it, we will
4968 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
4969 	 * not, we return -1.
4970 	 */
4971 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
4972 		/* Loop to find one of the 2 pages we need */
4973 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
4974 
4975 		/*
4976 		 * Pages are returned in ascending order, and 0x83 is what we
4977 		 * are hoping for.
4978 		 */
4979 		while ((page_list[counter] <= 0x83) &&
4980 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
4981 		    VPD_HEAD_OFFSET))) {
4982 			/*
4983 			 * page_list[VPD_PAGE_LENGTH] counts only the page
4984 			 * codes, so add VPD_HEAD_OFFSET to span the header.
4985 			 */
4986 
4987 			switch (page_list[counter]) {
4988 			case 0x00:
4989 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
4990 				break;
4991 			case 0x80:
4992 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
4993 				break;
4994 			case 0x81:
4995 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
4996 				break;
4997 			case 0x82:
4998 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
4999 				break;
5000 			case 0x83:
5001 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5002 				break;
5003 			}
5004 			counter++;
5005 		}
5006 
5007 	} else {
5008 		rval = -1;
5009 
5010 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5011 		    "sd_check_vpd_page_support: This drive does not implement "
5012 		    "VPD pages.\n");
5013 	}
5014 
5015 	kmem_free(page_list, page_length);
5016 
5017 	return (rval);
5018 }
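
/*
 * Illustrative sketch (not part of the driver): a Supported VPD Pages
 * (0x00) response is a 4-byte header followed by one page code per
 * byte, with byte 3 giving the number of page codes that follow.
 * Collecting the pages of interest into a bitmask, as
 * sd_check_vpd_page_support() does above, reduces to the following.
 * The function name and mask values are hypothetical.
 */
static uint_t
example_vpd_page_mask(const uchar_t *page_list)
{
	uint_t	mask = 0;
	int	i;

	for (i = 0; i < page_list[3]; i++) {
		switch (page_list[4 + i]) {
		case 0x80:	/* Unit Serial Number */
			mask |= 0x01;
			break;
		case 0x83:	/* Device Identification */
			mask |= 0x02;
			break;
		}
	}
	return (mask);
}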
5019 
5020 
5021 /*
5022  *    Function: sd_setup_pm
5023  *
5024  * Description: Initialize Power Management on the device
5025  *
5026  *     Context: Kernel Thread
5027  */
5028 
5029 static void
5030 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5031 {
5032 	uint_t	log_page_size;
5033 	uchar_t	*log_page_data;
5034 	int	rval;
5035 
5036 	/*
5037 	 * Since we are called from attach, holding a mutex for
5038 	 * un is unnecessary. Because some of the routines called
5039 	 * from here require SD_MUTEX to not be held, assert this
5040 	 * right up front.
5041 	 */
5042 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5043 	/*
5044 	 * Since the sd device does not have the 'reg' property,
5045 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5046 	 * The following code is to tell cpr that this device
5047 	 * DOES need to be suspended and resumed.
5048 	 */
5049 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5050 	    "pm-hardware-state", "needs-suspend-resume");
5051 
5052 	/*
5053 	 * This complies with the new power management framework
5054 	 * for certain desktop machines. Create the pm_components
5055 	 * property as a string array property.
5056 	 */
5057 	if (un->un_f_pm_supported) {
5058 		/*
5059 		 * Not all devices have a motor, so try it first.  Some
5060 		 * devices may return ILLEGAL REQUEST, and some
5061 		 * will hang.
5062 		 * The following START_STOP_UNIT is used to check if the
5063 		 * target device has a motor.
5064 		 */
5065 		un->un_f_start_stop_supported = TRUE;
5066 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5067 		    SD_PATH_DIRECT) != 0) {
5068 			un->un_f_start_stop_supported = FALSE;
5069 		}
5070 
5071 		/*
5072 		 * Create pm properties anyway; otherwise the parent can't
5073 		 * go to sleep.
5074 		 */
5075 		(void) sd_create_pm_components(devi, un);
5076 		un->un_f_pm_is_enabled = TRUE;
5077 		return;
5078 	}
5079 
5080 	if (!un->un_f_log_sense_supported) {
5081 		un->un_power_level = SD_SPINDLE_ON;
5082 		un->un_f_pm_is_enabled = FALSE;
5083 		return;
5084 	}
5085 
5086 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5087 
5088 #ifdef	SDDEBUG
5089 	if (sd_force_pm_supported) {
5090 		/* Force a successful result */
5091 		rval = 1;
5092 	}
5093 #endif
5094 
5095 	/*
5096 	 * If the start-stop cycle counter log page is not supported
5097 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5098 	 * then we should not create the pm_components property.
5099 	 */
5100 	if (rval == -1) {
5101 		/*
5102 		 * Error.
5103 		 * Reading log sense failed, most likely this is
5104 		 * an older drive that does not support log sense.
5105 		 * If this fails auto-pm is not supported.
5106 		 */
5107 		un->un_power_level = SD_SPINDLE_ON;
5108 		un->un_f_pm_is_enabled = FALSE;
5109 
5110 	} else if (rval == 0) {
5111 		/*
5112 		 * Page not found.
5113 		 * The start stop cycle counter is implemented as page
5114 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
5115 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5116 		 */
5117 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5118 			/*
5119 			 * Page found, use this one.
5120 			 */
5121 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5122 			un->un_f_pm_is_enabled = TRUE;
5123 		} else {
5124 			/*
5125 			 * Error or page not found.
5126 			 * auto-pm is not supported for this device.
5127 			 */
5128 			un->un_power_level = SD_SPINDLE_ON;
5129 			un->un_f_pm_is_enabled = FALSE;
5130 		}
5131 	} else {
5132 		/*
5133 		 * Page found, use it.
5134 		 */
5135 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5136 		un->un_f_pm_is_enabled = TRUE;
5137 	}
5138 
5139 
5140 	if (un->un_f_pm_is_enabled == TRUE) {
5141 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5142 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5143 
5144 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5145 		    log_page_size, un->un_start_stop_cycle_page,
5146 		    0x01, 0, SD_PATH_DIRECT);
5147 #ifdef	SDDEBUG
5148 		if (sd_force_pm_supported) {
5149 			/* Force a successful result */
5150 			rval = 0;
5151 		}
5152 #endif
5153 
5154 		/*
5155 		 * If the Log Sense for the Start/Stop Cycle Counter page
5156 		 * succeeds, then power management is supported and we can
5157 		 * enable auto-pm.
5158 		 */
5159 		if (rval == 0)  {
5160 			(void) sd_create_pm_components(devi, un);
5161 		} else {
5162 			un->un_power_level = SD_SPINDLE_ON;
5163 			un->un_f_pm_is_enabled = FALSE;
5164 		}
5165 
5166 		kmem_free(log_page_data, log_page_size);
5167 	}
5168 }
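
/*
 * Illustrative sketch (not part of the driver): the page selection made
 * by sd_setup_pm() above.  sd_log_page_supported() returns -1 on error,
 * 0 if the page is absent, and 1 if present; the start/stop cycle
 * counter lives on page 0xE on newer disks and on the vendor-unique
 * page 0x31 on older ones.  The helper itself is hypothetical.
 */
static int
example_pick_cycle_page(int rv_std, int rv_vu)
{
	if (rv_std == 1) {
		return (START_STOP_CYCLE_PAGE);		/* newer disks */
	}
	if ((rv_std == 0) && (rv_vu == 1)) {
		return (START_STOP_CYCLE_VU_PAGE);	/* older disks */
	}
	return (-1);	/* auto-pm cannot be enabled */
}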
5169 
5170 
5171 /*
5172  *    Function: sd_create_pm_components
5173  *
5174  * Description: Initialize PM property.
5175  *
5176  *     Context: Kernel thread context
5177  */
5178 
5179 static void
5180 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5181 {
5182 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5183 
5184 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5185 
5186 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5187 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5188 		/*
5189 		 * When components are initially created they are idle,
5190 		 * power up any non-removables.
5191 		 * Note: the return value of pm_raise_power can't be used
5192 		 * for determining if PM should be enabled for this device.
5193 		 * Even if you check the return values and remove this
5194 		 * property created above, the PM framework will not honor the
5195 		 * change after the first call to pm_raise_power. Hence,
5196 		 * removal of that property does not help if pm_raise_power
5197 		 * fails. In the case of removable media, the start/stop
5198 		 * will fail if the media is not present.
5199 		 */
5200 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5201 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5202 			mutex_enter(SD_MUTEX(un));
5203 			un->un_power_level = SD_SPINDLE_ON;
5204 			mutex_enter(&un->un_pm_mutex);
5205 			/* Set to on and not busy. */
5206 			un->un_pm_count = 0;
5207 		} else {
5208 			mutex_enter(SD_MUTEX(un));
5209 			un->un_power_level = SD_SPINDLE_OFF;
5210 			mutex_enter(&un->un_pm_mutex);
5211 			/* Set to off. */
5212 			un->un_pm_count = -1;
5213 		}
5214 		mutex_exit(&un->un_pm_mutex);
5215 		mutex_exit(SD_MUTEX(un));
5216 	} else {
5217 		un->un_power_level = SD_SPINDLE_ON;
5218 		un->un_f_pm_is_enabled = FALSE;
5219 	}
5220 }
5221 
5222 
5223 /*
5224  *    Function: sd_ddi_suspend
5225  *
5226  * Description: Performs system power-down operations. This includes
5227  *		setting the drive state to indicate its suspended so
5228  *		setting the drive state to indicate it is suspended so
5229  *		all commands that are in transport or queued to a timer
5230  *		for retry to complete. All timeout threads are cancelled.
5231  *
5232  * Return Code: DDI_FAILURE or DDI_SUCCESS
5233  *
5234  *     Context: Kernel thread context
5235  */
5236 
5237 static int
5238 sd_ddi_suspend(dev_info_t *devi)
5239 {
5240 	struct	sd_lun	*un;
5241 	clock_t		wait_cmds_complete;
5242 
5243 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5244 	if (un == NULL) {
5245 		return (DDI_FAILURE);
5246 	}
5247 
5248 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5249 
5250 	mutex_enter(SD_MUTEX(un));
5251 
5252 	/* Return success if the device is already suspended. */
5253 	if (un->un_state == SD_STATE_SUSPENDED) {
5254 		mutex_exit(SD_MUTEX(un));
5255 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5256 		    "device already suspended, exiting\n");
5257 		return (DDI_SUCCESS);
5258 	}
5259 
5260 	/* Return failure if the device is being used by HA */
5261 	if (un->un_resvd_status &
5262 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5263 		mutex_exit(SD_MUTEX(un));
5264 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5265 		    "device in use by HA, exiting\n");
5266 		return (DDI_FAILURE);
5267 	}
5268 
5269 	/*
5270 	 * Return failure if the device is in a resource wait
5271 	 * or power changing state.
5272 	 */
5273 	if ((un->un_state == SD_STATE_RWAIT) ||
5274 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5275 		mutex_exit(SD_MUTEX(un));
5276 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5277 		    "device in resource wait state, exiting\n");
5278 		return (DDI_FAILURE);
5279 	}
5280 
5281 
5282 	un->un_save_state = un->un_last_state;
5283 	New_state(un, SD_STATE_SUSPENDED);
5284 
5285 	/*
5286 	 * Wait for all commands that are in transport or queued to a timer
5287 	 * for retry to complete.
5288 	 *
5289 	 * While waiting, no new commands will be accepted or sent because of
5290 	 * the new state we set above.
5291 	 *
5292 	 * Wait till current operation has completed. If we are in the resource
5293 	 * wait state (with an intr outstanding) then we need to wait till the
5294 	 * intr completes and starts the next cmd. We want to wait for
5295 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5296 	 */
5297 	wait_cmds_complete = ddi_get_lbolt() +
5298 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5299 
5300 	while (un->un_ncmds_in_transport != 0) {
5301 		/*
5302 		 * Fail if commands do not finish in the specified time.
5303 		 */
5304 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5305 		    wait_cmds_complete) == -1) {
5306 			/*
5307 			 * Undo the state changes made above. Everything
5308 			 * must go back to its original value.
5309 			 */
5310 			Restore_state(un);
5311 			un->un_last_state = un->un_save_state;
5312 			/* Wake up any threads that might be waiting. */
5313 			cv_broadcast(&un->un_suspend_cv);
5314 			mutex_exit(SD_MUTEX(un));
5315 			SD_ERROR(SD_LOG_IO_PM, un,
5316 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5317 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5318 			return (DDI_FAILURE);
5319 		}
5320 	}
5321 
5322 	/*
5323 	 * Cancel SCSI watch thread and timeouts, if any are active
5324 	 */
5325 
5326 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5327 		opaque_t temp_token = un->un_swr_token;
5328 		mutex_exit(SD_MUTEX(un));
5329 		scsi_watch_suspend(temp_token);
5330 		mutex_enter(SD_MUTEX(un));
5331 	}
5332 
5333 	if (un->un_reset_throttle_timeid != NULL) {
5334 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5335 		un->un_reset_throttle_timeid = NULL;
5336 		mutex_exit(SD_MUTEX(un));
5337 		(void) untimeout(temp_id);
5338 		mutex_enter(SD_MUTEX(un));
5339 	}
5340 
5341 	if (un->un_dcvb_timeid != NULL) {
5342 		timeout_id_t temp_id = un->un_dcvb_timeid;
5343 		un->un_dcvb_timeid = NULL;
5344 		mutex_exit(SD_MUTEX(un));
5345 		(void) untimeout(temp_id);
5346 		mutex_enter(SD_MUTEX(un));
5347 	}
5348 
5349 	mutex_enter(&un->un_pm_mutex);
5350 	if (un->un_pm_timeid != NULL) {
5351 		timeout_id_t temp_id = un->un_pm_timeid;
5352 		un->un_pm_timeid = NULL;
5353 		mutex_exit(&un->un_pm_mutex);
5354 		mutex_exit(SD_MUTEX(un));
5355 		(void) untimeout(temp_id);
5356 		mutex_enter(SD_MUTEX(un));
5357 	} else {
5358 		mutex_exit(&un->un_pm_mutex);
5359 	}
5360 
5361 	if (un->un_retry_timeid != NULL) {
5362 		timeout_id_t temp_id = un->un_retry_timeid;
5363 		un->un_retry_timeid = NULL;
5364 		mutex_exit(SD_MUTEX(un));
5365 		(void) untimeout(temp_id);
5366 		mutex_enter(SD_MUTEX(un));
5367 	}
5368 
5369 	if (un->un_direct_priority_timeid != NULL) {
5370 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5371 		un->un_direct_priority_timeid = NULL;
5372 		mutex_exit(SD_MUTEX(un));
5373 		(void) untimeout(temp_id);
5374 		mutex_enter(SD_MUTEX(un));
5375 	}
5376 
5377 	if (un->un_f_is_fibre == TRUE) {
5378 		/*
5379 		 * Remove callbacks for insert and remove events
5380 		 */
5381 		if (un->un_insert_event != NULL) {
5382 			mutex_exit(SD_MUTEX(un));
5383 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5384 			mutex_enter(SD_MUTEX(un));
5385 			un->un_insert_event = NULL;
5386 		}
5387 
5388 		if (un->un_remove_event != NULL) {
5389 			mutex_exit(SD_MUTEX(un));
5390 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5391 			mutex_enter(SD_MUTEX(un));
5392 			un->un_remove_event = NULL;
5393 		}
5394 	}
5395 
5396 	mutex_exit(SD_MUTEX(un));
5397 
5398 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5399 
5400 	return (DDI_SUCCESS);
5401 }
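
/*
 * Illustrative sketch (not part of the driver): the cancellation
 * pattern repeated in sd_ddi_suspend() above.  The timeout id is
 * captured and cleared while holding the mutex, and the mutex is
 * dropped around untimeout(9F) so that a handler that is already
 * running and blocked on the same mutex cannot deadlock against us.
 * The helper name is hypothetical; the caller is assumed to hold *mp.
 */
static void
example_cancel_timeout(kmutex_t *mp, timeout_id_t *tidp)
{
	if (*tidp != NULL) {
		timeout_id_t temp_id = *tidp;

		*tidp = NULL;
		mutex_exit(mp);
		(void) untimeout(temp_id);
		mutex_enter(mp);
	}
}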
5402 
5403 
5404 /*
5405  *    Function: sd_ddi_pm_suspend
5406  *
5407  * Description: Set the drive state to low power.
5408  *		Someone else is required to actually change the drive
5409  *		power level.
5410  *
5411  *   Arguments: un - driver soft state (unit) structure
5412  *
5413  * Return Code: DDI_FAILURE or DDI_SUCCESS
5414  *
5415  *     Context: Kernel thread context
5416  */
5417 
5418 static int
5419 sd_ddi_pm_suspend(struct sd_lun *un)
5420 {
5421 	ASSERT(un != NULL);
5422 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5423 
5424 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5425 	mutex_enter(SD_MUTEX(un));
5426 
5427 	/*
5428 	 * Exit if power management is not enabled for this device, or if
5429 	 * the device is being used by HA.
5430 	 */
5431 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5432 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5433 		mutex_exit(SD_MUTEX(un));
5434 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5435 		return (DDI_SUCCESS);
5436 	}
5437 
5438 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5439 	    un->un_ncmds_in_driver);
5440 
5441 	/*
5442 	 * See if the device is not busy, i.e.:
5443 	 *    - we have no commands in the driver for this device
5444 	 *    - not waiting for resources
5445 	 */
5446 	if ((un->un_ncmds_in_driver == 0) &&
5447 	    (un->un_state != SD_STATE_RWAIT)) {
5448 		/*
5449 		 * The device is not busy, so it is OK to go to low power state.
5450 		 * Indicate low power, but rely on someone else to actually
5451 		 * change it.
5452 		 */
5453 		mutex_enter(&un->un_pm_mutex);
5454 		un->un_pm_count = -1;
5455 		mutex_exit(&un->un_pm_mutex);
5456 		un->un_power_level = SD_SPINDLE_OFF;
5457 	}
5458 
5459 	mutex_exit(SD_MUTEX(un));
5460 
5461 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5462 
5463 	return (DDI_SUCCESS);
5464 }
5465 
5466 
5467 /*
5468  *    Function: sd_ddi_resume
5469  *
5470  * Description: Performs system power-up operations..
5471  * Description: Performs system power-up operations.
5472  * Return Code: DDI_SUCCESS
5473  *		DDI_FAILURE
5474  *
5475  *     Context: Kernel thread context
5476  */
5477 
5478 static int
5479 sd_ddi_resume(dev_info_t *devi)
5480 {
5481 	struct	sd_lun	*un;
5482 
5483 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5484 	if (un == NULL) {
5485 		return (DDI_FAILURE);
5486 	}
5487 
5488 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5489 
5490 	mutex_enter(SD_MUTEX(un));
5491 	Restore_state(un);
5492 
5493 	/*
5494 	 * Restore the state which was saved to give
5495 	 * the right state in un_last_state.
5496 	 */
5497 	un->un_last_state = un->un_save_state;
5498 	/*
5499 	 * Note: throttle comes back at full.
5500 	 * Also note: this MUST be done before calling pm_raise_power
5501 	 * otherwise the system can get hung in biowait. The scenario where
5502 	 * this'll happen is under cpr suspend. Writing of the system
5503 	 * state goes through sddump, which writes 0 to un_throttle. If
5504 	 * writing the system state then fails, example if the partition is
5505 	 * writing the system state then fails, for example if the partition is
5506 	 * from the saved value until after calling pm_raise_power then
5507 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5508 	 * in biowait.
5509 	 */
5510 	un->un_throttle = un->un_saved_throttle;
5511 
5512 	/*
5513 	 * The chance of failure is very rare, as the only command done in the
5514 	 * power entry point is the START command on a transition from 0->1 or
5515 	 * unknown->1. Put it in the SPINDLE ON state irrespective of the state
5516 	 * which suspend was done. Ignore the return value as the resume should
5517 	 * not be failed. In the case of removable media the media need not be
5518 	 * inserted and hence there is a chance that raise power will fail with
5519 	 * media not present.
5520 	 */
5521 	if (un->un_f_attach_spinup) {
5522 		mutex_exit(SD_MUTEX(un));
5523 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5524 		mutex_enter(SD_MUTEX(un));
5525 	}
5526 
5527 	/*
5528 	 * Don't broadcast to the suspend cv and therefore possibly
5529 	 * start I/O until after power has been restored.
5530 	 */
5531 	cv_broadcast(&un->un_suspend_cv);
5532 	cv_broadcast(&un->un_state_cv);
5533 
5534 	/* restart thread */
5535 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5536 		scsi_watch_resume(un->un_swr_token);
5537 	}
5538 
5539 #if (defined(__fibre))
5540 	if (un->un_f_is_fibre == TRUE) {
5541 		/*
5542 		 * Add callbacks for insert and remove events
5543 		 */
5544 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5545 			sd_init_event_callbacks(un);
5546 		}
5547 	}
5548 #endif
5549 
5550 	/*
5551 	 * Transport any pending commands to the target.
5552 	 *
5553 	 * If this is a low-activity device, commands in queue will have to wait
5554 	 * until new commands come in, which may take a while. Also, we
5555 	 * specifically don't check un_ncmds_in_transport because we know that
5556 	 * there really are no commands in progress after the unit was
5557 	 * suspended and we could have reached the throttle level, been
5558 	 * suspended, and have no new commands coming in for a while. Highly
5559 	 * unlikely, but so is the low-activity disk scenario.
5560 	 */
5561 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5562 
5563 	sd_start_cmds(un, NULL);
5564 	mutex_exit(SD_MUTEX(un));
5565 
5566 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5567 
5568 	return (DDI_SUCCESS);
5569 }
5570 
5571 
5572 /*
5573  *    Function: sd_ddi_pm_resume
5574  *
5575  * Description: Set the drive state to powered on.
5576  *		Someone else is required to actually change the drive
5577  *		power level.
5578  *
5579  *   Arguments: un - driver soft state (unit) structure
5580  *
5581  * Return Code: DDI_SUCCESS
5582  *
5583  *     Context: Kernel thread context
5584  */
5585 
5586 static int
5587 sd_ddi_pm_resume(struct sd_lun *un)
5588 {
5589 	ASSERT(un != NULL);
5590 
5591 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5592 	mutex_enter(SD_MUTEX(un));
5593 	un->un_power_level = SD_SPINDLE_ON;
5594 
5595 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5596 	mutex_enter(&un->un_pm_mutex);
5597 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5598 		un->un_pm_count++;
5599 		ASSERT(un->un_pm_count == 0);
5600 		/*
5601 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5602 		 * un_suspend_cv is for a system resume, not a power management
5603 		 * device resume. (4297749)
5604 		 *	 cv_broadcast(&un->un_suspend_cv);
5605 		 */
5606 	}
5607 	mutex_exit(&un->un_pm_mutex);
5608 	mutex_exit(SD_MUTEX(un));
5609 
5610 	return (DDI_SUCCESS);
5611 }
5612 
5613 
5614 /*
5615  *    Function: sd_pm_idletimeout_handler
5616  *
5617  * Description: A timer routine that's active only while a device is busy.
5618  *		The purpose is to extend slightly the pm framework's busy
5619  *		view of the device to prevent busy/idle thrashing for
5620  *		back-to-back commands. Do this by comparing the current time
5621  *		to the time at which the last command completed and when the
5622  *		difference is greater than sd_pm_idletime, call
5623  *		pm_idle_component. In addition to indicating idle to the pm
5624  *		framework, update the chain type to again use the internal pm
5625  *		layers of the driver.
5626  *
5627  *   Arguments: arg - driver soft state (unit) structure
5628  *
5629  *     Context: Executes in a timeout(9F) thread context
5630  */
5631 
5632 static void
5633 sd_pm_idletimeout_handler(void *arg)
5634 {
5635 	struct sd_lun *un = arg;
5636 
5637 	time_t	now;
5638 
5639 	mutex_enter(&sd_detach_mutex);
5640 	if (un->un_detach_count != 0) {
5641 		/* Abort if the instance is detaching */
5642 		mutex_exit(&sd_detach_mutex);
5643 		return;
5644 	}
5645 	mutex_exit(&sd_detach_mutex);
5646 
5647 	now = ddi_get_time();
5648 	/*
5649 	 * Grab both mutexes, in the proper order, since we're accessing
5650 	 * both PM and softstate variables.
5651 	 */
5652 	mutex_enter(SD_MUTEX(un));
5653 	mutex_enter(&un->un_pm_mutex);
5654 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5655 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5656 		/*
5657 		 * Update the chain types.
5658 		 * This takes effect on the next new command received.
5659 		 */
5660 		if (un->un_f_non_devbsize_supported) {
5661 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5662 		} else {
5663 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5664 		}
5665 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5666 
5667 		SD_TRACE(SD_LOG_IO_PM, un,
5668 		    "sd_pm_idletimeout_handler: idling device\n");
5669 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5670 		un->un_pm_idle_timeid = NULL;
5671 	} else {
5672 		un->un_pm_idle_timeid =
5673 		    timeout(sd_pm_idletimeout_handler, un,
5674 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
5675 	}
5676 	mutex_exit(&un->un_pm_mutex);
5677 	mutex_exit(SD_MUTEX(un));
5678 }
5679 
5680 
5681 /*
5682  *    Function: sd_pm_timeout_handler
5683  *
5684  * Description: Callback to tell framework we are idle.
5685  *
5686  *     Context: timeout(9f) thread context.
5687  *     Context: timeout(9F) thread context.
5688 
5689 static void
5690 sd_pm_timeout_handler(void *arg)
5691 {
5692 	struct sd_lun *un = arg;
5693 
5694 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5695 	mutex_enter(&un->un_pm_mutex);
5696 	un->un_pm_timeid = NULL;
5697 	mutex_exit(&un->un_pm_mutex);
5698 }
5699 
5700 
5701 /*
5702  *    Function: sdpower
5703  *
5704  * Description: PM entry point.
5705  *
5706  * Return Code: DDI_SUCCESS
5707  *		DDI_FAILURE
5708  *
5709  *     Context: Kernel thread context
5710  */
5711 
5712 static int
5713 sdpower(dev_info_t *devi, int component, int level)
5714 {
5715 	struct sd_lun	*un;
5716 	int		instance;
5717 	int		rval = DDI_SUCCESS;
5718 	uint_t		i, log_page_size, maxcycles, ncycles;
5719 	uchar_t		*log_page_data;
5720 	int		log_sense_page;
5721 	int		medium_present;
5722 	time_t		intvlp;
5723 	dev_t		dev;
5724 	struct pm_trans_data	sd_pm_tran_data;
5725 	uchar_t		save_state;
5726 	int		sval;
5727 	uchar_t		state_before_pm;
5728 	int		got_semaphore_here;
5729 
5730 	instance = ddi_get_instance(devi);
5731 
5732 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5733 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5734 	    component != 0) {
5735 		return (DDI_FAILURE);
5736 	}
5737 
5738 	dev = sd_make_device(SD_DEVINFO(un));
5739 
5740 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5741 
5742 	/*
5743 	 * Must synchronize power down with close.
5744 	 * Attempt to decrement/acquire the open/close semaphore,
5745 	 * but do NOT wait on it. If it's not greater than zero,
5746 	 * ie. it can't be decremented without waiting, then
5747 	 * i.e. it can't be decremented without waiting, then
5748 	 * and the try returns 0. Use that knowledge here to determine
5749 	 * if it's OK to change the device power level.
5750 	 * Also, only increment it on exit if it was decremented, ie. gotten,
5751 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
5752 	 */
5753 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5754 
5755 	mutex_enter(SD_MUTEX(un));
5756 
5757 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5758 	    un->un_ncmds_in_driver);
5759 
5760 	/*
5761 	 * If un_ncmds_in_driver is non-zero, commands are already being
5762 	 * processed in the driver; if the semaphore was not acquired
5763 	 * here, an open or close is being processed.  Either way, a
5764 	 * simultaneous request to go to low power cannot be honored,
5765 	 * so we need to return failure.
5766 	 */
5767 	if ((level == SD_SPINDLE_OFF) &&
5768 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5769 		mutex_exit(SD_MUTEX(un));
5770 
5771 		if (got_semaphore_here != 0) {
5772 			sema_v(&un->un_semoclose);
5773 		}
5774 		SD_TRACE(SD_LOG_IO_PM, un,
5775 		    "sdpower: exit, device has queued cmds.\n");
5776 		return (DDI_FAILURE);
5777 	}
5778 
5779 	/*
5780 	 * If the state is OFFLINE, the disk is completely dead; turning it
5781 	 * on or off would require sending commands, which would fail
5782 	 * anyway, so return failure here.
5783 	 *
5784 	 * Power changes to a device that's OFFLINE or SUSPENDED
5785 	 * are not allowed.
5786 	 */
5787 	if ((un->un_state == SD_STATE_OFFLINE) ||
5788 	    (un->un_state == SD_STATE_SUSPENDED)) {
5789 		mutex_exit(SD_MUTEX(un));
5790 
5791 		if (got_semaphore_here != 0) {
5792 			sema_v(&un->un_semoclose);
5793 		}
5794 		SD_TRACE(SD_LOG_IO_PM, un,
5795 		    "sdpower: exit, device is off-line.\n");
5796 		return (DDI_FAILURE);
5797 	}
5798 
5799 	/*
5800 	 * Change the device's state to indicate it's power level
5801 	 * Change the device's state to indicate its power level
5802 	 * middle of commands, which is especially bad on devices
5803 	 * that are really powered off instead of just spun down.
5804 	 */
5805 	state_before_pm = un->un_state;
5806 	un->un_state = SD_STATE_PM_CHANGING;
5807 
5808 	mutex_exit(SD_MUTEX(un));
5809 
5810 	/*
5811 	 * If "pm-capable" property is set to TRUE by HBA drivers,
5812 	 * bypass the following checking; otherwise, check the log
5813 	 * sense information for this device
5814 	 */
5815 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5816 		/*
5817 		 * Get the log sense information to understand whether the
5818 		 * power cycle counts have gone beyond the threshold.
5819 		 */
5820 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5821 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5822 
5823 		mutex_enter(SD_MUTEX(un));
5824 		log_sense_page = un->un_start_stop_cycle_page;
5825 		mutex_exit(SD_MUTEX(un));
5826 
5827 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5828 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5829 #ifdef	SDDEBUG
5830 		if (sd_force_pm_supported) {
5831 			/* Force a successful result */
5832 			rval = 0;
5833 		}
5834 #endif
5835 		if (rval != 0) {
5836 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5837 			    "Log Sense Failed\n");
5838 			kmem_free(log_page_data, log_page_size);
5839 			/* Cannot support power management on those drives */
5840 
5841 			if (got_semaphore_here != 0) {
5842 				sema_v(&un->un_semoclose);
5843 			}
5844 			/*
5845 			 * On exit put the state back to its original value
5846 			 * and broadcast to anyone waiting for the power
5847 			 * change completion.
5848 			 */
5849 			mutex_enter(SD_MUTEX(un));
5850 			un->un_state = state_before_pm;
5851 			cv_broadcast(&un->un_suspend_cv);
5852 			mutex_exit(SD_MUTEX(un));
5853 			SD_TRACE(SD_LOG_IO_PM, un,
5854 			    "sdpower: exit, Log Sense Failed.\n");
5855 			return (DDI_FAILURE);
5856 		}
5857 
5858 		/*
5859 		 * From the page data - Convert the essential information to
5860 		 * pm_trans_data
5861 		 */
5862 		maxcycles =
5863 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5864 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5865 
5866 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5867 
5868 		ncycles =
5869 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5870 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5871 
5872 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5873 
5874 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5875 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5876 			    log_page_data[8+i];
5877 		}
5878 
5879 		kmem_free(log_page_data, log_page_size);
5880 
5881 		/*
5882 		 * Call pm_trans_check routine to get the Ok from
5883 		 * the global policy
5884 		 */
5885 
5886 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
5887 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
5888 
5889 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
5890 #ifdef	SDDEBUG
5891 		if (sd_force_pm_supported) {
5892 			/* Force a successful result */
5893 			rval = 1;
5894 		}
5895 #endif
5896 		switch (rval) {
5897 		case 0:
5898 			/*
5899 			 * Not OK to power cycle, or an error in the parameters
5900 			 * passed.  pm_trans_check() has given the advised time
5901 			 * (intvlp) to wait before the next power cycle.  Until
5902 			 * then we are supposed to pretend we are busy so that
5903 			 * the pm framework will never call our power entry
5904 			 * point.  To do that, install a timeout handler and
5905 			 * wait for the recommended time to elapse so that
5906 			 * power management can be effective again.
5907 			 *
5908 			 * To effect this behavior, call pm_busy_component to
5909 			 * indicate to the framework this device is busy.
5910 			 * By not adjusting un_pm_count the rest of PM in
5911 			 * the driver will function normally, independent
5912 			 * of this; but because the framework is told the device
5913 			 * is busy it won't attempt powering down until it gets
5914 			 * a matching idle. The timeout handler sends this.
5915 			 * Note: sd_pm_entry can't be called here to do this
5916 			 * because sdpower may have been called as a result
5917 			 * of a call to pm_raise_power from within sd_pm_entry.
5918 			 *
5919 			 * If a timeout handler is already active then
5920 			 * don't install another.
5921 			 */
5922 			mutex_enter(&un->un_pm_mutex);
5923 			if (un->un_pm_timeid == NULL) {
5924 				un->un_pm_timeid =
5925 				    timeout(sd_pm_timeout_handler,
5926 				    un, intvlp * drv_usectohz(1000000));
5927 				mutex_exit(&un->un_pm_mutex);
5928 				(void) pm_busy_component(SD_DEVINFO(un), 0);
5929 			} else {
5930 				mutex_exit(&un->un_pm_mutex);
5931 			}
5932 			if (got_semaphore_here != 0) {
5933 				sema_v(&un->un_semoclose);
5934 			}
5935 			/*
5936 			 * On exit put the state back to its original value
5937 			 * and broadcast to anyone waiting for the power
5938 			 * change completion.
5939 			 */
5940 			mutex_enter(SD_MUTEX(un));
5941 			un->un_state = state_before_pm;
5942 			cv_broadcast(&un->un_suspend_cv);
5943 			mutex_exit(SD_MUTEX(un));
5944 
5945 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
5946 			    "trans check Failed, not ok to power cycle.\n");
5947 			return (DDI_FAILURE);
5948 
5949 		case -1:
5950 			if (got_semaphore_here != 0) {
5951 				sema_v(&un->un_semoclose);
5952 			}
5953 			/*
5954 			 * On exit put the state back to its original value
5955 			 * and broadcast to anyone waiting for the power
5956 			 * change completion.
5957 			 */
5958 			mutex_enter(SD_MUTEX(un));
5959 			un->un_state = state_before_pm;
5960 			cv_broadcast(&un->un_suspend_cv);
5961 			mutex_exit(SD_MUTEX(un));
5962 			SD_TRACE(SD_LOG_IO_PM, un,
5963 			    "sdpower: exit, trans check command Failed.\n");
5964 			return (DDI_FAILURE);
5965 		}
5966 	}
5967 
5968 	if (level == SD_SPINDLE_OFF) {
5969 		/*
5970 		 * Save the last state... if the STOP FAILS we need it
5971 		 * for restoring
5972 		 */
5973 		mutex_enter(SD_MUTEX(un));
5974 		save_state = un->un_last_state;
5975 		/*
5976 		 * There must not be any cmds getting processed
5977 		 * in the driver when we get here. Power to the
5978 		 * device is potentially going off.
5979 		 */
5980 		ASSERT(un->un_ncmds_in_driver == 0);
5981 		mutex_exit(SD_MUTEX(un));
5982 
5983 		/*
5984 		 * For now suspend the device completely before spindle is
5985 		 * turned off
5986 		 */
5987 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
5988 			if (got_semaphore_here != 0) {
5989 				sema_v(&un->un_semoclose);
5990 			}
5991 			/*
5992 			 * On exit put the state back to its original value
5993 			 * and broadcast to anyone waiting for the power
5994 			 * change completion.
5995 			 */
5996 			mutex_enter(SD_MUTEX(un));
5997 			un->un_state = state_before_pm;
5998 			cv_broadcast(&un->un_suspend_cv);
5999 			mutex_exit(SD_MUTEX(un));
6000 			SD_TRACE(SD_LOG_IO_PM, un,
6001 			    "sdpower: exit, PM suspend Failed.\n");
6002 			return (DDI_FAILURE);
6003 		}
6004 	}
6005 
6006 	/*
6007 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6008 	 * close, or strategy. Dump no longer uses this routine; it uses its
6009 	 * own code so it can be done in polled mode.
6010 	 */
6011 
6012 	medium_present = TRUE;
6013 
6014 	/*
6015 	 * When powering up, issue a TUR in case the device is at unit
6016 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6017 	 * a deadlock on un_pm_busy_cv will occur.
6018 	 */
6019 	if (level == SD_SPINDLE_ON) {
6020 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6021 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6022 	}
6023 
6024 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6025 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6026 
6027 	sval = sd_send_scsi_START_STOP_UNIT(un,
6028 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6029 	    SD_PATH_DIRECT);
6030 	/* Command failed, check for media present. */
6031 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6032 		medium_present = FALSE;
6033 	}
6034 
6035 	/*
6036 	 * The conditions of interest here are:
6037 	 *   if a spindle off with media present fails,
6038 	 *	then restore the state and return an error.
6039 	 *   else if a spindle on fails,
6040 	 *	then return an error (there's no state to restore).
6041 	 * In all other cases we setup for the new state
6042 	 * and return success.
6043 	 */
6044 	switch (level) {
6045 	case SD_SPINDLE_OFF:
6046 		if ((medium_present == TRUE) && (sval != 0)) {
6047 			/* The stop command from above failed */
6048 			rval = DDI_FAILURE;
6049 			/*
6050 			 * The stop command failed, and we have media
6051 			 * present. Put the level back by calling
6052 			 * sd_ddi_pm_resume() and set the state back to
6053 			 * its previous value.
6054 			 */
6055 			(void) sd_ddi_pm_resume(un);
6056 			mutex_enter(SD_MUTEX(un));
6057 			un->un_last_state = save_state;
6058 			mutex_exit(SD_MUTEX(un));
6059 			break;
6060 		}
6061 		/*
6062 		 * The stop command from above succeeded.
6063 		 */
6064 		if (un->un_f_monitor_media_state) {
6065 			/*
6066 			 * Terminate watch thread in case of removable media
6067 			 * devices going into low power state. This is as per
6068 			 * the requirements of the pm framework; otherwise commands
6069 			 * will be generated for the device (through watch
6070 			 * thread), even when the device is in low power state.
6071 			 */
6072 			mutex_enter(SD_MUTEX(un));
6073 			un->un_f_watcht_stopped = FALSE;
6074 			if (un->un_swr_token != NULL) {
6075 				opaque_t temp_token = un->un_swr_token;
6076 				un->un_f_watcht_stopped = TRUE;
6077 				un->un_swr_token = NULL;
6078 				mutex_exit(SD_MUTEX(un));
6079 				(void) scsi_watch_request_terminate(temp_token,
6080 				    SCSI_WATCH_TERMINATE_WAIT);
6081 			} else {
6082 				mutex_exit(SD_MUTEX(un));
6083 			}
6084 		}
6085 		break;
6086 
6087 	default:	/* The level requested is spindle on... */
6088 		/*
6089 		 * Legacy behavior: return success on a failed spinup
6090 		 * if there is no media in the drive.
6091 		 * Do this by looking at medium_present here.
6092 		 */
6093 		if ((sval != 0) && medium_present) {
6094 			/* The start command from above failed */
6095 			rval = DDI_FAILURE;
6096 			break;
6097 		}
6098 		/*
6099 		 * The start command from above succeeded.
6100 		 * Resume the device now that we have
6101 		 * started the disk.
6102 		 */
6103 		(void) sd_ddi_pm_resume(un);
6104 
6105 		/*
6106 		 * Resume the watch thread since it was suspended
6107 		 * when the device went into low power mode.
6108 		 */
6109 		if (un->un_f_monitor_media_state) {
6110 			mutex_enter(SD_MUTEX(un));
6111 			if (un->un_f_watcht_stopped == TRUE) {
6112 				opaque_t temp_token;
6113 
6114 				un->un_f_watcht_stopped = FALSE;
6115 				mutex_exit(SD_MUTEX(un));
6116 				temp_token = scsi_watch_request_submit(
6117 				    SD_SCSI_DEVP(un),
6118 				    sd_check_media_time,
6119 				    SENSE_LENGTH, sd_media_watch_cb,
6120 				    (caddr_t)dev);
6121 				mutex_enter(SD_MUTEX(un));
6122 				un->un_swr_token = temp_token;
6123 			}
6124 			mutex_exit(SD_MUTEX(un));
6125 		}
6126 	}
6127 	if (got_semaphore_here != 0) {
6128 		sema_v(&un->un_semoclose);
6129 	}
6130 	/*
6131 	 * On exit put the state back to its original value
6132 	 * and broadcast to anyone waiting for the power
6133 	 * change completion.
6134 	 */
6135 	mutex_enter(SD_MUTEX(un));
6136 	un->un_state = state_before_pm;
6137 	cv_broadcast(&un->un_suspend_cv);
6138 	mutex_exit(SD_MUTEX(un));
6139 
6140 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6141 
6142 	return (rval);
6143 }
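
/*
 * Illustrative sketch (not part of the driver): the start/stop cycle
 * counts read from the log page in sdpower() above are stored
 * big-endian; each 32-bit count is assembled from four consecutive
 * bytes as shown here.  The helper name is hypothetical.
 */
static uint_t
example_get_be32(const uchar_t *p)
{
	return (((uint_t)p[0] << 24) | ((uint_t)p[1] << 16) |
	    ((uint_t)p[2] << 8) | (uint_t)p[3]);
}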
6144 
6145 
6146 
6147 /*
6148  *    Function: sdattach
6149  *
6150  * Description: Driver's attach(9e) entry point function.
6151  *
6152  *   Arguments: devi - opaque device info handle
6153  *		cmd  - attach  type
6154  *
6155  * Return Code: DDI_SUCCESS
6156  *		DDI_FAILURE
6157  *
6158  *     Context: Kernel thread context
6159  */
6160 
6161 static int
6162 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6163 {
6164 	switch (cmd) {
6165 	case DDI_ATTACH:
6166 		return (sd_unit_attach(devi));
6167 	case DDI_RESUME:
6168 		return (sd_ddi_resume(devi));
6169 	default:
6170 		break;
6171 	}
6172 	return (DDI_FAILURE);
6173 }
6174 
6175 
6176 /*
6177  *    Function: sddetach
6178  *
6179  * Description: Driver's detach(9E) entry point function.
6180  *
6181  *   Arguments: devi - opaque device info handle
6182  *		cmd  - detach  type
6183  *
6184  * Return Code: DDI_SUCCESS
6185  *		DDI_FAILURE
6186  *
6187  *     Context: Kernel thread context
6188  */
6189 
6190 static int
6191 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6192 {
6193 	switch (cmd) {
6194 	case DDI_DETACH:
6195 		return (sd_unit_detach(devi));
6196 	case DDI_SUSPEND:
6197 		return (sd_ddi_suspend(devi));
6198 	default:
6199 		break;
6200 	}
6201 	return (DDI_FAILURE);
6202 }
6203 
6204 
6205 /*
6206  *     Function: sd_sync_with_callback
6207  *
6208  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6209  *		 state while the callback routine is active.
6210  *
6211  *    Arguments: un: softstate structure for the instance
6212  *
6213  *	Context: Kernel thread context
6214  */
6215 
6216 static void
6217 sd_sync_with_callback(struct sd_lun *un)
6218 {
6219 	ASSERT(un != NULL);
6220 
6221 	mutex_enter(SD_MUTEX(un));
6222 
6223 	ASSERT(un->un_in_callback >= 0);
6224 
6225 	while (un->un_in_callback > 0) {
6226 		mutex_exit(SD_MUTEX(un));
6227 		delay(2);
6228 		mutex_enter(SD_MUTEX(un));
6229 	}
6230 
6231 	mutex_exit(SD_MUTEX(un));
6232 }
6233 
6234 /*
6235  *    Function: sd_unit_attach
6236  *
6237  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6238  *		the soft state structure for the device and performs
6239  *		all necessary structure and device initializations.
6240  *
6241  *   Arguments: devi: the system's dev_info_t for the device.
6242  *
6243  * Return Code: DDI_SUCCESS if attach is successful.
6244  *		DDI_FAILURE if any part of the attach fails.
6245  *
6246  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6247  *		Kernel thread context only.  Can sleep.
6248  */
6249 
6250 static int
6251 sd_unit_attach(dev_info_t *devi)
6252 {
6253 	struct	scsi_device	*devp;
6254 	struct	sd_lun		*un;
6255 	char			*variantp;
6256 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6257 	int	instance;
6258 	int	rval;
6259 	int	wc_enabled;
6260 	int	tgt;
6261 	uint64_t	capacity;
6262 	uint_t		lbasize = 0;
6263 	dev_info_t	*pdip = ddi_get_parent(devi);
6264 	int		offbyone = 0;
6265 	int		geom_label_valid = 0;
6266 
6267 	/*
6268 	 * Retrieve the target driver's private data area. This was set
6269 	 * up by the HBA.
6270 	 */
6271 	devp = ddi_get_driver_private(devi);
6272 
6273 	/*
6274 	 * Retrieve the target ID of the device.
6275 	 */
6276 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6277 	    SCSI_ADDR_PROP_TARGET, -1);
6278 
6279 	/*
6280 	 * Since we have no idea what state things were left in by the last
6281 	 * user of the device, set up some 'default' settings, i.e. turn them
6282 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6283 	 * Do this before the scsi_probe, which sends an inquiry.
6284 	 * This is a fix for bug (4430280).
6285 	 * Of special importance is wide-xfer. The drive could have been left
6286 	 * in wide transfer mode by the last driver to communicate with it,
6287 	 * this includes us. If that's the case, and if the following is not
6288 	 * setup properly or we don't re-negotiate with the drive prior to
6289 	 * transferring data to/from the drive, it causes bus parity errors,
6290 	 * data overruns, and unexpected interrupts. This first occurred when
6291 	 * the fix for bug (4378686) was made.
6292 	 */
6293 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6294 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6295 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6296 
6297 	/*
6298 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6299 	 * on a target. Setting it per lun instance actually sets the
6300 	 * capability of this target, which affects those luns already
6301 	 * attached on the same target. So during attach, we can disable
6302 	 * this capability only when no other lun has been attached on this
6303 	 * target. By doing this, we assume a target has the same tagged-qing
6304 	 * capability for every lun. The condition can be removed when HBA
6305 	 * is changed to support per lun based tagged-qing capability.
6306 	 */
6307 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6308 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6309 	}
6310 
6311 	/*
6312 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6313 	 * This call will allocate and fill in the scsi_inquiry structure
6314 	 * and point the sd_inq member of the scsi_device structure to it.
6315 	 * If the attach succeeds, then this memory will not be de-allocated
6316 	 * (via scsi_unprobe()) until the instance is detached.
6317 	 */
6318 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6319 		goto probe_failed;
6320 	}
6321 
6322 	/*
6323 	 * Check the device type as specified in the inquiry data and
6324 	 * claim it if it is of a type that we support.
6325 	 */
6326 	switch (devp->sd_inq->inq_dtype) {
6327 	case DTYPE_DIRECT:
6328 		break;
6329 	case DTYPE_RODIRECT:
6330 		break;
6331 	case DTYPE_OPTICAL:
6332 		break;
6333 	case DTYPE_NOTPRESENT:
6334 	default:
6335 		/* Unsupported device type; fail the attach. */
6336 		goto probe_failed;
6337 	}
6338 
6339 	/*
6340 	 * Allocate the soft state structure for this unit.
6341 	 *
6342 	 * We rely upon this memory being set to all zeroes by
6343 	 * ddi_soft_state_zalloc().  We assume that any member of the
6344 	 * soft state structure that is not explicitly initialized by
6345 	 * this routine will have a value of zero.
6346 	 */
6347 	instance = ddi_get_instance(devp->sd_dev);
6348 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6349 		goto probe_failed;
6350 	}
6351 
6352 	/*
6353 	 * Retrieve a pointer to the newly-allocated soft state.
6354 	 *
6355 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6356 	 * was successful, unless something has gone horribly wrong and the
6357 	 * ddi's soft state internals are corrupt (in which case it is
6358 	 * probably better to halt here than just fail the attach....)
6359 	 */
6360 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6361 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6362 		    instance);
6363 		/*NOTREACHED*/
6364 	}
6365 
6366 	/*
6367 	 * Link the back ptr of the driver soft state to the scsi_device
6368 	 * struct for this lun.
6369 	 * Save a pointer to the softstate in the driver-private area of
6370 	 * the scsi_device struct.
6371 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6372 	 * we first set un->un_sd below.
6373 	 */
6374 	un->un_sd = devp;
6375 	devp->sd_private = (opaque_t)un;
6376 
6377 	/*
6378 	 * The following must be after devp is stored in the soft state struct.
6379 	 */
6380 #ifdef SDDEBUG
6381 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6382 	    "%s_unit_attach: un:0x%p instance:%d\n",
6383 	    ddi_driver_name(devi), un, instance);
6384 #endif
6385 
6386 	/*
6387 	 * Set up the device type and node type (for the minor nodes).
6388 	 * By default we assume that the device can at least support the
6389 	 * Common Command Set. Call it a CD-ROM if it reports itself
6390 	 * as a RODIRECT device.
6391 	 */
6392 	switch (devp->sd_inq->inq_dtype) {
6393 	case DTYPE_RODIRECT:
6394 		un->un_node_type = DDI_NT_CD_CHAN;
6395 		un->un_ctype	 = CTYPE_CDROM;
6396 		break;
6397 	case DTYPE_OPTICAL:
6398 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6399 		un->un_ctype	 = CTYPE_ROD;
6400 		break;
6401 	default:
6402 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6403 		un->un_ctype	 = CTYPE_CCS;
6404 		break;
6405 	}
6406 
6407 	/*
6408 	 * Try to read the interconnect type from the HBA.
6409 	 *
6410 	 * Note: This driver is currently compiled as two binaries, a parallel
6411 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6412 	 * differences are determined at compile time. In the future a single
6413 	 * binary will be provided and the interconnect type will be used to
6414 	 * differentiate between fibre and parallel scsi behaviors. At that time
6415 	 * it will be necessary for all fibre channel HBAs to support this
6416 	 * property.
6417 	 *
6418 	 * set un_f_is_fiber to TRUE ( default fiber )
6419 	 * Set un_f_is_fibre to TRUE (default fibre).
6420 	un->un_f_is_fibre = TRUE;
6421 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6422 	case INTERCONNECT_SSA:
6423 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6424 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6425 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6426 		break;
6427 	case INTERCONNECT_PARALLEL:
6428 		un->un_f_is_fibre = FALSE;
6429 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6430 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6431 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6432 		break;
6433 	case INTERCONNECT_SATA:
6434 		un->un_f_is_fibre = FALSE;
6435 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6436 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6437 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6438 		break;
6439 	case INTERCONNECT_FIBRE:
6440 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6441 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6442 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6443 		break;
6444 	case INTERCONNECT_FABRIC:
6445 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6446 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6447 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6448 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6449 		break;
6450 	default:
6451 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6452 		/*
6453 		 * The HBA does not support the "interconnect-type" property
6454 		 * (or did not provide a recognized type).
6455 		 *
6456 		 * Note: This will be obsoleted when a single fibre channel
6457 		 * and parallel scsi driver is delivered. In the meantime the
6458 	 * interconnect type will be set to the platform default. If that
6459 		 * type is not parallel SCSI, it means that we should be
6460 		 * assuming "ssd" semantics. However, here this also means that
6461 		 * the FC HBA is not supporting the "interconnect-type" property
6462 		 * like we expect it to, so log this occurrence.
6463 		 */
6464 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6465 		if (!SD_IS_PARALLEL_SCSI(un)) {
6466 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6467 			    "sd_unit_attach: un:0x%p Assuming "
6468 			    "INTERCONNECT_FIBRE\n", un);
6469 		} else {
6470 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6471 			    "sd_unit_attach: un:0x%p Assuming "
6472 			    "INTERCONNECT_PARALLEL\n", un);
6473 			un->un_f_is_fibre = FALSE;
6474 		}
6475 #else
6476 		/*
6477 		 * Note: This source will be implemented when a single fibre
6478 		 * channel and parallel scsi driver is delivered. The default
6479 		 * will be to assume that if a device does not support the
6480 		 * "interconnect-type" property it is a parallel SCSI HBA and
6481 		 * we will set the interconnect type for parallel scsi.
6482 		 */
6483 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6484 		un->un_f_is_fibre = FALSE;
6485 #endif
6486 		break;
6487 	}
6488 
6489 	if (un->un_f_is_fibre == TRUE) {
6490 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6491 		    SCSI_VERSION_3) {
6492 			switch (un->un_interconnect_type) {
6493 			case SD_INTERCONNECT_FIBRE:
6494 			case SD_INTERCONNECT_SSA:
6495 				un->un_node_type = DDI_NT_BLOCK_WWN;
6496 				break;
6497 			default:
6498 				break;
6499 			}
6500 		}
6501 	}
6502 
6503 	/*
6504 	 * Initialize the Request Sense command for the target
6505 	 */
6506 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6507 		goto alloc_rqs_failed;
6508 	}
6509 
6510 	/*
6511 	 * Set un_retry_count with SD_RETRY_COUNT; this is fine for SPARC,
6512 	 * which has separate binaries for sd and ssd.
6513 	 *
6514 	 * x86 has one binary, so un_retry_count is set based on the
6515 	 * connection type. These hardcoded values will go away when
6516 	 * SPARC also uses one binary for sd and ssd, and they need to
6517 	 * match SD_RETRY_COUNT in sddef.h.
6518 	 * The value used is based on the interconnect type:
6519 	 * fibre = 3, parallel = 5.
6520 	 */
6521 #if defined(__i386) || defined(__amd64)
6522 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6523 #else
6524 	un->un_retry_count = SD_RETRY_COUNT;
6525 #endif
6526 
6527 	/*
6528 	 * Set the per disk retry count to the default number of retries
6529 	 * for disks and CDROMs. This value can be overridden by the
6530 	 * disk property list or an entry in sd.conf.
6531 	 */
6532 	un->un_notready_retry_count =
6533 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un) :
6534 	    DISK_NOT_READY_RETRY_COUNT(un);
6535 
6536 	/*
6537 	 * Set the busy retry count to the default value of un_retry_count.
6538 	 * This can be overridden by entries in sd.conf or the device
6539 	 * config table.
6540 	 */
6541 	un->un_busy_retry_count = un->un_retry_count;
6542 
6543 	/*
6544 	 * Init the reset threshold for retries.  This number determines
6545 	 * how many retries must be performed before a reset can be issued
6546 	 * (for certain error conditions). This can be overridden by entries
6547 	 * in sd.conf or the device config table.
6548 	 */
6549 	un->un_reset_retry_count = (un->un_retry_count / 2);
6550 
6551 	/*
6552 	 * Set the victim_retry_count to twice the default un_retry_count.
6553 	 */
6554 	un->un_victim_retry_count = (2 * un->un_retry_count);
6555 
6556 	/*
6557 	 * Set the reservation release timeout to the default value of
6558 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6559 	 * device config table.
6560 	 */
6561 	un->un_reserve_release_time = 5;
6562 
6563 	/*
6564 	 * Set up the default maximum transfer size. Note that this may
6565 	 * get updated later in the attach, when setting up default wide
6566 	 * operations for disks.
6567 	 */
6568 #if defined(__i386) || defined(__amd64)
6569 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6570 #else
6571 	un->un_max_xfer_size = (uint_t)maxphys;
6572 #endif
6573 
6574 	/*
6575 	 * Get "allow bus device reset" property (defaults to "enabled" if
6576 	 * the property was not defined). This is to disable bus resets for
6577 	 * certain kinds of error recovery. Note: In the future when a run-time
6578 	 * fibre check is available the soft state flag should default to
6579 	 * enabled.
6580 	 */
6581 	if (un->un_f_is_fibre == TRUE) {
6582 		un->un_f_allow_bus_device_reset = TRUE;
6583 	} else {
6584 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6585 		    "allow-bus-device-reset", 1) != 0) {
6586 			un->un_f_allow_bus_device_reset = TRUE;
6587 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6588 			    "sd_unit_attach: un:0x%p Bus device reset "
6589 			    "enabled\n", un);
6590 		} else {
6591 			un->un_f_allow_bus_device_reset = FALSE;
6592 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6593 			    "sd_unit_attach: un:0x%p Bus device reset "
6594 			    "disabled\n", un);
6595 		}
6596 	}
6597 
6598 	/*
6599 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6600 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6601 	 *
6602 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6603 	 * property. The new "variant" property with a value of "atapi" has been
6604 	 * introduced so that future 'variants' of standard SCSI behavior (like
6605 	 * atapi) could be specified by the underlying HBA drivers by supplying
6606 	 * a new value for the "variant" property, instead of having to define a
6607 	 * new property.
6608 	 */
6609 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6610 		un->un_f_cfg_is_atapi = TRUE;
6611 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6612 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6613 	}
6614 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6615 	    &variantp) == DDI_PROP_SUCCESS) {
6616 		if (strcmp(variantp, "atapi") == 0) {
6617 			un->un_f_cfg_is_atapi = TRUE;
6618 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6619 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6620 		}
6621 		ddi_prop_free(variantp);
6622 	}
6623 
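	/*
	 * Default timeout for ordinary I/O commands. SD_IO_TIME is
	 * presumably a tunable defined in sddef.h.
	 */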
6624 	un->un_cmd_timeout	= SD_IO_TIME;
6625 
6626 	/* Info on current states, statuses, etc. (Updated frequently) */
6627 	un->un_state		= SD_STATE_NORMAL;
6628 	un->un_last_state	= SD_STATE_NORMAL;
6629 
6630 	/* Control & status info for command throttling */
6631 	un->un_throttle		= sd_max_throttle;
6632 	un->un_saved_throttle	= sd_max_throttle;
6633 	un->un_min_throttle	= sd_min_throttle;
6634 
6635 	if (un->un_f_is_fibre == TRUE) {
6636 		un->un_f_use_adaptive_throttle = TRUE;
6637 	} else {
6638 		un->un_f_use_adaptive_throttle = FALSE;
6639 	}
6640 
6641 	/* Removable media support. */
6642 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6643 	un->un_mediastate		= DKIO_NONE;
6644 	un->un_specified_mediastate	= DKIO_NONE;
6645 
6646 	/* CVs for suspend/resume (PM or DR) */
6647 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6648 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6649 
6650 	/* Power management support. */
6651 	un->un_power_level = SD_SPINDLE_UNINIT;
6652 
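	/* CV and flag used to serialize in-progress write cache changes. */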
6653 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6654 	un->un_f_wcc_inprog = 0;
6655 
6656 	/*
6657 	 * The open/close semaphore is used to serialize threads executing
6658 	 * in the driver's open & close entry point routines for a given
6659 	 * instance.
6660 	 */
6661 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
6662 
6663 	/*
6664 	 * The conf file entry and softstate variable are forceful overrides,
6665 	 * meaning a non-zero value must be entered to change the default.
6666 	 */
6667 	un->un_f_disksort_disabled = FALSE;
6668 
6669 	/*
6670 	 * Retrieve the properties from the static driver table or the driver
6671 	 * configuration file (.conf) for this unit and update the soft state
6672 	 * for the device as needed for the indicated properties.
6673 	 * Note: the property configuration needs to occur here as some of the
6674 	 * following routines may have dependencies on soft state flags set
6675 	 * as part of the driver property configuration.
6676 	 */
6677 	sd_read_unit_properties(un);
6678 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6679 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6680 
6681 	/*
6682 	 * A device is treated as hotpluggable only if it has the
6683 	 * "hotpluggable" property; otherwise it is regarded as
6684 	 * non-hotpluggable.
6685 	 */
6686 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6687 	    -1) != -1) {
6688 		un->un_f_is_hotpluggable = TRUE;
6689 	}
6690 
6691 	/*
6692 	 * Set the unit's attributes (flags) according to the "hotpluggable"
6693 	 * property and the RMB bit in the INQUIRY data.
6694 	 */
6695 	sd_set_unit_attributes(un, devi);
6696 
6697 	/*
6698 	 * By default, we mark the capacity, lbasize, and geometry
6699 	 * as invalid. Only if we successfully read a valid capacity
6700 	 * will we update the un_blockcount and un_tgt_blocksize with the
6701 	 * valid values (the geometry will be validated later).
6702 	 */
6703 	un->un_f_blockcount_is_valid	= FALSE;
6704 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6705 
6706 	/*
6707 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
6708 	 * otherwise.
6709 	 */
6710 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6711 	un->un_blockcount = 0;
6712 
6713 	/*
6714 	 * Set up the per-instance info needed to determine the correct
6715 	 * CDBs and other info for issuing commands to the target.
6716 	 */
6717 	sd_init_cdb_limits(un);
6718 
6719 	/*
6720 	 * Set up the IO chains to use, based upon the target type.
6721 	 */
6722 	if (un->un_f_non_devbsize_supported) {
6723 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6724 	} else {
6725 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6726 	}
6727 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6728 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6729 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6730 
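	/*
	 * Create the xbuf attribute for this instance and register it with
	 * the devinfo node. sd_xbuf_strategy is the strategy callback, and
	 * the active/reserve limits bound the number of xbufs in use.
	 */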
6731 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6732 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6733 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6734 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
6735 
6736 
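	/* Only CD-ROM devices use the sd_additional_codes table. */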
6737 	if (ISCD(un)) {
6738 		un->un_additional_codes = sd_additional_codes;
6739 	} else {
6740 		un->un_additional_codes = NULL;
6741 	}
6742 
6743 	/*
6744 	 * Create the kstats here so they can be available for attach-time
6745 	 * routines that send commands to the unit (either polled or via
6746 	 * sd_send_scsi_cmd).
6747 	 *
6748 	 * Note: This is a critical sequence that needs to be maintained:
6749 	 *	1) Instantiate the kstats here, before any routines using the
6750 	 *	   iopath (i.e. sd_send_scsi_cmd).
6751 	 *	2) Instantiate and initialize the partition stats
6752 	 *	   (sd_set_pstats).
6753 	 *	3) Initialize the error stats (sd_set_errstats), following
6754 	 *	   sd_validate_geometry(),sd_register_devid(),
6755 	 *	   and sd_cache_control().
6756 	 */
6757 
6758 	un->un_stats = kstat_create(sd_label, instance,
6759 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6760 	if (un->un_stats != NULL) {
6761 		un->un_stats->ks_lock = SD_MUTEX(un);
6762 		kstat_install(un->un_stats);
6763 	}
6764 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6765 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6766 
6767 	sd_create_errstats(un, instance);
6768 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6769 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6770 
6771 	/*
6772 	 * The following if/else code was relocated here from below as part
6773 	 * of the fix for bug (4430280). However with the default setup added
6774 	 * on entry to this routine, it's no longer absolutely necessary for
6775 	 * this to be before the call to sd_spin_up_unit.
6776 	 */
6777 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6778 		/*
6779 		 * If SCSI-2 tagged queueing is supported by the target
6780 		 * and by the host adapter then we will enable it.
6781 		 */
6782 		un->un_tagflags = 0;
6783 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6784 		    (devp->sd_inq->inq_cmdque) &&
6785 		    (un->un_f_arq_enabled == TRUE)) {
6786 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6787 			    1, 1) == 1) {
6788 				un->un_tagflags = FLAG_STAG;
6789 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6790 				    "sd_unit_attach: un:0x%p tag queueing "
6791 				    "enabled\n", un);
6792 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6793 			    "untagged-qing", 0) == 1) {
6794 				un->un_f_opt_queueing = TRUE;
6795 				un->un_saved_throttle = un->un_throttle =
6796 				    min(un->un_throttle, 3);
6797 			} else {
6798 				un->un_f_opt_queueing = FALSE;
6799 				un->un_saved_throttle = un->un_throttle = 1;
6800 			}
6801 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6802 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6803 			/* The Host Adapter supports internal queueing. */
6804 			un->un_f_opt_queueing = TRUE;
6805 			un->un_saved_throttle = un->un_throttle =
6806 			    min(un->un_throttle, 3);
6807 		} else {
6808 			un->un_f_opt_queueing = FALSE;
6809 			un->un_saved_throttle = un->un_throttle = 1;
6810 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6811 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6812 		}
6813 
6814 		/*
6815 		 * Enable large transfers for SATA/SAS drives
6816 		 */
6817 		if (SD_IS_SERIAL(un)) {
6818 			un->un_max_xfer_size =
6819 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6820 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6821 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6822 			    "sd_unit_attach: un:0x%p max transfer "
6823 			    "size=0x%x\n", un, un->un_max_xfer_size);
6824 
6825 		}
6826 
6827 		/* Setup or tear down default wide operations for disks */
6828 
6829 		/*
6830 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6831 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6832 		 * system and be set to different values. In the future this
6833 		 * code may need to be updated when the ssd module is
6834 		 * obsoleted and removed from the system. (4299588)
6835 		 */
6836 		if (SD_IS_PARALLEL_SCSI(un) &&
6837 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6838 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6839 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6840 			    1, 1) == 1) {
6841 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6842 				    "sd_unit_attach: un:0x%p Wide Transfer "
6843 				    "enabled\n", un);
6844 			}
6845 
6846 			/*
6847 			 * If tagged queuing has also been enabled, then
6848 			 * enable large xfers
6849 			 */
6850 			if (un->un_saved_throttle == sd_max_throttle) {
6851 				un->un_max_xfer_size =
6852 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6853 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6854 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6855 				    "sd_unit_attach: un:0x%p max transfer "
6856 				    "size=0x%x\n", un, un->un_max_xfer_size);
6857 			}
6858 		} else {
6859 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6860 			    0, 1) == 1) {
6861 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6862 				    "sd_unit_attach: un:0x%p "
6863 				    "Wide Transfer disabled\n", un);
6864 			}
6865 		}
6866 	} else {
6867 		un->un_tagflags = FLAG_STAG;
6868 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6869 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6870 	}
6871 
6872 	/*
6873 	 * If this target supports LUN reset, try to enable it.
6874 	 */
6875 	if (un->un_f_lun_reset_enabled) {
6876 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6877 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6878 			    "un:0x%p lun_reset capability set\n", un);
6879 		} else {
6880 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6881 			    "un:0x%p lun-reset capability not set\n", un);
6882 		}
6883 	}
6884 
6885 	/*
6886 	 * At this point in the attach, we have enough info in the
6887 	 * soft state to be able to issue commands to the target.
6888 	 *
6889 	 * All command paths used below MUST issue their commands as
6890 	 * SD_PATH_DIRECT. This is important as intermediate layers
6891 	 * are not all initialized yet (such as PM).
6892 	 */
6893 
6894 	/*
6895 	 * Send a TEST UNIT READY command to the device. This should clear
6896 	 * any outstanding UNIT ATTENTION that may be present.
6897 	 *
6898 	 * Note: Don't check for success, just track if there is a reservation,
6899 	 * this is a throw away command to clear any unit attentions.
6900 	 *
6901 	 * Note: This MUST be the first command issued to the target during
6902 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
6903 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
6904 	 * with attempts at spinning up a device with no media.
6905 	 */
6906 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
6907 		reservation_flag = SD_TARGET_IS_RESERVED;
6908 	}
6909 
6910 	/*
6911 	 * If the device is NOT a removable media device, attempt to spin
6912 	 * it up (using the START_STOP_UNIT command) and read its capacity
6913 	 * (using the READ CAPACITY command).  Note, however, that either
6914 	 * of these could fail and in some cases we would continue with
6915 	 * the attach despite the failure (see below).
6916 	 */
6917 	if (un->un_f_descr_format_supported) {
6918 		switch (sd_spin_up_unit(un)) {
6919 		case 0:
6920 			/*
6921 			 * Spin-up was successful; now try to read the
6922 			 * capacity.  If successful then save the results
6923 			 * and mark the capacity & lbasize as valid.
6924 			 */
6925 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6926 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
6927 
6928 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
6929 			    &lbasize, SD_PATH_DIRECT)) {
6930 			case 0: {
6931 				if (capacity > DK_MAX_BLOCKS) {
6932 #ifdef _LP64
6933 					if (capacity + 1 >
6934 					    SD_GROUP1_MAX_ADDRESS) {
6935 						/*
6936 						 * Enable descriptor format
6937 						 * sense data so that we can
6938 						 * get 64 bit sense data
6939 						 * fields.
6940 						 */
6941 						sd_enable_descr_sense(un);
6942 					}
6943 #else
6944 					/* 32-bit kernels can't handle this */
6945 					scsi_log(SD_DEVINFO(un),
6946 					    sd_label, CE_WARN,
6947 					    "disk has %llu blocks, which "
6948 					    "is too large for a 32-bit "
6949 					    "kernel", capacity);
6950 
6951 #if defined(__i386) || defined(__amd64)
6952 					/*
6953 					 * A 1TB disk was treated as
6954 					 * (1T - 512)B in the past, so it
6955 					 * might have a valid VTOC and
6956 					 * Solaris partitions; we have to
6957 					 * allow it to continue to work.
6958 					 */
6959 					if (capacity - 1 > DK_MAX_BLOCKS)
6960 #endif
6961 					goto spinup_failed;
6962 #endif
6963 				}
6964 
6965 				/*
6966 				 * It is not necessary to check here whether
6967 				 * the device capacity exceeds what the max
6968 				 * hba cdb can support, because
6969 				 * sd_send_scsi_READ_CAPACITY retrieves the
6970 				 * capacity via a USCSI command, which is
6971 				 * itself constrained by the max hba cdb.
6972 				 * sd_send_scsi_READ_CAPACITY returns EINVAL
6973 				 * when a bigger cdb than the required cdb
6974 				 * length would be used; that case is
6975 				 * handled below in "case EINVAL".
6976 				 */
6977 
6978 				/*
6979 				 * The following relies on
6980 				 * sd_send_scsi_READ_CAPACITY never
6981 				 * returning 0 for capacity and/or lbasize.
6982 				 */
6983 				sd_update_block_info(un, lbasize, capacity);
6984 
6985 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6986 				    "sd_unit_attach: un:0x%p capacity = %ld "
6987 				    "blocks; lbasize= %ld.\n", un,
6988 				    un->un_blockcount, un->un_tgt_blocksize);
6989 
6990 				break;
6991 			}
6992 			case EINVAL:
6993 				/*
6994 				 * In the case where the max-cdb-length property
6995 				 * is smaller than the required CDB length for
6996 				 * a SCSI device, a target driver can fail to
6997 				 * attach to that device.
6998 				 */
6999 				scsi_log(SD_DEVINFO(un),
7000 				    sd_label, CE_WARN,
7001 				    "disk capacity is too large "
7002 				    "for current cdb length");
7003 				goto spinup_failed;
7004 			case EACCES:
7005 				/*
7006 				 * Should never get here if the spin-up
7007 				 * succeeded, but code it in anyway.
7008 				 * From here, just continue with the attach...
7009 				 */
7010 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7011 				    "sd_unit_attach: un:0x%p "
7012 				    "sd_send_scsi_READ_CAPACITY "
7013 				    "returned reservation conflict\n", un);
7014 				reservation_flag = SD_TARGET_IS_RESERVED;
7015 				break;
7016 			default:
7017 				/*
7018 				 * Likewise, should never get here if the
7019 				 * spin-up succeeded. Just continue with
7020 				 * the attach...
7021 				 */
7022 				break;
7023 			}
7024 			break;
7025 		case EACCES:
7026 			/*
7027 			 * Device is reserved by another host.  In this case
7028 			 * we could not spin it up or read the capacity, but
7029 			 * we continue with the attach anyway.
7030 			 */
7031 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7032 			    "sd_unit_attach: un:0x%p spin-up reservation "
7033 			    "conflict.\n", un);
7034 			reservation_flag = SD_TARGET_IS_RESERVED;
7035 			break;
7036 		default:
7037 			/* Fail the attach if the spin-up failed. */
7038 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7039 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7040 			    "sd_unit_attach: un:0x%p spin-up failed.\n", un);
7041 		}
7042 	}
7043 
7044 	/*
7045 	 * Check to see if this is a MMC drive
7046 	 */
7047 	if (ISCD(un)) {
7048 		sd_set_mmc_caps(un);
7049 	}
7050 
7051 
7052 	/*
7053 	 * Add a zero-length attribute to tell the world we support
7054 	 * kernel ioctls (for layered drivers)
7055 	 */
7056 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7057 	    DDI_KERNEL_IOCTL, NULL, 0);
7058 
7059 	/*
7060 	 * Add a boolean property to tell the world we support
7061 	 * the B_FAILFAST flag (for layered drivers)
7062 	 */
7063 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7064 	    "ddi-failfast-supported", NULL, 0);
7065 
7066 	/*
7067 	 * Initialize power management
7068 	 */
7069 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7070 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7071 	sd_setup_pm(un, devi);
7072 	if (un->un_f_pm_is_enabled == FALSE) {
7073 		/*
7074 		 * For performance, point to a jump table that does
7075 		 * not include pm.
7076 		 * The direct and priority chains don't change with PM.
7077 		 *
7078 		 * Note: this is currently done based on individual device
7079 		 * capabilities. When an interface for determining system
7080 		 * power enabled state becomes available, or when additional
7081 		 * layers are added to the command chain, these values will
7082 		 * have to be re-evaluated for correctness.
7083 		 */
7084 		if (un->un_f_non_devbsize_supported) {
7085 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7086 		} else {
7087 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7088 		}
7089 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7090 	}
7091 
7092 	/*
7093 	 * This property is set to 0 by HA software to avoid retries
7094 	 * on a reserved disk. (The preferred property name is
7095 	 * "retry-on-reservation-conflict") (1189689)
7096 	 *
7097 	 * Note: The use of a global here can have unintended consequences. A
7098 	 * per instance variable is preferable to match the capabilities of
7099 	 * different underlying hba's (4402600)
7100 	 */
7101 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7102 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7103 	    sd_retry_on_reservation_conflict);
7104 	if (sd_retry_on_reservation_conflict != 0) {
7105 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7106 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7107 		    sd_retry_on_reservation_conflict);
7108 	}
7109 
7110 	/* Set up options for QFULL handling. */
7111 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7112 	    "qfull-retries", -1)) != -1) {
7113 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7114 		    rval, 1);
7115 	}
7116 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7117 	    "qfull-retry-interval", -1)) != -1) {
7118 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7119 		    rval, 1);
7120 	}
7121 
7122 	/*
7123 	 * This just prints a message that announces the existence of the
7124 	 * device. The message is always printed in the system logfile, but
7125 	 * only appears on the console if the system is booted with the
7126 	 * -v (verbose) argument.
7127 	 */
7128 	ddi_report_dev(devi);
7129 
7130 	un->un_mediastate = DKIO_NONE;
7131 
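	/*
	 * Allocate a handle for the common label (cmlb) module, which
	 * manages the disk label and partition information for this unit.
	 */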
7132 	cmlb_alloc_handle(&un->un_cmlbhandle);
7133 
7134 #if defined(__i386) || defined(__amd64)
7135 	/*
7136 	 * On x86, compensate for off-by-1 legacy error
7137 	 */
7138 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7139 	    (lbasize == un->un_sys_blocksize))
7140 		offbyone = CMLB_OFF_BY_ONE;
7141 #endif
7142 
7143 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7144 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7145 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7146 	    (void *)SD_PATH_DIRECT) != 0) {
7147 		goto cmlb_attach_failed;
7148 	}
7149 
7150 
7151 	/*
7152 	 * Read and validate the device's geometry (i.e., the disk label).
7153 	 * A new unformatted drive will not have a valid geometry, but
7154 	 * the driver needs to successfully attach to this device so
7155 	 * the drive can be formatted via ioctls.
7156 	 */
7157 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7158 	    (void *)SD_PATH_DIRECT) == 0) ? 1 : 0;
7159 
7160 	mutex_enter(SD_MUTEX(un));
7161 
7162 	/*
7163 	 * Read and initialize the devid for the unit.
7164 	 */
7165 	ASSERT(un->un_errstats != NULL);
7166 	if (un->un_f_devid_supported) {
7167 		sd_register_devid(un, devi, reservation_flag);
7168 	}
7169 	mutex_exit(SD_MUTEX(un));
7170 
7171 #if (defined(__fibre))
7172 	/*
7173 	 * Register callbacks for fibre only.  You can't do this solely
7174 	 * on the basis of the devid_type because this is hba specific.
7175 	 * We need to query our hba capabilities to find out whether to
7176 	 * register or not.
7177 	 */
7178 	if (un->un_f_is_fibre) {
7179 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7180 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7181 			sd_init_event_callbacks(un);
7182 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7183 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
7184 		}
7185 #endif
7186 
7187 	if (un->un_f_opt_disable_cache == TRUE) {
7188 		/*
7189 		 * Disable both read cache and write cache.  This is
7190 		 * the historic behavior of the keywords in the config file.
7191 		 */
7192 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7193 		    0) {
7194 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7195 			    "sd_unit_attach: un:0x%p Could not disable "
7196 			    "caching", un);
7197 			goto devid_failed;
7198 		}
7199 	}
7200 
7201 	/*
7202 	 * Check the value of the WCE bit now and
7203 	 * set un_f_write_cache_enabled accordingly.
7204 	 */
7205 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7206 	mutex_enter(SD_MUTEX(un));
7207 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7208 	mutex_exit(SD_MUTEX(un));
7209 
7210 	/*
7211 	 * Set the pstat and error stat values here, so data obtained during the
7212 	 * previous attach-time routines is available.
7213 	 *
7214 	 * Note: This is a critical sequence that needs to be maintained:
7215 	 *	1) Instantiate the kstats before any routines using the iopath
7216 	 *	   (i.e. sd_send_scsi_cmd).
7217 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7218 	 *	   stats (sd_set_pstats) here, following
7219 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7220 	 *	   sd_cache_control().
7221 	 */
7222 
7223 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7224 		sd_set_pstats(un);
7225 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7226 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7227 	}
7228 
7229 	sd_set_errstats(un);
7230 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7231 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7232 
7233 	/*
7234 	 * Find out what type of reservation this disk supports.
7235 	 */
7236 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7237 	case 0:
7238 		/*
7239 		 * SCSI-3 reservations are supported.
7240 		 */
7241 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7242 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7243 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7244 		break;
7245 	case ENOTSUP:
7246 		/*
7247 		 * The PERSISTENT RESERVE IN command would not be recognized by
7248 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7249 		 */
7250 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7251 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7252 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7253 		break;
7254 	default:
7255 		/*
7256 		 * default to SCSI-3 reservations
7257 		 */
7258 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7259 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7260 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7261 		break;
7262 	}
7263 
7264 	/*
7265 	 * After successfully attaching an instance, we record how many luns
7266 	 * have been attached on the corresponding target and controller for
7267 	 * parallel SCSI. This information is used when sd tries to set the
7268 	 * tagged queuing capability in the HBA.
7269 	 */
7270 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7271 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7272 	}
7273 
7274 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7275 	    "sd_unit_attach: un:0x%p exit success\n", un);
7276 
7277 	return (DDI_SUCCESS);
7278 
7279 	/*
7280 	 * An error occurred during the attach; clean up & return failure.
7281 	 */
7282 
7283 devid_failed:
7284 
7285 setup_pm_failed:
7286 	ddi_remove_minor_node(devi, NULL);
7287 
7288 cmlb_attach_failed:
7289 	/*
7290 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7291 	 */
7292 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7293 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7294 
7295 	/*
7296 	 * Refer to the comments of setting tagged-qing in the beginning of
7297 	 * sd_unit_attach. We can only disable tagged queuing when there is
7298 	 * no lun attached on the target.
7299 	 */
7300 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7301 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7302 	}
7303 
7304 	if (un->un_f_is_fibre == FALSE) {
7305 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7306 	}
7307 
7308 spinup_failed:
7309 
7310 	mutex_enter(SD_MUTEX(un));
7311 
7312 	/* Cancel any pending SD_PATH_DIRECT_PRIORITY cmd. restart callback */
7313 	if (un->un_direct_priority_timeid != NULL) {
7314 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7315 		un->un_direct_priority_timeid = NULL;
7316 		mutex_exit(SD_MUTEX(un));
7317 		(void) untimeout(temp_id);
7318 		mutex_enter(SD_MUTEX(un));
7319 	}
7320 
7321 	/* Cancel any pending start/stop timeouts */
7322 	if (un->un_startstop_timeid != NULL) {
7323 		timeout_id_t temp_id = un->un_startstop_timeid;
7324 		un->un_startstop_timeid = NULL;
7325 		mutex_exit(SD_MUTEX(un));
7326 		(void) untimeout(temp_id);
7327 		mutex_enter(SD_MUTEX(un));
7328 	}
7329 
7330 	/* Cancel any pending reset-throttle timeouts */
7331 	if (un->un_reset_throttle_timeid != NULL) {
7332 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7333 		un->un_reset_throttle_timeid = NULL;
7334 		mutex_exit(SD_MUTEX(un));
7335 		(void) untimeout(temp_id);
7336 		mutex_enter(SD_MUTEX(un));
7337 	}
7338 
7339 	/* Cancel any pending retry timeouts */
7340 	if (un->un_retry_timeid != NULL) {
7341 		timeout_id_t temp_id = un->un_retry_timeid;
7342 		un->un_retry_timeid = NULL;
7343 		mutex_exit(SD_MUTEX(un));
7344 		(void) untimeout(temp_id);
7345 		mutex_enter(SD_MUTEX(un));
7346 	}
7347 
7348 	/* Cancel any pending delayed cv broadcast timeouts */
7349 	if (un->un_dcvb_timeid != NULL) {
7350 		timeout_id_t temp_id = un->un_dcvb_timeid;
7351 		un->un_dcvb_timeid = NULL;
7352 		mutex_exit(SD_MUTEX(un));
7353 		(void) untimeout(temp_id);
7354 		mutex_enter(SD_MUTEX(un));
7355 	}
7356 
7357 	mutex_exit(SD_MUTEX(un));
7358 
7359 	/* There should be no in-progress I/O, so ASSERT the following */
7360 	ASSERT(un->un_ncmds_in_transport == 0);
7361 	ASSERT(un->un_ncmds_in_driver == 0);
7362 
7363 	/* Do not free the softstate if the callback routine is active */
7364 	sd_sync_with_callback(un);
7365 
7366 	/*
7367 	 * Partition stats apparently are not used with removables. These would
7368 	 * not have been created during attach, so no need to clean them up...
7369 	 */
7370 	if (un->un_stats != NULL) {
7371 		kstat_delete(un->un_stats);
7372 		un->un_stats = NULL;
7373 	}
7374 	if (un->un_errstats != NULL) {
7375 		kstat_delete(un->un_errstats);
7376 		un->un_errstats = NULL;
7377 	}
7378 
7379 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7380 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7381 
7382 	ddi_prop_remove_all(devi);
7383 	sema_destroy(&un->un_semoclose);
7384 	cv_destroy(&un->un_state_cv);
7385 
7386 getrbuf_failed:
7387 
7388 	sd_free_rqs(un);
7389 
7390 alloc_rqs_failed:
7391 
7392 	devp->sd_private = NULL;
7393 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7394 
7395 get_softstate_failed:
7396 	/*
7397 	 * Note: the man pages are unclear as to whether or not doing a
7398 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7399 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7400 	 * ddi_get_soft_state() fails.  The implication seems to be
7401 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7402 	 */
7403 	ddi_soft_state_free(sd_state, instance);
7404 
7405 probe_failed:
7406 	scsi_unprobe(devp);
7407 #ifdef SDDEBUG
7408 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
7409 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
7410 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
7411 		    (void *)un);
7412 	}
7413 #endif
7414 	return (DDI_FAILURE);
7415 }
7416 
7417 
7418 /*
7419  *    Function: sd_unit_detach
7420  *
7421  * Description: Performs DDI_DETACH processing for sddetach().
7422  *
7423  * Return Code: DDI_SUCCESS
7424  *		DDI_FAILURE
7425  *
7426  *     Context: Kernel thread context
7427  */
7428 
7429 static int
7430 sd_unit_detach(dev_info_t *devi)
7431 {
7432 	struct scsi_device	*devp;
7433 	struct sd_lun		*un;
7434 	int			i;
7435 	int			tgt;
7436 	dev_t			dev;
7437 	dev_info_t		*pdip = ddi_get_parent(devi);
7438 	int			instance = ddi_get_instance(devi);
7439 
7440 	mutex_enter(&sd_detach_mutex);
7441 
7442 	/*
7443 	 * Fail the detach for any of the following:
7444 	 *  - Unable to get the sd_lun struct for the instance
7445 	 *  - A layered driver has an outstanding open on the instance
7446 	 *  - Another thread is already detaching this instance
7447 	 *  - Another thread is currently performing an open
7448 	 */
7449 	devp = ddi_get_driver_private(devi);
7450 	if ((devp == NULL) ||
7451 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7452 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7453 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7454 		mutex_exit(&sd_detach_mutex);
7455 		return (DDI_FAILURE);
7456 	}
7457 
7458 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7459 
7460 	/*
7461 	 * Mark this instance as currently in a detach, to inhibit any
7462 	 * opens from a layered driver.
7463 	 */
7464 	un->un_detach_count++;
7465 	mutex_exit(&sd_detach_mutex);
7466 
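	/*
	 * Look up the target number; it is used below to maintain the
	 * per-target lun count for the tagged queuing capability.
	 */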
7467 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7468 	    SCSI_ADDR_PROP_TARGET, -1);
7469 
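	/* Build the dev_t for this unit; used below to release reservations. */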
7470 	dev = sd_make_device(SD_DEVINFO(un));
7471 
7472 #ifndef lint
7473 	_NOTE(COMPETING_THREADS_NOW);
7474 #endif
7475 
7476 	mutex_enter(SD_MUTEX(un));
7477 
7478 	/*
7479 	 * Fail the detach if there are any outstanding layered
7480 	 * opens on this device.
7481 	 */
7482 	for (i = 0; i < NDKMAP; i++) {
7483 		if (un->un_ocmap.lyropen[i] != 0) {
7484 			goto err_notclosed;
7485 		}
7486 	}
7487 
7488 	/*
7489 	 * Verify there are NO outstanding commands issued to this device.
7490 	 * i.e., un_ncmds_in_transport == 0.
7491 	 * It's possible to have outstanding commands through the physio
7492 	 * code path, even though everything's closed.
7493 	 */
7494 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7495 	    (un->un_direct_priority_timeid != NULL) ||
7496 	    (un->un_state == SD_STATE_RWAIT)) {
7497 		mutex_exit(SD_MUTEX(un));
7498 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7499 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7500 		goto err_stillbusy;
7501 	}
7502 
7503 	/*
7504 	 * If we have the device reserved, release the reservation.
7505 	 */
7506 	if ((un->un_resvd_status & SD_RESERVE) &&
7507 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7508 		mutex_exit(SD_MUTEX(un));
7509 		/*
7510 		 * Note: sd_reserve_release sends a command to the device
7511 		 * via the sd_ioctlcmd() path, and can sleep.
7512 		 */
7513 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7514 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7515 			    "sd_dr_detach: Cannot release reservation \n");
7516 		}
7517 	} else {
7518 		mutex_exit(SD_MUTEX(un));
7519 	}
7520 
7521 	/*
7522 	 * Untimeout any reserve recover, throttle reset, restart unit
7523 	 * and delayed broadcast timeout threads. Protect the timeout pointers
7524 	 * from getting nulled by their callback functions.
7525 	 */
7526 	mutex_enter(SD_MUTEX(un));
7527 	if (un->un_resvd_timeid != NULL) {
7528 		timeout_id_t temp_id = un->un_resvd_timeid;
7529 		un->un_resvd_timeid = NULL;
7530 		mutex_exit(SD_MUTEX(un));
7531 		(void) untimeout(temp_id);
7532 		mutex_enter(SD_MUTEX(un));
7533 	}
7534 
7535 	if (un->un_reset_throttle_timeid != NULL) {
7536 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7537 		un->un_reset_throttle_timeid = NULL;
7538 		mutex_exit(SD_MUTEX(un));
7539 		(void) untimeout(temp_id);
7540 		mutex_enter(SD_MUTEX(un));
7541 	}
7542 
7543 	if (un->un_startstop_timeid != NULL) {
7544 		timeout_id_t temp_id = un->un_startstop_timeid;
7545 		un->un_startstop_timeid = NULL;
7546 		mutex_exit(SD_MUTEX(un));
7547 		(void) untimeout(temp_id);
7548 		mutex_enter(SD_MUTEX(un));
7549 	}
7550 
7551 	if (un->un_dcvb_timeid != NULL) {
7552 		timeout_id_t temp_id = un->un_dcvb_timeid;
7553 		un->un_dcvb_timeid = NULL;
7554 		mutex_exit(SD_MUTEX(un));
7555 		(void) untimeout(temp_id);
7556 	} else {
7557 		mutex_exit(SD_MUTEX(un));
7558 	}
7559 
7560 	/* Remove any pending reservation reclaim requests for this device */
7561 	sd_rmv_resv_reclaim_req(dev);
7562 
7563 	mutex_enter(SD_MUTEX(un));
7564 
7565 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7566 	if (un->un_direct_priority_timeid != NULL) {
7567 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7568 		un->un_direct_priority_timeid = NULL;
7569 		mutex_exit(SD_MUTEX(un));
7570 		(void) untimeout(temp_id);
7571 		mutex_enter(SD_MUTEX(un));
7572 	}
7573 
7574 	/* Cancel any active multi-host disk watch thread requests */
7575 	if (un->un_mhd_token != NULL) {
7576 		mutex_exit(SD_MUTEX(un));
7577 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7578 		if (scsi_watch_request_terminate(un->un_mhd_token,
7579 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7580 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7581 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7582 			/*
7583 			 * Note: We are returning here after having removed
7584 			 * some driver timeouts above. This is consistent with
7585 			 * the legacy implementation but perhaps the watch
7586 			 * terminate call should be made with the wait flag set.
7587 			 */
7588 			goto err_stillbusy;
7589 		}
7590 		mutex_enter(SD_MUTEX(un));
7591 		un->un_mhd_token = NULL;
7592 	}
7593 
7594 	if (un->un_swr_token != NULL) {
7595 		mutex_exit(SD_MUTEX(un));
7596 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7597 		if (scsi_watch_request_terminate(un->un_swr_token,
7598 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7599 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7600 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7601 			/*
7602 			 * Note: We are returning here after having removed
7603 			 * some driver timeouts above. This is consistent with
7604 			 * the legacy implementation but perhaps the watch
7605 			 * terminate call should be made with the wait flag set.
7606 			 */
7607 			goto err_stillbusy;
7608 		}
7609 		mutex_enter(SD_MUTEX(un));
7610 		un->un_swr_token = NULL;
7611 	}
7612 
7613 	mutex_exit(SD_MUTEX(un));
7614 
7615 	/*
7616 	 * Clear any scsi_reset_notifies. Canceling is safe even if we
7617 	 * never registered a reset notify for this device.
7618 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7619 	 */
7620 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7621 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7622 
7623 	/*
7624 	 * Protect the timeout pointers from being nulled by their
7625 	 * callback functions during the cancellation process; otherwise
7626 	 * untimeout could be invoked with a NULL value.
7627 	 */
7628 	_NOTE(NO_COMPETING_THREADS_NOW);
7629 
7630 	mutex_enter(&un->un_pm_mutex);
7631 	if (un->un_pm_idle_timeid != NULL) {
7632 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7633 		un->un_pm_idle_timeid = NULL;
7634 		mutex_exit(&un->un_pm_mutex);
7635 
7636 		/*
7637 		 * Timeout is active; cancel it.
7638 		 * Note that it'll never be active on a device
7639 		 * that does not support PM therefore we don't
7640 		 * have to check before calling pm_idle_component.
7641 		 */
7642 		(void) untimeout(temp_id);
7643 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7644 		mutex_enter(&un->un_pm_mutex);
7645 	}
7646 
7647 	/*
7648 	 * Check whether there is already a timeout scheduled for power
7649 	 * management. If so, don't lower the power here; that's the
7650 	 * timeout handler's job.
7651 	 */
7652 	if (un->un_pm_timeid != NULL) {
7653 		timeout_id_t temp_id = un->un_pm_timeid;
7654 		un->un_pm_timeid = NULL;
7655 		mutex_exit(&un->un_pm_mutex);
7656 		/*
7657 		 * Timeout is active; cancel it.
7658 		 * Note that it'll never be active on a device
7659 		 * that does not support PM therefore we don't
7660 		 * have to check before calling pm_idle_component.
7661 		 */
7662 		(void) untimeout(temp_id);
7663 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7664 
7665 	} else {
7666 		mutex_exit(&un->un_pm_mutex);
7667 		if ((un->un_f_pm_is_enabled == TRUE) &&
7668 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7669 		    DDI_SUCCESS)) {
7670 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7671 			    "sd_dr_detach: Lower power request failed, ignoring.\n");
7672 			/*
7673 			 * Fix for bug: 4297749, item # 13
7674 			 * The above test now includes a check to see if PM is
7675 	 * supported by this device before calling
7676 			 * pm_lower_power().
7677 			 * Note, the following is not dead code. The call to
7678 			 * pm_lower_power above will generate a call back into
7679 			 * our sdpower routine which might result in a timeout
7680 			 * handler getting activated. Therefore the following
7681 			 * code is valid and necessary.
7682 			 */
7683 			mutex_enter(&un->un_pm_mutex);
7684 			if (un->un_pm_timeid != NULL) {
7685 				timeout_id_t temp_id = un->un_pm_timeid;
7686 				un->un_pm_timeid = NULL;
7687 				mutex_exit(&un->un_pm_mutex);
7688 				(void) untimeout(temp_id);
7689 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7690 			} else {
7691 				mutex_exit(&un->un_pm_mutex);
7692 			}
7693 		}
7694 	}
7695 
7696 	/*
7697 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7698 	 * Relocated here from above to be after the call to
7699 	 * pm_lower_power, which was getting errors.
7700 	 */
7701 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7702 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7703 
7704 	/*
7705 	 * Currently, tagged queuing is supported per target by the HBA.
7706 	 * Setting it for one lun instance actually sets the capability
7707 	 * for the whole target, which affects the other luns already
7708 	 * attached on the same target. So during detach we can disable
7709 	 * this capability only when this is the last lun left on the
7710 	 * target. In doing so, we assume every lun on a target has the
7711 	 * same tagged queuing capability. The condition can be removed
7712 	 * when HBAs support per-lun tagged queuing.
7713 	 */
7714 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7715 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7716 	}
7717 
7718 	if (un->un_f_is_fibre == FALSE) {
7719 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7720 	}
7721 
7722 	/*
7723 	 * Remove any event callbacks, fibre only
7724 	 */
7725 	if (un->un_f_is_fibre == TRUE) {
7726 		if ((un->un_insert_event != NULL) &&
7727 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
7728 		    DDI_SUCCESS)) {
7729 			/*
7730 			 * Note: We are returning here after having done
7731 			 * substantial cleanup above. This is consistent
7732 			 * with the legacy implementation but this may not
7733 			 * be the right thing to do.
7734 			 */
7735 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7736 			    "sd_dr_detach: Cannot cancel insert event\n");
7737 			goto err_remove_event;
7738 		}
7739 		un->un_insert_event = NULL;
7740 
7741 		if ((un->un_remove_event != NULL) &&
7742 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
7743 		    DDI_SUCCESS)) {
7744 			/*
7745 			 * Note: We are returning here after having done
7746 			 * substantial cleanup above. This is consistent
7747 			 * with the legacy implementation but this may not
7748 			 * be the right thing to do.
7749 			 */
7750 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7751 			    "sd_dr_detach: Cannot cancel remove event\n");
7752 			goto err_remove_event;
7753 		}
7754 		un->un_remove_event = NULL;
7755 	}
7756 
7757 	/* Do not free the softstate if the callback routine is active */
7758 	sd_sync_with_callback(un);
7759 
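	/* Tear down label handling and free the cmlb handle. */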
7760 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7761 	cmlb_free_handle(&un->un_cmlbhandle);
7762 
7763 	/*
7764 	 * Hold the detach mutex here to make sure that no other thread
7765 	 * can ever access a (partially) freed soft state structure.
7766 	 */
7767 	mutex_enter(&sd_detach_mutex);
7768 
7769 	/*
7770 	 * Clean up the soft state struct.
7771 	 * Cleanup is done in reverse order of allocs/inits.
7772 	 * At this point there should be no competing threads anymore.
7773 	 */
7774 
7775 	/* Unregister and free device id. */
7776 	ddi_devid_unregister(devi);
7777 	if (un->un_devid) {
7778 		ddi_devid_free(un->un_devid);
7779 		un->un_devid = NULL;
7780 	}
7781 
7782 	/*
7783 	 * Destroy wmap cache if it exists.
7784 	 */
7785 	if (un->un_wm_cache != NULL) {
7786 		kmem_cache_destroy(un->un_wm_cache);
7787 		un->un_wm_cache = NULL;
7788 	}
7789 
7790 	/*
7791 	 * kstat cleanup is done in detach for all device types (4363169).
7792 	 * We do not want to fail detach if the device kstats are not deleted
7793 	 * since there is confusion about the devo_refcnt for the device.
7794 	 * We just delete the kstats and let detach complete successfully.
7795 	 */
7796 	if (un->un_stats != NULL) {
7797 		kstat_delete(un->un_stats);
7798 		un->un_stats = NULL;
7799 	}
7800 	if (un->un_errstats != NULL) {
7801 		kstat_delete(un->un_errstats);
7802 		un->un_errstats = NULL;
7803 	}
7804 
7805 	/* Remove partition stats */
7806 	if (un->un_f_pkstats_enabled) {
7807 		for (i = 0; i < NSDMAP; i++) {
7808 			if (un->un_pstats[i] != NULL) {
7809 				kstat_delete(un->un_pstats[i]);
7810 				un->un_pstats[i] = NULL;
7811 			}
7812 		}
7813 	}
7814 
7815 	/* Remove xbuf registration */
7816 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7817 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7818 
7819 	/* Remove driver properties */
7820 	ddi_prop_remove_all(devi);
7821 
7822 	mutex_destroy(&un->un_pm_mutex);
7823 	cv_destroy(&un->un_pm_busy_cv);
7824 
7825 	cv_destroy(&un->un_wcc_cv);
7826 
7827 	/* Open/close semaphore */
7828 	sema_destroy(&un->un_semoclose);
7829 
7830 	/* Removable media condvar. */
7831 	cv_destroy(&un->un_state_cv);
7832 
7833 	/* Suspend/resume condvar. */
7834 	cv_destroy(&un->un_suspend_cv);
7835 	cv_destroy(&un->un_disk_busy_cv);
7836 
7837 	sd_free_rqs(un);
7838 
7839 	/* Free up soft state */
7840 	devp->sd_private = NULL;
7841 
7842 	bzero(un, sizeof (struct sd_lun));
7843 	ddi_soft_state_free(sd_state, instance);
7844 
7845 	mutex_exit(&sd_detach_mutex);
7846 
7847 	/* This frees up the INQUIRY data associated with the device. */
7848 	scsi_unprobe(devp);
7849 
7850 	/*
7851 	 * After successfully detaching an instance, we update how many luns
7852 	 * have been attached on the corresponding target and controller for
7853 	 * parallel SCSI. This information is used when sd tries to set the
7854 	 * tagged queuing capability in the HBA.
7855 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7856 	 * check if the device is parallel SCSI. However, we don't need to
7857 	 * check here because we've already checked during attach. No device
7858 	 * that is not parallel SCSI is in the chain.
7859 	 */
7860 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7861 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7862 	}
7863 
7864 	return (DDI_SUCCESS);
7865 
7866 err_notclosed:
7867 	mutex_exit(SD_MUTEX(un));
7868 
7869 err_stillbusy:
7870 	_NOTE(NO_COMPETING_THREADS_NOW);
7871 
7872 err_remove_event:
7873 	mutex_enter(&sd_detach_mutex);
7874 	un->un_detach_count--;
7875 	mutex_exit(&sd_detach_mutex);
7876 
7877 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
7878 	return (DDI_FAILURE);
7879 }
7880 
7881 
7882 /*
7883  *    Function: sd_create_errstats
7884  *
7885  * Description: This routine instantiates the device error stats.
7886  *
7887  *		Note: During attach the stats are instantiated first so they are
7888  *		available for attach-time routines that utilize the driver
7889  *		iopath to send commands to the device. The stats are initialized
7890  *		separately so data obtained during some attach-time routines is
7891  *		available. (4362483)
7892  *
7893  *   Arguments: un - driver soft state (unit) structure
7894  *		instance - driver instance
7895  *
7896  *     Context: Kernel thread context
7897  */
7898 
7899 static void
7900 sd_create_errstats(struct sd_lun *un, int instance)
7901 {
7902 	struct	sd_errstats	*stp;
7903 	char	kstatmodule_err[KSTAT_STRLEN];
7904 	char	kstatname[KSTAT_STRLEN];
7905 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
7906 
7907 	ASSERT(un != NULL);
7908 
7909 	if (un->un_errstats != NULL) {
7910 		return;
7911 	}
7912 
7913 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
7914 	    "%serr", sd_label);
7915 	(void) snprintf(kstatname, sizeof (kstatname),
7916 	    "%s%d,err", sd_label, instance);
7917 
7918 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
7919 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
7920 
7921 	if (un->un_errstats == NULL) {
7922 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7923 		    "sd_create_errstats: Failed kstat_create\n");
7924 		return;
7925 	}
7926 
7927 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7928 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
7929 	    KSTAT_DATA_UINT32);
7930 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
7931 	    KSTAT_DATA_UINT32);
7932 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
7933 	    KSTAT_DATA_UINT32);
7934 	kstat_named_init(&stp->sd_vid,		"Vendor",
7935 	    KSTAT_DATA_CHAR);
7936 	kstat_named_init(&stp->sd_pid,		"Product",
7937 	    KSTAT_DATA_CHAR);
7938 	kstat_named_init(&stp->sd_revision,	"Revision",
7939 	    KSTAT_DATA_CHAR);
7940 	kstat_named_init(&stp->sd_serial,	"Serial No",
7941 	    KSTAT_DATA_CHAR);
7942 	kstat_named_init(&stp->sd_capacity,	"Size",
7943 	    KSTAT_DATA_ULONGLONG);
7944 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
7945 	    KSTAT_DATA_UINT32);
7946 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
7947 	    KSTAT_DATA_UINT32);
7948 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
7949 	    KSTAT_DATA_UINT32);
7950 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
7951 	    KSTAT_DATA_UINT32);
7952 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
7953 	    KSTAT_DATA_UINT32);
7954 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
7955 	    KSTAT_DATA_UINT32);
7956 
7957 	un->un_errstats->ks_private = un;
7958 	un->un_errstats->ks_update  = nulldev;
7959 
7960 	kstat_install(un->un_errstats);
7961 }
7962 
7963 
7964 /*
7965  *    Function: sd_set_errstats
7966  *
7967  * Description: This routine sets the value of the vendor id, product id,
7968  *		revision, serial number, and capacity device error stats.
7969  *
7970  *		Note: During attach the stats are instantiated first so they are
7971  *		available for attach-time routines that utilize the driver
7972  *		iopath to send commands to the device. The stats are initialized
7973  *		separately so data obtained during some attach-time routines is
7974  *		available. (4362483)
7975  *
7976  *   Arguments: un - driver soft state (unit) structure
7977  *
7978  *     Context: Kernel thread context
7979  */
7980 
7981 static void
7982 sd_set_errstats(struct sd_lun *un)
7983 {
7984 	struct	sd_errstats	*stp;
7985 
7986 	ASSERT(un != NULL);
7987 	ASSERT(un->un_errstats != NULL);
7988 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
7989 	ASSERT(stp != NULL);
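	/*
	 * Copy the fixed-width INQUIRY fields (vid is 8 bytes, pid 16,
	 * revision 4); the destinations may not be NUL-terminated.
	 */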
7990 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
7991 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
7992 	(void) strncpy(stp->sd_revision.value.c,
7993 	    un->un_sd->sd_inq->inq_revision, 4);
7994 
7995 	/*
7996 	 * All the errstats are persistent across detach/attach, so reset
7997 	 * them all here in case a disk drive has been hot-replaced. The
7998 	 * exception is a Sun qualified drive whose serial number has not
7999 	 * changed.
8000 	 */
8001 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8002 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8003 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8004 		stp->sd_softerrs.value.ui32 = 0;
8005 		stp->sd_harderrs.value.ui32 = 0;
8006 		stp->sd_transerrs.value.ui32 = 0;
8007 		stp->sd_rq_media_err.value.ui32 = 0;
8008 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8009 		stp->sd_rq_nodev_err.value.ui32 = 0;
8010 		stp->sd_rq_recov_err.value.ui32 = 0;
8011 		stp->sd_rq_illrq_err.value.ui32 = 0;
8012 		stp->sd_rq_pfa_err.value.ui32 = 0;
8013 	}
8014 
8015 	/*
8016 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8017 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8018 	 * (4376302))
8019 	 */
8020 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8021 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8022 		    sizeof (SD_INQUIRY(un)->inq_serial));
8023 	}
8024 
8025 	if (un->un_f_blockcount_is_valid != TRUE) {
8026 		/*
8027 		 * Set capacity error stat to 0 for no media. This ensures
8028 		 * a valid capacity is displayed in response to 'iostat -E'
8029 		 * when no media is present in the device.
8030 		 */
8031 		stp->sd_capacity.value.ui64 = 0;
8032 	} else {
8033 		/*
8034 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8035 		 * capacity.
8036 		 *
8037 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8038 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8039 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8040 		 */
8041 		stp->sd_capacity.value.ui64 = (uint64_t)
8042 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8043 	}
8044 }
8045 
8046 
8047 /*
8048  *    Function: sd_set_pstats
8049  *
8050  * Description: This routine instantiates and initializes the partition
8051  *              stats for each partition with more than zero blocks.
8052  *		(4363169)
8053  *
8054  *   Arguments: un - driver soft state (unit) structure
8055  *
8056  *     Context: Kernel thread context
8057  */
8058 
8059 static void
8060 sd_set_pstats(struct sd_lun *un)
8061 {
8062 	char	kstatname[KSTAT_STRLEN];
8063 	int	instance;
8064 	int	i;
8065 	diskaddr_t	nblks = 0;
8066 	char	*partname = NULL;
8067 
8068 	ASSERT(un != NULL);
8069 
8070 	instance = ddi_get_instance(SD_DEVINFO(un));
8071 
8072 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8073 	for (i = 0; i < NSDMAP; i++) {
8074 
8075 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8076 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8077 			continue;
8078 		mutex_enter(SD_MUTEX(un));
8079 
8080 		if ((un->un_pstats[i] == NULL) &&
8081 		    (nblks != 0)) {
8082 
8083 			(void) snprintf(kstatname, sizeof (kstatname),
8084 			    "%s%d,%s", sd_label, instance,
8085 			    partname);
8086 
8087 			un->un_pstats[i] = kstat_create(sd_label,
8088 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8089 			    1, KSTAT_FLAG_PERSISTENT);
8090 			if (un->un_pstats[i] != NULL) {
8091 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8092 				kstat_install(un->un_pstats[i]);
8093 			}
8094 		}
8095 		mutex_exit(SD_MUTEX(un));
8096 	}
8097 }
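
/*
 * Worked example of the kstat naming above (assuming sd_label is "sd"):
 * instance 1 with partition name "a" yields an I/O kstat named "sd1,a"
 * in class "partition", which is what iostat(1M) reports for that slice.
 */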
8098 
8099 
8100 #if (defined(__fibre))
8101 /*
8102  *    Function: sd_init_event_callbacks
8103  *
8104  * Description: This routine initializes the insertion and removal event
8105  *		callbacks. (fibre only)
8106  *
8107  *   Arguments: un - driver soft state (unit) structure
8108  *
8109  *     Context: Kernel thread context
8110  */
8111 
8112 static void
8113 sd_init_event_callbacks(struct sd_lun *un)
8114 {
8115 	ASSERT(un != NULL);
8116 
8117 	if ((un->un_insert_event == NULL) &&
8118 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8119 	    &un->un_insert_event) == DDI_SUCCESS)) {
8120 		/*
8121 		 * Add the callback for an insertion event
8122 		 */
8123 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8124 		    un->un_insert_event, sd_event_callback, (void *)un,
8125 		    &(un->un_insert_cb_id));
8126 	}
8127 
8128 	if ((un->un_remove_event == NULL) &&
8129 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8130 	    &un->un_remove_event) == DDI_SUCCESS)) {
8131 		/*
8132 		 * Add the callback for a removal event
8133 		 */
8134 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8135 		    un->un_remove_event, sd_event_callback, (void *)un,
8136 		    &(un->un_remove_cb_id));
8137 	}
8138 }
8139 
8140 
8141 /*
8142  *    Function: sd_event_callback
8143  *
8144  * Description: This routine handles insert/remove events (photon). The
 *		state is changed to OFFLINE, which can be used to suppress
 *		error messages. (fibre only)
8147  *
8148  *   Arguments: un - driver soft state (unit) structure
8149  *
8150  *     Context: Callout thread context
8151  */
8152 /* ARGSUSED */
8153 static void
8154 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8155     void *bus_impldata)
8156 {
8157 	struct sd_lun *un = (struct sd_lun *)arg;
8158 
8159 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8160 	if (event == un->un_insert_event) {
8161 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8162 		mutex_enter(SD_MUTEX(un));
8163 		if (un->un_state == SD_STATE_OFFLINE) {
8164 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8165 				un->un_state = un->un_last_state;
8166 			} else {
8167 				/*
8168 				 * We have gone through SUSPEND/RESUME while
8169 				 * we were offline. Restore the last state
8170 				 */
8171 				un->un_state = un->un_save_state;
8172 			}
8173 		}
8174 		mutex_exit(SD_MUTEX(un));
8175 
8176 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8177 	} else if (event == un->un_remove_event) {
8178 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8179 		mutex_enter(SD_MUTEX(un));
8180 		/*
8181 		 * We need to handle an event callback that occurs during
8182 		 * the suspend operation, since we don't prevent it.
8183 		 */
8184 		if (un->un_state != SD_STATE_OFFLINE) {
8185 			if (un->un_state != SD_STATE_SUSPENDED) {
8186 				New_state(un, SD_STATE_OFFLINE);
8187 			} else {
8188 				un->un_last_state = SD_STATE_OFFLINE;
8189 			}
8190 		}
8191 		mutex_exit(SD_MUTEX(un));
8192 	} else {
8193 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8194 		    "!Unknown event\n");
8195 	}
8196 
8197 }
8198 #endif
8199 
8200 /*
8201  *    Function: sd_cache_control()
8202  *
8203  * Description: This routine is the driver entry point for setting
8204  *		read and write caching by modifying the WCE (write cache
8205  *		enable) and RCD (read cache disable) bits of mode
8206  *		page 8 (MODEPAGE_CACHING).
8207  *
8208  *   Arguments: un - driver soft state (unit) structure
8209  *		rcd_flag - flag for controlling the read cache
8210  *		wce_flag - flag for controlling the write cache
8211  *
8212  * Return Code: EIO
8213  *		code returned by sd_send_scsi_MODE_SENSE and
8214  *		sd_send_scsi_MODE_SELECT
8215  *
8216  *     Context: Kernel Thread
8217  */
8218 
8219 static int
8220 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8221 {
8222 	struct mode_caching	*mode_caching_page;
8223 	uchar_t			*header;
8224 	size_t			buflen;
8225 	int			hdrlen;
8226 	int			bd_len;
8227 	int			rval = 0;
8228 	struct mode_header_grp2	*mhp;
8229 
8230 	ASSERT(un != NULL);
8231 
8232 	/*
8233 	 * Do a test unit ready, otherwise a mode sense may not work if this
8234 	 * is the first command sent to the device after boot.
8235 	 */
8236 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8237 
8238 	if (un->un_f_cfg_is_atapi == TRUE) {
8239 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8240 	} else {
8241 		hdrlen = MODE_HEADER_LENGTH;
8242 	}
8243 
8244 	/*
8245 	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
	 * we get all of the mode sense data; otherwise the mode select
	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8249 	 */
	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_cache_scsi3);
8252 
8253 	header = kmem_zalloc(buflen, KM_SLEEP);
8254 
8255 	/* Get the information from the device. */
8256 	if (un->un_f_cfg_is_atapi == TRUE) {
8257 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8258 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8259 	} else {
8260 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8261 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8262 	}
8263 	if (rval != 0) {
8264 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8265 		    "sd_cache_control: Mode Sense Failed\n");
8266 		kmem_free(header, buflen);
8267 		return (rval);
8268 	}
8269 
8270 	/*
8271 	 * Determine size of Block Descriptors in order to locate
8272 	 * the mode page data. ATAPI devices return 0, SCSI devices
8273 	 * should return MODE_BLK_DESC_LENGTH.
8274 	 */
8275 	if (un->un_f_cfg_is_atapi == TRUE) {
8276 		mhp	= (struct mode_header_grp2 *)header;
8277 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8278 	} else {
8279 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8280 	}
8281 
8282 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8283 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8284 		    "sd_cache_control: Mode Sense returned invalid "
8285 		    "block descriptor length\n");
8286 		kmem_free(header, buflen);
8287 		return (EIO);
8288 	}
8289 
8290 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8291 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8292 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8293 		    " caching page code mismatch %d\n",
8294 		    mode_caching_page->mode_page.code);
8295 		kmem_free(header, buflen);
8296 		return (EIO);
8297 	}
8298 
8299 	/* Check the relevant bits on successful mode sense. */
8300 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8301 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8302 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8303 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8304 
8305 		size_t sbuflen;
8306 		uchar_t save_pg;
8307 
8308 		/*
8309 		 * Construct select buffer length based on the
8310 		 * length of the sense data returned.
8311 		 */
		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
		    sizeof (struct mode_page) +
		    (int)mode_caching_page->mode_page.length;
8315 
8316 		/*
8317 		 * Set the caching bits as requested.
8318 		 */
8319 		if (rcd_flag == SD_CACHE_ENABLE)
8320 			mode_caching_page->rcd = 0;
8321 		else if (rcd_flag == SD_CACHE_DISABLE)
8322 			mode_caching_page->rcd = 1;
8323 
8324 		if (wce_flag == SD_CACHE_ENABLE)
8325 			mode_caching_page->wce = 1;
8326 		else if (wce_flag == SD_CACHE_DISABLE)
8327 			mode_caching_page->wce = 0;
8328 
8329 		/*
8330 		 * Save the page if the mode sense says the
8331 		 * drive supports it.
8332 		 */
		save_pg = mode_caching_page->mode_page.ps ?
		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8335 
8336 		/* Clear reserved bits before mode select. */
8337 		mode_caching_page->mode_page.ps = 0;
8338 
8339 		/*
8340 		 * Clear out mode header for mode select.
8341 		 * The rest of the retrieved page will be reused.
8342 		 */
8343 		bzero(header, hdrlen);
8344 
8345 		if (un->un_f_cfg_is_atapi == TRUE) {
8346 			mhp = (struct mode_header_grp2 *)header;
8347 			mhp->bdesc_length_hi = bd_len >> 8;
8348 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8349 		} else {
8350 			((struct mode_header *)header)->bdesc_length = bd_len;
8351 		}
8352 
8353 		/* Issue mode select to change the cache settings */
8354 		if (un->un_f_cfg_is_atapi == TRUE) {
8355 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8356 			    sbuflen, save_pg, SD_PATH_DIRECT);
8357 		} else {
8358 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8359 			    sbuflen, save_pg, SD_PATH_DIRECT);
8360 		}
8361 	}
8362 
8363 	kmem_free(header, buflen);
8364 	return (rval);
8365 }
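
/*
 * Usage sketch (illustrative only; compiled out): enabling the write
 * cache while leaving the read cache policy untouched. SD_CACHE_NOACTION
 * is assumed to be defined alongside SD_CACHE_ENABLE/SD_CACHE_DISABLE in
 * sddef.h as the "no change" value for either flag.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_enable_wce(struct sd_lun *un)
{
	/* rcd_flag: leave read cache alone; wce_flag: enable write cache */
	return (sd_cache_control(un, SD_CACHE_NOACTION, SD_CACHE_ENABLE));
}
#endif	/* SD_EXAMPLE_SKETCHES */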
8366 
8367 
8368 /*
8369  *    Function: sd_get_write_cache_enabled()
8370  *
8371  * Description: This routine is the driver entry point for determining if
8372  *		write caching is enabled.  It examines the WCE (write cache
8373  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8374  *
8375  *   Arguments: un - driver soft state (unit) structure
8376  *		is_enabled - pointer to int where write cache enabled state
8377  *		is returned (non-zero -> write cache enabled)
8378  *
8379  *
8380  * Return Code: EIO
8381  *		code returned by sd_send_scsi_MODE_SENSE
8382  *
8383  *     Context: Kernel Thread
8384  *
 * NOTE: If an ioctl is added to disable the write cache, this sequence
 * should be followed so that no locking is required for accesses to
 * un->un_f_write_cache_enabled:
8388  * 	do mode select to clear wce
8389  * 	do synchronize cache to flush cache
8390  * 	set un->un_f_write_cache_enabled = FALSE
8391  *
8392  * Conversely, an ioctl to enable the write cache should be done
8393  * in this order:
8394  * 	set un->un_f_write_cache_enabled = TRUE
8395  * 	do mode select to set wce
8396  */
8397 
8398 static int
8399 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8400 {
8401 	struct mode_caching	*mode_caching_page;
8402 	uchar_t			*header;
8403 	size_t			buflen;
8404 	int			hdrlen;
8405 	int			bd_len;
8406 	int			rval = 0;
8407 
8408 	ASSERT(un != NULL);
8409 	ASSERT(is_enabled != NULL);
8410 
8411 	/* in case of error, flag as enabled */
8412 	*is_enabled = TRUE;
8413 
8414 	/*
8415 	 * Do a test unit ready, otherwise a mode sense may not work if this
8416 	 * is the first command sent to the device after boot.
8417 	 */
8418 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8419 
8420 	if (un->un_f_cfg_is_atapi == TRUE) {
8421 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8422 	} else {
8423 		hdrlen = MODE_HEADER_LENGTH;
8424 	}
8425 
8426 	/*
8427 	 * Allocate memory for the retrieved mode page and its headers.  Set
8428 	 * a pointer to the page itself.
8429 	 */
8430 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8431 	header = kmem_zalloc(buflen, KM_SLEEP);
8432 
8433 	/* Get the information from the device. */
8434 	if (un->un_f_cfg_is_atapi == TRUE) {
8435 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8436 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8437 	} else {
8438 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8439 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8440 	}
8441 	if (rval != 0) {
8442 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8443 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8444 		kmem_free(header, buflen);
8445 		return (rval);
8446 	}
8447 
8448 	/*
8449 	 * Determine size of Block Descriptors in order to locate
8450 	 * the mode page data. ATAPI devices return 0, SCSI devices
8451 	 * should return MODE_BLK_DESC_LENGTH.
8452 	 */
8453 	if (un->un_f_cfg_is_atapi == TRUE) {
8454 		struct mode_header_grp2	*mhp;
8455 		mhp	= (struct mode_header_grp2 *)header;
8456 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8457 	} else {
8458 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8459 	}
8460 
8461 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8462 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8463 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8464 		    "block descriptor length\n");
8465 		kmem_free(header, buflen);
8466 		return (EIO);
8467 	}
8468 
8469 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8470 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_write_cache_enabled: Mode Sense caching page "
		    "code mismatch %d\n", mode_caching_page->mode_page.code);
8474 		kmem_free(header, buflen);
8475 		return (EIO);
8476 	}
8477 	*is_enabled = mode_caching_page->wce;
8478 
8479 	kmem_free(header, buflen);
8480 	return (0);
8481 }
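
/*
 * Sketch of the lock-free ordering described in the NOTE above
 * (illustrative only; compiled out): when disabling the write cache,
 * flush before clearing the flag so un_f_write_cache_enabled never
 * claims the cache is off while it may still hold unwritten data.
 * SD_CACHE_NOACTION is assumed as in the sketch above.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_disable_wce(struct sd_lun *un)
{
	int	rval;

	/* 1. Mode select to clear WCE */
	rval = sd_cache_control(un, SD_CACHE_NOACTION, SD_CACHE_DISABLE);
	if (rval != 0) {
		return (rval);
	}
	/* 2. Synchronize cache to flush any cached writes */
	rval = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
	if ((rval != 0) && (rval != ENOTSUP)) {
		return (EIO);
	}
	/* 3. Only now advertise the write cache as disabled */
	un->un_f_write_cache_enabled = FALSE;
	return (0);
}
#endif	/* SD_EXAMPLE_SKETCHES */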
8482 
8483 
8484 /*
8485  *    Function: sd_make_device
8486  *
8487  * Description: Utility routine to return the Solaris device number from
8488  *		the data in the device's dev_info structure.
8489  *
8490  * Return Code: The Solaris device number
8491  *
8492  *     Context: Any
8493  */
8494 
8495 static dev_t
8496 sd_make_device(dev_info_t *devi)
8497 {
8498 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8499 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8500 }
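
/*
 * Worked example (hypothetical values): if SDUNIT_SHIFT were 6, instance
 * 2 would yield minor number (2 << 6) == 128, i.e. the minor for
 * partition 0 of instance 2; SDUNIT() of the resulting dev_t recovers
 * the instance and SDPART() the partition.
 */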
8501 
8502 
8503 /*
8504  *    Function: sd_pm_entry
8505  *
8506  * Description: Called at the start of a new command to manage power
8507  *		and busy status of a device. This includes determining whether
8508  *		the current power state of the device is sufficient for
8509  *		performing the command or whether it must be changed.
8510  *		The PM framework is notified appropriately.
8511  *		Only with a return status of DDI_SUCCESS will the
 *		Only with a return status of DDI_SUCCESS will the
 *		component be marked busy to the framework.
 *
 *		All callers of sd_pm_entry must check the return status
 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
 *		of DDI_FAILURE indicates the device failed to power up.
 *		In this case un_pm_count has been adjusted so the result
 *		on exit is still powered down, i.e. the count is less than 0.
 *		Calling sd_pm_exit with this count value hits an ASSERT.
8521  * Return Code: DDI_SUCCESS or DDI_FAILURE
8522  *
8523  *     Context: Kernel thread context.
8524  */
8525 
8526 static int
8527 sd_pm_entry(struct sd_lun *un)
8528 {
8529 	int return_status = DDI_SUCCESS;
8530 
8531 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8532 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8533 
8534 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8535 
8536 	if (un->un_f_pm_is_enabled == FALSE) {
8537 		SD_TRACE(SD_LOG_IO_PM, un,
8538 		    "sd_pm_entry: exiting, PM not enabled\n");
8539 		return (return_status);
8540 	}
8541 
8542 	/*
8543 	 * Just increment a counter if PM is enabled. On the transition from
8544 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8545 	 * the count with each IO and mark the device as idle when the count
8546 	 * hits 0.
8547 	 *
8548 	 * If the count is less than 0 the device is powered down. If a powered
8549 	 * down device is successfully powered up then the count must be
8550 	 * incremented to reflect the power up. Note that it'll get incremented
8551 	 * a second time to become busy.
8552 	 *
8553 	 * Because the following has the potential to change the device state
8554 	 * and must release the un_pm_mutex to do so, only one thread can be
8555 	 * allowed through at a time.
8556 	 */
8557 
8558 	mutex_enter(&un->un_pm_mutex);
8559 	while (un->un_pm_busy == TRUE) {
8560 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8561 	}
8562 	un->un_pm_busy = TRUE;
8563 
8564 	if (un->un_pm_count < 1) {
8565 
8566 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8567 
8568 		/*
8569 		 * Indicate we are now busy so the framework won't attempt to
8570 		 * power down the device. This call will only fail if either
8571 		 * we passed a bad component number or the device has no
8572 		 * components. Neither of these should ever happen.
8573 		 */
8574 		mutex_exit(&un->un_pm_mutex);
8575 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8576 		ASSERT(return_status == DDI_SUCCESS);
8577 
8578 		mutex_enter(&un->un_pm_mutex);
8579 
8580 		if (un->un_pm_count < 0) {
8581 			mutex_exit(&un->un_pm_mutex);
8582 
8583 			SD_TRACE(SD_LOG_IO_PM, un,
8584 			    "sd_pm_entry: power up component\n");
8585 
8586 			/*
8587 			 * pm_raise_power will cause sdpower to be called
8588 			 * which brings the device power level to the
8589 			 * desired state, ON in this case. If successful,
8590 			 * un_pm_count and un_power_level will be updated
8591 			 * appropriately.
8592 			 */
8593 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8594 			    SD_SPINDLE_ON);
8595 
8596 			mutex_enter(&un->un_pm_mutex);
8597 
8598 			if (return_status != DDI_SUCCESS) {
8599 				/*
8600 				 * Power up failed.
8601 				 * Idle the device and adjust the count
8602 				 * so the result on exit is that we're
8603 				 * still powered down, ie. count is less than 0.
8604 				 */
8605 				SD_TRACE(SD_LOG_IO_PM, un,
8606 				    "sd_pm_entry: power up failed,"
8607 				    " idle the component\n");
8608 
8609 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8610 				un->un_pm_count--;
8611 			} else {
8612 				/*
8613 				 * Device is powered up, verify the
8614 				 * count is non-negative.
8615 				 * This is debug only.
8616 				 */
8617 				ASSERT(un->un_pm_count == 0);
8618 			}
8619 		}
8620 
8621 		if (return_status == DDI_SUCCESS) {
8622 			/*
8623 			 * For performance, now that the device has been tagged
8624 			 * as busy, and it's known to be powered up, update the
8625 			 * chain types to use jump tables that do not include
8626 			 * pm. This significantly lowers the overhead and
8627 			 * therefore improves performance.
8628 			 */
8629 
8630 			mutex_exit(&un->un_pm_mutex);
8631 			mutex_enter(SD_MUTEX(un));
8632 			SD_TRACE(SD_LOG_IO_PM, un,
8633 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8634 			    un->un_uscsi_chain_type);
8635 
8636 			if (un->un_f_non_devbsize_supported) {
8637 				un->un_buf_chain_type =
8638 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8639 			} else {
8640 				un->un_buf_chain_type =
8641 				    SD_CHAIN_INFO_DISK_NO_PM;
8642 			}
8643 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8644 
8645 			SD_TRACE(SD_LOG_IO_PM, un,
8646 			    "             changed  uscsi_chain_type to   %d\n",
8647 			    un->un_uscsi_chain_type);
8648 			mutex_exit(SD_MUTEX(un));
8649 			mutex_enter(&un->un_pm_mutex);
8650 
8651 			if (un->un_pm_idle_timeid == NULL) {
8652 				/* 300 ms. */
8653 				un->un_pm_idle_timeid =
8654 				    timeout(sd_pm_idletimeout_handler, un,
8655 				    (drv_usectohz((clock_t)300000)));
8656 				/*
				 * Include an extra call to busy which keeps
				 * the device busy with respect to the PM
				 * layer until the timer fires, at which time
				 * it'll get the extra idle call.
8661 				 */
8662 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8663 			}
8664 		}
8665 	}
8666 	un->un_pm_busy = FALSE;
8667 	/* Next... */
8668 	cv_signal(&un->un_pm_busy_cv);
8669 
8670 	un->un_pm_count++;
8671 
8672 	SD_TRACE(SD_LOG_IO_PM, un,
8673 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8674 
8675 	mutex_exit(&un->un_pm_mutex);
8676 
8677 	return (return_status);
8678 }
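
/*
 * Caller-side sketch (illustrative only; compiled out) of the contract
 * documented above: check sd_pm_entry()'s status and call sd_pm_exit()
 * only if it returned DDI_SUCCESS.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_powered_cmd(struct sd_lun *un)
{
	int	rval;

	if (sd_pm_entry(un) != DDI_SUCCESS) {
		/* Power up failed; sd_pm_exit() must NOT be called. */
		return (EIO);
	}
	/* Commands requiring a powered-up device may be issued here. */
	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
	sd_pm_exit(un);
	return (rval);
}
#endif	/* SD_EXAMPLE_SKETCHES */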
8679 
8680 
8681 /*
8682  *    Function: sd_pm_exit
8683  *
8684  * Description: Called at the completion of a command to manage busy
8685  *		status for the device. If the device becomes idle the
8686  *		PM framework is notified.
8687  *
8688  *     Context: Kernel thread context
8689  */
8690 
8691 static void
8692 sd_pm_exit(struct sd_lun *un)
8693 {
8694 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8695 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8696 
8697 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8698 
8699 	/*
8700 	 * After attach the following flag is only read, so don't
8701 	 * take the penalty of acquiring a mutex for it.
8702 	 */
8703 	if (un->un_f_pm_is_enabled == TRUE) {
8704 
8705 		mutex_enter(&un->un_pm_mutex);
8706 		un->un_pm_count--;
8707 
8708 		SD_TRACE(SD_LOG_IO_PM, un,
8709 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8710 
8711 		ASSERT(un->un_pm_count >= 0);
8712 		if (un->un_pm_count == 0) {
8713 			mutex_exit(&un->un_pm_mutex);
8714 
8715 			SD_TRACE(SD_LOG_IO_PM, un,
8716 			    "sd_pm_exit: idle component\n");
8717 
8718 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8719 
8720 		} else {
8721 			mutex_exit(&un->un_pm_mutex);
8722 		}
8723 	}
8724 
8725 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8726 }
8727 
8728 
8729 /*
8730  *    Function: sdopen
8731  *
8732  * Description: Driver's open(9e) entry point function.
8733  *
 *   Arguments: dev_p   - pointer to device number
8735  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8736  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8737  *		cred_p  - user credential pointer
8738  *
8739  * Return Code: EINVAL
8740  *		ENXIO
8741  *		EIO
8742  *		EROFS
8743  *		EBUSY
8744  *
8745  *     Context: Kernel thread context
8746  */
8747 /* ARGSUSED */
8748 static int
8749 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8750 {
8751 	struct sd_lun	*un;
8752 	int		nodelay;
8753 	int		part;
8754 	uint64_t	partmask;
8755 	int		instance;
8756 	dev_t		dev;
8757 	int		rval = EIO;
8758 	diskaddr_t	nblks = 0;
8759 
8760 	/* Validate the open type */
8761 	if (otyp >= OTYPCNT) {
8762 		return (EINVAL);
8763 	}
8764 
8765 	dev = *dev_p;
8766 	instance = SDUNIT(dev);
8767 	mutex_enter(&sd_detach_mutex);
8768 
8769 	/*
8770 	 * Fail the open if there is no softstate for the instance, or
8771 	 * if another thread somewhere is trying to detach the instance.
8772 	 */
8773 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
8774 	    (un->un_detach_count != 0)) {
8775 		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open(9e)
		 * fails with ENXIO (4238046).
		 *
		 * Unconditionally clearing the probe cache is fine with
		 * separate sd/ssd binaries; on x86, where parallel SCSI
		 * and fibre support share one binary, it can be an issue.
		 */
8786 		sd_scsi_clear_probe_cache();
8787 		return (ENXIO);
8788 	}
8789 
8790 	/*
8791 	 * The un_layer_count is to prevent another thread in specfs from
8792 	 * trying to detach the instance, which can happen when we are
8793 	 * called from a higher-layer driver instead of thru specfs.
8794 	 * This will not be needed when DDI provides a layered driver
8795 	 * interface that allows specfs to know that an instance is in
8796 	 * use by a layered driver & should not be detached.
8797 	 *
8798 	 * Note: the semantics for layered driver opens are exactly one
8799 	 * close for every open.
8800 	 */
8801 	if (otyp == OTYP_LYR) {
8802 		un->un_layer_count++;
8803 	}
8804 
8805 	/*
8806 	 * Keep a count of the current # of opens in progress. This is because
8807 	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent; however, by keeping this count
8809 	 * we can at least keep our open and detach routines from racing against
8810 	 * each other under such conditions.
8811 	 */
8812 	un->un_opens_in_progress++;
8813 	mutex_exit(&sd_detach_mutex);
8814 
8815 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
8816 	part	 = SDPART(dev);
8817 	partmask = 1 << part;
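
	/*
	 * Worked example: part 2 gives partmask 0x4; "un_exclopen & 0x4"
	 * below tests whether slice 2 is already open exclusively, and
	 * "un_ocmap.regopen[otyp] |= 0x4" records a regular open of it.
	 */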
8818 
8819 	/*
8820 	 * We use a semaphore here in order to serialize
8821 	 * open and close requests on the device.
8822 	 */
8823 	sema_p(&un->un_semoclose);
8824 
8825 	mutex_enter(SD_MUTEX(un));
8826 
8827 	/*
8828 	 * All device accesses go thru sdstrategy() where we check
8829 	 * on suspend status but there could be a scsi_poll command,
8830 	 * which bypasses sdstrategy(), so we need to check pm
8831 	 * status.
8832 	 */
8833 
8834 	if (!nodelay) {
8835 		while ((un->un_state == SD_STATE_SUSPENDED) ||
8836 		    (un->un_state == SD_STATE_PM_CHANGING)) {
8837 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
8838 		}
8839 
8840 		mutex_exit(SD_MUTEX(un));
8841 		if (sd_pm_entry(un) != DDI_SUCCESS) {
8842 			rval = EIO;
8843 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
8844 			    "sdopen: sd_pm_entry failed\n");
8845 			goto open_failed_with_pm;
8846 		}
8847 		mutex_enter(SD_MUTEX(un));
8848 	}
8849 
8850 	/* check for previous exclusive open */
8851 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
8852 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
8853 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
8854 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
8855 
8856 	if (un->un_exclopen & (partmask)) {
8857 		goto excl_open_fail;
8858 	}
8859 
8860 	if (flag & FEXCL) {
8861 		int i;
8862 		if (un->un_ocmap.lyropen[part]) {
8863 			goto excl_open_fail;
8864 		}
8865 		for (i = 0; i < (OTYPCNT - 1); i++) {
8866 			if (un->un_ocmap.regopen[i] & (partmask)) {
8867 				goto excl_open_fail;
8868 			}
8869 		}
8870 	}
8871 
8872 	/*
8873 	 * Check the write permission if this is a removable media device,
8874 	 * NDELAY has not been set, and writable permission is requested.
8875 	 *
8876 	 * Note: If NDELAY was set and this is write-protected media the WRITE
8877 	 * attempt will fail with EIO as part of the I/O processing. This is a
8878 	 * more permissive implementation that allows the open to succeed and
8879 	 * WRITE attempts to fail when appropriate.
8880 	 */
8881 	if (un->un_f_chk_wp_open) {
8882 		if ((flag & FWRITE) && (!nodelay)) {
8883 			mutex_exit(SD_MUTEX(un));
			/*
			 * For a writable DVD drive, defer the write
			 * permission check until sdstrategy; do not fail
			 * the open even if FWRITE is set, since the device
			 * may be writable depending on the media, and the
			 * media can change after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
					rval = EROFS;
					mutex_enter(SD_MUTEX(un));
					SD_ERROR(SD_LOG_OPEN_CLOSE, un,
					    "sdopen: write to cd or write "
					    "protected media\n");
					goto open_fail;
				}
			}
8900 			mutex_enter(SD_MUTEX(un));
8901 		}
8902 	}
8903 
8904 	/*
8905 	 * If opening in NDELAY/NONBLOCK mode, just return.
8906 	 * Check if disk is ready and has a valid geometry later.
8907 	 */
8908 	if (!nodelay) {
8909 		mutex_exit(SD_MUTEX(un));
8910 		rval = sd_ready_and_valid(un);
8911 		mutex_enter(SD_MUTEX(un));
8912 		/*
8913 		 * Fail if device is not ready or if the number of disk
8914 		 * blocks is zero or negative for non CD devices.
8915 		 */
8916 
8917 		nblks = 0;
8918 
8919 		if (rval == SD_READY_VALID && (!ISCD(un))) {
8920 			/* if cmlb_partinfo fails, nblks remains 0 */
8921 			mutex_exit(SD_MUTEX(un));
8922 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
8923 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
8924 			mutex_enter(SD_MUTEX(un));
8925 		}
8926 
8927 		if ((rval != SD_READY_VALID) ||
8928 		    (!ISCD(un) && nblks <= 0)) {
8929 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
8930 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8931 			    "device not ready or invalid disk block value\n");
8932 			goto open_fail;
8933 		}
8934 #if defined(__i386) || defined(__amd64)
8935 	} else {
8936 		uchar_t *cp;
8937 		/*
8938 		 * x86 requires special nodelay handling, so that p0 is
8939 		 * always defined and accessible.
8940 		 * Invalidate geometry only if device is not already open.
8941 		 */
8942 		cp = &un->un_ocmap.chkd[0];
8943 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
			if (*cp != (uchar_t)0) {
				break;
			}
8947 			cp++;
8948 		}
8949 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
8950 			mutex_exit(SD_MUTEX(un));
8951 			cmlb_invalidate(un->un_cmlbhandle,
8952 			    (void *)SD_PATH_DIRECT);
8953 			mutex_enter(SD_MUTEX(un));
8954 		}
8955 
8956 #endif
8957 	}
8958 
8959 	if (otyp == OTYP_LYR) {
8960 		un->un_ocmap.lyropen[part]++;
8961 	} else {
8962 		un->un_ocmap.regopen[otyp] |= partmask;
8963 	}
8964 
8965 	/* Set up open and exclusive open flags */
8966 	if (flag & FEXCL) {
8967 		un->un_exclopen |= (partmask);
8968 	}
8969 
8970 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
8971 	    "open of part %d type %d\n", part, otyp);
8972 
8973 	mutex_exit(SD_MUTEX(un));
8974 	if (!nodelay) {
8975 		sd_pm_exit(un);
8976 	}
8977 
8978 	sema_v(&un->un_semoclose);
8979 
8980 	mutex_enter(&sd_detach_mutex);
8981 	un->un_opens_in_progress--;
8982 	mutex_exit(&sd_detach_mutex);
8983 
8984 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
8985 	return (DDI_SUCCESS);
8986 
8987 excl_open_fail:
8988 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
8989 	rval = EBUSY;
8990 
8991 open_fail:
8992 	mutex_exit(SD_MUTEX(un));
8993 
8994 	/*
8995 	 * On a failed open we must exit the pm management.
8996 	 */
8997 	if (!nodelay) {
8998 		sd_pm_exit(un);
8999 	}
9000 open_failed_with_pm:
9001 	sema_v(&un->un_semoclose);
9002 
9003 	mutex_enter(&sd_detach_mutex);
9004 	un->un_opens_in_progress--;
9005 	if (otyp == OTYP_LYR) {
9006 		un->un_layer_count--;
9007 	}
9008 	mutex_exit(&sd_detach_mutex);
9009 
9010 	return (rval);
9011 }
9012 
9013 
9014 /*
9015  *    Function: sdclose
9016  *
9017  * Description: Driver's close(9e) entry point function.
9018  *
9019  *   Arguments: dev    - device number
9020  *		flag   - file status flag, informational only
9021  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9022  *		cred_p - user credential pointer
9023  *
9024  * Return Code: ENXIO
9025  *
9026  *     Context: Kernel thread context
9027  */
9028 /* ARGSUSED */
9029 static int
9030 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9031 {
9032 	struct sd_lun	*un;
9033 	uchar_t		*cp;
9034 	int		part;
9035 	int		nodelay;
9036 	int		rval = 0;
9037 
9038 	/* Validate the open type */
9039 	if (otyp >= OTYPCNT) {
9040 		return (ENXIO);
9041 	}
9042 
9043 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9044 		return (ENXIO);
9045 	}
9046 
9047 	part = SDPART(dev);
9048 	nodelay = flag & (FNDELAY | FNONBLOCK);
9049 
9050 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9051 	    "sdclose: close of part %d type %d\n", part, otyp);
9052 
9053 	/*
9054 	 * We use a semaphore here in order to serialize
9055 	 * open and close requests on the device.
9056 	 */
9057 	sema_p(&un->un_semoclose);
9058 
9059 	mutex_enter(SD_MUTEX(un));
9060 
9061 	/* Don't proceed if power is being changed. */
9062 	while (un->un_state == SD_STATE_PM_CHANGING) {
9063 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9064 	}
9065 
9066 	if (un->un_exclopen & (1 << part)) {
9067 		un->un_exclopen &= ~(1 << part);
9068 	}
9069 
9070 	/* Update the open partition map */
9071 	if (otyp == OTYP_LYR) {
9072 		un->un_ocmap.lyropen[part] -= 1;
9073 	} else {
9074 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9075 	}
9076 
9077 	cp = &un->un_ocmap.chkd[0];
9078 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
		if (*cp != (uchar_t)0) {
9080 			break;
9081 		}
9082 		cp++;
9083 	}
9084 
9085 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9086 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9087 
9088 		/*
		 * We avoid persistence upon the last close, and set
		 * the throttle back to the maximum.
9091 		 */
9092 		un->un_throttle = un->un_saved_throttle;
9093 
9094 		if (un->un_state == SD_STATE_OFFLINE) {
9095 			if (un->un_f_is_fibre == FALSE) {
9096 				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, "offline\n");
9098 			}
9099 			mutex_exit(SD_MUTEX(un));
9100 			cmlb_invalidate(un->un_cmlbhandle,
9101 			    (void *)SD_PATH_DIRECT);
9102 			mutex_enter(SD_MUTEX(un));
9103 
9104 		} else {
9105 			/*
9106 			 * Flush any outstanding writes in NVRAM cache.
9107 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
			 * cmd; it may not work for non-Pluto devices.
9109 			 * SYNCHRONIZE CACHE is not required for removables,
9110 			 * except DVD-RAM drives.
9111 			 *
9112 			 * Also note: because SYNCHRONIZE CACHE is currently
9113 			 * the only command issued here that requires the
9114 			 * drive be powered up, only do the power up before
9115 			 * sending the Sync Cache command. If additional
9116 			 * commands are added which require a powered up
9117 			 * drive, the following sequence may have to change.
9118 			 *
9119 			 * And finally, note that parallel SCSI on SPARC
9120 			 * only issues a Sync Cache to DVD-RAM, a newly
9121 			 * supported device.
9122 			 */
9123 #if defined(__i386) || defined(__amd64)
9124 			if (un->un_f_sync_cache_supported ||
9125 			    un->un_f_dvdram_writable_device == TRUE) {
9126 #else
9127 			if (un->un_f_dvdram_writable_device == TRUE) {
9128 #endif
9129 				mutex_exit(SD_MUTEX(un));
9130 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9131 					rval =
9132 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9133 					    NULL);
9134 					/* ignore error if not supported */
9135 					if (rval == ENOTSUP) {
9136 						rval = 0;
9137 					} else if (rval != 0) {
9138 						rval = EIO;
9139 					}
9140 					sd_pm_exit(un);
9141 				} else {
9142 					rval = EIO;
9143 				}
9144 				mutex_enter(SD_MUTEX(un));
9145 			}
9146 
9147 			/*
			 * For devices which support DOOR_LOCK, send an ALLOW
9149 			 * MEDIA REMOVAL command, but don't get upset if it
9150 			 * fails. We need to raise the power of the drive before
9151 			 * we can call sd_send_scsi_DOORLOCK()
9152 			 */
9153 			if (un->un_f_doorlock_supported) {
9154 				mutex_exit(SD_MUTEX(un));
9155 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9156 					rval = sd_send_scsi_DOORLOCK(un,
9157 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9158 
9159 					sd_pm_exit(un);
9160 					if (ISCD(un) && (rval != 0) &&
9161 					    (nodelay != 0)) {
9162 						rval = ENXIO;
9163 					}
9164 				} else {
9165 					rval = EIO;
9166 				}
9167 				mutex_enter(SD_MUTEX(un));
9168 			}
9169 
9170 			/*
9171 			 * If a device has removable media, invalidate all
9172 			 * parameters related to media, such as geometry,
9173 			 * blocksize, and blockcount.
9174 			 */
9175 			if (un->un_f_has_removable_media) {
9176 				sr_ejected(un);
9177 			}
9178 
9179 			/*
9180 			 * Destroy the cache (if it exists) which was
9181 			 * allocated for the write maps since this is
9182 			 * the last close for this media.
9183 			 */
9184 			if (un->un_wm_cache) {
9185 				/*
				 * Check if there are pending commands;
				 * if there are, give a warning and do
				 * not destroy the cache.
9189 				 */
9190 				if (un->un_ncmds_in_driver > 0) {
9191 					scsi_log(SD_DEVINFO(un),
9192 					    sd_label, CE_WARN,
9193 					    "Unable to clean up memory "
9194 					    "because of pending I/O\n");
9195 				} else {
9196 					kmem_cache_destroy(
9197 					    un->un_wm_cache);
9198 					un->un_wm_cache = NULL;
9199 				}
9200 			}
9201 		}
9202 	}
9203 
9204 	mutex_exit(SD_MUTEX(un));
9205 	sema_v(&un->un_semoclose);
9206 
9207 	if (otyp == OTYP_LYR) {
9208 		mutex_enter(&sd_detach_mutex);
9209 		/*
9210 		 * The detach routine may run when the layer count
9211 		 * drops to zero.
9212 		 */
9213 		un->un_layer_count--;
9214 		mutex_exit(&sd_detach_mutex);
9215 	}
9216 
9217 	return (rval);
9218 }
9219 
9220 
9221 /*
9222  *    Function: sd_ready_and_valid
9223  *
9224  * Description: Test if device is ready and has a valid geometry.
9225  *
9226  *   Arguments: dev - device number
 *   Arguments: un  - driver soft state (unit) structure
9229  * Return Code: SD_READY_VALID		ready and valid label
9230  *		SD_NOT_READY_VALID	not ready, no label
9231  *		SD_RESERVED_BY_OTHERS	reservation conflict
9232  *
9233  *     Context: Never called at interrupt context.
9234  */
9235 
9236 static int
9237 sd_ready_and_valid(struct sd_lun *un)
9238 {
9239 	struct sd_errstats	*stp;
9240 	uint64_t		capacity;
9241 	uint_t			lbasize;
9242 	int			rval = SD_READY_VALID;
9243 	char			name_str[48];
9244 	int			is_valid;
9245 
9246 	ASSERT(un != NULL);
9247 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9248 
9249 	mutex_enter(SD_MUTEX(un));
9250 	/*
9251 	 * If a device has removable media, we must check if media is
9252 	 * ready when checking if this device is ready and valid.
9253 	 */
9254 	if (un->un_f_has_removable_media) {
9255 		mutex_exit(SD_MUTEX(un));
9256 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9257 			rval = SD_NOT_READY_VALID;
9258 			mutex_enter(SD_MUTEX(un));
9259 			goto done;
9260 		}
9261 
9262 		is_valid = SD_IS_VALID_LABEL(un);
9263 		mutex_enter(SD_MUTEX(un));
9264 		if (!is_valid ||
9265 		    (un->un_f_blockcount_is_valid == FALSE) ||
9266 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9267 
9268 			/* capacity has to be read every open. */
9269 			mutex_exit(SD_MUTEX(un));
9270 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9271 			    &lbasize, SD_PATH_DIRECT) != 0) {
9272 				cmlb_invalidate(un->un_cmlbhandle,
9273 				    (void *)SD_PATH_DIRECT);
9274 				mutex_enter(SD_MUTEX(un));
9275 				rval = SD_NOT_READY_VALID;
9276 				goto done;
9277 			} else {
9278 				mutex_enter(SD_MUTEX(un));
9279 				sd_update_block_info(un, lbasize, capacity);
9280 			}
9281 		}
9282 
9283 		/*
9284 		 * Check if the media in the device is writable or not.
9285 		 */
9286 		if (!is_valid && ISCD(un)) {
9287 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
9288 		}
9289 
9290 	} else {
9291 		/*
9292 		 * Do a test unit ready to clear any unit attention from non-cd
9293 		 * devices.
9294 		 */
9295 		mutex_exit(SD_MUTEX(un));
9296 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9297 		mutex_enter(SD_MUTEX(un));
9298 	}
9299 
9300 
9301 	/*
9302 	 * If this is a non 512 block device, allocate space for
9303 	 * the wmap cache. This is being done here since every time
9304 	 * a media is changed this routine will be called and the
9305 	 * block size is a function of media rather than device.
9306 	 */
9307 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9308 		if (!(un->un_wm_cache)) {
9309 			(void) snprintf(name_str, sizeof (name_str),
9310 			    "%s%d_cache",
9311 			    ddi_driver_name(SD_DEVINFO(un)),
9312 			    ddi_get_instance(SD_DEVINFO(un)));
9313 			un->un_wm_cache = kmem_cache_create(
9314 			    name_str, sizeof (struct sd_w_map),
9315 			    8, sd_wm_cache_constructor,
9316 			    sd_wm_cache_destructor, NULL,
9317 			    (void *)un, NULL, 0);
			if (!(un->un_wm_cache)) {
				rval = ENOMEM;
				goto done;
			}
9322 		}
9323 	}
9324 
9325 	if (un->un_state == SD_STATE_NORMAL) {
9326 		/*
9327 		 * If the target is not yet ready here (defined by a TUR
9328 		 * failure), invalidate the geometry and print an 'offline'
9329 		 * message. This is a legacy message, as the state of the
9330 		 * target is not actually changed to SD_STATE_OFFLINE.
9331 		 *
9332 		 * If the TUR fails for EACCES (Reservation Conflict),
9333 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9334 		 * reservation conflict. If the TUR fails for other
9335 		 * reasons, SD_NOT_READY_VALID will be returned.
9336 		 */
9337 		int err;
9338 
9339 		mutex_exit(SD_MUTEX(un));
9340 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9341 		mutex_enter(SD_MUTEX(un));
9342 
9343 		if (err != 0) {
9344 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9345 			    "offline or reservation conflict\n");
9346 			mutex_exit(SD_MUTEX(un));
9347 			cmlb_invalidate(un->un_cmlbhandle,
9348 			    (void *)SD_PATH_DIRECT);
9349 			mutex_enter(SD_MUTEX(un));
9350 			if (err == EACCES) {
9351 				rval = SD_RESERVED_BY_OTHERS;
9352 			} else {
9353 				rval = SD_NOT_READY_VALID;
9354 			}
9355 			goto done;
9356 		}
9357 	}
9358 
9359 	if (un->un_f_format_in_progress == FALSE) {
9360 		mutex_exit(SD_MUTEX(un));
9361 		if (cmlb_validate(un->un_cmlbhandle, 0,
9362 		    (void *)SD_PATH_DIRECT) != 0) {
9363 			rval = SD_NOT_READY_VALID;
9364 			mutex_enter(SD_MUTEX(un));
9365 			goto done;
9366 		}
9367 		if (un->un_f_pkstats_enabled) {
9368 			sd_set_pstats(un);
9369 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9370 			    "sd_ready_and_valid: un:0x%p pstats created and "
9371 			    "set\n", un);
9372 		}
9373 		mutex_enter(SD_MUTEX(un));
9374 	}
9375 
9376 	/*
9377 	 * If this device supports DOOR_LOCK command, try and send
9378 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
9379 	 * if it fails. For a CD, however, it is an error
9380 	 */
9381 	if (un->un_f_doorlock_supported) {
9382 		mutex_exit(SD_MUTEX(un));
9383 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9384 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9385 			rval = SD_NOT_READY_VALID;
9386 			mutex_enter(SD_MUTEX(un));
9387 			goto done;
9388 		}
9389 		mutex_enter(SD_MUTEX(un));
9390 	}
9391 
9392 	/* The state has changed, inform the media watch routines */
9393 	un->un_mediastate = DKIO_INSERTED;
9394 	cv_broadcast(&un->un_state_cv);
9395 	rval = SD_READY_VALID;
9396 
9397 done:
9398 
9399 	/*
9400 	 * Initialize the capacity kstat value, if no media previously
9401 	 * (capacity kstat is 0) and a media has been inserted
9402 	 * (un_blockcount > 0).
9403 	 */
9404 	if (un->un_errstats != NULL) {
9405 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9406 		if ((stp->sd_capacity.value.ui64 == 0) &&
9407 		    (un->un_f_blockcount_is_valid == TRUE)) {
9408 			stp->sd_capacity.value.ui64 =
9409 			    (uint64_t)((uint64_t)un->un_blockcount *
9410 			    un->un_sys_blocksize);
9411 		}
9412 	}
9413 
9414 	mutex_exit(SD_MUTEX(un));
9415 	return (rval);
9416 }
9417 
9418 
9419 /*
9420  *    Function: sdmin
9421  *
9422  * Description: Routine to limit the size of a data transfer. Used in
9423  *		conjunction with physio(9F).
9424  *
9425  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9426  *
9427  *     Context: Kernel thread context.
9428  */
9429 
9430 static void
9431 sdmin(struct buf *bp)
9432 {
9433 	struct sd_lun	*un;
9434 	int		instance;
9435 
9436 	instance = SDUNIT(bp->b_edev);
9437 
9438 	un = ddi_get_soft_state(sd_state, instance);
9439 	ASSERT(un != NULL);
9440 
9441 	if (bp->b_bcount > un->un_max_xfer_size) {
9442 		bp->b_bcount = un->un_max_xfer_size;
9443 	}
9444 }
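
/*
 * Worked example (illustrative numbers): with un_max_xfer_size of 1 MB,
 * a single 4 MB request submitted through physio(9F) is carried out as
 * four successive 1 MB transfers; physio calls sdmin() before each one
 * and the clamp above shrinks b_bcount as needed.
 */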
9445 
9446 
9447 /*
9448  *    Function: sdread
9449  *
9450  * Description: Driver's read(9e) entry point function.
9451  *
9452  *   Arguments: dev   - device number
9453  *		uio   - structure pointer describing where data is to be stored
9454  *			in user's space
9455  *		cred_p  - user credential pointer
9456  *
9457  * Return Code: ENXIO
9458  *		EIO
9459  *		EINVAL
9460  *		value returned by physio
9461  *
9462  *     Context: Kernel thread context.
9463  */
9464 /* ARGSUSED */
9465 static int
9466 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9467 {
9468 	struct sd_lun	*un = NULL;
9469 	int		secmask;
9470 	int		err;
9471 
9472 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9473 		return (ENXIO);
9474 	}
9475 
9476 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9477 
9478 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9479 		mutex_enter(SD_MUTEX(un));
9480 		/*
9481 		 * Because the call to sd_ready_and_valid will issue I/O we
9482 		 * must wait here if either the device is suspended or
		 * if its power level is changing.
9484 		 */
9485 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9486 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9487 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9488 		}
9489 		un->un_ncmds_in_driver++;
9490 		mutex_exit(SD_MUTEX(un));
9491 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9492 			mutex_enter(SD_MUTEX(un));
9493 			un->un_ncmds_in_driver--;
9494 			ASSERT(un->un_ncmds_in_driver >= 0);
9495 			mutex_exit(SD_MUTEX(un));
9496 			return (EIO);
9497 		}
9498 		mutex_enter(SD_MUTEX(un));
9499 		un->un_ncmds_in_driver--;
9500 		ASSERT(un->un_ncmds_in_driver >= 0);
9501 		mutex_exit(SD_MUTEX(un));
9502 	}
9503 
9504 	/*
9505 	 * Read requests are restricted to multiples of the system block size.
9506 	 */
9507 	secmask = un->un_sys_blocksize - 1;
9508 
9509 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9510 		SD_ERROR(SD_LOG_READ_WRITE, un,
9511 		    "sdread: file offset not modulo %d\n",
9512 		    un->un_sys_blocksize);
9513 		err = EINVAL;
9514 	} else if (uio->uio_iov->iov_len & (secmask)) {
9515 		SD_ERROR(SD_LOG_READ_WRITE, un,
9516 		    "sdread: transfer length not modulo %d\n",
9517 		    un->un_sys_blocksize);
9518 		err = EINVAL;
9519 	} else {
9520 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9521 	}
9522 	return (err);
9523 }
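
/*
 * Worked example of the alignment checks above (assuming a 512-byte
 * system block size): secmask is 0x1ff, so an offset of 512 and a
 * length of 1024 both pass (their low nine bits are clear), while an
 * offset of 300 fails with EINVAL before any I/O is issued. The same
 * checks are applied in sdwrite, sdaread, and sdawrite below.
 */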
9524 
9525 
9526 /*
9527  *    Function: sdwrite
9528  *
9529  * Description: Driver's write(9e) entry point function.
9530  *
9531  *   Arguments: dev   - device number
9532  *		uio   - structure pointer describing where data is stored in
9533  *			user's space
9534  *		cred_p  - user credential pointer
9535  *
9536  * Return Code: ENXIO
9537  *		EIO
9538  *		EINVAL
9539  *		value returned by physio
9540  *
9541  *     Context: Kernel thread context.
9542  */
9543 /* ARGSUSED */
9544 static int
9545 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9546 {
9547 	struct sd_lun	*un = NULL;
9548 	int		secmask;
9549 	int		err;
9550 
9551 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9552 		return (ENXIO);
9553 	}
9554 
9555 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9556 
9557 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9558 		mutex_enter(SD_MUTEX(un));
9559 		/*
9560 		 * Because the call to sd_ready_and_valid will issue I/O we
9561 		 * must wait here if either the device is suspended or
		 * if its power level is changing.
9563 		 */
9564 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9565 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9566 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9567 		}
9568 		un->un_ncmds_in_driver++;
9569 		mutex_exit(SD_MUTEX(un));
9570 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9571 			mutex_enter(SD_MUTEX(un));
9572 			un->un_ncmds_in_driver--;
9573 			ASSERT(un->un_ncmds_in_driver >= 0);
9574 			mutex_exit(SD_MUTEX(un));
9575 			return (EIO);
9576 		}
9577 		mutex_enter(SD_MUTEX(un));
9578 		un->un_ncmds_in_driver--;
9579 		ASSERT(un->un_ncmds_in_driver >= 0);
9580 		mutex_exit(SD_MUTEX(un));
9581 	}
9582 
9583 	/*
9584 	 * Write requests are restricted to multiples of the system block size.
9585 	 */
9586 	secmask = un->un_sys_blocksize - 1;
9587 
9588 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9589 		SD_ERROR(SD_LOG_READ_WRITE, un,
9590 		    "sdwrite: file offset not modulo %d\n",
9591 		    un->un_sys_blocksize);
9592 		err = EINVAL;
9593 	} else if (uio->uio_iov->iov_len & (secmask)) {
9594 		SD_ERROR(SD_LOG_READ_WRITE, un,
9595 		    "sdwrite: transfer length not modulo %d\n",
9596 		    un->un_sys_blocksize);
9597 		err = EINVAL;
9598 	} else {
9599 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9600 	}
9601 	return (err);
9602 }
9603 
9604 
9605 /*
9606  *    Function: sdaread
9607  *
9608  * Description: Driver's aread(9e) entry point function.
9609  *
9610  *   Arguments: dev   - device number
9611  *		aio   - structure pointer describing where data is to be stored
9612  *		cred_p  - user credential pointer
9613  *
9614  * Return Code: ENXIO
9615  *		EIO
9616  *		EINVAL
9617  *		value returned by aphysio
9618  *
9619  *     Context: Kernel thread context.
9620  */
9621 /* ARGSUSED */
9622 static int
9623 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9624 {
9625 	struct sd_lun	*un = NULL;
9626 	struct uio	*uio = aio->aio_uio;
9627 	int		secmask;
9628 	int		err;
9629 
9630 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9631 		return (ENXIO);
9632 	}
9633 
9634 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9635 
9636 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9637 		mutex_enter(SD_MUTEX(un));
9638 		/*
9639 		 * Because the call to sd_ready_and_valid will issue I/O we
		 * if its power level is changing.
9641 		 * if it's power level is changing.
9642 		 */
9643 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9644 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9645 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9646 		}
9647 		un->un_ncmds_in_driver++;
9648 		mutex_exit(SD_MUTEX(un));
9649 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9650 			mutex_enter(SD_MUTEX(un));
9651 			un->un_ncmds_in_driver--;
9652 			ASSERT(un->un_ncmds_in_driver >= 0);
9653 			mutex_exit(SD_MUTEX(un));
9654 			return (EIO);
9655 		}
9656 		mutex_enter(SD_MUTEX(un));
9657 		un->un_ncmds_in_driver--;
9658 		ASSERT(un->un_ncmds_in_driver >= 0);
9659 		mutex_exit(SD_MUTEX(un));
9660 	}
9661 
9662 	/*
9663 	 * Read requests are restricted to multiples of the system block size.
9664 	 */
9665 	secmask = un->un_sys_blocksize - 1;
9666 
9667 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9668 		SD_ERROR(SD_LOG_READ_WRITE, un,
9669 		    "sdaread: file offset not modulo %d\n",
9670 		    un->un_sys_blocksize);
9671 		err = EINVAL;
9672 	} else if (uio->uio_iov->iov_len & (secmask)) {
9673 		SD_ERROR(SD_LOG_READ_WRITE, un,
9674 		    "sdaread: transfer length not modulo %d\n",
9675 		    un->un_sys_blocksize);
9676 		err = EINVAL;
9677 	} else {
9678 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9679 	}
9680 	return (err);
9681 }
9682 
9683 
9684 /*
9685  *    Function: sdawrite
9686  *
9687  * Description: Driver's awrite(9e) entry point function.
9688  *
9689  *   Arguments: dev   - device number
9690  *		aio   - structure pointer describing where data is stored
9691  *		cred_p  - user credential pointer
9692  *
9693  * Return Code: ENXIO
9694  *		EIO
9695  *		EINVAL
9696  *		value returned by aphysio
9697  *
9698  *     Context: Kernel thread context.
9699  */
9700 /* ARGSUSED */
9701 static int
9702 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9703 {
9704 	struct sd_lun	*un = NULL;
9705 	struct uio	*uio = aio->aio_uio;
9706 	int		secmask;
9707 	int		err;
9708 
9709 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9710 		return (ENXIO);
9711 	}
9712 
9713 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9714 
9715 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9716 		mutex_enter(SD_MUTEX(un));
9717 		/*
9718 		 * Because the call to sd_ready_and_valid will issue I/O we
9719 		 * must wait here if either the device is suspended or
		 * if its power level is changing.
9721 		 */
9722 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9723 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9724 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9725 		}
9726 		un->un_ncmds_in_driver++;
9727 		mutex_exit(SD_MUTEX(un));
9728 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9729 			mutex_enter(SD_MUTEX(un));
9730 			un->un_ncmds_in_driver--;
9731 			ASSERT(un->un_ncmds_in_driver >= 0);
9732 			mutex_exit(SD_MUTEX(un));
9733 			return (EIO);
9734 		}
9735 		mutex_enter(SD_MUTEX(un));
9736 		un->un_ncmds_in_driver--;
9737 		ASSERT(un->un_ncmds_in_driver >= 0);
9738 		mutex_exit(SD_MUTEX(un));
9739 	}
9740 
9741 	/*
9742 	 * Write requests are restricted to multiples of the system block size.
9743 	 */
9744 	secmask = un->un_sys_blocksize - 1;
9745 
9746 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9747 		SD_ERROR(SD_LOG_READ_WRITE, un,
9748 		    "sdawrite: file offset not modulo %d\n",
9749 		    un->un_sys_blocksize);
9750 		err = EINVAL;
9751 	} else if (uio->uio_iov->iov_len & (secmask)) {
9752 		SD_ERROR(SD_LOG_READ_WRITE, un,
9753 		    "sdawrite: transfer length not modulo %d\n",
9754 		    un->un_sys_blocksize);
9755 		err = EINVAL;
9756 	} else {
9757 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
9758 	}
9759 	return (err);
9760 }
9761 
9762 
9763 
9764 
9765 
9766 /*
9767  * Driver IO processing follows the following sequence:
9768  *
9769  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
9770  *         |                |                     ^
9771  *         v                v                     |
9772  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
9773  *         |                |                     |                   |
9774  *         v                |                     |                   |
9775  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
9776  *         |                |                     ^                   ^
9777  *         v                v                     |                   |
9778  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
9779  *         |                |                     |                   |
9780  *     +---+                |                     +------------+      +-------+
9781  *     |                    |                                  |              |
9782  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9783  *     |                    v                                  |              |
9784  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
9785  *     |                    |                                  ^              |
9786  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9787  *     |                    v                                  |              |
9788  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
9789  *     |                    |                                  ^              |
9790  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
9791  *     |                    v                                  |              |
9792  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
9793  *     |                    |                                  ^              |
9794  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
9795  *     |                    v                                  |              |
9796  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
9797  *     |                    |                                  ^              |
9798  *     |                    |                                  |              |
9799  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
9800  *                          |                           ^
9801  *                          v                           |
9802  *                   sd_core_iostart()                  |
9803  *                          |                           |
9804  *                          |                           +------>(*destroypkt)()
9805  *                          +-> sd_start_cmds() <-+     |           |
9806  *                          |                     |     |           v
9807  *                          |                     |     |  scsi_destroy_pkt(9F)
9808  *                          |                     |     |
9809  *                          +->(*initpkt)()       +- sdintr()
9810  *                          |  |                        |  |
9811  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
9812  *                          |  +-> scsi_setup_cdb(9F)   |
9813  *                          |                           |
9814  *                          +--> scsi_transport(9F)     |
9815  *                                     |                |
9816  *                                     +----> SCSA ---->+
9817  *
9818  *
 * This code is based upon the following presumptions:
9820  *
9821  *   - iostart and iodone functions operate on buf(9S) structures. These
9822  *     functions perform the necessary operations on the buf(9S) and pass
9823  *     them along to the next function in the chain by using the macros
9824  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
9825  *     (for iodone side functions).
9826  *
9827  *   - The iostart side functions may sleep. The iodone side functions
9828  *     are called under interrupt context and may NOT sleep. Therefore
9829  *     iodone side functions also may not call iostart side functions.
9830  *     (NOTE: iostart side functions should NOT sleep for memory, as
9831  *     this could result in deadlock.)
9832  *
9833  *   - An iostart side function may call its corresponding iodone side
9834  *     function directly (if necessary).
9835  *
9836  *   - In the event of an error, an iostart side function can return a buf(9S)
9837  *     to its caller by calling SD_BEGIN_IODONE() after setting B_ERROR and
9838  *     b_error in the usual way (see the sketch following this comment).
9839  *
9840  *   - The taskq mechanism may be used by the iodone side functions to dispatch
9841  *     requests to the iostart side functions.  The iostart side functions in
9842  *     this case would be called under the context of a taskq thread, so it's
9843  *     OK for them to block/sleep/spin in this case.
9844  *
9845  *   - iostart side functions may allocate "shadow" buf(9S) structs and
9846  *     pass them along to the next function in the chain.  The corresponding
9847  *     iodone side functions must coalesce the "shadow" bufs and return
9848  *     the "original" buf to the next higher layer.
9849  *
9850  *   - The b_private field of the buf(9S) struct holds a pointer to
9851  *     an sd_xbuf struct, which contains information needed to
9852  *     construct the scsi_pkt for the command.
9853  *
9854  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
9855  *     layer must acquire & release the SD_MUTEX(un) as needed.
9856  */
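
/*
 * A minimal sketch of a pass-through layer that follows the conventions
 * above.  The names sd_example_iostart() and sd_example_iodone() are
 * hypothetical and do not exist in this driver; the SD_NEXT_IOSTART(),
 * SD_NEXT_IODONE() and SD_BEGIN_IODONE() macros and the buf(9S)/xbuf
 * conventions are as described.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (precondition_failed) {
 *			-- error: set B_ERROR & b_error, head down iodone side
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);	-- pass buf down the chain
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		-- interrupt context: no blocking or sleeping here
 *		SD_NEXT_IODONE(index, un, bp);	-- pass buf back up the chain
 *	}
 */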
9857 
9858 
9859 /*
9860  * Create taskq for all targets in the system. This is created at
9861  * _init(9E) and destroyed at _fini(9E).
9862  *
9863  * Note: here we set the minalloc to a reasonably high number to ensure that
9864  * we will have an adequate supply of task entries available at interrupt time.
9865  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
9866  * sd_taskq_create().  Since we do not want to sleep for allocations at
9867  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
9868  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
9869  * requests at any one instant in time.
9870  */
9871 #define	SD_TASKQ_NUMTHREADS	8
9872 #define	SD_TASKQ_MINALLOC	256
9873 #define	SD_TASKQ_MAXALLOC	256
9874 
9875 static taskq_t	*sd_tq = NULL;
9876 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
9877 
9878 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
9879 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
9880 
9881 /*
9882  * The following task queue is created for the write phase of
9883  * read-modify-write on devices with a non-512-byte block size.
9884  * The number of threads is limited to 1 for now; this was chosen
9885  * because the queue currently applies only to DVD-RAM and MO drives,
9886  * for which performance is not the main criterion at this stage.
9887  * Note: it remains to be explored whether a single taskq could be used.
9888  */
9889 #define	SD_WMR_TASKQ_NUMTHREADS	1
9890 static taskq_t	*sd_wmr_tq = NULL;
9891 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
9892 
9893 /*
9894  *    Function: sd_taskq_create
9895  *
9896  * Description: Create taskq thread(s) and preallocate task entries
9897  *
9898  * Return Code: None (the allocated taskqs are stored in sd_tq and sd_wmr_tq).
9899  *
9900  *     Context: Can sleep. Requires blockable context.
9901  *
9902  *       Notes: - The taskq() facility currently is NOT part of the DDI.
9903  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
9904  *		- taskq_create() will block for memory; it will also panic
9905  *		  if it cannot create the requested number of threads.
9906  *		- Currently taskq_create() creates threads that cannot be
9907  *		  swapped.
9908  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
9909  *		  supply of taskq entries at interrupt time (ie, so that we
9910  *		  do not have to sleep for memory)
9911  */
9912 
9913 static void
9914 sd_taskq_create(void)
9915 {
9916 	char	taskq_name[TASKQ_NAMELEN];
9917 
9918 	ASSERT(sd_tq == NULL);
9919 	ASSERT(sd_wmr_tq == NULL);
9920 
9921 	(void) snprintf(taskq_name, sizeof (taskq_name),
9922 	    "%s_drv_taskq", sd_label);
9923 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
9924 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9925 	    TASKQ_PREPOPULATE));
9926 
9927 	(void) snprintf(taskq_name, sizeof (taskq_name),
9928 	    "%s_rmw_taskq", sd_label);
9929 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
9930 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
9931 	    TASKQ_PREPOPULATE));
9932 }
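
/*
 * A minimal sketch, under the constraints described above: work is
 * dispatched to sd_tq from interrupt context with a no-sleep flag
 * (KM_NOSLEEP, as used with taskq_dispatch() elsewhere in this file),
 * so the dispatch fails (returns 0) rather than blocks once all
 * SD_TASKQ_MAXALLOC preallocated entries are in use, and the caller
 * must then fail the request.  sd_example_task() is hypothetical and
 * is not part of this driver.
 *
 *	static void
 *	sd_example_task(void *arg)
 *	{
 *		struct buf *bp = arg;
 *		-- runs in taskq thread context; blocking/sleeping is OK
 *	}
 *
 *	if (taskq_dispatch(sd_tq, sd_example_task, (void *)bp,
 *	    KM_NOSLEEP) == 0) {
 *		-- no task entries available: fail the command
 *	}
 */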
9933 
9934 
9935 /*
9936  *    Function: sd_taskq_delete
9937  *
9938  * Description: Complementary cleanup routine for sd_taskq_create().
9939  *
9940  *     Context: Kernel thread context.
9941  */
9942 
9943 static void
9944 sd_taskq_delete(void)
9945 {
9946 	ASSERT(sd_tq != NULL);
9947 	ASSERT(sd_wmr_tq != NULL);
9948 	taskq_destroy(sd_tq);
9949 	taskq_destroy(sd_wmr_tq);
9950 	sd_tq = NULL;
9951 	sd_wmr_tq = NULL;
9952 }
9953 
9954 
9955 /*
9956  *    Function: sdstrategy
9957  *
9958  * Description: Driver's strategy (9E) entry point function.
9959  *
9960  *   Arguments: bp - pointer to buf(9S)
9961  *
9962  * Return Code: Always returns zero
9963  *
9964  *     Context: Kernel thread context.
9965  */
9966 
9967 static int
9968 sdstrategy(struct buf *bp)
9969 {
9970 	struct sd_lun *un;
9971 
9972 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
9973 	if (un == NULL) {
9974 		bioerror(bp, EIO);
9975 		bp->b_resid = bp->b_bcount;
9976 		biodone(bp);
9977 		return (0);
9978 	}
9979 	/* As was done in the past, fail new cmds if the state is dumping. */
9980 	if (un->un_state == SD_STATE_DUMPING) {
9981 		bioerror(bp, ENXIO);
9982 		bp->b_resid = bp->b_bcount;
9983 		biodone(bp);
9984 		return (0);
9985 	}
9986 
9987 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9988 
9989 	/*
9990 	 * Commands may sneak in while we release the mutex in
9991 	 * DDI_SUSPEND, so we must block new commands here. However, old
9992 	 * commands that are still in the driver at this point should
9993 	 * be allowed to drain.
9994 	 */
9995 	mutex_enter(SD_MUTEX(un));
9996 	/*
9997 	 * Must wait here if either the device is suspended or
9998 	 * if its power level is changing.
9999 	 */
10000 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10001 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10002 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10003 	}
10004 
10005 	un->un_ncmds_in_driver++;
10006 
10007 	/*
10008 	 * atapi: Since we currently run the CD in PIO mode, we need to
10009 	 * call bp_mapin here to avoid having bp_mapin called in interrupt
10010 	 * context under the HBA's init_pkt routine.
10011 	 */
10012 	if (un->un_f_cfg_is_atapi == TRUE) {
10013 		mutex_exit(SD_MUTEX(un));
10014 		bp_mapin(bp);
10015 		mutex_enter(SD_MUTEX(un));
10016 	}
10017 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10018 	    un->un_ncmds_in_driver);
10019 
10020 	mutex_exit(SD_MUTEX(un));
10021 
10022 	/*
10023 	 * This will (eventually) allocate the sd_xbuf area and
10024 	 * call sd_xbuf_strategy().  We just want to return the
10025 	 * result of ddi_xbuf_qstrategy() so that we have an
10026 	 * optimized tail call, which saves us a stack frame.
10027 	 */
10028 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10029 }
10030 
10031 
10032 /*
10033  *    Function: sd_xbuf_strategy
10034  *
10035  * Description: Function for initiating IO operations via the
10036  *		ddi_xbuf_qstrategy() mechanism.
10037  *
10038  *     Context: Kernel thread context.
10039  */
10040 
10041 static void
10042 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10043 {
10044 	struct sd_lun *un = arg;
10045 
10046 	ASSERT(bp != NULL);
10047 	ASSERT(xp != NULL);
10048 	ASSERT(un != NULL);
10049 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10050 
10051 	/*
10052 	 * Initialize the fields in the xbuf and save a pointer to the
10053 	 * xbuf in bp->b_private.
10054 	 */
10055 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10056 
10057 	/* Send the buf down the iostart chain */
10058 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10059 }
10060 
10061 
10062 /*
10063  *    Function: sd_xbuf_init
10064  *
10065  * Description: Prepare the given sd_xbuf struct for use.
10066  *
10067  *   Arguments: un - ptr to softstate
10068  *		bp - ptr to associated buf(9S)
10069  *		xp - ptr to associated sd_xbuf
10070  *		chain_type - IO chain type to use:
10071  *			SD_CHAIN_NULL
10072  *			SD_CHAIN_BUFIO
10073  *			SD_CHAIN_USCSI
10074  *			SD_CHAIN_DIRECT
10075  *			SD_CHAIN_DIRECT_PRIORITY
10076  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10077  *			initialization; may be NULL if none.
10078  *
10079  *     Context: Kernel thread context
10080  */
10081 
10082 static void
10083 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10084 	uchar_t chain_type, void *pktinfop)
10085 {
10086 	int index;
10087 
10088 	ASSERT(un != NULL);
10089 	ASSERT(bp != NULL);
10090 	ASSERT(xp != NULL);
10091 
10092 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10093 	    bp, chain_type);
10094 
10095 	xp->xb_un	= un;
10096 	xp->xb_pktp	= NULL;
10097 	xp->xb_pktinfo	= pktinfop;
10098 	xp->xb_private	= bp->b_private;
10099 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10100 
10101 	/*
10102 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10103 	 * upon the specified chain type to use.
10104 	 */
10105 	switch (chain_type) {
10106 	case SD_CHAIN_NULL:
10107 		/*
10108 		 * Fall through to use the values for the buf type, even
10109 		 * though for the NULL chain these values will never be used.
10110 		 */
10111 		/* FALLTHRU */
10112 	case SD_CHAIN_BUFIO:
10113 		index = un->un_buf_chain_type;
10114 		break;
10115 	case SD_CHAIN_USCSI:
10116 		index = un->un_uscsi_chain_type;
10117 		break;
10118 	case SD_CHAIN_DIRECT:
10119 		index = un->un_direct_chain_type;
10120 		break;
10121 	case SD_CHAIN_DIRECT_PRIORITY:
10122 		index = un->un_priority_chain_type;
10123 		break;
10124 	default:
10125 		/* We're really broken if we ever get here... */
10126 		panic("sd_xbuf_init: illegal chain type!");
10127 		/*NOTREACHED*/
10128 	}
10129 
10130 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10131 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10132 
10133 	/*
10134 	 * It might be a bit easier to simply bzero the entire xbuf above,
10135 	 * but it turns out that since we init a fair number of members anyway,
10136 	 * we save a fair number of cycles by explicitly assigning zero.
10137 	 */
10138 	xp->xb_pkt_flags	= 0;
10139 	xp->xb_dma_resid	= 0;
10140 	xp->xb_retry_count	= 0;
10141 	xp->xb_victim_retry_count = 0;
10142 	xp->xb_ua_retry_count	= 0;
10143 	xp->xb_sense_bp		= NULL;
10144 	xp->xb_sense_status	= 0;
10145 	xp->xb_sense_state	= 0;
10146 	xp->xb_sense_resid	= 0;
10147 
10148 	bp->b_private	= xp;
10149 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10150 	bp->b_resid	= 0;
10151 	bp->av_forw	= NULL;
10152 	bp->av_back	= NULL;
10153 	bioerror(bp, 0);
10154 
10155 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10156 }
10157 
10158 
10159 /*
10160  *    Function: sd_uscsi_strategy
10161  *
10162  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10163  *
10164  *   Arguments: bp - buf struct ptr
10165  *
10166  * Return Code: Always returns 0
10167  *
10168  *     Context: Kernel thread context
10169  */
10170 
10171 static int
10172 sd_uscsi_strategy(struct buf *bp)
10173 {
10174 	struct sd_lun		*un;
10175 	struct sd_uscsi_info	*uip;
10176 	struct sd_xbuf		*xp;
10177 	uchar_t			chain_type;
10178 
10179 	ASSERT(bp != NULL);
10180 
10181 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10182 	if (un == NULL) {
10183 		bioerror(bp, EIO);
10184 		bp->b_resid = bp->b_bcount;
10185 		biodone(bp);
10186 		return (0);
10187 	}
10188 
10189 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10190 
10191 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10192 
10193 	mutex_enter(SD_MUTEX(un));
10194 	/*
10195 	 * atapi: Since we are running the CD for now in PIO mode we need to
10196 	 * call bp_mapin here to avoid bp_mapin called interrupt context under
10197 	 * the HBA's init_pkt routine.
10198 	 */
10199 	if (un->un_f_cfg_is_atapi == TRUE) {
10200 		mutex_exit(SD_MUTEX(un));
10201 		bp_mapin(bp);
10202 		mutex_enter(SD_MUTEX(un));
10203 	}
10204 	un->un_ncmds_in_driver++;
10205 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10206 	    un->un_ncmds_in_driver);
10207 	mutex_exit(SD_MUTEX(un));
10208 
10209 	/*
10210 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10211 	 */
10212 	ASSERT(bp->b_private != NULL);
10213 	uip = (struct sd_uscsi_info *)bp->b_private;
10214 
10215 	switch (uip->ui_flags) {
10216 	case SD_PATH_DIRECT:
10217 		chain_type = SD_CHAIN_DIRECT;
10218 		break;
10219 	case SD_PATH_DIRECT_PRIORITY:
10220 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10221 		break;
10222 	default:
10223 		chain_type = SD_CHAIN_USCSI;
10224 		break;
10225 	}
10226 
10227 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10228 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10229 
10230 	/* Use the index obtained within xbuf_init */
10231 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10232 
10233 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10234 
10235 	return (0);
10236 }
10237 
10238 /*
10239  *    Function: sd_send_scsi_cmd
10240  *
10241  * Description: Runs a USCSI command for the user (when called through
10242  *		sdioctl), or for the driver.
10243  *
10244  *   Arguments: dev - the dev_t for the device
10245  *		incmd - ptr to a valid uscsi_cmd struct
10246  *		flag - bit flag, indicating open settings, 32/64 bit type
10247  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10248  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10249  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10250  *			to use the USCSI "direct" chain and bypass the normal
10251  *			command waitq.
10252  *
10253  * Return Code: 0 -  successful completion of the given command
10254  *		EIO - scsi_uscsi_handle_command() failed
10255  *		ENXIO  - soft state not found for specified dev
10256  *		EINVAL
10257  *		EFAULT - copyin/copyout error
10258  *		return code of scsi_uscsi_handle_command():
10259  *			EIO
10260  *			ENXIO
10261  *			EACCES
10262  *
10263  *     Context: Waits for command to complete. Can sleep.
10264  */
10265 
10266 static int
10267 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10268 	enum uio_seg dataspace, int path_flag)
10269 {
10270 	struct sd_uscsi_info	*uip;
10271 	struct uscsi_cmd	*uscmd;
10272 	struct sd_lun	*un;
10273 	int	format = 0;
10274 	int	rval;
10275 
10276 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10277 	if (un == NULL) {
10278 		return (ENXIO);
10279 	}
10280 
10281 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10282 
10283 #ifdef SDDEBUG
10284 	switch (dataspace) {
10285 	case UIO_USERSPACE:
10286 		SD_TRACE(SD_LOG_IO, un,
10287 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10288 		break;
10289 	case UIO_SYSSPACE:
10290 		SD_TRACE(SD_LOG_IO, un,
10291 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10292 		break;
10293 	default:
10294 		SD_TRACE(SD_LOG_IO, un,
10295 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10296 		break;
10297 	}
10298 #endif
10299 
10300 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10301 	    SD_ADDRESS(un), &uscmd);
10302 	if (rval != 0) {
10303 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10304 		    "scsi_uscsi_alloc_and_copyin failed\n");
10305 		return (rval);
10306 	}
10307 
10308 	if ((uscmd->uscsi_cdb != NULL) &&
10309 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10310 		mutex_enter(SD_MUTEX(un));
10311 		un->un_f_format_in_progress = TRUE;
10312 		mutex_exit(SD_MUTEX(un));
10313 		format = 1;
10314 	}
10315 
10316 	/*
10317 	 * Allocate an sd_uscsi_info struct and fill it with the info
10318 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10319 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10320 	 * since we allocate the buf here in this function, we do not
10321 	 * need to preserve the prior contents of b_private.
10322 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10323 	 */
10324 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10325 	uip->ui_flags = path_flag;
10326 	uip->ui_cmdp = uscmd;
10327 
10328 	/*
10329 	 * Commands sent with priority are intended for error recovery
10330 	 * situations, and do not have retries performed.
10331 	 */
10332 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10333 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10334 	}
10335 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10336 
10337 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10338 	    sd_uscsi_strategy, NULL, uip);
10339 
10340 #ifdef SDDEBUG
10341 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10342 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10343 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10344 	if (uscmd->uscsi_bufaddr != NULL) {
10345 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10346 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10347 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10348 		if (dataspace == UIO_SYSSPACE) {
10349 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10350 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10351 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10352 		}
10353 	}
10354 #endif
10355 
10356 	if (format == 1) {
10357 		mutex_enter(SD_MUTEX(un));
10358 		un->un_f_format_in_progress = FALSE;
10359 		mutex_exit(SD_MUTEX(un));
10360 	}
10361 
10362 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10363 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10364 
10365 	return (rval);
10366 }
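
/*
 * A minimal sketch of issuing a command through sd_send_scsi_cmd() from
 * kernel context; this TEST UNIT READY example is illustrative only and
 * is not a verbatim excerpt from this driver.
 *
 *	union scsi_cdb		cdb;
 *	struct uscsi_cmd	ucmd;
 *	int			status;
 *
 *	bzero(&cdb, sizeof (cdb));
 *	bzero(&ucmd, sizeof (ucmd));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = 60;
 *	status = sd_send_scsi_cmd(dev, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 */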
10367 
10368 
10369 /*
10370  *    Function: sd_buf_iodone
10371  *
10372  * Description: Frees the sd_xbuf & returns the buf to its originator.
10373  *
10374  *     Context: May be called from interrupt context.
10375  */
10376 /* ARGSUSED */
10377 static void
10378 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10379 {
10380 	struct sd_xbuf *xp;
10381 
10382 	ASSERT(un != NULL);
10383 	ASSERT(bp != NULL);
10384 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10385 
10386 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10387 
10388 	xp = SD_GET_XBUF(bp);
10389 	ASSERT(xp != NULL);
10390 
10391 	mutex_enter(SD_MUTEX(un));
10392 
10393 	/*
10394 	 * Record the time at which the command completed.
10395 	 * This is used to determine whether the device has been
10396 	 * idle long enough to report it as idle to the PM framework,
10397 	 * which lowers overhead and therefore improves per-I/O
10398 	 * performance.
10399 	 */
10400 	un->un_pm_idle_time = ddi_get_time();
10401 
10402 	un->un_ncmds_in_driver--;
10403 	ASSERT(un->un_ncmds_in_driver >= 0);
10404 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10405 	    un->un_ncmds_in_driver);
10406 
10407 	mutex_exit(SD_MUTEX(un));
10408 
10409 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10410 	biodone(bp);				/* bp is gone after this */
10411 
10412 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10413 }
10414 
10415 
10416 /*
10417  *    Function: sd_uscsi_iodone
10418  *
10419  * Description: Frees the sd_xbuf & returns the buf to its originator.
10420  *
10421  *     Context: May be called from interrupt context.
10422  */
10423 /* ARGSUSED */
10424 static void
10425 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10426 {
10427 	struct sd_xbuf *xp;
10428 
10429 	ASSERT(un != NULL);
10430 	ASSERT(bp != NULL);
10431 
10432 	xp = SD_GET_XBUF(bp);
10433 	ASSERT(xp != NULL);
10434 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10435 
10436 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10437 
10438 	bp->b_private = xp->xb_private;
10439 
10440 	mutex_enter(SD_MUTEX(un));
10441 
10442 	/*
10443 	 * Record the time at which the command completed.
10444 	 * This is used to determine whether the device has been
10445 	 * idle long enough to report it as idle to the PM framework,
10446 	 * which lowers overhead and therefore improves per-I/O
10447 	 * performance.
10448 	 */
10449 	un->un_pm_idle_time = ddi_get_time();
10450 
10451 	un->un_ncmds_in_driver--;
10452 	ASSERT(un->un_ncmds_in_driver >= 0);
10453 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10454 	    un->un_ncmds_in_driver);
10455 
10456 	mutex_exit(SD_MUTEX(un));
10457 
10458 	kmem_free(xp, sizeof (struct sd_xbuf));
10459 	biodone(bp);
10460 
10461 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10462 }
10463 
10464 
10465 /*
10466  *    Function: sd_mapblockaddr_iostart
10467  *
10468  * Description: Verify request lies within the partition limits for
10469  *		the indicated minor device.  Issue "overrun" buf if
10470  *		request would exceed partition range.  Converts
10471  *		partition-relative block address to absolute.
10472  *
10473  *     Context: Can sleep
10474  *
10475  *      Issues: This follows what the old code did, in terms of accessing
10476  *		some of the partition info in the unit struct without holding
10477  *		the mutex.  This is a general issue: if the partition info
10478  *		can be altered while IO is in progress... as soon as we send
10479  *		a buf, its partitioning can be invalid before it gets to the
10480  *		device.  Probably the right fix is to move partitioning out
10481  *		of the driver entirely.
10482  */
10483 
10484 static void
10485 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10486 {
10487 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10488 	daddr_t	blocknum;	/* Block number specified by the buf */
10489 	size_t	requested_nblocks;
10490 	size_t	available_nblocks;
10491 	int	partition;
10492 	diskaddr_t	partition_offset;
10493 	struct sd_xbuf *xp;
10494 
10495 
10496 	ASSERT(un != NULL);
10497 	ASSERT(bp != NULL);
10498 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10499 
10500 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10501 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10502 
10503 	xp = SD_GET_XBUF(bp);
10504 	ASSERT(xp != NULL);
10505 
10506 	/*
10507 	 * If the geometry is not indicated as valid, attempt to access
10508 	 * the unit & verify the geometry/label. This can be the case for
10509 	 * removable-media devices, or if the device was opened in
10510 	 * NDELAY/NONBLOCK mode.
10511 	 */
10512 	if (!SD_IS_VALID_LABEL(un) &&
10513 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10514 		/*
10515 		 * For removable devices it is possible to start an I/O
10516 		 * without a media by opening the device in nodelay mode.
10517 		 * Also for writable CDs there can be many scenarios where
10518 		 * there is no geometry yet but volume manager is trying to
10519 		 * issue a read() just because it can see TOC on the CD. So
10520 		 * do not print a message for removables.
10521 		 */
10522 		if (!un->un_f_has_removable_media) {
10523 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10524 			    "i/o to invalid geometry\n");
10525 		}
10526 		bioerror(bp, EIO);
10527 		bp->b_resid = bp->b_bcount;
10528 		SD_BEGIN_IODONE(index, un, bp);
10529 		return;
10530 	}
10531 
10532 	partition = SDPART(bp->b_edev);
10533 
10534 	nblocks = 0;
10535 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10536 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10537 
10538 	/*
10539 	 * blocknum is the starting block number of the request. At this
10540 	 * point it is still relative to the start of the minor device.
10541 	 */
10542 	blocknum = xp->xb_blkno;
10543 
10544 	/*
10545 	 * Legacy: If the starting block number is one past the last block
10546 	 * in the partition, do not set B_ERROR in the buf.
10547 	 */
10548 	if (blocknum == nblocks)  {
10549 		goto error_exit;
10550 	}
10551 
10552 	/*
10553 	 * Confirm that the first block of the request lies within the
10554 	 * partition limits. Also the requested number of bytes must be
10555 	 * a multiple of the system block size.
10556 	 */
10557 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10558 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10559 		bp->b_flags |= B_ERROR;
10560 		goto error_exit;
10561 	}
10562 
10563 	/*
10564 	 * If the requested # blocks exceeds the available # blocks, that
10565 	 * is an overrun of the partition.
10566 	 */
10567 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10568 	available_nblocks = (size_t)(nblocks - blocknum);
10569 	ASSERT(nblocks >= blocknum);
10570 
10571 	if (requested_nblocks > available_nblocks) {
10572 		/*
10573 		 * Allocate an "overrun" buf to allow the request to proceed
10574 		 * for the amount of space available in the partition. The
10575 		 * amount not transferred will be added into the b_resid
10576 		 * when the operation is complete. The overrun buf
10577 		 * replaces the original buf here, and the original buf
10578 		 * is saved inside the overrun buf, for later use.
10579 		 */
10580 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10581 		    (offset_t)(requested_nblocks - available_nblocks));
10582 		size_t count = bp->b_bcount - resid;
10583 		/*
10584 		 * Note: count is unsigned, so it can NEVER be less
10585 		 * than 0; ASSERT that the original values are
10586 		 * consistent.
10587 		 */
10588 		ASSERT(bp->b_bcount >= resid);
10589 
10590 		bp = sd_bioclone_alloc(bp, count, blocknum,
10591 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
10592 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10593 		ASSERT(xp != NULL);
10594 	}
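
	/*
	 * Worked example, assuming a 512-byte system block size: with
	 * nblocks = 1000, blocknum = 998 and b_bcount = 2048, we get
	 * requested_nblocks = 4 but available_nblocks = 2, so
	 * resid = 2 * 512 = 1024 and count = 1024.  The overrun buf
	 * carries the first 1024 bytes to the device; the other 1024
	 * bytes show up in b_resid when the original buf is completed.
	 */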
10595 
10596 	/* At this point there should be no residual for this buf. */
10597 	ASSERT(bp->b_resid == 0);
10598 
10599 	/* Convert the block number to an absolute address. */
10600 	xp->xb_blkno += partition_offset;
10601 
10602 	SD_NEXT_IOSTART(index, un, bp);
10603 
10604 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10605 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10606 
10607 	return;
10608 
10609 error_exit:
10610 	bp->b_resid = bp->b_bcount;
10611 	SD_BEGIN_IODONE(index, un, bp);
10612 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10613 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10614 }
10615 
10616 
10617 /*
10618  *    Function: sd_mapblockaddr_iodone
10619  *
10620  * Description: Completion-side processing for partition management.
10621  *
10622  *     Context: May be called under interrupt context
10623  */
10624 
10625 static void
10626 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10627 {
10628 	/* int	partition; */	/* Not used, see below. */
10629 	ASSERT(un != NULL);
10630 	ASSERT(bp != NULL);
10631 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10632 
10633 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10634 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10635 
10636 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10637 		/*
10638 		 * We have an "overrun" buf to deal with...
10639 		 */
10640 		struct sd_xbuf	*xp;
10641 		struct buf	*obp;	/* ptr to the original buf */
10642 
10643 		xp = SD_GET_XBUF(bp);
10644 		ASSERT(xp != NULL);
10645 
10646 		/* Retrieve the pointer to the original buf */
10647 		obp = (struct buf *)xp->xb_private;
10648 		ASSERT(obp != NULL);
10649 
10650 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10651 		bioerror(obp, bp->b_error);
10652 
10653 		sd_bioclone_free(bp);
10654 
10655 		/*
10656 		 * Get back the original buf.
10657 		 * Note that since the restoration of xb_blkno below
10658 		 * was removed, the sd_xbuf is not needed.
10659 		 */
10660 		bp = obp;
10661 		/*
10662 		 * xp = SD_GET_XBUF(bp);
10663 		 * ASSERT(xp != NULL);
10664 		 */
10665 	}
10666 
10667 	/*
10668 	 * Convert xp->xb_blkno back to a minor-device relative value.
10669 	 * Note: this has been commented out, as it is not needed in the
10670 	 * current implementation of the driver (ie, since this function
10671 	 * is at the top of the layering chains, so the info will be
10672 	 * discarded) and it is in the "hot" IO path.
10673 	 *
10674 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10675 	 * xp->xb_blkno -= un->un_offset[partition];
10676 	 */
10677 
10678 	SD_NEXT_IODONE(index, un, bp);
10679 
10680 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10681 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10682 }
10683 
10684 
10685 /*
10686  *    Function: sd_mapblocksize_iostart
10687  *
10688  * Description: Convert between system block size (un->un_sys_blocksize)
10689  *		and target block size (un->un_tgt_blocksize).
10690  *
10691  *     Context: Can sleep to allocate resources.
10692  *
10693  * Assumptions: A higher layer has already performed any partition validation,
10694  *		and converted the xp->xb_blkno to an absolute value relative
10695  *		to the start of the device.
10696  *
10697  *		It is also assumed that the higher layer has implemented
10698  *		an "overrun" mechanism for the case where the request would
10699  *		read/write beyond the end of a partition.  In this case we
10700  *		assume (and ASSERT) that bp->b_resid == 0.
10701  *
10702  *		Note: The implementation for this routine assumes the target
10703  *		block size remains constant between allocation and transport.
10704  */
10705 
10706 static void
10707 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10708 {
10709 	struct sd_mapblocksize_info	*bsp;
10710 	struct sd_xbuf			*xp;
10711 	offset_t first_byte;
10712 	daddr_t	start_block, end_block;
10713 	daddr_t	request_bytes;
10714 	ushort_t is_aligned = FALSE;
10715 
10716 	ASSERT(un != NULL);
10717 	ASSERT(bp != NULL);
10718 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10719 	ASSERT(bp->b_resid == 0);
10720 
10721 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10722 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10723 
10724 	/*
10725 	 * For a non-writable CD, a write request is an error
10726 	 */
10727 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10728 	    (un->un_f_mmc_writable_media == FALSE)) {
10729 		bioerror(bp, EIO);
10730 		bp->b_resid = bp->b_bcount;
10731 		SD_BEGIN_IODONE(index, un, bp);
10732 		return;
10733 	}
10734 
10735 	/*
10736 	 * We do not need a shadow buf if the device is using
10737 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10738 	 * In this case there is no layer-private data block allocated.
10739 	 */
10740 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10741 	    (bp->b_bcount == 0)) {
10742 		goto done;
10743 	}
10744 
10745 #if defined(__i386) || defined(__amd64)
10746 	/* We do not support non-block-aligned transfers for ROD devices */
10747 	ASSERT(!ISROD(un));
10748 #endif
10749 
10750 	xp = SD_GET_XBUF(bp);
10751 	ASSERT(xp != NULL);
10752 
10753 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10754 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
10755 	    un->un_tgt_blocksize, un->un_sys_blocksize);
10756 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10757 	    "request start block:0x%x\n", xp->xb_blkno);
10758 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10759 	    "request len:0x%x\n", bp->b_bcount);
10760 
10761 	/*
10762 	 * Allocate the layer-private data area for the mapblocksize layer.
10763 	 * Layers are allowed to use the xp_private member of the sd_xbuf
10764 	 * struct to store the pointer to their layer-private data block, but
10765 	 * each layer also has the responsibility of restoring the prior
10766 	 * contents of xb_private before returning the buf/xbuf to the
10767 	 * higher layer that sent it.
10768 	 *
10769 	 * Here we save the prior contents of xp->xb_private into the
10770 	 * bsp->mbs_oprivate field of our layer-private data area. This value
10771 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
10772 	 * the layer-private area and returning the buf/xbuf to the layer
10773 	 * that sent it.
10774 	 *
10775 	 * Note that here we use kmem_zalloc for the allocation as there are
10776 	 * parts of the mapblocksize code that expect certain fields to be
10777 	 * zero unless explicitly set to a required value.
10778 	 */
10779 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10780 	bsp->mbs_oprivate = xp->xb_private;
10781 	xp->xb_private = bsp;
10782 
10783 	/*
10784 	 * This treats the data on the disk (target) as an array of bytes.
10785 	 * first_byte is the byte offset, from the beginning of the device,
10786 	 * to the location of the request. This is converted from a
10787 	 * un->un_sys_blocksize block address to a byte offset, and then back
10788 	 * to a block address based upon a un->un_tgt_blocksize block size.
10789 	 *
10790 	 * xp->xb_blkno should be absolute upon entry into this function,
10791 	 * but it is based upon partitions that use the "system"
10792 	 * block size. It must be adjusted to reflect the block size of
10793 	 * the target.
10794 	 *
10795 	 * Note that end_block is actually the block that follows the last
10796 	 * block of the request, but that's what is needed for the computation.
10797 	 */
10798 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
10799 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
10800 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
10801 	    un->un_tgt_blocksize;
10802 
10803 	/* request_bytes is rounded up to a multiple of the target block size */
10804 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
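
	/*
	 * Worked example, assuming un_sys_blocksize = 512 and
	 * un_tgt_blocksize = 2048: for xb_blkno = 3 and b_bcount = 1024,
	 * first_byte = 1536, start_block = 0, end_block = 2, and
	 * request_bytes = 4096.  Since first_byte is not a multiple of
	 * 2048 the request is unaligned, so a 4096-byte shadow buf will
	 * READ target blocks 0 and 1, with the user's data starting
	 * 1536 bytes into it (the copy_offset computed below).
	 */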
10805 
10806 	/*
10807 	 * See if the starting address of the request and the request
10808 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
10809 	 * then we do not need to allocate a shadow buf to handle the request.
10810 	 */
10811 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
10812 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
10813 		is_aligned = TRUE;
10814 	}
10815 
10816 	if ((bp->b_flags & B_READ) == 0) {
10817 		/*
10818 		 * Lock the range for a write operation. An aligned request is
10819 		 * considered a simple write; otherwise the request must be a
10820 		 * read-modify-write.
10821 		 */
10822 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
10823 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
10824 	}
10825 
10826 	/*
10827 	 * Alloc a shadow buf if the request is not aligned. Also, this is
10828 	 * where the READ command is generated for a read-modify-write. (The
10829 	 * write phase is deferred until after the read completes.)
10830 	 */
10831 	if (is_aligned == FALSE) {
10832 
10833 		struct sd_mapblocksize_info	*shadow_bsp;
10834 		struct sd_xbuf	*shadow_xp;
10835 		struct buf	*shadow_bp;
10836 
10837 		/*
10838 		 * Allocate the shadow buf and its associated xbuf. Note that
10839 		 * after this call the xb_blkno value in both the original
10840 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
10841 		 * same: absolute (relative to the start of the device) and
10842 		 * adjusted for the target block size. The b_blkno in the
10843 		 * shadow buf will also be set to this value. We should never
10844 		 * change b_blkno in the original bp however.
10845 		 *
10846 		 * Note also that the shadow buf will always need to be a
10847 		 * READ command, regardless of whether the incoming command
10848 		 * is a READ or a WRITE.
10849 		 */
10850 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
10851 		    xp->xb_blkno,
10852 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
10853 
10854 		shadow_xp = SD_GET_XBUF(shadow_bp);
10855 
10856 		/*
10857 		 * Allocate the layer-private data for the shadow buf.
10858 		 * (No need to preserve xb_private in the shadow xbuf.)
10859 		 */
10860 		shadow_xp->xb_private = shadow_bsp =
10861 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
10862 
10863 		/*
10864 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
10865 		 * to figure out where the start of the user data is (based upon
10866 		 * the system block size) in the data returned by the READ
10867 		 * command (which will be based upon the target blocksize). Note
10868 		 * that this is only really used if the request is unaligned.
10869 		 */
10870 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
10871 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
10872 		ASSERT((bsp->mbs_copy_offset >= 0) &&
10873 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
10874 
10875 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
10876 
10877 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
10878 
10879 		/* Transfer the wmap (if any) to the shadow buf */
10880 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
10881 		bsp->mbs_wmp = NULL;
10882 
10883 		/*
10884 		 * The shadow buf goes on from here in place of the
10885 		 * original buf.
10886 		 */
10887 		shadow_bsp->mbs_orig_bp = bp;
10888 		bp = shadow_bp;
10889 	}
10890 
10891 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10892 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
10893 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10894 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
10895 	    request_bytes);
10896 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
10897 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
10898 
10899 done:
10900 	SD_NEXT_IOSTART(index, un, bp);
10901 
10902 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10903 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
10904 }
10905 
10906 
10907 /*
10908  *    Function: sd_mapblocksize_iodone
10909  *
10910  * Description: Completion side processing for block-size mapping.
10911  *
10912  *     Context: May be called under interrupt context
10913  */
10914 
10915 static void
10916 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
10917 {
10918 	struct sd_mapblocksize_info	*bsp;
10919 	struct sd_xbuf	*xp;
10920 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
10921 	struct buf	*orig_bp;	/* ptr to the original buf */
10922 	offset_t	shadow_end;
10923 	offset_t	request_end;
10924 	offset_t	shadow_start;
10925 	ssize_t		copy_offset;
10926 	size_t		copy_length;
10927 	size_t		shortfall;
10928 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
10929 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
10930 
10931 	ASSERT(un != NULL);
10932 	ASSERT(bp != NULL);
10933 
10934 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10935 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
10936 
10937 	/*
10938 	 * There is no shadow buf or layer-private data if the target is
10939 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
10940 	 */
10941 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10942 	    (bp->b_bcount == 0)) {
10943 		goto exit;
10944 	}
10945 
10946 	xp = SD_GET_XBUF(bp);
10947 	ASSERT(xp != NULL);
10948 
10949 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
10950 	bsp = xp->xb_private;
10951 
10952 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
10953 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
10954 
10955 	if (is_write) {
10956 		/*
10957 		 * For a WRITE request we must free up the block range that
10958 		 * we have locked up.  This holds regardless of whether this is
10959 		 * an aligned write request or a read-modify-write request.
10960 		 */
10961 		sd_range_unlock(un, bsp->mbs_wmp);
10962 		bsp->mbs_wmp = NULL;
10963 	}
10964 
10965 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
10966 		/*
10967 		 * An aligned read or write command will have no shadow buf;
10968 		 * there is not much else to do with it.
10969 		 */
10970 		goto done;
10971 	}
10972 
10973 	orig_bp = bsp->mbs_orig_bp;
10974 	ASSERT(orig_bp != NULL);
10975 	orig_xp = SD_GET_XBUF(orig_bp);
10976 	ASSERT(orig_xp != NULL);
10977 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10978 
10979 	if (!is_write && has_wmap) {
10980 		/*
10981 		 * A READ with a wmap means this is the READ phase of a
10982 		 * read-modify-write. If an error occurred on the READ then
10983 		 * we do not proceed with the WRITE phase or copy any data.
10984 		 * Just release the write maps and return with an error.
10985 		 */
10986 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
10987 			orig_bp->b_resid = orig_bp->b_bcount;
10988 			bioerror(orig_bp, bp->b_error);
10989 			sd_range_unlock(un, bsp->mbs_wmp);
10990 			goto freebuf_done;
10991 		}
10992 	}
10993 
10994 	/*
10995 	 * Here is where we set up to copy the data from the shadow buf
10996 	 * into the space associated with the original buf.
10997 	 *
10998 	 * To deal with the conversion between block sizes, these
10999 	 * computations treat the data as an array of bytes, with the
11000 	 * first byte (byte 0) corresponding to the first byte in the
11001 	 * first block on the disk.
11002 	 */
11003 
11004 	/*
11005 	 * shadow_start and shadow_end delimit the location and extent of
11006 	 * the data returned with the shadow IO request.
11007 	 */
11008 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11009 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11010 
11011 	/*
11012 	 * copy_offset gives the offset (in bytes) from the start of the first
11013 	 * block of the READ request to the beginning of the data.  We retrieve
11014 	 * this value from mbs_copy_offset in the layer-private data, where it
11015 	 * was saved by sd_mapblocksize_iostart(). copy_length gives the amount of
11016 	 * data to be copied (in bytes).
11017 	 */
11018 	copy_offset  = bsp->mbs_copy_offset;
11019 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11020 	copy_length  = orig_bp->b_bcount;
11021 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11022 
11023 	/*
11024 	 * Set up the resid and error fields of orig_bp as appropriate.
11025 	 */
11026 	if (shadow_end >= request_end) {
11027 		/* We got all the requested data; set resid to zero */
11028 		orig_bp->b_resid = 0;
11029 	} else {
11030 		/*
11031 		 * We failed to get enough data to fully satisfy the original
11032 		 * request. Just copy back whatever data we got and set
11033 		 * up the residual and error code as required.
11034 		 *
11035 		 * 'shortfall' is the amount by which the data received with the
11036 		 * shadow buf has "fallen short" of the requested amount.
11037 		 */
11038 		shortfall = (size_t)(request_end - shadow_end);
11039 
11040 		if (shortfall > orig_bp->b_bcount) {
11041 			/*
11042 			 * We did not get enough data to even partially
11043 			 * fulfill the original request.  The residual is
11044 			 * equal to the amount requested.
11045 			 */
11046 			orig_bp->b_resid = orig_bp->b_bcount;
11047 		} else {
11048 			/*
11049 			 * We did not get all the data that we requested
11050 			 * from the device, but we will try to return what
11051 			 * portion we did get.
11052 			 */
11053 			orig_bp->b_resid = shortfall;
11054 		}
11055 		ASSERT(copy_length >= orig_bp->b_resid);
11056 		copy_length  -= orig_bp->b_resid;
11057 	}
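
	/*
	 * Continuing the worked example from sd_mapblocksize_iostart()
	 * (512-byte system blocks, 2048-byte target blocks,
	 * copy_offset = 1536, original b_bcount = 1024): when the full
	 * 4096-byte shadow READ succeeds, shadow_start = 0,
	 * shadow_end = 4096 and request_end = 1536 + 1024 = 2560.
	 * shadow_end >= request_end, so b_resid is 0 and 1024 bytes
	 * are copied from offset 1536 of the shadow buf.
	 */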
11058 
11059 	/* Propagate the error code from the shadow buf to the original buf */
11060 	bioerror(orig_bp, bp->b_error);
11061 
11062 	if (is_write) {
11063 		goto freebuf_done;	/* No data copying for a WRITE */
11064 	}
11065 
11066 	if (has_wmap) {
11067 		/*
11068 		 * This is a READ command from the READ phase of a
11069 		 * read-modify-write request. We have to copy the data given
11070 		 * by the user OVER the data returned by the READ command,
11071 		 * then convert the command from a READ to a WRITE and send
11072 		 * it back to the target.
11073 		 */
11074 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11075 		    copy_length);
11076 
11077 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11078 
11079 		/*
11080 		 * Dispatch the WRITE command to the taskq thread, which
11081 		 * will in turn send the command to the target. When the
11082 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11083 		 * will get called again as part of the iodone chain
11084 		 * processing for it. Note that we will still be dealing
11085 		 * with the shadow buf at that point.
11086 		 */
11087 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11088 		    KM_NOSLEEP) != 0) {
11089 			/*
11090 			 * Dispatch was successful so we are done. Return
11091 			 * without going any higher up the iodone chain. Do
11092 			 * not free up any layer-private data until after the
11093 			 * WRITE completes.
11094 			 */
11095 			return;
11096 		}
11097 
11098 		/*
11099 		 * Dispatch of the WRITE command failed; set up the error
11100 		 * condition and send this IO back up the iodone chain.
11101 		 */
11102 		bioerror(orig_bp, EIO);
11103 		orig_bp->b_resid = orig_bp->b_bcount;
11104 
11105 	} else {
11106 		/*
11107 		 * This is a regular READ request (ie, not a RMW). Copy the
11108 		 * data from the shadow buf into the original buf. The
11109 		 * copy_offset compensates for any "misalignment" between the
11110 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11111 		 * original buf (with its un->un_sys_blocksize blocks).
11112 		 */
11113 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11114 		    copy_length);
11115 	}
11116 
11117 freebuf_done:
11118 
11119 	/*
11120 	 * At this point we still have both the shadow buf AND the original
11121 	 * buf to deal with, as well as the layer-private data area in each.
11122 	 * Local variables are as follows:
11123 	 *
11124 	 * bp -- points to shadow buf
11125 	 * xp -- points to xbuf of shadow buf
11126 	 * bsp -- points to layer-private data area of shadow buf
11127 	 * orig_bp -- points to original buf
11128 	 *
11129 	 * First free the shadow buf and its associated xbuf, then free the
11130 	 * layer-private data area from the shadow buf. There is no need to
11131 	 * restore xb_private in the shadow xbuf.
11132 	 */
11133 	sd_shadow_buf_free(bp);
11134 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11135 
11136 	/*
11137 	 * Now update the local variables to point to the original buf, xbuf,
11138 	 * and layer-private area.
11139 	 */
11140 	bp = orig_bp;
11141 	xp = SD_GET_XBUF(bp);
11142 	ASSERT(xp != NULL);
11143 	ASSERT(xp == orig_xp);
11144 	bsp = xp->xb_private;
11145 	ASSERT(bsp != NULL);
11146 
11147 done:
11148 	/*
11149 	 * Restore xb_private to whatever it was set to by the next higher
11150 	 * layer in the chain, then free the layer-private data area.
11151 	 */
11152 	xp->xb_private = bsp->mbs_oprivate;
11153 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11154 
11155 exit:
11156 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11157 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11158 
11159 	SD_NEXT_IODONE(index, un, bp);
11160 }
11161 
11162 
11163 /*
11164  *    Function: sd_checksum_iostart
11165  *
11166  * Description: A stub function for a layer that's currently not used.
11167  *		For now just a placeholder.
11168  *
11169  *     Context: Kernel thread context
11170  */
11171 
11172 static void
11173 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11174 {
11175 	ASSERT(un != NULL);
11176 	ASSERT(bp != NULL);
11177 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11178 	SD_NEXT_IOSTART(index, un, bp);
11179 }
11180 
11181 
11182 /*
11183  *    Function: sd_checksum_iodone
11184  *
11185  * Description: A stub function for a layer that's currently not used.
11186  *		For now just a placeholder.
11187  *
11188  *     Context: May be called under interrupt context
11189  */
11190 
11191 static void
11192 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11193 {
11194 	ASSERT(un != NULL);
11195 	ASSERT(bp != NULL);
11196 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11197 	SD_NEXT_IODONE(index, un, bp);
11198 }
11199 
11200 
11201 /*
11202  *    Function: sd_checksum_uscsi_iostart
11203  *
11204  * Description: A stub function for a layer that's currently not used.
11205  *		For now just a placeholder.
11206  *
11207  *     Context: Kernel thread context
11208  */
11209 
11210 static void
11211 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11212 {
11213 	ASSERT(un != NULL);
11214 	ASSERT(bp != NULL);
11215 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11216 	SD_NEXT_IOSTART(index, un, bp);
11217 }
11218 
11219 
11220 /*
11221  *    Function: sd_checksum_uscsi_iodone
11222  *
11223  * Description: A stub function for a layer that's currently not used.
11224  *		For now just a placeholder.
11225  *
11226  *     Context: May be called under interrupt context
11227  */
11228 
11229 static void
11230 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11231 {
11232 	ASSERT(un != NULL);
11233 	ASSERT(bp != NULL);
11234 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11235 	SD_NEXT_IODONE(index, un, bp);
11236 }
11237 
11238 
11239 /*
11240  *    Function: sd_pm_iostart
11241  *
11242  * Description: iostart-side routine for power management.
11243  *
11244  *     Context: Kernel thread context
11245  */
11246 
11247 static void
11248 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11249 {
11250 	ASSERT(un != NULL);
11251 	ASSERT(bp != NULL);
11252 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11253 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11254 
11255 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11256 
11257 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11258 		/*
11259 		 * Set up to return the failed buf back up the 'iodone'
11260 		 * side of the calling chain.
11261 		 */
11262 		bioerror(bp, EIO);
11263 		bp->b_resid = bp->b_bcount;
11264 
11265 		SD_BEGIN_IODONE(index, un, bp);
11266 
11267 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11268 		return;
11269 	}
11270 
11271 	SD_NEXT_IOSTART(index, un, bp);
11272 
11273 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11274 }
11275 
11276 
11277 /*
11278  *    Function: sd_pm_iodone
11279  *
11280  * Description: iodone-side routine for power management.
11281  *
11282  *     Context: may be called from interrupt context
11283  */
11284 
11285 static void
11286 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11287 {
11288 	ASSERT(un != NULL);
11289 	ASSERT(bp != NULL);
11290 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11291 
11292 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11293 
11294 	/*
11295 	 * After attach the following flag is only read, so don't
11296 	 * take the penalty of acquiring a mutex for it.
11297 	 */
11298 	if (un->un_f_pm_is_enabled == TRUE) {
11299 		sd_pm_exit(un);
11300 	}
11301 
11302 	SD_NEXT_IODONE(index, un, bp);
11303 
11304 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11305 }
11306 
11307 
11308 /*
11309  *    Function: sd_core_iostart
11310  *
11311  * Description: Primary driver function for enqueuing buf(9S) structs from
11312  *		the system and initiating IO to the target device
11313  *
11314  *     Context: Kernel thread context. Can sleep.
11315  *
11316  * Assumptions:  - The given xp->xb_blkno is absolute
11317  *		   (ie, relative to the start of the device).
11318  *		 - The IO is to be done using the native blocksize of
11319  *		   the device, as specified in un->un_tgt_blocksize.
11320  */
11321 /* ARGSUSED */
11322 static void
11323 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11324 {
11325 	struct sd_xbuf *xp;
11326 
11327 	ASSERT(un != NULL);
11328 	ASSERT(bp != NULL);
11329 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11330 	ASSERT(bp->b_resid == 0);
11331 
11332 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11333 
11334 	xp = SD_GET_XBUF(bp);
11335 	ASSERT(xp != NULL);
11336 
11337 	mutex_enter(SD_MUTEX(un));
11338 
11339 	/*
11340 	 * If we are currently in the failfast state, fail any new IO
11341 	 * that has B_FAILFAST set, then return.
11342 	 */
11343 	if ((bp->b_flags & B_FAILFAST) &&
11344 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11345 		mutex_exit(SD_MUTEX(un));
11346 		bioerror(bp, EIO);
11347 		bp->b_resid = bp->b_bcount;
11348 		SD_BEGIN_IODONE(index, un, bp);
11349 		return;
11350 	}
11351 
11352 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11353 		/*
11354 		 * Priority command -- transport it immediately.
11355 		 *
11356 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11357 		 * because all direct priority commands should be associated
11358 		 * with error recovery actions which we don't want to retry.
11359 		 */
11360 		sd_start_cmds(un, bp);
11361 	} else {
11362 		/*
11363 		 * Normal command -- add it to the wait queue, then start
11364 		 * transporting commands from the wait queue.
11365 		 */
11366 		sd_add_buf_to_waitq(un, bp);
11367 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11368 		sd_start_cmds(un, NULL);
11369 	}
11370 
11371 	mutex_exit(SD_MUTEX(un));
11372 
11373 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11374 }
11375 
11376 
11377 /*
11378  *    Function: sd_init_cdb_limits
11379  *
11380  * Description: This is to handle scsi_pkt initialization differences
11381  *		between the driver platforms.
11382  *
11383  *		Legacy behaviors:
11384  *
11385  *		If the block number or the sector count exceeds the
11386  *		capabilities of a Group 0 command, shift over to a
11387  *		Group 1 command. We don't blindly use Group 1
11388  *		commands because a) some drives (CDC Wren IVs) get a
11389  *		bit confused, and b) there is probably a fair amount
11390  *		of speed difference for a target to receive and decode
11391  *		a 10 byte command instead of a 6 byte command.
11392  *
11393  *		The xfer time difference of 6 vs 10 byte CDBs is
11394  *		still significant so this code is still worthwhile.
11395  *		10 byte CDBs are very inefficient with the fas HBA driver
11396  *		and older disks. Each CDB byte took 1 usec with some
11397  *		popular disks.
11398  *
11399  *     Context: Must be called at attach time
11400  */
11401 
11402 static void
11403 sd_init_cdb_limits(struct sd_lun *un)
11404 {
11405 	int hba_cdb_limit;
11406 
11407 	/*
11408 	 * Use CDB_GROUP1 commands for most devices except for
11409 	 * parallel SCSI fixed drives in which case we get better
11410 	 * performance using CDB_GROUP0 commands (where applicable).
11411 	 */
11412 	un->un_mincdb = SD_CDB_GROUP1;
11413 #if !defined(__fibre)
11414 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11415 	    !un->un_f_has_removable_media) {
11416 		un->un_mincdb = SD_CDB_GROUP0;
11417 	}
11418 #endif
11419 
11420 	/*
11421 	 * Try to read the max-cdb-length supported by the HBA.
11422 	 */
11423 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11424 	if (0 >= un->un_max_hba_cdb) {
11425 		un->un_max_hba_cdb = CDB_GROUP4;
11426 		hba_cdb_limit = SD_CDB_GROUP4;
11427 	} else if (0 < un->un_max_hba_cdb &&
11428 	    un->un_max_hba_cdb < CDB_GROUP1) {
11429 		hba_cdb_limit = SD_CDB_GROUP0;
11430 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11431 	    un->un_max_hba_cdb < CDB_GROUP5) {
11432 		hba_cdb_limit = SD_CDB_GROUP1;
11433 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11434 	    un->un_max_hba_cdb < CDB_GROUP4) {
11435 		hba_cdb_limit = SD_CDB_GROUP5;
11436 	} else {
11437 		hba_cdb_limit = SD_CDB_GROUP4;
11438 	}
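
	/*
	 * For example, assuming the usual CDB group sizes (CDB_GROUP0,
	 * CDB_GROUP1, CDB_GROUP5 and CDB_GROUP4 are 6, 10, 12 and 16
	 * bytes respectively): an HBA reporting a max-cdb-length of 10
	 * falls in the [CDB_GROUP1, CDB_GROUP5) range and yields
	 * hba_cdb_limit = SD_CDB_GROUP1, while an HBA that does not
	 * support the capability (scsi_ifgetcap() returns -1 or 0)
	 * defaults to SD_CDB_GROUP4.
	 */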
11439 
11440 	/*
11441 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11442 	 * commands for fixed disks unless we are building for a 32 bit
11443 	 * kernel.
11444 	 */
11445 #ifdef _LP64
11446 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11447 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11448 #else
11449 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11450 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11451 #endif
11452 
11453 	/*
11454 	 * x86 systems require the PKT_DMA_PARTIAL flag
11455 	 */
11456 #if defined(__x86)
11457 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11458 #else
11459 	un->un_pkt_flags = 0;
11460 #endif
11461 
11462 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11463 	    ? sizeof (struct scsi_arq_status) : 1);
11464 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11465 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11466 }
11467 
11468 
11469 /*
11470  *    Function: sd_initpkt_for_buf
11471  *
11472  * Description: Allocate and initialize for transport a scsi_pkt struct,
11473  *		based upon the info specified in the given buf struct.
11474  *
11475  *		Assumes the xb_blkno in the request is absolute (ie,
11476  *		relative to the start of the device (NOT partition!).
11477  *		Also assumes that the request is using the native block
11478  *		size of the device (as returned by the READ CAPACITY
11479  *		command).
11480  *
11481  * Return Code: SD_PKT_ALLOC_SUCCESS
11482  *		SD_PKT_ALLOC_FAILURE
11483  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11484  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11485  *
11486  *     Context: Kernel thread and may be called from software interrupt context
11487  *		as part of a sdrunout callback. This function may not block or
11488  *		call routines that block
11489  */
11490 
11491 static int
11492 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11493 {
11494 	struct sd_xbuf	*xp;
11495 	struct scsi_pkt *pktp = NULL;
11496 	struct sd_lun	*un;
11497 	size_t		blockcount;
11498 	daddr_t		startblock;
11499 	int		rval;
11500 	int		cmd_flags;
11501 
11502 	ASSERT(bp != NULL);
11503 	ASSERT(pktpp != NULL);
11504 	xp = SD_GET_XBUF(bp);
11505 	ASSERT(xp != NULL);
11506 	un = SD_GET_UN(bp);
11507 	ASSERT(un != NULL);
11508 	ASSERT(mutex_owned(SD_MUTEX(un)));
11509 	ASSERT(bp->b_resid == 0);
11510 
11511 	SD_TRACE(SD_LOG_IO_CORE, un,
11512 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11513 
11514 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11515 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11516 		/*
11517 		 * Already have a scsi_pkt -- just need DMA resources.
11518 		 * We must recompute the CDB in case the mapping returns
11519 		 * a nonzero pkt_resid.
11520 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11521 		 * that is being retried, the unmap/remap of the DMA resources
11522 		 * will result in the entire transfer starting over again
11523 		 * from the very first block.
11524 		 */
11525 		ASSERT(xp->xb_pktp != NULL);
11526 		pktp = xp->xb_pktp;
11527 	} else {
11528 		pktp = NULL;
11529 	}
11530 #endif /* __i386 || __amd64 */
11531 
11532 	startblock = xp->xb_blkno;	/* Absolute block num. */
11533 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11534 
11535 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11536 
11537 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11538 
11539 #else
11540 
11541 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11542 
11543 #endif
11544 
11545 	/*
11546 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11547 	 * call scsi_init_pkt, and build the CDB.
11548 	 */
11549 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11550 	    cmd_flags, sdrunout, (caddr_t)un,
11551 	    startblock, blockcount);
11552 
11553 	if (rval == 0) {
11554 		/*
11555 		 * Success.
11556 		 *
11557 		 * If partial DMA is being used and required for this transfer,
11558 		 * set it up here.
11559 		 */
11560 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11561 		    (pktp->pkt_resid != 0)) {
11562 
11563 			/*
11564 			 * Save the DMA residual (pkt_resid) for the
11565 			 * next portion of the transfer.
11566 			 */
11567 			xp->xb_dma_resid = pktp->pkt_resid;
11568 
11569 			/* rezero resid */
11570 			pktp->pkt_resid = 0;
11571 
11572 		} else {
11573 			xp->xb_dma_resid = 0;
11574 		}
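		/*
		 * For example (illustrative numbers only): if b_bcount maps
		 * to 2048 target blocks but the HBA could bind DMA for only
		 * 1536 of them, pkt_resid holds the unbound byte count.
		 * Saving it in xb_dma_resid and clearing pkt_resid lets this
		 * first chunk transport cleanly; the next chunk is then set
		 * up from the saved residual (see sd_setup_next_xfer).
		 */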
11575 
11576 		pktp->pkt_flags = un->un_tagflags;
11577 		pktp->pkt_time  = un->un_cmd_timeout;
11578 		pktp->pkt_comp  = sdintr;
11579 
11580 		pktp->pkt_private = bp;
11581 		*pktpp = pktp;
11582 
11583 		SD_TRACE(SD_LOG_IO_CORE, un,
11584 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11585 
11586 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11587 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11588 #endif
11589 
11590 		return (SD_PKT_ALLOC_SUCCESS);
11591 
11592 	}
11593 
11594 	/*
11595 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11596 	 * from sd_setup_rw_pkt.
11597 	 */
11598 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11599 
11600 	if (rval == SD_PKT_ALLOC_FAILURE) {
11601 		*pktpp = NULL;
11602 		/*
11603 		 * Set the driver state to RWAIT to indicate the driver
11604 		 * is waiting on resource allocations. The driver will not
11605 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11606 		 */
11607 		New_state(un, SD_STATE_RWAIT);
11608 
11609 		SD_ERROR(SD_LOG_IO_CORE, un,
11610 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11611 
11612 		if ((bp->b_flags & B_ERROR) != 0) {
11613 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11614 		}
11615 		return (SD_PKT_ALLOC_FAILURE);
11616 	} else {
11617 		/*
11618 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11619 		 *
11620 		 * This should never happen.  Maybe someone messed with the
11621 		 * kernel's minphys?
11622 		 */
11623 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11624 		    "Request rejected: too large for CDB: "
11625 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11626 		SD_ERROR(SD_LOG_IO_CORE, un,
11627 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11628 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11629 
11630 	}
11631 }
11632 
11633 
11634 /*
11635  *    Function: sd_destroypkt_for_buf
11636  *
11637  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11638  *
11639  *     Context: Kernel thread or interrupt context
11640  */
11641 
11642 static void
11643 sd_destroypkt_for_buf(struct buf *bp)
11644 {
11645 	ASSERT(bp != NULL);
11646 	ASSERT(SD_GET_UN(bp) != NULL);
11647 
11648 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11649 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11650 
11651 	ASSERT(SD_GET_PKTP(bp) != NULL);
11652 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11653 
11654 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11655 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11656 }
11657 
11658 /*
11659  *    Function: sd_setup_rw_pkt
11660  *
11661  * Description: Determines appropriate CDB group for the requested LBA
11662  *		and transfer length, calls scsi_init_pkt, and builds
11663  *		the CDB.  Do not use for partial DMA transfers except
11664  *		for the initial transfer since the CDB size must
11665  *		remain constant.
11666  *
11667  *     Context: Kernel thread and may be called from software interrupt
11668  *		context as part of a sdrunout callback. This function may not
11669  *		block or call routines that block
11670  */
11671 
11672 
11673 int
11674 sd_setup_rw_pkt(struct sd_lun *un,
11675     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11676     int (*callback)(caddr_t), caddr_t callback_arg,
11677     diskaddr_t lba, uint32_t blockcount)
11678 {
11679 	struct scsi_pkt *return_pktp;
11680 	union scsi_cdb *cdbp;
11681 	struct sd_cdbinfo *cp = NULL;
11682 	int i;
11683 
11684 	/*
11685 	 * See which size CDB to use, based upon the request.
11686 	 */
11687 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11688 
11689 		/*
11690 		 * Check lba and block count against sd_cdbtab limits.
11691 		 * In the partial DMA case, we have to use the same size
11692 		 * CDB for all the transfers.  Check lba + blockcount
11693 		 * against the max LBA so we know that every segment of the
11694 		 * transfer can use the CDB we select.
11695 		 */
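		/*
		 * For reference (limits per the SCSI spec; the actual
		 * sd_cdbtab values are defined elsewhere in this file):
		 * a 6-byte CDB carries a 21-bit LBA and an 8-bit count,
		 * a 10-byte CDB a 32-bit LBA and a 16-bit count, and the
		 * 12- and 16-byte CDBs a 32- and 64-bit LBA respectively,
		 * each with a 32-bit count.
		 */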
11696 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11697 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11698 
11699 			/*
11700 			 * The command will fit into the CDB type
11701 			 * specified by sd_cdbtab[i].
11702 			 */
11703 			cp = sd_cdbtab + i;
11704 
11705 			/*
11706 			 * Call scsi_init_pkt so we can fill in the
11707 			 * CDB.
11708 			 */
11709 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11710 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11711 			    flags, callback, callback_arg);
11712 
11713 			if (return_pktp != NULL) {
11714 
11715 				/*
11716 				 * Return new value of pkt
11717 				 */
11718 				*pktpp = return_pktp;
11719 
11720 				/*
11721 				 * To be safe, zero the CDB, ensuring there is
11722 				 * no leftover data from a previous command.
11723 				 */
11724 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11725 
11726 				/*
11727 				 * Handle partial DMA mapping
11728 				 */
11729 				if (return_pktp->pkt_resid != 0) {
11730 
11731 					/*
11732 					 * Not going to xfer as many blocks as
11733 					 * originally expected
11734 					 */
11735 					blockcount -=
11736 					    SD_BYTES2TGTBLOCKS(un,
11737 						return_pktp->pkt_resid);
11738 				}
11739 
11740 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11741 
11742 				/*
11743 				 * Set command byte based on the CDB
11744 				 * type we matched.
11745 				 */
11746 				cdbp->scc_cmd = cp->sc_grpmask |
11747 				    ((bp->b_flags & B_READ) ?
11748 					SCMD_READ : SCMD_WRITE);
11749 
11750 				SD_FILL_SCSI1_LUN(un, return_pktp);
11751 
11752 				/*
11753 				 * Fill in LBA and length
11754 				 */
11755 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
11756 				    (cp->sc_grpcode == CDB_GROUP4) ||
11757 				    (cp->sc_grpcode == CDB_GROUP0) ||
11758 				    (cp->sc_grpcode == CDB_GROUP5));
11759 
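				/*
				 * The FORMG*ADDR/COUNT macros below store
				 * the LBA and count into the CDB in
				 * big-endian order.  As an illustration, a
				 * Group 1 READ of 8 blocks at lba 0x1234
				 * would yield the 10-byte CDB:
				 *
				 *	0x28 0x00 0x00 0x00 0x12 0x34
				 *	0x00 0x00 0x08 0x00
				 */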
11760 				if (cp->sc_grpcode == CDB_GROUP1) {
11761 					FORMG1ADDR(cdbp, lba);
11762 					FORMG1COUNT(cdbp, blockcount);
11763 					return (0);
11764 				} else if (cp->sc_grpcode == CDB_GROUP4) {
11765 					FORMG4LONGADDR(cdbp, lba);
11766 					FORMG4COUNT(cdbp, blockcount);
11767 					return (0);
11768 				} else if (cp->sc_grpcode == CDB_GROUP0) {
11769 					FORMG0ADDR(cdbp, lba);
11770 					FORMG0COUNT(cdbp, blockcount);
11771 					return (0);
11772 				} else if (cp->sc_grpcode == CDB_GROUP5) {
11773 					FORMG5ADDR(cdbp, lba);
11774 					FORMG5COUNT(cdbp, blockcount);
11775 					return (0);
11776 				}
11777 
11778 				/*
11779 				 * It should be impossible to not match one
11780 				 * of the CDB types above, so we should never
11781 				 * reach this point.  Set the CDB command byte
11782 				 * to test-unit-ready to avoid writing
11783 				 * to somewhere we don't intend.
11784 				 */
11785 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
11786 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11787 			} else {
11788 				/*
11789 				 * Couldn't get scsi_pkt
11790 				 */
11791 				return (SD_PKT_ALLOC_FAILURE);
11792 			}
11793 		}
11794 	}
11795 
11796 	/*
11797 	 * None of the available CDB types were suitable.  This really
11798 	 * should never happen: on a 64-bit system we support
11799 	 * READ16/WRITE16, which can hold an entire 64-bit disk address;
11800 	 * on a 32-bit system we refuse to bind to a device larger than
11801 	 * 2TB, so addresses will never be larger than 32 bits.
11802 	 */
11803 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11804 }
11805 
11806 #if defined(__i386) || defined(__amd64)
11807 /*
11808  *    Function: sd_setup_next_rw_pkt
11809  *
11810  * Description: Set up a packet for partial DMA transfers, except for
11811  *		the initial transfer.  sd_setup_rw_pkt should be used for
11812  *		the initial transfer.
11813  *
11814  *     Context: Kernel thread and may be called from interrupt context.
11815  */
11816 
11817 int
11818 sd_setup_next_rw_pkt(struct sd_lun *un,
11819     struct scsi_pkt *pktp, struct buf *bp,
11820     diskaddr_t lba, uint32_t blockcount)
11821 {
11822 	uchar_t com;
11823 	union scsi_cdb *cdbp;
11824 	uchar_t cdb_group_id;
11825 
11826 	ASSERT(pktp != NULL);
11827 	ASSERT(pktp->pkt_cdbp != NULL);
11828 
11829 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
11830 	com = cdbp->scc_cmd;
11831 	cdb_group_id = CDB_GROUPID(com);
11832 
11833 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
11834 	    (cdb_group_id == CDB_GROUPID_1) ||
11835 	    (cdb_group_id == CDB_GROUPID_4) ||
11836 	    (cdb_group_id == CDB_GROUPID_5));
11837 
11838 	/*
11839 	 * Move pkt to the next portion of the xfer.
11840 	 * func is NULL_FUNC so we do not have to release
11841 	 * the disk mutex here.
11842 	 */
11843 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
11844 	    NULL_FUNC, NULL) == pktp) {
11845 		/* Success.  Handle partial DMA */
11846 		if (pktp->pkt_resid != 0) {
11847 			blockcount -=
11848 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
11849 		}
11850 
11851 		cdbp->scc_cmd = com;
11852 		SD_FILL_SCSI1_LUN(un, pktp);
11853 		if (cdb_group_id == CDB_GROUPID_1) {
11854 			FORMG1ADDR(cdbp, lba);
11855 			FORMG1COUNT(cdbp, blockcount);
11856 			return (0);
11857 		} else if (cdb_group_id == CDB_GROUPID_4) {
11858 			FORMG4LONGADDR(cdbp, lba);
11859 			FORMG4COUNT(cdbp, blockcount);
11860 			return (0);
11861 		} else if (cdb_group_id == CDB_GROUPID_0) {
11862 			FORMG0ADDR(cdbp, lba);
11863 			FORMG0COUNT(cdbp, blockcount);
11864 			return (0);
11865 		} else if (cdb_group_id == CDB_GROUPID_5) {
11866 			FORMG5ADDR(cdbp, lba);
11867 			FORMG5COUNT(cdbp, blockcount);
11868 			return (0);
11869 		}
11870 
11871 		/* Unreachable */
11872 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11873 	}
11874 
11875 	/*
11876 	 * Error setting up next portion of cmd transfer.
11877 	 * Something is definitely very wrong and this
11878 	 * should not happen.
11879 	 */
11880 	return (SD_PKT_ALLOC_FAILURE);
11881 }
11882 #endif /* defined(__i386) || defined(__amd64) */
11883 
11884 /*
11885  *    Function: sd_initpkt_for_uscsi
11886  *
11887  * Description: Allocate and initialize for transport a scsi_pkt struct,
11888  *		based upon the info specified in the given uscsi_cmd struct.
11889  *
11890  * Return Code: SD_PKT_ALLOC_SUCCESS
11891  *		SD_PKT_ALLOC_FAILURE
11892  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11893  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11894  *
11895  *     Context: Kernel thread and may be called from software interrupt context
11896  *		as part of a sdrunout callback. This function may not block or
11897  *		call routines that block
11898  */
11899 
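/*
 * A minimal, hypothetical sketch of the userland side that originates such
 * a request via the USCSICMD ioctl (field names per uscsi(7I); this snippet
 * is illustrative and not part of the driver):
 *
 *	struct uscsi_cmd ucmd;
 *	union scsi_cdb cdb;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = 30;	(seconds)
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		perror("USCSICMD");
 */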
11900 static int
11901 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
11902 {
11903 	struct uscsi_cmd *uscmd;
11904 	struct sd_xbuf	*xp;
11905 	struct scsi_pkt	*pktp;
11906 	struct sd_lun	*un;
11907 	uint32_t	flags = 0;
11908 
11909 	ASSERT(bp != NULL);
11910 	ASSERT(pktpp != NULL);
11911 	xp = SD_GET_XBUF(bp);
11912 	ASSERT(xp != NULL);
11913 	un = SD_GET_UN(bp);
11914 	ASSERT(un != NULL);
11915 	ASSERT(mutex_owned(SD_MUTEX(un)));
11916 
11917 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
11918 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
11919 	ASSERT(uscmd != NULL);
11920 
11921 	SD_TRACE(SD_LOG_IO_CORE, un,
11922 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
11923 
11924 	/*
11925 	 * Allocate the scsi_pkt for the command.
11926 	 * Note: If the PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
11927 	 *	 during scsi_init_pkt time and will continue to use the
11928 	 *	 same path as long as the same scsi_pkt is used without an
11929 	 *	 intervening scsi_dmafree(). Since a uscsi command does not
11930 	 *	 call scsi_dmafree() before retrying a failed command, it
11931 	 *	 is necessary to make sure the PKT_DMA_PARTIAL flag is NOT
11932 	 *	 set, so that scsi_vhci can use another available path for
11933 	 *	 the retry. Besides, uscsi commands do not allow DMA breakup,
11934 	 *	 so there is no need to set the PKT_DMA_PARTIAL flag.
11935 	 */
11936 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
11937 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
11938 	    sizeof (struct scsi_arq_status), 0,
11939 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
11940 	    sdrunout, (caddr_t)un);
11941 
11942 	if (pktp == NULL) {
11943 		*pktpp = NULL;
11944 		/*
11945 		 * Set the driver state to RWAIT to indicate the driver
11946 		 * is waiting on resource allocations. The driver will not
11947 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11948 		 */
11949 		New_state(un, SD_STATE_RWAIT);
11950 
11951 		SD_ERROR(SD_LOG_IO_CORE, un,
11952 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
11953 
11954 		if ((bp->b_flags & B_ERROR) != 0) {
11955 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11956 		}
11957 		return (SD_PKT_ALLOC_FAILURE);
11958 	}
11959 
11960 	/*
11961 	 * We do not do DMA breakup for USCSI commands, so return failure
11962 	 * here if all the needed DMA resources were not allocated.
11963 	 */
11964 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
11965 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
11966 		scsi_destroy_pkt(pktp);
11967 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
11968 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
11969 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
11970 	}
11971 
11972 	/* Init the cdb from the given uscsi struct */
11973 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
11974 	    uscmd->uscsi_cdb[0], 0, 0, 0);
11975 
11976 	SD_FILL_SCSI1_LUN(un, pktp);
11977 
11978 	/*
11979 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
11980 	 * for a listing of the supported flags.
11981 	 */
11982 
11983 	if (uscmd->uscsi_flags & USCSI_SILENT) {
11984 		flags |= FLAG_SILENT;
11985 	}
11986 
11987 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
11988 		flags |= FLAG_DIAGNOSE;
11989 	}
11990 
11991 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
11992 		flags |= FLAG_ISOLATE;
11993 	}
11994 
11995 	if (un->un_f_is_fibre == FALSE) {
11996 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
11997 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
11998 		}
11999 	}
12000 
12001 	/*
12002 	 * Set the pkt flags here so we save time later.
12003 	 * Note: These flags are NOT in the uscsi man page!!!
12004 	 */
12005 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12006 		flags |= FLAG_HEAD;
12007 	}
12008 
12009 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12010 		flags |= FLAG_NOINTR;
12011 	}
12012 
12013 	/*
12014 	 * For tagged queueing, things get a bit complicated.
12015 	 * Check first for head of queue and last for ordered queue.
12016 	 * If neither head nor ordered, use the default driver tag flags.
12017 	 */
12018 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12019 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12020 			flags |= FLAG_HTAG;
12021 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12022 			flags |= FLAG_OTAG;
12023 		} else {
12024 			flags |= un->un_tagflags & FLAG_TAGMASK;
12025 		}
12026 	}
12027 
12028 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12029 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12030 	}
12031 
12032 	pktp->pkt_flags = flags;
12033 
12034 	/* Copy the caller's CDB into the pkt... */
12035 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12036 
12037 	if (uscmd->uscsi_timeout == 0) {
12038 		pktp->pkt_time = un->un_uscsi_timeout;
12039 	} else {
12040 		pktp->pkt_time = uscmd->uscsi_timeout;
12041 	}
12042 
12043 	/* need it later to identify USCSI request in sdintr */
12044 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12045 
12046 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12047 
12048 	pktp->pkt_private = bp;
12049 	pktp->pkt_comp = sdintr;
12050 	*pktpp = pktp;
12051 
12052 	SD_TRACE(SD_LOG_IO_CORE, un,
12053 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12054 
12055 	return (SD_PKT_ALLOC_SUCCESS);
12056 }
12057 
12058 
12059 /*
12060  *    Function: sd_destroypkt_for_uscsi
12061  *
12062  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12063  *		IOs. Also saves relevant info into the associated uscsi_cmd
12064  *		struct.
12065  *
12066  *     Context: May be called under interrupt context
12067  */
12068 
12069 static void
12070 sd_destroypkt_for_uscsi(struct buf *bp)
12071 {
12072 	struct uscsi_cmd *uscmd;
12073 	struct sd_xbuf	*xp;
12074 	struct scsi_pkt	*pktp;
12075 	struct sd_lun	*un;
12076 
12077 	ASSERT(bp != NULL);
12078 	xp = SD_GET_XBUF(bp);
12079 	ASSERT(xp != NULL);
12080 	un = SD_GET_UN(bp);
12081 	ASSERT(un != NULL);
12082 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12083 	pktp = SD_GET_PKTP(bp);
12084 	ASSERT(pktp != NULL);
12085 
12086 	SD_TRACE(SD_LOG_IO_CORE, un,
12087 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12088 
12089 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12090 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12091 	ASSERT(uscmd != NULL);
12092 
12093 	/* Save the status and the residual into the uscsi_cmd struct */
12094 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12095 	uscmd->uscsi_resid  = bp->b_resid;
12096 
12097 	/*
12098 	 * If enabled, copy any saved sense data into the area specified
12099 	 * by the uscsi command.
12100 	 */
12101 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12102 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12103 		/*
12104 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12105 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12106 		 */
12107 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12108 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12109 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12110 	}
12111 
12112 	/* We are done with the scsi_pkt; free it now */
12113 	ASSERT(SD_GET_PKTP(bp) != NULL);
12114 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12115 
12116 	SD_TRACE(SD_LOG_IO_CORE, un,
12117 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12118 }
12119 
12120 
12121 /*
12122  *    Function: sd_bioclone_alloc
12123  *
12124  * Description: Allocate a buf(9S) and init it as per the given buf
12125  *		and the various arguments.  The associated sd_xbuf
12126  *		struct is (nearly) duplicated.  The struct buf *bp
12127  *		argument is saved in new_xp->xb_private.
12128  *
12129  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12130  *		datalen - size of data area for the shadow bp
12131  *		blkno - starting LBA
12132  *		func - function pointer for b_iodone in the shadow buf. (May
12133  *			be NULL if none.)
12134  *
12135  * Return Code: Pointer to the allocated buf(9S) struct
12136  *
12137  *     Context: Can sleep.
12138  */
12139 
12140 static struct buf *
12141 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12142 	daddr_t blkno, int (*func)(struct buf *))
12143 {
12144 	struct	sd_lun	*un;
12145 	struct	sd_xbuf	*xp;
12146 	struct	sd_xbuf	*new_xp;
12147 	struct	buf	*new_bp;
12148 
12149 	ASSERT(bp != NULL);
12150 	xp = SD_GET_XBUF(bp);
12151 	ASSERT(xp != NULL);
12152 	un = SD_GET_UN(bp);
12153 	ASSERT(un != NULL);
12154 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12155 
12156 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12157 	    NULL, KM_SLEEP);
12158 
12159 	new_bp->b_lblkno	= blkno;
12160 
12161 	/*
12162 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12163 	 * original xbuf into it.
12164 	 */
12165 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12166 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12167 
12168 	/*
12169 	 * The given bp is automatically saved in the xb_private member
12170 	 * of the new xbuf.  Callers are allowed to depend on this.
12171 	 */
12172 	new_xp->xb_private = bp;
12173 
12174 	new_bp->b_private  = new_xp;
12175 
12176 	return (new_bp);
12177 }
12178 
12179 /*
12180  *    Function: sd_shadow_buf_alloc
12181  *
12182  * Description: Allocate a buf(9S) and init it as per the given buf
12183  *		and the various arguments.  The associated sd_xbuf
12184  *		struct is (nearly) duplicated.  The struct buf *bp
12185  *		argument is saved in new_xp->xb_private.
12186  *
12187  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12188  *		datalen - size of data area for the shadow bp
12189  *		bflags - B_READ or B_WRITE (pseudo flag)
12190  *		blkno - starting LBA
12191  *		func - function pointer for b_iodone in the shadow buf. (May
12192  *			be NULL if none.)
12193  *
12194  * Return Code: Pointer to the allocated buf(9S) struct
12195  *
12196  *     Context: Can sleep.
12197  */
12198 
12199 static struct buf *
12200 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12201 	daddr_t blkno, int (*func)(struct buf *))
12202 {
12203 	struct	sd_lun	*un;
12204 	struct	sd_xbuf	*xp;
12205 	struct	sd_xbuf	*new_xp;
12206 	struct	buf	*new_bp;
12207 
12208 	ASSERT(bp != NULL);
12209 	xp = SD_GET_XBUF(bp);
12210 	ASSERT(xp != NULL);
12211 	un = SD_GET_UN(bp);
12212 	ASSERT(un != NULL);
12213 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12214 
12215 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12216 		bp_mapin(bp);
12217 	}
12218 
12219 	bflags &= (B_READ | B_WRITE);
12220 #if defined(__i386) || defined(__amd64)
12221 	new_bp = getrbuf(KM_SLEEP);
12222 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12223 	new_bp->b_bcount = datalen;
12224 	new_bp->b_flags = bflags |
12225 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12226 #else
12227 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12228 	    datalen, bflags, SLEEP_FUNC, NULL);
12229 #endif
12230 	new_bp->av_forw	= NULL;
12231 	new_bp->av_back	= NULL;
12232 	new_bp->b_dev	= bp->b_dev;
12233 	new_bp->b_blkno	= blkno;
12234 	new_bp->b_iodone = func;
12235 	new_bp->b_edev	= bp->b_edev;
12236 	new_bp->b_resid	= 0;
12237 
12238 	/* We need to preserve the B_FAILFAST flag */
12239 	if (bp->b_flags & B_FAILFAST) {
12240 		new_bp->b_flags |= B_FAILFAST;
12241 	}
12242 
12243 	/*
12244 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12245 	 * original xbuf into it.
12246 	 */
12247 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12248 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12249 
12250 	/* Need later to copy data between the shadow buf & original buf! */
12251 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12252 
12253 	/*
12254 	 * The given bp is automatically saved in the xb_private member
12255 	 * of the new xbuf.  Callers are allowed to depend on this.
12256 	 */
12257 	new_xp->xb_private = bp;
12258 
12259 	new_bp->b_private  = new_xp;
12260 
12261 	return (new_bp);
12262 }
12263 
12264 /*
12265  *    Function: sd_bioclone_free
12266  *
12267  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12268  *		in the larger-than-partition operation.
12269  *
12270  *     Context: May be called under interrupt context
12271  */
12272 
12273 static void
12274 sd_bioclone_free(struct buf *bp)
12275 {
12276 	struct sd_xbuf	*xp;
12277 
12278 	ASSERT(bp != NULL);
12279 	xp = SD_GET_XBUF(bp);
12280 	ASSERT(xp != NULL);
12281 
12282 	/*
12283 	 * Call bp_mapout() before freeing the buf, in case a lower
12284 	 * layer or HBA had done a bp_mapin().  We must do this here
12285 	 * as we are the "originator" of the shadow buf.
12286 	 */
12287 	bp_mapout(bp);
12288 
12289 	/*
12290 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12291 	 * never gets confused by a stale value in this field. (Just a little
12292 	 * extra defensiveness here.)
12293 	 */
12294 	bp->b_iodone = NULL;
12295 
12296 	freerbuf(bp);
12297 
12298 	kmem_free(xp, sizeof (struct sd_xbuf));
12299 }
12300 
12301 /*
12302  *    Function: sd_shadow_buf_free
12303  *
12304  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12305  *
12306  *     Context: May be called under interrupt context
12307  */
12308 
12309 static void
12310 sd_shadow_buf_free(struct buf *bp)
12311 {
12312 	struct sd_xbuf	*xp;
12313 
12314 	ASSERT(bp != NULL);
12315 	xp = SD_GET_XBUF(bp);
12316 	ASSERT(xp != NULL);
12317 
12318 #if defined(__sparc)
12319 	/*
12320 	 * Call bp_mapout() before freeing the buf, in case a lower
12321 	 * layer or HBA had done a bp_mapin().  We must do this here
12322 	 * as we are the "originator" of the shadow buf.
12323 	 */
12324 	bp_mapout(bp);
12325 #endif
12326 
12327 	/*
12328 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12329 	 * never gets confused by a stale value in this field. (Just a little
12330 	 * extra defensiveness here.)
12331 	 */
12332 	bp->b_iodone = NULL;
12333 
12334 #if defined(__i386) || defined(__amd64)
12335 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12336 	freerbuf(bp);
12337 #else
12338 	scsi_free_consistent_buf(bp);
12339 #endif
12340 
12341 	kmem_free(xp, sizeof (struct sd_xbuf));
12342 }
12343 
12344 
12345 /*
12346  *    Function: sd_print_transport_rejected_message
12347  *
12348  * Description: This implements the ludicrously complex rules for printing
12349  *		a "transport rejected" message.  This is to address the
12350  *		specific problem of having a flood of this error message
12351  *		produced when a failover occurs.
12352  *
12353  *     Context: Any.
12354  */
12355 
12356 static void
12357 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12358 	int code)
12359 {
12360 	ASSERT(un != NULL);
12361 	ASSERT(mutex_owned(SD_MUTEX(un)));
12362 	ASSERT(xp != NULL);
12363 
12364 	/*
12365 	 * Print the "transport rejected" message under the following
12366 	 * conditions:
12367 	 *
12368 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
12369 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12370 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12371 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12372 	 *   scsi_transport(9F) (which indicates that the target might have
12373 	 *   gone off-line).  This uses the un->un_tran_fatal_count
12374 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
12375 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
12376 	 *   from scsi_transport().
12377 	 *
12378 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
12379 	 * the preceding cases in order for the message to be printed.
12380 	 */
12381 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12382 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12383 		    (code != TRAN_FATAL_ERROR) ||
12384 		    (un->un_tran_fatal_count == 1)) {
12385 			switch (code) {
12386 			case TRAN_BADPKT:
12387 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12388 				    "transport rejected bad packet\n");
12389 				break;
12390 			case TRAN_FATAL_ERROR:
12391 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12392 				    "transport rejected fatal error\n");
12393 				break;
12394 			default:
12395 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12396 				    "transport rejected (%d)\n", code);
12397 				break;
12398 			}
12399 		}
12400 	}
12401 }
12402 
12403 
12404 /*
12405  *    Function: sd_add_buf_to_waitq
12406  *
12407  * Description: Add the given buf(9S) struct to the wait queue for the
12408  *		instance.  If sorting is enabled, then the buf is added
12409  *		to the queue via an elevator sort algorithm (a la
12410  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12411  *		If sorting is not enabled, then the buf is just added
12412  *		to the end of the wait queue.
12413  *
12414  * Return Code: void
12415  *
12416  *     Context: Does not sleep/block, therefore technically can be called
12417  *		from any context.  However, if sorting is enabled, the
12418  *		execution time is indeterminate and may be long if
12419  *		the wait queue grows large.
12420  */
12421 
12422 static void
12423 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12424 {
12425 	struct buf *ap;
12426 
12427 	ASSERT(bp != NULL);
12428 	ASSERT(un != NULL);
12429 	ASSERT(mutex_owned(SD_MUTEX(un)));
12430 
12431 	/* If the queue is empty, add the buf as the only entry & return. */
12432 	if (un->un_waitq_headp == NULL) {
12433 		ASSERT(un->un_waitq_tailp == NULL);
12434 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12435 		bp->av_forw = NULL;
12436 		return;
12437 	}
12438 
12439 	ASSERT(un->un_waitq_tailp != NULL);
12440 
12441 	/*
12442 	 * If sorting is disabled, just add the buf to the tail end of
12443 	 * the wait queue and return.
12444 	 */
12445 	if (un->un_f_disksort_disabled) {
12446 		un->un_waitq_tailp->av_forw = bp;
12447 		un->un_waitq_tailp = bp;
12448 		bp->av_forw = NULL;
12449 		return;
12450 	}
12451 
12452 	/*
12453 	 * Sort thru the list of requests currently on the wait queue
12454 	 * and add the new buf request at the appropriate position.
12455 	 *
12456 	 * The un->un_waitq_headp is an activity chain pointer on which
12457 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12458 	 * first queue holds those requests which are positioned after
12459 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12460 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12461 	 * Thus we implement a one way scan, retracting after reaching
12462 	 * the end of the drive to the first request on the second
12463 	 * queue, at which time it becomes the first queue.
12464 	 * A one-way scan is natural because of the way UNIX read-ahead
12465 	 * blocks are allocated.
12466 	 *
12467 	 * If we lie after the first request, then we must locate the
12468 	 * second request list and add ourselves to it.
12469 	 */
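	/*
	 * Worked example (illustrative blknos): suppose the queue holds
	 * 50 70 90 | 10 30, where the inversion 90 -> 10 marks the start
	 * of the second (next-sweep) list and the current head is 50.
	 * A new request for blkno 80 sorts into the first list between
	 * 70 and 90, while a request for blkno 20 lies below the head
	 * and therefore sorts into the second list between 10 and 30.
	 */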
12470 	ap = un->un_waitq_headp;
12471 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12472 		while (ap->av_forw != NULL) {
12473 			/*
12474 			 * Look for an "inversion" in the (normally
12475 			 * ascending) block numbers. This indicates
12476 			 * the start of the second request list.
12477 			 */
12478 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12479 				/*
12480 				 * Search the second request list for the
12481 				 * first request at a larger block number.
12482 				 * We go before that; however if there is
12483 				 * no such request, we go at the end.
12484 				 */
12485 				do {
12486 					if (SD_GET_BLKNO(bp) <
12487 					    SD_GET_BLKNO(ap->av_forw)) {
12488 						goto insert;
12489 					}
12490 					ap = ap->av_forw;
12491 				} while (ap->av_forw != NULL);
12492 				goto insert;		/* after last */
12493 			}
12494 			ap = ap->av_forw;
12495 		}
12496 
12497 		/*
12498 		 * No inversions... we will go after the last, and
12499 		 * be the first request in the second request list.
12500 		 */
12501 		goto insert;
12502 	}
12503 
12504 	/*
12505 	 * Request is at/after the current request...
12506 	 * sort in the first request list.
12507 	 */
12508 	while (ap->av_forw != NULL) {
12509 		/*
12510 		 * We want to go after the current request (1) if
12511 		 * there is an inversion after it (i.e. it is the end
12512 		 * of the first request list), or (2) if the next
12513 		 * request is a larger block no. than our request.
12514 		 */
12515 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12516 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12517 			goto insert;
12518 		}
12519 		ap = ap->av_forw;
12520 	}
12521 
12522 	/*
12523 	 * Neither a second list nor a larger request, therefore
12524 	 * we go at the end of the first list (which is the same
12525 	 * as the end of the whole shebang).
12526 	 */
12527 insert:
12528 	bp->av_forw = ap->av_forw;
12529 	ap->av_forw = bp;
12530 
12531 	/*
12532 	 * If we inserted onto the tail end of the waitq, make sure the
12533 	 * tail pointer is updated.
12534 	 */
12535 	if (ap == un->un_waitq_tailp) {
12536 		un->un_waitq_tailp = bp;
12537 	}
12538 }
12539 
12540 
12541 /*
12542  *    Function: sd_start_cmds
12543  *
12544  * Description: Remove and transport cmds from the driver queues.
12545  *
12546  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12547  *
12548  *		immed_bp - ptr to a buf to be transported immediately. Only
12549  *		the immed_bp is transported; bufs on the waitq are not
12550  *		processed and the un_retry_bp is not checked.  If immed_bp is
12551  *		NULL, then normal queue processing is performed.
12552  *
12553  *     Context: May be called from kernel thread context, interrupt context,
12554  *		or runout callback context. This function may not block or
12555  *		call routines that block.
12556  */
12557 
12558 static void
12559 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12560 {
12561 	struct	sd_xbuf	*xp;
12562 	struct	buf	*bp;
12563 	void	(*statp)(kstat_io_t *);
12564 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12565 	void	(*saved_statp)(kstat_io_t *);
12566 #endif
12567 	int	rval;
12568 
12569 	ASSERT(un != NULL);
12570 	ASSERT(mutex_owned(SD_MUTEX(un)));
12571 	ASSERT(un->un_ncmds_in_transport >= 0);
12572 	ASSERT(un->un_throttle >= 0);
12573 
12574 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12575 
12576 	do {
12577 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12578 		saved_statp = NULL;
12579 #endif
12580 
12581 		/*
12582 		 * If we are syncing or dumping, fail the command to
12583 		 * avoid recursively calling back into scsi_transport().
12584 		 * The dump I/O itself uses a separate code path so this
12585 		 * only prevents non-dump I/O from being sent while dumping.
12586 		 * File system sync takes place before dumping begins.
12587 		 * During panic, filesystem I/O is allowed provided
12588 		 * un_in_callback is <= 1.  This is to prevent recursion
12589 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12590 		 * sd_start_cmds and so on.  See panic.c for more information
12591 		 * about the states the system can be in during panic.
12592 		 */
12593 		if ((un->un_state == SD_STATE_DUMPING) ||
12594 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12595 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12596 			    "sd_start_cmds: panicking\n");
12597 			goto exit;
12598 		}
12599 
12600 		if ((bp = immed_bp) != NULL) {
12601 			/*
12602 			 * We have a bp that must be transported immediately.
12603 			 * It's OK to transport the immed_bp here without doing
12604 			 * the throttle limit check because the immed_bp is
12605 			 * always used in a retry/recovery case. This means
12606 			 * that we know we are not at the throttle limit by
12607 			 * virtue of the fact that to get here we must have
12608 			 * already gotten a command back via sdintr(). This also
12609 			 * relies on (1) the command on un_retry_bp preventing
12610 			 * further commands from the waitq from being issued;
12611 			 * and (2) the code in sd_retry_command checking the
12612 			 * throttle limit before issuing a delayed or immediate
12613 			 * retry. This holds even if the throttle limit is
12614 			 * currently ratcheted down from its maximum value.
12615 			 */
12616 			statp = kstat_runq_enter;
12617 			if (bp == un->un_retry_bp) {
12618 				ASSERT((un->un_retry_statp == NULL) ||
12619 				    (un->un_retry_statp == kstat_waitq_enter) ||
12620 				    (un->un_retry_statp ==
12621 				    kstat_runq_back_to_waitq));
12622 				/*
12623 				 * If the waitq kstat was incremented when
12624 				 * sd_set_retry_bp() queued this bp for a retry,
12625 				 * then we must set up statp so that the waitq
12626 				 * count will get decremented correctly below.
12627 				 * Also we must clear un->un_retry_statp to
12628 				 * ensure that we do not act on a stale value
12629 				 * in this field.
12630 				 */
12631 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12632 				    (un->un_retry_statp ==
12633 				    kstat_runq_back_to_waitq)) {
12634 					statp = kstat_waitq_to_runq;
12635 				}
12636 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12637 				saved_statp = un->un_retry_statp;
12638 #endif
12639 				un->un_retry_statp = NULL;
12640 
12641 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12642 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12643 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12644 				    un, un->un_retry_bp, un->un_throttle,
12645 				    un->un_ncmds_in_transport);
12646 			} else {
12647 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12648 				    "processing priority bp:0x%p\n", bp);
12649 			}
12650 
12651 		} else if ((bp = un->un_waitq_headp) != NULL) {
12652 			/*
12653 			 * A command on the waitq is ready to go, but do not
12654 			 * send it if:
12655 			 *
12656 			 * (1) the throttle limit has been reached, or
12657 			 * (2) a retry is pending, or
12658 			 * (3) a START_STOP_UNIT callback is pending, or
12659 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
12660 			 *	command is pending.
12661 			 *
12662 			 * For all of these conditions, IO processing will
12663 			 * restart after the condition is cleared.
12664 			 */
12665 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12666 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12667 				    "sd_start_cmds: exiting, "
12668 				    "throttle limit reached!\n");
12669 				goto exit;
12670 			}
12671 			if (un->un_retry_bp != NULL) {
12672 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12673 				    "sd_start_cmds: exiting, retry pending!\n");
12674 				goto exit;
12675 			}
12676 			if (un->un_startstop_timeid != NULL) {
12677 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12678 				    "sd_start_cmds: exiting, "
12679 				    "START_STOP pending!\n");
12680 				goto exit;
12681 			}
12682 			if (un->un_direct_priority_timeid != NULL) {
12683 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12684 				    "sd_start_cmds: exiting, "
12685 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12686 				goto exit;
12687 			}
12688 
12689 			/* Dequeue the command */
12690 			un->un_waitq_headp = bp->av_forw;
12691 			if (un->un_waitq_headp == NULL) {
12692 				un->un_waitq_tailp = NULL;
12693 			}
12694 			bp->av_forw = NULL;
12695 			statp = kstat_waitq_to_runq;
12696 			SD_TRACE(SD_LOG_IO_CORE, un,
12697 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12698 
12699 		} else {
12700 			/* No work to do so bail out now */
12701 			SD_TRACE(SD_LOG_IO_CORE, un,
12702 			    "sd_start_cmds: no more work, exiting!\n");
12703 			goto exit;
12704 		}
12705 
12706 		/*
12707 		 * Reset the state to normal. This is the mechanism by which
12708 		 * the state transitions from either SD_STATE_RWAIT or
12709 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12710 		 * If state is SD_STATE_PM_CHANGING then this command is
12711 		 * part of the device power control and the state must
12712 		 * not be put back to normal. Doing so would
12713 		 * allow new commands to proceed when they shouldn't;
12714 		 * the device may be going off.
12715 		 */
12716 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12717 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12718 			New_state(un, SD_STATE_NORMAL);
12719 		}
12720 
12721 		xp = SD_GET_XBUF(bp);
12722 		ASSERT(xp != NULL);
12723 
12724 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12725 		/*
12726 		 * Allocate the scsi_pkt if we need one, or attach DMA
12727 		 * resources if we have a scsi_pkt that needs them. The
12728 		 * latter should only occur for commands that are being
12729 		 * retried.
12730 		 */
12731 		if ((xp->xb_pktp == NULL) ||
12732 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12733 #else
12734 		if (xp->xb_pktp == NULL) {
12735 #endif
12736 			/*
12737 			 * There is no scsi_pkt allocated for this buf. Call
12738 			 * the initpkt function to allocate & init one.
12739 			 *
12740 			 * The scsi_init_pkt runout callback functionality is
12741 			 * implemented as follows:
12742 			 *
12743 			 * 1) The initpkt function always calls
12744 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12745 			 *    callback routine.
12746 			 * 2) A successfully allocated packet is initialized
12747 			 *    and the I/O is transported.
12748 			 * 3) The I/O associated with an allocation resource
12749 			 *    failure is left on its queue to be retried via
12750 			 *    runout or the next I/O.
12751 			 * 4) The I/O associated with a DMA error is removed
12752 			 *    from the queue and failed with EIO. Processing of
12753 			 *    the transport queues is also halted to be
12754 			 *    restarted via runout or the next I/O.
12755 			 * 5) The I/O associated with a CDB size or packet
12756 			 *    size error is removed from the queue and failed
12757 			 *    with EIO. Processing of the transport queues is
12758 			 *    continued.
12759 			 *
12760 			 * Note: there is no interface for canceling a runout
12761 			 * callback. To prevent the driver from detaching or
12762 			 * suspending while a runout is pending the driver
12763 			 * state is set to SD_STATE_RWAIT
12764 			 *
12765 			 * Note: using the scsi_init_pkt callback facility can
12766 			 * result in an I/O request persisting at the head of
12767 			 * the list which cannot be satisfied even after
12768 			 * multiple retries. In the future the driver may
12769 			 * implement some kind of maximum runout count before
12770 			 * failing an I/O.
12771 			 *
12772 			 * Note: the use of funcp below may seem superfluous,
12773 			 * but it helps warlock figure out the correct
12774 			 * initpkt function calls (see [s]sd.wlcmd).
12775 			 */
12776 			struct scsi_pkt	*pktp;
12777 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
12778 
12779 			ASSERT(bp != un->un_rqs_bp);
12780 
12781 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
12782 			switch ((*funcp)(bp, &pktp)) {
12783 			case SD_PKT_ALLOC_SUCCESS:
12784 				xp->xb_pktp = pktp;
12785 				SD_TRACE(SD_LOG_IO_CORE, un,
12786 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
12787 				    pktp);
12788 				goto got_pkt;
12789 
12790 			case SD_PKT_ALLOC_FAILURE:
12791 				/*
12792 				 * Temporary (hopefully) resource depletion.
12793 				 * Since retries and RQS commands always have a
12794 				 * scsi_pkt allocated, these cases should never
12795 				 * get here. So the only cases this needs to
12796 				 * handle is a bp from the waitq (which we put
12797 				 * back onto the waitq for sdrunout), or a bp
12798 				 * sent as an immed_bp (which we just fail).
12799 				 */
12800 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12801 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
12802 
12803 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12804 
12805 				if (bp == immed_bp) {
12806 					/*
12807 					 * If SD_XB_DMA_FREED is clear, then
12808 					 * this is a failure to allocate a
12809 					 * scsi_pkt, and we must fail the
12810 					 * command.
12811 					 */
12812 					if ((xp->xb_pkt_flags &
12813 					    SD_XB_DMA_FREED) == 0) {
12814 						break;
12815 					}
12816 
12817 					/*
12818 					 * If this immediate command is NOT our
12819 					 * un_retry_bp, then we must fail it.
12820 					 */
12821 					if (bp != un->un_retry_bp) {
12822 						break;
12823 					}
12824 
12825 					/*
12826 					 * We get here if this cmd is our
12827 					 * un_retry_bp that was DMAFREED, but
12828 					 * scsi_init_pkt() failed to reallocate
12829 					 * DMA resources when we attempted to
12830 					 * retry it. This can happen when an
12831 					 * mpxio failover is in progress, but
12832 					 * we don't want to just fail the
12833 					 * command in this case.
12834 					 *
12835 					 * Use timeout(9F) to restart it after
12836 					 * a 100ms delay.  We don't want to
12837 					 * let sdrunout() restart it, because
12838 					 * sdrunout() is just supposed to start
12839 					 * commands that are sitting on the
12840 					 * wait queue.  The un_retry_bp stays
12841 					 * set until the command completes, but
12842 					 * sdrunout can be called many times
12843 					 * before that happens.  Since sdrunout
12844 					 * cannot tell if the un_retry_bp is
12845 					 * already in the transport, it could
12846 					 * end up calling scsi_transport() for
12847 					 * the un_retry_bp multiple times.
12848 					 *
12849 					 * Also: don't schedule the callback
12850 					 * if some other callback is already
12851 					 * pending.
12852 					 */
12853 					if (un->un_retry_statp == NULL) {
12854 						/*
12855 						 * restore the kstat pointer to
12856 						 * keep kstat counts coherent
12857 						 * when we do retry the command.
12858 						 */
12859 						un->un_retry_statp =
12860 						    saved_statp;
12861 					}
12862 
12863 					if ((un->un_startstop_timeid == NULL) &&
12864 					    (un->un_retry_timeid == NULL) &&
12865 					    (un->un_direct_priority_timeid ==
12866 					    NULL)) {
12867 
12868 						un->un_retry_timeid =
12869 						    timeout(
12870 						    sd_start_retry_command,
12871 						    un, SD_RESTART_TIMEOUT);
12872 					}
12873 					goto exit;
12874 				}
12875 
12876 #else
12877 				if (bp == immed_bp) {
12878 					break;	/* Just fail the command */
12879 				}
12880 #endif
12881 
12882 				/* Add the buf back to the head of the waitq */
12883 				bp->av_forw = un->un_waitq_headp;
12884 				un->un_waitq_headp = bp;
12885 				if (un->un_waitq_tailp == NULL) {
12886 					un->un_waitq_tailp = bp;
12887 				}
12888 				goto exit;
12889 
12890 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
12891 				/*
12892 				 * HBA DMA resource failure. Fail the command
12893 				 * and continue processing of the queues.
12894 				 */
12895 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12896 				    "sd_start_cmds: "
12897 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
12898 				break;
12899 
12900 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
12901 				/*
12902 				 * Note:x86: Partial DMA mapping not supported
12903 				 * for USCSI commands, and all the needed DMA
12904 				 * resources were not allocated.
12905 				 */
12906 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12907 				    "sd_start_cmds: "
12908 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
12909 				break;
12910 
12911 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
12912 				/*
12913 				 * Note:x86: Request cannot fit into CDB based
12914 				 * on lba and len.
12915 				 */
12916 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12917 				    "sd_start_cmds: "
12918 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
12919 				break;
12920 
12921 			default:
12922 				/* Should NEVER get here! */
12923 				panic("scsi_initpkt error");
12924 				/*NOTREACHED*/
12925 			}
12926 
12927 			/*
12928 			 * Fatal error in allocating a scsi_pkt for this buf.
12929 			 * Update kstats & return the buf with an error code.
12930 			 * We must use sd_return_failed_command_no_restart() to
12931 			 * avoid a recursive call back into sd_start_cmds().
12932 			 * However this also means that we must keep processing
12933 			 * the waitq here in order to avoid stalling.
12934 			 */
12935 			if (statp == kstat_waitq_to_runq) {
12936 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
12937 			}
12938 			sd_return_failed_command_no_restart(un, bp, EIO);
12939 			if (bp == immed_bp) {
12940 				/* immed_bp is gone by now, so clear this */
12941 				immed_bp = NULL;
12942 			}
12943 			continue;
12944 		}
12945 got_pkt:
12946 		if (bp == immed_bp) {
12947 			/* goto the head of the class.... */
12948 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
12949 		}
12950 
12951 		un->un_ncmds_in_transport++;
12952 		SD_UPDATE_KSTATS(un, statp, bp);
12953 
12954 		/*
12955 		 * Call scsi_transport() to send the command to the target.
12956 		 * According to SCSA architecture, we must drop the mutex here
12957 		 * before calling scsi_transport() in order to avoid deadlock.
12958 		 * Note that the scsi_pkt's completion routine can be executed
12959 		 * (from interrupt context) even before the call to
12960 		 * scsi_transport() returns.
12961 		 */
12962 		SD_TRACE(SD_LOG_IO_CORE, un,
12963 		    "sd_start_cmds: calling scsi_transport()\n");
12964 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
12965 
12966 		mutex_exit(SD_MUTEX(un));
12967 		rval = scsi_transport(xp->xb_pktp);
12968 		mutex_enter(SD_MUTEX(un));
12969 
12970 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12971 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
12972 
12973 		switch (rval) {
12974 		case TRAN_ACCEPT:
12975 			/* Clear this with every pkt accepted by the HBA */
12976 			un->un_tran_fatal_count = 0;
12977 			break;	/* Success; try the next cmd (if any) */
12978 
12979 		case TRAN_BUSY:
12980 			un->un_ncmds_in_transport--;
12981 			ASSERT(un->un_ncmds_in_transport >= 0);
12982 
12983 			/*
12984 			 * Don't retry request sense, the sense data
12985 			 * is lost when another request is sent.
12986 			 * Free up the rqs buf and retry
12987 			 * the original failed cmd.  Update kstat.
12988 			 */
12989 			if (bp == un->un_rqs_bp) {
12990 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
12991 				bp = sd_mark_rqs_idle(un, xp);
12992 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
12993 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
12994 					kstat_waitq_enter);
12995 				goto exit;
12996 			}
12997 
12998 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12999 			/*
13000 			 * Free the DMA resources for the scsi_pkt. This will
13001 			 * allow mpxio to select another path the next time
13002 			 * we call scsi_transport() with this scsi_pkt.
13003 			 * See sdintr() for the rationale behind this.
13004 			 */
13005 			if ((un->un_f_is_fibre == TRUE) &&
13006 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13007 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13008 				scsi_dmafree(xp->xb_pktp);
13009 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13010 			}
13011 #endif
13012 
13013 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13014 				/*
13015 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13016 				 * are for error recovery situations. These do
13017 				 * not use the normal command waitq, so if they
13018 				 * get a TRAN_BUSY we cannot put them back onto
13019 				 * the waitq for later retry. One possible
13020 				 * problem is that there could already be some
13021 				 * other command on un_retry_bp that is waiting
13022 				 * for this one to complete, so we would be
13023 				 * deadlocked if we put this command back onto
13024 				 * the waitq for later retry (since un_retry_bp
13025 				 * must complete before the driver gets back to
13026 				 * commands on the waitq).
13027 				 *
13028 				 * To avoid deadlock we must schedule a callback
13029 				 * that will restart this command after a set
13030 				 * interval.  This should keep retrying for as
13031 				 * long as the underlying transport keeps
13032 				 * returning TRAN_BUSY (just like for other
13033 				 * commands).  Use the same timeout interval as
13034 				 * for the ordinary TRAN_BUSY retry.
13035 				 */
13036 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13037 				    "sd_start_cmds: scsi_transport() returned "
13038 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13039 
13040 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13041 				un->un_direct_priority_timeid =
13042 				    timeout(sd_start_direct_priority_command,
13043 				    bp, SD_BSY_TIMEOUT / 500);
13044 
13045 				goto exit;
13046 			}
13047 
13048 			/*
13049 			 * For TRAN_BUSY, we want to reduce the throttle value,
13050 			 * unless we are retrying a command.
13051 			 */
13052 			if (bp != un->un_retry_bp) {
13053 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13054 			}
13055 
13056 			/*
13057 			 * Set up the bp to be tried again 10 ms later.
13058 			 * Note:x86: Is there a timeout value in the sd_lun
13059 			 * for this condition?
13060 			 */
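		/*
		 * (Assuming the customary definition of SD_BSY_TIMEOUT as
		 * five seconds' worth of clock ticks, SD_BSY_TIMEOUT / 500
		 * works out to the 10 ms mentioned above.)
		 */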
13061 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13062 				kstat_runq_back_to_waitq);
13063 			goto exit;
13064 
13065 		case TRAN_FATAL_ERROR:
13066 			un->un_tran_fatal_count++;
13067 			/* FALLTHRU */
13068 
13069 		case TRAN_BADPKT:
13070 		default:
13071 			un->un_ncmds_in_transport--;
13072 			ASSERT(un->un_ncmds_in_transport >= 0);
13073 
13074 			/*
13075 			 * If this is our REQUEST SENSE command with a
13076 			 * transport error, we must get back the pointers
13077 			 * to the original buf, and mark the REQUEST
13078 			 * SENSE command as "available".
13079 			 */
13080 			if (bp == un->un_rqs_bp) {
13081 				bp = sd_mark_rqs_idle(un, xp);
13082 				xp = SD_GET_XBUF(bp);
13083 			} else {
13084 				/*
13085 				 * Legacy behavior: do not update transport
13086 				 * error count for request sense commands.
13087 				 */
13088 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13089 			}
13090 
13091 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13092 			sd_print_transport_rejected_message(un, xp, rval);
13093 
13094 			/*
13095 			 * We must use sd_return_failed_command_no_restart() to
13096 			 * avoid a recursive call back into sd_start_cmds().
13097 			 * However this also means that we must keep processing
13098 			 * the waitq here in order to avoid stalling.
13099 			 */
13100 			sd_return_failed_command_no_restart(un, bp, EIO);
13101 
13102 			/*
13103 			 * Notify any threads waiting in sd_ddi_suspend() that
13104 			 * a command completion has occurred.
13105 			 */
13106 			if (un->un_state == SD_STATE_SUSPENDED) {
13107 				cv_broadcast(&un->un_disk_busy_cv);
13108 			}
13109 
13110 			if (bp == immed_bp) {
13111 				/* immed_bp is gone by now, so clear this */
13112 				immed_bp = NULL;
13113 			}
13114 			break;
13115 		}
13116 
13117 	} while (immed_bp == NULL);
13118 
13119 exit:
13120 	ASSERT(mutex_owned(SD_MUTEX(un)));
13121 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13122 }
13123 
13124 
13125 /*
13126  *    Function: sd_return_command
13127  *
13128  * Description: Returns a command to its originator (with or without an
13129  *		error).  Also starts commands waiting to be transported
13130  *		to the target.
13131  *
13132  *     Context: May be called from interrupt, kernel, or timeout context
13133  */
13134 
13135 static void
13136 sd_return_command(struct sd_lun *un, struct buf *bp)
13137 {
13138 	struct sd_xbuf *xp;
13139 #if defined(__i386) || defined(__amd64)
13140 	struct scsi_pkt *pktp;
13141 #endif
13142 
13143 	ASSERT(bp != NULL);
13144 	ASSERT(un != NULL);
13145 	ASSERT(mutex_owned(SD_MUTEX(un)));
13146 	ASSERT(bp != un->un_rqs_bp);
13147 	xp = SD_GET_XBUF(bp);
13148 	ASSERT(xp != NULL);
13149 
13150 #if defined(__i386) || defined(__amd64)
13151 	pktp = SD_GET_PKTP(bp);
13152 #endif
13153 
13154 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13155 
13156 #if defined(__i386) || defined(__amd64)
13157 	/*
13158 	 * Note:x86: check for the "sdrestart failed" case.
13159 	 */
13160 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13161 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13162 	    (xp->xb_pktp->pkt_resid == 0)) {
13163 
13164 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13165 			/*
13166 			 * Successfully set up next portion of cmd
13167 			 * transfer, try sending it
13168 			 */
13169 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13170 			    NULL, NULL, 0, (clock_t)0, NULL);
13171 			sd_start_cmds(un, NULL);
13172 			return;	/* Note:x86: need a return here? */
13173 		}
13174 	}
13175 #endif
13176 
13177 	/*
13178 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13179 	 * can happen if upon being re-tried the failfast bp either
13180 	 * succeeded or encountered another error (possibly even a different
13181 	 * error than the one that precipitated the failfast state, but in
13182 	 * that case it would have had to exhaust retries as well). Regardless,
13183 	 * this should not occur while the instance is in the active
13184 	 * failfast state.
13185 	 */
13186 	if (bp == un->un_failfast_bp) {
13187 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13188 		un->un_failfast_bp = NULL;
13189 	}
13190 
13191 	/*
13192 	 * Clear the failfast state upon successful completion of ANY cmd.
13193 	 */
13194 	if (bp->b_error == 0) {
13195 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13196 	}
13197 
13198 	/*
13199 	 * This is used if the command was retried one or more times. Show that
13200 	 * we are done with it, and allow processing of the waitq to resume.
13201 	 */
13202 	if (bp == un->un_retry_bp) {
13203 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13204 		    "sd_return_command: un:0x%p: "
13205 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13206 		un->un_retry_bp = NULL;
13207 		un->un_retry_statp = NULL;
13208 	}
13209 
13210 	SD_UPDATE_RDWR_STATS(un, bp);
13211 	SD_UPDATE_PARTITION_STATS(un, bp);
13212 
13213 	switch (un->un_state) {
13214 	case SD_STATE_SUSPENDED:
13215 		/*
13216 		 * Notify any threads waiting in sd_ddi_suspend() that
13217 		 * a command completion has occurred.
13218 		 */
13219 		cv_broadcast(&un->un_disk_busy_cv);
13220 		break;
13221 	default:
13222 		sd_start_cmds(un, NULL);
13223 		break;
13224 	}
13225 
13226 	/* Return this command up the iodone chain to its originator. */
13227 	mutex_exit(SD_MUTEX(un));
13228 
13229 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13230 	xp->xb_pktp = NULL;
13231 
13232 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13233 
13234 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13235 	mutex_enter(SD_MUTEX(un));
13236 
13237 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13238 }
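
/*
 * Locking sketch (illustrative only): completion processing that calls
 * back up the iodone chain must not hold the softstate mutex, so both
 * sd_return_command() above and sd_return_failed_command_no_restart()
 * below bracket the callout like this:
 *
 *	mutex_exit(SD_MUTEX(un));
 *	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
 *	mutex_enter(SD_MUTEX(un));
 */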
13239 
13240 
13241 /*
13242  *    Function: sd_return_failed_command
13243  *
13244  * Description: Command completion when an error occurred.
13245  *
13246  *     Context: May be called from interrupt context
13247  */
13248 
13249 static void
13250 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13251 {
13252 	ASSERT(bp != NULL);
13253 	ASSERT(un != NULL);
13254 	ASSERT(mutex_owned(SD_MUTEX(un)));
13255 
13256 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13257 	    "sd_return_failed_command: entry\n");
13258 
13259 	/*
13260 	 * b_resid could already be nonzero due to a partial data
13261 	 * transfer, so do not change it here.
13262 	 */
13263 	SD_BIOERROR(bp, errcode);
13264 
13265 	sd_return_command(un, bp);
13266 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13267 	    "sd_return_failed_command: exit\n");
13268 }
13269 
13270 
13271 /*
13272  *    Function: sd_return_failed_command_no_restart
13273  *
13274  * Description: Same as sd_return_failed_command, but ensures that no
13275  *		call back into sd_start_cmds will be issued.
13276  *
13277  *     Context: May be called from interrupt context
13278  */
13279 
13280 static void
13281 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13282 	int errcode)
13283 {
13284 	struct sd_xbuf *xp;
13285 
13286 	ASSERT(bp != NULL);
13287 	ASSERT(un != NULL);
13288 	ASSERT(mutex_owned(SD_MUTEX(un)));
13289 	xp = SD_GET_XBUF(bp);
13290 	ASSERT(xp != NULL);
13291 	ASSERT(errcode != 0);
13292 
13293 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13294 	    "sd_return_failed_command_no_restart: entry\n");
13295 
13296 	/*
13297 	 * b_resid could already be nonzero due to a partial data
13298 	 * transfer, so do not change it here.
13299 	 */
13300 	SD_BIOERROR(bp, errcode);
13301 
13302 	/*
13303 	 * If this is the failfast bp, clear it. This can happen if the
13304 	 * failfast bp encounterd a fatal error when we attempted to
13305 	 * re-try it (such as a scsi_transport(9F) failure).  However
13306 	 * we should NOT be in an active failfast state if the failfast
13307 	 * bp is not NULL.
13308 	 */
13309 	if (bp == un->un_failfast_bp) {
13310 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13311 		un->un_failfast_bp = NULL;
13312 	}
13313 
13314 	if (bp == un->un_retry_bp) {
13315 		/*
13316 		 * This command was retried one or more times. Show that we are
13317 		 * done with it, and allow processing of the waitq to resume.
13318 		 */
13319 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13320 		    "sd_return_failed_command_no_restart: "
13321 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13322 		un->un_retry_bp = NULL;
13323 		un->un_retry_statp = NULL;
13324 	}
13325 
13326 	SD_UPDATE_RDWR_STATS(un, bp);
13327 	SD_UPDATE_PARTITION_STATS(un, bp);
13328 
13329 	mutex_exit(SD_MUTEX(un));
13330 
13331 	if (xp->xb_pktp != NULL) {
13332 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13333 		xp->xb_pktp = NULL;
13334 	}
13335 
13336 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13337 
13338 	mutex_enter(SD_MUTEX(un));
13339 
13340 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13341 	    "sd_return_failed_command_no_restart: exit\n");
13342 }
13343 
13344 
13345 /*
13346  *    Function: sd_retry_command
13347  *
13348  * Description: queue up a command for retry, or (optionally) fail it
13349  *		if retry counts are exhausted.
13350  *
13351  *   Arguments: un - Pointer to the sd_lun struct for the target.
13352  *
13353  *		bp - Pointer to the buf for the command to be retried.
13354  *
13355  *		retry_check_flag - Flag to see which (if any) of the retry
13356  *		   counts should be decremented/checked. If the indicated
13357  *		   retry count is exhausted, then the command will not be
13358  *		   retried; it will be failed instead. This should use a
13359  *		   value equal to one of the following:
13360  *
13361  *			SD_RETRIES_NOCHECK
13362  *			SD_RETRIES_STANDARD
13363  *			SD_RETRIES_VICTIM
 *			SD_RETRIES_UA
 *			SD_RETRIES_BUSY
13364  *
13365  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13366  *		   if the check should be made to see if FLAG_ISOLATE is set
13367  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13368  *		   not retried, it is simply failed.
13369  *
13370  *		user_funcp - Ptr to function to call before dispatching the
13371  *		   command. May be NULL if no action needs to be performed.
13372  *		   (Primarily intended for printing messages.)
13373  *
13374  *		user_arg - Optional argument to be passed along to
13375  *		   the user_funcp call.
13376  *
13377  *		failure_code - errno return code to set in the bp if the
13378  *		   command is going to be failed.
13379  *
13380  *		retry_delay - Retry delay interval in (clock_t) units. May
13381  *		   be zero, which indicates that the command should be retried
13382  *		   immediately (i.e., without an intervening delay).
13383  *
13384  *		statp - Ptr to kstat function to be updated if the command
13385  *		   is queued for a delayed retry. May be NULL if no kstat
13386  *		   update is desired.
13387  *
13388  *     Context: May be called from interrupt context.
13389  */
13390 
13391 static void
13392 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13393 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
13394 	code), void *user_arg, int failure_code,  clock_t retry_delay,
13395 	void (*statp)(kstat_io_t *))
13396 {
13397 	struct sd_xbuf	*xp;
13398 	struct scsi_pkt	*pktp;
13399 
13400 	ASSERT(un != NULL);
13401 	ASSERT(mutex_owned(SD_MUTEX(un)));
13402 	ASSERT(bp != NULL);
13403 	xp = SD_GET_XBUF(bp);
13404 	ASSERT(xp != NULL);
13405 	pktp = SD_GET_PKTP(bp);
13406 	ASSERT(pktp != NULL);
13407 
13408 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13409 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13410 
13411 	/*
13412 	 * If we are syncing or dumping, fail the command to avoid
13413 	 * recursively calling back into scsi_transport().
13414 	 */
13415 	if (ddi_in_panic()) {
13416 		goto fail_command_no_log;
13417 	}
13418 
13419 	/*
13420 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13421 	 * log an error and fail the command.
13422 	 */
13423 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13424 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13425 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13426 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13427 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13428 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13429 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13430 		goto fail_command;
13431 	}
13432 
13433 	/*
13434 	 * If we are suspended or dumping, put the command onto the head
13435 	 * of the wait queue, since we don't want to start more commands.
13436 	 */
13437 	switch (un->un_state) {
13438 	case SD_STATE_SUSPENDED:
13439 	case SD_STATE_DUMPING:
13440 		bp->av_forw = un->un_waitq_headp;
13441 		un->un_waitq_headp = bp;
13442 		if (un->un_waitq_tailp == NULL) {
13443 			un->un_waitq_tailp = bp;
13444 		}
13445 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13446 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13447 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13448 		return;
13449 	default:
13450 		break;
13451 	}
13452 
13453 	/*
13454 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13455 	 * is set; if it is then we do not want to retry the command.
13456 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13457 	 */
13458 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13459 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13460 			goto fail_command;
13461 		}
13462 	}
13463 
13464 
13465 	/*
13466 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13467 	 * command timeout or a selection timeout has occurred. This means
13468 	 * that we were unable to establish any kind of communication with
13469 	 * the target, and subsequent retries and/or commands are likely
13470 	 * to encounter similar results and take a long time to complete.
13471 	 *
13472 	 * If this is a failfast error condition, we need to update the
13473 	 * failfast state, even if this bp does not have B_FAILFAST set.
13474 	 */
13475 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13476 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13477 			ASSERT(un->un_failfast_bp == NULL);
13478 			/*
13479 			 * If we are already in the active failfast state, and
13480 			 * another failfast error condition has been detected,
13481 			 * then fail this command if it has B_FAILFAST set.
13482 			 * If B_FAILFAST is clear, then maintain the legacy
13483 	 * behavior of retrying heroically, even though this will
13484 			 * take a lot more time to fail the command.
13485 			 */
13486 			if (bp->b_flags & B_FAILFAST) {
13487 				goto fail_command;
13488 			}
13489 		} else {
13490 			/*
13491 			 * We're not in the active failfast state, but we
13492 			 * have a failfast error condition, so we must begin
13493 			 * transition to the next state. We do this regardless
13494 			 * of whether or not this bp has B_FAILFAST set.
13495 			 */
13496 			if (un->un_failfast_bp == NULL) {
13497 				/*
13498 				 * This is the first bp to meet a failfast
13499 				 * condition so save it on un_failfast_bp &
13500 				 * do normal retry processing. Do not enter
13501 				 * active failfast state yet. This marks
13502 				 * entry into the "failfast pending" state.
13503 				 */
13504 				un->un_failfast_bp = bp;
13505 
13506 			} else if (un->un_failfast_bp == bp) {
13507 				/*
13508 				 * This is the second time *this* bp has
13509 				 * encountered a failfast error condition,
13510 				 * so enter active failfast state & flush
13511 				 * queues as appropriate.
13512 				 */
13513 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13514 				un->un_failfast_bp = NULL;
13515 				sd_failfast_flushq(un);
13516 
13517 				/*
13518 				 * Fail this bp now if B_FAILFAST set;
13519 				 * otherwise continue with retries. (It would
13520 				 * be pretty ironic if this bp succeeded on a
13521 				 * subsequent retry after we just flushed all
13522 				 * the queues).
13523 				 */
13524 				if (bp->b_flags & B_FAILFAST) {
13525 					goto fail_command;
13526 				}
13527 
13528 #if !defined(lint) && !defined(__lint)
13529 			} else {
13530 				/*
13531 				 * If neither of the preceding conditionals
13532 				 * was true, it means that there is some
13533 				 * *other* bp that has met an initial failfast
13534 				 * condition and is currently either being
13535 				 * retried or is waiting to be retried. In
13536 				 * that case we should perform normal retry
13537 				 * processing on *this* bp, since there is a
13538 				 * chance that the current failfast condition
13539 				 * is transient and recoverable. If that does
13540 				 * not turn out to be the case, then retries
13541 				 * will be cleared when the wait queue is
13542 				 * flushed anyway.
13543 				 */
13544 #endif
13545 			}
13546 		}
13547 	} else {
13548 		/*
13549 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13550 		 * likely were able to at least establish some level of
13551 		 * communication with the target and subsequent commands
13552 	 * and/or retries are likely to get through to the target.
13553 	 * In this case we want to be aggressive about clearing
13554 		 * the failfast state. Note that this does not affect
13555 		 * the "failfast pending" condition.
13556 		 */
13557 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13558 	}
13559 
13560 
13561 	/*
13562 	 * Check the specified retry count to see if we can still do
13563 	 * any retries with this pkt before we should fail it.
13564 	 */
13565 	switch (retry_check_flag & SD_RETRIES_MASK) {
13566 	case SD_RETRIES_VICTIM:
13567 		/*
13568 		 * Check the victim retry count. If exhausted, then fall
13569 		 * thru & check against the standard retry count.
13570 		 */
13571 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13572 			/* Increment count & proceed with the retry */
13573 			xp->xb_victim_retry_count++;
13574 			break;
13575 		}
13576 		/* Victim retries exhausted, fall back to std. retries... */
13577 		/* FALLTHRU */
13578 
13579 	case SD_RETRIES_STANDARD:
13580 		if (xp->xb_retry_count >= un->un_retry_count) {
13581 			/* Retries exhausted, fail the command */
13582 			SD_TRACE(SD_LOG_IO_CORE, un,
13583 			    "sd_retry_command: retries exhausted!\n");
13584 			/*
13585 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13586 			 * commands with nonzero pkt_resid.
13587 			 */
13588 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13589 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13590 			    (pktp->pkt_resid != 0)) {
13591 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13592 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13593 					SD_UPDATE_B_RESID(bp, pktp);
13594 				}
13595 			}
13596 			goto fail_command;
13597 		}
13598 		xp->xb_retry_count++;
13599 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13600 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13601 		break;
13602 
13603 	case SD_RETRIES_UA:
13604 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13605 			/* Retries exhausted, fail the command */
13606 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13607 			    "Unit Attention retries exhausted. "
13608 			    "Check the target.\n");
13609 			goto fail_command;
13610 		}
13611 		xp->xb_ua_retry_count++;
13612 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13613 		    "sd_retry_command: retry count:%d\n",
13614 		    xp->xb_ua_retry_count);
13615 		break;
13616 
13617 	case SD_RETRIES_BUSY:
13618 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13619 			/* Retries exhausted, fail the command */
13620 			SD_TRACE(SD_LOG_IO_CORE, un,
13621 			    "sd_retry_command: retries exhausted!\n");
13622 			goto fail_command;
13623 		}
13624 		xp->xb_retry_count++;
13625 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13626 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13627 		break;
13628 
13629 	case SD_RETRIES_NOCHECK:
13630 	default:
13631 		/* No retry count to check. Just proceed with the retry */
13632 		break;
13633 	}
13634 
13635 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13636 
13637 	/*
13638 	 * If we were given a zero timeout, we must attempt to retry the
13639 	 * command immediately (ie, without a delay).
13640 	 */
13641 	if (retry_delay == 0) {
13642 		/*
13643 		 * Check some limiting conditions to see if we can actually
13644 		 * do the immediate retry.  If we cannot, then we must
13645 		 * fall back to queueing up a delayed retry.
13646 		 */
13647 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13648 			/*
13649 			 * We are at the throttle limit for the target,
13650 			 * fall back to delayed retry.
13651 			 */
13652 			retry_delay = SD_BSY_TIMEOUT;
13653 			statp = kstat_waitq_enter;
13654 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13655 			    "sd_retry_command: immed. retry hit "
13656 			    "throttle!\n");
13657 		} else {
13658 			/*
13659 			 * We're clear to proceed with the immediate retry.
13660 			 * First call the user-provided function (if any)
13661 			 */
13662 			if (user_funcp != NULL) {
13663 				(*user_funcp)(un, bp, user_arg,
13664 				    SD_IMMEDIATE_RETRY_ISSUED);
13665 #ifdef __lock_lint
13666 				sd_print_incomplete_msg(un, bp, user_arg,
13667 				    SD_IMMEDIATE_RETRY_ISSUED);
13668 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13669 				    SD_IMMEDIATE_RETRY_ISSUED);
13670 				sd_print_sense_failed_msg(un, bp, user_arg,
13671 				    SD_IMMEDIATE_RETRY_ISSUED);
13672 #endif
13673 			}
13674 
13675 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13676 			    "sd_retry_command: issuing immediate retry\n");
13677 
13678 			/*
13679 			 * Call sd_start_cmds() to transport the command to
13680 			 * the target.
13681 			 */
13682 			sd_start_cmds(un, bp);
13683 
13684 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13685 			    "sd_retry_command exit\n");
13686 			return;
13687 		}
13688 	}
13689 
13690 	/*
13691 	 * Set up to retry the command after a delay.
13692 	 * First call the user-provided function (if any)
13693 	 */
13694 	if (user_funcp != NULL) {
13695 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13696 	}
13697 
13698 	sd_set_retry_bp(un, bp, retry_delay, statp);
13699 
13700 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13701 	return;
13702 
13703 fail_command:
13704 
13705 	if (user_funcp != NULL) {
13706 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13707 	}
13708 
13709 fail_command_no_log:
13710 
13711 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13712 	    "sd_retry_command: returning failed command\n");
13713 
13714 	sd_return_failed_command(un, bp, failure_code);
13715 
13716 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13717 }
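
/*
 * Caller sketch (illustrative only): retrying a command with no
 * retry-count check, no message callback, a one-BUSY-interval delay,
 * and a waitq kstat update, as done elsewhere in this file:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
 *	    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
 *
 * A caller that wants a message printed first passes a user_funcp with
 * the signature described above; it is invoked with
 * SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED, or
 * SD_NO_RETRY_ISSUED according to the path taken.
 */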
13718 
13719 
13720 /*
13721  *    Function: sd_set_retry_bp
13722  *
13723  * Description: Set up the given bp for retry.
13724  *
13725  *   Arguments: un - ptr to associated softstate
13726  *		bp - ptr to buf(9S) for the command
13727  *		retry_delay - time interval before issuing retry (may be 0)
13728  *		statp - optional pointer to kstat function
13729  *
13730  *     Context: May be called under interrupt context
13731  */
13732 
13733 static void
13734 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13735 	void (*statp)(kstat_io_t *))
13736 {
13737 	ASSERT(un != NULL);
13738 	ASSERT(mutex_owned(SD_MUTEX(un)));
13739 	ASSERT(bp != NULL);
13740 
13741 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13742 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13743 
13744 	/*
13745 	 * Indicate that the command is being retried. This will not allow any
13746 	 * other commands on the wait queue to be transported to the target
13747 	 * until this command has been completed (success or failure). The
13748 	 * "retry command" is not transported to the target until the given
13749 	 * time delay expires, unless the user specified a 0 retry_delay.
13750 	 *
13751 	 * Note: the timeout(9F) callback routine is what actually calls
13752 	 * sd_start_cmds() to transport the command, with the exception of a
13753 	 * zero retry_delay. The only current implementor of a zero retry delay
13754 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
13755 	 */
13756 	if (un->un_retry_bp == NULL) {
13757 		ASSERT(un->un_retry_statp == NULL);
13758 		un->un_retry_bp = bp;
13759 
13760 		/*
13761 		 * If the user has not specified a delay the command should
13762 		 * be queued and no timeout should be scheduled.
13763 		 */
13764 		if (retry_delay == 0) {
13765 			/*
13766 			 * Save the kstat pointer that will be used in the
13767 			 * call to SD_UPDATE_KSTATS() below, so that
13768 			 * sd_start_cmds() can correctly decrement the waitq
13769 			 * count when it is time to transport this command.
13770 			 */
13771 			un->un_retry_statp = statp;
13772 			goto done;
13773 		}
13774 	}
13775 
13776 	if (un->un_retry_bp == bp) {
13777 		/*
13778 		 * Save the kstat pointer that will be used in the call to
13779 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
13780 		 * correctly decrement the waitq count when it is time to
13781 		 * transport this command.
13782 		 */
13783 		un->un_retry_statp = statp;
13784 
13785 		/*
13786 		 * Schedule a timeout if:
13787 		 *   1) The user has specified a delay.
13788 		 *   2) There is not a START_STOP_UNIT callback pending.
13789 		 *
13790 		 * If no delay has been specified, then it is up to the caller
13791 		 * to ensure that IO processing continues without stalling.
13792 		 * Effectively, this means that the caller will issue the
13793 		 * required call to sd_start_cmds(). The START_STOP_UNIT
13794 		 * callback does this after the START STOP UNIT command has
13795 		 * completed. In either of these cases we should not schedule
13796 		 * a timeout callback here.  Also don't schedule the timeout if
13797 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
13798 		 */
13799 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
13800 		    (un->un_direct_priority_timeid == NULL)) {
13801 			un->un_retry_timeid =
13802 			    timeout(sd_start_retry_command, un, retry_delay);
13803 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13804 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
13805 			    " bp:0x%p un_retry_timeid:0x%p\n",
13806 			    un, bp, un->un_retry_timeid);
13807 		}
13808 	} else {
13809 		/*
13810 		 * We only get in here if there is already another command
13811 		 * waiting to be retried.  In this case, we just put the
13812 		 * given command onto the wait queue, so it can be transported
13813 		 * after the current retry command has completed.
13814 		 *
13815 		 * Also we have to make sure that if the command at the head
13816 		 * of the wait queue is the un_failfast_bp, that we do not
13817 		 * put ahead of it any other commands that are to be retried.
13818 		 */
13819 		if ((un->un_failfast_bp != NULL) &&
13820 		    (un->un_failfast_bp == un->un_waitq_headp)) {
13821 			/*
13822 			 * Enqueue this command AFTER the first command on
13823 			 * the wait queue (which is also un_failfast_bp).
13824 			 */
13825 			bp->av_forw = un->un_waitq_headp->av_forw;
13826 			un->un_waitq_headp->av_forw = bp;
13827 			if (un->un_waitq_headp == un->un_waitq_tailp) {
13828 				un->un_waitq_tailp = bp;
13829 			}
13830 		} else {
13831 			/* Enqueue this command at the head of the waitq. */
13832 			bp->av_forw = un->un_waitq_headp;
13833 			un->un_waitq_headp = bp;
13834 			if (un->un_waitq_tailp == NULL) {
13835 				un->un_waitq_tailp = bp;
13836 			}
13837 		}
13838 
13839 		if (statp == NULL) {
13840 			statp = kstat_waitq_enter;
13841 		}
13842 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13843 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
13844 	}
13845 
13846 done:
13847 	if (statp != NULL) {
13848 		SD_UPDATE_KSTATS(un, statp, bp);
13849 	}
13850 
13851 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13852 	    "sd_set_retry_bp: exit un:0x%p\n", un);
13853 }
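
/*
 * Caller sketch (illustrative only): queueing a bp for retry after the
 * ordinary TRAN_BUSY interval while moving its kstat accounting from
 * the run queue back to the wait queue, as done in sd_start_cmds():
 *
 *	sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
 *	    kstat_runq_back_to_waitq);
 */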
13854 
13855 
13856 /*
13857  *    Function: sd_start_retry_command
13858  *
13859  * Description: Start the command that has been waiting on the target's
13860  *		retry queue.  Called from timeout(9F) context after the
13861  *		retry delay interval has expired.
13862  *
13863  *   Arguments: arg - pointer to associated softstate for the device.
13864  *
13865  *     Context: timeout(9F) thread context.  May not sleep.
13866  */
13867 
13868 static void
13869 sd_start_retry_command(void *arg)
13870 {
13871 	struct sd_lun *un = arg;
13872 
13873 	ASSERT(un != NULL);
13874 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13875 
13876 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13877 	    "sd_start_retry_command: entry\n");
13878 
13879 	mutex_enter(SD_MUTEX(un));
13880 
13881 	un->un_retry_timeid = NULL;
13882 
13883 	if (un->un_retry_bp != NULL) {
13884 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13885 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
13886 		    un, un->un_retry_bp);
13887 		sd_start_cmds(un, un->un_retry_bp);
13888 	}
13889 
13890 	mutex_exit(SD_MUTEX(un));
13891 
13892 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13893 	    "sd_start_retry_command: exit\n");
13894 }
13895 
13896 
13897 /*
13898  *    Function: sd_start_direct_priority_command
13899  *
13900  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
13901  *		received TRAN_BUSY when we called scsi_transport() to send it
13902  *		to the underlying HBA. This function is called from timeout(9F)
13903  *		context after the delay interval has expired.
13904  *
13905  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
13906  *
13907  *     Context: timeout(9F) thread context.  May not sleep.
13908  */
13909 
13910 static void
13911 sd_start_direct_priority_command(void *arg)
13912 {
13913 	struct buf	*priority_bp = arg;
13914 	struct sd_lun	*un;
13915 
13916 	ASSERT(priority_bp != NULL);
13917 	un = SD_GET_UN(priority_bp);
13918 	ASSERT(un != NULL);
13919 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13920 
13921 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13922 	    "sd_start_direct_priority_command: entry\n");
13923 
13924 	mutex_enter(SD_MUTEX(un));
13925 	un->un_direct_priority_timeid = NULL;
13926 	sd_start_cmds(un, priority_bp);
13927 	mutex_exit(SD_MUTEX(un));
13928 
13929 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13930 	    "sd_start_direct_priority_command: exit\n");
13931 }
13932 
13933 
13934 /*
13935  *    Function: sd_send_request_sense_command
13936  *
13937  * Description: Sends a REQUEST SENSE command to the target
13938  *
13939  *     Context: May be called from interrupt context.
13940  */
13941 
13942 static void
13943 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
13944 	struct scsi_pkt *pktp)
13945 {
13946 	ASSERT(bp != NULL);
13947 	ASSERT(un != NULL);
13948 	ASSERT(mutex_owned(SD_MUTEX(un)));
13949 
13950 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
13951 	    "entry: buf:0x%p\n", bp);
13952 
13953 	/*
13954 	 * If we are syncing or dumping, then fail the command to avoid a
13955 	 * recursive callback into scsi_transport(). Also fail the command
13956 	 * if we are suspended (legacy behavior).
13957 	 */
13958 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
13959 	    (un->un_state == SD_STATE_DUMPING)) {
13960 		sd_return_failed_command(un, bp, EIO);
13961 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13962 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
13963 		return;
13964 	}
13965 
13966 	/*
13967 	 * Retry the failed command and don't issue the request sense if:
13968 	 *    1) the sense buf is busy
13969 	 *    2) we have 1 or more outstanding commands on the target
13970 	 *    (the sense data will be cleared or invalidated anyway)
13971 	 *
13972 	 * Note: There could be an issue with not checking a retry limit here;
13973 	 * the problem is determining which retry limit to check.
13974 	 */
13975 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
13976 		/* Don't retry if the command is flagged as non-retryable */
13977 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
13978 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13979 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
13980 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13981 			    "sd_send_request_sense_command: "
13982 			    "at full throttle, retrying exit\n");
13983 		} else {
13984 			sd_return_failed_command(un, bp, EIO);
13985 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13986 			    "sd_send_request_sense_command: "
13987 			    "at full throttle, non-retryable exit\n");
13988 		}
13989 		return;
13990 	}
13991 
13992 	sd_mark_rqs_busy(un, bp);
13993 	sd_start_cmds(un, un->un_rqs_bp);
13994 
13995 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13996 	    "sd_send_request_sense_command: exit\n");
13997 }
13998 
13999 
14000 /*
14001  *    Function: sd_mark_rqs_busy
14002  *
14003  * Description: Indicate that the request sense bp for this instance is
14004  *		in use.
14005  *
14006  *     Context: May be called under interrupt context
14007  */
14008 
14009 static void
14010 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14011 {
14012 	struct sd_xbuf	*sense_xp;
14013 
14014 	ASSERT(un != NULL);
14015 	ASSERT(bp != NULL);
14016 	ASSERT(mutex_owned(SD_MUTEX(un)));
14017 	ASSERT(un->un_sense_isbusy == 0);
14018 
14019 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14020 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14021 
14022 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14023 	ASSERT(sense_xp != NULL);
14024 
14025 	SD_INFO(SD_LOG_IO, un,
14026 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14027 
14028 	ASSERT(sense_xp->xb_pktp != NULL);
14029 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14030 	    == (FLAG_SENSING | FLAG_HEAD));
14031 
14032 	un->un_sense_isbusy = 1;
14033 	un->un_rqs_bp->b_resid = 0;
14034 	sense_xp->xb_pktp->pkt_resid  = 0;
14035 	sense_xp->xb_pktp->pkt_reason = 0;
14036 
14037 	/* So we can get back the bp at interrupt time! */
14038 	sense_xp->xb_sense_bp = bp;
14039 
14040 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14041 
14042 	/*
14043 	 * Mark this buf as awaiting sense data. (This is already set in
14044 	 * the pkt_flags for the RQS packet.)
14045 	 */
14046 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14047 
14048 	sense_xp->xb_retry_count	= 0;
14049 	sense_xp->xb_victim_retry_count = 0;
14050 	sense_xp->xb_ua_retry_count	= 0;
14051 	sense_xp->xb_dma_resid  = 0;
14052 
14053 	/* Clean up the fields for auto-request sense */
14054 	sense_xp->xb_sense_status = 0;
14055 	sense_xp->xb_sense_state  = 0;
14056 	sense_xp->xb_sense_resid  = 0;
14057 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14058 
14059 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14060 }
14061 
14062 
14063 /*
14064  *    Function: sd_mark_rqs_idle
14065  *
14066  * Description: SD_MUTEX must be held continuously through this routine
14067  *		to prevent reuse of the rqs struct before the caller can
14068  *		complete it's processing.
14069  *		complete its processing.
14070  * Return Code: Pointer to the RQS buf
14071  *
14072  *     Context: May be called under interrupt context
14073  */
14074 
14075 static struct buf *
14076 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14077 {
14078 	struct buf *bp;
14079 	ASSERT(un != NULL);
14080 	ASSERT(sense_xp != NULL);
14081 	ASSERT(mutex_owned(SD_MUTEX(un)));
14082 	ASSERT(un->un_sense_isbusy != 0);
14083 
14084 	un->un_sense_isbusy = 0;
14085 	bp = sense_xp->xb_sense_bp;
14086 	sense_xp->xb_sense_bp = NULL;
14087 
14088 	/* This pkt is no longer interested in getting sense data */
14089 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14090 
14091 	return (bp);
14092 }
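
/*
 * Pairing sketch (illustrative only): the per-instance RQS resources
 * cycle busy -> idle under SD_MUTEX.  sd_send_request_sense_command()
 * marks them busy and issues the command; a failed transport of the
 * RQS packet (see sd_start_cmds()) marks them idle again and recovers
 * the original bp:
 *
 *	sd_mark_rqs_busy(un, bp);
 *	sd_start_cmds(un, un->un_rqs_bp);
 *	...
 *	bp = sd_mark_rqs_idle(un, xp);
 */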
14093 
14094 
14095 
14096 /*
14097  *    Function: sd_alloc_rqs
14098  *
14099  * Description: Set up the unit to receive auto request sense data
14100  *
14101  * Return Code: DDI_SUCCESS or DDI_FAILURE
14102  *
14103  *     Context: Called under attach(9E) context
14104  */
14105 
14106 static int
14107 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14108 {
14109 	struct sd_xbuf *xp;
14110 
14111 	ASSERT(un != NULL);
14112 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14113 	ASSERT(un->un_rqs_bp == NULL);
14114 	ASSERT(un->un_rqs_pktp == NULL);
14115 
14116 	/*
14117 	 * First allocate the required buf and scsi_pkt structs, then set up
14118 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14119 	 */
14120 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14121 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14122 	if (un->un_rqs_bp == NULL) {
14123 		return (DDI_FAILURE);
14124 	}
14125 
14126 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14127 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14128 
14129 	if (un->un_rqs_pktp == NULL) {
14130 		sd_free_rqs(un);
14131 		return (DDI_FAILURE);
14132 	}
14133 
14134 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14135 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14136 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14137 
14138 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14139 
14140 	/* Set up the other needed members in the ARQ scsi_pkt. */
14141 	un->un_rqs_pktp->pkt_comp   = sdintr;
14142 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14143 	un->un_rqs_pktp->pkt_flags |=
14144 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14145 
14146 	/*
14147 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
14148 	 * provide any initpkt/destroypkt routines, as we take care of
14149 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14150 	 */
14151 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14152 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14153 	xp->xb_pktp = un->un_rqs_pktp;
14154 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14155 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14156 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14157 
14158 	/*
14159 	 * Save the pointer to the request sense private bp so it can
14160 	 * be retrieved in sdintr.
14161 	 */
14162 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14163 	ASSERT(un->un_rqs_bp->b_private == xp);
14164 
14165 	/*
14166 	 * See if the HBA supports auto-request sense for the specified
14167 	 * target/lun. If it does, then try to enable it (if not already
14168 	 * enabled).
14169 	 *
14170 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14171 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14172 	 * return success.  However, in both of these cases ARQ is always
14173 	 * enabled and scsi_ifgetcap will always return true. The best approach
14174 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14175 	 *
14176 	 * A third case is an HBA (adp) that always returns enabled on
14177 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
14178 	 * there is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
14179 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
14180 	 */
14181 
14182 	if (un->un_f_is_fibre == TRUE) {
14183 		un->un_f_arq_enabled = TRUE;
14184 	} else {
14185 #if defined(__i386) || defined(__amd64)
14186 		/*
14187 		 * Circumvent the Adaptec bug, remove this code when
14188 		 * the bug is fixed
14189 		 */
14190 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14191 #endif
14192 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14193 		case 0:
14194 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14195 				"sd_alloc_rqs: HBA supports ARQ\n");
14196 			/*
14197 			 * ARQ is supported by this HBA but currently is not
14198 			 * enabled. Attempt to enable it and if successful then
14199 			 * mark this instance as ARQ enabled.
14200 			 */
14201 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14202 				== 1) {
14203 				/* Successfully enabled ARQ in the HBA */
14204 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14205 					"sd_alloc_rqs: ARQ enabled\n");
14206 				un->un_f_arq_enabled = TRUE;
14207 			} else {
14208 				/* Could not enable ARQ in the HBA */
14209 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14210 				    "sd_alloc_rqs: failed ARQ enable\n");
14211 				un->un_f_arq_enabled = FALSE;
14212 			}
14213 			break;
14214 		case 1:
14215 			/*
14216 			 * ARQ is supported by this HBA and is already enabled.
14217 			 * Just mark ARQ as enabled for this instance.
14218 			 */
14219 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14220 				"sd_alloc_rqs: ARQ already enabled\n");
14221 			un->un_f_arq_enabled = TRUE;
14222 			break;
14223 		default:
14224 			/*
14225 			 * ARQ is not supported by this HBA; disable it for this
14226 			 * instance.
14227 			 */
14228 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14229 				"sd_alloc_rqs: HBA does not support ARQ\n");
14230 			un->un_f_arq_enabled = FALSE;
14231 			break;
14232 		}
14233 	}
14234 
14235 	return (DDI_SUCCESS);
14236 }
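
/*
 * Probe-sequence sketch (illustrative only) of the
 * scsi_ifgetcap(9F)/scsi_ifsetcap(9F) logic above, reduced to its
 * essentials for the non-fibre, non-workaround path:
 *
 *	int r = scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1);
 *	if (r == 1) {
 *		un->un_f_arq_enabled = TRUE;	(already enabled)
 *	} else if (r == 0) {
 *		un->un_f_arq_enabled = (scsi_ifsetcap(SD_ADDRESS(un),
 *		    "auto-rqsense", 1, 1) == 1) ? TRUE : FALSE;
 *	} else {
 *		un->un_f_arq_enabled = FALSE;	(not supported)
 *	}
 */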
14237 
14238 
14239 /*
14240  *    Function: sd_free_rqs
14241  *
14242  * Description: Cleanup for the pre-instance RQS command.
14243  *
14244  *     Context: Kernel thread context
14245  */
14246 
14247 static void
14248 sd_free_rqs(struct sd_lun *un)
14249 {
14250 	ASSERT(un != NULL);
14251 
14252 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14253 
14254 	/*
14255 	 * If consistent memory is bound to a scsi_pkt, the pkt
14256 	 * has to be destroyed *before* freeing the consistent memory.
14257 	 * Don't change the sequence of this operations.
14258 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
14259 	 * after it was freed in scsi_free_consistent_buf().
14260 	 */
14261 	if (un->un_rqs_pktp != NULL) {
14262 		scsi_destroy_pkt(un->un_rqs_pktp);
14263 		un->un_rqs_pktp = NULL;
14264 	}
14265 
14266 	if (un->un_rqs_bp != NULL) {
14267 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14268 		scsi_free_consistent_buf(un->un_rqs_bp);
14269 		un->un_rqs_bp = NULL;
14270 	}
14271 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14272 }
14273 
14274 
14275 
14276 /*
14277  *    Function: sd_reduce_throttle
14278  *
14279  * Description: Reduces the maximum # of outstanding commands on a
14280  *		target to the current number of outstanding commands.
14281  *		Queues a timeout(9F) callback to restore the limit
14282  *		after a specified interval has elapsed.
14283  *		Typically used when we get a TRAN_BUSY return code
14284  *		back from scsi_transport().
14285  *
14286  *   Arguments: un - ptr to the sd_lun softstate struct
14287  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14288  *
14289  *     Context: May be called from interrupt context
14290  */
14291 
14292 static void
14293 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14294 {
14295 	ASSERT(un != NULL);
14296 	ASSERT(mutex_owned(SD_MUTEX(un)));
14297 	ASSERT(un->un_ncmds_in_transport >= 0);
14298 
14299 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14300 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14301 	    un, un->un_throttle, un->un_ncmds_in_transport);
14302 
14303 	if (un->un_throttle > 1) {
14304 		if (un->un_f_use_adaptive_throttle == TRUE) {
14305 			switch (throttle_type) {
14306 			case SD_THROTTLE_TRAN_BUSY:
14307 				if (un->un_busy_throttle == 0) {
14308 					un->un_busy_throttle = un->un_throttle;
14309 				}
14310 				break;
14311 			case SD_THROTTLE_QFULL:
14312 				un->un_busy_throttle = 0;
14313 				break;
14314 			default:
14315 				ASSERT(FALSE);
14316 			}
14317 
14318 			if (un->un_ncmds_in_transport > 0) {
14319 				un->un_throttle = un->un_ncmds_in_transport;
14320 			}
14321 
14322 		} else {
14323 			if (un->un_ncmds_in_transport == 0) {
14324 				un->un_throttle = 1;
14325 			} else {
14326 				un->un_throttle = un->un_ncmds_in_transport;
14327 			}
14328 		}
14329 	}
14330 
14331 	/* Reschedule the timeout if none is currently active */
14332 	if (un->un_reset_throttle_timeid == NULL) {
14333 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14334 		    un, SD_THROTTLE_RESET_INTERVAL);
14335 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14336 		    "sd_reduce_throttle: timeout scheduled!\n");
14337 	}
14338 
14339 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14340 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14341 }
14342 
14343 
14344 
14345 /*
14346  *    Function: sd_restore_throttle
14347  *
14348  * Description: Callback function for timeout(9F).  Resets the current
14349  *		value of un->un_throttle to its default.
14350  *
14351  *   Arguments: arg - pointer to associated softstate for the device.
14352  *
14353  *     Context: May be called from interrupt context
14354  */
14355 
14356 static void
14357 sd_restore_throttle(void *arg)
14358 {
14359 	struct sd_lun	*un = arg;
14360 
14361 	ASSERT(un != NULL);
14362 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14363 
14364 	mutex_enter(SD_MUTEX(un));
14365 
14366 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14367 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14368 
14369 	un->un_reset_throttle_timeid = NULL;
14370 
14371 	if (un->un_f_use_adaptive_throttle == TRUE) {
14372 		/*
14373 		 * If un_busy_throttle is nonzero, then it contains the
14374 		 * value that un_throttle was when we got a TRAN_BUSY back
14375 		 * from scsi_transport(). We want to revert back to this
14376 		 * value.
14377 		 *
14378 		 * In the QFULL case, the throttle limit will incrementally
14379 		 * increase until it reaches max throttle.
14380 		 */
14381 		if (un->un_busy_throttle > 0) {
14382 			un->un_throttle = un->un_busy_throttle;
14383 			un->un_busy_throttle = 0;
14384 		} else {
14385 			/*
14386 			 * Increase the throttle by 10% to open the gate
14387 			 * slowly; schedule another restore callback if the
14388 			 * saved throttle has not yet been reached.
14389 			 */
14390 			short throttle;
14391 			if (sd_qfull_throttle_enable) {
14392 				throttle = un->un_throttle +
14393 				    max((un->un_throttle / 10), 1);
14394 				un->un_throttle =
14395 				    (throttle < un->un_saved_throttle) ?
14396 				    throttle : un->un_saved_throttle;
14397 				if (un->un_throttle < un->un_saved_throttle) {
14398 					un->un_reset_throttle_timeid =
14399 					    timeout(sd_restore_throttle, un,
14400 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
14401 				}
14402 			}
14403 		}
14404 
14405 		/*
14406 		 * If un_throttle has fallen below the low-water mark, we
14407 		 * restore the maximum value here (and allow it to ratchet
14408 		 * down again if necessary).
14409 		 */
14410 		if (un->un_throttle < un->un_min_throttle) {
14411 			un->un_throttle = un->un_saved_throttle;
14412 		}
14413 	} else {
14414 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14415 		    "restoring limit from 0x%x to 0x%x\n",
14416 		    un->un_throttle, un->un_saved_throttle);
14417 		un->un_throttle = un->un_saved_throttle;
14418 	}
14419 
14420 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14421 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14422 
14423 	sd_start_cmds(un, NULL);
14424 
14425 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14426 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14427 	    un, un->un_throttle);
14428 
14429 	mutex_exit(SD_MUTEX(un));
14430 
14431 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14432 }
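
/*
 * Worked example (illustrative only) of the QFULL ramp above: each
 * restore adds max(un_throttle / 10, 1) -- at least 1 -- and schedules
 * another callback until un_saved_throttle is reached.  Starting from
 * un_throttle == 8 with un_saved_throttle == 30, successive restores
 * yield:
 *
 *	8 -> 9 -> 10 -> ... -> 20 -> 22 -> 24 -> 26 -> 28 -> 30
 *
 * (the increment grows to 2 once the throttle reaches 20).  In the
 * TRAN_BUSY case (un_busy_throttle nonzero) the throttle instead snaps
 * back to its pre-TRAN_BUSY value in a single restore.
 */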
14433 
14434 /*
14435  *    Function: sdrunout
14436  *
14437  * Description: Callback routine for scsi_init_pkt when a resource allocation
14438  *		fails.
14439  *
14440  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14441  *		soft state instance.
14442  *
14443  * Return Code: The scsi_init_pkt routine allows for the callback function to
14444  *		return a 0 indicating the callback should be rescheduled or a 1
14445  *		indicating not to reschedule. This routine always returns 1
14446  *		because the driver always provides a callback function to
14447  *		scsi_init_pkt. This results in a callback always being scheduled
14448  *		(via the scsi_init_pkt callback implementation) if a resource
14449  *		failure occurs.
14450  *
14451  *     Context: This callback function may not block or call routines that block
14452  *
14453  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14454  *		request persisting at the head of the list which cannot be
14455  *		satisfied even after multiple retries. In the future the driver
14456  *		may implement some type of maximum runout count before failing
14457  *		an I/O.
14458  */
14459 
14460 static int
14461 sdrunout(caddr_t arg)
14462 {
14463 	struct sd_lun	*un = (struct sd_lun *)arg;
14464 
14465 	ASSERT(un != NULL);
14466 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14467 
14468 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14469 
14470 	mutex_enter(SD_MUTEX(un));
14471 	sd_start_cmds(un, NULL);
14472 	mutex_exit(SD_MUTEX(un));
14473 	/*
14474 	 * This callback routine always returns 1 (i.e. do not reschedule)
14475 	 * because we always specify sdrunout as the callback handler for
14476 	 * scsi_init_pkt inside the call to sd_start_cmds.
14477 	 */
14478 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14479 	return (1);
14480 }
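
/*
 * Registration sketch (illustrative only; parameter names are
 * schematic): sdrunout takes effect because the driver passes it as
 * the resource callback to scsi_init_pkt(9F) when building packets
 * (see sd_initpkt_for_buf(), referenced below):
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, cmdlen,
 *	    statuslen, privatelen, flags, sdrunout, (caddr_t)un);
 *
 * A NULL return with sdrunout registered means the framework will call
 * sdrunout later, when resources may again be available.
 */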
14481 
14482 
14483 /*
14484  *    Function: sdintr
14485  *
14486  * Description: Completion callback routine for scsi_pkt(9S) structs
14487  *		sent to the HBA driver via scsi_transport(9F).
14488  *
14489  *     Context: Interrupt context
14490  */
14491 
14492 static void
14493 sdintr(struct scsi_pkt *pktp)
14494 {
14495 	struct buf	*bp;
14496 	struct sd_xbuf	*xp;
14497 	struct sd_lun	*un;
14498 
14499 	ASSERT(pktp != NULL);
14500 	bp = (struct buf *)pktp->pkt_private;
14501 	ASSERT(bp != NULL);
14502 	xp = SD_GET_XBUF(bp);
14503 	ASSERT(xp != NULL);
14504 	ASSERT(xp->xb_pktp != NULL);
14505 	un = SD_GET_UN(bp);
14506 	ASSERT(un != NULL);
14507 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14508 
14509 #ifdef SD_FAULT_INJECTION
14510 
14511 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
14512 	/* SD FaultInjection */
14513 	sd_faultinjection(pktp);
14514 
14515 #endif /* SD_FAULT_INJECTION */
14516 
14517 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14518 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14519 
14520 	mutex_enter(SD_MUTEX(un));
14521 
14522 	/* Reduce the count of the #commands currently in transport */
14523 	un->un_ncmds_in_transport--;
14524 	ASSERT(un->un_ncmds_in_transport >= 0);
14525 
14526 	/* Increment counter to indicate that the callback routine is active */
14527 	un->un_in_callback++;
14528 
14529 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14530 
14531 #ifdef	SDDEBUG
14532 	if (bp == un->un_retry_bp) {
14533 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14534 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14535 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14536 	}
14537 #endif
14538 
14539 	/*
14540 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
14541 	 */
14542 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14543 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14544 		    "Device is gone\n");
14545 		sd_return_failed_command(un, bp, EIO);
14546 		goto exit;
14547 	}
14548 
14549 	/*
14550 	 * First see if the pkt has auto-request sense data with it....
14551 	 * Look at the packet state first so we don't take a performance
14552 	 * hit looking at the arq enabled flag unless absolutely necessary.
14553 	 */
14554 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14555 	    (un->un_f_arq_enabled == TRUE)) {
14556 		/*
14557 		 * The HBA did an auto request sense for this command so check
14558 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14559 		 * driver command that should not be retried.
14560 		 */
14561 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14562 			/*
14563 			 * Save the relevant sense info into the xp for the
14564 			 * original cmd.
14565 			 */
14566 			struct scsi_arq_status *asp;
14567 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14568 			xp->xb_sense_status =
14569 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14570 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14571 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14572 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14573 			    min(sizeof (struct scsi_extended_sense),
14574 			    SENSE_LENGTH));
14575 
14576 			/* fail the command */
14577 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14578 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14579 			sd_return_failed_command(un, bp, EIO);
14580 			goto exit;
14581 		}
14582 
14583 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14584 		/*
14585 		 * We want to either retry or fail this command, so free
14586 		 * the DMA resources here.  If we retry the command then
14587 		 * the DMA resources will be reallocated in sd_start_cmds().
14588 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14589 		 * causes the *entire* transfer to start over again from the
14590 		 * beginning of the request, even for PARTIAL chunks that
14591 		 * have already transferred successfully.
14592 		 */
14593 		if ((un->un_f_is_fibre == TRUE) &&
14594 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14595 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14596 			scsi_dmafree(pktp);
14597 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14598 		}
14599 #endif
14600 
14601 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14602 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14603 
14604 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14605 		goto exit;
14606 	}
14607 
14608 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14609 	if (pktp->pkt_flags & FLAG_SENSING)  {
14610 		/* This pktp is from the unit's REQUEST_SENSE command */
14611 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14612 		    "sdintr: sd_handle_request_sense\n");
14613 		sd_handle_request_sense(un, bp, xp, pktp);
14614 		goto exit;
14615 	}
14616 
14617 	/*
14618 	 * Check to see if the command successfully completed as requested;
14619 	 * this is the most common case (and also the hot performance path).
14620 	 *
14621 	 * Requirements for successful completion are:
14622 	 * pkt_reason is CMD_CMPLT and the packet status is STATUS_GOOD.
14623 	 * In addition:
14624 	 * - A residual of zero indicates successful completion no matter what
14625 	 *   the command is.
14626 	 * - If the residual is not zero and the command is not a read or
14627 	 *   write, then it's still defined as successful completion. In other
14628 	 *   words, if the command is a read or write the residual must be
14629 	 *   zero for successful completion.
14630 	 * - If the residual is not zero and the command is a read or
14631 	 *   write, and it's a USCSICMD, then it's still defined as
14632 	 *   successful completion.
14633 	 */
14634 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14635 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14636 
14637 		/*
14638 		 * Since this command is returned with a good status, we
14639 		 * can reset the count for Sonoma failover.
14640 		 */
14641 		un->un_sonoma_failure_count = 0;
14642 
14643 		/*
14644 		 * Return all USCSI commands on good status
14645 		 */
14646 		if (pktp->pkt_resid == 0) {
14647 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14648 			    "sdintr: returning command for resid == 0\n");
14649 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14650 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14651 			SD_UPDATE_B_RESID(bp, pktp);
14652 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14653 			    "sdintr: returning command for resid != 0\n");
14654 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14655 			SD_UPDATE_B_RESID(bp, pktp);
14656 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14657 			    "sdintr: returning uscsi command\n");
14658 		} else {
14659 			goto not_successful;
14660 		}
14661 		sd_return_command(un, bp);
14662 
14663 		/*
14664 		 * Decrement counter to indicate that the callback routine
14665 		 * is done.
14666 		 */
14667 		un->un_in_callback--;
14668 		ASSERT(un->un_in_callback >= 0);
14669 		mutex_exit(SD_MUTEX(un));
14670 
14671 		return;
14672 	}
14673 
14674 not_successful:
14675 
14676 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14677 	/*
14678 	 * The following is based upon knowledge of the underlying transport
14679 	 * and its use of DMA resources.  This code should be removed when
14680 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14681 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14682 	 * and sd_start_cmds().
14683 	 *
14684 	 * Free any DMA resources associated with this command if there
14685 	 * is a chance it could be retried or enqueued for later retry.
14686 	 * If we keep the DMA binding then mpxio cannot reissue the
14687 	 * command on another path whenever a path failure occurs.
14688 	 *
14689 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14690 	 * causes the *entire* transfer to start over again from the
14691 	 * beginning of the request, even for PARTIAL chunks that
14692 	 * have already transferred successfully.
14693 	 *
14694 	 * This is only done for non-uscsi commands (and also skipped for the
14695 	 * driver's internal RQS command). Also just do this for Fibre Channel
14696 	 * devices as these are the only ones that support mpxio.
14697 	 */
14698 	if ((un->un_f_is_fibre == TRUE) &&
14699 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14700 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14701 		scsi_dmafree(pktp);
14702 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14703 	}
14704 #endif
14705 
14706 	/*
14707 	 * The command did not successfully complete as requested so check
14708 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14709 	 * driver command that should not be retried so just return. If
14710 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14711 	 */
14712 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14713 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14714 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14715 		/*
14716 		 * Issue a request sense if a check condition caused the error
14717 		 * (we handle the auto request sense case above), otherwise
14718 		 * just fail the command.
14719 		 */
14720 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14721 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14722 			sd_send_request_sense_command(un, bp, pktp);
14723 		} else {
14724 			sd_return_failed_command(un, bp, EIO);
14725 		}
14726 		goto exit;
14727 	}
14728 
14729 	/*
14730 	 * The command did not successfully complete as requested so process
14731 	 * the error, retry, and/or attempt recovery.
14732 	 */
14733 	switch (pktp->pkt_reason) {
14734 	case CMD_CMPLT:
14735 		switch (SD_GET_PKT_STATUS(pktp)) {
14736 		case STATUS_GOOD:
14737 			/*
14738 			 * The command completed successfully with a non-zero
14739 			 * residual
14740 			 */
14741 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14742 			    "sdintr: STATUS_GOOD \n");
14743 			sd_pkt_status_good(un, bp, xp, pktp);
14744 			break;
14745 
14746 		case STATUS_CHECK:
14747 		case STATUS_TERMINATED:
14748 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14749 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
14750 			sd_pkt_status_check_condition(un, bp, xp, pktp);
14751 			break;
14752 
14753 		case STATUS_BUSY:
14754 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14755 			    "sdintr: STATUS_BUSY\n");
14756 			sd_pkt_status_busy(un, bp, xp, pktp);
14757 			break;
14758 
14759 		case STATUS_RESERVATION_CONFLICT:
14760 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14761 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
14762 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
14763 			break;
14764 
14765 		case STATUS_QFULL:
14766 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14767 			    "sdintr: STATUS_QFULL\n");
14768 			sd_pkt_status_qfull(un, bp, xp, pktp);
14769 			break;
14770 
14771 		case STATUS_MET:
14772 		case STATUS_INTERMEDIATE:
14773 		case STATUS_SCSI2:
14774 		case STATUS_INTERMEDIATE_MET:
14775 		case STATUS_ACA_ACTIVE:
14776 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14777 			    "Unexpected SCSI status received: 0x%x\n",
14778 			    SD_GET_PKT_STATUS(pktp));
14779 			sd_return_failed_command(un, bp, EIO);
14780 			break;
14781 
14782 		default:
14783 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14784 			    "Invalid SCSI status received: 0x%x\n",
14785 			    SD_GET_PKT_STATUS(pktp));
14786 			sd_return_failed_command(un, bp, EIO);
14787 			break;
14788 
14789 		}
14790 		break;
14791 
14792 	case CMD_INCOMPLETE:
14793 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14794 		    "sdintr:  CMD_INCOMPLETE\n");
14795 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
14796 		break;
14797 	case CMD_TRAN_ERR:
14798 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14799 		    "sdintr: CMD_TRAN_ERR\n");
14800 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
14801 		break;
14802 	case CMD_RESET:
14803 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14804 		    "sdintr: CMD_RESET \n");
14805 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
14806 		break;
14807 	case CMD_ABORTED:
14808 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14809 		    "sdintr: CMD_ABORTED \n");
14810 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
14811 		break;
14812 	case CMD_TIMEOUT:
14813 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14814 		    "sdintr: CMD_TIMEOUT\n");
14815 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
14816 		break;
14817 	case CMD_UNX_BUS_FREE:
14818 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14819 		    "sdintr: CMD_UNX_BUS_FREE \n");
14820 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
14821 		break;
14822 	case CMD_TAG_REJECT:
14823 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14824 		    "sdintr: CMD_TAG_REJECT\n");
14825 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
14826 		break;
14827 	default:
14828 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14829 		    "sdintr: default\n");
14830 		sd_pkt_reason_default(un, bp, xp, pktp);
14831 		break;
14832 	}
14833 
14834 exit:
14835 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
14836 
14837 	/* Decrement counter to indicate that the callback routine is done. */
14838 	un->un_in_callback--;
14839 	ASSERT(un->un_in_callback >= 0);
14840 
14841 	/*
14842 	 * At this point, the pkt has been dispatched, ie, it is either
14843 	 * being re-tried or has been returned to its caller and should
14844 	 * not be referenced.
14845 	 */
14846 
14847 	mutex_exit(SD_MUTEX(un));
14848 }
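
/*
 * Every exit path from sdintr() above decrements un->un_in_callback
 * exactly once under SD_MUTEX.  A sketch (not literal driver code) of
 * how another thread can use this counter to drain in-flight
 * callbacks, e.g. during detach:
 *
 *	mutex_enter(SD_MUTEX(un));
 *	while (un->un_in_callback > 0) {
 *		mutex_exit(SD_MUTEX(un));
 *		delay(drv_usectohz(10000));
 *		mutex_enter(SD_MUTEX(un));
 *	}
 *	mutex_exit(SD_MUTEX(un));
 */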
14849 
14850 
14851 /*
14852  *    Function: sd_print_incomplete_msg
14853  *
14854  * Description: Prints the error message for a CMD_INCOMPLETE error.
14855  *
14856  *   Arguments: un - ptr to associated softstate for the device.
14857  *		bp - ptr to the buf(9S) for the command.
14858  *		arg - message string ptr
14859  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
14860  *			or SD_NO_RETRY_ISSUED.
14861  *
14862  *     Context: May be called under interrupt context
14863  */
14864 
14865 static void
14866 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
14867 {
14868 	struct scsi_pkt	*pktp;
14869 	char	*msgp;
14870 	char	*cmdp = arg;
14871 
14872 	ASSERT(un != NULL);
14873 	ASSERT(mutex_owned(SD_MUTEX(un)));
14874 	ASSERT(bp != NULL);
14875 	ASSERT(arg != NULL);
14876 	pktp = SD_GET_PKTP(bp);
14877 	ASSERT(pktp != NULL);
14878 
14879 	switch (code) {
14880 	case SD_DELAYED_RETRY_ISSUED:
14881 	case SD_IMMEDIATE_RETRY_ISSUED:
14882 		msgp = "retrying";
14883 		break;
14884 	case SD_NO_RETRY_ISSUED:
14885 	default:
14886 		msgp = "giving up";
14887 		break;
14888 	}
14889 
14890 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
14891 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14892 		    "incomplete %s- %s\n", cmdp, msgp);
14893 	}
14894 }
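
/*
 * For example, with cmdp == "write" and no retry issued, the message
 * above appears in the log roughly as (device path prefix omitted):
 *
 *	WARNING: incomplete write- giving up
 */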
14895 
14896 
14897 
14898 /*
14899  *    Function: sd_pkt_status_good
14900  *
14901  * Description: Processing for a STATUS_GOOD code in pkt_status.
14902  *
14903  *     Context: May be called under interrupt context
14904  */
14905 
14906 static void
14907 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
14908 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
14909 {
14910 	char	*cmdp;
14911 
14912 	ASSERT(un != NULL);
14913 	ASSERT(mutex_owned(SD_MUTEX(un)));
14914 	ASSERT(bp != NULL);
14915 	ASSERT(xp != NULL);
14916 	ASSERT(pktp != NULL);
14917 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
14918 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
14919 	ASSERT(pktp->pkt_resid != 0);
14920 
14921 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
14922 
14923 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
14924 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
14925 	case SCMD_READ:
14926 		cmdp = "read";
14927 		break;
14928 	case SCMD_WRITE:
14929 		cmdp = "write";
14930 		break;
14931 	default:
14932 		SD_UPDATE_B_RESID(bp, pktp);
14933 		sd_return_command(un, bp);
14934 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14935 		return;
14936 	}
14937 
14938 	/*
14939 	 * See if we can retry the read/write, preferably immediately.
14940 	 * If retries are exhausted, then sd_retry_command() will update
14941 	 * the b_resid count.
14942 	 */
14943 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
14944 	    cmdp, EIO, (clock_t)0, NULL);
14945 
14946 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
14947 }
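
/*
 * A note on the (SD_GET_PKT_OPCODE(pktp) & 0x1F) masking used above
 * (and in sdintr()): the low five bits of a SCSI opcode identify the
 * command within its group, so the READ/WRITE opcodes of all groups
 * reduce to the group-0 values, e.g.:
 *
 *	(0x28 & 0x1F) == 0x08	READ(10)  -> SCMD_READ
 *	(0x2A & 0x1F) == 0x0A	WRITE(10) -> SCMD_WRITE
 *	(0x88 & 0x1F) == 0x08	READ(16)  -> SCMD_READ
 */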
14948 
14949 
14950 
14951 
14952 
14953 /*
14954  *    Function: sd_handle_request_sense
14955  *
14956  * Description: Processing for non-auto Request Sense command.
14957  *
14958  *   Arguments: un - ptr to associated softstate
14959  *		sense_bp - ptr to buf(9S) for the RQS command
14960  *		sense_xp - ptr to the sd_xbuf for the RQS command
14961  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
14962  *
14963  *     Context: May be called under interrupt context
14964  */
14965 
14966 static void
14967 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
14968 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
14969 {
14970 	struct buf	*cmd_bp;	/* buf for the original command */
14971 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
14972 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
14973 
14974 	ASSERT(un != NULL);
14975 	ASSERT(mutex_owned(SD_MUTEX(un)));
14976 	ASSERT(sense_bp != NULL);
14977 	ASSERT(sense_xp != NULL);
14978 	ASSERT(sense_pktp != NULL);
14979 
14980 	/*
14981 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
14982 	 * RQS command and not the original command.
14983 	 */
14984 	ASSERT(sense_pktp == un->un_rqs_pktp);
14985 	ASSERT(sense_bp   == un->un_rqs_bp);
14986 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
14987 	    (FLAG_SENSING | FLAG_HEAD));
14988 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
14989 	    FLAG_SENSING) == FLAG_SENSING);
14990 
14991 	/* These are the bp, xp, and pktp for the original command */
14992 	cmd_bp = sense_xp->xb_sense_bp;
14993 	cmd_xp = SD_GET_XBUF(cmd_bp);
14994 	cmd_pktp = SD_GET_PKTP(cmd_bp);
14995 
14996 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
14997 		/*
14998 		 * The REQUEST SENSE command failed.  Release the REQUEST
14999 		 * SENSE command for re-use, get back the bp for the original
15000 		 * command, and attempt to re-try the original command if
15001 		 * FLAG_DIAGNOSE is not set in the original packet.
15002 		 */
15003 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15004 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15005 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15006 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15007 			    NULL, NULL, EIO, (clock_t)0, NULL);
15008 			return;
15009 		}
15010 	}
15011 
15012 	/*
15013 	 * Save the relevant sense info into the xp for the original cmd.
15014 	 *
15015 	 * Note: if the request sense failed, the state info will be zero,
15016 	 * as set in sd_mark_rqs_busy().
15017 	 */
15018 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15019 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15020 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15021 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15022 
15023 	/*
15024 	 *  Free up the RQS command....
15025 	 *  NOTE:
15026 	 *	Must do this BEFORE calling sd_validate_sense_data!
15027 	 *	sd_validate_sense_data may return the original command in
15028 	 *	which case the pkt will be freed and the flags can no
15029 	 *	longer be touched.
15030 	 *	SD_MUTEX is held through this process until the command
15031 	 *	is dispatched based upon the sense data, so there are
15032 	 *	no race conditions.
15033 	 */
15034 	(void) sd_mark_rqs_idle(un, sense_xp);
15035 
15036 	/*
15037 	 * For a retryable command see if we have valid sense data, if so then
15038 	 * turn it over to sd_decode_sense() to figure out the right course of
15039 	 * action. Just fail a non-retryable command.
15040 	 */
15041 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15042 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15043 		    SD_SENSE_DATA_IS_VALID) {
15044 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15045 		}
15046 	} else {
15047 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15048 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15049 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15050 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15051 		sd_return_failed_command(un, cmd_bp, EIO);
15052 	}
15053 }
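
/*
 * The RQS bookkeeping above can be pictured as follows (a sketch of
 * the pointer relationships, not literal code):
 *
 *	un->un_rqs_bp  --SD_GET_XBUF()-->  sense_xp
 *	sense_xp->xb_sense_bp  ------->  cmd_bp   (original command)
 *	SD_GET_XBUF(cmd_bp)    ------->  cmd_xp   (receives sense data)
 *	SD_GET_PKTP(cmd_bp)    ------->  cmd_pktp (has FLAG_SENSING set)
 */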
15054 
15055 
15056 
15057 
15058 /*
15059  *    Function: sd_handle_auto_request_sense
15060  *
15061  * Description: Processing for auto-request sense information.
15062  *
15063  *   Arguments: un - ptr to associated softstate
15064  *		bp - ptr to buf(9S) for the command
15065  *		xp - ptr to the sd_xbuf for the command
15066  *		pktp - ptr to the scsi_pkt(9S) for the command
15067  *
15068  *     Context: May be called under interrupt context
15069  */
15070 
15071 static void
15072 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15073 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15074 {
15075 	struct scsi_arq_status *asp;
15076 
15077 	ASSERT(un != NULL);
15078 	ASSERT(mutex_owned(SD_MUTEX(un)));
15079 	ASSERT(bp != NULL);
15080 	ASSERT(xp != NULL);
15081 	ASSERT(pktp != NULL);
15082 	ASSERT(pktp != un->un_rqs_pktp);
15083 	ASSERT(bp   != un->un_rqs_bp);
15084 
15085 	/*
15086 	 * For auto-request sense, we get a scsi_arq_status back from
15087 	 * the HBA, with the sense data in the sts_sensedata member.
15088 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15089 	 */
15090 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
15091 
15092 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15093 		/*
15094 		 * The auto REQUEST SENSE failed; see if we can re-try
15095 		 * the original command.
15096 		 */
15097 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15098 		    "auto request sense failed (reason=%s)\n",
15099 		    scsi_rname(asp->sts_rqpkt_reason));
15100 
15101 		sd_reset_target(un, pktp);
15102 
15103 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15104 		    NULL, NULL, EIO, (clock_t)0, NULL);
15105 		return;
15106 	}
15107 
15108 	/* Save the relevant sense info into the xp for the original cmd. */
15109 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15110 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15111 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
15112 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15113 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
15114 
15115 	/*
15116 	 * See if we have valid sense data, if so then turn it over to
15117 	 * sd_decode_sense() to figure out the right course of action.
15118 	 */
15119 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15120 		sd_decode_sense(un, bp, xp, pktp);
15121 	}
15122 }
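
/*
 * For reference, the scsi_arq_status overlay used above has roughly
 * the following layout (abridged sketch; see the SCSA headers for the
 * authoritative definition):
 *
 *	struct scsi_arq_status {
 *		struct scsi_status		sts_status;
 *		struct scsi_status		sts_rqpkt_status;
 *		uchar_t				sts_rqpkt_reason;
 *		uchar_t				sts_rqpkt_resid;
 *		uint_t				sts_rqpkt_state;
 *		uint_t				sts_rqpkt_statistics;
 *		struct scsi_extended_sense	sts_sensedata;
 *	};
 *
 * sts_status is the completion status of the original command; the
 * sts_rqpkt_* fields describe the internally-issued REQUEST SENSE,
 * and sts_sensedata holds the sense bytes themselves.
 */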
15123 
15124 
15125 /*
15126  *    Function: sd_print_sense_failed_msg
15127  *
15128  * Description: Print log message when RQS has failed.
15129  *
15130  *   Arguments: un - ptr to associated softstate
15131  *		bp - ptr to buf(9S) for the command
15132  *		arg - generic message string ptr
15133  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15134  *			or SD_NO_RETRY_ISSUED
15135  *
15136  *     Context: May be called from interrupt context
15137  */
15138 
15139 static void
15140 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15141 	int code)
15142 {
15143 	char	*msgp = arg;
15144 
15145 	ASSERT(un != NULL);
15146 	ASSERT(mutex_owned(SD_MUTEX(un)));
15147 	ASSERT(bp != NULL);
15148 
15149 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15150 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
15151 	}
15152 }
15153 
15154 
15155 /*
15156  *    Function: sd_validate_sense_data
15157  *
15158  * Description: Check the given sense data for validity.
15159  *		If the sense data is not valid, the command will
15160  *		be either failed or retried!
15161  *
15162  * Return Code: SD_SENSE_DATA_IS_INVALID
15163  *		SD_SENSE_DATA_IS_VALID
15164  *
15165  *     Context: May be called from interrupt context
15166  */
15167 
15168 static int
15169 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15170 {
15171 	struct scsi_extended_sense *esp;
15172 	struct	scsi_pkt *pktp;
15173 	size_t	actual_len;
15174 	char	*msgp = NULL;
15175 
15176 	ASSERT(un != NULL);
15177 	ASSERT(mutex_owned(SD_MUTEX(un)));
15178 	ASSERT(bp != NULL);
15179 	ASSERT(bp != un->un_rqs_bp);
15180 	ASSERT(xp != NULL);
15181 
15182 	pktp = SD_GET_PKTP(bp);
15183 	ASSERT(pktp != NULL);
15184 
15185 	/*
15186 	 * Check the status of the RQS command (auto or manual).
15187 	 */
15188 	switch (xp->xb_sense_status & STATUS_MASK) {
15189 	case STATUS_GOOD:
15190 		break;
15191 
15192 	case STATUS_RESERVATION_CONFLICT:
15193 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15194 		return (SD_SENSE_DATA_IS_INVALID);
15195 
15196 	case STATUS_BUSY:
15197 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15198 		    "Busy Status on REQUEST SENSE\n");
15199 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15200 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15201 		return (SD_SENSE_DATA_IS_INVALID);
15202 
15203 	case STATUS_QFULL:
15204 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15205 		    "QFULL Status on REQUEST SENSE\n");
15206 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15207 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15208 		return (SD_SENSE_DATA_IS_INVALID);
15209 
15210 	case STATUS_CHECK:
15211 	case STATUS_TERMINATED:
15212 		msgp = "Check Condition on REQUEST SENSE\n";
15213 		goto sense_failed;
15214 
15215 	default:
15216 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
15217 		goto sense_failed;
15218 	}
15219 
15220 	/*
15221 	 * See if we got the minimum required amount of sense data.
15222 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
15223 	 * or less.
15224 	 */
15225 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
15226 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15227 	    (actual_len == 0)) {
15228 		msgp = "Request Sense couldn't get sense data\n";
15229 		goto sense_failed;
15230 	}
15231 
15232 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15233 		msgp = "Not enough sense information\n";
15234 		goto sense_failed;
15235 	}
15236 
15237 	/*
15238 	 * We require the extended sense data
15239 	 */
15240 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15241 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15242 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15243 			static char tmp[8];
15244 			static char buf[148];
15245 			char *p = (char *)(xp->xb_sense_data);
15246 			int i;
15247 
15248 			mutex_enter(&sd_sense_mutex);
15249 			(void) strcpy(buf, "undecodable sense information:");
15250 			for (i = 0; i < actual_len; i++) {
15251 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15252 				(void) strcpy(&buf[strlen(buf)], tmp);
15253 			}
15254 			i = strlen(buf);
15255 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15256 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
15257 			mutex_exit(&sd_sense_mutex);
15258 		}
15259 		/* Note: Legacy behavior, fail the command with no retry */
15260 		sd_return_failed_command(un, bp, EIO);
15261 		return (SD_SENSE_DATA_IS_INVALID);
15262 	}
15263 
15264 	/*
15265 	 * Check that es_code is valid (es_class concatenated with es_code
15266 	 * make up the "response code" field).  es_class will always be 7, so
15267 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
15268 	 * format.
15269 	 */
15270 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15271 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15272 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15273 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15274 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15275 		goto sense_failed;
15276 	}
15277 
15278 	return (SD_SENSE_DATA_IS_VALID);
15279 
15280 sense_failed:
15281 	/*
15282 	 * If the request sense failed (for whatever reason), attempt
15283 	 * to retry the original command.
15284 	 */
15285 #if defined(__i386) || defined(__amd64)
15286 	/*
15287 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
15288 	 * sddef.h for the SPARC platform, while x86 uses one binary
15289 	 * for both SCSI and FC.
15290 	 * The SD_RETRY_DELAY value used here needs to be adjusted
15291 	 * whenever SD_RETRY_DELAY changes in sddef.h.
15292 	 */
15293 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15294 	    sd_print_sense_failed_msg, msgp, EIO,
15295 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
15296 #else
15297 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15298 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15299 #endif
15300 
15301 	return (SD_SENSE_DATA_IS_INVALID);
15302 }
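
/*
 * Worked example for the response-code check above: a fixed-format,
 * current-error sense buffer begins with byte 0x70, i.e.
 * es_class == CLASS_EXTENDED_SENSE (0x7) and
 * es_code == CODE_FMT_FIXED_CURRENT (0x0).  Descriptor-format current
 * errors begin with 0x72 (es_code == CODE_FMT_DESCR_CURRENT), and
 * 0x7F is vendor specific (es_code == CODE_FMT_VENDOR_SPECIFIC).
 */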
15303 
15304 
15305 
15306 /*
15307  *    Function: sd_decode_sense
15308  *
15309  * Description: Take recovery action(s) when SCSI Sense Data is received.
15310  *
15311  *     Context: Interrupt context.
15312  */
15313 
15314 static void
15315 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15316 	struct scsi_pkt *pktp)
15317 {
15318 	uint8_t sense_key;
15319 
15320 	ASSERT(un != NULL);
15321 	ASSERT(mutex_owned(SD_MUTEX(un)));
15322 	ASSERT(bp != NULL);
15323 	ASSERT(bp != un->un_rqs_bp);
15324 	ASSERT(xp != NULL);
15325 	ASSERT(pktp != NULL);
15326 
15327 	sense_key = scsi_sense_key(xp->xb_sense_data);
15328 
15329 	switch (sense_key) {
15330 	case KEY_NO_SENSE:
15331 		sd_sense_key_no_sense(un, bp, xp, pktp);
15332 		break;
15333 	case KEY_RECOVERABLE_ERROR:
15334 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15335 		    bp, xp, pktp);
15336 		break;
15337 	case KEY_NOT_READY:
15338 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15339 		    bp, xp, pktp);
15340 		break;
15341 	case KEY_MEDIUM_ERROR:
15342 	case KEY_HARDWARE_ERROR:
15343 		sd_sense_key_medium_or_hardware_error(un,
15344 		    xp->xb_sense_data, bp, xp, pktp);
15345 		break;
15346 	case KEY_ILLEGAL_REQUEST:
15347 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15348 		break;
15349 	case KEY_UNIT_ATTENTION:
15350 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15351 		    bp, xp, pktp);
15352 		break;
15353 	case KEY_WRITE_PROTECT:
15354 	case KEY_VOLUME_OVERFLOW:
15355 	case KEY_MISCOMPARE:
15356 		sd_sense_key_fail_command(un, bp, xp, pktp);
15357 		break;
15358 	case KEY_BLANK_CHECK:
15359 		sd_sense_key_blank_check(un, bp, xp, pktp);
15360 		break;
15361 	case KEY_ABORTED_COMMAND:
15362 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15363 		break;
15364 	case KEY_VENDOR_UNIQUE:
15365 	case KEY_COPY_ABORTED:
15366 	case KEY_EQUAL:
15367 	case KEY_RESERVED:
15368 	default:
15369 		sd_sense_key_default(un, xp->xb_sense_data,
15370 		    bp, xp, pktp);
15371 		break;
15372 	}
15373 }
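
/*
 * Note that scsi_sense_key() hides the sense-data format from the
 * switch above: the sense key lives in byte 2 of fixed-format data
 * but in byte 1 of descriptor-format data.  A sketch of the
 * equivalent open-coded extraction (the DDI routine remains the one
 * to use):
 *
 *	uint8_t *sp = xp->xb_sense_data;
 *	uint8_t key;
 *
 *	if (((sp[0] & 0x7F) == 0x72) || ((sp[0] & 0x7F) == 0x73))
 *		key = sp[1] & 0x0F;	(descriptor format)
 *	else
 *		key = sp[2] & 0x0F;	(fixed format)
 */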
15374 
15375 
15376 /*
15377  *    Function: sd_dump_memory
15378  *
15379  * Description: Debug logging routine to print the contents of a user provided
15380  *		buffer. The output of the buffer is broken up into 256 byte
15381  *		segments due to a size constraint of the scsi_log
15382  *		implementation.
15383  *
15384  *   Arguments: un - ptr to softstate
15385  *		comp - component mask
15386  *		title - "title" string to precede data when printed
15387  *		data - ptr to data block to be printed
15388  *		len - size of data block to be printed
15389  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15390  *
15391  *     Context: May be called from interrupt context
15392  */
15393 
15394 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15395 
15396 static char *sd_dump_format_string[] = {
15397 		" 0x%02x",
15398 		" %c"
15399 };
15400 
15401 static void
15402 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15403     int len, int fmt)
15404 {
15405 	int	i, j;
15406 	int	avail_count;
15407 	int	start_offset;
15408 	int	end_offset;
15409 	size_t	entry_len;
15410 	char	*bufp;
15411 	char	*local_buf;
15412 	char	*format_string;
15413 
15414 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15415 
15416 	/*
15417 	 * In the debug version of the driver, this function is called from a
15418 	 * number of places which are NOPs in the release driver.
15419 	 * The debug driver therefore has additional methods of filtering
15420 	 * debug output.
15421 	 */
15422 #ifdef SDDEBUG
15423 	/*
15424 	 * In the debug version of the driver we can reduce the amount of debug
15425 	 * messages by setting sd_error_level to something other than
15426 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15427 	 * sd_component_mask.
15428 	 */
15429 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15430 	    (sd_error_level != SCSI_ERR_ALL)) {
15431 		return;
15432 	}
15433 	if (((sd_component_mask & comp) == 0) ||
15434 	    (sd_error_level != SCSI_ERR_ALL)) {
15435 		return;
15436 	}
15437 #else
15438 	if (sd_error_level != SCSI_ERR_ALL) {
15439 		return;
15440 	}
15441 #endif
15442 
15443 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15444 	bufp = local_buf;
15445 	/*
15446 	 * Available length is the length of local_buf[], minus the
15447 	 * length of the title string, minus one for the ":", minus
15448 	 * one for the newline, minus one for the NULL terminator.
15449 	 * This gives the #bytes available for holding the printed
15450 	 * values from the given data buffer.
15451 	 */
15452 	if (fmt == SD_LOG_HEX) {
15453 		format_string = sd_dump_format_string[0];
15454 	} else /* SD_LOG_CHAR */ {
15455 		format_string = sd_dump_format_string[1];
15456 	}
15457 	/*
15458 	 * Available count is the number of elements from the given
15459 	 * data buffer that we can fit into the available length.
15460 	 * This is based upon the size of the format string used.
15461 	 * Make one entry and find its size.
15462 	 */
15463 	(void) sprintf(bufp, format_string, data[0]);
15464 	entry_len = strlen(bufp);
15465 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
15466 
15467 	j = 0;
15468 	while (j < len) {
15469 		bufp = local_buf;
15470 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15471 		start_offset = j;
15472 
15473 		end_offset = start_offset + avail_count;
15474 
15475 		(void) sprintf(bufp, "%s:", title);
15476 		bufp += strlen(bufp);
15477 		for (i = start_offset; ((i < end_offset) && (j < len));
15478 		    i++, j++) {
15479 			(void) sprintf(bufp, format_string, data[i]);
15480 			bufp += entry_len;
15481 		}
15482 		(void) sprintf(bufp, "\n");
15483 
15484 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15485 	}
15486 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15487 }
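
/*
 * Example usage (a sketch): dumping a six-byte CDB in hex,
 *
 *	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
 *	    (uchar_t *)pktp->pkt_cdbp, 6, SD_LOG_HEX);
 *
 * produces a single scsi_log(9F) line of the form
 *
 *	Failed CDB: 0x08 0x00 0x00 0x10 0x01 0x00
 *
 * Buffers with more than avail_count entries are split across
 * multiple such lines, each re-prefixed with the title.
 */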
15488 
15489 /*
15490  *    Function: sd_print_sense_msg
15491  *
15492  * Description: Log a message based upon the given sense data.
15493  *
15494  *   Arguments: un - ptr to associated softstate
15495  *		bp - ptr to buf(9S) for the command
15496  *		arg - ptr to associate sd_sense_info struct
15497  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15498  *			or SD_NO_RETRY_ISSUED
15499  *
15500  *     Context: May be called from interrupt context
15501  */
15502 
15503 static void
15504 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15505 {
15506 	struct sd_xbuf	*xp;
15507 	struct scsi_pkt	*pktp;
15508 	uint8_t *sensep;
15509 	daddr_t request_blkno;
15510 	diskaddr_t err_blkno;
15511 	int severity;
15512 	int pfa_flag;
15513 	extern struct scsi_key_strings scsi_cmds[];
15514 
15515 	ASSERT(un != NULL);
15516 	ASSERT(mutex_owned(SD_MUTEX(un)));
15517 	ASSERT(bp != NULL);
15518 	xp = SD_GET_XBUF(bp);
15519 	ASSERT(xp != NULL);
15520 	pktp = SD_GET_PKTP(bp);
15521 	ASSERT(pktp != NULL);
15522 	ASSERT(arg != NULL);
15523 
15524 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15525 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15526 
15527 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15528 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15529 		severity = SCSI_ERR_RETRYABLE;
15530 	}
15531 
15532 	/* Use absolute block number for the request block number */
15533 	request_blkno = xp->xb_blkno;
15534 
15535 	/*
15536 	 * Now try to get the error block number from the sense data
15537 	 */
15538 	sensep = xp->xb_sense_data;
15539 
15540 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15541 	    (uint64_t *)&err_blkno)) {
15542 		/*
15543 		 * We retrieved the error block number from the information
15544 		 * portion of the sense data.
15545 		 *
15546 		 * For USCSI commands we are better off using the error
15547 		 * block no. as the requested block no. (This is the best
15548 		 * we can estimate.)
15549 		 */
15550 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15551 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15552 			request_blkno = err_blkno;
15553 		}
15554 	} else {
15555 		/*
15556 		 * Without the es_valid bit set (for fixed format) or an
15557 		 * information descriptor (for descriptor format) we cannot
15558 		 * be certain of the error blkno, so just use the
15559 		 * request_blkno.
15560 		 */
15561 		err_blkno = (diskaddr_t)request_blkno;
15562 	}
15563 
15564 	/*
15565 	 * The following will log the buffer contents for the release driver
15566 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15567 	 * level is set to verbose.
15568 	 */
15569 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15570 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15571 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15572 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15573 
15574 	if (pfa_flag == FALSE) {
15575 		/* This is normally only set for USCSI */
15576 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15577 			return;
15578 		}
15579 
15580 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15581 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15582 		    (severity < sd_error_level))) {
15583 			return;
15584 		}
15585 	}
15586 
15587 	/*
15588 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
15589 	 */
15590 	if ((SD_IS_LSI(un)) &&
15591 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15592 	    (scsi_sense_asc(sensep) == 0x94) &&
15593 	    (scsi_sense_ascq(sensep) == 0x01)) {
15594 		un->un_sonoma_failure_count++;
15595 		if (un->un_sonoma_failure_count > 1) {
15596 			return;
15597 		}
15598 	}
15599 
15600 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15601 	    request_blkno, err_blkno, scsi_cmds,
15602 	    (struct scsi_extended_sense *)sensep,
15603 	    un->un_additional_codes, NULL);
15604 }
15605 
15606 /*
15607  *    Function: sd_sense_key_no_sense
15608  *
15609  * Description: Recovery action when sense data was not received.
15610  *
15611  *     Context: May be called from interrupt context
15612  */
15613 
15614 static void
15615 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15616 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15617 {
15618 	struct sd_sense_info	si;
15619 
15620 	ASSERT(un != NULL);
15621 	ASSERT(mutex_owned(SD_MUTEX(un)));
15622 	ASSERT(bp != NULL);
15623 	ASSERT(xp != NULL);
15624 	ASSERT(pktp != NULL);
15625 
15626 	si.ssi_severity = SCSI_ERR_FATAL;
15627 	si.ssi_pfa_flag = FALSE;
15628 
15629 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15630 
15631 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15632 	    &si, EIO, (clock_t)0, NULL);
15633 }
15634 
15635 
15636 /*
15637  *    Function: sd_sense_key_recoverable_error
15638  *
15639  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15640  *
15641  *     Context: May be called from interrupt context
15642  */
15643 
15644 static void
15645 sd_sense_key_recoverable_error(struct sd_lun *un,
15646 	uint8_t *sense_datap,
15647 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15648 {
15649 	struct sd_sense_info	si;
15650 	uint8_t asc = scsi_sense_asc(sense_datap);
15651 
15652 	ASSERT(un != NULL);
15653 	ASSERT(mutex_owned(SD_MUTEX(un)));
15654 	ASSERT(bp != NULL);
15655 	ASSERT(xp != NULL);
15656 	ASSERT(pktp != NULL);
15657 
15658 	/*
15659 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15660 	 */
15661 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15662 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15663 		si.ssi_severity = SCSI_ERR_INFO;
15664 		si.ssi_pfa_flag = TRUE;
15665 	} else {
15666 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15667 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15668 		si.ssi_severity = SCSI_ERR_RECOVERED;
15669 		si.ssi_pfa_flag = FALSE;
15670 	}
15671 
15672 	if (pktp->pkt_resid == 0) {
15673 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15674 		sd_return_command(un, bp);
15675 		return;
15676 	}
15677 
15678 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15679 	    &si, EIO, (clock_t)0, NULL);
15680 }
15681 
15682 
15683 
15684 
15685 /*
15686  *    Function: sd_sense_key_not_ready
15687  *
15688  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15689  *
15690  *     Context: May be called from interrupt context
15691  */
15692 
15693 static void
15694 sd_sense_key_not_ready(struct sd_lun *un,
15695 	uint8_t *sense_datap,
15696 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15697 {
15698 	struct sd_sense_info	si;
15699 	uint8_t asc = scsi_sense_asc(sense_datap);
15700 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15701 
15702 	ASSERT(un != NULL);
15703 	ASSERT(mutex_owned(SD_MUTEX(un)));
15704 	ASSERT(bp != NULL);
15705 	ASSERT(xp != NULL);
15706 	ASSERT(pktp != NULL);
15707 
15708 	si.ssi_severity = SCSI_ERR_FATAL;
15709 	si.ssi_pfa_flag = FALSE;
15710 
15711 	/*
15712 	 * Update error stats after first NOT READY error. Disks may have
15713 	 * been powered down and may need to be restarted.  For CDROMs,
15714 	 * report NOT READY errors only if media is present.
15715 	 */
15716 	if ((ISCD(un) && (asc == 0x3A)) ||
15717 	    (xp->xb_retry_count > 0)) {
15718 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15719 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15720 	}
15721 
15722 	/*
15723 	 * Just fail if the "not ready" retry limit has been reached.
15724 	 */
15725 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
15726 		/* Special check for error message printing for removables. */
15727 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15728 		    (ascq >= 0x04)) {
15729 			si.ssi_severity = SCSI_ERR_ALL;
15730 		}
15731 		goto fail_command;
15732 	}
15733 
15734 	/*
15735 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15736 	 * what to do.
15737 	 */
15738 	switch (asc) {
15739 	case 0x04:	/* LOGICAL UNIT NOT READY */
15740 		/*
15741 		 * Disk drives that don't spin up result in a very long delay
15742 		 * in format without warning messages. We will log a message
15743 		 * if the error level is set to verbose.
15744 		 */
15745 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15746 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15747 			    "logical unit not ready, resetting disk\n");
15748 		}
15749 
15750 		/*
15751 		 * There are different requirements for CDROMs and disks for
15752 		 * the number of retries.  If a CD-ROM is giving this, it is
15753 		 * probably reading TOC and is in the process of getting
15754 		 * ready, so we should keep on trying for a long time to make
15755 	 * sure that all types of media are taken into account (for
15756 		 * some media the drive takes a long time to read TOC).  For
15757 		 * disks we do not want to retry this too many times as this
15758 		 * can cause a long hang in format when the drive refuses to
15759 		 * spin up (a very common failure).
15760 		 */
15761 		switch (ascq) {
15762 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
15763 			/*
15764 			 * Disk drives frequently refuse to spin up which
15765 			 * results in a very long hang in format without
15766 			 * warning messages.
15767 			 *
15768 			 * Note: This code preserves the legacy behavior of
15769 			 * comparing xb_retry_count against zero for fibre
15770 			 * channel targets instead of comparing against the
15771 			 * un_reset_retry_count value.  The reason for this
15772 			 * discrepancy has been so utterly lost beneath the
15773 			 * Sands of Time that even Indiana Jones could not
15774 			 * find it.
15775 			 */
15776 			if (un->un_f_is_fibre == TRUE) {
15777 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15778 					(xp->xb_retry_count > 0)) &&
15779 					(un->un_startstop_timeid == NULL)) {
15780 					scsi_log(SD_DEVINFO(un), sd_label,
15781 					CE_WARN, "logical unit not ready, "
15782 					"resetting disk\n");
15783 					sd_reset_target(un, pktp);
15784 				}
15785 			} else {
15786 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
15787 					(xp->xb_retry_count >
15788 					un->un_reset_retry_count)) &&
15789 					(un->un_startstop_timeid == NULL)) {
15790 					scsi_log(SD_DEVINFO(un), sd_label,
15791 					CE_WARN, "logical unit not ready, "
15792 					"resetting disk\n");
15793 					sd_reset_target(un, pktp);
15794 				}
15795 			}
15796 			break;
15797 
15798 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
15799 			/*
15800 			 * If the target is in the process of becoming
15801 			 * ready, just proceed with the retry. This can
15802 			 * happen with CD-ROMs that take a long time to
15803 			 * read TOC after a power cycle or reset.
15804 			 */
15805 			goto do_retry;
15806 
15807 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
15808 			break;
15809 
15810 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
15811 			/*
15812 			 * Retries cannot help here so just fail right away.
15813 			 */
15814 			goto fail_command;
15815 
15816 		case 0x88:
15817 			/*
15818 			 * Vendor-unique code for T3/T4: it indicates a
15819 			 * path problem in a multipathed config, but as far as
15820 			 * the target driver is concerned it equates to a fatal
15821 			 * error, so we should just fail the command right away
15822 			 * (without printing anything to the console). If this
15823 			 * is not a T3/T4, fall thru to the default recovery
15824 			 * action.
15825 			 * T3/T4 is FC only, don't need to check is_fibre
15826 			 */
15827 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
15828 				sd_return_failed_command(un, bp, EIO);
15829 				return;
15830 			}
15831 			/* FALLTHRU */
15832 
15833 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
15834 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
15835 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
15836 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
15837 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
15838 		default:    /* Possible future codes in SCSI spec? */
15839 			/*
15840 			 * For removable-media devices, do not retry if
15841 			 * ASCQ > 2 as these result mostly from USCSI commands
15842 			 * on MMC devices issued to check status of an
15843 			 * operation initiated in immediate mode.  Also for
15844 			 * ASCQ >= 4 do not print console messages as these
15845 			 * mainly represent a user-initiated operation
15846 			 * instead of a system failure.
15847 			 */
15848 			if (un->un_f_has_removable_media) {
15849 				si.ssi_severity = SCSI_ERR_ALL;
15850 				goto fail_command;
15851 			}
15852 			break;
15853 		}
15854 
15855 		/*
15856 		 * As part of our recovery attempt for the NOT READY
15857 		 * condition, we issue a START STOP UNIT command. However
15858 		 * we want to wait for a short delay before attempting this
15859 		 * as there may still be more commands coming back from the
15860 		 * target with the check condition. To do this we use
15861 		 * timeout(9F) to call sd_start_stop_unit_callback() after
15862 		 * the delay interval expires. (sd_start_stop_unit_callback()
15863 		 * the actual START STOP UNIT command.)  The delay interval
15864 		 * the actual START STOP UNIT command. The delay interval
15865 		 * is one-half of the delay that we will use to retry the
15866 		 * command that generated the NOT READY condition.
15867 		 *
15868 		 * Note that we could just dispatch sd_start_stop_unit_task()
15869 		 * from here and allow it to sleep for the delay interval,
15870 		 * but then we would be tying up the taskq thread
15871 		 * unnecessarily for the duration of the delay.
15872 		 *
15873 		 * Do not issue the START STOP UNIT if the current command
15874 		 * is already a START STOP UNIT.
15875 		 */
15876 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
15877 			break;
15878 		}
15879 
15880 		/*
15881 		 * Do not schedule the timeout if one is already pending.
15882 		 */
15883 		if (un->un_startstop_timeid != NULL) {
15884 			SD_INFO(SD_LOG_ERROR, un,
15885 			    "sd_sense_key_not_ready: restart already issued to"
15886 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
15887 			    ddi_get_instance(SD_DEVINFO(un)));
15888 			break;
15889 		}
15890 
15891 		/*
15892 		 * Schedule the START STOP UNIT command, then queue the command
15893 		 * for a retry.
15894 		 *
15895 		 * Note: A timeout is not scheduled for this retry because we
15896 		 * want the retry to be serial with the START_STOP_UNIT. The
15897 		 * retry will be started when the START_STOP_UNIT is completed
15898 		 * in sd_start_stop_unit_task.
15899 		 */
15900 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
15901 		    un, SD_BSY_TIMEOUT / 2);
15902 		xp->xb_retry_count++;
15903 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
15904 		return;
15905 
15906 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
15907 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15908 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15909 			    "unit does not respond to selection\n");
15910 		}
15911 		break;
15912 
15913 	case 0x3A:	/* MEDIUM NOT PRESENT */
15914 		if (sd_error_level >= SCSI_ERR_FATAL) {
15915 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15916 			    "Caddy not inserted in drive\n");
15917 		}
15918 
15919 		sr_ejected(un);
15920 		un->un_mediastate = DKIO_EJECTED;
15921 		/* The state has changed, inform the media watch routines */
15922 		cv_broadcast(&un->un_state_cv);
15923 		/* Just fail if no media is present in the drive. */
15924 		goto fail_command;
15925 
15926 	default:
15927 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
15928 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15929 			    "Unit not Ready. Additional sense code 0x%x\n",
15930 			    asc);
15931 		}
15932 		break;
15933 	}
15934 
15935 do_retry:
15936 
15937 	/*
15938 	 * Retry the command, as some targets may report NOT READY for
15939 	 * several seconds after being reset.
15940 	 */
15941 	xp->xb_retry_count++;
15942 	si.ssi_severity = SCSI_ERR_RETRYABLE;
15943 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
15944 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
15945 
15946 	return;
15947 
15948 fail_command:
15949 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15950 	sd_return_failed_command(un, bp, EIO);
15951 }
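
/*
 * The NOT READY recovery above thus proceeds in two legs (a sketch of
 * the control flow, not literal code):
 *
 *	timeout(sd_start_stop_unit_callback, un, SD_BSY_TIMEOUT / 2)
 *	  -> sd_start_stop_unit_callback()	(timeout(9F) context)
 *	       -> dispatches sd_start_stop_unit_task() to a taskq
 *	            -> issues START STOP UNIT, then kicks off the
 *	               retry of the original command queued by
 *	               sd_set_retry_bp() above
 */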
15952 
15953 
15954 
15955 /*
15956  *    Function: sd_sense_key_medium_or_hardware_error
15957  *
15958  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
15959  *		sense key.
15960  *
15961  *     Context: May be called from interrupt context
15962  */
15963 
15964 static void
15965 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
15966 	uint8_t *sense_datap,
15967 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15968 {
15969 	struct sd_sense_info	si;
15970 	uint8_t sense_key = scsi_sense_key(sense_datap);
15971 	uint8_t asc = scsi_sense_asc(sense_datap);
15972 
15973 	ASSERT(un != NULL);
15974 	ASSERT(mutex_owned(SD_MUTEX(un)));
15975 	ASSERT(bp != NULL);
15976 	ASSERT(xp != NULL);
15977 	ASSERT(pktp != NULL);
15978 
15979 	si.ssi_severity = SCSI_ERR_FATAL;
15980 	si.ssi_pfa_flag = FALSE;
15981 
15982 	if (sense_key == KEY_MEDIUM_ERROR) {
15983 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
15984 	}
15985 
15986 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
15987 
15988 	if ((un->un_reset_retry_count != 0) &&
15989 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
15990 		mutex_exit(SD_MUTEX(un));
15991 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
15992 		if (un->un_f_allow_bus_device_reset == TRUE) {
15993 
15994 			boolean_t try_resetting_target = B_TRUE;
15995 
15996 			/*
15997 			 * We need to be able to handle specific ASC when we are
15998 			 * handling a KEY_HARDWARE_ERROR. In particular
15999 			 * taking the default action of resetting the target may
16000 			 * not be the appropriate way to attempt recovery.
16001 			 * Resetting a target because of a single LUN failure
16002 			 * victimizes all LUNs on that target.
16003 			 *
16004 			 * This is true for the LSI arrays, if an LSI
16005 			 * array controller returns an ASC of 0x84 (LUN Dead) we
16006 			 * should trust it.
16007 			 */
16008 
16009 			if (sense_key == KEY_HARDWARE_ERROR) {
16010 				switch (asc) {
16011 				case 0x84:
16012 					if (SD_IS_LSI(un)) {
16013 						try_resetting_target = B_FALSE;
16014 					}
16015 					break;
16016 				default:
16017 					break;
16018 				}
16019 			}
16020 
16021 			if (try_resetting_target == B_TRUE) {
16022 				int reset_retval = 0;
16023 				if (un->un_f_lun_reset_enabled == TRUE) {
16024 					SD_TRACE(SD_LOG_IO_CORE, un,
16025 					    "sd_sense_key_medium_or_hardware_"
16026 					    "error: issuing RESET_LUN\n");
16027 					reset_retval =
16028 					    scsi_reset(SD_ADDRESS(un),
16029 					    RESET_LUN);
16030 				}
16031 				if (reset_retval == 0) {
16032 					SD_TRACE(SD_LOG_IO_CORE, un,
16033 					    "sd_sense_key_medium_or_hardware_"
16034 					    "error: issuing RESET_TARGET\n");
16035 					(void) scsi_reset(SD_ADDRESS(un),
16036 					    RESET_TARGET);
16037 				}
16038 			}
16039 		}
16040 		mutex_enter(SD_MUTEX(un));
16041 	}
16042 
16043 	/*
16044 	 * This really ought to be a fatal error, but we will retry anyway
16045 	 * as some drives report this as a spurious error.
16046 	 */
16047 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16048 	    &si, EIO, (clock_t)0, NULL);
16049 }
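
/*
 * The reset escalation above follows the usual SCSA pattern: try the
 * narrowest reset first and widen only on failure (RESET_ALL is
 * deliberately omitted, as noted).  A condensed sketch, relying on
 * scsi_reset(9F) returning nonzero on success:
 *
 *	if (!un->un_f_lun_reset_enabled ||
 *	    scsi_reset(SD_ADDRESS(un), RESET_LUN) == 0) {
 *		(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
 *	}
 */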
16050 
16051 
16052 
16053 /*
16054  *    Function: sd_sense_key_illegal_request
16055  *
16056  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16057  *
16058  *     Context: May be called from interrupt context
16059  */
16060 
16061 static void
16062 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16063 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16064 {
16065 	struct sd_sense_info	si;
16066 
16067 	ASSERT(un != NULL);
16068 	ASSERT(mutex_owned(SD_MUTEX(un)));
16069 	ASSERT(bp != NULL);
16070 	ASSERT(xp != NULL);
16071 	ASSERT(pktp != NULL);
16072 
16073 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16074 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16075 
16076 	si.ssi_severity = SCSI_ERR_INFO;
16077 	si.ssi_pfa_flag = FALSE;
16078 
16079 	/* Pointless to retry if the target thinks it's an illegal request */
16080 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16081 	sd_return_failed_command(un, bp, EIO);
16082 }
16083 
16084 
16085 
16086 
16087 /*
16088  *    Function: sd_sense_key_unit_attention
16089  *
16090  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16091  *
16092  *     Context: May be called from interrupt context
16093  */
16094 
16095 static void
16096 sd_sense_key_unit_attention(struct sd_lun *un,
16097 	uint8_t *sense_datap,
16098 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16099 {
16100 	/*
16101 	 * For UNIT ATTENTION we allow retries for one minute. Devices
16102 	 * like Sonoma can return UNIT ATTENTION for close to a minute
16103 	 * under certain conditions.
16104 	 */
16105 	int	retry_check_flag = SD_RETRIES_UA;
16106 	boolean_t	kstat_updated = B_FALSE;
16107 	struct	sd_sense_info		si;
16108 	uint8_t asc = scsi_sense_asc(sense_datap);
16109 
16110 	ASSERT(un != NULL);
16111 	ASSERT(mutex_owned(SD_MUTEX(un)));
16112 	ASSERT(bp != NULL);
16113 	ASSERT(xp != NULL);
16114 	ASSERT(pktp != NULL);
16115 
16116 	si.ssi_severity = SCSI_ERR_INFO;
16117 	si.ssi_pfa_flag = FALSE;
16118 
16119 
16120 	switch (asc) {
16121 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16122 		if (sd_report_pfa != 0) {
16123 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16124 			si.ssi_pfa_flag = TRUE;
16125 			retry_check_flag = SD_RETRIES_STANDARD;
16126 			goto do_retry;
16127 		}
16128 
16129 		break;
16130 
16131 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
16132 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16133 			un->un_resvd_status |=
16134 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16135 		}
16136 #ifdef _LP64
16137 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16138 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16139 			    un, KM_NOSLEEP) == 0) {
16140 				/*
16141 				 * If we can't dispatch the task we'll just
16142 				 * live without descriptor sense.  We can
16143 				 * try again on the next "unit attention"
16144 				 */
16145 				SD_ERROR(SD_LOG_ERROR, un,
16146 				    "sd_sense_key_unit_attention: "
16147 				    "Could not dispatch "
16148 				    "sd_reenable_dsense_task\n");
16149 			}
16150 		}
16151 #endif /* _LP64 */
16152 		/* FALLTHRU */
16153 
16154 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16155 		if (!un->un_f_has_removable_media) {
16156 			break;
16157 		}
16158 
16159 		/*
16160 		 * When we get a unit attention from a removable-media device,
16161 		 * it may be in a state that will take a long time to recover
16162 		 * (e.g., from a reset).  Since we are executing in interrupt
16163 		 * context here, we cannot wait around for the device to come
16164 		 * back. So hand this command off to sd_media_change_task()
16165 		 * for deferred processing under taskq thread context. (Note
16166 		 * that the command still may be failed if a problem is
16167 		 * encountered at a later time.)
16168 		 */
16169 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16170 		    KM_NOSLEEP) == 0) {
16171 			/*
16172 			 * Cannot dispatch the request so fail the command.
16173 			 */
16174 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16175 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16176 			si.ssi_severity = SCSI_ERR_FATAL;
16177 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16178 			sd_return_failed_command(un, bp, EIO);
16179 		}
16180 
16181 		/*
16182 		 * If we failed to dispatch sd_media_change_task(), the kstat
16183 		 * was already updated above.  If the dispatch succeeded, the
16184 		 * kstat will be updated later if the task encounters an
16185 		 * error.  Either way, we set the kstat_updated flag here.
16186 		 */
16187 		kstat_updated = B_TRUE;
16188 
16189 		/*
16190 		 * Either the command has been successfully dispatched to a
16191 		 * task Q for retrying, or the dispatch failed. In either case
16192 		 * do NOT retry again by calling sd_retry_command. This sets up
16193 		 * two retries of the same command and when one completes and
16194 		 * frees the resources the other will access freed memory,
16195 		 * a bad thing.
16196 		 */
16197 		return;
16198 
16199 	default:
16200 		break;
16201 	}
16202 
16203 	/*
16204 	 * Update kstat if we haven't done that.
16205 	 */
16206 	if (!kstat_updated) {
16207 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16208 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16209 	}
16210 
16211 do_retry:
16212 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16213 	    EIO, SD_UA_RETRY_DELAY, NULL);
16214 }
16215 
16216 
16217 
16218 /*
16219  *    Function: sd_sense_key_fail_command
16220  *
16221  * Description: Used to fail a command when we don't like the sense key that
16222  *		was returned.
16223  *
16224  *     Context: May be called from interrupt context
16225  */
16226 
16227 static void
16228 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16229 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16230 {
16231 	struct sd_sense_info	si;
16232 
16233 	ASSERT(un != NULL);
16234 	ASSERT(mutex_owned(SD_MUTEX(un)));
16235 	ASSERT(bp != NULL);
16236 	ASSERT(xp != NULL);
16237 	ASSERT(pktp != NULL);
16238 
16239 	si.ssi_severity = SCSI_ERR_FATAL;
16240 	si.ssi_pfa_flag = FALSE;
16241 
16242 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16243 	sd_return_failed_command(un, bp, EIO);
16244 }
16245 
16246 
16247 
16248 /*
16249  *    Function: sd_sense_key_blank_check
16250  *
16251  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16252  *		Has no monetary connotation.
16253  *
16254  *     Context: May be called from interrupt context
16255  */
16256 
16257 static void
16258 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16259 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16260 {
16261 	struct sd_sense_info	si;
16262 
16263 	ASSERT(un != NULL);
16264 	ASSERT(mutex_owned(SD_MUTEX(un)));
16265 	ASSERT(bp != NULL);
16266 	ASSERT(xp != NULL);
16267 	ASSERT(pktp != NULL);
16268 
16269 	/*
16270 	 * Blank check is not fatal for removable devices, therefore
16271 	 * it does not require a console message.
16272 	 */
16273 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16274 	    SCSI_ERR_FATAL;
16275 	si.ssi_pfa_flag = FALSE;
16276 
16277 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16278 	sd_return_failed_command(un, bp, EIO);
16279 }
16280 
16281 
16282 
16283 
16284 /*
16285  *    Function: sd_sense_key_aborted_command
16286  *
16287  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16288  *
16289  *     Context: May be called from interrupt context
16290  */
16291 
16292 static void
16293 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16294 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16295 {
16296 	struct sd_sense_info	si;
16297 
16298 	ASSERT(un != NULL);
16299 	ASSERT(mutex_owned(SD_MUTEX(un)));
16300 	ASSERT(bp != NULL);
16301 	ASSERT(xp != NULL);
16302 	ASSERT(pktp != NULL);
16303 
16304 	si.ssi_severity = SCSI_ERR_FATAL;
16305 	si.ssi_pfa_flag = FALSE;
16306 
16307 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16308 
16309 	/*
16310 	 * This really ought to be a fatal error, but we will retry anyway
16311 	 * as some drives report this as a spurious error.
16312 	 */
16313 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16314 	    &si, EIO, (clock_t)0, NULL);
16315 }
16316 
16317 
16318 
16319 /*
16320  *    Function: sd_sense_key_default
16321  *
16322  * Description: Default recovery action for several SCSI sense keys (basically
16323  *		attempts a retry).
16324  *
16325  *     Context: May be called from interrupt context
16326  */
16327 
16328 static void
16329 sd_sense_key_default(struct sd_lun *un,
16330 	uint8_t *sense_datap,
16331 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16332 {
16333 	struct sd_sense_info	si;
16334 	uint8_t sense_key = scsi_sense_key(sense_datap);
16335 
16336 	ASSERT(un != NULL);
16337 	ASSERT(mutex_owned(SD_MUTEX(un)));
16338 	ASSERT(bp != NULL);
16339 	ASSERT(xp != NULL);
16340 	ASSERT(pktp != NULL);
16341 
16342 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16343 
16344 	/*
16345 	 * Undecoded sense key.	Attempt retries and hope that will fix
16346 	 * the problem.  Otherwise, we're dead.
16347 	 */
16348 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16349 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16350 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16351 	}
16352 
16353 	si.ssi_severity = SCSI_ERR_FATAL;
16354 	si.ssi_pfa_flag = FALSE;
16355 
16356 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16357 	    &si, EIO, (clock_t)0, NULL);
16358 }
16359 
16360 
16361 
16362 /*
16363  *    Function: sd_print_retry_msg
16364  *
16365  * Description: Print a message indicating the retry action being taken.
16366  *
16367  *   Arguments: un - ptr to associated softstate
16368  *		bp - ptr to buf(9S) for the command
16369  *		arg - not used.
16370  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16371  *			or SD_NO_RETRY_ISSUED
16372  *
16373  *     Context: May be called from interrupt context
16374  */
16375 /* ARGSUSED */
16376 static void
16377 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16378 {
16379 	struct sd_xbuf	*xp;
16380 	struct scsi_pkt *pktp;
16381 	char *reasonp;
16382 	char *msgp;
16383 
16384 	ASSERT(un != NULL);
16385 	ASSERT(mutex_owned(SD_MUTEX(un)));
16386 	ASSERT(bp != NULL);
16387 	pktp = SD_GET_PKTP(bp);
16388 	ASSERT(pktp != NULL);
16389 	xp = SD_GET_XBUF(bp);
16390 	ASSERT(xp != NULL);
16391 
16392 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16393 	mutex_enter(&un->un_pm_mutex);
16394 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16395 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16396 	    (pktp->pkt_flags & FLAG_SILENT)) {
16397 		mutex_exit(&un->un_pm_mutex);
16398 		goto update_pkt_reason;
16399 	}
16400 	mutex_exit(&un->un_pm_mutex);
16401 
16402 	/*
16403 	 * Suppress messages if they are all the same pkt_reason; with
16404 	 * TQ, many (up to 256) are returned with the same pkt_reason.
16405 	 * If we are in panic, then suppress the retry messages.
16406 	 */
16407 	switch (flag) {
16408 	case SD_NO_RETRY_ISSUED:
16409 		msgp = "giving up";
16410 		break;
16411 	case SD_IMMEDIATE_RETRY_ISSUED:
16412 	case SD_DELAYED_RETRY_ISSUED:
16413 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16414 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16415 		    (sd_error_level != SCSI_ERR_ALL))) {
16416 			return;
16417 		}
16418 		msgp = "retrying command";
16419 		break;
16420 	default:
16421 		goto update_pkt_reason;
16422 	}
16423 
16424 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16425 	    scsi_rname(pktp->pkt_reason));
16426 
16427 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16428 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16429 
16430 update_pkt_reason:
16431 	/*
16432 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16433 	 * This is to prevent multiple console messages for the same failure
16434 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
16435 	 * when the command is retried successfully because there still may be
16436 	 * more commands coming back with the same value of pktp->pkt_reason.
16437 	 */
16438 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16439 		un->un_last_pkt_reason = pktp->pkt_reason;
16440 	}
16441 }
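
/*
 * Example of the message emitted above for a retried command whose
 * packet statistics include STAT_PERR (log prefix omitted):
 *
 *	WARNING: SCSI transport failed: reason 'parity error':
 *	    retrying command
 */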
16442 
16443 
16444 /*
16445  *    Function: sd_print_cmd_incomplete_msg
16446  *
16447  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16448  *
16449  *   Arguments: un - ptr to associated softstate
16450  *		bp - ptr to buf(9S) for the command
16451  *		arg - passed to sd_print_retry_msg()
16452  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16453  *			or SD_NO_RETRY_ISSUED
16454  *
16455  *     Context: May be called from interrupt context
16456  */
16457 
16458 static void
16459 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16460 	int code)
16461 {
16462 	dev_info_t	*dip;
16463 
16464 	ASSERT(un != NULL);
16465 	ASSERT(mutex_owned(SD_MUTEX(un)));
16466 	ASSERT(bp != NULL);
16467 
16468 	switch (code) {
16469 	case SD_NO_RETRY_ISSUED:
16470 		/* The command failed.  Did someone turn off this target? */
16471 		if (un->un_state != SD_STATE_OFFLINE) {
16472 			/*
16473 			 * Suppress the message if we are detaching and
16474 			 * the device has been disconnected.
16475 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
16476 			 * private interface and not part of the DDI
16477 			 */
16478 			dip = un->un_sd->sd_dev;
16479 			if (!(DEVI_IS_DETACHING(dip) &&
16480 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16481 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16482 				    "disk not responding to selection\n");
16483 			}
16484 			New_state(un, SD_STATE_OFFLINE);
16485 		}
16486 		break;
16487 
16488 	case SD_DELAYED_RETRY_ISSUED:
16489 	case SD_IMMEDIATE_RETRY_ISSUED:
16490 	default:
16491 		/* Command was successfully queued for retry */
16492 		sd_print_retry_msg(un, bp, arg, code);
16493 		break;
16494 	}
16495 }
16496 
16497 
16498 /*
16499  *    Function: sd_pkt_reason_cmd_incomplete
16500  *
16501  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16502  *
16503  *     Context: May be called from interrupt context
16504  */
16505 
16506 static void
16507 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16508 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16509 {
16510 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16511 
16512 	ASSERT(un != NULL);
16513 	ASSERT(mutex_owned(SD_MUTEX(un)));
16514 	ASSERT(bp != NULL);
16515 	ASSERT(xp != NULL);
16516 	ASSERT(pktp != NULL);
16517 
16518 	/* Do not do a reset if selection did not complete */
16519 	/* Note: Should this not just check the bit? */
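	/*
	 * pkt_state is a bit-field of STATE_* flags, so the reset below is
	 * skipped only when pkt_state is exactly STATE_GOT_BUS, i.e. the
	 * bus was acquired but selection never completed.
	 */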
16520 	if (pktp->pkt_state != STATE_GOT_BUS) {
16521 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16522 		sd_reset_target(un, pktp);
16523 	}
16524 
16525 	/*
16526 	 * If the target was not successfully selected, then set
16527 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16528 	 * with the target, and further retries and/or commands are
16529 	 * likely to take a long time.
16530 	 */
16531 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16532 		flag |= SD_RETRIES_FAILFAST;
16533 	}
16534 
16535 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16536 
16537 	sd_retry_command(un, bp, flag,
16538 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16539 }
16540 
16541 
16542 
16543 /*
16544  *    Function: sd_pkt_reason_cmd_tran_err
16545  *
16546  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16547  *
16548  *     Context: May be called from interrupt context
16549  */
16550 
16551 static void
16552 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16553 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16554 {
16555 	ASSERT(un != NULL);
16556 	ASSERT(mutex_owned(SD_MUTEX(un)));
16557 	ASSERT(bp != NULL);
16558 	ASSERT(xp != NULL);
16559 	ASSERT(pktp != NULL);
16560 
16561 	/*
16562 	 * Do not reset if we got a parity error, or if
16563 	 * selection did not complete.
16564 	 */
16565 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16566 	/* Note: Should this not just check the bit for pkt_state? */
16567 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16568 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16569 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16570 		sd_reset_target(un, pktp);
16571 	}
16572 
16573 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16574 
16575 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16576 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16577 }
16578 
16579 
16580 
16581 /*
16582  *    Function: sd_pkt_reason_cmd_reset
16583  *
16584  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16585  *
16586  *     Context: May be called from interrupt context
16587  */
16588 
16589 static void
16590 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16591 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16592 {
16593 	ASSERT(un != NULL);
16594 	ASSERT(mutex_owned(SD_MUTEX(un)));
16595 	ASSERT(bp != NULL);
16596 	ASSERT(xp != NULL);
16597 	ASSERT(pktp != NULL);
16598 
16599 	/* The target may still be running the command, so try to reset. */
16600 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16601 	sd_reset_target(un, pktp);
16602 
16603 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16604 
16605 	/*
16606 	 * If pkt_reason is CMD_RESET chances are that this pkt got
16607 	 * reset because another target on this bus caused it. The target
16608 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16609 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16610 	 */
16611 
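	/*
	 * SD_RETRIES_VICTIM checks the command against the separate victim
	 * retry count (un_victim_retry_count) on the theory that this
	 * command was an innocent victim of another target's failure and
	 * so deserves its own retry budget.
	 */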
16612 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16613 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16614 }
16615 
16616 
16617 
16618 
16619 /*
16620  *    Function: sd_pkt_reason_cmd_aborted
16621  *
16622  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16623  *
16624  *     Context: May be called from interrupt context
16625  */
16626 
16627 static void
16628 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16629 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16630 {
16631 	ASSERT(un != NULL);
16632 	ASSERT(mutex_owned(SD_MUTEX(un)));
16633 	ASSERT(bp != NULL);
16634 	ASSERT(xp != NULL);
16635 	ASSERT(pktp != NULL);
16636 
16637 	/* The target may still be running the command, so try to reset. */
16638 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16639 	sd_reset_target(un, pktp);
16640 
16641 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16642 
16643 	/*
16644 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
16645 	 * aborted because another target on this bus caused it. The target
16646 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16647 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16648 	 */
16649 
16650 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16651 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16652 }
16653 
16654 
16655 
16656 /*
16657  *    Function: sd_pkt_reason_cmd_timeout
16658  *
16659  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16660  *
16661  *     Context: May be called from interrupt context
16662  */
16663 
16664 static void
16665 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16666 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16667 {
16668 	ASSERT(un != NULL);
16669 	ASSERT(mutex_owned(SD_MUTEX(un)));
16670 	ASSERT(bp != NULL);
16671 	ASSERT(xp != NULL);
16672 	ASSERT(pktp != NULL);
16673 
16674 
16675 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16676 	sd_reset_target(un, pktp);
16677 
16678 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16679 
16680 	/*
16681 	 * A command timeout indicates that we could not establish
16682 	 * communication with the target, so set SD_RETRIES_FAILFAST
16683 	 * as further retries/commands are likely to take a long time.
16684 	 */
16685 	sd_retry_command(un, bp,
16686 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16687 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16688 }
16689 
16690 
16691 
16692 /*
16693  *    Function: sd_pkt_reason_cmd_unx_bus_free
16694  *
16695  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16696  *
16697  *     Context: May be called from interrupt context
16698  */
16699 
16700 static void
16701 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16702 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16703 {
16704 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16705 
16706 	ASSERT(un != NULL);
16707 	ASSERT(mutex_owned(SD_MUTEX(un)));
16708 	ASSERT(bp != NULL);
16709 	ASSERT(xp != NULL);
16710 	ASSERT(pktp != NULL);
16711 
16712 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16713 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16714 
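	/*
	 * Suppress the retry message when a parity error was flagged;
	 * otherwise sd_print_retry_msg() reports the transport failure.
	 */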
16715 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
16716 	    sd_print_retry_msg : NULL;
16717 
16718 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16719 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16720 }
16721 
16722 
16723 /*
16724  *    Function: sd_pkt_reason_cmd_tag_reject
16725  *
16726  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16727  *
16728  *     Context: May be called from interrupt context
16729  */
16730 
16731 static void
16732 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16733 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16734 {
16735 	ASSERT(un != NULL);
16736 	ASSERT(mutex_owned(SD_MUTEX(un)));
16737 	ASSERT(bp != NULL);
16738 	ASSERT(xp != NULL);
16739 	ASSERT(pktp != NULL);
16740 
16741 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16742 	pktp->pkt_flags = 0;
16743 	un->un_tagflags = 0;
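	/*
	 * The target rejected the tag message, so fall back to untagged
	 * operation: clamp the throttle, since an untagged target accepts
	 * far fewer outstanding commands, and (below, with the mutex
	 * dropped) ask the HBA to disable tagged queuing for this device
	 * via scsi_ifsetcap(9F).
	 */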
16744 	if (un->un_f_opt_queueing == TRUE) {
16745 		un->un_throttle = min(un->un_throttle, 3);
16746 	} else {
16747 		un->un_throttle = 1;
16748 	}
16749 	mutex_exit(SD_MUTEX(un));
16750 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
16751 	mutex_enter(SD_MUTEX(un));
16752 
16753 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16754 
16755 	/* Legacy behavior not to check retry counts here. */
16756 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
16757 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16758 }
16759 
16760 
16761 /*
16762  *    Function: sd_pkt_reason_default
16763  *
16764  * Description: Default recovery actions for SCSA pkt_reason values that
16765  *		do not have more explicit recovery actions.
16766  *
16767  *     Context: May be called from interrupt context
16768  */
16769 
16770 static void
16771 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
16772 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16773 {
16774 	ASSERT(un != NULL);
16775 	ASSERT(mutex_owned(SD_MUTEX(un)));
16776 	ASSERT(bp != NULL);
16777 	ASSERT(xp != NULL);
16778 	ASSERT(pktp != NULL);
16779 
16780 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16781 	sd_reset_target(un, pktp);
16782 
16783 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16784 
16785 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16786 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16787 }
16788 
16789 
16790 
16791 /*
16792  *    Function: sd_pkt_status_check_condition
16793  *
16794  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
16795  *
16796  *     Context: May be called from interrupt context
16797  */
16798 
16799 static void
16800 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
16801 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16802 {
16803 	ASSERT(un != NULL);
16804 	ASSERT(mutex_owned(SD_MUTEX(un)));
16805 	ASSERT(bp != NULL);
16806 	ASSERT(xp != NULL);
16807 	ASSERT(pktp != NULL);
16808 
16809 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
16810 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
16811 
16812 	/*
16813 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
16814 	 * command will be retried after the request sense). Otherwise, retry
16815 	 * the command. Note: we are issuing the request sense even though the
16816 	 * retry limit may have been reached for the failed command.
16817 	 */
16818 	if (un->un_f_arq_enabled == FALSE) {
16819 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16820 		    "no ARQ, sending request sense command\n");
16821 		sd_send_request_sense_command(un, bp, pktp);
16822 	} else {
16823 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
16824 		    "ARQ, retrying request sense command\n");
16825 #if defined(__i386) || defined(__amd64)
16826 		/*
16827 		 * The SD_RETRY_DELAY value needs to be adjusted here
16828 		 * whenever SD_RETRY_DELAY changes in sddef.h.
16829 		 */
16830 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
16831 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
16832 		    NULL);
16833 #else
16834 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
16835 		    EIO, SD_RETRY_DELAY, NULL);
16836 #endif
16837 	}
16838 
16839 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
16840 }
16841 
16842 
16843 /*
16844  *    Function: sd_pkt_status_busy
16845  *
16846  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
16847  *
16848  *     Context: May be called from interrupt context
16849  */
16850 
16851 static void
16852 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16853 	struct scsi_pkt *pktp)
16854 {
16855 	ASSERT(un != NULL);
16856 	ASSERT(mutex_owned(SD_MUTEX(un)));
16857 	ASSERT(bp != NULL);
16858 	ASSERT(xp != NULL);
16859 	ASSERT(pktp != NULL);
16860 
16861 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16862 	    "sd_pkt_status_busy: entry\n");
16863 
16864 	/* If retries are exhausted, just fail the command. */
16865 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
16866 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16867 		    "device busy too long\n");
16868 		sd_return_failed_command(un, bp, EIO);
16869 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16870 		    "sd_pkt_status_busy: exit\n");
16871 		return;
16872 	}
16873 	xp->xb_retry_count++;
16874 
16875 	/*
16876 	 * Try to reset the target. However, we do not want to perform
16877 	 * more than one reset if the device continues to fail. The reset
16878 	 * will be performed when the retry count reaches the reset
16879 	 * threshold.  This threshold should be set such that at least
16880 	 * one retry is issued before the reset is performed.
16881 	 */
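	/*
	 * For example, with un_reset_retry_count set to 5 the reset is
	 * attempted on the fifth BUSY retry; values below 2 are clamped so
	 * that at least one plain retry precedes the reset.
	 */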
16882 	if (xp->xb_retry_count ==
16883 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
16884 		int rval = 0;
16885 		mutex_exit(SD_MUTEX(un));
16886 		if (un->un_f_allow_bus_device_reset == TRUE) {
16887 			/*
16888 			 * First try to reset the LUN; if we cannot then
16889 			 * try to reset the target.
16890 			 */
16891 			if (un->un_f_lun_reset_enabled == TRUE) {
16892 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16893 				    "sd_pkt_status_busy: RESET_LUN\n");
16894 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
16895 			}
16896 			if (rval == 0) {
16897 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16898 				    "sd_pkt_status_busy: RESET_TARGET\n");
16899 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
16900 			}
16901 		}
16902 		if (rval == 0) {
16903 			/*
16904 			 * If the RESET_LUN and/or RESET_TARGET failed,
16905 			 * try RESET_ALL
16906 			 */
16907 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16908 			    "sd_pkt_status_busy: RESET_ALL\n");
16909 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
16910 		}
16911 		mutex_enter(SD_MUTEX(un));
16912 		if (rval == 0) {
16913 			/*
16914 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
16915 			 * At this point we give up & fail the command.
16916 			 */
16917 			sd_return_failed_command(un, bp, EIO);
16918 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16919 			    "sd_pkt_status_busy: exit (failed cmd)\n");
16920 			return;
16921 		}
16922 	}
16923 
16924 	/*
16925 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
16926 	 * we have already checked the retry counts above.
16927 	 */
16928 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
16929 	    EIO, SD_BSY_TIMEOUT, NULL);
16930 
16931 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16932 	    "sd_pkt_status_busy: exit\n");
16933 }
16934 
16935 
16936 /*
16937  *    Function: sd_pkt_status_reservation_conflict
16938  *
16939  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
16940  *		command status.
16941  *
16942  *     Context: May be called from interrupt context
16943  */
16944 
16945 static void
16946 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
16947 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16948 {
16949 	ASSERT(un != NULL);
16950 	ASSERT(mutex_owned(SD_MUTEX(un)));
16951 	ASSERT(bp != NULL);
16952 	ASSERT(xp != NULL);
16953 	ASSERT(pktp != NULL);
16954 
16955 	/*
16956 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the reservation
16957 	 * conflict could stem from various causes (incorrect keys, host not
16958 	 * registered, no reservation held, etc.), so return EACCES to the caller.
16959 	 */
16960 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
16961 		int cmd = SD_GET_PKT_OPCODE(pktp);
16962 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
16963 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
16964 			sd_return_failed_command(un, bp, EACCES);
16965 			return;
16966 		}
16967 	}
16968 
16969 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
16970 
16971 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
16972 		if (sd_failfast_enable != 0) {
16973 			/* By definition, we must panic here.... */
16974 			sd_panic_for_res_conflict(un);
16975 			/*NOTREACHED*/
16976 		}
16977 		SD_ERROR(SD_LOG_IO, un,
16978 		    "sd_handle_resv_conflict: Disk Reserved\n");
16979 		sd_return_failed_command(un, bp, EACCES);
16980 		return;
16981 	}
16982 
16983 	/*
16984 	 * 1147670: retry only if sd_retry_on_reservation_conflict
16985 	 * property is set (default is 1). Retries will not succeed
16986 	 * on a disk reserved by another initiator. HA systems
16987 	 * may reset this via sd.conf to avoid these retries.
16988 	 *
16989 	 * Note: The legacy return code for this failure is EIO, however EACCES
16990 	 * seems more appropriate for a reservation conflict.
16991 	 */
16992 	if (sd_retry_on_reservation_conflict == 0) {
16993 		SD_ERROR(SD_LOG_IO, un,
16994 		    "sd_handle_resv_conflict: Device Reserved\n");
16995 		sd_return_failed_command(un, bp, EIO);
16996 		return;
16997 	}
16998 
16999 	/*
17000 	 * Retry the command if we can.
17001 	 *
17002 	 * Note: The legacy return code for this failure is EIO, however EACCES
17003 	 * seems more appropriate for a reservation conflict.
17004 	 */
17005 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17006 	    (clock_t)2, NULL);
17007 }
17008 
17009 
17010 
17011 /*
17012  *    Function: sd_pkt_status_qfull
17013  *
17014  * Description: Handle a QUEUE FULL condition from the target.  This can
17015  *		occur if the HBA does not handle the queue full condition.
17016  *		(Basically this means third-party HBAs, as Sun HBAs will
17017  *		handle the queue full condition.)  Note that if there are
17018  *		some commands already in the transport, then the queue full
17019  *		has occurred because the queue for this nexus is actually
17020  *		full. If there are no commands in the transport, then the
17021  *		queue full results from some other initiator or lun
17022  *		consuming all the resources at the target.
17023  *
17024  *     Context: May be called from interrupt context
17025  */
17026 
17027 static void
17028 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17029 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17030 {
17031 	ASSERT(un != NULL);
17032 	ASSERT(mutex_owned(SD_MUTEX(un)));
17033 	ASSERT(bp != NULL);
17034 	ASSERT(xp != NULL);
17035 	ASSERT(pktp != NULL);
17036 
17037 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17038 	    "sd_pkt_status_qfull: entry\n");
17039 
17040 	/*
17041 	 * Just lower the QFULL throttle and retry the command.  Note that
17042 	 * we do not limit the number of retries here.
17043 	 */
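	/*
	 * sd_reduce_throttle() is expected to arrange for the throttle to
	 * be restored later, once the target drains, which is why
	 * unlimited retries are tolerable here.
	 */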
17044 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17045 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17046 	    SD_RESTART_TIMEOUT, NULL);
17047 
17048 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17049 	    "sd_pkt_status_qfull: exit\n");
17050 }
17051 
17052 
17053 /*
17054  *    Function: sd_reset_target
17055  *
17056  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17057  *		RESET_TARGET, or RESET_ALL.
17058  *
17059  *     Context: May be called under interrupt context.
17060  */
17061 
17062 static void
17063 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17064 {
17065 	int rval = 0;
17066 
17067 	ASSERT(un != NULL);
17068 	ASSERT(mutex_owned(SD_MUTEX(un)));
17069 	ASSERT(pktp != NULL);
17070 
17071 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17072 
17073 	/*
17074 	 * No need to reset if the transport layer has already done so.
17075 	 */
17076 	if ((pktp->pkt_statistics &
17077 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17078 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17079 		    "sd_reset_target: no reset\n");
17080 		return;
17081 	}
17082 
17083 	mutex_exit(SD_MUTEX(un));
17084 
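	/*
	 * Escalate through the reset hierarchy: LUN first, then the
	 * target, then the entire bus.  scsi_reset(9F) returns nonzero on
	 * success, so rval == 0 means "try the next wider scope".
	 */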
17085 	if (un->un_f_allow_bus_device_reset == TRUE) {
17086 		if (un->un_f_lun_reset_enabled == TRUE) {
17087 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17088 			    "sd_reset_target: RESET_LUN\n");
17089 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17090 		}
17091 		if (rval == 0) {
17092 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17093 			    "sd_reset_target: RESET_TARGET\n");
17094 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17095 		}
17096 	}
17097 
17098 	if (rval == 0) {
17099 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17100 		    "sd_reset_target: RESET_ALL\n");
17101 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17102 	}
17103 
17104 	mutex_enter(SD_MUTEX(un));
17105 
17106 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17107 }
17108 
17109 
17110 /*
17111  *    Function: sd_media_change_task
17112  *
17113  * Description: Recovery action for a CDROM to become available.
17114  *
17115  *     Context: Executes in a taskq() thread context
17116  */
17117 
17118 static void
17119 sd_media_change_task(void *arg)
17120 {
17121 	struct	scsi_pkt	*pktp = arg;
17122 	struct	sd_lun		*un;
17123 	struct	buf		*bp;
17124 	struct	sd_xbuf		*xp;
17125 	int	err		= 0;
17126 	int	retry_count	= 0;
17127 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
17128 	struct	sd_sense_info	si;
17129 
17130 	ASSERT(pktp != NULL);
17131 	bp = (struct buf *)pktp->pkt_private;
17132 	ASSERT(bp != NULL);
17133 	xp = SD_GET_XBUF(bp);
17134 	ASSERT(xp != NULL);
17135 	un = SD_GET_UN(bp);
17136 	ASSERT(un != NULL);
17137 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17138 	ASSERT(un->un_f_monitor_media_state);
17139 
17140 	si.ssi_severity = SCSI_ERR_INFO;
17141 	si.ssi_pfa_flag = FALSE;
17142 
17143 	/*
17144 	 * When a reset is issued on a CDROM, it takes a long time to
17145 	 * recover. The first few attempts to read capacity and other things
17146 	 * related to handling unit attention fail (with an ASC of 0x4 and
17147 	 * ASCQ of 0x1). In that case we want to allow enough retries, while
17148 	 * still limiting the retries for genuine failures such as no media
17149 	 * in the drive.
17150 	 */
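	/*
	 * The initial budget is one tenth of SD_UNIT_ATTENTION_RETRY at
	 * 0.5 second intervals; an EAGAIN from sd_handle_mchange() (the
	 * "becoming ready" case) stretches the budget to the full
	 * SD_UNIT_ATTENTION_RETRY count.
	 */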
17151 	while (retry_count++ < retry_limit) {
17152 		if ((err = sd_handle_mchange(un)) == 0) {
17153 			break;
17154 		}
17155 		if (err == EAGAIN) {
17156 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17157 		}
17158 		/* Sleep for 0.5 sec. & try again */
17159 		delay(drv_usectohz(500000));
17160 	}
17161 
17162 	/*
17163 	 * Dispatch (retry or fail) the original command here,
17164 	 * along with appropriate console messages....
17165 	 *
17166 	 * Must grab the mutex before calling sd_retry_command,
17167 	 * sd_print_sense_msg and sd_return_failed_command.
17168 	 */
17169 	mutex_enter(SD_MUTEX(un));
17170 	if (err != SD_CMD_SUCCESS) {
17171 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17172 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17173 		si.ssi_severity = SCSI_ERR_FATAL;
17174 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17175 		sd_return_failed_command(un, bp, EIO);
17176 	} else {
17177 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17178 		    &si, EIO, (clock_t)0, NULL);
17179 	}
17180 	mutex_exit(SD_MUTEX(un));
17181 }
17182 
17183 
17184 
17185 /*
17186  *    Function: sd_handle_mchange
17187  *
17188  * Description: Perform geometry validation & other recovery when the
17189  *		CDROM has been removed from the drive.
17190  *
17191  * Return Code: 0 for success
17192  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17193  *		sd_send_scsi_READ_CAPACITY()
17194  *
17195  *     Context: Executes in a taskq() thread context
17196  */
17197 
17198 static int
17199 sd_handle_mchange(struct sd_lun *un)
17200 {
17201 	uint64_t	capacity;
17202 	uint32_t	lbasize;
17203 	int		rval;
17204 
17205 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17206 	ASSERT(un->un_f_monitor_media_state);
17207 
17208 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17209 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17210 		return (rval);
17211 	}
17212 
17213 	mutex_enter(SD_MUTEX(un));
17214 	sd_update_block_info(un, lbasize, capacity);
17215 
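	/*
	 * Refresh the capacity error-statistics kstat; the value is kept
	 * in bytes, hence the blockcount * tgt_blocksize product below.
	 */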
17216 	if (un->un_errstats != NULL) {
17217 		struct	sd_errstats *stp =
17218 		    (struct sd_errstats *)un->un_errstats->ks_data;
17219 		stp->sd_capacity.value.ui64 = (uint64_t)
17220 		    ((uint64_t)un->un_blockcount *
17221 		    (uint64_t)un->un_tgt_blocksize);
17222 	}
17223 
17224 
17225 	/*
17226 	 * Check if the media in the device is writable or not
17227 	 */
17228 	sd_check_for_writable_cd(un, SD_PATH_DIRECT_PRIORITY);
17229 
17230 	/*
17231 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17232 	 * valid geometry.
17233 	 */
17234 	mutex_exit(SD_MUTEX(un));
17235 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17236 
17237 
17238 	if (cmlb_validate(un->un_cmlbhandle, 0,
17239 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17240 		return (EIO);
17241 	} else {
17242 		if (un->un_f_pkstats_enabled) {
17243 			sd_set_pstats(un);
17244 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17245 			    "sd_handle_mchange: un:0x%p pstats created and "
17246 			    "set\n", un);
17247 		}
17248 	}
17249 
17250 
17251 	/*
17252 	 * Try to lock the door
17253 	 */
17254 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17255 	    SD_PATH_DIRECT_PRIORITY));
17256 }
17257 
17258 
17259 /*
17260  *    Function: sd_send_scsi_DOORLOCK
17261  *
17262  * Description: Issue the scsi DOOR LOCK command
17263  *
17264  *   Arguments: un    - pointer to driver soft state (unit) structure for
17265  *			this target.
17266  *		flag  - SD_REMOVAL_ALLOW
17267  *			SD_REMOVAL_PREVENT
17268  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17269  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17270  *			to use the USCSI "direct" chain and bypass the normal
17271  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17272  *			command is issued as part of an error recovery action.
17273  *
17274  * Return Code: 0   - Success
17275  *		errno return code from sd_send_scsi_cmd()
17276  *
17277  *     Context: Can sleep.
17278  */
17279 
17280 static int
17281 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17282 {
17283 	union scsi_cdb		cdb;
17284 	struct uscsi_cmd	ucmd_buf;
17285 	struct scsi_extended_sense	sense_buf;
17286 	int			status;
17287 
17288 	ASSERT(un != NULL);
17289 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17290 
17291 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17292 
17293 	/* already determined doorlock is not supported, fake success */
17294 	if (un->un_f_doorlock_supported == FALSE) {
17295 		return (0);
17296 	}
17297 
17298 	/*
17299 	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
17300 	 * ignore the command so that we can complete the eject
17301 	 * operation.
17302 	 */
17303 	if (flag == SD_REMOVAL_PREVENT) {
17304 		mutex_enter(SD_MUTEX(un));
17305 		if (un->un_f_ejecting == TRUE) {
17306 			mutex_exit(SD_MUTEX(un));
17307 			return (EAGAIN);
17308 		}
17309 		mutex_exit(SD_MUTEX(un));
17310 	}
17311 
17312 	bzero(&cdb, sizeof (cdb));
17313 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17314 
17315 	cdb.scc_cmd = SCMD_DOORLOCK;
17316 	cdb.cdb_opaque[4] = (uchar_t)flag;
17317 
17318 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17319 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17320 	ucmd_buf.uscsi_bufaddr	= NULL;
17321 	ucmd_buf.uscsi_buflen	= 0;
17322 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17323 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17324 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17325 	ucmd_buf.uscsi_timeout	= 15;
17326 
17327 	SD_TRACE(SD_LOG_IO, un,
17328 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17329 
17330 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17331 	    UIO_SYSSPACE, path_flag);
17332 
17333 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17334 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17335 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17336 		/* fake success and skip subsequent doorlock commands */
17337 		un->un_f_doorlock_supported = FALSE;
17338 		return (0);
17339 	}
17340 
17341 	return (status);
17342 }
17343 
17344 /*
17345  *    Function: sd_send_scsi_READ_CAPACITY
17346  *
17347  * Description: This routine uses the scsi READ CAPACITY command to determine
17348  *		the device capacity in number of blocks and the device native
17349  *		block size. If this function returns a failure, then the
17350  *		values in *capp and *lbap are undefined.  If the capacity
17351  *		returned is 0xffffffff then the lun is too large for a
17352  *		normal READ CAPACITY command and the results of a
17353  *		READ CAPACITY 16 will be used instead.
17354  *
17355  *   Arguments: un   - ptr to soft state struct for the target
17356  *		capp - ptr to unsigned 64-bit variable to receive the
17357  *			capacity value from the command.
17358  *		lbap - ptr to unsigned 32-bit variable to receive the
17359  *			block size value from the command
17360  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17361  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17362  *			to use the USCSI "direct" chain and bypass the normal
17363  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17364  *			command is issued as part of an error recovery action.
17365  *
17366  * Return Code: 0   - Success
17367  *		EIO - IO error
17368  *		EACCES - Reservation conflict detected
17369  *		EAGAIN - Device is becoming ready
17370  *		errno return code from sd_send_scsi_cmd()
17371  *
17372  *     Context: Can sleep.  Blocks until command completes.
17373  */
17374 
17375 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17376 
17377 static int
17378 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17379 	int path_flag)
17380 {
17381 	struct	scsi_extended_sense	sense_buf;
17382 	struct	uscsi_cmd	ucmd_buf;
17383 	union	scsi_cdb	cdb;
17384 	uint32_t		*capacity_buf;
17385 	uint64_t		capacity;
17386 	uint32_t		lbasize;
17387 	int			status;
17388 
17389 	ASSERT(un != NULL);
17390 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17391 	ASSERT(capp != NULL);
17392 	ASSERT(lbap != NULL);
17393 
17394 	SD_TRACE(SD_LOG_IO, un,
17395 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17396 
17397 	/*
17398 	 * First send a READ_CAPACITY command to the target.
17399 	 * (This command is mandatory under SCSI-2.)
17400 	 *
17401 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17402 	 * Medium Indicator bit is cleared.  The address field must be
17403 	 * zero if the PMI bit is zero.
17404 	 */
17405 	bzero(&cdb, sizeof (cdb));
17406 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17407 
17408 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17409 
17410 	cdb.scc_cmd = SCMD_READ_CAPACITY;
17411 
17412 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17413 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17414 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17415 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17416 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17417 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17418 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17419 	ucmd_buf.uscsi_timeout	= 60;
17420 
17421 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17422 	    UIO_SYSSPACE, path_flag);
17423 
17424 	switch (status) {
17425 	case 0:
17426 		/* Return failure if we did not get valid capacity data. */
17427 		if (ucmd_buf.uscsi_resid != 0) {
17428 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17429 			return (EIO);
17430 		}
17431 
17432 		/*
17433 		 * Read capacity and block size from the READ CAPACITY 10 data.
17434 		 * This data may be adjusted later due to device specific
17435 		 * issues.
17436 		 *
17437 		 * According to the SCSI spec, the READ CAPACITY 10
17438 		 * command returns the following:
17439 		 *
17440 		 *  bytes 0-3: Maximum logical block address available.
17441 		 *		(MSB in byte:0 & LSB in byte:3)
17442 		 *
17443 		 *  bytes 4-7: Block length in bytes
17444 		 *		(MSB in byte:4 & LSB in byte:7)
17445 		 *
17446 		 */
17447 		capacity = BE_32(capacity_buf[0]);
17448 		lbasize = BE_32(capacity_buf[1]);
17449 
17450 		/*
17451 		 * Done with capacity_buf
17452 		 */
17453 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17454 
17455 		/*
17456 		 * if the reported capacity is set to all 0xf's, then
17457 		 * this disk is too large and requires SBC-2 commands.
17458 		 * Reissue the request using READ CAPACITY 16.
17459 		 */
17460 		if (capacity == 0xffffffff) {
17461 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17462 			    &lbasize, path_flag);
17463 			if (status != 0) {
17464 				return (status);
17465 			}
17466 		}
17467 		break;	/* Success! */
17468 	case EIO:
17469 		switch (ucmd_buf.uscsi_status) {
17470 		case STATUS_RESERVATION_CONFLICT:
17471 			status = EACCES;
17472 			break;
17473 		case STATUS_CHECK:
17474 			/*
17475 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17476 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17477 			 */
17478 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17479 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17480 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17481 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17482 				return (EAGAIN);
17483 			}
17484 			break;
17485 		default:
17486 			break;
17487 		}
17488 		/* FALLTHRU */
17489 	default:
17490 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17491 		return (status);
17492 	}
17493 
17494 	/*
17495 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17496 	 * (2352 and 0 are common) so for these devices always force the value
17497 	 * to 2048 as required by the ATAPI specs.
17498 	 */
17499 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17500 		lbasize = 2048;
17501 	}
17502 
17503 	/*
17504 	 * Get the maximum LBA value from the READ CAPACITY data.
17505 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17506 	 * was cleared when issuing the command. This means that the LBA
17507 	 * returned from the device is the LBA of the last logical block
17508 	 * on the logical unit.  The actual logical block count will be
17509 	 * this value plus one.
17510 	 *
17511 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17512 	 * so scale the capacity value to reflect this.
17513 	 */
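	/*
	 * For example, a device reporting a maximum LBA of 0x3FFFF with a
	 * 2048-byte lbasize and a 512-byte un_sys_blocksize yields
	 * (0x3FFFF + 1) * (2048 / 512) = 0x100000 system blocks.
	 */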
17514 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
17515 
17516 	/*
17517 	 * Copy the values from the READ CAPACITY command into the space
17518 	 * provided by the caller.
17519 	 */
17520 	*capp = capacity;
17521 	*lbap = lbasize;
17522 
17523 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17524 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17525 
17526 	/*
17527 	 * Both the lbasize and capacity from the device must be nonzero,
17528 	 * otherwise we assume that the values are not valid and return
17529 	 * failure to the caller. (4203735)
17530 	 */
17531 	if ((capacity == 0) || (lbasize == 0)) {
17532 		return (EIO);
17533 	}
17534 
17535 	return (0);
17536 }
17537 
17538 /*
17539  *    Function: sd_send_scsi_READ_CAPACITY_16
17540  *
17541  * Description: This routine uses the scsi READ CAPACITY 16 command to
17542  *		determine the device capacity in number of blocks and the
17543  *		device native block size.  If this function returns a failure,
17544  *		then the values in *capp and *lbap are undefined.
17545  *		This routine should always be called by
17546  *		sd_send_scsi_READ_CAPACITY, which will apply any device-
17547  *		specific adjustments to capacity and lbasize.
17548  *
17549  *   Arguments: un   - ptr to soft state struct for the target
17550  *		capp - ptr to unsigned 64-bit variable to receive the
17551  *			capacity value from the command.
17552  *		lbap - ptr to unsigned 32-bit variable to receive the
17553  *			block size value from the command
17554  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17555  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17556  *			to use the USCSI "direct" chain and bypass the normal
17557  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17558  *			this command is issued as part of an error recovery
17559  *			action.
17560  *
17561  * Return Code: 0   - Success
17562  *		EIO - IO error
17563  *		EACCES - Reservation conflict detected
17564  *		EAGAIN - Device is becoming ready
17565  *		errno return code from sd_send_scsi_cmd()
17566  *
17567  *     Context: Can sleep.  Blocks until command completes.
17568  */
17569 
17570 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17571 
17572 static int
17573 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17574 	uint32_t *lbap, int path_flag)
17575 {
17576 	struct	scsi_extended_sense	sense_buf;
17577 	struct	uscsi_cmd	ucmd_buf;
17578 	union	scsi_cdb	cdb;
17579 	uint64_t		*capacity16_buf;
17580 	uint64_t		capacity;
17581 	uint32_t		lbasize;
17582 	int			status;
17583 
17584 	ASSERT(un != NULL);
17585 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17586 	ASSERT(capp != NULL);
17587 	ASSERT(lbap != NULL);
17588 
17589 	SD_TRACE(SD_LOG_IO, un,
17590 	    "sd_send_scsi_READ_CAPACITY_16: entry: un:0x%p\n", un);
17591 
17592 	/*
17593 	 * First send a READ_CAPACITY_16 command to the target.
17594 	 *
17595 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17596 	 * Medium Indicator bit is cleared.  The address field must be
17597 	 * zero if the PMI bit is zero.
17598 	 */
17599 	bzero(&cdb, sizeof (cdb));
17600 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17601 
17602 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17603 
17604 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17605 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17606 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17607 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17608 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17609 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17610 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17611 	ucmd_buf.uscsi_timeout	= 60;
17612 
17613 	/*
17614 	 * Read Capacity (16) is a Service Action In command.  One
17615 	 * command byte (0x9E) is overloaded for multiple operations,
17616 	 * with the second CDB byte specifying the desired operation.
17617 	 */
17618 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17619 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
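	/* The CDB thus begins 0x9E 0x10, the READ CAPACITY (16) form. */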
17620 
17621 	/*
17622 	 * Fill in allocation length field
17623 	 */
17624 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17625 
17626 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17627 	    UIO_SYSSPACE, path_flag);
17628 
17629 	switch (status) {
17630 	case 0:
17631 		/* Return failure if we did not get valid capacity data. */
17632 		if (ucmd_buf.uscsi_resid > 20) {
17633 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17634 			return (EIO);
17635 		}
17636 
17637 		/*
17638 		 * Read capacity and block size from the READ CAPACITY 16 data.
17639 		 * This data may be adjusted later due to device specific
17640 		 * issues.
17641 		 *
17642 		 * According to the SCSI spec, the READ CAPACITY 16
17643 		 * command returns the following:
17644 		 *
17645 		 *  bytes 0-7: Maximum logical block address available.
17646 		 *		(MSB in byte:0 & LSB in byte:7)
17647 		 *
17648 		 *  bytes 8-11: Block length in bytes
17649 		 *		(MSB in byte:8 & LSB in byte:11)
17650 		 *
17651 		 */
17652 		capacity = BE_64(capacity16_buf[0]);
17653 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
17654 
17655 		/*
17656 		 * Done with capacity16_buf
17657 		 */
17658 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17659 
17660 		/*
17661 		 * if the reported capacity is set to all 0xf's, then
17662 		 * this disk is too large.  This could only happen with
17663 		 * a device that supports LBAs larger than 64 bits, which
17664 		 * are not defined by any current T10 standards.
17665 		 */
17666 		if (capacity == 0xffffffffffffffff) {
17667 			return (EIO);
17668 		}
17669 		break;	/* Success! */
17670 	case EIO:
17671 		switch (ucmd_buf.uscsi_status) {
17672 		case STATUS_RESERVATION_CONFLICT:
17673 			status = EACCES;
17674 			break;
17675 		case STATUS_CHECK:
17676 			/*
17677 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17678 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17679 			 */
17680 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17681 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17682 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17683 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17684 				return (EAGAIN);
17685 			}
17686 			break;
17687 		default:
17688 			break;
17689 		}
17690 		/* FALLTHRU */
17691 	default:
17692 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17693 		return (status);
17694 	}
17695 
17696 	*capp = capacity;
17697 	*lbap = lbasize;
17698 
17699 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17700 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17701 
17702 	return (0);
17703 }
17704 
17705 
17706 /*
17707  *    Function: sd_send_scsi_START_STOP_UNIT
17708  *
17709  * Description: Issue a scsi START STOP UNIT command to the target.
17710  *
17711  *   Arguments: un    - pointer to driver soft state (unit) structure for
17712  *			this target.
17713  *		flag  - SD_TARGET_START
17714  *			SD_TARGET_STOP
17715  *			SD_TARGET_EJECT
17716  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17717  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17718  *			to use the USCSI "direct" chain and bypass the normal
17719  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17720  *			command is issued as part of an error recovery action.
17721  *
17722  * Return Code: 0   - Success
17723  *		EIO - IO error
17724  *		EACCES - Reservation conflict detected
17725  *		ENXIO  - Not Ready, medium not present
17726  *		errno return code from sd_send_scsi_cmd()
17727  *
17728  *     Context: Can sleep.
17729  */
17730 
17731 static int
17732 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17733 {
17734 	struct	scsi_extended_sense	sense_buf;
17735 	union scsi_cdb		cdb;
17736 	struct uscsi_cmd	ucmd_buf;
17737 	int			status;
17738 
17739 	ASSERT(un != NULL);
17740 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17741 
17742 	SD_TRACE(SD_LOG_IO, un,
17743 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
17744 
17745 	if (un->un_f_check_start_stop &&
17746 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
17747 	    (un->un_f_start_stop_supported != TRUE)) {
17748 		return (0);
17749 	}
17750 
17751 	/*
17752 	 * If we are performing an eject operation and
17753 	 * we receive any command other than SD_TARGET_EJECT
17754 	 * we should immediately return.
17755 	 */
17756 	if (flag != SD_TARGET_EJECT) {
17757 		mutex_enter(SD_MUTEX(un));
17758 		if (un->un_f_ejecting == TRUE) {
17759 			mutex_exit(SD_MUTEX(un));
17760 			return (EAGAIN);
17761 		}
17762 		mutex_exit(SD_MUTEX(un));
17763 	}
17764 
17765 	bzero(&cdb, sizeof (cdb));
17766 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17767 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
17768 
17769 	cdb.scc_cmd = SCMD_START_STOP;
17770 	cdb.cdb_opaque[4] = (uchar_t)flag;
17771 
17772 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17773 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17774 	ucmd_buf.uscsi_bufaddr	= NULL;
17775 	ucmd_buf.uscsi_buflen	= 0;
17776 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17777 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
17778 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17779 	ucmd_buf.uscsi_timeout	= 200;
17780 
17781 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17782 	    UIO_SYSSPACE, path_flag);
17783 
17784 	switch (status) {
17785 	case 0:
17786 		break;	/* Success! */
17787 	case EIO:
17788 		switch (ucmd_buf.uscsi_status) {
17789 		case STATUS_RESERVATION_CONFLICT:
17790 			status = EACCES;
17791 			break;
17792 		case STATUS_CHECK:
17793 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
17794 				switch (scsi_sense_key(
17795 						(uint8_t *)&sense_buf)) {
17796 				case KEY_ILLEGAL_REQUEST:
17797 					status = ENOTSUP;
17798 					break;
17799 				case KEY_NOT_READY:
17800 					if (scsi_sense_asc(
17801 						    (uint8_t *)&sense_buf)
17802 					    == 0x3A) {
17803 						status = ENXIO;
17804 					}
17805 					break;
17806 				default:
17807 					break;
17808 				}
17809 			}
17810 			break;
17811 		default:
17812 			break;
17813 		}
17814 		break;
17815 	default:
17816 		break;
17817 	}
17818 
17819 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
17820 
17821 	return (status);
17822 }
17823 
17824 
17825 /*
17826  *    Function: sd_start_stop_unit_callback
17827  *
17828  * Description: timeout(9F) callback to begin recovery process for a
17829  *		device that has spun down.
17830  *
17831  *   Arguments: arg - pointer to associated softstate struct.
17832  *
17833  *     Context: Executes in a timeout(9F) thread context
17834  */
17835 
17836 static void
17837 sd_start_stop_unit_callback(void *arg)
17838 {
17839 	struct sd_lun	*un = arg;
17840 	ASSERT(un != NULL);
17841 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17842 
17843 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
17844 
17845 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
17846 }
17847 
17848 
17849 /*
17850  *    Function: sd_start_stop_unit_task
17851  *
17852  * Description: Recovery procedure when a drive is spun down.
17853  *
17854  *   Arguments: arg - pointer to associated softstate struct.
17855  *
17856  *     Context: Executes in a taskq() thread context
17857  */
17858 
17859 static void
17860 sd_start_stop_unit_task(void *arg)
17861 {
17862 	struct sd_lun	*un = arg;
17863 
17864 	ASSERT(un != NULL);
17865 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17866 
17867 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
17868 
17869 	/*
17870 	 * Some unformatted drives report a not ready error; there is no
17871 	 * need to restart if a format has been initiated.
17872 	 */
17873 	mutex_enter(SD_MUTEX(un));
17874 	if (un->un_f_format_in_progress == TRUE) {
17875 		mutex_exit(SD_MUTEX(un));
17876 		return;
17877 	}
17878 	mutex_exit(SD_MUTEX(un));
17879 
17880 	/*
17881 	 * When a START STOP command is issued from here, it is part of a
17882 	 * failure recovery operation and must be issued before any other
17883 	 * commands, including any pending retries. Thus it must be sent
17884 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
17885 	 * succeeds or not, we will start I/O after the attempt.
17886 	 */
17887 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
17888 	    SD_PATH_DIRECT_PRIORITY);
17889 
17890 	/*
17891 	 * The above call blocks until the START_STOP_UNIT command completes.
17892 	 * Now that it has completed, we must re-try the original IO that
17893 	 * received the NOT READY condition in the first place. There are
17894 	 * three possible conditions here:
17895 	 *
17896 	 *  (1) The original IO is on un_retry_bp.
17897 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
17898 	 *	is NULL.
17899 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
17900 	 *	points to some other, unrelated bp.
17901 	 *
17902 	 * For each case, we must call sd_start_cmds() with un_retry_bp
17903 	 * as the argument. If un_retry_bp is NULL, this will initiate
17904 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
17905 	 * then this will process the bp on un_retry_bp. That may or may not
17906 	 * be the original IO, but that does not matter: the important thing
17907 	 * is to keep the IO processing going at this point.
17908 	 *
17909 	 * Note: This is a very specific error recovery sequence associated
17910 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
17911 	 * serialize the I/O with completion of the spin-up.
17912 	 */
17913 	mutex_enter(SD_MUTEX(un));
17914 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17915 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
17916 	    un, un->un_retry_bp);
17917 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
17918 	sd_start_cmds(un, un->un_retry_bp);
17919 	mutex_exit(SD_MUTEX(un));
17920 
17921 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
17922 }
17923 
17924 
17925 /*
17926  *    Function: sd_send_scsi_INQUIRY
17927  *
17928  * Description: Issue the scsi INQUIRY command.
17929  *
17930  *   Arguments: un
17931  *		bufaddr
17932  *		buflen
17933  *		evpd
17934  *		page_code
17935  *		page_length
17936  *
17937  * Return Code: 0   - Success
17938  *		errno return code from sd_send_scsi_cmd()
17939  *
17940  *     Context: Can sleep. Does not return until command is completed.
17941  */
17942 
17943 static int
17944 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
17945 	uchar_t evpd, uchar_t page_code, size_t *residp)
17946 {
17947 	union scsi_cdb		cdb;
17948 	struct uscsi_cmd	ucmd_buf;
17949 	int			status;
17950 
17951 	ASSERT(un != NULL);
17952 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17953 	ASSERT(bufaddr != NULL);
17954 
17955 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
17956 
17957 	bzero(&cdb, sizeof (cdb));
17958 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17959 	bzero(bufaddr, buflen);
17960 
17961 	cdb.scc_cmd = SCMD_INQUIRY;
17962 	cdb.cdb_opaque[1] = evpd;
17963 	cdb.cdb_opaque[2] = page_code;
17964 	FORMG0COUNT(&cdb, buflen);
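	/*
	 * For example, evpd = 1 with page_code = 0x80 requests the unit
	 * serial number VPD page; with evpd = 0 the spec requires
	 * page_code to be zero.
	 */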
17965 
17966 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17967 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17968 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
17969 	ucmd_buf.uscsi_buflen	= buflen;
17970 	ucmd_buf.uscsi_rqbuf	= NULL;
17971 	ucmd_buf.uscsi_rqlen	= 0;
17972 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
17973 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
17974 
17975 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17976 	    UIO_SYSSPACE, SD_PATH_DIRECT);
17977 
17978 	if ((status == 0) && (residp != NULL)) {
17979 		*residp = ucmd_buf.uscsi_resid;
17980 	}
17981 
17982 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
17983 
17984 	return (status);
17985 }
17986 
17987 
17988 /*
17989  *    Function: sd_send_scsi_TEST_UNIT_READY
17990  *
17991  * Description: Issue the scsi TEST UNIT READY command.
17992  *		This routine can be told to set the flag USCSI_DIAGNOSE to
17993  *		prevent retrying failed commands. Use this when the intent
17994  *		is either to check for device readiness, to clear a Unit
17995  *		Attention, or to clear any outstanding sense data.
17996  *		However, under specific conditions the expected behavior
17997  *		is for retries to bring a device ready, so use the flag
17998  *		with caution.
17999  *
18000  *   Arguments: un
18001  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
18002  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
18003  *			0: don't check for media present, do retries on cmd.
18004  *
18005  * Return Code: 0   - Success
18006  *		EIO - IO error
18007  *		EACCES - Reservation conflict detected
18008  *		ENXIO  - Not Ready, medium not present
18009  *		errno return code from sd_send_scsi_cmd()
18010  *
18011  *     Context: Can sleep. Does not return until command is completed.
18012  */
18013 
18014 static int
18015 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18016 {
18017 	struct	scsi_extended_sense	sense_buf;
18018 	union scsi_cdb		cdb;
18019 	struct uscsi_cmd	ucmd_buf;
18020 	int			status;
18021 
18022 	ASSERT(un != NULL);
18023 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18024 
18025 	SD_TRACE(SD_LOG_IO, un,
18026 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18027 
18028 	/*
18029 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
18030 	 * timeouts when they receive a TUR and the queue is not empty. Check
18031 	 * the configuration flag set during attach (indicating that the
18032 	 * drive has this firmware bug) and un_ncmds_in_transport before
18033 	 * issuing the TUR. If there are pending commands, return success;
18034 	 * this is a bit arbitrary, but it is OK for non-removables (i.e.
18035 	 * the eliteI disks) and non-clustering configurations.
18037 	 */
18038 	if (un->un_f_cfg_tur_check == TRUE) {
18039 		mutex_enter(SD_MUTEX(un));
18040 		if (un->un_ncmds_in_transport != 0) {
18041 			mutex_exit(SD_MUTEX(un));
18042 			return (0);
18043 		}
18044 		mutex_exit(SD_MUTEX(un));
18045 	}
18046 
18047 	bzero(&cdb, sizeof (cdb));
18048 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18049 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18050 
18051 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18052 
18053 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18054 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18055 	ucmd_buf.uscsi_bufaddr	= NULL;
18056 	ucmd_buf.uscsi_buflen	= 0;
18057 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18058 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18059 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18060 
18061 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18062 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18063 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18064 	}
18065 	ucmd_buf.uscsi_timeout	= 60;
18066 
18067 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18068 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18069 	    SD_PATH_STANDARD));
18070 
18071 	switch (status) {
18072 	case 0:
18073 		break;	/* Success! */
18074 	case EIO:
18075 		switch (ucmd_buf.uscsi_status) {
18076 		case STATUS_RESERVATION_CONFLICT:
18077 			status = EACCES;
18078 			break;
18079 		case STATUS_CHECK:
18080 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18081 				break;
18082 			}
18083 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18084 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18085 				KEY_NOT_READY) &&
18086 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18087 				status = ENXIO;
18088 			}
18089 			break;
18090 		default:
18091 			break;
18092 		}
18093 		break;
18094 	default:
18095 		break;
18096 	}
18097 
18098 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18099 
18100 	return (status);
18101 }
18102 
18103 
18104 /*
18105  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18106  *
18107  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18108  *
18109  *   Arguments: un
18110  *
18111  * Return Code: 0   - Success
18112  *		EACCES
18113  *		ENOTSUP
18114  *		errno return code from sd_send_scsi_cmd()
18115  *
18116  *     Context: Can sleep. Does not return until command is completed.
18117  */
18118 
18119 static int
18120 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18121 	uint16_t data_len, uchar_t *data_bufp)
18122 {
18123 	struct scsi_extended_sense	sense_buf;
18124 	union scsi_cdb		cdb;
18125 	struct uscsi_cmd	ucmd_buf;
18126 	int			status;
18127 	int			no_caller_buf = FALSE;
18128 
18129 	ASSERT(un != NULL);
18130 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18131 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18132 
18133 	SD_TRACE(SD_LOG_IO, un,
18134 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18135 
18136 	bzero(&cdb, sizeof (cdb));
18137 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18138 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18139 	if (data_bufp == NULL) {
18140 		/* Allocate a default buf if the caller did not give one */
18141 		ASSERT(data_len == 0);
18142 		data_len  = MHIOC_RESV_KEY_SIZE;
18143 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18144 		no_caller_buf = TRUE;
18145 	}
18146 
18147 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18148 	cdb.cdb_opaque[1] = usr_cmd;
18149 	FORMG1COUNT(&cdb, data_len);
18150 
18151 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18152 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18153 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18154 	ucmd_buf.uscsi_buflen	= data_len;
18155 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18156 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18157 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18158 	ucmd_buf.uscsi_timeout	= 60;
18159 
18160 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18161 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18162 
18163 	switch (status) {
18164 	case 0:
18165 		break;	/* Success! */
18166 	case EIO:
18167 		switch (ucmd_buf.uscsi_status) {
18168 		case STATUS_RESERVATION_CONFLICT:
18169 			status = EACCES;
18170 			break;
18171 		case STATUS_CHECK:
18172 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18173 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18174 				KEY_ILLEGAL_REQUEST)) {
18175 				status = ENOTSUP;
18176 			}
18177 			break;
18178 		default:
18179 			break;
18180 		}
18181 		break;
18182 	default:
18183 		break;
18184 	}
18185 
18186 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18187 
18188 	if (no_caller_buf == TRUE) {
18189 		kmem_free(data_bufp, data_len);
18190 	}
18191 
18192 	return (status);
18193 }
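
/*
 * Editor's sketch (illustrative only, never compiled): fetching the first
 * registered reservation key with SD_READ_KEYS.  The parameter data layout
 * assumed here (4-byte generation, 4-byte additional length, then 8-byte
 * keys) is the SCSI-3 PERSISTENT RESERVE IN (READ KEYS) format; the
 * sd_example_* name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_first_key(struct sd_lun *un, uchar_t *key)
{
	uchar_t		buf[8 + (4 * MHIOC_RESV_KEY_SIZE)];
	uint32_t	alen;
	int		rv;

	rv = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
	    sizeof (buf), buf);
	if (rv != 0)
		return (rv);	/* EACCES, ENOTSUP, or other errno */

	/* Bytes 4-7 hold the additional length; keys start at byte 8. */
	alen = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) |
	    ((uint32_t)buf[6] << 8) | (uint32_t)buf[7];
	if (alen < MHIOC_RESV_KEY_SIZE)
		return (ENODATA);	/* no keys registered */
	bcopy(&buf[8], key, MHIOC_RESV_KEY_SIZE);
	return (0);
}
#endif	/* SD_EXAMPLE_SKETCHES */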
18194 
18195 
18196 /*
18197  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18198  *
18199  * Description: This routine handles multi-host persistent reservation
18200  *		requests (MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
18201  *		MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_REGISTERANDIGNOREKEY)
18202  *		by sending the SCSI-3 PROUT commands to the device.
18203  *
18204  *   Arguments: un  -   Pointer to soft state struct for the target.
18205  *		usr_cmd SCSI-3 reservation facility command (one of
18206  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
18207  *			SD_SCSI3_PREEMPTANDABORT,
18208  *			SD_SCSI3_REGISTERANDIGNOREKEY)
18209  *		usr_bufp - user provided pointer to the register, reserve
18210  *			descriptor, preempt and abort, or register and ignore
 *			key structure (mhioc_register_t, mhioc_resv_desc_t, etc.)
18211  *
18212  * Return Code: 0   - Success
18213  *		EACCES
18214  *		ENOTSUP
18215  *		errno return code from sd_send_scsi_cmd()
18216  *
18217  *     Context: Can sleep. Does not return until command is completed.
18218  */
18219 
18220 static int
18221 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18222 	uchar_t	*usr_bufp)
18223 {
18224 	struct scsi_extended_sense	sense_buf;
18225 	union scsi_cdb		cdb;
18226 	struct uscsi_cmd	ucmd_buf;
18227 	int			status;
18228 	uchar_t			data_len = sizeof (sd_prout_t);
18229 	sd_prout_t		*prp;
18230 
18231 	ASSERT(un != NULL);
18232 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18233 	ASSERT(data_len == 24);	/* required by scsi spec */
18234 
18235 	SD_TRACE(SD_LOG_IO, un,
18236 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18237 
18238 	if (usr_bufp == NULL) {
18239 		return (EINVAL);
18240 	}
18241 
18242 	bzero(&cdb, sizeof (cdb));
18243 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18244 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18245 	prp = kmem_zalloc(data_len, KM_SLEEP);
18246 
18247 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18248 	cdb.cdb_opaque[1] = usr_cmd;
18249 	FORMG1COUNT(&cdb, data_len);
18250 
18251 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18252 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18253 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18254 	ucmd_buf.uscsi_buflen	= data_len;
18255 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18256 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18257 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18258 	ucmd_buf.uscsi_timeout	= 60;
18259 
18260 	switch (usr_cmd) {
18261 	case SD_SCSI3_REGISTER: {
18262 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18263 
18264 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18265 		bcopy(ptr->newkey.key, prp->service_key,
18266 		    MHIOC_RESV_KEY_SIZE);
18267 		prp->aptpl = ptr->aptpl;
18268 		break;
18269 	}
18270 	case SD_SCSI3_RESERVE:
18271 	case SD_SCSI3_RELEASE: {
18272 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18273 
18274 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18275 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18276 		cdb.cdb_opaque[2] = ptr->type;
18277 		break;
18278 	}
18279 	case SD_SCSI3_PREEMPTANDABORT: {
18280 		mhioc_preemptandabort_t *ptr =
18281 		    (mhioc_preemptandabort_t *)usr_bufp;
18282 
18283 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18284 		bcopy(ptr->victim_key.key, prp->service_key,
18285 		    MHIOC_RESV_KEY_SIZE);
18286 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18287 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
18288 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18289 		break;
18290 	}
18291 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18292 	{
18293 		mhioc_registerandignorekey_t *ptr;
18294 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18295 		bcopy(ptr->newkey.key,
18296 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18297 		prp->aptpl = ptr->aptpl;
18298 		break;
18299 	}
18300 	default:
18301 		ASSERT(FALSE);
18302 		break;
18303 	}
18304 
18305 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18306 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18307 
18308 	switch (status) {
18309 	case 0:
18310 		break;	/* Success! */
18311 	case EIO:
18312 		switch (ucmd_buf.uscsi_status) {
18313 		case STATUS_RESERVATION_CONFLICT:
18314 			status = EACCES;
18315 			break;
18316 		case STATUS_CHECK:
18317 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18318 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18319 				KEY_ILLEGAL_REQUEST)) {
18320 				status = ENOTSUP;
18321 			}
18322 			break;
18323 		default:
18324 			break;
18325 		}
18326 		break;
18327 	default:
18328 		break;
18329 	}
18330 
18331 	kmem_free(prp, data_len);
18332 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18333 	return (status);
18334 }
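
/*
 * Editor's sketch (illustrative only, never compiled): registering a new
 * reservation key, the same path MHIOCGRP_REGISTER takes through sdioctl().
 * A zeroed oldkey indicates there is no prior registration.  The
 * sd_example_* name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_register_key(struct sd_lun *un, uchar_t *newkey)
{
	mhioc_register_t	reg;

	bzero(&reg, sizeof (reg));	/* oldkey stays all zeros */
	bcopy(newkey, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
	reg.aptpl = 0;	/* registration need not persist across power loss */

	return (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
	    (uchar_t *)&reg));
}
#endif	/* SD_EXAMPLE_SKETCHES */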
18335 
18336 
18337 /*
18338  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18339  *
18340  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18341  *
18342  *   Arguments: un - pointer to the target's soft state struct
18343  *
18344  * Return Code: 0 - success
18345  *		errno-type error code
18346  *
18347  *     Context: kernel thread context only.
18348  */
18349 
18350 static int
18351 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18352 {
18353 	struct sd_uscsi_info	*uip;
18354 	struct uscsi_cmd	*uscmd;
18355 	union scsi_cdb		*cdb;
18356 	struct buf		*bp;
18357 	int			rval = 0;
18358 
18359 	SD_TRACE(SD_LOG_IO, un,
18360 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18361 
18362 	ASSERT(un != NULL);
18363 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18364 
18365 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
18366 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
18367 
18368 	/*
18369 	 * Get some memory for the uscsi_cmd struct and initialize it
18370 	 * for the SYNCHRONIZE_CACHE cmd (the cdb was allocated above).
18371 	 */
18372 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
18373 	uscmd->uscsi_cdblen = CDB_GROUP1;
18374 	uscmd->uscsi_cdb = (caddr_t)cdb;
18375 	uscmd->uscsi_bufaddr = NULL;
18376 	uscmd->uscsi_buflen = 0;
18377 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18378 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18379 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18380 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18381 	uscmd->uscsi_timeout = sd_io_time;
18382 
18383 	/*
18384 	 * Allocate an sd_uscsi_info struct and fill it with the info
18385 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18386 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18387 	 * since we allocate the buf here in this function, we do not
18388 	 * need to preserve the prior contents of b_private.
18389 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
18390 	 */
18391 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18392 	uip->ui_flags = SD_PATH_DIRECT;
18393 	uip->ui_cmdp  = uscmd;
18394 
18395 	bp = getrbuf(KM_SLEEP);
18396 	bp->b_private = uip;
18397 
18398 	/*
18399 	 * Setup buffer to carry uscsi request.
18400 	 */
18401 	bp->b_flags  = B_BUSY;
18402 	bp->b_bcount = 0;
18403 	bp->b_blkno  = 0;
18404 
18405 	if (dkc != NULL) {
18406 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18407 		uip->ui_dkc = *dkc;
18408 	}
18409 
18410 	bp->b_edev = SD_GET_DEV(un);
18411 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18412 
18413 	(void) sd_uscsi_strategy(bp);
18414 
18415 	/*
18416 	 * If this is a synchronous request, wait for completion.
18417 	 * If it is async, just return and let the b_iodone callback
18418 	 * clean up.
18419 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
18420 	 * but it was also incremented in sd_uscsi_strategy(), so
18421 	 * we should be ok.
18422 	 */
18423 	if (dkc == NULL) {
18424 		(void) biowait(bp);
18425 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18426 	}
18427 
18428 	return (rval);
18429 }
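
/*
 * Editor's sketch (illustrative only, never compiled): the two usage modes
 * of the routine above.  Passing a NULL dk_callback makes the flush
 * synchronous; passing one makes it asynchronous, with the callback fired
 * from sd_send_scsi_SYNCHRONIZE_CACHE_biodone().  The sd_example_* names
 * and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static void
sd_example_flush_done(void *cookie, int error)
{
	cmn_err(CE_NOTE, "!flush of %p completed: %d", cookie, error);
}

static int
sd_example_flush(struct sd_lun *un, int async)
{
	struct dk_callback	dkc;

	if (!async)
		return (sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL));

	bzero(&dkc, sizeof (dkc));
	dkc.dkc_callback = sd_example_flush_done;
	dkc.dkc_cookie = un;
	return (sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc));
}
#endif	/* SD_EXAMPLE_SKETCHES */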
18430 
18431 
18432 static int
18433 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18434 {
18435 	struct sd_uscsi_info *uip;
18436 	struct uscsi_cmd *uscmd;
18437 	uint8_t *sense_buf;
18438 	struct sd_lun *un;
18439 	int status;
18440 
18441 	uip = (struct sd_uscsi_info *)(bp->b_private);
18442 	ASSERT(uip != NULL);
18443 
18444 	uscmd = uip->ui_cmdp;
18445 	ASSERT(uscmd != NULL);
18446 
18447 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18448 	ASSERT(sense_buf != NULL);
18449 
18450 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18451 	ASSERT(un != NULL);
18452 
18453 	status = geterror(bp);
18454 	switch (status) {
18455 	case 0:
18456 		break;	/* Success! */
18457 	case EIO:
18458 		switch (uscmd->uscsi_status) {
18459 		case STATUS_RESERVATION_CONFLICT:
18460 			/* Ignore reservation conflict */
18461 			status = 0;
18462 			goto done;
18463 
18464 		case STATUS_CHECK:
18465 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18466 			    (scsi_sense_key(sense_buf) ==
18467 				KEY_ILLEGAL_REQUEST)) {
18468 				/* Ignore Illegal Request error */
18469 				mutex_enter(SD_MUTEX(un));
18470 				un->un_f_sync_cache_supported = FALSE;
18471 				mutex_exit(SD_MUTEX(un));
18472 				status = ENOTSUP;
18473 				goto done;
18474 			}
18475 			break;
18476 		default:
18477 			break;
18478 		}
18479 		/* FALLTHRU */
18480 	default:
18481 		/*
18482 		 * Don't log an error message if this device
18483 		 * has removable media.
18484 		 */
18485 		if (!un->un_f_has_removable_media) {
18486 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18487 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18488 		}
18489 		break;
18490 	}
18491 
18492 done:
18493 	if (uip->ui_dkc.dkc_callback != NULL) {
18494 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18495 	}
18496 
18497 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18498 	freerbuf(bp);
18499 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18500 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18501 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18502 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18503 
18504 	return (status);
18505 }
18506 
18507 
18508 /*
18509  *    Function: sd_send_scsi_GET_CONFIGURATION
18510  *
18511  * Description: Issues the get configuration command to the device.
18512  *		Called from sd_check_for_writable_cd & sd_get_media_info;
18513  *		the caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
18514  *   Arguments: un
18515  *		ucmdbuf
18516  *		rqbuf
18517  *		rqbuflen
18518  *		bufaddr
18519  *		buflen
18520  *		path_flag
18521  *
18522  * Return Code: 0   - Success
18523  *		errno return code from sd_send_scsi_cmd()
18524  *
18525  *     Context: Can sleep. Does not return until command is completed.
18526  *
18527  */
18528 
18529 static int
18530 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18531 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
18532 	int path_flag)
18533 {
18534 	char	cdb[CDB_GROUP1];
18535 	int	status;
18536 
18537 	ASSERT(un != NULL);
18538 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18539 	ASSERT(bufaddr != NULL);
18540 	ASSERT(ucmdbuf != NULL);
18541 	ASSERT(rqbuf != NULL);
18542 
18543 	SD_TRACE(SD_LOG_IO, un,
18544 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18545 
18546 	bzero(cdb, sizeof (cdb));
18547 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18548 	bzero(rqbuf, rqbuflen);
18549 	bzero(bufaddr, buflen);
18550 
18551 	/*
18552 	 * Set up cdb field for the get configuration command.
18553 	 */
18554 	cdb[0] = SCMD_GET_CONFIGURATION;
18555 	cdb[1] = 0x02;  /* Requested Type */
18556 	cdb[8] = SD_PROFILE_HEADER_LEN;
18557 	ucmdbuf->uscsi_cdb = cdb;
18558 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18559 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18560 	ucmdbuf->uscsi_buflen = buflen;
18561 	ucmdbuf->uscsi_timeout = sd_io_time;
18562 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18563 	ucmdbuf->uscsi_rqlen = rqbuflen;
18564 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18565 
18566 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18567 	    UIO_SYSSPACE, path_flag);
18568 
18569 	switch (status) {
18570 	case 0:
18571 		break;  /* Success! */
18572 	case EIO:
18573 		switch (ucmdbuf->uscsi_status) {
18574 		case STATUS_RESERVATION_CONFLICT:
18575 			status = EACCES;
18576 			break;
18577 		default:
18578 			break;
18579 		}
18580 		break;
18581 	default:
18582 		break;
18583 	}
18584 
18585 	if (status == 0) {
18586 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18587 		    "sd_send_scsi_GET_CONFIGURATION: data",
18588 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18589 	}
18590 
18591 	SD_TRACE(SD_LOG_IO, un,
18592 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18593 
18594 	return (status);
18595 }
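
/*
 * Editor's sketch (illustrative only, never compiled): issuing the command
 * above and extracting the current profile from the feature header.  Per
 * MMC, header bytes 6-7 hold the current profile (e.g. 0x0008 CD-ROM,
 * 0x0010 DVD-ROM); SD_PROFILE_HEADER_LEN covers the header, as the
 * routine's comment requires.  The sd_example_* name and
 * SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_current_profile(struct sd_lun *un, uint16_t *profile)
{
	struct uscsi_cmd	ucmd;
	uchar_t			rqbuf[SENSE_LENGTH];
	uchar_t			hdr[SD_PROFILE_HEADER_LEN];
	int			rv;

	rv = sd_send_scsi_GET_CONFIGURATION(un, &ucmd, rqbuf,
	    sizeof (rqbuf), hdr, sizeof (hdr), SD_PATH_DIRECT);
	if (rv == 0)
		*profile = ((uint16_t)hdr[6] << 8) | hdr[7];
	return (rv);
}
#endif	/* SD_EXAMPLE_SKETCHES */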
18596 
18597 /*
18598  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18599  *
18600  * Description: Issues the get configuration command to the device to
18601  *              retrieve a specific feature. Called from
18602  *		sd_check_for_writable_cd & sd_set_mmc_caps.
18603  *   Arguments: un
18604  *              ucmdbuf
18605  *              rqbuf
18606  *              rqbuflen
18607  *              bufaddr
18608  *              buflen
18609  *		feature
18610  *
18611  * Return Code: 0   - Success
18612  *              errno return code from sd_send_scsi_cmd()
18613  *
18614  *     Context: Can sleep. Does not return until command is completed.
18615  *
18616  */
18617 static int
18618 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18619 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18620 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
18621 {
18622 	char    cdb[CDB_GROUP1];
18623 	int	status;
18624 
18625 	ASSERT(un != NULL);
18626 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18627 	ASSERT(bufaddr != NULL);
18628 	ASSERT(ucmdbuf != NULL);
18629 	ASSERT(rqbuf != NULL);
18630 
18631 	SD_TRACE(SD_LOG_IO, un,
18632 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18633 
18634 	bzero(cdb, sizeof (cdb));
18635 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18636 	bzero(rqbuf, rqbuflen);
18637 	bzero(bufaddr, buflen);
18638 
18639 	/*
18640 	 * Set up cdb field for the get configuration command.
18641 	 */
18642 	cdb[0] = SCMD_GET_CONFIGURATION;
18643 	cdb[1] = 0x02;  /* Requested Type */
18644 	cdb[3] = feature;
18645 	cdb[8] = buflen;
18646 	ucmdbuf->uscsi_cdb = cdb;
18647 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18648 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18649 	ucmdbuf->uscsi_buflen = buflen;
18650 	ucmdbuf->uscsi_timeout = sd_io_time;
18651 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18652 	ucmdbuf->uscsi_rqlen = rqbuflen;
18653 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18654 
18655 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18656 	    UIO_SYSSPACE, path_flag);
18657 
18658 	switch (status) {
18659 	case 0:
18660 		break;  /* Success! */
18661 	case EIO:
18662 		switch (ucmdbuf->uscsi_status) {
18663 		case STATUS_RESERVATION_CONFLICT:
18664 			status = EACCES;
18665 			break;
18666 		default:
18667 			break;
18668 		}
18669 		break;
18670 	default:
18671 		break;
18672 	}
18673 
18674 	if (status == 0) {
18675 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18676 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
18677 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18678 	}
18679 
18680 	SD_TRACE(SD_LOG_IO, un,
18681 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
18682 
18683 	return (status);
18684 }
18685 
18686 
18687 /*
18688  *    Function: sd_send_scsi_MODE_SENSE
18689  *
18690  * Description: Utility function for issuing a scsi MODE SENSE command.
18691  *		Note: This routine uses a consistent implementation for Group0,
18692  *		Group1, and Group2 commands across all platforms. ATAPI devices
18693  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
18694  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
18695  *
18696  *   Arguments: un - pointer to the softstate struct for the target.
18697  *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
18698  *			  CDB_GROUP[1|2] (10 byte)).
18699  *		buflen - size of page to be retrieved.
18700  *		page_code - page code of data to be retrieved from the target.
18701  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18702  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18703  *			to use the USCSI "direct" chain and bypass the normal
18704  *			command waitq.
18705  *
18706  * Return Code: 0   - Success
18707  *		errno return code from sd_send_scsi_cmd()
18708  *
18709  *     Context: Can sleep. Does not return until command is completed.
18710  */
18711 
18712 static int
18713 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18714 	size_t buflen,  uchar_t page_code, int path_flag)
18715 {
18716 	struct	scsi_extended_sense	sense_buf;
18717 	union scsi_cdb		cdb;
18718 	struct uscsi_cmd	ucmd_buf;
18719 	int			status;
18720 	int			headlen;
18721 
18722 	ASSERT(un != NULL);
18723 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18724 	ASSERT(bufaddr != NULL);
18725 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18726 	    (cdbsize == CDB_GROUP2));
18727 
18728 	SD_TRACE(SD_LOG_IO, un,
18729 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
18730 
18731 	bzero(&cdb, sizeof (cdb));
18732 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18733 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18734 	bzero(bufaddr, buflen);
18735 
18736 	if (cdbsize == CDB_GROUP0) {
18737 		cdb.scc_cmd = SCMD_MODE_SENSE;
18738 		cdb.cdb_opaque[2] = page_code;
18739 		FORMG0COUNT(&cdb, buflen);
18740 		headlen = MODE_HEADER_LENGTH;
18741 	} else {
18742 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
18743 		cdb.cdb_opaque[2] = page_code;
18744 		FORMG1COUNT(&cdb, buflen);
18745 		headlen = MODE_HEADER_LENGTH_GRP2;
18746 	}
18747 
18748 	ASSERT(headlen <= buflen);
18749 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18750 
18751 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18752 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18753 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18754 	ucmd_buf.uscsi_buflen	= buflen;
18755 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18756 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18757 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18758 	ucmd_buf.uscsi_timeout	= 60;
18759 
18760 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18761 	    UIO_SYSSPACE, path_flag);
18762 
18763 	switch (status) {
18764 	case 0:
18765 		/*
18766 		 * sr_check_wp() uses the 0x3f page code and checks the header
18767 		 * of the mode page to determine if the target device is
18768 		 * write-protected. But some USB devices return 0 bytes for
18769 		 * the 0x3f page code. For this case, make sure that at least
18770 		 * the mode page header is returned.
18771 		 */
18772 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
18773 			status = EIO;
18774 		break;	/* Success! */
18775 	case EIO:
18776 		switch (ucmd_buf.uscsi_status) {
18777 		case STATUS_RESERVATION_CONFLICT:
18778 			status = EACCES;
18779 			break;
18780 		default:
18781 			break;
18782 		}
18783 		break;
18784 	default:
18785 		break;
18786 	}
18787 
18788 	if (status == 0) {
18789 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
18790 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18791 	}
18792 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
18793 
18794 	return (status);
18795 }
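
/*
 * Editor's sketch (illustrative only, never compiled): fetching the caching
 * mode page (page code 0x08) with a Group 0 CDB.  This sketch assumes the
 * target returns no block descriptors, so the page data begins right after
 * the MODE_HEADER_LENGTH-byte header; robust code would consult the block
 * descriptor length in the header instead.  The sd_example_* name and
 * SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_get_caching_page(struct sd_lun *un, uchar_t *page, size_t len)
{
	size_t	buflen = MODE_HEADER_LENGTH + len;
	uchar_t	*buf = kmem_zalloc(buflen, KM_SLEEP);
	int	rv;

	rv = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, buf, buflen,
	    0x08, SD_PATH_DIRECT);
	if (rv == 0)
		bcopy(&buf[MODE_HEADER_LENGTH], page, len);
	kmem_free(buf, buflen);
	return (rv);
}
#endif	/* SD_EXAMPLE_SKETCHES */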
18796 
18797 
18798 /*
18799  *    Function: sd_send_scsi_MODE_SELECT
18800  *
18801  * Description: Utility function for issuing a scsi MODE SELECT command.
18802  *		Note: This routine uses a consistent implementation for Group0,
18803  *		Group1, and Group2 commands across all platforms. ATAPI devices
18804  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
18805  *
18806  *   Arguments: un - pointer to the softstate struct for the target.
18807  *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
18808  *			  CDB_GROUP[1|2] (10 byte)).
18809  *		bufaddr - buffer containing the page data to send to the
18810  *			target.
18811  *		buflen - size of the page to be transferred.
 *		save_page - boolean to determine if the SP bit should be set.
18812  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18813  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18814  *			to use the USCSI "direct" chain and bypass the normal
18815  *			command waitq.
18816  *
18817  * Return Code: 0   - Success
18818  *		errno return code from sd_send_scsi_cmd()
18819  *
18820  *     Context: Can sleep. Does not return until command is completed.
18821  */
18822 
18823 static int
18824 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
18825 	size_t buflen,  uchar_t save_page, int path_flag)
18826 {
18827 	struct	scsi_extended_sense	sense_buf;
18828 	union scsi_cdb		cdb;
18829 	struct uscsi_cmd	ucmd_buf;
18830 	int			status;
18831 
18832 	ASSERT(un != NULL);
18833 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18834 	ASSERT(bufaddr != NULL);
18835 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
18836 	    (cdbsize == CDB_GROUP2));
18837 
18838 	SD_TRACE(SD_LOG_IO, un,
18839 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
18840 
18841 	bzero(&cdb, sizeof (cdb));
18842 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18843 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18844 
18845 	/* Set the PF bit for many third party drives */
18846 	cdb.cdb_opaque[1] = 0x10;
18847 
18848 	/* Set the savepage(SP) bit if given */
18849 	if (save_page == SD_SAVE_PAGE) {
18850 		cdb.cdb_opaque[1] |= 0x01;
18851 	}
18852 
18853 	if (cdbsize == CDB_GROUP0) {
18854 		cdb.scc_cmd = SCMD_MODE_SELECT;
18855 		FORMG0COUNT(&cdb, buflen);
18856 	} else {
18857 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
18858 		FORMG1COUNT(&cdb, buflen);
18859 	}
18860 
18861 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18862 
18863 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18864 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18865 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18866 	ucmd_buf.uscsi_buflen	= buflen;
18867 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18868 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18869 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18870 	ucmd_buf.uscsi_timeout	= 60;
18871 
18872 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18873 	    UIO_SYSSPACE, path_flag);
18874 
18875 	switch (status) {
18876 	case 0:
18877 		break;	/* Success! */
18878 	case EIO:
18879 		switch (ucmd_buf.uscsi_status) {
18880 		case STATUS_RESERVATION_CONFLICT:
18881 			status = EACCES;
18882 			break;
18883 		default:
18884 			break;
18885 		}
18886 		break;
18887 	default:
18888 		break;
18889 	}
18890 
18891 	if (status == 0) {
18892 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
18893 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
18894 	}
18895 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
18896 
18897 	return (status);
18898 }
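
/*
 * Editor's sketch (illustrative only, never compiled): the usual
 * read-modify-write pattern for a mode page -- MODE SENSE the current page,
 * change it, then MODE SELECT it back with SD_SAVE_PAGE.  The driver's real
 * callers also clear the mode-header fields that are reserved in MODE
 * SELECT parameter data; this sketch glosses over that and assumes no block
 * descriptors are returned.  The sd_example_* name and SD_EXAMPLE_SKETCHES
 * guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_toggle_page_bit(struct sd_lun *un, uchar_t page_code,
    size_t buflen, int byte_off, uchar_t bit)
{
	uchar_t	*buf = kmem_zalloc(buflen, KM_SLEEP);
	int	rv;

	rv = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, buf, buflen,
	    page_code, SD_PATH_DIRECT);
	if (rv == 0) {
		buf[MODE_HEADER_LENGTH + byte_off] ^= bit;
		rv = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, buf, buflen,
		    SD_SAVE_PAGE, SD_PATH_DIRECT);
	}
	kmem_free(buf, buflen);
	return (rv);
}
#endif	/* SD_EXAMPLE_SKETCHES */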
18899 
18900 
18901 /*
18902  *    Function: sd_send_scsi_RDWR
18903  *
18904  * Description: Issue a scsi READ or WRITE command with the given parameters.
18905  *
18906  *   Arguments: un:      Pointer to the sd_lun struct for the target.
18907  *		cmd:	 SCMD_READ or SCMD_WRITE
18908  *		bufaddr: Address of caller's buffer containing (for a
18909  *			 write) or receiving (for a read) the RDWR data.
18910  *		buflen:  Length of caller's buffer for the RDWR data.
18911  *		start_block: Block number for the start of the RDWR operation.
18912  *			 (Assumes target-native block size.)
18914  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18915  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18916  *			to use the USCSI "direct" chain and bypass the normal
18917  *			command waitq.
18918  *
18919  * Return Code: 0   - Success
18920  *		errno return code from sd_send_scsi_cmd()
18921  *
18922  *     Context: Can sleep. Does not return until command is completed.
18923  */
18924 
18925 static int
18926 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
18927 	size_t buflen, daddr_t start_block, int path_flag)
18928 {
18929 	struct	scsi_extended_sense	sense_buf;
18930 	union scsi_cdb		cdb;
18931 	struct uscsi_cmd	ucmd_buf;
18932 	uint32_t		block_count;
18933 	int			status;
18934 	int			cdbsize;
18935 	uchar_t			flag;
18936 
18937 	ASSERT(un != NULL);
18938 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18939 	ASSERT(bufaddr != NULL);
18940 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
18941 
18942 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
18943 
18944 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
18945 		return (EINVAL);
18946 	}
18947 
18948 	mutex_enter(SD_MUTEX(un));
18949 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
18950 	mutex_exit(SD_MUTEX(un));
18951 
18952 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
18953 
18954 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
18955 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
18956 	    bufaddr, buflen, start_block, block_count);
18957 
18958 	bzero(&cdb, sizeof (cdb));
18959 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18960 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18961 
18962 	/* Compute CDB size to use */
18963 	if (start_block > 0xffffffff)
18964 		cdbsize = CDB_GROUP4;
18965 	else if ((start_block & 0xFFE00000) ||
18966 	    (un->un_f_cfg_is_atapi == TRUE))
18967 		cdbsize = CDB_GROUP1;
18968 	else
18969 		cdbsize = CDB_GROUP0;
18970 
18971 	switch (cdbsize) {
18972 	case CDB_GROUP0:	/* 6-byte CDBs */
18973 		cdb.scc_cmd = cmd;
18974 		FORMG0ADDR(&cdb, start_block);
18975 		FORMG0COUNT(&cdb, block_count);
18976 		break;
18977 	case CDB_GROUP1:	/* 10-byte CDBs */
18978 		cdb.scc_cmd = cmd | SCMD_GROUP1;
18979 		FORMG1ADDR(&cdb, start_block);
18980 		FORMG1COUNT(&cdb, block_count);
18981 		break;
18982 	case CDB_GROUP4:	/* 16-byte CDBs */
18983 		cdb.scc_cmd = cmd | SCMD_GROUP4;
18984 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
18985 		FORMG4COUNT(&cdb, block_count);
18986 		break;
18987 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
18988 	default:
18989 		/* All others reserved */
18990 		return (EINVAL);
18991 	}
18992 
18993 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
18994 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
18995 
18996 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18997 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
18998 	ucmd_buf.uscsi_bufaddr	= bufaddr;
18999 	ucmd_buf.uscsi_buflen	= buflen;
19000 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19001 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19002 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
19003 	ucmd_buf.uscsi_timeout	= 60;
19004 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19005 	    UIO_SYSSPACE, path_flag);
19006 	switch (status) {
19007 	case 0:
19008 		break;	/* Success! */
19009 	case EIO:
19010 		switch (ucmd_buf.uscsi_status) {
19011 		case STATUS_RESERVATION_CONFLICT:
19012 			status = EACCES;
19013 			break;
19014 		default:
19015 			break;
19016 		}
19017 		break;
19018 	default:
19019 		break;
19020 	}
19021 
19022 	if (status == 0) {
19023 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19024 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19025 	}
19026 
19027 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19028 
19029 	return (status);
19030 }
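
/*
 * Editor's sketch (illustrative only, never compiled): reading one
 * target-native block (block 0) through the routine above, using the USCSI
 * "direct" chain the way the driver's label-reading paths do.  The buffer
 * must be at least one target block long.  The sd_example_* name and
 * SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_block0(struct sd_lun *un, uchar_t *buf, size_t tgt_blksize)
{
	return (sd_send_scsi_RDWR(un, SCMD_READ, buf, tgt_blksize,
	    (daddr_t)0, SD_PATH_DIRECT));
}
#endif	/* SD_EXAMPLE_SKETCHES */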
19031 
19032 
19033 /*
19034  *    Function: sd_send_scsi_LOG_SENSE
19035  *
19036  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19037  *
19038  *   Arguments: un:      Pointer to the sd_lun struct for the target.
19039  *		bufaddr, buflen - buffer for the returned log page data.
 *		page_code, page_control, param_ptr - log page selection
 *			fields placed into the CDB.
 *		path_flag - chain selection flag for sd_send_scsi_cmd().
 *
19040  * Return Code: 0   - Success
19041  *		errno return code from sd_send_scsi_cmd()
19042  *
19043  *     Context: Can sleep. Does not return until command is completed.
19044  */
19045 
19046 static int
19047 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19048 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19049 	int path_flag)
19050 
19051 {
19052 	struct	scsi_extended_sense	sense_buf;
19053 	union scsi_cdb		cdb;
19054 	struct uscsi_cmd	ucmd_buf;
19055 	int			status;
19056 
19057 	ASSERT(un != NULL);
19058 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19059 
19060 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19061 
19062 	bzero(&cdb, sizeof (cdb));
19063 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19064 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19065 
19066 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19067 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19068 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19069 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19070 	FORMG1COUNT(&cdb, buflen);
19071 
19072 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19073 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19074 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19075 	ucmd_buf.uscsi_buflen	= buflen;
19076 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19077 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19078 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19079 	ucmd_buf.uscsi_timeout	= 60;
19080 
19081 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19082 	    UIO_SYSSPACE, path_flag);
19083 
19084 	switch (status) {
19085 	case 0:
19086 		break;
19087 	case EIO:
19088 		switch (ucmd_buf.uscsi_status) {
19089 		case STATUS_RESERVATION_CONFLICT:
19090 			status = EACCES;
19091 			break;
19092 		case STATUS_CHECK:
19093 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19094 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19095 				KEY_ILLEGAL_REQUEST) &&
19096 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19097 				/*
19098 				 * ASC 0x24: INVALID FIELD IN CDB
19099 				 */
19100 				switch (page_code) {
19101 				case START_STOP_CYCLE_PAGE:
19102 					/*
19103 					 * The start stop cycle counter is
19104 					 * implemented as page 0x31 in earlier
19105 					 * generation disks. In new generation
19106 					 * disks the start stop cycle counter is
19107 					 * implemented as page 0xE. To handle
19108 					 * both cases, if an attempt to read
19109 					 * log page 0xE fails, we retry the
19110 					 * request using page 0x31.
19111 					 *
19112 					 * Network storage BU committed to
19113 					 * maintain the page 0x31 for this
19114 					 * purpose and will not have any other
19115 					 * page implemented with page code 0x31
19116 					 * until all disks transition to the
19117 					 * standard page.
19118 					 */
19119 					mutex_enter(SD_MUTEX(un));
19120 					un->un_start_stop_cycle_page =
19121 					    START_STOP_CYCLE_VU_PAGE;
19122 					cdb.cdb_opaque[2] =
19123 					    (char)(page_control << 6) |
19124 					    un->un_start_stop_cycle_page;
19125 					mutex_exit(SD_MUTEX(un));
19126 					status = sd_send_scsi_cmd(
19127 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19128 					    UIO_SYSSPACE, path_flag);
19129 
19130 					break;
19131 				case TEMPERATURE_PAGE:
19132 					status = ENOTTY;
19133 					break;
19134 				default:
19135 					break;
19136 				}
19137 			}
19138 			break;
19139 		default:
19140 			break;
19141 		}
19142 		break;
19143 	default:
19144 		break;
19145 	}
19146 
19147 	if (status == 0) {
19148 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19149 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19150 	}
19151 
19152 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19153 
19154 	return (status);
19155 }
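
/*
 * Editor's sketch (illustrative only, never compiled): reading the
 * temperature log page, as sd_dkio_get_temp() does for DKIOCGTEMPERATURE.
 * Page control 0x01 requests current cumulative values; the ENOTTY mapping
 * above tells callers when the page is not implemented.  The sd_example_*
 * name and SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_temp_page(struct sd_lun *un, uchar_t *buf, uint16_t buflen)
{
	return (sd_send_scsi_LOG_SENSE(un, buf, buflen, TEMPERATURE_PAGE,
	    0x01, 0, SD_PATH_STANDARD));
}
#endif	/* SD_EXAMPLE_SKETCHES */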
19156 
19157 
19158 /*
19159  *    Function: sdioctl
19160  *
19161  * Description: Driver's ioctl(9e) entry point function.
19162  *
19163  *   Arguments: dev     - device number
19164  *		cmd     - ioctl operation to be performed
19165  *		arg     - user argument, contains data to be set or reference
19166  *			  parameter for get
19167  *		flag    - bit flag, indicating open settings, 32/64 bit type
19168  *		cred_p  - user credential pointer
19169  *		rval_p  - calling process return value (OPT)
19170  *
19171  * Return Code: EINVAL
19172  *		ENOTTY
19173  *		ENXIO
19174  *		EIO
19175  *		EFAULT
19176  *		ENOTSUP
19177  *		EPERM
19178  *
19179  *     Context: Called from the device switch at normal priority.
19180  */
19181 
19182 static int
19183 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19184 {
19185 	struct sd_lun	*un = NULL;
19186 	int		err = 0;
19187 	int		i = 0;
19188 	cred_t		*cr;
19189 	int		tmprval = EINVAL;
19190 	int 		is_valid;
19191 
19192 	/*
19193 	 * All device accesses go thru sdstrategy where we check on suspend
19194 	 * status
19195 	 */
19196 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19197 		return (ENXIO);
19198 	}
19199 
19200 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19201 
19202 
19203 	is_valid = SD_IS_VALID_LABEL(un);
19204 
19205 	/*
19206 	 * Moved this wait from sd_uscsi_strategy to here for
19207 	 * reasons of deadlock prevention. Internal driver commands,
19208 	 * specifically those to change a devices power level, result
19209 	 * specifically those to change a device's power level, result
19210 	 */
19211 	mutex_enter(SD_MUTEX(un));
19212 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19213 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19214 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19215 	}
19216 	/*
19217 	 * Twiddling the counter here protects commands from now
19218 	 * through to the top of sd_uscsi_strategy. Without the
19219 	 * counter increment, a power down, for example, could get in
19220 	 * after the above check for state is made and before
19221 	 * execution gets to the top of sd_uscsi_strategy.
19222 	 * That would cause problems.
19223 	 */
19224 	un->un_ncmds_in_driver++;
19225 
19226 	if (!is_valid &&
19227 	    (flag & (FNDELAY | FNONBLOCK))) {
19228 		switch (cmd) {
19229 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19230 		case DKIOCGVTOC:
19231 		case DKIOCGAPART:
19232 		case DKIOCPARTINFO:
19233 		case DKIOCSGEOM:
19234 		case DKIOCSAPART:
19235 		case DKIOCGETEFI:
19236 		case DKIOCPARTITION:
19237 		case DKIOCSVTOC:
19238 		case DKIOCSETEFI:
19239 		case DKIOCGMBOOT:
19240 		case DKIOCSMBOOT:
19241 		case DKIOCG_PHYGEOM:
19242 		case DKIOCG_VIRTGEOM:
19243 			/* let cmlb handle it */
19244 			goto skip_ready_valid;
19245 
19246 		case CDROMPAUSE:
19247 		case CDROMRESUME:
19248 		case CDROMPLAYMSF:
19249 		case CDROMPLAYTRKIND:
19250 		case CDROMREADTOCHDR:
19251 		case CDROMREADTOCENTRY:
19252 		case CDROMSTOP:
19253 		case CDROMSTART:
19254 		case CDROMVOLCTRL:
19255 		case CDROMSUBCHNL:
19256 		case CDROMREADMODE2:
19257 		case CDROMREADMODE1:
19258 		case CDROMREADOFFSET:
19259 		case CDROMSBLKMODE:
19260 		case CDROMGBLKMODE:
19261 		case CDROMGDRVSPEED:
19262 		case CDROMSDRVSPEED:
19263 		case CDROMCDDA:
19264 		case CDROMCDXA:
19265 		case CDROMSUBCODE:
19266 			if (!ISCD(un)) {
19267 				un->un_ncmds_in_driver--;
19268 				ASSERT(un->un_ncmds_in_driver >= 0);
19269 				mutex_exit(SD_MUTEX(un));
19270 				return (ENOTTY);
19271 			}
19272 			break;
19273 		case FDEJECT:
19274 		case DKIOCEJECT:
19275 		case CDROMEJECT:
19276 			if (!un->un_f_eject_media_supported) {
19277 				un->un_ncmds_in_driver--;
19278 				ASSERT(un->un_ncmds_in_driver >= 0);
19279 				mutex_exit(SD_MUTEX(un));
19280 				return (ENOTTY);
19281 			}
19282 			break;
19283 		case DKIOCFLUSHWRITECACHE:
19284 			mutex_exit(SD_MUTEX(un));
19285 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19286 			if (err != 0) {
19287 				mutex_enter(SD_MUTEX(un));
19288 				un->un_ncmds_in_driver--;
19289 				ASSERT(un->un_ncmds_in_driver >= 0);
19290 				mutex_exit(SD_MUTEX(un));
19291 				return (EIO);
19292 			}
19293 			mutex_enter(SD_MUTEX(un));
19294 			/* FALLTHROUGH */
19295 		case DKIOCREMOVABLE:
19296 		case DKIOCHOTPLUGGABLE:
19297 		case DKIOCINFO:
19298 		case DKIOCGMEDIAINFO:
19299 		case MHIOCENFAILFAST:
19300 		case MHIOCSTATUS:
19301 		case MHIOCTKOWN:
19302 		case MHIOCRELEASE:
19303 		case MHIOCGRP_INKEYS:
19304 		case MHIOCGRP_INRESV:
19305 		case MHIOCGRP_REGISTER:
19306 		case MHIOCGRP_RESERVE:
19307 		case MHIOCGRP_PREEMPTANDABORT:
19308 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19309 		case CDROMCLOSETRAY:
19310 		case USCSICMD:
19311 			goto skip_ready_valid;
19312 		default:
19313 			break;
19314 		}
19315 
19316 		mutex_exit(SD_MUTEX(un));
19317 		err = sd_ready_and_valid(un);
19318 		mutex_enter(SD_MUTEX(un));
19319 
19320 		if (err != SD_READY_VALID) {
19321 			switch (cmd) {
19322 			case DKIOCSTATE:
19323 			case CDROMGDRVSPEED:
19324 			case CDROMSDRVSPEED:
19325 			case FDEJECT:	/* for eject command */
19326 			case DKIOCEJECT:
19327 			case CDROMEJECT:
19328 			case DKIOCREMOVABLE:
19329 			case DKIOCHOTPLUGGABLE:
19330 				break;
19331 			default:
19332 				if (un->un_f_has_removable_media) {
19333 					err = ENXIO;
19334 				} else {
19335 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19336 					if (err == SD_RESERVED_BY_OTHERS) {
19337 						err = EACCES;
19338 					} else {
19339 						err = EIO;
19340 					}
19341 				}
19342 				un->un_ncmds_in_driver--;
19343 				ASSERT(un->un_ncmds_in_driver >= 0);
19344 				mutex_exit(SD_MUTEX(un));
19345 				return (err);
19346 			}
19347 		}
19348 	}
19349 
19350 skip_ready_valid:
19351 	mutex_exit(SD_MUTEX(un));
19352 
19353 	switch (cmd) {
19354 	case DKIOCINFO:
19355 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19356 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19357 		break;
19358 
19359 	case DKIOCGMEDIAINFO:
19360 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19361 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19362 		break;
19363 
19364 	case DKIOCGGEOM:
19365 	case DKIOCGVTOC:
19366 	case DKIOCGAPART:
19367 	case DKIOCPARTINFO:
19368 	case DKIOCSGEOM:
19369 	case DKIOCSAPART:
19370 	case DKIOCGETEFI:
19371 	case DKIOCPARTITION:
19372 	case DKIOCSVTOC:
19373 	case DKIOCSETEFI:
19374 	case DKIOCGMBOOT:
19375 	case DKIOCSMBOOT:
19376 	case DKIOCG_PHYGEOM:
19377 	case DKIOCG_VIRTGEOM:
19378 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19379 
19380 		/* TUR should spin up */
19381 
19382 		if (un->un_f_has_removable_media)
19383 			err = sd_send_scsi_TEST_UNIT_READY(un,
19384 			    SD_CHECK_FOR_MEDIA);
19385 		else
19386 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19387 
19388 		if (err != 0)
19389 			break;
19390 
19391 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19392 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19393 
19394 		if ((err == 0) &&
19395 		    ((cmd == DKIOCSETEFI) ||
19396 		    ((un->un_f_pkstats_enabled) &&
19397 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC)))) {
19398 
19399 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19400 			    (void *)SD_PATH_DIRECT);
19401 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19402 				sd_set_pstats(un);
19403 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19404 				    "sd_ioctl: un:0x%p pstats created and "
19405 				    "set\n", un);
19406 			}
19407 		}
19408 
19409 		if ((cmd == DKIOCSVTOC) ||
19410 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19411 
19412 			mutex_enter(SD_MUTEX(un));
19413 			if (un->un_f_devid_supported &&
19414 			    (un->un_f_opt_fab_devid == TRUE)) {
19415 				if (un->un_devid == NULL) {
19416 					sd_register_devid(un, SD_DEVINFO(un),
19417 					    SD_TARGET_IS_UNRESERVED);
19418 				} else {
19419 					/*
19420 					 * The device id for this disk
19421 					 * has been fabricated. The
19422 					 * device id must be preserved
19423 					 * by writing it back out to
19424 					 * disk.
19425 					 */
19426 					if (sd_write_deviceid(un) != 0) {
19427 						ddi_devid_free(un->un_devid);
19428 						un->un_devid = NULL;
19429 					}
19430 				}
19431 			}
19432 			mutex_exit(SD_MUTEX(un));
19433 		}
19434 
19435 		break;
19436 
19437 	case DKIOCLOCK:
19438 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19439 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19440 		    SD_PATH_STANDARD);
19441 		break;
19442 
19443 	case DKIOCUNLOCK:
19444 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19445 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19446 		    SD_PATH_STANDARD);
19447 		break;
19448 
19449 	case DKIOCSTATE: {
19450 		enum dkio_state		state;
19451 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19452 
19453 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19454 			err = EFAULT;
19455 		} else {
19456 			err = sd_check_media(dev, state);
19457 			if (err == 0) {
19458 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19459 				    sizeof (int), flag) != 0)
19460 					err = EFAULT;
19461 			}
19462 		}
19463 		break;
19464 	}
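
	/*
	 * Editor's note (illustrative, user-level sketch): an application
	 * typically drives the DKIOCSTATE handler above in a loop.  The
	 * ioctl blocks in sd_check_media() until the media state differs
	 * from the state passed in, then returns the new state:
	 *
	 *	enum dkio_state state = DKIO_NONE;
	 *	for (;;) {
	 *		if (ioctl(fd, DKIOCSTATE, &state) < 0)
	 *			break;			// device gone, etc.
	 *		if (state == DKIO_INSERTED)
	 *			handle_insertion();	// hypothetical helper
	 *	}
	 */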
19465 
19466 	case DKIOCREMOVABLE:
19467 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19468 		i = un->un_f_has_removable_media ? 1 : 0;
19469 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19470 			err = EFAULT;
19471 		} else {
19472 			err = 0;
19473 		}
19474 		break;
19475 
19476 	case DKIOCHOTPLUGGABLE:
19477 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19478 		i = un->un_f_is_hotpluggable ? 1 : 0;
19479 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19480 			err = EFAULT;
19481 		} else {
19482 			err = 0;
19483 		}
19484 		break;
19485 
19486 	case DKIOCGTEMPERATURE:
19487 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19488 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19489 		break;
19490 
19491 	case MHIOCENFAILFAST:
19492 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19493 		if ((err = drv_priv(cred_p)) == 0) {
19494 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19495 		}
19496 		break;
19497 
19498 	case MHIOCTKOWN:
19499 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19500 		if ((err = drv_priv(cred_p)) == 0) {
19501 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19502 		}
19503 		break;
19504 
19505 	case MHIOCRELEASE:
19506 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19507 		if ((err = drv_priv(cred_p)) == 0) {
19508 			err = sd_mhdioc_release(dev);
19509 		}
19510 		break;
19511 
19512 	case MHIOCSTATUS:
19513 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19514 		if ((err = drv_priv(cred_p)) == 0) {
19515 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19516 			case 0:
19517 				err = 0;
19518 				break;
19519 			case EACCES:
19520 				*rval_p = 1;
19521 				err = 0;
19522 				break;
19523 			default:
19524 				err = EIO;
19525 				break;
19526 			}
19527 		}
19528 		break;
19529 
19530 	case MHIOCQRESERVE:
19531 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19532 		if ((err = drv_priv(cred_p)) == 0) {
19533 			err = sd_reserve_release(dev, SD_RESERVE);
19534 		}
19535 		break;
19536 
19537 	case MHIOCREREGISTERDEVID:
19538 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19539 		if (drv_priv(cred_p) == EPERM) {
19540 			err = EPERM;
19541 		} else if (!un->un_f_devid_supported) {
19542 			err = ENOTTY;
19543 		} else {
19544 			err = sd_mhdioc_register_devid(dev);
19545 		}
19546 		break;
19547 
19548 	case MHIOCGRP_INKEYS:
19549 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19550 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19551 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19552 				err = ENOTSUP;
19553 			} else {
19554 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19555 				    flag);
19556 			}
19557 		}
19558 		break;
19559 
19560 	case MHIOCGRP_INRESV:
19561 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19562 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19563 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19564 				err = ENOTSUP;
19565 			} else {
19566 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19567 			}
19568 		}
19569 		break;
19570 
19571 	case MHIOCGRP_REGISTER:
19572 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19573 		if ((err = drv_priv(cred_p)) != EPERM) {
19574 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19575 				err = ENOTSUP;
19576 			} else if (arg != NULL) {
19577 				mhioc_register_t reg;
19578 				if (ddi_copyin((void *)arg, &reg,
19579 				    sizeof (mhioc_register_t), flag) != 0) {
19580 					err = EFAULT;
19581 				} else {
19582 					err =
19583 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19584 					    un, SD_SCSI3_REGISTER,
19585 					    (uchar_t *)&reg);
19586 				}
19587 			}
19588 		}
19589 		break;
19590 
19591 	case MHIOCGRP_RESERVE:
19592 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19593 		if ((err = drv_priv(cred_p)) != EPERM) {
19594 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19595 				err = ENOTSUP;
19596 			} else if (arg != NULL) {
19597 				mhioc_resv_desc_t resv_desc;
19598 				if (ddi_copyin((void *)arg, &resv_desc,
19599 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19600 					err = EFAULT;
19601 				} else {
19602 					err =
19603 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19604 					    un, SD_SCSI3_RESERVE,
19605 					    (uchar_t *)&resv_desc);
19606 				}
19607 			}
19608 		}
19609 		break;
19610 
19611 	case MHIOCGRP_PREEMPTANDABORT:
19612 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19613 		if ((err = drv_priv(cred_p)) != EPERM) {
19614 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19615 				err = ENOTSUP;
19616 			} else if (arg != NULL) {
19617 				mhioc_preemptandabort_t preempt_abort;
19618 				if (ddi_copyin((void *)arg, &preempt_abort,
19619 				    sizeof (mhioc_preemptandabort_t),
19620 				    flag) != 0) {
19621 					err = EFAULT;
19622 				} else {
19623 					err =
19624 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19625 					    un, SD_SCSI3_PREEMPTANDABORT,
19626 					    (uchar_t *)&preempt_abort);
19627 				}
19628 			}
19629 		}
19630 		break;
19631 
19632 	case MHIOCGRP_REGISTERANDIGNOREKEY:
19633 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19634 		if ((err = drv_priv(cred_p)) != EPERM) {
19635 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19636 				err = ENOTSUP;
19637 			} else if (arg != NULL) {
19638 				mhioc_registerandignorekey_t r_and_i;
19639 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19640 				    sizeof (mhioc_registerandignorekey_t),
19641 				    flag) != 0) {
19642 					err = EFAULT;
19643 				} else {
19644 					err =
19645 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19646 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19647 					    (uchar_t *)&r_and_i);
19648 				}
19649 			}
19650 		}
19651 		break;
19652 
19653 	case USCSICMD:
19654 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
19655 		cr = ddi_get_cred();
19656 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
19657 			err = EPERM;
19658 		} else {
19659 			enum uio_seg	uioseg;
19660 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
19661 			    UIO_USERSPACE;
19662 			if (un->un_f_format_in_progress == TRUE) {
19663 				err = EAGAIN;
19664 				break;
19665 			}
19666 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
19667 			    flag, uioseg, SD_PATH_STANDARD);
19668 		}
19669 		break;
19670 
19671 	case CDROMPAUSE:
19672 	case CDROMRESUME:
19673 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
19674 		if (!ISCD(un)) {
19675 			err = ENOTTY;
19676 		} else {
19677 			err = sr_pause_resume(dev, cmd);
19678 		}
19679 		break;
19680 
19681 	case CDROMPLAYMSF:
19682 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
19683 		if (!ISCD(un)) {
19684 			err = ENOTTY;
19685 		} else {
19686 			err = sr_play_msf(dev, (caddr_t)arg, flag);
19687 		}
19688 		break;
19689 
19690 	case CDROMPLAYTRKIND:
19691 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
19692 #if defined(__i386) || defined(__amd64)
19693 		/*
19694 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
19695 		 */
19696 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19697 #else
19698 		if (!ISCD(un)) {
19699 #endif
19700 			err = ENOTTY;
19701 		} else {
19702 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
19703 		}
19704 		break;
19705 
19706 	case CDROMREADTOCHDR:
19707 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
19708 		if (!ISCD(un)) {
19709 			err = ENOTTY;
19710 		} else {
19711 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
19712 		}
19713 		break;
19714 
19715 	case CDROMREADTOCENTRY:
19716 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
19717 		if (!ISCD(un)) {
19718 			err = ENOTTY;
19719 		} else {
19720 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
19721 		}
19722 		break;
19723 
19724 	case CDROMSTOP:
19725 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
19726 		if (!ISCD(un)) {
19727 			err = ENOTTY;
19728 		} else {
19729 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
19730 			    SD_PATH_STANDARD);
19731 		}
19732 		break;
19733 
19734 	case CDROMSTART:
19735 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
19736 		if (!ISCD(un)) {
19737 			err = ENOTTY;
19738 		} else {
19739 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19740 			    SD_PATH_STANDARD);
19741 		}
19742 		break;
19743 
19744 	case CDROMCLOSETRAY:
19745 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
19746 		if (!ISCD(un)) {
19747 			err = ENOTTY;
19748 		} else {
19749 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
19750 			    SD_PATH_STANDARD);
19751 		}
19752 		break;
19753 
19754 	case FDEJECT:	/* for eject command */
19755 	case DKIOCEJECT:
19756 	case CDROMEJECT:
19757 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
19758 		if (!un->un_f_eject_media_supported) {
19759 			err = ENOTTY;
19760 		} else {
19761 			err = sr_eject(dev);
19762 		}
19763 		break;
19764 
19765 	case CDROMVOLCTRL:
19766 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
19767 		if (!ISCD(un)) {
19768 			err = ENOTTY;
19769 		} else {
19770 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
19771 		}
19772 		break;
19773 
19774 	case CDROMSUBCHNL:
19775 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
19776 		if (!ISCD(un)) {
19777 			err = ENOTTY;
19778 		} else {
19779 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
19780 		}
19781 		break;
19782 
19783 	case CDROMREADMODE2:
19784 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
19785 		if (!ISCD(un)) {
19786 			err = ENOTTY;
19787 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19788 			/*
19789 			 * If the drive supports READ CD, use that instead of
19790 			 * switching the LBA size via a MODE SELECT
19791 			 * Block Descriptor
19792 			 */
19793 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
19794 		} else {
19795 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
19796 		}
19797 		break;
19798 
19799 	case CDROMREADMODE1:
19800 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
19801 		if (!ISCD(un)) {
19802 			err = ENOTTY;
19803 		} else {
19804 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
19805 		}
19806 		break;
19807 
19808 	case CDROMREADOFFSET:
19809 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
19810 		if (!ISCD(un)) {
19811 			err = ENOTTY;
19812 		} else {
19813 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
19814 			    flag);
19815 		}
19816 		break;
19817 
19818 	case CDROMSBLKMODE:
19819 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
19820 		/*
19821 		 * There is no means of changing the block size on atapi
19822 		 * drives, so return ENOTTY if the drive type is atapi.
19823 		 */
19824 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
19825 			err = ENOTTY;
19826 		} else if (un->un_f_mmc_cap == TRUE) {
19827 
19828 			/*
19829 			 * MMC Devices do not support changing the
19830 			 * logical block size
19831 			 *
19832 			 * Note: EINVAL is being returned instead of ENOTTY to
19833 		 * maintain consistency with the original mmc
19834 			 * driver update.
19835 			 */
19836 			err = EINVAL;
19837 		} else {
19838 			mutex_enter(SD_MUTEX(un));
19839 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
19840 			    (un->un_ncmds_in_transport > 0)) {
19841 				mutex_exit(SD_MUTEX(un));
19842 				err = EINVAL;
19843 			} else {
19844 				mutex_exit(SD_MUTEX(un));
19845 				err = sr_change_blkmode(dev, cmd, arg, flag);
19846 			}
19847 		}
19848 		break;
19849 
19850 	case CDROMGBLKMODE:
19851 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
19852 		if (!ISCD(un)) {
19853 			err = ENOTTY;
19854 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
19855 		    (un->un_f_blockcount_is_valid != FALSE)) {
19856 			/*
19857 			 * The drive is an ATAPI drive, so return the target
19858 			 * block size; we cannot change the blocksize on
19859 			 * ATAPI drives. Used primarily to detect if an
19860 			 * ATAPI cdrom is present.
19861 			 */
19862 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
19863 			    sizeof (int), flag) != 0) {
19864 				err = EFAULT;
19865 			} else {
19866 				err = 0;
19867 			}
19868 
19869 		} else {
19870 			/*
19871 			 * Drive supports changing block sizes via a Mode
19872 			 * Select.
19873 			 */
19874 			err = sr_change_blkmode(dev, cmd, arg, flag);
19875 		}
19876 		break;
19877 
19878 	case CDROMGDRVSPEED:
19879 	case CDROMSDRVSPEED:
19880 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
19881 		if (!ISCD(un)) {
19882 			err = ENOTTY;
19883 		} else if (un->un_f_mmc_cap == TRUE) {
19884 			/*
19885 			 * Note: In the future the driver implementation
19886 			 * for getting and
19887 			 * setting cd speed should entail:
19888 			 * 1) If non-mmc try the Toshiba mode page
19889 			 *    (sr_change_speed)
19890 			 * 2) If mmc but no support for Real Time Streaming try
19891 			 *    the SET CD SPEED (0xBB) command
19892 			 *   (sr_atapi_change_speed)
19893 			 * 3) If mmc and support for Real Time Streaming
19894 			 *    try the GET PERFORMANCE and SET STREAMING
19895 			 *    commands (not yet implemented, 4380808)
19896 			 */
19897 			/*
19898 			 * As per recent MMC spec, CD-ROM speed is variable
19899 			 * and changes with LBA. Since there is no such
19900 			 * thing as drive speed now, fail this ioctl.
19901 			 *
19902 			 * Note: EINVAL is returned for consistency with the
19903 			 * original implementation, which included support
19904 			 * for getting the drive speed of mmc devices but not
19905 			 * setting the drive speed. Thus EINVAL would be
19906 			 * returned if a set request was made for an mmc
19907 			 * device. We no longer support get or set speed for
19908 			 * mmc but need to remain consistent with regard
19909 			 * to the error code returned.
19910 			 */
19911 			err = EINVAL;
19912 		} else if (un->un_f_cfg_is_atapi == TRUE) {
19913 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
19914 		} else {
19915 			err = sr_change_speed(dev, cmd, arg, flag);
19916 		}
19917 		break;
19918 
19919 	case CDROMCDDA:
19920 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
19921 		if (!ISCD(un)) {
19922 			err = ENOTTY;
19923 		} else {
19924 			err = sr_read_cdda(dev, (void *)arg, flag);
19925 		}
19926 		break;
19927 
19928 	case CDROMCDXA:
19929 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
19930 		if (!ISCD(un)) {
19931 			err = ENOTTY;
19932 		} else {
19933 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
19934 		}
19935 		break;
19936 
19937 	case CDROMSUBCODE:
19938 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
19939 		if (!ISCD(un)) {
19940 			err = ENOTTY;
19941 		} else {
19942 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
19943 		}
19944 		break;
19945 
19946 
19947 #ifdef SDDEBUG
19948 /* RESET/ABORTS testing ioctls */
19949 	case DKIOCRESET: {
19950 		int	reset_level;
19951 
19952 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
19953 			err = EFAULT;
19954 		} else {
19955 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
19956 			    "reset_level = 0x%x\n", reset_level);
19957 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
19958 				err = 0;
19959 			} else {
19960 				err = EIO;
19961 			}
19962 		}
19963 		break;
19964 	}
19965 
19966 	case DKIOCABORT:
19967 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
19968 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
19969 			err = 0;
19970 		} else {
19971 			err = EIO;
19972 		}
19973 		break;
19974 #endif
19975 
19976 #ifdef SD_FAULT_INJECTION
19977 /* SDIOC FaultInjection testing ioctls */
19978 	case SDIOCSTART:
19979 	case SDIOCSTOP:
19980 	case SDIOCINSERTPKT:
19981 	case SDIOCINSERTXB:
19982 	case SDIOCINSERTUN:
19983 	case SDIOCINSERTARQ:
19984 	case SDIOCPUSH:
19985 	case SDIOCRETRIEVE:
19986 	case SDIOCRUN:
19987 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
19988 		    "SDIOC detected cmd:0x%X:\n", cmd);
19989 		/* call error generator */
19990 		sd_faultinjection_ioctl(cmd, arg, un);
19991 		err = 0;
19992 		break;
19993 
19994 #endif /* SD_FAULT_INJECTION */
19995 
19996 	case DKIOCFLUSHWRITECACHE:
19997 		{
19998 			struct dk_callback *dkc = (struct dk_callback *)arg;
19999 
20000 			mutex_enter(SD_MUTEX(un));
20001 			if (!un->un_f_sync_cache_supported ||
20002 			    !un->un_f_write_cache_enabled) {
20003 				err = un->un_f_sync_cache_supported ?
20004 				    0 : ENOTSUP;
20005 				mutex_exit(SD_MUTEX(un));
20006 				if ((flag & FKIOCTL) && dkc != NULL &&
20007 				    dkc->dkc_callback != NULL) {
20008 					(*dkc->dkc_callback)(dkc->dkc_cookie,
20009 					    err);
20010 					/*
20011 					 * Did callback and reported error.
20012 					 * Since we did a callback, ioctl
20013 					 * should return 0.
20014 					 */
20015 					err = 0;
20016 				}
20017 				break;
20018 			}
20019 			mutex_exit(SD_MUTEX(un));
20020 
20021 			if ((flag & FKIOCTL) && dkc != NULL &&
20022 			    dkc->dkc_callback != NULL) {
20023 				/* async SYNC CACHE request */
20024 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20025 			} else {
20026 				/* synchronous SYNC CACHE request */
20027 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20028 			}
20029 		}
20030 		break;
20031 
20032 	case DKIOCGETWCE: {
20033 
20034 		int wce;
20035 
20036 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20037 			break;
20038 		}
20039 
20040 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20041 			err = EFAULT;
20042 		}
20043 		break;
20044 	}
20045 
20046 	case DKIOCSETWCE: {
20047 
20048 		int wce, sync_supported;
20049 
20050 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20051 			err = EFAULT;
20052 			break;
20053 		}
20054 
20055 		/*
20056 		 * Synchronize multiple threads trying to enable
20057 		 * or disable the cache via the un_f_wcc_cv
20058 		 * condition variable.
20059 		 */
20060 		mutex_enter(SD_MUTEX(un));
20061 
20062 		/*
20063 		 * Don't allow the cache to be enabled if the
20064 		 * config file has it disabled.
20065 		 */
20066 		if (un->un_f_opt_disable_cache && wce) {
20067 			mutex_exit(SD_MUTEX(un));
20068 			err = EINVAL;
20069 			break;
20070 		}
20071 
20072 		/*
20073 		 * Wait for write cache change in progress
20074 		 * bit to be clear before proceeding.
20075 		 */
20076 		while (un->un_f_wcc_inprog)
20077 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20078 
20079 		un->un_f_wcc_inprog = 1;
20080 
20081 		if (un->un_f_write_cache_enabled && wce == 0) {
20082 			/*
20083 			 * Disable the write cache.  Don't clear
20084 			 * un_f_write_cache_enabled until after
20085 			 * the mode select and flush are complete.
20086 			 */
20087 			sync_supported = un->un_f_sync_cache_supported;
20088 			mutex_exit(SD_MUTEX(un));
20089 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20090 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
20091 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20092 			}
20093 
20094 			mutex_enter(SD_MUTEX(un));
20095 			if (err == 0) {
20096 				un->un_f_write_cache_enabled = 0;
20097 			}
20098 
20099 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20100 			/*
20101 			 * Set un_f_write_cache_enabled first, so there is
20102 			 * no window where the cache is enabled, but the
20103 			 * bit says it isn't.
20104 			 */
20105 			un->un_f_write_cache_enabled = 1;
20106 			mutex_exit(SD_MUTEX(un));
20107 
20108 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20109 			    SD_CACHE_ENABLE);
20110 
20111 			mutex_enter(SD_MUTEX(un));
20112 
20113 			if (err) {
20114 				un->un_f_write_cache_enabled = 0;
20115 			}
20116 		}
20117 
20118 		un->un_f_wcc_inprog = 0;
20119 		cv_broadcast(&un->un_wcc_cv);
20120 		mutex_exit(SD_MUTEX(un));
20121 		break;
20122 	}
20123 
20124 	default:
20125 		err = ENOTTY;
20126 		break;
20127 	}
20128 	mutex_enter(SD_MUTEX(un));
20129 	un->un_ncmds_in_driver--;
20130 	ASSERT(un->un_ncmds_in_driver >= 0);
20131 	mutex_exit(SD_MUTEX(un));
20132 
20133 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20134 	return (err);
20135 }
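
/*
 * Illustrative sketch (not part of the driver): how an in-kernel consumer
 * holding an LDI handle might request the asynchronous cache flush served
 * by the DKIOCFLUSHWRITECACHE case above.  The handle "lh", the callback
 * "my_flush_done" and the cookie "my_state" are hypothetical names.
 */
#if 0
	struct dk_callback dkc;

	dkc.dkc_callback = my_flush_done;	/* void (*)(void *, int) */
	dkc.dkc_cookie = my_state;		/* passed back to the callback */

	/*
	 * With FKIOCTL set and a callback present, the SYNCHRONIZE CACHE
	 * is issued asynchronously: the ioctl returns at once and
	 * my_flush_done() is invoked with the completion status.
	 */
	(void) ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
	    FKIOCTL | FWRITE, kcred, NULL);
#endif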
20136 
20137 
20138 /*
20139  *    Function: sd_dkio_ctrl_info
20140  *
20141  * Description: This routine is the driver entry point for handling controller
20142  *		information ioctl requests (DKIOCINFO).
20143  *
20144  *   Arguments: dev  - the device number
20145  *		arg  - pointer to user provided dk_cinfo structure
20146  *		       specifying the controller type and attributes.
20147  *		flag - this argument is a pass through to ddi_copyxxx()
20148  *		       directly from the mode argument of ioctl().
20149  *
20150  * Return Code: 0
20151  *		EFAULT
20152  *		ENXIO
20153  */
20154 
20155 static int
20156 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20157 {
20158 	struct sd_lun	*un = NULL;
20159 	struct dk_cinfo	*info;
20160 	dev_info_t	*pdip;
20161 	int		lun, tgt;
20162 
20163 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20164 		return (ENXIO);
20165 	}
20166 
20167 	info = (struct dk_cinfo *)
20168 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20169 
20170 	switch (un->un_ctype) {
20171 	case CTYPE_CDROM:
20172 		info->dki_ctype = DKC_CDROM;
20173 		break;
20174 	default:
20175 		info->dki_ctype = DKC_SCSI_CCS;
20176 		break;
20177 	}
20178 	pdip = ddi_get_parent(SD_DEVINFO(un));
20179 	info->dki_cnum = ddi_get_instance(pdip);
20180 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20181 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20182 	} else {
20183 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20184 		    DK_DEVLEN - 1);
20185 	}
20186 
20187 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20188 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20189 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20190 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20191 
20192 	/* Unit Information */
20193 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20194 	info->dki_slave = ((tgt << 3) | lun);
20195 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20196 	    DK_DEVLEN - 1);
20197 	info->dki_flags = DKI_FMTVOL;
20198 	info->dki_partition = SDPART(dev);
20199 
20200 	/* Max Transfer size of this device in blocks */
20201 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20202 	info->dki_addr = 0;
20203 	info->dki_space = 0;
20204 	info->dki_prio = 0;
20205 	info->dki_vec = 0;
20206 
20207 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20208 		kmem_free(info, sizeof (struct dk_cinfo));
20209 		return (EFAULT);
20210 	} else {
20211 		kmem_free(info, sizeof (struct dk_cinfo));
20212 		return (0);
20213 	}
20214 }
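
/*
 * Illustrative user-level sketch (not part of the driver): reading the
 * dk_cinfo that sd_dkio_ctrl_info() assembles.  "fd" is assumed to be an
 * open descriptor on a raw disk device; headers and error handling are
 * abbreviated.
 */
#if 0
#include <sys/dkio.h>

	struct dk_cinfo cinfo;

	if (ioctl(fd, DKIOCINFO, &cinfo) == 0) {
		(void) printf("ctlr %s%d unit %d maxxfer %u blocks\n",
		    cinfo.dki_cname, cinfo.dki_cnum, cinfo.dki_unit,
		    cinfo.dki_maxtransfer);
	}
#endif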
20215 
20216 
20217 /*
20218  *    Function: sd_get_media_info
20219  *
20220  * Description: This routine is the driver entry point for handling ioctl
20221  *		requests for the media type or command set profile used by the
20222  *		drive to operate on the media (DKIOCGMEDIAINFO).
20223  *
20224  *   Arguments: dev	- the device number
20225  *		arg	- pointer to user provided dk_minfo structure
20226  *			  specifying the media type, logical block size and
20227  *			  drive capacity.
20228  *		flag	- this argument is a pass through to ddi_copyxxx()
20229  *			  directly from the mode argument of ioctl().
20230  *
20231  * Return Code: 0
20232  *		EACCES
20233  *		EFAULT
20234  *		ENXIO
20235  *		EIO
20236  */
20237 
20238 static int
20239 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20240 {
20241 	struct sd_lun		*un = NULL;
20242 	struct uscsi_cmd	com;
20243 	struct scsi_inquiry	*sinq;
20244 	struct dk_minfo		media_info;
20245 	u_longlong_t		media_capacity;
20246 	uint64_t		capacity;
20247 	uint_t			lbasize;
20248 	uchar_t			*out_data;
20249 	uchar_t			*rqbuf;
20250 	int			rval = 0;
20251 	int			rtn;
20252 
20253 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20254 	    (un->un_state == SD_STATE_OFFLINE)) {
20255 		return (ENXIO);
20256 	}
20257 
20258 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20259 
20260 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20261 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20262 
20263 	/* Issue a TUR to determine if the drive is ready with media present */
20264 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20265 	if (rval == ENXIO) {
20266 		goto done;
20267 	}
20268 
20269 	/* Now get configuration data */
20270 	if (ISCD(un)) {
20271 		media_info.dki_media_type = DK_CDROM;
20272 
20273 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20274 		if (un->un_f_mmc_cap == TRUE) {
20275 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20276 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
20277 			    SD_PATH_STANDARD);
20278 
20279 			if (rtn) {
20280 				/*
20281 				 * Treat the failure as fatal unless it was
20282 				 * an illegal request or command not supported
20283 				 */
20284 				if ((com.uscsi_status == STATUS_CHECK) &&
20285 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20286 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20287 					    (rqbuf[12] != 0x20)) {
20288 						rval = EIO;
20289 						goto done;
20290 					}
20291 				}
20292 			} else {
20293 				/*
20294 				 * The GET CONFIGURATION command succeeded
20295 				 * so set the media type according to the
20296 				 * returned data
20297 				 */
20298 				media_info.dki_media_type = out_data[6];
20299 				media_info.dki_media_type <<= 8;
20300 				media_info.dki_media_type |= out_data[7];
20301 			}
20302 		}
20303 	} else {
20304 		/*
20305 		 * The profile list is not available, so we attempt to identify
20306 		 * the media type based on the inquiry data
20307 		 */
20308 		sinq = un->un_sd->sd_inq;
20309 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
20310 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
20311 			/* This is a direct access device  or optical disk */
20312 			media_info.dki_media_type = DK_FIXED_DISK;
20313 
20314 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20315 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20316 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20317 					media_info.dki_media_type = DK_ZIP;
20318 				} else if (
20319 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20320 					media_info.dki_media_type = DK_JAZ;
20321 				}
20322 			}
20323 		} else {
20324 			/*
20325 			 * Not a CD, direct access or optical disk so return
20326 			 * unknown media
20327 			 */
20328 			media_info.dki_media_type = DK_UNKNOWN;
20329 		}
20330 	}
20331 
20332 	/* Now read the capacity so we can provide the lbasize and capacity */
20333 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20334 	    SD_PATH_DIRECT)) {
20335 	case 0:
20336 		break;
20337 	case EACCES:
20338 		rval = EACCES;
20339 		goto done;
20340 	default:
20341 		rval = EIO;
20342 		goto done;
20343 	}
20344 
20345 	media_info.dki_lbsize = lbasize;
20346 	media_capacity = capacity;
20347 
20348 	/*
20349 	 * sd_send_scsi_READ_CAPACITY() reports capacity in units of
20350 	 * un->un_sys_blocksize, so convert it into units of lbasize
20351 	 * (e.g. 512-byte system blocks into 2048-byte CD-ROM blocks).
20352 	 */
20353 	media_capacity *= un->un_sys_blocksize;
20354 	media_capacity /= lbasize;
20355 	media_info.dki_capacity = media_capacity;
20356 
20357 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20358 		rval = EFAULT;
20359 		/* Keep the goto so any code added below still hits cleanup */
20360 		goto done;
20361 	}
20362 done:
20363 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20364 	kmem_free(rqbuf, SENSE_LENGTH);
20365 	return (rval);
20366 }
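
/*
 * Illustrative user-level sketch (not part of the driver): querying the
 * media type, logical block size and capacity that sd_get_media_info()
 * returns.  "fd" is assumed to be an open raw device descriptor.
 */
#if 0
#include <sys/dkio.h>

	struct dk_minfo minfo;

	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
		(void) printf("type 0x%x lbsize %u capacity %llu blocks\n",
		    minfo.dki_media_type, minfo.dki_lbsize,
		    (u_longlong_t)minfo.dki_capacity);
	}
#endif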
20367 
20368 
20369 /*
20370  *    Function: sd_check_media
20371  *
20372  * Description: This utility routine implements the functionality for the
20373  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20374  *		driver state changes from that specified by the user
20375  *		(inserted or ejected). For example, if the user specifies
20376  *		DKIO_EJECTED and the current media state is inserted this
20377  *		routine will immediately return DKIO_INSERTED. However, if the
20378  *		current media state is not inserted the user thread will be
20379  *		blocked until the drive state changes. If DKIO_NONE is specified
20380  *		the user thread will block until a drive state change occurs.
20381  *
20382  *   Arguments: dev  - the device number
20383  *		state  - the media state specified by the user; this routine
20384  *			blocks until the drive state differs from it.
20385  *
20386  * Return Code: ENXIO
20387  *		EIO
20388  *		EAGAIN
20389  *		EINTR
20390  */
20391 
20392 static int
20393 sd_check_media(dev_t dev, enum dkio_state state)
20394 {
20395 	struct sd_lun		*un = NULL;
20396 	enum dkio_state		prev_state;
20397 	opaque_t		token = NULL;
20398 	int			rval = 0;
20399 
20400 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20401 		return (ENXIO);
20402 	}
20403 
20404 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20405 
20406 	mutex_enter(SD_MUTEX(un));
20407 
20408 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20409 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20410 
20411 	prev_state = un->un_mediastate;
20412 
20413 	/* is there anything to do? */
20414 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20415 		/*
20416 		 * submit the request to the scsi_watch service;
20417 		 * scsi_media_watch_cb() does the real work
20418 		 */
20419 		mutex_exit(SD_MUTEX(un));
20420 
20421 		/*
20422 		 * This change handles the case where a scsi watch request is
20423 		 * added to a device that is powered down. To accomplish this
20424 		 * we power up the device before adding the scsi watch request,
20425 		 * since the scsi watch sends a TUR directly to the device
20426 		 * which the device cannot handle if it is powered down.
20427 		 */
20428 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20429 			mutex_enter(SD_MUTEX(un));
20430 			goto done;
20431 		}
20432 
20433 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20434 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20435 		    (caddr_t)dev);
20436 
20437 		sd_pm_exit(un);
20438 
20439 		mutex_enter(SD_MUTEX(un));
20440 		if (token == NULL) {
20441 			rval = EAGAIN;
20442 			goto done;
20443 		}
20444 
20445 		/*
20446 		 * This is a special case IOCTL that doesn't return
20447 		 * until the media state changes. Routine sdpower
20448 		 * knows about and handles this so don't count it
20449 		 * as an active cmd in the driver, which would keep
20450 		 * the device busy from the pm framework's viewpoint.
20451 		 * If the count isn't decremented the device can't
20452 		 * be powered down.
20453 		 */
20454 		un->un_ncmds_in_driver--;
20455 		ASSERT(un->un_ncmds_in_driver >= 0);
20456 
20457 		/*
20458 		 * if a prior request had been made, this will be the same
20459 		 * token, as scsi_watch was designed that way.
20460 		 */
20461 		un->un_swr_token = token;
20462 		un->un_specified_mediastate = state;
20463 
20464 		/*
20465 		 * Now wait for the media state to change. We will not be
20466 		 * signalled unless mediastate differs from state, but it is
20467 		 * still better to re-test the condition, since there is a
20468 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
20469 		 */
20470 		SD_TRACE(SD_LOG_COMMON, un,
20471 		    "sd_check_media: waiting for media state change\n");
20472 		while (un->un_mediastate == state) {
20473 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20474 				SD_TRACE(SD_LOG_COMMON, un,
20475 				    "sd_check_media: waiting for media state "
20476 				    "was interrupted\n");
20477 				un->un_ncmds_in_driver++;
20478 				rval = EINTR;
20479 				goto done;
20480 			}
20481 			SD_TRACE(SD_LOG_COMMON, un,
20482 			    "sd_check_media: received signal, state=%x\n",
20483 			    un->un_mediastate);
20484 		}
20485 		/*
20486 		 * Inc the counter to indicate the device once again
20487 		 * has an active outstanding cmd.
20488 		 */
20489 		un->un_ncmds_in_driver++;
20490 	}
20491 
20492 	/* invalidate geometry */
20493 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20494 		sr_ejected(un);
20495 	}
20496 
20497 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20498 		uint64_t	capacity;
20499 		uint_t		lbasize;
20500 
20501 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20502 		mutex_exit(SD_MUTEX(un));
20503 		/*
20504 		 * Since the following routines use SD_PATH_DIRECT, we must
20505 		 * call PM directly before the upcoming disk accesses. This
20506 		 * may cause the disk to be powered up and spun up.
20507 		 */
20508 
20509 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20510 			rval = sd_send_scsi_READ_CAPACITY(un,
20511 			    &capacity,
20512 			    &lbasize, SD_PATH_DIRECT);
20513 			if (rval != 0) {
20514 				sd_pm_exit(un);
20515 				mutex_enter(SD_MUTEX(un));
20516 				goto done;
20517 			}
20518 		} else {
20519 			rval = EIO;
20520 			mutex_enter(SD_MUTEX(un));
20521 			goto done;
20522 		}
20523 		mutex_enter(SD_MUTEX(un));
20524 
20525 		sd_update_block_info(un, lbasize, capacity);
20526 
20527 		/*
20528 		 *  Check if the media in the device is writable or not
20529 		 */
20530 		sd_check_for_writable_cd(un, SD_PATH_DIRECT);
20531 
20532 		mutex_exit(SD_MUTEX(un));
20533 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20534 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20535 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20536 			sd_set_pstats(un);
20537 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20538 			    "sd_check_media: un:0x%p pstats created and "
20539 			    "set\n", un);
20540 		}
20541 
20542 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20543 		    SD_PATH_DIRECT);
20544 		sd_pm_exit(un);
20545 
20546 		mutex_enter(SD_MUTEX(un));
20547 	}
20548 done:
20549 	un->un_f_watcht_stopped = FALSE;
20550 	if (un->un_swr_token) {
20551 		/*
20552 		 * Use of this local token and the mutex ensures that we avoid
20553 		 * some race conditions associated with terminating the
20554 		 * scsi watch.
20555 		 */
20556 		token = un->un_swr_token;
20557 		un->un_swr_token = (opaque_t)NULL;
20558 		mutex_exit(SD_MUTEX(un));
20559 		(void) scsi_watch_request_terminate(token,
20560 		    SCSI_WATCH_TERMINATE_WAIT);
20561 		mutex_enter(SD_MUTEX(un));
20562 	}
20563 
20564 	/*
20565 	 * Update the capacity kstat value, if no media previously
20566 	 * (capacity kstat is 0) and a media has been inserted
20567 	 * (un_f_blockcount_is_valid == TRUE)
20568 	 */
20569 	if (un->un_errstats) {
20570 		struct sd_errstats	*stp = NULL;
20571 
20572 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20573 		if ((stp->sd_capacity.value.ui64 == 0) &&
20574 		    (un->un_f_blockcount_is_valid == TRUE)) {
20575 			stp->sd_capacity.value.ui64 =
20576 			    (uint64_t)((uint64_t)un->un_blockcount *
20577 			    un->un_sys_blocksize);
20578 		}
20579 	}
20580 	mutex_exit(SD_MUTEX(un));
20581 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20582 	return (rval);
20583 }
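
/*
 * Illustrative user-level sketch (not part of the driver): the classic
 * DKIOCSTATE wait loop served by sd_check_media().  Passing the last
 * observed state blocks until the drive state differs from it; DKIO_NONE
 * blocks until any state change.  "fd" is assumed to be an open raw
 * device descriptor.
 */
#if 0
#include <sys/dkio.h>

	enum dkio_state state = DKIO_NONE;

	for (;;) {
		/* blocks in sd_check_media() until the state changes */
		if (ioctl(fd, DKIOCSTATE, &state) != 0)
			break;
		if (state == DKIO_INSERTED) {
			/* media has arrived; validate/mount it here */
		} else if (state == DKIO_EJECTED) {
			/* media has been removed */
		}
	}
#endif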
20584 
20585 
20586 /*
20587  *    Function: sd_delayed_cv_broadcast
20588  *
20589  * Description: Delayed cv_broadcast to allow for target to recover from media
20590  *		insertion.
20591  *
20592  *   Arguments: arg - driver soft state (unit) structure
20593  */
20594 
20595 static void
20596 sd_delayed_cv_broadcast(void *arg)
20597 {
20598 	struct sd_lun *un = arg;
20599 
20600 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20601 
20602 	mutex_enter(SD_MUTEX(un));
20603 	un->un_dcvb_timeid = NULL;
20604 	cv_broadcast(&un->un_state_cv);
20605 	mutex_exit(SD_MUTEX(un));
20606 }
20607 
20608 
20609 /*
20610  *    Function: sd_media_watch_cb
20611  *
20612  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20613  *		routine processes the TUR sense data and updates the driver
20614  *		state if a transition has occurred. The user thread
20615  *		(sd_check_media) is then signalled.
20616  *
20617  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20618  *			among multiple watches that share this callback function
20619  *		resultp - scsi watch facility result packet containing scsi
20620  *			  packet, status byte and sense data
20621  *
20622  * Return Code: 0 for success, -1 for failure
20623  */
20624 
20625 static int
20626 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20627 {
20628 	struct sd_lun			*un;
20629 	struct scsi_status		*statusp = resultp->statusp;
20630 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
20631 	enum dkio_state			state = DKIO_NONE;
20632 	dev_t				dev = (dev_t)arg;
20633 	uchar_t				actual_sense_length;
20634 	uint8_t				skey, asc, ascq;
20635 
20636 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20637 		return (-1);
20638 	}
20639 	actual_sense_length = resultp->actual_sense_length;
20640 
20641 	mutex_enter(SD_MUTEX(un));
20642 	SD_TRACE(SD_LOG_COMMON, un,
20643 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
20644 	    *((char *)statusp), (void *)sensep, actual_sense_length);
20645 
20646 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
20647 		un->un_mediastate = DKIO_DEV_GONE;
20648 		cv_broadcast(&un->un_state_cv);
20649 		mutex_exit(SD_MUTEX(un));
20650 
20651 		return (0);
20652 	}
20653 
20654 	/*
20655 	 * If there was a check condition then sensep points to valid sense data
20656 	 * If status was not a check condition but a reservation or busy status
20657 	 * then the new state is DKIO_NONE
20658 	 */
20659 	if (sensep != NULL) {
20660 		skey = scsi_sense_key(sensep);
20661 		asc = scsi_sense_asc(sensep);
20662 		ascq = scsi_sense_ascq(sensep);
20663 
20664 		SD_INFO(SD_LOG_COMMON, un,
20665 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
20666 		    skey, asc, ascq);
20667 		/* This routine only uses up to 13 bytes of sense data. */
20668 		if (actual_sense_length >= 13) {
20669 			if (skey == KEY_UNIT_ATTENTION) {
20670 				if (asc == 0x28) {
20671 					state = DKIO_INSERTED;
20672 				}
20673 			} else {
20674 				/*
20675 				 * Sense data 02/04/02 means that the
20676 				 * host should send a start command.
20677 				 * Explicitly leave the media state as
20678 				 * is (inserted), since the media is
20679 				 * present and the host has stopped the
20680 				 * device for PM reasons. The next true
20681 				 * read/write to this media will bring
20682 				 * the device to the right state for
20683 				 * media access.
20684 				 */
20685 				if ((skey == KEY_NOT_READY) &&
20686 				    (asc == 0x3a)) {
20687 					state = DKIO_EJECTED;
20688 				}
20689 
20690 				/*
20691 				 * If the drive is busy with an operation
20692 				 * or long write, keep the media in an
20693 				 * inserted state.
20694 				 */
20695 
20696 				if ((skey == KEY_NOT_READY) &&
20697 				    (asc == 0x04) &&
20698 				    ((ascq == 0x02) ||
20699 				    (ascq == 0x07) ||
20700 				    (ascq == 0x08))) {
20701 					state = DKIO_INSERTED;
20702 				}
20703 			}
20704 		}
20705 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
20706 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
20707 		state = DKIO_INSERTED;
20708 	}
20709 
20710 	SD_TRACE(SD_LOG_COMMON, un,
20711 	    "sd_media_watch_cb: state=%x, specified=%x\n",
20712 	    state, un->un_specified_mediastate);
20713 
20714 	/*
20715 	 * now signal the waiting thread if this is *not* the specified state;
20716 	 * delay the signal if the state is DKIO_INSERTED to allow the target
20717 	 * to recover
20718 	 */
20719 	if (state != un->un_specified_mediastate) {
20720 		un->un_mediastate = state;
20721 		if (state == DKIO_INSERTED) {
20722 			/*
20723 			 * delay the signal to give the drive a chance
20724 			 * to do what it apparently needs to do
20725 			 */
20726 			SD_TRACE(SD_LOG_COMMON, un,
20727 			    "sd_media_watch_cb: delayed cv_broadcast\n");
20728 			if (un->un_dcvb_timeid == NULL) {
20729 				un->un_dcvb_timeid =
20730 				    timeout(sd_delayed_cv_broadcast, un,
20731 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
20732 			}
20733 		} else {
20734 			SD_TRACE(SD_LOG_COMMON, un,
20735 			    "sd_media_watch_cb: immediate cv_broadcast\n");
20736 			cv_broadcast(&un->un_state_cv);
20737 		}
20738 	}
20739 	mutex_exit(SD_MUTEX(un));
20740 	return (0);
20741 }
20742 
20743 
20744 /*
20745  *    Function: sd_dkio_get_temp
20746  *
20747  * Description: This routine is the driver entry point for handling ioctl
20748  *		requests to get the disk temperature.
20749  *
20750  *   Arguments: dev  - the device number
20751  *		arg  - pointer to user provided dk_temperature structure.
20752  *		flag - this argument is a pass through to ddi_copyxxx()
20753  *		       directly from the mode argument of ioctl().
20754  *
20755  * Return Code: 0
20756  *		EFAULT
20757  *		ENXIO
20758  *		EAGAIN
20759  */
20760 
20761 static int
20762 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
20763 {
20764 	struct sd_lun		*un = NULL;
20765 	struct dk_temperature	*dktemp = NULL;
20766 	uchar_t			*temperature_page;
20767 	int			rval = 0;
20768 	int			path_flag = SD_PATH_STANDARD;
20769 
20770 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20771 		return (ENXIO);
20772 	}
20773 
20774 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
20775 
20776 	/* copyin the disk temp argument to get the user flags */
20777 	if (ddi_copyin((void *)arg, dktemp,
20778 	    sizeof (struct dk_temperature), flag) != 0) {
20779 		rval = EFAULT;
20780 		goto done;
20781 	}
20782 
20783 	/* Initialize the temperature to invalid. */
20784 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20785 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20786 
20787 	/*
20788 	 * Note: Investigate removing the "bypass pm" semantic.
20789 	 * Can we just bypass PM always?
20790 	 */
20791 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
20792 		path_flag = SD_PATH_DIRECT;
20793 		ASSERT(!mutex_owned(&un->un_pm_mutex));
20794 		mutex_enter(&un->un_pm_mutex);
20795 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
20796 			/*
20797 			 * If DKT_BYPASS_PM is set, and the drive happens to be
20798 			 * in low power mode, we cannot wake it up; we need
20799 			 * to return EAGAIN.
20800 			 */
20801 			mutex_exit(&un->un_pm_mutex);
20802 			rval = EAGAIN;
20803 			goto done;
20804 		} else {
20805 			/*
20806 			 * Indicate to PM the device is busy. This is required
20807 			 * to avoid a race - i.e. the ioctl is issuing a
20808 			 * command and the pm framework brings down the device
20809 			 * to low power mode (possible power cut-off on some
20810 			 * platforms).
20811 			 */
20812 			mutex_exit(&un->un_pm_mutex);
20813 			if (sd_pm_entry(un) != DDI_SUCCESS) {
20814 				rval = EAGAIN;
20815 				goto done;
20816 			}
20817 		}
20818 	}
20819 
20820 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
20821 
20822 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
20823 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
20824 		goto done2;
20825 	}
20826 
20827 	/*
20828 	 * For the current temperature verify that the parameter length is 0x02
20829 	 * and the parameter code is 0x00
20830 	 */
20831 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
20832 	    (temperature_page[5] == 0x00)) {
20833 		if (temperature_page[9] == 0xFF) {
20834 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
20835 		} else {
20836 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
20837 		}
20838 	}
20839 
20840 	/*
20841 	 * For the reference temperature verify that the parameter
20842 	 * length is 0x02 and the parameter code is 0x01
20843 	 */
20844 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
20845 	    (temperature_page[11] == 0x01)) {
20846 		if (temperature_page[15] == 0xFF) {
20847 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
20848 		} else {
20849 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
20850 		}
20851 	}
20852 
20853 	/* Do the copyout regardless of the temperature commands status. */
20854 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
20855 	    flag) != 0) {
20856 		rval = EFAULT;
20857 	}
20858 
20859 done2:
20860 	if (path_flag == SD_PATH_DIRECT) {
20861 		sd_pm_exit(un);
20862 	}
20863 
20864 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
20865 done:
20866 	if (dktemp != NULL) {
20867 		kmem_free(dktemp, sizeof (struct dk_temperature));
20868 	}
20869 
20870 	return (rval);
20871 }
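
/*
 * Illustrative user-level sketch (not part of the driver): fetching the
 * temperature values assembled above.  DKT_BYPASS_PM asks the driver not
 * to spin up a powered-down drive (EAGAIN is returned instead).  The
 * DKIOCGTEMPERATURE command name and "fd" are assumptions here; error
 * handling is abbreviated.
 */
#if 0
#include <sys/dkio.h>

	struct dk_temperature dktemp;

	(void) memset(&dktemp, 0, sizeof (dktemp));
	dktemp.dkt_flags = DKT_BYPASS_PM;

	if (ioctl(fd, DKIOCGTEMPERATURE, &dktemp) == 0 &&
	    dktemp.dkt_cur_temp != DKT_INVALID_TEMP) {
		(void) printf("current %d C, reference %d C\n",
		    dktemp.dkt_cur_temp, dktemp.dkt_ref_temp);
	}
#endif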
20872 
20873 
20874 /*
20875  *    Function: sd_log_page_supported
20876  *
20877  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
20878  *		supported log pages.
20879  *
20880  *   Arguments: un - driver soft state (unit) structure
20881  *		log_page - the code of the log page to check for
20882  *
20883  * Return Code: -1 - on error (log sense is optional and may not be supported).
20884  *		0  - log page not found.
20885  *		1  - log page found.
20886  */
20887 
20888 static int
20889 sd_log_page_supported(struct sd_lun *un, int log_page)
20890 {
20891 	uchar_t *log_page_data;
20892 	int	i;
20893 	int	match = 0;
20894 	int	log_size;
20895 
20896 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
20897 
20898 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
20899 	    SD_PATH_DIRECT) != 0) {
20900 		SD_ERROR(SD_LOG_COMMON, un,
20901 		    "sd_log_page_supported: failed log page retrieval\n");
20902 		kmem_free(log_page_data, 0xFF);
20903 		return (-1);
20904 	}
20905 	log_size = log_page_data[3];
20906 
20907 	/*
20908 	 * The list of supported log pages starts at the fourth byte. Check
20909 	 * until we run out of log pages or a match is found.
20910 	 */
20911 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
20912 		if (log_page_data[i] == log_page) {
20913 			match++;
20914 		}
20915 	}
20916 	kmem_free(log_page_data, 0xFF);
20917 	return (match);
20918 }
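
/*
 * Illustrative sketch (not part of the driver): a caller inside sd could
 * use sd_log_page_supported() to probe for a page before issuing a LOG
 * SENSE for it, e.g. for the temperature page used by sd_dkio_get_temp().
 */
#if 0
	switch (sd_log_page_supported(un, TEMPERATURE_PAGE)) {
	case 1:
		/* page is advertised; safe to LOG SENSE the temperature */
		break;
	case 0:
		/* page not listed by this device */
		break;
	default:
		/* -1: LOG SENSE itself failed or is unsupported */
		break;
	}
#endif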
20919 
20920 
20921 /*
20922  *    Function: sd_mhdioc_failfast
20923  *
20924  * Description: This routine is the driver entry point for handling ioctl
20925  *		requests to enable/disable the multihost failfast option.
20926  *		(MHIOCENFAILFAST)
20927  *
20928  *   Arguments: dev	- the device number
20929  *		arg	- user specified probing interval.
20930  *		flag	- this argument is a pass through to ddi_copyxxx()
20931  *			  directly from the mode argument of ioctl().
20932  *
20933  * Return Code: 0
20934  *		EFAULT
20935  *		ENXIO
20936  */
20937 
20938 static int
20939 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
20940 {
20941 	struct sd_lun	*un = NULL;
20942 	int		mh_time;
20943 	int		rval = 0;
20944 
20945 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20946 		return (ENXIO);
20947 	}
20948 
20949 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
20950 		return (EFAULT);
20951 
20952 	if (mh_time) {
20953 		mutex_enter(SD_MUTEX(un));
20954 		un->un_resvd_status |= SD_FAILFAST;
20955 		mutex_exit(SD_MUTEX(un));
20956 		/*
20957 		 * If mh_time is INT_MAX, then this ioctl is being used for
20958 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
20959 		 */
20960 		if (mh_time != INT_MAX) {
20961 			rval = sd_check_mhd(dev, mh_time);
20962 		}
20963 	} else {
20964 		(void) sd_check_mhd(dev, 0);
20965 		mutex_enter(SD_MUTEX(un));
20966 		un->un_resvd_status &= ~SD_FAILFAST;
20967 		mutex_exit(SD_MUTEX(un));
20968 	}
20969 	return (rval);
20970 }
20971 
20972 
20973 /*
20974  *    Function: sd_mhdioc_takeown
20975  *
20976  * Description: This routine is the driver entry point for handling ioctl
20977  *		requests to forcefully acquire exclusive access rights to the
20978  *		multihost disk (MHIOCTKOWN).
20979  *
20980  *   Arguments: dev	- the device number
20981  *		arg	- user provided structure specifying the delay
20982  *			  parameters in milliseconds
20983  *		flag	- this argument is a pass through to ddi_copyxxx()
20984  *			  directly from the mode argument of ioctl().
20985  *
20986  * Return Code: 0
20987  *		EFAULT
20988  *		ENXIO
20989  */
20990 
20991 static int
20992 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
20993 {
20994 	struct sd_lun		*un = NULL;
20995 	struct mhioctkown	*tkown = NULL;
20996 	int			rval = 0;
20997 
20998 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20999 		return (ENXIO);
21000 	}
21001 
21002 	if (arg != NULL) {
21003 		tkown = (struct mhioctkown *)
21004 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
21005 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
21006 		if (rval != 0) {
21007 			rval = EFAULT;
21008 			goto error;
21009 		}
21010 	}
21011 
21012 	rval = sd_take_ownership(dev, tkown);
21013 	mutex_enter(SD_MUTEX(un));
21014 	if (rval == 0) {
21015 		un->un_resvd_status |= SD_RESERVE;
21016 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
21017 			sd_reinstate_resv_delay =
21018 			    tkown->reinstate_resv_delay * 1000;
21019 		} else {
21020 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
21021 		}
21022 		/*
21023 		 * Give the scsi_watch routine interval set by
21024 		 * the MHIOCENFAILFAST ioctl precedence here.
21025 		 */
21026 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21027 			mutex_exit(SD_MUTEX(un));
21028 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21029 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21030 			    "sd_mhdioc_takeown : %d\n",
21031 			    sd_reinstate_resv_delay);
21032 		} else {
21033 			mutex_exit(SD_MUTEX(un));
21034 		}
21035 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21036 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21037 	} else {
21038 		un->un_resvd_status &= ~SD_RESERVE;
21039 		mutex_exit(SD_MUTEX(un));
21040 	}
21041 
21042 error:
21043 	if (tkown != NULL) {
21044 		kmem_free(tkown, sizeof (struct mhioctkown));
21045 	}
21046 	return (rval);
21047 }
21048 
21049 
21050 /*
21051  *    Function: sd_mhdioc_release
21052  *
21053  * Description: This routine is the driver entry point for handling ioctl
21054  *		requests to release exclusive access rights to the multihost
21055  *		disk (MHIOCRELEASE).
21056  *
21057  *   Arguments: dev	- the device number
21058  *
21059  * Return Code: 0
21060  *		ENXIO
21061  */
21062 
21063 static int
21064 sd_mhdioc_release(dev_t dev)
21065 {
21066 	struct sd_lun		*un = NULL;
21067 	timeout_id_t		resvd_timeid_save;
21068 	int			resvd_status_save;
21069 	int			rval = 0;
21070 
21071 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21072 		return (ENXIO);
21073 	}
21074 
21075 	mutex_enter(SD_MUTEX(un));
21076 	resvd_status_save = un->un_resvd_status;
21077 	un->un_resvd_status &=
21078 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21079 	if (un->un_resvd_timeid) {
21080 		resvd_timeid_save = un->un_resvd_timeid;
21081 		un->un_resvd_timeid = NULL;
21082 		mutex_exit(SD_MUTEX(un));
21083 		(void) untimeout(resvd_timeid_save);
21084 	} else {
21085 		mutex_exit(SD_MUTEX(un));
21086 	}
21087 
21088 	/*
21089 	 * destroy any pending timeout thread that may be attempting to
21090 	 * reinstate reservation on this device.
21091 	 */
21092 	sd_rmv_resv_reclaim_req(dev);
21093 
21094 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21095 		mutex_enter(SD_MUTEX(un));
21096 		if ((un->un_mhd_token) &&
21097 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21098 			mutex_exit(SD_MUTEX(un));
21099 			(void) sd_check_mhd(dev, 0);
21100 		} else {
21101 			mutex_exit(SD_MUTEX(un));
21102 		}
21103 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21104 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21105 	} else {
21106 		/*
21107 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21108 		 */
21109 		mutex_enter(SD_MUTEX(un));
21110 		un->un_resvd_status = resvd_status_save;
21111 		mutex_exit(SD_MUTEX(un));
21112 	}
21113 	return (rval);
21114 }
21115 
21116 
21117 /*
21118  *    Function: sd_mhdioc_register_devid
21119  *
21120  * Description: This routine is the driver entry point for handling ioctl
21121  *		requests to register the device id (MHIOCREREGISTERDEVID).
21122  *
21123  *		Note: The implementation for this ioctl has been updated to
21124  *		be consistent with the original PSARC case (1999/357)
21125  *		(4375899, 4241671, 4220005)
21126  *
21127  *   Arguments: dev	- the device number
21128  *
21129  * Return Code: 0
21130  *		ENXIO
21131  */
21132 
21133 static int
21134 sd_mhdioc_register_devid(dev_t dev)
21135 {
21136 	struct sd_lun	*un = NULL;
21137 	int		rval = 0;
21138 
21139 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21140 		return (ENXIO);
21141 	}
21142 
21143 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21144 
21145 	mutex_enter(SD_MUTEX(un));
21146 
21147 	/* If a devid already exists, de-register it */
21148 	if (un->un_devid != NULL) {
21149 		ddi_devid_unregister(SD_DEVINFO(un));
21150 		/*
21151 		 * After unregistering the devid, free the devid memory
21152 		 */
21153 		ddi_devid_free(un->un_devid);
21154 		un->un_devid = NULL;
21155 	}
21156 
21157 	/* Check for reservation conflict */
21158 	mutex_exit(SD_MUTEX(un));
21159 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21160 	mutex_enter(SD_MUTEX(un));
21161 
21162 	switch (rval) {
21163 	case 0:
21164 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21165 		break;
21166 	case EACCES:
21167 		break;
21168 	default:
21169 		rval = EIO;
21170 	}
21171 
21172 	mutex_exit(SD_MUTEX(un));
21173 	return (rval);
21174 }
21175 
21176 
21177 /*
21178  *    Function: sd_mhdioc_inkeys
21179  *
21180  * Description: This routine is the driver entry point for handling ioctl
21181  *		requests to issue the SCSI-3 Persistent In Read Keys command
21182  *		to the device (MHIOCGRP_INKEYS).
21183  *
21184  *   Arguments: dev	- the device number
21185  *		arg	- user provided in_keys structure
21186  *		flag	- this argument is a pass through to ddi_copyxxx()
21187  *			  directly from the mode argument of ioctl().
21188  *
21189  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21190  *		ENXIO
21191  *		EFAULT
21192  */
21193 
21194 static int
21195 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21196 {
21197 	struct sd_lun		*un;
21198 	mhioc_inkeys_t		inkeys;
21199 	int			rval = 0;
21200 
21201 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21202 		return (ENXIO);
21203 	}
21204 
21205 #ifdef _MULTI_DATAMODEL
21206 	switch (ddi_model_convert_from(flag & FMODELS)) {
21207 	case DDI_MODEL_ILP32: {
21208 		struct mhioc_inkeys32	inkeys32;
21209 
21210 		if (ddi_copyin(arg, &inkeys32,
21211 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21212 			return (EFAULT);
21213 		}
21214 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21215 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21216 		    &inkeys, flag)) != 0) {
21217 			return (rval);
21218 		}
21219 		inkeys32.generation = inkeys.generation;
21220 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21221 		    flag) != 0) {
21222 			return (EFAULT);
21223 		}
21224 		break;
21225 	}
21226 	case DDI_MODEL_NONE:
21227 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21228 		    flag) != 0) {
21229 			return (EFAULT);
21230 		}
21231 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21232 		    &inkeys, flag)) != 0) {
21233 			return (rval);
21234 		}
21235 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21236 		    flag) != 0) {
21237 			return (EFAULT);
21238 		}
21239 		break;
21240 	}
21241 
21242 #else /* ! _MULTI_DATAMODEL */
21243 
21244 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21245 		return (EFAULT);
21246 	}
21247 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21248 	if (rval != 0) {
21249 		return (rval);
21250 	}
21251 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21252 		return (EFAULT);
21253 	}
21254 
21255 #endif /* _MULTI_DATAMODEL */
21256 
21257 	return (rval);
21258 }
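
/*
 * Illustrative user-level sketch (not part of the driver): reading the
 * registered SCSI-3 PGR keys through MHIOCGRP_INKEYS.  The key-list
 * field names follow <sys/mhd.h>; buffer sizing and the retry when
 * listlen exceeds listsize are elided.
 */
#if 0
#include <sys/mhd.h>

	mhioc_inkeys_t inkeys;
	mhioc_key_list_t klist;
	mhioc_resv_key_t keys[16];	/* arbitrary initial capacity */

	klist.listsize = 16;
	klist.list = keys;
	inkeys.li = &klist;

	if (ioctl(fd, MHIOCGRP_INKEYS, &inkeys) == 0) {
		/* klist.listlen is the number of registered keys */
	}
#endif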
21259 
21260 
21261 /*
21262  *    Function: sd_mhdioc_inresv
21263  *
21264  * Description: This routine is the driver entry point for handling ioctl
21265  *		requests to issue the SCSI-3 Persistent In Read Reservations
21266  *		command to the device (MHIOCGRP_INRESV).
21267  *
21268  *   Arguments: dev	- the device number
21269  *		arg	- user provided in_resv structure
21270  *		flag	- this argument is a pass through to ddi_copyxxx()
21271  *			  directly from the mode argument of ioctl().
21272  *
21273  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21274  *		ENXIO
21275  *		EFAULT
21276  */
21277 
21278 static int
21279 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21280 {
21281 	struct sd_lun		*un;
21282 	mhioc_inresvs_t		inresvs;
21283 	int			rval = 0;
21284 
21285 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21286 		return (ENXIO);
21287 	}
21288 
21289 #ifdef _MULTI_DATAMODEL
21290 
21291 	switch (ddi_model_convert_from(flag & FMODELS)) {
21292 	case DDI_MODEL_ILP32: {
21293 		struct mhioc_inresvs32	inresvs32;
21294 
21295 		if (ddi_copyin(arg, &inresvs32,
21296 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21297 			return (EFAULT);
21298 		}
21299 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21300 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21301 		    &inresvs, flag)) != 0) {
21302 			return (rval);
21303 		}
21304 		inresvs32.generation = inresvs.generation;
21305 		if (ddi_copyout(&inresvs32, arg,
21306 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21307 			return (EFAULT);
21308 		}
21309 		break;
21310 	}
21311 	case DDI_MODEL_NONE:
21312 		if (ddi_copyin(arg, &inresvs,
21313 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21314 			return (EFAULT);
21315 		}
21316 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21317 		    &inresvs, flag)) != 0) {
21318 			return (rval);
21319 		}
21320 		if (ddi_copyout(&inresvs, arg,
21321 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21322 			return (EFAULT);
21323 		}
21324 		break;
21325 	}
21326 
21327 #else /* ! _MULTI_DATAMODEL */
21328 
21329 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21330 		return (EFAULT);
21331 	}
21332 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21333 	if (rval != 0) {
21334 		return (rval);
21335 	}
21336 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
21337 		return (EFAULT);
21338 	}
21339 
21340 #endif /* ! _MULTI_DATAMODEL */
21341 
21342 	return (rval);
21343 }
21344 
21345 
21346 /*
21347  * The following routines support the clustering functionality described below
21348  * and implement lost reservation reclaim functionality.
21349  *
21350  * Clustering
21351  * ----------
21352  * The clustering code uses two different, independent forms of SCSI
21353  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
21354  * Persistent Group Reservations. For any particular disk, it will use either
21355  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
21356  *
21357  * SCSI-2
21358  * The cluster software takes ownership of a multi-hosted disk by issuing the
21359  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
21360  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a cluster,
21361  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, issues
21362  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
21363  * meaning of failfast is that if the driver (on this host) ever encounters the
21364  * scsi error return code RESERVATION_CONFLICT from the device, it should
21365  * immediately panic the host. The motivation for this ioctl is that if this
21366  * host does encounter reservation conflict, the underlying cause is that some
21367  * other host of the cluster has decided that this host is no longer in the
21368  * cluster and has seized control of the disks for itself. Since this host is no
21369  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
21370  * does two things:
21371  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21372  *      error to panic the host
21373  *      (b) it sets up a periodic timer to test whether this host still has
21374  *      "access" (in that no other host has reserved the device):  if the
21375  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21376  *      purpose of that periodic timer is to handle scenarios where the host is
21377  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21378  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21379  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21380  * the device itself.
21381  *
21382  * SCSI-3 PGR
21383  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21384  * facility is supported through the shared multihost disk ioctls
21385  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21386  * MHIOCGRP_PREEMPTANDABORT)
21387  *
21388  * Reservation Reclaim:
21389  * --------------------
21390  * To support the lost reservation reclaim operations this driver creates a
21391  * single thread to handle reinstating reservations on all devices that have
21392  * lost reservations. sd_resv_reclaim_requests are logged for all devices
21393  * that have LOST RESERVATIONS when the scsi watch facility calls back
21394  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
21395  * requests to regain the lost reservations.
21396  */
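
/*
 * Illustrative user-level sketch (not part of the driver): the SCSI-2
 * take-ownership sequence described above, roughly as a cluster agent
 * would issue it.  The delay and probe interval values are examples only.
 */
#if 0
#include <sys/mhd.h>

	struct mhioctkown tkown;
	int probe_interval = 1000;		/* failfast probe, msecs */

	(void) memset(&tkown, 0, sizeof (tkown));
	tkown.reinstate_resv_delay = 6000;	/* msecs */

	if (ioctl(fd, MHIOCTKOWN, &tkown) == 0)
		(void) ioctl(fd, MHIOCENFAILFAST, &probe_interval);
#endif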
21397 
21398 /*
21399  *    Function: sd_check_mhd()
21400  *
21401  * Description: This function sets up and submits a scsi watch request or
21402  *		terminates an existing watch request. This routine is used in
21403  *		support of reservation reclaim.
21404  *
21405  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21406  *			 among multiple watches that share the callback function
21407  *		interval - the number of milliseconds specifying the watch
21408  *			   interval for issuing TEST UNIT READY commands. If
21409  *			   set to 0 the watch should be terminated. If the
21410  *			   interval is set to 0 and if the device is required
21411  *			   to hold reservation while disabling failfast, the
21412  *			   watch is restarted with an interval of
21413  *			   reinstate_resv_delay.
21414  *
21415  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21416  *		ENXIO      - Indicates an invalid device was specified
21417  *		EAGAIN     - Unable to submit the scsi watch request
21418  */
21419 
21420 static int
21421 sd_check_mhd(dev_t dev, int interval)
21422 {
21423 	struct sd_lun	*un;
21424 	opaque_t	token;
21425 
21426 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21427 		return (ENXIO);
21428 	}
21429 
21430 	/* is this a watch termination request? */
21431 	if (interval == 0) {
21432 		mutex_enter(SD_MUTEX(un));
21433 		/* if there is an existing watch task then terminate it */
21434 		if (un->un_mhd_token) {
21435 			token = un->un_mhd_token;
21436 			un->un_mhd_token = NULL;
21437 			mutex_exit(SD_MUTEX(un));
21438 			(void) scsi_watch_request_terminate(token,
21439 			    SCSI_WATCH_TERMINATE_WAIT);
21440 			mutex_enter(SD_MUTEX(un));
21441 		} else {
21442 			mutex_exit(SD_MUTEX(un));
21443 			/*
21444 			 * Note: If we return here we don't check for the
21445 			 * failfast case. This is the original legacy
21446 			 * implementation but perhaps we should be checking
21447 			 * the failfast case.
21448 			 */
21449 			return (0);
21450 		}
21451 		/*
21452 		 * If the device is required to hold reservation while
21453 		 * disabling failfast, we need to restart the scsi_watch
21454 		 * routine with an interval of reinstate_resv_delay.
21455 		 */
21456 		if (un->un_resvd_status & SD_RESERVE) {
21457 			interval = sd_reinstate_resv_delay/1000;
21458 		} else {
21459 			/* no failfast so bail */
21460 			mutex_exit(SD_MUTEX(un));
21461 			return (0);
21462 		}
21463 		mutex_exit(SD_MUTEX(un));
21464 	}
21465 
21466 	/*
21467 	 * adjust minimum time interval to 1 second,
21468 	 * and convert from msecs to usecs
21469 	 */
21470 	if (interval > 0 && interval < 1000) {
21471 		interval = 1000;
21472 	}
21473 	interval *= 1000;
21474 
21475 	/*
21476 	 * submit the request to the scsi_watch service
21477 	 */
21478 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21479 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21480 	if (token == NULL) {
21481 		return (EAGAIN);
21482 	}
21483 
21484 	/*
21485 	 * save token for termination later on
21486 	 */
21487 	mutex_enter(SD_MUTEX(un));
21488 	un->un_mhd_token = token;
21489 	mutex_exit(SD_MUTEX(un));
21490 	return (0);
21491 }
21492 
21493 
21494 /*
21495  *    Function: sd_mhd_watch_cb()
21496  *
21497  * Description: This function is the call back function used by the scsi watch
21498  *		facility. The scsi watch facility sends the "Test Unit Ready"
21499  *		and processes the status. If applicable (i.e. a "Unit Attention"
21500  *		status and automatic "Request Sense" not used) the scsi watch
21501  *		facility will send a "Request Sense" and retrieve the sense data
21502  *		to be passed to this callback function. In either case,
21503  *		automatic or facility-submitted "Request Sense", this
21504  *		callback is passed the status and sense data.
21505  *
21506  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21507  *			among multiple watches that share this callback function
21508  *		resultp - scsi watch facility result packet containing scsi
21509  *			  packet, status byte and sense data
21510  *
21511  * Return Code: 0 - continue the watch task
21512  *		non-zero - terminate the watch task
21513  */
21514 
21515 static int
21516 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21517 {
21518 	struct sd_lun			*un;
21519 	struct scsi_status		*statusp;
21520 	uint8_t				*sensep;
21521 	struct scsi_pkt			*pkt;
21522 	uchar_t				actual_sense_length;
21523 	dev_t			dev = (dev_t)arg;
21524 
21525 	ASSERT(resultp != NULL);
21526 	statusp			= resultp->statusp;
21527 	sensep			= (uint8_t *)resultp->sensep;
21528 	pkt			= resultp->pkt;
21529 	actual_sense_length	= resultp->actual_sense_length;
21530 
21531 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21532 		return (ENXIO);
21533 	}
21534 
21535 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21536 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21537 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21538 
21539 	/* Begin processing of the status and/or sense data */
21540 	if (pkt->pkt_reason != CMD_CMPLT) {
21541 		/* Handle the incomplete packet */
21542 		sd_mhd_watch_incomplete(un, pkt);
21543 		return (0);
21544 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21545 		if (*((unsigned char *)statusp)
21546 		    == STATUS_RESERVATION_CONFLICT) {
21547 			/*
21548 			 * Handle a reservation conflict by panicking if
21549 			 * configured for failfast or by logging the conflict
21550 			 * and updating the reservation status
21551 			 */
21552 			mutex_enter(SD_MUTEX(un));
21553 			if ((un->un_resvd_status & SD_FAILFAST) &&
21554 			    (sd_failfast_enable)) {
21555 				sd_panic_for_res_conflict(un);
21556 				/*NOTREACHED*/
21557 			}
21558 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21559 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21560 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21561 			mutex_exit(SD_MUTEX(un));
21562 		}
21563 	}
21564 
21565 	if (sensep != NULL) {
21566 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21567 			mutex_enter(SD_MUTEX(un));
21568 			if ((scsi_sense_asc(sensep) ==
21569 			    SD_SCSI_RESET_SENSE_CODE) &&
21570 			    (un->un_resvd_status & SD_RESERVE)) {
21571 				/*
21572 				 * The additional sense code indicates a power
21573 				 * on or bus device reset has occurred; update
21574 				 * the reservation status.
21575 				 */
21576 				un->un_resvd_status |=
21577 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21578 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21579 				    "sd_mhd_watch_cb: Lost Reservation\n");
21580 			}
21581 		} else {
21582 			return (0);
21583 		}
21584 	} else {
21585 		mutex_enter(SD_MUTEX(un));
21586 	}
21587 
21588 	if ((un->un_resvd_status & SD_RESERVE) &&
21589 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21590 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21591 			/*
21592 			 * A reset occurred in between the last probe and this
21593 			 * one so if a timeout is pending cancel it.
21594 			 */
21595 			if (un->un_resvd_timeid) {
21596 				timeout_id_t temp_id = un->un_resvd_timeid;
21597 				un->un_resvd_timeid = NULL;
21598 				mutex_exit(SD_MUTEX(un));
21599 				(void) untimeout(temp_id);
21600 				mutex_enter(SD_MUTEX(un));
21601 			}
21602 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21603 		}
21604 		if (un->un_resvd_timeid == NULL) {
21605 			/* Schedule a timeout to handle the lost reservation */
21606 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21607 			    (void *)dev,
21608 			    drv_usectohz(sd_reinstate_resv_delay));
21609 		}
21610 	}
21611 	mutex_exit(SD_MUTEX(un));
21612 	return (0);
21613 }
21614 
21615 
21616 /*
21617  *    Function: sd_mhd_watch_incomplete()
21618  *
21619  * Description: This function is used to find out why a scsi pkt sent by the
21620  *		scsi watch facility was not completed. Under some scenarios this
21621  *		routine will return. Otherwise it will send a bus reset to see
21622  *		if the drive is still online.
21623  *
21624  *   Arguments: un  - driver soft state (unit) structure
21625  *		pkt - incomplete scsi pkt
21626  */
21627 
21628 static void
21629 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
21630 {
21631 	int	be_chatty;
21632 	int	perr;
21633 
21634 	ASSERT(pkt != NULL);
21635 	ASSERT(un != NULL);
21636 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
21637 	perr		= (pkt->pkt_statistics & STAT_PERR);
21638 
21639 	mutex_enter(SD_MUTEX(un));
21640 	if (un->un_state == SD_STATE_DUMPING) {
21641 		mutex_exit(SD_MUTEX(un));
21642 		return;
21643 	}
21644 
21645 	switch (pkt->pkt_reason) {
21646 	case CMD_UNX_BUS_FREE:
21647 		/*
21648 		 * If we had a parity error that caused the target to drop BSY*,
21649 		 * don't be chatty about it.
21650 		 */
21651 		if (perr && be_chatty) {
21652 			be_chatty = 0;
21653 		}
21654 		break;
21655 	case CMD_TAG_REJECT:
21656 		/*
21657 		 * The SCSI-2 spec states that a tag reject will be sent by the
21658 		 * target if tagged queuing is not supported. A tag reject may
21659 		 * also be sent during certain initialization periods or to
21660 		 * control internal resources. For the latter case the target
21661 		 * may also return Queue Full.
21662 		 *
21663 		 * If this driver receives a tag reject from a target that is
21664 		 * going through an init period or controlling internal
21665 		 * resources tagged queuing will be disabled. This is a less
21666 		 * than optimal behavior but the driver is unable to determine
21667 		 * the target state and assumes tagged queueing is not supported.
21668 		 */
21669 		pkt->pkt_flags = 0;
21670 		un->un_tagflags = 0;
21671 
21672 		if (un->un_f_opt_queueing == TRUE) {
21673 			un->un_throttle = min(un->un_throttle, 3);
21674 		} else {
21675 			un->un_throttle = 1;
21676 		}
21677 		mutex_exit(SD_MUTEX(un));
21678 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
21679 		mutex_enter(SD_MUTEX(un));
21680 		break;
21681 	case CMD_INCOMPLETE:
21682 		/*
21683 		 * The transport stopped with an abnormal state, fallthrough and
21684 		 * reset the target and/or bus unless selection did not complete
21685 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
21686 		 * go through a target/bus reset
21687 		 */
21688 		if (pkt->pkt_state == STATE_GOT_BUS) {
21689 			break;
21690 		}
21691 		/*FALLTHROUGH*/
21692 
21693 	case CMD_TIMEOUT:
21694 	default:
21695 		/*
21696 		 * The lun may still be running the command, so a lun reset
21697 		 * should be attempted. If the lun reset fails or cannot be
21698 		 * issued, then try a target reset. Lastly, try a bus reset.
21699 		 */
21700 		if ((pkt->pkt_statistics &
21701 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
21702 			int reset_retval = 0;
21703 			mutex_exit(SD_MUTEX(un));
21704 			if (un->un_f_allow_bus_device_reset == TRUE) {
21705 				if (un->un_f_lun_reset_enabled == TRUE) {
21706 					reset_retval =
21707 					    scsi_reset(SD_ADDRESS(un),
21708 					    RESET_LUN);
21709 				}
21710 				if (reset_retval == 0) {
21711 					reset_retval =
21712 					    scsi_reset(SD_ADDRESS(un),
21713 					    RESET_TARGET);
21714 				}
21715 			}
21716 			if (reset_retval == 0) {
21717 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
21718 			}
21719 			mutex_enter(SD_MUTEX(un));
21720 		}
21721 		break;
21722 	}
21723 
21724 	/* A device/bus reset has occurred; update the reservation status. */
21725 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
21726 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
21727 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21728 			un->un_resvd_status |=
21729 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21730 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21731 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
21732 		}
21733 	}
21734 
21735 	/*
21736 	 * The disk has been turned off; update the device state.
21737 	 *
21738 	 * Note: Should we be offlining the disk here?
21739 	 */
21740 	if (pkt->pkt_state == STATE_GOT_BUS) {
21741 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
21742 		    "Disk not responding to selection\n");
21743 		if (un->un_state != SD_STATE_OFFLINE) {
21744 			New_state(un, SD_STATE_OFFLINE);
21745 		}
21746 	} else if (be_chatty) {
21747 		/*
21748 		 * suppress messages if they are all the same pkt reason;
21749 		 * with TQ, many (up to 256) are returned with the same
21750 		 * pkt_reason
21751 		 */
21752 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
21753 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
21754 			    "sd_mhd_watch_incomplete: "
21755 			    "SCSI transport failed: reason '%s'\n",
21756 			    scsi_rname(pkt->pkt_reason));
21757 		}
21758 	}
21759 	un->un_last_pkt_reason = pkt->pkt_reason;
21760 	mutex_exit(SD_MUTEX(un));
21761 }
21762 
21763 
21764 /*
21765  *    Function: sd_sname()
21766  *
21767  * Description: This is a simple little routine to return a string containing
21768  *		a printable description of the command status byte for use in
21769  *		logging.
21770  *
21771  *   Arguments: status - the SCSI status byte
21772  *
21773  * Return Code: char * - string containing status description.
21774  */
21775 
21776 static char *
21777 sd_sname(uchar_t status)
21778 {
21779 	switch (status & STATUS_MASK) {
21780 	case STATUS_GOOD:
21781 		return ("good status");
21782 	case STATUS_CHECK:
21783 		return ("check condition");
21784 	case STATUS_MET:
21785 		return ("condition met");
21786 	case STATUS_BUSY:
21787 		return ("busy");
21788 	case STATUS_INTERMEDIATE:
21789 		return ("intermediate");
21790 	case STATUS_INTERMEDIATE_MET:
21791 		return ("intermediate - condition met");
21792 	case STATUS_RESERVATION_CONFLICT:
21793 		return ("reservation_conflict");
21794 	case STATUS_TERMINATED:
21795 		return ("command terminated");
21796 	case STATUS_QFULL:
21797 		return ("queue full");
21798 	default:
21799 		return ("<unknown status>");
21800 	}
21801 }
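
/*
 * sd_sname() is typically used when logging packet completion status; a
 * minimal illustrative sketch (not taken verbatim from this driver):
 *
 *	uchar_t status = SD_GET_PKT_STATUS(pktp);
 *	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
 *	    "unexpected status: %s\n", sd_sname(status));
 *
 * Since sd_sname() applies STATUS_MASK itself, the raw status byte from
 * *pkt->pkt_scbp may be passed directly.
 */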
21802 
21803 
21804 /*
21805  *    Function: sd_mhd_resvd_recover()
21806  *
21807  * Description: This function adds a reservation entry to the
21808  *		sd_resv_reclaim_request list and signals the reservation
21809  *		reclaim thread that there is work pending. If the reservation
21810  *		reclaim thread has not been previously created this function
21811  *		will kick it off.
21812  *
21813  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21814  *			among multiple watches that share this callback function
21815  *
21816  *     Context: This routine is called by timeout() and is run in interrupt
21817  *		context. It must not sleep or call other functions which may
21818  *		sleep.
21819  */
21820 
21821 static void
21822 sd_mhd_resvd_recover(void *arg)
21823 {
21824 	dev_t			dev = (dev_t)arg;
21825 	struct sd_lun		*un;
21826 	struct sd_thr_request	*sd_treq = NULL;
21827 	struct sd_thr_request	*sd_cur = NULL;
21828 	struct sd_thr_request	*sd_prev = NULL;
21829 	int			already_there = 0;
21830 
21831 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21832 		return;
21833 	}
21834 
21835 	mutex_enter(SD_MUTEX(un));
21836 	un->un_resvd_timeid = NULL;
21837 	if (un->un_resvd_status & SD_WANT_RESERVE) {
21838 		/*
21839 		 * There was a reset, so don't issue the reserve; allow the
21840 		 * sd_mhd_watch_cb callback function to notice this and
21841 		 * reschedule the timeout for reservation.
21842 		 */
21843 		mutex_exit(SD_MUTEX(un));
21844 		return;
21845 	}
21846 	mutex_exit(SD_MUTEX(un));
21847 
21848 	/*
21849 	 * Add this device to the sd_resv_reclaim_request list and the
21850 	 * sd_resv_reclaim_thread should take care of the rest.
21851 	 *
21852 	 * Note: We can't sleep in this context so if the memory allocation
21853 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
21854 	 * reschedule the timeout for reservation.  (4378460)
21855 	 */
21856 	sd_treq = (struct sd_thr_request *)
21857 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
21858 	if (sd_treq == NULL) {
21859 		return;
21860 	}
21861 
21862 	sd_treq->sd_thr_req_next = NULL;
21863 	sd_treq->dev = dev;
21864 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21865 	if (sd_tr.srq_thr_req_head == NULL) {
21866 		sd_tr.srq_thr_req_head = sd_treq;
21867 	} else {
21868 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
21869 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
21870 			if (sd_cur->dev == dev) {
21871 				/*
21872 				 * already in the queue, so don't log
21873 				 * another request for the device
21874 				 */
21875 				already_there = 1;
21876 				break;
21877 			}
21878 			sd_prev = sd_cur;
21879 		}
21880 		if (!already_there) {
21881 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
21882 			    "logging request for %lx\n", dev);
21883 			sd_prev->sd_thr_req_next = sd_treq;
21884 		} else {
21885 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
21886 		}
21887 	}
21888 
21889 	/*
21890 	 * Create a kernel thread to do the reservation reclaim and free up this
21891 	 * thread. We cannot block this thread while we go away to do the
21892 	 * reservation reclaim.
21893 	 */
21894 	if (sd_tr.srq_resv_reclaim_thread == NULL)
21895 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
21896 		    sd_resv_reclaim_thread, NULL,
21897 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
21898 
21899 	/* Tell the reservation reclaim thread that it has work to do */
21900 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
21901 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21902 }
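
/*
 * For reference, the reclaim request list manipulated above is a simple
 * singly-linked FIFO rooted in the global sd_tr state (illustrative view):
 *
 *	sd_tr.srq_thr_req_head -> [dev A] -> [dev B] -> NULL
 *	sd_tr.srq_thr_cur_req  -> request currently being reclaimed
 *
 * All links are protected by sd_tr.srq_resv_reclaim_mutex. Entries are
 * appended here in timeout (interrupt) context, hence the KM_NOSLEEP
 * allocation, and are consumed by sd_resv_reclaim_thread().
 */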
21903 
21904 /*
21905  *    Function: sd_resv_reclaim_thread()
21906  *
21907  * Description: This function implements the reservation reclaim operations
21908  * Description: This function implements the reservation reclaim operations.
21909  *
21910  *   Arguments: none; requests are taken from the global sd_tr request
21911  *		      list, which is populated by sd_mhd_resvd_recover().
21912 
21913 static void
21914 sd_resv_reclaim_thread()
21915 {
21916 	struct sd_lun		*un;
21917 	struct sd_thr_request	*sd_mhreq;
21918 
21919 	/* Wait for work */
21920 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21921 	if (sd_tr.srq_thr_req_head == NULL) {
21922 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
21923 		    &sd_tr.srq_resv_reclaim_mutex);
21924 	}
21925 
21926 	/* Loop while we have work */
21927 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
21928 		un = ddi_get_soft_state(sd_state,
21929 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
21930 		if (un == NULL) {
21931 			/*
21932 			 * softstate structure is NULL so just
21933 			 * dequeue the request and continue
21934 			 */
21935 			sd_tr.srq_thr_req_head =
21936 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21937 			kmem_free(sd_tr.srq_thr_cur_req,
21938 			    sizeof (struct sd_thr_request));
21939 			continue;
21940 		}
21941 
21942 		/* dequeue the request */
21943 		sd_mhreq = sd_tr.srq_thr_cur_req;
21944 		sd_tr.srq_thr_req_head =
21945 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
21946 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
21947 
21948 		/*
21949 		 * Reclaim reservation only if SD_RESERVE is still set. There
21950 		 * may have been a call to MHIOCRELEASE before we got here.
21951 		 */
21952 		mutex_enter(SD_MUTEX(un));
21953 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
21954 			/*
21955 			 * Note: The SD_LOST_RESERVE flag is cleared before
21956 			 * reclaiming the reservation. If this were done after
21957 			 * the call to sd_reserve_release, a reservation loss
21958 			 * in the window between pkt completion of the reserve
21959 			 * cmd and the mutex_enter below might not be recognized.
21960 			 */
21961 			un->un_resvd_status &= ~SD_LOST_RESERVE;
21962 			mutex_exit(SD_MUTEX(un));
21963 
21964 			if (sd_reserve_release(sd_mhreq->dev,
21965 			    SD_RESERVE) == 0) {
21966 				mutex_enter(SD_MUTEX(un));
21967 				un->un_resvd_status |= SD_RESERVE;
21968 				mutex_exit(SD_MUTEX(un));
21969 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21970 				    "sd_resv_reclaim_thread: "
21971 				    "Reservation Recovered\n");
21972 			} else {
21973 				mutex_enter(SD_MUTEX(un));
21974 				un->un_resvd_status |= SD_LOST_RESERVE;
21975 				mutex_exit(SD_MUTEX(un));
21976 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21977 				    "sd_resv_reclaim_thread: Failed "
21978 				    "Reservation Recovery\n");
21979 			}
21980 		} else {
21981 			mutex_exit(SD_MUTEX(un));
21982 		}
21983 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
21984 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
21985 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
21986 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
21987 		/*
21988 		 * wakeup the destroy thread if anyone is waiting on
21989 		 * us to complete.
21990 		 */
21991 		cv_signal(&sd_tr.srq_inprocess_cv);
21992 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
21993 		    "sd_resv_reclaim_thread: cv_signaling current request\n");
21994 	}
21995 
21996 	/*
21997 	 * Clean up the sd_tr structure now that this thread no longer exists.
21998 	 */
21999 	ASSERT(sd_tr.srq_thr_req_head == NULL);
22000 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
22001 	sd_tr.srq_resv_reclaim_thread = NULL;
22002 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22003 	thread_exit();
22004 }
22005 
22006 
22007 /*
22008  *    Function: sd_rmv_resv_reclaim_req()
22009  *
22010  * Description: This function removes any pending reservation reclaim requests
22011  *		for the specified device.
22012  *
22013  *   Arguments: dev - the device 'dev_t'
22014  */
22015 
22016 static void
22017 sd_rmv_resv_reclaim_req(dev_t dev)
22018 {
22019 	struct sd_thr_request *sd_mhreq;
22020 	struct sd_thr_request *sd_prev;
22021 
22022 	/* Remove a reservation reclaim request from the list */
22023 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22024 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22025 		/*
22026 		 * We are attempting to reinstate reservation for
22027 		 * this device. We wait for sd_reserve_release()
22028 		 * to return before we return.
22029 		 */
22030 		cv_wait(&sd_tr.srq_inprocess_cv,
22031 		    &sd_tr.srq_resv_reclaim_mutex);
22032 	} else {
22033 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22034 		if (sd_mhreq && sd_mhreq->dev == dev) {
22035 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22036 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22037 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22038 			return;
22039 		}
22040 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22041 			if (sd_mhreq && sd_mhreq->dev == dev) {
22042 				break;
22043 			}
22044 			sd_prev = sd_mhreq;
22045 		}
22046 		if (sd_mhreq != NULL) {
22047 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22048 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22049 		}
22050 	}
22051 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22052 }
22053 
22054 
22055 /*
22056  *    Function: sd_mhd_reset_notify_cb()
22057  *
22058  * Description: This is a call back function for scsi_reset_notify. This
22059  *		function updates the softstate reserved status and logs the
22060  *		reset. The driver scsi watch facility callback function
22061  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22062  *		will reclaim the reservation.
22063  *
22064  *   Arguments: arg  - driver soft state (unit) structure
22065  */
22066 
22067 static void
22068 sd_mhd_reset_notify_cb(caddr_t arg)
22069 {
22070 	struct sd_lun *un = (struct sd_lun *)arg;
22071 
22072 	mutex_enter(SD_MUTEX(un));
22073 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22074 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22075 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22076 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22077 	}
22078 	mutex_exit(SD_MUTEX(un));
22079 }
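
/*
 * A callback such as sd_mhd_reset_notify_cb() is registered with the
 * transport via scsi_reset_notify(9F). A sketch of how registration and
 * cancellation might look (illustrative only; the actual registration is
 * done elsewhere in this driver):
 *
 *	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
 *	    sd_mhd_reset_notify_cb, (caddr_t)un);
 *	...
 *	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
 *	    sd_mhd_reset_notify_cb, (caddr_t)un);
 */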
22080 
22081 
22082 /*
22083  *    Function: sd_take_ownership()
22084  *
22085  * Description: This routine implements an algorithm to achieve a stable
22086  *		reservation on disks which don't implement priority reserve,
22087  *		and makes sure that other hosts lose re-reservation attempts.
22088  *		The algorithm consists of a loop that keeps issuing RESERVE
22089  *		for some period of time (min_ownership_delay, default 6
22090  *		seconds). During that loop, it looks to see if there has been
22091  *		a bus device reset or bus reset (both of which cause an
22092  *		existing reservation to be lost). If the reservation is lost,
22093  *		RESERVE is reissued until min_ownership_delay passes with no
22094  *		resets, or until max_ownership_delay has expired. This loop
22095  *		ensures that the host really did reserve the device, in spite of
22096  *		resets. The looping for min_ownership_delay (default six
22097  *		seconds) is important to early generation clustering products,
22098  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22099  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22100  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22101  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22102  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22103  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22104  *		no longer "owns" the disk and will have panicked itself.  Thus,
22105  *		the host issuing the MHIOCTKOWN is assured (with timing
22106  *		dependencies) that by the time it actually starts to use the
22107  *		disk for real work, the old owner is no longer accessing it.
22108  *
22109  *		min_ownership_delay is the minimum amount of time for which the
22110  *		disk must be reserved continuously devoid of resets before the
22111  *		MHIOCTKOWN ioctl will return success.
22112  *
22113  *		max_ownership_delay is the maximum time within which the take
22114  *		ownership must succeed, or time out with an error.
22115  *
22116  *   Arguments: dev - the device 'dev_t'
22117  *		*p  - struct containing timing info.
22118  *
22119  * Return Code: 0 for success or error code
22120  */
22121 
22122 static int
22123 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22124 {
22125 	struct sd_lun	*un;
22126 	int		rval;
22127 	int		err;
22128 	int		reservation_count   = 0;
22129 	int		min_ownership_delay =  6000000; /* in usec */
22130 	int		max_ownership_delay = 30000000; /* in usec */
22131 	clock_t		start_time;	/* starting time of this algorithm */
22132 	clock_t		end_time;	/* time limit for giving up */
22133 	clock_t		ownership_time;	/* time limit for stable ownership */
22134 	clock_t		current_time;
22135 	clock_t		previous_current_time;
22136 
22137 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22138 		return (ENXIO);
22139 	}
22140 
22141 	/*
22142 	 * Attempt a device reservation. A priority reservation is requested.
22143 	 */
22144 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22145 	    != SD_SUCCESS) {
22146 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22147 		    "sd_take_ownership: return(1)=%d\n", rval);
22148 		return (rval);
22149 	}
22150 
22151 	/* Update the softstate reserved status to indicate the reservation */
22152 	mutex_enter(SD_MUTEX(un));
22153 	un->un_resvd_status |= SD_RESERVE;
22154 	un->un_resvd_status &=
22155 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22156 	mutex_exit(SD_MUTEX(un));
22157 
22158 	if (p != NULL) {
22159 		if (p->min_ownership_delay != 0) {
22160 			min_ownership_delay = p->min_ownership_delay * 1000;
22161 		}
22162 		if (p->max_ownership_delay != 0) {
22163 			max_ownership_delay = p->max_ownership_delay * 1000;
22164 		}
22165 	}
22166 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22167 	    "sd_take_ownership: min, max delays: %d, %d\n",
22168 	    min_ownership_delay, max_ownership_delay);
22169 
22170 	start_time = ddi_get_lbolt();
22171 	current_time	= start_time;
22172 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22173 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22174 
22175 	while (current_time - end_time < 0) {
22176 		delay(drv_usectohz(500000));
22177 
22178 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22179 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22180 				mutex_enter(SD_MUTEX(un));
22181 				rval = (un->un_resvd_status &
22182 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22183 				mutex_exit(SD_MUTEX(un));
22184 				break;
22185 			}
22186 		}
22187 		previous_current_time = current_time;
22188 		current_time = ddi_get_lbolt();
22189 		mutex_enter(SD_MUTEX(un));
22190 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22191 			ownership_time = ddi_get_lbolt() +
22192 			    drv_usectohz(min_ownership_delay);
22193 			reservation_count = 0;
22194 		} else {
22195 			reservation_count++;
22196 		}
22197 		un->un_resvd_status |= SD_RESERVE;
22198 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22199 		mutex_exit(SD_MUTEX(un));
22200 
22201 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22202 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22203 		    "reservation=%s\n", (current_time - previous_current_time),
22204 		    reservation_count ? "ok" : "reclaimed");
22205 
22206 		if (current_time - ownership_time >= 0 &&
22207 		    reservation_count >= 4) {
22208 			rval = 0; /* Achieved a stable ownership */
22209 			break;
22210 		}
22211 		if (current_time - end_time >= 0) {
22212 			rval = EACCES; /* No ownership in max possible time */
22213 			break;
22214 		}
22215 	}
22216 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22217 	    "sd_take_ownership: return(2)=%d\n", rval);
22218 	return (rval);
22219 }
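
/*
 * Note on the loop above: the time comparisons are written in the
 * wrap-safe form (a - b < 0) rather than (a < b) because ddi_get_lbolt()
 * returns a free-running clock_t that may wrap. A worked example with
 * assumed values: at hz = 100, drv_usectohz(6000000) is 600 ticks, so
 * with the 500 msec delay per iteration the loop needs at least four
 * consecutive conflict-free RESERVEs and roughly six reset-free seconds
 * (reservation_count >= 4 and current_time >= ownership_time) before
 * MHIOCTKOWN returns success.
 */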
22220 
22221 
22222 /*
22223  *    Function: sd_reserve_release()
22224  *
22225  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22226  *		PRIORITY RESERVE commands based on a user specified command type
22227  *
22228  *   Arguments: dev - the device 'dev_t'
22229  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22230  *		      SD_RESERVE, SD_RELEASE
22231  *
22232  * Return Code: 0 or Error Code
22233  */
22234 
22235 static int
22236 sd_reserve_release(dev_t dev, int cmd)
22237 {
22238 	struct uscsi_cmd	*com = NULL;
22239 	struct sd_lun		*un = NULL;
22240 	char			cdb[CDB_GROUP0];
22241 	int			rval;
22242 
22243 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22244 	    (cmd == SD_PRIORITY_RESERVE));
22245 
22246 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22247 		return (ENXIO);
22248 	}
22249 
22250 	/* instantiate and initialize the command and cdb */
22251 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22252 	bzero(cdb, CDB_GROUP0);
22253 	com->uscsi_flags   = USCSI_SILENT;
22254 	com->uscsi_timeout = un->un_reserve_release_time;
22255 	com->uscsi_cdblen  = CDB_GROUP0;
22256 	com->uscsi_cdb	   = cdb;
22257 	if (cmd == SD_RELEASE) {
22258 		cdb[0] = SCMD_RELEASE;
22259 	} else {
22260 		cdb[0] = SCMD_RESERVE;
22261 	}
22262 
22263 	/* Send the command. */
22264 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22265 	    SD_PATH_STANDARD);
22266 
22267 	/*
22268 	 * "break" a reservation that is held by another host, by issuing a
22269 	 * reset if priority reserve is desired, and we could not get the
22270 	 * device.
22271 	 */
22272 	if ((cmd == SD_PRIORITY_RESERVE) &&
22273 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22274 		/*
22275 		 * First try to reset the LUN. If we cannot, then try a target
22276 		 * reset, followed by a bus reset if the target reset fails.
22277 		 */
22278 		int reset_retval = 0;
22279 		if (un->un_f_lun_reset_enabled == TRUE) {
22280 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22281 		}
22282 		if (reset_retval == 0) {
22283 			/* The LUN reset either failed or was not issued */
22284 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22285 		}
22286 		if ((reset_retval == 0) &&
22287 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22288 			rval = EIO;
22289 			kmem_free(com, sizeof (*com));
22290 			return (rval);
22291 		}
22292 
22293 		bzero(com, sizeof (struct uscsi_cmd));
22294 		com->uscsi_flags   = USCSI_SILENT;
22295 		com->uscsi_cdb	   = cdb;
22296 		com->uscsi_cdblen  = CDB_GROUP0;
22297 		com->uscsi_timeout = 5;
22298 
22299 		/*
22300 		 * Reissue the last reserve command, this time without request
22301 		 * sense.  Assume that it is just a regular reserve command.
22302 		 */
22303 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22304 		    SD_PATH_STANDARD);
22305 	}
22306 
22307 	/* Return an error if still getting a reservation conflict. */
22308 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22309 		rval = EACCES;
22310 	}
22311 
22312 	kmem_free(com, sizeof (*com));
22313 	return (rval);
22314 }
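
/*
 * The RESERVE(6)/RELEASE(6) CDBs built above are 6-byte (CDB_GROUP0)
 * commands that are all zero except for the opcode. For illustration,
 * the resulting wire-level CDB (byte values per SCSI-2):
 *
 *	cdb[0] = 0x16 (SCMD_RESERVE) or 0x17 (SCMD_RELEASE)
 *	cdb[1..5] = 0x00 (no third-party or extent reservation)
 */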
22315 
22316 
22317 #define	SD_NDUMP_RETRIES	12
22318 /*
22319  *	System Crash Dump routine
22320  */
22321 
22322 static int
22323 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22324 {
22325 	int		instance;
22326 	int		partition;
22327 	int		i;
22328 	int		err;
22329 	struct sd_lun	*un;
22330 	struct scsi_pkt *wr_pktp;
22331 	struct buf	*wr_bp;
22332 	struct buf	wr_buf;
22333 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22334 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22335 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22336 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22337 	size_t		io_start_offset;
22338 	int		doing_rmw = FALSE;
22339 	int		rval;
22340 #if defined(__i386) || defined(__amd64)
22341 	ssize_t dma_resid;
22342 	daddr_t oblkno;
22343 #endif
22344 	diskaddr_t	nblks = 0;
22345 	diskaddr_t	start_block;
22346 
22347 	instance = SDUNIT(dev);
22348 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22349 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22350 		return (ENXIO);
22351 	}
22352 
22353 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22354 
22355 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22356 
22357 	partition = SDPART(dev);
22358 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22359 
22360 	/* Validate the blocks to dump against the partition size. */
22361 
22362 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22363 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22364 
22365 	if ((blkno + nblk) > nblks) {
22366 		SD_TRACE(SD_LOG_DUMP, un,
22367 		    "sddump: dump range larger than partition: "
22368 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22369 		    blkno, nblk, nblks);
22370 		return (EINVAL);
22371 	}
22372 
22373 	mutex_enter(&un->un_pm_mutex);
22374 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22375 		struct scsi_pkt *start_pktp;
22376 
22377 		mutex_exit(&un->un_pm_mutex);
22378 
22379 		/*
22380 		 * Use the pm framework to power on the HBA first.
22381 		 */
22382 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22383 
22384 		/*
22385 		 * Dump no longer uses sdpower to power on a device; it is
22386 		 * done in-line here so it can be done in polled mode.
22387 		 */
22388 
22389 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22390 
22391 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22392 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22393 
22394 		if (start_pktp == NULL) {
22395 			/* We were not given a SCSI packet, fail. */
22396 			return (EIO);
22397 		}
22398 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22399 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22400 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22401 		start_pktp->pkt_flags = FLAG_NOINTR;
22402 
22403 		mutex_enter(SD_MUTEX(un));
22404 		SD_FILL_SCSI1_LUN(un, start_pktp);
22405 		mutex_exit(SD_MUTEX(un));
22406 		/*
22407 		 * Scsi_poll returns 0 (success) if the command completes and
22408 		 * the status block is STATUS_GOOD.
22409 		 */
22410 		if (sd_scsi_poll(un, start_pktp) != 0) {
22411 			scsi_destroy_pkt(start_pktp);
22412 			return (EIO);
22413 		}
22414 		scsi_destroy_pkt(start_pktp);
22415 		(void) sd_ddi_pm_resume(un);
22416 	} else {
22417 		mutex_exit(&un->un_pm_mutex);
22418 	}
22419 
22420 	mutex_enter(SD_MUTEX(un));
22421 	un->un_throttle = 0;
22422 
22423 	/*
22424 	 * The first time through, reset the specific target device.
22425 	 * However, when cpr calls sddump we know that sd is in
22426 	 * a good state, so no bus reset is required.
22427 	 * Clear sense data via a Request Sense cmd.
22428 	 * In sddump we don't care about allow_bus_device_reset anymore.
22429 	 */
22430 
22431 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22432 	    (un->un_state != SD_STATE_DUMPING)) {
22433 
22434 		New_state(un, SD_STATE_DUMPING);
22435 
22436 		if (un->un_f_is_fibre == FALSE) {
22437 			mutex_exit(SD_MUTEX(un));
22438 			/*
22439 			 * Attempt a bus reset for parallel scsi.
22440 			 *
22441 			 * Note: A bus reset is required because on some host
22442 			 * systems (i.e. E420R) a bus device reset is
22443 			 * insufficient to reset the state of the target.
22444 			 *
22445 			 * Note: Don't issue the reset for fibre-channel,
22446 			 * because this tends to hang the bus (loop) for
22447 			 * too long while everyone is logging out and in
22448 			 * and the deadman timer for dumping will fire
22449 			 * before the dump is complete.
22450 			 */
22451 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22452 				mutex_enter(SD_MUTEX(un));
22453 				Restore_state(un);
22454 				mutex_exit(SD_MUTEX(un));
22455 				return (EIO);
22456 			}
22457 
22458 			/* Delay to give the device some recovery time. */
22459 			drv_usecwait(10000);
22460 
22461 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22462 				SD_INFO(SD_LOG_DUMP, un,
22463 					"sddump: sd_send_polled_RQS failed\n");
22464 			}
22465 			mutex_enter(SD_MUTEX(un));
22466 		}
22467 	}
22468 
22469 	/*
22470 	 * Convert the partition-relative block number to a
22471 	 * disk physical block number.
22472 	 */
22473 	blkno += start_block;
22474 
22475 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22476 
22477 
22478 	/*
22479 	 * Check if the device has a non-512 block size.
22480 	 */
22481 	wr_bp = NULL;
22482 	if (NOT_DEVBSIZE(un)) {
22483 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22484 		tgt_byte_count = nblk * un->un_sys_blocksize;
22485 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22486 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22487 			doing_rmw = TRUE;
22488 			/*
22489 			 * Calculate the block number and number of blocks
22490 			 * in terms of the media block size.
22491 			 */
22492 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22493 			tgt_nblk =
22494 			    ((tgt_byte_offset + tgt_byte_count +
22495 				(un->un_tgt_blocksize - 1)) /
22496 				un->un_tgt_blocksize) - tgt_blkno;
22497 
22498 			/*
22499 			 * Invoke the routine which is going to do the read
22500 			 * part of the read-modify-write.
22501 			 * Note that this routine returns a pointer to
22502 			 * a valid bp in wr_bp.
22503 			 */
22504 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22505 			    &wr_bp);
22506 			if (err) {
22507 				mutex_exit(SD_MUTEX(un));
22508 				return (err);
22509 			}
22510 			/*
22511 			 * Offset is calculated as:
22512 			 * (original block # * system block size) -
22513 			 * (new block # * target block size)
22514 			 */
22515 			io_start_offset =
22516 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22517 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22518 
22519 			ASSERT((io_start_offset >= 0) &&
22520 			    (io_start_offset < un->un_tgt_blocksize));
22521 			/*
22522 			 * Do the modify portion of read modify write.
22523 			 */
22524 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22525 			    (size_t)nblk * un->un_sys_blocksize);
22526 		} else {
22527 			doing_rmw = FALSE;
22528 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22529 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22530 		}
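		/*
		 * Worked example with illustrative numbers: given a 512-byte
		 * system block size and a 2048-byte target block size,
		 * blkno = 3 and nblk = 2 give tgt_byte_offset = 1536 and
		 * tgt_byte_count = 1024. Both are misaligned, so the RMW
		 * path above computes tgt_blkno = 0 and tgt_nblk = 2 (the
		 * two media blocks covering bytes 0-4095), reads them, and
		 * copies the 1024 bytes of dump data in at
		 * io_start_offset = 1536 - 0 = 1536.
		 */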
22531 
22532 		/* Convert blkno and nblk to target blocks */
22533 		blkno = tgt_blkno;
22534 		nblk = tgt_nblk;
22535 	} else {
22536 		wr_bp = &wr_buf;
22537 		bzero(wr_bp, sizeof (struct buf));
22538 		wr_bp->b_flags		= B_BUSY;
22539 		wr_bp->b_un.b_addr	= addr;
22540 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
22541 		wr_bp->b_resid		= 0;
22542 	}
22543 
22544 	mutex_exit(SD_MUTEX(un));
22545 
22546 	/*
22547 	 * Obtain a SCSI packet for the write command.
22548 	 * It should be safe to call the allocator here without
22549 	 * worrying about being locked for DVMA mapping because
22550 	 * the address we're passed is already a DVMA mapping.
22551 	 *
22552 	 * We are also not going to worry about semaphore ownership
22553 	 * in the dump buffer. Dumping is single threaded at present.
22554 	 */
22555 
22556 	wr_pktp = NULL;
22557 
22558 #if defined(__i386) || defined(__amd64)
22559 	dma_resid = wr_bp->b_bcount;
22560 	oblkno = blkno;
22561 	while (dma_resid != 0) {
22562 #endif
22563 
22564 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22565 		wr_bp->b_flags &= ~B_ERROR;
22566 
22567 #if defined(__i386) || defined(__amd64)
22568 		blkno = oblkno +
22569 			((wr_bp->b_bcount - dma_resid) /
22570 			    un->un_tgt_blocksize);
22571 		nblk = dma_resid / un->un_tgt_blocksize;
22572 
22573 		if (wr_pktp) {
22574 			/* Partial DMA transfers after initial transfer */
22575 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22576 			    blkno, nblk);
22577 		} else {
22578 			/* Initial transfer */
22579 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22580 			    un->un_pkt_flags, NULL_FUNC, NULL,
22581 			    blkno, nblk);
22582 		}
22583 #else
22584 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22585 		    0, NULL_FUNC, NULL, blkno, nblk);
22586 #endif
22587 
22588 		if (rval == 0) {
22589 			/* We were given a SCSI packet, continue. */
22590 			break;
22591 		}
22592 
22593 		if (i == 0) {
22594 			if (wr_bp->b_flags & B_ERROR) {
22595 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22596 				    "no resources for dumping; "
22597 				    "error code: 0x%x, retrying",
22598 				    geterror(wr_bp));
22599 			} else {
22600 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22601 				    "no resources for dumping; retrying");
22602 			}
22603 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22604 			if (wr_bp->b_flags & B_ERROR) {
22605 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22606 				    "no resources for dumping; error code: "
22607 				    "0x%x, retrying\n", geterror(wr_bp));
22608 			}
22609 		} else {
22610 			if (wr_bp->b_flags & B_ERROR) {
22611 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22612 				    "no resources for dumping; "
22613 				    "error code: 0x%x, retries failed, "
22614 				    "giving up.\n", geterror(wr_bp));
22615 			} else {
22616 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22617 				    "no resources for dumping; "
22618 				    "retries failed, giving up.\n");
22619 			}
22620 			mutex_enter(SD_MUTEX(un));
22621 			Restore_state(un);
22622 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
22623 				mutex_exit(SD_MUTEX(un));
22624 				scsi_free_consistent_buf(wr_bp);
22625 			} else {
22626 				mutex_exit(SD_MUTEX(un));
22627 			}
22628 			return (EIO);
22629 		}
22630 		drv_usecwait(10000);
22631 	}
22632 
22633 #if defined(__i386) || defined(__amd64)
22634 	/*
22635 	 * save the resid from PARTIAL_DMA
22636 	 */
22637 	dma_resid = wr_pktp->pkt_resid;
22638 	if (dma_resid != 0)
22639 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
22640 	wr_pktp->pkt_resid = 0;
22641 #endif
22642 
22643 	/* SunBug 1222170 */
22644 	wr_pktp->pkt_flags = FLAG_NOINTR;
22645 
22646 	err = EIO;
22647 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22648 
22649 		/*
22650 		 * Scsi_poll returns 0 (success) if the command completes and
22651 		 * the status block is STATUS_GOOD.  We should only check
22652 		 * errors if this condition is not true.  Even then we should
22653 		 * send our own request sense packet only if we have a check
22654 		 * condition and auto request sense has not been performed by
22655 		 * the hba.
22656 		 */
22657 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
22658 
22659 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
22660 		    (wr_pktp->pkt_resid == 0)) {
22661 			err = SD_SUCCESS;
22662 			break;
22663 		}
22664 
22665 		/*
22666 		 * Check CMD_DEV_GONE first; give up if the device is gone.
22667 		 */
22668 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
22669 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22670 			    "Device is gone\n");
22671 			break;
22672 		}
22673 
22674 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
22675 			SD_INFO(SD_LOG_DUMP, un,
22676 			    "sddump: write failed with CHECK, try # %d\n", i);
22677 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
22678 				(void) sd_send_polled_RQS(un);
22679 			}
22680 
22681 			continue;
22682 		}
22683 
22684 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
22685 			int reset_retval = 0;
22686 
22687 			SD_INFO(SD_LOG_DUMP, un,
22688 			    "sddump: write failed with BUSY, try # %d\n", i);
22689 
22690 			if (un->un_f_lun_reset_enabled == TRUE) {
22691 				reset_retval = scsi_reset(SD_ADDRESS(un),
22692 				    RESET_LUN);
22693 			}
22694 			if (reset_retval == 0) {
22695 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22696 			}
22697 			(void) sd_send_polled_RQS(un);
22698 
22699 		} else {
22700 			SD_INFO(SD_LOG_DUMP, un,
22701 			    "sddump: write failed with 0x%x, try # %d\n",
22702 			    SD_GET_PKT_STATUS(wr_pktp), i);
22703 			mutex_enter(SD_MUTEX(un));
22704 			sd_reset_target(un, wr_pktp);
22705 			mutex_exit(SD_MUTEX(un));
22706 		}
22707 
22708 		/*
22709 		 * If we are not getting anywhere with lun/target resets,
22710 		 * let's reset the bus.
22711 		 */
22712 		if (i == SD_NDUMP_RETRIES/2) {
22713 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22714 			(void) sd_send_polled_RQS(un);
22715 		}
22716 
22717 	}
22718 #if defined(__i386) || defined(__amd64)
22719 	}	/* dma_resid */
22720 #endif
22721 
22722 	scsi_destroy_pkt(wr_pktp);
22723 	mutex_enter(SD_MUTEX(un));
22724 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
22725 		mutex_exit(SD_MUTEX(un));
22726 		scsi_free_consistent_buf(wr_bp);
22727 	} else {
22728 		mutex_exit(SD_MUTEX(un));
22729 	}
22730 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
22731 	return (err);
22732 }
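
/*
 * Note on the x86/x64 partial-DMA loop in sddump() above, with assumed
 * numbers for illustration: if the HBA can bind only part of the buffer,
 * e.g. b_bcount = 64 KB against a 32 KB DMA window, the first transfer
 * completes with pkt_resid = 32 KB. The loop then computes, for the next
 * pass, blkno = oblkno + (65536 - 32768) / un->un_tgt_blocksize and
 * nblk = 32768 / un->un_tgt_blocksize, and sd_setup_next_rw_pkt() walks
 * the same packet through the remaining window(s) until dma_resid
 * reaches zero.
 */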
22733 
22734 /*
22735  *    Function: sd_scsi_poll()
22736  *
22737  * Description: This is a wrapper for the scsi_poll call.
22738  *
22739  *   Arguments: sd_lun - The unit structure
22740  *              scsi_pkt - The scsi packet being sent to the device.
22741  *
22742  * Return Code: 0 - Command completed successfully with good status
22743  *             -1 - Command failed.  This could indicate a check condition
22744  *                  or other status value requiring recovery action.
22745  *
22746  */
22747 
22748 static int
22749 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
22750 {
22751 	int status;
22752 
22753 	ASSERT(un != NULL);
22754 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22755 	ASSERT(pktp != NULL);
22756 
22757 	status = SD_SUCCESS;
22758 
22759 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
22760 		pktp->pkt_flags |= un->un_tagflags;
22761 		pktp->pkt_flags &= ~FLAG_NODISCON;
22762 	}
22763 
22764 	status = sd_ddi_scsi_poll(pktp);
22765 	/*
22766 	 * Scsi_poll returns 0 (success) if the command completes and the
22767 	 * status block is STATUS_GOOD.  We should only check errors if this
22768 	 * condition is not true.  Even then we should send our own request
22769 	 * sense packet only if we have a check condition and auto
22770 	 * request sense has not been performed by the hba.
22771 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
22772 	 */
22773 	if ((status != SD_SUCCESS) &&
22774 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
22775 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
22776 	    (pktp->pkt_reason != CMD_DEV_GONE))
22777 		(void) sd_send_polled_RQS(un);
22778 
22779 	return (status);
22780 }
22781 
22782 /*
22783  *    Function: sd_send_polled_RQS()
22784  *
22785  * Description: This sends the request sense command to a device.
22786  *
22787  *   Arguments: sd_lun - The unit structure
22788  *
22789  * Return Code: 0 - Command completed successfully with good status
22790  *             -1 - Command failed.
22791  *
22792  */
22793 
22794 static int
22795 sd_send_polled_RQS(struct sd_lun *un)
22796 {
22797 	int	ret_val;
22798 	struct	scsi_pkt	*rqs_pktp;
22799 	struct	buf		*rqs_bp;
22800 
22801 	ASSERT(un != NULL);
22802 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22803 
22804 	ret_val = SD_SUCCESS;
22805 
22806 	rqs_pktp = un->un_rqs_pktp;
22807 	rqs_bp	 = un->un_rqs_bp;
22808 
22809 	mutex_enter(SD_MUTEX(un));
22810 
22811 	if (un->un_sense_isbusy) {
22812 		ret_val = SD_FAILURE;
22813 		mutex_exit(SD_MUTEX(un));
22814 		return (ret_val);
22815 	}
22816 
22817 	/*
22818 	 * If the request sense buffer (and packet) is not in use,
22819 	 * let's set the un_sense_isbusy and send our packet
22820 	 */
22821 	un->un_sense_isbusy 	= 1;
22822 	rqs_pktp->pkt_resid  	= 0;
22823 	rqs_pktp->pkt_reason 	= 0;
22824 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
22825 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
22826 
22827 	mutex_exit(SD_MUTEX(un));
22828 
22829 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
22830 	    " 0x%p\n", rqs_bp->b_un.b_addr);
22831 
22832 	/*
22833 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
22834 	 * axle - it has a call into us!
22835 	 */
22836 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
22837 		SD_INFO(SD_LOG_COMMON, un,
22838 		    "sd_send_polled_RQS: RQS failed\n");
22839 	}
22840 
22841 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
22842 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
22843 
22844 	mutex_enter(SD_MUTEX(un));
22845 	un->un_sense_isbusy = 0;
22846 	mutex_exit(SD_MUTEX(un));
22847 
22848 	return (ret_val);
22849 }
22850 
22851 /*
22852  * Defines needed for localized version of the scsi_poll routine.
22853  */
22854 #define	SD_CSEC		10000			/* usecs */
22855 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
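
/*
 * With these definitions SD_CSEC is one 10 msec polling interval and
 * SD_SEC_TO_CSEC (100) converts seconds to polling intervals. As a
 * worked example, the default 60 second pkt_time gives sd_ddi_scsi_poll()
 * a budget of 60 * 100 = 6000 iterations; each one-second retry (e.g.
 * Busy) consumes 100 of them by adding (SD_SEC_TO_CSEC - 1) to
 * busy_count.
 */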
22856 
22857 
22858 /*
22859  *    Function: sd_ddi_scsi_poll()
22860  *
22861  * Description: Localized version of the scsi_poll routine.  The purpose is to
22862  *		send a scsi_pkt to a device as a polled command.  This version
22863  *		is to ensure more robust handling of transport errors.
22864  *		Specifically, this routine cures the not-ready to ready
22865  *		transition on power-up and reset of Sonoma arrays. This can
22866  *		take up to 45 seconds for power-on and 20 seconds for reset
22867  *		of a Sonoma lun.
22868  *
22869  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
22870  *
22871  * Return Code: 0 - Command completed successfully with good status
22872  *             -1 - Command failed.
22873  *
22874  */
22875 
22876 static int
22877 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
22878 {
22879 	int busy_count;
22880 	int timeout;
22881 	int rval = SD_FAILURE;
22882 	int savef;
22883 	uint8_t *sensep;
22884 	long savet;
22885 	void (*savec)();
22886 	/*
22887 	 * The following is defined in machdep.c and is used in determining if
22888 	 * the scsi transport system will do polled I/O instead of interrupt
22889 	 * I/O when called from xx_dump().
22890 	 */
22891 	extern int do_polled_io;
22892 
22893 	/*
22894 	 * save old flags in pkt, to restore at end
22895 	 */
22896 	savef = pkt->pkt_flags;
22897 	savec = pkt->pkt_comp;
22898 	savet = pkt->pkt_time;
22899 
22900 	pkt->pkt_flags |= FLAG_NOINTR;
22901 
22902 	/*
22903 	 * XXX there is nothing in the SCSA spec that states that we should not
22904 	 * do a callback for polled cmds; however, removing this will break sd
22905 	 * and probably other target drivers
22906 	 */
22907 	pkt->pkt_comp = NULL;
22908 
22909 	/*
22910 	 * We don't like a polled command without a timeout;
22911 	 * 60 seconds seems long enough.
22912 	 */
22913 	if (pkt->pkt_time == 0) {
22914 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
22915 	}
22916 
22917 	/*
22918 	 * Send polled cmd.
22919 	 *
22920 	 * We do some error recovery for various errors.  Tran_busy,
22921 	 * queue full, and non-dispatched commands are retried every 10 msec,
22922 	 * as they are typically transient failures.  Busy status and Not
22923 	 * Ready are retried every second as this status takes a while to
22924 	 * change.  Unit attention is retried for pkt_time (60) times
22925 	 * with no delay.
22926 	 */
22927 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
22928 
22929 	for (busy_count = 0; busy_count < timeout; busy_count++) {
22930 		int rc;
22931 		int poll_delay;
22932 
22933 		/*
22934 		 * Initialize pkt status variables.
22935 		 */
22936 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
22937 
22938 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
22939 			if (rc != TRAN_BUSY) {
22940 				/* Transport failed - give up. */
22941 				break;
22942 			} else {
22943 				/* Transport busy - try again. */
22944 				poll_delay = 1 * SD_CSEC; /* 10 msec */
22945 			}
22946 		} else {
22947 			/*
22948 			 * Transport accepted - check pkt status.
22949 			 */
22950 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
22951 			if (pkt->pkt_reason == CMD_CMPLT &&
22952 			    rc == STATUS_CHECK &&
22953 			    pkt->pkt_state & STATE_ARQ_DONE) {
22954 				struct scsi_arq_status *arqstat =
22955 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
22956 
22957 				sensep = (uint8_t *)&arqstat->sts_sensedata;
22958 			} else {
22959 				sensep = NULL;
22960 			}
22961 
22962 			if ((pkt->pkt_reason == CMD_CMPLT) &&
22963 			    (rc == STATUS_GOOD)) {
22964 				/* No error - we're done */
22965 				rval = SD_SUCCESS;
22966 				break;
22967 
22968 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
22969 				/* Lost connection - give up */
22970 				break;
22971 
22972 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
22973 			    (pkt->pkt_state == 0)) {
22974 				/* Pkt not dispatched - try again. */
22975 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
22976 
22977 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
22978 			    (rc == STATUS_QFULL)) {
22979 				/* Queue full - try again. */
22980 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
22981 
22982 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
22983 			    (rc == STATUS_BUSY)) {
22984 				/* Busy - try again. */
22985 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
22986 				busy_count += (SD_SEC_TO_CSEC - 1);
22987 
22988 			} else if ((sensep != NULL) &&
22989 			    (scsi_sense_key(sensep) ==
22990 				KEY_UNIT_ATTENTION)) {
22991 				/* Unit Attention - try again */
22992 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
22993 				continue;
22994 
22995 			} else if ((sensep != NULL) &&
22996 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
22997 			    (scsi_sense_asc(sensep) == 0x04) &&
22998 			    (scsi_sense_ascq(sensep) == 0x01)) {
22999 				/* Not ready -> ready - try again. */
23000 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23001 				busy_count += (SD_SEC_TO_CSEC - 1);
23002 
23003 			} else {
23004 				/* BAD status - give up. */
23005 				break;
23006 			}
23007 		}
23008 
23009 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
23010 		    !do_polled_io) {
23011 			delay(drv_usectohz(poll_delay));
23012 		} else {
23013 			/* we busy wait during cpr_dump or interrupt threads */
23014 			drv_usecwait(poll_delay);
23015 		}
23016 	}
23017 
23018 	pkt->pkt_flags = savef;
23019 	pkt->pkt_comp = savec;
23020 	pkt->pkt_time = savet;
23021 	return (rval);
23022 }
23023 
23024 
23025 /*
23026  *    Function: sd_persistent_reservation_in_read_keys
23027  *
23028  * Description: This routine is the driver entry point for handling CD-ROM
23029  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23030  *		by sending the SCSI-3 PRIN commands to the device.
23031  *		Processes the read keys command response by copying the
23032  *		reservation key information into the user provided buffer.
23033  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23034  *
23035  *   Arguments: un   -  Pointer to soft state struct for the target.
23036  *		usrp -	user provided pointer to multihost Persistent In Read
23037  *			Keys structure (mhioc_inkeys_t)
23038  *		flag -	this argument is a pass through to ddi_copyxxx()
23039  *			directly from the mode argument of ioctl().
23040  *
23041  * Return Code: 0   - Success
23042  *		EACCES
23043  *		ENOTSUP
23044  *		errno return code from sd_send_scsi_cmd()
23045  *
23046  *     Context: Can sleep. Does not return until command is completed.
23047  */
23048 
23049 static int
23050 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23051     mhioc_inkeys_t *usrp, int flag)
23052 {
23053 #ifdef _MULTI_DATAMODEL
23054 	struct mhioc_key_list32	li32;
23055 #endif
23056 	sd_prin_readkeys_t	*in;
23057 	mhioc_inkeys_t		*ptr;
23058 	mhioc_key_list_t	li;
23059 	uchar_t			*data_bufp;
23060 	int 			data_len;
23061 	int			rval;
23062 	size_t			copysz;
23063 
23064 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
23065 		return (EINVAL);
23066 	}
23067 	bzero(&li, sizeof (mhioc_key_list_t));
23068 
23069 	/*
23070 	 * Get the listsize from user
23071 	 */
23072 #ifdef _MULTI_DATAMODEL
23073 
23074 	switch (ddi_model_convert_from(flag & FMODELS)) {
23075 	case DDI_MODEL_ILP32:
23076 		copysz = sizeof (struct mhioc_key_list32);
23077 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23078 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23079 			    "sd_persistent_reservation_in_read_keys: "
23080 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23081 			rval = EFAULT;
23082 			goto done;
23083 		}
23084 		li.listsize = li32.listsize;
23085 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23086 		break;
23087 
23088 	case DDI_MODEL_NONE:
23089 		copysz = sizeof (mhioc_key_list_t);
23090 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23091 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23092 			    "sd_persistent_reservation_in_read_keys: "
23093 			    "failed ddi_copyin: mhioc_key_list_t\n");
23094 			rval = EFAULT;
23095 			goto done;
23096 		}
23097 		break;
23098 	}
23099 
23100 #else /* ! _MULTI_DATAMODEL */
23101 	copysz = sizeof (mhioc_key_list_t);
23102 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23103 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23104 		    "sd_persistent_reservation_in_read_keys: "
23105 		    "failed ddi_copyin: mhioc_key_list_t\n");
23106 		rval = EFAULT;
23107 		goto done;
23108 	}
23109 #endif
23110 
23111 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23112 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
23113 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23114 
23115 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23116 	    data_len, data_bufp)) != 0) {
23117 		goto done;
23118 	}
23119 	in = (sd_prin_readkeys_t *)data_bufp;
23120 	ptr->generation = BE_32(in->generation);
23121 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23122 
23123 	/*
23124 	 * Return the min(listsize, listlen) keys
23125 	 */
23126 #ifdef _MULTI_DATAMODEL
23127 
23128 	switch (ddi_model_convert_from(flag & FMODELS)) {
23129 	case DDI_MODEL_ILP32:
23130 		li32.listlen = li.listlen;
23131 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23132 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23133 			    "sd_persistent_reservation_in_read_keys: "
23134 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23135 			rval = EFAULT;
23136 			goto done;
23137 		}
23138 		break;
23139 
23140 	case DDI_MODEL_NONE:
23141 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23142 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23143 			    "sd_persistent_reservation_in_read_keys: "
23144 			    "failed ddi_copyout: mhioc_key_list_t\n");
23145 			rval = EFAULT;
23146 			goto done;
23147 		}
23148 		break;
23149 	}
23150 
23151 #else /* ! _MULTI_DATAMODEL */
23152 
23153 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23154 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23155 		    "sd_persistent_reservation_in_read_keys: "
23156 		    "failed ddi_copyout: mhioc_key_list_t\n");
23157 		rval = EFAULT;
23158 		goto done;
23159 	}
23160 
23161 #endif /* _MULTI_DATAMODEL */
23162 
23163 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23164 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23165 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23166 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23167 		    "sd_persistent_reservation_in_read_keys: "
23168 		    "failed ddi_copyout: keylist\n");
23169 		rval = EFAULT;
23170 	}
23171 done:
23172 	kmem_free(data_bufp, data_len);
23173 	return (rval);
23174 }
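
/*
 * Sizing note for the PRIN READ KEYS buffer above (illustrative,
 * assuming the standard 8-byte SCSI-3 reservation key for
 * MHIOC_RESV_KEY_SIZE): a caller supplying listsize = 4 yields
 * data_len = 4 * 8 plus the readkeys header (sizeof (sd_prin_readkeys_t)
 * minus the keylist placeholder), i.e. room for the 8-byte
 * generation/length header and four keys. If the device holds more keys
 * than listsize, only min(listsize, listlen) keys are copied out;
 * listlen tells the caller how many keys exist so it can retry with a
 * larger list.
 */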
23175 
23176 
23177 /*
23178  *    Function: sd_persistent_reservation_in_read_resv
23179  *
23180  * Description: This routine is the driver entry point for handling CD-ROM
23181  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23182  *		by sending the SCSI-3 PRIN commands to the device.
23183  *		Process the read persistent reservations command response by
23184  *		copying the reservation information into the user provided
23185  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
23186  *
23187  *   Arguments: un   -  Pointer to soft state struct for the target.
23188  *		usrp -	user provided pointer to multihost Persistent In Read
23189  *		usrp -	user provided pointer to multihost Persistent In Read
23190  *			Reservations structure (mhioc_inresvs_t)
23191  *			directly from the mode argument of ioctl().
23192  *
23193  * Return Code: 0   - Success
23194  *		EACCES
23195  *		ENOTSUP
23196  *		errno return code from sd_send_scsi_cmd()
23197  *
23198  *     Context: Can sleep. Does not return until command is completed.
23199  */
23200 
23201 static int
23202 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23203     mhioc_inresvs_t *usrp, int flag)
23204 {
23205 #ifdef _MULTI_DATAMODEL
23206 	struct mhioc_resv_desc_list32 resvlist32;
23207 #endif
23208 	sd_prin_readresv_t	*in;
23209 	mhioc_inresvs_t		*ptr;
23210 	sd_readresv_desc_t	*readresv_ptr;
23211 	mhioc_resv_desc_list_t	resvlist;
23212 	mhioc_resv_desc_t 	resvdesc;
23213 	uchar_t			*data_bufp;
23214 	int 			data_len;
23215 	int			rval;
23216 	int			i;
23217 	size_t			copysz;
23218 	mhioc_resv_desc_t	*bufp;
23219 
23220 	if ((ptr = usrp) == NULL) {
23221 		return (EINVAL);
23222 	}
23223 
23224 	/*
23225 	 * Get the listsize from user
23226 	 */
23227 #ifdef _MULTI_DATAMODEL
23228 	switch (ddi_model_convert_from(flag & FMODELS)) {
23229 	case DDI_MODEL_ILP32:
23230 		copysz = sizeof (struct mhioc_resv_desc_list32);
23231 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23232 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23233 			    "sd_persistent_reservation_in_read_resv: "
23234 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23235 			rval = EFAULT;
23236 			goto done;
23237 		}
23238 		resvlist.listsize = resvlist32.listsize;
23239 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23240 		break;
23241 
23242 	case DDI_MODEL_NONE:
23243 		copysz = sizeof (mhioc_resv_desc_list_t);
23244 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23245 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23246 			    "sd_persistent_reservation_in_read_resv: "
23247 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23248 			rval = EFAULT;
23249 			goto done;
23250 		}
23251 		break;
23252 	}
23253 #else /* ! _MULTI_DATAMODEL */
23254 	copysz = sizeof (mhioc_resv_desc_list_t);
23255 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23256 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23257 		    "sd_persistent_reservation_in_read_resv: "
23258 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23259 		rval = EFAULT;
23260 		goto done;
23261 	}
23262 #endif /* ! _MULTI_DATAMODEL */
23263 
23264 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23265 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23266 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23267 
23268 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23269 	    data_len, data_bufp)) != 0) {
23270 		goto done;
23271 	}
23272 	in = (sd_prin_readresv_t *)data_bufp;
23273 	ptr->generation = BE_32(in->generation);
23274 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23275 
23276 	/*
23277 	 * Return the min(listsize, listlen) keys
23278 	 */
23279 #ifdef _MULTI_DATAMODEL
23280 
23281 	switch (ddi_model_convert_from(flag & FMODELS)) {
23282 	case DDI_MODEL_ILP32:
23283 		resvlist32.listlen = resvlist.listlen;
23284 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23285 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23286 			    "sd_persistent_reservation_in_read_resv: "
23287 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23288 			rval = EFAULT;
23289 			goto done;
23290 		}
23291 		break;
23292 
23293 	case DDI_MODEL_NONE:
23294 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23295 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23296 			    "sd_persistent_reservation_in_read_resv: "
23297 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23298 			rval = EFAULT;
23299 			goto done;
23300 		}
23301 		break;
23302 	}
23303 
23304 #else /* ! _MULTI_DATAMODEL */
23305 
23306 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23307 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23308 		    "sd_persistent_reservation_in_read_resv: "
23309 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23310 		rval = EFAULT;
23311 		goto done;
23312 	}
23313 
23314 #endif /* ! _MULTI_DATAMODEL */
23315 
23316 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23317 	bufp = resvlist.list;
23318 	copysz = sizeof (mhioc_resv_desc_t);
23319 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23320 	    i++, readresv_ptr++, bufp++) {
23321 
23322 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23323 		    MHIOC_RESV_KEY_SIZE);
23324 		resvdesc.type  = readresv_ptr->type;
23325 		resvdesc.scope = readresv_ptr->scope;
23326 		resvdesc.scope_specific_addr =
23327 		    BE_32(readresv_ptr->scope_specific_addr);
23328 
23329 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23330 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23331 			    "sd_persistent_reservation_in_read_resv: "
23332 			    "failed ddi_copyout: resvlist\n");
23333 			rval = EFAULT;
23334 			goto done;
23335 		}
23336 	}
23337 done:
23338 	kmem_free(data_bufp, data_len);
23339 	return (rval);
23340 }
23341 
23342 
23343 /*
23344  *    Function: sr_change_blkmode()
23345  *
23346  * Description: This routine is the driver entry point for handling CD-ROM
23347  *		block mode ioctl requests. Support for returning and changing
23348  *		the current block size in use by the device is implemented. The
23349  *		LBA size is changed via a MODE SELECT Block Descriptor.
23350  *
23351  *		This routine issues a mode sense with an allocation length of
23352  *		12 bytes for the mode page header and a single block descriptor.
23353  *
23354  *   Arguments: dev - the device 'dev_t'
23355  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23356  *		      CDROMSBLKMODE (set)
23357  *		data - current block size or requested block size
23358  *		flag - this argument is a pass through to ddi_copyxxx() directly
23359  *		       from the mode argument of ioctl().
23360  *
23361  * Return Code: the code returned by sd_send_scsi_cmd()
23362  *		EINVAL if invalid arguments are provided
23363  *		EFAULT if ddi_copyxxx() fails
23364  *		ENXIO if fail ddi_get_soft_state
23365  *		EIO if invalid mode sense block descriptor length
23366  *
23367  */
23368 
23369 static int
23370 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23371 {
23372 	struct sd_lun			*un = NULL;
23373 	struct mode_header		*sense_mhp, *select_mhp;
23374 	struct block_descriptor		*sense_desc, *select_desc;
23375 	int				current_bsize;
23376 	int				rval = EINVAL;
23377 	uchar_t				*sense = NULL;
23378 	uchar_t				*select = NULL;
23379 
23380 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23381 
23382 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23383 		return (ENXIO);
23384 	}
23385 
23386 	/*
23387 	 * The block length is changed via the Mode Select block descriptor; the
23388 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23389 	 * required as part of this routine. Therefore the mode sense allocation
23390 	 * length is specified to be the length of a mode page header and a
23391 	 * block descriptor.
23392 	 */
23393 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23394 
23395 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23396 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23397 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23398 		    "sr_change_blkmode: Mode Sense Failed\n");
23399 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23400 		return (rval);
23401 	}
23402 
23403 	/* Check the block descriptor len to handle only 1 block descriptor */
23404 	sense_mhp = (struct mode_header *)sense;
23405 	if ((sense_mhp->bdesc_length == 0) ||
23406 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23407 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23408 		    "sr_change_blkmode: Mode Sense returned invalid block"
23409 		    " descriptor length\n");
23410 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23411 		return (EIO);
23412 	}
23413 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23414 	current_bsize = ((sense_desc->blksize_hi << 16) |
23415 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23416 
23417 	/* Process command */
23418 	switch (cmd) {
23419 	case CDROMGBLKMODE:
23420 		/* Return the block size obtained during the mode sense */
23421 		if (ddi_copyout(&current_bsize, (void *)data,
23422 		    sizeof (int), flag) != 0)
23423 			rval = EFAULT;
23424 		break;
23425 	case CDROMSBLKMODE:
23426 		/* Validate the requested block size */
23427 		switch (data) {
23428 		case CDROM_BLK_512:
23429 		case CDROM_BLK_1024:
23430 		case CDROM_BLK_2048:
23431 		case CDROM_BLK_2056:
23432 		case CDROM_BLK_2336:
23433 		case CDROM_BLK_2340:
23434 		case CDROM_BLK_2352:
23435 		case CDROM_BLK_2368:
23436 		case CDROM_BLK_2448:
23437 		case CDROM_BLK_2646:
23438 		case CDROM_BLK_2647:
23439 			break;
23440 		default:
23441 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23442 			    "sr_change_blkmode: "
23443 			    "Block Size '%ld' Not Supported\n", data);
23444 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23445 			return (EINVAL);
23446 		}
23447 
23448 		/*
23449 		 * The current block size matches the requested block size so
23450 		 * there is no need to send the mode select to change the size
23451 		 */
23452 		if (current_bsize == data) {
23453 			break;
23454 		}
23455 
23456 		/* Build the select data for the requested block size */
23457 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23458 		select_mhp = (struct mode_header *)select;
23459 		select_desc =
23460 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23461 		/*
23462 		 * The LBA size is changed via the block descriptor, so the
23463 		 * descriptor is built according to the user data
23464 		 */
23465 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23466 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23467 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23468 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23469 
23470 		/* Send the mode select for the requested block size */
23471 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23472 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23473 		    SD_PATH_STANDARD)) != 0) {
23474 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23475 			    "sr_change_blkmode: Mode Select Failed\n");
23476 			/*
23477 			 * The mode select failed for the requested block size,
23478 			 * so reset the data for the original block size and
23479 			 * send it to the target. The error is indicated by the
23480 			 * return value for the failed mode select.
23481 			 */
23482 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23483 			select_desc->blksize_mid = sense_desc->blksize_mid;
23484 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23485 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23486 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23487 			    SD_PATH_STANDARD);
23488 		} else {
23489 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23490 			mutex_enter(SD_MUTEX(un));
23491 			sd_update_block_info(un, (uint32_t)data, 0);
23492 			mutex_exit(SD_MUTEX(un));
23493 		}
23494 		break;
23495 	default:
23496 		/* should not reach here, but check anyway */
23497 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23498 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23499 		rval = EINVAL;
23500 		break;
23501 	}
23502 
23503 	if (select) {
23504 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23505 	}
23506 	if (sense) {
23507 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23508 	}
23509 	return (rval);
23510 }
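
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * exercising the block mode ioctls handled above.  CDROMGBLKMODE takes
 * a pointer to an int; CDROMSBLKMODE passes the new size by value.
 * The device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	int bsize;
 *
 *	if (ioctl(fd, CDROMGBLKMODE, &bsize) == 0)	// get current size
 *		(void) printf("block size: %d\n", bsize);
 *	if (ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048) != 0)
 *		perror("CDROMSBLKMODE");	// set 2048-byte blocks
 */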
23511 
23512 
23513 /*
23514  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23515  * implement driver support for getting and setting the CD speed. The command
23516  * set used will be based on the device type. If the device has not been
23517  * identified as MMC, the Toshiba vendor specific mode page will be used. If
23518  * the device is MMC but does not support the Real Time Streaming feature,
23519  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
23520  * be used to read the speed.
23521  */
23522 
23523 /*
23524  *    Function: sr_change_speed()
23525  *
23526  * Description: This routine is the driver entry point for handling CD-ROM
23527  *		drive speed ioctl requests for devices supporting the Toshiba
23528  *		vendor specific drive speed mode page. Support for returning
23529  *		and changing the current drive speed in use by the device is
23530  *		implemented.
23531  *
23532  *   Arguments: dev - the device 'dev_t'
23533  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23534  *		      CDROMSDRVSPEED (set)
23535  *		data - current drive speed or requested drive speed
23536  *		flag - this argument is a pass through to ddi_copyxxx() directly
23537  *		       from the mode argument of ioctl().
23538  *
23539  * Return Code: the code returned by sd_send_scsi_cmd()
23540  *		EINVAL if invalid arguments are provided
23541  *		EFAULT if ddi_copyxxx() fails
23542  *		ENXIO if fail ddi_get_soft_state
23543  *		EIO if invalid mode sense block descriptor length
23544  */
23545 
23546 static int
23547 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23548 {
23549 	struct sd_lun			*un = NULL;
23550 	struct mode_header		*sense_mhp, *select_mhp;
23551 	struct mode_speed		*sense_page, *select_page;
23552 	int				current_speed;
23553 	int				rval = EINVAL;
23554 	int				bd_len;
23555 	uchar_t				*sense = NULL;
23556 	uchar_t				*select = NULL;
23557 
23558 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23559 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23560 		return (ENXIO);
23561 	}
23562 
23563 	/*
23564 	 * Note: The drive speed is being modified here according to a Toshiba
23565 	 * vendor specific mode page (0x31).
23566 	 */
23567 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23568 
23569 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23570 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23571 	    SD_PATH_STANDARD)) != 0) {
23572 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23573 		    "sr_change_speed: Mode Sense Failed\n");
23574 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23575 		return (rval);
23576 	}
23577 	sense_mhp  = (struct mode_header *)sense;
23578 
23579 	/* Check the block descriptor len to handle only 1 block descriptor */
23580 	bd_len = sense_mhp->bdesc_length;
23581 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23582 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23583 		    "sr_change_speed: Mode Sense returned invalid block "
23584 		    "descriptor length\n");
23585 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23586 		return (EIO);
23587 	}
23588 
23589 	sense_page = (struct mode_speed *)
23590 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23591 	current_speed = sense_page->speed;
23592 
23593 	/* Process command */
23594 	switch (cmd) {
23595 	case CDROMGDRVSPEED:
23596 		/* Return the drive speed obtained during the mode sense */
23597 		if (current_speed == 0x2) {
23598 			current_speed = CDROM_TWELVE_SPEED;
23599 		}
23600 		if (ddi_copyout(&current_speed, (void *)data,
23601 		    sizeof (int), flag) != 0) {
23602 			rval = EFAULT;
23603 		}
23604 		break;
23605 	case CDROMSDRVSPEED:
23606 		/* Validate the requested drive speed */
23607 		switch ((uchar_t)data) {
23608 		case CDROM_TWELVE_SPEED:
23609 			data = 0x2;
23610 			/*FALLTHROUGH*/
23611 		case CDROM_NORMAL_SPEED:
23612 		case CDROM_DOUBLE_SPEED:
23613 		case CDROM_QUAD_SPEED:
23614 		case CDROM_MAXIMUM_SPEED:
23615 			break;
23616 		default:
23617 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23618 			    "sr_change_speed: "
23619 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23620 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23621 			return (EINVAL);
23622 		}
23623 
23624 		/*
23625 		 * The current drive speed matches the requested drive speed so
23626 		 * there is no need to send the mode select to change the speed
23627 		 */
23628 		if (current_speed == data) {
23629 			break;
23630 		}
23631 
23632 		/* Build the select data for the requested drive speed */
23633 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23634 		select_mhp = (struct mode_header *)select;
23635 		select_mhp->bdesc_length = 0;
23636 		select_page =
23637 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
23640 		select_page->mode_page.code = CDROM_MODE_SPEED;
23641 		select_page->mode_page.length = 2;
23642 		select_page->speed = (uchar_t)data;
23643 
23644 		/* Send the mode select for the requested drive speed */
23645 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23646 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23647 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
23648 			/*
23649 			 * The mode select failed for the requested drive speed,
23650 			 * so reset the data for the original drive speed and
23651 			 * send it to the target. The error is indicated by the
23652 			 * return value for the failed mode select.
23653 			 */
23654 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23655 			    "sr_change_speed: Mode Select Failed\n");
23656 			select_page->speed = sense_page->speed;
23657 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
23658 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
23659 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
23660 		}
23661 		break;
23662 	default:
23663 		/* should not reach here, but check anyway */
23664 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23665 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
23666 		rval = EINVAL;
23667 		break;
23668 	}
23669 
23670 	if (select) {
23671 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
23672 	}
23673 	if (sense) {
23674 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23675 	}
23676 
23677 	return (rval);
23678 }
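
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * querying and setting the drive speed through the ioctls handled by
 * sr_change_speed() (Toshiba mode page) or sr_atapi_change_speed()
 * (MMC).  The device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	int speed;
 *
 *	if (ioctl(fd, CDROMGDRVSPEED, &speed) == 0)
 *		(void) printf("current speed code: %d\n", speed);
 *	if (ioctl(fd, CDROMSDRVSPEED, CDROM_QUAD_SPEED) != 0)
 *		perror("CDROMSDRVSPEED");	// request 4x operation
 */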
23679 
23680 
23681 /*
23682  *    Function: sr_atapi_change_speed()
23683  *
23684  * Description: This routine is the driver entry point for handling CD-ROM
23685  *		drive speed ioctl requests for MMC devices that do not support
23686  *		the Real Time Streaming feature (0x107).
23687  *
23688  *		Note: This routine will use the SET SPEED command, which may
23689  *		not be supported by all devices.
23690  *
23691  *   Arguments: dev - the device 'dev_t'
23692  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23693  *		      CDROMSDRVSPEED (set)
23694  *		data - current drive speed or requested drive speed
23695  *		flag - this argument is a pass through to ddi_copyxxx() directly
23696  *		       from the mode argument of ioctl().
23697  *
23698  * Return Code: the code returned by sd_send_scsi_cmd()
23699  *		EINVAL if invalid arguments are provided
23700  *		EFAULT if ddi_copyxxx() fails
23701  *		ENXIO if fail ddi_get_soft_state
23702  *		EIO if invalid mode sense block descriptor length
23703  */
23704 
23705 static int
23706 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23707 {
23708 	struct sd_lun			*un;
23709 	struct uscsi_cmd		*com = NULL;
23710 	struct mode_header_grp2		*sense_mhp;
23711 	uchar_t				*sense_page;
23712 	uchar_t				*sense = NULL;
23713 	char				cdb[CDB_GROUP5];
23714 	int				bd_len;
23715 	int				current_speed = 0;
23716 	int				max_speed = 0;
23717 	int				rval;
23718 
23719 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23720 
23721 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23722 		return (ENXIO);
23723 	}
23724 
23725 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
23726 
23727 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
23728 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
23729 	    SD_PATH_STANDARD)) != 0) {
23730 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23731 		    "sr_atapi_change_speed: Mode Sense Failed\n");
23732 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23733 		return (rval);
23734 	}
23735 
23736 	/* Check the block descriptor len to handle only 1 block descriptor */
23737 	sense_mhp = (struct mode_header_grp2 *)sense;
23738 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
23739 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23740 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23741 		    "sr_atapi_change_speed: Mode Sense returned invalid "
23742 		    "block descriptor length\n");
23743 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23744 		return (EIO);
23745 	}
23746 
23747 	/* Calculate the current and maximum drive speeds */
23748 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
23749 	current_speed = (sense_page[14] << 8) | sense_page[15];
23750 	max_speed = (sense_page[8] << 8) | sense_page[9];
23751 
23752 	/* Process the command */
23753 	switch (cmd) {
23754 	case CDROMGDRVSPEED:
23755 		current_speed /= SD_SPEED_1X;
23756 		if (ddi_copyout(&current_speed, (void *)data,
23757 		    sizeof (int), flag) != 0)
23758 			rval = EFAULT;
23759 		break;
23760 	case CDROMSDRVSPEED:
23761 		/* Convert the speed code to KB/sec */
23762 		switch ((uchar_t)data) {
23763 		case CDROM_NORMAL_SPEED:
23764 			current_speed = SD_SPEED_1X;
23765 			break;
23766 		case CDROM_DOUBLE_SPEED:
23767 			current_speed = 2 * SD_SPEED_1X;
23768 			break;
23769 		case CDROM_QUAD_SPEED:
23770 			current_speed = 4 * SD_SPEED_1X;
23771 			break;
23772 		case CDROM_TWELVE_SPEED:
23773 			current_speed = 12 * SD_SPEED_1X;
23774 			break;
23775 		case CDROM_MAXIMUM_SPEED:
23776 			current_speed = 0xffff;
23777 			break;
23778 		default:
23779 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23780 			    "sr_atapi_change_speed: invalid drive speed %d\n",
23781 			    (uchar_t)data);
23782 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23783 			return (EINVAL);
23784 		}
23785 
23786 		/* Check the request against the drive's max speed. */
23787 		if (current_speed != 0xffff) {
23788 			if (current_speed > max_speed) {
23789 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23790 				return (EINVAL);
23791 			}
23792 		}
23793 
23794 		/*
23795 		 * Build and send the SET SPEED command
23796 		 *
23797 		 * Note: The SET SPEED (0xBB) command used in this routine is
23798 		 * obsolete per the SCSI MMC spec but still supported in the
23799 		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI;
23800 		 * therefore the command is still implemented in this routine.
23801 		 */
23802 		bzero(cdb, sizeof (cdb));
23803 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
23804 		cdb[2] = (uchar_t)(current_speed >> 8);
23805 		cdb[3] = (uchar_t)current_speed;
23806 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23807 		com->uscsi_cdb	   = (caddr_t)cdb;
23808 		com->uscsi_cdblen  = CDB_GROUP5;
23809 		com->uscsi_bufaddr = NULL;
23810 		com->uscsi_buflen  = 0;
23811 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
23812 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
23813 		break;
23814 	default:
23815 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23816 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
23817 		rval = EINVAL;
23818 	}
23819 
23820 	if (sense) {
23821 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
23822 	}
23823 	if (com) {
23824 		kmem_free(com, sizeof (*com));
23825 	}
23826 	return (rval);
23827 }
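
/*
 * Sketch (editorial addition): how the speed codes above map into the
 * two-byte KB/sec field of the SET CD SPEED (0xBB) CDB, assuming
 * SD_SPEED_1X is the 1x CD-ROM data rate in KB/sec:
 *
 *	int kbps = 4 * SD_SPEED_1X;		// e.g. CDROM_QUAD_SPEED
 *	cdb[2] = (uchar_t)(kbps >> 8);		// speed, MSB
 *	cdb[3] = (uchar_t)(kbps & 0xff);	// speed, LSB
 *
 * A value of 0xffff (CDROM_MAXIMUM_SPEED) asks the drive to use its
 * maximum speed.
 */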
23828 
23829 
23830 /*
23831  *    Function: sr_pause_resume()
23832  *
23833  * Description: This routine is the driver entry point for handling CD-ROM
23834  *		pause/resume ioctl requests. This only affects the audio play
23835  *		operation.
23836  *
23837  *   Arguments: dev - the device 'dev_t'
23838  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
23839  *		      for setting the resume bit of the cdb.
23840  *
23841  * Return Code: the code returned by sd_send_scsi_cmd()
23842  *		EINVAL if invalid mode specified
23843  *
23844  */
23845 
23846 static int
23847 sr_pause_resume(dev_t dev, int cmd)
23848 {
23849 	struct sd_lun		*un;
23850 	struct uscsi_cmd	*com;
23851 	char			cdb[CDB_GROUP1];
23852 	int			rval;
23853 
23854 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23855 		return (ENXIO);
23856 	}
23857 
23858 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23859 	bzero(cdb, CDB_GROUP1);
23860 	cdb[0] = SCMD_PAUSE_RESUME;
23861 	switch (cmd) {
23862 	case CDROMRESUME:
23863 		cdb[8] = 1;
23864 		break;
23865 	case CDROMPAUSE:
23866 		cdb[8] = 0;
23867 		break;
23868 	default:
23869 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
23870 		    " Command '%x' Not Supported\n", cmd);
23871 		rval = EINVAL;
23872 		goto done;
23873 	}
23874 
23875 	com->uscsi_cdb    = cdb;
23876 	com->uscsi_cdblen = CDB_GROUP1;
23877 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23878 
23879 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23880 	    SD_PATH_STANDARD);
23881 
23882 done:
23883 	kmem_free(com, sizeof (*com));
23884 	return (rval);
23885 }
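
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * pausing and resuming audio play via the ioctls handled above;
 * neither request carries an argument.  The device path is an
 * illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	(void) ioctl(fd, CDROMPAUSE, 0);	// pause audio play
 *	(void) ioctl(fd, CDROMRESUME, 0);	// resume audio play
 */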
23886 
23887 
23888 /*
23889  *    Function: sr_play_msf()
23890  *
23891  * Description: This routine is the driver entry point for handling CD-ROM
23892  *		ioctl requests to output the audio signals at the specified
23893  *		starting address and continue the audio play until the specified
23894  *		ending address (CDROMPLAYMSF). The address is in Minute Second
23895  *		Frame (MSF) format.
23896  *
23897  *   Arguments: dev	- the device 'dev_t'
23898  *		data	- pointer to user provided audio msf structure,
23899  *		          specifying start/end addresses.
23900  *		flag	- this argument is a pass through to ddi_copyxxx()
23901  *		          directly from the mode argument of ioctl().
23902  *
23903  * Return Code: the code returned by sd_send_scsi_cmd()
23904  *		EFAULT if ddi_copyxxx() fails
23905  *		ENXIO if fail ddi_get_soft_state
23906  *		EINVAL if data pointer is NULL
23907  */
23908 
23909 static int
23910 sr_play_msf(dev_t dev, caddr_t data, int flag)
23911 {
23912 	struct sd_lun		*un;
23913 	struct uscsi_cmd	*com;
23914 	struct cdrom_msf	msf_struct;
23915 	struct cdrom_msf	*msf = &msf_struct;
23916 	char			cdb[CDB_GROUP1];
23917 	int			rval;
23918 
23919 	if (data == NULL) {
23920 		return (EINVAL);
23921 	}
23922 
23923 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23924 		return (ENXIO);
23925 	}
23926 
23927 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
23928 		return (EFAULT);
23929 	}
23930 
23931 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23932 	bzero(cdb, CDB_GROUP1);
23933 	cdb[0] = SCMD_PLAYAUDIO_MSF;
23934 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
23935 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
23936 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
23937 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
23938 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
23939 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
23940 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
23941 	} else {
23942 		cdb[3] = msf->cdmsf_min0;
23943 		cdb[4] = msf->cdmsf_sec0;
23944 		cdb[5] = msf->cdmsf_frame0;
23945 		cdb[6] = msf->cdmsf_min1;
23946 		cdb[7] = msf->cdmsf_sec1;
23947 		cdb[8] = msf->cdmsf_frame1;
23948 	}
23949 	com->uscsi_cdb    = cdb;
23950 	com->uscsi_cdblen = CDB_GROUP1;
23951 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
23952 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
23953 	    SD_PATH_STANDARD);
23954 	kmem_free(com, sizeof (*com));
23955 	return (rval);
23956 }
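
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * playing audio from 0:02:00 through 5:00:00 via CDROMPLAYMSF.  The
 * device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_msf msf;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	msf.cdmsf_min0 = 0;  msf.cdmsf_sec0 = 2;  msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;  msf.cdmsf_sec1 = 0;  msf.cdmsf_frame1 = 0;
 *	if (ioctl(fd, CDROMPLAYMSF, &msf) != 0)
 *		perror("CDROMPLAYMSF");
 */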
23957 
23958 
23959 /*
23960  *    Function: sr_play_trkind()
23961  *
23962  * Description: This routine is the driver entry point for handling CD-ROM
23963  *		ioctl requests to output the audio signals at the specified
23964  *		starting address and continue the audio play until the specified
23965  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
23966  *		format.
23967  *
23968  *   Arguments: dev	- the device 'dev_t'
23969  *		data	- pointer to user provided audio track/index structure,
23970  *		          specifying start/end addresses.
23971  *		flag	- this argument is a pass through to ddi_copyxxx()
23972  *		          directly from the mode argument of ioctl().
23973  *
23974  * Return Code: the code returned by sd_send_scsi_cmd()
23975  *		EFAULT if ddi_copyxxx() fails
23976  *		ENXIO if fail ddi_get_soft_state
23977  *		EINVAL if data pointer is NULL
23978  */
23979 
23980 static int
23981 sr_play_trkind(dev_t dev, caddr_t data, int flag)
23982 {
23983 	struct cdrom_ti		ti_struct;
23984 	struct cdrom_ti		*ti = &ti_struct;
23985 	struct uscsi_cmd	*com = NULL;
23986 	char			cdb[CDB_GROUP1];
23987 	int			rval;
23988 
23989 	if (data == NULL) {
23990 		return (EINVAL);
23991 	}
23992 
23993 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
23994 		return (EFAULT);
23995 	}
23996 
23997 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
23998 	bzero(cdb, CDB_GROUP1);
23999 	cdb[0] = SCMD_PLAYAUDIO_TI;
24000 	cdb[4] = ti->cdti_trk0;
24001 	cdb[5] = ti->cdti_ind0;
24002 	cdb[7] = ti->cdti_trk1;
24003 	cdb[8] = ti->cdti_ind1;
24004 	com->uscsi_cdb    = cdb;
24005 	com->uscsi_cdblen = CDB_GROUP1;
24006 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24007 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24008 	    SD_PATH_STANDARD);
24009 	kmem_free(com, sizeof (*com));
24010 	return (rval);
24011 }
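
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * playing from track 1 index 1 through track 3 index 1 via
 * CDROMPLAYTRKIND.  The device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_ti ti;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	ti.cdti_trk0 = 1;  ti.cdti_ind0 = 1;	// starting track/index
 *	ti.cdti_trk1 = 3;  ti.cdti_ind1 = 1;	// ending track/index
 *	if (ioctl(fd, CDROMPLAYTRKIND, &ti) != 0)
 *		perror("CDROMPLAYTRKIND");
 */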
24012 
24013 
24014 /*
24015  *    Function: sr_read_all_subcodes()
24016  *
24017  * Description: This routine is the driver entry point for handling CD-ROM
24018  *		ioctl requests to return raw subcode data while the target is
24019  *		playing audio (CDROMSUBCODE).
24020  *
24021  *   Arguments: dev	- the device 'dev_t'
24022  *		data	- pointer to user provided cdrom subcode structure,
24023  *		          specifying the transfer length and address.
24024  *		flag	- this argument is a pass through to ddi_copyxxx()
24025  *		          directly from the mode argument of ioctl().
24026  *
24027  * Return Code: the code returned by sd_send_scsi_cmd()
24028  *		EFAULT if ddi_copyxxx() fails
24029  *		ENXIO if fail ddi_get_soft_state
24030  *		EINVAL if data pointer is NULL
24031  */
24032 
24033 static int
24034 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24035 {
24036 	struct sd_lun		*un = NULL;
24037 	struct uscsi_cmd	*com = NULL;
24038 	struct cdrom_subcode	*subcode = NULL;
24039 	int			rval;
24040 	size_t			buflen;
24041 	char			cdb[CDB_GROUP5];
24042 
24043 #ifdef _MULTI_DATAMODEL
24044 	/* To support ILP32 applications in an LP64 world */
24045 	struct cdrom_subcode32		cdrom_subcode32;
24046 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24047 #endif
24048 	if (data == NULL) {
24049 		return (EINVAL);
24050 	}
24051 
24052 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24053 		return (ENXIO);
24054 	}
24055 
24056 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24057 
24058 #ifdef _MULTI_DATAMODEL
24059 	switch (ddi_model_convert_from(flag & FMODELS)) {
24060 	case DDI_MODEL_ILP32:
24061 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24062 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24063 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24064 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24065 			return (EFAULT);
24066 		}
24067 		/* Convert the ILP32 uscsi data from the application to LP64 */
24068 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24069 		break;
24070 	case DDI_MODEL_NONE:
24071 		if (ddi_copyin(data, subcode,
24072 		    sizeof (struct cdrom_subcode), flag)) {
24073 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24074 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24075 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24076 			return (EFAULT);
24077 		}
24078 		break;
24079 	}
24080 #else /* ! _MULTI_DATAMODEL */
24081 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24082 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24083 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24084 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24085 		return (EFAULT);
24086 	}
24087 #endif /* _MULTI_DATAMODEL */
24088 
24089 	/*
24090 	 * Since MMC-2 expects max 3 bytes for length, check if the
24091 	 * length input is greater than 3 bytes
24092 	 */
24093 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24094 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24095 		    "sr_read_all_subcodes: "
24096 		    "cdrom transfer length too large: %d (limit %d)\n",
24097 		    subcode->cdsc_length, 0xFFFFFF);
24098 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24099 		return (EINVAL);
24100 	}
24101 
24102 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24103 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24104 	bzero(cdb, CDB_GROUP5);
24105 
24106 	if (un->un_f_mmc_cap == TRUE) {
24107 		cdb[0] = (char)SCMD_READ_CD;
24108 		cdb[2] = (char)0xff;
24109 		cdb[3] = (char)0xff;
24110 		cdb[4] = (char)0xff;
24111 		cdb[5] = (char)0xff;
24112 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24113 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24114 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24115 		cdb[10] = 1;
24116 	} else {
24117 		/*
24118 		 * Note: A vendor specific command (0xDF) is being used here to
24119 		 * request a read of all subcodes.
24120 		 */
24121 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24122 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24123 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24124 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24125 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24126 	}
24127 	com->uscsi_cdb	   = cdb;
24128 	com->uscsi_cdblen  = CDB_GROUP5;
24129 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24130 	com->uscsi_buflen  = buflen;
24131 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24132 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24133 	    SD_PATH_STANDARD);
24134 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24135 	kmem_free(com, sizeof (*com));
24136 	return (rval);
24137 }
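
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * reading one block of raw subcode data via CDROMSUBCODE.  cdsc_length
 * is a block count; the routine above allocates CDROM_BLK_SUBCODE
 * bytes of transfer space per block.  The device path is an
 * illustrative assumption.
 *
 *	#include <sys/types.h>
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_subcode sc;
 *	unsigned char buf[96];			// CDROM_BLK_SUBCODE bytes
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	sc.cdsc_length = 1;			// one block of subcodes
 *	sc.cdsc_addr = (caddr_t)buf;
 *	if (ioctl(fd, CDROMSUBCODE, &sc) != 0)
 *		perror("CDROMSUBCODE");
 */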
24138 
24139 
24140 /*
24141  *    Function: sr_read_subchannel()
24142  *
24143  * Description: This routine is the driver entry point for handling CD-ROM
24144  *		ioctl requests to return the Q sub-channel data of the CD
24145  *		current position block (CDROMSUBCHNL). The data includes the
24146  *		track number, index number, absolute CD-ROM address (LBA or MSF
24147  *		format per the user), track relative CD-ROM address (LBA or MSF
24148  *		format per the user), control data, and audio status.
24149  *
24150  *   Arguments: dev	- the device 'dev_t'
24151  *		data	- pointer to user provided cdrom sub-channel structure
24152  *		flag	- this argument is a pass through to ddi_copyxxx()
24153  *		          directly from the mode argument of ioctl().
24154  *
24155  * Return Code: the code returned by sd_send_scsi_cmd()
24156  *		EFAULT if ddi_copyxxx() fails
24157  *		ENXIO if fail ddi_get_soft_state
24158  *		EINVAL if data pointer is NULL
24159  */
24160 
24161 static int
24162 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24163 {
24164 	struct sd_lun		*un;
24165 	struct uscsi_cmd	*com;
24166 	struct cdrom_subchnl	subchannel;
24167 	struct cdrom_subchnl	*subchnl = &subchannel;
24168 	char			cdb[CDB_GROUP1];
24169 	caddr_t			buffer;
24170 	int			rval;
24171 
24172 	if (data == NULL) {
24173 		return (EINVAL);
24174 	}
24175 
24176 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24177 	    (un->un_state == SD_STATE_OFFLINE)) {
24178 		return (ENXIO);
24179 	}
24180 
24181 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24182 		return (EFAULT);
24183 	}
24184 
24185 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24186 	bzero(cdb, CDB_GROUP1);
24187 	cdb[0] = SCMD_READ_SUBCHANNEL;
24188 	/* Set the MSF bit based on the user requested address format */
24189 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24190 	/*
24191 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
24192 	 * returned
24193 	 */
24194 	cdb[2] = 0x40;
24195 	/*
24196 	 * Set byte 3 to specify the return data format. A value of 0x01
24197 	 * indicates that the CD-ROM current position should be returned.
24198 	 */
24199 	cdb[3] = 0x01;
24200 	cdb[8] = 0x10;
24201 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24202 	com->uscsi_cdb	   = cdb;
24203 	com->uscsi_cdblen  = CDB_GROUP1;
24204 	com->uscsi_bufaddr = buffer;
24205 	com->uscsi_buflen  = 16;
24206 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24207 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24208 	    SD_PATH_STANDARD);
24209 	if (rval != 0) {
24210 		kmem_free(buffer, 16);
24211 		kmem_free(com, sizeof (*com));
24212 		return (rval);
24213 	}
24214 
24215 	/* Process the returned Q sub-channel data */
24216 	subchnl->cdsc_audiostatus = buffer[1];
24217 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24218 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24219 	subchnl->cdsc_trk	= buffer[6];
24220 	subchnl->cdsc_ind	= buffer[7];
24221 	if (subchnl->cdsc_format & CDROM_LBA) {
24222 		subchnl->cdsc_absaddr.lba =
24223 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24224 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24225 		subchnl->cdsc_reladdr.lba =
24226 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24227 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24228 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24229 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24230 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24231 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24232 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24233 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24234 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24235 	} else {
24236 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24237 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24238 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24239 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24240 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24241 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24242 	}
24243 	kmem_free(buffer, 16);
24244 	kmem_free(com, sizeof (*com));
24245 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24246 	    != 0) {
24247 		return (EFAULT);
24248 	}
24249 	return (rval);
24250 }
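
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * polling the Q sub-channel for the current audio position via
 * CDROMSUBCHNL.  cdsc_format selects the address format returned.
 * The device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_subchnl sc;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	sc.cdsc_format = CDROM_MSF;		// want MSF addresses
 *	if (ioctl(fd, CDROMSUBCHNL, &sc) == 0 &&
 *	    sc.cdsc_audiostatus == CDROM_AUDIO_PLAY)
 *		(void) printf("track %d index %d\n",
 *		    sc.cdsc_trk, sc.cdsc_ind);
 */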
24251 
24252 
24253 /*
24254  *    Function: sr_read_tocentry()
24255  *
24256  * Description: This routine is the driver entry point for handling CD-ROM
24257  *		ioctl requests to read from the Table of Contents (TOC)
24258  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24259  *		fields, the starting address (LBA or MSF format per the user)
24260  *		and the data mode if the user specified track is a data track.
24261  *
24262  *		Note: The READ HEADER (0x44) command used in this routine is
24263  *		obsolete per the SCSI MMC spec but still supported in the
24264  *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI;
24265  *		therefore the command is still implemented in this routine.
24266  *
24267  *   Arguments: dev	- the device 'dev_t'
24268  *		data	- pointer to user provided toc entry structure,
24269  *			  specifying the track # and the address format
24270  *			  (LBA or MSF).
24271  *		flag	- this argument is a pass through to ddi_copyxxx()
24272  *		          directly from the mode argument of ioctl().
24273  *
24274  * Return Code: the code returned by sd_send_scsi_cmd()
24275  *		EFAULT if ddi_copyxxx() fails
24276  *		ENXIO if fail ddi_get_soft_state
24277  *		EINVAL if data pointer is NULL
24278  */
24279 
24280 static int
24281 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24282 {
24283 	struct sd_lun		*un = NULL;
24284 	struct uscsi_cmd	*com;
24285 	struct cdrom_tocentry	toc_entry;
24286 	struct cdrom_tocentry	*entry = &toc_entry;
24287 	caddr_t			buffer;
24288 	int			rval;
24289 	char			cdb[CDB_GROUP1];
24290 
24291 	if (data == NULL) {
24292 		return (EINVAL);
24293 	}
24294 
24295 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24296 	    (un->un_state == SD_STATE_OFFLINE)) {
24297 		return (ENXIO);
24298 	}
24299 
24300 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24301 		return (EFAULT);
24302 	}
24303 
24304 	/* Validate the requested track and address format */
24305 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24306 		return (EINVAL);
24307 	}
24308 
24309 	if (entry->cdte_track == 0) {
24310 		return (EINVAL);
24311 	}
24312 
24313 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24314 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24315 	bzero(cdb, CDB_GROUP1);
24316 
24317 	cdb[0] = SCMD_READ_TOC;
24318 	/* Set the MSF bit based on the user requested address format  */
24319 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24320 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24321 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24322 	} else {
24323 		cdb[6] = entry->cdte_track;
24324 	}
24325 
24326 	/*
24327 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24328 	 * (4 byte TOC response header + 8 byte track descriptor)
24329 	 */
24330 	cdb[8] = 12;
24331 	com->uscsi_cdb	   = cdb;
24332 	com->uscsi_cdblen  = CDB_GROUP1;
24333 	com->uscsi_bufaddr = buffer;
24334 	com->uscsi_buflen  = 0x0C;
24335 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24336 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24337 	    SD_PATH_STANDARD);
24338 	if (rval != 0) {
24339 		kmem_free(buffer, 12);
24340 		kmem_free(com, sizeof (*com));
24341 		return (rval);
24342 	}
24343 
24344 	/* Process the toc entry */
24345 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24346 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24347 	if (entry->cdte_format & CDROM_LBA) {
24348 		entry->cdte_addr.lba =
24349 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24350 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24351 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24352 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24353 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24354 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24355 		/*
24356 		 * Send a READ TOC command using the LBA address format to get
24357 		 * the LBA for the track requested so it can be used in the
24358 		 * READ HEADER request
24359 		 *
24360 		 * Note: The MSF bit of the READ HEADER command specifies the
24361 		 * output format. The block address specified in that command
24362 		 * must be in LBA format.
24363 		 */
24364 		cdb[1] = 0;
24365 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24366 		    SD_PATH_STANDARD);
24367 		if (rval != 0) {
24368 			kmem_free(buffer, 12);
24369 			kmem_free(com, sizeof (*com));
24370 			return (rval);
24371 		}
24372 	} else {
24373 		entry->cdte_addr.msf.minute	= buffer[9];
24374 		entry->cdte_addr.msf.second	= buffer[10];
24375 		entry->cdte_addr.msf.frame	= buffer[11];
24376 		/*
24377 		 * Send a READ TOC command using the LBA address format to get
24378 		 * the LBA for the track requested so it can be used in the
24379 		 * READ HEADER request
24380 		 *
24381 		 * Note: The MSF bit of the READ HEADER command specifies the
24382 		 * output format. The block address specified in that command
24383 		 * must be in LBA format.
24384 		 */
24385 		cdb[1] = 0;
24386 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24387 		    SD_PATH_STANDARD);
24388 		if (rval != 0) {
24389 			kmem_free(buffer, 12);
24390 			kmem_free(com, sizeof (*com));
24391 			return (rval);
24392 		}
24393 	}
24394 
24395 	/*
24396 	 * Build and send the READ HEADER command to determine the data mode of
24397 	 * the user specified track.
24398 	 */
24399 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24400 	    (entry->cdte_track != CDROM_LEADOUT)) {
24401 		bzero(cdb, CDB_GROUP1);
24402 		cdb[0] = SCMD_READ_HEADER;
24403 		cdb[2] = buffer[8];
24404 		cdb[3] = buffer[9];
24405 		cdb[4] = buffer[10];
24406 		cdb[5] = buffer[11];
24407 		cdb[8] = 0x08;
24408 		com->uscsi_buflen = 0x08;
24409 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24410 		    SD_PATH_STANDARD);
24411 		if (rval == 0) {
24412 			entry->cdte_datamode = buffer[0];
24413 		} else {
24414 			/*
24415 			 * The READ HEADER command failed. Since the command
24416 			 * is obsoleted in one spec, it is better to return
24417 			 * -1 for an invalid track so that we can still
24418 			 * receive the rest of the TOC data.
24419 			 */
24420 			entry->cdte_datamode = (uchar_t)-1;
24421 		}
24422 	} else {
24423 		entry->cdte_datamode = (uchar_t)-1;
24424 	}
24425 
24426 	kmem_free(buffer, 12);
24427 	kmem_free(com, sizeof (*com));
24428 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
24429 		return (EFAULT);
24430 
24431 	return (rval);
24432 }
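
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * fetching the TOC entry for track 1 in LBA format via
 * CDROMREADTOCENTRY.  The device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_tocentry te;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_LBA;		// or CDROM_MSF
 *	if (ioctl(fd, CDROMREADTOCENTRY, &te) == 0)
 *		(void) printf("start lba %d, datamode %d\n",
 *		    te.cdte_addr.lba, te.cdte_datamode);
 */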
24433 
24434 
24435 /*
24436  *    Function: sr_read_tochdr()
24437  *
24438  * Description: This routine is the driver entry point for handling CD-ROM
24439  *		ioctl requests to read the Table of Contents (TOC) header
24440  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
24441  *		and ending track numbers.
24442  *
24443  *   Arguments: dev	- the device 'dev_t'
24444  *		data	- pointer to user provided toc header structure,
24445  *			  specifying the starting and ending track numbers.
24446  *		flag	- this argument is a pass through to ddi_copyxxx()
24447  *			  directly from the mode argument of ioctl().
24448  *
24449  * Return Code: the code returned by sd_send_scsi_cmd()
24450  *		EFAULT if ddi_copyxxx() fails
24451  *		ENXIO if fail ddi_get_soft_state
24452  *		EINVAL if data pointer is NULL
24453  */
24454 
24455 static int
24456 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24457 {
24458 	struct sd_lun		*un;
24459 	struct uscsi_cmd	*com;
24460 	struct cdrom_tochdr	toc_header;
24461 	struct cdrom_tochdr	*hdr = &toc_header;
24462 	char			cdb[CDB_GROUP1];
24463 	int			rval;
24464 	caddr_t			buffer;
24465 
24466 	if (data == NULL) {
24467 		return (EINVAL);
24468 	}
24469 
24470 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24471 	    (un->un_state == SD_STATE_OFFLINE)) {
24472 		return (ENXIO);
24473 	}
24474 
24475 	buffer = kmem_zalloc(4, KM_SLEEP);
24476 	bzero(cdb, CDB_GROUP1);
24477 	cdb[0] = SCMD_READ_TOC;
24478 	/*
24479 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24480 	 * that the TOC header should be returned
24481 	 */
24482 	cdb[6] = 0x00;
24483 	/*
24484 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24485 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24486 	 */
24487 	cdb[8] = 0x04;
24488 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24489 	com->uscsi_cdb	   = cdb;
24490 	com->uscsi_cdblen  = CDB_GROUP1;
24491 	com->uscsi_bufaddr = buffer;
24492 	com->uscsi_buflen  = 0x04;
24493 	com->uscsi_timeout = 300;
24494 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24495 
24496 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24497 	    SD_PATH_STANDARD);
24498 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24499 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24500 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24501 	} else {
24502 		hdr->cdth_trk0 = buffer[2];
24503 		hdr->cdth_trk1 = buffer[3];
24504 	}
24505 	kmem_free(buffer, 4);
24506 	kmem_free(com, sizeof (*com));
24507 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24508 		return (EFAULT);
24509 	}
24510 	return (rval);
24511 }
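
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * reading the TOC header to learn the first and last track numbers,
 * typically before issuing CDROMREADTOCENTRY for each track.  The
 * device path is an illustrative assumption.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_tochdr th;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	if (ioctl(fd, CDROMREADTOCHDR, &th) == 0)
 *		(void) printf("tracks %d through %d\n",
 *		    th.cdth_trk0, th.cdth_trk1);
 */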
24512 
24513 
24514 /*
24515  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24516  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
24517  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24518  * digital audio and extended architecture digital audio. These modes are
24519  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24520  * MMC specs.
24521  *
24522  * In addition to support for the various data formats these routines also
24523  * include support for devices that implement only the direct access READ
24524  * commands (0x08, 0x28), devices that implement the READ_CD commands
24525  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24526  * READ CDXA commands (0xD8, 0xDB)
24527  */
24528 
24529 /*
24530  *    Function: sr_read_mode1()
24531  *
24532  * Description: This routine is the driver entry point for handling CD-ROM
24533  *		ioctl read mode1 requests (CDROMREADMODE1).
24534  *
24535  *   Arguments: dev	- the device 'dev_t'
24536  *		data	- pointer to user provided cd read structure specifying
24537  *			  the lba buffer address and length.
24538  *		flag	- this argument is a pass through to ddi_copyxxx()
24539  *			  directly from the mode argument of ioctl().
24540  *
24541  * Return Code: the code returned by sd_send_scsi_cmd()
24542  *		EFAULT if ddi_copyxxx() fails
24543  *		ENXIO if fail ddi_get_soft_state
24544  *		EINVAL if data pointer is NULL
24545  */
24546 
24547 static int
24548 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24549 {
24550 	struct sd_lun		*un;
24551 	struct cdrom_read	mode1_struct;
24552 	struct cdrom_read	*mode1 = &mode1_struct;
24553 	int			rval;
24554 #ifdef _MULTI_DATAMODEL
24555 	/* To support ILP32 applications in an LP64 world */
24556 	struct cdrom_read32	cdrom_read32;
24557 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24558 #endif /* _MULTI_DATAMODEL */
24559 
24560 	if (data == NULL) {
24561 		return (EINVAL);
24562 	}
24563 
24564 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24565 	    (un->un_state == SD_STATE_OFFLINE)) {
24566 		return (ENXIO);
24567 	}
24568 
24569 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24570 	    "sd_read_mode1: entry: un:0x%p\n", un);
24571 
24572 #ifdef _MULTI_DATAMODEL
24573 	switch (ddi_model_convert_from(flag & FMODELS)) {
24574 	case DDI_MODEL_ILP32:
24575 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24576 			return (EFAULT);
24577 		}
24578 		/* Convert the ILP32 uscsi data from the application to LP64 */
24579 		cdrom_read32tocdrom_read(cdrd32, mode1);
24580 		break;
24581 	case DDI_MODEL_NONE:
24582 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24583 			return (EFAULT);
24584 		}
24585 	}
24586 #else /* ! _MULTI_DATAMODEL */
24587 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24588 		return (EFAULT);
24589 	}
24590 #endif /* _MULTI_DATAMODEL */
24591 
24592 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24593 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24594 
24595 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24596 	    "sd_read_mode1: exit: un:0x%p\n", un);
24597 
24598 	return (rval);
24599 }
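
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * reading one 2048-byte mode 1 sector via CDROMREADMODE1.  The device
 * path and starting address are illustrative assumptions; see
 * cdio(7I) for the addressing rules.
 *
 *	#include <sys/types.h>
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_read cr;
 *	char buf[2048];
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	cr.cdread_lba = 16;			// starting address
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	if (ioctl(fd, CDROMREADMODE1, &cr) != 0)
 *		perror("CDROMREADMODE1");
 */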
24600 
24601 
24602 /*
24603  *    Function: sr_read_cd_mode2()
24604  *
24605  * Description: This routine is the driver entry point for handling CD-ROM
24606  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24607  *		support the READ CD (0xBE) command or the 1st generation
24608  *		READ CD (0xD4) command.
24609  *
24610  *   Arguments: dev	- the device 'dev_t'
24611  *		data	- pointer to user provided cd read structure specifying
24612  *			  the lba buffer address and length.
24613  *		flag	- this argument is a pass through to ddi_copyxxx()
24614  *			  directly from the mode argument of ioctl().
24615  *
24616  * Return Code: the code returned by sd_send_scsi_cmd()
24617  *		EFAULT if ddi_copyxxx() fails
24618  *		ENXIO if fail ddi_get_soft_state
24619  *		EINVAL if data pointer is NULL
24620  */
24621 
24622 static int
24623 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
24624 {
24625 	struct sd_lun		*un;
24626 	struct uscsi_cmd	*com;
24627 	struct cdrom_read	mode2_struct;
24628 	struct cdrom_read	*mode2 = &mode2_struct;
24629 	uchar_t			cdb[CDB_GROUP5];
24630 	int			nblocks;
24631 	int			rval;
24632 #ifdef _MULTI_DATAMODEL
24633 	/*  To support ILP32 applications in an LP64 world */
24634 	struct cdrom_read32	cdrom_read32;
24635 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24636 #endif /* _MULTI_DATAMODEL */
24637 
24638 	if (data == NULL) {
24639 		return (EINVAL);
24640 	}
24641 
24642 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24643 	    (un->un_state == SD_STATE_OFFLINE)) {
24644 		return (ENXIO);
24645 	}
24646 
24647 #ifdef _MULTI_DATAMODEL
24648 	switch (ddi_model_convert_from(flag & FMODELS)) {
24649 	case DDI_MODEL_ILP32:
24650 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24651 			return (EFAULT);
24652 		}
24653 		/* Convert the ILP32 uscsi data from the application to LP64 */
24654 		cdrom_read32tocdrom_read(cdrd32, mode2);
24655 		break;
24656 	case DDI_MODEL_NONE:
24657 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24658 			return (EFAULT);
24659 		}
24660 		break;
24661 	}
24662 
24663 #else /* ! _MULTI_DATAMODEL */
24664 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24665 		return (EFAULT);
24666 	}
24667 #endif /* _MULTI_DATAMODEL */
24668 
24669 	bzero(cdb, sizeof (cdb));
24670 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
24671 		/* Read command supported by 1st generation atapi drives */
24672 		cdb[0] = SCMD_READ_CDD4;
24673 	} else {
24674 		/* Universal CD Access Command */
24675 		cdb[0] = SCMD_READ_CD;
24676 	}
24677 
24678 	/*
24679 	 * Set expected sector type to: 2336 byte, Mode 2 (Yellow Book)
24680 	 */
24681 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
24682 
24683 	/* set the start address */
24684 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
24685 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
24686 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24687 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
24688 
24689 	/* set the transfer length */
24690 	nblocks = mode2->cdread_buflen / 2336;
24691 	cdb[6] = (uchar_t)(nblocks >> 16);
24692 	cdb[7] = (uchar_t)(nblocks >> 8);
24693 	cdb[8] = (uchar_t)nblocks;
24694 
24695 	/* set the filter bits */
24696 	cdb[9] = CDROM_READ_CD_USERDATA;
24697 
24698 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24699 	com->uscsi_cdb = (caddr_t)cdb;
24700 	com->uscsi_cdblen = sizeof (cdb);
24701 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24702 	com->uscsi_buflen = mode2->cdread_buflen;
24703 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24704 
24705 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24706 	    SD_PATH_STANDARD);
24707 	kmem_free(com, sizeof (*com));
24708 	return (rval);
24709 }
24710 
24711 
24712 /*
24713  *    Function: sr_read_mode2()
24714  *
24715  * Description: This routine is the driver entry point for handling CD-ROM
24716  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24717  *		do not support the READ CD (0xBE) command.
24718  *
24719  *   Arguments: dev	- the device 'dev_t'
24720  *		data	- pointer to user provided cd read structure specifying
24721  *			  the lba buffer address and length.
24722  *		flag	- this argument is a pass through to ddi_copyxxx()
24723  *			  directly from the mode argument of ioctl().
24724  *
24725  * Return Code: the code returned by sd_send_scsi_cmd()
24726  *		EFAULT if ddi_copyxxx() fails
24727  *		ENXIO if fail ddi_get_soft_state
24728  *		EINVAL if data pointer is NULL
24729  *		EIO if fail to reset block size
24730  *		EAGAIN if commands are in progress in the driver
24731  */
24732 
24733 static int
24734 sr_read_mode2(dev_t dev, caddr_t data, int flag)
24735 {
24736 	struct sd_lun		*un;
24737 	struct cdrom_read	mode2_struct;
24738 	struct cdrom_read	*mode2 = &mode2_struct;
24739 	int			rval;
24740 	uint32_t		restore_blksize;
24741 	struct uscsi_cmd	*com;
24742 	uchar_t			cdb[CDB_GROUP0];
24743 	int			nblocks;
24744 
24745 #ifdef _MULTI_DATAMODEL
24746 	/* To support ILP32 applications in an LP64 world */
24747 	struct cdrom_read32	cdrom_read32;
24748 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24749 #endif /* _MULTI_DATAMODEL */
24750 
24751 	if (data == NULL) {
24752 		return (EINVAL);
24753 	}
24754 
24755 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24756 	    (un->un_state == SD_STATE_OFFLINE)) {
24757 		return (ENXIO);
24758 	}
24759 
24760 	/*
24761 	 * Because this routine will update the device and driver block size
24762 	 * being used we want to make sure there are no commands in progress.
24763 	 * If commands are in progress the user will have to try again.
24764 	 *
24765 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
24766 	 * in sdioctl to protect commands from sdioctl through to the top of
24767 	 * sd_uscsi_strategy. See sdioctl for details.
24768 	 */
24769 	mutex_enter(SD_MUTEX(un));
24770 	if (un->un_ncmds_in_driver != 1) {
24771 		mutex_exit(SD_MUTEX(un));
24772 		return (EAGAIN);
24773 	}
24774 	mutex_exit(SD_MUTEX(un));
24775 
24776 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24777 	    "sd_read_mode2: entry: un:0x%p\n", un);
24778 
24779 #ifdef _MULTI_DATAMODEL
24780 	switch (ddi_model_convert_from(flag & FMODELS)) {
24781 	case DDI_MODEL_ILP32:
24782 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24783 			return (EFAULT);
24784 		}
24785 		/* Convert the ILP32 uscsi data from the application to LP64 */
24786 		cdrom_read32tocdrom_read(cdrd32, mode2);
24787 		break;
24788 	case DDI_MODEL_NONE:
24789 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
24790 			return (EFAULT);
24791 		}
24792 		break;
24793 	}
24794 #else /* ! _MULTI_DATAMODEL */
24795 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
24796 		return (EFAULT);
24797 	}
24798 #endif /* _MULTI_DATAMODEL */
24799 
24800 	/* Store the current target block size for restoration later */
24801 	restore_blksize = un->un_tgt_blocksize;
24802 
24803 	/* Change the device and soft state target block size to 2336 */
24804 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
24805 		rval = EIO;
24806 		goto done;
24807 	}
24808 
24809 
24810 	bzero(cdb, sizeof (cdb));
24811 
24812 	/* set READ operation */
24813 	cdb[0] = SCMD_READ;
24814 
24815 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
24816 	mode2->cdread_lba >>= 2;
24817 
24818 	/* set the start address */
24819 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
24820 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
24821 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
24822 
24823 	/* set the transfer length */
24824 	nblocks = mode2->cdread_buflen / 2336;
24825 	cdb[4] = (uchar_t)nblocks & 0xFF;
24826 
24827 	/* build command */
24828 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24829 	com->uscsi_cdb = (caddr_t)cdb;
24830 	com->uscsi_cdblen = sizeof (cdb);
24831 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
24832 	com->uscsi_buflen = mode2->cdread_buflen;
24833 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24834 
24835 	/*
24836 	 * Issue SCSI command with user space address for read buffer.
24837 	 *
24838 	 * This sends the command through main channel in the driver.
24839 	 *
24840 	 * Since this is accessed via an IOCTL call, we go through the
24841 	 * standard path, so that if the device was powered down, then
24842 	 * it would be 'awakened' to handle the command.
24843 	 */
24844 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24845 	    SD_PATH_STANDARD);
24846 
24847 	kmem_free(com, sizeof (*com));
24848 
24849 	/* Restore the device and soft state target block size */
24850 	if (sr_sector_mode(dev, restore_blksize) != 0) {
24851 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24852 		    "can't do switch back to mode 1\n");
24853 		/*
24854 		 * If sd_send_scsi_READ succeeded we still need to report
24855 		 * an error because we failed to reset the block size
24856 		 */
24857 		if (rval == 0) {
24858 			rval = EIO;
24859 		}
24860 	}
24861 
24862 done:
24863 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
24864 	    "sd_read_mode2: exit: un:0x%p\n", un);
24865 
24866 	return (rval);
24867 }
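
/*
 * Usage sketch (editorial addition, not part of the original driver):
 * reading one 2336-byte mode 2 sector via CDROMREADMODE2.  For targets
 * without READ CD support the routine above temporarily switches the
 * device block size, so the buffer should be a multiple of 2336 bytes.
 * The device path and starting address are illustrative assumptions.
 *
 *	#include <sys/cdio.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	struct cdrom_read cr;
 *	char buf[2336];
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *
 *	cr.cdread_lba = 16;		// starting address; see cdio(7I)
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	if (ioctl(fd, CDROMREADMODE2, &cr) != 0)
 *		perror("CDROMREADMODE2");
 */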
24868 
24869 
24870 /*
24871  *    Function: sr_sector_mode()
24872  *
24873  * Description: This utility function is used by sr_read_mode2 to set the target
24874  *		block size based on the user specified size. This is a legacy
24875  *		implementation based upon a vendor specific mode page.
24876  *
24877  *   Arguments: dev	- the device 'dev_t'
24878  *		blksize	- the block size being set; either 2336 or
24879  *			  512.
24880  *
24881  * Return Code: the code returned by sd_send_scsi_cmd()
24882  *		EFAULT if ddi_copyxxx() fails
24883  *		ENXIO if fail ddi_get_soft_state
24884  *		EINVAL if data pointer is NULL
24885  */
24886 
24887 static int
24888 sr_sector_mode(dev_t dev, uint32_t blksize)
24889 {
24890 	struct sd_lun	*un;
24891 	uchar_t		*sense;
24892 	uchar_t		*select;
24893 	int		rval;
24894 
24895 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24896 	    (un->un_state == SD_STATE_OFFLINE)) {
24897 		return (ENXIO);
24898 	}
24899 
24900 	sense = kmem_zalloc(20, KM_SLEEP);
24901 
24902 	/* Note: This is a vendor specific mode page (0x81) */
24903 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
24904 	    SD_PATH_STANDARD)) != 0) {
24905 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24906 		    "sr_sector_mode: Mode Sense failed\n");
24907 		kmem_free(sense, 20);
24908 		return (rval);
24909 	}
24910 	select = kmem_zalloc(20, KM_SLEEP);
24911 	select[3] = 0x08;
24912 	select[10] = ((blksize >> 8) & 0xff);
24913 	select[11] = (blksize & 0xff);
24914 	select[12] = 0x01;
24915 	select[13] = 0x06;
24916 	select[14] = sense[14];
24917 	select[15] = sense[15];
24918 	if (blksize == SD_MODE2_BLKSIZE) {
24919 		select[14] |= 0x01;
24920 	}
24921 
24922 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
24923 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24924 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
24925 		    "sr_sector_mode: Mode Select failed\n");
24926 	} else {
24927 		/*
24928 		 * Only update the softstate block size if we successfully
24929 		 * changed the device block mode.
24930 		 */
24931 		mutex_enter(SD_MUTEX(un));
24932 		sd_update_block_info(un, blksize, 0);
24933 		mutex_exit(SD_MUTEX(un));
24934 	}
24935 	kmem_free(sense, 20);
24936 	kmem_free(select, 20);
24937 	return (rval);
24938 }
24939 
24940 
24941 /*
24942  *    Function: sr_read_cdda()
24943  *
24944  * Description: This routine is the driver entry point for handling CD-ROM
24945  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
24946  *		the target supports CDDA, these requests are handled via a
24947  *		vendor specific command (0xD8). If the target does not support
24948  *		CDDA, these requests are handled via the READ CD command (0xBE).
24949  *
24950  *   Arguments: dev	- the device 'dev_t'
24951  *		data	- pointer to user provided CD-DA structure specifying
24952  *			  the track starting address, transfer length, and
24953  *			  subcode options.
24954  *		flag	- this argument is a pass through to ddi_copyxxx()
24955  *			  directly from the mode argument of ioctl().
24956  *
24957  * Return Code: the code returned by sd_send_scsi_cmd()
24958  *		EFAULT if ddi_copyxxx() fails
24959  *		ENXIO if fail ddi_get_soft_state
24960  *		EINVAL if invalid arguments are provided
24961  *		ENOTTY if the requested subcode cannot be returned via READ CD
24962  */
24963 
24964 static int
24965 sr_read_cdda(dev_t dev, caddr_t data, int flag)
24966 {
24967 	struct sd_lun			*un;
24968 	struct uscsi_cmd		*com;
24969 	struct cdrom_cdda		*cdda;
24970 	int				rval;
24971 	size_t				buflen;
24972 	char				cdb[CDB_GROUP5];
24973 
24974 #ifdef _MULTI_DATAMODEL
24975 	/* To support ILP32 applications in an LP64 world */
24976 	struct cdrom_cdda32	cdrom_cdda32;
24977 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
24978 #endif /* _MULTI_DATAMODEL */
24979 
24980 	if (data == NULL) {
24981 		return (EINVAL);
24982 	}
24983 
24984 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24985 		return (ENXIO);
24986 	}
24987 
24988 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
24989 
24990 #ifdef _MULTI_DATAMODEL
24991 	switch (ddi_model_convert_from(flag & FMODELS)) {
24992 	case DDI_MODEL_ILP32:
24993 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
24994 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24995 			    "sr_read_cdda: ddi_copyin Failed\n");
24996 			kmem_free(cdda, sizeof (struct cdrom_cdda));
24997 			return (EFAULT);
24998 		}
24999 		/* Convert the ILP32 uscsi data from the application to LP64 */
25000 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
25001 		break;
25002 	case DDI_MODEL_NONE:
25003 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25004 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25005 			    "sr_read_cdda: ddi_copyin Failed\n");
25006 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25007 			return (EFAULT);
25008 		}
25009 		break;
25010 	}
25011 #else /* ! _MULTI_DATAMODEL */
25012 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25013 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25014 		    "sr_read_cdda: ddi_copyin Failed\n");
25015 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25016 		return (EFAULT);
25017 	}
25018 #endif /* _MULTI_DATAMODEL */
25019 
25020 	/*
25021 	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
25022 	 * any length that does not fit in 3 bytes.
25023 	 */
25024 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25025 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25026 		    "cdrom transfer length too large: %d (limit %d)\n",
25027 		    cdda->cdda_length, 0xFFFFFF);
25028 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25029 		return (EINVAL);
25030 	}
25031 
25032 	switch (cdda->cdda_subcode) {
25033 	case CDROM_DA_NO_SUBCODE:
25034 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25035 		break;
25036 	case CDROM_DA_SUBQ:
25037 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25038 		break;
25039 	case CDROM_DA_ALL_SUBCODE:
25040 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25041 		break;
25042 	case CDROM_DA_SUBCODE_ONLY:
25043 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25044 		break;
25045 	default:
25046 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25047 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25048 		    cdda->cdda_subcode);
25049 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25050 		return (EINVAL);
25051 	}
25052 
25053 	/* Build and send the command */
25054 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25055 	bzero(cdb, CDB_GROUP5);
25056 
25057 	if (un->un_f_cfg_cdda == TRUE) {
25058 		cdb[0] = (char)SCMD_READ_CD;
25059 		cdb[1] = 0x04;
25060 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25061 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25062 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25063 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25064 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25065 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25066 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25067 		cdb[9] = 0x10;
25068 		switch (cdda->cdda_subcode) {
25069 		case CDROM_DA_NO_SUBCODE :
25070 			cdb[10] = 0x0;
25071 			break;
25072 		case CDROM_DA_SUBQ :
25073 			cdb[10] = 0x2;
25074 			break;
25075 		case CDROM_DA_ALL_SUBCODE :
25076 			cdb[10] = 0x1;
25077 			break;
25078 		case CDROM_DA_SUBCODE_ONLY :
25079 			/* FALLTHROUGH */
25080 		default :
25081 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25082 			kmem_free(com, sizeof (*com));
25083 			return (ENOTTY);
25084 		}
25085 	} else {
25086 		cdb[0] = (char)SCMD_READ_CDDA;
25087 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25088 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25089 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25090 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25091 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25092 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25093 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25094 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25095 		cdb[10] = cdda->cdda_subcode;
25096 	}
25097 
25098 	com->uscsi_cdb = cdb;
25099 	com->uscsi_cdblen = CDB_GROUP5;
25100 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25101 	com->uscsi_buflen = buflen;
25102 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25103 
25104 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25105 	    SD_PATH_STANDARD);
25106 
25107 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25108 	kmem_free(com, sizeof (*com));
25109 	return (rval);
25110 }
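
/*
 * Illustrative user-level sketch (editorial addition, not part of the
 * driver): issuing CDROMCDDA against this entry point. The structure
 * fields match those consumed above; the device path and the 10-block
 * transfer are hypothetical, and the buffer is sized for the
 * CDROM_DA_NO_SUBCODE case (2352 bytes per block) per the buflen
 * computation above.
 *
 *	struct cdrom_cdda cdda;
 *	uchar_t buf[10 * 2352];
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *
 *	cdda.cdda_addr = 0;			(starting block address)
 *	cdda.cdda_length = 10;			(blocks to transfer)
 *	cdda.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *	cdda.cdda_data = buf;
 *	if (ioctl(fd, CDROMCDDA, &cdda) < 0)
 *		perror("CDROMCDDA");
 */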
25111 
25112 
25113 /*
25114  *    Function: sr_read_cdxa()
25115  *
25116  * Description: This routine is the driver entry point for handling CD-ROM
25117  *		ioctl requests to return CD-XA (Extended Architecture) data.
25118  *		(CDROMCDXA).
25119  *
25120  *   Arguments: dev	- the device 'dev_t'
25121  *		data	- pointer to user provided CD-XA structure specifying
25122  *			  the data starting address, transfer length, and format
25123  *		flag	- this argument is a pass through to ddi_copyxxx()
25124  *			  directly from the mode argument of ioctl().
25125  *
25126  * Return Code: the code returned by sd_send_scsi_cmd()
25127  *		EFAULT if ddi_copyxxx() fails
25128  *		ENXIO if fail ddi_get_soft_state
25129  *		EINVAL if data pointer is NULL
25130  */
25131 
25132 static int
25133 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25134 {
25135 	struct sd_lun		*un;
25136 	struct uscsi_cmd	*com;
25137 	struct cdrom_cdxa	*cdxa;
25138 	int			rval;
25139 	size_t			buflen;
25140 	char			cdb[CDB_GROUP5];
25141 	uchar_t			read_flags;
25142 
25143 #ifdef _MULTI_DATAMODEL
25144 	/* To support ILP32 applications in an LP64 world */
25145 	struct cdrom_cdxa32		cdrom_cdxa32;
25146 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25147 #endif /* _MULTI_DATAMODEL */
25148 
25149 	if (data == NULL) {
25150 		return (EINVAL);
25151 	}
25152 
25153 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25154 		return (ENXIO);
25155 	}
25156 
25157 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25158 
25159 #ifdef _MULTI_DATAMODEL
25160 	switch (ddi_model_convert_from(flag & FMODELS)) {
25161 	case DDI_MODEL_ILP32:
25162 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25163 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25164 			return (EFAULT);
25165 		}
25166 		/*
25167 		 * Convert the ILP32 uscsi data from the
25168 		 * application to LP64 for internal use.
25169 		 */
25170 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25171 		break;
25172 	case DDI_MODEL_NONE:
25173 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25174 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25175 			return (EFAULT);
25176 		}
25177 		break;
25178 	}
25179 #else /* ! _MULTI_DATAMODEL */
25180 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25181 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25182 		return (EFAULT);
25183 	}
25184 #endif /* _MULTI_DATAMODEL */
25185 
25186 	/*
25187 	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
25188 	 * any length that does not fit in 3 bytes.
25189 	 */
25190 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25191 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25192 		    "cdrom transfer length too large: %d (limit %d)\n",
25193 		    cdxa->cdxa_length, 0xFFFFFF);
25194 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25195 		return (EINVAL);
25196 	}
25197 
25198 	switch (cdxa->cdxa_format) {
25199 	case CDROM_XA_DATA:
25200 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25201 		read_flags = 0x10;
25202 		break;
25203 	case CDROM_XA_SECTOR_DATA:
25204 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25205 		read_flags = 0xf8;
25206 		break;
25207 	case CDROM_XA_DATA_W_ERROR:
25208 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25209 		read_flags = 0xfc;
25210 		break;
25211 	default:
25212 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25213 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25214 		    cdxa->cdxa_format);
25215 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25216 		return (EINVAL);
25217 	}
25218 
25219 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25220 	bzero(cdb, CDB_GROUP5);
25221 	if (un->un_f_mmc_cap == TRUE) {
25222 		cdb[0] = (char)SCMD_READ_CD;
25223 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25224 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25225 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25226 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25227 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25228 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25229 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25230 		cdb[9] = (char)read_flags;
25231 	} else {
25232 		/*
25233 		 * Note: A vendor-specific command (0xDB) is being used here to
25234 		 * request a read of all subcodes.
25235 		 */
25236 		cdb[0] = (char)SCMD_READ_CDXA;
25237 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25238 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25239 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25240 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25241 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25242 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25243 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25244 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25245 		cdb[10] = cdxa->cdxa_format;
25246 	}
25247 	com->uscsi_cdb	   = cdb;
25248 	com->uscsi_cdblen  = CDB_GROUP5;
25249 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25250 	com->uscsi_buflen  = buflen;
25251 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25252 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25253 	    SD_PATH_STANDARD);
25254 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25255 	kmem_free(com, sizeof (*com));
25256 	return (rval);
25257 }
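
/*
 * Editorial note: the read_flags values chosen above tell READ CD how
 * much of each sector to return, and they pair with the buflen
 * computation in the switch: 0x10 returns 2048-byte user data blocks,
 * 0xf8 returns full 2352-byte raw blocks, and 0xfc returns 2646-byte
 * blocks (raw data plus error information). The precise bit meanings
 * are defined by CDB byte 9 of the MMC READ CD command.
 */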
25258 
25259 
25260 /*
25261  *    Function: sr_eject()
25262  *
25263  * Description: This routine is the driver entry point for handling CD-ROM
25264  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25265  *
25266  *   Arguments: dev	- the device 'dev_t'
25267  *
25268  * Return Code: the code returned by sd_send_scsi_cmd()
25269  */
25270 
25271 static int
25272 sr_eject(dev_t dev)
25273 {
25274 	struct sd_lun	*un;
25275 	int		rval;
25276 
25277 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25278 	    (un->un_state == SD_STATE_OFFLINE)) {
25279 		return (ENXIO);
25280 	}
25281 
25282 	/*
25283 	 * To prevent race conditions with the eject command, keep track
25284 	 * of an eject command as it progresses. If we are already
25285 	 * handling an eject command in the driver for the given unit and
25286 	 * another request to eject is received, immediately return EAGAIN
25287 	 * so we don't lose the command if the current eject command fails.
25288 	 */
25291 	mutex_enter(SD_MUTEX(un));
25292 	if (un->un_f_ejecting == TRUE) {
25293 		mutex_exit(SD_MUTEX(un));
25294 		return (EAGAIN);
25295 	}
25296 	un->un_f_ejecting = TRUE;
25297 	mutex_exit(SD_MUTEX(un));
25298 
25299 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25300 	    SD_PATH_STANDARD)) != 0) {
25301 		mutex_enter(SD_MUTEX(un));
25302 		un->un_f_ejecting = FALSE;
25303 		mutex_exit(SD_MUTEX(un));
25304 		return (rval);
25305 	}
25306 
25307 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25308 	    SD_PATH_STANDARD);
25309 
25310 	if (rval == 0) {
25311 		mutex_enter(SD_MUTEX(un));
25312 		sr_ejected(un);
25313 		un->un_mediastate = DKIO_EJECTED;
25314 		un->un_f_ejecting = FALSE;
25315 		cv_broadcast(&un->un_state_cv);
25316 		mutex_exit(SD_MUTEX(un));
25317 	} else {
25318 		mutex_enter(SD_MUTEX(un));
25319 		un->un_f_ejecting = FALSE;
25320 		mutex_exit(SD_MUTEX(un));
25321 	}
25322 	return (rval);
25323 }
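
/*
 * Usage sketch (editorial; the device path is hypothetical): from user
 * level this entry point is reached through the eject ioctls named
 * above, e.g.:
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	if (ioctl(fd, CDROMEJECT, 0) < 0)
 *		perror("CDROMEJECT");
 */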
25324 
25325 
25326 /*
25327  *    Function: sr_ejected()
25328  *
25329  * Description: This routine updates the soft state structure to invalidate the
25330  *		geometry information after the media has been ejected or a
25331  *		media eject has been detected.
25332  *
25333  *   Arguments: un - driver soft state (unit) structure
25334  */
25335 
25336 static void
25337 sr_ejected(struct sd_lun *un)
25338 {
25339 	struct sd_errstats *stp;
25340 
25341 	ASSERT(un != NULL);
25342 	ASSERT(mutex_owned(SD_MUTEX(un)));
25343 
25344 	un->un_f_blockcount_is_valid	= FALSE;
25345 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25346 	mutex_exit(SD_MUTEX(un));
25347 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25348 	mutex_enter(SD_MUTEX(un));
25349 
25350 	if (un->un_errstats != NULL) {
25351 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25352 		stp->sd_capacity.value.ui64 = 0;
25353 	}
25354 }
25355 
25356 
25357 /*
25358  *    Function: sr_check_wp()
25359  *
25360  * Description: This routine checks the write protection of removable-media
25361  *      disks and hotpluggable devices via the write protect bit of
25362  *      the Mode Page Header device specific field. Some devices choke
25363  *      on unsupported mode pages. To work around this issue, this
25364  *      routine uses the 0x3f mode page (request all pages) for all
25365  *      device types.
25366  *
25367  *   Arguments: dev		- the device 'dev_t'
25368  *
25369  * Return Code: int indicating if the device is write protected (1) or not (0)
25370  *
25371  *     Context: Kernel thread.
25372  *
25373  */
25374 
25375 static int
25376 sr_check_wp(dev_t dev)
25377 {
25378 	struct sd_lun	*un;
25379 	uchar_t		device_specific;
25380 	uchar_t		*sense;
25381 	int		hdrlen;
25382 	int		rval = FALSE;
25383 
25384 	/*
25385 	 * Note: The return codes for this routine should be reworked to
25386 	 * properly handle the case of a NULL softstate.
25387 	 */
25388 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25389 		return (FALSE);
25390 	}
25391 
25392 	if (un->un_f_cfg_is_atapi == TRUE) {
25393 		/*
25394 		 * The mode page contents are not required; set the allocation
25395 		 * length for the mode page header only
25396 		 */
25397 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25398 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25399 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25400 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25401 			goto err_exit;
25402 		device_specific =
25403 		    ((struct mode_header_grp2 *)sense)->device_specific;
25404 	} else {
25405 		hdrlen = MODE_HEADER_LENGTH;
25406 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25407 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25408 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25409 			goto err_exit;
25410 		device_specific =
25411 		    ((struct mode_header *)sense)->device_specific;
25412 	}
25413 
25414 	/*
25415 	 * The WRITE_PROTECT bit of the device-specific byte indicates
25416 	 * write-protected media. If the mode sense failed above we take the
25417 	 * err_exit path and return FALSE: such devices are assumed writable.
25418 	 */
25419 	if (device_specific & WRITE_PROTECT) {
25420 		rval = TRUE;
25421 	}
25422 
25423 err_exit:
25424 	kmem_free(sense, hdrlen);
25425 	return (rval);
25426 }
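
/*
 * Editorial note: the test above relies on the WP bit of the
 * device-specific byte in the mode parameter header; conceptually:
 *
 *	if (((struct mode_header *)sense)->device_specific & WRITE_PROTECT)
 *		rval = TRUE;
 *
 * where WRITE_PROTECT is assumed to be the 0x80 mask defined by the
 * SCSI mode parameter header for direct-access devices.
 */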
25427 
25428 /*
25429  *    Function: sr_volume_ctrl()
25430  *
25431  * Description: This routine is the driver entry point for handling CD-ROM
25432  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25433  *
25434  *   Arguments: dev	- the device 'dev_t'
25435  *		data	- pointer to user audio volume control structure
25436  *		flag	- this argument is a pass through to ddi_copyxxx()
25437  *			  directly from the mode argument of ioctl().
25438  *
25439  * Return Code: the code returned by sd_send_scsi_cmd()
25440  *		EFAULT if ddi_copyxxx() fails
25441  *		ENXIO if fail ddi_get_soft_state
25442  *		EINVAL if data pointer is NULL
25443  *
25444  */
25445 
25446 static int
25447 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25448 {
25449 	struct sd_lun		*un;
25450 	struct cdrom_volctrl    volume;
25451 	struct cdrom_volctrl    *vol = &volume;
25452 	uchar_t			*sense_page;
25453 	uchar_t			*select_page;
25454 	uchar_t			*sense;
25455 	uchar_t			*select;
25456 	int			sense_buflen;
25457 	int			select_buflen;
25458 	int			rval;
25459 
25460 	if (data == NULL) {
25461 		return (EINVAL);
25462 	}
25463 
25464 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25465 	    (un->un_state == SD_STATE_OFFLINE)) {
25466 		return (ENXIO);
25467 	}
25468 
25469 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25470 		return (EFAULT);
25471 	}
25472 
25473 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25474 		struct mode_header_grp2		*sense_mhp;
25475 		struct mode_header_grp2		*select_mhp;
25476 		int				bd_len;
25477 
25478 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25479 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25480 		    MODEPAGE_AUDIO_CTRL_LEN;
25481 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25482 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25483 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25484 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25485 		    SD_PATH_STANDARD)) != 0) {
25486 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25487 			    "sr_volume_ctrl: Mode Sense Failed\n");
25488 			kmem_free(sense, sense_buflen);
25489 			kmem_free(select, select_buflen);
25490 			return (rval);
25491 		}
25492 		sense_mhp = (struct mode_header_grp2 *)sense;
25493 		select_mhp = (struct mode_header_grp2 *)select;
25494 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25495 		    sense_mhp->bdesc_length_lo;
25496 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25497 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25498 			    "sr_volume_ctrl: Mode Sense returned invalid "
25499 			    "block descriptor length\n");
25500 			kmem_free(sense, sense_buflen);
25501 			kmem_free(select, select_buflen);
25502 			return (EIO);
25503 		}
25504 		sense_page = (uchar_t *)
25505 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25506 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25507 		select_mhp->length_msb = 0;
25508 		select_mhp->length_lsb = 0;
25509 		select_mhp->bdesc_length_hi = 0;
25510 		select_mhp->bdesc_length_lo = 0;
25511 	} else {
25512 		struct mode_header		*sense_mhp, *select_mhp;
25513 
25514 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25515 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25516 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25517 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25518 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25519 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25520 		    SD_PATH_STANDARD)) != 0) {
25521 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25522 			    "sr_volume_ctrl: Mode Sense Failed\n");
25523 			kmem_free(sense, sense_buflen);
25524 			kmem_free(select, select_buflen);
25525 			return (rval);
25526 		}
25527 		sense_mhp  = (struct mode_header *)sense;
25528 		select_mhp = (struct mode_header *)select;
25529 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25530 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25531 			    "sr_volume_ctrl: Mode Sense returned invalid "
25532 			    "block descriptor length\n");
25533 			kmem_free(sense, sense_buflen);
25534 			kmem_free(select, select_buflen);
25535 			return (EIO);
25536 		}
25537 		sense_page = (uchar_t *)
25538 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25539 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25540 		select_mhp->length = 0;
25541 		select_mhp->bdesc_length = 0;
25542 	}
25543 	/*
25544 	 * Note: An audio control data structure could be created and overlaid
25545 	 * on the following in place of the array indexing method implemented.
25546 	 */
25547 
25548 	/* Build the select data for the user volume data */
25549 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25550 	select_page[1] = 0xE;
25551 	/* Set the immediate bit */
25552 	select_page[2] = 0x04;
25553 	/* Zero out reserved fields */
25554 	select_page[3] = 0x00;
25555 	select_page[4] = 0x00;
25556 	/* Return sense data for fields not to be modified */
25557 	select_page[5] = sense_page[5];
25558 	select_page[6] = sense_page[6];
25559 	select_page[7] = sense_page[7];
25560 	/* Set the user specified volume levels for channel 0 and 1 */
25561 	select_page[8] = 0x01;
25562 	select_page[9] = vol->channel0;
25563 	select_page[10] = 0x02;
25564 	select_page[11] = vol->channel1;
25565 	/* Channels 2 and 3 are currently unsupported, so return sense data */
25566 	select_page[12] = sense_page[12];
25567 	select_page[13] = sense_page[13];
25568 	select_page[14] = sense_page[14];
25569 	select_page[15] = sense_page[15];
25570 
25571 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25572 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25573 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25574 	} else {
25575 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25576 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25577 	}
25578 
25579 	kmem_free(sense, sense_buflen);
25580 	kmem_free(select, select_buflen);
25581 	return (rval);
25582 }
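
/*
 * Usage sketch (editorial; values are hypothetical): setting both
 * supported channels to roughly half volume via CDROMVOLCTRL, given a
 * descriptor fd open on the CD device:
 *
 *	struct cdrom_volctrl v;
 *
 *	v.channel0 = 128;
 *	v.channel1 = 128;
 *	if (ioctl(fd, CDROMVOLCTRL, &v) < 0)
 *		perror("CDROMVOLCTRL");
 *
 * Any channel2/channel3 values are ignored by this routine, matching
 * the select_page[12-15] handling above.
 */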
25583 
25584 
25585 /*
25586  *    Function: sr_read_sony_session_offset()
25587  *
25588  * Description: This routine is the driver entry point for handling CD-ROM
25589  *		ioctl requests for session offset information. (CDROMREADOFFSET)
25590  *		The address of the first track in the last session of a
25591  *		multi-session CD-ROM is returned
25592  *
25593  *		Note: This routine uses a vendor specific key value in the
25594  *		command control field without implementing any vendor check here
25595  *		or in the ioctl routine.
25596  *
25597  *   Arguments: dev	- the device 'dev_t'
25598  *		data	- pointer to an int to hold the requested address
25599  *		flag	- this argument is a pass through to ddi_copyxxx()
25600  *			  directly from the mode argument of ioctl().
25601  *
25602  * Return Code: the code returned by sd_send_scsi_cmd()
25603  *		EFAULT if ddi_copyxxx() fails
25604  *		ENXIO if fail ddi_get_soft_state
25605  *		EINVAL if data pointer is NULL
25606  */
25607 
25608 static int
25609 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25610 {
25611 	struct sd_lun		*un;
25612 	struct uscsi_cmd	*com;
25613 	caddr_t			buffer;
25614 	char			cdb[CDB_GROUP1];
25615 	int			session_offset = 0;
25616 	int			rval;
25617 
25618 	if (data == NULL) {
25619 		return (EINVAL);
25620 	}
25621 
25622 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25623 	    (un->un_state == SD_STATE_OFFLINE)) {
25624 		return (ENXIO);
25625 	}
25626 
25627 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
25628 	bzero(cdb, CDB_GROUP1);
25629 	cdb[0] = SCMD_READ_TOC;
25630 	/*
25631 	 * Bytes 7 & 8 are the 12-byte allocation length for a single entry
25632 	 * (4-byte TOC response header + 8 bytes of response data).
25633 	 */
25634 	cdb[8] = SONY_SESSION_OFFSET_LEN;
25635 	/* Byte 9 is the control byte. A vendor specific value is used */
25636 	cdb[9] = SONY_SESSION_OFFSET_KEY;
25637 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25638 	com->uscsi_cdb = cdb;
25639 	com->uscsi_cdblen = CDB_GROUP1;
25640 	com->uscsi_bufaddr = buffer;
25641 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
25642 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25643 
25644 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25645 	    SD_PATH_STANDARD);
25646 	if (rval != 0) {
25647 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25648 		kmem_free(com, sizeof (*com));
25649 		return (rval);
25650 	}
25651 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
25652 		session_offset =
25653 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
25654 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
25655 		/*
25656 		 * The offset is returned in units of the current lbasize
25657 		 * blocks. Convert to 2k blocks before returning to the user.
25658 		 */
25659 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
25660 			session_offset >>= 2;
25661 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
25662 			session_offset >>= 1;
25663 		}
25664 	}
25665 
25666 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
25667 		rval = EFAULT;
25668 	}
25669 
25670 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
25671 	kmem_free(com, sizeof (*com));
25672 	return (rval);
25673 }
25674 
25675 
25676 /*
25677  *    Function: sd_wm_cache_constructor()
25678  *
25679  * Description: Cache Constructor for the wmap cache for the read/modify/write
25680  * 		devices.
25681  *
25682  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
25683  *		un	- sd_lun structure for the device.
25684  *		flags	- the KM flags passed to the constructor
25685  *
25686  * Return Code: 0 on success.
25687  *		-1 on failure.
25688  */
25689 
25690 /*ARGSUSED*/
25691 static int
25692 sd_wm_cache_constructor(void *wm, void *un, int flags)
25693 {
25694 	bzero(wm, sizeof (struct sd_w_map));
25695 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
25696 	return (0);
25697 }
25698 
25699 
25700 /*
25701  *    Function: sd_wm_cache_destructor()
25702  *
25703  * Description: Cache destructor for the wmap cache for the read/modify/write
25704  * 		devices.
25705  *
25706  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
25707  *		un	- sd_lun structure for the device.
25708  */
25709 /*ARGSUSED*/
25710 static void
25711 sd_wm_cache_destructor(void *wm, void *un)
25712 {
25713 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
25714 }
25715 
25716 
25717 /*
25718  *    Function: sd_range_lock()
25719  *
25720  * Description: Lock the specified range of blocks to ensure that a
25721  *		read-modify-write is atomic and no other i/o writes
25722  *		to the same location. The range is specified in terms
25723  *		of start and end blocks. Block numbers are the actual
25724  *		media block numbers, not system block numbers.
25725  *
25726  *   Arguments: un	- sd_lun structure for the device.
25727  *		startb - The starting block number
25728  *		endb - The end block number
25729  *		typ - type of i/o - simple/read_modify_write
25730  *
25731  * Return Code: wm  - pointer to the wmap structure.
25732  *
25733  *     Context: This routine can sleep.
25734  */
25735 
25736 static struct sd_w_map *
25737 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
25738 {
25739 	struct sd_w_map *wmp = NULL;
25740 	struct sd_w_map *sl_wmp = NULL;
25741 	struct sd_w_map *tmp_wmp;
25742 	wm_state state = SD_WM_CHK_LIST;
25743 
25744 
25745 	ASSERT(un != NULL);
25746 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25747 
25748 	mutex_enter(SD_MUTEX(un));
25749 
25750 	while (state != SD_WM_DONE) {
25751 
25752 		switch (state) {
25753 		case SD_WM_CHK_LIST:
25754 			/*
25755 			 * This is the starting state. Check the wmap list
25756 			 * to see if the range is currently available.
25757 			 */
25758 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
25759 				/*
25760 				 * If this is a simple write and no rmw
25761 				 * i/o is pending then try to lock the
25762 				 * range as the range should be available.
25763 				 */
25764 				state = SD_WM_LOCK_RANGE;
25765 			} else {
25766 				tmp_wmp = sd_get_range(un, startb, endb);
25767 				if (tmp_wmp != NULL) {
25768 					if ((wmp != NULL) && ONLIST(un, wmp)) {
25769 						/*
25770 						 * Should not keep an onlist
25771 						 * wmp while waiting; this
25772 						 * macro also sets wmp = NULL.
25773 						 */
25774 						FREE_ONLIST_WMAP(un, wmp);
25775 					}
25776 					/*
25777 					 * sl_wmp is the wmap on which the
25778 					 * wait is done. Since tmp_wmp points
25779 					 * to the in-use wmap, set sl_wmp to
25780 					 * tmp_wmp and change state to wait.
25781 					 */
25782 					sl_wmp = tmp_wmp;
25783 					state = SD_WM_WAIT_MAP;
25784 				} else {
25785 					state = SD_WM_LOCK_RANGE;
25786 				}
25787 
25788 			}
25789 			break;
25790 
25791 		case SD_WM_LOCK_RANGE:
25792 			ASSERT(un->un_wm_cache);
25793 			/*
25794 			 * The range needs to be locked; try to get a wmap.
25795 			 * First attempt it with KM_NOSLEEP: we want to avoid
25796 			 * sleeping if possible, as we would have to release
25797 			 * the sd mutex in order to sleep.
25798 			 */
25799 			if (wmp == NULL)
25800 				wmp = kmem_cache_alloc(un->un_wm_cache,
25801 				    KM_NOSLEEP);
25802 			if (wmp == NULL) {
25803 				mutex_exit(SD_MUTEX(un));
25804 				_NOTE(DATA_READABLE_WITHOUT_LOCK
25805 				    (sd_lun::un_wm_cache))
25806 				wmp = kmem_cache_alloc(un->un_wm_cache,
25807 				    KM_SLEEP);
25808 				mutex_enter(SD_MUTEX(un));
25809 				/*
25810 				 * We released the mutex, so recheck by going
25811 				 * back to the check-list state.
25812 				 */
25813 				state = SD_WM_CHK_LIST;
25814 			} else {
25815 				/*
25816 				 * We exit the state machine since we have the
25817 				 * wmap. Do the housekeeping first: place the
25818 				 * wmap on the wmap list if it is not already
25819 				 * there, and then set the state to done.
25820 				 */
25821 				wmp->wm_start = startb;
25822 				wmp->wm_end = endb;
25823 				wmp->wm_flags = typ | SD_WM_BUSY;
25824 				if (typ & SD_WTYPE_RMW) {
25825 					un->un_rmw_count++;
25826 				}
25827 				/*
25828 				 * If not already on the list then link
25829 				 */
25830 				if (!ONLIST(un, wmp)) {
25831 					wmp->wm_next = un->un_wm;
25832 					wmp->wm_prev = NULL;
25833 					if (wmp->wm_next)
25834 						wmp->wm_next->wm_prev = wmp;
25835 					un->un_wm = wmp;
25836 				}
25837 				state = SD_WM_DONE;
25838 			}
25839 			break;
25840 
25841 		case SD_WM_WAIT_MAP:
25842 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
25843 			/*
25844 			 * Wait is done on sl_wmp, which is set in the
25845 			 * check_list state.
25846 			 */
25847 			sl_wmp->wm_wanted_count++;
25848 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
25849 			sl_wmp->wm_wanted_count--;
25850 			/*
25851 			 * We can reuse the memory from the completed sl_wmp
25852 			 * lock range for our new lock, but only if no one is
25853 			 * waiting for it.
25854 			 */
25855 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
25856 			if (sl_wmp->wm_wanted_count == 0) {
25857 				if (wmp != NULL)
25858 					CHK_N_FREEWMP(un, wmp);
25859 				wmp = sl_wmp;
25860 			}
25861 			sl_wmp = NULL;
25862 			/*
25863 			 * After waking up, need to recheck for availability of
25864 			 * range.
25865 			 */
25866 			state = SD_WM_CHK_LIST;
25867 			break;
25868 
25869 		default:
25870 			panic("sd_range_lock: "
25871 			    "Unknown state %d in sd_range_lock", state);
25872 			/*NOTREACHED*/
25873 		} /* switch(state) */
25874 
25875 	} /* while(state != SD_WM_DONE) */
25876 
25877 	mutex_exit(SD_MUTEX(un));
25878 
25879 	ASSERT(wmp != NULL);
25880 
25881 	return (wmp);
25882 }
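
/*
 * Usage sketch (editorial): a read-modify-write path brackets its media
 * access with the range lock, e.g.:
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, startb, endb, SD_WTYPE_RMW);
 *	(read the target blocks, merge in the new data, write back)
 *	sd_range_unlock(un, wm);
 *
 * State transitions within sd_range_lock() itself:
 *
 *	SD_WM_CHK_LIST -> SD_WM_LOCK_RANGE -> SD_WM_DONE     (range free)
 *	SD_WM_CHK_LIST -> SD_WM_WAIT_MAP -> SD_WM_CHK_LIST   (overlap)
 *	SD_WM_LOCK_RANGE -> SD_WM_CHK_LIST  (mutex dropped for KM_SLEEP)
 */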
25883 
25884 
25885 /*
25886  *    Function: sd_get_range()
25887  *
25888  * Description: Determine whether there is any I/O overlapping this one.
25889  *		Returns the write map of the first such I/O, NULL otherwise.
25890  *
25891  *   Arguments: un	- sd_lun structure for the device.
25892  *		startb - The starting block number
25893  *		endb - The end block number
25894  *
25895  * Return Code: wm  - pointer to the wmap structure.
25896  */
25897 
25898 static struct sd_w_map *
25899 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
25900 {
25901 	struct sd_w_map *wmp;
25902 
25903 	ASSERT(un != NULL);
25904 
25905 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
25906 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
25907 			continue;
25908 		}
25909 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
25910 			break;
25911 		}
25912 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
25913 			break;
25914 		}
25915 	}
25916 
25917 	return (wmp);
25918 }
25919 
25920 
25921 /*
25922  *    Function: sd_free_inlist_wmap()
25923  *
25924  * Description: Unlink and free a write map struct.
25925  *
25926  *   Arguments: un      - sd_lun structure for the device.
25927  *		wmp	- sd_w_map which needs to be unlinked.
25928  */
25929 
25930 static void
25931 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
25932 {
25933 	ASSERT(un != NULL);
25934 
25935 	if (un->un_wm == wmp) {
25936 		un->un_wm = wmp->wm_next;
25937 	} else {
25938 		wmp->wm_prev->wm_next = wmp->wm_next;
25939 	}
25940 
25941 	if (wmp->wm_next) {
25942 		wmp->wm_next->wm_prev = wmp->wm_prev;
25943 	}
25944 
25945 	wmp->wm_next = wmp->wm_prev = NULL;
25946 
25947 	kmem_cache_free(un->un_wm_cache, wmp);
25948 }
25949 
25950 
25951 /*
25952  *    Function: sd_range_unlock()
25953  *
25954  * Description: Unlock the range locked by wm.
25955  *		Free write map if nobody else is waiting on it.
25956  *
25957  *   Arguments: un      - sd_lun structure for the device.
25958  *              wmp     - sd_w_map which needs to be unlinked.
25959  */
25960 
25961 static void
25962 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
25963 {
25964 	ASSERT(un != NULL);
25965 	ASSERT(wm != NULL);
25966 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25967 
25968 	mutex_enter(SD_MUTEX(un));
25969 
25970 	if (wm->wm_flags & SD_WTYPE_RMW) {
25971 		un->un_rmw_count--;
25972 	}
25973 
25974 	if (wm->wm_wanted_count) {
25975 		wm->wm_flags = 0;
25976 		/*
25977 		 * Broadcast that the wmap is available now.
25978 		 */
25979 		cv_broadcast(&wm->wm_avail);
25980 	} else {
25981 		/*
25982 		 * If no one is waiting on the map, it should be freed.
25983 		 */
25984 		sd_free_inlist_wmap(un, wm);
25985 	}
25986 
25987 	mutex_exit(SD_MUTEX(un));
25988 }
25989 
25990 
25991 /*
25992  *    Function: sd_read_modify_write_task
25993  *
25994  * Description: Called from a taskq thread to initiate the write phase of
25995  *		a read-modify-write request.  This is used for targets where
25996  *		un->un_sys_blocksize != un->un_tgt_blocksize.
25997  *
25998  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
25999  *
26000  *     Context: Called under taskq thread context.
26001  */
26002 
26003 static void
26004 sd_read_modify_write_task(void *arg)
26005 {
26006 	struct sd_mapblocksize_info	*bsp;
26007 	struct buf	*bp;
26008 	struct sd_xbuf	*xp;
26009 	struct sd_lun	*un;
26010 
26011 	bp = arg;	/* The bp is given in arg */
26012 	ASSERT(bp != NULL);
26013 
26014 	/* Get the pointer to the layer-private data struct */
26015 	xp = SD_GET_XBUF(bp);
26016 	ASSERT(xp != NULL);
26017 	bsp = xp->xb_private;
26018 	ASSERT(bsp != NULL);
26019 
26020 	un = SD_GET_UN(bp);
26021 	ASSERT(un != NULL);
26022 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26023 
26024 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26025 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26026 
26027 	/*
26028 	 * This is the write phase of a read-modify-write request, called
26029 	 * in the context of a taskq thread in response to the read portion
26030 	 * of the rmw request completing in interrupt context. The write
26031 	 * request must be sent from here down the iostart chain as if it
26032 	 * were being sent from sd_mapblocksize_iostart(), so we use the
26033 	 * layer index saved in the layer-private data area.
26034 	 */
26035 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26036 
26037 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26038 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26039 }
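
/*
 * Editorial sketch of the overall read-modify-write sequence this task
 * completes (inferred from the comments above):
 *
 *	1. sd_mapblocksize_iostart() sees a write that is not aligned to
 *	   the target block size and issues the read phase.
 *	2. The read completes in interrupt context, and a taskq thread is
 *	   dispatched to run sd_read_modify_write_task() on the bp.
 *	3. This task resumes the iostart chain at the saved layer index
 *	   via SD_NEXT_IOSTART(), sending the merged write down.
 */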
26040 
26041 
26042 /*
26043  *    Function: sddump_do_read_of_rmw()
26044  *
26045  * Description: This routine is called from sddump. If sddump is called
26046  *		with an I/O that is not aligned on a device blocksize
26047  *		boundary, then the write has to be converted to a
26048  *		read-modify-write. The read part is done here in order to
26049  *		keep sddump simple. Note that sd_mutex is held across the
26050  *		call to this routine.
26051  *
26052  *   Arguments: un	- sd_lun
26053  *		blkno	- block number in terms of media block size.
26054  *		nblk	- number of blocks.
26055  *		bpp	- pointer to pointer to the buf structure. On return
26056  *			from this function, *bpp points to the valid buffer
26057  *			to which the write has to be done.
26058  *
26059  * Return Code: 0 for success or errno-type return code
26060  */
26061 
26062 static int
26063 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26064 	struct buf **bpp)
26065 {
26066 	int err;
26067 	int i;
26068 	int rval;
26069 	struct buf *bp;
26070 	struct scsi_pkt *pkt = NULL;
26071 	uint32_t target_blocksize;
26072 
26073 	ASSERT(un != NULL);
26074 	ASSERT(mutex_owned(SD_MUTEX(un)));
26075 
26076 	target_blocksize = un->un_tgt_blocksize;
26077 
26078 	mutex_exit(SD_MUTEX(un));
26079 
26080 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26081 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26082 	if (bp == NULL) {
26083 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26084 		    "no resources for dumping; giving up");
26085 		err = ENOMEM;
26086 		goto done;
26087 	}
26088 
26089 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26090 	    blkno, nblk);
26091 	if (rval != 0) {
26092 		scsi_free_consistent_buf(bp);
26093 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26094 		    "no resources for dumping; giving up");
26095 		err = ENOMEM;
26096 		goto done;
26097 	}
26098 
26099 	pkt->pkt_flags |= FLAG_NOINTR;
26100 
26101 	err = EIO;
26102 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26103 
26104 		/*
26105 		 * sd_scsi_poll() returns 0 (success) if the command completes and
26106 		 * the status block is STATUS_GOOD.  We should only check
26107 		 * errors if this condition is not true.  Even then we should
26108 		 * send our own request sense packet only if we have a check
26109 		 * condition and auto request sense has not been performed by
26110 		 * the hba.
26111 		 */
26112 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26113 
26114 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26115 			err = 0;
26116 			break;
26117 		}
26118 
26119 		/*
26120 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
26121 		 * no need to read RQS data.
26122 		 */
26123 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26124 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26125 			    "Device is gone\n");
26126 			break;
26127 		}
26128 
26129 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26130 			SD_INFO(SD_LOG_DUMP, un,
26131 			    "sddump: read failed with CHECK, try # %d\n", i);
26132 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26133 				(void) sd_send_polled_RQS(un);
26134 			}
26135 
26136 			continue;
26137 		}
26138 
26139 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26140 			int reset_retval = 0;
26141 
26142 			SD_INFO(SD_LOG_DUMP, un,
26143 			    "sddump: read failed with BUSY, try # %d\n", i);
26144 
26145 			if (un->un_f_lun_reset_enabled == TRUE) {
26146 				reset_retval = scsi_reset(SD_ADDRESS(un),
26147 				    RESET_LUN);
26148 			}
26149 			if (reset_retval == 0) {
26150 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26151 			}
26152 			(void) sd_send_polled_RQS(un);
26153 
26154 		} else {
26155 			SD_INFO(SD_LOG_DUMP, un,
26156 			    "sddump: read failed with 0x%x, try # %d\n",
26157 			    SD_GET_PKT_STATUS(pkt), i);
26158 			mutex_enter(SD_MUTEX(un));
26159 			sd_reset_target(un, pkt);
26160 			mutex_exit(SD_MUTEX(un));
26161 		}
26162 
26163 		/*
26164 		 * If we are not getting anywhere with lun/target resets,
26165 		 * let's reset the bus.
26166 		 */
26167 		if (i > SD_NDUMP_RETRIES/2) {
26168 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26169 			(void) sd_send_polled_RQS(un);
26170 		}
26171 
26172 	}
26173 	scsi_destroy_pkt(pkt);
26174 
26175 	if (err != 0) {
26176 		scsi_free_consistent_buf(bp);
26177 		*bpp = NULL;
26178 	} else {
26179 		*bpp = bp;
26180 	}
26181 
26182 done:
26183 	mutex_enter(SD_MUTEX(un));
26184 	return (err);
26185 }
26186 
26187 
26188 /*
26189  *    Function: sd_failfast_flushq
26190  *
26191  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26192  *		in b_flags and move them onto the failfast queue, then kick
26193  *		off a thread to return all bp's on the failfast queue to
26194  *		their owners with an error set.
26195  *
26196  *   Arguments: un - pointer to the soft state struct for the instance.
26197  *
26198  *     Context: may execute in interrupt context.
26199  */
26200 
26201 static void
26202 sd_failfast_flushq(struct sd_lun *un)
26203 {
26204 	struct buf *bp;
26205 	struct buf *next_waitq_bp;
26206 	struct buf *prev_waitq_bp = NULL;
26207 
26208 	ASSERT(un != NULL);
26209 	ASSERT(mutex_owned(SD_MUTEX(un)));
26210 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26211 	ASSERT(un->un_failfast_bp == NULL);
26212 
26213 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26214 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26215 
26216 	/*
26217 	 * Check if we should flush all bufs when entering failfast state, or
26218 	 * just those with B_FAILFAST set.
26219 	 */
26220 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26221 		/*
26222 		 * Move *all* bp's on the wait queue to the failfast flush
26223 		 * queue, including those that do NOT have B_FAILFAST set.
26224 		 */
26225 		if (un->un_failfast_headp == NULL) {
26226 			ASSERT(un->un_failfast_tailp == NULL);
26227 			un->un_failfast_headp = un->un_waitq_headp;
26228 		} else {
26229 			ASSERT(un->un_failfast_tailp != NULL);
26230 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26231 		}
26232 
26233 		un->un_failfast_tailp = un->un_waitq_tailp;
26234 
26235 		/* update kstat for each bp moved out of the waitq */
26236 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26237 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26238 		}
26239 
26240 		/* empty the waitq */
26241 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26242 
26243 	} else {
26244 		/*
26245 		 * Go thru the wait queue, pick off all entries with
26246 		 * B_FAILFAST set, and move these onto the failfast queue.
26247 		 */
26248 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26249 			/*
26250 			 * Save the pointer to the next bp on the wait queue,
26251 			 * so we get to it on the next iteration of this loop.
26252 			 */
26253 			next_waitq_bp = bp->av_forw;
26254 
26255 			/*
26256 			 * If this bp from the wait queue does NOT have
26257 			 * B_FAILFAST set, just move on to the next element
26258 			 * in the wait queue. Note, this is the only place
26259 			 * where it is correct to set prev_waitq_bp.
26260 			 */
26261 			if ((bp->b_flags & B_FAILFAST) == 0) {
26262 				prev_waitq_bp = bp;
26263 				continue;
26264 			}
26265 
26266 			/*
26267 			 * Remove the bp from the wait queue.
26268 			 */
26269 			if (bp == un->un_waitq_headp) {
26270 				/* The bp is the first element of the waitq. */
26271 				un->un_waitq_headp = next_waitq_bp;
26272 				if (un->un_waitq_headp == NULL) {
26273 					/* The wait queue is now empty */
26274 					un->un_waitq_tailp = NULL;
26275 				}
26276 			} else {
26277 				/*
26278 				 * The bp is either somewhere in the middle
26279 				 * or at the end of the wait queue.
26280 				 */
26281 				ASSERT(un->un_waitq_headp != NULL);
26282 				ASSERT(prev_waitq_bp != NULL);
26283 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26284 				    == 0);
26285 				if (bp == un->un_waitq_tailp) {
26286 					/* bp is the last entry on the waitq. */
26287 					ASSERT(next_waitq_bp == NULL);
26288 					un->un_waitq_tailp = prev_waitq_bp;
26289 				}
26290 				prev_waitq_bp->av_forw = next_waitq_bp;
26291 			}
26292 			bp->av_forw = NULL;
26293 
26294 			/*
26295 			 * update kstat since the bp is moved out of
26296 			 * the waitq
26297 			 */
26298 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26299 
26300 			/*
26301 			 * Now put the bp onto the failfast queue.
26302 			 */
26303 			if (un->un_failfast_headp == NULL) {
26304 				/* failfast queue is currently empty */
26305 				ASSERT(un->un_failfast_tailp == NULL);
26306 				un->un_failfast_headp =
26307 				    un->un_failfast_tailp = bp;
26308 			} else {
26309 				/* Add the bp to the end of the failfast q */
26310 				ASSERT(un->un_failfast_tailp != NULL);
26311 				ASSERT(un->un_failfast_tailp->b_flags &
26312 				    B_FAILFAST);
26313 				un->un_failfast_tailp->av_forw = bp;
26314 				un->un_failfast_tailp = bp;
26315 			}
26316 		}
26317 	}
26318 
26319 	/*
26320 	 * Now return all bp's on the failfast queue to their owners.
26321 	 */
26322 	while ((bp = un->un_failfast_headp) != NULL) {
26323 
26324 		un->un_failfast_headp = bp->av_forw;
26325 		if (un->un_failfast_headp == NULL) {
26326 			un->un_failfast_tailp = NULL;
26327 		}
26328 
26329 		/*
26330 		 * We want to return the bp with a failure error code, but
26331 		 * we do not want a call to sd_start_cmds() to occur here,
26332 		 * so use sd_return_failed_command_no_restart() instead of
26333 		 * sd_return_failed_command().
26334 		 */
26335 		sd_return_failed_command_no_restart(un, bp, EIO);
26336 	}
26337 
26338 	/* Flush the xbuf queues if required. */
26339 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26340 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26341 	}
26342 
26343 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26344 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26345 }
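
/*
 * Editorial summary of the sd_failfast_flushctl effects seen above:
 *
 *	SD_FAILFAST_FLUSH_ALL_BUFS	flush every bp on the wait queue,
 *					not just those with B_FAILFAST set
 *	SD_FAILFAST_FLUSH_ALL_QUEUES	also flush the xbuf queues via
 *					ddi_xbuf_flushq()
 */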
26346 
26347 
26348 /*
26349  *    Function: sd_failfast_flushq_callback
26350  *
26351  * Description: Return TRUE if the given bp meets the criteria for failfast
26352  *		flushing. Used with ddi_xbuf_flushq(9F).
26353  *
26354  *   Arguments: bp - ptr to buf struct to be examined.
26355  *
26356  *     Context: Any
26357  */
26358 
26359 static int
26360 sd_failfast_flushq_callback(struct buf *bp)
26361 {
26362 	/*
26363 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26364 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26365 	 */
26366 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26367 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26368 }
26369 
26370 
26371 
26372 #if defined(__i386) || defined(__amd64)
26373 /*
26374  * Function: sd_setup_next_xfer
26375  *
26376  * Description: Prepare next I/O operation using DMA_PARTIAL
26377  *
26378  */
26379 
26380 static int
26381 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26382     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26383 {
26384 	ssize_t	num_blks_not_xfered;
26385 	daddr_t	strt_blk_num;
26386 	ssize_t	bytes_not_xfered;
26387 	int	rval;
26388 
26389 	ASSERT(pkt->pkt_resid == 0);
26390 
26391 	/*
26392 	 * Calculate next block number and amount to be transferred.
26393 	 *
26394 	 * How much data has NOT been transferred to the HBA yet.
26395 	 */
26396 	bytes_not_xfered = xp->xb_dma_resid;
26397 
26398 	/*
26399 	 * Figure how many blocks have NOT been transferred to the HBA yet.
26400 	 */
26401 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26402 
26403 	/*
26404 	 * Set the starting block number to the end of what WAS transferred.
26405 	 */
26406 	strt_blk_num = xp->xb_blkno +
26407 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
26408 
26409 	/*
26410 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
26411 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
26412 	 * the disk mutex here.
26413 	 */
26414 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26415 	    strt_blk_num, num_blks_not_xfered);
26416 
26417 	if (rval == 0) {
26418 
26419 		/*
26420 		 * Success.
26421 		 *
26422 		 * Adjust things if there are still more blocks to be
26423 		 * transferred.
26424 		 */
26425 		xp->xb_dma_resid = pkt->pkt_resid;
26426 		pkt->pkt_resid = 0;
26427 
26428 		return (1);
26429 	}
26430 
26431 	/*
26432 	 * There's really only one possible return value from
26433 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
26434 	 * returns NULL.
26435 	 */
26436 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26437 
26438 	bp->b_resid = bp->b_bcount;
26439 	bp->b_flags |= B_ERROR;
26440 
26441 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26442 	    "Error setting up next portion of DMA transfer\n");
26443 
26444 	return (0);
26445 }
26446 #endif
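
/*
 * Worked example for the arithmetic above (editorial; the numbers are
 * hypothetical): with b_bcount = 1048576 bytes, xb_dma_resid = 262144
 * bytes, and a 512-byte target block size:
 *
 *	num_blks_not_xfered = 262144 / 512 = 512 blocks
 *	strt_blk_num = xb_blkno + (1048576 - 262144) / 512
 *	             = xb_blkno + 1536
 *
 * so the next packet covers the final 512 blocks of the transfer.
 */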
26447 
26448 /*
26449  *    Function: sd_panic_for_res_conflict
26450  *
26451  * Description: Call panic with a string formatted with "Reservation Conflict"
26452  *		and a human readable identifier indicating the SD instance
26453  *		that experienced the reservation conflict.
26454  *
26455  *   Arguments: un - pointer to the soft state struct for the instance.
26456  *
26457  *     Context: may execute in interrupt context.
26458  */
26459 
26460 #define	SD_RESV_CONFLICT_FMT_LEN 40
26461 void
26462 sd_panic_for_res_conflict(struct sd_lun *un)
26463 {
26464 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
26465 	char path_str[MAXPATHLEN];
26466 
26467 	(void) snprintf(panic_str, sizeof (panic_str),
26468 	    "Reservation Conflict\nDisk: %s",
26469 	    ddi_pathname(SD_DEVINFO(un), path_str));
26470 
26471 	panic(panic_str);
26472 }
26473 
26474 /*
26475  * Note: The following sd_faultinjection_ioctl() routines implement
26476  * driver support for fault injection, used for error analysis by
26477  * causing faults in multiple layers of the driver.
26478  */
26480 
26481 #ifdef SD_FAULT_INJECTION
26482 static uint_t   sd_fault_injection_on = 0;
26483 
26484 /*
26485  *    Function: sd_faultinjection_ioctl()
26486  *
26487  * Description: This routine is the driver entry point for handling
26488  *              faultinjection ioctls to inject errors into the
26489  *              layer model
26490  *
26491  *   Arguments: cmd	- the ioctl cmd received
26492  *		arg	- the arguments from user and returns
26493  */
26494 
26495 static void
26496 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un) {
26497 
26498 	uint_t i;
26499 	uint_t rval;
26500 
26501 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26502 
26503 	mutex_enter(SD_MUTEX(un));
26504 
26505 	switch (cmd) {
26506 	case SDIOCRUN:
26507 		/* Allow pushed faults to be injected */
26508 		SD_INFO(SD_LOG_SDTEST, un,
26509 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26510 
26511 		sd_fault_injection_on = 1;
26512 
26513 		SD_INFO(SD_LOG_IOERR, un,
26514 		    "sd_faultinjection_ioctl: run finished\n");
26515 		break;
26516 
26517 	case SDIOCSTART:
26518 		/* Start Injection Session */
26519 		SD_INFO(SD_LOG_SDTEST, un,
26520 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26521 
26522 		sd_fault_injection_on = 0;
26523 		un->sd_injection_mask = 0xFFFFFFFF;
26524 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26525 			un->sd_fi_fifo_pkt[i] = NULL;
26526 			un->sd_fi_fifo_xb[i] = NULL;
26527 			un->sd_fi_fifo_un[i] = NULL;
26528 			un->sd_fi_fifo_arq[i] = NULL;
26529 		}
26530 		un->sd_fi_fifo_start = 0;
26531 		un->sd_fi_fifo_end = 0;
26532 
26533 		mutex_enter(&(un->un_fi_mutex));
26534 		un->sd_fi_log[0] = '\0';
26535 		un->sd_fi_buf_len = 0;
26536 		mutex_exit(&(un->un_fi_mutex));
26537 
26538 		SD_INFO(SD_LOG_IOERR, un,
26539 		    "sd_faultinjection_ioctl: start finished\n");
26540 		break;
26541 
26542 	case SDIOCSTOP:
26543 		/* Stop Injection Session */
26544 		SD_INFO(SD_LOG_SDTEST, un,
26545 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26546 		sd_fault_injection_on = 0;
26547 		un->sd_injection_mask = 0x0;
26548 
26549 		/* Empty stray or unused structs from fifo */
26550 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26551 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26552 				kmem_free(un->sd_fi_fifo_pkt[i],
26553 				    sizeof (struct sd_fi_pkt));
26554 			}
26555 			if (un->sd_fi_fifo_xb[i] != NULL) {
26556 				kmem_free(un->sd_fi_fifo_xb[i],
26557 				    sizeof (struct sd_fi_xb));
26558 			}
26559 			if (un->sd_fi_fifo_un[i] != NULL) {
26560 				kmem_free(un->sd_fi_fifo_un[i],
26561 				    sizeof (struct sd_fi_un));
26562 			}
26563 			if (un->sd_fi_fifo_arq[i] != NULL) {
26564 				kmem_free(un->sd_fi_fifo_arq[i],
26565 				    sizeof (struct sd_fi_arq));
26566 			}
26567 			un->sd_fi_fifo_pkt[i] = NULL;
26568 			un->sd_fi_fifo_un[i] = NULL;
26569 			un->sd_fi_fifo_xb[i] = NULL;
26570 			un->sd_fi_fifo_arq[i] = NULL;
26571 		}
26572 		un->sd_fi_fifo_start = 0;
26573 		un->sd_fi_fifo_end = 0;
26574 
26575 		SD_INFO(SD_LOG_IOERR, un,
26576 		    "sd_faultinjection_ioctl: stop finished\n");
26577 		break;
26578 
26579 	case SDIOCINSERTPKT:
26580 		/* Store a packet struct to be pushed onto fifo */
26581 		SD_INFO(SD_LOG_SDTEST, un,
26582 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26583 
26584 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26585 
26586 		sd_fault_injection_on = 0;
26587 
26588 		/* No more than SD_FI_MAX_ERROR allowed in the queue */
26589 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26590 			kmem_free(un->sd_fi_fifo_pkt[i],
26591 			    sizeof (struct sd_fi_pkt));
26592 		}
26593 		if (arg != NULL) {
26594 			un->sd_fi_fifo_pkt[i] =
26595 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26596 			if (un->sd_fi_fifo_pkt[i] == NULL) {
26597 				/* Alloc failed don't store anything */
26598 				break;
26599 			}
26600 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26601 			    sizeof (struct sd_fi_pkt), 0);
26602 			if (rval == -1) {
26603 				kmem_free(un->sd_fi_fifo_pkt[i],
26604 				    sizeof (struct sd_fi_pkt));
26605 				un->sd_fi_fifo_pkt[i] = NULL;
26606 			}
26607 		} else {
26608 			SD_INFO(SD_LOG_IOERR, un,
26609 			    "sd_faultinjection_ioctl: pkt null\n");
26610 		}
26611 		break;
26612 
26613 	case SDIOCINSERTXB:
26614 		/* Store a xb struct to be pushed onto fifo */
26615 		SD_INFO(SD_LOG_SDTEST, un,
26616 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
26617 
26618 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26619 
26620 		sd_fault_injection_on = 0;
26621 
26622 		if (un->sd_fi_fifo_xb[i] != NULL) {
26623 			kmem_free(un->sd_fi_fifo_xb[i],
26624 			    sizeof (struct sd_fi_xb));
26625 			un->sd_fi_fifo_xb[i] = NULL;
26626 		}
26627 		if (arg != NULL) {
26628 			un->sd_fi_fifo_xb[i] =
26629 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
26630 			if (un->sd_fi_fifo_xb[i] == NULL) {
26631 				/* Alloc failed don't store anything */
26632 				break;
26633 			}
26634 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
26635 			    sizeof (struct sd_fi_xb), 0);
26636 
26637 			if (rval == -1) {
26638 				kmem_free(un->sd_fi_fifo_xb[i],
26639 				    sizeof (struct sd_fi_xb));
26640 				un->sd_fi_fifo_xb[i] = NULL;
26641 			}
26642 		} else {
26643 			SD_INFO(SD_LOG_IOERR, un,
26644 			    "sd_faultinjection_ioctl: xb null\n");
26645 		}
26646 		break;
26647 
26648 	case SDIOCINSERTUN:
26649 		/* Store a un struct to be pushed onto fifo */
26650 		SD_INFO(SD_LOG_SDTEST, un,
26651 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
26652 
26653 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26654 
26655 		sd_fault_injection_on = 0;
26656 
26657 		if (un->sd_fi_fifo_un[i] != NULL) {
26658 			kmem_free(un->sd_fi_fifo_un[i],
26659 			    sizeof (struct sd_fi_un));
26660 			un->sd_fi_fifo_un[i] = NULL;
26661 		}
26662 		if (arg != NULL) {
26663 			un->sd_fi_fifo_un[i] =
26664 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
26665 			if (un->sd_fi_fifo_un[i] == NULL) {
26666 				/* Alloc failed don't store anything */
26667 				break;
26668 			}
26669 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
26670 			    sizeof (struct sd_fi_un), 0);
26671 			if (rval == -1) {
26672 				kmem_free(un->sd_fi_fifo_un[i],
26673 				    sizeof (struct sd_fi_un));
26674 				un->sd_fi_fifo_un[i] = NULL;
26675 			}
26676 
26677 		} else {
26678 			SD_INFO(SD_LOG_IOERR, un,
26679 			    "sd_faultinjection_ioctl: un null\n");
26680 		}
26681 
26682 		break;
26683 
26684 	case SDIOCINSERTARQ:
26685 		/* Store an arq struct to be pushed onto fifo */
26686 		SD_INFO(SD_LOG_SDTEST, un,
26687 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
26688 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26689 
26690 		sd_fault_injection_on = 0;
26691 
26692 		if (un->sd_fi_fifo_arq[i] != NULL) {
26693 			kmem_free(un->sd_fi_fifo_arq[i],
26694 			    sizeof (struct sd_fi_arq));
26695 			un->sd_fi_fifo_arq[i] = NULL;
26696 		}
26697 		if (arg != NULL) {
26698 			un->sd_fi_fifo_arq[i] =
26699 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
26700 			if (un->sd_fi_fifo_arq[i] == NULL) {
26701 				/* Alloc failed; don't store anything */
26702 				break;
26703 			}
26704 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
26705 			    sizeof (struct sd_fi_arq), 0);
26706 			if (rval == -1) {
26707 				kmem_free(un->sd_fi_fifo_arq[i],
26708 				    sizeof (struct sd_fi_arq));
26709 				un->sd_fi_fifo_arq[i] = NULL;
26710 			}
26711 
26712 		} else {
26713 			SD_INFO(SD_LOG_IOERR, un,
26714 			    "sd_faultinjection_ioctl: arq null\n");
26715 		}
26716 
26717 		break;
26718 
26719 	case SDIOCPUSH:
26720 		/* Push stored xb, pkt, un, and arq onto fifo */
26721 		sd_fault_injection_on = 0;
26722 
26723 		if (arg != NULL) {
26724 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
26725 			if (rval != -1 &&
26726 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
26727 				un->sd_fi_fifo_end += i;
26728 			}
26729 		} else {
26730 			SD_INFO(SD_LOG_IOERR, un,
26731 			    "sd_faultinjection_ioctl: push arg null\n");
26732 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
26733 				un->sd_fi_fifo_end++;
26734 			}
26735 		}
26736 		SD_INFO(SD_LOG_IOERR, un,
26737 		    "sd_faultinjection_ioctl: push to end=%d\n",
26738 		    un->sd_fi_fifo_end);
26739 		break;
26740 
26741 	case SDIOCRETRIEVE:
26742 		/* Return buffer of log from Injection session */
26743 		SD_INFO(SD_LOG_SDTEST, un,
26744 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve\n");
26745 
26746 		sd_fault_injection_on = 0;
26747 
26748 		mutex_enter(&(un->un_fi_mutex));
26749 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
26750 		    un->sd_fi_buf_len+1, 0);
26751 		mutex_exit(&(un->un_fi_mutex));
26752 
26753 		if (rval == -1) {
26754 			/*
26755 			 * The copyout failed, so arg is possibly
26756 			 * invalid; clear it before returning.
26757 			 */
26758 			arg = NULL;
26759 		}
26760 		break;
26761 	}
26762 
26763 	mutex_exit(SD_MUTEX(un));
26764 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
26766 }
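
/*
 * Illustrative sketch (not part of the driver): how a userland test
 * harness might drive the fault injection ioctls handled above.  This
 * assumes the SDIOC* commands and the struct sd_fi_pkt definition from
 * <sys/scsi/targets/sddef.h> are visible to the program, that the
 * driver was built with SD_FAULT_INJECTION, and that the device path
 * is hypothetical.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	struct sd_fi_pkt fi_pkt = { 0 };
 *	uint_t npush = 1;
 *	char log[SD_FI_MAX_BUF];
 *
 *	(void) ioctl(fd, SDIOCSTART, NULL);		start a session
 *	fi_pkt.pkt_reason = CMD_TRAN_ERR;		fault to inject
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	stage the pkt
 *	(void) ioctl(fd, SDIOCPUSH, &npush);		push it onto the fifo
 *	... issue I/O against the device, then ...
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);		fetch the session log
 *	(void) ioctl(fd, SDIOCSTOP, NULL);		end the session
 *	(void) close(fd);
 */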
26767 
26768 
26769 /*
26770  *    Function: sd_injection_log()
26771  *
26772  * Description: This routine appends buf to the existing injection log,
26773  *              which is retrieved via sd_faultinjection_ioctl() for use
26774  *              in fault detection and recovery.
26775  *
26776  *   Arguments: buf - the string to add; un - softstate of the target unit
26777  */
26778 
26779 static void
26780 sd_injection_log(char *buf, struct sd_lun *un)
26781 {
26782 	uint_t len;
26783 
26784 	ASSERT(un != NULL);
26785 	ASSERT(buf != NULL);
26786 
26787 	mutex_enter(&(un->un_fi_mutex));
26788 
26789 	len = min(strlen(buf), 255);
26790 	/* Add logged value to Injection log to be returned later */
26791 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
26792 		uint_t	offset = strlen((char *)un->sd_fi_log);
26793 		char *destp = (char *)un->sd_fi_log + offset;
26794 		int i;
26795 		for (i = 0; i < len; i++) {
26796 			*destp++ = *buf++;
26797 		}
26798 		un->sd_fi_buf_len += len;
26799 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
26800 	}
26801 
26802 	mutex_exit(&(un->un_fi_mutex));
26803 }
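
/*
 * Illustrative sketch (an assumed call, not one copied from this file):
 * a caller appends a bounded, NUL-terminated record to the injection
 * log while a session is active, e.g.
 *
 *	sd_injection_log("sdintr: fault injected on pkt\n", un);
 *
 * Each record is truncated to 255 bytes, and an append is silently
 * dropped once it would push sd_fi_buf_len past SD_FI_MAX_BUF.
 */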
26804 
26805 
26806 /*
26807  *    Function: sd_faultinjection()
26808  *
26809  * Description: This routine takes the pkt and changes its
26810  *		contents based on the error injection scenario.
26811  *
26812  *   Arguments: pktp	- packet to be changed
26813  */
26814 
26815 static void
26816 sd_faultinjection(struct scsi_pkt *pktp)
26817 {
26818 	uint_t i;
26819 	struct sd_fi_pkt *fi_pkt;
26820 	struct sd_fi_xb *fi_xb;
26821 	struct sd_fi_un *fi_un;
26822 	struct sd_fi_arq *fi_arq;
26823 	struct buf *bp;
26824 	struct sd_xbuf *xb;
26825 	struct sd_lun *un;
26826 
26827 	ASSERT(pktp != NULL);
26828 
26829 	/* pull bp, xb, and un from pktp */
26830 	bp = (struct buf *)pktp->pkt_private;
26831 	xb = SD_GET_XBUF(bp);
26832 	un = SD_GET_UN(bp);
26833 
26834 	ASSERT(un != NULL);
26835 
26836 	mutex_enter(SD_MUTEX(un));
26837 
26838 	SD_TRACE(SD_LOG_SDTEST, un,
26839 	    "sd_faultinjection: entry Injection from sdintr\n");
26840 
26841 	/* if injection is off return */
26842 	if (sd_fault_injection_on == 0 ||
26843 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
26844 		mutex_exit(SD_MUTEX(un));
26845 		return;
26846 	}
26847 
26848 
26849 	/* take next set off fifo */
26850 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
26851 
26852 	fi_pkt = un->sd_fi_fifo_pkt[i];
26853 	fi_xb = un->sd_fi_fifo_xb[i];
26854 	fi_un = un->sd_fi_fifo_un[i];
26855 	fi_arq = un->sd_fi_fifo_arq[i];
26856 
26857 
26858 	/* set variables accordingly */
26859 	/* set pkt if it was on fifo */
26860 	if (fi_pkt != NULL) {
26861 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
26862 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
26863 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
26864 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
26865 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
26866 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
26867 
26868 	}
26869 
26870 	/* set xb if it was on fifo */
26871 	if (fi_xb != NULL) {
26872 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
26873 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
26874 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
26875 		SD_CONDSET(xb, xb, xb_victim_retry_count,
26876 		    "xb_victim_retry_count");
26877 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
26878 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
26879 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
26880 
26881 		/* copy in sense data from the staged xb */
26882 		if (fi_xb->xb_sense_data[0] != -1) {
26883 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
26884 			    SENSE_LENGTH);
26885 		}
26886 
26887 		/* copy in extended sense codes */
26888 		SD_CONDSET(((struct scsi_extended_sense *)
26889 		    xb->xb_sense_data), xb, es_code, "es_code");
26890 		SD_CONDSET(((struct scsi_extended_sense *)
26891 		    xb->xb_sense_data), xb, es_key, "es_key");
26892 		SD_CONDSET(((struct scsi_extended_sense *)
26893 		    xb->xb_sense_data), xb, es_add_code, "es_add_code");
26894 		SD_CONDSET(((struct scsi_extended_sense *)
26895 		    xb->xb_sense_data), xb, es_qual_code, "es_qual_code");
26896 	}
26897 
26898 	/* set un if it was on fifo */
26899 	if (fi_un != NULL) {
26900 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
26901 		SD_CONDSET(un, un, un_ctype, "un_ctype");
26902 		SD_CONDSET(un, un, un_reset_retry_count,
26903 		    "un_reset_retry_count");
26904 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
26905 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
26906 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
26907 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
26908 		    "un_f_allow_bus_device_reset");
26909 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
26910 
26911 	}
26912 
26913 	/* copy in auto request sense if it was on fifo */
26914 	if (fi_arq != NULL) {
26915 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
26916 	}
26917 
26918 	/* free structs */
26919 	if (un->sd_fi_fifo_pkt[i] != NULL) {
26920 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
26921 	}
26922 	if (un->sd_fi_fifo_xb[i] != NULL) {
26923 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
26924 	}
26925 	if (un->sd_fi_fifo_un[i] != NULL) {
26926 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
26927 	}
26928 	if (un->sd_fi_fifo_arq[i] != NULL) {
26929 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
26930 	}
26931 
26932 	/*
26933 	 * kmem_free does not guarantee that the pointer is set to NULL.
26934 	 * Since we use these pointers to determine whether values were
26935 	 * set, make sure they are always NULL after the free.
26937 	 */
26938 	un->sd_fi_fifo_pkt[i] = NULL;
26939 	un->sd_fi_fifo_un[i] = NULL;
26940 	un->sd_fi_fifo_xb[i] = NULL;
26941 	un->sd_fi_fifo_arq[i] = NULL;
26942 
26943 	un->sd_fi_fifo_start++;
26944 
26945 	mutex_exit(SD_MUTEX(un));
26946 
26947 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
26948 }
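
/*
 * Illustrative sketch: sd_faultinjection() is designed to be called
 * from the command completion path when the driver is built with
 * SD_FAULT_INJECTION.  A minimal call site, assuming a just-completed
 * scsi_pkt `pktp`, would look like:
 *
 *	#ifdef SD_FAULT_INJECTION
 *		sd_faultinjection(pktp);
 *	#endif
 *
 * The routine is a no-op unless sd_fault_injection_on is set and the
 * fifo holds at least one staged entry.
 */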
26949 
26950 #endif /* SD_FAULT_INJECTION */
26951 
26952 /*
26953  * This routine is invoked in sd_unit_attach(). Before calling it, the
26954  * properties in the conf file, including the "hotpluggable" property,
26955  * must already have been processed.
26956  *
26957  * The sd driver distinguishes three types of devices: removable media,
26958  * non-removable media, and hotpluggable. The differences are defined below:
26959  *
26960  * 1. Device ID
26961  *
26962  *     The device ID of a device is used to identify this device. Refer to
26963  *     ddi_devid_register(9F).
26964  *
26965  *     For a non-removable media disk device that can provide the 0x80 or
26966  *     0x83 VPD page (refer to the INQUIRY command in the SCSI SPC spec),
26967  *     a unique device ID is created to identify the device. For other
26968  *     non-removable media devices, a default device ID is created only if
26969  *     the device has at least 2 alternate cylinders; otherwise, none.
26970  *
26971  *     -------------------------------------------------------
26972  *     removable media   hotpluggable  | Can Have Device ID
26973  *     -------------------------------------------------------
26974  *         false             false     |     Yes
26975  *         false             true      |     Yes
26976  *         true                x       |     No
26977  *     ------------------------------------------------------
26978  *
26979  *
26980  * 2. SCSI group 4 commands
26981  *
26982  *     In the SCSI specs, only some commands in the group 4 command set
26983  *     can use 8-byte addresses, which are needed to access storage beyond
26984  *     2TB. Other commands have no such capability. Without group 4
26985  *     support, it is impossible to make full use of the storage space of
26986  *     a disk with a capacity larger than 2TB.
26987  *
26988  *     -----------------------------------------------
26989  *     removable media   hotpluggable   LP64  |  Group
26990  *     -----------------------------------------------
26991  *           false          false       false |   1
26992  *           false          false       true  |   4
26993  *           false          true        false |   1
26994  *           false          true        true  |   4
26995  *           true             x           x   |   5
26996  *     -----------------------------------------------
26997  *
26998  *
26999  * 3. Check for VTOC Label
27000  *
27001  *     If a direct-access disk has no EFI label, sd will check whether it
27002  *     has a valid VTOC label. Now, sd also does that check for removable
27003  *     media and hotpluggable devices.
27004  *
27005  *     --------------------------------------------------------------
27006  *     Direct-Access   removable media    hotpluggable |  Check Label
27007  *     -------------------------------------------------------------
27008  *         false          false           false        |   No
27009  *         false          false           true         |   No
27010  *         false          true            false        |   Yes
27011  *         false          true            true         |   Yes
27012  *         true            x                x          |   Yes
27013  *     --------------------------------------------------------------
27014  *
27015  *
27016  * 4. Building default VTOC label
27017  *
27018  *     As section 3 says, sd checks whether certain kinds of devices have
27019  *     a VTOC label. If they have no valid VTOC label, sd(7d) attempts to
27020  *     create a default VTOC for them. Currently sd creates a default
27021  *     VTOC label for all devices on the x86 platform (VTOC_16), but on
27022  *     SPARC only for removable media and hotpluggable devices (VTOC_8).
27023  *
27024  *     -----------------------------------------------------------
27025  *       removable media hotpluggable platform   |   Default Label
27026  *     -----------------------------------------------------------
27027  *             false          false    sparc     |     No
27028  *             false          true     sparc     |     Yes
27029  *             false            x       x86      |     Yes
27030  *             true             x        x       |     Yes
27031  *     ----------------------------------------------------------
27032  *
27033  *
27034  * 5. Supported blocksizes of target devices
27035  *
27036  *     Sd supports non-512-byte blocksizes for removable media devices
27037  *     only. For other devices, only a 512-byte blocksize is supported.
27038  *     This may change in the near future because some RAID devices
27039  *     require non-512-byte blocksizes.
27040  *
27041  *     -----------------------------------------------------------
27042  *     removable media    hotpluggable    | non-512-byte blocksize
27043  *     -----------------------------------------------------------
27044  *           false          false         |   No
27045  *           false          true          |   No
27046  *           true             x           |   Yes
27047  *     -----------------------------------------------------------
27048  *
27049  *
27050  * 6. Automatic mount & unmount
27051  *
27052  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used
27053  *     to query whether a device is a removable media device. It returns 1
27054  *     for removable media devices, and 0 for others.
27055  *
27056  *     The automatic mounting subsystem should distinguish between the types
27057  *     of devices and apply automounting policies to each.
27058  *
27059  *
27060  * 7. fdisk partition management
27061  *
27062  *     Fdisk is the traditional partitioning method on the x86 platform.
27063  *     The sd(7d) driver supports fdisk partitions only on x86; on the
27064  *     SPARC platform, sd doesn't support fdisk partitions at all. Note:
27065  *     pcfs(7fs) can recognize fdisk partitions on both x86 and SPARC.
27066  *
27067  *     -----------------------------------------------------------
27068  *       platform   removable media  USB/1394  |  fdisk supported
27069  *     -----------------------------------------------------------
27070  *        x86         X               X        |       true
27071  *     ------------------------------------------------------------
27072  *        sparc       X               X        |       false
27073  *     ------------------------------------------------------------
27074  *
27075  *
27076  * 8. MBOOT/MBR
27077  *
27078  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
27079  *     support reading/writing the mboot for removable media devices on SPARC.
27080  *
27081  *     -----------------------------------------------------------
27082  *       platform   removable media  USB/1394  |  mboot supported
27083  *     -----------------------------------------------------------
27084  *        x86         X               X        |       true
27085  *     ------------------------------------------------------------
27086  *        sparc      false           false     |       false
27087  *        sparc      false           true      |       true
27088  *        sparc      true            false     |       true
27089  *        sparc      true            true      |       true
27090  *     ------------------------------------------------------------
27091  *
27092  *
27093  * 9.  error handling during opening device
27094  *
27095  *     If opening a disk device fails, an errno is returned. For some
27096  *     kinds of errors, a different errno is returned depending on whether
27097  *     the device is a removable media device. This brings USB/1394 hard
27098  *     disks in line with expected hard disk behavior. It is not expected
27099  *     that this breaks any applications.
27100  *
27101  *     ------------------------------------------------------
27102  *       removable media    hotpluggable   |  errno
27103  *     ------------------------------------------------------
27104  *             false          false        |   EIO
27105  *             false          true         |   EIO
27106  *             true             x          |   ENXIO
27107  *     ------------------------------------------------------
27108  *
27109  *
27110  * 10. ioctls: DKIOCEJECT, CDROMEJECT
27111  *
27112  *     These IOCTLs are applicable only to removable media devices.
27113  *
27114  *     -----------------------------------------------------------
27115  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27116  *     -----------------------------------------------------------
27117  *             false          false        |     No
27118  *             false          true         |     No
27119  *             true            x           |     Yes
27120  *     -----------------------------------------------------------
27121  *
27122  *
27123  * 11. Kstats for partitions
27124  *
27125  *     sd creates partition kstats for non-removable media devices. USB
27126  *     and FireWire hard disks now have partition kstats.
27127  *
27128  *      ------------------------------------------------------
27129  *       removable media    hotpluggable   |   kstat
27130  *      ------------------------------------------------------
27131  *             false          false        |    Yes
27132  *             false          true         |    Yes
27133  *             true             x          |    No
27134  *       ------------------------------------------------------
27135  *
27136  *
27137  * 12. Removable media & hotpluggable properties
27138  *
27139  *     The sd driver creates a "removable-media" property for removable
27140  *     media devices. A parent nexus driver creates a "hotpluggable"
27141  *     property if it supports hotplugging.
27142  *
27143  *     ---------------------------------------------------------------------
27144  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27145  *     ---------------------------------------------------------------------
27146  *       false            false       |    No                   No
27147  *       false            true        |    No                   Yes
27148  *       true             false       |    Yes                  No
27149  *       true             true        |    Yes                  Yes
27150  *     ---------------------------------------------------------------------
27151  *
27152  *
27153  * 13. Power Management
27154  *
27155  *     sd only power manages removable media devices or devices that support
27156  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
27157  *
27158  *     A parent nexus that supports hotplugging can also set "pm-capable"
27159  *     if the disk can be power managed.
27160  *
27161  *     ------------------------------------------------------------
27162  *       removable media hotpluggable pm-capable  |   power manage
27163  *     ------------------------------------------------------------
27164  *             false          false     false     |     No
27165  *             false          false     true      |     Yes
27166  *             false          true      false     |     No
27167  *             false          true      true      |     Yes
27168  *             true             x        x        |     Yes
27169  *     ------------------------------------------------------------
27170  *
27171  *      USB and FireWire hard disks can now be power managed independently
27172  *      of the framebuffer.
27173  *
27174  *
27175  * 14. Support for USB disks with capacity larger than 1TB
27176  *
27177  *     Currently, sd doesn't permit a fixed disk device with a capacity
27178  *     larger than 1TB to be used in a 32-bit operating system environment.
27179  *     However, sd doesn't enforce this for removable media devices;
27180  *     instead, it assumes that removable media devices cannot have a
27181  *     capacity larger than 1TB. Therefore, using those devices on a 32-bit
27182  *     system is only partially supported, which can cause unexpected results.
27183  *
27184  *     ---------------------------------------------------------------------
27185  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27186  *     ---------------------------------------------------------------------
27187  *             false          false  |   true         |     no
27188  *             false          true   |   true         |     no
27189  *             true           false  |   true         |     Yes
27190  *             true           true   |   true         |     Yes
27191  *     ---------------------------------------------------------------------
27192  *
27193  *
27194  * 15. Check write-protection at open time
27195  *
27196  *     When a removable media device is opened for writing without the
27197  *     NDELAY flag, sd checks whether the device is writable. If a
27198  *     write-protected device is opened without NDELAY, the open aborts.
27199  *
27200  *     ------------------------------------------------------------
27201  *       removable media    USB/1394   |   WP Check
27202  *     ------------------------------------------------------------
27203  *             false          false    |     No
27204  *             false          true     |     No
27205  *             true           false    |     Yes
27206  *             true           true     |     Yes
27207  *     ------------------------------------------------------------
27208  *
27209  *
27210  * 16. syslog when corrupted VTOC is encountered
27211  *
27212  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
27213  *      message only for fixed SCSI disks.
27214  *     ------------------------------------------------------------
27215  *       removable media    USB/1394   |   print syslog
27216  *     ------------------------------------------------------------
27217  *             false          false    |     Yes
27218  *             false          true     |     No
27219  *             true           false    |     No
27220  *             true           true     |     No
27221  *     ------------------------------------------------------------
27222  */
27223 static void
27224 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27225 {
27226 	int	pm_capable_prop;
27227 
27228 	ASSERT(un->un_sd);
27229 	ASSERT(un->un_sd->sd_inq);
27230 
27231 	/*
27232 	 * Enable SYNC CACHE support for all devices.
27233 	 */
27234 	un->un_f_sync_cache_supported = TRUE;
27235 
27236 	if (un->un_sd->sd_inq->inq_rmb) {
27237 		/*
27238 		 * The media of this device is removable, and the medium can
27239 		 * be changed after the device is opened. Thus we should
27240 		 * support this operation.
27241 		 */
27242 		un->un_f_has_removable_media = TRUE;
27243 
27244 		/*
27245 		 * support non-512-byte blocksize of removable media devices
27246 		 */
27247 		un->un_f_non_devbsize_supported = TRUE;
27248 
27249 		/*
27250 		 * Assume that all removable media devices support DOOR_LOCK
27251 		 */
27252 		un->un_f_doorlock_supported = TRUE;
27253 
27254 		/*
27255 		 * For a removable media device, it is possible to be opened
27256 		 * with NDELAY flag when there is no media in drive, in this
27257 		 * case we don't care if device is writable. But if without
27258 		 * NDELAY flag, we need to check if media is write-protected.
27259 		 */
27260 		un->un_f_chk_wp_open = TRUE;
27261 
27262 		/*
27263 		 * need to start a SCSI watch thread to monitor media state,
27264 		 * when media is being inserted or ejected, notify syseventd.
27265 		 */
27266 		un->un_f_monitor_media_state = TRUE;
27267 
27268 		/*
27269 		 * Some devices don't support START_STOP_UNIT command.
27270 		 * Therefore, we'd better check if a device supports it
27271 		 * before sending it.
27272 		 */
27273 		un->un_f_check_start_stop = TRUE;
27274 
27275 		/*
27276 		 * support eject media ioctl:
27277 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27278 		 */
27279 		un->un_f_eject_media_supported = TRUE;
27280 
27281 		/*
27282 		 * Because many removable-media devices don't support
27283 		 * LOG_SENSE, we can't use that command to check whether
27284 		 * a removable media device supports power management.
27285 		 * We assume that they support power-management via
27286 		 * START_STOP_UNIT command and can be spun up and down
27287 		 * without limitations.
27288 		 */
27289 		un->un_f_pm_supported = TRUE;
27290 
27291 		/*
27292 		 * Need to create a zero length (Boolean) property
27293 		 * removable-media for the removable media devices.
27294 		 * Note that the return value is not checked: if the property
27295 		 * cannot be created, we do not want the attach to fail
27296 		 * altogether. This is consistent with other property creation
27297 		 * in attach.
27298 		 */
27299 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27300 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27301 
27302 	} else {
27303 		/*
27304 		 * create device ID for device
27305 		 */
27306 		un->un_f_devid_supported = TRUE;
27307 
27308 		/*
27309 		 * Spin up non-removable-media devices once it is attached
27310 		 */
27311 		un->un_f_attach_spinup = TRUE;
27312 
27313 		/*
27314 		 * According to the SCSI specification, sense data has two
27315 		 * formats: fixed format and descriptor format. At present, we
27316 		 * don't support descriptor format sense data for removable
27317 		 * media.
27318 		 */
27319 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27320 			un->un_f_descr_format_supported = TRUE;
27321 		}
27322 
27323 		/*
27324 		 * kstats are created only for non-removable media devices.
27325 		 *
27326 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27327 		 * default is 1, so they are enabled by default.
27328 		 */
27329 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27330 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
27331 		    "enable-partition-kstats", 1));
27332 
27333 		/*
27334 		 * Check if HBA has set the "pm-capable" property.
27335 		 * If "pm-capable" exists and is non-zero then we can
27336 		 * power manage the device without checking the start/stop
27337 		 * cycle count log sense page.
27338 		 *
27339 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27340 		 * then we should not power manage the device.
27341 		 *
27342 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27343 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27344 		 * sd will check the start/stop cycle count log sense page
27345 		 * and power manage the device if the cycle count limit has
27346 		 * not been exceeded.
27347 		 */
27348 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27349 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27350 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27351 			un->un_f_log_sense_supported = TRUE;
27352 		} else {
27353 			/*
27354 			 * pm-capable property exists.
27355 			 *
27356 			 * Convert "TRUE" values for pm_capable_prop to
27357 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27358 			 * later. "TRUE" values are any values except
27359 			 * SD_PM_CAPABLE_FALSE (0) and
27360 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27361 			 */
27362 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27363 				un->un_f_log_sense_supported = FALSE;
27364 			} else {
27365 				un->un_f_pm_supported = TRUE;
27366 			}
27367 
27368 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27369 			    "sd_unit_attach: un:0x%p pm-capable "
27370 			    "property set to %d.\n", un, un->un_f_pm_supported);
27371 		}
27372 	}
27373 
27374 	if (un->un_f_is_hotpluggable) {
27375 
27376 		/*
27377 		 * Have to watch hotpluggable devices as well, since
27378 		 * that's the only way for userland applications to
27379 		 * detect hot removal while device is busy/mounted.
27380 		 */
27381 		un->un_f_monitor_media_state = TRUE;
27382 
27383 		un->un_f_check_start_stop = TRUE;
27384 
27385 	}
27386 }
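
/*
 * Illustrative sketch (not code from this file): a parent nexus driver
 * that knows its child disks can be power managed could export the
 * "pm-capable" property consumed above.  A minimal example, assuming a
 * hypothetical child dev_info_t `cdip`:
 *
 *	if (ddi_prop_update_int(DDI_DEV_T_NONE, cdip,
 *	    "pm-capable", 1) != DDI_PROP_SUCCESS) {
 *		cmn_err(CE_WARN, "!could not create pm-capable property");
 *	}
 *
 * With the property present and non-zero, sd power manages the device
 * without consulting the start/stop cycle count log sense page.
 */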
27387 
27388 /*
27389  * sd_tg_rdwr:
27390  * Provides read/write access for cmlb via sd_tgops. start_block is in
27391  * units of the system block size; reqlength is in bytes.
27392  *
27393  */
27394 static int
27395 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27396     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27397 {
27398 	struct sd_lun *un;
27399 	int path_flag = (int)(uintptr_t)tg_cookie;
27400 	char *dkl = NULL;
27401 	diskaddr_t real_addr = start_block;
27402 	diskaddr_t first_byte, end_block;
27403 
27404 	size_t	buffer_size = reqlength;
27405 	int rval;
27406 	diskaddr_t	cap;
27407 	uint32_t	lbasize;
27408 
27409 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27410 	if (un == NULL)
27411 		return (ENXIO);
27412 
27413 	if (cmd != TG_READ && cmd != TG_WRITE)
27414 		return (EINVAL);
27415 
27416 	mutex_enter(SD_MUTEX(un));
27417 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27418 		mutex_exit(SD_MUTEX(un));
27419 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27420 		    &lbasize, path_flag);
27421 		if (rval != 0)
27422 			return (rval);
27423 		mutex_enter(SD_MUTEX(un));
27424 		sd_update_block_info(un, lbasize, cap);
27425 		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27426 			mutex_exit(SD_MUTEX(un));
27427 			return (EIO);
27428 		}
27429 	}
27430 
27431 	if (NOT_DEVBSIZE(un)) {
27432 		/*
27433 		 * sys_blocksize != tgt_blocksize: re-adjust the block number
27434 		 * and compute the byte offset into the first target block
27435 		 */
27436 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27437 		real_addr = first_byte / un->un_tgt_blocksize;
27438 
27439 		end_block = (first_byte + reqlength +
27440 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27441 
27442 		/* round up buffer size to multiple of target block size */
27443 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27444 
27445 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
27446 		    "label_addr: 0x%x allocation size: 0x%x\n",
27447 		    real_addr, buffer_size);
27448 
27449 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27450 		    (reqlength % un->un_tgt_blocksize) != 0)
27451 			/* the request is not aligned */
27452 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27453 	}
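
	/*
	 * Worked example of the adjustment above, with illustrative
	 * numbers: for a 512-byte system block size and a 2048-byte
	 * target block size, a request of start_block 3 and reqlength
	 * 512 yields first_byte = 1536, real_addr = 0, end_block = 1
	 * and buffer_size = 2048.  Since 1536 is not a multiple of
	 * 2048, the request is unaligned: a 2048-byte bounce buffer
	 * (dkl) is allocated, the target block is transferred in full,
	 * and the caller's 512 bytes sit at byte offset 1536 within it
	 * (for writes this becomes a read-modify-write).
	 */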
27454 
27455 	/*
27456 	 * The MMC standard allows READ CAPACITY to be
27457 	 * inaccurate by a bounded amount (in the interest of
27458 	 * response latency).  As a result, failed READs are
27459 	 * commonplace (due to the reading of metadata and not
27460 	 * data). Depending on the per-Vendor/drive Sense data,
27461 	 * the failed READ can cause many (unnecessary) retries.
27462 	 */
27463 
27464 	if (ISCD(un) && (cmd == TG_READ) &&
27465 	    (un->un_f_blockcount_is_valid == TRUE) &&
27466 	    ((start_block == (un->un_blockcount - 1)) ||
27467 	    (start_block == (un->un_blockcount - 2)))) {
27468 		path_flag = SD_PATH_DIRECT_PRIORITY;
27469 	}
27470 
27471 	mutex_exit(SD_MUTEX(un));
27472 	if (cmd == TG_READ) {
27473 		rval = sd_send_scsi_READ(un, (dkl != NULL) ? dkl : bufaddr,
27474 		    buffer_size, real_addr, path_flag);
27475 		if (dkl != NULL)
27476 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27477 			    real_addr), bufaddr, reqlength);
27478 	} else {
27479 		if (dkl) {
27480 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27481 			    real_addr, path_flag);
27482 			if (rval) {
27483 				kmem_free(dkl, buffer_size);
27484 				return (rval);
27485 			}
27486 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27487 			    real_addr), reqlength);
27488 		}
27489 		rval = sd_send_scsi_WRITE(un, (dkl != NULL) ? dkl : bufaddr,
27490 		    buffer_size, real_addr, path_flag);
27491 	}
27492 
27493 	if (dkl != NULL)
27494 		kmem_free(dkl, buffer_size);
27495 
27496 	return (rval);
27497 }
27498 
27499 
27500 static int
27501 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
27502 {
27503 
27504 	struct sd_lun *un;
27505 	diskaddr_t	cap;
27506 	uint32_t	lbasize;
27507 	int		path_flag = (int)(uintptr_t)tg_cookie;
27508 	int		ret = 0;
27509 
27510 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27511 	if (un == NULL)
27512 		return (ENXIO);
27513 
27514 	switch (cmd) {
27515 	case TG_GETPHYGEOM:
27516 	case TG_GETVIRTGEOM:
27517 	case TG_GETCAPACITY:
27518 	case TG_GETBLOCKSIZE:
27519 		mutex_enter(SD_MUTEX(un));
27520 
27521 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27522 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27523 			cap = un->un_blockcount;
27524 			lbasize = un->un_tgt_blocksize;
27525 			mutex_exit(SD_MUTEX(un));
27526 		} else {
27527 			mutex_exit(SD_MUTEX(un));
27528 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27529 			    &lbasize, path_flag);
27530 			if (ret != 0)
27531 				return (ret);
27532 			mutex_enter(SD_MUTEX(un));
27533 			sd_update_block_info(un, lbasize, cap);
27534 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27535 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27536 				mutex_exit(SD_MUTEX(un));
27537 				return (EIO);
27538 			}
27539 			mutex_exit(SD_MUTEX(un));
27540 		}
27541 
27542 		if (cmd == TG_GETCAPACITY) {
27543 			*(diskaddr_t *)arg = cap;
27544 			return (0);
27545 		}
27546 
27547 		if (cmd == TG_GETBLOCKSIZE) {
27548 			*(uint32_t *)arg = lbasize;
27549 			return (0);
27550 		}
27551 
27552 		if (cmd == TG_GETPHYGEOM)
27553 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27554 			    cap, lbasize, path_flag);
27555 		else
27556 			/* TG_GETVIRTGEOM */
27557 			ret = sd_get_virtual_geometry(un,
27558 			    (cmlb_geom_t *)arg, cap, lbasize);
27559 
27560 		return (ret);
27561 
27562 	case TG_GETATTR:
27563 		mutex_enter(SD_MUTEX(un));
27564 		((tg_attribute_t *)arg)->media_is_writable =
27565 		    un->un_f_mmc_writable_media;
27566 		mutex_exit(SD_MUTEX(un));
27567 		return (0);
27568 	default:
27569 		return (ENOTTY);
27571 	}
27573 }
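
/*
 * Illustrative sketch: sd_tg_rdwr() and sd_tg_getinfo() are the entry
 * points cmlb calls back through a cmlb_tg_ops_t vector.  The
 * registration would take roughly the following shape (a sketch based
 * on cmlb usage conventions, not text copied from elsewhere in this
 * file):
 *
 *	static cmlb_tg_ops_t sd_tgops = {
 *		TG_DK_OPS_VERSION_1,
 *		sd_tg_rdwr,
 *		sd_tg_getinfo
 *	};
 *
 * cmlb passes tg_cookie through on every callback; sd encodes the
 * SD_PATH_* flag there to select the command path.
 */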
27574