/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
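
/*
 * Illustrative sketch only (not the driver's attach-time code): one way
 * the "interconnect-type" property discussed above could be queried and
 * mapped onto the SD_INTERCONNECT_* values defined later in this file.
 * The helper name is hypothetical; the INTERCONNECT_* values come from
 * <sys/scsi/impl/services.h>.
 */
#if 0
static int
sd_example_get_interconnect_type(dev_info_t *devi)
{
	/* -1 is returned if the HBA does not define the property */
	int itype = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0,
	    "interconnect-type", -1);

	switch (itype) {
	case INTERCONNECT_FIBRE:
	case INTERCONNECT_SSA:
	case INTERCONNECT_FABRIC:
		/* Assume Fibre Channel (old ssd) behaviors */
		return (SD_INTERCONNECT_FIBRE);
	default:
		/* Property undefined or unsupported: use the default */
		return (SD_DEFAULT_INTERCONNECT_TYPE);
	}
}
#endif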

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * The driver name is unfortunately prefixed to some driver.conf property
 * names.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain; each node in the chain represents a SCSI
 * controller, and records the number of luns attached to each target
 * connected to that controller.
 * Used for parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flag values indicating whether a lun is being attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
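
/*
 * Illustrative sketch only (hypothetical helper): updating the per-target
 * lun counts in the chain above under sd_scsi_target_lun_mutex.  The
 * driver's real bookkeeping is done by sd_scsi_update_lun_on_target(),
 * declared later in this file.
 */
#if 0
static void
sd_example_update_lun_count(dev_info_t *pdip, int target, int flag)
{
	struct sd_scsi_hba_tgt_lun *cp;

	mutex_enter(&sd_scsi_target_lun_mutex);
	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			if (flag == SD_SCSI_LUN_ATTACH) {
				cp->nlun[target]++;
			} else {
				cp->nlun[target]--;
			}
			break;
		}
	}
	mutex_exit(&sd_scsi_target_lun_mutex);
}
#endif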

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI, non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))

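/*
 * Illustrative sketch only (hypothetical helper): the caching idea
 * described above.  A prior scsi_probe() result for this parent dip and
 * target is returned from the cache when available, so the (potentially
 * slow) probe is only issued on a miss.  The driver's real logic lives
 * in sd_scsi_probe_with_cache(), declared later in this file.
 */
#if 0
static int
sd_example_probe_with_cache(struct scsi_device *devp, int tgt, int (*fn)())
{
	struct sd_scsi_probe_cache *cp;
	dev_info_t *pdip = ddi_get_parent(devp->sd_dev);

	mutex_enter(&sd_scsi_probe_cache_mutex);
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip && cp->cache[tgt] == SCSIPROBE_EXISTS) {
			mutex_exit(&sd_scsi_probe_cache_mutex);
			return (SCSIPROBE_EXISTS);	/* cache hit */
		}
	}
	mutex_exit(&sd_scsi_probe_cache_mutex);
	return (scsi_probe(devp, fn));	/* cache miss: do the real probe */
}
#endif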


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
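/* For example, SD_TOUPPER('a') yields 'A'; any other character is unchanged */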

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * something else, as defined by the flags at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".  (An illustrative sketch of this comparison
 * appears after the table.)
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);

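/*
 * Illustrative sketch only (hypothetical helper): the blank-squeezing
 * comparison described in the comment above the table, where a run of
 * consecutive blanks compares equal to a single blank.  This ignores the
 * leading/trailing-blank and "*" special cases; the driver's real version
 * is sd_blank_cmp(), declared later in this file.
 */
#if 0
static int
sd_example_blank_cmp(const char *tbl, const char *id)
{
	while (*tbl != '\0') {
		if (*tbl == ' ') {
			if (*id != ' ') {
				return (0);
			}
			while (*tbl == ' ')	/* squeeze table blanks */
				tbl++;
			while (*id == ' ')	/* squeeze id blanks */
				id++;
		} else if (*tbl++ != *id++) {
			return (0);		/* character mismatch */
		}
	}
	return (1);	/* table entry matched as far as it is defined */
}
#endif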

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd page for "page code" */
static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
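
/*
 * Typical (illustrative) usage: New_state(un, SD_STATE_SUSPENDED) saves
 * the current state in un_last_state, and a later Restore_state(un)
 * swaps back to it.
 */
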
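/*
 * Each row below describes one CDB group: the CDB length, the opcode
 * group code, the largest addressable LBA, and the maximum transfer
 * length in blocks.  For example, 6-byte Group 0 CDBs carry a 21-bit
 * LBA (0x1FFFFF) and an 8-bit block count (0xFF).
 */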
static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_get_nv_sup			ssd_get_nv_sup
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(struct sd_lun *un);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
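
/*
 * For example (illustrative call): to enable the write cache while
 * leaving the read cache setting untouched, a caller would use
 * sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE).
 */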

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static void  sd_get_nv_sup(struct sd_lun *un);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
	enum uio_seg dataspace, int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, int path_flag);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)

static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
	uint16_t buflen, uchar_t page_code, uchar_t page_control,
	uint16_t param_ptr, int path_flag);

static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
static void sd_free_rqs(struct sd_lun *un);

static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
	uchar_t *data, int len, int fmt);
static void sd_panic_for_res_conflict(struct sd_lun *un);

/*
 * Disk Ioctl Function Prototypes
 */
static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);

/*
 * Multi-host Ioctl Prototypes
 */
static int sd_check_mhd(dev_t dev, int interval);
static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
static char *sd_sname(uchar_t status);
static void sd_mhd_resvd_recover(void *arg);
static void sd_resv_reclaim_thread();
static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
static int sd_reserve_release(dev_t dev, int cmd);
static void sd_rmv_resv_reclaim_req(dev_t dev);
static void sd_mhd_reset_notify_cb(caddr_t arg);
static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
	mhioc_inkeys_t *usrp, int flag);
static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
	mhioc_inresvs_t *usrp, int flag);
static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_release(dev_t dev);
static int sd_mhdioc_register_devid(dev_t dev);
static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1475 
1476 /*
1477  * SCSI removable prototypes
1478  */
1479 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1480 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1481 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1482 static int sr_pause_resume(dev_t dev, int mode);
1483 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1484 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1487 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1488 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1489 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1490 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1491 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1492 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1493 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1494 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1495 static int sr_eject(dev_t dev);
1496 static void sr_ejected(register struct sd_lun *un);
1497 static int sr_check_wp(dev_t dev);
1498 static int sd_check_media(dev_t dev, enum dkio_state state);
1499 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1500 static void sd_delayed_cv_broadcast(void *arg);
1501 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1502 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1503 
1504 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1505 
1506 /*
1507  * Function prototypes for the non-512 support (DVDRAM, MO, etc.) functions.
1508  */
1509 static void sd_check_for_writable_cd(struct sd_lun *un, int path_flag);
1510 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1511 static void sd_wm_cache_destructor(void *wm, void *un);
1512 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1513 	daddr_t endb, ushort_t typ);
1514 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1515 	daddr_t endb);
1516 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1517 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1518 static void sd_read_modify_write_task(void *arg);
1519 static int
1520 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1521 	struct buf **bpp);
1522 
1523 
1524 /*
1525  * Function prototypes for failfast support.
1526  */
1527 static void sd_failfast_flushq(struct sd_lun *un);
1528 static int sd_failfast_flushq_callback(struct buf *bp);
1529 
1530 /*
1531  * Function prototypes to check for LSI devices
1532  */
1533 static void sd_is_lsi(struct sd_lun *un);
1534 
1535 /*
1536  * Function prototypes for x86 support
1537  */
1538 #if defined(__i386) || defined(__amd64)
1539 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1540 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1541 #endif
1542 
1543 
1544 /* Function prototypes for cmlb */
1545 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1546     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1547 
1548 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1549 
1550 /*
1551  * Constants for failfast support:
1552  *
1553  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1554  * failfast processing being performed.
1555  *
1556  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1557  * failfast processing on all bufs with B_FAILFAST set.
1558  */
1559 
1560 #define	SD_FAILFAST_INACTIVE		0
1561 #define	SD_FAILFAST_ACTIVE		1
1562 
1563 /*
1564  * Bitmask to control behavior of buf(9S) flushes when a transition to
1565  * the failfast state occurs. Optional bits include:
1566  *
1567  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1568  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1569  * be flushed.
1570  *
1571  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1572  * driver, in addition to the regular wait queue. This includes the xbuf
1573  * queues. When clear, only the driver's wait queue will be flushed.
1574  */
1575 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1576 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1577 
1578 /*
1579  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1580  * to flush all queues within the driver.
1581  */
1582 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1583 
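/*
 * Illustrative note (not part of the original source): since
 * sd_failfast_flushctl is an ordinary module variable, a hypothetical
 * /etc/system entry to flush ALL bufs on ALL queues at the failfast
 * transition might look like:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */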
1584 
1585 /*
1586  * SD Testing Fault Injection
1587  */
1588 #ifdef SD_FAULT_INJECTION
1589 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1590 static void sd_faultinjection(struct scsi_pkt *pktp);
1591 static void sd_injection_log(char *buf, struct sd_lun *un);
1592 #endif
1593 
1594 /*
1595  * Device driver ops vector
1596  */
1597 static struct cb_ops sd_cb_ops = {
1598 	sdopen,			/* open */
1599 	sdclose,		/* close */
1600 	sdstrategy,		/* strategy */
1601 	nodev,			/* print */
1602 	sddump,			/* dump */
1603 	sdread,			/* read */
1604 	sdwrite,		/* write */
1605 	sdioctl,		/* ioctl */
1606 	nodev,			/* devmap */
1607 	nodev,			/* mmap */
1608 	nodev,			/* segmap */
1609 	nochpoll,		/* poll */
1610 	sd_prop_op,		/* cb_prop_op */
1611 	0,			/* streamtab  */
1612 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1613 	CB_REV,			/* cb_rev */
1614 	sdaread, 		/* async I/O read entry point */
1615 	sdawrite		/* async I/O write entry point */
1616 };
1617 
1618 static struct dev_ops sd_ops = {
1619 	DEVO_REV,		/* devo_rev, */
1620 	0,			/* refcnt  */
1621 	sdinfo,			/* info */
1622 	nulldev,		/* identify */
1623 	sdprobe,		/* probe */
1624 	sdattach,		/* attach */
1625 	sddetach,		/* detach */
1626 	nodev,			/* reset */
1627 	&sd_cb_ops,		/* driver operations */
1628 	NULL,			/* bus operations */
1629 	sdpower			/* power */
1630 };
1631 
1632 
1633 /*
1634  * This is the loadable module wrapper.
1635  */
1636 #include <sys/modctl.h>
1637 
1638 static struct modldrv modldrv = {
1639 	&mod_driverops,		/* Type of module. This one is a driver */
1640 	SD_MODULE_NAME,		/* Module name. */
1641 	&sd_ops			/* driver ops */
1642 };
1643 
1644 
1645 static struct modlinkage modlinkage = {
1646 	MODREV_1,
1647 	&modldrv,
1648 	NULL
1649 };
1650 
1651 static cmlb_tg_ops_t sd_tgops = {
1652 	TG_DK_OPS_VERSION_1,
1653 	sd_tg_rdwr,
1654 	sd_tg_getinfo
1655 };
1656 
1657 static struct scsi_asq_key_strings sd_additional_codes[] = {
1658 	0x81, 0, "Logical Unit is Reserved",
1659 	0x85, 0, "Audio Address Not Valid",
1660 	0xb6, 0, "Media Load Mechanism Failed",
1661 	0xB9, 0, "Audio Play Operation Aborted",
1662 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1663 	0x53, 2, "Medium removal prevented",
1664 	0x6f, 0, "Authentication failed during key exchange",
1665 	0x6f, 1, "Key not present",
1666 	0x6f, 2, "Key not established",
1667 	0x6f, 3, "Read without proper authentication",
1668 	0x6f, 4, "Mismatched region to this logical unit",
1669 	0x6f, 5, "Region reset count error",
1670 	0xffff, 0x0, NULL
1671 };
1672 
1673 
1674 /*
1675  * Struct for passing printing information for sense data messages
1676  */
1677 struct sd_sense_info {
1678 	int	ssi_severity;
1679 	int	ssi_pfa_flag;
1680 };
1681 
1682 /*
1683  * Table of function pointers for iostart-side routines. Separate "chains"
1684  * of layered function calls are formed by placing the function pointers
1685  * sequentially in the desired order. Functions are called according to an
1686  * incrementing table index ordering. The last function in each chain must
1687  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1688  * in the sd_iodone_chain[] array.
1689  *
1690  * Note: It may seem more natural to organize both the iostart and iodone
1691  * functions together, into an array of structures (or some similar
1692  * organization) with a common index, rather than two separate arrays which
1693  * must be maintained in synchronization. The purpose of this division is
1694  * to achieve improved performance: individual arrays allow for more
1695  * effective cache line utilization on certain platforms.
1696  */
1697 
1698 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1699 
1700 
1701 static sd_chain_t sd_iostart_chain[] = {
1702 
1703 	/* Chain for buf IO for disk drive targets (PM enabled) */
1704 	sd_mapblockaddr_iostart,	/* Index: 0 */
1705 	sd_pm_iostart,			/* Index: 1 */
1706 	sd_core_iostart,		/* Index: 2 */
1707 
1708 	/* Chain for buf IO for disk drive targets (PM disabled) */
1709 	sd_mapblockaddr_iostart,	/* Index: 3 */
1710 	sd_core_iostart,		/* Index: 4 */
1711 
1712 	/* Chain for buf IO for removable-media targets (PM enabled) */
1713 	sd_mapblockaddr_iostart,	/* Index: 5 */
1714 	sd_mapblocksize_iostart,	/* Index: 6 */
1715 	sd_pm_iostart,			/* Index: 7 */
1716 	sd_core_iostart,		/* Index: 8 */
1717 
1718 	/* Chain for buf IO for removable-media targets (PM disabled) */
1719 	sd_mapblockaddr_iostart,	/* Index: 9 */
1720 	sd_mapblocksize_iostart,	/* Index: 10 */
1721 	sd_core_iostart,		/* Index: 11 */
1722 
1723 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1724 	sd_mapblockaddr_iostart,	/* Index: 12 */
1725 	sd_checksum_iostart,		/* Index: 13 */
1726 	sd_pm_iostart,			/* Index: 14 */
1727 	sd_core_iostart,		/* Index: 15 */
1728 
1729 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1730 	sd_mapblockaddr_iostart,	/* Index: 16 */
1731 	sd_checksum_iostart,		/* Index: 17 */
1732 	sd_core_iostart,		/* Index: 18 */
1733 
1734 	/* Chain for USCSI commands (all targets) */
1735 	sd_pm_iostart,			/* Index: 19 */
1736 	sd_core_iostart,		/* Index: 20 */
1737 
1738 	/* Chain for checksumming USCSI commands (all targets) */
1739 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1740 	sd_pm_iostart,			/* Index: 22 */
1741 	sd_core_iostart,		/* Index: 23 */
1742 
1743 	/* Chain for "direct" USCSI commands (all targets) */
1744 	sd_core_iostart,		/* Index: 24 */
1745 
1746 	/* Chain for "direct priority" USCSI commands (all targets) */
1747 	sd_core_iostart,		/* Index: 25 */
1748 };
1749 
1750 /*
1751  * Macros to locate the first function of each iostart chain in the
1752  * sd_iostart_chain[] array. These are located by the index in the array.
1753  */
1754 #define	SD_CHAIN_DISK_IOSTART			0
1755 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1756 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1757 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1758 #define	SD_CHAIN_CHKSUM_IOSTART			12
1759 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1760 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1761 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1762 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1763 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1764 
1765 
1766 /*
1767  * Table of function pointers for the iodone-side routines for the driver-
1768  * internal layering mechanism.  The calling sequence for iodone routines
1769  * uses a decrementing table index, so the last routine called in a chain
1770  * must be at the lowest array index location for that chain.  The last
1771  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1772  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1773  * of the functions in an iodone side chain must correspond to the ordering
1774  * of the iostart routines for that chain.  Note that there is no iodone
1775  * side routine that corresponds to sd_core_iostart(), so there is no
1776  * entry in the table for this.
1777  */
1778 
1779 static sd_chain_t sd_iodone_chain[] = {
1780 
1781 	/* Chain for buf IO for disk drive targets (PM enabled) */
1782 	sd_buf_iodone,			/* Index: 0 */
1783 	sd_mapblockaddr_iodone,		/* Index: 1 */
1784 	sd_pm_iodone,			/* Index: 2 */
1785 
1786 	/* Chain for buf IO for disk drive targets (PM disabled) */
1787 	sd_buf_iodone,			/* Index: 3 */
1788 	sd_mapblockaddr_iodone,		/* Index: 4 */
1789 
1790 	/* Chain for buf IO for removable-media targets (PM enabled) */
1791 	sd_buf_iodone,			/* Index: 5 */
1792 	sd_mapblockaddr_iodone,		/* Index: 6 */
1793 	sd_mapblocksize_iodone,		/* Index: 7 */
1794 	sd_pm_iodone,			/* Index: 8 */
1795 
1796 	/* Chain for buf IO for removable-media targets (PM disabled) */
1797 	sd_buf_iodone,			/* Index: 9 */
1798 	sd_mapblockaddr_iodone,		/* Index: 10 */
1799 	sd_mapblocksize_iodone,		/* Index: 11 */
1800 
1801 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1802 	sd_buf_iodone,			/* Index: 12 */
1803 	sd_mapblockaddr_iodone,		/* Index: 13 */
1804 	sd_checksum_iodone,		/* Index: 14 */
1805 	sd_pm_iodone,			/* Index: 15 */
1806 
1807 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1808 	sd_buf_iodone,			/* Index: 16 */
1809 	sd_mapblockaddr_iodone,		/* Index: 17 */
1810 	sd_checksum_iodone,		/* Index: 18 */
1811 
1812 	/* Chain for USCSI commands (non-checksum targets) */
1813 	sd_uscsi_iodone,		/* Index: 19 */
1814 	sd_pm_iodone,			/* Index: 20 */
1815 
1816 	/* Chain for USCSI commands (checksum targets) */
1817 	sd_uscsi_iodone,		/* Index: 21 */
1818 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1819 	sd_pm_iodone,			/* Index: 23 */
1820 
1821 	/* Chain for "direct" USCSI commands (all targets) */
1822 	sd_uscsi_iodone,		/* Index: 24 */
1823 
1824 	/* Chain for "direct priority" USCSI commands (all targets) */
1825 	sd_uscsi_iodone,		/* Index: 25 */
1826 };
1827 
1828 
1829 /*
1830  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1831  * each iodone-side chain. These are located by the array index, but as the
1832  * iodone side functions are called in a decrementing-index order, the
1833  * highest index number in each chain must be specified (as these correspond
1834  * to the first function in the iodone chain that will be called by the core
1835  * at IO completion time).
1836  */
1837 
1838 #define	SD_CHAIN_DISK_IODONE			2
1839 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1840 #define	SD_CHAIN_RMMEDIA_IODONE			8
1841 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1842 #define	SD_CHAIN_CHKSUM_IODONE			15
1843 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1844 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1845 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1846 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1847 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1848 
1849 
1850 
1851 
1852 /*
1853  * Array to map a layering chain index to the appropriate initpkt routine.
1854  * The redundant entries are present so that the index used for accessing
1855  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1856  * with this table as well.
1857  */
1858 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1859 
1860 static sd_initpkt_t	sd_initpkt_map[] = {
1861 
1862 	/* Chain for buf IO for disk drive targets (PM enabled) */
1863 	sd_initpkt_for_buf,		/* Index: 0 */
1864 	sd_initpkt_for_buf,		/* Index: 1 */
1865 	sd_initpkt_for_buf,		/* Index: 2 */
1866 
1867 	/* Chain for buf IO for disk drive targets (PM disabled) */
1868 	sd_initpkt_for_buf,		/* Index: 3 */
1869 	sd_initpkt_for_buf,		/* Index: 4 */
1870 
1871 	/* Chain for buf IO for removable-media targets (PM enabled) */
1872 	sd_initpkt_for_buf,		/* Index: 5 */
1873 	sd_initpkt_for_buf,		/* Index: 6 */
1874 	sd_initpkt_for_buf,		/* Index: 7 */
1875 	sd_initpkt_for_buf,		/* Index: 8 */
1876 
1877 	/* Chain for buf IO for removable-media targets (PM disabled) */
1878 	sd_initpkt_for_buf,		/* Index: 9 */
1879 	sd_initpkt_for_buf,		/* Index: 10 */
1880 	sd_initpkt_for_buf,		/* Index: 11 */
1881 
1882 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1883 	sd_initpkt_for_buf,		/* Index: 12 */
1884 	sd_initpkt_for_buf,		/* Index: 13 */
1885 	sd_initpkt_for_buf,		/* Index: 14 */
1886 	sd_initpkt_for_buf,		/* Index: 15 */
1887 
1888 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1889 	sd_initpkt_for_buf,		/* Index: 16 */
1890 	sd_initpkt_for_buf,		/* Index: 17 */
1891 	sd_initpkt_for_buf,		/* Index: 18 */
1892 
1893 	/* Chain for USCSI commands (non-checksum targets) */
1894 	sd_initpkt_for_uscsi,		/* Index: 19 */
1895 	sd_initpkt_for_uscsi,		/* Index: 20 */
1896 
1897 	/* Chain for USCSI commands (checksum targets) */
1898 	sd_initpkt_for_uscsi,		/* Index: 21 */
1899 	sd_initpkt_for_uscsi,		/* Index: 22 */
1900 	sd_initpkt_for_uscsi,		/* Index: 23 */
1901 
1902 	/* Chain for "direct" USCSI commands (all targets) */
1903 	sd_initpkt_for_uscsi,		/* Index: 24 */
1904 
1905 	/* Chain for "direct priority" USCSI commands (all targets) */
1906 	sd_initpkt_for_uscsi,		/* Index: 25 */
1907 
1908 };
1909 
1910 
1911 /*
1912  * Array to map a layering chain index to the appropriate destroypkt routine.
1913  * The redundant entries are present so that the index used for accessing
1914  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1915  * with this table as well.
1916  */
1917 typedef void (*sd_destroypkt_t)(struct buf *);
1918 
1919 static sd_destroypkt_t	sd_destroypkt_map[] = {
1920 
1921 	/* Chain for buf IO for disk drive targets (PM enabled) */
1922 	sd_destroypkt_for_buf,		/* Index: 0 */
1923 	sd_destroypkt_for_buf,		/* Index: 1 */
1924 	sd_destroypkt_for_buf,		/* Index: 2 */
1925 
1926 	/* Chain for buf IO for disk drive targets (PM disabled) */
1927 	sd_destroypkt_for_buf,		/* Index: 3 */
1928 	sd_destroypkt_for_buf,		/* Index: 4 */
1929 
1930 	/* Chain for buf IO for removable-media targets (PM enabled) */
1931 	sd_destroypkt_for_buf,		/* Index: 5 */
1932 	sd_destroypkt_for_buf,		/* Index: 6 */
1933 	sd_destroypkt_for_buf,		/* Index: 7 */
1934 	sd_destroypkt_for_buf,		/* Index: 8 */
1935 
1936 	/* Chain for buf IO for removable-media targets (PM disabled) */
1937 	sd_destroypkt_for_buf,		/* Index: 9 */
1938 	sd_destroypkt_for_buf,		/* Index: 10 */
1939 	sd_destroypkt_for_buf,		/* Index: 11 */
1940 
1941 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1942 	sd_destroypkt_for_buf,		/* Index: 12 */
1943 	sd_destroypkt_for_buf,		/* Index: 13 */
1944 	sd_destroypkt_for_buf,		/* Index: 14 */
1945 	sd_destroypkt_for_buf,		/* Index: 15 */
1946 
1947 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1948 	sd_destroypkt_for_buf,		/* Index: 16 */
1949 	sd_destroypkt_for_buf,		/* Index: 17 */
1950 	sd_destroypkt_for_buf,		/* Index: 18 */
1951 
1952 	/* Chain for USCSI commands (non-checksum targets) */
1953 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1954 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1955 
1956 	/* Chain for USCSI commands (checksum targets) */
1957 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1958 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1959 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1960 
1961 	/* Chain for "direct" USCSI commands (all targets) */
1962 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1963 
1964 	/* Chain for "direct priority" USCSI commands (all targets) */
1965 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1966 
1967 };
1968 
1969 
1970 
1971 /*
1972  * Array to map a layering chain index to the appropriate chain "type".
1973  * The chain type indicates a specific property/usage of the chain.
1974  * The redundant entries are present so that the index used for accessing
1975  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1976  * with this table as well.
1977  */
1978 
1979 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1980 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1981 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1982 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1983 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1984 						/* (for error recovery) */
1985 
1986 static int sd_chain_type_map[] = {
1987 
1988 	/* Chain for buf IO for disk drive targets (PM enabled) */
1989 	SD_CHAIN_BUFIO,			/* Index: 0 */
1990 	SD_CHAIN_BUFIO,			/* Index: 1 */
1991 	SD_CHAIN_BUFIO,			/* Index: 2 */
1992 
1993 	/* Chain for buf IO for disk drive targets (PM disabled) */
1994 	SD_CHAIN_BUFIO,			/* Index: 3 */
1995 	SD_CHAIN_BUFIO,			/* Index: 4 */
1996 
1997 	/* Chain for buf IO for removable-media targets (PM enabled) */
1998 	SD_CHAIN_BUFIO,			/* Index: 5 */
1999 	SD_CHAIN_BUFIO,			/* Index: 6 */
2000 	SD_CHAIN_BUFIO,			/* Index: 7 */
2001 	SD_CHAIN_BUFIO,			/* Index: 8 */
2002 
2003 	/* Chain for buf IO for removable-media targets (PM disabled) */
2004 	SD_CHAIN_BUFIO,			/* Index: 9 */
2005 	SD_CHAIN_BUFIO,			/* Index: 10 */
2006 	SD_CHAIN_BUFIO,			/* Index: 11 */
2007 
2008 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2009 	SD_CHAIN_BUFIO,			/* Index: 12 */
2010 	SD_CHAIN_BUFIO,			/* Index: 13 */
2011 	SD_CHAIN_BUFIO,			/* Index: 14 */
2012 	SD_CHAIN_BUFIO,			/* Index: 15 */
2013 
2014 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2015 	SD_CHAIN_BUFIO,			/* Index: 16 */
2016 	SD_CHAIN_BUFIO,			/* Index: 17 */
2017 	SD_CHAIN_BUFIO,			/* Index: 18 */
2018 
2019 	/* Chain for USCSI commands (non-checksum targets) */
2020 	SD_CHAIN_USCSI,			/* Index: 19 */
2021 	SD_CHAIN_USCSI,			/* Index: 20 */
2022 
2023 	/* Chain for USCSI commands (checksum targets) */
2024 	SD_CHAIN_USCSI,			/* Index: 21 */
2025 	SD_CHAIN_USCSI,			/* Index: 22 */
2026 	SD_CHAIN_USCSI,			/* Index: 23 */
2027 
2028 	/* Chain for "direct" USCSI commands (all targets) */
2029 	SD_CHAIN_DIRECT,		/* Index: 24 */
2030 
2031 	/* Chain for "direct priority" USCSI commands (all targets) */
2032 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2033 };
2034 
2035 
2036 /* Macro to return TRUE if the IO has come from a buf(9S) IO chain. */
2037 #define	SD_IS_BUFIO(xp)			\
2038 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2039 
2040 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2041 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2042 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2043 
2044 
2045 
2046 /*
2047  * Struct, array, and macros to map a specific chain to the appropriate
2048  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2049  *
2050  * The sd_chain_index_map[] array is used at attach time to set the various
2051  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2052  * chain to be used with the instance. This allows different instances to use
2053  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2054  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2055  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2056  * dynamically and without the use of locking; and (2) a layer to update the
2057  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2058  * to allow for deferred processing of an IO within the same chain from a
2059  * different execution context.
2060  */
2061 
2062 struct sd_chain_index {
2063 	int	sci_iostart_index;
2064 	int	sci_iodone_index;
2065 };
2066 
2067 static struct sd_chain_index	sd_chain_index_map[] = {
2068 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2069 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2070 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2071 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2072 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2073 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2074 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2075 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2076 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2077 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2078 };
2079 
2080 
2081 /*
2082  * The following are indexes into the sd_chain_index_map[] array.
2083  */
2084 
2085 /* un->un_buf_chain_type must be set to one of these */
2086 #define	SD_CHAIN_INFO_DISK		0
2087 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2088 #define	SD_CHAIN_INFO_RMMEDIA		2
2089 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2090 #define	SD_CHAIN_INFO_CHKSUM		4
2091 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2092 
2093 /* un->un_uscsi_chain_type must be set to one of these */
2094 #define	SD_CHAIN_INFO_USCSI_CMD		6
2095 /* USCSI with PM disabled is the same as DIRECT */
2096 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2097 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2098 
2099 /* un->un_direct_chain_type must be set to one of these */
2100 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2101 
2102 /* un->un_priority_chain_type must be set to one of these */
2103 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2104 
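/*
 * Illustrative sketch (hypothetical code, not from the driver): the
 * chain-info values above index rows of sd_chain_index_map[]. Attach
 * code picks a row for the instance, and xbuf initialization seeds the
 * per-IO indexes from that row, roughly:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;	(at attach time)
 *	...
 *	index = un->un_buf_chain_type;
 *	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
 *	xp->xb_chain_iodone  = sd_chain_index_map[index].sci_iodone_index;
 */
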
2105 /* size for devid inquiries */
2106 #define	MAX_INQUIRY_SIZE		0xF0
2107 
2108 /*
2109  * Macros used by functions to pass a given buf(9S) struct along to the
2110  * next function in the layering chain for further processing.
2111  *
2112  * In the following macros, passing more than three arguments to the called
2113  * routines causes the optimizer for the SPARC compiler to stop doing tail
2114  * call elimination, which results in significant performance degradation.
2115  */
2116 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2117 	((*(sd_iostart_chain[index]))(index, un, bp))
2118 
2119 #define	SD_BEGIN_IODONE(index, un, bp)	\
2120 	((*(sd_iodone_chain[index]))(index, un, bp))
2121 
2122 #define	SD_NEXT_IOSTART(index, un, bp)				\
2123 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2124 
2125 #define	SD_NEXT_IODONE(index, un, bp)				\
2126 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2127 
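/*
 * Illustrative sketch (hypothetical code, not from the driver): a
 * layering routine at table slot "index" performs its own processing
 * and then forwards the buf with the macros above, e.g.:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer setup of bp ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * with a matching iodone-side routine undoing that work and calling
 * SD_NEXT_IODONE(index, un, bp) to continue back up the chain.
 * sd_example_iostart() is a made-up name; the real layers are the
 * sd_*_iostart()/sd_*_iodone() routines in the tables above.
 */
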
2128 /*
2129  *    Function: _init
2130  *
2131  * Description: This is the driver _init(9E) entry point.
2132  *
2133  * Return Code: Returns the value from mod_install(9F) or
2134  *		ddi_soft_state_init(9F) as appropriate.
2135  *
2136  *     Context: Called when driver module loaded.
2137  */
2138 
2139 int
2140 _init(void)
2141 {
2142 	int	err;
2143 
2144 	/* establish driver name from module name */
2145 	sd_label = mod_modname(&modlinkage);
2146 
2147 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2148 	    SD_MAXUNIT);
2149 
2150 	if (err != 0) {
2151 		return (err);
2152 	}
2153 
2154 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2155 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2156 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2157 
2158 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2159 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2160 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2161 
2162 	/*
2163 	 * it's ok to init here even for fibre devices
2164 	 */
2165 	sd_scsi_probe_cache_init();
2166 
2167 	sd_scsi_target_lun_init();
2168 
2169 	/*
2170 	 * Creating taskq before mod_install ensures that all callers (threads)
2171 	 * that enter the module after a successful mod_install encounter
2172 	 * a valid taskq.
2173 	 */
2174 	sd_taskq_create();
2175 
2176 	err = mod_install(&modlinkage);
2177 	if (err != 0) {
2178 		/* delete taskq if install fails */
2179 		sd_taskq_delete();
2180 
2181 		mutex_destroy(&sd_detach_mutex);
2182 		mutex_destroy(&sd_log_mutex);
2183 		mutex_destroy(&sd_label_mutex);
2184 
2185 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2186 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2187 		cv_destroy(&sd_tr.srq_inprocess_cv);
2188 
2189 		sd_scsi_probe_cache_fini();
2190 
2191 		sd_scsi_target_lun_fini();
2192 
2193 		ddi_soft_state_fini(&sd_state);
2194 		return (err);
2195 	}
2196 
2197 	return (err);
2198 }
2199 
2200 
2201 /*
2202  *    Function: _fini
2203  *
2204  * Description: This is the driver _fini(9E) entry point.
2205  *
2206  * Return Code: Returns the value from mod_remove(9F)
2207  *
2208  *     Context: Called when driver module is unloaded.
2209  */
2210 
2211 int
2212 _fini(void)
2213 {
2214 	int err;
2215 
2216 	if ((err = mod_remove(&modlinkage)) != 0) {
2217 		return (err);
2218 	}
2219 
2220 	sd_taskq_delete();
2221 
2222 	mutex_destroy(&sd_detach_mutex);
2223 	mutex_destroy(&sd_log_mutex);
2224 	mutex_destroy(&sd_label_mutex);
2225 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2226 
2227 	sd_scsi_probe_cache_fini();
2228 
2229 	sd_scsi_target_lun_fini();
2230 
2231 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2232 	cv_destroy(&sd_tr.srq_inprocess_cv);
2233 
2234 	ddi_soft_state_fini(&sd_state);
2235 
2236 	return (err);
2237 }
2238 
2239 
2240 /*
2241  *    Function: _info
2242  *
2243  * Description: This is the driver _info(9E) entry point.
2244  *
2245  *   Arguments: modinfop - pointer to the driver modinfo structure
2246  *
2247  * Return Code: Returns the value from mod_info(9F).
2248  *
2249  *     Context: Kernel thread context
2250  */
2251 
2252 int
2253 _info(struct modinfo *modinfop)
2254 {
2255 	return (mod_info(&modlinkage, modinfop));
2256 }
2257 
2258 
2259 /*
2260  * The following routines implement the driver message logging facility.
2261  * They provide component- and level- based debug output filtering.
2262  * Output may also be restricted to messages for a single instance by
2263  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2264  * to NULL, then messages for all instances are printed.
2265  *
2266  * These routines have been cloned from each other due to the language
2267  * constraints of macros and variable argument list processing.
2268  */
2269 
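/*
 * Illustrative usage (assumed, not part of the original source): the
 * filtering globals referenced below are module variables, so debug
 * output could be enabled at boot with hypothetical /etc/system
 * entries such as:
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0x1
 *
 * where 0x1 is assumed to correspond to SD_LOGMASK_ERROR. sd_debug_un
 * may then be set from a kernel debugger to a specific sd_lun soft
 * state pointer to restrict output to a single instance.
 */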
2270 
2271 /*
2272  *    Function: sd_log_err
2273  *
2274  * Description: This routine is called by the SD_ERROR macro for debug
2275  *		logging of error conditions.
2276  *
2277  *   Arguments: comp - driver component being logged
2278  *		un   - driver soft state (unit) structure
2279  *		fmt  - error string and format to be logged
2280  */
2281 
2282 static void
2283 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2284 {
2285 	va_list		ap;
2286 	dev_info_t	*dev;
2287 
2288 	ASSERT(un != NULL);
2289 	dev = SD_DEVINFO(un);
2290 	ASSERT(dev != NULL);
2291 
2292 	/*
2293 	 * Filter messages based on the global component and level masks.
2294 	 * Also print if un matches the value of sd_debug_un, or if
2295 	 * sd_debug_un is set to NULL.
2296 	 */
2297 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2298 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2299 		mutex_enter(&sd_log_mutex);
2300 		va_start(ap, fmt);
2301 		(void) vsprintf(sd_log_buf, fmt, ap);
2302 		va_end(ap);
2303 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2304 		mutex_exit(&sd_log_mutex);
2305 	}
2306 #ifdef SD_FAULT_INJECTION
2307 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2308 	if (un->sd_injection_mask & comp) {
2309 		mutex_enter(&sd_log_mutex);
2310 		va_start(ap, fmt);
2311 		(void) vsprintf(sd_log_buf, fmt, ap);
2312 		va_end(ap);
2313 		sd_injection_log(sd_log_buf, un);
2314 		mutex_exit(&sd_log_mutex);
2315 	}
2316 #endif
2317 }
2318 
2319 
2320 /*
2321  *    Function: sd_log_info
2322  *
2323  * Description: This routine is called by the SD_INFO macro for debug
2324  *		logging of general purpose informational conditions.
2325  *
2326  *   Arguments: comp - driver component being logged
2327  *		un   - driver soft state (unit) structure
2328  *		fmt  - info string and format to be logged
2329  */
2330 
2331 static void
2332 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2333 {
2334 	va_list		ap;
2335 	dev_info_t	*dev;
2336 
2337 	ASSERT(un != NULL);
2338 	dev = SD_DEVINFO(un);
2339 	ASSERT(dev != NULL);
2340 
2341 	/*
2342 	 * Filter messages based on the global component and level masks.
2343 	 * Also print if un matches the value of sd_debug_un, or if
2344 	 * sd_debug_un is set to NULL.
2345 	 */
2346 	if ((sd_component_mask & component) &&
2347 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2348 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2349 		mutex_enter(&sd_log_mutex);
2350 		va_start(ap, fmt);
2351 		(void) vsprintf(sd_log_buf, fmt, ap);
2352 		va_end(ap);
2353 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2354 		mutex_exit(&sd_log_mutex);
2355 	}
2356 #ifdef SD_FAULT_INJECTION
2357 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2358 	if (un->sd_injection_mask & component) {
2359 		mutex_enter(&sd_log_mutex);
2360 		va_start(ap, fmt);
2361 		(void) vsprintf(sd_log_buf, fmt, ap);
2362 		va_end(ap);
2363 		sd_injection_log(sd_log_buf, un);
2364 		mutex_exit(&sd_log_mutex);
2365 	}
2366 #endif
2367 }
2368 
2369 
2370 /*
2371  *    Function: sd_log_trace
2372  *
2373  * Description: This routine is called by the SD_TRACE macro for debug
2374  *		logging of trace conditions (i.e. function entry/exit).
2375  *
2376  *   Arguments: comp - driver component being logged
2377  *		un   - driver soft state (unit) structure
2378  *		fmt  - trace string and format to be logged
2379  */
2380 
2381 static void
2382 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2383 {
2384 	va_list		ap;
2385 	dev_info_t	*dev;
2386 
2387 	ASSERT(un != NULL);
2388 	dev = SD_DEVINFO(un);
2389 	ASSERT(dev != NULL);
2390 
2391 	/*
2392 	 * Filter messages based on the global component and level masks.
2393 	 * Also print if un matches the value of sd_debug_un, or if
2394 	 * sd_debug_un is set to NULL.
2395 	 */
2396 	if ((sd_component_mask & component) &&
2397 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2398 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2399 		mutex_enter(&sd_log_mutex);
2400 		va_start(ap, fmt);
2401 		(void) vsprintf(sd_log_buf, fmt, ap);
2402 		va_end(ap);
2403 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2404 		mutex_exit(&sd_log_mutex);
2405 	}
2406 #ifdef SD_FAULT_INJECTION
2407 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2408 	if (un->sd_injection_mask & component) {
2409 		mutex_enter(&sd_log_mutex);
2410 		va_start(ap, fmt);
2411 		(void) vsprintf(sd_log_buf, fmt, ap);
2412 		va_end(ap);
2413 		sd_injection_log(sd_log_buf, un);
2414 		mutex_exit(&sd_log_mutex);
2415 	}
2416 #endif
2417 }
2418 
2419 
2420 /*
2421  *    Function: sdprobe
2422  *
2423  * Description: This is the driver probe(9e) entry point function.
2424  *
2425  *   Arguments: devi - opaque device info handle
2426  *
2427  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2428  *              DDI_PROBE_FAILURE: If the probe failed.
2429  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2430  *				   but may be present in the future.
2431  */
2432 
2433 static int
2434 sdprobe(dev_info_t *devi)
2435 {
2436 	struct scsi_device	*devp;
2437 	int			rval;
2438 	int			instance;
2439 
2440 	/*
2441 	 * if it wasn't for pln, sdprobe could actually be nulldev
2442 	 * in the "__fibre" case.
2443 	 */
2444 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2445 		return (DDI_PROBE_DONTCARE);
2446 	}
2447 
2448 	devp = ddi_get_driver_private(devi);
2449 
2450 	if (devp == NULL) {
2451 		/* Oops... nexus driver is misconfigured... */
2452 		return (DDI_PROBE_FAILURE);
2453 	}
2454 
2455 	instance = ddi_get_instance(devi);
2456 
2457 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2458 		return (DDI_PROBE_PARTIAL);
2459 	}
2460 
2461 	/*
2462 	 * Call the SCSA utility probe routine to see if we actually
2463 	 * have a target at this SCSI nexus.
2464 	 */
2465 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2466 	case SCSIPROBE_EXISTS:
2467 		switch (devp->sd_inq->inq_dtype) {
2468 		case DTYPE_DIRECT:
2469 			rval = DDI_PROBE_SUCCESS;
2470 			break;
2471 		case DTYPE_RODIRECT:
2472 			/* CDs etc. Can be removable media */
2473 			rval = DDI_PROBE_SUCCESS;
2474 			break;
2475 		case DTYPE_OPTICAL:
2476 			/*
2477 			 * Rewritable optical drive HP115AA
2478 			 * Can also be removable media
2479 			 */
2480 
2481 			/*
2482 			 * Do not attempt to bind to DTYPE_OPTICAL if
2483 			 * pre-Solaris 9 SPARC sd behavior is required.
2484 			 *
2485 			 * If first time through and sd_dtype_optical_bind has
2486 			 * not been set in /etc/system, check the properties.
2487 			 */
2488 
2489 			if (sd_dtype_optical_bind < 0) {
2490 				sd_dtype_optical_bind = ddi_prop_get_int
2491 				    (DDI_DEV_T_ANY, devi, 0,
2492 				    "optical-device-bind", 1);
2493 			}
2494 
2495 			if (sd_dtype_optical_bind == 0) {
2496 				rval = DDI_PROBE_FAILURE;
2497 			} else {
2498 				rval = DDI_PROBE_SUCCESS;
2499 			}
2500 			break;
2501 
2502 		case DTYPE_NOTPRESENT:
2503 		default:
2504 			rval = DDI_PROBE_FAILURE;
2505 			break;
2506 		}
2507 		break;
2508 	default:
2509 		rval = DDI_PROBE_PARTIAL;
2510 		break;
2511 	}
2512 
2513 	/*
2514 	 * This routine checks for resource allocation prior to freeing,
2515 	 * so it will take care of the "smart probing" case where a
2516 	 * scsi_probe() may or may not have been issued and will *not*
2517 	 * free previously-freed resources.
2518 	 */
2519 	scsi_unprobe(devp);
2520 	return (rval);
2521 }
2522 
2523 
2524 /*
2525  *    Function: sdinfo
2526  *
2527  * Description: This is the driver getinfo(9e) entry point function.
2528  * 		Given the device number, return the devinfo pointer from
2529  *		the scsi_device structure or the instance number
2530  *		associated with the dev_t.
2531  *
2532  *   Arguments: dip     - pointer to device info structure
2533  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2534  *			  DDI_INFO_DEVT2INSTANCE)
2535  *		arg     - driver dev_t
2536  *		resultp - user buffer for request response
2537  *
2538  * Return Code: DDI_SUCCESS
2539  *              DDI_FAILURE
2540  */
2541 /* ARGSUSED */
2542 static int
2543 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2544 {
2545 	struct sd_lun	*un;
2546 	dev_t		dev;
2547 	int		instance;
2548 	int		error;
2549 
2550 	switch (infocmd) {
2551 	case DDI_INFO_DEVT2DEVINFO:
2552 		dev = (dev_t)arg;
2553 		instance = SDUNIT(dev);
2554 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2555 			return (DDI_FAILURE);
2556 		}
2557 		*result = (void *) SD_DEVINFO(un);
2558 		error = DDI_SUCCESS;
2559 		break;
2560 	case DDI_INFO_DEVT2INSTANCE:
2561 		dev = (dev_t)arg;
2562 		instance = SDUNIT(dev);
2563 		*result = (void *)(uintptr_t)instance;
2564 		error = DDI_SUCCESS;
2565 		break;
2566 	default:
2567 		error = DDI_FAILURE;
2568 	}
2569 	return (error);
2570 }
2571 
2572 /*
2573  *    Function: sd_prop_op
2574  *
2575  * Description: This is the driver prop_op(9e) entry point function.
2576  *		Return the number of blocks for the partition in question
2577  *		or forward the request to the property facilities.
2578  *
2579  *   Arguments: dev       - device number
2580  *		dip       - pointer to device info structure
2581  *		prop_op   - property operator
2582  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2583  *		name      - pointer to property name
2584  *		valuep    - pointer or address of the user buffer
2585  *		lengthp   - property length
2586  *
2587  * Return Code: DDI_PROP_SUCCESS
2588  *              DDI_PROP_NOT_FOUND
2589  *              DDI_PROP_UNDEFINED
2590  *              DDI_PROP_NO_MEMORY
2591  *              DDI_PROP_BUF_TOO_SMALL
2592  */
2593 
2594 static int
2595 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2596 	char *name, caddr_t valuep, int *lengthp)
2597 {
2598 	int		instance = ddi_get_instance(dip);
2599 	struct sd_lun	*un;
2600 	uint64_t	nblocks64;
2601 	uint_t		dblk;
2602 
2603 	/*
2604 	 * Our dynamic properties are all device specific and size oriented.
2605 	 * Requests issued under conditions where size is valid are passed
2606 	 * to ddi_prop_op_nblocks_blksize with the size information, otherwise the
2607 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2608 	 */
2609 	un = ddi_get_soft_state(sd_state, instance);
2610 	if ((dev == DDI_DEV_T_ANY) || (un == NULL)) {
2611 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2612 		    name, valuep, lengthp));
2613 	} else if (!SD_IS_VALID_LABEL(un)) {
2614 		return (ddi_prop_op(dev, dip, prop_op, mod_flags, name,
2615 		    valuep, lengthp));
2616 	}
2617 
2618 	/* get nblocks value */
2619 	ASSERT(!mutex_owned(SD_MUTEX(un)));
2620 
2621 	(void) cmlb_partinfo(un->un_cmlbhandle, SDPART(dev),
2622 	    (diskaddr_t *)&nblocks64, NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
2623 
2624 	/* report size in target size blocks */
2625 	dblk = un->un_tgt_blocksize / un->un_sys_blocksize;
2626 	return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags,
2627 	    name, valuep, lengthp, nblocks64 / dblk, un->un_tgt_blocksize));
2628 }
2629 
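/*
 * Worked example for the computation above (illustrative values): with
 * un_tgt_blocksize == 2048 and un_sys_blocksize == 512, dblk is 4, so
 * a partition of 1000000 system blocks is reported as 250000 blocks of
 * 2048 bytes, the same byte count expressed in target-sized blocks.
 */
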
2630 /*
2631  * The following functions are for smart probing:
2632  * sd_scsi_probe_cache_init()
2633  * sd_scsi_probe_cache_fini()
2634  * sd_scsi_clear_probe_cache()
2635  * sd_scsi_probe_with_cache()
2636  */
2637 
2638 /*
2639  *    Function: sd_scsi_probe_cache_init
2640  *
2641  * Description: Initializes the probe response cache mutex and head pointer.
2642  *
2643  *     Context: Kernel thread context
2644  */
2645 
2646 static void
2647 sd_scsi_probe_cache_init(void)
2648 {
2649 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2650 	sd_scsi_probe_cache_head = NULL;
2651 }
2652 
2653 
2654 /*
2655  *    Function: sd_scsi_probe_cache_fini
2656  *
2657  * Description: Frees all resources associated with the probe response cache.
2658  *
2659  *     Context: Kernel thread context
2660  */
2661 
2662 static void
2663 sd_scsi_probe_cache_fini(void)
2664 {
2665 	struct sd_scsi_probe_cache *cp;
2666 	struct sd_scsi_probe_cache *ncp;
2667 
2668 	/* Clean up our smart probing linked list */
2669 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2670 		ncp = cp->next;
2671 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2672 	}
2673 	sd_scsi_probe_cache_head = NULL;
2674 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2675 }
2676 
2677 
2678 /*
2679  *    Function: sd_scsi_clear_probe_cache
2680  *
2681  * Description: This routine clears the probe response cache. This is
2682  *		done when open() returns ENXIO so that when deferred
2683  *		attach is attempted (possibly after a device has been
2684  *		turned on) we will retry the probe. Since we don't know
2685  *		which target we failed to open, we just clear the
2686  *		entire cache.
2687  *
2688  *     Context: Kernel thread context
2689  */
2690 
2691 static void
2692 sd_scsi_clear_probe_cache(void)
2693 {
2694 	struct sd_scsi_probe_cache	*cp;
2695 	int				i;
2696 
2697 	mutex_enter(&sd_scsi_probe_cache_mutex);
2698 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2699 		/*
2700 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2701 		 * force probing to be performed the next time
2702 		 * sd_scsi_probe_with_cache is called.
2703 		 */
2704 		for (i = 0; i < NTARGETS_WIDE; i++) {
2705 			cp->cache[i] = SCSIPROBE_EXISTS;
2706 		}
2707 	}
2708 	mutex_exit(&sd_scsi_probe_cache_mutex);
2709 }
2710 
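/*
 * Illustrative caller sketch (hypothetical, not from this section):
 * per the description above, the open path would invalidate the cache
 * after a failed deferred attach, roughly (rval being the hypothetical
 * open result):
 *
 *	if (rval == ENXIO)
 *		sd_scsi_clear_probe_cache();
 */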
2711 
2712 /*
2713  *    Function: sd_scsi_probe_with_cache
2714  *
2715  * Description: This routine implements support for a scsi device probe
2716  *		with cache. The driver maintains a cache of the target
2717  *		responses to scsi probes. If we get no response from a
2718  *		target during a probe inquiry, we remember that, and we
2719  *		avoid additional calls to scsi_probe on non-zero LUNs
2720  *		on the same target until the cache is cleared. By doing
2721  *		so we avoid the 1/4 sec selection timeout for nonzero
2722  *		LUNs. lun0 of a target is always probed.
2723  *
2724  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2725  *              waitfunc - indicates what the allocator routines should
2726  *			   do when resources are not available. This value
2727  *			   is passed on to scsi_probe() when that routine
2728  *			   is called.
2729  *
2730  * Return Code: SCSIPROBE_NORESP if a NORESP is in the probe response cache;
2731  *		otherwise the value returned by scsi_probe(9F).
2732  *
2733  *     Context: Kernel thread context
2734  */
2735 
2736 static int
2737 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2738 {
2739 	struct sd_scsi_probe_cache	*cp;
2740 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2741 	int		lun, tgt;
2742 
2743 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2744 	    SCSI_ADDR_PROP_LUN, 0);
2745 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2746 	    SCSI_ADDR_PROP_TARGET, -1);
2747 
2748 	/* Make sure caching is enabled and the target is in range */
2749 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2750 		/* do it the old way (no cache) */
2751 		return (scsi_probe(devp, waitfn));
2752 	}
2753 
2754 	mutex_enter(&sd_scsi_probe_cache_mutex);
2755 
2756 	/* Find the cache for this scsi bus instance */
2757 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2758 		if (cp->pdip == pdip) {
2759 			break;
2760 		}
2761 	}
2762 
2763 	/* If we can't find a cache for this pdip, create one */
2764 	if (cp == NULL) {
2765 		int i;
2766 
2767 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2768 		    KM_SLEEP);
2769 		cp->pdip = pdip;
2770 		cp->next = sd_scsi_probe_cache_head;
2771 		sd_scsi_probe_cache_head = cp;
2772 		for (i = 0; i < NTARGETS_WIDE; i++) {
2773 			cp->cache[i] = SCSIPROBE_EXISTS;
2774 		}
2775 	}
2776 
2777 	mutex_exit(&sd_scsi_probe_cache_mutex);
2778 
2779 	/* Recompute the cache for this target if LUN zero */
2780 	if (lun == 0) {
2781 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2782 	}
2783 
2784 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2785 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2786 		return (SCSIPROBE_NORESP);
2787 	}
2788 
2789 	/* Do the actual probe; save & return the result */
2790 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2791 }
2792 
2793 
2794 /*
2795  *    Function: sd_scsi_target_lun_init
2796  *
2797  * Description: Initializes the attached lun chain mutex and head pointer.
2798  *
2799  *     Context: Kernel thread context
2800  */
2801 
2802 static void
2803 sd_scsi_target_lun_init(void)
2804 {
2805 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2806 	sd_scsi_target_lun_head = NULL;
2807 }
2808 
2809 
2810 /*
2811  *    Function: sd_scsi_target_lun_fini
2812  *
2813  * Description: Frees all resources associated with the attached lun
2814  *              chain
2815  *
2816  *     Context: Kernel thread context
2817  */
2818 
2819 static void
2820 sd_scsi_target_lun_fini(void)
2821 {
2822 	struct sd_scsi_hba_tgt_lun	*cp;
2823 	struct sd_scsi_hba_tgt_lun	*ncp;
2824 
2825 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2826 		ncp = cp->next;
2827 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2828 	}
2829 	sd_scsi_target_lun_head = NULL;
2830 	mutex_destroy(&sd_scsi_target_lun_mutex);
2831 }
2832 
2833 
2834 /*
2835  *    Function: sd_scsi_get_target_lun_count
2836  *
2837  * Description: This routine will check in the attached lun chain to see
2838  * Description: This routine checks the attached lun chain to see how
2839  * 		many luns are attached on the required SCSI controller
2840  * 		and target. Currently, some capabilities, such as tagged
2841  *		queueing, are set per target by the HBA, so all luns on a
2842  *		target have the same capabilities. Based on this assumption,
2843  * 		sd should set these capabilities only once per target. This
2844  *		function is called when sd needs to decide how many luns
2845  *		are already attached on a target.
2846  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2847  *			  controller device.
2848  *              target	- The target ID on the controller's SCSI bus.
2849  *
2850  * Return Code: The number of luns attached on the required target and
2851  *		controller.
2852  *		-1 if target ID is not in parallel SCSI scope or the given
2853  * 		dip is not in the chain.
2854  *
2855  *     Context: Kernel thread context
2856  */
2857 
2858 static int
2859 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2860 {
2861 	struct sd_scsi_hba_tgt_lun	*cp;
2862 
2863 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2864 		return (-1);
2865 	}
2866 
2867 	mutex_enter(&sd_scsi_target_lun_mutex);
2868 
2869 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2870 		if (cp->pdip == dip) {
2871 			break;
2872 		}
2873 	}
2874 
2875 	mutex_exit(&sd_scsi_target_lun_mutex);
2876 
2877 	if (cp == NULL) {
2878 		return (-1);
2879 	}
2880 
2881 	return (cp->nlun[target]);
2882 }
2883 
2884 
2885 /*
2886  *    Function: sd_scsi_update_lun_on_target
2887  *
2888  * Description: This routine is used to update the attached lun chain when a
2889  *		lun is attached or detached on a target.
2890  *
2891  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2892  *                        controller device.
2893  *              target  - The target ID on the controller's SCSI bus.
2894  *		flag	- Indicate the lun is attached or detached.
2895  *
2896  *     Context: Kernel thread context
2897  */
2898 
2899 static void
2900 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2901 {
2902 	struct sd_scsi_hba_tgt_lun	*cp;
2903 
2904 	mutex_enter(&sd_scsi_target_lun_mutex);
2905 
2906 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2907 		if (cp->pdip == dip) {
2908 			break;
2909 		}
2910 	}
2911 
2912 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2913 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2914 		    KM_SLEEP);
2915 		cp->pdip = dip;
2916 		cp->next = sd_scsi_target_lun_head;
2917 		sd_scsi_target_lun_head = cp;
2918 	}
2919 
2920 	mutex_exit(&sd_scsi_target_lun_mutex);
2921 
2922 	if (cp != NULL) {
2923 		if (flag == SD_SCSI_LUN_ATTACH) {
2924 			cp->nlun[target]++;
2925 		} else {
2926 			cp->nlun[target]--;
2927 		}
2928 	}
2929 }
2930 
2931 
2932 /*
2933  *    Function: sd_spin_up_unit
2934  *
2935  * Description: Issues the following commands to spin up the device:
2936  *		START STOP UNIT and INQUIRY.
2937  *
2938  *   Arguments: un - driver soft state (unit) structure
2939  *
2940  * Return Code: 0 - success
2941  *		EIO - failure
2942  *		EACCES - reservation conflict
2943  *
2944  *     Context: Kernel thread context
2945  */
2946 
2947 static int
2948 sd_spin_up_unit(struct sd_lun *un)
2949 {
2950 	size_t	resid		= 0;
2951 	int	has_conflict	= FALSE;
2952 	uchar_t *bufaddr;
2953 
2954 	ASSERT(un != NULL);
2955 
2956 	/*
2957 	 * Send a throwaway START UNIT command.
2958 	 *
2959 	 * If we fail on this, we don't care presently what precisely
2960 	 * is wrong.  EMC's arrays will also fail this with a check
2961 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2962 	 * we don't want to fail the attach because it may become
2963 	 * "active" later.
2964 	 */
2965 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2966 	    == EACCES)
2967 		has_conflict = TRUE;
2968 
2969 	/*
2970 	 * Send another INQUIRY command to the target. This is necessary for
2971 	 * non-removable media direct access devices because their INQUIRY data
2972 	 * may not be fully qualified until they are spun up (perhaps via the
2973 	 * START command above).  (Note: This seems to be needed for some
2974 	 * legacy devices only.) The INQUIRY command should succeed even if a
2975 	 * Reservation Conflict is present.
2976 	 */
2977 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2978 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2979 		kmem_free(bufaddr, SUN_INQSIZE);
2980 		return (EIO);
2981 	}
2982 
2983 	/*
2984 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2985 	 * Note that this routine does not return a failure here even if the
2986 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2987 	 */
2988 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2989 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2990 	}
2991 
2992 	kmem_free(bufaddr, SUN_INQSIZE);
2993 
2994 	/* If we hit a reservation conflict above, tell the caller. */
2995 	if (has_conflict == TRUE) {
2996 		return (EACCES);
2997 	}
2998 
2999 	return (0);
3000 }
3001 
3002 #ifdef _LP64
3003 /*
3004  *    Function: sd_enable_descr_sense
3005  *
3006  * Description: This routine attempts to select descriptor sense format
3007  *		using the Control mode page.  Devices that support 64 bit
3008  *		LBAs (for >2TB luns) should also implement descriptor
3009  *		sense data so we will call this function whenever we see
3010  *		a lun larger than 2TB.  If for some reason the device
3011  *		supports 64 bit LBAs but doesn't support descriptor sense,
3012  *		presumably the mode select will fail.  Everything will
3013  *		continue to work normally except that we will not get
3014  *		complete sense data for commands that fail with an LBA
3015  *		larger than 32 bits.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
3022 static void
3023 sd_enable_descr_sense(struct sd_lun *un)
3024 {
3025 	uchar_t			*header;
3026 	struct mode_control_scsi3 *ctrl_bufp;
3027 	size_t			buflen;
3028 	size_t			bd_len;
3029 
3030 	/*
3031 	 * Read MODE SENSE page 0xA, Control Mode Page
3032 	 */
3033 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3034 	    sizeof (struct mode_control_scsi3);
3035 	header = kmem_zalloc(buflen, KM_SLEEP);
3036 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
3037 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
3038 		SD_ERROR(SD_LOG_COMMON, un,
3039 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3040 		goto eds_exit;
3041 	}
3042 
3043 	/*
3044 	 * Determine size of Block Descriptors in order to locate
3045 	 * the mode page data. ATAPI devices return 0, SCSI devices
3046 	 * should return MODE_BLK_DESC_LENGTH.
3047 	 */
3048 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3049 
3050 	/* Clear the mode data length field for MODE SELECT */
3051 	((struct mode_header *)header)->length = 0;
3052 
3053 	ctrl_bufp = (struct mode_control_scsi3 *)
3054 	    (header + MODE_HEADER_LENGTH + bd_len);
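
	/*
	 * For reference, a sketch of the buffer laid out above (bd_len is
	 * 0 for ATAPI devices and MODE_BLK_DESC_LENGTH for typical SCSI
	 * devices):
	 *
	 *   offset 0:                            struct mode_header
	 *   offset MODE_HEADER_LENGTH:           block descriptor(s)
	 *   offset MODE_HEADER_LENGTH + bd_len:  control mode page (0x0A)
	 */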
3055 
3056 	/*
3057 	 * If the page length is smaller than the expected value,
3058 	 * the target device doesn't support D_SENSE. Bail out here.
3059 	 */
3060 	if (ctrl_bufp->mode_page.length <
3061 	    sizeof (struct mode_control_scsi3) - 2) {
3062 		SD_ERROR(SD_LOG_COMMON, un,
3063 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3064 		goto eds_exit;
3065 	}
3066 
3067 	/*
3068 	 * Clear PS bit for MODE SELECT
3069 	 */
3070 	ctrl_bufp->mode_page.ps = 0;
3071 
3072 	/*
3073 	 * Set D_SENSE to enable descriptor sense format.
3074 	 */
3075 	ctrl_bufp->d_sense = 1;
3076 
3077 	/*
3078 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3079 	 */
3080 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
3081 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
3082 		SD_INFO(SD_LOG_COMMON, un,
3083 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3084 		goto eds_exit;
3085 	}
3086 
3087 eds_exit:
3088 	kmem_free(header, buflen);
3089 }
3090 
3091 /*
3092  *    Function: sd_reenable_dsense_task
3093  *
3094  * Description: Re-enable descriptor sense after device or bus reset
3095  *
3096  *     Context: Executes in a taskq() thread context
3097  */
3098 static void
3099 sd_reenable_dsense_task(void *arg)
3100 {
3101 	struct	sd_lun	*un = arg;
3102 
3103 	ASSERT(un != NULL);
3104 	sd_enable_descr_sense(un);
3105 }
3106 #endif /* _LP64 */
3107 
3108 /*
3109  *    Function: sd_set_mmc_caps
3110  *
3111  * Description: This routine determines if the device is MMC compliant and if
3112  *		the device supports CDDA via a mode sense of the CDVD
3113  *		the device supports CDDA via a MODE SENSE of the CD/DVD
3114  *		capabilities mode page (0x2A). Also checks if the device is
3115  *		a DVD-RAM writable device.
3116  *   Arguments: un - driver soft state (unit) structure
3117  *
3118  *     Context: Kernel thread context only
3119  */
3120 
3121 static void
3122 sd_set_mmc_caps(struct sd_lun *un)
3123 {
3124 	struct mode_header_grp2		*sense_mhp;
3125 	uchar_t				*sense_page;
3126 	caddr_t				buf;
3127 	int				bd_len;
3128 	int				status;
3129 	struct uscsi_cmd		com;
3130 	int				rtn;
3131 	uchar_t				*out_data_rw, *out_data_hd;
3132 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3133 
3134 	ASSERT(un != NULL);
3135 
3136 	/*
3137 	 * The flags set in this function are: MMC compliant, DVD-RAM
3138 	 * writable device, and CDDA support.  They are initialized to FALSE;
3139 	 * if a capability is detected, the corresponding flag is set to TRUE.
3140 	 */
3141 	un->un_f_mmc_cap = FALSE;
3142 	un->un_f_dvdram_writable_device = FALSE;
3143 	un->un_f_cfg_cdda = FALSE;
3144 
3145 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3146 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3147 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3148 
3149 	if (status != 0) {
3150 		/* command failed; just return */
3151 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3152 		return;
3153 	}
3154 	/*
3155 	 * If the mode sense request for the CDROM CAPABILITIES
3156 	 * page (0x2A) succeeds the device is assumed to be MMC.
3157 	 */
3158 	un->un_f_mmc_cap = TRUE;
3159 
3160 	/* Get to the page data */
3161 	sense_mhp = (struct mode_header_grp2 *)buf;
3162 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3163 	    sense_mhp->bdesc_length_lo;
3164 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3165 		/*
3166 		 * We did not get back the expected block descriptor
3167 		 * length so we cannot determine if the device supports
3168 		 * CDDA. However, we still indicate the device is MMC
3169 		 * according to the successful response to the page
3170 		 * 0x2A mode sense request.
3171 		 */
3172 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3173 		    "sd_set_mmc_caps: Mode Sense returned "
3174 		    "invalid block descriptor length\n");
3175 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3176 		return;
3177 	}
3178 
3179 	/* See if read CDDA is supported */
3180 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3181 	    bd_len);
3182 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3183 
3184 	/* See if writing DVD RAM is supported. */
3185 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3186 	if (un->un_f_dvdram_writable_device == TRUE) {
3187 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3188 		return;
3189 	}
3190 
3191 	/*
3192 	 * If the device presents DVD or CD capabilities in the mode
3193 	 * page, we can return here since an RRD will not have
3194 	 * these capabilities.
3195 	 */
3196 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3197 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3198 		return;
3199 	}
3200 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3201 
3202 	/*
3203 	 * If un->un_f_dvdram_writable_device is still FALSE,
3204 	 * check for a Removable Rigid Disk (RRD).  An RRD
3205 	 * device is identified by the features RANDOM_WRITABLE and
3206 	 * HARDWARE_DEFECT_MANAGEMENT.
3207 	 */
3208 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3209 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3210 
3211 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3212 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3213 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3214 	if (rtn != 0) {
3215 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3216 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3217 		return;
3218 	}
3219 
3220 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3221 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3222 
3223 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3224 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3225 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3226 	if (rtn == 0) {
3227 		/*
3228 		 * We have good information, check for random writable
3229 		 * and hardware defect features.
3230 		 */
3231 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3232 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3233 			un->un_f_dvdram_writable_device = TRUE;
3234 		}
3235 	}
3236 
3237 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3238 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3239 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3240 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3241 }
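
/*
 * Summary of the CD-ROM capabilities page (0x2A) bits consulted in
 * sd_set_mmc_caps() above, as the code reads them (not an exhaustive
 * MMC reference): byte 5 bit 0 indicates CD-DA read support, and byte 3
 * bit 5 indicates DVD-RAM write support.
 */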
3242 
3243 /*
3244  *    Function: sd_check_for_writable_cd
3245  *
3246  * Description: This routine determines if the media in the device is
3247  *		writable or not. It uses the GET CONFIGURATION command (0x46)
3248  *		to make this determination.
3249  *
3250  *   Arguments: un - driver soft state (unit) structure
3251  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3252  *                           chain and the normal command waitq, or
3253  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3254  *                           "direct" chain and bypass the normal command
3255  *                           waitq.
3256  *
3257  *     Context: Never called at interrupt context.
3258  */
3259 
3260 static void
3261 sd_check_for_writable_cd(struct sd_lun *un, int path_flag)
3262 {
3263 	struct uscsi_cmd		com;
3264 	uchar_t				*out_data;
3265 	uchar_t				*rqbuf;
3266 	int				rtn;
3267 	uchar_t				*out_data_rw, *out_data_hd;
3268 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3269 	struct mode_header_grp2		*sense_mhp;
3270 	uchar_t				*sense_page;
3271 	caddr_t				buf;
3272 	int				bd_len;
3273 	int				status;
3274 
3275 	ASSERT(un != NULL);
3276 	ASSERT(mutex_owned(SD_MUTEX(un)));
3277 
3278 	/*
3279 	 * Initialize writable media to false; it is set to true only if
3280 	 * the configuration info tells us otherwise.
3281 	 */
3282 	un->un_f_mmc_writable_media = FALSE;
3283 	mutex_exit(SD_MUTEX(un));
3284 
3285 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3286 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3287 
3288 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3289 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3290 
3291 	mutex_enter(SD_MUTEX(un));
3292 	if (rtn == 0) {
3293 		/*
3294 		 * We have good information, check for writable DVD.
3295 		 */
3296 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3297 			un->un_f_mmc_writable_media = TRUE;
3298 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3299 			kmem_free(rqbuf, SENSE_LENGTH);
3300 			return;
3301 		}
3302 	}
3303 
3304 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3305 	kmem_free(rqbuf, SENSE_LENGTH);
3306 
3307 	/*
3308 	 * Determine if this is a RRD type device.
3309 	 */
3310 	mutex_exit(SD_MUTEX(un));
3311 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3312 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3313 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3314 	mutex_enter(SD_MUTEX(un));
3315 	if (status != 0) {
3316 		/* command failed; just return */
3317 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3318 		return;
3319 	}
3320 
3321 	/* Get to the page data */
3322 	sense_mhp = (struct mode_header_grp2 *)buf;
3323 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3324 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3325 		/*
3326 		 * We did not get back the expected block descriptor length so
3327 		 * we cannot check the mode page.
3328 		 */
3329 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3330 		    "sd_check_for_writable_cd: Mode Sense returned "
3331 		    "invalid block descriptor length\n");
3332 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3333 		return;
3334 	}
3335 
3336 	/*
3337 	 * If the device presents DVD or CD capabilities in the mode
3338 	 * page, we can return here since an RRD device will not have
3339 	 * these capabilities.
3340 	 */
3341 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3342 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3343 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3344 		return;
3345 	}
3346 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3347 
3348 	/*
3349 	 * If un->un_f_mmc_writable_media is still FALSE,
3350 	 * check for RRD type media.  An RRD device is identified
3351 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3352 	 */
3353 	mutex_exit(SD_MUTEX(un));
3354 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3355 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3356 
3357 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3358 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3359 	    RANDOM_WRITABLE, path_flag);
3360 	if (rtn != 0) {
3361 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3362 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3363 		mutex_enter(SD_MUTEX(un));
3364 		return;
3365 	}
3366 
3367 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3368 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3369 
3370 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3371 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3372 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3373 	mutex_enter(SD_MUTEX(un));
3374 	if (rtn == 0) {
3375 		/*
3376 		 * We have good information, check for random writable
3377 		 * and hardware defect features as current.
3378 		 */
3379 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3380 		    (out_data_rw[10] & 0x1) &&
3381 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3382 		    (out_data_hd[10] & 0x1)) {
3383 			un->un_f_mmc_writable_media = TRUE;
3384 		}
3385 	}
3386 
3387 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3388 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3389 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3390 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3391 }
3392 
3393 /*
3394  *    Function: sd_read_unit_properties
3395  *
3396  * Description: The following implements a property lookup mechanism.
3397  *		Properties for particular disks (keyed on vendor, model
3398  *		and rev numbers) are sought in the sd.conf file via
3399  *		sd_process_sdconf_file(), and if not found there, are
3400  *		looked for in a list hardcoded in this driver via
3401  *		sd_process_sdconf_table().  Once located, the properties
3402  *		are used to update the driver unit structure.
3403  *
3404  *   Arguments: un - driver soft state (unit) structure
3405  */
3406 
3407 static void
3408 sd_read_unit_properties(struct sd_lun *un)
3409 {
3410 	/*
3411 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3412 	 * the "sd-config-list" property (from the sd.conf file) or if
3413 	 * there was not a match for the inquiry vid/pid. If this event
3414 	 * occurs the static driver configuration table is searched for
3415 	 * a match.
3416 	 */
3417 	ASSERT(un != NULL);
3418 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3419 		sd_process_sdconf_table(un);
3420 	}
3421 
3422 	/* check for LSI device */
3423 	sd_is_lsi(un);
3424 
3426 }
3427 
3428 
3429 /*
3430  *    Function: sd_process_sdconf_file
3431  *
3432  * Description: Use ddi_getlongprop to obtain the properties from the
3433  *		driver's config file (i.e., sd.conf) and update the driver
3434  *		soft state structure accordingly.
3435  *
3436  *   Arguments: un - driver soft state (unit) structure
3437  *
3438  * Return Code: SD_SUCCESS - The properties were successfully set according
3439  *			     to the driver configuration file.
3440  *		SD_FAILURE - The driver config list was not obtained or
3441  *			     there was no vid/pid match. This indicates that
3442  *			     the static config table should be used.
3443  *
3444  * The config file has a property, "sd-config-list", which consists of
3445  * one or more duplets as follows:
3446  *
3447  *  sd-config-list=
3448  *	<duplet>,
3449  *	[<duplet>,]
3450  *	[<duplet>];
3451  *
3452  * The structure of each duplet is as follows:
3453  *
3454  *  <duplet>:= <vid+pid>,<data-property-name-list>
3455  *
3456  * The first entry of the duplet is the device ID string (the concatenated
3457  * vid & pid; not to be confused with a device_id).  This is defined in
3458  * the same way as in the sd_disk_table.
3459  *
3460  * The second part of the duplet is a string that identifies a
3461  * data-property-name-list. The data-property-name-list is defined as
3462  * follows:
3463  *
3464  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3465  *
3466  * The syntax of <data-property-name> depends on the <version> field.
3467  *
3468  * If version = SD_CONF_VERSION_1 we have the following syntax:
3469  *
3470  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3471  *
3472  * where the prop0 value will be used to set prop0 if bit0 set in the
3473  * where the prop0 value will be used to set prop0 if bit0 is set in the
3474  * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3475  */
3476 
3477 static int
3478 sd_process_sdconf_file(struct sd_lun *un)
3479 {
3480 	char	*config_list = NULL;
3481 	int	config_list_len;
3482 	int	len;
3483 	int	dupletlen = 0;
3484 	char	*vidptr;
3485 	int	vidlen;
3486 	char	*dnlist_ptr;
3487 	char	*dataname_ptr;
3488 	int	dnlist_len;
3489 	int	dataname_len;
3490 	int	*data_list;
3491 	int	data_list_len;
3492 	int	rval = SD_FAILURE;
3493 	int	i;
3494 
3495 	ASSERT(un != NULL);
3496 
3497 	/* Obtain the configuration list associated with the .conf file */
3498 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3499 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3500 	    != DDI_PROP_SUCCESS) {
3501 		return (SD_FAILURE);
3502 	}
3503 
3504 	/*
3505 	 * Compare vids in each duplet to the inquiry vid - if a match is
3506 	 * made, get the data value and update the soft state structure
3507 	 * accordingly.
3508 	 *
3509 	 * Note: This algorithm is complex and difficult to maintain. It should
3510 	 * be replaced with a more robust implementation.
3511 	 */
3512 	for (len = config_list_len, vidptr = config_list; len > 0;
3513 	    vidptr += dupletlen, len -= dupletlen) {
3514 		/*
3515 		 * Note: The assumption here is that each vid entry is on a
3516 		 * separate line from its associated duplet data.
3517 		 */
3518 		vidlen = dupletlen = (int)strlen(vidptr);
3519 		if ((vidlen == 0) ||
3520 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3521 			dupletlen++;
3522 			continue;
3523 		}
3524 
3525 		/*
3526 		 * dnlist contains 1 or more blank separated
3527 		 * data-property-name entries
3528 		 */
3529 		dnlist_ptr = vidptr + vidlen + 1;
3530 		dnlist_len = (int)strlen(dnlist_ptr);
3531 		dupletlen += dnlist_len + 2;
3532 
3533 		/*
3534 		 * Set a pointer for the first data-property-name
3535 		 * entry in the list
3536 		 */
3537 		dataname_ptr = dnlist_ptr;
3538 		dataname_len = 0;
3539 
3540 		/*
3541 		 * Loop through all data-property-name entries in the
3542 		 * data-property-name-list setting the properties for each.
3543 		 */
3544 		while (dataname_len < dnlist_len) {
3545 			int version;
3546 
3547 			/*
3548 			 * Determine the length of the current
3549 			 * data-property-name entry by indexing until a
3550 			 * blank or NUL is encountered. When a space is
3551 			 * encountered, reset it to a NUL for compliance
3552 			 * with ddi_getlongprop().
3553 			 */
3554 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3555 			    (dataname_ptr[i] != '\0')); i++) {
3556 				;
3557 			}
3558 
3559 			dataname_len += i;
3560 			/* If not NUL-terminated, make it so */
3561 			if (dataname_ptr[i] == ' ') {
3562 				dataname_ptr[i] = '\0';
3563 			}
3564 			dataname_len++;
3565 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3566 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3567 			    vidptr, dataname_ptr);
3568 
3569 			/* Get the data list */
3570 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3571 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3572 			    != DDI_PROP_SUCCESS) {
3573 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3574 				    "sd_process_sdconf_file: data property (%s)"
3575 				    " has no value\n", dataname_ptr);
3576 				dataname_ptr = dnlist_ptr + dataname_len;
3577 				continue;
3578 			}
3579 
3580 			version = data_list[0];
3581 
3582 			if (version == SD_CONF_VERSION_1) {
3583 				sd_tunables values;
3584 
3585 				/* Set the properties */
3586 				if (sd_chk_vers1_data(un, data_list[1],
3587 				    &data_list[2], data_list_len, dataname_ptr)
3588 				    == SD_SUCCESS) {
3589 					sd_get_tunables_from_conf(un,
3590 					    data_list[1], &data_list[2],
3591 					    &values);
3592 					sd_set_vers1_properties(un,
3593 					    data_list[1], &values);
3594 					rval = SD_SUCCESS;
3595 				} else {
3596 					rval = SD_FAILURE;
3597 				}
3598 			} else {
3599 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3600 				    "data property %s version 0x%x is invalid.",
3601 				    dataname_ptr, version);
3602 				rval = SD_FAILURE;
3603 			}
3604 			kmem_free(data_list, data_list_len);
3605 			dataname_ptr = dnlist_ptr + dataname_len;
3606 		}
3607 	}
3608 
3609 	/* free up the memory allocated by ddi_getlongprop */
3610 	if (config_list) {
3611 		kmem_free(config_list, config_list_len);
3612 	}
3613 
3614 	return (rval);
3615 }
3616 
3617 /*
3618  *    Function: sd_get_tunables_from_conf()
3619  *
3620  * Description: This function reads the data list from the sd.conf
3621  *		file and places the numeric values found there into the
3622  *		appropriate sd_tunables members.
3623  *
3624  *		Since the order of the data list members varies across
3625  *		platforms, this function reads them in a platform-specific
3626  *		order and places them into the corresponding sd_tunables
3627  *		member, which is consistent across all platforms.
3628  */
3629 static void
3630 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3631     sd_tunables *values)
3632 {
3633 	int i;
3634 	int mask;
3635 
3636 	bzero(values, sizeof (sd_tunables));
3637 
3638 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3639 
3640 		mask = 1 << i;
3641 		if (mask > flags) {
3642 			break;
3643 		}
3644 
3645 		switch (mask & flags) {
3646 		case 0:	/* This mask bit not set in flags */
3647 			continue;
3648 		case SD_CONF_BSET_THROTTLE:
3649 			values->sdt_throttle = data_list[i];
3650 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3651 			    "sd_get_tunables_from_conf: throttle = %d\n",
3652 			    values->sdt_throttle);
3653 			break;
3654 		case SD_CONF_BSET_CTYPE:
3655 			values->sdt_ctype = data_list[i];
3656 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3657 			    "sd_get_tunables_from_conf: ctype = %d\n",
3658 			    values->sdt_ctype);
3659 			break;
3660 		case SD_CONF_BSET_NRR_COUNT:
3661 			values->sdt_not_rdy_retries = data_list[i];
3662 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3663 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3664 			    values->sdt_not_rdy_retries);
3665 			break;
3666 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3667 			values->sdt_busy_retries = data_list[i];
3668 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3669 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3670 			    values->sdt_busy_retries);
3671 			break;
3672 		case SD_CONF_BSET_RST_RETRIES:
3673 			values->sdt_reset_retries = data_list[i];
3674 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3675 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3676 			    values->sdt_reset_retries);
3677 			break;
3678 		case SD_CONF_BSET_RSV_REL_TIME:
3679 			values->sdt_reserv_rel_time = data_list[i];
3680 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3681 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3682 			    values->sdt_reserv_rel_time);
3683 			break;
3684 		case SD_CONF_BSET_MIN_THROTTLE:
3685 			values->sdt_min_throttle = data_list[i];
3686 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3687 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3688 			    values->sdt_min_throttle);
3689 			break;
3690 		case SD_CONF_BSET_DISKSORT_DISABLED:
3691 			values->sdt_disk_sort_dis = data_list[i];
3692 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3693 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3694 			    values->sdt_disk_sort_dis);
3695 			break;
3696 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3697 			values->sdt_lun_reset_enable = data_list[i];
3698 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3699 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3700 			    "\n", values->sdt_lun_reset_enable);
3701 			break;
3702 		case SD_CONF_BSET_CACHE_IS_NV:
3703 			values->sdt_suppress_cache_flush = data_list[i];
3704 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3705 			    "sd_get_tunables_from_conf: "
3706 			    "suppress_cache_flush = %d\n",
3707 			    values->sdt_suppress_cache_flush);
3708 			break;
3709 		}
3710 	}
3711 }
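
/*
 * Worked example (with hypothetical flag assignments): if
 * SD_CONF_BSET_THROTTLE is bit 0 and SD_CONF_BSET_BSY_RETRY_COUNT is
 * bit 3, then flags = 0x9 causes the loop above to copy data_list[0]
 * into sdt_throttle and data_list[3] into sdt_busy_retries.  Property
 * values occupy the data-list position of their flag bit, so positions
 * whose bits are clear are skipped.
 */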
3712 
3713 /*
3714  *    Function: sd_process_sdconf_table
3715  *
3716  * Description: Search the static configuration table for a match on the
3717  *		inquiry vid/pid and update the driver soft state structure
3718  *		according to the table property values for the device.
3719  *
3720  *		The form of a configuration table entry is:
3721  *		  <vid+pid>,<flags>,<property-data>
3722  *		  "SEAGATE ST42400N",1,0x40000,
3723  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
3724  *
3725  *   Arguments: un - driver soft state (unit) structure
3726  */
3727 
3728 static void
3729 sd_process_sdconf_table(struct sd_lun *un)
3730 {
3731 	char	*id = NULL;
3732 	int	table_index;
3733 	int	idlen;
3734 
3735 	ASSERT(un != NULL);
3736 	for (table_index = 0; table_index < sd_disk_table_size;
3737 	    table_index++) {
3738 		id = sd_disk_table[table_index].device_id;
3739 		idlen = strlen(id);
3740 		if (idlen == 0) {
3741 			continue;
3742 		}
3743 
3744 		/*
3745 		 * The static configuration table currently does not
3746 		 * implement version 10 properties. Additionally,
3747 		 * multiple data-property-name entries are not
3748 		 * implemented in the static configuration table.
3749 		 */
3750 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3751 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3752 			    "sd_process_sdconf_table: disk %s\n", id);
3753 			sd_set_vers1_properties(un,
3754 			    sd_disk_table[table_index].flags,
3755 			    sd_disk_table[table_index].properties);
3756 			break;
3757 		}
3758 	}
3759 }
3760 
3761 
3762 /*
3763  *    Function: sd_sdconf_id_match
3764  *
3765  * Description: This local function implements a case sensitive vid/pid
3766  *		comparison as well as the boundary cases of wild card and
3767  *		multiple blanks.
3768  *
3769  *		Note: An implicit assumption made here is that the scsi
3770  *		inquiry structure will always keep the vid, pid and
3771  *		revision strings in consecutive sequence, so they can be
3772  *		read as a single string. If this assumption is not the
3773  *		case, a separate string, to be used for the check, needs
3774  *		to be built with these strings concatenated.
3775  *
3776  *   Arguments: un - driver soft state (unit) structure
3777  *		id - table or config file vid/pid
3778  *		idlen  - length of the vid/pid (bytes)
3779  *
3780  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3781  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3782  */
3783 
3784 static int
3785 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3786 {
3787 	struct scsi_inquiry	*sd_inq;
3788 	int 			rval = SD_SUCCESS;
3789 
3790 	ASSERT(un != NULL);
3791 	sd_inq = un->un_sd->sd_inq;
3792 	ASSERT(id != NULL);
3793 
3794 	/*
3795 	 * We use the inq_vid as a pointer to a buffer containing the
3796 	 * vid and pid and use the entire vid/pid length of the table
3797 	 * entry for the comparison. This works because the inq_pid
3798 	 * data member follows inq_vid in the scsi_inquiry structure.
3799 	 */
3800 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3801 		/*
3802 		 * The user id string is compared to the inquiry vid/pid
3803 		 * using a case insensitive comparison and ignoring
3804 		 * multiple spaces.
3805 		 */
3806 		rval = sd_blank_cmp(un, id, idlen);
3807 		if (rval != SD_SUCCESS) {
3808 			/*
3809 			 * User id strings that start and end with a "*"
3810 			 * are a special case. These do not have a
3811 			 * specific vendor, and the product string can
3812 			 * appear anywhere in the 16 byte PID portion of
3813 			 * the inquiry data. This is a simple strstr()
3814 			 * type search for the user id in the inquiry data.
3815 			 */
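			/*
			 * For example (hypothetical entry): an id of
			 * "*ST34371*" matches any device whose 16-byte
			 * inq_pid contains the substring "ST34371" at
			 * any offset, regardless of the vendor field.
			 */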
3816 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3817 				char	*pidptr = &id[1];
3818 				int	i;
3819 				int	j;
3820 				int	pidstrlen = idlen - 2;
3821 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3822 				    pidstrlen;
3823 
3824 				if (j < 0) {
3825 					return (SD_FAILURE);
3826 				}
3827 				for (i = 0; i < j; i++) {
3828 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3829 					    pidptr, pidstrlen) == 0) {
3830 						rval = SD_SUCCESS;
3831 						break;
3832 					}
3833 				}
3834 			}
3835 		}
3836 	}
3837 	return (rval);
3838 }
3839 
3840 
3841 /*
3842  *    Function: sd_blank_cmp
3843  *
3844  * Description: If the id string starts and ends with a space, treat
3845  *		multiple consecutive spaces as equivalent to a single
3846  *		space. For example, this causes a sd_disk_table entry
3847  *		of " NEC CDROM " to match a device's id string of
3848  *		"NEC       CDROM".
3849  *
3850  *		Note: The success exit condition for this routine is if
3851  *		the character at the table entry pointer is '\0' and the
3852  *		remaining inquiry count is zero. This will happen if the inquiry
3853  *		string returned by the device is padded with spaces to be
3854  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3855  *		SCSI spec states that the inquiry string is to be padded with
3856  *		spaces.
3857  *
3858  *   Arguments: un - driver soft state (unit) structure
3859  *		id - table or config file vid/pid
3860  *		idlen  - length of the vid/pid (bytes)
3861  *
3862  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3863  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3864  */
3865 
3866 static int
3867 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3868 {
3869 	char		*p1;
3870 	char		*p2;
3871 	int		cnt;
3872 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3873 	    sizeof (SD_INQUIRY(un)->inq_pid);
3874 
3875 	ASSERT(un != NULL);
3876 	p2 = un->un_sd->sd_inq->inq_vid;
3877 	ASSERT(id != NULL);
3878 	p1 = id;
3879 
3880 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3881 		/*
3882 		 * Note: string p1 is terminated by a NUL but string p2
3883 		 * isn't.  The end of p2 is determined by cnt.
3884 		 */
3885 		for (;;) {
3886 			/* skip over any extra blanks in both strings */
3887 			while ((*p1 != '\0') && (*p1 == ' ')) {
3888 				p1++;
3889 			}
3890 			while ((cnt != 0) && (*p2 == ' ')) {
3891 				p2++;
3892 				cnt--;
3893 			}
3894 
3895 			/* compare the two strings */
3896 			if ((cnt == 0) ||
3897 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3898 				break;
3899 			}
3900 			while ((cnt > 0) &&
3901 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3902 				p1++;
3903 				p2++;
3904 				cnt--;
3905 			}
3906 		}
3907 	}
3908 
3909 	/* return SD_SUCCESS if both strings match */
3910 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3911 }
3912 
3913 
3914 /*
3915  *    Function: sd_chk_vers1_data
3916  *
3917  * Description: Verify the version 1 device properties provided by the
3918  *		user via the configuration file
3919  *
3920  *   Arguments: un	     - driver soft state (unit) structure
3921  *		flags	     - integer mask indicating properties to be set
3922  *		prop_list    - integer list of property values
3923  *		list_len     - length of user provided data
3924  *
3925  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3926  *		SD_FAILURE - Indicates the user provided data is invalid
3927  */
3928 
3929 static int
3930 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3931     int list_len, char *dataname_ptr)
3932 {
3933 	int i;
3934 	int mask = 1;
3935 	int index = 0;
3936 
3937 	ASSERT(un != NULL);
3938 
3939 	/* Check for a NULL property name and list */
3940 	if (dataname_ptr == NULL) {
3941 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3942 		    "sd_chk_vers1_data: NULL data property name.");
3943 		return (SD_FAILURE);
3944 	}
3945 	if (prop_list == NULL) {
3946 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3947 		    "sd_chk_vers1_data: %s NULL data property list.",
3948 		    dataname_ptr);
3949 		return (SD_FAILURE);
3950 	}
3951 
3952 	/* Display a warning if undefined bits are set in the flags */
3953 	if (flags & ~SD_CONF_BIT_MASK) {
3954 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3955 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3956 		    "Properties not set.",
3957 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3958 		return (SD_FAILURE);
3959 	}
3960 
3961 	/*
3962 	 * Verify the length of the list by counting the bits set in the
3963 	 * flags and validating that the property list holds at least one
3964 	 * entry per set bit (plus the version and flagword entries).
3965 	 */
3966 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3967 		if (flags & mask) {
3968 			index++;
3969 		}
3970 		mask <<= 1;
3971 	}
3972 	if ((list_len / sizeof (int)) < (index + 2)) {
3973 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3974 		    "sd_chk_vers1_data: "
3975 		    "Data property list %s size is incorrect. "
3976 		    "Properties not set.", dataname_ptr);
3977 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3978 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3979 		return (SD_FAILURE);
3980 	}
3981 	return (SD_SUCCESS);
3982 }
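
/*
 * Worked example (hypothetical values): flags = 0x5 has two bits set, so
 * index ends up as 2 and the data list must supply at least index + 2 = 4
 * integers: the version word, the flagword, and two property values.  A
 * shorter list is rejected with SD_FAILURE.
 */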
3983 
3984 
3985 /*
3986  *    Function: sd_set_vers1_properties
3987  *
3988  * Description: Set version 1 device properties based on a property list
3989  *		retrieved from the driver configuration file or static
3990  *		configuration table. Version 1 properties have the format:
3991  *
3992  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3993  *
3994  *		where the prop0 value will be used to set prop0 if bit0
3995  *		is set in the flags
3996  *
3997  *   Arguments: un	     - driver soft state (unit) structure
3998  *		flags	     - integer mask indicating properties to be set
3999  *		prop_list    - integer list of property values
4000  */
4001 
4002 static void
4003 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4004 {
4005 	ASSERT(un != NULL);
4006 
4007 	/*
4008 	 * Set the flag to indicate cache is to be disabled. An attempt
4009 	 * to disable the cache via sd_cache_control() will be made
4010 	 * later during attach once the basic initialization is complete.
4011 	 */
4012 	if (flags & SD_CONF_BSET_NOCACHE) {
4013 		un->un_f_opt_disable_cache = TRUE;
4014 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4015 		    "sd_set_vers1_properties: caching disabled flag set\n");
4016 	}
4017 
4018 	/* CD-specific configuration parameters */
4019 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4020 		un->un_f_cfg_playmsf_bcd = TRUE;
4021 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4022 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4023 	}
4024 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4025 		un->un_f_cfg_readsub_bcd = TRUE;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: readsub_bcd set\n");
4028 	}
4029 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4030 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4031 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4032 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4033 	}
4034 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4035 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4036 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4037 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4038 	}
4039 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4040 		un->un_f_cfg_no_read_header = TRUE;
4041 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4042 		    "sd_set_vers1_properties: no_read_header set\n");
4043 	}
4044 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4045 		un->un_f_cfg_read_cd_xd4 = TRUE;
4046 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4047 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4048 	}
4049 
4050 	/* Support for devices which do not have valid/unique serial numbers */
4051 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4052 		un->un_f_opt_fab_devid = TRUE;
4053 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4054 		    "sd_set_vers1_properties: fab_devid bit set\n");
4055 	}
4056 
4057 	/* Support for user throttle configuration */
4058 	if (flags & SD_CONF_BSET_THROTTLE) {
4059 		ASSERT(prop_list != NULL);
4060 		un->un_saved_throttle = un->un_throttle =
4061 		    prop_list->sdt_throttle;
4062 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4063 		    "sd_set_vers1_properties: throttle set to %d\n",
4064 		    prop_list->sdt_throttle);
4065 	}
4066 
4067 	/* Set the per disk retry count according to the conf file or table. */
4068 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4069 		ASSERT(prop_list != NULL);
4070 		if (prop_list->sdt_not_rdy_retries) {
4071 			un->un_notready_retry_count =
4072 			    prop_list->sdt_not_rdy_retries;
4073 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4074 			    "sd_set_vers1_properties: not ready retry count"
4075 			    " set to %d\n", un->un_notready_retry_count);
4076 		}
4077 	}
4078 
4079 	/* The controller type is reported for generic disk driver ioctls */
4080 	if (flags & SD_CONF_BSET_CTYPE) {
4081 		ASSERT(prop_list != NULL);
4082 		switch (prop_list->sdt_ctype) {
4083 		case CTYPE_CDROM:
4084 			un->un_ctype = prop_list->sdt_ctype;
4085 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4086 			    "sd_set_vers1_properties: ctype set to "
4087 			    "CTYPE_CDROM\n");
4088 			break;
4089 		case CTYPE_CCS:
4090 			un->un_ctype = prop_list->sdt_ctype;
4091 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4092 			    "sd_set_vers1_properties: ctype set to "
4093 			    "CTYPE_CCS\n");
4094 			break;
4095 		case CTYPE_ROD:		/* RW optical */
4096 			un->un_ctype = prop_list->sdt_ctype;
4097 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4098 			    "sd_set_vers1_properties: ctype set to "
4099 			    "CTYPE_ROD\n");
4100 			break;
4101 		default:
4102 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4103 			    "sd_set_vers1_properties: Could not set "
4104 			    "invalid ctype value (%d)",
4105 			    prop_list->sdt_ctype);
4106 		}
4107 	}
4108 
4109 	/* Purple failover timeout */
4110 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4111 		ASSERT(prop_list != NULL);
4112 		un->un_busy_retry_count =
4113 		    prop_list->sdt_busy_retries;
4114 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4115 		    "sd_set_vers1_properties: "
4116 		    "busy retry count set to %d\n",
4117 		    un->un_busy_retry_count);
4118 	}
4119 
4120 	/* Purple reset retry count */
4121 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4122 		ASSERT(prop_list != NULL);
4123 		un->un_reset_retry_count =
4124 		    prop_list->sdt_reset_retries;
4125 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4126 		    "sd_set_vers1_properties: "
4127 		    "reset retry count set to %d\n",
4128 		    un->un_reset_retry_count);
4129 	}
4130 
4131 	/* Purple reservation release timeout */
4132 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4133 		ASSERT(prop_list != NULL);
4134 		un->un_reserve_release_time =
4135 		    prop_list->sdt_reserv_rel_time;
4136 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4137 		    "sd_set_vers1_properties: "
4138 		    "reservation release timeout set to %d\n",
4139 		    un->un_reserve_release_time);
4140 	}
4141 
4142 	/*
4143 	 * Flag telling the driver to verify that no commands are pending
4144 	 * for a device before issuing a Test Unit Ready. This is a workaround
4145 	 * for a firmware bug in some Seagate eliteI drives.
4146 	 */
4147 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4148 		un->un_f_cfg_tur_check = TRUE;
4149 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4150 		    "sd_set_vers1_properties: tur queue check set\n");
4151 	}
4152 
4153 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4154 		un->un_min_throttle = prop_list->sdt_min_throttle;
4155 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4156 		    "sd_set_vers1_properties: min throttle set to %d\n",
4157 		    un->un_min_throttle);
4158 	}
4159 
4160 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4161 		un->un_f_disksort_disabled =
4162 		    (prop_list->sdt_disk_sort_dis != 0) ?
4163 		    TRUE : FALSE;
4164 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4165 		    "sd_set_vers1_properties: disksort disabled "
4166 		    "flag set to %d\n",
4167 		    prop_list->sdt_disk_sort_dis);
4168 	}
4169 
4170 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4171 		un->un_f_lun_reset_enabled =
4172 		    (prop_list->sdt_lun_reset_enable != 0) ?
4173 		    TRUE : FALSE;
4174 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4175 		    "sd_set_vers1_properties: lun reset enabled "
4176 		    "flag set to %d\n",
4177 		    prop_list->sdt_lun_reset_enable);
4178 	}
4179 
4180 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4181 		un->un_f_suppress_cache_flush =
4182 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4183 		    TRUE : FALSE;
4184 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4185 		    "sd_set_vers1_properties: suppress_cache_flush "
4186 		    "flag set to %d\n",
4187 		    prop_list->sdt_suppress_cache_flush);
4188 	}
4189 
4190 	/*
4191 	 * Validate the throttle values.
4192 	 * If any of the numbers are invalid, set everything to defaults.
4193 	 */
4194 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4195 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4196 	    (un->un_min_throttle > un->un_throttle)) {
4197 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4198 		un->un_min_throttle = sd_min_throttle;
4199 	}
4200 }
4201 
4202 /*
4203  *   Function: sd_is_lsi()
4204  *
4205  *   Description: Check for LSI devices by stepping through the static
4206  *	device table to match the vid/pid.
4207  *
4208  *   Args: un - ptr to sd_lun
4209  *
4210  *   Notes:  When creating a new LSI property, the new property must
4211  *		also be added to this function.
4212  */
4213 static void
4214 sd_is_lsi(struct sd_lun *un)
4215 {
4216 	char	*id = NULL;
4217 	int	table_index;
4218 	int	idlen;
4219 	void	*prop;
4220 
4221 	ASSERT(un != NULL);
4222 	for (table_index = 0; table_index < sd_disk_table_size;
4223 	    table_index++) {
4224 		id = sd_disk_table[table_index].device_id;
4225 		idlen = strlen(id);
4226 		if (idlen == 0) {
4227 			continue;
4228 		}
4229 
4230 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4231 			prop = sd_disk_table[table_index].properties;
4232 			if (prop == &lsi_properties ||
4233 			    prop == &lsi_oem_properties ||
4234 			    prop == &lsi_properties_scsi ||
4235 			    prop == &symbios_properties) {
4236 				un->un_f_cfg_is_lsi = TRUE;
4237 			}
4238 			break;
4239 		}
4240 	}
4241 }
4242 
4243 /*
4244  *    Function: sd_get_physical_geometry
4245  *
4246  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4247  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4248  *		target, and use this information to initialize the physical
4249  *		geometry cache specified by pgeom_p.
4250  *
4251  *		MODE SENSE is an optional command, so failure in this case
4252  *		does not necessarily denote an error. We want to use the
4253  *		MODE SENSE commands to derive the physical geometry of the
4254  *		device, but if either command fails, the logical geometry is
4255  *		used as the fallback for disk label geometry in cmlb.
4256  *
4257  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4258  *		have already been initialized for the current target and
4259  *		that the current values be passed as args so that we don't
4260  *		end up ever trying to use -1 as a valid value. This could
4261  *		happen if either value is reset while we're not holding
4262  *		the mutex.
4263  *
4264  *   Arguments: un - driver soft state (unit) structure
4265  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4266  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4267  *			to use the USCSI "direct" chain and bypass the normal
4268  *			command waitq.
4269  *
4270  *     Context: Kernel thread only (can sleep).
4271  */
4272 
4273 static int
4274 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4275 	diskaddr_t capacity, int lbasize, int path_flag)
4276 {
4277 	struct	mode_format	*page3p;
4278 	struct	mode_geometry	*page4p;
4279 	struct	mode_header	*headerp;
4280 	int	sector_size;
4281 	int	nsect;
4282 	int	nhead;
4283 	int	ncyl;
4284 	int	intrlv;
4285 	int	spc;
4286 	diskaddr_t	modesense_capacity;
4287 	int	rpm;
4288 	int	bd_len;
4289 	int	mode_header_length;
4290 	uchar_t	*p3bufp;
4291 	uchar_t	*p4bufp;
4292 	int	cdbsize;
4293 	int 	ret = EIO;
4294 
4295 	ASSERT(un != NULL);
4296 
4297 	if (lbasize == 0) {
4298 		if (ISCD(un)) {
4299 			lbasize = 2048;
4300 		} else {
4301 			lbasize = un->un_sys_blocksize;
4302 		}
4303 	}
4304 	pgeom_p->g_secsize = (unsigned short)lbasize;
4305 
4306 	/*
4307 	 * If the unit is a cd/dvd drive MODE SENSE page three
4308 	 * and MODE SENSE page four are reserved (see SBC spec
4309 	 * and MMC spec). To prevent soft errors just return
4310 	 * using the default LBA size.
4311 	 */
4312 	if (ISCD(un))
4313 		return (ret);
4314 
4315 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4316 
4317 	/*
4318 	 * Retrieve MODE SENSE page 3 - Format Device Page
4319 	 */
4320 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4321 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4322 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4323 	    != 0) {
4324 		SD_ERROR(SD_LOG_COMMON, un,
4325 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4326 		goto page3_exit;
4327 	}
4328 
4329 	/*
4330 	 * Determine size of Block Descriptors in order to locate the mode
4331 	 * page data.  ATAPI devices return 0, SCSI devices should return
4332 	 * MODE_BLK_DESC_LENGTH.
4333 	 */
4334 	headerp = (struct mode_header *)p3bufp;
4335 	if (un->un_f_cfg_is_atapi == TRUE) {
4336 		struct mode_header_grp2 *mhp =
4337 		    (struct mode_header_grp2 *)headerp;
4338 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4339 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4340 	} else {
4341 		mode_header_length = MODE_HEADER_LENGTH;
4342 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4343 	}
4344 
4345 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4346 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4347 		    "received unexpected bd_len of %d, page3\n", bd_len);
4348 		goto page3_exit;
4349 	}
4350 
4351 	page3p = (struct mode_format *)
4352 	    ((caddr_t)headerp + mode_header_length + bd_len);
4353 
4354 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4355 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4356 		    "mode sense pg3 code mismatch %d\n",
4357 		    page3p->mode_page.code);
4358 		goto page3_exit;
4359 	}
4360 
4361 	/*
4362 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4363 	 * complete successfully; otherwise, revert to the logical geometry.
4364 	 * So, we need to save everything in temporary variables.
4365 	 */
4366 	sector_size = BE_16(page3p->data_bytes_sect);
4367 
4368 	/*
4369 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4370 	 */
4371 	if (sector_size == 0) {
4372 		sector_size = un->un_sys_blocksize;
4373 	} else {
4374 		sector_size &= ~(un->un_sys_blocksize - 1);
4375 	}
4376 
4377 	nsect  = BE_16(page3p->sect_track);
4378 	intrlv = BE_16(page3p->interleave);
4379 
4380 	SD_INFO(SD_LOG_COMMON, un,
4381 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4382 	SD_INFO(SD_LOG_COMMON, un,
4383 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4384 	    page3p->mode_page.code, nsect, sector_size);
4385 	SD_INFO(SD_LOG_COMMON, un,
4386 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4387 	    BE_16(page3p->track_skew),
4388 	    BE_16(page3p->cylinder_skew));
4389 
4390 
4391 	/*
4392 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4393 	 */
4394 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4395 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4396 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4397 	    != 0) {
4398 		SD_ERROR(SD_LOG_COMMON, un,
4399 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4400 		goto page4_exit;
4401 	}
4402 
4403 	/*
4404 	 * Determine size of Block Descriptors in order to locate the mode
4405 	 * page data.  ATAPI devices return 0, SCSI devices should return
4406 	 * MODE_BLK_DESC_LENGTH.
4407 	 */
4408 	headerp = (struct mode_header *)p4bufp;
4409 	if (un->un_f_cfg_is_atapi == TRUE) {
4410 		struct mode_header_grp2 *mhp =
4411 		    (struct mode_header_grp2 *)headerp;
4412 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4413 	} else {
4414 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4415 	}
4416 
4417 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4418 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4419 		    "received unexpected bd_len of %d, page4\n", bd_len);
4420 		goto page4_exit;
4421 	}
4422 
4423 	page4p = (struct mode_geometry *)
4424 	    ((caddr_t)headerp + mode_header_length + bd_len);
4425 
4426 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4427 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4428 		    "mode sense pg4 code mismatch %d\n",
4429 		    page4p->mode_page.code);
4430 		goto page4_exit;
4431 	}
4432 
4433 	/*
4434 	 * Stash the data now, after we know that both commands completed.
4435 	 */
4436 
4437 
4438 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4439 	spc   = nhead * nsect;
4440 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4441 	rpm   = BE_16(page4p->rpm);
4442 
4443 	modesense_capacity = spc * ncyl;
4444 
4445 	SD_INFO(SD_LOG_COMMON, un,
4446 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4447 	SD_INFO(SD_LOG_COMMON, un,
4448 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4449 	SD_INFO(SD_LOG_COMMON, un,
4450 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4451 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4452 	    (void *)pgeom_p, capacity);
4453 
4454 	/*
4455 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4456 	 * the product of C * H * S returned by MODE SENSE >= that returned
4457 	 * by read capacity. This is an idiosyncrasy of the original x86
4458 	 * disk subsystem.
4459 	 */
4460 	if (modesense_capacity >= capacity) {
4461 		SD_INFO(SD_LOG_COMMON, un,
4462 		    "sd_get_physical_geometry: adjusting acyl; "
4463 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4464 		    (modesense_capacity - capacity + spc - 1) / spc);
4465 		if (sector_size != 0) {
4466 			/* 1243403: NEC D38x7 drives don't support sec size */
4467 			pgeom_p->g_secsize = (unsigned short)sector_size;
4468 		}
4469 		pgeom_p->g_nsect    = (unsigned short)nsect;
4470 		pgeom_p->g_nhead    = (unsigned short)nhead;
4471 		pgeom_p->g_capacity = capacity;
4472 		pgeom_p->g_acyl	    =
4473 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4474 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4475 	}
4476 
4477 	pgeom_p->g_rpm    = (unsigned short)rpm;
4478 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4479 	ret = 0;
4480 
4481 	SD_INFO(SD_LOG_COMMON, un,
4482 	    "sd_get_physical_geometry: mode sense geometry:\n");
4483 	SD_INFO(SD_LOG_COMMON, un,
4484 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4485 	    nsect, sector_size, intrlv);
4486 	SD_INFO(SD_LOG_COMMON, un,
4487 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4488 	    nhead, ncyl, rpm, modesense_capacity);
4489 	SD_INFO(SD_LOG_COMMON, un,
4490 	    "sd_get_physical_geometry: (cached)\n");
4491 	SD_INFO(SD_LOG_COMMON, un,
4492 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4493 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4494 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4495 	SD_INFO(SD_LOG_COMMON, un,
4496 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4497 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4498 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4499 
4500 page4_exit:
4501 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4502 page3_exit:
4503 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4504 
4505 	return (ret);
4506 }
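
/*
 * Worked example of the adjustment above (hypothetical drive): with
 * nhead = 16 and nsect = 63, spc = 1008 blocks per cylinder.  If page 4
 * reports ncyl = 1024, the MODE SENSE capacity is 1032192 blocks; if
 * READ CAPACITY reported 1024000 blocks, the 8192-block difference
 * rounds up to g_acyl = 9 alternate cylinders, leaving g_ncyl = 1015.
 */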
4507 
4508 /*
4509  *    Function: sd_get_virtual_geometry
4510  *
4511  * Description: Ask the controller to tell us about the target device.
4512  *
4513  *   Arguments: un - pointer to softstate
4514  *		capacity - disk capacity in #blocks
4515  *		lbasize - disk block size in bytes
4516  *
4517  *     Context: Kernel thread only
4518  */
4519 
4520 static int
4521 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4522     diskaddr_t capacity, int lbasize)
4523 {
4524 	uint_t	geombuf;
4525 	int	spc;
4526 
4527 	ASSERT(un != NULL);
4528 
4529 	/* Set sector size, and total number of sectors */
4530 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4531 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4532 
4533 	/* Let the HBA tell us its geometry */
4534 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4535 
4536 	/* A value of -1 indicates an undefined "geometry" property */
4537 	if (geombuf == (-1)) {
4538 		return (EINVAL);
4539 	}
4540 
4541 	/* Initialize the logical geometry cache. */
4542 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4543 	lgeom_p->g_nsect   = geombuf & 0xffff;
4544 	lgeom_p->g_secsize = un->un_sys_blocksize;
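
	/*
	 * Example decode: a "geometry" value of 0x00ff003f yields
	 * g_nhead = 255 (upper 16 bits) and g_nsect = 63 (lower 16 bits).
	 */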
4545 
4546 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4547 
4548 	/*
4549 	 * Note: The driver originally converted the capacity value from
4550 	 * target blocks to system blocks. However, the capacity value passed
4551 	 * to this routine is already in terms of system blocks (this scaling
4552 	 * is done when the READ CAPACITY command is issued and processed).
4553 	 * This 'error' may have gone undetected because the usage of g_ncyl
4554 	 * (which is based upon g_capacity) is very limited within the driver.
4555 	 */
4556 	lgeom_p->g_capacity = capacity;
4557 
4558 	/*
4559 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value.
4560 	 * The HBA may return zero values if the device has been removed.
4561 	 */
4562 	if (spc == 0) {
4563 		lgeom_p->g_ncyl = 0;
4564 	} else {
4565 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4566 	}
4567 	lgeom_p->g_acyl = 0;
4568 
4569 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4570 	return (0);
4571 }
4572 
4573 /*
4574  *    Function: sd_update_block_info
4575  *
4576  * Description: Record the new target sector size and capacity in the
4577  *		soft state and update the device capacity properties.
4578  *
4579  *   Arguments: un: unit struct.
4580  *		lbasize: new target sector size
4581  *		capacity: new target capacity, i.e., block count
4582  *
4583  *     Context: Kernel thread context
4584  */
4585 
4586 static void
4587 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4588 {
4589 	uint_t		dblk;
4590 
4591 	if (lbasize != 0) {
4592 		un->un_tgt_blocksize = lbasize;
4593 		un->un_f_tgt_blocksize_is_valid	= TRUE;
4594 	}
4595 
4596 	if (capacity != 0) {
4597 		un->un_blockcount		= capacity;
4598 		un->un_f_blockcount_is_valid	= TRUE;
4599 	}
4600 
4601 	/*
4602 	 * Update device capacity properties.
4603 	 *
4604 	 *   'device-nblocks'	number of blocks in target's units
4605 	 *   'device-blksize'	data bearing size of target's block
4606 	 *
4607 	 * NOTE: math is complicated by the fact that un_tgt_blocksize may
4608 	 * not be a power of two for checksumming disks with 520/528 byte
4609 	 * sectors.
4610 	 */
4611 	if (un->un_f_tgt_blocksize_is_valid &&
4612 	    un->un_f_blockcount_is_valid &&
4613 	    un->un_sys_blocksize) {
4614 		dblk = un->un_tgt_blocksize / un->un_sys_blocksize;
4615 		(void) ddi_prop_update_int64(DDI_DEV_T_NONE, SD_DEVINFO(un),
4616 		    "device-nblocks", un->un_blockcount / dblk);
4617 		/*
4618 		 * To save memory, only define "device-blksize" when its
4619 		 * value is different from the default DEV_BSIZE value.
4620 		 */
4621 		if ((un->un_sys_blocksize * dblk) != DEV_BSIZE)
4622 			(void) ddi_prop_update_int(DDI_DEV_T_NONE,
4623 			    SD_DEVINFO(un), "device-blksize",
4624 			    un->un_sys_blocksize * dblk);
4625 	}
4626 }
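
/*
 * Worked example of the arithmetic above (hypothetical values): with
 * un_tgt_blocksize = 2048 and un_sys_blocksize = 512, dblk = 4, so an
 * un_blockcount of 1000000 is published as device-nblocks = 250000, and
 * device-blksize is published as 2048 since 512 * 4 differs from
 * DEV_BSIZE on platforms where DEV_BSIZE is 512.
 */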
4627 
4628 
4629 /*
4630  *    Function: sd_register_devid
4631  *
4632  * Description: This routine will obtain the device id information from the
4633  *		target, obtain the serial number, and register the device
4634  *		id with the ddi framework.
4635  *
4636  *   Arguments: devi - the system's dev_info_t for the device.
4637  *		un - driver soft state (unit) structure
4638  *		reservation_flag - indicates if a reservation conflict
4639  *		occurred during attach
4640  *
4641  *     Context: Kernel Thread
4642  */
4643 static void
4644 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
4645 {
4646 	int		rval		= 0;
4647 	uchar_t		*inq80		= NULL;
4648 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
4649 	size_t		inq80_resid	= 0;
4650 	uchar_t		*inq83		= NULL;
4651 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
4652 	size_t		inq83_resid	= 0;
4653 	int		dlen, len;
4654 	char		*sn;
4655 
4656 	ASSERT(un != NULL);
4657 	ASSERT(mutex_owned(SD_MUTEX(un)));
4658 	ASSERT((SD_DEVINFO(un)) == devi);
4659 
4660 	/*
4661 	 * If transport has already registered a devid for this target
4662 	 * then that takes precedence over the driver's determination
4663 	 * of the devid.
4664 	 */
4665 	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
4666 		ASSERT(un->un_devid);
4667 		return; /* use devid registered by the transport */
4668 	}
4669 
4670 	/*
4671 	 * This is the case of antiquated Sun disk drives that have the
4672 	 * FAB_DEVID property set in the disk_table.  These drives
4673 	 * manage their devids by storing them in the last 2 available sectors
4674 	 * on the drive and have them fabricated by the ddi layer by calling
4675 	 * ddi_devid_init and passing the DEVID_FAB flag.
4676 	 */
4677 	if (un->un_f_opt_fab_devid == TRUE) {
4678 		/*
4679 		 * Depending on EINVAL isn't reliable, since a reserved disk
4680 		 * may result in invalid geometry, so check to make sure a
4681 		 * reservation conflict did not occur during attach.
4682 		 */
4683 		if ((sd_get_devid(un) == EINVAL) &&
4684 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
4685 			/*
4686 			 * The devid is invalid AND there is no reservation
4687 			 * conflict.  Fabricate a new devid.
4688 			 */
4689 			(void) sd_create_devid(un);
4690 		}
4691 
4692 		/* Register the devid if it exists */
4693 		if (un->un_devid != NULL) {
4694 			(void) ddi_devid_register(SD_DEVINFO(un),
4695 			    un->un_devid);
4696 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4697 			    "sd_register_devid: Devid Fabricated\n");
4698 		}
4699 		return;
4700 	}
4701 
4702 	/*
4703 	 * We check the availability of the World Wide Name (0x83) and Unit
4704 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), which
4705 	 * records them in un_vpd_page_mask; from that we decide how to get
4706 	 * the WWN.  If page 0x83 is available, that is the best choice.  Our
4707 	 * next choice is 0x80.  If neither is available, we munge the devid
4708 	 * from the device vid/pid/serial # for Sun qualified disks, or use
4709 	 * the ddi framework to fabricate a devid for non-Sun qualified disks.
4710 	 */
4711 	if (sd_check_vpd_page_support(un) == 0) {
4712 		/* collect page 80 data if available */
4713 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
4714 
4715 			mutex_exit(SD_MUTEX(un));
4716 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
4717 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
4718 			    0x01, 0x80, &inq80_resid);
4719 
4720 			if (rval != 0) {
4721 				kmem_free(inq80, inq80_len);
4722 				inq80 = NULL;
4723 				inq80_len = 0;
4724 			} else if (ddi_prop_exists(
4725 			    DDI_DEV_T_NONE, SD_DEVINFO(un),
4726 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
4727 			    INQUIRY_SERIAL_NO) == 0) {
4728 				/*
4729 				 * If we don't already have a serial number
4730 				 * property, do quick verify of data returned
4731 				 * and define property.
4732 				 */
4733 				dlen = inq80_len - inq80_resid;
4734 				len = (size_t)inq80[3];
4735 				if ((dlen >= 4) && ((len + 4) <= dlen)) {
4736 					/*
4737 					 * Ensure sn termination, skip leading
4738 					 * blanks, and create property
4739 					 * 'inquiry-serial-no'.
4740 					 */
4741 					sn = (char *)&inq80[4];
4742 					sn[len] = 0;
4743 					while (*sn && (*sn == ' '))
4744 						sn++;
4745 					if (*sn) {
4746 						(void) ddi_prop_update_string(
4747 						    DDI_DEV_T_NONE,
4748 						    SD_DEVINFO(un),
4749 						    INQUIRY_SERIAL_NO, sn);
4750 					}
4751 				}
4752 			}
4753 			mutex_enter(SD_MUTEX(un));
4754 		}
4755 
4756 		/* collect page 83 data if available */
4757 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
4758 			mutex_exit(SD_MUTEX(un));
4759 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
4760 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
4761 			    0x01, 0x83, &inq83_resid);
4762 
4763 			if (rval != 0) {
4764 				kmem_free(inq83, inq83_len);
4765 				inq83 = NULL;
4766 				inq83_len = 0;
4767 			}
4768 			mutex_enter(SD_MUTEX(un));
4769 		}
4770 	}
4771 
4772 	/* encode best devid possible based on data available */
4773 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
4774 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
4775 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
4776 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
4777 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
4778 
4779 		/* devid successfully encoded, register devid */
4780 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
4781 
4782 	} else {
4783 		/*
4784 		 * Unable to encode a devid based on data available.
4785 		 * This is not a Sun qualified disk.  Older Sun disk
4786 		 * drives that have the SD_FAB_DEVID property
4787 		 * set in the disk_table and non-Sun qualified
4788 		 * disks are treated in the same manner.  These
4789 		 * drives manage their devids by storing them in
4790 		 * the last 2 available sectors on the drive and
4791 		 * have them fabricated by the ddi layer by
4792 		 * calling ddi_devid_init and passing the
4793 		 * DEVID_FAB flag.
4794 		 * Create a fabricated devid only if one does not
4795 		 * already exist.
4796 		 */
4797 		if (sd_get_devid(un) == EINVAL) {
4798 			(void) sd_create_devid(un);
4799 		}
4800 		un->un_f_opt_fab_devid = TRUE;
4801 
4802 		/* Register the devid if it exists */
4803 		if (un->un_devid != NULL) {
4804 			(void) ddi_devid_register(SD_DEVINFO(un),
4805 			    un->un_devid);
4806 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4807 			    "sd_register_devid: devid fabricated using "
4808 			    "ddi framework\n");
4809 		}
4810 	}
4811 
4812 	/* clean up resources */
4813 	if (inq80 != NULL) {
4814 		kmem_free(inq80, inq80_len);
4815 	}
4816 	if (inq83 != NULL) {
4817 		kmem_free(inq83, inq83_len);
4818 	}
4819 }
4820 
4821 
4822 
4823 /*
4824  *    Function: sd_get_devid
4825  *
4826  * Description: This routine will return 0 if a valid device id has been
4827  *		obtained from the target and stored in the soft state. If a
4828  *		valid device id has not been previously read and stored, a
4829  *		read attempt will be made.
4830  *
4831  *   Arguments: un - driver soft state (unit) structure
4832  *
4833  * Return Code: 0 if we successfully get the device id
4834  *
4835  *     Context: Kernel Thread
4836  */
4837 
4838 static int
4839 sd_get_devid(struct sd_lun *un)
4840 {
4841 	struct dk_devid		*dkdevid;
4842 	ddi_devid_t		tmpid;
4843 	uint_t			*ip;
4844 	size_t			sz;
4845 	diskaddr_t		blk;
4846 	int			status;
4847 	int			chksum;
4848 	int			i;
4849 	size_t			buffer_size;
4850 
4851 	ASSERT(un != NULL);
4852 	ASSERT(mutex_owned(SD_MUTEX(un)));
4853 
4854 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
4855 	    un);
4856 
4857 	if (un->un_devid != NULL) {
4858 		return (0);
4859 	}
4860 
4861 	mutex_exit(SD_MUTEX(un));
4862 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4863 	    (void *)SD_PATH_DIRECT) != 0) {
4864 		mutex_enter(SD_MUTEX(un));
4865 		return (EINVAL);
4866 	}
4867 
4868 	/*
4869 	 * Read and verify device id, stored in the reserved cylinders at the
4870 	 * end of the disk. Backup label is on the odd sectors of the last
4871 	 * track of the last cylinder. The device id will be on a track of
4872 	 * the next-to-last cylinder.
4873 	 */
4874 	mutex_enter(SD_MUTEX(un));
4875 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
4876 	mutex_exit(SD_MUTEX(un));
4877 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
4878 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
4879 	    SD_PATH_DIRECT);
4880 	if (status != 0) {
4881 		goto error;
4882 	}
4883 
4884 	/* Validate the revision */
4885 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
4886 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
4887 		status = EINVAL;
4888 		goto error;
4889 	}
4890 
4891 	/* Calculate the checksum */
4892 	chksum = 0;
4893 	ip = (uint_t *)dkdevid;
4894 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
4895 	    i++) {
4896 		chksum ^= ip[i];
4897 	}
4898 
4899 	/* Compare the checksums */
4900 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
4901 		status = EINVAL;
4902 		goto error;
4903 	}
4904 
4905 	/* Validate the device id */
4906 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
4907 		status = EINVAL;
4908 		goto error;
4909 	}
4910 
4911 	/*
4912 	 * Store the device id in the driver soft state
4913 	 */
4914 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
4915 	tmpid = kmem_alloc(sz, KM_SLEEP);
4916 
4917 	mutex_enter(SD_MUTEX(un));
4918 
4919 	un->un_devid = tmpid;
4920 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
4921 
4922 	kmem_free(dkdevid, buffer_size);
4923 
4924 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
4925 
4926 	return (status);
4927 error:
4928 	mutex_enter(SD_MUTEX(un));
4929 	kmem_free(dkdevid, buffer_size);
4930 	return (status);
4931 }
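
/*
 * Illustrative sketch (not part of the original driver): the dk_devid
 * sector validated in sd_get_devid() above carries a revision, the
 * devid itself, and an XOR checksum of every 32-bit word except the
 * last, which holds the expected value. A minimal standalone checker
 * assuming the same layout; the SD_EXAMPLE_SKETCHES guard is
 * hypothetical and never defined.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_devid_sector_ok(struct dk_devid *dkdevid, size_t blocksize)
{
	uint_t	*ip = (uint_t *)dkdevid;
	uint_t	chksum = 0;
	uint_t	i;

	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		return (0);
	}

	/* XOR every word except the trailing checksum word */
	for (i = 0; i < ((blocksize - sizeof (int)) / sizeof (int)); i++) {
		chksum ^= ip[i];
	}

	return (DKD_GETCHKSUM(dkdevid) == chksum);
}
#endif	/* SD_EXAMPLE_SKETCHES */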
4932 
4933 
4934 /*
4935  *    Function: sd_create_devid
4936  *
4937  * Description: This routine will fabricate the device id and write it
4938  *		to the disk.
4939  *
4940  *   Arguments: un - driver soft state (unit) structure
4941  *
4942  * Return Code: the fabricated device id, or NULL on failure
4943  *
4944  *     Context: Kernel Thread
4945  */
4946 
4947 static ddi_devid_t
4948 sd_create_devid(struct sd_lun *un)
4949 {
4950 	ASSERT(un != NULL);
4951 
4952 	/* Fabricate the devid */
4953 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
4954 	    == DDI_FAILURE) {
4955 		return (NULL);
4956 	}
4957 
4958 	/* Write the devid to disk */
4959 	if (sd_write_deviceid(un) != 0) {
4960 		ddi_devid_free(un->un_devid);
4961 		un->un_devid = NULL;
4962 	}
4963 
4964 	return (un->un_devid);
4965 }
4966 
4967 
4968 /*
4969  *    Function: sd_write_deviceid
4970  *
4971  * Description: This routine will write the device id to the disk
4972  *		reserved sector.
4973  *
4974  *   Arguments: un - driver soft state (unit) structure
4975  *
4976  * Return Code: -1 if the devid block cannot be located
4977  *		value returned by sd_send_scsi_WRITE
4978  *
4979  *     Context: Kernel Thread
4980  */
4981 
4982 static int
4983 sd_write_deviceid(struct sd_lun *un)
4984 {
4985 	struct dk_devid		*dkdevid;
4986 	diskaddr_t		blk;
4987 	uint_t			*ip, chksum;
4988 	int			status;
4989 	int			i;
4990 
4991 	ASSERT(mutex_owned(SD_MUTEX(un)));
4992 
4993 	mutex_exit(SD_MUTEX(un));
4994 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
4995 	    (void *)SD_PATH_DIRECT) != 0) {
4996 		mutex_enter(SD_MUTEX(un));
4997 		return (-1);
4998 	}
4999 
5000 
5001 	/* Allocate the buffer */
5002 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5003 
5004 	/* Fill in the revision */
5005 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5006 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5007 
5008 	/* Copy in the device id */
5009 	mutex_enter(SD_MUTEX(un));
5010 	bcopy(un->un_devid, &dkdevid->dkd_devid,
5011 	    ddi_devid_sizeof(un->un_devid));
5012 	mutex_exit(SD_MUTEX(un));
5013 
5014 	/* Calculate the checksum */
5015 	chksum = 0;
5016 	ip = (uint_t *)dkdevid;
5017 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
5018 	    i++) {
5019 		chksum ^= ip[i];
5020 	}
5021 
5022 	/* Fill-in checksum */
5023 	DKD_FORMCHKSUM(chksum, dkdevid);
5024 
5025 	/* Write the reserved sector */
5026 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
5027 	    SD_PATH_DIRECT);
5028 
5029 	kmem_free(dkdevid, un->un_sys_blocksize);
5030 
5031 	mutex_enter(SD_MUTEX(un));
5032 	return (status);
5033 }
5034 
5035 
5036 /*
5037  *    Function: sd_check_vpd_page_support
5038  *
5039  * Description: This routine sends an inquiry command with the EVPD bit set and
5040  *		a page code of 0x00 to the device. It is used to determine which
5041  *		vital product pages are available to find the devid. We are
5042  *		looking for pages 0x83 or 0x80.  If we return -1, the
5043  *		device does not support that command.
5044  *
5045  *   Arguments: un  - driver soft state (unit) structure
5046  *
5047  * Return Code: 0 - success
5048  *		-1 - the device does not support VPD pages
5049  *
5050  *     Context: This routine can sleep.
5051  */
5052 
5053 static int
5054 sd_check_vpd_page_support(struct sd_lun *un)
5055 {
5056 	uchar_t	*page_list	= NULL;
5057 	uchar_t	page_length	= 0xff;	/* Use max possible length */
5058 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5059 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5060 	int    	rval		= 0;
5061 	int	counter;
5062 
5063 	ASSERT(un != NULL);
5064 	ASSERT(mutex_owned(SD_MUTEX(un)));
5065 
5066 	mutex_exit(SD_MUTEX(un));
5067 
5068 	/*
5069 	 * We'll set the page length to the maximum to save figuring it out
5070 	 * with an additional call.
5071 	 */
5072 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5073 
5074 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
5075 	    page_code, NULL);
5076 
5077 	mutex_enter(SD_MUTEX(un));
5078 
5079 	/*
5080 	 * Now we must validate that the device accepted the command, as some
5081 	 * drives do not support it.  If the drive does support it, we will
5082 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5083 	 * not, we return -1.
5084 	 */
5085 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5086 		/* Loop to find one of the 2 pages we need */
5087 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5088 
5089 		/*
5090 		 * Pages are returned in ascending order, and 0x83 is what we
5091 		 * are hoping for.
5092 		 */
5093 		while ((page_list[counter] <= 0x86) &&
5094 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5095 		    VPD_HEAD_OFFSET))) {
5096 			/*
5097 			 * page_list[VPD_PAGE_LENGTH] (byte 3) is the number
5098 			 * of page codes that follow the 4-byte header.
5099 			 */
5100 
5101 			switch (page_list[counter]) {
5102 			case 0x00:
5103 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5104 				break;
5105 			case 0x80:
5106 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5107 				break;
5108 			case 0x81:
5109 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5110 				break;
5111 			case 0x82:
5112 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5113 				break;
5114 			case 0x83:
5115 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5116 				break;
5117 			case 0x86:
5118 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5119 				break;
5120 			}
5121 			counter++;
5122 		}
5123 
5124 	} else {
5125 		rval = -1;
5126 
5127 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5128 		    "sd_check_vpd_page_support: This drive does not implement "
5129 		    "VPD pages.\n");
5130 	}
5131 
5132 	kmem_free(page_list, page_length);
5133 
5134 	return (rval);
5135 }
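
/*
 * Illustrative sketch (not part of the original driver): VPD page 0x00
 * returns a 4-byte header whose byte 3 gives the number of supported
 * page codes, followed by the codes in ascending order, which is the
 * layout sd_check_vpd_page_support() walks above. A minimal lookup
 * over that layout; the SD_EXAMPLE_SKETCHES guard is hypothetical and
 * never defined.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_vpd_page_listed(uchar_t *page_list, uchar_t page_code)
{
	int	npages = page_list[3];	/* the VPD_PAGE_LENGTH byte */
	int	i;

	/* Supported page codes start at byte 4 */
	for (i = 0; i < npages; i++) {
		if (page_list[4 + i] == page_code) {
			return (1);
		}
	}
	return (0);
}
#endif	/* SD_EXAMPLE_SKETCHES */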
5136 
5137 
5138 /*
5139  *    Function: sd_setup_pm
5140  *
5141  * Description: Initialize Power Management on the device
5142  *
5143  *     Context: Kernel Thread
5144  */
5145 
5146 static void
5147 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
5148 {
5149 	uint_t	log_page_size;
5150 	uchar_t	*log_page_data;
5151 	int	rval;
5152 
5153 	/*
5154 	 * Since we are called from attach, holding a mutex for
5155 	 * un is unnecessary. Because some of the routines called
5156 	 * from here require SD_MUTEX to not be held, assert this
5157 	 * right up front.
5158 	 */
5159 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5160 	/*
5161 	 * Since the sd device does not have the 'reg' property,
5162 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5163 	 * The following code is to tell cpr that this device
5164 	 * DOES need to be suspended and resumed.
5165 	 */
5166 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5167 	    "pm-hardware-state", "needs-suspend-resume");
5168 
5169 	/*
5170 	 * This complies with the new power management framework
5171 	 * for certain desktop machines. Create the pm_components
5172 	 * property as a string array property.
5173 	 */
5174 	if (un->un_f_pm_supported) {
5175 		/*
5176 		 * Not all devices have a motor; some may return
5177 		 * ILLEGAL REQUEST to a start/stop command, and some
5178 		 * will hang.
5179 		 * The following START_STOP_UNIT is used to check
5180 		 * whether the target device has a motor.
5181 		 */
5182 		un->un_f_start_stop_supported = TRUE;
5183 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
5184 		    SD_PATH_DIRECT) != 0) {
5185 			un->un_f_start_stop_supported = FALSE;
5186 		}
5187 
5188 		/*
5189 		 * Create the pm properties anyway; otherwise the parent
5190 		 * can't go to sleep.
5191 		 */
5192 		(void) sd_create_pm_components(devi, un);
5193 		un->un_f_pm_is_enabled = TRUE;
5194 		return;
5195 	}
5196 
5197 	if (!un->un_f_log_sense_supported) {
5198 		un->un_power_level = SD_SPINDLE_ON;
5199 		un->un_f_pm_is_enabled = FALSE;
5200 		return;
5201 	}
5202 
5203 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
5204 
5205 #ifdef	SDDEBUG
5206 	if (sd_force_pm_supported) {
5207 		/* Force a successful result */
5208 		rval = 1;
5209 	}
5210 #endif
5211 
5212 	/*
5213 	 * If the start-stop cycle counter log page is not supported
5214 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5215 	 * then we should not create the pm_components property.
5216 	 */
5217 	if (rval == -1) {
5218 		/*
5219 		 * Error.
5220 		 * Reading log sense failed, most likely this is
5221 		 * an older drive that does not support log sense.
5222 		 * If this fails auto-pm is not supported.
5223 		 */
5224 		un->un_power_level = SD_SPINDLE_ON;
5225 		un->un_f_pm_is_enabled = FALSE;
5226 
5227 	} else if (rval == 0) {
5228 		/*
5229 		 * Page not found.
5230 		 * The start stop cycle counter is implemented as page
5231 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
5232 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5233 		 */
5234 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
5235 			/*
5236 			 * Page found, use this one.
5237 			 */
5238 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5239 			un->un_f_pm_is_enabled = TRUE;
5240 		} else {
5241 			/*
5242 			 * Error or page not found.
5243 			 * auto-pm is not supported for this device.
5244 			 */
5245 			un->un_power_level = SD_SPINDLE_ON;
5246 			un->un_f_pm_is_enabled = FALSE;
5247 		}
5248 	} else {
5249 		/*
5250 		 * Page found, use it.
5251 		 */
5252 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5253 		un->un_f_pm_is_enabled = TRUE;
5254 	}
5255 
5256 
5257 	if (un->un_f_pm_is_enabled == TRUE) {
5258 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5259 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5260 
5261 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5262 		    log_page_size, un->un_start_stop_cycle_page,
5263 		    0x01, 0, SD_PATH_DIRECT);
5264 #ifdef	SDDEBUG
5265 		if (sd_force_pm_supported) {
5266 			/* Force a successful result */
5267 			rval = 0;
5268 		}
5269 #endif
5270 
5271 		/*
5272 		 * If the Log Sense for the start/stop cycle counter page
5273 		 * succeeds, then power management is supported and we can
5274 		 * enable auto-pm.
5275 		 */
5276 		if (rval == 0)  {
5277 			(void) sd_create_pm_components(devi, un);
5278 		} else {
5279 			un->un_power_level = SD_SPINDLE_ON;
5280 			un->un_f_pm_is_enabled = FALSE;
5281 		}
5282 
5283 		kmem_free(log_page_data, log_page_size);
5284 	}
5285 }
5286 
5287 
5288 /*
5289  *    Function: sd_create_pm_components
5290  *
5291  * Description: Initialize PM property.
5292  *
5293  *     Context: Kernel thread context
5294  */
5295 
5296 static void
5297 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5298 {
5299 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5300 
5301 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5302 
5303 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5304 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5305 		/*
5306 		 * When components are initially created they are idle,
5307 		 * so power up any non-removables.
5308 		 * Note: the return value of pm_raise_power can't be used
5309 		 * for determining if PM should be enabled for this device.
5310 		 * Even if you check the return values and remove this
5311 		 * property created above, the PM framework will not honor the
5312 		 * change after the first call to pm_raise_power. Hence,
5313 		 * removal of that property does not help if pm_raise_power
5314 		 * fails. In the case of removable media, the start/stop
5315 		 * will fail if the media is not present.
5316 		 */
5317 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5318 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5319 			mutex_enter(SD_MUTEX(un));
5320 			un->un_power_level = SD_SPINDLE_ON;
5321 			mutex_enter(&un->un_pm_mutex);
5322 			/* Set to on and not busy. */
5323 			un->un_pm_count = 0;
5324 		} else {
5325 			mutex_enter(SD_MUTEX(un));
5326 			un->un_power_level = SD_SPINDLE_OFF;
5327 			mutex_enter(&un->un_pm_mutex);
5328 			/* Set to off. */
5329 			un->un_pm_count = -1;
5330 		}
5331 		mutex_exit(&un->un_pm_mutex);
5332 		mutex_exit(SD_MUTEX(un));
5333 	} else {
5334 		un->un_power_level = SD_SPINDLE_ON;
5335 		un->un_f_pm_is_enabled = FALSE;
5336 	}
5337 }
5338 
5339 
5340 /*
5341  *    Function: sd_ddi_suspend
5342  *
5343  * Description: Performs system power-down operations. This includes
5344  *		setting the drive state to indicate it's suspended so
5345  *		that no new commands will be accepted. Also, waits for
5346  *		all commands that are in transport or queued to a timer
5347  *		for retry to complete. All timeout threads are cancelled.
5348  *
5349  * Return Code: DDI_FAILURE or DDI_SUCCESS
5350  *
5351  *     Context: Kernel thread context
5352  */
5353 
5354 static int
5355 sd_ddi_suspend(dev_info_t *devi)
5356 {
5357 	struct	sd_lun	*un;
5358 	clock_t		wait_cmds_complete;
5359 
5360 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5361 	if (un == NULL) {
5362 		return (DDI_FAILURE);
5363 	}
5364 
5365 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5366 
5367 	mutex_enter(SD_MUTEX(un));
5368 
5369 	/* Return success if the device is already suspended. */
5370 	if (un->un_state == SD_STATE_SUSPENDED) {
5371 		mutex_exit(SD_MUTEX(un));
5372 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5373 		    "device already suspended, exiting\n");
5374 		return (DDI_SUCCESS);
5375 	}
5376 
5377 	/* Return failure if the device is being used by HA */
5378 	if (un->un_resvd_status &
5379 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5380 		mutex_exit(SD_MUTEX(un));
5381 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5382 		    "device in use by HA, exiting\n");
5383 		return (DDI_FAILURE);
5384 	}
5385 
5386 	/*
5387 	 * Return failure if the device is in a resource wait
5388 	 * or power changing state.
5389 	 */
5390 	if ((un->un_state == SD_STATE_RWAIT) ||
5391 	    (un->un_state == SD_STATE_PM_CHANGING)) {
5392 		mutex_exit(SD_MUTEX(un));
5393 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5394 		    "device in resource wait state, exiting\n");
5395 		return (DDI_FAILURE);
5396 	}
5397 
5398 
5399 	un->un_save_state = un->un_last_state;
5400 	New_state(un, SD_STATE_SUSPENDED);
5401 
5402 	/*
5403 	 * Wait for all commands that are in transport or queued to a timer
5404 	 * for retry to complete.
5405 	 *
5406 	 * While waiting, no new commands will be accepted or sent because of
5407 	 * the new state we set above.
5408 	 *
5409 	 * Wait till current operation has completed. If we are in the resource
5410 	 * wait state (with an intr outstanding) then we need to wait till the
5411 	 * intr completes and starts the next cmd. We want to wait for
5412 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5413 	 */
5414 	wait_cmds_complete = ddi_get_lbolt() +
5415 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5416 
5417 	while (un->un_ncmds_in_transport != 0) {
5418 		/*
5419 		 * Fail if commands do not finish in the specified time.
5420 		 */
5421 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5422 		    wait_cmds_complete) == -1) {
5423 			/*
5424 			 * Undo the state changes made above. Everything
5425 			 * must go back to its original value.
5426 			 */
5427 			Restore_state(un);
5428 			un->un_last_state = un->un_save_state;
5429 			/* Wake up any threads that might be waiting. */
5430 			cv_broadcast(&un->un_suspend_cv);
5431 			mutex_exit(SD_MUTEX(un));
5432 			SD_ERROR(SD_LOG_IO_PM, un,
5433 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5434 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5435 			return (DDI_FAILURE);
5436 		}
5437 	}
5438 
5439 	/*
5440 	 * Cancel SCSI watch thread and timeouts, if any are active
5441 	 */
5442 
5443 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5444 		opaque_t temp_token = un->un_swr_token;
5445 		mutex_exit(SD_MUTEX(un));
5446 		scsi_watch_suspend(temp_token);
5447 		mutex_enter(SD_MUTEX(un));
5448 	}
5449 
5450 	if (un->un_reset_throttle_timeid != NULL) {
5451 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5452 		un->un_reset_throttle_timeid = NULL;
5453 		mutex_exit(SD_MUTEX(un));
5454 		(void) untimeout(temp_id);
5455 		mutex_enter(SD_MUTEX(un));
5456 	}
5457 
5458 	if (un->un_dcvb_timeid != NULL) {
5459 		timeout_id_t temp_id = un->un_dcvb_timeid;
5460 		un->un_dcvb_timeid = NULL;
5461 		mutex_exit(SD_MUTEX(un));
5462 		(void) untimeout(temp_id);
5463 		mutex_enter(SD_MUTEX(un));
5464 	}
5465 
5466 	mutex_enter(&un->un_pm_mutex);
5467 	if (un->un_pm_timeid != NULL) {
5468 		timeout_id_t temp_id = un->un_pm_timeid;
5469 		un->un_pm_timeid = NULL;
5470 		mutex_exit(&un->un_pm_mutex);
5471 		mutex_exit(SD_MUTEX(un));
5472 		(void) untimeout(temp_id);
5473 		mutex_enter(SD_MUTEX(un));
5474 	} else {
5475 		mutex_exit(&un->un_pm_mutex);
5476 	}
5477 
5478 	if (un->un_retry_timeid != NULL) {
5479 		timeout_id_t temp_id = un->un_retry_timeid;
5480 		un->un_retry_timeid = NULL;
5481 		mutex_exit(SD_MUTEX(un));
5482 		(void) untimeout(temp_id);
5483 		mutex_enter(SD_MUTEX(un));
5484 	}
5485 
5486 	if (un->un_direct_priority_timeid != NULL) {
5487 		timeout_id_t temp_id = un->un_direct_priority_timeid;
5488 		un->un_direct_priority_timeid = NULL;
5489 		mutex_exit(SD_MUTEX(un));
5490 		(void) untimeout(temp_id);
5491 		mutex_enter(SD_MUTEX(un));
5492 	}
5493 
5494 	if (un->un_f_is_fibre == TRUE) {
5495 		/*
5496 		 * Remove callbacks for insert and remove events
5497 		 */
5498 		if (un->un_insert_event != NULL) {
5499 			mutex_exit(SD_MUTEX(un));
5500 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5501 			mutex_enter(SD_MUTEX(un));
5502 			un->un_insert_event = NULL;
5503 		}
5504 
5505 		if (un->un_remove_event != NULL) {
5506 			mutex_exit(SD_MUTEX(un));
5507 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5508 			mutex_enter(SD_MUTEX(un));
5509 			un->un_remove_event = NULL;
5510 		}
5511 	}
5512 
5513 	mutex_exit(SD_MUTEX(un));
5514 
5515 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5516 
5517 	return (DDI_SUCCESS);
5518 }
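
/*
 * Illustrative sketch (not part of the original driver): the command
 * drain in sd_ddi_suspend() above uses an absolute deadline with
 * cv_timedwait(9F), which returns -1 once the deadline passes. A
 * generic form of that pattern; the SD_EXAMPLE_SKETCHES guard is
 * hypothetical and never defined.
 */
#ifdef	SD_EXAMPLE_SKETCHES
/* Returns 1 if *countp drained to zero before the deadline, else 0 */
static int
sd_example_drain(kmutex_t *mp, kcondvar_t *cvp, int *countp, int secs)
{
	clock_t	deadline = ddi_get_lbolt() + (secs * drv_usectohz(1000000));

	ASSERT(mutex_owned(mp));
	while (*countp != 0) {
		if (cv_timedwait(cvp, mp, deadline) == -1) {
			return (0);	/* timed out */
		}
	}
	return (1);
}
#endif	/* SD_EXAMPLE_SKETCHES */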
5519 
5520 
5521 /*
5522  *    Function: sd_ddi_pm_suspend
5523  *
5524  * Description: Set the drive state to low power.
5525  *		Someone else is required to actually change the drive
5526  *		power level.
5527  *
5528  *   Arguments: un - driver soft state (unit) structure
5529  *
5530  * Return Code: DDI_FAILURE or DDI_SUCCESS
5531  *
5532  *     Context: Kernel thread context
5533  */
5534 
5535 static int
5536 sd_ddi_pm_suspend(struct sd_lun *un)
5537 {
5538 	ASSERT(un != NULL);
5539 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5540 
5541 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5542 	mutex_enter(SD_MUTEX(un));
5543 
5544 	/*
5545 	 * Exit if power management is not enabled for this device, or if
5546 	 * the device is being used by HA.
5547 	 */
5548 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5549 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5550 		mutex_exit(SD_MUTEX(un));
5551 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5552 		return (DDI_SUCCESS);
5553 	}
5554 
5555 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5556 	    un->un_ncmds_in_driver);
5557 
5558 	/*
5559 	 * See if the device is not busy, i.e.:
5560 	 *    - we have no commands in the driver for this device
5561 	 *    - not waiting for resources
5562 	 */
5563 	if ((un->un_ncmds_in_driver == 0) &&
5564 	    (un->un_state != SD_STATE_RWAIT)) {
5565 		/*
5566 		 * The device is not busy, so it is OK to go to low power state.
5567 		 * Indicate low power, but rely on someone else to actually
5568 		 * change it.
5569 		 */
5570 		mutex_enter(&un->un_pm_mutex);
5571 		un->un_pm_count = -1;
5572 		mutex_exit(&un->un_pm_mutex);
5573 		un->un_power_level = SD_SPINDLE_OFF;
5574 	}
5575 
5576 	mutex_exit(SD_MUTEX(un));
5577 
5578 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
5579 
5580 	return (DDI_SUCCESS);
5581 }
5582 
5583 
5584 /*
5585  *    Function: sd_ddi_resume
5586  *
5587  * Description: Performs system power-up operations.
5588  *
5589  * Return Code: DDI_SUCCESS
5590  *		DDI_FAILURE
5591  *
5592  *     Context: Kernel thread context
5593  */
5594 
5595 static int
5596 sd_ddi_resume(dev_info_t *devi)
5597 {
5598 	struct	sd_lun	*un;
5599 
5600 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5601 	if (un == NULL) {
5602 		return (DDI_FAILURE);
5603 	}
5604 
5605 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
5606 
5607 	mutex_enter(SD_MUTEX(un));
5608 	Restore_state(un);
5609 
5610 	/*
5611 	 * Restore the state which was saved to give
5612 	 * un_last_state the right value.
5613 	 */
5614 	un->un_last_state = un->un_save_state;
5615 	/*
5616 	 * Note: throttle comes back at full.
5617 	 * Also note: this MUST be done before calling pm_raise_power
5618 	 * otherwise the system can get hung in biowait. The scenario where
5619 	 * this'll happen is under cpr suspend. Writing of the system
5620 	 * state goes through sddump, which writes 0 to un_throttle. If
5621 	 * writing the system state then fails, for example if the partition is
5622 	 * too small, then cpr attempts a resume. If throttle isn't restored
5623 	 * from the saved value until after calling pm_raise_power then
5624 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
5625 	 * in biowait.
5626 	 */
5627 	un->un_throttle = un->un_saved_throttle;
5628 
5629 	/*
5630 	 * The chance of failure is very rare as the only command done in power
5631 	 * entry point is START command when you transition from 0->1 or
5632 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
5633 	 * which suspend was done. Ignore the return value as the resume should
5634 	 * not be failed. In the case of removable media the media need not be
5635 	 * inserted and hence there is a chance that raise power will fail with
5636 	 * media not present.
5637 	 */
5638 	if (un->un_f_attach_spinup) {
5639 		mutex_exit(SD_MUTEX(un));
5640 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
5641 		mutex_enter(SD_MUTEX(un));
5642 	}
5643 
5644 	/*
5645 	 * Don't broadcast to the suspend cv and therefore possibly
5646 	 * start I/O until after power has been restored.
5647 	 */
5648 	cv_broadcast(&un->un_suspend_cv);
5649 	cv_broadcast(&un->un_state_cv);
5650 
5651 	/* restart thread */
5652 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
5653 		scsi_watch_resume(un->un_swr_token);
5654 	}
5655 
5656 #if (defined(__fibre))
5657 	if (un->un_f_is_fibre == TRUE) {
5658 		/*
5659 		 * Add callbacks for insert and remove events
5660 		 */
5661 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
5662 			sd_init_event_callbacks(un);
5663 		}
5664 	}
5665 #endif
5666 
5667 	/*
5668 	 * Transport any pending commands to the target.
5669 	 *
5670 	 * If this is a low-activity device commands in queue will have to wait
5671 	 * until new commands come in, which may take a while. Also, we
5672 	 * specifically don't check un_ncmds_in_transport because we know that
5673 	 * there really are no commands in progress after the unit was
5674 	 * suspended and we could have reached the throttle level, been
5675 	 * suspended, and have no new commands coming in for awhile. Highly
5676 	 * unlikely, but so is the low-activity disk scenario.
5677 	 */
5678 	ddi_xbuf_dispatch(un->un_xbuf_attr);
5679 
5680 	sd_start_cmds(un, NULL);
5681 	mutex_exit(SD_MUTEX(un));
5682 
5683 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
5684 
5685 	return (DDI_SUCCESS);
5686 }
5687 
5688 
5689 /*
5690  *    Function: sd_ddi_pm_resume
5691  *
5692  * Description: Set the drive state to powered on.
5693  *		Someone else is required to actually change the drive
5694  *		power level.
5695  *
5696  *   Arguments: un - driver soft state (unit) structure
5697  *
5698  * Return Code: DDI_SUCCESS
5699  *
5700  *     Context: Kernel thread context
5701  */
5702 
5703 static int
5704 sd_ddi_pm_resume(struct sd_lun *un)
5705 {
5706 	ASSERT(un != NULL);
5707 
5708 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5709 	mutex_enter(SD_MUTEX(un));
5710 	un->un_power_level = SD_SPINDLE_ON;
5711 
5712 	ASSERT(!mutex_owned(&un->un_pm_mutex));
5713 	mutex_enter(&un->un_pm_mutex);
5714 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
5715 		un->un_pm_count++;
5716 		ASSERT(un->un_pm_count == 0);
5717 		/*
5718 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
5719 		 * un_suspend_cv is for a system resume, not a power management
5720 		 * device resume. (4297749)
5721 		 *	 cv_broadcast(&un->un_suspend_cv);
5722 		 */
5723 	}
5724 	mutex_exit(&un->un_pm_mutex);
5725 	mutex_exit(SD_MUTEX(un));
5726 
5727 	return (DDI_SUCCESS);
5728 }
5729 
5730 
5731 /*
5732  *    Function: sd_pm_idletimeout_handler
5733  *
5734  * Description: A timer routine that's active only while a device is busy.
5735  *		The purpose is to extend slightly the pm framework's busy
5736  *		view of the device to prevent busy/idle thrashing for
5737  *		back-to-back commands. Do this by comparing the current time
5738  *		to the time at which the last command completed and when the
5739  *		difference is greater than sd_pm_idletime, call
5740  *		pm_idle_component. In addition to indicating idle to the pm
5741  *		framework, update the chain type to again use the internal pm
5742  *		layers of the driver.
5743  *
5744  *   Arguments: arg - driver soft state (unit) structure
5745  *
5746  *     Context: Executes in a timeout(9F) thread context
5747  */
5748 
5749 static void
5750 sd_pm_idletimeout_handler(void *arg)
5751 {
5752 	struct sd_lun *un = arg;
5753 
5754 	time_t	now;
5755 
5756 	mutex_enter(&sd_detach_mutex);
5757 	if (un->un_detach_count != 0) {
5758 		/* Abort if the instance is detaching */
5759 		mutex_exit(&sd_detach_mutex);
5760 		return;
5761 	}
5762 	mutex_exit(&sd_detach_mutex);
5763 
5764 	now = ddi_get_time();
5765 	/*
5766 	 * Grab both mutexes, in the proper order, since we're accessing
5767 	 * both PM and softstate variables.
5768 	 */
5769 	mutex_enter(SD_MUTEX(un));
5770 	mutex_enter(&un->un_pm_mutex);
5771 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
5772 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
5773 		/*
5774 		 * Update the chain types.
5775 		 * This takes effect on the next new command received.
5776 		 */
5777 		if (un->un_f_non_devbsize_supported) {
5778 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
5779 		} else {
5780 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
5781 		}
5782 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
5783 
5784 		SD_TRACE(SD_LOG_IO_PM, un,
5785 		    "sd_pm_idletimeout_handler: idling device\n");
5786 		(void) pm_idle_component(SD_DEVINFO(un), 0);
5787 		un->un_pm_idle_timeid = NULL;
5788 	} else {
5789 		un->un_pm_idle_timeid =
5790 		    timeout(sd_pm_idletimeout_handler, un,
5791 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
5792 	}
5793 	mutex_exit(&un->un_pm_mutex);
5794 	mutex_exit(SD_MUTEX(un));
5795 }
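
/*
 * Illustrative sketch (not part of the original driver): the handler
 * above re-arms itself every 300 ms until the device has been idle
 * longer than sd_pm_idletime with no commands outstanding. The
 * decision predicate in isolation, under the never-defined,
 * hypothetical SD_EXAMPLE_SKETCHES guard.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static int
sd_example_ok_to_idle(time_t now, time_t last_idle_time, time_t idletime,
    long ncmds_in_driver, int pm_count)
{
	/* Idle long enough, nothing in flight, and PM count drained */
	return (((now - last_idle_time) > idletime) &&
	    (ncmds_in_driver == 0) && (pm_count == 0));
}
#endif	/* SD_EXAMPLE_SKETCHES */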
5796 
5797 
5798 /*
5799  *    Function: sd_pm_timeout_handler
5800  *
5801  * Description: Callback to tell framework we are idle.
5802  *
5803  *     Context: timeout(9f) thread context.
5804  */
5805 
5806 static void
5807 sd_pm_timeout_handler(void *arg)
5808 {
5809 	struct sd_lun *un = arg;
5810 
5811 	(void) pm_idle_component(SD_DEVINFO(un), 0);
5812 	mutex_enter(&un->un_pm_mutex);
5813 	un->un_pm_timeid = NULL;
5814 	mutex_exit(&un->un_pm_mutex);
5815 }
5816 
5817 
5818 /*
5819  *    Function: sdpower
5820  *
5821  * Description: PM entry point.
5822  *
5823  * Return Code: DDI_SUCCESS
5824  *		DDI_FAILURE
5825  *
5826  *     Context: Kernel thread context
5827  */
5828 
5829 static int
5830 sdpower(dev_info_t *devi, int component, int level)
5831 {
5832 	struct sd_lun	*un;
5833 	int		instance;
5834 	int		rval = DDI_SUCCESS;
5835 	uint_t		i, log_page_size, maxcycles, ncycles;
5836 	uchar_t		*log_page_data;
5837 	int		log_sense_page;
5838 	int		medium_present;
5839 	time_t		intvlp;
5840 	dev_t		dev;
5841 	struct pm_trans_data	sd_pm_tran_data;
5842 	uchar_t		save_state;
5843 	int		sval;
5844 	uchar_t		state_before_pm;
5845 	int		got_semaphore_here;
5846 
5847 	instance = ddi_get_instance(devi);
5848 
5849 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
5850 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
5851 	    component != 0) {
5852 		return (DDI_FAILURE);
5853 	}
5854 
5855 	dev = sd_make_device(SD_DEVINFO(un));
5856 
5857 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
5858 
5859 	/*
5860 	 * Must synchronize power down with close.
5861 	 * Attempt to decrement/acquire the open/close semaphore,
5862 	 * but do NOT wait on it. If it's not greater than zero,
5863 	 * i.e. it can't be decremented without waiting, then
5864 	 * someone else, either open or close, already has it
5865 	 * and the try returns 0. Use that knowledge here to determine
5866 	 * if it's OK to change the device power level.
5867 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
5868 	 * here.
5869 	 */
5870 	got_semaphore_here = sema_tryp(&un->un_semoclose);
5871 
5872 	mutex_enter(SD_MUTEX(un));
5873 
5874 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
5875 	    un->un_ncmds_in_driver);
5876 
5877 	/*
5878 	 * If un_ncmds_in_driver is non-zero it indicates commands are
5879 	 * already being processed in the driver, or if the semaphore was
5880 	 * not gotten here it indicates an open or close is being processed.
5881 	 * At the same time somebody is requesting to go to low power, which
5882 	 * can't be allowed, therefore we need to return failure.
5883 	 */
5884 	if ((level == SD_SPINDLE_OFF) &&
5885 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
5886 		mutex_exit(SD_MUTEX(un));
5887 
5888 		if (got_semaphore_here != 0) {
5889 			sema_v(&un->un_semoclose);
5890 		}
5891 		SD_TRACE(SD_LOG_IO_PM, un,
5892 		    "sdpower: exit, device has queued cmds.\n");
5893 		return (DDI_FAILURE);
5894 	}
5895 
5896 	/*
5897 	 * If the state is OFFLINE, the disk is completely dead. In our case
5898 	 * power is changed by sending commands to the device, and those
5899 	 * commands would fail anyway, so just return failure here.
5900 	 *
5901 	 * Power changes to a device that's OFFLINE or SUSPENDED
5902 	 * are not allowed.
5903 	 */
5904 	if ((un->un_state == SD_STATE_OFFLINE) ||
5905 	    (un->un_state == SD_STATE_SUSPENDED)) {
5906 		mutex_exit(SD_MUTEX(un));
5907 
5908 		if (got_semaphore_here != 0) {
5909 			sema_v(&un->un_semoclose);
5910 		}
5911 		SD_TRACE(SD_LOG_IO_PM, un,
5912 		    "sdpower: exit, device is off-line.\n");
5913 		return (DDI_FAILURE);
5914 	}
5915 
5916 	/*
5917 	 * Change the device's state to indicate its power level
5918 	 * is being changed. Do this to prevent a power off in the
5919 	 * middle of commands, which is especially bad on devices
5920 	 * that are really powered off instead of just spun down.
5921 	 */
5922 	state_before_pm = un->un_state;
5923 	un->un_state = SD_STATE_PM_CHANGING;
5924 
5925 	mutex_exit(SD_MUTEX(un));
5926 
5927 	/*
5928 	 * If "pm-capable" property is set to TRUE by HBA drivers,
5929 	 * bypass the following check; otherwise, check the log
5930 	 * sense information for this device.
5931 	 */
5932 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
5933 		/*
5934 		 * Get the log sense information to understand whether the
5935 		 * power cycle counts have gone beyond the threshold.
5936 		 */
5937 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5938 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5939 
5940 		mutex_enter(SD_MUTEX(un));
5941 		log_sense_page = un->un_start_stop_cycle_page;
5942 		mutex_exit(SD_MUTEX(un));
5943 
5944 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
5945 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
5946 #ifdef	SDDEBUG
5947 		if (sd_force_pm_supported) {
5948 			/* Force a successful result */
5949 			rval = 0;
5950 		}
5951 #endif
5952 		if (rval != 0) {
5953 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5954 			    "Log Sense Failed\n");
5955 			kmem_free(log_page_data, log_page_size);
5956 			/* Cannot support power management on those drives */
5957 
5958 			if (got_semaphore_here != 0) {
5959 				sema_v(&un->un_semoclose);
5960 			}
5961 			/*
5962 			 * On exit put the state back to its original value
5963 			 * and broadcast to anyone waiting for the power
5964 			 * change completion.
5965 			 */
5966 			mutex_enter(SD_MUTEX(un));
5967 			un->un_state = state_before_pm;
5968 			cv_broadcast(&un->un_suspend_cv);
5969 			mutex_exit(SD_MUTEX(un));
5970 			SD_TRACE(SD_LOG_IO_PM, un,
5971 			    "sdpower: exit, Log Sense Failed.\n");
5972 			return (DDI_FAILURE);
5973 		}
5974 
5975 		/*
5976 		 * From the page data - Convert the essential information to
5977 		 * pm_trans_data
5978 		 */
5979 		maxcycles =
5980 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
5981 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
5982 
5983 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
5984 
5985 		ncycles =
5986 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
5987 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
5988 
5989 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
5990 
5991 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
5992 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
5993 			    log_page_data[8+i];
5994 		}
5995 
5996 		kmem_free(log_page_data, log_page_size);
5997 
5998 		/*
5999 		 * Call pm_trans_check routine to get the Ok from
6000 		 * the global policy
6001 		 */
6002 
6003 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
6004 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
6005 
6006 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6007 #ifdef	SDDEBUG
6008 		if (sd_force_pm_supported) {
6009 			/* Force a successful result */
6010 			rval = 1;
6011 		}
6012 #endif
6013 		switch (rval) {
6014 		case 0:
6015 			/*
6016 			 * Not OK to power cycle, or an error in the parameters
6017 			 * passed. The advised time to next consider a power
6018 			 * cycle is returned in intvlp. Based on that we are
6019 			 * supposed to pretend we are busy so that the pm
6020 			 * framework will never call our power entry point. To
6021 			 * do so, install a timeout handler and wait for the
6022 			 * recommended time to elapse so that power management
6023 			 * can be effective again.
6024 			 *
6025 			 * To effect this behavior, call pm_busy_component to
6026 			 * indicate to the framework this device is busy.
6027 			 * By not adjusting un_pm_count the rest of PM in
6028 			 * the driver will function normally, and independent
6029 			 * of this but because the framework is told the device
6030 			 * is busy it won't attempt powering down until it gets
6031 			 * a matching idle. The timeout handler sends this.
6032 			 * Note: sd_pm_entry can't be called here to do this
6033 			 * because sdpower may have been called as a result
6034 			 * of a call to pm_raise_power from within sd_pm_entry.
6035 			 *
6036 			 * If a timeout handler is already active then
6037 			 * don't install another.
6038 			 */
6039 			mutex_enter(&un->un_pm_mutex);
6040 			if (un->un_pm_timeid == NULL) {
6041 				un->un_pm_timeid =
6042 				    timeout(sd_pm_timeout_handler,
6043 				    un, intvlp * drv_usectohz(1000000));
6044 				mutex_exit(&un->un_pm_mutex);
6045 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6046 			} else {
6047 				mutex_exit(&un->un_pm_mutex);
6048 			}
6049 			if (got_semaphore_here != 0) {
6050 				sema_v(&un->un_semoclose);
6051 			}
6052 			/*
6053 			 * On exit put the state back to its original value
6054 			 * and broadcast to anyone waiting for the power
6055 			 * change completion.
6056 			 */
6057 			mutex_enter(SD_MUTEX(un));
6058 			un->un_state = state_before_pm;
6059 			cv_broadcast(&un->un_suspend_cv);
6060 			mutex_exit(SD_MUTEX(un));
6061 
6062 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6063 			    "trans check Failed, not ok to power cycle.\n");
6064 			return (DDI_FAILURE);
6065 
6066 		case -1:
6067 			if (got_semaphore_here != 0) {
6068 				sema_v(&un->un_semoclose);
6069 			}
6070 			/*
6071 			 * On exit put the state back to its original value
6072 			 * and broadcast to anyone waiting for the power
6073 			 * change completion.
6074 			 */
6075 			mutex_enter(SD_MUTEX(un));
6076 			un->un_state = state_before_pm;
6077 			cv_broadcast(&un->un_suspend_cv);
6078 			mutex_exit(SD_MUTEX(un));
6079 			SD_TRACE(SD_LOG_IO_PM, un,
6080 			    "sdpower: exit, trans check command Failed.\n");
6081 			return (DDI_FAILURE);
6082 		}
6083 	}
6084 
6085 	if (level == SD_SPINDLE_OFF) {
6086 		/*
6087 		 * Save the last state... if the STOP FAILS we need it
6088 		 * for restoring
6089 		 */
6090 		mutex_enter(SD_MUTEX(un));
6091 		save_state = un->un_last_state;
6092 		/*
6093 		 * There must not be any commands being processed
6094 		 * in the driver when we get here. Power to the
6095 		 * device is potentially going off.
6096 		 */
6097 		ASSERT(un->un_ncmds_in_driver == 0);
6098 		mutex_exit(SD_MUTEX(un));
6099 
6100 		/*
6101 		 * For now suspend the device completely before spindle is
6102 		 * turned off
6103 		 */
6104 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6105 			if (got_semaphore_here != 0) {
6106 				sema_v(&un->un_semoclose);
6107 			}
6108 			/*
6109 			 * On exit put the state back to its original value
6110 			 * and broadcast to anyone waiting for the power
6111 			 * change completion.
6112 			 */
6113 			mutex_enter(SD_MUTEX(un));
6114 			un->un_state = state_before_pm;
6115 			cv_broadcast(&un->un_suspend_cv);
6116 			mutex_exit(SD_MUTEX(un));
6117 			SD_TRACE(SD_LOG_IO_PM, un,
6118 			    "sdpower: exit, PM suspend Failed.\n");
6119 			return (DDI_FAILURE);
6120 		}
6121 	}
6122 
6123 	/*
6124 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6125 	 * close, or strategy. Dump no longer uses this routine; it uses its
6126 	 * own code so it can be done in polled mode.
6127 	 */
6128 
6129 	medium_present = TRUE;
6130 
6131 	/*
6132 	 * When powering up, issue a TUR in case the device is at unit
6133 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6134 	 * a deadlock on un_pm_busy_cv will occur.
6135 	 */
6136 	if (level == SD_SPINDLE_ON) {
6137 		(void) sd_send_scsi_TEST_UNIT_READY(un,
6138 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6139 	}
6140 
6141 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6142 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6143 
6144 	sval = sd_send_scsi_START_STOP_UNIT(un,
6145 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6146 	    SD_PATH_DIRECT);
6147 	/* Command failed, check for media present. */
6148 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6149 		medium_present = FALSE;
6150 	}
6151 
6152 	/*
6153 	 * The conditions of interest here are:
6154 	 *   if a spindle off with media present fails,
6155 	 *	then restore the state and return an error.
6156 	 *   else if a spindle on fails,
6157 	 *	then return an error (there's no state to restore).
6158 	 * In all other cases we setup for the new state
6159 	 * and return success.
6160 	 */
6161 	switch (level) {
6162 	case SD_SPINDLE_OFF:
6163 		if ((medium_present == TRUE) && (sval != 0)) {
6164 			/* The stop command from above failed */
6165 			rval = DDI_FAILURE;
6166 			/*
6167 			 * The stop command failed, and we have media
6168 			 * present. Put the level back by calling
6169 			 * sd_ddi_pm_resume() and set the state back to
6170 			 * its previous value.
6171 			 */
6172 			(void) sd_ddi_pm_resume(un);
6173 			mutex_enter(SD_MUTEX(un));
6174 			un->un_last_state = save_state;
6175 			mutex_exit(SD_MUTEX(un));
6176 			break;
6177 		}
6178 		/*
6179 		 * The stop command from above succeeded.
6180 		 */
6181 		if (un->un_f_monitor_media_state) {
6182 			/*
6183 			 * Terminate watch thread in case of removable media
6184 			 * devices going into low power state. This is as per
6185 			 * the requirements of the pm framework; otherwise commands
6186 			 * will be generated for the device (through watch
6187 			 * thread), even when the device is in low power state.
6188 			 */
6189 			mutex_enter(SD_MUTEX(un));
6190 			un->un_f_watcht_stopped = FALSE;
6191 			if (un->un_swr_token != NULL) {
6192 				opaque_t temp_token = un->un_swr_token;
6193 				un->un_f_watcht_stopped = TRUE;
6194 				un->un_swr_token = NULL;
6195 				mutex_exit(SD_MUTEX(un));
6196 				(void) scsi_watch_request_terminate(temp_token,
6197 				    SCSI_WATCH_TERMINATE_WAIT);
6198 			} else {
6199 				mutex_exit(SD_MUTEX(un));
6200 			}
6201 		}
6202 		break;
6203 
6204 	default:	/* The level requested is spindle on... */
6205 		/*
6206 		 * Legacy behavior: return success on a failed spinup
6207 		 * if there is no media in the drive.
6208 		 * Do this by looking at medium_present here.
6209 		 */
6210 		if ((sval != 0) && medium_present) {
6211 			/* The start command from above failed */
6212 			rval = DDI_FAILURE;
6213 			break;
6214 		}
6215 		/*
6216 		 * The start command from above succeeded.
6217 		 * Resume the devices now that we have
6218 		 * started the disks.
6219 		 */
6220 		(void) sd_ddi_pm_resume(un);
6221 
6222 		/*
6223 		 * Resume the watch thread since it was suspended
6224 		 * when the device went into low power mode.
6225 		 */
6226 		if (un->un_f_monitor_media_state) {
6227 			mutex_enter(SD_MUTEX(un));
6228 			if (un->un_f_watcht_stopped == TRUE) {
6229 				opaque_t temp_token;
6230 
6231 				un->un_f_watcht_stopped = FALSE;
6232 				mutex_exit(SD_MUTEX(un));
6233 				temp_token = scsi_watch_request_submit(
6234 				    SD_SCSI_DEVP(un),
6235 				    sd_check_media_time,
6236 				    SENSE_LENGTH, sd_media_watch_cb,
6237 				    (caddr_t)dev);
6238 				mutex_enter(SD_MUTEX(un));
6239 				un->un_swr_token = temp_token;
6240 			}
6241 			mutex_exit(SD_MUTEX(un));
6242 		}
6243 	}
6244 	if (got_semaphore_here != 0) {
6245 		sema_v(&un->un_semoclose);
6246 	}
6247 	/*
6248 	 * On exit put the state back to its original value
6249 	 * and broadcast to anyone waiting for the power
6250 	 * change completion.
6251 	 */
6252 	mutex_enter(SD_MUTEX(un));
6253 	un->un_state = state_before_pm;
6254 	cv_broadcast(&un->un_suspend_cv);
6255 	mutex_exit(SD_MUTEX(un));
6256 
6257 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6258 
6259 	return (rval);
6260 }
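
/*
 * Illustrative sketch (not part of the original driver): the cycle
 * counts pulled from the log page in sdpower() above are big-endian
 * 32-bit values, so maxcycles is equivalent to
 * sd_example_be32(&log_page_data[0x1c]) and ncycles to
 * sd_example_be32(&log_page_data[0x24]). The SD_EXAMPLE_SKETCHES
 * guard is hypothetical and never defined.
 */
#ifdef	SD_EXAMPLE_SKETCHES
static uint_t
sd_example_be32(uchar_t *p)
{
	/* p points at the most significant byte */
	return (((uint_t)p[0] << 24) | ((uint_t)p[1] << 16) |
	    ((uint_t)p[2] << 8) | (uint_t)p[3]);
}
#endif	/* SD_EXAMPLE_SKETCHES */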
6261 
6262 
6263 
6264 /*
6265  *    Function: sdattach
6266  *
6267  * Description: Driver's attach(9e) entry point function.
6268  *
6269  *   Arguments: devi - opaque device info handle
6270  *		cmd  - attach  type
6271  *
6272  * Return Code: DDI_SUCCESS
6273  *		DDI_FAILURE
6274  *
6275  *     Context: Kernel thread context
6276  */
6277 
6278 static int
6279 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6280 {
6281 	switch (cmd) {
6282 	case DDI_ATTACH:
6283 		return (sd_unit_attach(devi));
6284 	case DDI_RESUME:
6285 		return (sd_ddi_resume(devi));
6286 	default:
6287 		break;
6288 	}
6289 	return (DDI_FAILURE);
6290 }
6291 
6292 
6293 /*
6294  *    Function: sddetach
6295  *
6296  * Description: Driver's detach(9E) entry point function.
6297  *
6298  *   Arguments: devi - opaque device info handle
6299  *		cmd  - detach  type
6300  *
6301  * Return Code: DDI_SUCCESS
6302  *		DDI_FAILURE
6303  *
6304  *     Context: Kernel thread context
6305  */
6306 
6307 static int
6308 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6309 {
6310 	switch (cmd) {
6311 	case DDI_DETACH:
6312 		return (sd_unit_detach(devi));
6313 	case DDI_SUSPEND:
6314 		return (sd_ddi_suspend(devi));
6315 	default:
6316 		break;
6317 	}
6318 	return (DDI_FAILURE);
6319 }
6320 
6321 
6322 /*
6323  *     Function: sd_sync_with_callback
6324  *
6325  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6326  *		 state while the callback routine is active.
6327  *
6328  *    Arguments: un: softstate structure for the instance
6329  *
6330  *	Context: Kernel thread context
6331  */
6332 
6333 static void
6334 sd_sync_with_callback(struct sd_lun *un)
6335 {
6336 	ASSERT(un != NULL);
6337 
6338 	mutex_enter(SD_MUTEX(un));
6339 
6340 	ASSERT(un->un_in_callback >= 0);
6341 
6342 	while (un->un_in_callback > 0) {
6343 		mutex_exit(SD_MUTEX(un));
6344 		delay(2);
6345 		mutex_enter(SD_MUTEX(un));
6346 	}
6347 
6348 	mutex_exit(SD_MUTEX(un));
6349 }
6350 
6351 /*
6352  *    Function: sd_unit_attach
6353  *
6354  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6355  *		the soft state structure for the device and performs
6356  *		all necessary structure and device initializations.
6357  *
6358  *   Arguments: devi: the system's dev_info_t for the device.
6359  *
6360  * Return Code: DDI_SUCCESS if attach is successful.
6361  *		DDI_FAILURE if any part of the attach fails.
6362  *
6363  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
6364  *		Kernel thread context only.  Can sleep.
6365  */
6366 
6367 static int
6368 sd_unit_attach(dev_info_t *devi)
6369 {
6370 	struct	scsi_device	*devp;
6371 	struct	sd_lun		*un;
6372 	char			*variantp;
6373 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6374 	int	instance;
6375 	int	rval;
6376 	int	wc_enabled;
6377 	int	tgt;
6378 	uint64_t	capacity;
6379 	uint_t		lbasize = 0;
6380 	dev_info_t	*pdip = ddi_get_parent(devi);
6381 	int		offbyone = 0;
6382 	int		geom_label_valid = 0;
6383 
6384 	/*
6385 	 * Retrieve the target driver's private data area. This was set
6386 	 * up by the HBA.
6387 	 */
6388 	devp = ddi_get_driver_private(devi);
6389 
6390 	/*
6391 	 * Retrieve the target ID of the device.
6392 	 */
6393 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6394 	    SCSI_ADDR_PROP_TARGET, -1);
6395 
6396 	/*
6397 	 * Since we have no idea what state things were left in by the last
6398 	 * user of the device, set up some 'default' settings, i.e. turn 'em
6399 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6400 	 * Do this before the scsi_probe, which sends an inquiry.
6401 	 * This is a fix for bug (4430280).
6402 	 * Of special importance is wide-xfer. The drive could have been left
6403 	 * in wide transfer mode by the last driver to communicate with it,
6404 	 * including us. If that's the case, and if the following is not
6405 	 * setup properly or we don't re-negotiate with the drive prior to
6406 	 * transferring data to/from the drive, it causes bus parity errors,
6407 	 * data overruns, and unexpected interrupts. This first occurred when
6408 	 * the fix for bug (4378686) was made.
6409 	 */
6410 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6411 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6412 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6413 
6414 	/*
6415 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6416 	 * on a target. Setting it per lun instance actually sets the
6417 	 * capability of this target, which affects those luns already
6418 	 * attached on the same target. So during attach, we can disable
6419 	 * this capability only when no other lun has been attached on this
6420 	 * target. By doing this, we assume a target has the same tagged-qing
6421 	 * capability for every lun. The condition can be removed when HBA
6422 	 * is changed to support per lun based tagged-qing capability.
6423 	 */
6424 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6425 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6426 	}
6427 
6428 	/*
6429 	 * Use scsi_probe() to issue an INQUIRY command to the device.
6430 	 * This call will allocate and fill in the scsi_inquiry structure
6431 	 * and point the sd_inq member of the scsi_device structure to it.
6432 	 * If the attach succeeds, then this memory will not be de-allocated
6433 	 * (via scsi_unprobe()) until the instance is detached.
6434 	 */
6435 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6436 		goto probe_failed;
6437 	}
6438 
6439 	/*
6440 	 * Check the device type as specified in the inquiry data and
6441 	 * claim it if it is of a type that we support.
6442 	 */
6443 	switch (devp->sd_inq->inq_dtype) {
6444 	case DTYPE_DIRECT:
6445 		break;
6446 	case DTYPE_RODIRECT:
6447 		break;
6448 	case DTYPE_OPTICAL:
6449 		break;
6450 	case DTYPE_NOTPRESENT:
6451 	default:
6452 		/* Unsupported device type; fail the attach. */
6453 		goto probe_failed;
6454 	}
6455 
6456 	/*
6457 	 * Allocate the soft state structure for this unit.
6458 	 *
6459 	 * We rely upon this memory being set to all zeroes by
6460 	 * ddi_soft_state_zalloc().  We assume that any member of the
6461 	 * soft state structure that is not explicitly initialized by
6462 	 * this routine will have a value of zero.
6463 	 */
6464 	instance = ddi_get_instance(devp->sd_dev);
6465 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6466 		goto probe_failed;
6467 	}
6468 
6469 	/*
6470 	 * Retrieve a pointer to the newly-allocated soft state.
6471 	 *
6472 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6473 	 * was successful, unless something has gone horribly wrong and the
6474 	 * ddi's soft state internals are corrupt (in which case it is
6475 	 * probably better to halt here than just fail the attach....)
6476 	 */
6477 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6478 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6479 		    instance);
6480 		/*NOTREACHED*/
6481 	}
6482 
6483 	/*
6484 	 * Link the back ptr of the driver soft state to the scsi_device
6485 	 * struct for this lun.
6486 	 * Save a pointer to the softstate in the driver-private area of
6487 	 * the scsi_device struct.
6488 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6489 	 * we first set un->un_sd below.
6490 	 */
6491 	un->un_sd = devp;
6492 	devp->sd_private = (opaque_t)un;
6493 
6494 	/*
6495 	 * The following must be after devp is stored in the soft state struct.
6496 	 */
6497 #ifdef SDDEBUG
6498 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6499 	    "%s_unit_attach: un:0x%p instance:%d\n",
6500 	    ddi_driver_name(devi), un, instance);
6501 #endif
6502 
6503 	/*
6504 	 * Set up the device type and node type (for the minor nodes).
6505 	 * By default we assume that the device can at least support the
6506 	 * Common Command Set. Call it a CD-ROM if it reports itself
6507 	 * as a RODIRECT device.
6508 	 */
6509 	switch (devp->sd_inq->inq_dtype) {
6510 	case DTYPE_RODIRECT:
6511 		un->un_node_type = DDI_NT_CD_CHAN;
6512 		un->un_ctype	 = CTYPE_CDROM;
6513 		break;
6514 	case DTYPE_OPTICAL:
6515 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6516 		un->un_ctype	 = CTYPE_ROD;
6517 		break;
6518 	default:
6519 		un->un_node_type = DDI_NT_BLOCK_CHAN;
6520 		un->un_ctype	 = CTYPE_CCS;
6521 		break;
6522 	}
6523 
6524 	/*
6525 	 * Try to read the interconnect type from the HBA.
6526 	 *
6527 	 * Note: This driver is currently compiled as two binaries, a parallel
6528 	 * scsi version (sd) and a fibre channel version (ssd). All functional
6529 	 * differences are determined at compile time. In the future a single
6530 	 * binary will be provided and the interconnect type will be used to
6531 	 * differentiate between fibre and parallel scsi behaviors. At that time
6532 	 * it will be necessary for all fibre channel HBAs to support this
6533 	 * property.
6534 	 *
6535 	 * Set un_f_is_fibre to TRUE (default fibre).
6536 	 */
6537 	un->un_f_is_fibre = TRUE;
6538 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
6539 	case INTERCONNECT_SSA:
6540 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
6541 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6542 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
6543 		break;
6544 	case INTERCONNECT_PARALLEL:
6545 		un->un_f_is_fibre = FALSE;
6546 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6547 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6548 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
6549 		break;
6550 	case INTERCONNECT_SATA:
6551 		un->un_f_is_fibre = FALSE;
6552 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
6553 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6554 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
6555 		break;
6556 	case INTERCONNECT_FIBRE:
6557 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
6558 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6559 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
6560 		break;
6561 	case INTERCONNECT_FABRIC:
6562 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
6563 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
6564 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6565 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
6566 		break;
6567 	default:
6568 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
6569 		/*
6570 		 * The HBA does not support the "interconnect-type" property
6571 		 * (or did not provide a recognized type).
6572 		 *
6573 		 * Note: This will be obsoleted when a single fibre channel
6574 		 * and parallel scsi driver is delivered. In the meantime the
6575 		 * interconnect type will be set to the platform default. If
6576 		 * that type is not parallel SCSI, it means that we should be
6577 		 * assuming "ssd" semantics. However, here it also means that
6578 		 * the FC HBA does not support the "interconnect-type" property
6579 		 * as we expect it to, so log this occurrence.
6580 		 */
6581 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
6582 		if (!SD_IS_PARALLEL_SCSI(un)) {
6583 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6584 			    "sd_unit_attach: un:0x%p Assuming "
6585 			    "INTERCONNECT_FIBRE\n", un);
6586 		} else {
6587 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6588 			    "sd_unit_attach: un:0x%p Assuming "
6589 			    "INTERCONNECT_PARALLEL\n", un);
6590 			un->un_f_is_fibre = FALSE;
6591 		}
6592 #else
6593 		/*
6594 		 * Note: This source will be implemented when a single fibre
6595 		 * channel and parallel scsi driver is delivered. The default
6596 		 * will be to assume that if a device does not support the
6597 		 * "interconnect-type" property it is a parallel SCSI HBA and
6598 		 * we will set the interconnect type for parallel scsi.
6599 		 */
6600 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
6601 		un->un_f_is_fibre = FALSE;
6602 #endif
6603 		break;
6604 	}
6605 
6606 	if (un->un_f_is_fibre == TRUE) {
6607 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
6608 		    SCSI_VERSION_3) {
6609 			switch (un->un_interconnect_type) {
6610 			case SD_INTERCONNECT_FIBRE:
6611 			case SD_INTERCONNECT_SSA:
6612 				un->un_node_type = DDI_NT_BLOCK_WWN;
6613 				break;
6614 			default:
6615 				break;
6616 			}
6617 		}
6618 	}
6619 
6620 	/*
6621 	 * Initialize the Request Sense command for the target
6622 	 */
6623 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
6624 		goto alloc_rqs_failed;
6625 	}
6626 
6627 	/*
6628 	 * Set un_retry_count to SD_RETRY_COUNT; this is fine for Sparc,
6629 	 * which has separate binaries for sd and ssd.
6630 	 *
6631 	 * x86 has one binary, and un_retry_count is set based on the
6632 	 * interconnect type. The hardcoded values will go away when
6633 	 * Sparc uses one binary for sd and ssd.  These hardcoded values
6634 	 * need to match SD_RETRY_COUNT in sddef.h.
6635 	 * The value used is based on the interconnect type:
6636 	 * fibre = 3, parallel = 5.
6637 	 */
6638 #if defined(__i386) || defined(__amd64)
6639 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
6640 #else
6641 	un->un_retry_count = SD_RETRY_COUNT;
6642 #endif
6643 
6644 	/*
6645 	 * Set the per disk retry count to the default number of retries
6646 	 * for disks and CDROMs. This value can be overridden by the
6647 	 * disk property list or an entry in sd.conf.
6648 	 */
6649 	un->un_notready_retry_count =
6650 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
6651 	    : DISK_NOT_READY_RETRY_COUNT(un);
6652 
6653 	/*
6654 	 * Set the busy retry count to the default value of un_retry_count.
6655 	 * This can be overridden by entries in sd.conf or the device
6656 	 * config table.
6657 	 */
6658 	un->un_busy_retry_count = un->un_retry_count;
6659 
6660 	/*
6661 	 * Init the reset threshold for retries.  This number determines
6662 	 * how many retries must be performed before a reset can be issued
6663 	 * (for certain error conditions). This can be overridden by entries
6664 	 * in sd.conf or the device config table.
6665 	 */
6666 	un->un_reset_retry_count = (un->un_retry_count / 2);
6667 
6668 	/*
6669 	 * Set the victim_retry_count to twice the default un_retry_count
6670 	 */
6671 	un->un_victim_retry_count = (2 * un->un_retry_count);
6672 
6673 	/*
6674 	 * Set the reservation release timeout to the default value of
6675 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
6676 	 * device config table.
6677 	 */
6678 	un->un_reserve_release_time = 5;
6679 
6680 	/*
6681 	 * Set up the default maximum transfer size. Note that this may
6682 	 * get updated later in the attach, when setting up default wide
6683 	 * operations for disks.
6684 	 */
6685 #if defined(__i386) || defined(__amd64)
6686 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
6687 #else
6688 	un->un_max_xfer_size = (uint_t)maxphys;
6689 #endif
6690 
6691 	/*
6692 	 * Get "allow bus device reset" property (defaults to "enabled" if
6693 	 * the property was not defined). This is to disable bus resets for
6694 	 * certain kinds of error recovery. Note: In the future, when a
6695 	 * run-time fibre check is available, the soft state flag should
6696 	 * default to enabled.
6697 	 */
6698 	if (un->un_f_is_fibre == TRUE) {
6699 		un->un_f_allow_bus_device_reset = TRUE;
6700 	} else {
6701 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6702 		    "allow-bus-device-reset", 1) != 0) {
6703 			un->un_f_allow_bus_device_reset = TRUE;
6704 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6705 			    "sd_unit_attach: un:0x%p Bus device reset "
6706 			    "enabled\n", un);
6707 		} else {
6708 			un->un_f_allow_bus_device_reset = FALSE;
6709 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6710 			    "sd_unit_attach: un:0x%p Bus device reset "
6711 			    "disabled\n", un);
6712 		}
6713 	}
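	/*
	 * As an illustration (not part of this driver), bus device resets
	 * could be disabled for non-fibre instances with a driver.conf(4)
	 * entry such as:
	 *
	 *	allow-bus-device-reset=0;
	 *
	 * When no such entry exists, the ddi_getprop() call above returns
	 * the default of 1 and the resets stay enabled.
	 */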
6714 
6715 	/*
6716 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
6717 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
6718 	 *
6719 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
6720 	 * property. The new "variant" property with a value of "atapi" has been
6721 	 * introduced so that future 'variants' of standard SCSI behavior (like
6722 	 * atapi) could be specified by the underlying HBA drivers by supplying
6723 	 * a new value for the "variant" property, instead of having to define a
6724 	 * new property.
6725 	 */
6726 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
6727 		un->un_f_cfg_is_atapi = TRUE;
6728 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6729 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
6730 	}
6731 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
6732 	    &variantp) == DDI_PROP_SUCCESS) {
6733 		if (strcmp(variantp, "atapi") == 0) {
6734 			un->un_f_cfg_is_atapi = TRUE;
6735 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6736 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
6737 		}
6738 		ddi_prop_free(variantp);
6739 	}
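	/*
	 * A sketch of how an HBA nexus driver might publish the "variant"
	 * property on a child node (names are illustrative; see
	 * ndi_prop_update_string(9F)):
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 */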
6740 
6741 	un->un_cmd_timeout	= SD_IO_TIME;
6742 
6743 	/* Info on current states, statuses, etc. (Updated frequently) */
6744 	un->un_state		= SD_STATE_NORMAL;
6745 	un->un_last_state	= SD_STATE_NORMAL;
6746 
6747 	/* Control & status info for command throttling */
6748 	un->un_throttle		= sd_max_throttle;
6749 	un->un_saved_throttle	= sd_max_throttle;
6750 	un->un_min_throttle	= sd_min_throttle;
6751 
6752 	if (un->un_f_is_fibre == TRUE) {
6753 		un->un_f_use_adaptive_throttle = TRUE;
6754 	} else {
6755 		un->un_f_use_adaptive_throttle = FALSE;
6756 	}
6757 
6758 	/* Removable media support. */
6759 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
6760 	un->un_mediastate		= DKIO_NONE;
6761 	un->un_specified_mediastate	= DKIO_NONE;
6762 
6763 	/* CVs for suspend/resume (PM or DR) */
6764 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
6765 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
6766 
6767 	/* Power management support. */
6768 	un->un_power_level = SD_SPINDLE_UNINIT;
6769 
6770 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
6771 	un->un_f_wcc_inprog = 0;
6772 
6773 	/*
6774 	 * The open/close semaphore is used to serialize threads executing
6775 	 * in the driver's open & close entry point routines for a given
6776 	 * instance.
6777 	 */
6778 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
6779 
6780 	/*
6781 	 * The conf file entry and softstate variable act as a forceful override,
6782 	 * meaning a non-zero value must be entered to change the default.
6783 	 */
6784 	un->un_f_disksort_disabled = FALSE;
6785 
6786 	/*
6787 	 * Retrieve the properties from the static driver table or the driver
6788 	 * configuration file (.conf) for this unit and update the soft state
6789 	 * for the device as needed for the indicated properties.
6790 	 * Note: the property configuration needs to occur here as some of the
6791 	 * following routines may have dependencies on soft state flags set
6792 	 * as part of the driver property configuration.
6793 	 */
6794 	sd_read_unit_properties(un);
6795 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6796 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
6797 
6798 	/*
6799 	 * A device is treated as hotpluggable only if it has the
6800 	 * "hotpluggable" property. Otherwise, it is regarded as
6801 	 * non-hotpluggable.
6802 	 */
6803 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
6804 	    -1) != -1) {
6805 		un->un_f_is_hotpluggable = TRUE;
6806 	}
6807 
6808 	/*
6809 	 * Set the unit's attributes (flags) according to "hotpluggable" and
6810 	 * the RMB bit in the INQUIRY data.
6811 	 */
6812 	sd_set_unit_attributes(un, devi);
6813 
6814 	/*
6815 	 * By default, we mark the capacity, lbasize, and geometry
6816 	 * as invalid. Only if we successfully read a valid capacity
6817 	 * will we update the un_blockcount and un_tgt_blocksize with the
6818 	 * valid values (the geometry will be validated later).
6819 	 */
6820 	un->un_f_blockcount_is_valid	= FALSE;
6821 	un->un_f_tgt_blocksize_is_valid	= FALSE;
6822 
6823 	/*
6824 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
6825 	 * otherwise.
6826 	 */
6827 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
6828 	un->un_blockcount = 0;
6829 
6830 	/*
6831 	 * Set up the per-instance info needed to determine the correct
6832 	 * CDBs and other info for issuing commands to the target.
6833 	 */
6834 	sd_init_cdb_limits(un);
6835 
6836 	/*
6837 	 * Set up the IO chains to use, based upon the target type.
6838 	 */
6839 	if (un->un_f_non_devbsize_supported) {
6840 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6841 	} else {
6842 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6843 	}
6844 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6845 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
6846 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
6847 
6848 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
6849 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
6850 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
6851 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
6852 
6853 
6854 	if (ISCD(un)) {
6855 		un->un_additional_codes = sd_additional_codes;
6856 	} else {
6857 		un->un_additional_codes = NULL;
6858 	}
6859 
6860 	/*
6861 	 * Create the kstats here so they can be available for attach-time
6862 	 * routines that send commands to the unit (either polled or via
6863 	 * sd_send_scsi_cmd).
6864 	 *
6865 	 * Note: This is a critical sequence that needs to be maintained:
6866 	 *	1) Instantiate the kstats here, before any routines using the
6867 	 *	   iopath (i.e. sd_send_scsi_cmd).
6868 	 *	2) Instantiate and initialize the partition stats
6869 	 *	   (sd_set_pstats).
6870 	 *	3) Initialize the error stats (sd_set_errstats), following
6871 	 *	   sd_validate_geometry(),sd_register_devid(),
6872 	 *	   and sd_cache_control().
6873 	 */
6874 
6875 	un->un_stats = kstat_create(sd_label, instance,
6876 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
6877 	if (un->un_stats != NULL) {
6878 		un->un_stats->ks_lock = SD_MUTEX(un);
6879 		kstat_install(un->un_stats);
6880 	}
6881 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6882 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
6883 
6884 	sd_create_errstats(un, instance);
6885 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6886 	    "sd_unit_attach: un:0x%p errstats created\n", un);
6887 
6888 	/*
6889 	 * The following if/else code was relocated here from below as part
6890 	 * of the fix for bug (4430280). However with the default setup added
6891 	 * on entry to this routine, it's no longer absolutely necessary for
6892 	 * this to be before the call to sd_spin_up_unit.
6893 	 */
6894 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
6895 		/*
6896 		 * If SCSI-2 tagged queueing is supported by the target
6897 		 * and by the host adapter then we will enable it.
6898 		 */
6899 		un->un_tagflags = 0;
6900 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6901 		    (devp->sd_inq->inq_cmdque) &&
6902 		    (un->un_f_arq_enabled == TRUE)) {
6903 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
6904 			    1, 1) == 1) {
6905 				un->un_tagflags = FLAG_STAG;
6906 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6907 				    "sd_unit_attach: un:0x%p tag queueing "
6908 				    "enabled\n", un);
6909 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
6910 			    "untagged-qing", 0) == 1) {
6911 				un->un_f_opt_queueing = TRUE;
6912 				un->un_saved_throttle = un->un_throttle =
6913 				    min(un->un_throttle, 3);
6914 			} else {
6915 				un->un_f_opt_queueing = FALSE;
6916 				un->un_saved_throttle = un->un_throttle = 1;
6917 			}
6918 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
6919 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
6920 			/* The Host Adapter supports internal queueing. */
6921 			un->un_f_opt_queueing = TRUE;
6922 			un->un_saved_throttle = un->un_throttle =
6923 			    min(un->un_throttle, 3);
6924 		} else {
6925 			un->un_f_opt_queueing = FALSE;
6926 			un->un_saved_throttle = un->un_throttle = 1;
6927 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6928 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
6929 		}
6930 
6931 		/*
6932 		 * Enable large transfers for SATA/SAS drives
6933 		 */
6934 		if (SD_IS_SERIAL(un)) {
6935 			un->un_max_xfer_size =
6936 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6937 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6938 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6939 			    "sd_unit_attach: un:0x%p max transfer "
6940 			    "size=0x%x\n", un, un->un_max_xfer_size);
6941 
6942 		}
6943 
6944 		/* Setup or tear down default wide operations for disks */
6945 
6946 		/*
6947 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
6948 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
6949 		 * system and be set to different values. In the future this
6950 		 * code may need to be updated when the ssd module is
6951 		 * obsoleted and removed from the system. (4299588)
6952 		 */
6953 		if (SD_IS_PARALLEL_SCSI(un) &&
6954 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
6955 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
6956 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6957 			    1, 1) == 1) {
6958 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6959 				    "sd_unit_attach: un:0x%p Wide Transfer "
6960 				    "enabled\n", un);
6961 			}
6962 
6963 			/*
6964 			 * If tagged queuing has also been enabled, then
6965 			 * enable large xfers
6966 			 */
6967 			if (un->un_saved_throttle == sd_max_throttle) {
6968 				un->un_max_xfer_size =
6969 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
6970 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
6971 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6972 				    "sd_unit_attach: un:0x%p max transfer "
6973 				    "size=0x%x\n", un, un->un_max_xfer_size);
6974 			}
6975 		} else {
6976 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
6977 			    0, 1) == 1) {
6978 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
6979 				    "sd_unit_attach: un:0x%p "
6980 				    "Wide Transfer disabled\n", un);
6981 			}
6982 		}
6983 	} else {
6984 		un->un_tagflags = FLAG_STAG;
6985 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
6986 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
6987 	}
6988 
6989 	/*
6990 	 * If this target supports LUN reset, try to enable it.
6991 	 */
6992 	if (un->un_f_lun_reset_enabled) {
6993 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
6994 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6995 			    "un:0x%p lun_reset capability set\n", un);
6996 		} else {
6997 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
6998 			    "un:0x%p lun-reset capability not set\n", un);
6999 		}
7000 	}
7001 
7002 	/*
7003 	 * At this point in the attach, we have enough info in the
7004 	 * soft state to be able to issue commands to the target.
7005 	 *
7006 	 * All command paths used below MUST issue their commands as
7007 	 * SD_PATH_DIRECT. This is important as intermediate layers
7008 	 * are not all initialized yet (such as PM).
7009 	 */
7010 
7011 	/*
7012 	 * Send a TEST UNIT READY command to the device. This should clear
7013 	 * any outstanding UNIT ATTENTION that may be present.
7014 	 *
7015 	 * Note: Don't check for success; just track whether there is a
7016 	 * reservation. This is a throwaway command to clear any unit attentions.
7017 	 *
7018 	 * Note: This MUST be the first command issued to the target during
7019 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7020 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7021 	 * with attempts at spinning up a device with no media.
7022 	 */
7023 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
7024 		reservation_flag = SD_TARGET_IS_RESERVED;
7025 	}
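	/*
	 * For reference, TEST UNIT READY is a six-byte, all-zero CDB
	 * (opcode 0x00) that transfers no data; an EACCES return here
	 * reflects a RESERVATION CONFLICT status from the target.
	 */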
7026 
7027 	/*
7028 	 * If the device is NOT a removable media device, attempt to spin
7029 	 * it up (using the START_STOP_UNIT command) and read its capacity
7030 	 * (using the READ CAPACITY command).  Note, however, that either
7031 	 * of these could fail and in some cases we would continue with
7032 	 * the attach despite the failure (see below).
7033 	 */
7034 	if (un->un_f_descr_format_supported) {
7035 		switch (sd_spin_up_unit(un)) {
7036 		case 0:
7037 			/*
7038 			 * Spin-up was successful; now try to read the
7039 			 * capacity.  If successful then save the results
7040 			 * and mark the capacity & lbasize as valid.
7041 			 */
7042 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7043 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7044 
7045 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
7046 			    &lbasize, SD_PATH_DIRECT)) {
7047 			case 0: {
7048 				if (capacity > DK_MAX_BLOCKS) {
7049 #ifdef _LP64
7050 					if (capacity + 1 >
7051 					    SD_GROUP1_MAX_ADDRESS) {
7052 						/*
7053 						 * Enable descriptor format
7054 						 * sense data so that we can
7055 						 * get 64 bit sense data
7056 						 * fields.
7057 						 */
7058 						sd_enable_descr_sense(un);
7059 					}
7060 #else
7061 					/* 32-bit kernels can't handle this */
7062 					scsi_log(SD_DEVINFO(un),
7063 					    sd_label, CE_WARN,
7064 					    "disk has %llu blocks, which "
7065 					    "is too large for a 32-bit "
7066 					    "kernel", capacity);
7067 
7068 #if defined(__i386) || defined(__amd64)
7069 					/*
7070 					 * A 1TB disk was treated as (1T - 512)B
7071 					 * in the past, so it might have a
7072 					 * valid VTOC and Solaris partitions;
7073 					 * we have to allow it to continue to
7074 					 * work.
7075 					 */
7076 					if (capacity -1 > DK_MAX_BLOCKS)
7077 #endif
7078 					goto spinup_failed;
7079 #endif
7080 				}
7081 
7082 				/*
7083 				 * It is not necessary here to check whether
7084 				 * the capacity of the device is bigger than
7085 				 * what the max hba cdb can support, because
7086 				 * sd_send_scsi_READ_CAPACITY retrieves the
7087 				 * capacity by sending a USCSI command, which
7088 				 * is constrained by the max hba cdb. In fact,
7089 				 * sd_send_scsi_READ_CAPACITY returns EINVAL
7090 				 * when a bigger cdb than the required cdb
7091 				 * length is used. That case is handled below
7092 				 * in "case EINVAL".
7093 				 */
7094 
7095 				/*
7096 				 * The following relies on
7097 				 * sd_send_scsi_READ_CAPACITY never
7098 				 * returning 0 for capacity and/or lbasize.
7099 				 */
7100 				sd_update_block_info(un, lbasize, capacity);
7101 
7102 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7103 				    "sd_unit_attach: un:0x%p capacity = %ld "
7104 				    "blocks; lbasize= %ld.\n", un,
7105 				    un->un_blockcount, un->un_tgt_blocksize);
7106 
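				/*
				 * As a worked example: a device reporting
				 * capacity = 2097152 blocks with lbasize =
				 * 512 holds 2097152 * 512 = 1073741824
				 * bytes, i.e. exactly 1 GiB.
				 */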
7107 				break;
7108 			}
7109 			case EINVAL:
7110 				/*
7111 				 * In the case where the max-cdb-length property
7112 				 * is smaller than the required CDB length for
7113 				 * a SCSI device, a target driver can fail to
7114 				 * attach to that device.
7115 				 */
7116 				scsi_log(SD_DEVINFO(un),
7117 				    sd_label, CE_WARN,
7118 				    "disk capacity is too large "
7119 				    "for current cdb length");
7120 				goto spinup_failed;
7121 			case EACCES:
7122 				/*
7123 				 * Should never get here if the spin-up
7124 				 * succeeded, but code it in anyway.
7125 				 * From here, just continue with the attach...
7126 				 */
7127 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7128 				    "sd_unit_attach: un:0x%p "
7129 				    "sd_send_scsi_READ_CAPACITY "
7130 				    "returned reservation conflict\n", un);
7131 				reservation_flag = SD_TARGET_IS_RESERVED;
7132 				break;
7133 			default:
7134 				/*
7135 				 * Likewise, should never get here if the
7136 				 * spin-up succeeded. Just continue with
7137 				 * the attach...
7138 				 */
7139 				break;
7140 			}
7141 			break;
7142 		case EACCES:
7143 			/*
7144 			 * Device is reserved by another host.  In this case
7145 			 * we could not spin it up or read the capacity, but
7146 			 * we continue with the attach anyway.
7147 			 */
7148 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7149 			    "sd_unit_attach: un:0x%p spin-up reservation "
7150 			    "conflict.\n", un);
7151 			reservation_flag = SD_TARGET_IS_RESERVED;
7152 			break;
7153 		default:
7154 			/* Fail the attach if the spin-up failed. */
7155 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7156 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7157 			goto spinup_failed;
7158 		}
7159 	}
7160 
7161 	/*
7162 	 * Check to see if this is a MMC drive
7163 	 */
7164 	if (ISCD(un)) {
7165 		sd_set_mmc_caps(un);
7166 	}
7167 
7168 
7169 	/*
7170 	 * Add a zero-length attribute to tell the world we support
7171 	 * kernel ioctls (for layered drivers)
7172 	 */
7173 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7174 	    DDI_KERNEL_IOCTL, NULL, 0);
7175 
7176 	/*
7177 	 * Add a boolean property to tell the world we support
7178 	 * the B_FAILFAST flag (for layered drivers)
7179 	 */
7180 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7181 	    "ddi-failfast-supported", NULL, 0);
7182 
7183 	/*
7184 	 * Initialize power management
7185 	 */
7186 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7187 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7188 	sd_setup_pm(un, devi);
7189 	if (un->un_f_pm_is_enabled == FALSE) {
7190 		/*
7191 		 * For performance, point to a jump table that does
7192 		 * not include pm.
7193 		 * The direct and priority chains don't change with PM.
7194 		 *
7195 		 * Note: this is currently done based on individual device
7196 		 * capabilities. When an interface for determining system
7197 		 * power enabled state becomes available, or when additional
7198 		 * layers are added to the command chain, these values will
7199 		 * have to be re-evaluated for correctness.
7200 		 */
7201 		if (un->un_f_non_devbsize_supported) {
7202 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7203 		} else {
7204 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7205 		}
7206 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7207 	}
7208 
7209 	/*
7210 	 * This property is set to 0 by HA software to avoid retries
7211 	 * on a reserved disk. (The preferred property name is
7212 	 * "retry-on-reservation-conflict") (1189689)
7213 	 *
7214 	 * Note: The use of a global here can have unintended consequences. A
7215 	 * per-instance variable is preferable to match the capabilities of
7216 	 * different underlying hba's (4402600)
7217 	 */
7218 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7219 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7220 	    sd_retry_on_reservation_conflict);
7221 	if (sd_retry_on_reservation_conflict != 0) {
7222 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7223 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7224 		    sd_retry_on_reservation_conflict);
7225 	}
7226 
7227 	/* Set up options for QFULL handling. */
7228 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7229 	    "qfull-retries", -1)) != -1) {
7230 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7231 		    rval, 1);
7232 	}
7233 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7234 	    "qfull-retry-interval", -1)) != -1) {
7235 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7236 		    rval, 1);
7237 	}
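	/*
	 * Illustrative driver.conf(4) entries for the two QFULL properties
	 * read above (the values here are examples only):
	 *
	 *	qfull-retries=3;
	 *	qfull-retry-interval=100;
	 */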
7238 
7239 	/*
7240 	 * This just prints a message that announces the existence of the
7241 	 * device. The message is always printed in the system logfile, but
7242 	 * only appears on the console if the system is booted with the
7243 	 * -v (verbose) argument.
7244 	 */
7245 	ddi_report_dev(devi);
7246 
7247 	un->un_mediastate = DKIO_NONE;
7248 
7249 	cmlb_alloc_handle(&un->un_cmlbhandle);
7250 
7251 #if defined(__i386) || defined(__amd64)
7252 	/*
7253 	 * On x86, compensate for off-by-1 legacy error
7254 	 */
7255 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7256 	    (lbasize == un->un_sys_blocksize))
7257 		offbyone = CMLB_OFF_BY_ONE;
7258 #endif
7259 
7260 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7261 	    un->un_f_has_removable_media, un->un_f_is_hotpluggable,
7262 	    un->un_node_type, offbyone, un->un_cmlbhandle,
7263 	    (void *)SD_PATH_DIRECT) != 0) {
7264 		goto cmlb_attach_failed;
7265 	}
7266 
7267 
7268 	/*
7269 	 * Read and validate the device's geometry (i.e., the disk label).
7270 	 * A new unformatted drive will not have a valid geometry, but
7271 	 * the driver needs to successfully attach to this device so
7272 	 * the drive can be formatted via ioctls.
7273 	 */
7274 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7275 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7276 
7277 	mutex_enter(SD_MUTEX(un));
7278 
7279 	/*
7280 	 * Read and initialize the devid for the unit.
7281 	 */
7282 	ASSERT(un->un_errstats != NULL);
7283 	if (un->un_f_devid_supported) {
7284 		sd_register_devid(un, devi, reservation_flag);
7285 	}
7286 	mutex_exit(SD_MUTEX(un));
7287 
7288 #if (defined(__fibre))
7289 	/*
7290 	 * Register callbacks for fibre only.  You can't do this solely
7291 	 * on the basis of the devid_type because this is hba specific.
7292 	 * We need to query our hba capabilities to find out whether to
7293 	 * register or not.
7294 	 */
7295 	if (un->un_f_is_fibre) {
7296 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7297 			sd_init_event_callbacks(un);
7298 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7299 			    "sd_unit_attach: un:0x%p event callbacks inserted",
7300 			    un);
7301 		}
7302 	}
7303 #endif
7304 
7305 	if (un->un_f_opt_disable_cache == TRUE) {
7306 		/*
7307 		 * Disable both read cache and write cache.  This is
7308 		 * the historic behavior of the keywords in the config file.
7309 		 */
7310 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7311 		    0) {
7312 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7313 			    "sd_unit_attach: un:0x%p Could not disable "
7314 			    "caching", un);
7315 			goto devid_failed;
7316 		}
7317 	}
7318 
7319 	/*
7320 	 * Check the value of the WCE bit now and
7321 	 * set un_f_write_cache_enabled accordingly.
7322 	 */
7323 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
7324 	mutex_enter(SD_MUTEX(un));
7325 	un->un_f_write_cache_enabled = (wc_enabled != 0);
7326 	mutex_exit(SD_MUTEX(un));
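	/*
	 * For reference, the WCE bit examined by sd_get_write_cache_enabled()
	 * lives in the Caching mode page (page code 0x08) and is fetched
	 * with a MODE SENSE command.
	 */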
7327 
7328 	/*
7329 	 * Check the value of the NV_SUP bit and set
7330 	 * un_f_suppress_cache_flush accordingly.
7331 	 */
7332 	sd_get_nv_sup(un);
7333 
7334 	/*
7335 	 * Find out what type of reservation this disk supports.
7336 	 */
7337 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
7338 	case 0:
7339 		/*
7340 		 * SCSI-3 reservations are supported.
7341 		 */
7342 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7343 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7344 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7345 		break;
7346 	case ENOTSUP:
7347 		/*
7348 		 * The PERSISTENT RESERVE IN command would not be recognized by
7349 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7350 		 */
7351 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7352 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7353 		un->un_reservation_type = SD_SCSI2_RESERVATION;
7354 		break;
7355 	default:
7356 		/*
7357 		 * default to SCSI-3 reservations
7358 		 */
7359 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7360 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7361 		un->un_reservation_type = SD_SCSI3_RESERVATION;
7362 		break;
7363 	}
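	/*
	 * For reference, SD_READ_KEYS selects the READ KEYS service action
	 * of the PERSISTENT RESERVE IN command (opcode 0x5E). A SCSI-2
	 * device does not implement that opcode, which is what surfaces
	 * here as ENOTSUP.
	 */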
7364 
7365 	/*
7366 	 * Set the pstat and error stat values here, so data obtained during the
7367 	 * previous attach-time routines is available.
7368 	 *
7369 	 * Note: This is a critical sequence that needs to be maintained:
7370 	 *	1) Instantiate the kstats before any routines using the iopath
7371 	 *	   (i.e. sd_send_scsi_cmd).
7372 	 *	2) Initialize the error stats (sd_set_errstats) and partition
7373 	 *	   stats (sd_set_pstats) here, following
7374 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7375 	 *	   sd_cache_control().
7376 	 */
7377 
7378 	if (un->un_f_pkstats_enabled && geom_label_valid) {
7379 		sd_set_pstats(un);
7380 		SD_TRACE(SD_LOG_IO_PARTITION, un,
7381 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7382 	}
7383 
7384 	sd_set_errstats(un);
7385 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7386 	    "sd_unit_attach: un:0x%p errstats set\n", un);
7387 
7388 
7389 	/*
7390 	 * After successfully attaching an instance, we record how many luns
7391 	 * have been attached on the corresponding target and controller for
7392 	 * parallel SCSI. This information is used when sd tries to set the
7393 	 * tagged queuing capability in the HBA.
7394 	 */
7395 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7396 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7397 	}
7398 
7399 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7400 	    "sd_unit_attach: un:0x%p exit success\n", un);
7401 
7402 	return (DDI_SUCCESS);
7403 
7404 	/*
7405 	 * An error occurred during the attach; clean up & return failure.
7406 	 */
7407 
7408 devid_failed:
7409 
7410 setup_pm_failed:
7411 	ddi_remove_minor_node(devi, NULL);
7412 
7413 cmlb_attach_failed:
7414 	/*
7415 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7416 	 */
7417 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7418 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7419 
7420 	/*
7421 	 * Refer to the comments on setting tagged-qing at the beginning of
7422 	 * sd_unit_attach. We can only disable tagged queuing when there is
7423 	 * no lun attached on the target.
7424 	 */
7425 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7426 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7427 	}
7428 
7429 	if (un->un_f_is_fibre == FALSE) {
7430 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7431 	}
7432 
7433 spinup_failed:
7434 
7435 	mutex_enter(SD_MUTEX(un));
7436 
7437 	/* Cancel any pending callback for SD_PATH_DIRECT_PRIORITY cmd restart */
7438 	if (un->un_direct_priority_timeid != NULL) {
7439 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7440 		un->un_direct_priority_timeid = NULL;
7441 		mutex_exit(SD_MUTEX(un));
7442 		(void) untimeout(temp_id);
7443 		mutex_enter(SD_MUTEX(un));
7444 	}
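	/*
	 * Note the drop-and-reacquire pattern above (and repeated below):
	 * untimeout(9F) does not return until any in-flight callback has
	 * completed, and the callback may itself need SD_MUTEX, so calling
	 * untimeout while holding SD_MUTEX could deadlock. The timeout id
	 * is nulled under the mutex first so the callback can observe the
	 * cancellation.
	 */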
7445 
7446 	/* Cancel any pending start/stop timeouts */
7447 	if (un->un_startstop_timeid != NULL) {
7448 		timeout_id_t temp_id = un->un_startstop_timeid;
7449 		un->un_startstop_timeid = NULL;
7450 		mutex_exit(SD_MUTEX(un));
7451 		(void) untimeout(temp_id);
7452 		mutex_enter(SD_MUTEX(un));
7453 	}
7454 
7455 	/* Cancel any pending reset-throttle timeouts */
7456 	if (un->un_reset_throttle_timeid != NULL) {
7457 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7458 		un->un_reset_throttle_timeid = NULL;
7459 		mutex_exit(SD_MUTEX(un));
7460 		(void) untimeout(temp_id);
7461 		mutex_enter(SD_MUTEX(un));
7462 	}
7463 
7464 	/* Cancel any pending retry timeouts */
7465 	if (un->un_retry_timeid != NULL) {
7466 		timeout_id_t temp_id = un->un_retry_timeid;
7467 		un->un_retry_timeid = NULL;
7468 		mutex_exit(SD_MUTEX(un));
7469 		(void) untimeout(temp_id);
7470 		mutex_enter(SD_MUTEX(un));
7471 	}
7472 
7473 	/* Cancel any pending delayed cv broadcast timeouts */
7474 	if (un->un_dcvb_timeid != NULL) {
7475 		timeout_id_t temp_id = un->un_dcvb_timeid;
7476 		un->un_dcvb_timeid = NULL;
7477 		mutex_exit(SD_MUTEX(un));
7478 		(void) untimeout(temp_id);
7479 		mutex_enter(SD_MUTEX(un));
7480 	}
7481 
7482 	mutex_exit(SD_MUTEX(un));
7483 
7484 	/* There should not be any in-progress I/O so ASSERT this check */
7485 	ASSERT(un->un_ncmds_in_transport == 0);
7486 	ASSERT(un->un_ncmds_in_driver == 0);
7487 
7488 	/* Do not free the softstate if the callback routine is active */
7489 	sd_sync_with_callback(un);
7490 
7491 	/*
7492 	 * Partition stats apparently are not used with removables. These would
7493 	 * not have been created during attach, so no need to clean them up...
7494 	 */
7495 	if (un->un_stats != NULL) {
7496 		kstat_delete(un->un_stats);
7497 		un->un_stats = NULL;
7498 	}
7499 	if (un->un_errstats != NULL) {
7500 		kstat_delete(un->un_errstats);
7501 		un->un_errstats = NULL;
7502 	}
7503 
7504 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7505 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7506 
7507 	ddi_prop_remove_all(devi);
7508 	sema_destroy(&un->un_semoclose);
7509 	cv_destroy(&un->un_state_cv);
7510 
7511 getrbuf_failed:
7512 
7513 	sd_free_rqs(un);
7514 
7515 alloc_rqs_failed:
7516 
7517 	devp->sd_private = NULL;
7518 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
7519 
7520 get_softstate_failed:
7521 	/*
7522 	 * Note: the man pages are unclear as to whether or not doing a
7523 	 * ddi_soft_state_free(sd_state, instance) is the right way to
7524 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
7525 	 * ddi_get_soft_state() fails.  The implication seems to be
7526 	 * that the get_soft_state cannot fail if the zalloc succeeds.
7527 	 */
7528 	ddi_soft_state_free(sd_state, instance);
7529 
7530 probe_failed:
7531 	scsi_unprobe(devp);
7532 
7533 	SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7534 	    "sd_unit_attach: un:0x%p exit failure\n", (void *)un);
7535 
7536 	return (DDI_FAILURE);
7537 }
7538 
7539 
7540 /*
7541  *    Function: sd_unit_detach
7542  *
7543  * Description: Performs DDI_DETACH processing for sddetach().
7544  *
7545  * Return Code: DDI_SUCCESS
7546  *		DDI_FAILURE
7547  *
7548  *     Context: Kernel thread context
7549  */
7550 
7551 static int
7552 sd_unit_detach(dev_info_t *devi)
7553 {
7554 	struct scsi_device	*devp;
7555 	struct sd_lun		*un;
7556 	int			i;
7557 	int			tgt;
7558 	dev_t			dev;
7559 	dev_info_t		*pdip = ddi_get_parent(devi);
7560 	int			instance = ddi_get_instance(devi);
7561 
7562 	mutex_enter(&sd_detach_mutex);
7563 
7564 	/*
7565 	 * Fail the detach for any of the following:
7566 	 *  - Unable to get the sd_lun struct for the instance
7567 	 *  - A layered driver has an outstanding open on the instance
7568 	 *  - Another thread is already detaching this instance
7569 	 *  - Another thread is currently performing an open
7570 	 */
7571 	devp = ddi_get_driver_private(devi);
7572 	if ((devp == NULL) ||
7573 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
7574 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
7575 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
7576 		mutex_exit(&sd_detach_mutex);
7577 		return (DDI_FAILURE);
7578 	}
7579 
7580 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
7581 
7582 	/*
7583 	 * Mark this instance as currently in a detach, to inhibit any
7584 	 * opens from a layered driver.
7585 	 */
7586 	un->un_detach_count++;
7587 	mutex_exit(&sd_detach_mutex);
7588 
7589 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7590 	    SCSI_ADDR_PROP_TARGET, -1);
7591 
7592 	dev = sd_make_device(SD_DEVINFO(un));
7593 
7594 #ifndef lint
7595 	_NOTE(COMPETING_THREADS_NOW);
7596 #endif
7597 
7598 	mutex_enter(SD_MUTEX(un));
7599 
7600 	/*
7601 	 * Fail the detach if there are any outstanding layered
7602 	 * opens on this device.
7603 	 */
7604 	for (i = 0; i < NDKMAP; i++) {
7605 		if (un->un_ocmap.lyropen[i] != 0) {
7606 			goto err_notclosed;
7607 		}
7608 	}
7609 
7610 	/*
7611 	 * Verify there are NO outstanding commands issued to this device.
7612 	 * ie, un_ncmds_in_transport == 0.
7613 	 * i.e., un_ncmds_in_transport == 0.
7614 	 * code path, even though everything's closed.
7615 	 */
7616 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
7617 	    (un->un_direct_priority_timeid != NULL) ||
7618 	    (un->un_state == SD_STATE_RWAIT)) {
7619 		mutex_exit(SD_MUTEX(un));
7620 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7621 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
7622 		goto err_stillbusy;
7623 	}
7624 
7625 	/*
7626 	 * If we have the device reserved, release the reservation.
7627 	 */
7628 	if ((un->un_resvd_status & SD_RESERVE) &&
7629 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
7630 		mutex_exit(SD_MUTEX(un));
7631 		/*
7632 		 * Note: sd_reserve_release sends a command to the device
7633 		 * via the sd_ioctlcmd() path, and can sleep.
7634 		 */
7635 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
7636 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7637 			    "sd_dr_detach: Cannot release reservation \n");
7638 		}
7639 	} else {
7640 		mutex_exit(SD_MUTEX(un));
7641 	}
7642 
7643 	/*
7644 	 * Untimeout any reserve recover, throttle reset, restart unit
7645 	 * and delayed broadcast timeout threads. Protect the timeout pointer
7646 	 * from getting nulled by their callback functions.
7647 	 */
7648 	mutex_enter(SD_MUTEX(un));
7649 	if (un->un_resvd_timeid != NULL) {
7650 		timeout_id_t temp_id = un->un_resvd_timeid;
7651 		un->un_resvd_timeid = NULL;
7652 		mutex_exit(SD_MUTEX(un));
7653 		(void) untimeout(temp_id);
7654 		mutex_enter(SD_MUTEX(un));
7655 	}
7656 
7657 	if (un->un_reset_throttle_timeid != NULL) {
7658 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7659 		un->un_reset_throttle_timeid = NULL;
7660 		mutex_exit(SD_MUTEX(un));
7661 		(void) untimeout(temp_id);
7662 		mutex_enter(SD_MUTEX(un));
7663 	}
7664 
7665 	if (un->un_startstop_timeid != NULL) {
7666 		timeout_id_t temp_id = un->un_startstop_timeid;
7667 		un->un_startstop_timeid = NULL;
7668 		mutex_exit(SD_MUTEX(un));
7669 		(void) untimeout(temp_id);
7670 		mutex_enter(SD_MUTEX(un));
7671 	}
7672 
7673 	if (un->un_dcvb_timeid != NULL) {
7674 		timeout_id_t temp_id = un->un_dcvb_timeid;
7675 		un->un_dcvb_timeid = NULL;
7676 		mutex_exit(SD_MUTEX(un));
7677 		(void) untimeout(temp_id);
7678 	} else {
7679 		mutex_exit(SD_MUTEX(un));
7680 	}
7681 
7682 	/* Remove any pending reservation reclaim requests for this device */
7683 	sd_rmv_resv_reclaim_req(dev);
7684 
7685 	mutex_enter(SD_MUTEX(un));
7686 
7687 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
7688 	if (un->un_direct_priority_timeid != NULL) {
7689 		timeout_id_t temp_id = un->un_direct_priority_timeid;
7690 		un->un_direct_priority_timeid = NULL;
7691 		mutex_exit(SD_MUTEX(un));
7692 		(void) untimeout(temp_id);
7693 		mutex_enter(SD_MUTEX(un));
7694 	}
7695 
7696 	/* Cancel any active multi-host disk watch thread requests */
7697 	if (un->un_mhd_token != NULL) {
7698 		mutex_exit(SD_MUTEX(un));
7699 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
7700 		if (scsi_watch_request_terminate(un->un_mhd_token,
7701 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7702 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7703 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
7704 			/*
7705 			 * Note: We are returning here after having removed
7706 			 * some driver timeouts above. This is consistent with
7707 			 * the legacy implementation but perhaps the watch
7708 			 * terminate call should be made with the wait flag set.
7709 			 */
7710 			goto err_stillbusy;
7711 		}
7712 		mutex_enter(SD_MUTEX(un));
7713 		un->un_mhd_token = NULL;
7714 	}
7715 
7716 	if (un->un_swr_token != NULL) {
7717 		mutex_exit(SD_MUTEX(un));
7718 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
7719 		if (scsi_watch_request_terminate(un->un_swr_token,
7720 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
7721 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7722 			    "sd_dr_detach: Cannot cancel swr watch request\n");
7723 			/*
7724 			 * Note: We are returning here after having removed
7725 			 * some driver timeouts above. This is consistent with
7726 			 * the legacy implementation but perhaps the watch
7727 			 * terminate call should be made with the wait flag set.
7728 			 */
7729 			goto err_stillbusy;
7730 		}
7731 		mutex_enter(SD_MUTEX(un));
7732 		un->un_swr_token = NULL;
7733 	}
7734 
7735 	mutex_exit(SD_MUTEX(un));
7736 
7737 	/*
7738 	 * Clear any scsi_reset_notifies. We clear the reset notifies
7739 	 * if we have not registered one.
7740 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
7741 	 */
7742 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
7743 	    sd_mhd_reset_notify_cb, (caddr_t)un);
7744 
7745 	/*
7746 	 * Protect the timeout pointers from getting nulled by
7747 	 * their callback functions during the cancellation process;
7748 	 * otherwise untimeout could be invoked with a null value.
7749 	 */
7750 	_NOTE(NO_COMPETING_THREADS_NOW);
7751 
7752 	mutex_enter(&un->un_pm_mutex);
7753 	if (un->un_pm_idle_timeid != NULL) {
7754 		timeout_id_t temp_id = un->un_pm_idle_timeid;
7755 		un->un_pm_idle_timeid = NULL;
7756 		mutex_exit(&un->un_pm_mutex);
7757 
7758 		/*
7759 		 * Timeout is active; cancel it.
7760 		 * Note that it'll never be active on a device
7761 		 * that does not support PM therefore we don't
7762 		 * have to check before calling pm_idle_component.
7763 		 */
7764 		(void) untimeout(temp_id);
7765 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7766 		mutex_enter(&un->un_pm_mutex);
7767 	}
7768 
7769 	/*
7770 	 * Check whether there is already a timeout scheduled for power
7771 	 * management. If so, don't lower the power here; that's
7772 	 * the timeout handler's job.
7773 	 */
7774 	if (un->un_pm_timeid != NULL) {
7775 		timeout_id_t temp_id = un->un_pm_timeid;
7776 		un->un_pm_timeid = NULL;
7777 		mutex_exit(&un->un_pm_mutex);
7778 		/*
7779 		 * Timeout is active; cancel it.
7780 		 * Note that it'll never be active on a device
7781 		 * that does not support PM therefore we don't
7782 		 * have to check before calling pm_idle_component.
7783 		 */
7784 		(void) untimeout(temp_id);
7785 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7786 
7787 	} else {
7788 		mutex_exit(&un->un_pm_mutex);
7789 		if ((un->un_f_pm_is_enabled == TRUE) &&
7790 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
7791 		    DDI_SUCCESS)) {
7792 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7793 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
7794 			/*
7795 			 * Fix for bug: 4297749, item # 13
7796 			 * The above test now includes a check to see if PM is
7797 			 * supported by this device before calling
7798 			 * pm_lower_power().
7799 			 * Note, the following is not dead code. The call to
7800 			 * pm_lower_power above will generate a call back into
7801 			 * our sdpower routine which might result in a timeout
7802 			 * handler getting activated. Therefore the following
7803 			 * code is valid and necessary.
7804 			 */
7805 			mutex_enter(&un->un_pm_mutex);
7806 			if (un->un_pm_timeid != NULL) {
7807 				timeout_id_t temp_id = un->un_pm_timeid;
7808 				un->un_pm_timeid = NULL;
7809 				mutex_exit(&un->un_pm_mutex);
7810 				(void) untimeout(temp_id);
7811 				(void) pm_idle_component(SD_DEVINFO(un), 0);
7812 			} else {
7813 				mutex_exit(&un->un_pm_mutex);
7814 			}
7815 		}
7816 	}
7817 
7818 	/*
7819 	 * Cleanup from the scsi_ifsetcap() calls (437868)
7820 	 * Relocated here from above to be after the call to
7821 	 * pm_lower_power, which was getting errors.
7822 	 */
7823 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7824 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7825 
7826 	/*
7827 	 * Currently, tagged queuing is supported per target by the HBA.
7828 	 * Setting this per lun instance actually sets the capability of this
7829 	 * target in the HBA, which affects those luns already attached on the
7830 	 * same target. So during detach, we can disable this capability
7831 	 * only when this is the only lun left on this target. By doing
7832 	 * this, we assume a target has the same tagged queuing capability
7833 	 * for every lun. The condition can be removed when the HBA is changed
7834 	 * to support per-lun tagged queuing capability.
7835 	 */
7836 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
7837 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
7838 	}
7839 
7840 	if (un->un_f_is_fibre == FALSE) {
7841 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
7842 	}
7843 
7844 	/*
7845 	 * Remove any event callbacks, fibre only
7846 	 */
7847 	if (un->un_f_is_fibre == TRUE) {
7848 		if ((un->un_insert_event != NULL) &&
7849 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
7850 		    DDI_SUCCESS)) {
7851 			/*
7852 			 * Note: We are returning here after having done
7853 			 * substantial cleanup above. This is consistent
7854 			 * with the legacy implementation but this may not
7855 			 * be the right thing to do.
7856 			 */
7857 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7858 			    "sd_dr_detach: Cannot cancel insert event\n");
7859 			goto err_remove_event;
7860 		}
7861 		un->un_insert_event = NULL;
7862 
7863 		if ((un->un_remove_event != NULL) &&
7864 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
7865 		    DDI_SUCCESS)) {
7866 			/*
7867 			 * Note: We are returning here after having done
7868 			 * substantial cleanup above. This is consistent
7869 			 * with the legacy implementation but this may not
7870 			 * be the right thing to do.
7871 			 */
7872 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7873 			    "sd_dr_detach: Cannot cancel remove event\n");
7874 			goto err_remove_event;
7875 		}
7876 		un->un_remove_event = NULL;
7877 	}
7878 
7879 	/* Do not free the softstate if the callback routine is active */
7880 	sd_sync_with_callback(un);
7881 
7882 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
7883 	cmlb_free_handle(&un->un_cmlbhandle);
7884 
7885 	/*
7886 	 * Hold the detach mutex here, to make sure that no other threads ever
7887 	 * can access a (partially) freed soft state structure.
7888 	 */
7889 	mutex_enter(&sd_detach_mutex);
7890 
7891 	/*
7892 	 * Clean up the soft state struct.
7893 	 * Cleanup is done in reverse order of allocs/inits.
7894 	 * At this point there should be no competing threads anymore.
7895 	 */
7896 
7897 	/* Unregister and free device id. */
7898 	ddi_devid_unregister(devi);
7899 	if (un->un_devid) {
7900 		ddi_devid_free(un->un_devid);
7901 		un->un_devid = NULL;
7902 	}
7903 
7904 	/*
7905 	 * Destroy wmap cache if it exists.
7906 	 */
7907 	if (un->un_wm_cache != NULL) {
7908 		kmem_cache_destroy(un->un_wm_cache);
7909 		un->un_wm_cache = NULL;
7910 	}
7911 
7912 	/*
7913 	 * kstat cleanup is done in detach for all device types (4363169).
7914 	 * We do not want to fail detach if the device kstats are not deleted
7915 	 * since there is confusion about the devo_refcnt for the device.
7916 	 * We just delete the kstats and let detach complete successfully.
7917 	 */
7918 	if (un->un_stats != NULL) {
7919 		kstat_delete(un->un_stats);
7920 		un->un_stats = NULL;
7921 	}
7922 	if (un->un_errstats != NULL) {
7923 		kstat_delete(un->un_errstats);
7924 		un->un_errstats = NULL;
7925 	}
7926 
7927 	/* Remove partition stats */
7928 	if (un->un_f_pkstats_enabled) {
7929 		for (i = 0; i < NSDMAP; i++) {
7930 			if (un->un_pstats[i] != NULL) {
7931 				kstat_delete(un->un_pstats[i]);
7932 				un->un_pstats[i] = NULL;
7933 			}
7934 		}
7935 	}
7936 
7937 	/* Remove xbuf registration */
7938 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
7939 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
7940 
7941 	/* Remove driver properties */
7942 	ddi_prop_remove_all(devi);
7943 
7944 	mutex_destroy(&un->un_pm_mutex);
7945 	cv_destroy(&un->un_pm_busy_cv);
7946 
7947 	cv_destroy(&un->un_wcc_cv);
7948 
7949 	/* Open/close semaphore */
7950 	sema_destroy(&un->un_semoclose);
7951 
7952 	/* Removable media condvar. */
7953 	cv_destroy(&un->un_state_cv);
7954 
7955 	/* Suspend/resume condvar. */
7956 	cv_destroy(&un->un_suspend_cv);
7957 	cv_destroy(&un->un_disk_busy_cv);
7958 
7959 	sd_free_rqs(un);
7960 
7961 	/* Free up soft state */
7962 	devp->sd_private = NULL;
7963 
7964 	bzero(un, sizeof (struct sd_lun));
7965 	ddi_soft_state_free(sd_state, instance);
7966 
7967 	mutex_exit(&sd_detach_mutex);
7968 
7969 	/* This frees up the INQUIRY data associated with the device. */
7970 	scsi_unprobe(devp);
7971 
7972 	/*
7973 	 * After successfully detaching an instance, we update the count of
7974 	 * luns attached on the corresponding target and controller for
7975 	 * parallel SCSI. This information is used when sd tries to set the
7976 	 * tagged queuing capability in the HBA.
7977 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
7978 	 * check if the device is parallel SCSI. However, we don't need to
7979 	 * check here because we've already checked during attach. No device
7980 	 * that is not parallel SCSI is in the chain.
7981 	 */
7982 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7983 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
7984 	}
7985 
7986 	return (DDI_SUCCESS);
7987 
7988 err_notclosed:
7989 	mutex_exit(SD_MUTEX(un));
7990 
7991 err_stillbusy:
7992 	_NOTE(NO_COMPETING_THREADS_NOW);
7993 
7994 err_remove_event:
7995 	mutex_enter(&sd_detach_mutex);
7996 	un->un_detach_count--;
7997 	mutex_exit(&sd_detach_mutex);
7998 
7999 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
8000 	return (DDI_FAILURE);
8001 }
8002 
8003 
8004 /*
8005  *    Function: sd_create_errstats
8006  *
8007  * Description: This routine instantiates the device error stats.
8008  *
8009  *		Note: During attach the stats are instantiated first so they are
8010  *		available for attach-time routines that utilize the driver
8011  *		iopath to send commands to the device. The stats are initialized
8012  *		separately so data obtained during some attach-time routines is
8013  *		available. (4362483)
8014  *
8015  *   Arguments: un - driver soft state (unit) structure
8016  *		instance - driver instance
8017  *
8018  *     Context: Kernel thread context
8019  */
8020 
8021 static void
8022 sd_create_errstats(struct sd_lun *un, int instance)
8023 {
8024 	struct	sd_errstats	*stp;
8025 	char	kstatmodule_err[KSTAT_STRLEN];
8026 	char	kstatname[KSTAT_STRLEN];
8027 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
8028 
8029 	ASSERT(un != NULL);
8030 
8031 	if (un->un_errstats != NULL) {
8032 		return;
8033 	}
8034 
8035 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
8036 	    "%serr", sd_label);
8037 	(void) snprintf(kstatname, sizeof (kstatname),
8038 	    "%s%d,err", sd_label, instance);
8039 
8040 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
8041 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
8042 
8043 	if (un->un_errstats == NULL) {
8044 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8045 		    "sd_create_errstats: Failed kstat_create\n");
8046 		return;
8047 	}
8048 
8049 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8050 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
8051 	    KSTAT_DATA_UINT32);
8052 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
8053 	    KSTAT_DATA_UINT32);
8054 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
8055 	    KSTAT_DATA_UINT32);
8056 	kstat_named_init(&stp->sd_vid,		"Vendor",
8057 	    KSTAT_DATA_CHAR);
8058 	kstat_named_init(&stp->sd_pid,		"Product",
8059 	    KSTAT_DATA_CHAR);
8060 	kstat_named_init(&stp->sd_revision,	"Revision",
8061 	    KSTAT_DATA_CHAR);
8062 	kstat_named_init(&stp->sd_serial,	"Serial No",
8063 	    KSTAT_DATA_CHAR);
8064 	kstat_named_init(&stp->sd_capacity,	"Size",
8065 	    KSTAT_DATA_ULONGLONG);
8066 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
8067 	    KSTAT_DATA_UINT32);
8068 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
8069 	    KSTAT_DATA_UINT32);
8070 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
8071 	    KSTAT_DATA_UINT32);
8072 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
8073 	    KSTAT_DATA_UINT32);
8074 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
8075 	    KSTAT_DATA_UINT32);
8076 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
8077 	    KSTAT_DATA_UINT32);
8078 
8079 	un->un_errstats->ks_private = un;
8080 	un->un_errstats->ks_update  = nulldev;
8081 
8082 	kstat_install(un->un_errstats);
8083 }
8084 
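/*
 * Illustrative example (editorial sketch, not part of the original
 * source): for instance 0, and assuming sd_label is "sd" as in the
 * non-fibre binary, the snprintf calls above produce the kstat module
 * name "sderr" and the kstat name "sd0,err". The resulting error
 * statistics could then be inspected from userland with, e.g.:
 *
 *	kstat -m sderr -n sd0,err
 */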
8085 
8086 /*
8087  *    Function: sd_set_errstats
8088  *
8089  * Description: This routine sets the value of the vendor id, product id,
8090  *		revision, serial number, and capacity device error stats.
8091  *
8092  *		Note: During attach the stats are instantiated first so they are
8093  *		available for attach-time routines that utilize the driver
8094  *		iopath to send commands to the device. The stats are initialized
8095  *		separately so data obtained during some attach-time routines is
8096  *		available. (4362483)
8097  *
8098  *   Arguments: un - driver soft state (unit) structure
8099  *
8100  *     Context: Kernel thread context
8101  */
8102 
8103 static void
8104 sd_set_errstats(struct sd_lun *un)
8105 {
8106 	struct	sd_errstats	*stp;
8107 
8108 	ASSERT(un != NULL);
8109 	ASSERT(un->un_errstats != NULL);
8110 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8111 	ASSERT(stp != NULL);
8112 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8113 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8114 	(void) strncpy(stp->sd_revision.value.c,
8115 	    un->un_sd->sd_inq->inq_revision, 4);
8116 
8117 	/*
8118 	 * All the errstats are persistent across detach/attach, so reset
8119 	 * them all here to handle hot replacement of disk drives, except
8120 	 * when the drive is a Sun qualified drive whose serial number
8121 	 * has not changed.
8122 	 */
8123 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8124 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8125 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8126 		stp->sd_softerrs.value.ui32 = 0;
8127 		stp->sd_harderrs.value.ui32 = 0;
8128 		stp->sd_transerrs.value.ui32 = 0;
8129 		stp->sd_rq_media_err.value.ui32 = 0;
8130 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8131 		stp->sd_rq_nodev_err.value.ui32 = 0;
8132 		stp->sd_rq_recov_err.value.ui32 = 0;
8133 		stp->sd_rq_illrq_err.value.ui32 = 0;
8134 		stp->sd_rq_pfa_err.value.ui32 = 0;
8135 	}
8136 
8137 	/*
8138 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8139 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8140 	 * (4376302))
8141 	 */
8142 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8143 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8144 		    sizeof (SD_INQUIRY(un)->inq_serial));
8145 	}
8146 
8147 	if (un->un_f_blockcount_is_valid != TRUE) {
8148 		/*
8149 		 * Set capacity error stat to 0 for no media. This ensures
8150 		 * a valid capacity is displayed in response to 'iostat -E'
8151 		 * when no media is present in the device.
8152 		 */
8153 		stp->sd_capacity.value.ui64 = 0;
8154 	} else {
8155 		/*
8156 		 * Multiply un_blockcount by un->un_sys_blocksize to get
8157 		 * capacity.
8158 		 *
8159 		 * Note: for non-512 blocksize devices "un_blockcount" has been
8160 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8161 		 * (un_tgt_blocksize / un->un_sys_blocksize).
8162 		 */
8163 		stp->sd_capacity.value.ui64 = (uint64_t)
8164 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8165 	}
8166 }
8167 
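/*
 * Worked example (editorial, hypothetical values): with un_sys_blocksize
 * of 512 and un_blockcount of 143374000, the assignment above yields
 *
 *	sd_capacity = 143374000 * 512 = 73407488000 bytes (~73.4 GB)
 *
 * which is the figure reported as "Size" by 'iostat -E'.
 */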
8168 
8169 /*
8170  *    Function: sd_set_pstats
8171  *
8172  * Description: This routine instantiates and initializes the partition
8173  *              stats for each partition with more than zero blocks.
8174  *		(4363169)
8175  *
8176  *   Arguments: un - driver soft state (unit) structure
8177  *
8178  *     Context: Kernel thread context
8179  */
8180 
8181 static void
8182 sd_set_pstats(struct sd_lun *un)
8183 {
8184 	char	kstatname[KSTAT_STRLEN];
8185 	int	instance;
8186 	int	i;
8187 	diskaddr_t	nblks = 0;
8188 	char	*partname = NULL;
8189 
8190 	ASSERT(un != NULL);
8191 
8192 	instance = ddi_get_instance(SD_DEVINFO(un));
8193 
8194 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8195 	for (i = 0; i < NSDMAP; i++) {
8196 
8197 		if (cmlb_partinfo(un->un_cmlbhandle, i,
8198 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8199 			continue;
8200 		mutex_enter(SD_MUTEX(un));
8201 
8202 		if ((un->un_pstats[i] == NULL) &&
8203 		    (nblks != 0)) {
8204 
8205 			(void) snprintf(kstatname, sizeof (kstatname),
8206 			    "%s%d,%s", sd_label, instance,
8207 			    partname);
8208 
8209 			un->un_pstats[i] = kstat_create(sd_label,
8210 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8211 			    1, KSTAT_FLAG_PERSISTENT);
8212 			if (un->un_pstats[i] != NULL) {
8213 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8214 				kstat_install(un->un_pstats[i]);
8215 			}
8216 		}
8217 		mutex_exit(SD_MUTEX(un));
8218 	}
8219 }
8220 
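/*
 * Illustrative example (editorial sketch): for instance 0 and a
 * partition whose cmlb partname is "a", the snprintf above yields the
 * kstat name "sd0,a", created as a KSTAT_TYPE_IO kstat in class
 * "partition", so per-partition I/O can be reported (e.g. by
 * 'iostat -p'). The "sd" prefix assumes sd_label for the non-fibre
 * binary.
 */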
8221 
8222 #if (defined(__fibre))
8223 /*
8224  *    Function: sd_init_event_callbacks
8225  *
8226  * Description: This routine initializes the insertion and removal event
8227  *		callbacks. (fibre only)
8228  *
8229  *   Arguments: un - driver soft state (unit) structure
8230  *
8231  *     Context: Kernel thread context
8232  */
8233 
8234 static void
8235 sd_init_event_callbacks(struct sd_lun *un)
8236 {
8237 	ASSERT(un != NULL);
8238 
8239 	if ((un->un_insert_event == NULL) &&
8240 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8241 	    &un->un_insert_event) == DDI_SUCCESS)) {
8242 		/*
8243 		 * Add the callback for an insertion event
8244 		 */
8245 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8246 		    un->un_insert_event, sd_event_callback, (void *)un,
8247 		    &(un->un_insert_cb_id));
8248 	}
8249 
8250 	if ((un->un_remove_event == NULL) &&
8251 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8252 	    &un->un_remove_event) == DDI_SUCCESS)) {
8253 		/*
8254 		 * Add the callback for a removal event
8255 		 */
8256 		(void) ddi_add_event_handler(SD_DEVINFO(un),
8257 		    un->un_remove_event, sd_event_callback, (void *)un,
8258 		    &(un->un_remove_cb_id));
8259 	}
8260 }
8261 
8262 
8263 /*
8264  *    Function: sd_event_callback
8265  *
8266  * Description: This routine handles insert/remove events (photon). The
8267  *		state is changed to OFFLINE, which can be used to suppress
8268  *		error msgs. (fibre only)
8269  *
8270  *   Arguments: un - driver soft state (unit) structure
8271  *
8272  *     Context: Callout thread context
8273  */
8274 /* ARGSUSED */
8275 static void
8276 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8277     void *bus_impldata)
8278 {
8279 	struct sd_lun *un = (struct sd_lun *)arg;
8280 
8281 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8282 	if (event == un->un_insert_event) {
8283 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8284 		mutex_enter(SD_MUTEX(un));
8285 		if (un->un_state == SD_STATE_OFFLINE) {
8286 			if (un->un_last_state != SD_STATE_SUSPENDED) {
8287 				un->un_state = un->un_last_state;
8288 			} else {
8289 				/*
8290 				 * We have gone through SUSPEND/RESUME while
8291 				 * we were offline. Restore the last state
8292 				 */
8293 				un->un_state = un->un_save_state;
8294 			}
8295 		}
8296 		mutex_exit(SD_MUTEX(un));
8297 
8298 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8299 	} else if (event == un->un_remove_event) {
8300 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8301 		mutex_enter(SD_MUTEX(un));
8302 		/*
8303 		 * We need to handle an event callback that occurs during
8304 		 * the suspend operation, since we don't prevent it.
8305 		 */
8306 		if (un->un_state != SD_STATE_OFFLINE) {
8307 			if (un->un_state != SD_STATE_SUSPENDED) {
8308 				New_state(un, SD_STATE_OFFLINE);
8309 			} else {
8310 				un->un_last_state = SD_STATE_OFFLINE;
8311 			}
8312 		}
8313 		mutex_exit(SD_MUTEX(un));
8314 	} else {
8315 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8316 		    "!Unknown event\n");
8317 	}
8318 
8319 }
8320 #endif
8321 
8322 /*
8323  *    Function: sd_cache_control()
8324  *
8325  * Description: This routine is the driver entry point for setting
8326  *		read and write caching by modifying the WCE (write cache
8327  *		enable) and RCD (read cache disable) bits of mode
8328  *		page 8 (MODEPAGE_CACHING).
8329  *
8330  *   Arguments: un - driver soft state (unit) structure
8331  *		rcd_flag - flag for controlling the read cache
8332  *		wce_flag - flag for controlling the write cache
8333  *
8334  * Return Code: EIO
8335  *		code returned by sd_send_scsi_MODE_SENSE and
8336  *		sd_send_scsi_MODE_SELECT
8337  *
8338  *     Context: Kernel Thread
8339  */
8340 
8341 static int
8342 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
8343 {
8344 	struct mode_caching	*mode_caching_page;
8345 	uchar_t			*header;
8346 	size_t			buflen;
8347 	int			hdrlen;
8348 	int			bd_len;
8349 	int			rval = 0;
8350 	struct mode_header_grp2	*mhp;
8351 
8352 	ASSERT(un != NULL);
8353 
8354 	/*
8355 	 * Do a test unit ready, otherwise a mode sense may not work if this
8356 	 * is the first command sent to the device after boot.
8357 	 */
8358 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8359 
8360 	if (un->un_f_cfg_is_atapi == TRUE) {
8361 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8362 	} else {
8363 		hdrlen = MODE_HEADER_LENGTH;
8364 	}
8365 
8366 	/*
8367 	 * Allocate memory for the retrieved mode page and its headers.  Set
8368 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
8369 	 * we get all of the mode sense data; otherwise, the mode select
8370 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8371 	 */
8372 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8373 	    sizeof (struct mode_cache_scsi3);
8374 
8375 	header = kmem_zalloc(buflen, KM_SLEEP);
8376 
8377 	/* Get the information from the device. */
8378 	if (un->un_f_cfg_is_atapi == TRUE) {
8379 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8380 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8381 	} else {
8382 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8383 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8384 	}
8385 	if (rval != 0) {
8386 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8387 		    "sd_cache_control: Mode Sense Failed\n");
8388 		kmem_free(header, buflen);
8389 		return (rval);
8390 	}
8391 
8392 	/*
8393 	 * Determine size of Block Descriptors in order to locate
8394 	 * the mode page data. ATAPI devices return 0, SCSI devices
8395 	 * should return MODE_BLK_DESC_LENGTH.
8396 	 */
8397 	if (un->un_f_cfg_is_atapi == TRUE) {
8398 		mhp	= (struct mode_header_grp2 *)header;
8399 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8400 	} else {
8401 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8402 	}
8403 
8404 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8405 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8406 		    "sd_cache_control: Mode Sense returned invalid "
8407 		    "block descriptor length\n");
8408 		kmem_free(header, buflen);
8409 		return (EIO);
8410 	}
8411 
8412 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8413 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8414 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
8415 		    " caching page code mismatch %d\n",
8416 		    mode_caching_page->mode_page.code);
8417 		kmem_free(header, buflen);
8418 		return (EIO);
8419 	}
8420 
8421 	/* Check the relevant bits on successful mode sense. */
8422 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
8423 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
8424 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
8425 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
8426 
8427 		size_t sbuflen;
8428 		uchar_t save_pg;
8429 
8430 		/*
8431 		 * Construct select buffer length based on the
8432 		 * length of the sense data returned.
8433 		 */
8434 		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
8435 		    sizeof (struct mode_page) +
8436 		    (int)mode_caching_page->mode_page.length;
8437 
8438 		/*
8439 		 * Set the caching bits as requested.
8440 		 */
8441 		if (rcd_flag == SD_CACHE_ENABLE)
8442 			mode_caching_page->rcd = 0;
8443 		else if (rcd_flag == SD_CACHE_DISABLE)
8444 			mode_caching_page->rcd = 1;
8445 
8446 		if (wce_flag == SD_CACHE_ENABLE)
8447 			mode_caching_page->wce = 1;
8448 		else if (wce_flag == SD_CACHE_DISABLE)
8449 			mode_caching_page->wce = 0;
8450 
8451 		/*
8452 		 * Save the page if the mode sense says the
8453 		 * drive supports it.
8454 		 */
8455 		save_pg = mode_caching_page->mode_page.ps ?
8456 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
8457 
8458 		/* Clear reserved bits before mode select. */
8459 		mode_caching_page->mode_page.ps = 0;
8460 
8461 		/*
8462 		 * Clear out mode header for mode select.
8463 		 * The rest of the retrieved page will be reused.
8464 		 */
8465 		bzero(header, hdrlen);
8466 
8467 		if (un->un_f_cfg_is_atapi == TRUE) {
8468 			mhp = (struct mode_header_grp2 *)header;
8469 			mhp->bdesc_length_hi = bd_len >> 8;
8470 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
8471 		} else {
8472 			((struct mode_header *)header)->bdesc_length = bd_len;
8473 		}
8474 
8475 		/* Issue mode select to change the cache settings */
8476 		if (un->un_f_cfg_is_atapi == TRUE) {
8477 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
8478 			    sbuflen, save_pg, SD_PATH_DIRECT);
8479 		} else {
8480 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
8481 			    sbuflen, save_pg, SD_PATH_DIRECT);
8482 		}
8483 	}
8484 
8485 	kmem_free(header, buflen);
8486 	return (rval);
8487 }
8488 
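/*
 * Usage and layout sketch (editorial, hypothetical caller): to enable
 * both the read and write caches, a caller would issue
 *
 *	rval = sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_ENABLE);
 *
 * The buffer exchanged with MODE SENSE/MODE SELECT above is laid out as
 *
 *	| mode header (hdrlen) | block descriptor (bd_len) | page 8 data |
 *
 * which is why the caching page pointer is computed as
 * header + hdrlen + bd_len.
 */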
8489 
8490 /*
8491  *    Function: sd_get_write_cache_enabled()
8492  *
8493  * Description: This routine is the driver entry point for determining if
8494  *		write caching is enabled.  It examines the WCE (write cache
8495  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
8496  *
8497  *   Arguments: un - driver soft state (unit) structure
8498  *		is_enabled - pointer to int where write cache enabled state
8499  *		is returned (non-zero -> write cache enabled)
8500  *
8501  *
8502  * Return Code: EIO
8503  *		code returned by sd_send_scsi_MODE_SENSE
8504  *
8505  *     Context: Kernel Thread
8506  *
8507  * NOTE: If an ioctl is added to disable the write cache, this sequence
8508  * should be followed so that no locking is required for accesses to
8509  * un->un_f_write_cache_enabled:
8510  * 	do mode select to clear wce
8511  * 	do synchronize cache to flush cache
8512  * 	set un->un_f_write_cache_enabled = FALSE
8513  *
8514  * Conversely, an ioctl to enable the write cache should be done
8515  * in this order:
8516  * 	set un->un_f_write_cache_enabled = TRUE
8517  * 	do mode select to set wce
8518  */
8519 
8520 static int
8521 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
8522 {
8523 	struct mode_caching	*mode_caching_page;
8524 	uchar_t			*header;
8525 	size_t			buflen;
8526 	int			hdrlen;
8527 	int			bd_len;
8528 	int			rval = 0;
8529 
8530 	ASSERT(un != NULL);
8531 	ASSERT(is_enabled != NULL);
8532 
8533 	/* in case of error, flag as enabled */
8534 	*is_enabled = TRUE;
8535 
8536 	/*
8537 	 * Do a test unit ready, otherwise a mode sense may not work if this
8538 	 * is the first command sent to the device after boot.
8539 	 */
8540 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
8541 
8542 	if (un->un_f_cfg_is_atapi == TRUE) {
8543 		hdrlen = MODE_HEADER_LENGTH_GRP2;
8544 	} else {
8545 		hdrlen = MODE_HEADER_LENGTH;
8546 	}
8547 
8548 	/*
8549 	 * Allocate memory for the retrieved mode page and its headers.  Set
8550 	 * a pointer to the page itself.
8551 	 */
8552 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
8553 	header = kmem_zalloc(buflen, KM_SLEEP);
8554 
8555 	/* Get the information from the device. */
8556 	if (un->un_f_cfg_is_atapi == TRUE) {
8557 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
8558 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8559 	} else {
8560 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
8561 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8562 	}
8563 	if (rval != 0) {
8564 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8565 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
8566 		kmem_free(header, buflen);
8567 		return (rval);
8568 	}
8569 
8570 	/*
8571 	 * Determine size of Block Descriptors in order to locate
8572 	 * the mode page data. ATAPI devices return 0, SCSI devices
8573 	 * should return MODE_BLK_DESC_LENGTH.
8574 	 */
8575 	if (un->un_f_cfg_is_atapi == TRUE) {
8576 		struct mode_header_grp2	*mhp;
8577 		mhp	= (struct mode_header_grp2 *)header;
8578 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8579 	} else {
8580 		bd_len  = ((struct mode_header *)header)->bdesc_length;
8581 	}
8582 
8583 	if (bd_len > MODE_BLK_DESC_LENGTH) {
8584 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
8585 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
8586 		    "block descriptor length\n");
8587 		kmem_free(header, buflen);
8588 		return (EIO);
8589 	}
8590 
8591 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
8592 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
8593 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled:"
8594 		    " Mode Sense caching page code mismatch %d\n",
8595 		    mode_caching_page->mode_page.code);
8596 		kmem_free(header, buflen);
8597 		return (EIO);
8598 	}
8599 	*is_enabled = mode_caching_page->wce;
8600 
8601 	kmem_free(header, buflen);
8602 	return (0);
8603 }
8604 
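/*
 * Usage sketch (editorial, assumed attach-time caller): the cached flag
 * described in the NOTE above could be primed as follows:
 *
 *	int wc_enabled;
 *
 *	if (sd_get_write_cache_enabled(un, &wc_enabled) == 0)
 *		un->un_f_write_cache_enabled = (wc_enabled != 0);
 *
 * On failure the routine reports the cache as enabled, so callers
 * conservatively assume writes may be cached.
 */
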
8605 /*
8606  *    Function: sd_get_nv_sup()
8607  *
8608  * Description: This routine is the driver entry point for
8609  * determining whether a non-volatile cache is supported. The
8610  * determination process works as follows:
8611  *
8612  * 1. sd first queries sd.conf to see whether the
8613  * suppress_cache_flush bit is set for this device.
8614  *
8615  * 2. If it is not set there, sd queries the internal disk table.
8616  *
8617  * 3. If either sd.conf or the internal disk table specifies
8618  * that cache flushes be suppressed, we don't bother checking
8619  * the NV_SUP bit.
8620  *
8621  * If the SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
8622  * the optional INQUIRY VPD page 0x86. If the device
8623  * supports VPD page 0x86, sd examines the NV_SUP
8624  * (non-volatile cache support) bit in the INQUIRY VPD page
8625  * 0x86:
8626  *   o If the NV_SUP bit is set, sd assumes the device has a
8627  *   non-volatile cache and sets
8628  *   un_f_sync_nv_supported to TRUE.
8629  *   o Otherwise the cache is not non-volatile, and
8630  *   un_f_sync_nv_supported is set to FALSE.
8631  *
8632  * Arguments: un - driver soft state (unit) structure
8633  *
8634  * Return Code:
8635  *
8636  *     Context: Kernel Thread
8637  */
8638 
8639 static void
8640 sd_get_nv_sup(struct sd_lun *un)
8641 {
8642 	int		rval		= 0;
8643 	uchar_t		*inq86		= NULL;
8644 	size_t		inq86_len	= MAX_INQUIRY_SIZE;
8645 	size_t		inq86_resid	= 0;
8646 	struct		dk_callback *dkc;
8647 
8648 	ASSERT(un != NULL);
8649 
8650 	mutex_enter(SD_MUTEX(un));
8651 
8652 	/*
8653 	 * Be conservative on the device's support of
8654 	 * SYNC_NV bit: un_f_sync_nv_supported is
8655 	 * initialized to be false.
8656 	 */
8657 	un->un_f_sync_nv_supported = FALSE;
8658 
8659 	/*
8660 	 * If either sd.conf or internal disk table
8661 	 * specifies cache flush be suppressed, then
8662 	 * we don't bother checking NV_SUP bit.
8663 	 */
8664 	if (un->un_f_suppress_cache_flush == TRUE) {
8665 		mutex_exit(SD_MUTEX(un));
8666 		return;
8667 	}
8668 
8669 	if (sd_check_vpd_page_support(un) == 0 &&
8670 	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
8671 		mutex_exit(SD_MUTEX(un));
8672 		/* collect page 86 data if available */
8673 		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
8674 		rval = sd_send_scsi_INQUIRY(un, inq86, inq86_len,
8675 		    0x01, 0x86, &inq86_resid);
8676 
8677 		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
8678 			SD_TRACE(SD_LOG_COMMON, un,
8679 			    "sd_get_nv_sup: successfully got "
8680 			    "VPD page: %x "
8681 			    "PAGE LENGTH: %x BYTE 6: %x\n",
8682 			    inq86[1], inq86[3], inq86[6]);
8683 
8684 			mutex_enter(SD_MUTEX(un));
8685 			/*
8686 			 * check the value of NV_SUP bit: only if the device
8687 			 * reports NV_SUP bit to be 1, the
8688 			 * un_f_sync_nv_supported bit will be set to true.
8689 			 */
8690 			if (inq86[6] & SD_VPD_NV_SUP) {
8691 				un->un_f_sync_nv_supported = TRUE;
8692 			}
8693 			mutex_exit(SD_MUTEX(un));
8694 		}
8695 		kmem_free(inq86, inq86_len);
8696 	} else {
8697 		mutex_exit(SD_MUTEX(un));
8698 	}
8699 
8700 	/*
8701 	 * Send a SYNC CACHE command to check whether the
8702 	 * SYNC_NV bit is supported. By the time this command is
8703 	 * issued, un_f_sync_nv_supported holds its correct value.
8704 	 */
8705 	mutex_enter(SD_MUTEX(un));
8706 	if (un->un_f_sync_nv_supported) {
8707 		mutex_exit(SD_MUTEX(un));
8708 		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
8709 		dkc->dkc_flag = FLUSH_VOLATILE;
8710 		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
8711 
8712 		/*
8713 		 * Send a TEST UNIT READY command to the device. This should
8714 		 * clear any outstanding UNIT ATTENTION that may be present.
8715 		 */
8716 		(void) sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR);
8717 
8718 		kmem_free(dkc, sizeof (struct dk_callback));
8719 	} else {
8720 		mutex_exit(SD_MUTEX(un));
8721 	}
8722 
8723 	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: "
8724 	    "un_f_suppress_cache_flush is set to %d\n",
8725 	    un->un_f_suppress_cache_flush);
8726 }
8727 
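/*
 * For reference (editorial note, reconstructed from the trace message
 * above): the INQUIRY VPD page 0x86 bytes examined here are
 *
 *	inq86[1]	page code (expected 0x86)
 *	inq86[3]	page length
 *	inq86[6]	flags byte holding the NV_SUP bit (SD_VPD_NV_SUP)
 *
 * so the test (inq86[6] & SD_VPD_NV_SUP) alone decides whether
 * un_f_sync_nv_supported is set to TRUE.
 */
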
8728 /*
8729  *    Function: sd_make_device
8730  *
8731  * Description: Utility routine to return the Solaris device number from
8732  *		the data in the device's dev_info structure.
8733  *
8734  * Return Code: The Solaris device number
8735  *
8736  *     Context: Any
8737  */
8738 
8739 static dev_t
8740 sd_make_device(dev_info_t *devi)
8741 {
8742 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
8743 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
8744 }
8745 
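/*
 * Editorial note: the minor number built above encodes the instance as
 * (instance << SDUNIT_SHIFT), so SDUNIT() applied to any minor of this
 * device recovers the instance, e.g. (sketch):
 *
 *	dev_t dev = sd_make_device(devi);
 *	ASSERT(SDUNIT(dev) == ddi_get_instance(devi));
 */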
8746 
8747 /*
8748  *    Function: sd_pm_entry
8749  *
8750  * Description: Called at the start of a new command to manage power
8751  *		and busy status of a device. This includes determining whether
8752  *		the current power state of the device is sufficient for
8753  *		performing the command or whether it must be changed.
8754  *		The PM framework is notified appropriately.
8755  *		Only with a return status of DDI_SUCCESS will the
8756  *		component be busy to the framework.
8757  *
8758  *		All callers of sd_pm_entry must check the return status
8759  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
8760  *		of DDI_FAILURE indicates the device failed to power up.
8761  *		In this case un_pm_count has been adjusted so the result
8762  *		on exit is still powered down, i.e. count is less than 0.
8763  *		Calling sd_pm_exit with this count value hits an ASSERT.
8764  *
8765  * Return Code: DDI_SUCCESS or DDI_FAILURE
8766  *
8767  *     Context: Kernel thread context.
8768  */
8769 
8770 static int
8771 sd_pm_entry(struct sd_lun *un)
8772 {
8773 	int return_status = DDI_SUCCESS;
8774 
8775 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8776 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8777 
8778 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
8779 
8780 	if (un->un_f_pm_is_enabled == FALSE) {
8781 		SD_TRACE(SD_LOG_IO_PM, un,
8782 		    "sd_pm_entry: exiting, PM not enabled\n");
8783 		return (return_status);
8784 	}
8785 
8786 	/*
8787 	 * Just increment a counter if PM is enabled. On the transition from
8788 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
8789 	 * the count with each IO and mark the device as idle when the count
8790 	 * hits 0.
8791 	 *
8792 	 * If the count is less than 0 the device is powered down. If a powered
8793 	 * down device is successfully powered up then the count must be
8794 	 * incremented to reflect the power up. Note that it'll get incremented
8795 	 * a second time to become busy.
8796 	 *
8797 	 * Because the following has the potential to change the device state
8798 	 * and must release the un_pm_mutex to do so, only one thread can be
8799 	 * allowed through at a time.
8800 	 */
8801 
8802 	mutex_enter(&un->un_pm_mutex);
8803 	while (un->un_pm_busy == TRUE) {
8804 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
8805 	}
8806 	un->un_pm_busy = TRUE;
8807 
8808 	if (un->un_pm_count < 1) {
8809 
8810 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
8811 
8812 		/*
8813 		 * Indicate we are now busy so the framework won't attempt to
8814 		 * power down the device. This call will only fail if either
8815 		 * we passed a bad component number or the device has no
8816 		 * components. Neither of these should ever happen.
8817 		 */
8818 		mutex_exit(&un->un_pm_mutex);
8819 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
8820 		ASSERT(return_status == DDI_SUCCESS);
8821 
8822 		mutex_enter(&un->un_pm_mutex);
8823 
8824 		if (un->un_pm_count < 0) {
8825 			mutex_exit(&un->un_pm_mutex);
8826 
8827 			SD_TRACE(SD_LOG_IO_PM, un,
8828 			    "sd_pm_entry: power up component\n");
8829 
8830 			/*
8831 			 * pm_raise_power will cause sdpower to be called
8832 			 * which brings the device power level to the
8833 			 * desired state, ON in this case. If successful,
8834 			 * un_pm_count and un_power_level will be updated
8835 			 * appropriately.
8836 			 */
8837 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
8838 			    SD_SPINDLE_ON);
8839 
8840 			mutex_enter(&un->un_pm_mutex);
8841 
8842 			if (return_status != DDI_SUCCESS) {
8843 				/*
8844 				 * Power up failed.
8845 				 * Idle the device and adjust the count
8846 				 * so the result on exit is that we're
8847 				 * still powered down, i.e. count is less than 0.
8848 				 */
8849 				SD_TRACE(SD_LOG_IO_PM, un,
8850 				    "sd_pm_entry: power up failed,"
8851 				    " idle the component\n");
8852 
8853 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8854 				un->un_pm_count--;
8855 			} else {
8856 				/*
8857 				 * Device is powered up, verify the
8858 				 * count is non-negative.
8859 				 * This is debug only.
8860 				 */
8861 				ASSERT(un->un_pm_count == 0);
8862 			}
8863 		}
8864 
8865 		if (return_status == DDI_SUCCESS) {
8866 			/*
8867 			 * For performance, now that the device has been tagged
8868 			 * as busy, and it's known to be powered up, update the
8869 			 * chain types to use jump tables that do not include
8870 			 * pm. This significantly lowers the overhead and
8871 			 * therefore improves performance.
8872 			 */
8873 
8874 			mutex_exit(&un->un_pm_mutex);
8875 			mutex_enter(SD_MUTEX(un));
8876 			SD_TRACE(SD_LOG_IO_PM, un,
8877 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
8878 			    un->un_uscsi_chain_type);
8879 
8880 			if (un->un_f_non_devbsize_supported) {
8881 				un->un_buf_chain_type =
8882 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
8883 			} else {
8884 				un->un_buf_chain_type =
8885 				    SD_CHAIN_INFO_DISK_NO_PM;
8886 			}
8887 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8888 
8889 			SD_TRACE(SD_LOG_IO_PM, un,
8890 			    "             changed  uscsi_chain_type to   %d\n",
8891 			    un->un_uscsi_chain_type);
8892 			mutex_exit(SD_MUTEX(un));
8893 			mutex_enter(&un->un_pm_mutex);
8894 
8895 			if (un->un_pm_idle_timeid == NULL) {
8896 				/* 300 ms. */
8897 				un->un_pm_idle_timeid =
8898 				    timeout(sd_pm_idletimeout_handler, un,
8899 				    (drv_usectohz((clock_t)300000)));
8900 				/*
8901 				 * Include an extra call to busy which keeps the
8902 				 * device busy with respect to the PM layer
8903 				 * until the timer fires, at which time it'll
8904 				 * get the extra idle call.
8905 				 */
8906 				(void) pm_busy_component(SD_DEVINFO(un), 0);
8907 			}
8908 		}
8909 	}
8910 	un->un_pm_busy = FALSE;
8911 	/* Next... */
8912 	cv_signal(&un->un_pm_busy_cv);
8913 
8914 	un->un_pm_count++;
8915 
8916 	SD_TRACE(SD_LOG_IO_PM, un,
8917 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
8918 
8919 	mutex_exit(&un->un_pm_mutex);
8920 
8921 	return (return_status);
8922 }
8923 
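/*
 * Typical calling pattern (editorial sketch; it mirrors the use in
 * sdclose() below):
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *		sd_pm_exit(un);
 *	} else {
 *		... failed to power up; do NOT call sd_pm_exit() ...
 *	}
 *
 * per the requirement stated in the sd_pm_entry header above.
 */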
8924 
8925 /*
8926  *    Function: sd_pm_exit
8927  *
8928  * Description: Called at the completion of a command to manage busy
8929  *		status for the device. If the device becomes idle the
8930  *		PM framework is notified.
8931  *
8932  *     Context: Kernel thread context
8933  */
8934 
8935 static void
8936 sd_pm_exit(struct sd_lun *un)
8937 {
8938 	ASSERT(!mutex_owned(SD_MUTEX(un)));
8939 	ASSERT(!mutex_owned(&un->un_pm_mutex));
8940 
8941 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
8942 
8943 	/*
8944 	 * After attach the following flag is only read, so don't
8945 	 * take the penalty of acquiring a mutex for it.
8946 	 */
8947 	if (un->un_f_pm_is_enabled == TRUE) {
8948 
8949 		mutex_enter(&un->un_pm_mutex);
8950 		un->un_pm_count--;
8951 
8952 		SD_TRACE(SD_LOG_IO_PM, un,
8953 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
8954 
8955 		ASSERT(un->un_pm_count >= 0);
8956 		if (un->un_pm_count == 0) {
8957 			mutex_exit(&un->un_pm_mutex);
8958 
8959 			SD_TRACE(SD_LOG_IO_PM, un,
8960 			    "sd_pm_exit: idle component\n");
8961 
8962 			(void) pm_idle_component(SD_DEVINFO(un), 0);
8963 
8964 		} else {
8965 			mutex_exit(&un->un_pm_mutex);
8966 		}
8967 	}
8968 
8969 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
8970 }
8971 
8972 
8973 /*
8974  *    Function: sdopen
8975  *
8976  * Description: Driver's open(9e) entry point function.
8977  *
8978  *   Arguments: dev_p   - pointer to device number
8979  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
8980  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
8981  *		cred_p  - user credential pointer
8982  *
8983  * Return Code: EINVAL
8984  *		ENXIO
8985  *		EIO
8986  *		EROFS
8987  *		EBUSY
8988  *
8989  *     Context: Kernel thread context
8990  */
8991 /* ARGSUSED */
8992 static int
8993 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
8994 {
8995 	struct sd_lun	*un;
8996 	int		nodelay;
8997 	int		part;
8998 	uint64_t	partmask;
8999 	int		instance;
9000 	dev_t		dev;
9001 	int		rval = EIO;
9002 	diskaddr_t	nblks = 0;
9003 
9004 	/* Validate the open type */
9005 	if (otyp >= OTYPCNT) {
9006 		return (EINVAL);
9007 	}
9008 
9009 	dev = *dev_p;
9010 	instance = SDUNIT(dev);
9011 	mutex_enter(&sd_detach_mutex);
9012 
9013 	/*
9014 	 * Fail the open if there is no softstate for the instance, or
9015 	 * if another thread somewhere is trying to detach the instance.
9016 	 */
9017 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
9018 	    (un->un_detach_count != 0)) {
9019 		mutex_exit(&sd_detach_mutex);
9020 		/*
9021 		 * The probe cache only needs to be cleared when open (9e) fails
9022 		 * with ENXIO (4238046).
9023 		 */
9024 		/*
9025 		 * Unconditionally clearing the probe cache is ok with
9026 		 * separate sd/ssd binaries; on the x86 platform, where
9027 		 * parallel and fibre support share one binary, it can
9028 		 * be an issue.
9029 		 */
9030 		sd_scsi_clear_probe_cache();
9031 		return (ENXIO);
9032 	}
9033 
9034 	/*
9035 	 * The un_layer_count is to prevent another thread in specfs from
9036 	 * trying to detach the instance, which can happen when we are
9037 	 * called from a higher-layer driver instead of through specfs.
9038 	 * This will not be needed when DDI provides a layered driver
9039 	 * interface that allows specfs to know that an instance is in
9040 	 * use by a layered driver & should not be detached.
9041 	 *
9042 	 * Note: the semantics for layered driver opens are exactly one
9043 	 * close for every open.
9044 	 */
9045 	if (otyp == OTYP_LYR) {
9046 		un->un_layer_count++;
9047 	}
9048 
9049 	/*
9050 	 * Keep a count of the current # of opens in progress. This is because
9051 	 * some layered drivers try to call us as a regular open. This can
9052 	 * cause problems that we cannot prevent; however, by keeping this
9053 	 * count we can at least keep our open and detach routines from racing
9054 	 * against each other under such conditions.
9055 	 */
9056 	un->un_opens_in_progress++;
9057 	mutex_exit(&sd_detach_mutex);
9058 
9059 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
9060 	part	 = SDPART(dev);
9061 	partmask = 1 << part;
9062 
9063 	/*
9064 	 * We use a semaphore here in order to serialize
9065 	 * open and close requests on the device.
9066 	 */
9067 	sema_p(&un->un_semoclose);
9068 
9069 	mutex_enter(SD_MUTEX(un));
9070 
9071 	/*
9072 	 * All device accesses go through sdstrategy(), where we check
9073 	 * the suspend status; but there could be a scsi_poll command,
9074 	 * which bypasses sdstrategy(), so we need to check the pm
9075 	 * status here as well.
9076 	 */
9077 
9078 	if (!nodelay) {
9079 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9080 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9081 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9082 		}
9083 
9084 		mutex_exit(SD_MUTEX(un));
9085 		if (sd_pm_entry(un) != DDI_SUCCESS) {
9086 			rval = EIO;
9087 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
9088 			    "sdopen: sd_pm_entry failed\n");
9089 			goto open_failed_with_pm;
9090 		}
9091 		mutex_enter(SD_MUTEX(un));
9092 	}
9093 
9094 	/* check for previous exclusive open */
9095 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
9096 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9097 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
9098 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
9099 
9100 	if (un->un_exclopen & (partmask)) {
9101 		goto excl_open_fail;
9102 	}
9103 
9104 	if (flag & FEXCL) {
9105 		int i;
9106 		if (un->un_ocmap.lyropen[part]) {
9107 			goto excl_open_fail;
9108 		}
9109 		for (i = 0; i < (OTYPCNT - 1); i++) {
9110 			if (un->un_ocmap.regopen[i] & (partmask)) {
9111 				goto excl_open_fail;
9112 			}
9113 		}
9114 	}
9115 
9116 	/*
9117 	 * Check the write permission if this is a removable media device,
9118 	 * NDELAY has not been set, and writable permission is requested.
9119 	 *
9120 	 * Note: If NDELAY was set and this is write-protected media the WRITE
9121 	 * attempt will fail with EIO as part of the I/O processing. This is a
9122 	 * more permissive implementation that allows the open to succeed and
9123 	 * WRITE attempts to fail when appropriate.
9124 	 */
9125 	if (un->un_f_chk_wp_open) {
9126 		if ((flag & FWRITE) && (!nodelay)) {
9127 			mutex_exit(SD_MUTEX(un));
9128 			/*
9129 			 * For a writable DVD drive, defer the write-permission
9130 			 * check until sdstrategy; do not fail the open even if
9131 			 * FWRITE is set, since the device may be writable
9132 			 * depending upon the media, and the media can change
9133 			 * after the call to open().
9134 			 */
9135 			if (un->un_f_dvdram_writable_device == FALSE) {
9136 				if (ISCD(un) || sr_check_wp(dev)) {
9137 					rval = EROFS;
9138 					mutex_enter(SD_MUTEX(un));
9139 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9140 					    "write to cd or write protected media\n");
9141 					goto open_fail;
9142 				}
9143 			}
9144 			mutex_enter(SD_MUTEX(un));
9145 		}
9146 	}
9147 
9148 	/*
9149 	 * If opening in NDELAY/NONBLOCK mode, just return.
9150 	 * Check if disk is ready and has a valid geometry later.
9151 	 */
9152 	if (!nodelay) {
9153 		mutex_exit(SD_MUTEX(un));
9154 		rval = sd_ready_and_valid(un);
9155 		mutex_enter(SD_MUTEX(un));
9156 		/*
9157 		 * Fail if the device is not ready, or if the number of disk
9158 		 * blocks is zero or negative for non-CD devices.
9159 		 */
9160 
9161 		nblks = 0;
9162 
9163 		if (rval == SD_READY_VALID && (!ISCD(un))) {
9164 			/* if cmlb_partinfo fails, nblks remains 0 */
9165 			mutex_exit(SD_MUTEX(un));
9166 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
9167 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
9168 			mutex_enter(SD_MUTEX(un));
9169 		}
9170 
9171 		if ((rval != SD_READY_VALID) ||
9172 		    (!ISCD(un) && nblks <= 0)) {
9173 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
9174 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9175 			    "device not ready or invalid disk block value\n");
9176 			goto open_fail;
9177 		}
9178 #if defined(__i386) || defined(__amd64)
9179 	} else {
9180 		uchar_t *cp;
9181 		/*
9182 		 * x86 requires special nodelay handling, so that p0 is
9183 		 * always defined and accessible.
9184 		 * Invalidate geometry only if device is not already open.
9185 		 */
9186 		cp = &un->un_ocmap.chkd[0];
9187 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9188 			if (*cp != (uchar_t)0) {
9189 				break;
9190 			}
9191 			cp++;
9192 		}
9193 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9194 			mutex_exit(SD_MUTEX(un));
9195 			cmlb_invalidate(un->un_cmlbhandle,
9196 			    (void *)SD_PATH_DIRECT);
9197 			mutex_enter(SD_MUTEX(un));
9198 		}
9199 
9200 #endif
9201 	}
9202 
9203 	if (otyp == OTYP_LYR) {
9204 		un->un_ocmap.lyropen[part]++;
9205 	} else {
9206 		un->un_ocmap.regopen[otyp] |= partmask;
9207 	}
9208 
9209 	/* Set up open and exclusive open flags */
9210 	if (flag & FEXCL) {
9211 		un->un_exclopen |= (partmask);
9212 	}
9213 
9214 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9215 	    "open of part %d type %d\n", part, otyp);
9216 
9217 	mutex_exit(SD_MUTEX(un));
9218 	if (!nodelay) {
9219 		sd_pm_exit(un);
9220 	}
9221 
9222 	sema_v(&un->un_semoclose);
9223 
9224 	mutex_enter(&sd_detach_mutex);
9225 	un->un_opens_in_progress--;
9226 	mutex_exit(&sd_detach_mutex);
9227 
9228 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9229 	return (DDI_SUCCESS);
9230 
9231 excl_open_fail:
9232 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9233 	rval = EBUSY;
9234 
9235 open_fail:
9236 	mutex_exit(SD_MUTEX(un));
9237 
9238 	/*
9239 	 * On a failed open we must exit the pm management.
9240 	 */
9241 	if (!nodelay) {
9242 		sd_pm_exit(un);
9243 	}
9244 open_failed_with_pm:
9245 	sema_v(&un->un_semoclose);
9246 
9247 	mutex_enter(&sd_detach_mutex);
9248 	un->un_opens_in_progress--;
9249 	if (otyp == OTYP_LYR) {
9250 		un->un_layer_count--;
9251 	}
9252 	mutex_exit(&sd_detach_mutex);
9253 
9254 	return (rval);
9255 }
9256 
9257 
9258 /*
9259  *    Function: sdclose
9260  *
9261  * Description: Driver's close(9e) entry point function.
9262  *
9263  *   Arguments: dev    - device number
9264  *		flag   - file status flag, informational only
9265  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9266  *		cred_p - user credential pointer
9267  *
9268  * Return Code: ENXIO
9269  *
9270  *     Context: Kernel thread context
9271  */
9272 /* ARGSUSED */
9273 static int
9274 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9275 {
9276 	struct sd_lun	*un;
9277 	uchar_t		*cp;
9278 	int		part;
9279 	int		nodelay;
9280 	int		rval = 0;
9281 
9282 	/* Validate the open type */
9283 	if (otyp >= OTYPCNT) {
9284 		return (ENXIO);
9285 	}
9286 
9287 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9288 		return (ENXIO);
9289 	}
9290 
9291 	part = SDPART(dev);
9292 	nodelay = flag & (FNDELAY | FNONBLOCK);
9293 
9294 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9295 	    "sdclose: close of part %d type %d\n", part, otyp);
9296 
9297 	/*
9298 	 * We use a semaphore here in order to serialize
9299 	 * open and close requests on the device.
9300 	 */
9301 	sema_p(&un->un_semoclose);
9302 
9303 	mutex_enter(SD_MUTEX(un));
9304 
9305 	/* Don't proceed if power is being changed. */
9306 	while (un->un_state == SD_STATE_PM_CHANGING) {
9307 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9308 	}
9309 
9310 	if (un->un_exclopen & (1 << part)) {
9311 		un->un_exclopen &= ~(1 << part);
9312 	}
9313 
9314 	/* Update the open partition map */
9315 	if (otyp == OTYP_LYR) {
9316 		un->un_ocmap.lyropen[part] -= 1;
9317 	} else {
9318 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
9319 	}
9320 
9321 	cp = &un->un_ocmap.chkd[0];
9322 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9323 		if (*cp != (uchar_t)0) {
9324 			break;
9325 		}
9326 		cp++;
9327 	}
9328 
9329 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9330 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
9331 
9332 		/*
9333 		 * We avoid persistence upon the last close, and set
9334 		 * the throttle back to the maximum.
9335 		 */
9336 		un->un_throttle = un->un_saved_throttle;
9337 
9338 		if (un->un_state == SD_STATE_OFFLINE) {
9339 			if (un->un_f_is_fibre == FALSE) {
9340 				scsi_log(SD_DEVINFO(un), sd_label,
9341 				    CE_WARN, "offline\n");
9342 			}
9343 			mutex_exit(SD_MUTEX(un));
9344 			cmlb_invalidate(un->un_cmlbhandle,
9345 			    (void *)SD_PATH_DIRECT);
9346 			mutex_enter(SD_MUTEX(un));
9347 
9348 		} else {
9349 			/*
9350 			 * Flush any outstanding writes in NVRAM cache.
9351 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
9352 			 * cmd; it may not work for non-Pluto devices.
9353 			 * SYNCHRONIZE CACHE is not required for removables,
9354 			 * except DVD-RAM drives.
9355 			 *
9356 			 * Also note: because SYNCHRONIZE CACHE is currently
9357 			 * the only command issued here that requires the
9358 			 * drive be powered up, only do the power up before
9359 			 * sending the Sync Cache command. If additional
9360 			 * commands are added which require a powered up
9361 			 * drive, the following sequence may have to change.
9362 			 *
9363 			 * And finally, note that parallel SCSI on SPARC
9364 			 * only issues a Sync Cache to DVD-RAM, a newly
9365 			 * supported device.
9366 			 */
9367 #if defined(__i386) || defined(__amd64)
9368 			if (un->un_f_sync_cache_supported ||
9369 			    un->un_f_dvdram_writable_device == TRUE) {
9370 #else
9371 			if (un->un_f_dvdram_writable_device == TRUE) {
9372 #endif
9373 				mutex_exit(SD_MUTEX(un));
9374 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9375 					rval =
9376 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
9377 					    NULL);
9378 					/* ignore error if not supported */
9379 					if (rval == ENOTSUP) {
9380 						rval = 0;
9381 					} else if (rval != 0) {
9382 						rval = EIO;
9383 					}
9384 					sd_pm_exit(un);
9385 				} else {
9386 					rval = EIO;
9387 				}
9388 				mutex_enter(SD_MUTEX(un));
9389 			}
9390 
9391 			/*
9392 			 * For devices which support DOOR_LOCK, send an ALLOW
9393 			 * MEDIA REMOVAL command, but don't get upset if it
9394 			 * fails. We need to raise the power of the drive before
9395 			 * we can call sd_send_scsi_DOORLOCK().
9396 			 */
9397 			if (un->un_f_doorlock_supported) {
9398 				mutex_exit(SD_MUTEX(un));
9399 				if (sd_pm_entry(un) == DDI_SUCCESS) {
9400 					rval = sd_send_scsi_DOORLOCK(un,
9401 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
9402 
9403 					sd_pm_exit(un);
9404 					if (ISCD(un) && (rval != 0) &&
9405 					    (nodelay != 0)) {
9406 						rval = ENXIO;
9407 					}
9408 				} else {
9409 					rval = EIO;
9410 				}
9411 				mutex_enter(SD_MUTEX(un));
9412 			}
9413 
9414 			/*
9415 			 * If a device has removable media, invalidate all
9416 			 * parameters related to media, such as geometry,
9417 			 * blocksize, and blockcount.
9418 			 */
9419 			if (un->un_f_has_removable_media) {
9420 				sr_ejected(un);
9421 			}
9422 
9423 			/*
9424 			 * Destroy the cache (if it exists) which was
9425 			 * allocated for the write maps since this is
9426 			 * the last close for this media.
9427 			 */
9428 			if (un->un_wm_cache) {
9429 				/*
9430 				 * Check if there are pending commands,
9431 				 * and if there are, give a warning and
9432 				 * do not destroy the cache.
9433 				 */
9434 				if (un->un_ncmds_in_driver > 0) {
9435 					scsi_log(SD_DEVINFO(un),
9436 					    sd_label, CE_WARN,
9437 					    "Unable to clean up memory "
9438 					    "because of pending I/O\n");
9439 				} else {
9440 					kmem_cache_destroy(
9441 					    un->un_wm_cache);
9442 					un->un_wm_cache = NULL;
9443 				}
9444 			}
9445 		}
9446 	}
9447 
9448 	mutex_exit(SD_MUTEX(un));
9449 	sema_v(&un->un_semoclose);
9450 
9451 	if (otyp == OTYP_LYR) {
9452 		mutex_enter(&sd_detach_mutex);
9453 		/*
9454 		 * The detach routine may run when the layer count
9455 		 * drops to zero.
9456 		 */
9457 		un->un_layer_count--;
9458 		mutex_exit(&sd_detach_mutex);
9459 	}
9460 
9461 	return (rval);
9462 }
9463 
9464 
9465 /*
9466  *    Function: sd_ready_and_valid
9467  *
9468  * Description: Test if device is ready and has a valid geometry.
9469  *
9470  *   Arguments: dev - device number
9471  *		un  - driver soft state (unit) structure
9472  *
9473  * Return Code: SD_READY_VALID		ready and valid label
9474  *		SD_NOT_READY_VALID	not ready, no label
9475  *		SD_RESERVED_BY_OTHERS	reservation conflict
9476  *
9477  *     Context: Never called at interrupt context.
9478  */
9479 
9480 static int
9481 sd_ready_and_valid(struct sd_lun *un)
9482 {
9483 	struct sd_errstats	*stp;
9484 	uint64_t		capacity;
9485 	uint_t			lbasize;
9486 	int			rval = SD_READY_VALID;
9487 	char			name_str[48];
9488 	int			is_valid;
9489 
9490 	ASSERT(un != NULL);
9491 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9492 
9493 	mutex_enter(SD_MUTEX(un));
9494 	/*
9495 	 * If a device has removable media, we must check if media is
9496 	 * ready when checking if this device is ready and valid.
9497 	 */
9498 	if (un->un_f_has_removable_media) {
9499 		mutex_exit(SD_MUTEX(un));
9500 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
9501 			rval = SD_NOT_READY_VALID;
9502 			mutex_enter(SD_MUTEX(un));
9503 			goto done;
9504 		}
9505 
9506 		is_valid = SD_IS_VALID_LABEL(un);
9507 		mutex_enter(SD_MUTEX(un));
9508 		if (!is_valid ||
9509 		    (un->un_f_blockcount_is_valid == FALSE) ||
9510 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
9511 
9512 			/* capacity has to be read every open. */
9513 			mutex_exit(SD_MUTEX(un));
9514 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
9515 			    &lbasize, SD_PATH_DIRECT) != 0) {
9516 				cmlb_invalidate(un->un_cmlbhandle,
9517 				    (void *)SD_PATH_DIRECT);
9518 				mutex_enter(SD_MUTEX(un));
9519 				rval = SD_NOT_READY_VALID;
9520 				goto done;
9521 			} else {
9522 				mutex_enter(SD_MUTEX(un));
9523 				sd_update_block_info(un, lbasize, capacity);
9524 			}
9525 		}
9526 
9527 		/*
9528 		 * Check if the media in the device is writable or not.
9529 		 */
9530 		if (!is_valid && ISCD(un)) {
9531 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
9532 		}
9533 
9534 	} else {
9535 		/*
9536 		 * Do a test unit ready to clear any unit attention from non-cd
9537 		 * devices.
9538 		 */
9539 		mutex_exit(SD_MUTEX(un));
9540 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9541 		mutex_enter(SD_MUTEX(un));
9542 	}
9543 
9544 
9545 	/*
9546 	 * If this is a non-512 block device, allocate space for
9547 	 * the wmap cache. This is done here since this routine is
9548 	 * called every time the media is changed, and the block
9549 	 * size is a function of the media rather than the device.
9550 	 */
9551 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
9552 		if (!(un->un_wm_cache)) {
9553 			(void) snprintf(name_str, sizeof (name_str),
9554 			    "%s%d_cache",
9555 			    ddi_driver_name(SD_DEVINFO(un)),
9556 			    ddi_get_instance(SD_DEVINFO(un)));
9557 			un->un_wm_cache = kmem_cache_create(
9558 			    name_str, sizeof (struct sd_w_map),
9559 			    8, sd_wm_cache_constructor,
9560 			    sd_wm_cache_destructor, NULL,
9561 			    (void *)un, NULL, 0);
9562 			if (!(un->un_wm_cache)) {
9563 				rval = ENOMEM;
9564 				goto done;
9565 			}
9566 		}
9567 	}
9568 
9569 	if (un->un_state == SD_STATE_NORMAL) {
9570 		/*
9571 		 * If the target is not yet ready here (defined by a TUR
9572 		 * failure), invalidate the geometry and print an 'offline'
9573 		 * message. This is a legacy message, as the state of the
9574 		 * target is not actually changed to SD_STATE_OFFLINE.
9575 		 *
9576 		 * If the TUR fails for EACCES (Reservation Conflict),
9577 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
9578 		 * reservation conflict. If the TUR fails for other
9579 		 * reasons, SD_NOT_READY_VALID will be returned.
9580 		 */
9581 		int err;
9582 
9583 		mutex_exit(SD_MUTEX(un));
9584 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
9585 		mutex_enter(SD_MUTEX(un));
9586 
9587 		if (err != 0) {
9588 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9589 			    "offline or reservation conflict\n");
9590 			mutex_exit(SD_MUTEX(un));
9591 			cmlb_invalidate(un->un_cmlbhandle,
9592 			    (void *)SD_PATH_DIRECT);
9593 			mutex_enter(SD_MUTEX(un));
9594 			if (err == EACCES) {
9595 				rval = SD_RESERVED_BY_OTHERS;
9596 			} else {
9597 				rval = SD_NOT_READY_VALID;
9598 			}
9599 			goto done;
9600 		}
9601 	}
9602 
9603 	if (un->un_f_format_in_progress == FALSE) {
9604 		mutex_exit(SD_MUTEX(un));
9605 		if (cmlb_validate(un->un_cmlbhandle, 0,
9606 		    (void *)SD_PATH_DIRECT) != 0) {
9607 			rval = SD_NOT_READY_VALID;
9608 			mutex_enter(SD_MUTEX(un));
9609 			goto done;
9610 		}
9611 		if (un->un_f_pkstats_enabled) {
9612 			sd_set_pstats(un);
9613 			SD_TRACE(SD_LOG_IO_PARTITION, un,
9614 			    "sd_ready_and_valid: un:0x%p pstats created and "
9615 			    "set\n", un);
9616 		}
9617 		mutex_enter(SD_MUTEX(un));
9618 	}
9619 
9620 	/*
9621 	 * If this device supports the DOOR_LOCK command, try to send
9622 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
9623 	 * if it fails. For a CD, however, a failure is an error.
9624 	 */
9625 	if (un->un_f_doorlock_supported) {
9626 		mutex_exit(SD_MUTEX(un));
9627 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
9628 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
9629 			rval = SD_NOT_READY_VALID;
9630 			mutex_enter(SD_MUTEX(un));
9631 			goto done;
9632 		}
9633 		mutex_enter(SD_MUTEX(un));
9634 	}
9635 
9636 	/* The state has changed, inform the media watch routines */
9637 	un->un_mediastate = DKIO_INSERTED;
9638 	cv_broadcast(&un->un_state_cv);
9639 	rval = SD_READY_VALID;
9640 
9641 done:
9642 
9643 	/*
9644 	 * Initialize the capacity kstat value if there was no media
9645 	 * previously (capacity kstat is 0) and media has now been
9646 	 * inserted (un_blockcount > 0).
9647 	 */
9648 	if (un->un_errstats != NULL) {
9649 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
9650 		if ((stp->sd_capacity.value.ui64 == 0) &&
9651 		    (un->un_f_blockcount_is_valid == TRUE)) {
9652 			stp->sd_capacity.value.ui64 =
9653 			    (uint64_t)((uint64_t)un->un_blockcount *
9654 			    un->un_sys_blocksize);
9655 		}
9656 	}
9657 
9658 	mutex_exit(SD_MUTEX(un));
9659 	return (rval);
9660 }
9661 
9662 
9663 /*
9664  *    Function: sdmin
9665  *
9666  * Description: Routine to limit the size of a data transfer. Used in
9667  *		conjunction with physio(9F).
9668  *
9669  *   Arguments: bp - pointer to the indicated buf(9S) struct.
9670  *
9671  *     Context: Kernel thread context.
9672  */
9673 
9674 static void
9675 sdmin(struct buf *bp)
9676 {
9677 	struct sd_lun	*un;
9678 	int		instance;
9679 
9680 	instance = SDUNIT(bp->b_edev);
9681 
9682 	un = ddi_get_soft_state(sd_state, instance);
9683 	ASSERT(un != NULL);
9684 
9685 	if (bp->b_bcount > un->un_max_xfer_size) {
9686 		bp->b_bcount = un->un_max_xfer_size;
9687 	}
9688 }
9689 
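/*
 * Editorial note: sdmin() is the minphys routine handed to physio(9F)
 * and aphysio(9F) by the read/write entry points below, e.g.
 *
 *	err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
 *
 * physio calls it back for each transfer so that a large request is
 * broken into chunks of at most un_max_xfer_size bytes.
 */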
9690 
9691 /*
9692  *    Function: sdread
9693  *
9694  * Description: Driver's read(9e) entry point function.
9695  *
9696  *   Arguments: dev   - device number
9697  *		uio   - structure pointer describing where data is to be stored
9698  *			in user's space
9699  *		cred_p  - user credential pointer
9700  *
9701  * Return Code: ENXIO
9702  *		EIO
9703  *		EINVAL
9704  *		value returned by physio
9705  *
9706  *     Context: Kernel thread context.
9707  */
9708 /* ARGSUSED */
9709 static int
9710 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
9711 {
9712 	struct sd_lun	*un = NULL;
9713 	int		secmask;
9714 	int		err;
9715 
9716 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9717 		return (ENXIO);
9718 	}
9719 
9720 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9721 
9722 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9723 		mutex_enter(SD_MUTEX(un));
9724 		/*
9725 		 * Because the call to sd_ready_and_valid will issue I/O we
9726 		 * must wait here if either the device is suspended or
9727 		 * if its power level is changing.
9728 		 */
9729 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9730 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9731 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9732 		}
9733 		un->un_ncmds_in_driver++;
9734 		mutex_exit(SD_MUTEX(un));
9735 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9736 			mutex_enter(SD_MUTEX(un));
9737 			un->un_ncmds_in_driver--;
9738 			ASSERT(un->un_ncmds_in_driver >= 0);
9739 			mutex_exit(SD_MUTEX(un));
9740 			return (EIO);
9741 		}
9742 		mutex_enter(SD_MUTEX(un));
9743 		un->un_ncmds_in_driver--;
9744 		ASSERT(un->un_ncmds_in_driver >= 0);
9745 		mutex_exit(SD_MUTEX(un));
9746 	}
9747 
9748 	/*
9749 	 * Read requests are restricted to multiples of the system block size.
9750 	 */
9751 	secmask = un->un_sys_blocksize - 1;
9752 
9753 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9754 		SD_ERROR(SD_LOG_READ_WRITE, un,
9755 		    "sdread: file offset not modulo %d\n",
9756 		    un->un_sys_blocksize);
9757 		err = EINVAL;
9758 	} else if (uio->uio_iov->iov_len & (secmask)) {
9759 		SD_ERROR(SD_LOG_READ_WRITE, un,
9760 		    "sdread: transfer length not modulo %d\n",
9761 		    un->un_sys_blocksize);
9762 		err = EINVAL;
9763 	} else {
9764 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
9765 	}
9766 	return (err);
9767 }
9768 
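/*
 * Worked example for the alignment checks above (hypothetical values):
 * with un_sys_blocksize of 512, secmask is 0x1FF. An offset of 1024
 * passes (1024 & 0x1FF == 0), while an offset of 1000 fails with
 * EINVAL (1000 & 0x1FF == 488), as does any iov_len that is not a
 * multiple of 512.
 */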
9769 
9770 /*
9771  *    Function: sdwrite
9772  *
9773  * Description: Driver's write(9e) entry point function.
9774  *
9775  *   Arguments: dev   - device number
9776  *		uio   - structure pointer describing where data is stored in
9777  *			user's space
9778  *		cred_p  - user credential pointer
9779  *
9780  * Return Code: ENXIO
9781  *		EIO
9782  *		EINVAL
9783  *		value returned by physio
9784  *
9785  *     Context: Kernel thread context.
9786  */
9787 /* ARGSUSED */
9788 static int
9789 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
9790 {
9791 	struct sd_lun	*un = NULL;
9792 	int		secmask;
9793 	int		err;
9794 
9795 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9796 		return (ENXIO);
9797 	}
9798 
9799 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9800 
9801 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9802 		mutex_enter(SD_MUTEX(un));
9803 		/*
9804 		 * Because the call to sd_ready_and_valid will issue I/O we
9805 		 * must wait here if either the device is suspended or
9806 		 * if its power level is changing.
9807 		 */
9808 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9809 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9810 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9811 		}
9812 		un->un_ncmds_in_driver++;
9813 		mutex_exit(SD_MUTEX(un));
9814 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9815 			mutex_enter(SD_MUTEX(un));
9816 			un->un_ncmds_in_driver--;
9817 			ASSERT(un->un_ncmds_in_driver >= 0);
9818 			mutex_exit(SD_MUTEX(un));
9819 			return (EIO);
9820 		}
9821 		mutex_enter(SD_MUTEX(un));
9822 		un->un_ncmds_in_driver--;
9823 		ASSERT(un->un_ncmds_in_driver >= 0);
9824 		mutex_exit(SD_MUTEX(un));
9825 	}
9826 
9827 	/*
9828 	 * Write requests are restricted to multiples of the system block size.
9829 	 */
9830 	secmask = un->un_sys_blocksize - 1;
9831 
9832 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9833 		SD_ERROR(SD_LOG_READ_WRITE, un,
9834 		    "sdwrite: file offset not modulo %d\n",
9835 		    un->un_sys_blocksize);
9836 		err = EINVAL;
9837 	} else if (uio->uio_iov->iov_len & (secmask)) {
9838 		SD_ERROR(SD_LOG_READ_WRITE, un,
9839 		    "sdwrite: transfer length not modulo %d\n",
9840 		    un->un_sys_blocksize);
9841 		err = EINVAL;
9842 	} else {
9843 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
9844 	}
9845 	return (err);
9846 }
9847 
9848 
9849 /*
9850  *    Function: sdaread
9851  *
9852  * Description: Driver's aread(9e) entry point function.
9853  *
9854  *   Arguments: dev   - device number
9855  *		aio   - structure pointer describing where data is to be stored
9856  *		cred_p  - user credential pointer
9857  *
9858  * Return Code: ENXIO
9859  *		EIO
9860  *		EINVAL
9861  *		value returned by aphysio
9862  *
9863  *     Context: Kernel thread context.
9864  */
9865 /* ARGSUSED */
9866 static int
9867 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9868 {
9869 	struct sd_lun	*un = NULL;
9870 	struct uio	*uio = aio->aio_uio;
9871 	int		secmask;
9872 	int		err;
9873 
9874 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9875 		return (ENXIO);
9876 	}
9877 
9878 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9879 
9880 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9881 		mutex_enter(SD_MUTEX(un));
9882 		/*
9883 		 * Because the call to sd_ready_and_valid will issue I/O we
9884 		 * must wait here if either the device is suspended or
9885 		 * if its power level is changing.
9886 		 */
9887 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9888 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9889 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9890 		}
9891 		un->un_ncmds_in_driver++;
9892 		mutex_exit(SD_MUTEX(un));
9893 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9894 			mutex_enter(SD_MUTEX(un));
9895 			un->un_ncmds_in_driver--;
9896 			ASSERT(un->un_ncmds_in_driver >= 0);
9897 			mutex_exit(SD_MUTEX(un));
9898 			return (EIO);
9899 		}
9900 		mutex_enter(SD_MUTEX(un));
9901 		un->un_ncmds_in_driver--;
9902 		ASSERT(un->un_ncmds_in_driver >= 0);
9903 		mutex_exit(SD_MUTEX(un));
9904 	}
9905 
9906 	/*
9907 	 * Read requests are restricted to multiples of the system block size.
9908 	 */
9909 	secmask = un->un_sys_blocksize - 1;
9910 
9911 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9912 		SD_ERROR(SD_LOG_READ_WRITE, un,
9913 		    "sdaread: file offset not modulo %d\n",
9914 		    un->un_sys_blocksize);
9915 		err = EINVAL;
9916 	} else if (uio->uio_iov->iov_len & (secmask)) {
9917 		SD_ERROR(SD_LOG_READ_WRITE, un,
9918 		    "sdaread: transfer length not modulo %d\n",
9919 		    un->un_sys_blocksize);
9920 		err = EINVAL;
9921 	} else {
9922 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
9923 	}
9924 	return (err);
9925 }
9926 
9927 
9928 /*
9929  *    Function: sdawrite
9930  *
9931  * Description: Driver's awrite(9e) entry point function.
9932  *
9933  *   Arguments: dev   - device number
9934  *		aio   - structure pointer describing where data is stored
9935  *		cred_p  - user credential pointer
9936  *
9937  * Return Code: ENXIO
9938  *		EIO
9939  *		EINVAL
9940  *		value returned by aphysio
9941  *
9942  *     Context: Kernel thread context.
9943  */
9944 /* ARGSUSED */
9945 static int
9946 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
9947 {
9948 	struct sd_lun	*un = NULL;
9949 	struct uio	*uio = aio->aio_uio;
9950 	int		secmask;
9951 	int		err;
9952 
9953 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9954 		return (ENXIO);
9955 	}
9956 
9957 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9958 
9959 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
9960 		mutex_enter(SD_MUTEX(un));
9961 		/*
9962 		 * Because the call to sd_ready_and_valid will issue I/O we
9963 		 * must wait here if either the device is suspended or
9964 		 * if its power level is changing.
9965 		 */
9966 		while ((un->un_state == SD_STATE_SUSPENDED) ||
9967 		    (un->un_state == SD_STATE_PM_CHANGING)) {
9968 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9969 		}
9970 		un->un_ncmds_in_driver++;
9971 		mutex_exit(SD_MUTEX(un));
9972 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
9973 			mutex_enter(SD_MUTEX(un));
9974 			un->un_ncmds_in_driver--;
9975 			ASSERT(un->un_ncmds_in_driver >= 0);
9976 			mutex_exit(SD_MUTEX(un));
9977 			return (EIO);
9978 		}
9979 		mutex_enter(SD_MUTEX(un));
9980 		un->un_ncmds_in_driver--;
9981 		ASSERT(un->un_ncmds_in_driver >= 0);
9982 		mutex_exit(SD_MUTEX(un));
9983 	}
9984 
9985 	/*
9986 	 * Write requests are restricted to multiples of the system block size.
9987 	 */
9988 	secmask = un->un_sys_blocksize - 1;
9989 
9990 	if (uio->uio_loffset & ((offset_t)(secmask))) {
9991 		SD_ERROR(SD_LOG_READ_WRITE, un,
9992 		    "sdawrite: file offset not modulo %d\n",
9993 		    un->un_sys_blocksize);
9994 		err = EINVAL;
9995 	} else if (uio->uio_iov->iov_len & (secmask)) {
9996 		SD_ERROR(SD_LOG_READ_WRITE, un,
9997 		    "sdawrite: transfer length not modulo %d\n",
9998 		    un->un_sys_blocksize);
9999 		err = EINVAL;
10000 	} else {
10001 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10002 	}
10003 	return (err);
10004 }
10005 
10006 
10007 
10008 
10009 
10010 /*
10011  * Driver IO processing follows the following sequence:
10012  *
10013  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10014  *         |                |                     ^
10015  *         v                v                     |
10016  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10017  *         |                |                     |                   |
10018  *         v                |                     |                   |
10019  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10020  *         |                |                     ^                   ^
10021  *         v                v                     |                   |
10022  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10023  *         |                |                     |                   |
10024  *     +---+                |                     +------------+      +-------+
10025  *     |                    |                                  |              |
10026  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10027  *     |                    v                                  |              |
10028  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10029  *     |                    |                                  ^              |
10030  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10031  *     |                    v                                  |              |
10032  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
10033  *     |                    |                                  ^              |
10034  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10035  *     |                    v                                  |              |
10036  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
10037  *     |                    |                                  ^              |
10038  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
10039  *     |                    v                                  |              |
10040  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
10041  *     |                    |                                  ^              |
10042  *     |                    |                                  |              |
10043  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
10044  *                          |                           ^
10045  *                          v                           |
10046  *                   sd_core_iostart()                  |
10047  *                          |                           |
10048  *                          |                           +------>(*destroypkt)()
10049  *                          +-> sd_start_cmds() <-+     |           |
10050  *                          |                     |     |           v
10051  *                          |                     |     |  scsi_destroy_pkt(9F)
10052  *                          |                     |     |
10053  *                          +->(*initpkt)()       +- sdintr()
10054  *                          |  |                        |  |
10055  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
10056  *                          |  +-> scsi_setup_cdb(9F)   |
10057  *                          |                           |
10058  *                          +--> scsi_transport(9F)     |
10059  *                                     |                |
10060  *                                     +----> SCSA ---->+
10061  *
10062  *
10063  * This code is based upon the following presumptions:
10064  *
10065  *   - iostart and iodone functions operate on buf(9S) structures. These
10066  *     functions perform the necessary operations on the buf(9S) and pass
10067  *     them along to the next function in the chain by using the macros
10068  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
10069  *     (for iodone side functions).
10070  *
10071  *   - The iostart side functions may sleep. The iodone side functions
10072  *     are called under interrupt context and may NOT sleep. Therefore
10073  *     iodone side functions also may not call iostart side functions.
10074  *     (NOTE: iostart side functions should NOT sleep for memory, as
10075  *     this could result in deadlock.)
10076  *
10077  *   - An iostart side function may call its corresponding iodone side
10078  *     function directly (if necessary).
10079  *
10080  *   - In the event of an error, an iostart side function can return a buf(9S)
10081  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
10082  *     b_error in the usual way of course).
10083  *
10084  *   - The taskq mechanism may be used by the iodone side functions to dispatch
10085  *     requests to the iostart side functions.  The iostart side functions in
10086  *     this case would be called under the context of a taskq thread, so it's
10087  *     OK for them to block/sleep/spin in this case.
10088  *
10089  *   - iostart side functions may allocate "shadow" buf(9S) structs and
10090  *     pass them along to the next function in the chain.  The corresponding
10091  *     iodone side functions must coalesce the "shadow" bufs and return
10092  *     the "original" buf to the next higher layer.
10093  *
10094  *   - The b_private field of the buf(9S) struct holds a pointer to
10095  *     an sd_xbuf struct, which contains information needed to
10096  *     construct the scsi_pkt for the command.
10097  *
10098  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
10099  *     layer must acquire & release the SD_MUTEX(un) as needed.
10100  */
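/*
 * To make the conventions above concrete, here is a minimal sketch of a
 * hypothetical pass-through layer (illustrative only; not one of the
 * layers in this driver):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		struct sd_xbuf *xp = SD_GET_XBUF(bp);
 *
 *		ASSERT(!mutex_owned(SD_MUTEX(un)));
 *		if (xp == NULL) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 *
 * On error, the iostart side sets B_ERROR/b_error (via bioerror) and
 * returns the buf with SD_BEGIN_IODONE(); the iodone side may run in
 * interrupt context and therefore must not sleep.
 */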
10101 
10102 
10103 /*
10104  * Create taskq for all targets in the system. This is created at
10105  * _init(9E) and destroyed at _fini(9E).
10106  *
10107  * Note: here we set the minalloc to a reasonably high number to ensure that
10108  * we will have an adequate supply of task entries available at interrupt time.
10109  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
10110  * sd_taskq_create().  Since we do not want to sleep for allocations at
10111  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
10112  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
10113  * requests at any one instant in time.
10114  */
10115 #define	SD_TASKQ_NUMTHREADS	8
10116 #define	SD_TASKQ_MINALLOC	256
10117 #define	SD_TASKQ_MAXALLOC	256
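/*
 * Since maxalloc == minalloc and interrupt-time dispatches must not
 * sleep for memory, taskq_dispatch(9F) can fail once the preallocated
 * entries are exhausted.  A caller sketch (illustrative only), relying
 * on taskq_dispatch() returning 0 on failure:
 *
 *	if (taskq_dispatch(sd_tq, some_task_func, arg, KM_NOSLEEP) == 0) {
 *		(no entries available: fail the command rather than block)
 *	}
 *
 * sd_mapblocksize_iodone() below handles a failed dispatch to
 * sd_wmr_tq in exactly this way.
 */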
10118 
10119 static taskq_t	*sd_tq = NULL;
10120 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
10121 
10122 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
10123 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
10124 
10125 /*
10126  * The following task queue is being created for the write part of
10127  * read-modify-write of non-512 block size devices.
10128  * Limit the number of threads to 1 for now. This number was chosen
10129  * because the queue currently applies only to DVD-RAM and MO drives,
10130  * for which performance is not the main criterion at this stage.
10131  * Note: it remains to be explored whether a single taskq can be used in future.
10132  */
10133 #define	SD_WMR_TASKQ_NUMTHREADS	1
10134 static taskq_t	*sd_wmr_tq = NULL;
10135 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
10136 
10137 /*
10138  *    Function: sd_taskq_create
10139  *
10140  * Description: Create taskq thread(s) and preallocate task entries
10141  *
10142  * Return Code: Returns a pointer to the allocated taskq_t.
10143  *
10144  *     Context: Can sleep. Requires blockable context.
10145  *
10146  *       Notes: - The taskq() facility currently is NOT part of the DDI.
10147  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
10148  *		- taskq_create() will block for memory, also it will panic
10149  *		  if it cannot create the requested number of threads.
10150  *		- Currently taskq_create() creates threads that cannot be
10151  *		  swapped.
10152  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
10153  *		  supply of taskq entries at interrupt time (ie, so that we
10154  *		  do not have to sleep for memory)
10155  */
10156 
10157 static void
10158 sd_taskq_create(void)
10159 {
10160 	char	taskq_name[TASKQ_NAMELEN];
10161 
10162 	ASSERT(sd_tq == NULL);
10163 	ASSERT(sd_wmr_tq == NULL);
10164 
10165 	(void) snprintf(taskq_name, sizeof (taskq_name),
10166 	    "%s_drv_taskq", sd_label);
10167 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
10168 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10169 	    TASKQ_PREPOPULATE));
10170 
10171 	(void) snprintf(taskq_name, sizeof (taskq_name),
10172 	    "%s_rmw_taskq", sd_label);
10173 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
10174 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10175 	    TASKQ_PREPOPULATE));
10176 }
10177 
10178 
10179 /*
10180  *    Function: sd_taskq_delete
10181  *
10182  * Description: Complementary cleanup routine for sd_taskq_create().
10183  *
10184  *     Context: Kernel thread context.
10185  */
10186 
10187 static void
10188 sd_taskq_delete(void)
10189 {
10190 	ASSERT(sd_tq != NULL);
10191 	ASSERT(sd_wmr_tq != NULL);
10192 	taskq_destroy(sd_tq);
10193 	taskq_destroy(sd_wmr_tq);
10194 	sd_tq = NULL;
10195 	sd_wmr_tq = NULL;
10196 }
10197 
10198 
10199 /*
10200  *    Function: sdstrategy
10201  *
10202  * Description: Driver's strategy (9E) entry point function.
10203  *
10204  *   Arguments: bp - pointer to buf(9S)
10205  *
10206  * Return Code: Always returns zero
10207  *
10208  *     Context: Kernel thread context.
10209  */
10210 
10211 static int
10212 sdstrategy(struct buf *bp)
10213 {
10214 	struct sd_lun *un;
10215 
10216 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10217 	if (un == NULL) {
10218 		bioerror(bp, EIO);
10219 		bp->b_resid = bp->b_bcount;
10220 		biodone(bp);
10221 		return (0);
10222 	}
10223 	/* As was done in the past, fail new cmds if the state is dumping. */
10224 	if (un->un_state == SD_STATE_DUMPING) {
10225 		bioerror(bp, ENXIO);
10226 		bp->b_resid = bp->b_bcount;
10227 		biodone(bp);
10228 		return (0);
10229 	}
10230 
10231 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10232 
10233 	/*
10234 	 * Commands may sneak in while we release the mutex in
10235 	 * DDI_SUSPEND, so we should block new commands. However, old
10236 	 * commands that are still in the driver at this point should
10237 	 * still be allowed to drain.
10238 	 */
10239 	mutex_enter(SD_MUTEX(un));
10240 	/*
10241 	 * Must wait here if either the device is suspended or
10242 	 * if its power level is changing.
10243 	 */
10244 	while ((un->un_state == SD_STATE_SUSPENDED) ||
10245 	    (un->un_state == SD_STATE_PM_CHANGING)) {
10246 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10247 	}
10248 
10249 	un->un_ncmds_in_driver++;
10250 
10251 	/*
10252 	 * atapi: Since we are running the CD in PIO mode for now, we need
10253 	 * to call bp_mapin here to avoid having bp_mapin called in
10254 	 * interrupt context under the HBA's init_pkt routine.
10255 	 */
10256 	if (un->un_f_cfg_is_atapi == TRUE) {
10257 		mutex_exit(SD_MUTEX(un));
10258 		bp_mapin(bp);
10259 		mutex_enter(SD_MUTEX(un));
10260 	}
10261 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
10262 	    un->un_ncmds_in_driver);
10263 
10264 	mutex_exit(SD_MUTEX(un));
10265 
10266 	/*
10267 	 * This will (eventually) allocate the sd_xbuf area and
10268 	 * call sd_xbuf_strategy().  We just want to return the
10269 	 * result of ddi_xbuf_qstrategy so that we have an
10270 	 * optimized tail call which saves us a stack frame.
10271 	 */
10272 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
10273 }
10274 
10275 
10276 /*
10277  *    Function: sd_xbuf_strategy
10278  *
10279  * Description: Function for initiating IO operations via the
10280  *		ddi_xbuf_qstrategy() mechanism.
10281  *
10282  *     Context: Kernel thread context.
10283  */
10284 
10285 static void
10286 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
10287 {
10288 	struct sd_lun *un = arg;
10289 
10290 	ASSERT(bp != NULL);
10291 	ASSERT(xp != NULL);
10292 	ASSERT(un != NULL);
10293 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10294 
10295 	/*
10296 	 * Initialize the fields in the xbuf and save a pointer to the
10297 	 * xbuf in bp->b_private.
10298 	 */
10299 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
10300 
10301 	/* Send the buf down the iostart chain */
10302 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
10303 }
10304 
10305 
10306 /*
10307  *    Function: sd_xbuf_init
10308  *
10309  * Description: Prepare the given sd_xbuf struct for use.
10310  *
10311  *   Arguments: un - ptr to softstate
10312  *		bp - ptr to associated buf(9S)
10313  *		xp - ptr to associated sd_xbuf
10314  *		chain_type - IO chain type to use:
10315  *			SD_CHAIN_NULL
10316  *			SD_CHAIN_BUFIO
10317  *			SD_CHAIN_USCSI
10318  *			SD_CHAIN_DIRECT
10319  *			SD_CHAIN_DIRECT_PRIORITY
10320  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
10321  *			initialization; may be NULL if none.
10322  *
10323  *     Context: Kernel thread context
10324  */
10325 
10326 static void
10327 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
10328 	uchar_t chain_type, void *pktinfop)
10329 {
10330 	int index;
10331 
10332 	ASSERT(un != NULL);
10333 	ASSERT(bp != NULL);
10334 	ASSERT(xp != NULL);
10335 
10336 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
10337 	    bp, chain_type);
10338 
10339 	xp->xb_un	= un;
10340 	xp->xb_pktp	= NULL;
10341 	xp->xb_pktinfo	= pktinfop;
10342 	xp->xb_private	= bp->b_private;
10343 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
10344 
10345 	/*
10346 	 * Set up the iostart and iodone chain indexes in the xbuf, based
10347 	 * upon the specified chain type to use.
10348 	 */
10349 	switch (chain_type) {
10350 	case SD_CHAIN_NULL:
10351 		/*
10352 		 * Fall thru to just use the values for the buf type, even
10353 		 * though for the NULL chain these values will never be used.
10354 		 */
10355 		/* FALLTHRU */
10356 	case SD_CHAIN_BUFIO:
10357 		index = un->un_buf_chain_type;
10358 		break;
10359 	case SD_CHAIN_USCSI:
10360 		index = un->un_uscsi_chain_type;
10361 		break;
10362 	case SD_CHAIN_DIRECT:
10363 		index = un->un_direct_chain_type;
10364 		break;
10365 	case SD_CHAIN_DIRECT_PRIORITY:
10366 		index = un->un_priority_chain_type;
10367 		break;
10368 	default:
10369 		/* We're really broken if we ever get here... */
10370 		panic("sd_xbuf_init: illegal chain type!");
10371 		/*NOTREACHED*/
10372 	}
10373 
10374 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
10375 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
10376 
10377 	/*
10378 	 * It might be a bit easier to simply bzero the entire xbuf above,
10379 	 * but it turns out that since we init a fair number of members anyway,
10380 	 * we save a fair number of cycles by doing explicit assignments of zero.
10381 	 */
10382 	xp->xb_pkt_flags	= 0;
10383 	xp->xb_dma_resid	= 0;
10384 	xp->xb_retry_count	= 0;
10385 	xp->xb_victim_retry_count = 0;
10386 	xp->xb_ua_retry_count	= 0;
10387 	xp->xb_nr_retry_count	= 0;
10388 	xp->xb_sense_bp		= NULL;
10389 	xp->xb_sense_status	= 0;
10390 	xp->xb_sense_state	= 0;
10391 	xp->xb_sense_resid	= 0;
10392 
10393 	bp->b_private	= xp;
10394 	bp->b_flags	&= ~(B_DONE | B_ERROR);
10395 	bp->b_resid	= 0;
10396 	bp->av_forw	= NULL;
10397 	bp->av_back	= NULL;
10398 	bioerror(bp, 0);
10399 
10400 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
10401 }
10402 
10403 
10404 /*
10405  *    Function: sd_uscsi_strategy
10406  *
10407  * Description: Wrapper for calling into the USCSI chain via physio(9F)
10408  *
10409  *   Arguments: bp - buf struct ptr
10410  *
10411  * Return Code: Always returns 0
10412  *
10413  *     Context: Kernel thread context
10414  */
10415 
10416 static int
10417 sd_uscsi_strategy(struct buf *bp)
10418 {
10419 	struct sd_lun		*un;
10420 	struct sd_uscsi_info	*uip;
10421 	struct sd_xbuf		*xp;
10422 	uchar_t			chain_type;
10423 
10424 	ASSERT(bp != NULL);
10425 
10426 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10427 	if (un == NULL) {
10428 		bioerror(bp, EIO);
10429 		bp->b_resid = bp->b_bcount;
10430 		biodone(bp);
10431 		return (0);
10432 	}
10433 
10434 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10435 
10436 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
10437 
10438 	mutex_enter(SD_MUTEX(un));
10439 	/*
10440 	 * atapi: Since we are running the CD in PIO mode for now, we need
10441 	 * to call bp_mapin here to avoid having bp_mapin called in
10442 	 * interrupt context under the HBA's init_pkt routine.
10443 	 */
10444 	if (un->un_f_cfg_is_atapi == TRUE) {
10445 		mutex_exit(SD_MUTEX(un));
10446 		bp_mapin(bp);
10447 		mutex_enter(SD_MUTEX(un));
10448 	}
10449 	un->un_ncmds_in_driver++;
10450 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
10451 	    un->un_ncmds_in_driver);
10452 	mutex_exit(SD_MUTEX(un));
10453 
10454 	/*
10455 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
10456 	 */
10457 	ASSERT(bp->b_private != NULL);
10458 	uip = (struct sd_uscsi_info *)bp->b_private;
10459 
10460 	switch (uip->ui_flags) {
10461 	case SD_PATH_DIRECT:
10462 		chain_type = SD_CHAIN_DIRECT;
10463 		break;
10464 	case SD_PATH_DIRECT_PRIORITY:
10465 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
10466 		break;
10467 	default:
10468 		chain_type = SD_CHAIN_USCSI;
10469 		break;
10470 	}
10471 
10472 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
10473 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
10474 
10475 	/* Use the index obtained within xbuf_init */
10476 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
10477 
10478 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
10479 
10480 	return (0);
10481 }
10482 
10483 /*
10484  *    Function: sd_send_scsi_cmd
10485  *
10486  * Description: Runs a USCSI command for user (when called thru sdioctl),
10487  *		or for the driver
10488  *
10489  *   Arguments: dev - the dev_t for the device
10490  *		incmd - ptr to a valid uscsi_cmd struct
10491  *		flag - bit flag, indicating open settings, 32/64 bit type
10492  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
10493  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
10494  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
10495  *			to use the USCSI "direct" chain and bypass the normal
10496  *			command waitq.
10497  *
10498  * Return Code: 0 -  successful completion of the given command
10499  *		EIO - scsi_uscsi_handle_cmd() failed
10500  *		ENXIO  - soft state not found for specified dev
10501  *		EINVAL
10502  *		EFAULT - copyin/copyout error
10503  *		return code of scsi_uscsi_handle_cmd():
10504  *			EIO
10505  *			ENXIO
10506  *			EACCES
10507  *
10508  *     Context: Waits for command to complete. Can sleep.
10509  */
10510 
10511 static int
10512 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
10513 	enum uio_seg dataspace, int path_flag)
10514 {
10515 	struct sd_uscsi_info	*uip;
10516 	struct uscsi_cmd	*uscmd;
10517 	struct sd_lun	*un;
10518 	int	format = 0;
10519 	int	rval;
10520 
10521 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
10522 	if (un == NULL) {
10523 		return (ENXIO);
10524 	}
10525 
10526 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10527 
10528 #ifdef SDDEBUG
10529 	switch (dataspace) {
10530 	case UIO_USERSPACE:
10531 		SD_TRACE(SD_LOG_IO, un,
10532 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
10533 		break;
10534 	case UIO_SYSSPACE:
10535 		SD_TRACE(SD_LOG_IO, un,
10536 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
10537 		break;
10538 	default:
10539 		SD_TRACE(SD_LOG_IO, un,
10540 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
10541 		break;
10542 	}
10543 #endif
10544 
10545 	rval = scsi_uscsi_alloc_and_copyin((intptr_t)incmd, flag,
10546 	    SD_ADDRESS(un), &uscmd);
10547 	if (rval != 0) {
10548 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10549 		    "scsi_uscsi_alloc_and_copyin failed, un:0x%p\n", un);
10550 		return (rval);
10551 	}
10552 
10553 	if ((uscmd->uscsi_cdb != NULL) &&
10554 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
10555 		mutex_enter(SD_MUTEX(un));
10556 		un->un_f_format_in_progress = TRUE;
10557 		mutex_exit(SD_MUTEX(un));
10558 		format = 1;
10559 	}
10560 
10561 	/*
10562 	 * Allocate an sd_uscsi_info struct and fill it with the info
10563 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
10564 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
10565 	 * since we allocate the buf here in this function, we do not
10566 	 * need to preserve the prior contents of b_private.
10567 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
10568 	 */
10569 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
10570 	uip->ui_flags = path_flag;
10571 	uip->ui_cmdp = uscmd;
10572 
10573 	/*
10574 	 * Commands sent with priority are intended for error recovery
10575 	 * situations, and do not have retries performed.
10576 	 */
10577 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
10578 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
10579 	}
10580 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
10581 
10582 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
10583 	    sd_uscsi_strategy, NULL, uip);
10584 
10585 #ifdef SDDEBUG
10586 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10587 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
10588 	    uscmd->uscsi_status, uscmd->uscsi_resid);
10589 	if (uscmd->uscsi_bufaddr != NULL) {
10590 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
10591 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
10592 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
10593 		if (dataspace == UIO_SYSSPACE) {
10594 			SD_DUMP_MEMORY(un, SD_LOG_IO,
10595 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
10596 			    uscmd->uscsi_buflen, SD_LOG_HEX);
10597 		}
10598 	}
10599 #endif
10600 
10601 	if (format == 1) {
10602 		mutex_enter(SD_MUTEX(un));
10603 		un->un_f_format_in_progress = FALSE;
10604 		mutex_exit(SD_MUTEX(un));
10605 	}
10606 
10607 	(void) scsi_uscsi_copyout_and_free((intptr_t)incmd, uscmd);
10608 	kmem_free(uip, sizeof (struct sd_uscsi_info));
10609 
10610 	return (rval);
10611 }
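/*
 * A minimal caller sketch (illustrative only): issuing a TEST UNIT
 * READY from within the driver through the "direct" chain.  The exact
 * flags a real caller passes depend on context; FKIOCTL and
 * USCSI_SILENT here are assumptions for the sketch.
 *
 *	union scsi_cdb		cdb;
 *	struct uscsi_cmd	ucmd;
 *
 *	bzero(&cdb, sizeof (cdb));
 *	bzero(&ucmd, sizeof (ucmd));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	(void) sd_send_scsi_cmd(dev, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 */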
10612 
10613 
10614 /*
10615  *    Function: sd_buf_iodone
10616  *
10617  * Description: Frees the sd_xbuf & returns the buf to its originator.
10618  *
10619  *     Context: May be called from interrupt context.
10620  */
10621 /* ARGSUSED */
10622 static void
10623 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
10624 {
10625 	struct sd_xbuf *xp;
10626 
10627 	ASSERT(un != NULL);
10628 	ASSERT(bp != NULL);
10629 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10630 
10631 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
10632 
10633 	xp = SD_GET_XBUF(bp);
10634 	ASSERT(xp != NULL);
10635 
10636 	mutex_enter(SD_MUTEX(un));
10637 
10638 	/*
10639 	 * Grab time when the cmd completed.
10640 	 * This is used for determining if the device has been
10641 	 * idle long enough to be idled by the PM framework.
10642 	 * This is for lowering the overhead, and therefore improving
10643 	 * performance per I/O operation.
10644 	 */
10645 	un->un_pm_idle_time = ddi_get_time();
10646 
10647 	un->un_ncmds_in_driver--;
10648 	ASSERT(un->un_ncmds_in_driver >= 0);
10649 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
10650 	    un->un_ncmds_in_driver);
10651 
10652 	mutex_exit(SD_MUTEX(un));
10653 
10654 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
10655 	biodone(bp);				/* bp is gone after this */
10656 
10657 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
10658 }
10659 
10660 
10661 /*
10662  *    Function: sd_uscsi_iodone
10663  *
10664  * Description: Frees the sd_xbuf & returns the buf to its originator.
10665  *
10666  *     Context: May be called from interrupt context.
10667  */
10668 /* ARGSUSED */
10669 static void
10670 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
10671 {
10672 	struct sd_xbuf *xp;
10673 
10674 	ASSERT(un != NULL);
10675 	ASSERT(bp != NULL);
10676 
10677 	xp = SD_GET_XBUF(bp);
10678 	ASSERT(xp != NULL);
10679 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10680 
10681 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
10682 
10683 	bp->b_private = xp->xb_private;
10684 
10685 	mutex_enter(SD_MUTEX(un));
10686 
10687 	/*
10688 	 * Grab time when the cmd completed.
10689 	 * This is used for determining if the device has been
10690 	 * idle long enough to be idled by the PM framework.
10691 	 * This is for lowering the overhead, and therefore improving
10692 	 * performance per I/O operation.
10693 	 */
10694 	un->un_pm_idle_time = ddi_get_time();
10695 
10696 	un->un_ncmds_in_driver--;
10697 	ASSERT(un->un_ncmds_in_driver >= 0);
10698 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
10699 	    un->un_ncmds_in_driver);
10700 
10701 	mutex_exit(SD_MUTEX(un));
10702 
10703 	kmem_free(xp, sizeof (struct sd_xbuf));
10704 	biodone(bp);
10705 
10706 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
10707 }
10708 
10709 
10710 /*
10711  *    Function: sd_mapblockaddr_iostart
10712  *
10713  * Description: Verify request lies within the partition limits for
10714  *		the indicated minor device.  Issue "overrun" buf if
10715  *		request would exceed partition range.  Converts
10716  *		partition-relative block address to absolute.
10717  *
10718  *     Context: Can sleep
10719  *
10720  *      Issues: This follows what the old code did, in terms of accessing
10721  *		some of the partition info in the unit struct without holding
10722  *		the mutex.  This is a general issue, if the partition info
10723  *		can be altered while IO is in progress... as soon as we send
10724  *		a buf, its partitioning can be invalid before it gets to the
10725  *		device.  Probably the right fix is to move partitioning out
10726  *		of the driver entirely.
10727  */
10728 
10729 static void
10730 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
10731 {
10732 	diskaddr_t	nblocks;	/* #blocks in the given partition */
10733 	daddr_t	blocknum;	/* Block number specified by the buf */
10734 	size_t	requested_nblocks;
10735 	size_t	available_nblocks;
10736 	int	partition;
10737 	diskaddr_t	partition_offset;
10738 	struct sd_xbuf *xp;
10739 
10740 
10741 	ASSERT(un != NULL);
10742 	ASSERT(bp != NULL);
10743 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10744 
10745 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10746 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
10747 
10748 	xp = SD_GET_XBUF(bp);
10749 	ASSERT(xp != NULL);
10750 
10751 	/*
10752 	 * If the geometry is not indicated as valid, attempt to access
10753 	 * the unit & verify the geometry/label. This can be the case for
10754 	 * removable-media devices, or if the device was opened in
10755 	 * NDELAY/NONBLOCK mode.
10756 	 */
10757 	if (!SD_IS_VALID_LABEL(un) &&
10758 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
10759 		/*
10760 		 * For removable devices it is possible to start an I/O
10761 		 * without media by opening the device in nodelay mode.
10762 		 * Also, for writable CDs there can be many scenarios where
10763 		 * there is no geometry yet but the volume manager is trying
10764 		 * to issue a read() just because it can see the TOC on the
10765 		 * CD. So do not print a message for removables.
10766 		 */
10767 		if (!un->un_f_has_removable_media) {
10768 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10769 			    "i/o to invalid geometry\n");
10770 		}
10771 		bioerror(bp, EIO);
10772 		bp->b_resid = bp->b_bcount;
10773 		SD_BEGIN_IODONE(index, un, bp);
10774 		return;
10775 	}
10776 
10777 	partition = SDPART(bp->b_edev);
10778 
10779 	nblocks = 0;
10780 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
10781 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
10782 
10783 	/*
10784 	 * blocknum is the starting block number of the request. At this
10785 	 * point it is still relative to the start of the minor device.
10786 	 */
10787 	blocknum = xp->xb_blkno;
10788 
10789 	/*
10790 	 * Legacy: If the starting block number is one past the last block
10791 	 * in the partition, do not set B_ERROR in the buf.
10792 	 */
10793 	if (blocknum == nblocks)  {
10794 		goto error_exit;
10795 	}
10796 
10797 	/*
10798 	 * Confirm that the first block of the request lies within the
10799 	 * partition limits. Also the requested number of bytes must be
10800 	 * a multiple of the system block size.
10801 	 */
10802 	if ((blocknum < 0) || (blocknum >= nblocks) ||
10803 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
10804 		bp->b_flags |= B_ERROR;
10805 		goto error_exit;
10806 	}
10807 
10808 	/*
10809 	 * If the requested # blocks exceeds the available # blocks, that
10810 	 * is an overrun of the partition.
10811 	 */
10812 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
10813 	available_nblocks = (size_t)(nblocks - blocknum);
10814 	ASSERT(nblocks >= blocknum);
10815 
10816 	if (requested_nblocks > available_nblocks) {
10817 		/*
10818 		 * Allocate an "overrun" buf to allow the request to proceed
10819 		 * for the amount of space available in the partition. The
10820 		 * amount not transferred will be added into the b_resid
10821 		 * when the operation is complete. The overrun buf
10822 		 * replaces the original buf here, and the original buf
10823 		 * is saved inside the overrun buf, for later use.
10824 		 */
10825 		size_t resid = SD_SYSBLOCKS2BYTES(un,
10826 		    (offset_t)(requested_nblocks - available_nblocks));
10827 		size_t count = bp->b_bcount - resid;
10828 		/*
10829 		 * Note: count is an unsigned entity, thus it can NEVER
10830 		 * be less than 0, so we ASSERT that the original values
10831 		 * are correct.
10832 		 */
10833 		ASSERT(bp->b_bcount >= resid);
10834 
10835 		bp = sd_bioclone_alloc(bp, count, blocknum,
10836 		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
10837 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
10838 		ASSERT(xp != NULL);
10839 	}
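	/*
	 * For example, with 512-byte system blocks, a 10240-byte (20-block)
	 * request starting at block 90 of a 100-block partition leaves
	 * available_nblocks == 10: resid becomes 5120 bytes, count becomes
	 * 5120 bytes, and the overrun buf carries only those first 10 blocks.
	 */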
10840 
10841 	/* At this point there should be no residual for this buf. */
10842 	ASSERT(bp->b_resid == 0);
10843 
10844 	/* Convert the block number to an absolute address. */
10845 	xp->xb_blkno += partition_offset;
10846 
10847 	SD_NEXT_IOSTART(index, un, bp);
10848 
10849 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10850 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
10851 
10852 	return;
10853 
10854 error_exit:
10855 	bp->b_resid = bp->b_bcount;
10856 	SD_BEGIN_IODONE(index, un, bp);
10857 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10858 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
10859 }
10860 
10861 
10862 /*
10863  *    Function: sd_mapblockaddr_iodone
10864  *
10865  * Description: Completion-side processing for partition management.
10866  *
10867  *     Context: May be called under interrupt context
10868  */
10869 
10870 static void
10871 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
10872 {
10873 	/* int	partition; */	/* Not used, see below. */
10874 	ASSERT(un != NULL);
10875 	ASSERT(bp != NULL);
10876 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10877 
10878 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10879 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
10880 
10881 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
10882 		/*
10883 		 * We have an "overrun" buf to deal with...
10884 		 */
10885 		struct sd_xbuf	*xp;
10886 		struct buf	*obp;	/* ptr to the original buf */
10887 
10888 		xp = SD_GET_XBUF(bp);
10889 		ASSERT(xp != NULL);
10890 
10891 		/* Retrieve the pointer to the original buf */
10892 		obp = (struct buf *)xp->xb_private;
10893 		ASSERT(obp != NULL);
10894 
10895 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
10896 		bioerror(obp, bp->b_error);
10897 
10898 		sd_bioclone_free(bp);
10899 
10900 		/*
10901 		 * Get back the original buf.
10902 		 * Note that since the restoration of xb_blkno below
10903 		 * was removed, the sd_xbuf is not needed.
10904 		 */
10905 		bp = obp;
10906 		/*
10907 		 * xp = SD_GET_XBUF(bp);
10908 		 * ASSERT(xp != NULL);
10909 		 */
10910 	}
10911 
10912 	/*
10913 	 * Convert xp->xb_blkno back to a minor-device relative value.
10914 	 * Note: this has been commented out, as it is not needed in the
10915 	 * current implementation of the driver (ie, since this function
10916 	 * is at the top of the layering chains, the info will be
10917 	 * discarded) and it is in the "hot" IO path.
10918 	 *
10919 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
10920 	 * xp->xb_blkno -= un->un_offset[partition];
10921 	 */
10922 
10923 	SD_NEXT_IODONE(index, un, bp);
10924 
10925 	SD_TRACE(SD_LOG_IO_PARTITION, un,
10926 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
10927 }
10928 
10929 
10930 /*
10931  *    Function: sd_mapblocksize_iostart
10932  *
10933  * Description: Convert between system block size (un->un_sys_blocksize)
10934  *		and target block size (un->un_tgt_blocksize).
10935  *
10936  *     Context: Can sleep to allocate resources.
10937  *
10938  * Assumptions: A higher layer has already performed any partition validation,
10939  *		and converted the xp->xb_blkno to an absolute value relative
10940  *		to the start of the device.
10941  *
10942  *		It is also assumed that the higher layer has implemented
10943  *		an "overrun" mechanism for the case where the request would
10944  *		read/write beyond the end of a partition.  In this case we
10945  *		assume (and ASSERT) that bp->b_resid == 0.
10946  *
10947  *		Note: The implementation for this routine assumes the target
10948  *		block size remains constant between allocation and transport.
10949  */
10950 
10951 static void
10952 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
10953 {
10954 	struct sd_mapblocksize_info	*bsp;
10955 	struct sd_xbuf			*xp;
10956 	offset_t first_byte;
10957 	daddr_t	start_block, end_block;
10958 	daddr_t	request_bytes;
10959 	ushort_t is_aligned = FALSE;
10960 
10961 	ASSERT(un != NULL);
10962 	ASSERT(bp != NULL);
10963 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10964 	ASSERT(bp->b_resid == 0);
10965 
10966 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
10967 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
10968 
10969 	/*
10970 	 * For a non-writable CD, a write request is an error
10971 	 */
10972 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
10973 	    (un->un_f_mmc_writable_media == FALSE)) {
10974 		bioerror(bp, EIO);
10975 		bp->b_resid = bp->b_bcount;
10976 		SD_BEGIN_IODONE(index, un, bp);
10977 		return;
10978 	}
10979 
10980 	/*
10981 	 * We do not need a shadow buf if the device is using
10982 	 * un->un_sys_blocksize as its block size or if bcount == 0.
10983 	 * In this case there is no layer-private data block allocated.
10984 	 */
10985 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
10986 	    (bp->b_bcount == 0)) {
10987 		goto done;
10988 	}
10989 
10990 #if defined(__i386) || defined(__amd64)
10991 	/* We do not support non-block-aligned transfers for ROD devices */
10992 	ASSERT(!ISROD(un));
10993 #endif
10994 
10995 	xp = SD_GET_XBUF(bp);
10996 	ASSERT(xp != NULL);
10997 
10998 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
10999 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
11000 	    un->un_tgt_blocksize, un->un_sys_blocksize);
11001 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
11002 	    "request start block:0x%x\n", xp->xb_blkno);
11003 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
11004 	    "request len:0x%x\n", bp->b_bcount);
11005 
11006 	/*
11007 	 * Allocate the layer-private data area for the mapblocksize layer.
11008 	 * Layers are allowed to use the xb_private member of the sd_xbuf
11009 	 * struct to store the pointer to their layer-private data block, but
11010 	 * each layer also has the responsibility of restoring the prior
11011 	 * contents of xb_private before returning the buf/xbuf to the
11012 	 * higher layer that sent it.
11013 	 *
11014 	 * Here we save the prior contents of xp->xb_private into the
11015 	 * bsp->mbs_oprivate field of our layer-private data area. This value
11016 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
11017 	 * the layer-private area and returning the buf/xbuf to the layer
11018 	 * that sent it.
11019 	 *
11020 	 * Note that here we use kmem_zalloc for the allocation as there are
11021 	 * parts of the mapblocksize code that expect certain fields to be
11022 	 * zero unless explicitly set to a required value.
11023 	 */
11024 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
11025 	bsp->mbs_oprivate = xp->xb_private;
11026 	xp->xb_private = bsp;
11027 
11028 	/*
11029 	 * This treats the data on the disk (target) as an array of bytes.
11030 	 * first_byte is the byte offset, from the beginning of the device,
11031 	 * to the location of the request. This is converted from a
11032 	 * un->un_sys_blocksize block address to a byte offset, and then back
11033 	 * to a block address based upon a un->un_tgt_blocksize block size.
11034 	 *
11035 	 * xp->xb_blkno should be absolute upon entry into this function,
11036 	 * but it is based upon partitions that use the "system"
11037 	 * block size. It must be adjusted to reflect the block size of
11038 	 * the target.
11039 	 *
11040 	 * Note that end_block is actually the block that follows the last
11041 	 * block of the request, but that's what is needed for the computation.
11042 	 */
11043 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11044 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
11045 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
11046 	    un->un_tgt_blocksize;
11047 
11048 	/* request_bytes is rounded up to a multiple of the target block size */
11049 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
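	/*
	 * For example, with un_sys_blocksize == 512 and un_tgt_blocksize ==
	 * 2048, a 1024-byte request at system block 3 gives first_byte ==
	 * 1536, start_block == 0, end_block == 2, and request_bytes == 4096.
	 * Since 1536 is not a multiple of 2048 the request is unaligned, so
	 * a shadow buf is needed, with a copy offset of 1536 bytes into the
	 * first target block.
	 */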
11050 
11051 	/*
11052 	 * See if the starting address of the request and the request
11053 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
11054 	 * then we do not need to allocate a shadow buf to handle the request.
11055 	 */
11056 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
11057 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
11058 		is_aligned = TRUE;
11059 	}
11060 
11061 	if ((bp->b_flags & B_READ) == 0) {
11062 		/*
11063 		 * Lock the range for a write operation. An aligned request is
11064 		 * considered a simple write; otherwise the request must be a
11065 		 * read-modify-write.
11066 		 */
11067 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
11068 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
11069 	}
11070 
11071 	/*
11072 	 * Alloc a shadow buf if the request is not aligned. Also, this is
11073 	 * where the READ command is generated for a read-modify-write. (The
11074 	 * write phase is deferred until after the read completes.)
11075 	 */
11076 	if (is_aligned == FALSE) {
11077 
11078 		struct sd_mapblocksize_info	*shadow_bsp;
11079 		struct sd_xbuf	*shadow_xp;
11080 		struct buf	*shadow_bp;
11081 
11082 		/*
11083 		 * Allocate the shadow buf and its associated xbuf. Note that
11084 		 * after this call the xb_blkno value in both the original
11085 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
11086 		 * same: absolute relative to the start of the device, and
11087 		 * adjusted for the target block size. The b_blkno in the
11088 		 * shadow buf will also be set to this value. We should never
11089 		 * change b_blkno in the original bp however.
11090 		 *
11091 		 * Note also that the shadow buf will always need to be a
11092 		 * READ command, regardless of whether the incoming command
11093 		 * is a READ or a WRITE.
11094 		 */
11095 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
11096 		    xp->xb_blkno,
11097 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
11098 
11099 		shadow_xp = SD_GET_XBUF(shadow_bp);
11100 
11101 		/*
11102 		 * Allocate the layer-private data for the shadow buf.
11103 		 * (No need to preserve xb_private in the shadow xbuf.)
11104 		 */
11105 		shadow_xp->xb_private = shadow_bsp =
11106 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
11107 
11108 		/*
11109 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
11110 		 * to figure out where the start of the user data is (based upon
11111 		 * the system block size) in the data returned by the READ
11112 		 * command (which will be based upon the target blocksize). Note
11113 		 * that this is only really used if the request is unaligned.
11114 		 */
11115 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
11116 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
11117 		ASSERT((bsp->mbs_copy_offset >= 0) &&
11118 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
11119 
11120 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
11121 
11122 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
11123 
11124 		/* Transfer the wmap (if any) to the shadow buf */
11125 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
11126 		bsp->mbs_wmp = NULL;
11127 
11128 		/*
11129 		 * The shadow buf goes on from here in place of the
11130 		 * original buf.
11131 		 */
11132 		shadow_bsp->mbs_orig_bp = bp;
11133 		bp = shadow_bp;
11134 	}
11135 
11136 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
11137 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
11138 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
11139 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
11140 	    request_bytes);
11141 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
11142 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
11143 
11144 done:
11145 	SD_NEXT_IOSTART(index, un, bp);
11146 
11147 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
11148 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
11149 }
11150 
11151 
11152 /*
11153  *    Function: sd_mapblocksize_iodone
11154  *
11155  * Description: Completion side processing for block-size mapping.
11156  *
11157  *     Context: May be called under interrupt context
11158  */
11159 
11160 static void
11161 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
11162 {
11163 	struct sd_mapblocksize_info	*bsp;
11164 	struct sd_xbuf	*xp;
11165 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
11166 	struct buf	*orig_bp;	/* ptr to the original buf */
11167 	offset_t	shadow_end;
11168 	offset_t	request_end;
11169 	offset_t	shadow_start;
11170 	ssize_t		copy_offset;
11171 	size_t		copy_length;
11172 	size_t		shortfall;
11173 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
11174 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
11175 
11176 	ASSERT(un != NULL);
11177 	ASSERT(bp != NULL);
11178 
11179 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
11180 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
11181 
11182 	/*
11183 	 * There is no shadow buf or layer-private data if the target is
11184 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
11185 	 */
11186 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
11187 	    (bp->b_bcount == 0)) {
11188 		goto exit;
11189 	}
11190 
11191 	xp = SD_GET_XBUF(bp);
11192 	ASSERT(xp != NULL);
11193 
11194 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
11195 	bsp = xp->xb_private;
11196 
11197 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
11198 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
11199 
11200 	if (is_write) {
11201 		/*
11202 		 * For a WRITE request we must free up the block range that
11203 		 * we have locked up.  This holds regardless of whether this is
11204 		 * an aligned write request or a read-modify-write request.
11205 		 */
11206 		sd_range_unlock(un, bsp->mbs_wmp);
11207 		bsp->mbs_wmp = NULL;
11208 	}
11209 
11210 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
11211 		/*
11212 		 * An aligned read or write command will have no shadow buf;
11213 		 * there is not much else to do with it.
11214 		 */
11215 		goto done;
11216 	}
11217 
11218 	orig_bp = bsp->mbs_orig_bp;
11219 	ASSERT(orig_bp != NULL);
11220 	orig_xp = SD_GET_XBUF(orig_bp);
11221 	ASSERT(orig_xp != NULL);
11222 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11223 
11224 	if (!is_write && has_wmap) {
11225 		/*
11226 		 * A READ with a wmap means this is the READ phase of a
11227 		 * read-modify-write. If an error occurred on the READ then
11228 		 * we do not proceed with the WRITE phase or copy any data.
11229 		 * Just release the write maps and return with an error.
11230 		 */
11231 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
11232 			orig_bp->b_resid = orig_bp->b_bcount;
11233 			bioerror(orig_bp, bp->b_error);
11234 			sd_range_unlock(un, bsp->mbs_wmp);
11235 			goto freebuf_done;
11236 		}
11237 	}
11238 
11239 	/*
11240 	 * Here is where we set up to copy the data from the shadow buf
11241 	 * into the space associated with the original buf.
11242 	 *
11243 	 * To deal with the conversion between block sizes, these
11244 	 * computations treat the data as an array of bytes, with the
11245 	 * first byte (byte 0) corresponding to the first byte in the
11246 	 * first block on the disk.
11247 	 */
11248 
11249 	/*
11250 	 * shadow_start and shadow_end indicate the byte range of the
11251 	 * data returned with the shadow IO request.
11252 	 */
11253 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
11254 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
11255 
11256 	/*
11257 	 * copy_offset gives the offset (in bytes) from the start of the first
11258 	 * block of the READ request to the beginning of the data.  We retrieve
11259 	 * this value from the layer-private data area (mbs_copy_offset),
11260 	 * where it was saved by sd_mapblocksize_iostart(). copy_length gives the amount of
11261 	 * data to be copied (in bytes).
11262 	 */
11263 	copy_offset  = bsp->mbs_copy_offset;
11264 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
11265 	copy_length  = orig_bp->b_bcount;
11266 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
11267 
11268 	/*
11269 	 * Set up the resid and error fields of orig_bp as appropriate.
11270 	 */
11271 	if (shadow_end >= request_end) {
11272 		/* We got all the requested data; set resid to zero */
11273 		orig_bp->b_resid = 0;
11274 	} else {
11275 		/*
11276 		 * We failed to get enough data to fully satisfy the original
11277 		 * request. Just copy back whatever data we got and set
11278 		 * up the residual and error code as required.
11279 		 *
11280 		 * 'shortfall' is the amount by which the data received with the
11281 		 * shadow buf has "fallen short" of the requested amount.
11282 		 */
11283 		shortfall = (size_t)(request_end - shadow_end);
11284 
11285 		if (shortfall > orig_bp->b_bcount) {
11286 			/*
11287 			 * We did not get enough data to even partially
11288 			 * fulfill the original request.  The residual is
11289 			 * equal to the amount requested.
11290 			 */
11291 			orig_bp->b_resid = orig_bp->b_bcount;
11292 		} else {
11293 			/*
11294 			 * We did not get all the data that we requested
11295 			 * from the device, but we will try to return what
11296 			 * portion we did get.
11297 			 */
11298 			orig_bp->b_resid = shortfall;
11299 		}
11300 		ASSERT(copy_length >= orig_bp->b_resid);
11301 		copy_length  -= orig_bp->b_resid;
11302 	}
11303 
11304 	/* Propagate the error code from the shadow buf to the original buf */
11305 	bioerror(orig_bp, bp->b_error);
11306 
11307 	if (is_write) {
11308 		goto freebuf_done;	/* No data copying for a WRITE */
11309 	}
11310 
11311 	if (has_wmap) {
11312 		/*
11313 		 * This is a READ command from the READ phase of a
11314 		 * read-modify-write request. We have to copy the data given
11315 		 * by the user OVER the data returned by the READ command,
11316 		 * then convert the command from a READ to a WRITE and send
11317 		 * it back to the target.
11318 		 */
11319 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
11320 		    copy_length);
11321 
11322 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
11323 
11324 		/*
11325 		 * Dispatch the WRITE command to the taskq thread, which
11326 		 * will in turn send the command to the target. When the
11327 		 * WRITE command completes, we (sd_mapblocksize_iodone())
11328 		 * will get called again as part of the iodone chain
11329 		 * processing for it. Note that we will still be dealing
11330 		 * with the shadow buf at that point.
11331 		 */
11332 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
11333 		    KM_NOSLEEP) != 0) {
11334 			/*
11335 			 * Dispatch was successful so we are done. Return
11336 			 * without going any higher up the iodone chain. Do
11337 			 * not free up any layer-private data until after the
11338 			 * WRITE completes.
11339 			 */
11340 			return;
11341 		}
11342 
11343 		/*
11344 		 * Dispatch of the WRITE command failed; set up the error
11345 		 * condition and send this IO back up the iodone chain.
11346 		 */
11347 		bioerror(orig_bp, EIO);
11348 		orig_bp->b_resid = orig_bp->b_bcount;
11349 
11350 	} else {
11351 		/*
11352 		 * This is a regular READ request (ie, not a RMW). Copy the
11353 		 * data from the shadow buf into the original buf. The
11354 		 * copy_offset compensates for any "misalignment" between the
11355 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
11356 		 * original buf (with its un->un_sys_blocksize blocks).
11357 		 */
11358 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
11359 		    copy_length);
11360 	}
11361 
11362 freebuf_done:
11363 
11364 	/*
11365 	 * At this point we still have both the shadow buf AND the original
11366 	 * buf to deal with, as well as the layer-private data area in each.
11367 	 * Local variables are as follows:
11368 	 *
11369 	 * bp -- points to shadow buf
11370 	 * xp -- points to xbuf of shadow buf
11371 	 * bsp -- points to layer-private data area of shadow buf
11372 	 * orig_bp -- points to original buf
11373 	 *
11374 	 * First free the shadow buf and its associated xbuf, then free the
11375 	 * layer-private data area from the shadow buf. There is no need to
11376 	 * restore xb_private in the shadow xbuf.
11377 	 */
11378 	sd_shadow_buf_free(bp);
11379 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11380 
11381 	/*
11382 	 * Now update the local variables to point to the original buf, xbuf,
11383 	 * and layer-private area.
11384 	 */
11385 	bp = orig_bp;
11386 	xp = SD_GET_XBUF(bp);
11387 	ASSERT(xp != NULL);
11388 	ASSERT(xp == orig_xp);
11389 	bsp = xp->xb_private;
11390 	ASSERT(bsp != NULL);
11391 
11392 done:
11393 	/*
11394 	 * Restore xb_private to whatever it was set to by the next higher
11395 	 * layer in the chain, then free the layer-private data area.
11396 	 */
11397 	xp->xb_private = bsp->mbs_oprivate;
11398 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
11399 
11400 exit:
11401 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
11402 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
11403 
11404 	SD_NEXT_IODONE(index, un, bp);
11405 }
11406 
11407 
11408 /*
11409  *    Function: sd_checksum_iostart
11410  *
11411  * Description: A stub function for a layer that's currently not used.
11412  *		For now just a placeholder.
11413  *
11414  *     Context: Kernel thread context
11415  */
11416 
11417 static void
11418 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
11419 {
11420 	ASSERT(un != NULL);
11421 	ASSERT(bp != NULL);
11422 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11423 	SD_NEXT_IOSTART(index, un, bp);
11424 }
11425 
11426 
11427 /*
11428  *    Function: sd_checksum_iodone
11429  *
11430  * Description: A stub function for a layer that's currently not used.
11431  *		For now just a placeholder.
11432  *
11433  *     Context: May be called under interrupt context
11434  */
11435 
11436 static void
11437 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
11438 {
11439 	ASSERT(un != NULL);
11440 	ASSERT(bp != NULL);
11441 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11442 	SD_NEXT_IODONE(index, un, bp);
11443 }
11444 
11445 
11446 /*
11447  *    Function: sd_checksum_uscsi_iostart
11448  *
11449  * Description: A stub function for a layer that's currently not used.
11450  *		For now just a placeholder.
11451  *
11452  *     Context: Kernel thread context
11453  */
11454 
11455 static void
11456 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
11457 {
11458 	ASSERT(un != NULL);
11459 	ASSERT(bp != NULL);
11460 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11461 	SD_NEXT_IOSTART(index, un, bp);
11462 }
11463 
11464 
11465 /*
11466  *    Function: sd_checksum_uscsi_iodone
11467  *
11468  * Description: A stub function for a layer that's currently not used.
11469  *		For now just a placeholder.
11470  *
11471  *     Context: May be called under interrupt context
11472  */
11473 
11474 static void
11475 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11476 {
11477 	ASSERT(un != NULL);
11478 	ASSERT(bp != NULL);
11479 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11480 	SD_NEXT_IODONE(index, un, bp);
11481 }
11482 
11483 
11484 /*
11485  *    Function: sd_pm_iostart
11486  *
11487  * Description: iostart-side routine for power management.
11488  *
11489  *     Context: Kernel thread context
11490  */
11491 
11492 static void
11493 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
11494 {
11495 	ASSERT(un != NULL);
11496 	ASSERT(bp != NULL);
11497 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11498 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11499 
11500 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
11501 
11502 	if (sd_pm_entry(un) != DDI_SUCCESS) {
11503 		/*
11504 		 * Set up to return the failed buf back up the 'iodone'
11505 		 * side of the calling chain.
11506 		 */
11507 		bioerror(bp, EIO);
11508 		bp->b_resid = bp->b_bcount;
11509 
11510 		SD_BEGIN_IODONE(index, un, bp);
11511 
11512 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11513 		return;
11514 	}
11515 
11516 	SD_NEXT_IOSTART(index, un, bp);
11517 
11518 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
11519 }
11520 
11521 
11522 /*
11523  *    Function: sd_pm_iodone
11524  *
11525  * Description: iodone-side routine for power management.
11526  *
11527  *     Context: may be called from interrupt context
11528  */
11529 
11530 static void
11531 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
11532 {
11533 	ASSERT(un != NULL);
11534 	ASSERT(bp != NULL);
11535 	ASSERT(!mutex_owned(&un->un_pm_mutex));
11536 
11537 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
11538 
11539 	/*
11540 	 * After attach the following flag is only read, so don't
11541 	 * take the penalty of acquiring a mutex for it.
11542 	 */
11543 	if (un->un_f_pm_is_enabled == TRUE) {
11544 		sd_pm_exit(un);
11545 	}
11546 
11547 	SD_NEXT_IODONE(index, un, bp);
11548 
11549 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
11550 }
11551 
11552 
11553 /*
11554  *    Function: sd_core_iostart
11555  *
11556  * Description: Primary driver function for enqueuing buf(9S) structs from
11557  *		the system and initiating IO to the target device
11558  *
11559  *     Context: Kernel thread context. Can sleep.
11560  *
11561  * Assumptions:  - The given xp->xb_blkno is absolute
11562  *		   (ie, relative to the start of the device).
11563  *		 - The IO is to be done using the native blocksize of
11564  *		   the device, as specified in un->un_tgt_blocksize.
11565  */
11566 /* ARGSUSED */
11567 static void
11568 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
11569 {
11570 	struct sd_xbuf *xp;
11571 
11572 	ASSERT(un != NULL);
11573 	ASSERT(bp != NULL);
11574 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11575 	ASSERT(bp->b_resid == 0);
11576 
11577 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
11578 
11579 	xp = SD_GET_XBUF(bp);
11580 	ASSERT(xp != NULL);
11581 
11582 	mutex_enter(SD_MUTEX(un));
11583 
11584 	/*
11585 	 * If we are currently in the failfast state, fail any new IO
11586 	 * that has B_FAILFAST set, then return.
11587 	 */
11588 	if ((bp->b_flags & B_FAILFAST) &&
11589 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
11590 		mutex_exit(SD_MUTEX(un));
11591 		bioerror(bp, EIO);
11592 		bp->b_resid = bp->b_bcount;
11593 		SD_BEGIN_IODONE(index, un, bp);
11594 		return;
11595 	}
11596 
11597 	if (SD_IS_DIRECT_PRIORITY(xp)) {
11598 		/*
11599 		 * Priority command -- transport it immediately.
11600 		 *
11601 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
11602 		 * because all direct priority commands should be associated
11603 		 * with error recovery actions which we don't want to retry.
11604 		 */
11605 		sd_start_cmds(un, bp);
11606 	} else {
11607 		/*
11608 		 * Normal command -- add it to the wait queue, then start
11609 		 * transporting commands from the wait queue.
11610 		 */
11611 		sd_add_buf_to_waitq(un, bp);
11612 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
11613 		sd_start_cmds(un, NULL);
11614 	}
11615 
11616 	mutex_exit(SD_MUTEX(un));
11617 
11618 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
11619 }
11620 
11621 
11622 /*
11623  *    Function: sd_init_cdb_limits
11624  *
11625  * Description: This is to handle scsi_pkt initialization differences
11626  *		between the driver platforms.
11627  *
11628  *		Legacy behaviors:
11629  *
11630  *		If the block number or the sector count exceeds the
11631  *		capabilities of a Group 0 command, shift over to a
11632  *		Group 1 command. We don't blindly use Group 1
11633  *		commands because a) some drives (CDC Wren IVs) get a
11634  *		bit confused, and b) there is probably a fair amount
11635  *		of speed difference for a target to receive and decode
11636  *		a 10 byte command instead of a 6 byte command.
11637  *
11638  *		The xfer time difference of 6 vs 10 byte CDBs is
11639  *		still significant so this code is still worthwhile.
11640  *		10 byte CDBs are very inefficient with the fas HBA driver
11641  *		and older disks. Each CDB byte took 1 usec with some
11642  *		popular disks.
11643  *
11644  *     Context: Must be called at attach time
11645  */
11646 
11647 static void
11648 sd_init_cdb_limits(struct sd_lun *un)
11649 {
11650 	int hba_cdb_limit;
11651 
11652 	/*
11653 	 * Use CDB_GROUP1 commands for most devices except for
11654 	 * parallel SCSI fixed drives in which case we get better
11655 	 * performance using CDB_GROUP0 commands (where applicable).
11656 	 */
11657 	un->un_mincdb = SD_CDB_GROUP1;
11658 #if !defined(__fibre)
11659 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
11660 	    !un->un_f_has_removable_media) {
11661 		un->un_mincdb = SD_CDB_GROUP0;
11662 	}
11663 #endif
11664 
11665 	/*
11666 	 * Try to read the max-cdb-length supported by HBA.
11667 	 */
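	/*
	 * Note: the CDB_GROUP0, CDB_GROUP1, CDB_GROUP5 and CDB_GROUP4
	 * sizes are 6, 10, 12 and 16 bytes respectively, so the ladder
	 * below maps the HBA's byte limit onto the largest SD_CDB_GROUP*
	 * value whose CDB size the HBA can handle.
	 */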
11668 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
11669 	if (0 >= un->un_max_hba_cdb) {
11670 		un->un_max_hba_cdb = CDB_GROUP4;
11671 		hba_cdb_limit = SD_CDB_GROUP4;
11672 	} else if (0 < un->un_max_hba_cdb &&
11673 	    un->un_max_hba_cdb < CDB_GROUP1) {
11674 		hba_cdb_limit = SD_CDB_GROUP0;
11675 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
11676 	    un->un_max_hba_cdb < CDB_GROUP5) {
11677 		hba_cdb_limit = SD_CDB_GROUP1;
11678 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
11679 	    un->un_max_hba_cdb < CDB_GROUP4) {
11680 		hba_cdb_limit = SD_CDB_GROUP5;
11681 	} else {
11682 		hba_cdb_limit = SD_CDB_GROUP4;
11683 	}
11684 
11685 	/*
11686 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
11687 	 * commands for fixed disks unless we are building for a 32 bit
11688 	 * kernel.
11689 	 */
11690 #ifdef _LP64
11691 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11692 	    min(hba_cdb_limit, SD_CDB_GROUP4);
11693 #else
11694 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
11695 	    min(hba_cdb_limit, SD_CDB_GROUP1);
11696 #endif
11697 
11698 	/*
11699 	 * x86 systems require the PKT_DMA_PARTIAL flag
11700 	 */
11701 #if defined(__x86)
11702 	un->un_pkt_flags = PKT_DMA_PARTIAL;
11703 #else
11704 	un->un_pkt_flags = 0;
11705 #endif
11706 
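	/*
	 * With auto request sense (ARQ) enabled, reserve room for a full
	 * scsi_arq_status so sense data is returned along with the
	 * completed command; otherwise a single status byte suffices.
	 */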
11707 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
11708 	    ? sizeof (struct scsi_arq_status) : 1);
11709 	un->un_cmd_timeout = (ushort_t)sd_io_time;
11710 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
11711 }
11712 
11713 
11714 /*
11715  *    Function: sd_initpkt_for_buf
11716  *
11717  * Description: Allocate and initialize for transport a scsi_pkt struct,
11718  *		based upon the info specified in the given buf struct.
11719  *
11720  *		Assumes the xb_blkno in the request is absolute (ie,
11721  *		relative to the start of the device, NOT the partition).
11722  *		Also assumes that the request is using the native block
11723  *		size of the device (as returned by the READ CAPACITY
11724  *		command).
11725  *
11726  * Return Code: SD_PKT_ALLOC_SUCCESS
11727  *		SD_PKT_ALLOC_FAILURE
11728  *		SD_PKT_ALLOC_FAILURE_NO_DMA
11729  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11730  *
11731  *     Context: Kernel thread and may be called from software interrupt context
11732  *		as part of a sdrunout callback. This function may not block or
11733  *		call routines that block
11734  */
11735 
11736 static int
11737 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
11738 {
11739 	struct sd_xbuf	*xp;
11740 	struct scsi_pkt *pktp = NULL;
11741 	struct sd_lun	*un;
11742 	size_t		blockcount;
11743 	daddr_t		startblock;
11744 	int		rval;
11745 	int		cmd_flags;
11746 
11747 	ASSERT(bp != NULL);
11748 	ASSERT(pktpp != NULL);
11749 	xp = SD_GET_XBUF(bp);
11750 	ASSERT(xp != NULL);
11751 	un = SD_GET_UN(bp);
11752 	ASSERT(un != NULL);
11753 	ASSERT(mutex_owned(SD_MUTEX(un)));
11754 	ASSERT(bp->b_resid == 0);
11755 
11756 	SD_TRACE(SD_LOG_IO_CORE, un,
11757 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
11758 
11759 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11760 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
11761 		/*
11762 		 * Already have a scsi_pkt -- just need DMA resources.
11763 		 * We must recompute the CDB in case the mapping returns
11764 		 * a nonzero pkt_resid.
11765 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
11766 		 * that is being retried, the unmap/remap of the DMA resources
11767 		 * will result in the entire transfer starting over again
11768 		 * from the very first block.
11769 		 */
11770 		ASSERT(xp->xb_pktp != NULL);
11771 		pktp = xp->xb_pktp;
11772 	} else {
11773 		pktp = NULL;
11774 	}
11775 #endif /* __i386 || __amd64 */
11776 
11777 	startblock = xp->xb_blkno;	/* Absolute block num. */
11778 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
11779 
11780 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11781 
11782 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
11783 
11784 #else
11785 
11786 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
11787 
11788 #endif
11789 
11790 	/*
11791 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
11792 	 * call scsi_init_pkt, and build the CDB.
11793 	 */
11794 	rval = sd_setup_rw_pkt(un, &pktp, bp,
11795 	    cmd_flags, sdrunout, (caddr_t)un,
11796 	    startblock, blockcount);
11797 
11798 	if (rval == 0) {
11799 		/*
11800 		 * Success.
11801 		 *
11802 		 * If partial DMA is being used and is required for this
11803 		 * transfer, set it up here.
11804 		 */
11805 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
11806 		    (pktp->pkt_resid != 0)) {
11807 
11808 			/*
11809 			 * Save the pkt_resid for the next xfer; the CDB
11810 			 * size must stay the same across the transfer.
11811 			 */
11812 			xp->xb_dma_resid = pktp->pkt_resid;
11813 
11814 			/* rezero resid */
11815 			pktp->pkt_resid = 0;
11816 
11817 		} else {
11818 			xp->xb_dma_resid = 0;
11819 		}
11820 
11821 		pktp->pkt_flags = un->un_tagflags;
11822 		pktp->pkt_time  = un->un_cmd_timeout;
11823 		pktp->pkt_comp  = sdintr;
11824 
11825 		pktp->pkt_private = bp;
11826 		*pktpp = pktp;
11827 
11828 		SD_TRACE(SD_LOG_IO_CORE, un,
11829 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
11830 
11831 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
11832 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
11833 #endif
11834 
11835 		return (SD_PKT_ALLOC_SUCCESS);
11836 
11837 	}
11838 
11839 	/*
11840 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
11841 	 * from sd_setup_rw_pkt.
11842 	 */
11843 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
11844 
11845 	if (rval == SD_PKT_ALLOC_FAILURE) {
11846 		*pktpp = NULL;
11847 		/*
11848 		 * Set the driver state to RWAIT to indicate the driver
11849 		 * is waiting on resource allocations. The driver will not
11850 		 * suspend, pm_suspend, or detach while the state is RWAIT.
11851 		 */
11852 		New_state(un, SD_STATE_RWAIT);
11853 
11854 		SD_ERROR(SD_LOG_IO_CORE, un,
11855 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
11856 
11857 		if ((bp->b_flags & B_ERROR) != 0) {
11858 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
11859 		}
11860 		return (SD_PKT_ALLOC_FAILURE);
11861 	} else {
11862 		/*
11863 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
11864 		 *
11865 		 * This should never happen.  Maybe someone messed with the
11866 		 * kernel's minphys?
11867 		 */
11868 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11869 		    "Request rejected: too large for CDB: "
11870 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
11871 		SD_ERROR(SD_LOG_IO_CORE, un,
11872 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
11873 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
11874 
11875 	}
11876 }
11877 
11878 
11879 /*
11880  *    Function: sd_destroypkt_for_buf
11881  *
11882  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
11883  *
11884  *     Context: Kernel thread or interrupt context
11885  */
11886 
11887 static void
11888 sd_destroypkt_for_buf(struct buf *bp)
11889 {
11890 	ASSERT(bp != NULL);
11891 	ASSERT(SD_GET_UN(bp) != NULL);
11892 
11893 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11894 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
11895 
11896 	ASSERT(SD_GET_PKTP(bp) != NULL);
11897 	scsi_destroy_pkt(SD_GET_PKTP(bp));
11898 
11899 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
11900 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
11901 }
11902 
11903 /*
11904  *    Function: sd_setup_rw_pkt
11905  *
11906  * Description: Determines appropriate CDB group for the requested LBA
11907  *		and transfer length, calls scsi_init_pkt, and builds
11908  *		the CDB.  Do not use for partial DMA transfers except
11909  *		for the initial transfer since the CDB size must
11910  *		remain constant.
11911  *
11912  *     Context: Kernel thread and may be called from software interrupt
11913  *		context as part of a sdrunout callback. This function may not
11914  *		block or call routines that block
11915  */
11916 
11917 
11918 int
11919 sd_setup_rw_pkt(struct sd_lun *un,
11920     struct scsi_pkt **pktpp, struct buf *bp, int flags,
11921     int (*callback)(caddr_t), caddr_t callback_arg,
11922     diskaddr_t lba, uint32_t blockcount)
11923 {
11924 	struct scsi_pkt *return_pktp;
11925 	union scsi_cdb *cdbp;
11926 	struct sd_cdbinfo *cp = NULL;
11927 	int i;
11928 
11929 	/*
11930 	 * See which size CDB to use, based upon the request.
11931 	 */
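	/*
	 * sd_cdbtab is scanned from un_mincdb up to un_maxcdb, so the
	 * first entry whose sc_maxlba/sc_maxlen limits cover the request
	 * yields the smallest CDB that can address it.
	 */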
11932 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
11933 
11934 		/*
11935 		 * Check lba and block count against sd_cdbtab limits.
11936 		 * In the partial DMA case, we have to use the same size
11937 		 * CDB for all the transfers.  Check lba + blockcount
11938 		 * against the max LBA so we know that segment of the
11939 		 * transfer can use the CDB we select.
11940 		 */
11941 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
11942 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
11943 
11944 			/*
11945 			 * The command will fit into the CDB type
11946 			 * specified by sd_cdbtab[i].
11947 			 */
11948 			cp = sd_cdbtab + i;
11949 
11950 			/*
11951 			 * Call scsi_init_pkt so we can fill in the
11952 			 * CDB.
11953 			 */
11954 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
11955 			    bp, cp->sc_grpcode, un->un_status_len, 0,
11956 			    flags, callback, callback_arg);
11957 
11958 			if (return_pktp != NULL) {
11959 
11960 				/*
11961 				 * Return new value of pkt
11962 				 */
11963 				*pktpp = return_pktp;
11964 
11965 				/*
11966 				 * To be safe, zero the CDB, ensuring there is
11967 				 * no leftover data from a previous command.
11968 				 */
11969 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
11970 
11971 				/*
11972 				 * Handle partial DMA mapping
11973 				 */
11974 				if (return_pktp->pkt_resid != 0) {
11975 
11976 					/*
11977 					 * Not going to xfer as many blocks as
11978 					 * originally expected
11979 					 */
11980 					blockcount -=
11981 					    SD_BYTES2TGTBLOCKS(un,
11982 					    return_pktp->pkt_resid);
11983 				}
11984 
11985 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
11986 
11987 				/*
11988 				 * Set command byte based on the CDB
11989 				 * type we matched.
11990 				 */
11991 				cdbp->scc_cmd = cp->sc_grpmask |
11992 				    ((bp->b_flags & B_READ) ?
11993 				    SCMD_READ : SCMD_WRITE);
11994 
11995 				SD_FILL_SCSI1_LUN(un, return_pktp);
11996 
11997 				/*
11998 				 * Fill in LBA and length
11999 				 */
12000 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
12001 				    (cp->sc_grpcode == CDB_GROUP4) ||
12002 				    (cp->sc_grpcode == CDB_GROUP0) ||
12003 				    (cp->sc_grpcode == CDB_GROUP5));
12004 
12005 				if (cp->sc_grpcode == CDB_GROUP1) {
12006 					FORMG1ADDR(cdbp, lba);
12007 					FORMG1COUNT(cdbp, blockcount);
12008 					return (0);
12009 				} else if (cp->sc_grpcode == CDB_GROUP4) {
12010 					FORMG4LONGADDR(cdbp, lba);
12011 					FORMG4COUNT(cdbp, blockcount);
12012 					return (0);
12013 				} else if (cp->sc_grpcode == CDB_GROUP0) {
12014 					FORMG0ADDR(cdbp, lba);
12015 					FORMG0COUNT(cdbp, blockcount);
12016 					return (0);
12017 				} else if (cp->sc_grpcode == CDB_GROUP5) {
12018 					FORMG5ADDR(cdbp, lba);
12019 					FORMG5COUNT(cdbp, blockcount);
12020 					return (0);
12021 				}
12022 
12023 				/*
12024 				 * It should be impossible to not match one
12025 				 * of the CDB types above, so we should never
12026 				 * reach this point.  Set the CDB command byte
12027 				 * to test-unit-ready to avoid writing
12028 				 * to somewhere we don't intend.
12029 				 */
12030 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
12031 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
12032 			} else {
12033 				/*
12034 				 * Couldn't get scsi_pkt
12035 				 */
12036 				return (SD_PKT_ALLOC_FAILURE);
12037 			}
12038 		}
12039 	}
12040 
12041 	/*
12042 	 * None of the available CDB types were suitable.  This really
12043 	 * should never happen:  on a 64 bit system we support
12044 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
12045 	 * and on a 32 bit system we will refuse to bind to a device
12046 	 * larger than 2TB so addresses will never be larger than 32 bits.
12047 	 */
12048 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
12049 }
12050 
12051 #if defined(__i386) || defined(__amd64)
12052 /*
12053  *    Function: sd_setup_next_rw_pkt
12054  *
12055  * Description: Set up a packet for partial DMA transfers, except for the
12056  * 		initial transfer.  sd_setup_rw_pkt should be used for
12057  *		the initial transfer.
12058  *
12059  *     Context: Kernel thread and may be called from interrupt context.
12060  */
12061 
12062 int
12063 sd_setup_next_rw_pkt(struct sd_lun *un,
12064     struct scsi_pkt *pktp, struct buf *bp,
12065     diskaddr_t lba, uint32_t blockcount)
12066 {
12067 	uchar_t com;
12068 	union scsi_cdb *cdbp;
12069 	uchar_t cdb_group_id;
12070 
12071 	ASSERT(pktp != NULL);
12072 	ASSERT(pktp->pkt_cdbp != NULL);
12073 
12074 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
12075 	com = cdbp->scc_cmd;
12076 	cdb_group_id = CDB_GROUPID(com);
12077 
12078 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
12079 	    (cdb_group_id == CDB_GROUPID_1) ||
12080 	    (cdb_group_id == CDB_GROUPID_4) ||
12081 	    (cdb_group_id == CDB_GROUPID_5));
12082 
12083 	/*
12084 	 * Move pkt to the next portion of the xfer.
12085 	 * func is NULL_FUNC so we do not have to release
12086 	 * the disk mutex here.
12087 	 */
12088 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
12089 	    NULL_FUNC, NULL) == pktp) {
12090 		/* Success.  Handle partial DMA */
12091 		if (pktp->pkt_resid != 0) {
12092 			blockcount -=
12093 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
12094 		}
12095 
12096 		cdbp->scc_cmd = com;
12097 		SD_FILL_SCSI1_LUN(un, pktp);
12098 		if (cdb_group_id == CDB_GROUPID_1) {
12099 			FORMG1ADDR(cdbp, lba);
12100 			FORMG1COUNT(cdbp, blockcount);
12101 			return (0);
12102 		} else if (cdb_group_id == CDB_GROUPID_4) {
12103 			FORMG4LONGADDR(cdbp, lba);
12104 			FORMG4COUNT(cdbp, blockcount);
12105 			return (0);
12106 		} else if (cdb_group_id == CDB_GROUPID_0) {
12107 			FORMG0ADDR(cdbp, lba);
12108 			FORMG0COUNT(cdbp, blockcount);
12109 			return (0);
12110 		} else if (cdb_group_id == CDB_GROUPID_5) {
12111 			FORMG5ADDR(cdbp, lba);
12112 			FORMG5COUNT(cdbp, blockcount);
12113 			return (0);
12114 		}
12115 
12116 		/* Unreachable */
12117 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
12118 	}
12119 
12120 	/*
12121 	 * Error setting up next portion of cmd transfer.
12122 	 * Something is definitely very wrong and this
12123 	 * should not happen.
12124 	 */
12125 	return (SD_PKT_ALLOC_FAILURE);
12126 }
12127 #endif /* defined(__i386) || defined(__amd64) */
12128 
12129 /*
12130  *    Function: sd_initpkt_for_uscsi
12131  *
12132  * Description: Allocate and initialize for transport a scsi_pkt struct,
12133  *		based upon the info specified in the given uscsi_cmd struct.
12134  *
12135  * Return Code: SD_PKT_ALLOC_SUCCESS
12136  *		SD_PKT_ALLOC_FAILURE
12137  *		SD_PKT_ALLOC_FAILURE_NO_DMA
12138  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
12139  *
12140  *     Context: Kernel thread and may be called from software interrupt context
12141  *		as part of a sdrunout callback. This function may not block or
12142  *		call routines that block
12143  */
12144 
12145 static int
12146 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
12147 {
12148 	struct uscsi_cmd *uscmd;
12149 	struct sd_xbuf	*xp;
12150 	struct scsi_pkt	*pktp;
12151 	struct sd_lun	*un;
12152 	uint32_t	flags = 0;
12153 
12154 	ASSERT(bp != NULL);
12155 	ASSERT(pktpp != NULL);
12156 	xp = SD_GET_XBUF(bp);
12157 	ASSERT(xp != NULL);
12158 	un = SD_GET_UN(bp);
12159 	ASSERT(un != NULL);
12160 	ASSERT(mutex_owned(SD_MUTEX(un)));
12161 
12162 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12163 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12164 	ASSERT(uscmd != NULL);
12165 
12166 	SD_TRACE(SD_LOG_IO_CORE, un,
12167 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
12168 
12169 	/*
12170 	 * Allocate the scsi_pkt for the command.
12171 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
12172 	 *	 during scsi_init_pkt time and will continue to use the
12173 	 *	 same path as long as the same scsi_pkt is used without
12174 	 *	 intervening scsi_dma_free(). Since a uscsi command does
12175 	 *	 not call scsi_dmafree() before retrying a failed command,
12176 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
12177 	 *	 NOT set, so that scsi_vhci can use another available path
12178 	 *	 for the retry. Besides, a uscsi command does not allow DMA
12179 	 *	 breakup, so there is no need to set PKT_DMA_PARTIAL.
12180 	 */
12181 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
12182 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
12183 	    sizeof (struct scsi_arq_status), 0,
12184 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
12185 	    sdrunout, (caddr_t)un);
12186 
12187 	if (pktp == NULL) {
12188 		*pktpp = NULL;
12189 		/*
12190 		 * Set the driver state to RWAIT to indicate the driver
12191 		 * is waiting on resource allocations. The driver will not
12192 		 * suspend, pm_suspend, or detach while the state is RWAIT.
12193 		 */
12194 		New_state(un, SD_STATE_RWAIT);
12195 
12196 		SD_ERROR(SD_LOG_IO_CORE, un,
12197 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
12198 
12199 		if ((bp->b_flags & B_ERROR) != 0) {
12200 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
12201 		}
12202 		return (SD_PKT_ALLOC_FAILURE);
12203 	}
12204 
12205 	/*
12206 	 * We do not do DMA breakup for USCSI commands, so return failure
12207 	 * here if all the needed DMA resources were not allocated.
12208 	 */
12209 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
12210 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
12211 		scsi_destroy_pkt(pktp);
12212 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
12213 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
12214 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
12215 	}
12216 
12217 	/* Init the cdb from the given uscsi struct */
12218 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
12219 	    uscmd->uscsi_cdb[0], 0, 0, 0);
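	/*
	 * Note: only the opcode byte is set up here; the caller's
	 * complete CDB is bcopy'd over it further below, after the
	 * pkt flags have been determined.
	 */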
12220 
12221 	SD_FILL_SCSI1_LUN(un, pktp);
12222 
12223 	/*
12224 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
12225 	 * for listing of the supported flags.
12226 	 */
12227 
12228 	if (uscmd->uscsi_flags & USCSI_SILENT) {
12229 		flags |= FLAG_SILENT;
12230 	}
12231 
12232 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
12233 		flags |= FLAG_DIAGNOSE;
12234 	}
12235 
12236 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
12237 		flags |= FLAG_ISOLATE;
12238 	}
12239 
12240 	if (un->un_f_is_fibre == FALSE) {
12241 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
12242 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
12243 		}
12244 	}
12245 
12246 	/*
12247 	 * Set the pkt flags here so we save time later.
12248 	 * Note: These flags are NOT in the uscsi man page!!!
12249 	 */
12250 	if (uscmd->uscsi_flags & USCSI_HEAD) {
12251 		flags |= FLAG_HEAD;
12252 	}
12253 
12254 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
12255 		flags |= FLAG_NOINTR;
12256 	}
12257 
12258 	/*
12259 	 * For tagged queueing, things get a bit complicated.
12260 	 * Check first for head of queue and last for ordered queue.
12261 	 * If neither head nor ordered, use the default driver tag flags.
12262 	 */
12263 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
12264 		if (uscmd->uscsi_flags & USCSI_HTAG) {
12265 			flags |= FLAG_HTAG;
12266 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
12267 			flags |= FLAG_OTAG;
12268 		} else {
12269 			flags |= un->un_tagflags & FLAG_TAGMASK;
12270 		}
12271 	}
12272 
12273 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
12274 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
12275 	}
12276 
12277 	pktp->pkt_flags = flags;
12278 
12279 	/* Copy the caller's CDB into the pkt... */
12280 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
12281 
12282 	if (uscmd->uscsi_timeout == 0) {
12283 		pktp->pkt_time = un->un_uscsi_timeout;
12284 	} else {
12285 		pktp->pkt_time = uscmd->uscsi_timeout;
12286 	}
12287 
12288 	/* need it later to identify USCSI request in sdintr */
12289 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
12290 
12291 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
12292 
12293 	pktp->pkt_private = bp;
12294 	pktp->pkt_comp = sdintr;
12295 	*pktpp = pktp;
12296 
12297 	SD_TRACE(SD_LOG_IO_CORE, un,
12298 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
12299 
12300 	return (SD_PKT_ALLOC_SUCCESS);
12301 }
12302 
12303 
12304 /*
12305  *    Function: sd_destroypkt_for_uscsi
12306  *
12307  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
12308  *		IOs. Also saves relevant info into the associated uscsi_cmd
12309  *		struct.
12310  *
12311  *     Context: May be called under interrupt context
12312  */
12313 
12314 static void
12315 sd_destroypkt_for_uscsi(struct buf *bp)
12316 {
12317 	struct uscsi_cmd *uscmd;
12318 	struct sd_xbuf	*xp;
12319 	struct scsi_pkt	*pktp;
12320 	struct sd_lun	*un;
12321 
12322 	ASSERT(bp != NULL);
12323 	xp = SD_GET_XBUF(bp);
12324 	ASSERT(xp != NULL);
12325 	un = SD_GET_UN(bp);
12326 	ASSERT(un != NULL);
12327 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12328 	pktp = SD_GET_PKTP(bp);
12329 	ASSERT(pktp != NULL);
12330 
12331 	SD_TRACE(SD_LOG_IO_CORE, un,
12332 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
12333 
12334 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
12335 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
12336 	ASSERT(uscmd != NULL);
12337 
12338 	/* Save the status and the residual into the uscsi_cmd struct */
12339 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
12340 	uscmd->uscsi_resid  = bp->b_resid;
12341 
12342 	/*
12343 	 * If enabled, copy any saved sense data into the area specified
12344 	 * by the uscsi command.
12345 	 */
12346 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12347 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12348 		/*
12349 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
12350 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
12351 		 */
12352 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
12353 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
12354 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
12355 	}
12356 
12357 	/* We are done with the scsi_pkt; free it now */
12358 	ASSERT(SD_GET_PKTP(bp) != NULL);
12359 	scsi_destroy_pkt(SD_GET_PKTP(bp));
12360 
12361 	SD_TRACE(SD_LOG_IO_CORE, un,
12362 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
12363 }
12364 
12365 
12366 /*
12367  *    Function: sd_bioclone_alloc
12368  *
12369  * Description: Allocate a buf(9S) and init it as per the given buf
12370  *		and the various arguments.  The associated sd_xbuf
12371  *		struct is (nearly) duplicated.  The struct buf *bp
12372  *		argument is saved in new_xp->xb_private.
12373  *
12374  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12375  *		datalen - size of data area for the shadow bp
12376  *		blkno - starting LBA
12377  *		func - function pointer for b_iodone in the shadow buf. (May
12378  *			be NULL if none.)
12379  *
12380  * Return Code: Pointer to the allocated buf(9S) struct
12381  *
12382  *     Context: Can sleep.
12383  */
12384 
12385 static struct buf *
12386 sd_bioclone_alloc(struct buf *bp, size_t datalen,
12387 	daddr_t blkno, int (*func)(struct buf *))
12388 {
12389 	struct	sd_lun	*un;
12390 	struct	sd_xbuf	*xp;
12391 	struct	sd_xbuf	*new_xp;
12392 	struct	buf	*new_bp;
12393 
12394 	ASSERT(bp != NULL);
12395 	xp = SD_GET_XBUF(bp);
12396 	ASSERT(xp != NULL);
12397 	un = SD_GET_UN(bp);
12398 	ASSERT(un != NULL);
12399 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12400 
12401 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
12402 	    NULL, KM_SLEEP);
12403 
12404 	new_bp->b_lblkno	= blkno;
12405 
12406 	/*
12407 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12408 	 * original xbuf into it.
12409 	 */
12410 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12411 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12412 
12413 	/*
12414 	 * The given bp is automatically saved in the xb_private member
12415 	 * of the new xbuf.  Callers are allowed to depend on this.
12416 	 */
12417 	new_xp->xb_private = bp;
12418 
12419 	new_bp->b_private  = new_xp;
12420 
12421 	return (new_bp);
12422 }
12423 
12424 /*
12425  *    Function: sd_shadow_buf_alloc
12426  *
12427  * Description: Allocate a buf(9S) and init it as per the given buf
12428  *		and the various arguments.  The associated sd_xbuf
12429  *		struct is (nearly) duplicated.  The struct buf *bp
12430  *		argument is saved in new_xp->xb_private.
12431  *
12432  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
12433  *		datalen - size of data area for the shadow bp
12434  *		bflags - B_READ or B_WRITE (pseudo flag)
12435  *		blkno - starting LBA
12436  *		func - function pointer for b_iodone in the shadow buf. (May
12437  *			be NULL if none.)
12438  *
12439  * Return Code: Pointer to the allocated buf(9S) struct
12440  *
12441  *     Context: Can sleep.
12442  */
12443 
12444 static struct buf *
12445 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
12446 	daddr_t blkno, int (*func)(struct buf *))
12447 {
12448 	struct	sd_lun	*un;
12449 	struct	sd_xbuf	*xp;
12450 	struct	sd_xbuf	*new_xp;
12451 	struct	buf	*new_bp;
12452 
12453 	ASSERT(bp != NULL);
12454 	xp = SD_GET_XBUF(bp);
12455 	ASSERT(xp != NULL);
12456 	un = SD_GET_UN(bp);
12457 	ASSERT(un != NULL);
12458 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12459 
12460 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
12461 		bp_mapin(bp);
12462 	}
12463 
12464 	bflags &= (B_READ | B_WRITE);
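	/*
	 * Allocate the shadow buf.  On x86 this is a raw buf wrapped
	 * around kmem_zalloc()ed memory; other platforms use
	 * scsi_alloc_consistent_buf() to obtain a DMA-consistent buffer.
	 */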
12465 #if defined(__i386) || defined(__amd64)
12466 	new_bp = getrbuf(KM_SLEEP);
12467 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
12468 	new_bp->b_bcount = datalen;
12469 	new_bp->b_flags = bflags |
12470 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
12471 #else
12472 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
12473 	    datalen, bflags, SLEEP_FUNC, NULL);
12474 #endif
12475 	new_bp->av_forw	= NULL;
12476 	new_bp->av_back	= NULL;
12477 	new_bp->b_dev	= bp->b_dev;
12478 	new_bp->b_blkno	= blkno;
12479 	new_bp->b_iodone = func;
12480 	new_bp->b_edev	= bp->b_edev;
12481 	new_bp->b_resid	= 0;
12482 
12483 	/* We need to preserve the B_FAILFAST flag */
12484 	if (bp->b_flags & B_FAILFAST) {
12485 		new_bp->b_flags |= B_FAILFAST;
12486 	}
12487 
12488 	/*
12489 	 * Allocate an xbuf for the shadow bp and copy the contents of the
12490 	 * original xbuf into it.
12491 	 */
12492 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12493 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
12494 
12495 	/* Needed later to copy data between the shadow buf & original buf! */
12496 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
12497 
12498 	/*
12499 	 * The given bp is automatically saved in the xb_private member
12500 	 * of the new xbuf.  Callers are allowed to depend on this.
12501 	 */
12502 	new_xp->xb_private = bp;
12503 
12504 	new_bp->b_private  = new_xp;
12505 
12506 	return (new_bp);
12507 }
12508 
12509 /*
12510  *    Function: sd_bioclone_free
12511  *
12512  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
12513  *		in the larger-than-partition operation.
12514  *
12515  *     Context: May be called under interrupt context
12516  */
12517 
12518 static void
12519 sd_bioclone_free(struct buf *bp)
12520 {
12521 	struct sd_xbuf	*xp;
12522 
12523 	ASSERT(bp != NULL);
12524 	xp = SD_GET_XBUF(bp);
12525 	ASSERT(xp != NULL);
12526 
12527 	/*
12528 	 * Call bp_mapout() before freeing the buf, in case a lower
12529 	 * layer or HBA had done a bp_mapin().  We must do this here
12530 	 * as we are the "originator" of the shadow buf.
12531 	 */
12532 	bp_mapout(bp);
12533 
12534 	/*
12535 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12536 	 * never gets confused by a stale value in this field. (Just a little
12537 	 * extra defensiveness here.)
12538 	 */
12539 	bp->b_iodone = NULL;
12540 
12541 	freerbuf(bp);
12542 
12543 	kmem_free(xp, sizeof (struct sd_xbuf));
12544 }
12545 
12546 /*
12547  *    Function: sd_shadow_buf_free
12548  *
12549  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
12550  *
12551  *     Context: May be called under interrupt context
12552  */
12553 
12554 static void
12555 sd_shadow_buf_free(struct buf *bp)
12556 {
12557 	struct sd_xbuf	*xp;
12558 
12559 	ASSERT(bp != NULL);
12560 	xp = SD_GET_XBUF(bp);
12561 	ASSERT(xp != NULL);
12562 
12563 #if defined(__sparc)
12564 	/*
12565 	 * Call bp_mapout() before freeing the buf, in case a lower
12566 	 * layer or HBA had done a bp_mapin().  We must do this here
12567 	 * as we are the "originator" of the shadow buf.
12568 	 */
12569 	bp_mapout(bp);
12570 #endif
12571 
12572 	/*
12573 	 * Null out b_iodone before freeing the bp, to ensure that the driver
12574 	 * never gets confused by a stale value in this field. (Just a little
12575 	 * extra defensiveness here.)
12576 	 */
12577 	bp->b_iodone = NULL;
12578 
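	/*
	 * Free the data area and the buf via the same mechanism that
	 * sd_shadow_buf_alloc() used to allocate them: kmem_free()/
	 * freerbuf() on x86, scsi_free_consistent_buf() elsewhere.
	 */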
12579 #if defined(__i386) || defined(__amd64)
12580 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
12581 	freerbuf(bp);
12582 #else
12583 	scsi_free_consistent_buf(bp);
12584 #endif
12585 
12586 	kmem_free(xp, sizeof (struct sd_xbuf));
12587 }
12588 
12589 
12590 /*
12591  *    Function: sd_print_transport_rejected_message
12592  *
12593  * Description: This implements the ludicrously complex rules for printing
12594  *		a "transport rejected" message.  This is to address the
12595  *		specific problem of having a flood of this error message
12596  *		produced when a failover occurs.
12597  *
12598  *     Context: Any.
12599  */
12600 
12601 static void
12602 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
12603 	int code)
12604 {
12605 	ASSERT(un != NULL);
12606 	ASSERT(mutex_owned(SD_MUTEX(un)));
12607 	ASSERT(xp != NULL);
12608 
12609 	/*
12610 	 * Print the "transport rejected" message under the following
12611 	 * conditions:
12612 	 *
12613 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
12614 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
12615 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
12616 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
12617 	 *   scsi_transport(9F) (which indicates that the target might have
12618 	 *   gone off-line).  This uses un->un_tran_fatal_count, which is
12619 	 *   incremented whenever a TRAN_FATAL_ERROR is received, and
12620 	 *   reset to zero whenever a TRAN_ACCEPT is returned
12621 	 *   from scsi_transport().
12622 	 *
12623 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
12624 	 * the preceeding cases in order for the message to be printed.
12625 	 * the preceding cases in order for the message to be printed.
12626 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
12627 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
12628 		    (code != TRAN_FATAL_ERROR) ||
12629 		    (un->un_tran_fatal_count == 1)) {
12630 			switch (code) {
12631 			case TRAN_BADPKT:
12632 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12633 				    "transport rejected bad packet\n");
12634 				break;
12635 			case TRAN_FATAL_ERROR:
12636 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12637 				    "transport rejected fatal error\n");
12638 				break;
12639 			default:
12640 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12641 				    "transport rejected (%d)\n", code);
12642 				break;
12643 			}
12644 		}
12645 	}
12646 }
12647 
12648 
12649 /*
12650  *    Function: sd_add_buf_to_waitq
12651  *
12652  * Description: Add the given buf(9S) struct to the wait queue for the
12653  *		instance.  If sorting is enabled, then the buf is added
12654  *		to the queue via an elevator sort algorithm (a la
12655  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
12656  *		If sorting is not enabled, then the buf is just added
12657  *		to the end of the wait queue.
12658  *
12659  * Return Code: void
12660  *
12661  *     Context: Does not sleep/block, therefore technically can be called
12662  *		from any context.  However if sorting is enabled then the
12663  *		from any context.  However, if sorting is enabled, the
12664  *		execution time is indeterminate and may be long if
12665  */
12666 
12667 static void
12668 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
12669 {
12670 	struct buf *ap;
12671 
12672 	ASSERT(bp != NULL);
12673 	ASSERT(un != NULL);
12674 	ASSERT(mutex_owned(SD_MUTEX(un)));
12675 
12676 	/* If the queue is empty, add the buf as the only entry & return. */
12677 	if (un->un_waitq_headp == NULL) {
12678 		ASSERT(un->un_waitq_tailp == NULL);
12679 		un->un_waitq_headp = un->un_waitq_tailp = bp;
12680 		bp->av_forw = NULL;
12681 		return;
12682 	}
12683 
12684 	ASSERT(un->un_waitq_tailp != NULL);
12685 
12686 	/*
12687 	 * If sorting is disabled, just add the buf to the tail end of
12688 	 * the wait queue and return.
12689 	 */
12690 	if (un->un_f_disksort_disabled) {
12691 		un->un_waitq_tailp->av_forw = bp;
12692 		un->un_waitq_tailp = bp;
12693 		bp->av_forw = NULL;
12694 		return;
12695 	}
12696 
12697 	/*
12698 	 * Sort thru the list of requests currently on the wait queue
12699 	 * and add the new buf request at the appropriate position.
12700 	 *
12701 	 * The un->un_waitq_headp is an activity chain pointer on which
12702 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
12703 	 * first queue holds those requests which are positioned after
12704 	 * the current SD_GET_BLKNO() (in the first request); the second holds
12705 	 * requests which came in after their SD_GET_BLKNO() number was passed.
12706 	 * Thus we implement a one way scan, retracting after reaching
12707 	 * the end of the drive to the first request on the second
12708 	 * queue, at which time it becomes the first queue.
12709 	 * A one-way scan is natural because of the way UNIX read-ahead
12710 	 * blocks are allocated.
12711 	 *
12712 	 * If we lie after the first request, then we must locate the
12713 	 * second request list and add ourselves to it.
12714 	 */
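	/*
	 * A worked example with a hypothetical queue: given blknos
	 * 100 -> 300 -> 700 -> 20 -> 50, the inversion at 700 -> 20
	 * marks the start of the second sweep.  A new request for
	 * blkno 400 sorts into the first list between 300 and 700,
	 * while one for blkno 40 sorts into the second list between
	 * 20 and 50.
	 */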
12715 	ap = un->un_waitq_headp;
12716 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
12717 		while (ap->av_forw != NULL) {
12718 			/*
12719 			 * Look for an "inversion" in the (normally
12720 			 * ascending) block numbers. This indicates
12721 			 * the start of the second request list.
12722 			 */
12723 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
12724 				/*
12725 				 * Search the second request list for the
12726 				 * first request at a larger block number.
12727 				 * We go before that; however if there is
12728 				 * no such request, we go at the end.
12729 				 */
12730 				do {
12731 					if (SD_GET_BLKNO(bp) <
12732 					    SD_GET_BLKNO(ap->av_forw)) {
12733 						goto insert;
12734 					}
12735 					ap = ap->av_forw;
12736 				} while (ap->av_forw != NULL);
12737 				goto insert;		/* after last */
12738 			}
12739 			ap = ap->av_forw;
12740 		}
12741 
12742 		/*
12743 		 * No inversions... we will go after the last, and
12744 		 * be the first request in the second request list.
12745 		 */
12746 		goto insert;
12747 	}
12748 
12749 	/*
12750 	 * Request is at/after the current request...
12751 	 * sort in the first request list.
12752 	 */
12753 	while (ap->av_forw != NULL) {
12754 		/*
12755 		 * We want to go after the current request (1) if
12756 		 * there is an inversion after it (i.e. it is the end
12757 		 * of the first request list), or (2) if the next
12758 		 * request is a larger block no. than our request.
12759 		 */
12760 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
12761 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
12762 			goto insert;
12763 		}
12764 		ap = ap->av_forw;
12765 	}
12766 
12767 	/*
12768 	 * Neither a second list nor a larger request, therefore
12769 	 * we go at the end of the first list (which is the same
12770 	 * as the end of the whole shebang).
12771 	 */
12772 insert:
12773 	bp->av_forw = ap->av_forw;
12774 	ap->av_forw = bp;
12775 
12776 	/*
12777 	 * If we inserted onto the tail end of the waitq, make sure the
12778 	 * tail pointer is updated.
12779 	 */
12780 	if (ap == un->un_waitq_tailp) {
12781 		un->un_waitq_tailp = bp;
12782 	}
12783 }
12784 
12785 
12786 /*
12787  *    Function: sd_start_cmds
12788  *
12789  * Description: Remove and transport cmds from the driver queues.
12790  *
12791  *   Arguments: un - pointer to the unit (soft state) struct for the target.
12792  *
12793  *		immed_bp - ptr to a buf to be transported immediately. Only
12794  *		the immed_bp is transported; bufs on the waitq are not
12795  *		processed and the un_retry_bp is not checked.  If immed_bp is
12796  *		NULL, then normal queue processing is performed.
12797  *
12798  *     Context: May be called from kernel thread context, interrupt context,
12799  *		or runout callback context. This function may not block or
12800  *		call routines that block.
12801  */
12802 
12803 static void
12804 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
12805 {
12806 	struct	sd_xbuf	*xp;
12807 	struct	buf	*bp;
12808 	void	(*statp)(kstat_io_t *);
12809 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12810 	void	(*saved_statp)(kstat_io_t *);
12811 #endif
12812 	int	rval;
12813 
12814 	ASSERT(un != NULL);
12815 	ASSERT(mutex_owned(SD_MUTEX(un)));
12816 	ASSERT(un->un_ncmds_in_transport >= 0);
12817 	ASSERT(un->un_throttle >= 0);
12818 
12819 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
12820 
12821 	do {
12822 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12823 		saved_statp = NULL;
12824 #endif
12825 
12826 		/*
12827 		 * If we are syncing or dumping, fail the command to
12828 		 * avoid recursively calling back into scsi_transport().
12829 		 * The dump I/O itself uses a separate code path so this
12830 		 * only prevents non-dump I/O from being sent while dumping.
12831 		 * File system sync takes place before dumping begins.
12832 		 * During panic, filesystem I/O is allowed provided
12833 		 * un_in_callback is <= 1.  This is to prevent recursion
12834 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
12835 		 * sd_start_cmds and so on.  See panic.c for more information
12836 		 * about the states the system can be in during panic.
12837 		 */
12838 		if ((un->un_state == SD_STATE_DUMPING) ||
12839 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
12840 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12841 			    "sd_start_cmds: panicking\n");
12842 			goto exit;
12843 		}
12844 
12845 		if ((bp = immed_bp) != NULL) {
12846 			/*
12847 			 * We have a bp that must be transported immediately.
12848 			 * It's OK to transport the immed_bp here without doing
12849 			 * the throttle limit check because the immed_bp is
12850 			 * always used in a retry/recovery case. This means
12851 			 * that we know we are not at the throttle limit by
12852 			 * virtue of the fact that to get here we must have
12853 			 * already gotten a command back via sdintr(). This also
12854 			 * relies on (1) the command on un_retry_bp preventing
12855 			 * further commands from the waitq from being issued;
12856 			 * and (2) the code in sd_retry_command checking the
12857 			 * throttle limit before issuing a delayed or immediate
12858 			 * retry. This holds even if the throttle limit is
12859 			 * currently ratcheted down from its maximum value.
12860 			 */
12861 			statp = kstat_runq_enter;
12862 			if (bp == un->un_retry_bp) {
12863 				ASSERT((un->un_retry_statp == NULL) ||
12864 				    (un->un_retry_statp == kstat_waitq_enter) ||
12865 				    (un->un_retry_statp ==
12866 				    kstat_runq_back_to_waitq));
12867 				/*
12868 				 * If the waitq kstat was incremented when
12869 				 * sd_set_retry_bp() queued this bp for a retry,
12870 				 * then we must set up statp so that the waitq
12871 				 * count will get decremented correctly below.
12872 				 * Also we must clear un->un_retry_statp to
12873 				 * ensure that we do not act on a stale value
12874 				 * in this field.
12875 				 */
12876 				if ((un->un_retry_statp == kstat_waitq_enter) ||
12877 				    (un->un_retry_statp ==
12878 				    kstat_runq_back_to_waitq)) {
12879 					statp = kstat_waitq_to_runq;
12880 				}
12881 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12882 				saved_statp = un->un_retry_statp;
12883 #endif
12884 				un->un_retry_statp = NULL;
12885 
12886 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
12887 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
12888 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
12889 				    un, un->un_retry_bp, un->un_throttle,
12890 				    un->un_ncmds_in_transport);
12891 			} else {
12892 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
12893 				    "processing priority bp:0x%p\n", bp);
12894 			}
12895 
12896 		} else if ((bp = un->un_waitq_headp) != NULL) {
12897 			/*
12898 			 * A command on the waitq is ready to go, but do not
12899 			 * send it if:
12900 			 *
12901 			 * (1) the throttle limit has been reached, or
12902 			 * (2) a retry is pending, or
12903 			 * (3) a START_STOP_UNIT callback pending, or
12904 			 * (3) a START_STOP_UNIT callback is pending, or
12905 			 *	command is pending.
12906 			 *
12907 			 * For all of these conditions, IO processing will
12908 			 * restart after the condition is cleared.
12909 			 */
12910 			if (un->un_ncmds_in_transport >= un->un_throttle) {
12911 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12912 				    "sd_start_cmds: exiting, "
12913 				    "throttle limit reached!\n");
12914 				goto exit;
12915 			}
12916 			if (un->un_retry_bp != NULL) {
12917 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12918 				    "sd_start_cmds: exiting, retry pending!\n");
12919 				goto exit;
12920 			}
12921 			if (un->un_startstop_timeid != NULL) {
12922 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12923 				    "sd_start_cmds: exiting, "
12924 				    "START_STOP pending!\n");
12925 				goto exit;
12926 			}
12927 			if (un->un_direct_priority_timeid != NULL) {
12928 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
12929 				    "sd_start_cmds: exiting, "
12930 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
12931 				goto exit;
12932 			}
12933 
12934 			/* Dequeue the command */
12935 			un->un_waitq_headp = bp->av_forw;
12936 			if (un->un_waitq_headp == NULL) {
12937 				un->un_waitq_tailp = NULL;
12938 			}
12939 			bp->av_forw = NULL;
12940 			statp = kstat_waitq_to_runq;
12941 			SD_TRACE(SD_LOG_IO_CORE, un,
12942 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
12943 
12944 		} else {
12945 			/* No work to do so bail out now */
12946 			SD_TRACE(SD_LOG_IO_CORE, un,
12947 			    "sd_start_cmds: no more work, exiting!\n");
12948 			goto exit;
12949 		}
12950 
12951 		/*
12952 		 * Reset the state to normal. This is the mechanism by which
12953 		 * the state transitions from either SD_STATE_RWAIT or
12954 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
12955 		 * If state is SD_STATE_PM_CHANGING then this command is
12956 		 * part of the device power control and the state must
12957 		 * not be put back to normal. Doing so would
12958 		 * allow new commands to proceed when they shouldn't,
12959 		 * as the device may be going off.
12960 		 */
12961 		if ((un->un_state != SD_STATE_SUSPENDED) &&
12962 		    (un->un_state != SD_STATE_PM_CHANGING)) {
12963 			New_state(un, SD_STATE_NORMAL);
12964 		}
12965 
12966 		xp = SD_GET_XBUF(bp);
12967 		ASSERT(xp != NULL);
12968 
12969 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
12970 		/*
12971 		 * Allocate the scsi_pkt if we need one, or attach DMA
12972 		 * resources if we have a scsi_pkt that needs them. The
12973 		 * latter should only occur for commands that are being
12974 		 * retried.
12975 		 */
12976 		if ((xp->xb_pktp == NULL) ||
12977 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
12978 #else
12979 		if (xp->xb_pktp == NULL) {
12980 #endif
12981 			/*
12982 			 * There is no scsi_pkt allocated for this buf. Call
12983 			 * the initpkt function to allocate & init one.
12984 			 *
12985 			 * The scsi_init_pkt runout callback functionality is
12986 			 * implemented as follows:
12987 			 *
12988 			 * 1) The initpkt function always calls
12989 			 *    scsi_init_pkt(9F) with sdrunout specified as the
12990 			 *    callback routine.
12991 			 * 2) A successful packet allocation is initialized and
12992 			 *    the I/O is transported.
12993 			 * 3) The I/O associated with an allocation resource
12994 			 *    failure is left on its queue to be retried via
12995 			 *    runout or the next I/O.
12996 			 * 4) The I/O associated with a DMA error is removed
12997 			 *    from the queue and failed with EIO. Processing of
12998 			 *    the transport queues is also halted to be
12999 			 *    restarted via runout or the next I/O.
13000 			 * 5) The I/O associated with a CDB size or packet
13001 			 *    size error is removed from the queue and failed
13002 			 *    with EIO. Processing of the transport queues is
13003 			 *    continued.
13004 			 *
13005 			 * Note: there is no interface for canceling a runout
13006 			 * callback. To prevent the driver from detaching or
13007 			 * suspending while a runout is pending, the driver
13008 			 * state is set to SD_STATE_RWAIT.
13009 			 *
13010 			 * Note: using the scsi_init_pkt callback facility can
13011 			 * result in an I/O request persisting at the head of
13012 			 * the list which cannot be satisfied even after
13013 			 * multiple retries. In the future the driver may
13014 			 * implement some kind of maximum runout count before
13015 			 * failing an I/O.
13016 			 *
13017 			 * Note: the use of funcp below may seem superfluous,
13018 			 * but it helps warlock figure out the correct
13019 			 * initpkt function calls (see [s]sd.wlcmd).
13020 			 */
13021 			struct scsi_pkt	*pktp;
13022 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
13023 
13024 			ASSERT(bp != un->un_rqs_bp);
13025 
13026 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
13027 			switch ((*funcp)(bp, &pktp)) {
13028 			case  SD_PKT_ALLOC_SUCCESS:
13029 				xp->xb_pktp = pktp;
13030 				SD_TRACE(SD_LOG_IO_CORE, un,
13031 				    "sd_start_cmds: SD_PKT_ALLOC_SUCCESS 0x%p\n",
13032 				    pktp);
13033 				goto got_pkt;
13034 
13035 			case SD_PKT_ALLOC_FAILURE:
13036 				/*
13037 				 * Temporary (hopefully) resource depletion.
13038 				 * Since retries and RQS commands always have a
13039 				 * scsi_pkt allocated, these cases should never
13040 				 * get here. So the only cases this needs to
13041 				 * handle is a bp from the waitq (which we put
13042 				 * back onto the waitq for sdrunout), or a bp
13043 				 * sent as an immed_bp (which we just fail).
13044 				 */
13045 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13046 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
13047 
13048 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13049 
13050 				if (bp == immed_bp) {
13051 					/*
13052 					 * If SD_XB_DMA_FREED is clear, then
13053 					 * this is a failure to allocate a
13054 					 * scsi_pkt, and we must fail the
13055 					 * command.
13056 					 */
13057 					if ((xp->xb_pkt_flags &
13058 					    SD_XB_DMA_FREED) == 0) {
13059 						break;
13060 					}
13061 
13062 					/*
13063 					 * If this immediate command is NOT our
13064 					 * un_retry_bp, then we must fail it.
13065 					 */
13066 					if (bp != un->un_retry_bp) {
13067 						break;
13068 					}
13069 
13070 					/*
13071 					 * We get here if this cmd is our
13072 					 * un_retry_bp that was DMAFREED, but
13073 					 * scsi_init_pkt() failed to reallocate
13074 					 * DMA resources when we attempted to
13075 					 * retry it. This can happen when an
13076 					 * mpxio failover is in progress, but
13077 					 * we don't want to just fail the
13078 					 * command in this case.
13079 					 *
13080 					 * Use timeout(9F) to restart it after
13081 					 * a 100ms delay.  We don't want to
13082 					 * let sdrunout() restart it, because
13083 					 * sdrunout() is just supposed to start
13084 					 * commands that are sitting on the
13085 					 * wait queue.  The un_retry_bp stays
13086 					 * set until the command completes, but
13087 					 * sdrunout can be called many times
13088 					 * before that happens.  Since sdrunout
13089 					 * cannot tell if the un_retry_bp is
13090 					 * already in the transport, it could
13091 					 * end up calling scsi_transport() for
13092 					 * the un_retry_bp multiple times.
13093 					 *
13094 					 * Also: don't schedule the callback
13095 					 * if some other callback is already
13096 					 * pending.
13097 					 */
13098 					if (un->un_retry_statp == NULL) {
13099 						/*
13100 						 * restore the kstat pointer to
13101 						 * keep kstat counts coherent
13102 						 * when we do retry the command.
13103 						 */
13104 						un->un_retry_statp =
13105 						    saved_statp;
13106 					}
13107 
13108 					if ((un->un_startstop_timeid == NULL) &&
13109 					    (un->un_retry_timeid == NULL) &&
13110 					    (un->un_direct_priority_timeid ==
13111 					    NULL)) {
13112 
13113 						un->un_retry_timeid =
13114 						    timeout(
13115 						    sd_start_retry_command,
13116 						    un, SD_RESTART_TIMEOUT);
13117 					}
13118 					goto exit;
13119 				}
13120 
13121 #else
13122 				if (bp == immed_bp) {
13123 					break;	/* Just fail the command */
13124 				}
13125 #endif
13126 
13127 				/* Add the buf back to the head of the waitq */
13128 				bp->av_forw = un->un_waitq_headp;
13129 				un->un_waitq_headp = bp;
13130 				if (un->un_waitq_tailp == NULL) {
13131 					un->un_waitq_tailp = bp;
13132 				}
13133 				goto exit;
13134 
13135 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
13136 				/*
13137 				 * HBA DMA resource failure. Fail the command
13138 				 * and continue processing of the queues.
13139 				 */
13140 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13141 				    "sd_start_cmds: "
13142 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
13143 				break;
13144 
13145 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
13146 				/*
13147 				 * Note:x86: Partial DMA mapping not supported
13148 				 * for USCSI commands, and all the needed DMA
13149 				 * resources were not allocated.
13150 				 */
13151 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13152 				    "sd_start_cmds: "
13153 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
13154 				break;
13155 
13156 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
13157 				/*
13158 				 * Note:x86: Request cannot fit into CDB based
13159 				 * on lba and len.
13160 				 */
13161 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13162 				    "sd_start_cmds: "
13163 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
13164 				break;
13165 
13166 			default:
13167 				/* Should NEVER get here! */
13168 				panic("scsi_init_pkt error");
13169 				/*NOTREACHED*/
13170 			}
13171 
13172 			/*
13173 			 * Fatal error in allocating a scsi_pkt for this buf.
13174 			 * Update kstats & return the buf with an error code.
13175 			 * We must use sd_return_failed_command_no_restart() to
13176 			 * avoid a recursive call back into sd_start_cmds().
13177 			 * However this also means that we must keep processing
13178 			 * the waitq here in order to avoid stalling.
13179 			 */
13180 			if (statp == kstat_waitq_to_runq) {
13181 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
13182 			}
13183 			sd_return_failed_command_no_restart(un, bp, EIO);
13184 			if (bp == immed_bp) {
13185 				/* immed_bp is gone by now, so clear this */
13186 				immed_bp = NULL;
13187 			}
13188 			continue;
13189 		}
13190 got_pkt:
13191 		if (bp == immed_bp) {
13192 			/* goto the head of the class.... */
13193 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13194 		}
13195 
13196 		un->un_ncmds_in_transport++;
13197 		SD_UPDATE_KSTATS(un, statp, bp);
13198 
13199 		/*
13200 		 * Call scsi_transport() to send the command to the target.
13201 		 * According to SCSA architecture, we must drop the mutex here
13202 		 * before calling scsi_transport() in order to avoid deadlock.
13203 		 * Note that the scsi_pkt's completion routine can be executed
13204 		 * (from interrupt context) even before the call to
13205 		 * scsi_transport() returns.
13206 		 */
13207 		SD_TRACE(SD_LOG_IO_CORE, un,
13208 		    "sd_start_cmds: calling scsi_transport()\n");
13209 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
13210 
13211 		mutex_exit(SD_MUTEX(un));
13212 		rval = scsi_transport(xp->xb_pktp);
13213 		mutex_enter(SD_MUTEX(un));
13214 
13215 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13216 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
13217 
13218 		switch (rval) {
13219 		case TRAN_ACCEPT:
13220 			/* Clear this with every pkt accepted by the HBA */
13221 			un->un_tran_fatal_count = 0;
13222 			break;	/* Success; try the next cmd (if any) */
13223 
13224 		case TRAN_BUSY:
13225 			un->un_ncmds_in_transport--;
13226 			ASSERT(un->un_ncmds_in_transport >= 0);
13227 
13228 			/*
13229 			 * Don't retry request sense, the sense data
13230 			 * is lost when another request is sent.
13231 			 * Free up the rqs buf and retry
13232 			 * the original failed cmd.  Update kstat.
13233 			 */
13234 			if (bp == un->un_rqs_bp) {
13235 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13236 				bp = sd_mark_rqs_idle(un, xp);
13237 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
13238 				    NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
13239 				    kstat_waitq_enter);
13240 				goto exit;
13241 			}
13242 
13243 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13244 			/*
13245 			 * Free the DMA resources for the scsi_pkt. This will
13246 			 * allow mpxio to select another path the next time
13247 			 * we call scsi_transport() with this scsi_pkt.
13248 			 * See sdintr() for the rationale behind this.
13249 			 */
13250 			if ((un->un_f_is_fibre == TRUE) &&
13251 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
13252 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
13253 				scsi_dmafree(xp->xb_pktp);
13254 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
13255 			}
13256 #endif
13257 
13258 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
13259 				/*
13260 				 * Commands that are SD_PATH_DIRECT_PRIORITY
13261 				 * are for error recovery situations. These do
13262 				 * not use the normal command waitq, so if they
13263 				 * get a TRAN_BUSY we cannot put them back onto
13264 				 * the waitq for later retry. One possible
13265 				 * problem is that there could already be some
13266 				 * other command on un_retry_bp that is waiting
13267 				 * for this one to complete, so we would be
13268 				 * deadlocked if we put this command back onto
13269 				 * the waitq for later retry (since un_retry_bp
13270 				 * must complete before the driver gets back to
13271 				 * commands on the waitq).
13272 				 *
13273 				 * To avoid deadlock we must schedule a callback
13274 				 * that will restart this command after a set
13275 				 * interval.  This should keep retrying for as
13276 				 * long as the underlying transport keeps
13277 				 * returning TRAN_BUSY (just like for other
13278 				 * commands).  Use the same timeout interval as
13279 				 * for the ordinary TRAN_BUSY retry.
13280 				 */
13281 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13282 				    "sd_start_cmds: scsi_transport() returned "
13283 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
13284 
13285 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13286 				un->un_direct_priority_timeid =
13287 				    timeout(sd_start_direct_priority_command,
13288 				    bp, SD_BSY_TIMEOUT / 500);
13289 
13290 				goto exit;
13291 			}
13292 
13293 			/*
13294 			 * For TRAN_BUSY, we want to reduce the throttle value,
13295 			 * unless we are retrying a command.
13296 			 */
13297 			if (bp != un->un_retry_bp) {
13298 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
13299 			}
13300 
13301 			/*
13302 			 * Set up the bp to be tried again 10 ms later.
13303 			 * Note:x86: Is there a timeout value in the sd_lun
13304 			 * for this condition?
13305 			 */
13306 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
13307 			    kstat_runq_back_to_waitq);
13308 			goto exit;
13309 
13310 		case TRAN_FATAL_ERROR:
13311 			un->un_tran_fatal_count++;
13312 			/* FALLTHRU */
13313 
13314 		case TRAN_BADPKT:
13315 		default:
13316 			un->un_ncmds_in_transport--;
13317 			ASSERT(un->un_ncmds_in_transport >= 0);
13318 
13319 			/*
13320 			 * If this is our REQUEST SENSE command with a
13321 			 * transport error, we must get back the pointers
13322 			 * to the original buf, and mark the REQUEST
13323 			 * SENSE command as "available".
13324 			 */
13325 			if (bp == un->un_rqs_bp) {
13326 				bp = sd_mark_rqs_idle(un, xp);
13327 				xp = SD_GET_XBUF(bp);
13328 			} else {
13329 				/*
13330 				 * Legacy behavior: do not update transport
13331 				 * error count for request sense commands.
13332 				 */
13333 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
13334 			}
13335 
13336 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
13337 			sd_print_transport_rejected_message(un, xp, rval);
13338 
13339 			/*
13340 			 * We must use sd_return_failed_command_no_restart() to
13341 			 * avoid a recursive call back into sd_start_cmds().
13342 			 * However this also means that we must keep processing
13343 			 * the waitq here in order to avoid stalling.
13344 			 */
13345 			sd_return_failed_command_no_restart(un, bp, EIO);
13346 
13347 			/*
13348 			 * Notify any threads waiting in sd_ddi_suspend() that
13349 			 * a command completion has occurred.
13350 			 */
13351 			if (un->un_state == SD_STATE_SUSPENDED) {
13352 				cv_broadcast(&un->un_disk_busy_cv);
13353 			}
13354 
13355 			if (bp == immed_bp) {
13356 				/* immed_bp is gone by now, so clear this */
13357 				immed_bp = NULL;
13358 			}
13359 			break;
13360 		}
13361 
13362 	} while (immed_bp == NULL);
13363 
13364 exit:
13365 	ASSERT(mutex_owned(SD_MUTEX(un)));
13366 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
13367 }
13368 
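/*
 * Usage sketch (illustrative only, not compiled into the driver):
 * sd_start_cmds() is always entered with SD_MUTEX held.  Passing a
 * NULL bp drains commands from the wait queue; passing a specific bp
 * transports that one command immediately as an "immed_bp", which is
 * how the retry and direct-priority restart paths below use it:
 *
 *	mutex_enter(SD_MUTEX(un));
 *	sd_start_cmds(un, NULL);		(drain the waitq)
 *	sd_start_cmds(un, un->un_retry_bp);	(push one command now)
 *	mutex_exit(SD_MUTEX(un));
 */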
13369 
13370 /*
13371  *    Function: sd_return_command
13372  *
13373  * Description: Returns a command to its originator (with or without an
13374  *		error).  Also starts commands waiting to be transported
13375  *		to the target.
13376  *
13377  *     Context: May be called from interrupt, kernel, or timeout context
13378  */
13379 
13380 static void
13381 sd_return_command(struct sd_lun *un, struct buf *bp)
13382 {
13383 	struct sd_xbuf *xp;
13384 #if defined(__i386) || defined(__amd64)
13385 	struct scsi_pkt *pktp;
13386 #endif
13387 
13388 	ASSERT(bp != NULL);
13389 	ASSERT(un != NULL);
13390 	ASSERT(mutex_owned(SD_MUTEX(un)));
13391 	ASSERT(bp != un->un_rqs_bp);
13392 	xp = SD_GET_XBUF(bp);
13393 	ASSERT(xp != NULL);
13394 
13395 #if defined(__i386) || defined(__amd64)
13396 	pktp = SD_GET_PKTP(bp);
13397 #endif
13398 
13399 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
13400 
13401 #if defined(__i386) || defined(__amd64)
13402 	/*
13403 	 * Note:x86: check for the "sdrestart failed" case.
13404 	 */
13405 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
13406 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
13407 	    (xp->xb_pktp->pkt_resid == 0)) {
13408 
13409 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
13410 			/*
13411 			 * Successfully set up next portion of cmd
13412 			 * transfer, try sending it
13413 			 */
13414 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
13415 			    NULL, NULL, 0, (clock_t)0, NULL);
13416 			sd_start_cmds(un, NULL);
13417 			return;	/* Note:x86: need a return here? */
13418 		}
13419 	}
13420 #endif
13421 
13422 	/*
13423 	 * If this is the failfast bp, clear it from un_failfast_bp. This
13424 	 * can happen if upon being re-tried the failfast bp either
13425 	 * succeeded or encountered another error (possibly even a different
13426 	 * error than the one that precipitated the failfast state, but in
13427 	 * that case it would have had to exhaust retries as well). Regardless,
13428 	 * this should not occur whenever the instance is in the active
13429 	 * this should not occur while the instance is in the active
13430 	 */
13431 	if (bp == un->un_failfast_bp) {
13432 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13433 		un->un_failfast_bp = NULL;
13434 	}
13435 
13436 	/*
13437 	 * Clear the failfast state upon successful completion of ANY cmd.
13438 	 */
13439 	if (bp->b_error == 0) {
13440 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13441 	}
13442 
13443 	/*
13444 	 * This is used if the command was retried one or more times. Show that
13445 	 * we are done with it, and allow processing of the waitq to resume.
13446 	 */
13447 	if (bp == un->un_retry_bp) {
13448 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13449 		    "sd_return_command: un:0x%p: "
13450 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13451 		un->un_retry_bp = NULL;
13452 		un->un_retry_statp = NULL;
13453 	}
13454 
13455 	SD_UPDATE_RDWR_STATS(un, bp);
13456 	SD_UPDATE_PARTITION_STATS(un, bp);
13457 
13458 	switch (un->un_state) {
13459 	case SD_STATE_SUSPENDED:
13460 		/*
13461 		 * Notify any threads waiting in sd_ddi_suspend() that
13462 		 * a command completion has occurred.
13463 		 */
13464 		cv_broadcast(&un->un_disk_busy_cv);
13465 		break;
13466 	default:
13467 		sd_start_cmds(un, NULL);
13468 		break;
13469 	}
13470 
13471 	/* Return this command up the iodone chain to its originator. */
13472 	mutex_exit(SD_MUTEX(un));
13473 
13474 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13475 	xp->xb_pktp = NULL;
13476 
13477 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13478 
13479 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13480 	mutex_enter(SD_MUTEX(un));
13481 
13482 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
13483 }
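
/*
 * Illustrative sketch of the iodone-chain dispatch used above (the
 * typedef name is hypothetical): xb_chain_iodone is an index into a
 * table of function pointers, so the destroy routine that matches
 * this buf's layering is selected without a switch:
 *
 *	typedef void (*sd_destroypkt_func)(struct buf *);
 *	static sd_destroypkt_func sd_destroypkt_map[] = { ... };
 *
 *	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
 */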
13484 
13485 
13486 /*
13487  *    Function: sd_return_failed_command
13488  *
13489  * Description: Command completion when an error occurred.
13490  *
13491  *     Context: May be called from interrupt context
13492  */
13493 
13494 static void
13495 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
13496 {
13497 	ASSERT(bp != NULL);
13498 	ASSERT(un != NULL);
13499 	ASSERT(mutex_owned(SD_MUTEX(un)));
13500 
13501 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13502 	    "sd_return_failed_command: entry\n");
13503 
13504 	/*
13505 	 * b_resid could already be nonzero due to a partial data
13506 	 * transfer, so do not change it here.
13507 	 */
13508 	SD_BIOERROR(bp, errcode);
13509 
13510 	sd_return_command(un, bp);
13511 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13512 	    "sd_return_failed_command: exit\n");
13513 }
13514 
13515 
13516 /*
13517  *    Function: sd_return_failed_command_no_restart
13518  *
13519  * Description: Same as sd_return_failed_command, but ensures that no
13520  *		call back into sd_start_cmds will be issued.
13521  *
13522  *     Context: May be called from interrupt context
13523  */
13524 
13525 static void
13526 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
13527 	int errcode)
13528 {
13529 	struct sd_xbuf *xp;
13530 
13531 	ASSERT(bp != NULL);
13532 	ASSERT(un != NULL);
13533 	ASSERT(mutex_owned(SD_MUTEX(un)));
13534 	xp = SD_GET_XBUF(bp);
13535 	ASSERT(xp != NULL);
13536 	ASSERT(errcode != 0);
13537 
13538 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13539 	    "sd_return_failed_command_no_restart: entry\n");
13540 
13541 	/*
13542 	 * b_resid could already be nonzero due to a partial data
13543 	 * transfer, so do not change it here.
13544 	 */
13545 	SD_BIOERROR(bp, errcode);
13546 
13547 	/*
13548 	 * If this is the failfast bp, clear it. This can happen if the
13549 	 * failfast bp encountered a fatal error when we attempted to
13550 	 * re-try it (such as a scsi_transport(9F) failure).  However
13551 	 * we should NOT be in an active failfast state if the failfast
13552 	 * bp is not NULL.
13553 	 */
13554 	if (bp == un->un_failfast_bp) {
13555 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
13556 		un->un_failfast_bp = NULL;
13557 	}
13558 
13559 	if (bp == un->un_retry_bp) {
13560 		/*
13561 		 * This command was retried one or more times. Show that we are
13562 		 * done with it, and allow processing of the waitq to resume.
13563 		 */
13564 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13565 		    "sd_return_failed_command_no_restart: "
13566 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
13567 		un->un_retry_bp = NULL;
13568 		un->un_retry_statp = NULL;
13569 	}
13570 
13571 	SD_UPDATE_RDWR_STATS(un, bp);
13572 	SD_UPDATE_PARTITION_STATS(un, bp);
13573 
13574 	mutex_exit(SD_MUTEX(un));
13575 
13576 	if (xp->xb_pktp != NULL) {
13577 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
13578 		xp->xb_pktp = NULL;
13579 	}
13580 
13581 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
13582 
13583 	mutex_enter(SD_MUTEX(un));
13584 
13585 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13586 	    "sd_return_failed_command_no_restart: exit\n");
13587 }
13588 
13589 
13590 /*
13591  *    Function: sd_retry_command
13592  *
13593  * Description: Queue up a command for retry, or (optionally) fail it
13594  *		if retry counts are exhausted.
13595  *
13596  *   Arguments: un - Pointer to the sd_lun struct for the target.
13597  *
13598  *		bp - Pointer to the buf for the command to be retried.
13599  *
13600  *		retry_check_flag - Flag to see which (if any) of the retry
13601  *		   counts should be decremented/checked. If the indicated
13602  *		   retry count is exhausted, then the command will not be
13603  *		   retried; it will be failed instead. This should use a
13604  *		   value equal to one of the following:
13605  *
13606  *			SD_RETRIES_NOCHECK, SD_RETRIES_STANDARD,
13607  *			SD_RETRIES_VICTIM, SD_RETRIES_UA, or
13608  *			SD_RETRIES_BUSY
13609  *
13610  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
13611  *		   if the check should be made to see if FLAG_ISOLATE is set
13612  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
13613  *		   not retried, it is simply failed.
13614  *
13615  *		user_funcp - Ptr to function to call before dispatching the
13616  *		   command. May be NULL if no action needs to be performed.
13617  *		   (Primarily intended for printing messages.)
13618  *
13619  *		user_arg - Optional argument to be passed along to
13620  *		   the user_funcp call.
13621  *
13622  *		failure_code - errno return code to set in the bp if the
13623  *		   command is going to be failed.
13624  *
13625  *		retry_delay - Retry delay interval in (clock_t) units. May
13626  *		   be zero, which indicates that the command should be
13627  *		   retried immediately (i.e., without an intervening delay).
13628  *
13629  *		statp - Ptr to kstat function to be updated if the command
13630  *		   is queued for a delayed retry. May be NULL if no kstat
13631  *		   update is desired.
13632  *
13633  *     Context: May be called from interrupt context.
13634  */
13635 
13636 static void
13637 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
13638 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
13639 	code), void *user_arg, int failure_code,  clock_t retry_delay,
13640 	void (*statp)(kstat_io_t *))
13641 {
13642 	struct sd_xbuf	*xp;
13643 	struct scsi_pkt	*pktp;
13644 
13645 	ASSERT(un != NULL);
13646 	ASSERT(mutex_owned(SD_MUTEX(un)));
13647 	ASSERT(bp != NULL);
13648 	xp = SD_GET_XBUF(bp);
13649 	ASSERT(xp != NULL);
13650 	pktp = SD_GET_PKTP(bp);
13651 	ASSERT(pktp != NULL);
13652 
13653 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13654 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
13655 
13656 	/*
13657 	 * If we are syncing or dumping, fail the command to avoid
13658 	 * recursively calling back into scsi_transport().
13659 	 */
13660 	if (ddi_in_panic()) {
13661 		goto fail_command_no_log;
13662 	}
13663 
13664 	/*
13665 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
13666 	 * log an error and fail the command.
13667 	 */
13668 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
13669 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
13670 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
13671 		sd_dump_memory(un, SD_LOG_IO, "CDB",
13672 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
13673 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
13674 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
13675 		goto fail_command;
13676 	}
13677 
13678 	/*
13679 	 * If we are suspended, then put the command onto the head of
13680 	 * the wait queue since we don't want to start more commands, and
13681 	 * clear un_retry_bp. When we are resumed, we will handle the
13682 	 * commands on the wait queue.
13683 	 */
13684 	switch (un->un_state) {
13685 	case SD_STATE_SUSPENDED:
13686 	case SD_STATE_DUMPING:
13687 		bp->av_forw = un->un_waitq_headp;
13688 		un->un_waitq_headp = bp;
13689 		if (un->un_waitq_tailp == NULL) {
13690 			un->un_waitq_tailp = bp;
13691 		}
13692 		if (bp == un->un_retry_bp) {
13693 			un->un_retry_bp = NULL;
13694 			un->un_retry_statp = NULL;
13695 		}
13696 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13697 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
13698 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
13699 		return;
13700 	default:
13701 		break;
13702 	}
13703 
13704 	/*
13705 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
13706 	 * is set; if it is then we do not want to retry the command.
13707 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
13708 	 */
13709 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
13710 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
13711 			goto fail_command;
13712 		}
13713 	}
13714 
13715 
13716 	/*
13717 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
13718 	 * command timeout or a selection timeout has occurred. This means
13719 	 * that we were unable to establish any kind of communication with
13720 	 * the target, and subsequent retries and/or commands are likely
13721 	 * to encounter similar results and take a long time to complete.
13722 	 *
13723 	 * If this is a failfast error condition, we need to update the
13724 	 * failfast state, even if this bp does not have B_FAILFAST set.
13725 	 */
13726 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
13727 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
13728 			ASSERT(un->un_failfast_bp == NULL);
13729 			/*
13730 			 * If we are already in the active failfast state, and
13731 			 * another failfast error condition has been detected,
13732 			 * then fail this command if it has B_FAILFAST set.
13733 			 * If B_FAILFAST is clear, then maintain the legacy
13734 	 * behavior of retrying heroically, even though this will
13735 			 * take a lot more time to fail the command.
13736 			 */
13737 			if (bp->b_flags & B_FAILFAST) {
13738 				goto fail_command;
13739 			}
13740 		} else {
13741 			/*
13742 			 * We're not in the active failfast state, but we
13743 			 * have a failfast error condition, so we must begin
13744 			 * transition to the next state. We do this regardless
13745 			 * of whether or not this bp has B_FAILFAST set.
13746 			 */
13747 			if (un->un_failfast_bp == NULL) {
13748 				/*
13749 				 * This is the first bp to meet a failfast
13750 				 * condition so save it on un_failfast_bp &
13751 				 * do normal retry processing. Do not enter
13752 				 * active failfast state yet. This marks
13753 				 * entry into the "failfast pending" state.
13754 				 */
13755 				un->un_failfast_bp = bp;
13756 
13757 			} else if (un->un_failfast_bp == bp) {
13758 				/*
13759 				 * This is the second time *this* bp has
13760 				 * encountered a failfast error condition,
13761 				 * so enter active failfast state & flush
13762 				 * queues as appropriate.
13763 				 */
13764 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
13765 				un->un_failfast_bp = NULL;
13766 				sd_failfast_flushq(un);
13767 
13768 				/*
13769 				 * Fail this bp now if B_FAILFAST set;
13770 				 * otherwise continue with retries. (It would
13771 				 * be pretty ironic if this bp succeeded on a
13772 				 * subsequent retry after we just flushed all
13773 				 * the queues).
13774 				 */
13775 				if (bp->b_flags & B_FAILFAST) {
13776 					goto fail_command;
13777 				}
13778 
13779 #if !defined(lint) && !defined(__lint)
13780 			} else {
13781 				/*
13782 				 * If neither of the preceding conditionals
13783 				 * was true, it means that there is some
13784 				 * *other* bp that has met an initial failfast
13785 				 * condition and is currently either being
13786 				 * retried or is waiting to be retried. In
13787 				 * that case we should perform normal retry
13788 				 * processing on *this* bp, since there is a
13789 				 * chance that the current failfast condition
13790 				 * is transient and recoverable. If that does
13791 				 * not turn out to be the case, then retries
13792 				 * will be cleared when the wait queue is
13793 				 * flushed anyway.
13794 				 */
13795 #endif
13796 			}
13797 		}
13798 	} else {
13799 		/*
13800 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
13801 		 * likely were able to at least establish some level of
13802 		 * communication with the target and subsequent commands
13803 	 * and/or retries are likely to get through to the target.
13804 		 * In this case we want to be aggressive about clearing
13805 		 * the failfast state. Note that this does not affect
13806 		 * the "failfast pending" condition.
13807 		 */
13808 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
13809 	}
13810 
13811 
13812 	/*
13813 	 * Check the specified retry count to see if we can still do
13814 	 * any retries with this pkt before we should fail it.
13815 	 */
13816 	switch (retry_check_flag & SD_RETRIES_MASK) {
13817 	case SD_RETRIES_VICTIM:
13818 		/*
13819 		 * Check the victim retry count. If exhausted, then fall
13820 		 * thru & check against the standard retry count.
13821 		 */
13822 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
13823 			/* Increment count & proceed with the retry */
13824 			xp->xb_victim_retry_count++;
13825 			break;
13826 		}
13827 		/* Victim retries exhausted, fall back to std. retries... */
13828 		/* FALLTHRU */
13829 
13830 	case SD_RETRIES_STANDARD:
13831 		if (xp->xb_retry_count >= un->un_retry_count) {
13832 			/* Retries exhausted, fail the command */
13833 			SD_TRACE(SD_LOG_IO_CORE, un,
13834 			    "sd_retry_command: retries exhausted!\n");
13835 			/*
13836 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
13837 			 * commands with nonzero pkt_resid.
13838 			 */
13839 			if ((pktp->pkt_reason == CMD_CMPLT) &&
13840 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
13841 			    (pktp->pkt_resid != 0)) {
13842 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
13843 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
13844 					SD_UPDATE_B_RESID(bp, pktp);
13845 				}
13846 			}
13847 			goto fail_command;
13848 		}
13849 		xp->xb_retry_count++;
13850 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13851 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13852 		break;
13853 
13854 	case SD_RETRIES_UA:
13855 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
13856 			/* Retries exhausted, fail the command */
13857 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13858 			    "Unit Attention retries exhausted. "
13859 			    "Check the target.\n");
13860 			goto fail_command;
13861 		}
13862 		xp->xb_ua_retry_count++;
13863 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13864 		    "sd_retry_command: retry count:%d\n",
13865 		    xp->xb_ua_retry_count);
13866 		break;
13867 
13868 	case SD_RETRIES_BUSY:
13869 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
13870 			/* Retries exhausted, fail the command */
13871 			SD_TRACE(SD_LOG_IO_CORE, un,
13872 			    "sd_retry_command: retries exhausted!\n");
13873 			goto fail_command;
13874 		}
13875 		xp->xb_retry_count++;
13876 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13877 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
13878 		break;
13879 
13880 	case SD_RETRIES_NOCHECK:
13881 	default:
13882 		/* No retry count to check. Just proceed with the retry */
13883 		break;
13884 	}
13885 
13886 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
13887 
13888 	/*
13889 	 * If we were given a zero timeout, we must attempt to retry the
13890 	 * command immediately (ie, without a delay).
13891 	 */
13892 	if (retry_delay == 0) {
13893 		/*
13894 		 * Check some limiting conditions to see if we can actually
13895 		 * do the immediate retry.  If we cannot, then we must
13896 		 * fall back to queueing up a delayed retry.
13897 		 */
13898 		if (un->un_ncmds_in_transport >= un->un_throttle) {
13899 			/*
13900 			 * We are at the throttle limit for the target,
13901 			 * fall back to delayed retry.
13902 			 */
13903 			retry_delay = SD_BSY_TIMEOUT;
13904 			statp = kstat_waitq_enter;
13905 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13906 			    "sd_retry_command: immed. retry hit "
13907 			    "throttle!\n");
13908 		} else {
13909 			/*
13910 			 * We're clear to proceed with the immediate retry.
13911 			 * First call the user-provided function (if any)
13912 			 */
13913 			if (user_funcp != NULL) {
13914 				(*user_funcp)(un, bp, user_arg,
13915 				    SD_IMMEDIATE_RETRY_ISSUED);
13916 #ifdef __lock_lint
13917 				sd_print_incomplete_msg(un, bp, user_arg,
13918 				    SD_IMMEDIATE_RETRY_ISSUED);
13919 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
13920 				    SD_IMMEDIATE_RETRY_ISSUED);
13921 				sd_print_sense_failed_msg(un, bp, user_arg,
13922 				    SD_IMMEDIATE_RETRY_ISSUED);
13923 #endif
13924 			}
13925 
13926 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13927 			    "sd_retry_command: issuing immediate retry\n");
13928 
13929 			/*
13930 			 * Call sd_start_cmds() to transport the command to
13931 			 * the target.
13932 			 */
13933 			sd_start_cmds(un, bp);
13934 
13935 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13936 			    "sd_retry_command exit\n");
13937 			return;
13938 		}
13939 	}
13940 
13941 	/*
13942 	 * Set up to retry the command after a delay.
13943 	 * First call the user-provided function (if any)
13944 	 */
13945 	if (user_funcp != NULL) {
13946 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
13947 	}
13948 
13949 	sd_set_retry_bp(un, bp, retry_delay, statp);
13950 
13951 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13952 	return;
13953 
13954 fail_command:
13955 
13956 	if (user_funcp != NULL) {
13957 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
13958 	}
13959 
13960 fail_command_no_log:
13961 
13962 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
13963 	    "sd_retry_command: returning failed command\n");
13964 
13965 	sd_return_failed_command(un, bp, failure_code);
13966 
13967 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
13968 }
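
/*
 * Usage sketch (illustrative): a typical delayed retry as issued
 * elsewhere in this file -- check/decrement the standard retry count,
 * fail the command with EIO once it is exhausted, and otherwise
 * re-queue it after SD_BSY_TIMEOUT / 500 ticks, updating the waitq
 * kstat:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
 *	    NULL, NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
 */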
13969 
13970 
13971 /*
13972  *    Function: sd_set_retry_bp
13973  *
13974  * Description: Set up the given bp for retry.
13975  *
13976  *   Arguments: un - ptr to associated softstate
13977  *		bp - ptr to buf(9S) for the command
13978  *		retry_delay - time interval before issuing retry (may be 0)
13979  *		statp - optional pointer to kstat function
13980  *
13981  *     Context: May be called under interrupt context
13982  */
13983 
13984 static void
13985 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
13986 	void (*statp)(kstat_io_t *))
13987 {
13988 	ASSERT(un != NULL);
13989 	ASSERT(mutex_owned(SD_MUTEX(un)));
13990 	ASSERT(bp != NULL);
13991 
13992 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
13993 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
13994 
13995 	/*
13996 	 * Indicate that the command is being retried. This will not allow any
13997 	 * other commands on the wait queue to be transported to the target
13998 	 * until this command has been completed (success or failure). The
13999 	 * "retry command" is not transported to the target until the given
14000 	 * time delay expires, unless the user specified a 0 retry_delay.
14001 	 *
14002 	 * Note: the timeout(9F) callback routine is what actually calls
14003 	 * sd_start_cmds() to transport the command, with the exception of a
14004 	 * zero retry_delay. The only current user of a zero retry delay
14005 	 * is the case where a START_STOP_UNIT is sent to spin up a device.
14006 	 */
14007 	if (un->un_retry_bp == NULL) {
14008 		ASSERT(un->un_retry_statp == NULL);
14009 		un->un_retry_bp = bp;
14010 
14011 		/*
14012 		 * If the user has not specified a delay the command should
14013 		 * be queued and no timeout should be scheduled.
14014 		 */
14015 		if (retry_delay == 0) {
14016 			/*
14017 			 * Save the kstat pointer that will be used in the
14018 			 * call to SD_UPDATE_KSTATS() below, so that
14019 			 * sd_start_cmds() can correctly decrement the waitq
14020 			 * count when it is time to transport this command.
14021 			 */
14022 			un->un_retry_statp = statp;
14023 			goto done;
14024 		}
14025 	}
14026 
14027 	if (un->un_retry_bp == bp) {
14028 		/*
14029 		 * Save the kstat pointer that will be used in the call to
14030 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
14031 		 * correctly decrement the waitq count when it is time to
14032 		 * transport this command.
14033 		 */
14034 		un->un_retry_statp = statp;
14035 
14036 		/*
14037 		 * Schedule a timeout if:
14038 		 *   1) The user has specified a delay.
14039 		 *   2) There is not a START_STOP_UNIT callback pending.
14040 		 *
14041 		 * If no delay has been specified, then it is up to the caller
14042 		 * to ensure that IO processing continues without stalling.
14043 		 * Effectively, this means that the caller will issue the
14044 		 * required call to sd_start_cmds(). The START_STOP_UNIT
14045 		 * callback does this after the START STOP UNIT command has
14046 		 * completed. In either of these cases we should not schedule
14047 		 * a timeout callback here.  Also don't schedule the timeout if
14048 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
14049 		 */
14050 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
14051 		    (un->un_direct_priority_timeid == NULL)) {
14052 			un->un_retry_timeid =
14053 			    timeout(sd_start_retry_command, un, retry_delay);
14054 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14055 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
14056 			    " bp:0x%p un_retry_timeid:0x%p\n",
14057 			    un, bp, un->un_retry_timeid);
14058 		}
14059 	} else {
14060 		/*
14061 		 * We only get in here if there is already another command
14062 		 * waiting to be retried.  In this case, we just put the
14063 		 * given command onto the wait queue, so it can be transported
14064 		 * after the current retry command has completed.
14065 		 *
14066 		 * Also we have to make sure that if the command at the head
14067 		 * of the wait queue is the un_failfast_bp, that we do not
14068 		 * put ahead of it any other commands that are to be retried.
14069 		 */
14070 		if ((un->un_failfast_bp != NULL) &&
14071 		    (un->un_failfast_bp == un->un_waitq_headp)) {
14072 			/*
14073 			 * Enqueue this command AFTER the first command on
14074 			 * the wait queue (which is also un_failfast_bp).
14075 			 */
14076 			bp->av_forw = un->un_waitq_headp->av_forw;
14077 			un->un_waitq_headp->av_forw = bp;
14078 			if (un->un_waitq_headp == un->un_waitq_tailp) {
14079 				un->un_waitq_tailp = bp;
14080 			}
14081 		} else {
14082 			/* Enqueue this command at the head of the waitq. */
14083 			bp->av_forw = un->un_waitq_headp;
14084 			un->un_waitq_headp = bp;
14085 			if (un->un_waitq_tailp == NULL) {
14086 				un->un_waitq_tailp = bp;
14087 			}
14088 		}
14089 
14090 		if (statp == NULL) {
14091 			statp = kstat_waitq_enter;
14092 		}
14093 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14094 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
14095 	}
14096 
14097 done:
14098 	if (statp != NULL) {
14099 		SD_UPDATE_KSTATS(un, statp, bp);
14100 	}
14101 
14102 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14103 	    "sd_set_retry_bp: exit un:0x%p\n", un);
14104 }
14105 
14106 
14107 /*
14108  *    Function: sd_start_retry_command
14109  *
14110  * Description: Start the command that has been waiting on the target's
14111  *		retry queue.  Called from timeout(9F) context after the
14112  *		retry delay interval has expired.
14113  *
14114  *   Arguments: arg - pointer to associated softstate for the device.
14115  *
14116  *     Context: timeout(9F) thread context.  May not sleep.
14117  */
14118 
14119 static void
14120 sd_start_retry_command(void *arg)
14121 {
14122 	struct sd_lun *un = arg;
14123 
14124 	ASSERT(un != NULL);
14125 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14126 
14127 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14128 	    "sd_start_retry_command: entry\n");
14129 
14130 	mutex_enter(SD_MUTEX(un));
14131 
14132 	un->un_retry_timeid = NULL;
14133 
14134 	if (un->un_retry_bp != NULL) {
14135 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14136 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
14137 		    un, un->un_retry_bp);
14138 		sd_start_cmds(un, un->un_retry_bp);
14139 	}
14140 
14141 	mutex_exit(SD_MUTEX(un));
14142 
14143 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14144 	    "sd_start_retry_command: exit\n");
14145 }
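
/*
 * Illustrative sketch of the timeout(9F) idiom used for
 * un_retry_timeid: the id returned by timeout() is saved so the
 * callback can later be cancelled with untimeout(9F), and the
 * callback clears the saved id under SD_MUTEX before doing any work,
 * so a stale id is never left behind:
 *
 *	un->un_retry_timeid =
 *	    timeout(sd_start_retry_command, un, retry_delay);
 *	...
 *	mutex_enter(SD_MUTEX(un));
 *	un->un_retry_timeid = NULL;
 *	(restart un_retry_bp, if any)
 *	mutex_exit(SD_MUTEX(un));
 */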
14146 
14147 
14148 /*
14149  *    Function: sd_start_direct_priority_command
14150  *
14151  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
14152  *		received TRAN_BUSY when we called scsi_transport() to send it
14153  *		to the underlying HBA. This function is called from timeout(9F)
14154  *		context after the delay interval has expired.
14155  *
14156  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
14157  *
14158  *     Context: timeout(9F) thread context.  May not sleep.
14159  */
14160 
14161 static void
14162 sd_start_direct_priority_command(void *arg)
14163 {
14164 	struct buf	*priority_bp = arg;
14165 	struct sd_lun	*un;
14166 
14167 	ASSERT(priority_bp != NULL);
14168 	un = SD_GET_UN(priority_bp);
14169 	ASSERT(un != NULL);
14170 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14171 
14172 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14173 	    "sd_start_direct_priority_command: entry\n");
14174 
14175 	mutex_enter(SD_MUTEX(un));
14176 	un->un_direct_priority_timeid = NULL;
14177 	sd_start_cmds(un, priority_bp);
14178 	mutex_exit(SD_MUTEX(un));
14179 
14180 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14181 	    "sd_start_direct_priority_command: exit\n");
14182 }
14183 
14184 
14185 /*
14186  *    Function: sd_send_request_sense_command
14187  *
14188  * Description: Sends a REQUEST SENSE command to the target
14189  *
14190  *     Context: May be called from interrupt context.
14191  */
14192 
14193 static void
14194 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
14195 	struct scsi_pkt *pktp)
14196 {
14197 	ASSERT(bp != NULL);
14198 	ASSERT(un != NULL);
14199 	ASSERT(mutex_owned(SD_MUTEX(un)));
14200 
14201 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
14202 	    "entry: buf:0x%p\n", bp);
14203 
14204 	/*
14205 	 * If we are syncing or dumping, then fail the command to avoid a
14206 	 * recursive callback into scsi_transport(). Also fail the command
14207 	 * if we are suspended (legacy behavior).
14208 	 */
14209 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
14210 	    (un->un_state == SD_STATE_DUMPING)) {
14211 		sd_return_failed_command(un, bp, EIO);
14212 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14213 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
14214 		return;
14215 	}
14216 
14217 	/*
14218 	 * Retry the failed command and don't issue the request sense if:
14219 	 *    1) the sense buf is busy
14220 	 *    2) we have 1 or more outstanding commands on the target
14221 	 *    (the sense data will be cleared or invalidated anyway)
14222 	 *
14223 	 * Note: There could be an issue with not checking a retry limit here;
14224 	 * the problem is determining which retry limit to check.
14225 	 */
14226 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
14227 		/* Don't retry if the command is flagged as non-retryable */
14228 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
14229 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14230 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
14231 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14232 			    "sd_send_request_sense_command: "
14233 			    "at full throttle, retrying exit\n");
14234 		} else {
14235 			sd_return_failed_command(un, bp, EIO);
14236 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14237 			    "sd_send_request_sense_command: "
14238 			    "at full throttle, non-retryable exit\n");
14239 		}
14240 		return;
14241 	}
14242 
14243 	sd_mark_rqs_busy(un, bp);
14244 	sd_start_cmds(un, un->un_rqs_bp);
14245 
14246 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14247 	    "sd_send_request_sense_command: exit\n");
14248 }
14249 
14250 
14251 /*
14252  *    Function: sd_mark_rqs_busy
14253  *
14254  * Description: Indicate that the request sense bp for this instance is
14255  *		in use.
14256  *
14257  *     Context: May be called under interrupt context
14258  */
14259 
14260 static void
14261 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
14262 {
14263 	struct sd_xbuf	*sense_xp;
14264 
14265 	ASSERT(un != NULL);
14266 	ASSERT(bp != NULL);
14267 	ASSERT(mutex_owned(SD_MUTEX(un)));
14268 	ASSERT(un->un_sense_isbusy == 0);
14269 
14270 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
14271 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
14272 
14273 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
14274 	ASSERT(sense_xp != NULL);
14275 
14276 	SD_INFO(SD_LOG_IO, un,
14277 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
14278 
14279 	ASSERT(sense_xp->xb_pktp != NULL);
14280 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
14281 	    == (FLAG_SENSING | FLAG_HEAD));
14282 
14283 	un->un_sense_isbusy = 1;
14284 	un->un_rqs_bp->b_resid = 0;
14285 	sense_xp->xb_pktp->pkt_resid  = 0;
14286 	sense_xp->xb_pktp->pkt_reason = 0;
14287 
14288 	/* So we can get back the bp at interrupt time! */
14289 	sense_xp->xb_sense_bp = bp;
14290 
14291 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
14292 
14293 	/*
14294 	 * Mark this buf as awaiting sense data. (This is already set in
14295 	 * the pkt_flags for the RQS packet.)
14296 	 */
14297 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
14298 
14299 	sense_xp->xb_retry_count	= 0;
14300 	sense_xp->xb_victim_retry_count = 0;
14301 	sense_xp->xb_ua_retry_count	= 0;
14302 	sense_xp->xb_nr_retry_count 	= 0;
14303 	sense_xp->xb_dma_resid  = 0;
14304 
14305 	/* Clean up the fields for auto-request sense */
14306 	sense_xp->xb_sense_status = 0;
14307 	sense_xp->xb_sense_state  = 0;
14308 	sense_xp->xb_sense_resid  = 0;
14309 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
14310 
14311 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
14312 }
14313 
14314 
14315 /*
14316  *    Function: sd_mark_rqs_idle
14317  *
14318  * Description: SD_MUTEX must be held continuously through this routine
14319  *		to prevent reuse of the rqs struct before the caller can
14320  *		complete its processing.
14321  *
14322  * Return Code: Pointer to the RQS buf
14323  *
14324  *     Context: May be called under interrupt context
14325  */
14326 
14327 static struct buf *
14328 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
14329 {
14330 	struct buf *bp;
14331 	ASSERT(un != NULL);
14332 	ASSERT(sense_xp != NULL);
14333 	ASSERT(mutex_owned(SD_MUTEX(un)));
14334 	ASSERT(un->un_sense_isbusy != 0);
14335 
14336 	un->un_sense_isbusy = 0;
14337 	bp = sense_xp->xb_sense_bp;
14338 	sense_xp->xb_sense_bp = NULL;
14339 
14340 	/* This pkt is no longer interested in getting sense data */
14341 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
14342 
14343 	return (bp);
14344 }
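
/*
 * Illustrative sketch of the request-sense handshake implemented by
 * the busy/idle pair above: the original command's pkt is tagged
 * FLAG_SENSING and its buf is stashed in xb_sense_bp, so that when
 * the REQUEST SENSE completes, sdintr() can recover the original buf
 * and pair it with the returned sense data:
 *
 *	sense_xp->xb_sense_bp = bp;				(busy)
 *	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
 *	...
 *	bp = sense_xp->xb_sense_bp;				(idle)
 *	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
 */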
14345 
14346 
14347 
14348 /*
14349  *    Function: sd_alloc_rqs
14350  *
14351  * Description: Set up the unit to receive auto request sense data
14352  *
14353  * Return Code: DDI_SUCCESS or DDI_FAILURE
14354  *
14355  *     Context: Called under attach(9E) context
14356  */
14357 
14358 static int
14359 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
14360 {
14361 	struct sd_xbuf *xp;
14362 
14363 	ASSERT(un != NULL);
14364 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14365 	ASSERT(un->un_rqs_bp == NULL);
14366 	ASSERT(un->un_rqs_pktp == NULL);
14367 
14368 	/*
14369 	 * First allocate the required buf and scsi_pkt structs, then set up
14370 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
14371 	 */
14372 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
14373 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
14374 	if (un->un_rqs_bp == NULL) {
14375 		return (DDI_FAILURE);
14376 	}
14377 
14378 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
14379 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
14380 
14381 	if (un->un_rqs_pktp == NULL) {
14382 		sd_free_rqs(un);
14383 		return (DDI_FAILURE);
14384 	}
14385 
14386 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
14387 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
14388 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
14389 
14390 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
14391 
14392 	/* Set up the other needed members in the ARQ scsi_pkt. */
14393 	un->un_rqs_pktp->pkt_comp   = sdintr;
14394 	un->un_rqs_pktp->pkt_time   = sd_io_time;
14395 	un->un_rqs_pktp->pkt_flags |=
14396 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
14397 
14398 	/*
14399 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
14400 	 * provide any initpkt or destroypkt routines, as we take care of
14401 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
14402 	 */
14403 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14404 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
14405 	xp->xb_pktp = un->un_rqs_pktp;
14406 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
14407 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
14408 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
14409 
14410 	/*
14411 	 * Save the pointer to the request sense private bp so it can
14412 	 * be retrieved in sdintr.
14413 	 */
14414 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
14415 	ASSERT(un->un_rqs_bp->b_private == xp);
14416 
14417 	/*
14418 	 * See if the HBA supports auto-request sense for the specified
14419 	 * target/lun. If it does, then try to enable it (if not already
14420 	 * enabled).
14421 	 *
14422 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
14423 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
14424 	 * return success.  However, in both of these cases ARQ is always
14425 	 * enabled and scsi_ifgetcap will always return true. The best approach
14426 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
14427 	 *
14428 	 * The 3rd case is an HBA (adp) that always returns enabled from
14429 	 * scsi_ifgetcap even when ARQ is not enabled; there the best
14430 	 * approach is to issue a scsi_ifsetcap() and then a scsi_ifgetcap().
14431 	 * Note: this case exists to circumvent the Adaptec bug. (x86 only)
14432 	 */
14433 
14434 	if (un->un_f_is_fibre == TRUE) {
14435 		un->un_f_arq_enabled = TRUE;
14436 	} else {
14437 #if defined(__i386) || defined(__amd64)
14438 		/*
14439 		 * Circumvent the Adaptec bug, remove this code when
14440 		 * the bug is fixed
14441 		 */
14442 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
14443 #endif
14444 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
14445 		case 0:
14446 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14447 			    "sd_alloc_rqs: HBA supports ARQ\n");
14448 			/*
14449 			 * ARQ is supported by this HBA but currently is not
14450 			 * enabled. Attempt to enable it and if successful then
14451 			 * mark this instance as ARQ enabled.
14452 			 */
14453 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
14454 			    == 1) {
14455 				/* Successfully enabled ARQ in the HBA */
14456 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14457 				    "sd_alloc_rqs: ARQ enabled\n");
14458 				un->un_f_arq_enabled = TRUE;
14459 			} else {
14460 				/* Could not enable ARQ in the HBA */
14461 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
14462 				    "sd_alloc_rqs: failed ARQ enable\n");
14463 				un->un_f_arq_enabled = FALSE;
14464 			}
14465 			break;
14466 		case 1:
14467 			/*
14468 			 * ARQ is supported by this HBA and is already enabled.
14469 			 * Just mark ARQ as enabled for this instance.
14470 			 */
14471 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14472 			    "sd_alloc_rqs: ARQ already enabled\n");
14473 			un->un_f_arq_enabled = TRUE;
14474 			break;
14475 		default:
14476 			/*
14477 			 * ARQ is not supported by this HBA; disable it for this
14478 			 * instance.
14479 			 */
14480 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
14481 			    "sd_alloc_rqs: HBA does not support ARQ\n");
14482 			un->un_f_arq_enabled = FALSE;
14483 			break;
14484 		}
14485 	}
14486 
14487 	return (DDI_SUCCESS);
14488 }
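
/*
 * Illustrative sketch of the capability negotiation pattern used
 * above: scsi_ifgetcap(9F) probes "auto-rqsense" (1 = enabled,
 * 0 = supported but disabled, -1 = undefined for this HBA), and
 * scsi_ifsetcap(9F) attempts to enable it:
 *
 *	switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
 *	case 1:		(already enabled; nothing to do)
 *		break;
 *	case 0:		(supported; try to turn it on)
 *		(void) scsi_ifsetcap(SD_ADDRESS(un),
 *		    "auto-rqsense", 1, 1);
 *		break;
 *	default:	(not supported by this HBA)
 *		break;
 *	}
 */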
14489 
14490 
14491 /*
14492  *    Function: sd_free_rqs
14493  *
14494  * Description: Cleanup for the pre-instance RQS command.
14495  *
14496  *     Context: Kernel thread context
14497  */
14498 
14499 static void
14500 sd_free_rqs(struct sd_lun *un)
14501 {
14502 	ASSERT(un != NULL);
14503 
14504 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
14505 
14506 	/*
14507 	 * If consistent memory is bound to a scsi_pkt, the pkt
14508 	 * has to be destroyed *before* freeing the consistent memory.
14509 	 * Don't change the sequence of these operations:
14510 	 * scsi_destroy_pkt() might otherwise access memory after it was
14511 	 * freed in scsi_free_consistent_buf(), which isn't allowed.
14512 	 */
14513 	if (un->un_rqs_pktp != NULL) {
14514 		scsi_destroy_pkt(un->un_rqs_pktp);
14515 		un->un_rqs_pktp = NULL;
14516 	}
14517 
14518 	if (un->un_rqs_bp != NULL) {
14519 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
14520 		scsi_free_consistent_buf(un->un_rqs_bp);
14521 		un->un_rqs_bp = NULL;
14522 	}
14523 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
14524 }
14525 
14526 
14527 
14528 /*
14529  *    Function: sd_reduce_throttle
14530  *
14531  * Description: Reduces the maximum # of outstanding commands on a
14532  *		target to the current number of outstanding commands.
14533  *		Queues a timeout(9F) callback to restore the limit
14534  *		after a specified interval has elapsed.
14535  *		Typically used when we get a TRAN_BUSY return code
14536  *		back from scsi_transport().
14537  *
14538  *   Arguments: un - ptr to the sd_lun softstate struct
14539  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
14540  *
14541  *     Context: May be called from interrupt context
14542  */
14543 
14544 static void
14545 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
14546 {
14547 	ASSERT(un != NULL);
14548 	ASSERT(mutex_owned(SD_MUTEX(un)));
14549 	ASSERT(un->un_ncmds_in_transport >= 0);
14550 
14551 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14552 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
14553 	    un, un->un_throttle, un->un_ncmds_in_transport);
14554 
14555 	if (un->un_throttle > 1) {
14556 		if (un->un_f_use_adaptive_throttle == TRUE) {
14557 			switch (throttle_type) {
14558 			case SD_THROTTLE_TRAN_BUSY:
14559 				if (un->un_busy_throttle == 0) {
14560 					un->un_busy_throttle = un->un_throttle;
14561 				}
14562 				break;
14563 			case SD_THROTTLE_QFULL:
14564 				un->un_busy_throttle = 0;
14565 				break;
14566 			default:
14567 				ASSERT(FALSE);
14568 			}
14569 
14570 			if (un->un_ncmds_in_transport > 0) {
14571 				un->un_throttle = un->un_ncmds_in_transport;
14572 			}
14573 
14574 		} else {
14575 			if (un->un_ncmds_in_transport == 0) {
14576 				un->un_throttle = 1;
14577 			} else {
14578 				un->un_throttle = un->un_ncmds_in_transport;
14579 			}
14580 		}
14581 	}
14582 
14583 	/* Reschedule the timeout if none is currently active */
14584 	if (un->un_reset_throttle_timeid == NULL) {
14585 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
14586 		    un, SD_THROTTLE_RESET_INTERVAL);
14587 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14588 		    "sd_reduce_throttle: timeout scheduled!\n");
14589 	}
14590 
14591 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
14592 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14593 }
14594 
14595 
14596 
14597 /*
14598  *    Function: sd_restore_throttle
14599  *
14600  * Description: Callback function for timeout(9F).  Resets the current
14601  *		value of un->un_throttle to its default.
14602  *
14603  *   Arguments: arg - pointer to associated softstate for the device.
14604  *
14605  *     Context: May be called from interrupt context
14606  */
14607 
14608 static void
14609 sd_restore_throttle(void *arg)
14610 {
14611 	struct sd_lun	*un = arg;
14612 
14613 	ASSERT(un != NULL);
14614 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14615 
14616 	mutex_enter(SD_MUTEX(un));
14617 
14618 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14619 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
14620 
14621 	un->un_reset_throttle_timeid = NULL;
14622 
14623 	if (un->un_f_use_adaptive_throttle == TRUE) {
14624 		/*
14625 		 * If un_busy_throttle is nonzero, then it contains the
14626 		 * value that un_throttle was when we got a TRAN_BUSY back
14627 		 * from scsi_transport(). We want to revert back to this
14628 		 * value.
14629 		 *
14630 		 * In the QFULL case, the throttle limit will incrementally
14631 		 * increase until it reaches max throttle.
14632 		 */
14633 		if (un->un_busy_throttle > 0) {
14634 			un->un_throttle = un->un_busy_throttle;
14635 			un->un_busy_throttle = 0;
14636 		} else {
14637 			/*
14638 			 * Increase the throttle by 10% to open the gate
14639 			 * slowly; schedule another restore if the saved
14640 			 * throttle has not yet been reached.
14641 			 */
14642 			short throttle;
14643 			if (sd_qfull_throttle_enable) {
14644 				throttle = un->un_throttle +
14645 				    max((un->un_throttle / 10), 1);
14646 				un->un_throttle =
14647 				    (throttle < un->un_saved_throttle) ?
14648 				    throttle : un->un_saved_throttle;
14649 				if (un->un_throttle < un->un_saved_throttle) {
14650 					un->un_reset_throttle_timeid =
14651 					    timeout(sd_restore_throttle,
14652 					    un,
14653 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
14654 				}
14655 			}
14656 		}
14657 
14658 		/*
14659 		 * If un_throttle has fallen below the low-water mark, we
14660 		 * restore the maximum value here (and allow it to ratchet
14661 		 * down again if necessary).
14662 		 */
14663 		if (un->un_throttle < un->un_min_throttle) {
14664 			un->un_throttle = un->un_saved_throttle;
14665 		}
14666 	} else {
14667 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
14668 		    "restoring limit from 0x%x to 0x%x\n",
14669 		    un->un_throttle, un->un_saved_throttle);
14670 		un->un_throttle = un->un_saved_throttle;
14671 	}
14672 
14673 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14674 	    "sd_restore_throttle: calling sd_start_cmds!\n");
14675 
14676 	sd_start_cmds(un, NULL);
14677 
14678 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14679 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
14680 	    un, un->un_throttle);
14681 
14682 	mutex_exit(SD_MUTEX(un));
14683 
14684 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
14685 }
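
/*
 * Worked example of the ramp above (illustrative): with
 * un_saved_throttle = 64 and un_throttle knocked down to 32 by a
 * QFULL, successive restore callbacks yield 35, 38, 41, 45, 49, 53,
 * 58, 63, and finally 64; the max(un_throttle / 10, 1) term
 * guarantees forward progress even when the throttle has fallen all
 * the way to 1.
 */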
14686 
14687 /*
14688  *    Function: sdrunout
14689  *
14690  * Description: Callback routine for scsi_init_pkt when a resource allocation
14691  *		fails.
14692  *
14693  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
14694  *		soft state instance.
14695  *
14696  * Return Code: The scsi_init_pkt routine allows for the callback function to
14697  *		return a 0 indicating the callback should be rescheduled or a 1
14698  *		indicating not to reschedule. This routine always returns 1
14699  *		because the driver always provides a callback function to
14700  *		scsi_init_pkt. This results in a callback always being scheduled
14701  *		(via the scsi_init_pkt callback implementation) if a resource
14702  *		failure occurs.
14703  *
14704  *     Context: This callback function may not block or call routines that block
14705  *
14706  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
14707  *		request persisting at the head of the list which cannot be
14708  *		satisfied even after multiple retries. In the future the driver
14709  *		may implement some type of maximum runout count before failing
14710  *		an I/O.
14711  */
14712 
14713 static int
14714 sdrunout(caddr_t arg)
14715 {
14716 	struct sd_lun	*un = (struct sd_lun *)arg;
14717 
14718 	ASSERT(un != NULL);
14719 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14720 
14721 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
14722 
14723 	mutex_enter(SD_MUTEX(un));
14724 	sd_start_cmds(un, NULL);
14725 	mutex_exit(SD_MUTEX(un));
14726 	/*
14727 	 * This callback routine always returns 1 (i.e. do not reschedule)
14728 	 * because we always specify sdrunout as the callback handler for
14729 	 * scsi_init_pkt inside the call to sd_start_cmds.
14730 	 */
14731 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
14732 	return (1);
14733 }
14734 
14735 
14736 /*
14737  *    Function: sdintr
14738  *
14739  * Description: Completion callback routine for scsi_pkt(9S) structs
14740  *		sent to the HBA driver via scsi_transport(9F).
14741  *
14742  *     Context: Interrupt context
14743  */
14744 
14745 static void
14746 sdintr(struct scsi_pkt *pktp)
14747 {
14748 	struct buf	*bp;
14749 	struct sd_xbuf	*xp;
14750 	struct sd_lun	*un;
14751 
14752 	ASSERT(pktp != NULL);
14753 	bp = (struct buf *)pktp->pkt_private;
14754 	ASSERT(bp != NULL);
14755 	xp = SD_GET_XBUF(bp);
14756 	ASSERT(xp != NULL);
14757 	ASSERT(xp->xb_pktp != NULL);
14758 	un = SD_GET_UN(bp);
14759 	ASSERT(un != NULL);
14760 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14761 
14762 #ifdef SD_FAULT_INJECTION
14763 
14764 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
14765 	/* SD FaultInjection */
14766 	sd_faultinjection(pktp);
14767 
14768 #endif /* SD_FAULT_INJECTION */
14769 
14770 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
14771 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
14772 
14773 	mutex_enter(SD_MUTEX(un));
14774 
14775 	/* Reduce the count of the #commands currently in transport */
14776 	un->un_ncmds_in_transport--;
14777 	ASSERT(un->un_ncmds_in_transport >= 0);
14778 
14779 	/* Increment counter to indicate that the callback routine is active */
14780 	un->un_in_callback++;
14781 
14782 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14783 
14784 #ifdef	SDDEBUG
14785 	if (bp == un->un_retry_bp) {
14786 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
14787 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
14788 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
14789 	}
14790 #endif
14791 
14792 	/*
14793 	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
14794 	 * state if needed.
14795 	 */
14796 	if (pktp->pkt_reason == CMD_DEV_GONE) {
14797 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
14798 		    "Device is gone\n");
14799 		if (un->un_mediastate != DKIO_DEV_GONE) {
14800 			un->un_mediastate = DKIO_DEV_GONE;
14801 			cv_broadcast(&un->un_state_cv);
14802 		}
14803 		sd_return_failed_command(un, bp, EIO);
14804 		goto exit;
14805 	}
14806 
14807 	/*
14808 	 * First see if the pkt has auto-request sense data with it....
14809 	 * Look at the packet state first so we don't take a performance
14810 	 * hit looking at the arq enabled flag unless absolutely necessary.
14811 	 */
14812 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
14813 	    (un->un_f_arq_enabled == TRUE)) {
14814 		/*
14815 		 * The HBA did an auto request sense for this command so check
14816 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14817 		 * driver command that should not be retried.
14818 		 */
14819 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14820 			/*
14821 			 * Save the relevant sense info into the xp for the
14822 			 * original cmd.
14823 			 */
14824 			struct scsi_arq_status *asp;
14825 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
14826 			xp->xb_sense_status =
14827 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
14828 			xp->xb_sense_state  = asp->sts_rqpkt_state;
14829 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
14830 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
14831 			    min(sizeof (struct scsi_extended_sense),
14832 			    SENSE_LENGTH));
14833 
14834 			/* fail the command */
14835 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14836 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
14837 			sd_return_failed_command(un, bp, EIO);
14838 			goto exit;
14839 		}
14840 
14841 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14842 		/*
14843 		 * We want to either retry or fail this command, so free
14844 		 * the DMA resources here.  If we retry the command then
14845 		 * the DMA resources will be reallocated in sd_start_cmds().
14846 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
14847 		 * causes the *entire* transfer to start over again from the
14848 		 * beginning of the request, even for PARTIAL chunks that
14849 		 * have already transferred successfully.
14850 		 */
14851 		if ((un->un_f_is_fibre == TRUE) &&
14852 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14853 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14854 			scsi_dmafree(pktp);
14855 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14856 		}
14857 #endif
14858 
14859 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14860 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
14861 
14862 		sd_handle_auto_request_sense(un, bp, xp, pktp);
14863 		goto exit;
14864 	}
14865 
14866 	/* Next see if this is the REQUEST SENSE pkt for the instance */
14867 	if (pktp->pkt_flags & FLAG_SENSING)  {
14868 		/* This pktp is from the unit's REQUEST_SENSE command */
14869 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14870 		    "sdintr: sd_handle_request_sense\n");
14871 		sd_handle_request_sense(un, bp, xp, pktp);
14872 		goto exit;
14873 	}
14874 
14875 	/*
14876 	 * Check to see if the command successfully completed as requested;
14877 	 * this is the most common case (and also the hot performance path).
14878 	 *
14879 	 * Requirements for successful completion are:
14880 	 * pkt_reason is CMD_CMPLT and the packet status is STATUS_GOOD.
14881 	 * In addition:
14882 	 * - A residual of zero indicates successful completion no matter what
14883 	 *   the command is.
14884 	 * - If the residual is not zero and the command is not a read or
14885 	 *   write, then it's still defined as successful completion. In other
14886 	 *   words, if the command is a read or write the residual must be
14887 	 *   zero for successful completion.
14888 	 * - If the residual is not zero and the command is a read or
14889 	 *   write, and it's a USCSICMD, then it's still defined as
14890 	 *   successful completion.
14891 	 */
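	/*
	 * The rules above, summarized as a decision table:
	 *
	 *	resid == 0				-> success
	 *	resid != 0, not READ/WRITE		-> success
	 *	resid != 0, READ/WRITE, USCSI		-> success
	 *	resid != 0, READ/WRITE, buf(9S) I/O	-> not_successful
	 */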
14892 	if ((pktp->pkt_reason == CMD_CMPLT) &&
14893 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
14894 
14895 		/*
14896 		 * Since this command is returned with a good status, we
14897 		 * can reset the count for Sonoma failover.
14898 		 */
14899 		un->un_sonoma_failure_count = 0;
14900 
14901 		/*
14902 		 * Return all USCSI commands on good status
14903 		 */
14904 		if (pktp->pkt_resid == 0) {
14905 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14906 			    "sdintr: returning command for resid == 0\n");
14907 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
14908 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
14909 			SD_UPDATE_B_RESID(bp, pktp);
14910 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14911 			    "sdintr: returning command for resid != 0\n");
14912 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
14913 			SD_UPDATE_B_RESID(bp, pktp);
14914 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14915 			    "sdintr: returning uscsi command\n");
14916 		} else {
14917 			goto not_successful;
14918 		}
14919 		sd_return_command(un, bp);
14920 
14921 		/*
14922 		 * Decrement counter to indicate that the callback routine
14923 		 * is done.
14924 		 */
14925 		un->un_in_callback--;
14926 		ASSERT(un->un_in_callback >= 0);
14927 		mutex_exit(SD_MUTEX(un));
14928 
14929 		return;
14930 	}
14931 
14932 not_successful:
14933 
14934 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
14935 	/*
14936 	 * The following is based upon knowledge of the underlying transport
14937 	 * and its use of DMA resources.  This code should be removed when
14938 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
14939 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
14940 	 * and sd_start_cmds().
14941 	 *
14942 	 * Free any DMA resources associated with this command if there
14943 	 * is a chance it could be retried or enqueued for later retry.
14944 	 * If we keep the DMA binding then mpxio cannot reissue the
14945 	 * command on another path whenever a path failure occurs.
14946 	 *
14947 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
14948 	 * causes the *entire* transfer to start over again from the
14949 	 * beginning of the request, even for PARTIAL chunks that
14950 	 * have already transferred successfully.
14951 	 *
14952 	 * This is only done for non-uscsi commands (and also skipped for the
14953 	 * driver's internal RQS command). Also just do this for Fibre Channel
14954 	 * devices as these are the only ones that support mpxio.
14955 	 */
14956 	if ((un->un_f_is_fibre == TRUE) &&
14957 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14958 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
14959 		scsi_dmafree(pktp);
14960 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14961 	}
14962 #endif
14963 
14964 	/*
14965 	 * The command did not successfully complete as requested so check
14966 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
14967 	 * driver command that should not be retried so just return. If
14968 	 * FLAG_DIAGNOSE is not set the error will be processed below.
14969 	 */
14970 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14971 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14972 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
14973 		/*
14974 		 * Issue a request sense if a check condition caused the error
14975 		 * (we handle the auto request sense case above), otherwise
14976 		 * just fail the command.
14977 		 */
14978 		if ((pktp->pkt_reason == CMD_CMPLT) &&
14979 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
14980 			sd_send_request_sense_command(un, bp, pktp);
14981 		} else {
14982 			sd_return_failed_command(un, bp, EIO);
14983 		}
14984 		goto exit;
14985 	}
14986 
14987 	/*
14988 	 * The command did not successfully complete as requested so process
14989 	 * the error, retry, and/or attempt recovery.
14990 	 */
14991 	switch (pktp->pkt_reason) {
14992 	case CMD_CMPLT:
14993 		switch (SD_GET_PKT_STATUS(pktp)) {
14994 		case STATUS_GOOD:
14995 			/*
14996 			 * The command completed successfully with a non-zero
14997 			 * residual
14998 			 */
14999 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15000 			    "sdintr: STATUS_GOOD \n");
15001 			sd_pkt_status_good(un, bp, xp, pktp);
15002 			break;
15003 
15004 		case STATUS_CHECK:
15005 		case STATUS_TERMINATED:
15006 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15007 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
15008 			sd_pkt_status_check_condition(un, bp, xp, pktp);
15009 			break;
15010 
15011 		case STATUS_BUSY:
15012 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15013 			    "sdintr: STATUS_BUSY\n");
15014 			sd_pkt_status_busy(un, bp, xp, pktp);
15015 			break;
15016 
15017 		case STATUS_RESERVATION_CONFLICT:
15018 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15019 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
15020 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15021 			break;
15022 
15023 		case STATUS_QFULL:
15024 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15025 			    "sdintr: STATUS_QFULL\n");
15026 			sd_pkt_status_qfull(un, bp, xp, pktp);
15027 			break;
15028 
15029 		case STATUS_MET:
15030 		case STATUS_INTERMEDIATE:
15031 		case STATUS_SCSI2:
15032 		case STATUS_INTERMEDIATE_MET:
15033 		case STATUS_ACA_ACTIVE:
15034 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
15035 			    "Unexpected SCSI status received: 0x%x\n",
15036 			    SD_GET_PKT_STATUS(pktp));
15037 			sd_return_failed_command(un, bp, EIO);
15038 			break;
15039 
15040 		default:
15041 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
15042 			    "Invalid SCSI status received: 0x%x\n",
15043 			    SD_GET_PKT_STATUS(pktp));
15044 			sd_return_failed_command(un, bp, EIO);
15045 			break;
15046 
15047 		}
15048 		break;
15049 
15050 	case CMD_INCOMPLETE:
15051 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15052 		    "sdintr:  CMD_INCOMPLETE\n");
15053 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
15054 		break;
15055 	case CMD_TRAN_ERR:
15056 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15057 		    "sdintr: CMD_TRAN_ERR\n");
15058 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
15059 		break;
15060 	case CMD_RESET:
15061 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15062 		    "sdintr: CMD_RESET \n");
15063 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
15064 		break;
15065 	case CMD_ABORTED:
15066 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15067 		    "sdintr: CMD_ABORTED \n");
15068 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
15069 		break;
15070 	case CMD_TIMEOUT:
15071 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15072 		    "sdintr: CMD_TIMEOUT\n");
15073 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
15074 		break;
15075 	case CMD_UNX_BUS_FREE:
15076 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15077 		    "sdintr: CMD_UNX_BUS_FREE \n");
15078 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
15079 		break;
15080 	case CMD_TAG_REJECT:
15081 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15082 		    "sdintr: CMD_TAG_REJECT\n");
15083 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
15084 		break;
15085 	default:
15086 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15087 		    "sdintr: default\n");
15088 		sd_pkt_reason_default(un, bp, xp, pktp);
15089 		break;
15090 	}
15091 
15092 exit:
15093 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
15094 
15095 	/* Decrement counter to indicate that the callback routine is done. */
15096 	un->un_in_callback--;
15097 	ASSERT(un->un_in_callback >= 0);
15098 
15099 	/*
15100 	 * At this point, the pkt has been dispatched, i.e., it is either
15101 	 * being re-tried or has been returned to its caller and should
15102 	 * not be referenced.
15103 	 */
15104 
15105 	mutex_exit(SD_MUTEX(un));
15106 }
15107 
15108 
15109 /*
15110  *    Function: sd_print_incomplete_msg
15111  *
15112  * Description: Prints the error message for a CMD_INCOMPLETE error.
15113  *
15114  *   Arguments: un - ptr to associated softstate for the device.
15115  *		bp - ptr to the buf(9S) for the command.
15116  *		arg - message string ptr
15117  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
15118  *			or SD_NO_RETRY_ISSUED.
15119  *
15120  *     Context: May be called under interrupt context
15121  */
15122 
15123 static void
15124 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15125 {
15126 	struct scsi_pkt	*pktp;
15127 	char	*msgp;
15128 	char	*cmdp = arg;
15129 
15130 	ASSERT(un != NULL);
15131 	ASSERT(mutex_owned(SD_MUTEX(un)));
15132 	ASSERT(bp != NULL);
15133 	ASSERT(arg != NULL);
15134 	pktp = SD_GET_PKTP(bp);
15135 	ASSERT(pktp != NULL);
15136 
15137 	switch (code) {
15138 	case SD_DELAYED_RETRY_ISSUED:
15139 	case SD_IMMEDIATE_RETRY_ISSUED:
15140 		msgp = "retrying";
15141 		break;
15142 	case SD_NO_RETRY_ISSUED:
15143 	default:
15144 		msgp = "giving up";
15145 		break;
15146 	}
15147 
15148 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15149 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15150 		    "incomplete %s - %s\n", cmdp, msgp);
15151 	}
15152 }
15153 
15154 
15155 
15156 /*
15157  *    Function: sd_pkt_status_good
15158  *
15159  * Description: Processing for a STATUS_GOOD code in pkt_status.
15160  *
15161  *     Context: May be called under interrupt context
15162  */
15163 
15164 static void
15165 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
15166 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15167 {
15168 	char	*cmdp;
15169 
15170 	ASSERT(un != NULL);
15171 	ASSERT(mutex_owned(SD_MUTEX(un)));
15172 	ASSERT(bp != NULL);
15173 	ASSERT(xp != NULL);
15174 	ASSERT(pktp != NULL);
15175 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
15176 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
15177 	ASSERT(pktp->pkt_resid != 0);
15178 
15179 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
15180 
15181 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
15182 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
15183 	case SCMD_READ:
15184 		cmdp = "read";
15185 		break;
15186 	case SCMD_WRITE:
15187 		cmdp = "write";
15188 		break;
15189 	default:
15190 		SD_UPDATE_B_RESID(bp, pktp);
15191 		sd_return_command(un, bp);
15192 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
15193 		return;
15194 	}
15195 
15196 	/*
15197 	 * See if we can retry the read/write, preferably immediately.
15198 	 * If retries are exhausted, then sd_retry_command() will update
15199 	 * the b_resid count.
15200 	 */
15201 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
15202 	    cmdp, EIO, (clock_t)0, NULL);
15203 
15204 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
15205 }
15206 
15207 
15208 
15209 
15210 
15211 /*
15212  *    Function: sd_handle_request_sense
15213  *
15214  * Description: Processing for a non-auto Request Sense command.
15215  *
15216  *   Arguments: un - ptr to associated softstate
15217  *		sense_bp - ptr to buf(9S) for the RQS command
15218  *		sense_xp - ptr to the sd_xbuf for the RQS command
15219  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
15220  *
15221  *     Context: May be called under interrupt context
15222  */
15223 
15224 static void
15225 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
15226 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
15227 {
15228 	struct buf	*cmd_bp;	/* buf for the original command */
15229 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
15230 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
15231 
15232 	ASSERT(un != NULL);
15233 	ASSERT(mutex_owned(SD_MUTEX(un)));
15234 	ASSERT(sense_bp != NULL);
15235 	ASSERT(sense_xp != NULL);
15236 	ASSERT(sense_pktp != NULL);
15237 
15238 	/*
15239 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
15240 	 * RQS command and not the original command.
15241 	 */
15242 	ASSERT(sense_pktp == un->un_rqs_pktp);
15243 	ASSERT(sense_bp   == un->un_rqs_bp);
15244 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
15245 	    (FLAG_SENSING | FLAG_HEAD));
15246 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
15247 	    FLAG_SENSING) == FLAG_SENSING);
15248 
15249 	/* These are the bp, xp, and pktp for the original command */
15250 	cmd_bp = sense_xp->xb_sense_bp;
15251 	cmd_xp = SD_GET_XBUF(cmd_bp);
15252 	cmd_pktp = SD_GET_PKTP(cmd_bp);
15253 
15254 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
15255 		/*
15256 		 * The REQUEST SENSE command failed.  Release the REQUEST
15257 		 * SENSE command for re-use, get back the bp for the original
15258 		 * command, and attempt to re-try the original command if
15259 		 * FLAG_DIAGNOSE is not set in the original packet.
15260 		 */
15261 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15262 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15263 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
15264 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
15265 			    NULL, NULL, EIO, (clock_t)0, NULL);
15266 			return;
15267 		}
15268 	}
15269 
15270 	/*
15271 	 * Save the relevant sense info into the xp for the original cmd.
15272 	 *
15273 	 * Note: if the request sense failed the state info will be zero
15274 	 * as set in sd_mark_rqs_busy()
15275 	 */
15276 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
15277 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
15278 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
15279 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
15280 
15281 	/*
15282 	 *  Free up the RQS command....
15283 	 *  NOTE:
15284 	 *	Must do this BEFORE calling sd_validate_sense_data!
15285 	 *	sd_validate_sense_data may return the original command in
15286 	 *	which case the pkt will be freed and the flags can no
15287 	 *	longer be touched.
15288 	 *	SD_MUTEX is held through this process until the command
15289 	 *	is dispatched based upon the sense data, so there are
15290 	 *	no race conditions.
15291 	 */
15292 	(void) sd_mark_rqs_idle(un, sense_xp);
15293 
15294 	/*
15295 	 * For a retryable command, see if we have valid sense data; if so,
15296 	 * turn it over to sd_decode_sense() to figure out the right course of
15297 	 * action. Just fail a non-retryable command.
15298 	 */
15299 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15300 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
15301 		    SD_SENSE_DATA_IS_VALID) {
15302 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
15303 		}
15304 	} else {
15305 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
15306 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15307 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
15308 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15309 		sd_return_failed_command(un, cmd_bp, EIO);
15310 	}
15311 }
15312 
15313 
15314 
15315 
15316 /*
15317  *    Function: sd_handle_auto_request_sense
15318  *
15319  * Description: Processing for auto-request sense information.
15320  *
15321  *   Arguments: un - ptr to associated softstate
15322  *		bp - ptr to buf(9S) for the command
15323  *		xp - ptr to the sd_xbuf for the command
15324  *		pktp - ptr to the scsi_pkt(9S) for the command
15325  *
15326  *     Context: May be called under interrupt context
15327  */
15328 
15329 static void
15330 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
15331 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15332 {
15333 	struct scsi_arq_status *asp;
15334 
15335 	ASSERT(un != NULL);
15336 	ASSERT(mutex_owned(SD_MUTEX(un)));
15337 	ASSERT(bp != NULL);
15338 	ASSERT(xp != NULL);
15339 	ASSERT(pktp != NULL);
15340 	ASSERT(pktp != un->un_rqs_pktp);
15341 	ASSERT(bp   != un->un_rqs_bp);
15342 
15343 	/*
15344 	 * For auto-request sense, we get a scsi_arq_status back from
15345 	 * the HBA, with the sense data in the sts_sensedata member.
15346 	 * The pkt_scbp of the packet points to this scsi_arq_status.
15347 	 */
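	/*
	 * A sketch of the overlay being assumed here (the authoritative
	 * definition lives in the SCSA headers; this is an illustration of
	 * the common shape, not a verbatim copy):
	 *
	 *	struct scsi_arq_status {
	 *		struct scsi_status		sts_status;
	 *		struct scsi_status		sts_rqpkt_status;
	 *		uchar_t				sts_rqpkt_reason;
	 *		uchar_t				sts_rqpkt_resid;
	 *		uint_t				sts_rqpkt_state;
	 *		uint_t				sts_rqpkt_statistics;
	 *		struct scsi_extended_sense	sts_sensedata;
	 *	};
	 */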
15348 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
15349 
15350 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
15351 		/*
15352 		 * The auto REQUEST SENSE failed; see if we can re-try
15353 		 * the original command.
15354 		 */
15355 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15356 		    "auto request sense failed (reason=%s)\n",
15357 		    scsi_rname(asp->sts_rqpkt_reason));
15358 
15359 		sd_reset_target(un, pktp);
15360 
15361 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15362 		    NULL, NULL, EIO, (clock_t)0, NULL);
15363 		return;
15364 	}
15365 
15366 	/* Save the relevant sense info into the xp for the original cmd. */
15367 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
15368 	xp->xb_sense_state  = asp->sts_rqpkt_state;
15369 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
15370 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
15371 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
15372 
15373 	/*
15374 	 * See if we have valid sense data; if so, turn it over to
15375 	 * sd_decode_sense() to figure out the right course of action.
15376 	 */
15377 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
15378 		sd_decode_sense(un, bp, xp, pktp);
15379 	}
15380 }
15381 
15382 
15383 /*
15384  *    Function: sd_print_sense_failed_msg
15385  *
15386  * Description: Print a log message when RQS (request sense) has failed.
15387  *
15388  *   Arguments: un - ptr to associated softstate
15389  *		bp - ptr to buf(9S) for the command
15390  *		arg - generic message string ptr
15391  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15392  *			or SD_NO_RETRY_ISSUED
15393  *
15394  *     Context: May be called from interrupt context
15395  */
15396 
15397 static void
15398 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
15399 	int code)
15400 {
15401 	char	*msgp = arg;
15402 
15403 	ASSERT(un != NULL);
15404 	ASSERT(mutex_owned(SD_MUTEX(un)));
15405 	ASSERT(bp != NULL);
15406 
15407 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
15408 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
15409 	}
15410 }
15411 
15412 
15413 /*
15414  *    Function: sd_validate_sense_data
15415  *
15416  * Description: Check the given sense data for validity.
15417  *		If the sense data is not valid, the command will
15418  *		be either failed or retried!
15419  *
15420  * Return Code: SD_SENSE_DATA_IS_INVALID
15421  *		SD_SENSE_DATA_IS_VALID
15422  *
15423  *     Context: May be called from interrupt context
15424  */
15425 
15426 static int
15427 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
15428 {
15429 	struct scsi_extended_sense *esp;
15430 	struct	scsi_pkt *pktp;
15431 	size_t	actual_len;
15432 	char	*msgp = NULL;
15433 
15434 	ASSERT(un != NULL);
15435 	ASSERT(mutex_owned(SD_MUTEX(un)));
15436 	ASSERT(bp != NULL);
15437 	ASSERT(bp != un->un_rqs_bp);
15438 	ASSERT(xp != NULL);
15439 
15440 	pktp = SD_GET_PKTP(bp);
15441 	ASSERT(pktp != NULL);
15442 
15443 	/*
15444 	 * Check the status of the RQS command (auto or manual).
15445 	 */
15446 	switch (xp->xb_sense_status & STATUS_MASK) {
15447 	case STATUS_GOOD:
15448 		break;
15449 
15450 	case STATUS_RESERVATION_CONFLICT:
15451 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
15452 		return (SD_SENSE_DATA_IS_INVALID);
15453 
15454 	case STATUS_BUSY:
15455 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15456 		    "Busy Status on REQUEST SENSE\n");
15457 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
15458 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15459 		return (SD_SENSE_DATA_IS_INVALID);
15460 
15461 	case STATUS_QFULL:
15462 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15463 		    "QFULL Status on REQUEST SENSE\n");
15464 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
15465 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
15466 		return (SD_SENSE_DATA_IS_INVALID);
15467 
15468 	case STATUS_CHECK:
15469 	case STATUS_TERMINATED:
15470 		msgp = "Check Condition on REQUEST SENSE\n";
15471 		goto sense_failed;
15472 
15473 	default:
15474 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
15475 		goto sense_failed;
15476 	}
15477 
15478 	/*
15479 	 * See if we got the minimum required amount of sense data.
15480 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
15481 	 * or less.
15482 	 */
15483 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
15484 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
15485 	    (actual_len == 0)) {
15486 		msgp = "Request Sense couldn't get sense data\n";
15487 		goto sense_failed;
15488 	}
15489 
15490 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
15491 		msgp = "Not enough sense information\n";
15492 		goto sense_failed;
15493 	}
15494 
15495 	/*
15496 	 * We require the extended sense data
15497 	 */
15498 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
15499 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
15500 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
15501 			static char tmp[8];
15502 			static char buf[148];
15503 			char *p = (char *)(xp->xb_sense_data);
15504 			int i;
15505 
15506 			mutex_enter(&sd_sense_mutex);
15507 			(void) strcpy(buf, "undecodable sense information:");
15508 			for (i = 0; i < actual_len; i++) {
15509 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
15510 				(void) strcpy(&buf[strlen(buf)], tmp);
15511 			}
15512 			i = strlen(buf);
15513 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
15514 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
15515 			mutex_exit(&sd_sense_mutex);
15516 		}
15517 		/* Note: Legacy behavior, fail the command with no retry */
15518 		sd_return_failed_command(un, bp, EIO);
15519 		return (SD_SENSE_DATA_IS_INVALID);
15520 	}
15521 
15522 	/*
15523 	 * Check that es_code is valid (es_class concatenated with es_code
15524 	 * makes up the "response code" field).  es_class will always be 7, so
15525 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
15526 	 * format.
15527 	 */
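	/*
	 * Example: es_class 0x7 with es_code 0x2 corresponds to response
	 * code 0x72 (descriptor format, current error).
	 */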
15528 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
15529 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
15530 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
15531 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
15532 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
15533 		goto sense_failed;
15534 	}
15535 
15536 	return (SD_SENSE_DATA_IS_VALID);
15537 
15538 sense_failed:
15539 	/*
15540 	 * If the request sense failed (for whatever reason), attempt
15541 	 * to retry the original command.
15542 	 */
15543 #if defined(__i386) || defined(__amd64)
15544 	/*
15545 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
15546 	 * sddef.h for the Sparc platform, whereas x86 uses one binary
15547 	 * for both SCSI and FC.
15548 	 * The SD_RETRY_DELAY value used here needs to be adjusted
15549 	 * whenever SD_RETRY_DELAY changes in sddef.h.
15550 	 */
15551 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15552 	    sd_print_sense_failed_msg, msgp, EIO,
15553 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
15554 #else
15555 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15556 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
15557 #endif
15558 
15559 	return (SD_SENSE_DATA_IS_INVALID);
15560 }
15561 
15562 
15563 
15564 /*
15565  *    Function: sd_decode_sense
15566  *
15567  * Description: Take recovery action(s) when SCSI Sense Data is received.
15568  *
15569  *     Context: Interrupt context.
15570  */
15571 
15572 static void
15573 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
15574 	struct scsi_pkt *pktp)
15575 {
15576 	uint8_t sense_key;
15577 
15578 	ASSERT(un != NULL);
15579 	ASSERT(mutex_owned(SD_MUTEX(un)));
15580 	ASSERT(bp != NULL);
15581 	ASSERT(bp != un->un_rqs_bp);
15582 	ASSERT(xp != NULL);
15583 	ASSERT(pktp != NULL);
15584 
15585 	sense_key = scsi_sense_key(xp->xb_sense_data);
15586 
15587 	switch (sense_key) {
15588 	case KEY_NO_SENSE:
15589 		sd_sense_key_no_sense(un, bp, xp, pktp);
15590 		break;
15591 	case KEY_RECOVERABLE_ERROR:
15592 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
15593 		    bp, xp, pktp);
15594 		break;
15595 	case KEY_NOT_READY:
15596 		sd_sense_key_not_ready(un, xp->xb_sense_data,
15597 		    bp, xp, pktp);
15598 		break;
15599 	case KEY_MEDIUM_ERROR:
15600 	case KEY_HARDWARE_ERROR:
15601 		sd_sense_key_medium_or_hardware_error(un,
15602 		    xp->xb_sense_data, bp, xp, pktp);
15603 		break;
15604 	case KEY_ILLEGAL_REQUEST:
15605 		sd_sense_key_illegal_request(un, bp, xp, pktp);
15606 		break;
15607 	case KEY_UNIT_ATTENTION:
15608 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
15609 		    bp, xp, pktp);
15610 		break;
15611 	case KEY_WRITE_PROTECT:
15612 	case KEY_VOLUME_OVERFLOW:
15613 	case KEY_MISCOMPARE:
15614 		sd_sense_key_fail_command(un, bp, xp, pktp);
15615 		break;
15616 	case KEY_BLANK_CHECK:
15617 		sd_sense_key_blank_check(un, bp, xp, pktp);
15618 		break;
15619 	case KEY_ABORTED_COMMAND:
15620 		sd_sense_key_aborted_command(un, bp, xp, pktp);
15621 		break;
15622 	case KEY_VENDOR_UNIQUE:
15623 	case KEY_COPY_ABORTED:
15624 	case KEY_EQUAL:
15625 	case KEY_RESERVED:
15626 	default:
15627 		sd_sense_key_default(un, xp->xb_sense_data,
15628 		    bp, xp, pktp);
15629 		break;
15630 	}
15631 }
15632 
15633 
15634 /*
15635  *    Function: sd_dump_memory
15636  *
15637  * Description: Debug logging routine to print the contents of a user-provided
15638  *		buffer. The output of the buffer is broken up into 256-byte
15639  *		segments due to a size constraint of the scsi_log
15640  *		implementation.
15641  *
15642  *   Arguments: un - ptr to softstate
15643  *		comp - component mask
15644  *		title - "title" string to precede data when printed
15645  *		data - ptr to data block to be printed
15646  *		len - size of data block to be printed
15647  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
15648  *
15649  *     Context: May be called from interrupt context
15650  */
15651 
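/*
 * Typical usage within this driver (see sd_print_sense_msg() below for
 * the actual calls), shown here for illustration:
 *
 *	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
 *	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
 */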
15652 #define	SD_DUMP_MEMORY_BUF_SIZE	256
15653 
15654 static char *sd_dump_format_string[] = {
15655 		" 0x%02x",
15656 		" %c"
15657 };
15658 
15659 static void
15660 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
15661     int len, int fmt)
15662 {
15663 	int	i, j;
15664 	int	avail_count;
15665 	int	start_offset;
15666 	int	end_offset;
15667 	size_t	entry_len;
15668 	char	*bufp;
15669 	char	*local_buf;
15670 	char	*format_string;
15671 
15672 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
15673 
15674 	/*
15675 	 * In the debug version of the driver, this function is called from a
15676 	 * number of places which are NOPs in the release driver.
15677 	 * The debug driver therefore has additional methods of filtering
15678 	 * debug output.
15679 	 */
15680 #ifdef SDDEBUG
15681 	/*
15682 	 * In the debug version of the driver we can reduce the amount of debug
15683 	 * messages by setting sd_error_level to something other than
15684 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
15685 	 * sd_component_mask.
15686 	 */
15687 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
15688 	    (sd_error_level != SCSI_ERR_ALL)) {
15689 		return;
15690 	}
15691 	if (((sd_component_mask & comp) == 0) ||
15692 	    (sd_error_level != SCSI_ERR_ALL)) {
15693 		return;
15694 	}
15695 #else
15696 	if (sd_error_level != SCSI_ERR_ALL) {
15697 		return;
15698 	}
15699 #endif
15700 
15701 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
15702 	bufp = local_buf;
15703 	/*
15704 	 * Available length is the length of local_buf[], minus the
15705 	 * length of the title string, minus one for the ":", minus
15706 	 * one for the newline, minus one for the NULL terminator.
15707 	 * This gives the #bytes available for holding the printed
15708 	 * values from the given data buffer.
15709 	 */
15710 	if (fmt == SD_LOG_HEX) {
15711 		format_string = sd_dump_format_string[0];
15712 	} else /* SD_LOG_CHAR */ {
15713 		format_string = sd_dump_format_string[1];
15714 	}
15715 	/*
15716 	 * Available count is the number of elements from the given
15717 	 * data buffer that we can fit into the available length.
15718 	 * This is based upon the size of the format string used.
15719 	 * Make one entry and find its size.
15720 	 */
15721 	(void) sprintf(bufp, format_string, data[0]);
15722 	entry_len = strlen(bufp);
15723 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
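	/*
	 * Worked example: with the title "Sense Data" (10 chars) and the
	 * hex format string, each entry is 5 chars (e.g. " 0x1f"), so
	 * avail_count = (256 - 10 - 3) / 5 = 48 entries per log line.
	 */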
15724 
15725 	j = 0;
15726 	while (j < len) {
15727 		bufp = local_buf;
15728 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
15729 		start_offset = j;
15730 
15731 		end_offset = start_offset + avail_count;
15732 
15733 		(void) sprintf(bufp, "%s:", title);
15734 		bufp += strlen(bufp);
15735 		for (i = start_offset; ((i < end_offset) && (j < len));
15736 		    i++, j++) {
15737 			(void) sprintf(bufp, format_string, data[i]);
15738 			bufp += entry_len;
15739 		}
15740 		(void) sprintf(bufp, "\n");
15741 
15742 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
15743 	}
15744 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
15745 }
15746 
15747 /*
15748  *    Function: sd_print_sense_msg
15749  *
15750  * Description: Log a message based upon the given sense data.
15751  *
15752  *   Arguments: un - ptr to associated softstate
15753  *		bp - ptr to buf(9S) for the command
15754  *		arg - ptr to associate sd_sense_info struct
15755  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
15756  *			or SD_NO_RETRY_ISSUED
15757  *
15758  *     Context: May be called from interrupt context
15759  */
15760 
15761 static void
15762 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
15763 {
15764 	struct sd_xbuf	*xp;
15765 	struct scsi_pkt	*pktp;
15766 	uint8_t *sensep;
15767 	daddr_t request_blkno;
15768 	diskaddr_t err_blkno;
15769 	int severity;
15770 	int pfa_flag;
15771 	extern struct scsi_key_strings scsi_cmds[];
15772 
15773 	ASSERT(un != NULL);
15774 	ASSERT(mutex_owned(SD_MUTEX(un)));
15775 	ASSERT(bp != NULL);
15776 	xp = SD_GET_XBUF(bp);
15777 	ASSERT(xp != NULL);
15778 	pktp = SD_GET_PKTP(bp);
15779 	ASSERT(pktp != NULL);
15780 	ASSERT(arg != NULL);
15781 
15782 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
15783 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
15784 
15785 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
15786 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
15787 		severity = SCSI_ERR_RETRYABLE;
15788 	}
15789 
15790 	/* Use absolute block number for the request block number */
15791 	request_blkno = xp->xb_blkno;
15792 
15793 	/*
15794 	 * Now try to get the error block number from the sense data
15795 	 */
15796 	sensep = xp->xb_sense_data;
15797 
15798 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
15799 	    (uint64_t *)&err_blkno)) {
15800 		/*
15801 		 * We retrieved the error block number from the information
15802 		 * portion of the sense data.
15803 		 *
15804 		 * For USCSI commands we are better off using the error
15805 		 * block no. as the requested block no. (This is the best
15806 		 * we can estimate.)
15807 		 */
15808 		if ((SD_IS_BUFIO(xp) == FALSE) &&
15809 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
15810 			request_blkno = err_blkno;
15811 		}
15812 	} else {
15813 		/*
15814 		 * Without the es_valid bit set (for fixed format) or an
15815 		 * information descriptor (for descriptor format) we cannot
15816 		 * be certain of the error blkno, so just use the
15817 		 * request_blkno.
15818 		 */
15819 		err_blkno = (diskaddr_t)request_blkno;
15820 	}
15821 
15822 	/*
15823 	 * The following will log the buffer contents for the release driver
15824 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
15825 	 * level is set to verbose.
15826 	 */
15827 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
15828 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15829 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15830 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
15831 
15832 	if (pfa_flag == FALSE) {
15833 		/* This is normally only set for USCSI */
15834 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
15835 			return;
15836 		}
15837 
15838 		if ((SD_IS_BUFIO(xp) == TRUE) &&
15839 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
15840 		    (severity < sd_error_level))) {
15841 			return;
15842 		}
15843 	}
15844 
15845 	/*
15846 	 * Check for Sonoma Failover and keep a count of how many I/Os failed
15847 	 */
15848 	if ((SD_IS_LSI(un)) &&
15849 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
15850 	    (scsi_sense_asc(sensep) == 0x94) &&
15851 	    (scsi_sense_ascq(sensep) == 0x01)) {
15852 		un->un_sonoma_failure_count++;
15853 		if (un->un_sonoma_failure_count > 1) {
15854 			return;
15855 		}
15856 	}
15857 
15858 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
15859 	    request_blkno, err_blkno, scsi_cmds,
15860 	    (struct scsi_extended_sense *)sensep,
15861 	    un->un_additional_codes, NULL);
15862 }
15863 
15864 /*
15865  *    Function: sd_sense_key_no_sense
15866  *
15867  * Description: Recovery action when sense data was not received.
15868  *
15869  *     Context: May be called from interrupt context
15870  */
15871 
15872 static void
15873 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
15874 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
15875 {
15876 	struct sd_sense_info	si;
15877 
15878 	ASSERT(un != NULL);
15879 	ASSERT(mutex_owned(SD_MUTEX(un)));
15880 	ASSERT(bp != NULL);
15881 	ASSERT(xp != NULL);
15882 	ASSERT(pktp != NULL);
15883 
15884 	si.ssi_severity = SCSI_ERR_FATAL;
15885 	si.ssi_pfa_flag = FALSE;
15886 
15887 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
15888 
15889 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15890 	    &si, EIO, (clock_t)0, NULL);
15891 }
15892 
15893 
15894 /*
15895  *    Function: sd_sense_key_recoverable_error
15896  *
15897  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
15898  *
15899  *     Context: May be called from interrupt context
15900  */
15901 
15902 static void
15903 sd_sense_key_recoverable_error(struct sd_lun *un,
15904 	uint8_t *sense_datap,
15905 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15906 {
15907 	struct sd_sense_info	si;
15908 	uint8_t asc = scsi_sense_asc(sense_datap);
15909 
15910 	ASSERT(un != NULL);
15911 	ASSERT(mutex_owned(SD_MUTEX(un)));
15912 	ASSERT(bp != NULL);
15913 	ASSERT(xp != NULL);
15914 	ASSERT(pktp != NULL);
15915 
15916 	/*
15917 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
15918 	 */
15919 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
15920 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
15921 		si.ssi_severity = SCSI_ERR_INFO;
15922 		si.ssi_pfa_flag = TRUE;
15923 	} else {
15924 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
15925 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
15926 		si.ssi_severity = SCSI_ERR_RECOVERED;
15927 		si.ssi_pfa_flag = FALSE;
15928 	}
15929 
15930 	if (pktp->pkt_resid == 0) {
15931 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
15932 		sd_return_command(un, bp);
15933 		return;
15934 	}
15935 
15936 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
15937 	    &si, EIO, (clock_t)0, NULL);
15938 }
15939 
15940 
15941 
15942 
15943 /*
15944  *    Function: sd_sense_key_not_ready
15945  *
15946  * Description: Recovery actions for a SCSI "Not Ready" sense key.
15947  *
15948  *     Context: May be called from interrupt context
15949  */
15950 
15951 static void
15952 sd_sense_key_not_ready(struct sd_lun *un,
15953 	uint8_t *sense_datap,
15954 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
15955 {
15956 	struct sd_sense_info	si;
15957 	uint8_t asc = scsi_sense_asc(sense_datap);
15958 	uint8_t ascq = scsi_sense_ascq(sense_datap);
15959 
15960 	ASSERT(un != NULL);
15961 	ASSERT(mutex_owned(SD_MUTEX(un)));
15962 	ASSERT(bp != NULL);
15963 	ASSERT(xp != NULL);
15964 	ASSERT(pktp != NULL);
15965 
15966 	si.ssi_severity = SCSI_ERR_FATAL;
15967 	si.ssi_pfa_flag = FALSE;
15968 
15969 	/*
15970 	 * Update error stats after first NOT READY error. Disks may have
15971 	 * been powered down and may need to be restarted.  For CDROMs,
15972 	 * report NOT READY errors only if media is present.
15973 	 */
15974 	if ((ISCD(un) && (asc == 0x3A)) ||
15975 	    (xp->xb_nr_retry_count > 0)) {
15976 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
15977 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
15978 	}
15979 
15980 	/*
15981 	 * Just fail if the "not ready" retry limit has been reached.
15982 	 */
15983 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
15984 		/* Special check for error message printing for removables. */
15985 		if (un->un_f_has_removable_media && (asc == 0x04) &&
15986 		    (ascq >= 0x04)) {
15987 			si.ssi_severity = SCSI_ERR_ALL;
15988 		}
15989 		goto fail_command;
15990 	}
15991 
15992 	/*
15993 	 * Check the ASC and ASCQ in the sense data as needed, to determine
15994 	 * what to do.
15995 	 */
15996 	switch (asc) {
15997 	case 0x04:	/* LOGICAL UNIT NOT READY */
15998 		/*
15999 		 * Disk drives that don't spin up result in a very long delay
16000 		 * in format without warning messages. We will log a message
16001 		 * if the error level is set to verbose.
16002 		 */
16003 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
16004 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16005 			    "logical unit not ready, resetting disk\n");
16006 		}
16007 
16008 		/*
16009 		 * There are different requirements for CDROMs and disks for
16010 		 * the number of retries.  If a CD-ROM is giving this, it is
16011 		 * probably reading TOC and is in the process of getting
16012 		 * ready, so we should keep on trying for a long time to make
16013 		 * sure that all types of media are taken into account (for
16014 		 * some media the drive takes a long time to read TOC).  For
16015 		 * disks we do not want to retry this too many times as this
16016 		 * can cause a long hang in format when the drive refuses to
16017 		 * spin up (a very common failure).
16018 		 */
16019 		switch (ascq) {
16020 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
16021 			/*
16022 			 * Disk drives frequently refuse to spin up which
16023 			 * results in a very long hang in format without
16024 			 * warning messages.
16025 			 *
16026 			 * Note: This code preserves the legacy behavior of
16027 			 * comparing xb_nr_retry_count against zero for fibre
16028 			 * channel targets instead of comparing against the
16029 			 * un_reset_retry_count value.  The reason for this
16030 			 * discrepancy has been so utterly lost beneath the
16031 			 * Sands of Time that even Indiana Jones could not
16032 			 * find it.
16033 			 */
16034 			if (un->un_f_is_fibre == TRUE) {
16035 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
16036 				    (xp->xb_nr_retry_count > 0)) &&
16037 				    (un->un_startstop_timeid == NULL)) {
16038 					scsi_log(SD_DEVINFO(un), sd_label,
16039 					    CE_WARN, "logical unit not ready, "
16040 					    "resetting disk\n");
16041 					sd_reset_target(un, pktp);
16042 				}
16043 			} else {
16044 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
16045 				    (xp->xb_nr_retry_count >
16046 				    un->un_reset_retry_count)) &&
16047 				    (un->un_startstop_timeid == NULL)) {
16048 					scsi_log(SD_DEVINFO(un), sd_label,
16049 					    CE_WARN, "logical unit not ready, "
16050 					    "resetting disk\n");
16051 					sd_reset_target(un, pktp);
16052 				}
16053 			}
16054 			break;
16055 
16056 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
16057 			/*
16058 			 * If the target is in the process of becoming
16059 			 * ready, just proceed with the retry. This can
16060 			 * happen with CD-ROMs that take a long time to
16061 			 * read TOC after a power cycle or reset.
16062 			 */
16063 			goto do_retry;
16064 
16065 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
16066 			break;
16067 
16068 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
16069 			/*
16070 			 * Retries cannot help here so just fail right away.
16071 			 */
16072 			goto fail_command;
16073 
16074 		case 0x88:
16075 			/*
16076 			 * Vendor-unique code for T3/T4: it indicates a
16077 		 * path problem in a multipathed config, but as far as
16078 			 * the target driver is concerned it equates to a fatal
16079 			 * error, so we should just fail the command right away
16080 			 * (without printing anything to the console). If this
16081 			 * is not a T3/T4, fall thru to the default recovery
16082 			 * action.
16083 			 * T3/T4 is FC only, so there is no need to check is_fibre.
16084 			 */
16085 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
16086 				sd_return_failed_command(un, bp, EIO);
16087 				return;
16088 			}
16089 			/* FALLTHRU */
16090 
16091 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
16092 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
16093 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
16094 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
16095 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
16096 		default:    /* Possible future codes in SCSI spec? */
16097 			/*
16098 			 * For removable-media devices, do not retry if
16099 			 * ASCQ > 2 as these result mostly from USCSI commands
16100 			 * on MMC devices issued to check status of an
16101 			 * operation initiated in immediate mode.  Also for
16102 			 * ASCQ >= 4 do not print console messages as these
16103 			 * mainly represent a user-initiated operation
16104 			 * instead of a system failure.
16105 			 */
16106 			if (un->un_f_has_removable_media) {
16107 				si.ssi_severity = SCSI_ERR_ALL;
16108 				goto fail_command;
16109 			}
16110 			break;
16111 		}
16112 
16113 		/*
16114 		 * As part of our recovery attempt for the NOT READY
16115 		 * condition, we issue a START STOP UNIT command. However
16116 		 * we want to wait for a short delay before attempting this
16117 		 * as there may still be more commands coming back from the
16118 		 * target with the check condition. To do this we use
16119 		 * timeout(9F) to call sd_start_stop_unit_callback() after
16120 		 * the delay interval expires. (sd_start_stop_unit_callback()
16121 		 * dispatches sd_start_stop_unit_task(), which will issue
16122 		 * the actual START STOP UNIT command.) The delay interval
16123 		 * is one-half of the delay that we will use to retry the
16124 		 * command that generated the NOT READY condition.
16125 		 *
16126 		 * Note that we could just dispatch sd_start_stop_unit_task()
16127 		 * from here and allow it to sleep for the delay interval,
16128 		 * but then we would be tying up the taskq thread
16129 		 * unnecessarily for the duration of the delay.
16130 		 *
16131 		 * Do not issue the START STOP UNIT if the current command
16132 		 * is already a START STOP UNIT.
16133 		 */
16134 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
16135 			break;
16136 		}
16137 
16138 		/*
16139 		 * Do not schedule the timeout if one is already pending.
16140 		 */
16141 		if (un->un_startstop_timeid != NULL) {
16142 			SD_INFO(SD_LOG_ERROR, un,
16143 			    "sd_sense_key_not_ready: restart already issued to"
16144 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
16145 			    ddi_get_instance(SD_DEVINFO(un)));
16146 			break;
16147 		}
16148 
16149 		/*
16150 		 * Schedule the START STOP UNIT command, then queue the command
16151 		 * for a retry.
16152 		 *
16153 		 * Note: A timeout is not scheduled for this retry because we
16154 		 * want the retry to be serial with the START_STOP_UNIT. The
16155 		 * retry will be started when the START_STOP_UNIT is completed
16156 		 * in sd_start_stop_unit_task.
16157 		 */
16158 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
16159 		    un, SD_BSY_TIMEOUT / 2);
16160 		xp->xb_nr_retry_count++;
16161 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
16162 		return;
16163 
16164 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
16165 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
16166 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16167 			    "unit does not respond to selection\n");
16168 		}
16169 		break;
16170 
16171 	case 0x3A:	/* MEDIUM NOT PRESENT */
16172 		if (sd_error_level >= SCSI_ERR_FATAL) {
16173 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16174 			    "Caddy not inserted in drive\n");
16175 		}
16176 
16177 		sr_ejected(un);
16178 		un->un_mediastate = DKIO_EJECTED;
16179 		/* The state has changed, inform the media watch routines */
16180 		cv_broadcast(&un->un_state_cv);
16181 		/* Just fail if no media is present in the drive. */
16182 		goto fail_command;
16183 
16184 	default:
16185 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
16186 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
16187 			    "Unit not Ready. Additional sense code 0x%x\n",
16188 			    asc);
16189 		}
16190 		break;
16191 	}
16192 
16193 do_retry:
16194 
16195 	/*
16196 	 * Retry the command, as some targets may report NOT READY for
16197 	 * several seconds after being reset.
16198 	 */
16199 	xp->xb_nr_retry_count++;
16200 	si.ssi_severity = SCSI_ERR_RETRYABLE;
16201 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
16202 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
16203 
16204 	return;
16205 
16206 fail_command:
16207 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16208 	sd_return_failed_command(un, bp, EIO);
16209 }
16210 
16211 
16212 
16213 /*
16214  *    Function: sd_sense_key_medium_or_hardware_error
16215  *
16216  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
16217  *		sense key.
16218  *
16219  *     Context: May be called from interrupt context
16220  */
16221 
16222 static void
16223 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
16224 	uint8_t *sense_datap,
16225 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16226 {
16227 	struct sd_sense_info	si;
16228 	uint8_t sense_key = scsi_sense_key(sense_datap);
16229 	uint8_t asc = scsi_sense_asc(sense_datap);
16230 
16231 	ASSERT(un != NULL);
16232 	ASSERT(mutex_owned(SD_MUTEX(un)));
16233 	ASSERT(bp != NULL);
16234 	ASSERT(xp != NULL);
16235 	ASSERT(pktp != NULL);
16236 
16237 	si.ssi_severity = SCSI_ERR_FATAL;
16238 	si.ssi_pfa_flag = FALSE;
16239 
16240 	if (sense_key == KEY_MEDIUM_ERROR) {
16241 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
16242 	}
16243 
16244 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16245 
16246 	if ((un->un_reset_retry_count != 0) &&
16247 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
16248 		mutex_exit(SD_MUTEX(un));
16249 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
16250 		if (un->un_f_allow_bus_device_reset == TRUE) {
16251 
16252 			boolean_t try_resetting_target = B_TRUE;
16253 
16254 			/*
16255 			 * We need to be able to handle specific ASC values when we
16256 			 * are handling a KEY_HARDWARE_ERROR. In particular,
16257 			 * taking the default action of resetting the target may
16258 			 * not be the appropriate way to attempt recovery.
16259 			 * Resetting a target because of a single LUN failure
16260 			 * victimizes all LUNs on that target.
16261 			 *
16262 			 * This is true for the LSI arrays: if an LSI
16263 			 * array controller returns an ASC of 0x84 (LUN Dead), we
16264 			 * should trust it.
16265 			 */
16266 
16267 			if (sense_key == KEY_HARDWARE_ERROR) {
16268 				switch (asc) {
16269 				case 0x84:
16270 					if (SD_IS_LSI(un)) {
16271 						try_resetting_target = B_FALSE;
16272 					}
16273 					break;
16274 				default:
16275 					break;
16276 				}
16277 			}
16278 
16279 			if (try_resetting_target == B_TRUE) {
16280 				int reset_retval = 0;
16281 				if (un->un_f_lun_reset_enabled == TRUE) {
16282 					SD_TRACE(SD_LOG_IO_CORE, un,
16283 					    "sd_sense_key_medium_or_hardware_"
16284 					    "error: issuing RESET_LUN\n");
16285 					reset_retval =
16286 					    scsi_reset(SD_ADDRESS(un),
16287 					    RESET_LUN);
16288 				}
16289 				if (reset_retval == 0) {
16290 					SD_TRACE(SD_LOG_IO_CORE, un,
16291 					    "sd_sense_key_medium_or_hardware_"
16292 					    "error: issuing RESET_TARGET\n");
16293 					(void) scsi_reset(SD_ADDRESS(un),
16294 					    RESET_TARGET);
16295 				}
16296 			}
16297 		}
16298 		mutex_enter(SD_MUTEX(un));
16299 	}
16300 
16301 	/*
16302 	 * This really ought to be a fatal error, but we will retry anyway
16303 	 * as some drives report this error spuriously.
16304 	 */
16305 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16306 	    &si, EIO, (clock_t)0, NULL);
16307 }
16308 
16309 
16310 
16311 /*
16312  *    Function: sd_sense_key_illegal_request
16313  *
16314  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
16315  *
16316  *     Context: May be called from interrupt context
16317  */
16318 
16319 static void
16320 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
16321 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16322 {
16323 	struct sd_sense_info	si;
16324 
16325 	ASSERT(un != NULL);
16326 	ASSERT(mutex_owned(SD_MUTEX(un)));
16327 	ASSERT(bp != NULL);
16328 	ASSERT(xp != NULL);
16329 	ASSERT(pktp != NULL);
16330 
16331 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
16332 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
16333 
16334 	si.ssi_severity = SCSI_ERR_INFO;
16335 	si.ssi_pfa_flag = FALSE;
16336 
16337 	/* Pointless to retry if the target thinks it's an illegal request */
16338 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16339 	sd_return_failed_command(un, bp, EIO);
16340 }
16341 
16342 
16343 
16344 
16345 /*
16346  *    Function: sd_sense_key_unit_attention
16347  *
16348  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
16349  *
16350  *     Context: May be called from interrupt context
16351  */
16352 
16353 static void
16354 sd_sense_key_unit_attention(struct sd_lun *un,
16355 	uint8_t *sense_datap,
16356 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16357 {
16358 	/*
16359 	 * For UNIT ATTENTION we allow retries for one minute. Devices
16360 	 * like Sonoma can keep returning UNIT ATTENTION for close to a
16361 	 * minute under certain conditions.
16362 	 */
16363 	int	retry_check_flag = SD_RETRIES_UA;
16364 	boolean_t	kstat_updated = B_FALSE;
16365 	struct	sd_sense_info		si;
16366 	uint8_t asc = scsi_sense_asc(sense_datap);
16367 
16368 	ASSERT(un != NULL);
16369 	ASSERT(mutex_owned(SD_MUTEX(un)));
16370 	ASSERT(bp != NULL);
16371 	ASSERT(xp != NULL);
16372 	ASSERT(pktp != NULL);
16373 
16374 	si.ssi_severity = SCSI_ERR_INFO;
16375 	si.ssi_pfa_flag = FALSE;
16376 
16377 
16378 	switch (asc) {
16379 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
16380 		if (sd_report_pfa != 0) {
16381 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
16382 			si.ssi_pfa_flag = TRUE;
16383 			retry_check_flag = SD_RETRIES_STANDARD;
16384 			goto do_retry;
16385 		}
16386 
16387 		break;
16388 
16389 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
16390 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
16391 			un->un_resvd_status |=
16392 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
16393 		}
16394 #ifdef _LP64
16395 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
16396 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
16397 			    un, KM_NOSLEEP) == 0) {
16398 				/*
16399 				 * If we can't dispatch the task we'll just
16400 				 * live without descriptor sense.  We can
16401 				 * try again on the next "unit attention"
16402 				 */
16403 				SD_ERROR(SD_LOG_ERROR, un,
16404 				    "sd_sense_key_unit_attention: "
16405 				    "Could not dispatch "
16406 				    "sd_reenable_dsense_task\n");
16407 			}
16408 		}
16409 #endif /* _LP64 */
16410 		/* FALLTHRU */
16411 
16412 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
16413 		if (!un->un_f_has_removable_media) {
16414 			break;
16415 		}
16416 
16417 		/*
16418 		 * When we get a unit attention from a removable-media device,
16419 		 * it may be in a state that will take a long time to recover
16420 		 * (e.g., from a reset).  Since we are executing in interrupt
16421 		 * context here, we cannot wait around for the device to come
16422 		 * back. So hand this command off to sd_media_change_task()
16423 		 * for deferred processing under taskq thread context. (Note
16424 		 * that the command still may be failed if a problem is
16425 		 * encountered at a later time.)
16426 		 */
16427 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
16428 		    KM_NOSLEEP) == 0) {
16429 			/*
16430 			 * Cannot dispatch the request so fail the command.
16431 			 */
16432 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
16433 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16434 			si.ssi_severity = SCSI_ERR_FATAL;
16435 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16436 			sd_return_failed_command(un, bp, EIO);
16437 		}
16438 
16439 		/*
16440 		 * If the dispatch of sd_media_change_task() failed, we have
16441 		 * already updated the kstats above. If the dispatch succeeded,
16442 		 * the task itself will update the kstats if it encounters an
16443 		 * error later. Either way, set the kstat_updated flag here.
16444 		 */
16445 		kstat_updated = B_TRUE;
16446 
16447 		/*
16448 		 * Either the command has been successfully dispatched to a
16449 		 * task Q for retrying, or the dispatch failed. In either case
16450 		 * do NOT retry again by calling sd_retry_command. This sets up
16451 		 * two retries of the same command and when one completes and
16452 		 * frees the resources the other will access freed memory,
16453 		 * a bad thing.
16454 		 */
16455 		return;
16456 
16457 	default:
16458 		break;
16459 	}
16460 
16461 	/*
16462 	 * Update kstat if we haven't done that.
16463 	 */
16464 	if (!kstat_updated) {
16465 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16466 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
16467 	}
16468 
16469 do_retry:
16470 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
16471 	    EIO, SD_UA_RETRY_DELAY, NULL);
16472 }
16473 
16474 
16475 
16476 /*
16477  *    Function: sd_sense_key_fail_command
16478  *
16479  * Description: Used to fail a command when we don't like the sense key that
16480  *		was returned.
16481  *
16482  *     Context: May be called from interrupt context
16483  */
16484 
16485 static void
16486 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
16487 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16488 {
16489 	struct sd_sense_info	si;
16490 
16491 	ASSERT(un != NULL);
16492 	ASSERT(mutex_owned(SD_MUTEX(un)));
16493 	ASSERT(bp != NULL);
16494 	ASSERT(xp != NULL);
16495 	ASSERT(pktp != NULL);
16496 
16497 	si.ssi_severity = SCSI_ERR_FATAL;
16498 	si.ssi_pfa_flag = FALSE;
16499 
16500 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16501 	sd_return_failed_command(un, bp, EIO);
16502 }
16503 
16504 
16505 
16506 /*
16507  *    Function: sd_sense_key_blank_check
16508  *
16509  * Description: Recovery actions for a SCSI "Blank Check" sense key.
16510  *		Has no monetary connotation.
16511  *
16512  *     Context: May be called from interrupt context
16513  */
16514 
16515 static void
16516 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
16517 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16518 {
16519 	struct sd_sense_info	si;
16520 
16521 	ASSERT(un != NULL);
16522 	ASSERT(mutex_owned(SD_MUTEX(un)));
16523 	ASSERT(bp != NULL);
16524 	ASSERT(xp != NULL);
16525 	ASSERT(pktp != NULL);
16526 
16527 	/*
16528 	 * Blank check is not fatal for removable devices, therefore
16529 	 * it does not require a console message.
16530 	 */
16531 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
16532 	    SCSI_ERR_FATAL;
16533 	si.ssi_pfa_flag = FALSE;
16534 
16535 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
16536 	sd_return_failed_command(un, bp, EIO);
16537 }
16538 
16539 
16540 
16541 
16542 /*
16543  *    Function: sd_sense_key_aborted_command
16544  *
16545  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
16546  *
16547  *     Context: May be called from interrupt context
16548  */
16549 
16550 static void
16551 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
16552 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16553 {
16554 	struct sd_sense_info	si;
16555 
16556 	ASSERT(un != NULL);
16557 	ASSERT(mutex_owned(SD_MUTEX(un)));
16558 	ASSERT(bp != NULL);
16559 	ASSERT(xp != NULL);
16560 	ASSERT(pktp != NULL);
16561 
16562 	si.ssi_severity = SCSI_ERR_FATAL;
16563 	si.ssi_pfa_flag = FALSE;
16564 
16565 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16566 
16567 	/*
16568 	 * This really ought to be a fatal error, but we will retry anyway
16569 	 * as some drives report this as a spurious error.
16570 	 */
16571 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16572 	    &si, EIO, drv_usectohz(100000), NULL);
16573 }
16574 
16575 
16576 
16577 /*
16578  *    Function: sd_sense_key_default
16579  *
16580  * Description: Default recovery action for several SCSI sense keys (basically
16581  *		attempts a retry).
16582  *
16583  *     Context: May be called from interrupt context
16584  */
16585 
16586 static void
16587 sd_sense_key_default(struct sd_lun *un,
16588 	uint8_t *sense_datap,
16589 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
16590 {
16591 	struct sd_sense_info	si;
16592 	uint8_t sense_key = scsi_sense_key(sense_datap);
16593 
16594 	ASSERT(un != NULL);
16595 	ASSERT(mutex_owned(SD_MUTEX(un)));
16596 	ASSERT(bp != NULL);
16597 	ASSERT(xp != NULL);
16598 	ASSERT(pktp != NULL);
16599 
16600 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16601 
16602 	/*
16603 	 * Undecoded sense key.	Attempt retries and hope that they will fix
16604 	 * the problem.  Otherwise, we're dead.
16605 	 */
16606 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16607 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16608 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
16609 	}
16610 
16611 	si.ssi_severity = SCSI_ERR_FATAL;
16612 	si.ssi_pfa_flag = FALSE;
16613 
16614 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
16615 	    &si, EIO, (clock_t)0, NULL);
16616 }
16617 
16618 
16619 
16620 /*
16621  *    Function: sd_print_retry_msg
16622  *
16623  * Description: Print a message indicating the retry action being taken.
16624  *
16625  *   Arguments: un - ptr to associated softstate
16626  *		bp - ptr to buf(9S) for the command
16627  *		arg - not used.
16628  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16629  *			or SD_NO_RETRY_ISSUED
16630  *
16631  *     Context: May be called from interrupt context
16632  */
16633 /* ARGSUSED */
16634 static void
16635 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
16636 {
16637 	struct sd_xbuf	*xp;
16638 	struct scsi_pkt *pktp;
16639 	char *reasonp;
16640 	char *msgp;
16641 
16642 	ASSERT(un != NULL);
16643 	ASSERT(mutex_owned(SD_MUTEX(un)));
16644 	ASSERT(bp != NULL);
16645 	pktp = SD_GET_PKTP(bp);
16646 	ASSERT(pktp != NULL);
16647 	xp = SD_GET_XBUF(bp);
16648 	ASSERT(xp != NULL);
16649 
16650 	ASSERT(!mutex_owned(&un->un_pm_mutex));
16651 	mutex_enter(&un->un_pm_mutex);
16652 	if ((un->un_state == SD_STATE_SUSPENDED) ||
16653 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
16654 	    (pktp->pkt_flags & FLAG_SILENT)) {
16655 		mutex_exit(&un->un_pm_mutex);
16656 		goto update_pkt_reason;
16657 	}
16658 	mutex_exit(&un->un_pm_mutex);
16659 
16660 	/*
16661 	 * Suppress messages if they are all the same pkt_reason; with
16662 	 * tagged queueing, many (up to 256) are returned with the same pkt_reason.
16663 	 * If we are in panic, then suppress the retry messages.
16664 	 */
16665 	switch (flag) {
16666 	case SD_NO_RETRY_ISSUED:
16667 		msgp = "giving up";
16668 		break;
16669 	case SD_IMMEDIATE_RETRY_ISSUED:
16670 	case SD_DELAYED_RETRY_ISSUED:
16671 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
16672 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
16673 		    (sd_error_level != SCSI_ERR_ALL))) {
16674 			return;
16675 		}
16676 		msgp = "retrying command";
16677 		break;
16678 	default:
16679 		goto update_pkt_reason;
16680 	}
16681 
16682 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
16683 	    scsi_rname(pktp->pkt_reason));
16684 
16685 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16686 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
16687 
16688 update_pkt_reason:
16689 	/*
16690 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
16691 	 * This is to prevent multiple console messages for the same failure
16692 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
16693 	 * when the command is retried successfully because there still may be
16694 	 * more commands coming back with the same value of pktp->pkt_reason.
16695 	 */
16696 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
16697 		un->un_last_pkt_reason = pktp->pkt_reason;
16698 	}
16699 }
16700 
16701 
16702 /*
16703  *    Function: sd_print_cmd_incomplete_msg
16704  *
16705  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
16706  *
16707  *   Arguments: un - ptr to associated softstate
16708  *		bp - ptr to buf(9S) for the command
16709  *		arg - passed to sd_print_retry_msg()
16710  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16711  *			or SD_NO_RETRY_ISSUED
16712  *
16713  *     Context: May be called from interrupt context
16714  */
16715 
16716 static void
16717 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
16718 	int code)
16719 {
16720 	dev_info_t	*dip;
16721 
16722 	ASSERT(un != NULL);
16723 	ASSERT(mutex_owned(SD_MUTEX(un)));
16724 	ASSERT(bp != NULL);
16725 
16726 	switch (code) {
16727 	case SD_NO_RETRY_ISSUED:
16728 		/* Command was failed. Someone turned off this target? */
16729 		if (un->un_state != SD_STATE_OFFLINE) {
16730 			/*
16731 			 * Suppress the message if we are detaching and the
16732 			 * device has been disconnected.  Note that
16733 			 * DEVI_IS_DEVICE_REMOVED is a consolidation-private
16734 			 * interface and is not part of the DDI.
16735 			 */
16736 			dip = un->un_sd->sd_dev;
16737 			if (!(DEVI_IS_DETACHING(dip) &&
16738 			    DEVI_IS_DEVICE_REMOVED(dip))) {
16739 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16740 				    "disk not responding to selection\n");
16741 			}
16742 			New_state(un, SD_STATE_OFFLINE);
16743 		}
16744 		break;
16745 
16746 	case SD_DELAYED_RETRY_ISSUED:
16747 	case SD_IMMEDIATE_RETRY_ISSUED:
16748 	default:
16749 		/* Command was successfully queued for retry */
16750 		sd_print_retry_msg(un, bp, arg, code);
16751 		break;
16752 	}
16753 }
16754 
16755 
16756 /*
16757  *    Function: sd_pkt_reason_cmd_incomplete
16758  *
16759  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
16760  *
16761  *     Context: May be called from interrupt context
16762  */
16763 
16764 static void
16765 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
16766 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16767 {
16768 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
16769 
16770 	ASSERT(un != NULL);
16771 	ASSERT(mutex_owned(SD_MUTEX(un)));
16772 	ASSERT(bp != NULL);
16773 	ASSERT(xp != NULL);
16774 	ASSERT(pktp != NULL);
16775 
16776 	/* Do not do a reset if selection did not complete */
16777 	/* Note: Should this not just check the bit? */
16778 	if (pktp->pkt_state != STATE_GOT_BUS) {
16779 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16780 		sd_reset_target(un, pktp);
16781 	}
16782 
16783 	/*
16784 	 * If the target was not successfully selected, then set
16785 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
16786 	 * with the target, and further retries and/or commands are
16787 	 * likely to take a long time.
16788 	 */
16789 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
16790 		flag |= SD_RETRIES_FAILFAST;
16791 	}
16792 
16793 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16794 
16795 	sd_retry_command(un, bp, flag,
16796 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16797 }
16798 
16799 
16800 
16801 /*
16802  *    Function: sd_pkt_reason_cmd_tran_err
16803  *
16804  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
16805  *
16806  *     Context: May be called from interrupt context
16807  */
16808 
16809 static void
16810 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
16811 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16812 {
16813 	ASSERT(un != NULL);
16814 	ASSERT(mutex_owned(SD_MUTEX(un)));
16815 	ASSERT(bp != NULL);
16816 	ASSERT(xp != NULL);
16817 	ASSERT(pktp != NULL);
16818 
16819 	/*
16820 	 * Do not reset if we got a parity error, or if
16821 	 * selection did not complete.
16822 	 */
16823 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16824 	/* Note: Should this not just check the bit for pkt_state? */
16825 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
16826 	    (pktp->pkt_state != STATE_GOT_BUS)) {
16827 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
16828 		sd_reset_target(un, pktp);
16829 	}
16830 
16831 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16832 
16833 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16834 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16835 }
16836 
16837 
16838 
16839 /*
16840  *    Function: sd_pkt_reason_cmd_reset
16841  *
16842  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
16843  *
16844  *     Context: May be called from interrupt context
16845  */
16846 
16847 static void
16848 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
16849 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16850 {
16851 	ASSERT(un != NULL);
16852 	ASSERT(mutex_owned(SD_MUTEX(un)));
16853 	ASSERT(bp != NULL);
16854 	ASSERT(xp != NULL);
16855 	ASSERT(pktp != NULL);
16856 
16857 	/* The target may still be running the command, so try to reset. */
16858 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16859 	sd_reset_target(un, pktp);
16860 
16861 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16862 
16863 	/*
16864 	 * If pkt_reason is CMD_RESET chances are that this pkt got
16865 	 * reset because another target on this bus caused it. The target
16866 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16867 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16868 	 */
16869 
16870 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16871 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16872 }
16873 
16874 
16875 
16876 
16877 /*
16878  *    Function: sd_pkt_reason_cmd_aborted
16879  *
16880  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
16881  *
16882  *     Context: May be called from interrupt context
16883  */
16884 
16885 static void
16886 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
16887 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16888 {
16889 	ASSERT(un != NULL);
16890 	ASSERT(mutex_owned(SD_MUTEX(un)));
16891 	ASSERT(bp != NULL);
16892 	ASSERT(xp != NULL);
16893 	ASSERT(pktp != NULL);
16894 
16895 	/* The target may still be running the command, so try to reset. */
16896 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16897 	sd_reset_target(un, pktp);
16898 
16899 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16900 
16901 	/*
16902 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
16903 	 * aborted because another target on this bus caused it. The target
16904 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
16905 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
16906 	 */
16907 
16908 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
16909 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16910 }
16911 
16912 
16913 
16914 /*
16915  *    Function: sd_pkt_reason_cmd_timeout
16916  *
16917  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
16918  *
16919  *     Context: May be called from interrupt context
16920  */
16921 
16922 static void
16923 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
16924 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16925 {
16926 	ASSERT(un != NULL);
16927 	ASSERT(mutex_owned(SD_MUTEX(un)));
16928 	ASSERT(bp != NULL);
16929 	ASSERT(xp != NULL);
16930 	ASSERT(pktp != NULL);
16931 
16932 
16933 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
16934 	sd_reset_target(un, pktp);
16935 
16936 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16937 
16938 	/*
16939 	 * A command timeout indicates that we could not establish
16940 	 * communication with the target, so set SD_RETRIES_FAILFAST
16941 	 * as further retries/commands are likely to take a long time.
16942 	 */
16943 	sd_retry_command(un, bp,
16944 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
16945 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16946 }
16947 
16948 
16949 
16950 /*
16951  *    Function: sd_pkt_reason_cmd_unx_bus_free
16952  *
16953  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
16954  *
16955  *     Context: May be called from interrupt context
16956  */
16957 
16958 static void
16959 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
16960 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16961 {
16962 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
16963 
16964 	ASSERT(un != NULL);
16965 	ASSERT(mutex_owned(SD_MUTEX(un)));
16966 	ASSERT(bp != NULL);
16967 	ASSERT(xp != NULL);
16968 	ASSERT(pktp != NULL);
16969 
16970 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16971 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
16972 
16973 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
16974 	    sd_print_retry_msg : NULL;
16975 
16976 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
16977 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
16978 }
16979 
16980 
16981 /*
16982  *    Function: sd_pkt_reason_cmd_tag_reject
16983  *
16984  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
16985  *
16986  *     Context: May be called from interrupt context
16987  */
16988 
16989 static void
16990 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
16991 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16992 {
16993 	ASSERT(un != NULL);
16994 	ASSERT(mutex_owned(SD_MUTEX(un)));
16995 	ASSERT(bp != NULL);
16996 	ASSERT(xp != NULL);
16997 	ASSERT(pktp != NULL);
16998 
16999 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17000 	pktp->pkt_flags = 0;
17001 	un->un_tagflags = 0;
17002 	if (un->un_f_opt_queueing == TRUE) {
17003 		un->un_throttle = min(un->un_throttle, 3);
17004 	} else {
17005 		un->un_throttle = 1;
17006 	}
17007 	mutex_exit(SD_MUTEX(un));
17008 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
17009 	mutex_enter(SD_MUTEX(un));
17010 
17011 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
17012 
17013 	/* Legacy behavior: do not check the retry counts here. */
17014 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
17015 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
17016 }
17017 
17018 
17019 /*
17020  *    Function: sd_pkt_reason_default
17021  *
17022  * Description: Default recovery actions for SCSA pkt_reason values that
17023  *		do not have more explicit recovery actions.
17024  *
17025  *     Context: May be called from interrupt context
17026  */
17027 
17028 static void
17029 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
17030 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17031 {
17032 	ASSERT(un != NULL);
17033 	ASSERT(mutex_owned(SD_MUTEX(un)));
17034 	ASSERT(bp != NULL);
17035 	ASSERT(xp != NULL);
17036 	ASSERT(pktp != NULL);
17037 
17038 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
17039 	sd_reset_target(un, pktp);
17040 
17041 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
17042 
17043 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
17044 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
17045 }
17046 
17047 
17048 
17049 /*
17050  *    Function: sd_pkt_status_check_condition
17051  *
17052  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
17053  *
17054  *     Context: May be called from interrupt context
17055  */
17056 
17057 static void
17058 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
17059 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17060 {
17061 	ASSERT(un != NULL);
17062 	ASSERT(mutex_owned(SD_MUTEX(un)));
17063 	ASSERT(bp != NULL);
17064 	ASSERT(xp != NULL);
17065 	ASSERT(pktp != NULL);
17066 
17067 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
17068 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
17069 
17070 	/*
17071 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
17072 	 * command will be retried after the request sense). Otherwise, retry
17073 	 * the command. Note: we are issuing the request sense even though the
17074 	 * retry limit may have been reached for the failed command.
17075 	 */
17076 	if (un->un_f_arq_enabled == FALSE) {
17077 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
17078 		    "no ARQ, sending request sense command\n");
17079 		sd_send_request_sense_command(un, bp, pktp);
17080 	} else {
17081 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
17082 		    "ARQ, retrying request sense command\n");
17083 #if defined(__i386) || defined(__amd64)
17084 		/*
17085 		 * The retry delay value used here must be adjusted if
17086 		 * SD_RETRY_DELAY changes in sddef.h.
17087 		 */
17088 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17089 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
17090 		    NULL);
17091 #else
17092 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
17093 		    EIO, SD_RETRY_DELAY, NULL);
17094 #endif
17095 	}
17096 
17097 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
17098 }
17099 
17100 
17101 /*
17102  *    Function: sd_pkt_status_busy
17103  *
17104  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
17105  *
17106  *     Context: May be called from interrupt context
17107  */
17108 
17109 static void
17110 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17111 	struct scsi_pkt *pktp)
17112 {
17113 	ASSERT(un != NULL);
17114 	ASSERT(mutex_owned(SD_MUTEX(un)));
17115 	ASSERT(bp != NULL);
17116 	ASSERT(xp != NULL);
17117 	ASSERT(pktp != NULL);
17118 
17119 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17120 	    "sd_pkt_status_busy: entry\n");
17121 
17122 	/* If retries are exhausted, just fail the command. */
17123 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
17124 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17125 		    "device busy too long\n");
17126 		sd_return_failed_command(un, bp, EIO);
17127 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17128 		    "sd_pkt_status_busy: exit\n");
17129 		return;
17130 	}
17131 	xp->xb_retry_count++;
17132 
17133 	/*
17134 	 * Try to reset the target. However, we do not want to perform
17135 	 * more than one reset if the device continues to fail. The reset
17136 	 * will be performed when the retry count reaches the reset
17137 	 * threshold.  This threshold should be set such that at least
17138 	 * one retry is issued before the reset is performed.
17139 	 */
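	/*
	 * For example: if un_reset_retry_count is 0 or 1, the expression
	 * below clamps the reset threshold to 2, so the reset fires on the
	 * second busy retry and at least one plain retry always precedes
	 * it.  With un_reset_retry_count == 5 (an illustrative value), the
	 * reset would fire on the fifth busy retry instead.
	 */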
17140 	if (xp->xb_retry_count ==
17141 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
17142 		int rval = 0;
17143 		mutex_exit(SD_MUTEX(un));
17144 		if (un->un_f_allow_bus_device_reset == TRUE) {
17145 			/*
17146 			 * First try to reset the LUN; if we cannot then
17147 			 * try to reset the target.
17148 			 */
17149 			if (un->un_f_lun_reset_enabled == TRUE) {
17150 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17151 				    "sd_pkt_status_busy: RESET_LUN\n");
17152 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17153 			}
17154 			if (rval == 0) {
17155 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17156 				    "sd_pkt_status_busy: RESET_TARGET\n");
17157 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17158 			}
17159 		}
17160 		if (rval == 0) {
17161 			/*
17162 			 * If the RESET_LUN and/or RESET_TARGET failed,
17163 			 * try RESET_ALL
17164 			 */
17165 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17166 			    "sd_pkt_status_busy: RESET_ALL\n");
17167 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
17168 		}
17169 		mutex_enter(SD_MUTEX(un));
17170 		if (rval == 0) {
17171 			/*
17172 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
17173 			 * At this point we give up & fail the command.
17174 			 */
17175 			sd_return_failed_command(un, bp, EIO);
17176 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17177 			    "sd_pkt_status_busy: exit (failed cmd)\n");
17178 			return;
17179 		}
17180 	}
17181 
17182 	/*
17183 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
17184 	 * we have already checked the retry counts above.
17185 	 */
17186 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
17187 	    EIO, SD_BSY_TIMEOUT, NULL);
17188 
17189 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17190 	    "sd_pkt_status_busy: exit\n");
17191 }
17192 
17193 
17194 /*
17195  *    Function: sd_pkt_status_reservation_conflict
17196  *
17197  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
17198  *		command status.
17199  *
17200  *     Context: May be called from interrupt context
17201  */
17202 
17203 static void
17204 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
17205 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17206 {
17207 	ASSERT(un != NULL);
17208 	ASSERT(mutex_owned(SD_MUTEX(un)));
17209 	ASSERT(bp != NULL);
17210 	ASSERT(xp != NULL);
17211 	ASSERT(pktp != NULL);
17212 
17213 	/*
17214 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the conflict
17215 	 * could have various causes: incorrect keys, an unregistered
17216 	 * initiator, no reservation held, etc. So, return EACCES to the caller.
17217 	 */
17218 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
17219 		int cmd = SD_GET_PKT_OPCODE(pktp);
17220 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
17221 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
17222 			sd_return_failed_command(un, bp, EACCES);
17223 			return;
17224 		}
17225 	}
17226 
17227 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
17228 
17229 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
17230 		if (sd_failfast_enable != 0) {
17231 			/* By definition, we must panic here.... */
17232 			sd_panic_for_res_conflict(un);
17233 			/*NOTREACHED*/
17234 		}
17235 		SD_ERROR(SD_LOG_IO, un,
17236 		    "sd_handle_resv_conflict: Disk Reserved\n");
17237 		sd_return_failed_command(un, bp, EACCES);
17238 		return;
17239 	}
17240 
17241 	/*
17242 	 * 1147670: retry only if sd_retry_on_reservation_conflict
17243 	 * property is set (default is 1). Retries will not succeed
17244 	 * on a disk reserved by another initiator. HA systems
17245 	 * may reset this via sd.conf to avoid these retries.
17246 	 *
17247 	 * Note: The legacy return code for this failure is EIO, however EACCES
17248 	 * seems more appropriate for a reservation conflict.
17249 	 */
17250 	if (sd_retry_on_reservation_conflict == 0) {
17251 		SD_ERROR(SD_LOG_IO, un,
17252 		    "sd_handle_resv_conflict: Device Reserved\n");
17253 		sd_return_failed_command(un, bp, EIO);
17254 		return;
17255 	}
17256 
17257 	/*
17258 	 * Retry the command if we can.
17259 	 *
17260 	 * Note: The legacy return code for this failure is EIO, however EACCES
17261 	 * seems more appropriate for a reservation conflict.
17262 	 */
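	/*
	 * The (clock_t)2 passed below is a delay of two system clock
	 * ticks (roughly 20ms at the typical 100Hz tick rate) before the
	 * retry is re-issued.
	 */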
17263 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
17264 	    (clock_t)2, NULL);
17265 }
17266 
17267 
17268 
17269 /*
17270  *    Function: sd_pkt_status_qfull
17271  *
17272  * Description: Handle a QUEUE FULL condition from the target.  This can
17273  *		occur if the HBA does not handle the queue full condition.
17274  *		(Basically this means third-party HBAs, as Sun HBAs will
17275  *		handle the queue full condition.)  Note that if there are
17276  *		some commands already in the transport, then the queue full
17277  *		has occurred because the queue for this nexus is actually
17278  *		full. If there are no commands in the transport, then the
17279  *		queue full is resulting from some other initiator or lun
17280  *		consuming all the resources at the target.
17281  *
17282  *     Context: May be called from interrupt context
17283  */
17284 
17285 static void
17286 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
17287 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17288 {
17289 	ASSERT(un != NULL);
17290 	ASSERT(mutex_owned(SD_MUTEX(un)));
17291 	ASSERT(bp != NULL);
17292 	ASSERT(xp != NULL);
17293 	ASSERT(pktp != NULL);
17294 
17295 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17296 	    "sd_pkt_status_qfull: entry\n");
17297 
17298 	/*
17299 	 * Just lower the QFULL throttle and retry the command.  Note that
17300 	 * we do not limit the number of retries here.
17301 	 */
17302 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
17303 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
17304 	    SD_RESTART_TIMEOUT, NULL);
17305 
17306 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17307 	    "sd_pkt_status_qfull: exit\n");
17308 }
17309 
17310 
17311 /*
17312  *    Function: sd_reset_target
17313  *
17314  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
17315  *		RESET_TARGET, or RESET_ALL.
17316  *
17317  *     Context: May be called under interrupt context.
17318  */
17319 
17320 static void
17321 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
17322 {
17323 	int rval = 0;
17324 
17325 	ASSERT(un != NULL);
17326 	ASSERT(mutex_owned(SD_MUTEX(un)));
17327 	ASSERT(pktp != NULL);
17328 
17329 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
17330 
17331 	/*
17332 	 * No need to reset if the transport layer has already done so.
17333 	 */
17334 	if ((pktp->pkt_statistics &
17335 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
17336 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17337 		    "sd_reset_target: no reset\n");
17338 		return;
17339 	}
17340 
17341 	mutex_exit(SD_MUTEX(un));
17342 
17343 	if (un->un_f_allow_bus_device_reset == TRUE) {
17344 		if (un->un_f_lun_reset_enabled == TRUE) {
17345 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17346 			    "sd_reset_target: RESET_LUN\n");
17347 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
17348 		}
17349 		if (rval == 0) {
17350 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17351 			    "sd_reset_target: RESET_TARGET\n");
17352 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
17353 		}
17354 	}
17355 
17356 	if (rval == 0) {
17357 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17358 		    "sd_reset_target: RESET_ALL\n");
17359 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
17360 	}
17361 
17362 	mutex_enter(SD_MUTEX(un));
17363 
17364 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
17365 }
17366 
17367 
17368 /*
17369  *    Function: sd_media_change_task
17370  *
17371  * Description: Recovery action that waits for a CDROM to become available.
17372  *
17373  *     Context: Executes in a taskq() thread context
17374  */
17375 
17376 static void
17377 sd_media_change_task(void *arg)
17378 {
17379 	struct	scsi_pkt	*pktp = arg;
17380 	struct	sd_lun		*un;
17381 	struct	buf		*bp;
17382 	struct	sd_xbuf		*xp;
17383 	int	err		= 0;
17384 	int	retry_count	= 0;
17385 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
17386 	struct	sd_sense_info	si;
17387 
17388 	ASSERT(pktp != NULL);
17389 	bp = (struct buf *)pktp->pkt_private;
17390 	ASSERT(bp != NULL);
17391 	xp = SD_GET_XBUF(bp);
17392 	ASSERT(xp != NULL);
17393 	un = SD_GET_UN(bp);
17394 	ASSERT(un != NULL);
17395 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17396 	ASSERT(un->un_f_monitor_media_state);
17397 
17398 	si.ssi_severity = SCSI_ERR_INFO;
17399 	si.ssi_pfa_flag = FALSE;
17400 
17401 	/*
17402 	 * When a reset is issued on a CDROM, it takes a long time to
17403 	 * recover. The first few attempts to read the capacity and do other
17404 	 * unit-attention handling fail (with an ASC of 0x04 and an ASCQ of
17405 	 * 0x01). In that case we want to retry enough times, while still
17406 	 * limiting the retries for genuine failures such as no media in
17407 	 * the drive.
17408 	 */
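	/*
	 * Each failed pass below sleeps for half a second, so the loop
	 * waits up to retry_limit/2 seconds in total; if the device
	 * reports that it is still becoming ready (EAGAIN), the limit is
	 * raised from SD_UNIT_ATTENTION_RETRY/10 to the full
	 * SD_UNIT_ATTENTION_RETRY.
	 */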
17409 	while (retry_count++ < retry_limit) {
17410 		if ((err = sd_handle_mchange(un)) == 0) {
17411 			break;
17412 		}
17413 		if (err == EAGAIN) {
17414 			retry_limit = SD_UNIT_ATTENTION_RETRY;
17415 		}
17416 		/* Sleep for 0.5 sec. & try again */
17417 		delay(drv_usectohz(500000));
17418 	}
17419 
17420 	/*
17421 	 * Dispatch (retry or fail) the original command here,
17422 	 * along with appropriate console messages....
17423 	 *
17424 	 * Must grab the mutex before calling sd_retry_command,
17425 	 * sd_print_sense_msg and sd_return_failed_command.
17426 	 */
17427 	mutex_enter(SD_MUTEX(un));
17428 	if (err != SD_CMD_SUCCESS) {
17429 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17430 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17431 		si.ssi_severity = SCSI_ERR_FATAL;
17432 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17433 		sd_return_failed_command(un, bp, EIO);
17434 	} else {
17435 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17436 		    &si, EIO, (clock_t)0, NULL);
17437 	}
17438 	mutex_exit(SD_MUTEX(un));
17439 }
17440 
17441 
17442 
17443 /*
17444  *    Function: sd_handle_mchange
17445  *
17446  * Description: Perform geometry validation & other recovery when the CDROM
17447  *		has been removed from the drive.
17448  *
17449  * Return Code: 0 for success
17450  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
17451  *		sd_send_scsi_READ_CAPACITY()
17452  *
17453  *     Context: Executes in a taskq() thread context
17454  */
17455 
17456 static int
17457 sd_handle_mchange(struct sd_lun *un)
17458 {
17459 	uint64_t	capacity;
17460 	uint32_t	lbasize;
17461 	int		rval;
17462 
17463 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17464 	ASSERT(un->un_f_monitor_media_state);
17465 
17466 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
17467 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
17468 		return (rval);
17469 	}
17470 
17471 	mutex_enter(SD_MUTEX(un));
17472 	sd_update_block_info(un, lbasize, capacity);
17473 
17474 	if (un->un_errstats != NULL) {
17475 		struct	sd_errstats *stp =
17476 		    (struct sd_errstats *)un->un_errstats->ks_data;
17477 		stp->sd_capacity.value.ui64 = (uint64_t)
17478 		    ((uint64_t)un->un_blockcount *
17479 		    (uint64_t)un->un_tgt_blocksize);
17480 	}
17481 
17482 
17483 	/*
17484 	 * Check if the media in the device is writable or not
17485 	 */
17486 	if (ISCD(un))
17487 		sd_check_for_writable_cd(un, SD_PATH_DIRECT_PRIORITY);
17488 
17489 	/*
17490 	 * Note: Maybe let the strategy/partitioning chain worry about getting
17491 	 * valid geometry.
17492 	 */
17493 	mutex_exit(SD_MUTEX(un));
17494 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
17495 
17496 
17497 	if (cmlb_validate(un->un_cmlbhandle, 0,
17498 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
17499 		return (EIO);
17500 	} else {
17501 		if (un->un_f_pkstats_enabled) {
17502 			sd_set_pstats(un);
17503 			SD_TRACE(SD_LOG_IO_PARTITION, un,
17504 			    "sd_handle_mchange: un:0x%p pstats created and "
17505 			    "set\n", un);
17506 		}
17507 	}
17508 
17509 
17510 	/*
17511 	 * Try to lock the door
17512 	 */
17513 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
17514 	    SD_PATH_DIRECT_PRIORITY));
17515 }
17516 
17517 
17518 /*
17519  *    Function: sd_send_scsi_DOORLOCK
17520  *
17521  * Description: Issue the scsi DOOR LOCK command
17522  *
17523  *   Arguments: un    - pointer to driver soft state (unit) structure for
17524  *			this target.
17525  *		flag  - SD_REMOVAL_ALLOW
17526  *			SD_REMOVAL_PREVENT
17527  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17528  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17529  *			to use the USCSI "direct" chain and bypass the normal
17530  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17531  *			command is issued as part of an error recovery action.
17532  *
17533  * Return Code: 0   - Success
17534  *		errno return code from sd_send_scsi_cmd()
17535  *
17536  *     Context: Can sleep.
17537  */
17538 
17539 static int
17540 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
17541 {
17542 	union scsi_cdb		cdb;
17543 	struct uscsi_cmd	ucmd_buf;
17544 	struct scsi_extended_sense	sense_buf;
17545 	int			status;
17546 
17547 	ASSERT(un != NULL);
17548 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17549 
17550 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
17551 
17552 	/* already determined doorlock is not supported, fake success */
17553 	if (un->un_f_doorlock_supported == FALSE) {
17554 		return (0);
17555 	}
17556 
17557 	/*
17558 	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
17559 	 * ignore the command so we can complete the eject
17560 	 * operation.
17561 	 */
17562 	if (flag == SD_REMOVAL_PREVENT) {
17563 		mutex_enter(SD_MUTEX(un));
17564 		if (un->un_f_ejecting == TRUE) {
17565 			mutex_exit(SD_MUTEX(un));
17566 			return (EAGAIN);
17567 		}
17568 		mutex_exit(SD_MUTEX(un));
17569 	}
17570 
17571 	bzero(&cdb, sizeof (cdb));
17572 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17573 
17574 	cdb.scc_cmd = SCMD_DOORLOCK;
17575 	cdb.cdb_opaque[4] = (uchar_t)flag;
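	/*
	 * SCMD_DOORLOCK is the PREVENT ALLOW MEDIUM REMOVAL command
	 * (opcode 0x1E).  CDB byte 4 carries the Prevent field, so
	 * SD_REMOVAL_PREVENT and SD_REMOVAL_ALLOW are presumed to encode
	 * the one and zero values that field expects.
	 */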
17576 
17577 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17578 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
17579 	ucmd_buf.uscsi_bufaddr	= NULL;
17580 	ucmd_buf.uscsi_buflen	= 0;
17581 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17582 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17583 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
17584 	ucmd_buf.uscsi_timeout	= 15;
17585 
17586 	SD_TRACE(SD_LOG_IO, un,
17587 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
17588 
17589 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17590 	    UIO_SYSSPACE, path_flag);
17591 
17592 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
17593 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17594 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
17595 		/* fake success and skip subsequent doorlock commands */
17596 		un->un_f_doorlock_supported = FALSE;
17597 		return (0);
17598 	}
17599 
17600 	return (status);
17601 }
17602 
17603 /*
17604  *    Function: sd_send_scsi_READ_CAPACITY
17605  *
17606  * Description: This routine uses the scsi READ CAPACITY command to determine
17607  *		the device capacity in number of blocks and the device native
17608  *		block size. If this function returns a failure, then the
17609  *		values in *capp and *lbap are undefined.  If the capacity
17610  *		returned is 0xffffffff then the lun is too large for a
17611  *		normal READ CAPACITY command and the results of a
17612  *		READ CAPACITY 16 will be used instead.
17613  *
17614  *   Arguments: un   - ptr to soft state struct for the target
17615  *		capp - ptr to unsigned 64-bit variable to receive the
17616  *			capacity value from the command.
17617  *		lbap - ptr to unsigned 32-bit variable to receive the
17618  *			block size value from the command
17619  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17620  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17621  *			to use the USCSI "direct" chain and bypass the normal
17622  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17623  *			command is issued as part of an error recovery action.
17624  *
17625  * Return Code: 0   - Success
17626  *		EIO - IO error
17627  *		EACCES - Reservation conflict detected
17628  *		EAGAIN - Device is becoming ready
17629  *		errno return code from sd_send_scsi_cmd()
17630  *
17631  *     Context: Can sleep.  Blocks until command completes.
17632  */
17633 
17634 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
17635 
17636 static int
17637 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
17638 	int path_flag)
17639 {
17640 	struct	scsi_extended_sense	sense_buf;
17641 	struct	uscsi_cmd	ucmd_buf;
17642 	union	scsi_cdb	cdb;
17643 	uint32_t		*capacity_buf;
17644 	uint64_t		capacity;
17645 	uint32_t		lbasize;
17646 	int			status;
17647 
17648 	ASSERT(un != NULL);
17649 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17650 	ASSERT(capp != NULL);
17651 	ASSERT(lbap != NULL);
17652 
17653 	SD_TRACE(SD_LOG_IO, un,
17654 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17655 
17656 	/*
17657 	 * First send a READ_CAPACITY command to the target.
17658 	 * (This command is mandatory under SCSI-2.)
17659 	 *
17660 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
17661 	 * Medium Indicator bit is cleared.  The address field must be
17662 	 * zero if the PMI bit is zero.
17663 	 */
17664 	bzero(&cdb, sizeof (cdb));
17665 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17666 
17667 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
17668 
17669 	cdb.scc_cmd = SCMD_READ_CAPACITY;
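	/*
	 * SCMD_READ_CAPACITY is the 10-byte (Group 1) READ CAPACITY
	 * command, opcode 0x25.  With the PMI bit clear, the LBA field in
	 * CDB bytes 2-5 must remain zero, which the bzero() of the CDB
	 * above already guarantees.
	 */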
17670 
17671 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17672 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
17673 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
17674 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
17675 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17676 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17677 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17678 	ucmd_buf.uscsi_timeout	= 60;
17679 
17680 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17681 	    UIO_SYSSPACE, path_flag);
17682 
17683 	switch (status) {
17684 	case 0:
17685 		/* Return failure if we did not get valid capacity data. */
17686 		if (ucmd_buf.uscsi_resid != 0) {
17687 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17688 			return (EIO);
17689 		}
17690 
17691 		/*
17692 		 * Read capacity and block size from the READ CAPACITY 10 data.
17693 		 * This data may be adjusted later due to device specific
17694 		 * issues.
17695 		 *
17696 		 * According to the SCSI spec, the READ CAPACITY 10
17697 		 * command returns the following:
17698 		 *
17699 		 *  bytes 0-3: Maximum logical block address available.
17700 		 *		(MSB in byte:0 & LSB in byte:3)
17701 		 *
17702 		 *  bytes 4-7: Block length in bytes
17703 		 *		(MSB in byte:4 & LSB in byte:7)
17704 		 *
17705 		 */
17706 		capacity = BE_32(capacity_buf[0]);
17707 		lbasize = BE_32(capacity_buf[1]);
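		/*
		 * capacity_buf holds two big-endian 32-bit words, which
		 * BE_32() converts to host byte order.  Decoding the first
		 * word by hand would look like this (illustrative only):
		 *
		 *	uint8_t *b = (uint8_t *)capacity_buf;
		 *	capacity = ((uint32_t)b[0] << 24) |
		 *	    ((uint32_t)b[1] << 16) |
		 *	    ((uint32_t)b[2] << 8) | (uint32_t)b[3];
		 */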
17708 
17709 		/*
17710 		 * Done with capacity_buf
17711 		 */
17712 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17713 
17714 		/*
17715 		 * If the reported capacity is set to all 0xf's, then
17716 		 * this disk is too large and requires SBC-2 commands.
17717 		 * Reissue the request using READ CAPACITY 16.
17718 		 */
17719 		if (capacity == 0xffffffff) {
17720 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
17721 			    &lbasize, path_flag);
17722 			if (status != 0) {
17723 				return (status);
17724 			}
17725 		}
17726 		break;	/* Success! */
17727 	case EIO:
17728 		switch (ucmd_buf.uscsi_status) {
17729 		case STATUS_RESERVATION_CONFLICT:
17730 			status = EACCES;
17731 			break;
17732 		case STATUS_CHECK:
17733 			/*
17734 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17735 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17736 			 */
17737 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17738 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17739 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17740 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17741 				return (EAGAIN);
17742 			}
17743 			break;
17744 		default:
17745 			break;
17746 		}
17747 		/* FALLTHRU */
17748 	default:
17749 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
17750 		return (status);
17751 	}
17752 
17753 	/*
17754 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
17755 	 * (2352 and 0 are common) so for these devices always force the value
17756 	 * to 2048 as required by the ATAPI specs.
17757 	 */
17758 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
17759 		lbasize = 2048;
17760 	}
17761 
17762 	/*
17763 	 * Get the maximum LBA value from the READ CAPACITY data.
17764 	 * Here we assume that the Partial Medium Indicator (PMI) bit
17765 	 * was cleared when issuing the command. This means that the LBA
17766 	 * returned from the device is the LBA of the last logical block
17767 	 * on the logical unit.  The actual logical block count will be
17768 	 * this value plus one.
17769 	 *
17770 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
17771 	 * so scale the capacity value to reflect this.
17772 	 */
17773 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
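	/*
	 * Worked example (illustrative values): a device reporting a
	 * maximum LBA of 0x1FFFFF with a 2048-byte lbasize, on a system
	 * using 512-byte blocks, yields (0x1FFFFF + 1) * (2048 / 512) =
	 * 0x800000 system blocks.  Note that the integer division assumes
	 * lbasize is an integral multiple of un_sys_blocksize.
	 */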
17774 
17775 	/*
17776 	 * Copy the values from the READ CAPACITY command into the space
17777 	 * provided by the caller.
17778 	 */
17779 	*capp = capacity;
17780 	*lbap = lbasize;
17781 
17782 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
17783 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17784 
17785 	/*
17786 	 * Both the lbasize and capacity from the device must be nonzero,
17787 	 * otherwise we assume that the values are not valid and return
17788 	 * failure to the caller. (4203735)
17789 	 */
17790 	if ((capacity == 0) || (lbasize == 0)) {
17791 		return (EIO);
17792 	}
17793 
17794 	return (0);
17795 }
17796 
17797 /*
17798  *    Function: sd_send_scsi_READ_CAPACITY_16
17799  *
17800  * Description: This routine uses the scsi READ CAPACITY 16 command to
17801  *		determine the device capacity in number of blocks and the
17802  *		device native block size.  If this function returns a failure,
17803  *		then the values in *capp and *lbap are undefined.
17804  *		This routine should always be called by
17805  *		sd_send_scsi_READ_CAPACITY which will apply any device
17806  *		specific adjustments to capacity and lbasize.
17807  *
17808  *   Arguments: un   - ptr to soft state struct for the target
17809  *		capp - ptr to unsigned 64-bit variable to receive the
17810  *			capacity value from the command.
17811  *		lbap - ptr to unsigned 32-bit variable to receive the
17812  *			block size value from the command
17813  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17814  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17815  *			to use the USCSI "direct" chain and bypass the normal
17816  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
17817  *			this command is issued as part of an error recovery
17818  *			action.
17819  *
17820  * Return Code: 0   - Success
17821  *		EIO - IO error
17822  *		EACCES - Reservation conflict detected
17823  *		EAGAIN - Device is becoming ready
17824  *		errno return code from sd_send_scsi_cmd()
17825  *
17826  *     Context: Can sleep.  Blocks until command completes.
17827  */
17828 
17829 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
17830 
17831 static int
17832 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
17833 	uint32_t *lbap, int path_flag)
17834 {
17835 	struct	scsi_extended_sense	sense_buf;
17836 	struct	uscsi_cmd	ucmd_buf;
17837 	union	scsi_cdb	cdb;
17838 	uint64_t		*capacity16_buf;
17839 	uint64_t		capacity;
17840 	uint32_t		lbasize;
17841 	int			status;
17842 
17843 	ASSERT(un != NULL);
17844 	ASSERT(!mutex_owned(SD_MUTEX(un)));
17845 	ASSERT(capp != NULL);
17846 	ASSERT(lbap != NULL);
17847 
17848 	SD_TRACE(SD_LOG_IO, un,
17849 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
17850 
17851 	/*
17852 	 * First send a READ_CAPACITY_16 command to the target.
17853 	 *
17854 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
17855 	 * Medium Indicator bit is cleared.  The address field must be
17856 	 * zero if the PMI bit is zero.
17857 	 */
17858 	bzero(&cdb, sizeof (cdb));
17859 	bzero(&ucmd_buf, sizeof (ucmd_buf));
17860 
17861 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
17862 
17863 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
17864 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
17865 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
17866 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
17867 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
17868 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
17869 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
17870 	ucmd_buf.uscsi_timeout	= 60;
17871 
17872 	/*
17873 	 * Read Capacity (16) is a Service Action In command.  One
17874 	 * command byte (0x9E) is overloaded for multiple operations,
17875 	 * with the second CDB byte specifying the desired operation
17876 	 */
17877 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
17878 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
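	/*
	 * SCMD_SVC_ACTION_IN_G4 is the SERVICE ACTION IN(16) opcode
	 * (0x9E); SSVC_ACTION_READ_CAPACITY_G4 (service action 0x10) in
	 * CDB byte 1 selects the READ CAPACITY (16) operation.
	 */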
17879 
17880 	/*
17881 	 * Fill in allocation length field
17882 	 */
17883 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
17884 
17885 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
17886 	    UIO_SYSSPACE, path_flag);
17887 
17888 	switch (status) {
17889 	case 0:
17890 		/* Return failure if we did not get valid capacity data. */
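		/*
		 * The buffer is SD_CAPACITY_16_SIZE bytes (32 for the
		 * standard READ CAPACITY (16) parameter data), so a
		 * residual greater than 20 means fewer than the first 12
		 * bytes -- capacity plus block length -- were returned.
		 */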
17891 		if (ucmd_buf.uscsi_resid > 20) {
17892 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17893 			return (EIO);
17894 		}
17895 
17896 		/*
17897 		 * Read capacity and block size from the READ CAPACITY 16 data.
17898 		 * This data may be adjusted later due to device specific
17899 		 * issues.
17900 		 *
17901 		 * According to the SCSI spec, the READ CAPACITY 16
17902 		 * command returns the following:
17903 		 *
17904 		 *  bytes 0-7: Maximum logical block address available.
17905 		 *		(MSB in byte:0 & LSB in byte:7)
17906 		 *
17907 		 *  bytes 8-11: Block length in bytes
17908 		 *		(MSB in byte:8 & LSB in byte:11)
17909 		 *
17910 		 */
17911 		capacity = BE_64(capacity16_buf[0]);
17912 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
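		/*
		 * capacity16_buf is viewed as an array of 64-bit words:
		 * word 0 holds the 8-byte capacity, and the uint32_t cast
		 * above picks up the first four bytes of word 1 (bytes
		 * 8-11 of the returned data), which hold the block length.
		 */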
17913 
17914 		/*
17915 		 * Done with capacity16_buf
17916 		 */
17917 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17918 
17919 		/*
17920 		 * If the reported capacity is set to all 0xf's, then
17921 		 * this disk is too large.  This could only happen with
17922 		 * a device that supports LBAs larger than 64 bits, which
17923 		 * are not defined by any current T10 standard.
17924 		 */
17925 		if (capacity == 0xffffffffffffffff) {
17926 			return (EIO);
17927 		}
17928 		break;	/* Success! */
17929 	case EIO:
17930 		switch (ucmd_buf.uscsi_status) {
17931 		case STATUS_RESERVATION_CONFLICT:
17932 			status = EACCES;
17933 			break;
17934 		case STATUS_CHECK:
17935 			/*
17936 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
17937 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
17938 			 */
17939 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
17940 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
17941 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
17942 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17943 				return (EAGAIN);
17944 			}
17945 			break;
17946 		default:
17947 			break;
17948 		}
17949 		/* FALLTHRU */
17950 	default:
17951 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
17952 		return (status);
17953 	}
17954 
17955 	*capp = capacity;
17956 	*lbap = lbasize;
17957 
17958 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
17959 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
17960 
17961 	return (0);
17962 }
17963 
17964 
17965 /*
17966  *    Function: sd_send_scsi_START_STOP_UNIT
17967  *
17968  * Description: Issue a scsi START STOP UNIT command to the target.
17969  *
17970  *   Arguments: un    - pointer to driver soft state (unit) structure for
17971  *			this target.
17972  *		flag  - SD_TARGET_START
17973  *			SD_TARGET_STOP
17974  *			SD_TARGET_EJECT
17975  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
17976  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
17977  *			to use the USCSI "direct" chain and bypass the normal
17978  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
17979  *			command is issued as part of an error recovery action.
17980  *
17981  * Return Code: 0   - Success
17982  *		EIO - IO error
17983  *		EACCES - Reservation conflict detected
17984  *		ENXIO  - Not Ready, medium not present
17985  *		errno return code from sd_send_scsi_cmd()
17986  *
17987  *     Context: Can sleep.
17988  */
17989 
17990 static int
17991 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
17992 {
17993 	struct	scsi_extended_sense	sense_buf;
17994 	union scsi_cdb		cdb;
17995 	struct uscsi_cmd	ucmd_buf;
17996 	int			status;
17997 
17998 	ASSERT(un != NULL);
17999 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18000 
18001 	SD_TRACE(SD_LOG_IO, un,
18002 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
18003 
18004 	if (un->un_f_check_start_stop &&
18005 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
18006 	    (un->un_f_start_stop_supported != TRUE)) {
18007 		return (0);
18008 	}
18009 
18010 	/*
18011 	 * If we are performing an eject operation and
18012 	 * we receive any command other than SD_TARGET_EJECT
18013 	 * we should immediately return.
18014 	 */
18015 	if (flag != SD_TARGET_EJECT) {
18016 		mutex_enter(SD_MUTEX(un));
18017 		if (un->un_f_ejecting == TRUE) {
18018 			mutex_exit(SD_MUTEX(un));
18019 			return (EAGAIN);
18020 		}
18021 		mutex_exit(SD_MUTEX(un));
18022 	}
18023 
18024 	bzero(&cdb, sizeof (cdb));
18025 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18026 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18027 
18028 	cdb.scc_cmd = SCMD_START_STOP;
18029 	cdb.cdb_opaque[4] = (uchar_t)flag;
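	/*
	 * SCMD_START_STOP is the START STOP UNIT command (opcode 0x1B).
	 * CDB byte 4 carries the START and LOEJ bits, so the
	 * SD_TARGET_START/STOP/EJECT flags are presumed to encode those
	 * bit patterns directly (e.g. START = 0x01, EJECT = LOEJ = 0x02).
	 */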
18030 
18031 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18032 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18033 	ucmd_buf.uscsi_bufaddr	= NULL;
18034 	ucmd_buf.uscsi_buflen	= 0;
18035 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18036 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18037 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18038 	ucmd_buf.uscsi_timeout	= 200;
18039 
18040 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18041 	    UIO_SYSSPACE, path_flag);
18042 
18043 	switch (status) {
18044 	case 0:
18045 		break;	/* Success! */
18046 	case EIO:
18047 		switch (ucmd_buf.uscsi_status) {
18048 		case STATUS_RESERVATION_CONFLICT:
18049 			status = EACCES;
18050 			break;
18051 		case STATUS_CHECK:
18052 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
18053 				switch (scsi_sense_key(
18054 				    (uint8_t *)&sense_buf)) {
18055 				case KEY_ILLEGAL_REQUEST:
18056 					status = ENOTSUP;
18057 					break;
18058 				case KEY_NOT_READY:
18059 					if (scsi_sense_asc(
18060 					    (uint8_t *)&sense_buf)
18061 					    == 0x3A) {
18062 						status = ENXIO;
18063 					}
18064 					break;
18065 				default:
18066 					break;
18067 				}
18068 			}
18069 			break;
18070 		default:
18071 			break;
18072 		}
18073 		break;
18074 	default:
18075 		break;
18076 	}
18077 
18078 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
18079 
18080 	return (status);
18081 }
18082 
18083 
18084 /*
18085  *    Function: sd_start_stop_unit_callback
18086  *
18087  * Description: timeout(9F) callback to begin recovery process for a
18088  *		device that has spun down.
18089  *
18090  *   Arguments: arg - pointer to associated softstate struct.
18091  *
18092  *     Context: Executes in a timeout(9F) thread context
18093  */
18094 
18095 static void
18096 sd_start_stop_unit_callback(void *arg)
18097 {
18098 	struct sd_lun	*un = arg;
18099 	ASSERT(un != NULL);
18100 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18101 
18102 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
18103 
18104 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
18105 }
18106 
18107 
18108 /*
18109  *    Function: sd_start_stop_unit_task
18110  *
18111  * Description: Recovery procedure when a drive is spun down.
18112  *
18113  *   Arguments: arg - pointer to associated softstate struct.
18114  *
18115  *     Context: Executes in a taskq() thread context
18116  */
18117 
18118 static void
18119 sd_start_stop_unit_task(void *arg)
18120 {
18121 	struct sd_lun	*un = arg;
18122 
18123 	ASSERT(un != NULL);
18124 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18125 
18126 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
18127 
18128 	/*
18129 	 * Some unformatted drives report a "not ready" error; there is no
18130 	 * need to restart if a format has been initiated.
18131 	 */
18132 	mutex_enter(SD_MUTEX(un));
18133 	if (un->un_f_format_in_progress == TRUE) {
18134 		mutex_exit(SD_MUTEX(un));
18135 		return;
18136 	}
18137 	mutex_exit(SD_MUTEX(un));
18138 
18139 	/*
18140 	 * When a START STOP command is issued from here, it is part of a
18141 	 * failure recovery operation and must be issued before any other
18142 	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. Whether or not the spin-up
	 * succeeds, we will start I/O after the attempt.
18145 	 */
18146 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
18147 	    SD_PATH_DIRECT_PRIORITY);
18148 
18149 	/*
18150 	 * The above call blocks until the START_STOP_UNIT command completes.
18151 	 * Now that it has completed, we must re-try the original IO that
18152 	 * received the NOT READY condition in the first place. There are
18153 	 * three possible conditions here:
18154 	 *
18155 	 *  (1) The original IO is on un_retry_bp.
18156 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
18157 	 *	is NULL.
18158 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
18159 	 *	points to some other, unrelated bp.
18160 	 *
18161 	 * For each case, we must call sd_start_cmds() with un_retry_bp
18162 	 * as the argument. If un_retry_bp is NULL, this will initiate
18163 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
18164 	 * then this will process the bp on un_retry_bp. That may or may not
18165 	 * be the original IO, but that does not matter: the important thing
18166 	 * is to keep the IO processing going at this point.
18167 	 *
18168 	 * Note: This is a very specific error recovery sequence associated
18169 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
18170 	 * serialize the I/O with completion of the spin-up.
18171 	 */
18172 	mutex_enter(SD_MUTEX(un));
18173 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18174 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
18175 	    un, un->un_retry_bp);
18176 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
18177 	sd_start_cmds(un, un->un_retry_bp);
18178 	mutex_exit(SD_MUTEX(un));
18179 
18180 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
18181 }
18182 
18183 
18184 /*
18185  *    Function: sd_send_scsi_INQUIRY
18186  *
18187  * Description: Issue the scsi INQUIRY command.
18188  *
18189  *   Arguments: un
18190  *		bufaddr
18191  *		buflen
18192  *		evpd
18193  *		page_code
 *		residp - pointer to receive the command residual (may be NULL)
18195  *
18196  * Return Code: 0   - Success
18197  *		errno return code from sd_send_scsi_cmd()
18198  *
18199  *     Context: Can sleep. Does not return until command is completed.
18200  */
18201 
18202 static int
18203 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
18204 	uchar_t evpd, uchar_t page_code, size_t *residp)
18205 {
18206 	union scsi_cdb		cdb;
18207 	struct uscsi_cmd	ucmd_buf;
18208 	int			status;
18209 
18210 	ASSERT(un != NULL);
18211 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18212 	ASSERT(bufaddr != NULL);
18213 
18214 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
18215 
18216 	bzero(&cdb, sizeof (cdb));
18217 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18218 	bzero(bufaddr, buflen);
18219 
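	/*
	 * INQUIRY CDB: byte 1 carries the EVPD bit, byte 2 the VPD page
	 * code (meaningful only when EVPD is set), and FORMG0COUNT()
	 * stores the allocation length in the Group 0 count byte.
	 */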
18220 	cdb.scc_cmd = SCMD_INQUIRY;
18221 	cdb.cdb_opaque[1] = evpd;
18222 	cdb.cdb_opaque[2] = page_code;
18223 	FORMG0COUNT(&cdb, buflen);
18224 
18225 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18226 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18227 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
18228 	ucmd_buf.uscsi_buflen	= buflen;
18229 	ucmd_buf.uscsi_rqbuf	= NULL;
18230 	ucmd_buf.uscsi_rqlen	= 0;
18231 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
18232 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
18233 
18234 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18235 	    UIO_SYSSPACE, SD_PATH_DIRECT);
18236 
18237 	if ((status == 0) && (residp != NULL)) {
18238 		*residp = ucmd_buf.uscsi_resid;
18239 	}
18240 
18241 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
18242 
18243 	return (status);
18244 }
18245 
18246 
18247 /*
18248  *    Function: sd_send_scsi_TEST_UNIT_READY
18249  *
18250  * Description: Issue the scsi TEST UNIT READY command.
18251  *		This routine can be told to set the flag USCSI_DIAGNOSE to
18252  *		prevent retrying failed commands. Use this when the intent
18253  *		is either to check for device readiness, to clear a Unit
18254  *		Attention, or to clear any outstanding sense data.
 *		However, under specific conditions the expected behavior
18256  *		is for retries to bring a device ready, so use the flag
18257  *		with caution.
18258  *
18259  *   Arguments: un
18260  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
18261  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			0: don't check for media present; do retries on cmd.
18263  *
18264  * Return Code: 0   - Success
18265  *		EIO - IO error
18266  *		EACCES - Reservation conflict detected
18267  *		ENXIO  - Not Ready, medium not present
18268  *		errno return code from sd_send_scsi_cmd()
18269  *
18270  *     Context: Can sleep. Does not return until command is completed.
18271  */
18272 
18273 static int
18274 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
18275 {
18276 	struct	scsi_extended_sense	sense_buf;
18277 	union scsi_cdb		cdb;
18278 	struct uscsi_cmd	ucmd_buf;
18279 	int			status;
18280 
18281 	ASSERT(un != NULL);
18282 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18283 
18284 	SD_TRACE(SD_LOG_IO, un,
18285 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
18286 
	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are pending commands, return success; this is a bit
	 * arbitrary, but it is acceptable for non-removables (i.e. the elite1
	 * disks) and non-clustering configurations.
	 */
18297 	if (un->un_f_cfg_tur_check == TRUE) {
18298 		mutex_enter(SD_MUTEX(un));
18299 		if (un->un_ncmds_in_transport != 0) {
18300 			mutex_exit(SD_MUTEX(un));
18301 			return (0);
18302 		}
18303 		mutex_exit(SD_MUTEX(un));
18304 	}
18305 
18306 	bzero(&cdb, sizeof (cdb));
18307 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18308 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18309 
18310 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
18311 
18312 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18313 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18314 	ucmd_buf.uscsi_bufaddr	= NULL;
18315 	ucmd_buf.uscsi_buflen	= 0;
18316 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18317 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18318 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18319 
18320 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
18321 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
18322 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
18323 	}
18324 	ucmd_buf.uscsi_timeout	= 60;
18325 
18326 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18327 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
18328 	    SD_PATH_STANDARD));
18329 
18330 	switch (status) {
18331 	case 0:
18332 		break;	/* Success! */
18333 	case EIO:
18334 		switch (ucmd_buf.uscsi_status) {
18335 		case STATUS_RESERVATION_CONFLICT:
18336 			status = EACCES;
18337 			break;
18338 		case STATUS_CHECK:
18339 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
18340 				break;
18341 			}
18342 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18343 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18344 			    KEY_NOT_READY) &&
18345 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
18346 				status = ENXIO;
18347 			}
18348 			break;
18349 		default:
18350 			break;
18351 		}
18352 		break;
18353 	default:
18354 		break;
18355 	}
18356 
18357 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
18358 
18359 	return (status);
18360 }
18361 
18362 
18363 /*
18364  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
18365  *
18366  * Description: Issue the scsi PERSISTENT RESERVE IN command.
18367  *
18368  *   Arguments: un
 *   Arguments: un - pointer to soft state struct for the target.
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV.
 *		data_len - length of the data buffer in bytes.
 *		data_bufp - buffer to receive the parameter data; if NULL, a
 *			default buffer is allocated internally.
18370  * Return Code: 0   - Success
18371  *		EACCES
18372  *		ENOTSUP
18373  *		errno return code from sd_send_scsi_cmd()
18374  *
18375  *     Context: Can sleep. Does not return until command is completed.
18376  */
18377 
18378 static int
18379 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
18380 	uint16_t data_len, uchar_t *data_bufp)
18381 {
18382 	struct scsi_extended_sense	sense_buf;
18383 	union scsi_cdb		cdb;
18384 	struct uscsi_cmd	ucmd_buf;
18385 	int			status;
18386 	int			no_caller_buf = FALSE;
18387 
18388 	ASSERT(un != NULL);
18389 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18390 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
18391 
18392 	SD_TRACE(SD_LOG_IO, un,
18393 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
18394 
18395 	bzero(&cdb, sizeof (cdb));
18396 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18397 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18398 	if (data_bufp == NULL) {
18399 		/* Allocate a default buf if the caller did not give one */
18400 		ASSERT(data_len == 0);
18401 		data_len  = MHIOC_RESV_KEY_SIZE;
18402 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
18403 		no_caller_buf = TRUE;
18404 	}
18405 
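	/*
	 * PERSISTENT RESERVE IN CDB: byte 1 holds the service action
	 * (READ KEYS or READ RESERVATION); FORMG1COUNT() stores the
	 * allocation length in bytes 7-8 of the Group 1 CDB.
	 */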
18406 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
18407 	cdb.cdb_opaque[1] = usr_cmd;
18408 	FORMG1COUNT(&cdb, data_len);
18409 
18410 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18411 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18412 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
18413 	ucmd_buf.uscsi_buflen	= data_len;
18414 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18415 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18416 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
18417 	ucmd_buf.uscsi_timeout	= 60;
18418 
18419 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18420 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18421 
18422 	switch (status) {
18423 	case 0:
18424 		break;	/* Success! */
18425 	case EIO:
18426 		switch (ucmd_buf.uscsi_status) {
18427 		case STATUS_RESERVATION_CONFLICT:
18428 			status = EACCES;
18429 			break;
18430 		case STATUS_CHECK:
18431 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18432 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18433 			    KEY_ILLEGAL_REQUEST)) {
18434 				status = ENOTSUP;
18435 			}
18436 			break;
18437 		default:
18438 			break;
18439 		}
18440 		break;
18441 	default:
18442 		break;
18443 	}
18444 
18445 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
18446 
18447 	if (no_caller_buf == TRUE) {
18448 		kmem_free(data_bufp, data_len);
18449 	}
18450 
18451 	return (status);
18452 }
18453 
18454 
18455 /*
18456  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
18457  *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending SCSI-3 PROUT
 *		commands to the device.
 *
 *   Arguments: un  -   Pointer to soft state struct for the target.
 *		usr_cmd - SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user-provided pointer to a register, reserve
 *			descriptor, preempt-and-abort, or register-and-
 *			ignore-key structure (mhioc_register_t,
 *			mhioc_resv_desc_t, mhioc_preemptandabort_t,
 *			mhioc_registerandignorekey_t)
18470  *
18471  * Return Code: 0   - Success
18472  *		EACCES
18473  *		ENOTSUP
18474  *		errno return code from sd_send_scsi_cmd()
18475  *
18476  *     Context: Can sleep. Does not return until command is completed.
18477  */
18478 
18479 static int
18480 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
18481 	uchar_t	*usr_bufp)
18482 {
18483 	struct scsi_extended_sense	sense_buf;
18484 	union scsi_cdb		cdb;
18485 	struct uscsi_cmd	ucmd_buf;
18486 	int			status;
18487 	uchar_t			data_len = sizeof (sd_prout_t);
18488 	sd_prout_t		*prp;
18489 
18490 	ASSERT(un != NULL);
18491 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18492 	ASSERT(data_len == 24);	/* required by scsi spec */
18493 
18494 	SD_TRACE(SD_LOG_IO, un,
18495 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
18496 
18497 	if (usr_bufp == NULL) {
18498 		return (EINVAL);
18499 	}
18500 
18501 	bzero(&cdb, sizeof (cdb));
18502 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18503 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
18504 	prp = kmem_zalloc(data_len, KM_SLEEP);
18505 
18506 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
18507 	cdb.cdb_opaque[1] = usr_cmd;
18508 	FORMG1COUNT(&cdb, data_len);
18509 
18510 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18511 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
18512 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
18513 	ucmd_buf.uscsi_buflen	= data_len;
18514 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18515 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
18516 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
18517 	ucmd_buf.uscsi_timeout	= 60;
18518 
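	/*
	 * Fill in the 24-byte PROUT parameter list (reservation key,
	 * service action key, scope-specific address, APTPL bit)
	 * according to the service action; for RESERVE, RELEASE and
	 * PREEMPT-AND-ABORT the scope/type byte of the CDB is also set
	 * from the caller's reservation descriptor.
	 */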
18519 	switch (usr_cmd) {
18520 	case SD_SCSI3_REGISTER: {
18521 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
18522 
18523 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18524 		bcopy(ptr->newkey.key, prp->service_key,
18525 		    MHIOC_RESV_KEY_SIZE);
18526 		prp->aptpl = ptr->aptpl;
18527 		break;
18528 	}
18529 	case SD_SCSI3_RESERVE:
18530 	case SD_SCSI3_RELEASE: {
18531 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
18532 
18533 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18534 		prp->scope_address = BE_32(ptr->scope_specific_addr);
18535 		cdb.cdb_opaque[2] = ptr->type;
18536 		break;
18537 	}
18538 	case SD_SCSI3_PREEMPTANDABORT: {
18539 		mhioc_preemptandabort_t *ptr =
18540 		    (mhioc_preemptandabort_t *)usr_bufp;
18541 
18542 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
18543 		bcopy(ptr->victim_key.key, prp->service_key,
18544 		    MHIOC_RESV_KEY_SIZE);
18545 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
18546 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
18547 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
18548 		break;
18549 	}
18550 	case SD_SCSI3_REGISTERANDIGNOREKEY:
18551 	{
18552 		mhioc_registerandignorekey_t *ptr;
18553 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
18554 		bcopy(ptr->newkey.key,
18555 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
18556 		prp->aptpl = ptr->aptpl;
18557 		break;
18558 	}
18559 	default:
18560 		ASSERT(FALSE);
18561 		break;
18562 	}
18563 
18564 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
18565 	    UIO_SYSSPACE, SD_PATH_STANDARD);
18566 
18567 	switch (status) {
18568 	case 0:
18569 		break;	/* Success! */
18570 	case EIO:
18571 		switch (ucmd_buf.uscsi_status) {
18572 		case STATUS_RESERVATION_CONFLICT:
18573 			status = EACCES;
18574 			break;
18575 		case STATUS_CHECK:
18576 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18577 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
18578 			    KEY_ILLEGAL_REQUEST)) {
18579 				status = ENOTSUP;
18580 			}
18581 			break;
18582 		default:
18583 			break;
18584 		}
18585 		break;
18586 	default:
18587 		break;
18588 	}
18589 
18590 	kmem_free(prp, data_len);
18591 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
18592 	return (status);
18593 }
18594 
18595 
18596 /*
18597  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
18598  *
18599  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
18600  *
18601  *   Arguments: un - pointer to the target's soft state struct
18602  *              dkc - pointer to the callback structure
18603  *
18604  * Return Code: 0 - success
18605  *		errno-type error code
18606  *
18607  *     Context: kernel thread context only.
18608  *
18609  *  _______________________________________________________________
18610  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
18611  * |FLUSH_VOLATILE|              | operation                       |
18612  * |______________|______________|_________________________________|
18613  * | 0            | NULL         | Synchronous flush on both       |
18614  * |              |              | volatile and non-volatile cache |
18615  * |______________|______________|_________________________________|
18616  * | 1            | NULL         | Synchronous flush on volatile   |
18617  * |              |              | cache; disk drivers may suppress|
18618  * |              |              | flush if disk table indicates   |
18619  * |              |              | non-volatile cache              |
18620  * |______________|______________|_________________________________|
18621  * | 0            | !NULL        | Asynchronous flush on both      |
18622  * |              |              | volatile and non-volatile cache;|
18623  * |______________|______________|_________________________________|
18624  * | 1            | !NULL        | Asynchronous flush on volatile  |
18625  * |              |              | cache; disk drivers may suppress|
18626  * |              |              | flush if disk table indicates   |
18627  * |              |              | non-volatile cache              |
18628  * |______________|______________|_________________________________|
18629  *
18630  */
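/*
 * Example (a sketch, not taken from this file): an asynchronous
 * volatile-cache flush from kernel code would fill in a dk_callback
 * and let the biodone callback complete it; my_flush_done and
 * my_state are hypothetical.
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = my_state;
 *	dkc.dkc_flag = FLUSH_VOLATILE;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */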
18631 
18632 static int
18633 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
18634 {
18635 	struct sd_uscsi_info	*uip;
18636 	struct uscsi_cmd	*uscmd;
18637 	union scsi_cdb		*cdb;
18638 	struct buf		*bp;
18639 	int			rval = 0;
18640 	int			is_async;
18641 
18642 	SD_TRACE(SD_LOG_IO, un,
18643 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
18644 
18645 	ASSERT(un != NULL);
18646 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18647 
18648 	if (dkc == NULL || dkc->dkc_callback == NULL) {
18649 		is_async = FALSE;
18650 	} else {
18651 		is_async = TRUE;
18652 	}
18653 
18654 	mutex_enter(SD_MUTEX(un));
18655 	/* check whether cache flush should be suppressed */
18656 	if (un->un_f_suppress_cache_flush == TRUE) {
18657 		mutex_exit(SD_MUTEX(un));
18658 		/*
18659 		 * suppress the cache flush if the device is told to do
18660 		 * so by sd.conf or disk table
18661 		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: "
		    "skip the cache flush since suppress_cache_flush is %d!\n",
18664 		    un->un_f_suppress_cache_flush);
18665 
18666 		if (is_async == TRUE) {
18667 			/* invoke callback for asynchronous flush */
18668 			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
18669 		}
18670 		return (rval);
18671 	}
18672 	mutex_exit(SD_MUTEX(un));
18673 
18674 	/*
18675 	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
18676 	 * set properly
18677 	 */
18678 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
18679 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
18680 
18681 	mutex_enter(SD_MUTEX(un));
18682 	if (dkc != NULL && un->un_f_sync_nv_supported &&
18683 	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
18684 		/*
18685 		 * if the device supports SYNC_NV bit, turn on
18686 		 * the SYNC_NV bit to only flush volatile cache
18687 		 */
18688 		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
18689 	}
18690 	mutex_exit(SD_MUTEX(un));
18691 
18692 	/*
18693 	 * First get some memory for the uscsi_cmd struct and cdb
18694 	 * and initialize for SYNCHRONIZE_CACHE cmd.
18695 	 */
18696 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
18697 	uscmd->uscsi_cdblen = CDB_GROUP1;
18698 	uscmd->uscsi_cdb = (caddr_t)cdb;
18699 	uscmd->uscsi_bufaddr = NULL;
18700 	uscmd->uscsi_buflen = 0;
18701 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
18702 	uscmd->uscsi_rqlen = SENSE_LENGTH;
18703 	uscmd->uscsi_rqresid = SENSE_LENGTH;
18704 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
18705 	uscmd->uscsi_timeout = sd_io_time;
18706 
18707 	/*
18708 	 * Allocate an sd_uscsi_info struct and fill it with the info
18709 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
18710 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
18711 	 * since we allocate the buf here in this function, we do not
18712 	 * need to preserve the prior contents of b_private.
18713 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
18714 	 */
18715 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
18716 	uip->ui_flags = SD_PATH_DIRECT;
18717 	uip->ui_cmdp  = uscmd;
18718 
18719 	bp = getrbuf(KM_SLEEP);
18720 	bp->b_private = uip;
18721 
18722 	/*
18723 	 * Setup buffer to carry uscsi request.
18724 	 */
18725 	bp->b_flags  = B_BUSY;
18726 	bp->b_bcount = 0;
18727 	bp->b_blkno  = 0;
18728 
18729 	if (is_async == TRUE) {
18730 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
18731 		uip->ui_dkc = *dkc;
18732 	}
18733 
18734 	bp->b_edev = SD_GET_DEV(un);
18735 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
18736 
18737 	(void) sd_uscsi_strategy(bp);
18738 
18739 	/*
	 * If this is a synchronous request, wait for completion.
	 * If it is asynchronous, just return and let the b_iodone
	 * callback clean up.
	 * NOTE: On return, un_ncmds_in_driver will be decremented,
18744 	 * but it was also incremented in sd_uscsi_strategy(), so
18745 	 * we should be ok.
18746 	 */
18747 	if (is_async == FALSE) {
18748 		(void) biowait(bp);
18749 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
18750 	}
18751 
18752 	return (rval);
18753 }
18754 
18755 
18756 static int
18757 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
18758 {
18759 	struct sd_uscsi_info *uip;
18760 	struct uscsi_cmd *uscmd;
18761 	uint8_t *sense_buf;
18762 	struct sd_lun *un;
18763 	int status;
18764 	union scsi_cdb *cdb;
18765 
18766 	uip = (struct sd_uscsi_info *)(bp->b_private);
18767 	ASSERT(uip != NULL);
18768 
18769 	uscmd = uip->ui_cmdp;
18770 	ASSERT(uscmd != NULL);
18771 
18772 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
18773 	ASSERT(sense_buf != NULL);
18774 
18775 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
18776 	ASSERT(un != NULL);
18777 
18778 	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
18779 
18780 	status = geterror(bp);
18781 	switch (status) {
18782 	case 0:
18783 		break;	/* Success! */
18784 	case EIO:
18785 		switch (uscmd->uscsi_status) {
18786 		case STATUS_RESERVATION_CONFLICT:
18787 			/* Ignore reservation conflict */
18788 			status = 0;
18789 			goto done;
18790 
18791 		case STATUS_CHECK:
18792 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
18793 			    (scsi_sense_key(sense_buf) ==
18794 			    KEY_ILLEGAL_REQUEST)) {
18795 				/* Ignore Illegal Request error */
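				/*
				 * If the failed command had the SYNC_NV
				 * bit set, only note that SYNC_NV is
				 * unsupported; otherwise the device does
				 * not support SYNCHRONIZE CACHE at all.
				 */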
				if (cdb->cdb_un.tag & SD_SYNC_NV_BIT) {
18797 					mutex_enter(SD_MUTEX(un));
18798 					un->un_f_sync_nv_supported = FALSE;
18799 					mutex_exit(SD_MUTEX(un));
18800 					status = 0;
18801 					SD_TRACE(SD_LOG_IO, un,
					    "un_f_sync_nv_supported "
					    "is set to false.\n");
18804 					goto done;
18805 				}
18806 
18807 				mutex_enter(SD_MUTEX(un));
18808 				un->un_f_sync_cache_supported = FALSE;
18809 				mutex_exit(SD_MUTEX(un));
18810 				SD_TRACE(SD_LOG_IO, un,
				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: "
				    "un_f_sync_cache_supported set to false "
				    "with asc = %x, ascq = %x\n",
18814 				    scsi_sense_asc(sense_buf),
18815 				    scsi_sense_ascq(sense_buf));
18816 				status = ENOTSUP;
18817 				goto done;
18818 			}
18819 			break;
18820 		default:
18821 			break;
18822 		}
18823 		/* FALLTHRU */
18824 	default:
18825 		/*
18826 		 * Don't log an error message if this device
18827 		 * has removable media.
18828 		 */
18829 		if (!un->un_f_has_removable_media) {
18830 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18831 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
18832 		}
18833 		break;
18834 	}
18835 
18836 done:
18837 	if (uip->ui_dkc.dkc_callback != NULL) {
18838 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
18839 	}
18840 
18841 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
18842 	freerbuf(bp);
18843 	kmem_free(uip, sizeof (struct sd_uscsi_info));
18844 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
18845 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
18846 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
18847 
18848 	return (status);
18849 }
18850 
18851 
18852 /*
18853  *    Function: sd_send_scsi_GET_CONFIGURATION
18854  *
18855  * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info;
 *		the caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN.
18858  *   Arguments: un
18859  *		ucmdbuf
18860  *		rqbuf
18861  *		rqbuflen
18862  *		bufaddr
18863  *		buflen
18864  *		path_flag
18865  *
18866  * Return Code: 0   - Success
18867  *		errno return code from sd_send_scsi_cmd()
18868  *
18869  *     Context: Can sleep. Does not return until command is completed.
18870  *
18871  */
18872 
18873 static int
18874 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
18875 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
18876 	int path_flag)
18877 {
18878 	char	cdb[CDB_GROUP1];
18879 	int	status;
18880 
18881 	ASSERT(un != NULL);
18882 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18883 	ASSERT(bufaddr != NULL);
18884 	ASSERT(ucmdbuf != NULL);
18885 	ASSERT(rqbuf != NULL);
18886 
18887 	SD_TRACE(SD_LOG_IO, un,
18888 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
18889 
18890 	bzero(cdb, sizeof (cdb));
18891 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18892 	bzero(rqbuf, rqbuflen);
18893 	bzero(bufaddr, buflen);
18894 
18895 	/*
18896 	 * Set up cdb field for the get configuration command.
18897 	 */
18898 	cdb[0] = SCMD_GET_CONFIGURATION;
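	/*
	 * RT (Requested Type) 02h asks for the feature header plus only
	 * the feature descriptor named by the Starting Feature Number.
	 */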
18899 	cdb[1] = 0x02;  /* Requested Type */
18900 	cdb[8] = SD_PROFILE_HEADER_LEN;
18901 	ucmdbuf->uscsi_cdb = cdb;
18902 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18903 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18904 	ucmdbuf->uscsi_buflen = buflen;
18905 	ucmdbuf->uscsi_timeout = sd_io_time;
18906 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18907 	ucmdbuf->uscsi_rqlen = rqbuflen;
18908 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18909 
18910 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
18911 	    UIO_SYSSPACE, path_flag);
18912 
18913 	switch (status) {
18914 	case 0:
18915 		break;  /* Success! */
18916 	case EIO:
18917 		switch (ucmdbuf->uscsi_status) {
18918 		case STATUS_RESERVATION_CONFLICT:
18919 			status = EACCES;
18920 			break;
18921 		default:
18922 			break;
18923 		}
18924 		break;
18925 	default:
18926 		break;
18927 	}
18928 
18929 	if (status == 0) {
18930 		SD_DUMP_MEMORY(un, SD_LOG_IO,
18931 		    "sd_send_scsi_GET_CONFIGURATION: data",
18932 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
18933 	}
18934 
18935 	SD_TRACE(SD_LOG_IO, un,
18936 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
18937 
18938 	return (status);
18939 }
18940 
18941 /*
18942  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
18943  *
18944  * Description: Issues the get configuration command to the device to
18945  *              retrieve a specific feature. Called from
18946  *		sd_check_for_writable_cd & sd_set_mmc_caps.
18947  *   Arguments: un
18948  *              ucmdbuf
18949  *              rqbuf
18950  *              rqbuflen
18951  *              bufaddr
18952  *              buflen
18953  *		feature
18954  *
18955  * Return Code: 0   - Success
18956  *              errno return code from sd_send_scsi_cmd()
18957  *
18958  *     Context: Can sleep. Does not return until command is completed.
18959  *
18960  */
18961 static int
18962 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
18963 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
18964 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
18965 {
18966 	char    cdb[CDB_GROUP1];
18967 	int	status;
18968 
18969 	ASSERT(un != NULL);
18970 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18971 	ASSERT(bufaddr != NULL);
18972 	ASSERT(ucmdbuf != NULL);
18973 	ASSERT(rqbuf != NULL);
18974 
18975 	SD_TRACE(SD_LOG_IO, un,
18976 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
18977 
18978 	bzero(cdb, sizeof (cdb));
18979 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
18980 	bzero(rqbuf, rqbuflen);
18981 	bzero(bufaddr, buflen);
18982 
18983 	/*
18984 	 * Set up cdb field for the get configuration command.
18985 	 */
18986 	cdb[0] = SCMD_GET_CONFIGURATION;
18987 	cdb[1] = 0x02;  /* Requested Type */
18988 	cdb[3] = feature;
18989 	cdb[8] = buflen;
18990 	ucmdbuf->uscsi_cdb = cdb;
18991 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
18992 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
18993 	ucmdbuf->uscsi_buflen = buflen;
18994 	ucmdbuf->uscsi_timeout = sd_io_time;
18995 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
18996 	ucmdbuf->uscsi_rqlen = rqbuflen;
18997 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
18998 
18999 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, FKIOCTL,
19000 	    UIO_SYSSPACE, path_flag);
19001 
19002 	switch (status) {
19003 	case 0:
19004 		break;  /* Success! */
19005 	case EIO:
19006 		switch (ucmdbuf->uscsi_status) {
19007 		case STATUS_RESERVATION_CONFLICT:
19008 			status = EACCES;
19009 			break;
19010 		default:
19011 			break;
19012 		}
19013 		break;
19014 	default:
19015 		break;
19016 	}
19017 
19018 	if (status == 0) {
19019 		SD_DUMP_MEMORY(un, SD_LOG_IO,
19020 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
19021 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
19022 	}
19023 
19024 	SD_TRACE(SD_LOG_IO, un,
19025 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
19026 
19027 	return (status);
19028 }
19029 
19030 
19031 /*
19032  *    Function: sd_send_scsi_MODE_SENSE
19033  *
19034  * Description: Utility function for issuing a scsi MODE SENSE command.
19035  *		Note: This routine uses a consistent implementation for Group0,
19036  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
19038  *
19039  *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
19042  *		bufaddr - buffer for page data retrieved from the target.
19043  *		buflen - size of page to be retrieved.
19044  *		page_code - page code of data to be retrieved from the target.
19045  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19046  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19047  *			to use the USCSI "direct" chain and bypass the normal
19048  *			command waitq.
19049  *
19050  * Return Code: 0   - Success
19051  *		errno return code from sd_send_scsi_cmd()
19052  *
19053  *     Context: Can sleep. Does not return until command is completed.
19054  */
19055 
19056 static int
19057 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
19058 	size_t buflen,  uchar_t page_code, int path_flag)
19059 {
19060 	struct	scsi_extended_sense	sense_buf;
19061 	union scsi_cdb		cdb;
19062 	struct uscsi_cmd	ucmd_buf;
19063 	int			status;
19064 	int			headlen;
19065 
19066 	ASSERT(un != NULL);
19067 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19068 	ASSERT(bufaddr != NULL);
19069 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
19070 	    (cdbsize == CDB_GROUP2));
19071 
19072 	SD_TRACE(SD_LOG_IO, un,
19073 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
19074 
19075 	bzero(&cdb, sizeof (cdb));
19076 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19077 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19078 	bzero(bufaddr, buflen);
19079 
19080 	if (cdbsize == CDB_GROUP0) {
19081 		cdb.scc_cmd = SCMD_MODE_SENSE;
19082 		cdb.cdb_opaque[2] = page_code;
19083 		FORMG0COUNT(&cdb, buflen);
19084 		headlen = MODE_HEADER_LENGTH;
19085 	} else {
19086 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
19087 		cdb.cdb_opaque[2] = page_code;
19088 		FORMG1COUNT(&cdb, buflen);
19089 		headlen = MODE_HEADER_LENGTH_GRP2;
19090 	}
19091 
19092 	ASSERT(headlen <= buflen);
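	/* Set LUN bit(s) in the CDB if this is a SCSI-1 device */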
19093 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19094 
19095 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19096 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19097 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19098 	ucmd_buf.uscsi_buflen	= buflen;
19099 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19100 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19101 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19102 	ucmd_buf.uscsi_timeout	= 60;
19103 
19104 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19105 	    UIO_SYSSPACE, path_flag);
19106 
19107 	switch (status) {
19108 	case 0:
19109 		/*
19110 		 * sr_check_wp() uses 0x3f page code and check the header of
19111 		 * mode page to determine if target device is write-protected.
19112 		 * But some USB devices return 0 bytes for 0x3f page code. For
19113 		 * this case, make sure that mode page header is returned at
19114 		 * least.
19115 		 */
		if (buflen - ucmd_buf.uscsi_resid < headlen)
19117 			status = EIO;
19118 		break;	/* Success! */
19119 	case EIO:
19120 		switch (ucmd_buf.uscsi_status) {
19121 		case STATUS_RESERVATION_CONFLICT:
19122 			status = EACCES;
19123 			break;
19124 		default:
19125 			break;
19126 		}
19127 		break;
19128 	default:
19129 		break;
19130 	}
19131 
19132 	if (status == 0) {
19133 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
19134 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19135 	}
19136 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
19137 
19138 	return (status);
19139 }
19140 
19141 
19142 /*
19143  *    Function: sd_send_scsi_MODE_SELECT
19144  *
19145  * Description: Utility function for issuing a scsi MODE SELECT command.
19146  *		Note: This routine uses a consistent implementation for Group0,
19147  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
19149  *
19150  *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
19153  *		bufaddr - buffer for page data retrieved from the target.
19154  *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if SP bit should be set.
19156  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19157  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19158  *			to use the USCSI "direct" chain and bypass the normal
19159  *			command waitq.
19160  *
19161  * Return Code: 0   - Success
19162  *		errno return code from sd_send_scsi_cmd()
19163  *
19164  *     Context: Can sleep. Does not return until command is completed.
19165  */
19166 
19167 static int
19168 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
19169 	size_t buflen,  uchar_t save_page, int path_flag)
19170 {
19171 	struct	scsi_extended_sense	sense_buf;
19172 	union scsi_cdb		cdb;
19173 	struct uscsi_cmd	ucmd_buf;
19174 	int			status;
19175 
19176 	ASSERT(un != NULL);
19177 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19178 	ASSERT(bufaddr != NULL);
19179 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
19180 	    (cdbsize == CDB_GROUP2));
19181 
19182 	SD_TRACE(SD_LOG_IO, un,
19183 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
19184 
19185 	bzero(&cdb, sizeof (cdb));
19186 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19187 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19188 
19189 	/* Set the PF bit for many third party drives */
19190 	cdb.cdb_opaque[1] = 0x10;
19191 
19192 	/* Set the savepage(SP) bit if given */
19193 	if (save_page == SD_SAVE_PAGE) {
19194 		cdb.cdb_opaque[1] |= 0x01;
19195 	}
19196 
19197 	if (cdbsize == CDB_GROUP0) {
19198 		cdb.scc_cmd = SCMD_MODE_SELECT;
19199 		FORMG0COUNT(&cdb, buflen);
19200 	} else {
19201 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
19202 		FORMG1COUNT(&cdb, buflen);
19203 	}
19204 
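	/* Set LUN bit(s) in the CDB if this is a SCSI-1 device */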
19205 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19206 
19207 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19208 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19209 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19210 	ucmd_buf.uscsi_buflen	= buflen;
19211 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19212 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19213 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
19214 	ucmd_buf.uscsi_timeout	= 60;
19215 
19216 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19217 	    UIO_SYSSPACE, path_flag);
19218 
19219 	switch (status) {
19220 	case 0:
19221 		break;	/* Success! */
19222 	case EIO:
19223 		switch (ucmd_buf.uscsi_status) {
19224 		case STATUS_RESERVATION_CONFLICT:
19225 			status = EACCES;
19226 			break;
19227 		default:
19228 			break;
19229 		}
19230 		break;
19231 	default:
19232 		break;
19233 	}
19234 
19235 	if (status == 0) {
19236 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
19237 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19238 	}
19239 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
19240 
19241 	return (status);
19242 }
19243 
19244 
19245 /*
19246  *    Function: sd_send_scsi_RDWR
19247  *
19248  * Description: Issue a scsi READ or WRITE command with the given parameters.
19249  *
19250  *   Arguments: un:      Pointer to the sd_lun struct for the target.
19251  *		cmd:	 SCMD_READ or SCMD_WRITE
19252  *		bufaddr: Address of caller's buffer to receive the RDWR data
 *		buflen:  Length of caller's buffer to receive the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
19258  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19259  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19260  *			to use the USCSI "direct" chain and bypass the normal
19261  *			command waitq.
19262  *
19263  * Return Code: 0   - Success
19264  *		errno return code from sd_send_scsi_cmd()
19265  *
19266  *     Context: Can sleep. Does not return until command is completed.
19267  */
19268 
19269 static int
19270 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
19271 	size_t buflen, daddr_t start_block, int path_flag)
19272 {
19273 	struct	scsi_extended_sense	sense_buf;
19274 	union scsi_cdb		cdb;
19275 	struct uscsi_cmd	ucmd_buf;
19276 	uint32_t		block_count;
19277 	int			status;
19278 	int			cdbsize;
19279 	uchar_t			flag;
19280 
19281 	ASSERT(un != NULL);
19282 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19283 	ASSERT(bufaddr != NULL);
19284 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
19285 
19286 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
19287 
19288 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
19289 		return (EINVAL);
19290 	}
19291 
19292 	mutex_enter(SD_MUTEX(un));
19293 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
19294 	mutex_exit(SD_MUTEX(un));
19295 
19296 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
19297 
19298 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
19299 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
19300 	    bufaddr, buflen, start_block, block_count);
19301 
19302 	bzero(&cdb, sizeof (cdb));
19303 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19304 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19305 
19306 	/* Compute CDB size to use */
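	/*
	 * A Group 0 (6-byte) CDB can address only a 21-bit LBA, so any
	 * start_block with bits set above bit 20 (the 0xFFE00000 mask)
	 * needs at least a Group 1 (10-byte) CDB, as do ATAPI devices,
	 * which do not accept Group 0 Read/Write commands; LBAs beyond
	 * 32 bits require a Group 4 (16-byte) CDB.
	 */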
19307 	if (start_block > 0xffffffff)
19308 		cdbsize = CDB_GROUP4;
19309 	else if ((start_block & 0xFFE00000) ||
19310 	    (un->un_f_cfg_is_atapi == TRUE))
19311 		cdbsize = CDB_GROUP1;
19312 	else
19313 		cdbsize = CDB_GROUP0;
19314 
19315 	switch (cdbsize) {
19316 	case CDB_GROUP0:	/* 6-byte CDBs */
19317 		cdb.scc_cmd = cmd;
19318 		FORMG0ADDR(&cdb, start_block);
19319 		FORMG0COUNT(&cdb, block_count);
19320 		break;
19321 	case CDB_GROUP1:	/* 10-byte CDBs */
19322 		cdb.scc_cmd = cmd | SCMD_GROUP1;
19323 		FORMG1ADDR(&cdb, start_block);
19324 		FORMG1COUNT(&cdb, block_count);
19325 		break;
19326 	case CDB_GROUP4:	/* 16-byte CDBs */
19327 		cdb.scc_cmd = cmd | SCMD_GROUP4;
19328 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
19329 		FORMG4COUNT(&cdb, block_count);
19330 		break;
19331 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
19332 	default:
19333 		/* All others reserved */
19334 		return (EINVAL);
19335 	}
19336 
19337 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
19338 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
19339 
19340 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19341 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
19342 	ucmd_buf.uscsi_bufaddr	= bufaddr;
19343 	ucmd_buf.uscsi_buflen	= buflen;
19344 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19345 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19346 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
19347 	ucmd_buf.uscsi_timeout	= 60;
19348 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19349 	    UIO_SYSSPACE, path_flag);
19350 	switch (status) {
19351 	case 0:
19352 		break;	/* Success! */
19353 	case EIO:
19354 		switch (ucmd_buf.uscsi_status) {
19355 		case STATUS_RESERVATION_CONFLICT:
19356 			status = EACCES;
19357 			break;
19358 		default:
19359 			break;
19360 		}
19361 		break;
19362 	default:
19363 		break;
19364 	}
19365 
19366 	if (status == 0) {
19367 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
19368 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19369 	}
19370 
19371 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
19372 
19373 	return (status);
19374 }
19375 
19376 
19377 /*
19378  *    Function: sd_send_scsi_LOG_SENSE
19379  *
19380  * Description: Issue a scsi LOG_SENSE command with the given parameters.
19381  *
19382  *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Length of bufaddr in bytes.
 *		page_code: Code of the log page to request.
 *		page_control: Page control field (PC) for the request.
 *		param_ptr: Parameter pointer field for the request.
 *		path_flag: SD_PATH_DIRECT, SD_PATH_STANDARD, or
 *			SD_PATH_DIRECT_PRIORITY (see sd_send_scsi_cmd).
19384  * Return Code: 0   - Success
19385  *		errno return code from sd_send_scsi_cmd()
19386  *
19387  *     Context: Can sleep. Does not return until command is completed.
19388  */
19389 
19390 static int
19391 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
19392 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
19393 	int path_flag)
19395 {
19396 	struct	scsi_extended_sense	sense_buf;
19397 	union scsi_cdb		cdb;
19398 	struct uscsi_cmd	ucmd_buf;
19399 	int			status;
19400 
19401 	ASSERT(un != NULL);
19402 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19403 
19404 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
19405 
19406 	bzero(&cdb, sizeof (cdb));
19407 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19408 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19409 
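	/*
	 * LOG SENSE CDB: byte 2 packs the page control field (bits 7-6)
	 * with the page code (bits 5-0), bytes 5-6 carry the parameter
	 * pointer big-endian, and FORMG1COUNT() stores the allocation
	 * length in bytes 7-8.
	 */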
19410 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
19411 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
19412 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
19413 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
19414 	FORMG1COUNT(&cdb, buflen);
19415 
19416 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19417 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19418 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19419 	ucmd_buf.uscsi_buflen	= buflen;
19420 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19421 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19422 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19423 	ucmd_buf.uscsi_timeout	= 60;
19424 
19425 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19426 	    UIO_SYSSPACE, path_flag);
19427 
19428 	switch (status) {
19429 	case 0:
19430 		break;
19431 	case EIO:
19432 		switch (ucmd_buf.uscsi_status) {
19433 		case STATUS_RESERVATION_CONFLICT:
19434 			status = EACCES;
19435 			break;
19436 		case STATUS_CHECK:
19437 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19438 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19439 				KEY_ILLEGAL_REQUEST) &&
19440 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
19441 				/*
19442 				 * ASC 0x24: INVALID FIELD IN CDB
19443 				 */
19444 				switch (page_code) {
19445 				case START_STOP_CYCLE_PAGE:
19446 					/*
19447 					 * The start stop cycle counter is
19448 					 * implemented as page 0x31 in earlier
19449 					 * generation disks. In new generation
19450 					 * disks the start stop cycle counter is
19451 					 * implemented as page 0xE. To properly
19452 					 * handle this case if an attempt for
19453 					 * log page 0xE is made and fails we
19454 					 * will try again using page 0x31.
19455 					 *
19456 					 * Network storage BU committed to
19457 					 * maintain the page 0x31 for this
19458 					 * purpose and will not have any other
19459 					 * page implemented with page code 0x31
19460 					 * until all disks transition to the
19461 					 * standard page.
19462 					 */
19463 					mutex_enter(SD_MUTEX(un));
19464 					un->un_start_stop_cycle_page =
19465 					    START_STOP_CYCLE_VU_PAGE;
19466 					cdb.cdb_opaque[2] =
19467 					    (char)(page_control << 6) |
19468 					    un->un_start_stop_cycle_page;
19469 					mutex_exit(SD_MUTEX(un));
19470 					status = sd_send_scsi_cmd(
19471 					    SD_GET_DEV(un), &ucmd_buf, FKIOCTL,
19472 					    UIO_SYSSPACE, path_flag);
19473 
19474 					break;
19475 				case TEMPERATURE_PAGE:
19476 					status = ENOTTY;
19477 					break;
19478 				default:
19479 					break;
19480 				}
19481 			}
19482 			break;
19483 		default:
19484 			break;
19485 		}
19486 		break;
19487 	default:
19488 		break;
19489 	}
19490 
19491 	if (status == 0) {
19492 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
19493 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
19494 	}
19495 
19496 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
19497 
19498 	return (status);
19499 }
19500 
19501 
19502 /*
19503  *    Function: sdioctl
19504  *
19505  * Description: Driver's ioctl(9e) entry point function.
19506  *
19507  *   Arguments: dev     - device number
19508  *		cmd     - ioctl operation to be performed
19509  *		arg     - user argument, contains data to be set or reference
19510  *			  parameter for get
19511  *		flag    - bit flag, indicating open settings, 32/64 bit type
19512  *		cred_p  - user credential pointer
19513  *		rval_p  - calling process return value (OPT)
19514  *
19515  * Return Code: EINVAL
19516  *		ENOTTY
19517  *		ENXIO
19518  *		EIO
19519  *		EFAULT
19520  *		ENOTSUP
19521  *		EPERM
19522  *
19523  *     Context: Called from the device switch at normal priority.
19524  */
19525 
19526 static int
19527 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
19528 {
19529 	struct sd_lun	*un = NULL;
19530 	int		err = 0;
19531 	int		i = 0;
19532 	cred_t		*cr;
19533 	int		tmprval = EINVAL;
19534 	int 		is_valid;
19535 
19536 	/*
	 * All device accesses go through sdstrategy, where we check the
	 * suspend status.
19539 	 */
19540 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
19541 		return (ENXIO);
19542 	}
19543 
19544 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19545 
19546 
19547 	is_valid = SD_IS_VALID_LABEL(un);
19548 
19549 	/*
19550 	 * Moved this wait from sd_uscsi_strategy to here for
19551 	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
19553 	 * in a call to sd_uscsi_strategy.
19554 	 */
19555 	mutex_enter(SD_MUTEX(un));
19556 	while ((un->un_state == SD_STATE_SUSPENDED) ||
19557 	    (un->un_state == SD_STATE_PM_CHANGING)) {
19558 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
19559 	}
19560 	/*
19561 	 * Twiddling the counter here protects commands from now
19562 	 * through to the top of sd_uscsi_strategy. Without the
	 * counter increment, a power-down, for example, could get in
19564 	 * after the above check for state is made and before
19565 	 * execution gets to the top of sd_uscsi_strategy.
19566 	 * That would cause problems.
19567 	 */
19568 	un->un_ncmds_in_driver++;
19569 
19570 	if (!is_valid &&
19571 	    (flag & (FNDELAY | FNONBLOCK))) {
19572 		switch (cmd) {
19573 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
19574 		case DKIOCGVTOC:
19575 		case DKIOCGAPART:
19576 		case DKIOCPARTINFO:
19577 		case DKIOCSGEOM:
19578 		case DKIOCSAPART:
19579 		case DKIOCGETEFI:
19580 		case DKIOCPARTITION:
19581 		case DKIOCSVTOC:
19582 		case DKIOCSETEFI:
19583 		case DKIOCGMBOOT:
19584 		case DKIOCSMBOOT:
19585 		case DKIOCG_PHYGEOM:
19586 		case DKIOCG_VIRTGEOM:
19587 			/* let cmlb handle it */
19588 			goto skip_ready_valid;
19589 
19590 		case CDROMPAUSE:
19591 		case CDROMRESUME:
19592 		case CDROMPLAYMSF:
19593 		case CDROMPLAYTRKIND:
19594 		case CDROMREADTOCHDR:
19595 		case CDROMREADTOCENTRY:
19596 		case CDROMSTOP:
19597 		case CDROMSTART:
19598 		case CDROMVOLCTRL:
19599 		case CDROMSUBCHNL:
19600 		case CDROMREADMODE2:
19601 		case CDROMREADMODE1:
19602 		case CDROMREADOFFSET:
19603 		case CDROMSBLKMODE:
19604 		case CDROMGBLKMODE:
19605 		case CDROMGDRVSPEED:
19606 		case CDROMSDRVSPEED:
19607 		case CDROMCDDA:
19608 		case CDROMCDXA:
19609 		case CDROMSUBCODE:
19610 			if (!ISCD(un)) {
19611 				un->un_ncmds_in_driver--;
19612 				ASSERT(un->un_ncmds_in_driver >= 0);
19613 				mutex_exit(SD_MUTEX(un));
19614 				return (ENOTTY);
19615 			}
19616 			break;
19617 		case FDEJECT:
19618 		case DKIOCEJECT:
19619 		case CDROMEJECT:
19620 			if (!un->un_f_eject_media_supported) {
19621 				un->un_ncmds_in_driver--;
19622 				ASSERT(un->un_ncmds_in_driver >= 0);
19623 				mutex_exit(SD_MUTEX(un));
19624 				return (ENOTTY);
19625 			}
19626 			break;
19627 		case DKIOCFLUSHWRITECACHE:
19628 			mutex_exit(SD_MUTEX(un));
19629 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19630 			if (err != 0) {
19631 				mutex_enter(SD_MUTEX(un));
19632 				un->un_ncmds_in_driver--;
19633 				ASSERT(un->un_ncmds_in_driver >= 0);
19634 				mutex_exit(SD_MUTEX(un));
19635 				return (EIO);
19636 			}
19637 			mutex_enter(SD_MUTEX(un));
19638 			/* FALLTHROUGH */
19639 		case DKIOCREMOVABLE:
19640 		case DKIOCHOTPLUGGABLE:
19641 		case DKIOCINFO:
19642 		case DKIOCGMEDIAINFO:
19643 		case MHIOCENFAILFAST:
19644 		case MHIOCSTATUS:
19645 		case MHIOCTKOWN:
19646 		case MHIOCRELEASE:
19647 		case MHIOCGRP_INKEYS:
19648 		case MHIOCGRP_INRESV:
19649 		case MHIOCGRP_REGISTER:
19650 		case MHIOCGRP_RESERVE:
19651 		case MHIOCGRP_PREEMPTANDABORT:
19652 		case MHIOCGRP_REGISTERANDIGNOREKEY:
19653 		case CDROMCLOSETRAY:
19654 		case USCSICMD:
19655 			goto skip_ready_valid;
19656 		default:
19657 			break;
19658 		}
19659 
19660 		mutex_exit(SD_MUTEX(un));
19661 		err = sd_ready_and_valid(un);
19662 		mutex_enter(SD_MUTEX(un));
19663 
19664 		if (err != SD_READY_VALID) {
19665 			switch (cmd) {
19666 			case DKIOCSTATE:
19667 			case CDROMGDRVSPEED:
19668 			case CDROMSDRVSPEED:
19669 			case FDEJECT:	/* for eject command */
19670 			case DKIOCEJECT:
19671 			case CDROMEJECT:
19672 			case DKIOCREMOVABLE:
19673 			case DKIOCHOTPLUGGABLE:
19674 				break;
19675 			default:
19676 				if (un->un_f_has_removable_media) {
19677 					err = ENXIO;
19678 				} else {
19679 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
19680 					if (err == SD_RESERVED_BY_OTHERS) {
19681 						err = EACCES;
19682 					} else {
19683 						err = EIO;
19684 					}
19685 				}
19686 				un->un_ncmds_in_driver--;
19687 				ASSERT(un->un_ncmds_in_driver >= 0);
19688 				mutex_exit(SD_MUTEX(un));
19689 				return (err);
19690 			}
19691 		}
19692 	}
19693 
19694 skip_ready_valid:
19695 	mutex_exit(SD_MUTEX(un));
19696 
19697 	switch (cmd) {
19698 	case DKIOCINFO:
19699 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
19700 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
19701 		break;
19702 
19703 	case DKIOCGMEDIAINFO:
19704 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
19705 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
19706 		break;
19707 
19708 	case DKIOCGGEOM:
19709 	case DKIOCGVTOC:
19710 	case DKIOCGAPART:
19711 	case DKIOCPARTINFO:
19712 	case DKIOCSGEOM:
19713 	case DKIOCSAPART:
19714 	case DKIOCGETEFI:
19715 	case DKIOCPARTITION:
19716 	case DKIOCSVTOC:
19717 	case DKIOCSETEFI:
19718 	case DKIOCGMBOOT:
19719 	case DKIOCSMBOOT:
19720 	case DKIOCG_PHYGEOM:
19721 	case DKIOCG_VIRTGEOM:
19722 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
19723 
19724 		/* TUR should spin up */
19725 
19726 		if (un->un_f_has_removable_media)
19727 			err = sd_send_scsi_TEST_UNIT_READY(un,
19728 			    SD_CHECK_FOR_MEDIA);
19729 		else
19730 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
19731 
19732 		if (err != 0)
19733 			break;
19734 
19735 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
19736 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
19737 
19738 		if ((err == 0) &&
		    ((cmd == DKIOCSETEFI) ||
		    ((un->un_f_pkstats_enabled) &&
		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC)))) {
19742 
19743 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
19744 			    (void *)SD_PATH_DIRECT);
19745 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
19746 				sd_set_pstats(un);
19747 				SD_TRACE(SD_LOG_IO_PARTITION, un,
19748 				    "sd_ioctl: un:0x%p pstats created and "
19749 				    "set\n", un);
19750 			}
19751 		}
19752 
19753 		if ((cmd == DKIOCSVTOC) ||
19754 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
19755 
19756 			mutex_enter(SD_MUTEX(un));
19757 			if (un->un_f_devid_supported &&
19758 			    (un->un_f_opt_fab_devid == TRUE)) {
19759 				if (un->un_devid == NULL) {
19760 					sd_register_devid(un, SD_DEVINFO(un),
19761 					    SD_TARGET_IS_UNRESERVED);
19762 				} else {
19763 					/*
19764 					 * The device id for this disk
19765 					 * has been fabricated. The
19766 					 * device id must be preserved
19767 					 * by writing it back out to
19768 					 * disk.
19769 					 */
19770 					if (sd_write_deviceid(un) != 0) {
19771 						ddi_devid_free(un->un_devid);
19772 						un->un_devid = NULL;
19773 					}
19774 				}
19775 			}
19776 			mutex_exit(SD_MUTEX(un));
19777 		}
19778 
19779 		break;
19780 
19781 	case DKIOCLOCK:
19782 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
19783 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19784 		    SD_PATH_STANDARD);
19785 		break;
19786 
19787 	case DKIOCUNLOCK:
19788 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
19789 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
19790 		    SD_PATH_STANDARD);
19791 		break;
19792 
19793 	case DKIOCSTATE: {
19794 		enum dkio_state		state;
19795 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
19796 
19797 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
19798 			err = EFAULT;
19799 		} else {
19800 			err = sd_check_media(dev, state);
19801 			if (err == 0) {
19802 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
19803 				    sizeof (int), flag) != 0)
19804 					err = EFAULT;
19805 			}
19806 		}
19807 		break;
19808 	}
19809 
19810 	case DKIOCREMOVABLE:
19811 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
19812 		i = un->un_f_has_removable_media ? 1 : 0;
19813 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19814 			err = EFAULT;
19815 		} else {
19816 			err = 0;
19817 		}
19818 		break;
19819 
19820 	case DKIOCHOTPLUGGABLE:
19821 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
19822 		i = un->un_f_is_hotpluggable ? 1 : 0;
19823 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
19824 			err = EFAULT;
19825 		} else {
19826 			err = 0;
19827 		}
19828 		break;
19829 
19830 	case DKIOCGTEMPERATURE:
19831 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
19832 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
19833 		break;
19834 
19835 	case MHIOCENFAILFAST:
19836 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
19837 		if ((err = drv_priv(cred_p)) == 0) {
19838 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
19839 		}
19840 		break;
19841 
19842 	case MHIOCTKOWN:
19843 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
19844 		if ((err = drv_priv(cred_p)) == 0) {
19845 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
19846 		}
19847 		break;
19848 
19849 	case MHIOCRELEASE:
19850 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
19851 		if ((err = drv_priv(cred_p)) == 0) {
19852 			err = sd_mhdioc_release(dev);
19853 		}
19854 		break;
19855 
19856 	case MHIOCSTATUS:
19857 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
19858 		if ((err = drv_priv(cred_p)) == 0) {
19859 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
19860 			case 0:
19861 				err = 0;
19862 				break;
19863 			case EACCES:
19864 				*rval_p = 1;
19865 				err = 0;
19866 				break;
19867 			default:
19868 				err = EIO;
19869 				break;
19870 			}
19871 		}
19872 		break;
19873 
19874 	case MHIOCQRESERVE:
19875 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
19876 		if ((err = drv_priv(cred_p)) == 0) {
19877 			err = sd_reserve_release(dev, SD_RESERVE);
19878 		}
19879 		break;
19880 
19881 	case MHIOCREREGISTERDEVID:
19882 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
19883 		if (drv_priv(cred_p) == EPERM) {
19884 			err = EPERM;
19885 		} else if (!un->un_f_devid_supported) {
19886 			err = ENOTTY;
19887 		} else {
19888 			err = sd_mhdioc_register_devid(dev);
19889 		}
19890 		break;
19891 
19892 	case MHIOCGRP_INKEYS:
19893 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
19894 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19895 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19896 				err = ENOTSUP;
19897 			} else {
19898 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
19899 				    flag);
19900 			}
19901 		}
19902 		break;
19903 
19904 	case MHIOCGRP_INRESV:
19905 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
19906 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
19907 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19908 				err = ENOTSUP;
19909 			} else {
19910 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
19911 			}
19912 		}
19913 		break;
19914 
19915 	case MHIOCGRP_REGISTER:
19916 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
19917 		if ((err = drv_priv(cred_p)) != EPERM) {
19918 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19919 				err = ENOTSUP;
19920 			} else if (arg != NULL) {
19921 				mhioc_register_t reg;
19922 				if (ddi_copyin((void *)arg, &reg,
19923 				    sizeof (mhioc_register_t), flag) != 0) {
19924 					err = EFAULT;
19925 				} else {
19926 					err =
19927 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19928 					    un, SD_SCSI3_REGISTER,
19929 					    (uchar_t *)&reg);
19930 				}
19931 			}
19932 		}
19933 		break;
19934 
19935 	case MHIOCGRP_RESERVE:
19936 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
19937 		if ((err = drv_priv(cred_p)) != EPERM) {
19938 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19939 				err = ENOTSUP;
19940 			} else if (arg != NULL) {
19941 				mhioc_resv_desc_t resv_desc;
19942 				if (ddi_copyin((void *)arg, &resv_desc,
19943 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
19944 					err = EFAULT;
19945 				} else {
19946 					err =
19947 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19948 					    un, SD_SCSI3_RESERVE,
19949 					    (uchar_t *)&resv_desc);
19950 				}
19951 			}
19952 		}
19953 		break;
19954 
19955 	case MHIOCGRP_PREEMPTANDABORT:
19956 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
19957 		if ((err = drv_priv(cred_p)) != EPERM) {
19958 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19959 				err = ENOTSUP;
19960 			} else if (arg != NULL) {
19961 				mhioc_preemptandabort_t preempt_abort;
19962 				if (ddi_copyin((void *)arg, &preempt_abort,
19963 				    sizeof (mhioc_preemptandabort_t),
19964 				    flag) != 0) {
19965 					err = EFAULT;
19966 				} else {
19967 					err =
19968 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19969 					    un, SD_SCSI3_PREEMPTANDABORT,
19970 					    (uchar_t *)&preempt_abort);
19971 				}
19972 			}
19973 		}
19974 		break;
19975 
19976 	case MHIOCGRP_REGISTERANDIGNOREKEY:
19977 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
19978 		if ((err = drv_priv(cred_p)) != EPERM) {
19979 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
19980 				err = ENOTSUP;
19981 			} else if (arg != NULL) {
19982 				mhioc_registerandignorekey_t r_and_i;
19983 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
19984 				    sizeof (mhioc_registerandignorekey_t),
19985 				    flag) != 0) {
19986 					err = EFAULT;
19987 				} else {
19988 					err =
19989 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
19990 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
19991 					    (uchar_t *)&r_and_i);
19992 				}
19993 			}
19994 		}
19995 		break;
19996 
19997 	case USCSICMD:
19998 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
19999 		cr = ddi_get_cred();
20000 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
20001 			err = EPERM;
20002 		} else {
20003 			enum uio_seg	uioseg;
20004 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
20005 			    UIO_USERSPACE;
20006 			if (un->un_f_format_in_progress == TRUE) {
20007 				err = EAGAIN;
20008 				break;
20009 			}
20010 			err = sd_send_scsi_cmd(dev, (struct uscsi_cmd *)arg,
20011 			    flag, uioseg, SD_PATH_STANDARD);
20012 		}
20013 		break;
20014 
20015 	case CDROMPAUSE:
20016 	case CDROMRESUME:
20017 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
20018 		if (!ISCD(un)) {
20019 			err = ENOTTY;
20020 		} else {
20021 			err = sr_pause_resume(dev, cmd);
20022 		}
20023 		break;
20024 
20025 	case CDROMPLAYMSF:
20026 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
20027 		if (!ISCD(un)) {
20028 			err = ENOTTY;
20029 		} else {
20030 			err = sr_play_msf(dev, (caddr_t)arg, flag);
20031 		}
20032 		break;
20033 
20034 	case CDROMPLAYTRKIND:
20035 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
20036 #if defined(__i386) || defined(__amd64)
20037 		/*
20038 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
20039 		 */
20040 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
20041 #else
20042 		if (!ISCD(un)) {
20043 #endif
20044 			err = ENOTTY;
20045 		} else {
20046 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
20047 		}
20048 		break;
20049 
20050 	case CDROMREADTOCHDR:
20051 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
20052 		if (!ISCD(un)) {
20053 			err = ENOTTY;
20054 		} else {
20055 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
20056 		}
20057 		break;
20058 
20059 	case CDROMREADTOCENTRY:
20060 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
20061 		if (!ISCD(un)) {
20062 			err = ENOTTY;
20063 		} else {
20064 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
20065 		}
20066 		break;
20067 
20068 	case CDROMSTOP:
20069 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
20070 		if (!ISCD(un)) {
20071 			err = ENOTTY;
20072 		} else {
20073 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
20074 			    SD_PATH_STANDARD);
20075 		}
20076 		break;
20077 
20078 	case CDROMSTART:
20079 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
20080 		if (!ISCD(un)) {
20081 			err = ENOTTY;
20082 		} else {
20083 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
20084 			    SD_PATH_STANDARD);
20085 		}
20086 		break;
20087 
20088 	case CDROMCLOSETRAY:
20089 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
20090 		if (!ISCD(un)) {
20091 			err = ENOTTY;
20092 		} else {
20093 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
20094 			    SD_PATH_STANDARD);
20095 		}
20096 		break;
20097 
20098 	case FDEJECT:	/* for eject command */
20099 	case DKIOCEJECT:
20100 	case CDROMEJECT:
20101 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
20102 		if (!un->un_f_eject_media_supported) {
20103 			err = ENOTTY;
20104 		} else {
20105 			err = sr_eject(dev);
20106 		}
20107 		break;
20108 
20109 	case CDROMVOLCTRL:
20110 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
20111 		if (!ISCD(un)) {
20112 			err = ENOTTY;
20113 		} else {
20114 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
20115 		}
20116 		break;
20117 
20118 	case CDROMSUBCHNL:
20119 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
20120 		if (!ISCD(un)) {
20121 			err = ENOTTY;
20122 		} else {
20123 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
20124 		}
20125 		break;
20126 
20127 	case CDROMREADMODE2:
20128 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
20129 		if (!ISCD(un)) {
20130 			err = ENOTTY;
20131 		} else if (un->un_f_cfg_is_atapi == TRUE) {
20132 			/*
20133 			 * If the drive supports READ CD, use that instead of
20134 			 * switching the LBA size via a MODE SELECT
20135 			 * Block Descriptor
20136 			 */
20137 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
20138 		} else {
20139 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
20140 		}
20141 		break;
20142 
20143 	case CDROMREADMODE1:
20144 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
20145 		if (!ISCD(un)) {
20146 			err = ENOTTY;
20147 		} else {
20148 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
20149 		}
20150 		break;
20151 
20152 	case CDROMREADOFFSET:
20153 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
20154 		if (!ISCD(un)) {
20155 			err = ENOTTY;
20156 		} else {
20157 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
20158 			    flag);
20159 		}
20160 		break;
20161 
20162 	case CDROMSBLKMODE:
20163 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
20164 		/*
20165 		 * There is no means of changing the block size on ATAPI
20166 		 * drives, so return ENOTTY if the drive type is ATAPI.
20167 		 */
20168 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
20169 			err = ENOTTY;
20170 		} else if (un->un_f_mmc_cap == TRUE) {
20171 
20172 			/*
20173 			 * MMC Devices do not support changing the
20174 			 * MMC devices do not support changing the
20175 			 * logical block size.
20176 			 *
20177 			 * Note: EINVAL is returned instead of ENOTTY to
20178 			 * maintain consistency with the original mmc
20179 			 * driver update.
20180 			err = EINVAL;
20181 		} else {
20182 			mutex_enter(SD_MUTEX(un));
20183 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
20184 			    (un->un_ncmds_in_transport > 0)) {
20185 				mutex_exit(SD_MUTEX(un));
20186 				err = EINVAL;
20187 			} else {
20188 				mutex_exit(SD_MUTEX(un));
20189 				err = sr_change_blkmode(dev, cmd, arg, flag);
20190 			}
20191 		}
20192 		break;
20193 
20194 	case CDROMGBLKMODE:
20195 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
20196 		if (!ISCD(un)) {
20197 			err = ENOTTY;
20198 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
20199 		    (un->un_f_blockcount_is_valid != FALSE)) {
20200 			/*
20201 			 * This is an ATAPI drive, so return the target block
20202 			 * size; the block size cannot be changed on ATAPI
20203 			 * drives. Used primarily to detect whether an ATAPI
20204 			 * CD-ROM is present.
20205 			 */
20206 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
20207 			    sizeof (int), flag) != 0) {
20208 				err = EFAULT;
20209 			} else {
20210 				err = 0;
20211 			}
20212 
20213 		} else {
20214 			/*
20215 			 * Drive supports changing block sizes via a Mode
20216 			 * Select.
20217 			 */
20218 			err = sr_change_blkmode(dev, cmd, arg, flag);
20219 		}
20220 		break;
20221 
20222 	case CDROMGDRVSPEED:
20223 	case CDROMSDRVSPEED:
20224 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
20225 		if (!ISCD(un)) {
20226 			err = ENOTTY;
20227 		} else if (un->un_f_mmc_cap == TRUE) {
20228 			/*
20229 			 * Note: In the future the driver implementation
20230 			 * for getting and setting CD speed should
20231 			 * entail:
20232 			 * 1) If non-mmc try the Toshiba mode page
20233 			 *    (sr_change_speed)
20234 			 * 2) If mmc but no support for Real Time Streaming try
20235 			 *    the SET CD SPEED (0xBB) command
20236 			 *    (sr_atapi_change_speed)
20237 			 * 3) If mmc and support for Real Time Streaming
20238 			 *    try the GET PERFORMANCE and SET STREAMING
20239 			 *    commands (not yet implemented, 4380808)
20240 			 */
20241 			/*
20242 			 * As per recent MMC spec, CD-ROM speed is variable
20243 			 * As per the recent MMC spec, CD-ROM speed is variable
20244 			 * and changes with the LBA. Since there is no such
20245 			 * thing as drive speed now, fail this ioctl.
20246 			 *
20247 			 * Note: EINVAL is returned for consistency with the
20248 			 * original implementation, which supported getting
20249 			 * the drive speed of MMC devices but not setting it;
20250 			 * thus EINVAL would be returned if a set request was
20251 			 * made for an MMC device. We no longer support get or
20252 			 * set speed for MMC devices, but need to remain
20253 			 * consistent with regard to the error code returned.
20254 			 */
20255 			err = EINVAL;
20256 		} else if (un->un_f_cfg_is_atapi == TRUE) {
20257 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
20258 		} else {
20259 			err = sr_change_speed(dev, cmd, arg, flag);
20260 		}
20261 		break;
20262 
20263 	case CDROMCDDA:
20264 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
20265 		if (!ISCD(un)) {
20266 			err = ENOTTY;
20267 		} else {
20268 			err = sr_read_cdda(dev, (void *)arg, flag);
20269 		}
20270 		break;
20271 
20272 	case CDROMCDXA:
20273 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
20274 		if (!ISCD(un)) {
20275 			err = ENOTTY;
20276 		} else {
20277 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
20278 		}
20279 		break;
20280 
20281 	case CDROMSUBCODE:
20282 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
20283 		if (!ISCD(un)) {
20284 			err = ENOTTY;
20285 		} else {
20286 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
20287 		}
20288 		break;
20289 
20290 
20291 #ifdef SDDEBUG
20292 /* RESET/ABORTS testing ioctls */
20293 	case DKIOCRESET: {
20294 		int	reset_level;
20295 
20296 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
20297 			err = EFAULT;
20298 		} else {
20299 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
20300 			    "reset_level = 0x%x\n", reset_level);
20301 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
20302 				err = 0;
20303 			} else {
20304 				err = EIO;
20305 			}
20306 		}
20307 		break;
20308 	}
20309 
20310 	case DKIOCABORT:
20311 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
20312 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
20313 			err = 0;
20314 		} else {
20315 			err = EIO;
20316 		}
20317 		break;
20318 #endif
20319 
20320 #ifdef SD_FAULT_INJECTION
20321 /* SDIOC FaultInjection testing ioctls */
20322 	case SDIOCSTART:
20323 	case SDIOCSTOP:
20324 	case SDIOCINSERTPKT:
20325 	case SDIOCINSERTXB:
20326 	case SDIOCINSERTUN:
20327 	case SDIOCINSERTARQ:
20328 	case SDIOCPUSH:
20329 	case SDIOCRETRIEVE:
20330 	case SDIOCRUN:
20331 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
20332 		    " SDIOC detected cmd:0x%X:\n", cmd);
20333 		/* call error generator */
20334 		sd_faultinjection_ioctl(cmd, arg, un);
20335 		err = 0;
20336 		break;
20337 
20338 #endif /* SD_FAULT_INJECTION */
20339 
20340 	case DKIOCFLUSHWRITECACHE:
20341 		{
20342 			struct dk_callback *dkc = (struct dk_callback *)arg;
20343 
20344 			mutex_enter(SD_MUTEX(un));
20345 			if (!un->un_f_sync_cache_supported ||
20346 			    !un->un_f_write_cache_enabled) {
20347 				err = un->un_f_sync_cache_supported ?
20348 				    0 : ENOTSUP;
20349 				mutex_exit(SD_MUTEX(un));
20350 				if ((flag & FKIOCTL) && dkc != NULL &&
20351 				    dkc->dkc_callback != NULL) {
20352 					(*dkc->dkc_callback)(dkc->dkc_cookie,
20353 					    err);
20354 					/*
20355 					 * Did callback and reported error.
20356 					 * Since we did a callback, ioctl
20357 					 * should return 0.
20358 					 */
20359 					err = 0;
20360 				}
20361 				break;
20362 			}
20363 			mutex_exit(SD_MUTEX(un));
20364 
20365 			if ((flag & FKIOCTL) && dkc != NULL &&
20366 			    dkc->dkc_callback != NULL) {
20367 				/* async SYNC CACHE request */
20368 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
20369 			} else {
20370 				/* synchronous SYNC CACHE request */
20371 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
20372 			}
20373 		}
20374 		break;
20375 
20376 	case DKIOCGETWCE: {
20377 
20378 		int wce;
20379 
20380 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
20381 			break;
20382 		}
20383 
20384 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
20385 			err = EFAULT;
20386 		}
20387 		break;
20388 	}
20389 
20390 	case DKIOCSETWCE: {
20391 
20392 		int wce, sync_supported;
20393 
20394 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
20395 			err = EFAULT;
20396 			break;
20397 		}
20398 
20399 		/*
20400 		 * Synchronize multiple threads trying to enable
20401 		 * or disable the cache via the un_wcc_cv
20402 		 * condition variable.
20403 		 */
20404 		mutex_enter(SD_MUTEX(un));
20405 
20406 		/*
20407 		 * Don't allow the cache to be enabled if the
20408 		 * config file has it disabled.
20409 		 */
20410 		if (un->un_f_opt_disable_cache && wce) {
20411 			mutex_exit(SD_MUTEX(un));
20412 			err = EINVAL;
20413 			break;
20414 		}
20415 
20416 		/*
20417 		 * Wait for write cache change in progress
20418 		 * bit to be clear before proceeding.
20419 		 */
20420 		while (un->un_f_wcc_inprog)
20421 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
20422 
20423 		un->un_f_wcc_inprog = 1;
20424 
20425 		if (un->un_f_write_cache_enabled && wce == 0) {
20426 			/*
20427 			 * Disable the write cache.  Don't clear
20428 			 * un_f_write_cache_enabled until after
20429 			 * the mode select and flush are complete.
20430 			 */
20431 			sync_supported = un->un_f_sync_cache_supported;
20432 
20433 			/*
20434 			 * If cache flush is suppressed, we assume that the
20435 			 * controller firmware will take care of managing the
20436 			 * write cache for us: no need to explicitly
20437 			 * disable it.
20438 			 */
20439 			if (!un->un_f_suppress_cache_flush) {
20440 				mutex_exit(SD_MUTEX(un));
20441 				if ((err = sd_cache_control(un,
20442 				    SD_CACHE_NOCHANGE,
20443 				    SD_CACHE_DISABLE)) == 0 &&
20444 				    sync_supported) {
20445 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
20446 					    NULL);
20447 				}
20448 			} else {
20449 				mutex_exit(SD_MUTEX(un));
20450 			}
20451 
20452 			mutex_enter(SD_MUTEX(un));
20453 			if (err == 0) {
20454 				un->un_f_write_cache_enabled = 0;
20455 			}
20456 
20457 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
20458 			/*
20459 			 * Set un_f_write_cache_enabled first, so there is
20460 			 * no window where the cache is enabled, but the
20461 			 * bit says it isn't.
20462 			 */
20463 			un->un_f_write_cache_enabled = 1;
20464 
20465 			/*
20466 			 * If cache flush is suppressed, we assume that the
20467 			 * controller firmware will take care of managing the
20468 			 * write cache for us: no need to explicitly
20469 			 * enable it.
20470 			 */
20471 			if (!un->un_f_suppress_cache_flush) {
20472 				mutex_exit(SD_MUTEX(un));
20473 				err = sd_cache_control(un, SD_CACHE_NOCHANGE,
20474 				    SD_CACHE_ENABLE);
20475 			} else {
20476 				mutex_exit(SD_MUTEX(un));
20477 			}
20478 
20479 			mutex_enter(SD_MUTEX(un));
20480 
20481 			if (err) {
20482 				un->un_f_write_cache_enabled = 0;
20483 			}
20484 		}
20485 
20486 		un->un_f_wcc_inprog = 0;
20487 		cv_broadcast(&un->un_wcc_cv);
20488 		mutex_exit(SD_MUTEX(un));
20489 		break;
20490 	}
20491 
20492 	default:
20493 		err = ENOTTY;
20494 		break;
20495 	}
20496 	mutex_enter(SD_MUTEX(un));
20497 	un->un_ncmds_in_driver--;
20498 	ASSERT(un->un_ncmds_in_driver >= 0);
20499 	mutex_exit(SD_MUTEX(un));
20500 
20501 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
20502 	return (err);
20503 }
20504 
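/*
 * Example usage sketch (illustrative only, not part of the driver): the
 * DKIOCFLUSHWRITECACHE case above honors the dk_callback argument only
 * for in-kernel (FKIOCTL) callers, so a user-level caller always takes
 * the synchronous SYNCHRONIZE CACHE path. A minimal userland sketch,
 * assuming a hypothetical helper name and an already-opened raw device:
 *
 *	#include <sys/dkio.h>
 *	#include <unistd.h>
 *
 *	int
 *	flush_write_cache(int fd)
 *	{
 *		return (ioctl(fd, DKIOCFLUSHWRITECACHE, NULL));
 *	}
 *
 * Passing NULL for the argument is deliberate; the asynchronous callback
 * form is reachable only from kernel context.
 */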
20505 
20506 /*
20507  *    Function: sd_dkio_ctrl_info
20508  *
20509  * Description: This routine is the driver entry point for handling controller
20510  *		information ioctl requests (DKIOCINFO).
20511  *
20512  *   Arguments: dev  - the device number
20513  *		arg  - pointer to user provided dk_cinfo structure
20514  *		       specifying the controller type and attributes.
20515  *		flag - this argument is a pass through to ddi_copyxxx()
20516  *		       directly from the mode argument of ioctl().
20517  *
20518  * Return Code: 0
20519  *		EFAULT
20520  *		ENXIO
20521  */
20522 
20523 static int
20524 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
20525 {
20526 	struct sd_lun	*un = NULL;
20527 	struct dk_cinfo	*info;
20528 	dev_info_t	*pdip;
20529 	int		lun, tgt;
20530 
20531 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20532 		return (ENXIO);
20533 	}
20534 
20535 	info = (struct dk_cinfo *)
20536 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
20537 
20538 	switch (un->un_ctype) {
20539 	case CTYPE_CDROM:
20540 		info->dki_ctype = DKC_CDROM;
20541 		break;
20542 	default:
20543 		info->dki_ctype = DKC_SCSI_CCS;
20544 		break;
20545 	}
20546 	pdip = ddi_get_parent(SD_DEVINFO(un));
20547 	info->dki_cnum = ddi_get_instance(pdip);
20548 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
20549 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
20550 	} else {
20551 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
20552 		    DK_DEVLEN - 1);
20553 	}
20554 
20555 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20556 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
20557 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
20558 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
20559 
20560 	/* Unit Information */
20561 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
20562 	info->dki_slave = ((tgt << 3) | lun);
20563 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
20564 	    DK_DEVLEN - 1);
20565 	info->dki_flags = DKI_FMTVOL;
20566 	info->dki_partition = SDPART(dev);
20567 
20568 	/* Max Transfer size of this device in blocks */
20569 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
20570 	info->dki_addr = 0;
20571 	info->dki_space = 0;
20572 	info->dki_prio = 0;
20573 	info->dki_vec = 0;
20574 
20575 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
20576 		kmem_free(info, sizeof (struct dk_cinfo));
20577 		return (EFAULT);
20578 	} else {
20579 		kmem_free(info, sizeof (struct dk_cinfo));
20580 		return (0);
20581 	}
20582 }
20583 
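/*
 * Example usage sketch (illustrative only, not part of the driver): how
 * a userland consumer might read the controller/unit information that
 * sd_dkio_ctrl_info() fills in above. The helper name is hypothetical
 * and error handling is minimal:
 *
 *	#include <sys/types.h>
 *	#include <sys/dkio.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	void
 *	print_ctrl_info(int fd)
 *	{
 *		struct dk_cinfo ci;
 *
 *		if (ioctl(fd, DKIOCINFO, &ci) == 0) {
 *			(void) printf("ctlr %s%d unit %u maxxfer %u blocks\n",
 *			    ci.dki_cname, (int)ci.dki_cnum, ci.dki_unit,
 *			    (uint_t)ci.dki_maxtransfer);
 *		}
 *	}
 */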
20584 
20585 /*
20586  *    Function: sd_get_media_info
20587  *
20588  * Description: This routine is the driver entry point for handling ioctl
20589  *		requests for the media type or command set profile used by the
20590  *		drive to operate on the media (DKIOCGMEDIAINFO).
20591  *
20592  *   Arguments: dev	- the device number
20593  *		arg	- pointer to user provided dk_minfo structure
20594  *			  specifying the media type, logical block size and
20595  *			  drive capacity.
20596  *		flag	- this argument is a pass through to ddi_copyxxx()
20597  *			  directly from the mode argument of ioctl().
20598  *
20599  * Return Code: 0
20600  *		EACCES
20601  *		EFAULT
20602  *		ENXIO
20603  *		EIO
20604  */
20605 
20606 static int
20607 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
20608 {
20609 	struct sd_lun		*un = NULL;
20610 	struct uscsi_cmd	com;
20611 	struct scsi_inquiry	*sinq;
20612 	struct dk_minfo		media_info;
20613 	u_longlong_t		media_capacity;
20614 	uint64_t		capacity;
20615 	uint_t			lbasize;
20616 	uchar_t			*out_data;
20617 	uchar_t			*rqbuf;
20618 	int			rval = 0;
20619 	int			rtn;
20620 
20621 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
20622 	    (un->un_state == SD_STATE_OFFLINE)) {
20623 		return (ENXIO);
20624 	}
20625 
20626 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
20627 
20628 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
20629 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20630 
20631 	/* Issue a TUR to determine if the drive is ready with media present */
20632 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
20633 	if (rval == ENXIO) {
20634 		goto done;
20635 	}
20636 
20637 	/* Now get configuration data */
20638 	if (ISCD(un)) {
20639 		media_info.dki_media_type = DK_CDROM;
20640 
20641 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
20642 		if (un->un_f_mmc_cap == TRUE) {
20643 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
20644 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
20645 			    SD_PATH_STANDARD);
20646 
20647 			if (rtn) {
20648 				/*
20649 				 * Failed for other than an illegal request
20650 				 * or command not supported
20651 				 */
20652 				if ((com.uscsi_status == STATUS_CHECK) &&
20653 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
20654 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
20655 					    (rqbuf[12] != 0x20)) {
20656 						rval = EIO;
20657 						goto done;
20658 					}
20659 				}
20660 			} else {
20661 				/*
20662 				 * The GET CONFIGURATION command succeeded
20663 				 * so set the media type according to the
20664 				 * returned data
20665 				 */
20666 				media_info.dki_media_type = out_data[6];
20667 				media_info.dki_media_type <<= 8;
20668 				media_info.dki_media_type |= out_data[7];
20669 			}
20670 		}
20671 	} else {
20672 		/*
20673 		 * The profile list is not available, so we attempt to identify
20674 		 * the media type based on the inquiry data
20675 		 */
20676 		sinq = un->un_sd->sd_inq;
20677 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
20678 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
20679 			/* This is a direct access device  or optical disk */
20680 			media_info.dki_media_type = DK_FIXED_DISK;
20681 
20682 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
20683 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
20684 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
20685 					media_info.dki_media_type = DK_ZIP;
20686 				} else if (
20687 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
20688 					media_info.dki_media_type = DK_JAZ;
20689 				}
20690 			}
20691 		} else {
20692 			/*
20693 			 * Not a CD, direct access, or optical disk, so
20694 			 * return unknown media.
20695 			 */
20696 			media_info.dki_media_type = DK_UNKNOWN;
20697 		}
20698 	}
20699 
20700 	/* Now read the capacity so we can provide the lbasize and capacity */
20701 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
20702 	    SD_PATH_DIRECT)) {
20703 	case 0:
20704 		break;
20705 	case EACCES:
20706 		rval = EACCES;
20707 		goto done;
20708 	default:
20709 		rval = EIO;
20710 		goto done;
20711 	}
20712 
20713 	media_info.dki_lbsize = lbasize;
20714 	media_capacity = capacity;
20715 
20716 	/*
20717 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
20718 	 * un->un_sys_blocksize chunks, so we need to convert it into
20719 	 * lbasize chunks.
20720 	 */
20721 	media_capacity *= un->un_sys_blocksize;
20722 	media_capacity /= lbasize;
20723 	media_info.dki_capacity = media_capacity;
20724 
20725 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
20726 		rval = EFAULT;
20727 		/* Use goto: code added below in the future still hits done: */
20728 		goto done;
20729 	}
20730 done:
20731 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
20732 	kmem_free(rqbuf, SENSE_LENGTH);
20733 	return (rval);
20734 }
20735 
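/*
 * Example usage sketch (illustrative only, not part of the driver): the
 * dk_minfo filled in above reports dki_capacity in dki_lbsize units, so
 * a userland caller computes the media size in bytes as the product of
 * the two. The helper name is hypothetical:
 *
 *	#include <sys/types.h>
 *	#include <sys/dkio.h>
 *	#include <unistd.h>
 *
 *	uint64_t
 *	media_size_bytes(int fd)
 *	{
 *		struct dk_minfo mi;
 *
 *		if (ioctl(fd, DKIOCGMEDIAINFO, &mi) != 0)
 *			return (0);
 *		return ((uint64_t)mi.dki_capacity * mi.dki_lbsize);
 *	}
 */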
20736 
20737 /*
20738  *    Function: sd_check_media
20739  *
20740  * Description: This utility routine implements the functionality for the
20741  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
20742  *		driver state changes from that specified by the user
20743  *		(inserted or ejected). For example, if the user specifies
20744  *		DKIO_EJECTED and the current media state is inserted this
20745  *		routine will immediately return DKIO_INSERTED. However, if the
20746  *		current media state is not inserted the user thread will be
20747  *		blocked until the drive state changes. If DKIO_NONE is specified
20748  *		the user thread will block until a drive state change occurs.
20749  *
20750  *   Arguments: dev  - the device number
20751  *		state  - the media state specified by the user; the routine
20752  *			returns once the drive state differs from it.
20753  *
20754  * Return Code: ENXIO
20755  *		EIO
20756  *		EAGAIN
20757  *		EINTR
20758  */
20759 
20760 static int
20761 sd_check_media(dev_t dev, enum dkio_state state)
20762 {
20763 	struct sd_lun		*un = NULL;
20764 	enum dkio_state		prev_state;
20765 	opaque_t		token = NULL;
20766 	int			rval = 0;
20767 
20768 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20769 		return (ENXIO);
20770 	}
20771 
20772 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
20773 
20774 	mutex_enter(SD_MUTEX(un));
20775 
20776 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
20777 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
20778 
20779 	prev_state = un->un_mediastate;
20780 
20781 	/* is there anything to do? */
20782 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
20783 		/*
20784 		 * submit the request to the scsi_watch service;
20785 		 * sd_media_watch_cb() does the real work
20786 		 */
20787 		mutex_exit(SD_MUTEX(un));
20788 
20789 		/*
20790 		 * This change handles the case where a scsi watch request is
20791 		 * added to a device that is powered down. To accomplish this
20792 		 * we power up the device before adding the scsi watch request,
20793 		 * since the scsi watch sends a TUR directly to the device
20794 		 * which the device cannot handle if it is powered down.
20795 		 */
20796 		if (sd_pm_entry(un) != DDI_SUCCESS) {
20797 			mutex_enter(SD_MUTEX(un));
20798 			goto done;
20799 		}
20800 
20801 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
20802 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
20803 		    (caddr_t)dev);
20804 
20805 		sd_pm_exit(un);
20806 
20807 		mutex_enter(SD_MUTEX(un));
20808 		if (token == NULL) {
20809 			rval = EAGAIN;
20810 			goto done;
20811 		}
20812 
20813 		/*
20814 		 * This is a special case IOCTL that doesn't return
20815 		 * until the media state changes. Routine sdpower
20816 		 * knows about and handles this so don't count it
20817 		 * as an active cmd in the driver, which would
20818 		 * keep the device busy to the pm framework.
20819 		 * If the count isn't decremented the device can't
20820 		 * be powered down.
20821 		 */
20822 		un->un_ncmds_in_driver--;
20823 		ASSERT(un->un_ncmds_in_driver >= 0);
20824 
20825 		/*
20826 		 * if a prior request had been made, this will be the same
20827 		 * token, as scsi_watch was designed that way.
20828 		 */
20829 		un->un_swr_token = token;
20830 		un->un_specified_mediastate = state;
20831 
20832 		/*
20833 		 * now wait for media change
20834 		 * we will not be signalled unless mediastate != state, but it is
20835 		 * still better to test for this condition, since there is a
20836 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
20837 		 */
20838 		SD_TRACE(SD_LOG_COMMON, un,
20839 		    "sd_check_media: waiting for media state change\n");
20840 		while (un->un_mediastate == state) {
20841 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
20842 				SD_TRACE(SD_LOG_COMMON, un,
20843 				    "sd_check_media: waiting for media state "
20844 				    "was interrupted\n");
20845 				un->un_ncmds_in_driver++;
20846 				rval = EINTR;
20847 				goto done;
20848 			}
20849 			SD_TRACE(SD_LOG_COMMON, un,
20850 			    "sd_check_media: received signal, state=%x\n",
20851 			    un->un_mediastate);
20852 		}
20853 		/*
20854 		 * Inc the counter to indicate the device once again
20855 		 * has an active outstanding cmd.
20856 		 */
20857 		un->un_ncmds_in_driver++;
20858 	}
20859 
20860 	/* invalidate geometry */
20861 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
20862 		sr_ejected(un);
20863 	}
20864 
20865 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
20866 		uint64_t	capacity;
20867 		uint_t		lbasize;
20868 
20869 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
20870 		mutex_exit(SD_MUTEX(un));
20871 		/*
20872 		 * Since the following routines use SD_PATH_DIRECT, we must
20873 		 * call PM directly before the upcoming disk accesses. This
20874 		 * may cause the disk to be powered up and spun up.
20875 		 */
20876 
20877 		if (sd_pm_entry(un) == DDI_SUCCESS) {
20878 			rval = sd_send_scsi_READ_CAPACITY(un,
20879 			    &capacity,
20880 			    &lbasize, SD_PATH_DIRECT);
20881 			if (rval != 0) {
20882 				sd_pm_exit(un);
20883 				mutex_enter(SD_MUTEX(un));
20884 				goto done;
20885 			}
20886 		} else {
20887 			rval = EIO;
20888 			mutex_enter(SD_MUTEX(un));
20889 			goto done;
20890 		}
20891 		mutex_enter(SD_MUTEX(un));
20892 
20893 		sd_update_block_info(un, lbasize, capacity);
20894 
20895 		/*
20896 		 * Check whether the media in the device is writable
20897 		 */
20898 		if (ISCD(un))
20899 			sd_check_for_writable_cd(un, SD_PATH_DIRECT);
20900 
20901 		mutex_exit(SD_MUTEX(un));
20902 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
20903 		if ((cmlb_validate(un->un_cmlbhandle, 0,
20904 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
20905 			sd_set_pstats(un);
20906 			SD_TRACE(SD_LOG_IO_PARTITION, un,
20907 			    "sd_check_media: un:0x%p pstats created and "
20908 			    "set\n", un);
20909 		}
20910 
20911 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
20912 		    SD_PATH_DIRECT);
20913 		sd_pm_exit(un);
20914 
20915 		mutex_enter(SD_MUTEX(un));
20916 	}
20917 done:
20918 	un->un_f_watcht_stopped = FALSE;
20919 	if (un->un_swr_token) {
20920 		/*
20921 		 * Use of this local token and the mutex ensures that we avoid
20922 		 * some race conditions associated with terminating the
20923 		 * scsi watch.
20924 		 */
20925 		token = un->un_swr_token;
20926 		un->un_swr_token = (opaque_t)NULL;
20927 		mutex_exit(SD_MUTEX(un));
20928 		(void) scsi_watch_request_terminate(token,
20929 		    SCSI_WATCH_TERMINATE_WAIT);
20930 		mutex_enter(SD_MUTEX(un));
20931 	}
20932 
20933 	/*
20934 	 * Update the capacity kstat value, if no media previously
20935 	 * (capacity kstat is 0) and media has been inserted
20936 	 * (un_f_blockcount_is_valid == TRUE)
20937 	 */
20938 	if (un->un_errstats) {
20939 		struct sd_errstats	*stp = NULL;
20940 
20941 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
20942 		if ((stp->sd_capacity.value.ui64 == 0) &&
20943 		    (un->un_f_blockcount_is_valid == TRUE)) {
20944 			stp->sd_capacity.value.ui64 =
20945 			    (uint64_t)((uint64_t)un->un_blockcount *
20946 			    un->un_sys_blocksize);
20947 		}
20948 	}
20949 	mutex_exit(SD_MUTEX(un));
20950 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
20951 	return (rval);
20952 }
20953 
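/*
 * Example usage sketch (illustrative only, not part of the driver): a
 * typical userland consumer of the blocking DKIOCSTATE semantics
 * implemented by sd_check_media() above. Each call passes in the state
 * the caller last saw; the ioctl returns only when the actual media
 * state differs, writing the new state back. The helper name is
 * hypothetical:
 *
 *	#include <sys/dkio.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	void
 *	watch_media(int fd)
 *	{
 *		enum dkio_state state = DKIO_NONE;
 *
 *		for (;;) {
 *			if (ioctl(fd, DKIOCSTATE, &state) != 0)
 *				break;
 *			if (state == DKIO_INSERTED)
 *				(void) printf("media inserted\n");
 *			else if (state == DKIO_EJECTED)
 *				(void) printf("media ejected\n");
 *		}
 *	}
 */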
20954 
20955 /*
20956  *    Function: sd_delayed_cv_broadcast
20957  *
20958  * Description: Delayed cv_broadcast to allow for target to recover from media
20959  *		insertion.
20960  *
20961  *   Arguments: arg - driver soft state (unit) structure
20962  */
20963 
20964 static void
20965 sd_delayed_cv_broadcast(void *arg)
20966 {
20967 	struct sd_lun *un = arg;
20968 
20969 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
20970 
20971 	mutex_enter(SD_MUTEX(un));
20972 	un->un_dcvb_timeid = NULL;
20973 	cv_broadcast(&un->un_state_cv);
20974 	mutex_exit(SD_MUTEX(un));
20975 }
20976 
20977 
20978 /*
20979  *    Function: sd_media_watch_cb
20980  *
20981  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
20982  *		routine processes the TUR sense data and updates the driver
20983  *		state if a transition has occurred. The user thread
20984  *		(sd_check_media) is then signalled.
20985  *
20986  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
20987  *			among multiple watches that share this callback function
20988  *		resultp - scsi watch facility result packet containing scsi
20989  *			  packet, status byte and sense data
20990  *
20991  * Return Code: 0 for success, -1 for failure
20992  */
20993 
20994 static int
20995 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
20996 {
20997 	struct sd_lun			*un;
20998 	struct scsi_status		*statusp = resultp->statusp;
20999 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
21000 	enum dkio_state			state = DKIO_NONE;
21001 	dev_t				dev = (dev_t)arg;
21002 	uchar_t				actual_sense_length;
21003 	uint8_t				skey, asc, ascq;
21004 
21005 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21006 		return (-1);
21007 	}
21008 	actual_sense_length = resultp->actual_sense_length;
21009 
21010 	mutex_enter(SD_MUTEX(un));
21011 	SD_TRACE(SD_LOG_COMMON, un,
21012 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
21013 	    *((char *)statusp), (void *)sensep, actual_sense_length);
21014 
21015 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
21016 		un->un_mediastate = DKIO_DEV_GONE;
21017 		cv_broadcast(&un->un_state_cv);
21018 		mutex_exit(SD_MUTEX(un));
21019 
21020 		return (0);
21021 	}
21022 
21023 	/*
21024 	 * If there was a check condition, then sensep points to valid sense
21025 	 * data. If the status was not a check condition but a reservation or
21026 	 * busy status, then the new state is DKIO_NONE.
21027 	 */
21028 	if (sensep != NULL) {
21029 		skey = scsi_sense_key(sensep);
21030 		asc = scsi_sense_asc(sensep);
21031 		ascq = scsi_sense_ascq(sensep);
21032 
21033 		SD_INFO(SD_LOG_COMMON, un,
21034 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
21035 		    skey, asc, ascq);
21036 		/* This routine only uses up to 13 bytes of sense data. */
21037 		if (actual_sense_length >= 13) {
21038 			if (skey == KEY_UNIT_ATTENTION) {
21039 				if (asc == 0x28) {
21040 					state = DKIO_INSERTED;
21041 				}
21042 			} else if (skey == KEY_NOT_READY) {
21043 				/*
21044 				 * Sense data 02/04/02 means that the
21045 				 * host should send a start command.
21046 				 * Explicitly leave the media state as
21047 				 * is (inserted), as the media is
21048 				 * present and the host has stopped the
21049 				 * device for PM reasons. The next true
21050 				 * read/write to this media will bring
21051 				 * the device to the right state for
21052 				 * media access.
21053 				 */
21054 				if (asc == 0x3a) {
21055 					state = DKIO_EJECTED;
21056 				} else {
21057 					/*
21058 					 * If the drive is busy with an
21059 					 * operation or long write, keep the
21060 					 * media in an inserted state.
21061 					 */
21062 
21063 					if ((asc == 0x04) &&
21064 					    ((ascq == 0x02) ||
21065 					    (ascq == 0x07) ||
21066 					    (ascq == 0x08))) {
21067 						state = DKIO_INSERTED;
21068 					}
21069 				}
21070 			} else if (skey == KEY_NO_SENSE) {
21071 				if ((asc == 0x00) && (ascq == 0x00)) {
21072 					/*
21073 					 * Sense Data 00/00/00 does not provide
21074 					 * any information about the state of
21075 					 * the media. Ignore it.
21076 					 */
21077 					mutex_exit(SD_MUTEX(un));
21078 					return (0);
21079 				}
21080 			}
21081 		}
21082 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
21083 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
21084 		state = DKIO_INSERTED;
21085 	}
21086 
21087 	SD_TRACE(SD_LOG_COMMON, un,
21088 	    "sd_media_watch_cb: state=%x, specified=%x\n",
21089 	    state, un->un_specified_mediastate);
21090 
21091 	/*
21092 	 * now signal the waiting thread if this is *not* the specified state;
21093 	 * delay the signal if the state is DKIO_INSERTED to allow the target
21094 	 * to recover
21095 	 */
21096 	if (state != un->un_specified_mediastate) {
21097 		un->un_mediastate = state;
21098 		if (state == DKIO_INSERTED) {
21099 			/*
21100 			 * delay the signal to give the drive a chance
21101 			 * to do what it apparently needs to do
21102 			 */
21103 			SD_TRACE(SD_LOG_COMMON, un,
21104 			    "sd_media_watch_cb: delayed cv_broadcast\n");
21105 			if (un->un_dcvb_timeid == NULL) {
21106 				un->un_dcvb_timeid =
21107 				    timeout(sd_delayed_cv_broadcast, un,
21108 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
21109 			}
21110 		} else {
21111 			SD_TRACE(SD_LOG_COMMON, un,
21112 			    "sd_media_watch_cb: immediate cv_broadcast\n");
21113 			cv_broadcast(&un->un_state_cv);
21114 		}
21115 	}
21116 	mutex_exit(SD_MUTEX(un));
21117 	return (0);
21118 }
21119 
21120 
21121 /*
21122  *    Function: sd_dkio_get_temp
21123  *
21124  * Description: This routine is the driver entry point for handling ioctl
21125  *		requests to get the disk temperature.
21126  *
21127  *   Arguments: dev  - the device number
21128  *		arg  - pointer to user provided dk_temperature structure.
21129  *		flag - this argument is a pass through to ddi_copyxxx()
21130  *		       directly from the mode argument of ioctl().
21131  *
21132  * Return Code: 0
21133  *		EFAULT
21134  *		ENXIO
21135  *		EAGAIN
21136  */
21137 
21138 static int
21139 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
21140 {
21141 	struct sd_lun		*un = NULL;
21142 	struct dk_temperature	*dktemp = NULL;
21143 	uchar_t			*temperature_page;
21144 	int			rval = 0;
21145 	int			path_flag = SD_PATH_STANDARD;
21146 
21147 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21148 		return (ENXIO);
21149 	}
21150 
21151 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
21152 
21153 	/* copyin the disk temp argument to get the user flags */
21154 	if (ddi_copyin((void *)arg, dktemp,
21155 	    sizeof (struct dk_temperature), flag) != 0) {
21156 		rval = EFAULT;
21157 		goto done;
21158 	}
21159 
21160 	/* Initialize the temperature to invalid. */
21161 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
21162 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
21163 
21164 	/*
21165 	 * Note: Investigate removing the "bypass pm" semantic.
21166 	 * Can we just bypass PM always?
21167 	 */
21168 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
21169 		path_flag = SD_PATH_DIRECT;
21170 		ASSERT(!mutex_owned(&un->un_pm_mutex));
21171 		mutex_enter(&un->un_pm_mutex);
21172 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
21173 			/*
21174 			 * in low power mode, we cannot wake it up; we need
21175 			 * to return EAGAIN.
21176 			 * return EAGAIN.
21177 			 */
21178 			mutex_exit(&un->un_pm_mutex);
21179 			rval = EAGAIN;
21180 			goto done;
21181 		} else {
21182 			/*
21183 			 * Indicate to PM the device is busy. This is required
21184 			 * to avoid a race - i.e. the ioctl is issuing a
21185 			 * command and the pm framework brings down the device
21186 			 * to low power mode (possible power cut-off on some
21187 			 * platforms).
21188 			 */
21189 			mutex_exit(&un->un_pm_mutex);
21190 			if (sd_pm_entry(un) != DDI_SUCCESS) {
21191 				rval = EAGAIN;
21192 				goto done;
21193 			}
21194 		}
21195 	}
21196 
21197 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
21198 
21199 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
21200 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
21201 		goto done2;
21202 	}
21203 
21204 	/*
21205 	 * For the current temperature verify that the parameter length is 0x02
21206 	 * and the parameter code is 0x00
21207 	 */
21208 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
21209 	    (temperature_page[5] == 0x00)) {
21210 		if (temperature_page[9] == 0xFF) {
21211 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
21212 		} else {
21213 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
21214 		}
21215 	}
21216 
21217 	/*
21218 	 * For the reference temperature verify that the parameter
21219 	 * length is 0x02 and the parameter code is 0x01
21220 	 */
21221 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
21222 	    (temperature_page[11] == 0x01)) {
21223 		if (temperature_page[15] == 0xFF) {
21224 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
21225 		} else {
21226 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
21227 		}
21228 	}
21229 
21230 	/* Do the copyout regardless of the temperature command's status. */
21231 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
21232 	    flag) != 0) {
21233 		rval = EFAULT;
21234 	}
21235 
21236 done2:
21237 	if (path_flag == SD_PATH_DIRECT) {
21238 		sd_pm_exit(un);
21239 	}
21240 
21241 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
21242 done:
21243 	if (dktemp != NULL) {
21244 		kmem_free(dktemp, sizeof (struct dk_temperature));
21245 	}
21246 
21247 	return (rval);
21248 }
21249 
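/*
 * Example usage sketch (illustrative only, not part of the driver):
 * reading the temperature through the ioctl serviced by
 * sd_dkio_get_temp() above. Setting DKT_BYPASS_PM in dkt_flags takes
 * the SD_PATH_DIRECT path and fails with EAGAIN rather than spinning up
 * a powered-down drive. The helper name is hypothetical:
 *
 *	#include <sys/dkio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	disk_temp(int fd, short *cur)
 *	{
 *		struct dk_temperature dkt;
 *
 *		(void) memset(&dkt, 0, sizeof (dkt));
 *		dkt.dkt_flags = DKT_BYPASS_PM;
 *		if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) != 0)
 *			return (-1);
 *		if (dkt.dkt_cur_temp == (short)DKT_INVALID_TEMP)
 *			return (-1);
 *		*cur = dkt.dkt_cur_temp;
 *		return (0);
 *	}
 */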
21250 
21251 /*
21252  *    Function: sd_log_page_supported
21253  *
21254  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
21255  *		supported log pages.
21256  *
21257  *   Arguments: un - driver soft state (unit) structure
21258  *		log_page - the log page code to search for
21259  *
21260  * Return Code: -1 - on error (log sense is optional and may not be supported).
21261  *		0  - log page not found.
21262  *		1  - log page found.
21263  */
21264 
21265 static int
21266 sd_log_page_supported(struct sd_lun *un, int log_page)
21267 {
21268 	uchar_t *log_page_data;
21269 	int	i;
21270 	int	match = 0;
21271 	int	log_size;
21272 
21273 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
21274 
21275 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
21276 	    SD_PATH_DIRECT) != 0) {
21277 		SD_ERROR(SD_LOG_COMMON, un,
21278 		    "sd_log_page_supported: failed log page retrieval\n");
21279 		kmem_free(log_page_data, 0xFF);
21280 		return (-1);
21281 	}
21282 	log_size = log_page_data[3];
21283 
21284 	/*
21285 	 * The list of supported log pages starts at the fourth byte. Check
21286 	 * until we run out of log pages or a match is found.
21287 	 */
21288 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
21289 		if (log_page_data[i] == log_page) {
21290 			match++;
21291 		}
21292 	}
21293 	kmem_free(log_page_data, 0xFF);
21294 	return (match);
21295 }
21296 
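/*
 * Example usage sketch (illustrative only): a typical use of this helper
 * is to probe for a log page before offering a feature that depends on
 * it. A minimal sketch; the flag name below is hypothetical, not a real
 * sd_lun field:
 *
 *	if (sd_log_page_supported(un, TEMPERATURE_PAGE) == 1)
 *		un->un_f_temp_page_ok = TRUE;
 *
 * A return of -1 means the LOG SENSE itself failed and should be treated
 * as "unknown" rather than as "page not supported".
 */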
21297 
21298 /*
21299  *    Function: sd_mhdioc_failfast
21300  *
21301  * Description: This routine is the driver entry point for handling ioctl
21302  *		requests to enable/disable the multihost failfast option.
21303  *		(MHIOCENFAILFAST)
21304  *
21305  *   Arguments: dev	- the device number
21306  *		arg	- user specified probing interval.
21307  *		flag	- this argument is a pass through to ddi_copyxxx()
21308  *			  directly from the mode argument of ioctl().
21309  *
21310  * Return Code: 0
21311  *		EFAULT
21312  *		ENXIO
21313  */
21314 
21315 static int
21316 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
21317 {
21318 	struct sd_lun	*un = NULL;
21319 	int		mh_time;
21320 	int		rval = 0;
21321 
21322 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21323 		return (ENXIO);
21324 	}
21325 
21326 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
21327 		return (EFAULT);
21328 
21329 	if (mh_time) {
21330 		mutex_enter(SD_MUTEX(un));
21331 		un->un_resvd_status |= SD_FAILFAST;
21332 		mutex_exit(SD_MUTEX(un));
21333 		/*
21334 		 * If mh_time is INT_MAX, then this ioctl is being used for
21335 		 * SCSI-3 PGR purposes, so we need not spawn the watch thread.
21336 		 */
21337 		if (mh_time != INT_MAX) {
21338 			rval = sd_check_mhd(dev, mh_time);
21339 		}
21340 	} else {
21341 		(void) sd_check_mhd(dev, 0);
21342 		mutex_enter(SD_MUTEX(un));
21343 		un->un_resvd_status &= ~SD_FAILFAST;
21344 		mutex_exit(SD_MUTEX(un));
21345 	}
21346 	return (rval);
21347 }
21348 
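/*
 * Example usage sketch (illustrative only, not part of the driver):
 * enabling failfast from userland, matching the copyin of a single int
 * above. The helper name is hypothetical, and the interval units
 * (milliseconds, per mhd(7I)) are an assumption worth verifying against
 * the headers:
 *
 *	#include <sys/mhd.h>
 *	#include <unistd.h>
 *
 *	int
 *	enable_failfast(int fd, int interval_ms)
 *	{
 *		return (ioctl(fd, MHIOCENFAILFAST, &interval_ms));
 *	}
 *
 * An interval of 0 disables failfast; INT_MAX sets the failfast flag
 * without spawning the periodic watch thread, as noted in the function
 * above.
 */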
21349 
21350 /*
21351  *    Function: sd_mhdioc_takeown
21352  *
21353  * Description: This routine is the driver entry point for handling ioctl
21354  *		requests to forcefully acquire exclusive access rights to the
21355  *		multihost disk (MHIOCTKOWN).
21356  *
21357  *   Arguments: dev	- the device number
21358  *		arg	- user provided structure specifying the delay
21359  *			  parameters in milliseconds
21360  *		flag	- this argument is a pass through to ddi_copyxxx()
21361  *			  directly from the mode argument of ioctl().
21362  *
21363  * Return Code: 0
21364  *		EFAULT
21365  *		ENXIO
21366  */
21367 
21368 static int
21369 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
21370 {
21371 	struct sd_lun		*un = NULL;
21372 	struct mhioctkown	*tkown = NULL;
21373 	int			rval = 0;
21374 
21375 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21376 		return (ENXIO);
21377 	}
21378 
21379 	if (arg != NULL) {
21380 		tkown = (struct mhioctkown *)
21381 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
21382 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
21383 		if (rval != 0) {
21384 			rval = EFAULT;
21385 			goto error;
21386 		}
21387 	}
21388 
21389 	rval = sd_take_ownership(dev, tkown);
21390 	mutex_enter(SD_MUTEX(un));
21391 	if (rval == 0) {
21392 		un->un_resvd_status |= SD_RESERVE;
21393 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
21394 			sd_reinstate_resv_delay =
21395 			    tkown->reinstate_resv_delay * 1000;
21396 		} else {
21397 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
21398 		}
21399 		/*
21400 		 * Give precedence here to the scsi_watch routine interval
21401 		 * set by the MHIOCENFAILFAST ioctl.
21402 		 */
21403 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
21404 			mutex_exit(SD_MUTEX(un));
21405 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
21406 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
21407 			    "sd_mhdioc_takeown : %d\n",
21408 			    sd_reinstate_resv_delay);
21409 		} else {
21410 			mutex_exit(SD_MUTEX(un));
21411 		}
21412 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
21413 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21414 	} else {
21415 		un->un_resvd_status &= ~SD_RESERVE;
21416 		mutex_exit(SD_MUTEX(un));
21417 	}
21418 
21419 error:
21420 	if (tkown != NULL) {
21421 		kmem_free(tkown, sizeof (struct mhioctkown));
21422 	}
21423 	return (rval);
21424 }
21425 
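/*
 * Example usage sketch (illustrative only, not part of the driver):
 * taking ownership from userland. A NULL argument is accepted above and
 * selects the default delays; the populated form is shown for
 * completeness. The helper name and delay value are hypothetical; the
 * field units are milliseconds per the function header:
 *
 *	#include <sys/mhd.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	take_disk(int fd)
 *	{
 *		struct mhioctkown t;
 *
 *		(void) memset(&t, 0, sizeof (t));
 *		t.reinstate_resv_delay = 6000;
 *		return (ioctl(fd, MHIOCTKOWN, &t));
 *	}
 *
 * Ownership is dropped again with ioctl(fd, MHIOCRELEASE, 0), serviced
 * by sd_mhdioc_release() below.
 */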
21426 
21427 /*
21428  *    Function: sd_mhdioc_release
21429  *
21430  * Description: This routine is the driver entry point for handling ioctl
21431  *		requests to release exclusive access rights to the multihost
21432  *		disk (MHIOCRELEASE).
21433  *
21434  *   Arguments: dev	- the device number
21435  *
21436  * Return Code: 0
21437  *		ENXIO
21438  */
21439 
21440 static int
21441 sd_mhdioc_release(dev_t dev)
21442 {
21443 	struct sd_lun		*un = NULL;
21444 	timeout_id_t		resvd_timeid_save;
21445 	int			resvd_status_save;
21446 	int			rval = 0;
21447 
21448 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21449 		return (ENXIO);
21450 	}
21451 
21452 	mutex_enter(SD_MUTEX(un));
21453 	resvd_status_save = un->un_resvd_status;
21454 	un->un_resvd_status &=
21455 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
21456 	if (un->un_resvd_timeid) {
21457 		resvd_timeid_save = un->un_resvd_timeid;
21458 		un->un_resvd_timeid = NULL;
21459 		mutex_exit(SD_MUTEX(un));
21460 		(void) untimeout(resvd_timeid_save);
21461 	} else {
21462 		mutex_exit(SD_MUTEX(un));
21463 	}
21464 
21465 	/*
21466 	 * destroy any pending timeout thread that may be attempting to
21467 	 * reinstate the reservation on this device.
21468 	 */
21469 	sd_rmv_resv_reclaim_req(dev);
21470 
21471 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
21472 		mutex_enter(SD_MUTEX(un));
21473 		if ((un->un_mhd_token) &&
21474 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
21475 			mutex_exit(SD_MUTEX(un));
21476 			(void) sd_check_mhd(dev, 0);
21477 		} else {
21478 			mutex_exit(SD_MUTEX(un));
21479 		}
21480 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
21481 		    sd_mhd_reset_notify_cb, (caddr_t)un);
21482 	} else {
21483 		/*
21484 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
21485 		 */
21486 		mutex_enter(SD_MUTEX(un));
21487 		un->un_resvd_status = resvd_status_save;
21488 		mutex_exit(SD_MUTEX(un));
21489 	}
21490 	return (rval);
21491 }
21492 
21493 
21494 /*
21495  *    Function: sd_mhdioc_register_devid
21496  *
21497  * Description: This routine is the driver entry point for handling ioctl
21498  *		requests to register the device id (MHIOCREREGISTERDEVID).
21499  *
21500  *		Note: The implementation for this ioctl has been updated to
21501  *		be consistent with the original PSARC case (1999/357)
21502  *		(4375899, 4241671, 4220005)
21503  *
21504  *   Arguments: dev	- the device number
21505  *
21506  * Return Code: 0
21507  *		ENXIO
21508  */
21509 
21510 static int
21511 sd_mhdioc_register_devid(dev_t dev)
21512 {
21513 	struct sd_lun	*un = NULL;
21514 	int		rval = 0;
21515 
21516 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21517 		return (ENXIO);
21518 	}
21519 
21520 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21521 
21522 	mutex_enter(SD_MUTEX(un));
21523 
21524 	/* If a devid already exists, de-register it */
21525 	if (un->un_devid != NULL) {
21526 		ddi_devid_unregister(SD_DEVINFO(un));
21527 		/*
21528 		 * After unregistering the devid, free the devid memory.
21529 		 */
21530 		ddi_devid_free(un->un_devid);
21531 		un->un_devid = NULL;
21532 	}
21533 
21534 	/* Check for reservation conflict */
21535 	mutex_exit(SD_MUTEX(un));
21536 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
21537 	mutex_enter(SD_MUTEX(un));
21538 
21539 	switch (rval) {
21540 	case 0:
21541 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
21542 		break;
21543 	case EACCES:
21544 		break;
21545 	default:
21546 		rval = EIO;
21547 	}
21548 
21549 	mutex_exit(SD_MUTEX(un));
21550 	return (rval);
21551 }
21552 
21553 
21554 /*
21555  *    Function: sd_mhdioc_inkeys
21556  *
21557  * Description: This routine is the driver entry point for handling ioctl
21558  *		requests to issue the SCSI-3 PERSISTENT RESERVE IN (READ
21559  *		KEYS) command to the device (MHIOCGRP_INKEYS).
21560  *
21561  *   Arguments: dev	- the device number
21562  *		arg	- user provided in_keys structure
21563  *		flag	- this argument is a pass through to ddi_copyxxx()
21564  *			  directly from the mode argument of ioctl().
21565  *
21566  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
21567  *		ENXIO
21568  *		EFAULT
21569  */
21570 
21571 static int
21572 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
21573 {
21574 	struct sd_lun		*un;
21575 	mhioc_inkeys_t		inkeys;
21576 	int			rval = 0;
21577 
21578 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21579 		return (ENXIO);
21580 	}
21581 
21582 #ifdef _MULTI_DATAMODEL
21583 	switch (ddi_model_convert_from(flag & FMODELS)) {
21584 	case DDI_MODEL_ILP32: {
21585 		struct mhioc_inkeys32	inkeys32;
21586 
21587 		if (ddi_copyin(arg, &inkeys32,
21588 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
21589 			return (EFAULT);
21590 		}
21591 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
21592 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21593 		    &inkeys, flag)) != 0) {
21594 			return (rval);
21595 		}
21596 		inkeys32.generation = inkeys.generation;
21597 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
21598 		    flag) != 0) {
21599 			return (EFAULT);
21600 		}
21601 		break;
21602 	}
21603 	case DDI_MODEL_NONE:
21604 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
21605 		    flag) != 0) {
21606 			return (EFAULT);
21607 		}
21608 		if ((rval = sd_persistent_reservation_in_read_keys(un,
21609 		    &inkeys, flag)) != 0) {
21610 			return (rval);
21611 		}
21612 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
21613 		    flag) != 0) {
21614 			return (EFAULT);
21615 		}
21616 		break;
21617 	}
21618 
21619 #else /* ! _MULTI_DATAMODEL */
21620 
21621 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
21622 		return (EFAULT);
21623 	}
21624 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
21625 	if (rval != 0) {
21626 		return (rval);
21627 	}
21628 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
21629 		return (EFAULT);
21630 	}
21631 
21632 #endif /* _MULTI_DATAMODEL */
21633 
21634 	return (rval);
21635 }
21636 
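/*
 * Example usage sketch (illustrative only, not part of the driver):
 * reading the registered SCSI-3 keys from userland through the path
 * above. The caller supplies a key buffer and its size in listsize; the
 * driver sets listlen to the number of keys on the device. The helper
 * name and buffer size are hypothetical:
 *
 *	#include <sys/mhd.h>
 *	#include <unistd.h>
 *
 *	int
 *	read_keys(int fd)
 *	{
 *		mhioc_resv_key_t	keys[32];
 *		mhioc_key_list_t	kl;
 *		mhioc_inkeys_t		ik;
 *
 *		kl.listsize = 32;
 *		kl.list = keys;
 *		ik.li = &kl;
 *		if (ioctl(fd, MHIOCGRP_INKEYS, &ik) != 0)
 *			return (-1);
 *		return ((int)kl.listlen);
 *	}
 *
 * If the returned listlen exceeds the listsize passed in, only listsize
 * keys were copied out and the call should be repeated with a larger
 * buffer; ik.generation carries the reservation generation counter.
 */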
21637 
21638 /*
21639  *    Function: sd_mhdioc_inresv
21640  *
21641  * Description: This routine is the driver entry point for handling ioctl
21642  *		requests to issue the SCSI-3 PERSISTENT RESERVE IN (READ
21643  *		RESERVATIONS) command to the device (MHIOCGRP_INRESV).
21644  *
21645  *   Arguments: dev	- the device number
21646  *		arg	- user provided in_resv structure
21647  *		flag	- this argument is a pass through to ddi_copyxxx()
21648  *			  directly from the mode argument of ioctl().
21649  *
21650  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
21651  *		ENXIO
21652  *		EFAULT
21653  */
21654 
21655 static int
21656 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
21657 {
21658 	struct sd_lun		*un;
21659 	mhioc_inresvs_t		inresvs;
21660 	int			rval = 0;
21661 
21662 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21663 		return (ENXIO);
21664 	}
21665 
21666 #ifdef _MULTI_DATAMODEL
21667 
21668 	switch (ddi_model_convert_from(flag & FMODELS)) {
21669 	case DDI_MODEL_ILP32: {
21670 		struct mhioc_inresvs32	inresvs32;
21671 
21672 		if (ddi_copyin(arg, &inresvs32,
21673 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21674 			return (EFAULT);
21675 		}
21676 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
21677 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21678 		    &inresvs, flag)) != 0) {
21679 			return (rval);
21680 		}
21681 		inresvs32.generation = inresvs.generation;
21682 		if (ddi_copyout(&inresvs32, arg,
21683 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
21684 			return (EFAULT);
21685 		}
21686 		break;
21687 	}
21688 	case DDI_MODEL_NONE:
21689 		if (ddi_copyin(arg, &inresvs,
21690 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21691 			return (EFAULT);
21692 		}
21693 		if ((rval = sd_persistent_reservation_in_read_resv(un,
21694 		    &inresvs, flag)) != 0) {
21695 			return (rval);
21696 		}
21697 		if (ddi_copyout(&inresvs, arg,
21698 		    sizeof (mhioc_inresvs_t), flag) != 0) {
21699 			return (EFAULT);
21700 		}
21701 		break;
21702 	}
21703 
21704 #else /* ! _MULTI_DATAMODEL */
21705 
21706 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
21707 		return (EFAULT);
21708 	}
21709 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
21710 	if (rval != 0) {
21711 		return (rval);
21712 	}
21713 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag) != 0) {
21714 		return (EFAULT);
21715 	}
21716 
21717 #endif /* _MULTI_DATAMODEL */
21718 
21719 	return (rval);
21720 }
21721 
21722 
21723 /*
21724  * The following routines support the clustering functionality described below
21725  * and implement lost reservation reclaim functionality.
21726  *
21727  * Clustering
21728  * ----------
21729  * The clustering code uses two different, independent forms of SCSI
21730  * reservation: traditional SCSI-2 Reserve/Release and the newer SCSI-3
21731  * Persistent Group Reservations. For any particular disk it will use either
21732  * SCSI-2 or SCSI-3 PGR, but never both at the same time for the same disk.
21733  *
21734  * SCSI-2
21735  * The cluster software takes ownership of a multi-hosted disk by issuing the
21736  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
21737  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
21738  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
21739  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
21740  * driver. The meaning of failfast is that if the driver (on this host) ever
21741  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
21742  * it should immediately panic the host. The motivation for this ioctl is that
21743  * if this host does encounter reservation conflict, the underlying cause is
21744  * that some other host of the cluster has decided that this host is no longer
21745  * in the cluster and has seized control of the disks for itself. Since this
21746  * host is no longer in the cluster, it ought to panic itself. The
21747  * MHIOCENFAILFAST ioctl does two things:
21748  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
21749  *      error to panic the host
21750  *      (b) it sets up a periodic timer to test whether this host still has
21751  *      "access" (in that no other host has reserved the device):  if the
21752  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
21753  *      purpose of that periodic timer is to handle scenarios where the host is
21754  *      otherwise temporarily quiescent, temporarily doing no real i/o.
21755  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
21756  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
21757  * the device itself.
21758  *
21759  * SCSI-3 PGR
21760  * A direct semantic implementation of the SCSI-3 Persistent Reservation
21761  * facility is supported through the shared multihost disk ioctls
21762  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
21763  * MHIOCGRP_PREEMPTANDABORT).
21764  *
21765  * Reservation Reclaim:
21766  * --------------------
21767  * To support the lost reservation reclaim operations, this driver creates a
21768  * single thread to handle reinstating reservations on all devices that have
21769  * lost them. sd_resv_reclaim_requests are logged for all devices that have
21770  * LOST RESERVATIONS when the scsi watch facility calls back sd_mhd_watch_cb,
21771  * and the reservation reclaim thread loops through the requests to regain
21772  * the lost reservations.
21773  */
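/*
 * Illustration only (not part of the driver): a cluster node would typically
 * drive the SCSI-2 sequence described above from user level roughly as
 * follows. This is a hedged sketch: 'fd' is assumed to be an open descriptor
 * on the shared disk, the failfast interval argument is assumed to be in
 * milliseconds, and error handling is omitted.
 *
 *	#include <sys/mhd.h>
 *
 *	struct mhioctkown t;
 *	int ff = 2000;				(failfast poll interval)
 *
 *	bzero(&t, sizeof (t));
 *	t.min_ownership_delay = 6000;		(msec; 0 selects the default)
 *	t.max_ownership_delay = 30000;		(msec; 0 selects the default)
 *	(void) ioctl(fd, MHIOCTKOWN, &t);	(take ownership of the disk)
 *	(void) ioctl(fd, MHIOCENFAILFAST, &ff);	(arm failfast)
 *	... use the disk ...
 *	(void) ioctl(fd, MHIOCRELEASE, 0);	(give up ownership)
 */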
21774 
21775 /*
21776  *    Function: sd_check_mhd()
21777  *
21778  * Description: This function sets up and submits a scsi watch request or
21779  *		terminates an existing watch request. This routine is used in
21780  *		support of reservation reclaim.
21781  *
21782  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
21783  *			 among multiple watches that share the callback function
21784  *		interval - the number of milliseconds specifying the watch
21785  *			   interval for issuing TEST UNIT READY commands. If
21786  *			   set to 0 the watch should be terminated. If the
21787  *			   interval is set to 0 and if the device is required
21788  *			   to hold reservation while disabling failfast, the
21789  *			   watch is restarted with an interval of
21790  *			   reinstate_resv_delay.
21791  *
21792  * Return Code: 0	   - Successful submit/terminate of scsi watch request
21793  *		ENXIO      - Indicates an invalid device was specified
21794  *		EAGAIN     - Unable to submit the scsi watch request
21795  */
21796 
21797 static int
21798 sd_check_mhd(dev_t dev, int interval)
21799 {
21800 	struct sd_lun	*un;
21801 	opaque_t	token;
21802 
21803 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21804 		return (ENXIO);
21805 	}
21806 
21807 	/* is this a watch termination request? */
21808 	if (interval == 0) {
21809 		mutex_enter(SD_MUTEX(un));
21810 		/* if there is an existing watch task then terminate it */
21811 		if (un->un_mhd_token) {
21812 			token = un->un_mhd_token;
21813 			un->un_mhd_token = NULL;
21814 			mutex_exit(SD_MUTEX(un));
21815 			(void) scsi_watch_request_terminate(token,
21816 			    SCSI_WATCH_TERMINATE_WAIT);
21817 			mutex_enter(SD_MUTEX(un));
21818 		} else {
21819 			mutex_exit(SD_MUTEX(un));
21820 			/*
21821 			 * Note: If we return here we don't check for the
21822 			 * failfast case. This is the original legacy
21823 			 * implementation but perhaps we should be checking
21824 			 * the failfast case.
21825 			 */
21826 			return (0);
21827 		}
21828 		/*
21829 		 * If the device is required to hold reservation while
21830 		 * disabling failfast, we need to restart the scsi_watch
21831 		 * routine with an interval of reinstate_resv_delay.
21832 		 */
21833 		if (un->un_resvd_status & SD_RESERVE) {
21834 			interval = sd_reinstate_resv_delay/1000;
21835 		} else {
21836 			/* no failfast so bail */
21837 			mutex_exit(SD_MUTEX(un));
21838 			return (0);
21839 		}
21840 		mutex_exit(SD_MUTEX(un));
21841 	}
21842 
21843 	/*
21844 	 * adjust minimum time interval to 1 second,
21845 	 * and convert from msecs to usecs
21846 	 */
21847 	if (interval > 0 && interval < 1000) {
21848 		interval = 1000;
21849 	}
21850 	interval *= 1000;
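	/*
	 * For example, a requested interval of 500 msec is clamped to
	 * 1000 msec and then scaled to 1,000,000 usec, i.e. TEST UNIT READY
	 * is issued at most once per second.
	 */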
21851 
21852 	/*
21853 	 * submit the request to the scsi_watch service
21854 	 */
21855 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
21856 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
21857 	if (token == NULL) {
21858 		return (EAGAIN);
21859 	}
21860 
21861 	/*
21862 	 * save token for termination later on
21863 	 */
21864 	mutex_enter(SD_MUTEX(un));
21865 	un->un_mhd_token = token;
21866 	mutex_exit(SD_MUTEX(un));
21867 	return (0);
21868 }
21869 
21870 
21871 /*
21872  *    Function: sd_mhd_watch_cb()
21873  *
21874  * Description: This function is the call back function used by the scsi watch
21875  *		facility. The scsi watch facility sends the "Test Unit Ready"
21876  *		and processes the status. If applicable (i.e. a "Unit Attention"
21877  *		status and automatic "Request Sense" not used) the scsi watch
21878  *		facility will send a "Request Sense" and retrieve the sense data
21879  *		to be passed to this callback function. In either case, whether
21880  *		the automatic "Request Sense" was used or the facility submitted
21881  *		one, this callback is passed the status and sense data.
21882  *
21883  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
21884  *			among multiple watches that share this callback function
21885  *		resultp - scsi watch facility result packet containing scsi
21886  *			  packet, status byte and sense data
21887  *
21888  * Return Code: 0 - continue the watch task
21889  *		non-zero - terminate the watch task
21890  */
21891 
21892 static int
21893 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
21894 {
21895 	struct sd_lun			*un;
21896 	struct scsi_status		*statusp;
21897 	uint8_t				*sensep;
21898 	struct scsi_pkt			*pkt;
21899 	uchar_t				actual_sense_length;
21900 	dev_t  				dev = (dev_t)arg;
21901 
21902 	ASSERT(resultp != NULL);
21903 	statusp			= resultp->statusp;
21904 	sensep			= (uint8_t *)resultp->sensep;
21905 	pkt			= resultp->pkt;
21906 	actual_sense_length	= resultp->actual_sense_length;
21907 
21908 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21909 		return (ENXIO);
21910 	}
21911 
21912 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
21913 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
21914 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
21915 
21916 	/* Begin processing of the status and/or sense data */
21917 	if (pkt->pkt_reason != CMD_CMPLT) {
21918 		/* Handle the incomplete packet */
21919 		sd_mhd_watch_incomplete(un, pkt);
21920 		return (0);
21921 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
21922 		if (*((unsigned char *)statusp)
21923 		    == STATUS_RESERVATION_CONFLICT) {
21924 			/*
21925 			 * Handle a reservation conflict by panicking if
21926 			 * configured for failfast or by logging the conflict
21927 			 * and updating the reservation status
21928 			 */
21929 			mutex_enter(SD_MUTEX(un));
21930 			if ((un->un_resvd_status & SD_FAILFAST) &&
21931 			    (sd_failfast_enable)) {
21932 				sd_panic_for_res_conflict(un);
21933 				/*NOTREACHED*/
21934 			}
21935 			SD_INFO(SD_LOG_IOCTL_MHD, un,
21936 			    "sd_mhd_watch_cb: Reservation Conflict\n");
21937 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
21938 			mutex_exit(SD_MUTEX(un));
21939 		}
21940 	}
21941 
21942 	if (sensep != NULL) {
21943 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
21944 			mutex_enter(SD_MUTEX(un));
21945 			if ((scsi_sense_asc(sensep) ==
21946 			    SD_SCSI_RESET_SENSE_CODE) &&
21947 			    (un->un_resvd_status & SD_RESERVE)) {
21948 				/*
21949 				 * The additional sense code indicates a power
21950 				 * on or bus device reset has occurred; update
21951 				 * the reservation status.
21952 				 */
21953 				un->un_resvd_status |=
21954 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
21955 				SD_INFO(SD_LOG_IOCTL_MHD, un,
21956 				    "sd_mhd_watch_cb: Lost Reservation\n");
21957 			}
21958 		} else {
21959 			return (0);
21960 		}
21961 	} else {
21962 		mutex_enter(SD_MUTEX(un));
21963 	}
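	/*
	 * At this point SD_MUTEX is held on every path that did not return
	 * above: it was taken either in the sense-processing branch or in
	 * the else clause just before.
	 */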
21964 
21965 	if ((un->un_resvd_status & SD_RESERVE) &&
21966 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
21967 		if (un->un_resvd_status & SD_WANT_RESERVE) {
21968 			/*
21969 			 * A reset occurred in between the last probe and this
21970 			 * one so if a timeout is pending cancel it.
21971 			 */
21972 			if (un->un_resvd_timeid) {
21973 				timeout_id_t temp_id = un->un_resvd_timeid;
21974 				un->un_resvd_timeid = NULL;
21975 				mutex_exit(SD_MUTEX(un));
21976 				(void) untimeout(temp_id);
21977 				mutex_enter(SD_MUTEX(un));
21978 			}
21979 			un->un_resvd_status &= ~SD_WANT_RESERVE;
21980 		}
21981 		if (un->un_resvd_timeid == 0) {
21982 			/* Schedule a timeout to handle the lost reservation */
21983 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
21984 			    (void *)dev,
21985 			    drv_usectohz(sd_reinstate_resv_delay));
21986 		}
21987 	}
21988 	mutex_exit(SD_MUTEX(un));
21989 	return (0);
21990 }
21991 
21992 
21993 /*
21994  *    Function: sd_mhd_watch_incomplete()
21995  *
21996  * Description: This function is used to find out why a scsi pkt sent by the
21997  *		scsi watch facility was not completed. Under some scenarios this
21998  *		scsi watch facility was not completed. Under some scenarios this
21999  *		routine simply returns. Otherwise it attempts lun, target, and/or
22000  *		bus resets to see if the drive is still online.
22001  *   Arguments: un  - driver soft state (unit) structure
22002  *		pkt - incomplete scsi pkt
22003  */
22004 
22005 static void
22006 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
22007 {
22008 	int	be_chatty;
22009 	int	perr;
22010 
22011 	ASSERT(pkt != NULL);
22012 	ASSERT(un != NULL);
22013 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
22014 	perr		= (pkt->pkt_statistics & STAT_PERR);
22015 
22016 	mutex_enter(SD_MUTEX(un));
22017 	if (un->un_state == SD_STATE_DUMPING) {
22018 		mutex_exit(SD_MUTEX(un));
22019 		return;
22020 	}
22021 
22022 	switch (pkt->pkt_reason) {
22023 	case CMD_UNX_BUS_FREE:
22024 		/*
22025 		 * If we had a parity error that caused the target to drop BSY*,
22026 		 * don't be chatty about it.
22027 		 */
22028 		if (perr && be_chatty) {
22029 			be_chatty = 0;
22030 		}
22031 		break;
22032 	case CMD_TAG_REJECT:
22033 		/*
22034 		 * The SCSI-2 spec states that a tag reject will be sent by the
22035 		 * target if tagged queuing is not supported. A tag reject may
22036 		 * also be sent during certain initialization periods or to
22037 		 * control internal resources. For the latter case the target
22038 		 * may also return Queue Full.
22039 		 *
22040 		 * If this driver receives a tag reject from a target that is
22041 		 * going through an init period or controlling internal
22042 		 * resources, tagged queuing will be disabled. This is less than
22043 		 * optimal behavior, but the driver is unable to determine the
22044 		 * target state and assumes tagged queuing is not supported.
22045 		 */
22046 		pkt->pkt_flags = 0;
22047 		un->un_tagflags = 0;
22048 
22049 		if (un->un_f_opt_queueing == TRUE) {
22050 			un->un_throttle = min(un->un_throttle, 3);
22051 		} else {
22052 			un->un_throttle = 1;
22053 		}
22054 		mutex_exit(SD_MUTEX(un));
22055 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
22056 		mutex_enter(SD_MUTEX(un));
22057 		break;
22058 	case CMD_INCOMPLETE:
22059 		/*
22060 		 * The transport stopped with an abnormal state, fallthrough and
22061 		 * reset the target and/or bus unless selection did not complete
22062 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
22063 		 * go through a target/bus reset
22064 		 */
22065 		if (pkt->pkt_state == STATE_GOT_BUS) {
22066 			break;
22067 		}
22068 		/*FALLTHROUGH*/
22069 
22070 	case CMD_TIMEOUT:
22071 	default:
22072 		/*
22073 		 * The lun may still be running the command, so a lun reset
22074 		 * should be attempted. If the lun reset fails or cannot be
22075 		 * issued, then try a target reset. Lastly, try a bus reset.
22076 		 */
22077 		if ((pkt->pkt_statistics &
22078 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
22079 			int reset_retval = 0;
22080 			mutex_exit(SD_MUTEX(un));
22081 			if (un->un_f_allow_bus_device_reset == TRUE) {
22082 				if (un->un_f_lun_reset_enabled == TRUE) {
22083 					reset_retval =
22084 					    scsi_reset(SD_ADDRESS(un),
22085 					    RESET_LUN);
22086 				}
22087 				if (reset_retval == 0) {
22088 					reset_retval =
22089 					    scsi_reset(SD_ADDRESS(un),
22090 					    RESET_TARGET);
22091 				}
22092 			}
22093 			if (reset_retval == 0) {
22094 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
22095 			}
22096 			mutex_enter(SD_MUTEX(un));
22097 		}
22098 		break;
22099 	}
22100 
22101 	/* A device/bus reset has occurred; update the reservation status. */
22102 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
22103 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
22104 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22105 			un->un_resvd_status |=
22106 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
22107 			SD_INFO(SD_LOG_IOCTL_MHD, un,
22108 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
22109 		}
22110 	}
22111 
22112 	/*
22113 	 * The disk has been turned off; update the device state.
22114 	 *
22115 	 * Note: Should we be offlining the disk here?
22116 	 */
22117 	if (pkt->pkt_state == STATE_GOT_BUS) {
22118 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
22119 		    "Disk not responding to selection\n");
22120 		if (un->un_state != SD_STATE_OFFLINE) {
22121 			New_state(un, SD_STATE_OFFLINE);
22122 		}
22123 	} else if (be_chatty) {
22124 		/*
22125 		 * suppress messages if they are all the same pkt reason;
22126 		 * with TQ, many (up to 256) are returned with the same
22127 		 * pkt_reason
22128 		 */
22129 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
22130 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
22131 			    "sd_mhd_watch_incomplete: "
22132 			    "SCSI transport failed: reason '%s'\n",
22133 			    scsi_rname(pkt->pkt_reason));
22134 		}
22135 	}
22136 	un->un_last_pkt_reason = pkt->pkt_reason;
22137 	mutex_exit(SD_MUTEX(un));
22138 }
22139 
22140 
22141 /*
22142  *    Function: sd_sname()
22143  *
22144  * Description: This is a simple little routine to return a string containing
22145  *		a printable description of command status byte for use in
22146  *		a printable description of a command status byte for use in
22147  *		logging.
22148  *
22149  *   Arguments: status - the command status byte
22150  * Return Code: char * - string containing status description.
22151  */
22152 
22153 static char *
22154 sd_sname(uchar_t status)
22155 {
22156 	switch (status & STATUS_MASK) {
22157 	case STATUS_GOOD:
22158 		return ("good status");
22159 	case STATUS_CHECK:
22160 		return ("check condition");
22161 	case STATUS_MET:
22162 		return ("condition met");
22163 	case STATUS_BUSY:
22164 		return ("busy");
22165 	case STATUS_INTERMEDIATE:
22166 		return ("intermediate");
22167 	case STATUS_INTERMEDIATE_MET:
22168 		return ("intermediate - condition met");
22169 	case STATUS_RESERVATION_CONFLICT:
22170 		return ("reservation_conflict");
22171 	case STATUS_TERMINATED:
22172 		return ("command terminated");
22173 	case STATUS_QFULL:
22174 		return ("queue full");
22175 	default:
22176 		return ("<unknown status>");
22177 	}
22178 }
22179 
22180 
22181 /*
22182  *    Function: sd_mhd_resvd_recover()
22183  *
22184  * Description: This function adds a reservation entry to the
22185  *		sd_resv_reclaim_request list and signals the reservation
22186  *		reclaim thread that there is work pending. If the reservation
22187  *		reclaim thread has not been previously created this function
22188  *		will kick it off.
22189  *
22190  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
22191  *			among multiple watches that share this callback function
22192  *
22193  *     Context: This routine is called by timeout() and is run in interrupt
22194  *		context. It must not sleep or call other functions which may
22195  *		sleep.
22196  */
22197 
22198 static void
22199 sd_mhd_resvd_recover(void *arg)
22200 {
22201 	dev_t			dev = (dev_t)arg;
22202 	struct sd_lun		*un;
22203 	struct sd_thr_request	*sd_treq = NULL;
22204 	struct sd_thr_request	*sd_cur = NULL;
22205 	struct sd_thr_request	*sd_prev = NULL;
22206 	int			already_there = 0;
22207 
22208 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22209 		return;
22210 	}
22211 
22212 	mutex_enter(SD_MUTEX(un));
22213 	un->un_resvd_timeid = NULL;
22214 	if (un->un_resvd_status & SD_WANT_RESERVE) {
22215 		/*
22216 		 * There was a reset so don't issue the reserve, allow the
22217 		 * sd_mhd_watch_cb callback function to notice this and
22218 		 * reschedule the timeout for reservation.
22219 		 */
22220 		mutex_exit(SD_MUTEX(un));
22221 		return;
22222 	}
22223 	mutex_exit(SD_MUTEX(un));
22224 
22225 	/*
22226 	 * Add this device to the sd_resv_reclaim_request list and the
22227 	 * sd_resv_reclaim_thread should take care of the rest.
22228 	 *
22229 	 * Note: We can't sleep in this context so if the memory allocation
22230 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
22231 	 * reschedule the timeout for reservation.  (4378460)
22232 	 */
22233 	sd_treq = (struct sd_thr_request *)
22234 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
22235 	if (sd_treq == NULL) {
22236 		return;
22237 	}
22238 
22239 	sd_treq->sd_thr_req_next = NULL;
22240 	sd_treq->dev = dev;
22241 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22242 	if (sd_tr.srq_thr_req_head == NULL) {
22243 		sd_tr.srq_thr_req_head = sd_treq;
22244 	} else {
22245 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
22246 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
22247 			if (sd_cur->dev == dev) {
22248 				/*
22249 				 * already in Queue so don't log
22250 				 * another request for the device
22251 				 */
22252 				already_there = 1;
22253 				break;
22254 			}
22255 			sd_prev = sd_cur;
22256 		}
22257 		if (!already_there) {
22258 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
22259 			    "logging request for %lx\n", dev);
22260 			sd_prev->sd_thr_req_next = sd_treq;
22261 		} else {
22262 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
22263 		}
22264 	}
22265 
22266 	/*
22267 	 * Create a kernel thread to do the reservation reclaim and free up this
22268 	 * thread: we are in timeout (interrupt) context here and cannot block
22269 	 * while the reclaim is performed.
22270 	 */
22271 	if (sd_tr.srq_resv_reclaim_thread == NULL)
22272 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
22273 		    sd_resv_reclaim_thread, NULL,
22274 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
22275 
22276 	/* Tell the reservation reclaim thread that it has work to do */
22277 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
22278 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22279 }
22280 
22281 /*
22282  *    Function: sd_resv_reclaim_thread()
22283  *
22284  * Description: This function implements the reservation reclaim operations.
22285  *		It runs as a dedicated kernel thread.
22286  *
22287  *   Arguments: none; requests are taken from the global request list.
22288  */
22289 
22290 static void
22291 sd_resv_reclaim_thread()
22292 {
22293 	struct sd_lun		*un;
22294 	struct sd_thr_request	*sd_mhreq;
22295 
22296 	/* Wait for work */
22297 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22298 	if (sd_tr.srq_thr_req_head == NULL) {
22299 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
22300 		    &sd_tr.srq_resv_reclaim_mutex);
22301 	}
22302 
22303 	/* Loop while we have work */
22304 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
22305 		un = ddi_get_soft_state(sd_state,
22306 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
22307 		if (un == NULL) {
22308 			/*
22309 			 * softstate structure is NULL so just
22310 			 * dequeue the request and continue
22311 			 */
22312 			sd_tr.srq_thr_req_head =
22313 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
22314 			kmem_free(sd_tr.srq_thr_cur_req,
22315 			    sizeof (struct sd_thr_request));
22316 			continue;
22317 		}
22318 
22319 		/* dequeue the request */
22320 		sd_mhreq = sd_tr.srq_thr_cur_req;
22321 		sd_tr.srq_thr_req_head =
22322 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
22323 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22324 
22325 		/*
22326 		 * Reclaim reservation only if SD_RESERVE is still set. There
22327 		 * may have been a call to MHIOCRELEASE before we got here.
22328 		 */
22329 		mutex_enter(SD_MUTEX(un));
22330 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22331 			/*
22332 			 * Note: The SD_LOST_RESERVE flag is cleared before
22333 			 * reclaiming the reservation. If this is done after the
22334 			 * call to sd_reserve_release a reservation loss in the
22335 			 * window between pkt completion of reserve cmd and
22336 			 * mutex_enter below may not be recognized
22337 			 */
22338 			un->un_resvd_status &= ~SD_LOST_RESERVE;
22339 			mutex_exit(SD_MUTEX(un));
22340 
22341 			if (sd_reserve_release(sd_mhreq->dev,
22342 			    SD_RESERVE) == 0) {
22343 				mutex_enter(SD_MUTEX(un));
22344 				un->un_resvd_status |= SD_RESERVE;
22345 				mutex_exit(SD_MUTEX(un));
22346 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22347 				    "sd_resv_reclaim_thread: "
22348 				    "Reservation Recovered\n");
22349 			} else {
22350 				mutex_enter(SD_MUTEX(un));
22351 				un->un_resvd_status |= SD_LOST_RESERVE;
22352 				mutex_exit(SD_MUTEX(un));
22353 				SD_INFO(SD_LOG_IOCTL_MHD, un,
22354 				    "sd_resv_reclaim_thread: Failed "
22355 				    "Reservation Recovery\n");
22356 			}
22357 		} else {
22358 			mutex_exit(SD_MUTEX(un));
22359 		}
22360 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22361 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
22362 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22363 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
22364 		/*
22365 		 * wakeup the destroy thread if anyone is waiting on
22366 		 * us to complete.
22367 		 */
22368 		cv_signal(&sd_tr.srq_inprocess_cv);
22369 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
22370 		    "sd_resv_reclaim_thread: cv_signalling current request\n");
22371 	}
22372 
22373 	/*
22374 	 * cleanup the sd_tr structure now that this thread will not exist
22375 	 */
22376 	ASSERT(sd_tr.srq_thr_req_head == NULL);
22377 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
22378 	sd_tr.srq_resv_reclaim_thread = NULL;
22379 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22380 	thread_exit();
22381 }
22382 
22383 
22384 /*
22385  *    Function: sd_rmv_resv_reclaim_req()
22386  *
22387  * Description: This function removes any pending reservation reclaim requests
22388  *		for the specified device.
22389  *
22390  *   Arguments: dev - the device 'dev_t'
22391  */
22392 
22393 static void
22394 sd_rmv_resv_reclaim_req(dev_t dev)
22395 {
22396 	struct sd_thr_request *sd_mhreq;
22397 	struct sd_thr_request *sd_prev;
22398 
22399 	/* Remove a reservation reclaim request from the list */
22400 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
22401 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
22402 		/*
22403 		 * We are attempting to reinstate reservation for
22404 		 * this device. We wait for sd_reserve_release()
22405 		 * to return before we return.
22406 		 */
22407 		cv_wait(&sd_tr.srq_inprocess_cv,
22408 		    &sd_tr.srq_resv_reclaim_mutex);
22409 	} else {
22410 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
22411 		if (sd_mhreq && sd_mhreq->dev == dev) {
22412 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
22413 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22414 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22415 			return;
22416 		}
22417 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
22418 			if (sd_mhreq->dev == dev) {
22419 				break;
22420 			}
22421 			sd_prev = sd_mhreq;
22422 		}
22423 		if (sd_mhreq != NULL) {
22424 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
22425 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
22426 		}
22427 	}
22428 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
22429 }
22430 
22431 
22432 /*
22433  *    Function: sd_mhd_reset_notify_cb()
22434  *
22435  * Description: This is a call back function for scsi_reset_notify. This
22436  *		function updates the softstate reserved status and logs the
22437  *		reset. The driver scsi watch facility callback function
22438  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
22439  *		will reclaim the reservation.
22440  *
22441  *   Arguments: arg  - driver soft state (unit) structure
22442  */
22443 
22444 static void
22445 sd_mhd_reset_notify_cb(caddr_t arg)
22446 {
22447 	struct sd_lun *un = (struct sd_lun *)arg;
22448 
22449 	mutex_enter(SD_MUTEX(un));
22450 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
22451 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
22452 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22453 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
22454 	}
22455 	mutex_exit(SD_MUTEX(un));
22456 }
22457 
22458 
22459 /*
22460  *    Function: sd_take_ownership()
22461  *
22462  * Description: This routine implements an algorithm to achieve a stable
22463  *		reservation on disks which don't implement priority reserve,
22464  *		and makes sure that other hosts' re-reservation attempts lose.
22465  *		The algorithm consists of a loop that keeps issuing the RESERVE
22466  *		for some period of time (min_ownership_delay, default 6 seconds).
22467  *		During that loop, it looks to see if there has been a bus device
22468  *		reset or bus reset (both of which cause an existing reservation
22469  *		to be lost). If the reservation is lost, RESERVE is reissued until a
22470  *		period of min_ownership_delay with no resets has gone by, or
22471  *		until max_ownership_delay has expired. This loop ensures that
22472  *		the host really did manage to reserve the device, in spite of
22473  *		resets. The looping for min_ownership_delay (default six
22474  *		seconds) is important to early generation clustering products,
22475  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
22476  *		MHIOCENFAILFAST periodic timer of two seconds. By having
22477  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
22478  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
22479  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
22480  *		have already noticed, via the MHIOCENFAILFAST polling, that it
22481  *		no longer "owns" the disk and will have panicked itself.  Thus,
22482  *		the host issuing the MHIOCTKOWN is assured (with timing
22483  *		dependencies) that by the time it actually starts to use the
22484  *		disk for real work, the old owner is no longer accessing it.
22485  *
22486  *		min_ownership_delay is the minimum amount of time for which the
22487  *		disk must be reserved continuously devoid of resets before the
22488  *		MHIOCTKOWN ioctl will return success.
22489  *
22490  *		max_ownership_delay indicates the amount of time by which the
22491  *		take ownership should succeed or timeout with an error.
22492  *
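 *		For example, with the defaults (min 6 seconds, max 30 seconds)
 *		and the fixed 500 msec delay per loop iteration, roughly twelve
 *		consecutive conflict-free RESERVE iterations must elapse before
 *		this ioctl can return success; any reset seen in that window
 *		restarts the min_ownership_delay clock.
 *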
22493  *   Arguments: dev - the device 'dev_t'
22494  *		*p  - struct containing timing info.
22495  *
22496  * Return Code: 0 for success or error code
22497  */
22498 
22499 static int
22500 sd_take_ownership(dev_t dev, struct mhioctkown *p)
22501 {
22502 	struct sd_lun	*un;
22503 	int		rval;
22504 	int		err;
22505 	int		reservation_count   = 0;
22506 	int		min_ownership_delay =  6000000; /* in usec */
22507 	int		max_ownership_delay = 30000000; /* in usec */
22508 	clock_t		start_time;	/* starting time of this algorithm */
22509 	clock_t		end_time;	/* time limit for giving up */
22510 	clock_t		ownership_time;	/* time limit for stable ownership */
22511 	clock_t		current_time;
22512 	clock_t		previous_current_time;
22513 
22514 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22515 		return (ENXIO);
22516 	}
22517 
22518 	/*
22519 	 * Attempt a device reservation. A priority reservation is requested.
22520 	 */
22521 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
22522 	    != SD_SUCCESS) {
22523 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
22524 		    "sd_take_ownership: return(1)=%d\n", rval);
22525 		return (rval);
22526 	}
22527 
22528 	/* Update the softstate reserved status to indicate the reservation */
22529 	mutex_enter(SD_MUTEX(un));
22530 	un->un_resvd_status |= SD_RESERVE;
22531 	un->un_resvd_status &=
22532 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
22533 	mutex_exit(SD_MUTEX(un));
22534 
22535 	if (p != NULL) {
22536 		if (p->min_ownership_delay != 0) {
22537 			min_ownership_delay = p->min_ownership_delay * 1000;
22538 		}
22539 		if (p->max_ownership_delay != 0) {
22540 			max_ownership_delay = p->max_ownership_delay * 1000;
22541 		}
22542 	}
22543 	SD_INFO(SD_LOG_IOCTL_MHD, un,
22544 	    "sd_take_ownership: min, max delays: %d, %d\n",
22545 	    min_ownership_delay, max_ownership_delay);
22546 
22547 	start_time = ddi_get_lbolt();
22548 	current_time	= start_time;
22549 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
22550 	end_time	= start_time + drv_usectohz(max_ownership_delay);
22551 
22552 	while (current_time - end_time < 0) {
22553 		delay(drv_usectohz(500000));
22554 
22555 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
22556 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
22557 				mutex_enter(SD_MUTEX(un));
22558 				rval = (un->un_resvd_status &
22559 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
22560 				mutex_exit(SD_MUTEX(un));
22561 				break;
22562 			}
22563 		}
22564 		previous_current_time = current_time;
22565 		current_time = ddi_get_lbolt();
22566 		mutex_enter(SD_MUTEX(un));
22567 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
22568 			ownership_time = ddi_get_lbolt() +
22569 			    drv_usectohz(min_ownership_delay);
22570 			reservation_count = 0;
22571 		} else {
22572 			reservation_count++;
22573 		}
22574 		un->un_resvd_status |= SD_RESERVE;
22575 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
22576 		mutex_exit(SD_MUTEX(un));
22577 
22578 		SD_INFO(SD_LOG_IOCTL_MHD, un,
22579 		    "sd_take_ownership: ticks for loop iteration=%ld, "
22580 		    "reservation=%s\n", (current_time - previous_current_time),
22581 		    reservation_count ? "ok" : "reclaimed");
22582 
22583 		if (current_time - ownership_time >= 0 &&
22584 		    reservation_count >= 4) {
22585 			rval = 0; /* Achieved a stable ownership */
22586 			break;
22587 		}
22588 		if (current_time - end_time >= 0) {
22589 			rval = EACCES; /* No ownership in max possible time */
22590 			break;
22591 		}
22592 	}
22593 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
22594 	    "sd_take_ownership: return(2)=%d\n", rval);
22595 	return (rval);
22596 }
22597 
22598 
22599 /*
22600  *    Function: sd_reserve_release()
22601  *
22602  * Description: This function builds and sends scsi RESERVE, RELEASE, and
22603  *		PRIORITY RESERVE commands based on a user specified command type
22604  *		PRIORITY RESERVE commands based on a user-specified command type.
22605  *   Arguments: dev - the device 'dev_t'
22606  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
22607  *		      SD_RESERVE, SD_RELEASE
22608  *
22609  * Return Code: 0 or Error Code
22610  */
22611 
22612 static int
22613 sd_reserve_release(dev_t dev, int cmd)
22614 {
22615 	struct uscsi_cmd	*com = NULL;
22616 	struct sd_lun		*un = NULL;
22617 	char			cdb[CDB_GROUP0];
22618 	int			rval;
22619 
22620 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
22621 	    (cmd == SD_PRIORITY_RESERVE));
22622 
22623 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22624 		return (ENXIO);
22625 	}
22626 
22627 	/* instantiate and initialize the command and cdb */
22628 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
22629 	bzero(cdb, CDB_GROUP0);
22630 	com->uscsi_flags   = USCSI_SILENT;
22631 	com->uscsi_timeout = un->un_reserve_release_time;
22632 	com->uscsi_cdblen  = CDB_GROUP0;
22633 	com->uscsi_cdb	   = cdb;
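	/*
	 * Both commands are 6-byte Group 0 CDBs (SCMD_RESERVE is opcode
	 * 0x16, SCMD_RELEASE is 0x17); the remaining CDB bytes stay zero,
	 * which requests a whole-unit (non-extent) reservation.
	 */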
22634 	if (cmd == SD_RELEASE) {
22635 		cdb[0] = SCMD_RELEASE;
22636 	} else {
22637 		cdb[0] = SCMD_RESERVE;
22638 	}
22639 
22640 	/* Send the command. */
22641 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22642 	    SD_PATH_STANDARD);
22643 
22644 	/*
22645 	 * "break" a reservation that is held by another host, by issuing a
22646 	 * reset if priority reserve is desired, and we could not get the
22647 	 * device.
22648 	 */
22649 	if ((cmd == SD_PRIORITY_RESERVE) &&
22650 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22651 		/*
22652 		 * First try to reset the LUN. If we cannot, then try a target
22653 		 * reset, followed by a bus reset if the target reset fails.
22654 		 */
22655 		int reset_retval = 0;
22656 		if (un->un_f_lun_reset_enabled == TRUE) {
22657 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
22658 		}
22659 		if (reset_retval == 0) {
22660 			/* The LUN reset either failed or was not issued */
22661 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
22662 		}
22663 		if ((reset_retval == 0) &&
22664 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
22665 			rval = EIO;
22666 			kmem_free(com, sizeof (*com));
22667 			return (rval);
22668 		}
22669 
22670 		bzero(com, sizeof (struct uscsi_cmd));
22671 		com->uscsi_flags   = USCSI_SILENT;
22672 		com->uscsi_cdb	   = cdb;
22673 		com->uscsi_cdblen  = CDB_GROUP0;
22674 		com->uscsi_timeout = 5;
22675 
22676 		/*
22677 		 * Reissue the last reserve command, this time without request
22678 		 * sense.  Assume that it is just a regular reserve command.
22679 		 */
22680 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
22681 		    SD_PATH_STANDARD);
22682 	}
22683 
22684 	/* Return an error if still getting a reservation conflict. */
22685 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
22686 		rval = EACCES;
22687 	}
22688 
22689 	kmem_free(com, sizeof (*com));
22690 	return (rval);
22691 }
22692 
22693 
22694 #define	SD_NDUMP_RETRIES	12
22695 /*
22696  *	System Crash Dump routine
22697  */
22698 
22699 static int
22700 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
22701 {
22702 	int		instance;
22703 	int		partition;
22704 	int		i;
22705 	int		err;
22706 	struct sd_lun	*un;
22707 	struct scsi_pkt *wr_pktp;
22708 	struct buf	*wr_bp;
22709 	struct buf	wr_buf;
22710 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
22711 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
22712 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
22713 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
22714 	size_t		io_start_offset;
22715 	int		doing_rmw = FALSE;
22716 	int		rval;
22717 #if defined(__i386) || defined(__amd64)
22718 	ssize_t dma_resid;
22719 	daddr_t oblkno;
22720 #endif
22721 	diskaddr_t	nblks = 0;
22722 	diskaddr_t	start_block;
22723 
22724 	instance = SDUNIT(dev);
22725 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
22726 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
22727 		return (ENXIO);
22728 	}
22729 
22730 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
22731 
22732 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
22733 
22734 	partition = SDPART(dev);
22735 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
22736 
22737 	/* Validate the blocks to dump against the partition size. */
22738 
22739 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
22740 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
22741 
22742 	if ((blkno + nblk) > nblks) {
22743 		SD_TRACE(SD_LOG_DUMP, un,
22744 		    "sddump: dump range larger than partition: "
22745 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
22746 		    blkno, nblk, nblks);
22747 		return (EINVAL);
22748 	}
22749 
22750 	mutex_enter(&un->un_pm_mutex);
22751 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
22752 		struct scsi_pkt *start_pktp;
22753 
22754 		mutex_exit(&un->un_pm_mutex);
22755 
22756 		/*
22757 		 * use pm framework to power on HBA 1st
22758 		 */
22759 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
22760 
22761 		/*
22762 		 * Dump no longer uses sdpower to power on a device; the
22763 		 * power-on is done in-line here so it can run in polled mode.
22764 		 */
22765 
22766 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
22767 
22768 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
22769 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
22770 
22771 		if (start_pktp == NULL) {
22772 			/* We were not given a SCSI packet, fail. */
22773 			return (EIO);
22774 		}
22775 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
22776 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
22777 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
22778 		start_pktp->pkt_flags = FLAG_NOINTR;
22779 
22780 		mutex_enter(SD_MUTEX(un));
22781 		SD_FILL_SCSI1_LUN(un, start_pktp);
22782 		mutex_exit(SD_MUTEX(un));
22783 		/*
22784 		 * Scsi_poll returns 0 (success) if the command completes and
22785 		 * the status block is STATUS_GOOD.
22786 		 */
22787 		if (sd_scsi_poll(un, start_pktp) != 0) {
22788 			scsi_destroy_pkt(start_pktp);
22789 			return (EIO);
22790 		}
22791 		scsi_destroy_pkt(start_pktp);
22792 		(void) sd_ddi_pm_resume(un);
22793 	} else {
22794 		mutex_exit(&un->un_pm_mutex);
22795 	}
22796 
22797 	mutex_enter(SD_MUTEX(un));
22798 	un->un_throttle = 0;
22799 
22800 	/*
22801 	 * The first time through, reset the specific target device.
22802 	 * However, when cpr calls sddump we know that sd is in a
22803 	 * good state, so no bus reset is required.
22804 	 * Clear sense data via Request Sense cmd.
22805 	 * In sddump we don't care about allow_bus_device_reset anymore.
22806 	 */
22807 
22808 	if ((un->un_state != SD_STATE_SUSPENDED) &&
22809 	    (un->un_state != SD_STATE_DUMPING)) {
22810 
22811 		New_state(un, SD_STATE_DUMPING);
22812 
22813 		if (un->un_f_is_fibre == FALSE) {
22814 			mutex_exit(SD_MUTEX(un));
22815 			/*
22816 			 * Attempt a bus reset for parallel scsi.
22817 			 *
22818 			 * Note: A bus reset is required because on some host
22819 			 * systems (i.e. E420R) a bus device reset is
22820 			 * insufficient to reset the state of the target.
22821 			 *
22822 			 * Note: Don't issue the reset for fibre-channel,
22823 			 * because this tends to hang the bus (loop) for
22824 			 * too long while everyone is logging out and in
22825 			 * and the deadman timer for dumping will fire
22826 			 * before the dump is complete.
22827 			 */
22828 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
22829 				mutex_enter(SD_MUTEX(un));
22830 				Restore_state(un);
22831 				mutex_exit(SD_MUTEX(un));
22832 				return (EIO);
22833 			}
22834 
22835 			/* Delay to give the device some recovery time. */
22836 			drv_usecwait(10000);
22837 
22838 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
22839 				SD_INFO(SD_LOG_DUMP, un,
22840 				    "sddump: sd_send_polled_RQS failed\n");
22841 			}
22842 			mutex_enter(SD_MUTEX(un));
22843 		}
22844 	}
22845 
22846 	/*
22847 	 * Convert the partition-relative block number to a
22848 	 * disk physical block number.
22849 	 */
22850 	blkno += start_block;
22851 
22852 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
22853 
22854 
22855 	/*
22856 	 * Check if the device has a non-512 block size.
22857 	 */
22858 	wr_bp = NULL;
22859 	if (NOT_DEVBSIZE(un)) {
22860 		tgt_byte_offset = blkno * un->un_sys_blocksize;
22861 		tgt_byte_count = nblk * un->un_sys_blocksize;
22862 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
22863 		    (tgt_byte_count % un->un_tgt_blocksize)) {
22864 			doing_rmw = TRUE;
22865 			/*
22866 			 * Calculate the block number and number of blocks
22867 			 * in terms of the media block size.
22868 			 */
22869 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22870 			tgt_nblk =
22871 			    ((tgt_byte_offset + tgt_byte_count +
22872 			    (un->un_tgt_blocksize - 1)) /
22873 			    un->un_tgt_blocksize) - tgt_blkno;
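			/*
			 * Worked example (assuming 512-byte system blocks
			 * and 2048-byte media blocks): blkno = 3, nblk = 2
			 * gives tgt_byte_offset = 1536 and tgt_byte_count =
			 * 1024, hence tgt_blkno = 0 and tgt_nblk = 2; two
			 * media blocks are read and the 1024 bytes are later
			 * spliced in at io_start_offset = 1536.
			 */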
22874 
22875 			/*
22876 			 * Invoke the routine which is going to do read part
22877 			 * of read-modify-write.
22878 			 * Note that this routine returns a pointer to
22879 			 * a valid bp in wr_bp.
22880 			 */
22881 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
22882 			    &wr_bp);
22883 			if (err) {
22884 				mutex_exit(SD_MUTEX(un));
22885 				return (err);
22886 			}
22887 			/*
22888 			 * The offset is calculated as:
22889 			 *   (original block # * system block size) -
22890 			 *   (new block # * target block size)
22891 			 */
22892 			io_start_offset =
22893 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
22894 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
22895 
22896 			ASSERT((io_start_offset >= 0) &&
22897 			    (io_start_offset < un->un_tgt_blocksize));
22898 			/*
22899 			 * Do the modify portion of read modify write.
22900 			 */
22901 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
22902 			    (size_t)nblk * un->un_sys_blocksize);
22903 		} else {
22904 			doing_rmw = FALSE;
22905 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
22906 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
22907 		}
22908 
22909 		/* Convert blkno and nblk to target blocks */
22910 		blkno = tgt_blkno;
22911 		nblk = tgt_nblk;
22912 	} else {
22913 		wr_bp = &wr_buf;
22914 		bzero(wr_bp, sizeof (struct buf));
22915 		wr_bp->b_flags		= B_BUSY;
22916 		wr_bp->b_un.b_addr	= addr;
22917 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
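		/* DEV_BSHIFT is 9, so this sets b_bcount to nblk * 512 bytes. */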
22918 		wr_bp->b_resid		= 0;
22919 	}
22920 
22921 	mutex_exit(SD_MUTEX(un));
22922 
22923 	/*
22924 	 * Obtain a SCSI packet for the write command.
22925 	 * It should be safe to call the allocator here without
22926 	 * worrying about being locked for DVMA mapping because
22927 	 * the address we're passed is already a DVMA mapping
22928 	 *
22929 	 * We are also not going to worry about semaphore ownership
22930 	 * in the dump buffer. Dumping is single threaded at present.
22931 	 */
22932 
22933 	wr_pktp = NULL;
22934 
22935 #if defined(__i386) || defined(__amd64)
22936 	dma_resid = wr_bp->b_bcount;
22937 	oblkno = blkno;
22938 	while (dma_resid != 0) {
22939 #endif
22940 
22941 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
22942 		wr_bp->b_flags &= ~B_ERROR;
22943 
22944 #if defined(__i386) || defined(__amd64)
22945 		blkno = oblkno +
22946 		    ((wr_bp->b_bcount - dma_resid) /
22947 		    un->un_tgt_blocksize);
22948 		nblk = dma_resid / un->un_tgt_blocksize;
22949 
22950 		if (wr_pktp) {
22951 			/* Partial DMA transfers after initial transfer */
22952 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
22953 			    blkno, nblk);
22954 		} else {
22955 			/* Initial transfer */
22956 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22957 			    un->un_pkt_flags, NULL_FUNC, NULL,
22958 			    blkno, nblk);
22959 		}
22960 #else
22961 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
22962 		    0, NULL_FUNC, NULL, blkno, nblk);
22963 #endif
22964 
22965 		if (rval == 0) {
22966 			/* We were given a SCSI packet, continue. */
22967 			break;
22968 		}
22969 
22970 		if (i == 0) {
22971 			if (wr_bp->b_flags & B_ERROR) {
22972 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22973 				    "no resources for dumping; "
22974 				    "error code: 0x%x, retrying",
22975 				    geterror(wr_bp));
22976 			} else {
22977 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
22978 				    "no resources for dumping; retrying");
22979 			}
22980 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
22981 			if (wr_bp->b_flags & B_ERROR) {
22982 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22983 				    "no resources for dumping; error code: "
22984 				    "0x%x, retrying\n", geterror(wr_bp));
22985 			}
22986 		} else {
22987 			if (wr_bp->b_flags & B_ERROR) {
22988 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22989 				    "no resources for dumping; "
22990 				    "error code: 0x%x, retries failed, "
22991 				    "giving up.\n", geterror(wr_bp));
22992 			} else {
22993 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
22994 				    "no resources for dumping; "
22995 				    "retries failed, giving up.\n");
22996 			}
22997 			mutex_enter(SD_MUTEX(un));
22998 			Restore_state(un);
22999 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
23000 				mutex_exit(SD_MUTEX(un));
23001 				scsi_free_consistent_buf(wr_bp);
23002 			} else {
23003 				mutex_exit(SD_MUTEX(un));
23004 			}
23005 			return (EIO);
23006 		}
23007 		drv_usecwait(10000);
23008 	}
23009 
23010 #if defined(__i386) || defined(__amd64)
23011 	/*
23012 	 * save the resid from PARTIAL_DMA
23013 	 */
23014 	dma_resid = wr_pktp->pkt_resid;
23015 	if (dma_resid != 0)
23016 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
23017 	wr_pktp->pkt_resid = 0;
23018 #endif
23019 
23020 	/* SunBug 1222170 */
23021 	wr_pktp->pkt_flags = FLAG_NOINTR;
23022 
23023 	err = EIO;
23024 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
23025 
23026 		/*
23027 		 * Scsi_poll returns 0 (success) if the command completes and
23028 		 * the status block is STATUS_GOOD.  We should only check
23029 		 * errors if this condition is not true.  Even then we should
23030 		 * send our own request sense packet only if we have a check
23031 		 * condition and auto request sense has not been performed by
23032 		 * the hba.
23033 		 */
23034 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
23035 
23036 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
23037 		    (wr_pktp->pkt_resid == 0)) {
23038 			err = SD_SUCCESS;
23039 			break;
23040 		}
23041 
23042 		/*
23043 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
23044 		 */
23045 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
23046 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
23047 			    "Device is gone\n");
23048 			break;
23049 		}
23050 
23051 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
23052 			SD_INFO(SD_LOG_DUMP, un,
23053 			    "sddump: write failed with CHECK, try # %d\n", i);
23054 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
23055 				(void) sd_send_polled_RQS(un);
23056 			}
23057 
23058 			continue;
23059 		}
23060 
23061 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
23062 			int reset_retval = 0;
23063 
23064 			SD_INFO(SD_LOG_DUMP, un,
23065 			    "sddump: write failed with BUSY, try # %d\n", i);
23066 
23067 			if (un->un_f_lun_reset_enabled == TRUE) {
23068 				reset_retval = scsi_reset(SD_ADDRESS(un),
23069 				    RESET_LUN);
23070 			}
23071 			if (reset_retval == 0) {
23072 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
23073 			}
23074 			(void) sd_send_polled_RQS(un);
23075 
23076 		} else {
23077 			SD_INFO(SD_LOG_DUMP, un,
23078 			    "sddump: write failed with 0x%x, try # %d\n",
23079 			    SD_GET_PKT_STATUS(wr_pktp), i);
23080 			mutex_enter(SD_MUTEX(un));
23081 			sd_reset_target(un, wr_pktp);
23082 			mutex_exit(SD_MUTEX(un));
23083 		}
23084 
23085 		/*
23086 		 * If we are not getting anywhere with lun/target resets,
23087 		 * let's reset the bus.
23088 		 */
23089 		if (i == SD_NDUMP_RETRIES/2) {
23090 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
23091 			(void) sd_send_polled_RQS(un);
23092 		}
23093 
23094 	}
23095 #if defined(__i386) || defined(__amd64)
23096 	}	/* dma_resid */
23097 #endif
23098 
23099 	scsi_destroy_pkt(wr_pktp);
23100 	mutex_enter(SD_MUTEX(un));
23101 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
23102 		mutex_exit(SD_MUTEX(un));
23103 		scsi_free_consistent_buf(wr_bp);
23104 	} else {
23105 		mutex_exit(SD_MUTEX(un));
23106 	}
23107 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
23108 	return (err);
23109 }
23110 
23111 /*
23112  *    Function: sd_scsi_poll()
23113  *
23114  * Description: This is a wrapper for the scsi_poll call.
23115  *
23116  *   Arguments: sd_lun - The unit structure
23117  *              scsi_pkt - The scsi packet being sent to the device.
23118  *
23119  * Return Code: 0 - Command completed successfully with good status
23120  *             -1 - Command failed.  This could indicate a check condition
23121  *                  or other status value requiring recovery action.
23122  *
23123  */
23124 
23125 static int
23126 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
23127 {
23128 	int status;
23129 
23130 	ASSERT(un != NULL);
23131 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23132 	ASSERT(pktp != NULL);
23133 
23134 	status = SD_SUCCESS;
23135 
23136 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
23137 		pktp->pkt_flags |= un->un_tagflags;
23138 		pktp->pkt_flags &= ~FLAG_NODISCON;
23139 	}
23140 
23141 	status = sd_ddi_scsi_poll(pktp);
23142 	/*
23143 	 * Scsi_poll returns 0 (success) if the command completes and the
23144 	 * status block is STATUS_GOOD.  We should only check errors if this
23145 	 * condition is not true.  Even then we should send our own request
23146 	 * sense packet only if we have a check condition and auto
23147 	 * request sense has not been performed by the hba.
23148 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
23149 	 */
23150 	if ((status != SD_SUCCESS) &&
23151 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
23152 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
23153 	    (pktp->pkt_reason != CMD_DEV_GONE))
23154 		(void) sd_send_polled_RQS(un);
23155 
23156 	return (status);
23157 }
23158 
23159 /*
23160  *    Function: sd_send_polled_RQS()
23161  *
23162  * Description: This sends the request sense command to a device.
23163  *
23164  *   Arguments: sd_lun - The unit structure
23165  *
23166  * Return Code: 0 - Command completed successfully with good status
23167  *             -1 - Command failed.
23168  *
23169  */
23170 
23171 static int
23172 sd_send_polled_RQS(struct sd_lun *un)
23173 {
23174 	int	ret_val;
23175 	struct	scsi_pkt	*rqs_pktp;
23176 	struct	buf		*rqs_bp;
23177 
23178 	ASSERT(un != NULL);
23179 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23180 
23181 	ret_val = SD_SUCCESS;
23182 
23183 	rqs_pktp = un->un_rqs_pktp;
23184 	rqs_bp	 = un->un_rqs_bp;
23185 
23186 	mutex_enter(SD_MUTEX(un));
23187 
23188 	if (un->un_sense_isbusy) {
23189 		ret_val = SD_FAILURE;
23190 		mutex_exit(SD_MUTEX(un));
23191 		return (ret_val);
23192 	}
23193 
23194 	/*
23195 	 * If the request sense buffer (and packet) is not in use,
23196 	 * let's set the un_sense_isbusy and send our packet
23197 	 */
23198 	un->un_sense_isbusy 	= 1;
23199 	rqs_pktp->pkt_resid  	= 0;
23200 	rqs_pktp->pkt_reason 	= 0;
23201 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
23202 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
23203 
23204 	mutex_exit(SD_MUTEX(un));
23205 
23206 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
23207 	    " 0x%p\n", rqs_bp->b_un.b_addr);
23208 
23209 	/*
23210 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
23211 	 * axle - it has a call into us!
23212 	 */
23213 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
23214 		SD_INFO(SD_LOG_COMMON, un,
23215 		    "sd_send_polled_RQS: RQS failed\n");
23216 	}
23217 
23218 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
23219 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
23220 
23221 	mutex_enter(SD_MUTEX(un));
23222 	un->un_sense_isbusy = 0;
23223 	mutex_exit(SD_MUTEX(un));
23224 
23225 	return (ret_val);
23226 }
23227 
23228 /*
23229  * Defines needed for localized version of the scsi_poll routine.
23230  */
23231 #define	SD_CSEC		10000			/* usecs */
23232 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
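/*
 * SD_CSEC is one centisecond (10,000 usec), so SD_SEC_TO_CSEC evaluates to
 * 100: it converts seconds into the 10-msec polling slots that
 * sd_ddi_scsi_poll() counts with busy_count below.
 */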
23233 
23234 
23235 /*
23236  *    Function: sd_ddi_scsi_poll()
23237  *
23238  * Description: Localized version of the scsi_poll routine.  The purpose is to
23239  *		send a scsi_pkt to a device as a polled command.  This version
23240  *		is to ensure more robust handling of transport errors.
23241  *		Specifically this routine cures not ready, coming ready
23242  *		Specifically, this routine rides out the not-ready-to-ready
23243  *		transition on power up and reset of Sonoma arrays.  This can take
23244  *		up to 45 seconds for power-on and 20 seconds for reset of a
23245  *		Sonoma lun.
23246  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
23247  *
23248  * Return Code: 0 - Command completed successfully with good status
23249  *             -1 - Command failed.
23250  *
23251  */
23252 
23253 static int
23254 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
23255 {
23256 	int busy_count;
23257 	int timeout;
23258 	int rval = SD_FAILURE;
23259 	int savef;
23260 	uint8_t *sensep;
23261 	long savet;
23262 	void (*savec)();
23263 	/*
23264 	 * The following is defined in machdep.c and is used in determining if
23265 	 * the scsi transport system will do polled I/O instead of interrupt
23266 	 * I/O when called from xx_dump().
23267 	 */
23268 	extern int do_polled_io;
23269 
23270 	/*
23271 	 * save old flags in pkt, to restore at end
23272 	 */
23273 	savef = pkt->pkt_flags;
23274 	savec = pkt->pkt_comp;
23275 	savet = pkt->pkt_time;
23276 
23277 	pkt->pkt_flags |= FLAG_NOINTR;
23278 
23279 	/*
23280 	 * XXX there is nothing in the SCSA spec that states that we should not
23281 	 * do a callback for polled cmds; however, removing this will break sd
23282 	 * and probably other target drivers
23283 	 */
23284 	pkt->pkt_comp = NULL;
23285 
23286 	/*
23287 	 * we don't like a polled command without timeout.
23288 	 * 60 seconds seems long enough.
23289 	 */
23290 	if (pkt->pkt_time == 0) {
23291 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
23292 	}
23293 
23294 	/*
23295 	 * Send polled cmd.
23296 	 *
23297 	 * We do some error recovery for various errors.  Tran_busy,
23298 	 * queue full, and non-dispatched commands are retried every 10 msec,
23299 	 * as they are typically transient failures.  Busy status and Not
23300 	 * Ready are retried every second, as these statuses take a while to
23301 	 * change.  Unit attention is retried for pkt_time (60) times
23302 	 * with no delay.
23303 	 */
23304 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
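	/*
	 * For example, a pkt_time of 60 seconds yields 60 * 100 = 6000
	 * polling slots of 10 msec each; the 1-second waits below account
	 * for this by adding (SD_SEC_TO_CSEC - 1) extra slots to busy_count.
	 */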
23305 
23306 	for (busy_count = 0; busy_count < timeout; busy_count++) {
23307 		int rc;
23308 		int poll_delay;
23309 
23310 		/*
23311 		 * Initialize pkt status variables.
23312 		 */
23313 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
23314 
23315 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
23316 			if (rc != TRAN_BUSY) {
23317 				/* Transport failed - give up. */
23318 				break;
23319 			} else {
23320 				/* Transport busy - try again. */
23321 				poll_delay = 1 * SD_CSEC; /* 10 msec */
23322 			}
23323 		} else {
23324 			/*
23325 			 * Transport accepted - check pkt status.
23326 			 */
23327 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
23328 			if (pkt->pkt_reason == CMD_CMPLT &&
23329 			    rc == STATUS_CHECK &&
23330 			    pkt->pkt_state & STATE_ARQ_DONE) {
23331 				struct scsi_arq_status *arqstat =
23332 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
23333 
23334 				sensep = (uint8_t *)&arqstat->sts_sensedata;
23335 			} else {
23336 				sensep = NULL;
23337 			}
23338 
23339 			if ((pkt->pkt_reason == CMD_CMPLT) &&
23340 			    (rc == STATUS_GOOD)) {
23341 				/* No error - we're done */
23342 				rval = SD_SUCCESS;
23343 				break;
23344 
23345 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
23346 				/* Lost connection - give up */
23347 				break;
23348 
23349 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
23350 			    (pkt->pkt_state == 0)) {
23351 				/* Pkt not dispatched - try again. */
23352 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23353 
23354 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23355 			    (rc == STATUS_QFULL)) {
23356 				/* Queue full - try again. */
23357 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
23358 
23359 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
23360 			    (rc == STATUS_BUSY)) {
23361 				/* Busy - try again. */
23362 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23363 				busy_count += (SD_SEC_TO_CSEC - 1);
23364 
23365 			} else if ((sensep != NULL) &&
23366 			    (scsi_sense_key(sensep) ==
23367 			    KEY_UNIT_ATTENTION)) {
23368 				/* Unit Attention - try again */
23369 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
23370 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec. */
23371 
23372 			} else if ((sensep != NULL) &&
23373 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
23374 			    (scsi_sense_asc(sensep) == 0x04) &&
23375 			    (scsi_sense_ascq(sensep) == 0x01)) {
23376 				/* Not ready -> ready - try again. */
23377 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
23378 				busy_count += (SD_SEC_TO_CSEC - 1);
23379 
23380 			} else {
23381 				/* BAD status - give up. */
23382 				break;
23383 			}
23384 		}
23385 
23386 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
23387 		    !do_polled_io) {
23388 			delay(drv_usectohz(poll_delay));
23389 		} else {
23390 			/* we busy wait during cpr_dump or interrupt threads */
23391 			/* busy-wait during cpr_dump or in interrupt threads */
23392 		}
23393 	}
23394 
23395 	pkt->pkt_flags = savef;
23396 	pkt->pkt_comp = savec;
23397 	pkt->pkt_time = savet;
23398 	return (rval);
23399 }
23400 
23401 
23402 /*
23403  *    Function: sd_persistent_reservation_in_read_keys
23404  *
23405  * Description: This routine is the driver entry point for handling CD-ROM
23406  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
23407  *		by sending the SCSI-3 PRIN commands to the device.
23408  *		Processes the read keys command response by copying the
23409  *		reservation key information into the user provided buffer.
23410  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
23411  *
23412  *   Arguments: un   -  Pointer to soft state struct for the target.
23413  *		usrp -	user provided pointer to multihost Persistent In Read
23414  *			Keys structure (mhioc_inkeys_t)
23415  *		flag -	this argument is a pass through to ddi_copyxxx()
23416  *			directly from the mode argument of ioctl().
23417  *
23418  * Return Code: 0   - Success
23419  *		EACCES
23420  *		ENOTSUP
23421  *		errno return code from sd_send_scsi_cmd()
23422  *
23423  *     Context: Can sleep. Does not return until command is completed.
23424  */
23425 
23426 static int
23427 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
23428     mhioc_inkeys_t *usrp, int flag)
23429 {
23430 #ifdef _MULTI_DATAMODEL
23431 	struct mhioc_key_list32	li32;
23432 #endif
23433 	sd_prin_readkeys_t	*in;
23434 	mhioc_inkeys_t		*ptr;
23435 	mhioc_key_list_t	li;
23436 	uchar_t			*data_bufp;
23437 	int 			data_len;
23438 	int			rval;
23439 	size_t			copysz;
23440 
23441 	if ((ptr = usrp) == NULL) {
23442 		return (EINVAL);
23443 	}
23444 	bzero(&li, sizeof (mhioc_key_list_t));
23445 
23446 	/*
23447 	 * Get the listsize from user
23448 	 */
23449 #ifdef _MULTI_DATAMODEL
23450 
23451 	switch (ddi_model_convert_from(flag & FMODELS)) {
23452 	case DDI_MODEL_ILP32:
23453 		copysz = sizeof (struct mhioc_key_list32);
23454 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
23455 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23456 			    "sd_persistent_reservation_in_read_keys: "
23457 			    "failed ddi_copyin: mhioc_key_list32_t\n");
23458 			rval = EFAULT;
23459 			goto done;
23460 		}
23461 		li.listsize = li32.listsize;
23462 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
23463 		break;
23464 
23465 	case DDI_MODEL_NONE:
23466 		copysz = sizeof (mhioc_key_list_t);
23467 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23468 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23469 			    "sd_persistent_reservation_in_read_keys: "
23470 			    "failed ddi_copyin: mhioc_key_list_t\n");
23471 			rval = EFAULT;
23472 			goto done;
23473 		}
23474 		break;
23475 	}
23476 
23477 #else /* ! _MULTI_DATAMODEL */
23478 	copysz = sizeof (mhioc_key_list_t);
23479 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
23480 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23481 		    "sd_persistent_reservation_in_read_keys: "
23482 		    "failed ddi_copyin: mhioc_key_list_t\n");
23483 		rval = EFAULT;
23484 		goto done;
23485 	}
23486 #endif
23487 
23488 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
23489 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
23490 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23491 
23492 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
23493 	    data_len, data_bufp)) != 0) {
23494 		goto done;
23495 	}
23496 	in = (sd_prin_readkeys_t *)data_bufp;
23497 	ptr->generation = BE_32(in->generation);
23498 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
23499 
23500 	/*
23501 	 * Return the min(listsize, listlen) keys
23502 	 */
23503 #ifdef _MULTI_DATAMODEL
23504 
23505 	switch (ddi_model_convert_from(flag & FMODELS)) {
23506 	case DDI_MODEL_ILP32:
23507 		li32.listlen = li.listlen;
23508 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
23509 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23510 			    "sd_persistent_reservation_in_read_keys: "
23511 			    "failed ddi_copyout: mhioc_key_list32_t\n");
23512 			rval = EFAULT;
23513 			goto done;
23514 		}
23515 		break;
23516 
23517 	case DDI_MODEL_NONE:
23518 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23519 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23520 			    "sd_persistent_reservation_in_read_keys: "
23521 			    "failed ddi_copyout: mhioc_key_list_t\n");
23522 			rval = EFAULT;
23523 			goto done;
23524 		}
23525 		break;
23526 	}
23527 
23528 #else /* ! _MULTI_DATAMODEL */
23529 
23530 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
23531 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23532 		    "sd_persistent_reservation_in_read_keys: "
23533 		    "failed ddi_copyout: mhioc_key_list_t\n");
23534 		rval = EFAULT;
23535 		goto done;
23536 	}
23537 
23538 #endif /* _MULTI_DATAMODEL */
23539 
23540 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
23541 	    li.listsize * MHIOC_RESV_KEY_SIZE);
23542 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
23543 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23544 		    "sd_persistent_reservation_in_read_keys: "
23545 		    "failed ddi_copyout: keylist\n");
23546 		rval = EFAULT;
23547 	}
23548 done:
23549 	kmem_free(data_bufp, data_len);
23550 	return (rval);
23551 }
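
/*
 * A hypothetical userland sketch (not part of the driver) of the
 * MHIOCGRP_INKEYS two-step pattern served by the routine above: a first
 * call with listsize == 0 learns listlen, then the caller sizes the key
 * list and calls again.
 */
#if 0
#include <sys/types.h>
#include <sys/mhd.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static int
read_keys_example(int fd)
{
	mhioc_key_list_t kl = { 0, 0, NULL };
	mhioc_inkeys_t ik = { 0, &kl };

	/* First pass: listsize is 0, so only listlen is filled in. */
	if (ioctl(fd, MHIOCGRP_INKEYS, &ik) != 0)
		return (-1);

	/* Second pass: min(listsize, listlen) keys are copied out. */
	kl.listsize = kl.listlen;
	if ((kl.list = calloc(kl.listsize,
	    sizeof (mhioc_resv_key_t))) == NULL)
		return (-1);
	if (ioctl(fd, MHIOCGRP_INKEYS, &ik) != 0) {
		free(kl.list);
		return (-1);
	}
	free(kl.list);
	return (0);
}
#endif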
23552 
23553 
23554 /*
23555  *    Function: sd_persistent_reservation_in_read_resv
23556  *
23557  * Description: This routine is the driver entry point for handling CD-ROM
23558  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
23559  *		by sending the SCSI-3 PRIN commands to the device.
23560  *		Process the read persistent reservations command response by
23561  *		copying the reservation information into the user provided
23562  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
23563  *
23564  *   Arguments: un   -  Pointer to soft state struct for the target.
23565  *		usrp -	user provided pointer to multihost Persistent In Read
23566  *		usrp -	user provided pointer to multihost Persistent In Read
23567  *			Reservations structure (mhioc_inresvs_t)
23568  *			directly from the mode argument of ioctl().
23569  *
23570  * Return Code: 0   - Success
23571  *		EACCES
23572  *		ENOTSUP
23573  *		errno return code from sd_send_scsi_cmd()
23574  *
23575  *     Context: Can sleep. Does not return until command is completed.
23576  */
23577 
23578 static int
23579 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
23580     mhioc_inresvs_t *usrp, int flag)
23581 {
23582 #ifdef _MULTI_DATAMODEL
23583 	struct mhioc_resv_desc_list32 resvlist32;
23584 #endif
23585 	sd_prin_readresv_t	*in;
23586 	mhioc_inresvs_t		*ptr;
23587 	sd_readresv_desc_t	*readresv_ptr;
23588 	mhioc_resv_desc_list_t	resvlist;
23589 	mhioc_resv_desc_t 	resvdesc;
23590 	uchar_t			*data_bufp;
23591 	int 			data_len;
23592 	int			rval;
23593 	int			i;
23594 	size_t			copysz;
23595 	mhioc_resv_desc_t	*bufp;
23596 
23597 	if ((ptr = usrp) == NULL) {
23598 		return (EINVAL);
23599 	}
23600 
23601 	/*
23602 	 * Get the listsize from user
23603 	 */
23604 #ifdef _MULTI_DATAMODEL
23605 	switch (ddi_model_convert_from(flag & FMODELS)) {
23606 	case DDI_MODEL_ILP32:
23607 		copysz = sizeof (struct mhioc_resv_desc_list32);
23608 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
23609 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23610 			    "sd_persistent_reservation_in_read_resv: "
23611 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23612 			rval = EFAULT;
23613 			goto done;
23614 		}
23615 		resvlist.listsize = resvlist32.listsize;
23616 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
23617 		break;
23618 
23619 	case DDI_MODEL_NONE:
23620 		copysz = sizeof (mhioc_resv_desc_list_t);
23621 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23622 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23623 			    "sd_persistent_reservation_in_read_resv: "
23624 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23625 			rval = EFAULT;
23626 			goto done;
23627 		}
23628 		break;
23629 	}
23630 #else /* ! _MULTI_DATAMODEL */
23631 	copysz = sizeof (mhioc_resv_desc_list_t);
23632 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
23633 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23634 		    "sd_persistent_reservation_in_read_resv: "
23635 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
23636 		rval = EFAULT;
23637 		goto done;
23638 	}
23639 #endif /* ! _MULTI_DATAMODEL */
23640 
23641 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
23642 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
23643 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
23644 
23645 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
23646 	    data_len, data_bufp)) != 0) {
23647 		goto done;
23648 	}
23649 	in = (sd_prin_readresv_t *)data_bufp;
23650 	ptr->generation = BE_32(in->generation);
23651 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
23652 
23653 	/*
23654 	 * Return the min(listsize, listlen) reservation descriptors
23655 	 */
23656 #ifdef _MULTI_DATAMODEL
23657 
23658 	switch (ddi_model_convert_from(flag & FMODELS)) {
23659 	case DDI_MODEL_ILP32:
23660 		resvlist32.listlen = resvlist.listlen;
23661 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
23662 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23663 			    "sd_persistent_reservation_in_read_resv: "
23664 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23665 			rval = EFAULT;
23666 			goto done;
23667 		}
23668 		break;
23669 
23670 	case DDI_MODEL_NONE:
23671 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23672 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23673 			    "sd_persistent_reservation_in_read_resv: "
23674 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23675 			rval = EFAULT;
23676 			goto done;
23677 		}
23678 		break;
23679 	}
23680 
23681 #else /* ! _MULTI_DATAMODEL */
23682 
23683 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
23684 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
23685 		    "sd_persistent_reservation_in_read_resv: "
23686 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
23687 		rval = EFAULT;
23688 		goto done;
23689 	}
23690 
23691 #endif /* ! _MULTI_DATAMODEL */
23692 
23693 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
23694 	bufp = resvlist.list;
23695 	copysz = sizeof (mhioc_resv_desc_t);
23696 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
23697 	    i++, readresv_ptr++, bufp++) {
23698 
23699 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
23700 		    MHIOC_RESV_KEY_SIZE);
23701 		resvdesc.type  = readresv_ptr->type;
23702 		resvdesc.scope = readresv_ptr->scope;
23703 		resvdesc.scope_specific_addr =
23704 		    BE_32(readresv_ptr->scope_specific_addr);
23705 
23706 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
23707 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
23708 			    "sd_persistent_reservation_in_read_resv: "
23709 			    "failed ddi_copyout: resvlist\n");
23710 			rval = EFAULT;
23711 			goto done;
23712 		}
23713 	}
23714 done:
23715 	kmem_free(data_bufp, data_len);
23716 	return (rval);
23717 }
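
/*
 * As with the read keys case above, only min(listsize, listlen)
 * descriptors are copied out.  For example, if the device reports four
 * reservations but the caller sized the list for two, two descriptors
 * are returned while listlen still reports four, so the caller can
 * reallocate and retry; MHIOCGRP_INRESV with mhioc_inresvs_t follows
 * the same two-step pattern sketched after the read keys routine.
 */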
23718 
23719 
23720 /*
23721  *    Function: sr_change_blkmode()
23722  *
23723  * Description: This routine is the driver entry point for handling CD-ROM
23724  *		block mode ioctl requests. Support for returning and changing
23725  *		the current block size in use by the device is implemented. The
23726  *		LBA size is changed via a MODE SELECT Block Descriptor.
23727  *
23728  *		This routine issues a mode sense with an allocation length of
23729  *		12 bytes for the mode page header and a single block descriptor.
23730  *
23731  *   Arguments: dev - the device 'dev_t'
23732  *		cmd - the request type; one of CDROMGBLKMODE (get) or
23733  *		      CDROMSBLKMODE (set)
23734  *		data - current block size or requested block size
23735  *		flag - this argument is a pass through to ddi_copyxxx() directly
23736  *		       from the mode argument of ioctl().
23737  *
23738  * Return Code: the code returned by sd_send_scsi_cmd()
23739  *		EINVAL if invalid arguments are provided
23740  *		EFAULT if ddi_copyxxx() fails
23741  *		ENXIO if fail ddi_get_soft_state
23742  *		EIO if invalid mode sense block descriptor length
23743  *
23744  */
23745 
23746 static int
23747 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
23748 {
23749 	struct sd_lun			*un = NULL;
23750 	struct mode_header		*sense_mhp, *select_mhp;
23751 	struct block_descriptor		*sense_desc, *select_desc;
23752 	int				current_bsize;
23753 	int				rval = EINVAL;
23754 	uchar_t				*sense = NULL;
23755 	uchar_t				*select = NULL;
23756 
23757 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
23758 
23759 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23760 		return (ENXIO);
23761 	}
23762 
23763 	/*
23764 	 * The block length is changed via the Mode Select block descriptor, the
23765 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
23766 	 * required as part of this routine. Therefore the mode sense allocation
23767 	 * length is specified to be the length of a mode page header and a
23768 	 * block descriptor.
23769 	 */
23770 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23771 
23772 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23773 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
23774 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23775 		    "sr_change_blkmode: Mode Sense Failed\n");
23776 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23777 		return (rval);
23778 	}
23779 
23780 	/* Check the block descriptor len to handle only 1 block descriptor */
23781 	sense_mhp = (struct mode_header *)sense;
23782 	if ((sense_mhp->bdesc_length == 0) ||
23783 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
23784 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23785 		    "sr_change_blkmode: Mode Sense returned invalid block"
23786 		    " descriptor length\n");
23787 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23788 		return (EIO);
23789 	}
23790 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
23791 	current_bsize = ((sense_desc->blksize_hi << 16) |
23792 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
23793 
23794 	/* Process command */
23795 	switch (cmd) {
23796 	case CDROMGBLKMODE:
23797 		/* Return the block size obtained during the mode sense */
23798 		if (ddi_copyout(&current_bsize, (void *)data,
23799 		    sizeof (int), flag) != 0)
23800 			rval = EFAULT;
23801 		break;
23802 	case CDROMSBLKMODE:
23803 		/* Validate the requested block size */
23804 		switch (data) {
23805 		case CDROM_BLK_512:
23806 		case CDROM_BLK_1024:
23807 		case CDROM_BLK_2048:
23808 		case CDROM_BLK_2056:
23809 		case CDROM_BLK_2336:
23810 		case CDROM_BLK_2340:
23811 		case CDROM_BLK_2352:
23812 		case CDROM_BLK_2368:
23813 		case CDROM_BLK_2448:
23814 		case CDROM_BLK_2646:
23815 		case CDROM_BLK_2647:
23816 			break;
23817 		default:
23818 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23819 			    "sr_change_blkmode: "
23820 			    "Block Size '%ld' Not Supported\n", data);
23821 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23822 			return (EINVAL);
23823 		}
23824 
23825 		/*
23826 		 * The current block size matches the requested block size so
23827 		 * there is no need to send the mode select to change the size
23828 		 */
23829 		if (current_bsize == data) {
23830 			break;
23831 		}
23832 
23833 		/* Build the select data for the requested block size */
23834 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
23835 		select_mhp = (struct mode_header *)select;
23836 		select_desc =
23837 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
23838 		/*
23839 		 * The LBA size is changed via the block descriptor, so the
23840 		 * descriptor is built according to the user data
23841 		 */
23842 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
23843 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
23844 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
23845 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
23846 
23847 		/* Send the mode select for the requested block size */
23848 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23849 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23850 		    SD_PATH_STANDARD)) != 0) {
23851 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23852 			    "sr_change_blkmode: Mode Select Failed\n");
23853 			/*
23854 			 * The mode select failed for the requested block size,
23855 			 * so reset the data for the original block size and
23856 			 * send it to the target. The error is indicated by the
23857 			 * return value for the failed mode select.
23858 			 */
23859 			select_desc->blksize_hi  = sense_desc->blksize_hi;
23860 			select_desc->blksize_mid = sense_desc->blksize_mid;
23861 			select_desc->blksize_lo  = sense_desc->blksize_lo;
23862 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
23863 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
23864 			    SD_PATH_STANDARD);
23865 		} else {
23866 			ASSERT(!mutex_owned(SD_MUTEX(un)));
23867 			mutex_enter(SD_MUTEX(un));
23868 			sd_update_block_info(un, (uint32_t)data, 0);
23869 			mutex_exit(SD_MUTEX(un));
23870 		}
23871 		break;
23872 	default:
23873 		/* should not reach here, but check anyway */
23874 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23875 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
23876 		rval = EINVAL;
23877 		break;
23878 	}
23879 
23880 	if (select) {
23881 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
23882 	}
23883 	if (sense) {
23884 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
23885 	}
23886 	return (rval);
23887 }
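
/*
 * A hypothetical userland sketch (not part of the driver) of the block
 * mode ioctl pair served above: read back the current LBA size, then
 * request 2048-byte blocks if needed.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
blkmode_example(int fd)
{
	int bsize;

	if (ioctl(fd, CDROMGBLKMODE, &bsize) != 0)
		return (-1);
	if (bsize != CDROM_BLK_2048 &&
	    ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048) != 0)
		return (-1);
	return (0);
}
#endif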
23888 
23889 
23890 /*
23891  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
23892  * implement driver support for getting and setting the CD speed. The command
23893  * set used will be based on the device type. If the device has not been
23894  * identified as MMC the Toshiba vendor specific mode page will be used. If
23895  * identified as MMC, the Toshiba vendor specific mode page will be used. If
23896  * the device is MMC but does not support the Real Time Streaming feature,
23897  * the SET CD SPEED command will be used to set the speed and mode page 0x2A
23898  * will be used to read the speed.
23899 
23900 /*
23901  *    Function: sr_change_speed()
23902  *
23903  * Description: This routine is the driver entry point for handling CD-ROM
23904  *		drive speed ioctl requests for devices supporting the Toshiba
23905  *		vendor specific drive speed mode page. Support for returning
23906  *		and changing the current drive speed in use by the device is
23907  *		implemented.
23908  *
23909  *   Arguments: dev - the device 'dev_t'
23910  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
23911  *		      CDROMSDRVSPEED (set)
23912  *		data - current drive speed or requested drive speed
23913  *		flag - this argument is a pass through to ddi_copyxxx() directly
23914  *		       from the mode argument of ioctl().
23915  *
23916  * Return Code: the code returned by sd_send_scsi_cmd()
23917  *		EINVAL if invalid arguments are provided
23918  *		EFAULT if ddi_copyxxx() fails
23919  *		ENXIO if fail ddi_get_soft_state
23920  *		EIO if invalid mode sense block descriptor length
23921  */
23922 
23923 static int
23924 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
23925 {
23926 	struct sd_lun			*un = NULL;
23927 	struct mode_header		*sense_mhp, *select_mhp;
23928 	struct mode_speed		*sense_page, *select_page;
23929 	int				current_speed;
23930 	int				rval = EINVAL;
23931 	int				bd_len;
23932 	uchar_t				*sense = NULL;
23933 	uchar_t				*select = NULL;
23934 
23935 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
23936 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23937 		return (ENXIO);
23938 	}
23939 
23940 	/*
23941 	 * Note: The drive speed is being modified here according to a Toshiba
23942 	 * vendor specific mode page (0x31).
23943 	 */
23944 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
23945 
23946 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
23947 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
23948 	    SD_PATH_STANDARD)) != 0) {
23949 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23950 		    "sr_change_speed: Mode Sense Failed\n");
23951 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23952 		return (rval);
23953 	}
23954 	sense_mhp  = (struct mode_header *)sense;
23955 
23956 	/* Check the block descriptor len to handle only 1 block descriptor */
23957 	bd_len = sense_mhp->bdesc_length;
23958 	if (bd_len > MODE_BLK_DESC_LENGTH) {
23959 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23960 		    "sr_change_speed: Mode Sense returned invalid block "
23961 		    "descriptor length\n");
23962 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23963 		return (EIO);
23964 	}
23965 
23966 	sense_page = (struct mode_speed *)
23967 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
23968 	current_speed = sense_page->speed;
23969 
23970 	/* Process command */
23971 	switch (cmd) {
23972 	case CDROMGDRVSPEED:
23973 		/* Return the drive speed obtained during the mode sense */
23974 		if (current_speed == 0x2) {
23975 			current_speed = CDROM_TWELVE_SPEED;
23976 		}
23977 		if (ddi_copyout(&current_speed, (void *)data,
23978 		    sizeof (int), flag) != 0) {
23979 			rval = EFAULT;
23980 		}
23981 		break;
23982 	case CDROMSDRVSPEED:
23983 		/* Validate the requested drive speed */
23984 		switch ((uchar_t)data) {
23985 		case CDROM_TWELVE_SPEED:
23986 			data = 0x2;
23987 			/*FALLTHROUGH*/
23988 		case CDROM_NORMAL_SPEED:
23989 		case CDROM_DOUBLE_SPEED:
23990 		case CDROM_QUAD_SPEED:
23991 		case CDROM_MAXIMUM_SPEED:
23992 			break;
23993 		default:
23994 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
23995 			    "sr_change_speed: "
23996 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
23997 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
23998 			return (EINVAL);
23999 		}
24000 
24001 		/*
24002 		 * The current drive speed matches the requested drive speed so
24003 		 * there is no need to send the mode select to change the speed
24004 		 */
24005 		if (current_speed == data) {
24006 			break;
24007 		}
24008 
24009 		/* Build the select data for the requested drive speed */
24010 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
24011 		select_mhp = (struct mode_header *)select;
24012 		select_mhp->bdesc_length = 0;
24013 		select_page =
24014 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
24017 		select_page->mode_page.code = CDROM_MODE_SPEED;
24018 		select_page->mode_page.length = 2;
24019 		select_page->speed = (uchar_t)data;
24020 
24021 		/* Send the mode select for the requested drive speed */
24022 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
24023 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
24024 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
24025 			/*
24026 			 * The mode select failed for the requested drive speed,
24027 			 * so reset the data for the original drive speed and
24028 			 * send it to the target. The error is indicated by the
24029 			 * return value for the failed mode select.
24030 			 */
24031 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24032 			    "sr_change_speed: Mode Select Failed\n");
24033 			select_page->speed = sense_page->speed;
24034 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
24035 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
24036 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
24037 		}
24038 		break;
24039 	default:
24040 		/* should not reach here, but check anyway */
24041 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24042 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
24043 		rval = EINVAL;
24044 		break;
24045 	}
24046 
24047 	if (select) {
24048 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
24049 	}
24050 	if (sense) {
24051 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
24052 	}
24053 
24054 	return (rval);
24055 }
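
/*
 * Note that on the Toshiba page the speed field is a small code rather
 * than a data rate: the routine above maps the vendor code 0x2 to
 * CDROM_TWELVE_SPEED on the way out and back to 0x2 on the way in,
 * while the remaining CDROM_*_SPEED values pass through unchanged.
 */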
24056 
24057 
24058 /*
24059  *    Function: sr_atapi_change_speed()
24060  *
24061  * Description: This routine is the driver entry point for handling CD-ROM
24062  *		drive speed ioctl requests for MMC devices that do not support
24063  *		the Real Time Streaming feature (0x107).
24064  *
24065  *		Note: This routine will use the SET SPEED command which may not
24066  *		be supported by all devices.
24067  *
24068  *   Arguments: dev- the device 'dev_t'
24069  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
24070  *		     CDROMSDRVSPEED (set)
24071  *		data- current drive speed or requested drive speed
24072  *		flag- this argument is a pass through to ddi_copyxxx() directly
24073  *		      from the mode argument of ioctl().
24074  *
24075  * Return Code: the code returned by sd_send_scsi_cmd()
24076  *		EINVAL if invalid arguments are provided
24077  *		EFAULT if ddi_copyxxx() fails
24078  *		ENXIO if fail ddi_get_soft_state
24079  *		EIO if invalid mode sense block descriptor length
24080  */
24081 
24082 static int
24083 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
24084 {
24085 	struct sd_lun			*un;
24086 	struct uscsi_cmd		*com = NULL;
24087 	struct mode_header_grp2		*sense_mhp;
24088 	uchar_t				*sense_page;
24089 	uchar_t				*sense = NULL;
24090 	char				cdb[CDB_GROUP5];
24091 	int				bd_len;
24092 	int				current_speed = 0;
24093 	int				max_speed = 0;
24094 	int				rval;
24095 
24096 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
24097 
24098 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24099 		return (ENXIO);
24100 	}
24101 
24102 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
24103 
24104 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
24105 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
24106 	    SD_PATH_STANDARD)) != 0) {
24107 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24108 		    "sr_atapi_change_speed: Mode Sense Failed\n");
24109 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
24110 		return (rval);
24111 	}
24112 
24113 	/* Check the block descriptor len to handle only 1 block descriptor */
24114 	sense_mhp = (struct mode_header_grp2 *)sense;
24115 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
24116 	if (bd_len > MODE_BLK_DESC_LENGTH) {
24117 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24118 		    "sr_atapi_change_speed: Mode Sense returned invalid "
24119 		    "block descriptor length\n");
24120 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
24121 		return (EIO);
24122 	}
24123 
24124 	/* Calculate the current and maximum drive speeds */
24125 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
24126 	current_speed = (sense_page[14] << 8) | sense_page[15];
24127 	max_speed = (sense_page[8] << 8) | sense_page[9];
24128 
24129 	/* Process the command */
24130 	switch (cmd) {
24131 	case CDROMGDRVSPEED:
24132 		current_speed /= SD_SPEED_1X;
24133 		if (ddi_copyout(&current_speed, (void *)data,
24134 		    sizeof (int), flag) != 0)
24135 			rval = EFAULT;
24136 		break;
24137 	case CDROMSDRVSPEED:
24138 		/* Convert the speed code to KB/sec */
24139 		switch ((uchar_t)data) {
24140 		case CDROM_NORMAL_SPEED:
24141 			current_speed = SD_SPEED_1X;
24142 			break;
24143 		case CDROM_DOUBLE_SPEED:
24144 			current_speed = 2 * SD_SPEED_1X;
24145 			break;
24146 		case CDROM_QUAD_SPEED:
24147 			current_speed = 4 * SD_SPEED_1X;
24148 			break;
24149 		case CDROM_TWELVE_SPEED:
24150 			current_speed = 12 * SD_SPEED_1X;
24151 			break;
24152 		case CDROM_MAXIMUM_SPEED:
24153 			current_speed = 0xffff;
24154 			break;
24155 		default:
24156 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24157 			    "sr_atapi_change_speed: invalid drive speed %d\n",
24158 			    (uchar_t)data);
24159 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
24160 			return (EINVAL);
24161 		}
24162 
24163 		/* Check the request against the drive's max speed. */
24164 		if (current_speed != 0xffff) {
24165 			if (current_speed > max_speed) {
24166 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
24167 				return (EINVAL);
24168 			}
24169 		}
24170 
24171 		/*
24172 		 * Build and send the SET SPEED command
24173 		 *
24174 		 * Note: The SET SPEED (0xBB) command used in this routine is
24175 		 * obsolete per the SCSI MMC spec but still supported in the
24176 		 * MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
24177 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
24178 		 * therefore the command is still implemented in this routine.
24179 		bzero(cdb, sizeof (cdb));
24180 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
24181 		cdb[2] = (uchar_t)(current_speed >> 8);
24182 		cdb[3] = (uchar_t)current_speed;
24183 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24184 		com->uscsi_cdb	   = (caddr_t)cdb;
24185 		com->uscsi_cdblen  = CDB_GROUP5;
24186 		com->uscsi_bufaddr = NULL;
24187 		com->uscsi_buflen  = 0;
24188 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
24189 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
24190 		break;
24191 	default:
24192 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24193 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
24194 		rval = EINVAL;
24195 	}
24196 
24197 	if (sense) {
24198 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
24199 	}
24200 	if (com) {
24201 		kmem_free(com, sizeof (*com));
24202 	}
24203 	return (rval);
24204 }
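
/*
 * For illustration, assuming SD_SPEED_1X is 176 (the 1x CD data rate in
 * KB/sec, rounded down from 176.4): a CDROMSDRVSPEED request of
 * CDROM_DOUBLE_SPEED becomes 352 (0x0160) KB/sec, so the SET SPEED CDB
 * built above carries cdb[2] = 0x01 and cdb[3] = 0x60.
 * CDROM_MAXIMUM_SPEED is sent as 0xffff, which MMC treats as a request
 * for optimal performance.
 */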
24205 
24206 
24207 /*
24208  *    Function: sr_pause_resume()
24209  *
24210  * Description: This routine is the driver entry point for handling CD-ROM
24211  *		pause/resume ioctl requests. This only affects the audio play
24212  *		operation.
24213  *
24214  *   Arguments: dev - the device 'dev_t'
24215  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
24216  *		      for setting the resume bit of the cdb.
24217  *
24218  * Return Code: the code returned by sd_send_scsi_cmd()
24219  *		EINVAL if invalid mode specified
24220  *
24221  */
24222 
24223 static int
24224 sr_pause_resume(dev_t dev, int cmd)
24225 {
24226 	struct sd_lun		*un;
24227 	struct uscsi_cmd	*com;
24228 	char			cdb[CDB_GROUP1];
24229 	int			rval;
24230 
24231 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24232 		return (ENXIO);
24233 	}
24234 
24235 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24236 	bzero(cdb, CDB_GROUP1);
24237 	cdb[0] = SCMD_PAUSE_RESUME;
24238 	switch (cmd) {
24239 	case CDROMRESUME:
24240 		cdb[8] = 1;
24241 		break;
24242 	case CDROMPAUSE:
24243 		cdb[8] = 0;
24244 		break;
24245 	default:
24246 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
24247 		    " Command '%x' Not Supported\n", cmd);
24248 		rval = EINVAL;
24249 		goto done;
24250 	}
24251 
24252 	com->uscsi_cdb    = cdb;
24253 	com->uscsi_cdblen = CDB_GROUP1;
24254 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24255 
24256 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24257 	    SD_PATH_STANDARD);
24258 
24259 done:
24260 	kmem_free(com, sizeof (*com));
24261 	return (rval);
24262 }
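
/*
 * Byte 8 of the PAUSE/RESUME CDB built above is the Resume bit: 1
 * resumes audio play and 0 pauses it, which is all that distinguishes
 * the CDROMRESUME and CDROMPAUSE requests.
 */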
24263 
24264 
24265 /*
24266  *    Function: sr_play_msf()
24267  *
24268  * Description: This routine is the driver entry point for handling CD-ROM
24269  *		ioctl requests to output the audio signals at the specified
24270  *		starting address and continue the audio play until the specified
24271  *		ending address (CDROMPLAYMSF) The address is in Minute Second
24272  *		ending address (CDROMPLAYMSF). The address is in Minute Second
24273  *
24274  *   Arguments: dev	- the device 'dev_t'
24275  *		data	- pointer to user provided audio msf structure,
24276  *		          specifying start/end addresses.
24277  *		flag	- this argument is a pass through to ddi_copyxxx()
24278  *		          directly from the mode argument of ioctl().
24279  *
24280  * Return Code: the code returned by sd_send_scsi_cmd()
24281  *		EFAULT if ddi_copyxxx() fails
24282  *		ENXIO if fail ddi_get_soft_state
24283  *		EINVAL if data pointer is NULL
24284  */
24285 
24286 static int
24287 sr_play_msf(dev_t dev, caddr_t data, int flag)
24288 {
24289 	struct sd_lun		*un;
24290 	struct uscsi_cmd	*com;
24291 	struct cdrom_msf	msf_struct;
24292 	struct cdrom_msf	*msf = &msf_struct;
24293 	char			cdb[CDB_GROUP1];
24294 	int			rval;
24295 
24296 	if (data == NULL) {
24297 		return (EINVAL);
24298 	}
24299 
24300 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24301 		return (ENXIO);
24302 	}
24303 
24304 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
24305 		return (EFAULT);
24306 	}
24307 
24308 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24309 	bzero(cdb, CDB_GROUP1);
24310 	cdb[0] = SCMD_PLAYAUDIO_MSF;
24311 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
24312 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
24313 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
24314 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
24315 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
24316 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
24317 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
24318 	} else {
24319 		cdb[3] = msf->cdmsf_min0;
24320 		cdb[4] = msf->cdmsf_sec0;
24321 		cdb[5] = msf->cdmsf_frame0;
24322 		cdb[6] = msf->cdmsf_min1;
24323 		cdb[7] = msf->cdmsf_sec1;
24324 		cdb[8] = msf->cdmsf_frame1;
24325 	}
24326 	com->uscsi_cdb    = cdb;
24327 	com->uscsi_cdblen = CDB_GROUP1;
24328 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24329 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24330 	    SD_PATH_STANDARD);
24331 	kmem_free(com, sizeof (*com));
24332 	return (rval);
24333 }
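
/*
 * For example, a starting address of 2 minutes, 30 seconds, frame 15 is
 * encoded into cdb[3..5] as 0x02, 0x1e, 0x0f on a binary drive, or as
 * 0x02, 0x30, 0x15 when un_f_cfg_playmsf_bcd is set and BYTE_TO_BCD()
 * packs each decimal digit into its own nibble.
 */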
24334 
24335 
24336 /*
24337  *    Function: sr_play_trkind()
24338  *
24339  * Description: This routine is the driver entry point for handling CD-ROM
24340  *		ioctl requests to output the audio signals at the specified
24341  *		starting address and continue the audio play until the specified
24342  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
24343  *		format.
24344  *
24345  *   Arguments: dev	- the device 'dev_t'
24346  *		data	- pointer to user provided audio track/index structure,
24347  *		          specifying start/end addresses.
24348  *		flag	- this argument is a pass through to ddi_copyxxx()
24349  *		          directly from the mode argument of ioctl().
24350  *
24351  * Return Code: the code returned by sd_send_scsi_cmd()
24352  *		EFAULT if ddi_copyxxx() fails
24353  *		ENXIO if fail ddi_get_soft_state
24354  *		EINVAL if data pointer is NULL
24355  */
24356 
24357 static int
24358 sr_play_trkind(dev_t dev, caddr_t data, int flag)
24359 {
24360 	struct cdrom_ti		ti_struct;
24361 	struct cdrom_ti		*ti = &ti_struct;
24362 	struct uscsi_cmd	*com = NULL;
24363 	char			cdb[CDB_GROUP1];
24364 	int			rval;
24365 
24366 	if (data == NULL) {
24367 		return (EINVAL);
24368 	}
24369 
24370 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
24371 		return (EFAULT);
24372 	}
24373 
24374 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24375 	bzero(cdb, CDB_GROUP1);
24376 	cdb[0] = SCMD_PLAYAUDIO_TI;
24377 	cdb[4] = ti->cdti_trk0;
24378 	cdb[5] = ti->cdti_ind0;
24379 	cdb[7] = ti->cdti_trk1;
24380 	cdb[8] = ti->cdti_ind1;
24381 	com->uscsi_cdb    = cdb;
24382 	com->uscsi_cdblen = CDB_GROUP1;
24383 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
24384 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24385 	    SD_PATH_STANDARD);
24386 	kmem_free(com, sizeof (*com));
24387 	return (rval);
24388 }
24389 
24390 
24391 /*
24392  *    Function: sr_read_all_subcodes()
24393  *
24394  * Description: This routine is the driver entry point for handling CD-ROM
24395  *		ioctl requests to return raw subcode data while the target is
24396  *		playing audio (CDROMSUBCODE).
24397  *
24398  *   Arguments: dev	- the device 'dev_t'
24399  *		data	- pointer to user provided cdrom subcode structure,
24400  *		          specifying the transfer length and address.
24401  *		flag	- this argument is a pass through to ddi_copyxxx()
24402  *		          directly from the mode argument of ioctl().
24403  *
24404  * Return Code: the code returned by sd_send_scsi_cmd()
24405  *		EFAULT if ddi_copyxxx() fails
24406  *		ENXIO if fail ddi_get_soft_state
24407  *		EINVAL if data pointer is NULL
24408  */
24409 
24410 static int
24411 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
24412 {
24413 	struct sd_lun		*un = NULL;
24414 	struct uscsi_cmd	*com = NULL;
24415 	struct cdrom_subcode	*subcode = NULL;
24416 	int			rval;
24417 	size_t			buflen;
24418 	char			cdb[CDB_GROUP5];
24419 
24420 #ifdef _MULTI_DATAMODEL
24421 	/* To support ILP32 applications in an LP64 world */
24422 	struct cdrom_subcode32		cdrom_subcode32;
24423 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
24424 #endif
24425 	if (data == NULL) {
24426 		return (EINVAL);
24427 	}
24428 
24429 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24430 		return (ENXIO);
24431 	}
24432 
24433 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
24434 
24435 #ifdef _MULTI_DATAMODEL
24436 	switch (ddi_model_convert_from(flag & FMODELS)) {
24437 	case DDI_MODEL_ILP32:
24438 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
24439 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24440 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24441 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24442 			return (EFAULT);
24443 		}
24444 		/* Convert the ILP32 uscsi data from the application to LP64 */
24445 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
24446 		break;
24447 	case DDI_MODEL_NONE:
24448 		if (ddi_copyin(data, subcode,
24449 		    sizeof (struct cdrom_subcode), flag)) {
24450 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24451 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
24452 			kmem_free(subcode, sizeof (struct cdrom_subcode));
24453 			return (EFAULT);
24454 		}
24455 		break;
24456 	}
24457 #else /* ! _MULTI_DATAMODEL */
24458 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
24459 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24460 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
24461 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24462 		return (EFAULT);
24463 	}
24464 #endif /* _MULTI_DATAMODEL */
24465 
24466 	/*
24467 	 * Since MMC-2 expects at most 3 bytes for the transfer length,
24468 	 * reject a length input that does not fit in 3 bytes.
24469 	 */
24470 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
24471 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24472 		    "sr_read_all_subcodes: "
24473 		    "cdrom transfer length too large: %d (limit %d)\n",
24474 		    subcode->cdsc_length, 0xFFFFFF);
24475 		kmem_free(subcode, sizeof (struct cdrom_subcode));
24476 		return (EINVAL);
24477 	}
24478 
24479 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
24480 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24481 	bzero(cdb, CDB_GROUP5);
24482 
24483 	if (un->un_f_mmc_cap == TRUE) {
24484 		cdb[0] = (char)SCMD_READ_CD;
24485 		cdb[2] = (char)0xff;
24486 		cdb[3] = (char)0xff;
24487 		cdb[4] = (char)0xff;
24488 		cdb[5] = (char)0xff;
24489 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24490 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24491 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
24492 		cdb[10] = 1;
24493 	} else {
24494 		/*
24495 		 * Note: A vendor specific command (0xDF) is being used here to
24496 		 * request a read of all subcodes.
24497 		 */
24498 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
24499 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
24500 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
24501 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
24502 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
24503 	}
24504 	com->uscsi_cdb	   = cdb;
24505 	com->uscsi_cdblen  = CDB_GROUP5;
24506 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
24507 	com->uscsi_buflen  = buflen;
24508 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24509 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
24510 	    SD_PATH_STANDARD);
24511 	kmem_free(subcode, sizeof (struct cdrom_subcode));
24512 	kmem_free(com, sizeof (*com));
24513 	return (rval);
24514 }
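
/*
 * The transfer above is cdsc_length blocks of raw subcode data at
 * CDROM_BLK_SUBCODE (96) bytes per block, covering the P through W
 * subcode channels; e.g. a request for 75 blocks (one second of audio)
 * transfers 75 * 96 == 7200 bytes.
 */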
24515 
24516 
24517 /*
24518  *    Function: sr_read_subchannel()
24519  *
24520  * Description: This routine is the driver entry point for handling CD-ROM
24521  *		ioctl requests to return the Q sub-channel data of the CD
24522  *		current position block. (CDROMSUBCHNL) The data includes the
24523  *		current position block (CDROMSUBCHNL). The data includes the
24524  *		track number, index number, absolute CD-ROM address (LBA or MSF
24525  *		format per the user), track relative CD-ROM address (LBA or MSF
24526  *
24527  *   Arguments: dev	- the device 'dev_t'
24528  *		data	- pointer to user provided cdrom sub-channel structure
24529  *		flag	- this argument is a pass through to ddi_copyxxx()
24530  *		          directly from the mode argument of ioctl().
24531  *
24532  * Return Code: the code returned by sd_send_scsi_cmd()
24533  *		EFAULT if ddi_copyxxx() fails
24534  *		ENXIO if fail ddi_get_soft_state
24535  *		EINVAL if data pointer is NULL
24536  */
24537 
24538 static int
24539 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
24540 {
24541 	struct sd_lun		*un;
24542 	struct uscsi_cmd	*com;
24543 	struct cdrom_subchnl	subchannel;
24544 	struct cdrom_subchnl	*subchnl = &subchannel;
24545 	char			cdb[CDB_GROUP1];
24546 	caddr_t			buffer;
24547 	int			rval;
24548 
24549 	if (data == NULL) {
24550 		return (EINVAL);
24551 	}
24552 
24553 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24554 	    (un->un_state == SD_STATE_OFFLINE)) {
24555 		return (ENXIO);
24556 	}
24557 
24558 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
24559 		return (EFAULT);
24560 	}
24561 
24562 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
24563 	bzero(cdb, CDB_GROUP1);
24564 	cdb[0] = SCMD_READ_SUBCHANNEL;
24565 	/* Set the MSF bit based on the user requested address format */
24566 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
24567 	/*
24568 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
24569 	 * returned
24570 	 */
24571 	cdb[2] = 0x40;
24572 	/*
24573 	 * Set byte 3 to specify the return data format. A value of 0x01
24574 	 * indicates that the CD-ROM current position should be returned.
24575 	 */
24576 	cdb[3] = 0x01;
24577 	cdb[8] = 0x10;
24578 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24579 	com->uscsi_cdb	   = cdb;
24580 	com->uscsi_cdblen  = CDB_GROUP1;
24581 	com->uscsi_bufaddr = buffer;
24582 	com->uscsi_buflen  = 16;
24583 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24584 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24585 	    SD_PATH_STANDARD);
24586 	if (rval != 0) {
24587 		kmem_free(buffer, 16);
24588 		kmem_free(com, sizeof (*com));
24589 		return (rval);
24590 	}
24591 
24592 	/* Process the returned Q sub-channel data */
24593 	subchnl->cdsc_audiostatus = buffer[1];
24594 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
24595 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
24596 	subchnl->cdsc_trk	= buffer[6];
24597 	subchnl->cdsc_ind	= buffer[7];
24598 	if (subchnl->cdsc_format & CDROM_LBA) {
24599 		subchnl->cdsc_absaddr.lba =
24600 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24601 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24602 		subchnl->cdsc_reladdr.lba =
24603 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
24604 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
24605 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
24606 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
24607 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
24608 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
24609 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
24610 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
24611 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
24612 	} else {
24613 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
24614 		subchnl->cdsc_absaddr.msf.second = buffer[10];
24615 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
24616 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
24617 		subchnl->cdsc_reladdr.msf.second = buffer[14];
24618 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
24619 	}
24620 	kmem_free(buffer, 16);
24621 	kmem_free(com, sizeof (*com));
24622 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
24623 	    != 0) {
24624 		return (EFAULT);
24625 	}
24626 	return (rval);
24627 }
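
/*
 * A hypothetical userland sketch (not part of the driver) of the
 * CDROMSUBCHNL request served above: poll the current audio position in
 * MSF format while a track is playing.
 */
#if 0
#include <sys/cdio.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int
subchnl_example(int fd)
{
	struct cdrom_subchnl sc;

	sc.cdsc_format = CDROM_MSF;
	if (ioctl(fd, CDROMSUBCHNL, &sc) != 0)
		return (-1);
	(void) printf("track %d index %d at %02d:%02d.%02d\n",
	    sc.cdsc_trk, sc.cdsc_ind, sc.cdsc_absaddr.msf.minute,
	    sc.cdsc_absaddr.msf.second, sc.cdsc_absaddr.msf.frame);
	return (0);
}
#endif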
24628 
24629 
24630 /*
24631  *    Function: sr_read_tocentry()
24632  *
24633  * Description: This routine is the driver entry point for handling CD-ROM
24634  *		ioctl requests to read from the Table of Contents (TOC)
24635  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
24636  *		fields, the starting address (LBA or MSF format per the user)
24637  *		and the data mode if the user specified track is a data track.
24638  *
24639  *		Note: The READ HEADER (0x44) command used in this routine is
24640  *		obsolete per the SCSI MMC spec but still supported in the
24641  *		MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
24642  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI,
24643  *		therefore the command is still implemented in this routine.
24644  *   Arguments: dev	- the device 'dev_t'
24645  *		data	- pointer to user provided toc entry structure,
24646  *			  specifying the track # and the address format
24647  *			  (LBA or MSF).
24648  *		flag	- this argument is a pass through to ddi_copyxxx()
24649  *		          directly from the mode argument of ioctl().
24650  *
24651  * Return Code: the code returned by sd_send_scsi_cmd()
24652  *		EFAULT if ddi_copyxxx() fails
24653  *		ENXIO if fail ddi_get_soft_state
24654  *		EINVAL if data pointer is NULL
24655  */
24656 
24657 static int
24658 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
24659 {
24660 	struct sd_lun		*un = NULL;
24661 	struct uscsi_cmd	*com;
24662 	struct cdrom_tocentry	toc_entry;
24663 	struct cdrom_tocentry	*entry = &toc_entry;
24664 	caddr_t			buffer;
24665 	int			rval;
24666 	char			cdb[CDB_GROUP1];
24667 
24668 	if (data == NULL) {
24669 		return (EINVAL);
24670 	}
24671 
24672 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24673 	    (un->un_state == SD_STATE_OFFLINE)) {
24674 		return (ENXIO);
24675 	}
24676 
24677 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
24678 		return (EFAULT);
24679 	}
24680 
24681 	/* Validate the requested track and address format */
24682 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
24683 		return (EINVAL);
24684 	}
24685 
24686 	if (entry->cdte_track == 0) {
24687 		return (EINVAL);
24688 	}
24689 
24690 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
24691 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24692 	bzero(cdb, CDB_GROUP1);
24693 
24694 	cdb[0] = SCMD_READ_TOC;
24695 	/* Set the MSF bit based on the user requested address format  */
24696 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
24697 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24698 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
24699 	} else {
24700 		cdb[6] = entry->cdte_track;
24701 	}
24702 
24703 	/*
24704 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
24705 	 * (4 byte TOC response header + 8 byte track descriptor)
24706 	 */
24707 	cdb[8] = 12;
24708 	com->uscsi_cdb	   = cdb;
24709 	com->uscsi_cdblen  = CDB_GROUP1;
24710 	com->uscsi_bufaddr = buffer;
24711 	com->uscsi_buflen  = 0x0C;
24712 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
24713 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24714 	    SD_PATH_STANDARD);
24715 	if (rval != 0) {
24716 		kmem_free(buffer, 12);
24717 		kmem_free(com, sizeof (*com));
24718 		return (rval);
24719 	}
24720 
24721 	/* Process the toc entry */
24722 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
24723 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
24724 	if (entry->cdte_format & CDROM_LBA) {
24725 		entry->cdte_addr.lba =
24726 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
24727 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
24728 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
24729 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
24730 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
24731 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
24732 		/*
24733 		 * Send a READ TOC command using the LBA address format to get
24734 		 * the LBA for the track requested so it can be used in the
24735 		 * READ HEADER request
24736 		 *
24737 		 * Note: The MSF bit of the READ HEADER command specifies the
24738 		 * output format. The block address specified in that command
24739 		 * must be in LBA format.
24740 		 */
24741 		cdb[1] = 0;
24742 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24743 		    SD_PATH_STANDARD);
24744 		if (rval != 0) {
24745 			kmem_free(buffer, 12);
24746 			kmem_free(com, sizeof (*com));
24747 			return (rval);
24748 		}
24749 	} else {
24750 		entry->cdte_addr.msf.minute	= buffer[9];
24751 		entry->cdte_addr.msf.second	= buffer[10];
24752 		entry->cdte_addr.msf.frame	= buffer[11];
24753 		/*
24754 		 * Send a READ TOC command using the LBA address format to get
24755 		 * the LBA for the track requested so it can be used in the
24756 		 * READ HEADER request
24757 		 *
24758 		 * Note: The MSF bit of the READ HEADER command specifies the
24759 		 * output format. The block address specified in that command
24760 		 * must be in LBA format.
24761 		 */
24762 		cdb[1] = 0;
24763 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24764 		    SD_PATH_STANDARD);
24765 		if (rval != 0) {
24766 			kmem_free(buffer, 12);
24767 			kmem_free(com, sizeof (*com));
24768 			return (rval);
24769 		}
24770 	}
24771 
24772 	/*
24773 	 * Build and send the READ HEADER command to determine the data mode of
24774 	 * the user specified track.
24775 	 */
24776 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
24777 	    (entry->cdte_track != CDROM_LEADOUT)) {
24778 		bzero(cdb, CDB_GROUP1);
24779 		cdb[0] = SCMD_READ_HEADER;
24780 		cdb[2] = buffer[8];
24781 		cdb[3] = buffer[9];
24782 		cdb[4] = buffer[10];
24783 		cdb[5] = buffer[11];
24784 		cdb[8] = 0x08;
24785 		com->uscsi_buflen = 0x08;
24786 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24787 		    SD_PATH_STANDARD);
24788 		if (rval == 0) {
24789 			entry->cdte_datamode = buffer[0];
24790 		} else {
24791 			/*
24792 			 * The READ HEADER command failed.  Since the command
24793 			 * is obsolete in one spec, it is better to return -1
24794 			 * as an invalid data mode so that the rest of the TOC
24795 			 * data can still be received.
24796 			 */
24797 			entry->cdte_datamode = (uchar_t)-1;
24798 		}
24799 	} else {
24800 		entry->cdte_datamode = (uchar_t)-1;
24801 	}
24802 
24803 	kmem_free(buffer, 12);
24804 	kmem_free(com, sizeof (*com));
24805 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
24806 		return (EFAULT);
24807 
24808 	return (rval);
24809 }
24810 
24811 
24812 /*
24813  *    Function: sr_read_tochdr()
24814  *
24815  * Description: This routine is the driver entry point for handling CD-ROM
24816  * 		ioctl requests to read the Table of Contents (TOC) header
24817  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
24818  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
24819  *		and ending track numbers.
24820  *   Arguments: dev	- the device 'dev_t'
24821  *		data	- pointer to user provided toc header structure,
24822  *			  specifying the starting and ending track numbers.
24823  *		flag	- this argument is a pass through to ddi_copyxxx()
24824  *			  directly from the mode argument of ioctl().
24825  *
24826  * Return Code: the code returned by sd_send_scsi_cmd()
24827  *		EFAULT if ddi_copyxxx() fails
24828  *		ENXIO if fail ddi_get_soft_state
24829  *		EINVAL if data pointer is NULL
24830  */
24831 
24832 static int
24833 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
24834 {
24835 	struct sd_lun		*un;
24836 	struct uscsi_cmd	*com;
24837 	struct cdrom_tochdr	toc_header;
24838 	struct cdrom_tochdr	*hdr = &toc_header;
24839 	char			cdb[CDB_GROUP1];
24840 	int			rval;
24841 	caddr_t			buffer;
24842 
24843 	if (data == NULL) {
24844 		return (EINVAL);
24845 	}
24846 
24847 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24848 	    (un->un_state == SD_STATE_OFFLINE)) {
24849 		return (ENXIO);
24850 	}
24851 
24852 	buffer = kmem_zalloc(4, KM_SLEEP);
24853 	bzero(cdb, CDB_GROUP1);
24854 	cdb[0] = SCMD_READ_TOC;
24855 	/*
24856 	 * Specifying a track number of 0x00 in the READ TOC command indicates
24857 	 * that the TOC header should be returned
24858 	 */
24859 	cdb[6] = 0x00;
24860 	/*
24861 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
24862 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
24863 	 */
24864 	cdb[8] = 0x04;
24865 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24866 	com->uscsi_cdb	   = cdb;
24867 	com->uscsi_cdblen  = CDB_GROUP1;
24868 	com->uscsi_bufaddr = buffer;
24869 	com->uscsi_buflen  = 0x04;
24870 	com->uscsi_timeout = 300;
24871 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
24872 
24873 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24874 	    SD_PATH_STANDARD);
24875 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
24876 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
24877 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
24878 	} else {
24879 		hdr->cdth_trk0 = buffer[2];
24880 		hdr->cdth_trk1 = buffer[3];
24881 	}
24882 	kmem_free(buffer, 4);
24883 	kmem_free(com, sizeof (*com));
24884 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
24885 		return (EFAULT);
24886 	}
24887 	return (rval);
24888 }
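
/*
 * A hypothetical userland sketch (not part of the driver) tying the TOC
 * header above to the TOC entry routine: read the header, then fetch
 * each track's starting address in MSF format.
 */
#if 0
#include <sys/types.h>
#include <sys/cdio.h>
#include <sys/ioctl.h>

static int
toc_walk_example(int fd)
{
	struct cdrom_tochdr hdr;
	struct cdrom_tocentry ent;
	int trk;

	if (ioctl(fd, CDROMREADTOCHDR, &hdr) != 0)
		return (-1);
	for (trk = hdr.cdth_trk0; trk <= hdr.cdth_trk1; trk++) {
		ent.cdte_track = (uchar_t)trk;
		ent.cdte_format = CDROM_MSF;
		if (ioctl(fd, CDROMREADTOCENTRY, &ent) != 0)
			return (-1);
	}
	return (0);
}
#endif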
24889 
24890 
24891 /*
24892  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
24893  * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
24894  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
24895  * digital audio and extended architecture digital audio. These modes are
24896  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
24897  * MMC specs.
24898  *
24899  * In addition to support for the various data formats these routines also
24900  * include support for devices that implement only the direct access READ
24901  * commands (0x08, 0x28), devices that implement the READ_CD commands
24902  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
24903  * READ CDXA commands (0xD8, 0xDB)
24904  * READ CDXA commands (0xD8, 0xDB).
24905 
24906 /*
24907  *    Function: sr_read_mode1()
24908  *
24909  * Description: This routine is the driver entry point for handling CD-ROM
24910  *		ioctl read mode1 requests (CDROMREADMODE1).
24911  *
24912  *   Arguments: dev	- the device 'dev_t'
24913  *		data	- pointer to user provided cd read structure specifying
24914  *			  the lba buffer address and length.
24915  *		flag	- this argument is a pass through to ddi_copyxxx()
24916  *			  directly from the mode argument of ioctl().
24917  *
24918  * Return Code: the code returned by sd_send_scsi_cmd()
24919  *		EFAULT if ddi_copyxxx() fails
24920  *		ENXIO if fail ddi_get_soft_state
24921  *		EINVAL if data pointer is NULL
24922  */
24923 
24924 static int
24925 sr_read_mode1(dev_t dev, caddr_t data, int flag)
24926 {
24927 	struct sd_lun		*un;
24928 	struct cdrom_read	mode1_struct;
24929 	struct cdrom_read	*mode1 = &mode1_struct;
24930 	int			rval;
24931 #ifdef _MULTI_DATAMODEL
24932 	/* To support ILP32 applications in an LP64 world */
24933 	struct cdrom_read32	cdrom_read32;
24934 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
24935 #endif /* _MULTI_DATAMODEL */
24936 
24937 	if (data == NULL) {
24938 		return (EINVAL);
24939 	}
24940 
24941 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
24942 	    (un->un_state == SD_STATE_OFFLINE)) {
24943 		return (ENXIO);
24944 	}
24945 
24946 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sr_read_mode1: entry: un:0x%p\n", un);
24948 
24949 #ifdef _MULTI_DATAMODEL
24950 	switch (ddi_model_convert_from(flag & FMODELS)) {
24951 	case DDI_MODEL_ILP32:
24952 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
24953 			return (EFAULT);
24954 		}
24955 		/* Convert the ILP32 uscsi data from the application to LP64 */
24956 		cdrom_read32tocdrom_read(cdrd32, mode1);
24957 		break;
24958 	case DDI_MODEL_NONE:
24959 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24960 			return (EFAULT);
		}
		break;
	}
24963 #else /* ! _MULTI_DATAMODEL */
24964 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
24965 		return (EFAULT);
24966 	}
24967 #endif /* _MULTI_DATAMODEL */
24968 
24969 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
24970 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
24971 
24972 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sr_read_mode1: exit: un:0x%p\n", un);
24974 
24975 	return (rval);
24976 }
24977 
24978 
24979 /*
24980  *    Function: sr_read_cd_mode2()
24981  *
24982  * Description: This routine is the driver entry point for handling CD-ROM
24983  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
24984  *		support the READ CD (0xBE) command or the 1st generation
24985  *		READ CD (0xD4) command.
24986  *
24987  *   Arguments: dev	- the device 'dev_t'
24988  *		data	- pointer to user provided cd read structure specifying
24989  *			  the lba buffer address and length.
24990  *		flag	- this argument is a pass through to ddi_copyxxx()
24991  *			  directly from the mode argument of ioctl().
24992  *
24993  * Return Code: the code returned by sd_send_scsi_cmd()
24994  *		EFAULT if ddi_copyxxx() fails
24995  *		ENXIO if fail ddi_get_soft_state
24996  *		EINVAL if data pointer is NULL
24997  */
24998 
24999 static int
25000 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
25001 {
25002 	struct sd_lun		*un;
25003 	struct uscsi_cmd	*com;
25004 	struct cdrom_read	mode2_struct;
25005 	struct cdrom_read	*mode2 = &mode2_struct;
25006 	uchar_t			cdb[CDB_GROUP5];
25007 	int			nblocks;
25008 	int			rval;
25009 #ifdef _MULTI_DATAMODEL
25010 	/*  To support ILP32 applications in an LP64 world */
25011 	struct cdrom_read32	cdrom_read32;
25012 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
25013 #endif /* _MULTI_DATAMODEL */
25014 
25015 	if (data == NULL) {
25016 		return (EINVAL);
25017 	}
25018 
25019 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25020 	    (un->un_state == SD_STATE_OFFLINE)) {
25021 		return (ENXIO);
25022 	}
25023 
25024 #ifdef _MULTI_DATAMODEL
25025 	switch (ddi_model_convert_from(flag & FMODELS)) {
25026 	case DDI_MODEL_ILP32:
25027 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
25028 			return (EFAULT);
25029 		}
25030 		/* Convert the ILP32 uscsi data from the application to LP64 */
25031 		cdrom_read32tocdrom_read(cdrd32, mode2);
25032 		break;
25033 	case DDI_MODEL_NONE:
25034 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
25035 			return (EFAULT);
25036 		}
25037 		break;
25038 	}
25039 
25040 #else /* ! _MULTI_DATAMODEL */
25041 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
25042 		return (EFAULT);
25043 	}
25044 #endif /* _MULTI_DATAMODEL */
25045 
25046 	bzero(cdb, sizeof (cdb));
25047 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
25048 		/* Read command supported by 1st generation atapi drives */
25049 		cdb[0] = SCMD_READ_CDD4;
25050 	} else {
25051 		/* Universal CD Access Command */
25052 		cdb[0] = SCMD_READ_CD;
25053 	}
25054 
25055 	/*
	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
25057 	 */
25058 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
25059 
25060 	/* set the start address */
25061 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
25062 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
25063 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
25064 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
25065 
25066 	/* set the transfer length */
25067 	nblocks = mode2->cdread_buflen / 2336;
25068 	cdb[6] = (uchar_t)(nblocks >> 16);
25069 	cdb[7] = (uchar_t)(nblocks >> 8);
25070 	cdb[8] = (uchar_t)nblocks;
25071 
25072 	/* set the filter bits */
25073 	cdb[9] = CDROM_READ_CD_USERDATA;
25074 
25075 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25076 	com->uscsi_cdb = (caddr_t)cdb;
25077 	com->uscsi_cdblen = sizeof (cdb);
25078 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
25079 	com->uscsi_buflen = mode2->cdread_buflen;
25080 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25081 
25082 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25083 	    SD_PATH_STANDARD);
25084 	kmem_free(com, sizeof (*com));
25085 	return (rval);
25086 }
25087 
25088 
25089 /*
25090  *    Function: sr_read_mode2()
25091  *
25092  * Description: This routine is the driver entry point for handling CD-ROM
25093  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
25094  *		do not support the READ CD (0xBE) command.
25095  *
25096  *   Arguments: dev	- the device 'dev_t'
25097  *		data	- pointer to user provided cd read structure specifying
25098  *			  the lba buffer address and length.
25099  *		flag	- this argument is a pass through to ddi_copyxxx()
25100  *			  directly from the mode argument of ioctl().
25101  *
25102  * Return Code: the code returned by sd_send_scsi_cmd()
25103  *		EFAULT if ddi_copyxxx() fails
25104  *		ENXIO if fail ddi_get_soft_state
25105  *		EINVAL if data pointer is NULL
25106  *		EIO if fail to reset block size
25107  *		EAGAIN if commands are in progress in the driver
25108  */
25109 
25110 static int
25111 sr_read_mode2(dev_t dev, caddr_t data, int flag)
25112 {
25113 	struct sd_lun		*un;
25114 	struct cdrom_read	mode2_struct;
25115 	struct cdrom_read	*mode2 = &mode2_struct;
25116 	int			rval;
25117 	uint32_t		restore_blksize;
25118 	struct uscsi_cmd	*com;
25119 	uchar_t			cdb[CDB_GROUP0];
25120 	int			nblocks;
25121 
25122 #ifdef _MULTI_DATAMODEL
25123 	/* To support ILP32 applications in an LP64 world */
25124 	struct cdrom_read32	cdrom_read32;
25125 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
25126 #endif /* _MULTI_DATAMODEL */
25127 
25128 	if (data == NULL) {
25129 		return (EINVAL);
25130 	}
25131 
25132 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25133 	    (un->un_state == SD_STATE_OFFLINE)) {
25134 		return (ENXIO);
25135 	}
25136 
25137 	/*
25138 	 * Because this routine will update the device and driver block size
25139 	 * being used we want to make sure there are no commands in progress.
25140 	 * If commands are in progress the user will have to try again.
25141 	 *
25142 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
25143 	 * in sdioctl to protect commands from sdioctl through to the top of
25144 	 * sd_uscsi_strategy. See sdioctl for details.
25145 	 */
25146 	mutex_enter(SD_MUTEX(un));
25147 	if (un->un_ncmds_in_driver != 1) {
25148 		mutex_exit(SD_MUTEX(un));
25149 		return (EAGAIN);
25150 	}
25151 	mutex_exit(SD_MUTEX(un));
25152 
25153 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sr_read_mode2: entry: un:0x%p\n", un);
25155 
25156 #ifdef _MULTI_DATAMODEL
25157 	switch (ddi_model_convert_from(flag & FMODELS)) {
25158 	case DDI_MODEL_ILP32:
25159 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
25160 			return (EFAULT);
25161 		}
25162 		/* Convert the ILP32 uscsi data from the application to LP64 */
25163 		cdrom_read32tocdrom_read(cdrd32, mode2);
25164 		break;
25165 	case DDI_MODEL_NONE:
25166 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
25167 			return (EFAULT);
25168 		}
25169 		break;
25170 	}
25171 #else /* ! _MULTI_DATAMODEL */
25172 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
25173 		return (EFAULT);
25174 	}
25175 #endif /* _MULTI_DATAMODEL */
25176 
25177 	/* Store the current target block size for restoration later */
25178 	restore_blksize = un->un_tgt_blocksize;
25179 
25180 	/* Change the device and soft state target block size to 2336 */
25181 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
25182 		rval = EIO;
25183 		goto done;
25184 	}
25185 
25186 
25187 	bzero(cdb, sizeof (cdb));
25188 
25189 	/* set READ operation */
25190 	cdb[0] = SCMD_READ;
25191 
25192 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
25193 	mode2->cdread_lba >>= 2;
25194 
25195 	/* set the start address */
25196 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
25197 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
25198 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
25199 
25200 	/* set the transfer length */
25201 	nblocks = mode2->cdread_buflen / 2336;
25202 	cdb[4] = (uchar_t)nblocks & 0xFF;
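	/*
	 * Only the low 8 bits of nblocks fit in the single transfer
	 * length byte of a Group 0 CDB; larger requests are silently
	 * truncated here.
	 */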
25203 
25204 	/* build command */
25205 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25206 	com->uscsi_cdb = (caddr_t)cdb;
25207 	com->uscsi_cdblen = sizeof (cdb);
25208 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
25209 	com->uscsi_buflen = mode2->cdread_buflen;
25210 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25211 
25212 	/*
25213 	 * Issue SCSI command with user space address for read buffer.
25214 	 *
25215 	 * This sends the command through main channel in the driver.
25216 	 *
25217 	 * Since this is accessed via an IOCTL call, we go through the
25218 	 * standard path, so that if the device was powered down, then
25219 	 * it would be 'awakened' to handle the command.
25220 	 */
25221 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25222 	    SD_PATH_STANDARD);
25223 
25224 	kmem_free(com, sizeof (*com));
25225 
25226 	/* Restore the device and soft state target block size */
25227 	if (sr_sector_mode(dev, restore_blksize) != 0) {
25228 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25229 		    "can't do switch back to mode 1\n");
		/*
		 * Even if the READ command succeeded we still need to
		 * report an error, because we failed to restore the
		 * block size.
		 */
25234 		if (rval == 0) {
25235 			rval = EIO;
25236 		}
25237 	}
25238 
25239 done:
25240 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sr_read_mode2: exit: un:0x%p\n", un);
25242 
25243 	return (rval);
25244 }
25245 
25246 
25247 /*
25248  *    Function: sr_sector_mode()
25249  *
 * Description: This utility function is used by sr_read_mode2 to set the
 *		target block size based on the user specified size. This is
 *		a legacy implementation based upon a vendor specific mode
 *		page (0x81).
 *
 *   Arguments: dev	- the device 'dev_t'
 *		blksize	- the block size to be set (2336 or 512).
 *
 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
 *		sd_send_scsi_MODE_SELECT()
 *		ENXIO if fail ddi_get_soft_state
25262  */
25263 
25264 static int
25265 sr_sector_mode(dev_t dev, uint32_t blksize)
25266 {
25267 	struct sd_lun	*un;
25268 	uchar_t		*sense;
25269 	uchar_t		*select;
25270 	int		rval;
25271 
25272 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25273 	    (un->un_state == SD_STATE_OFFLINE)) {
25274 		return (ENXIO);
25275 	}
25276 
25277 	sense = kmem_zalloc(20, KM_SLEEP);
25278 
25279 	/* Note: This is a vendor specific mode page (0x81) */
25280 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
25281 	    SD_PATH_STANDARD)) != 0) {
25282 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25283 		    "sr_sector_mode: Mode Sense failed\n");
25284 		kmem_free(sense, 20);
25285 		return (rval);
25286 	}
25287 	select = kmem_zalloc(20, KM_SLEEP);
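	/*
	 * Build the mode select data. Byte 3 is the block descriptor
	 * length (8), bytes 10-11 of the descriptor carry the new block
	 * size, and bytes 12-13 appear to be the page header (page code
	 * with the PS bit cleared, page length 6). Bytes 14-15 mirror
	 * the sense data; bit 0 of byte 14 selects the 2336 byte sector
	 * mode. The exact field meanings are vendor specific.
	 */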
25288 	select[3] = 0x08;
25289 	select[10] = ((blksize >> 8) & 0xff);
25290 	select[11] = (blksize & 0xff);
25291 	select[12] = 0x01;
25292 	select[13] = 0x06;
25293 	select[14] = sense[14];
25294 	select[15] = sense[15];
25295 	if (blksize == SD_MODE2_BLKSIZE) {
25296 		select[14] |= 0x01;
25297 	}
25298 
25299 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
25300 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
25301 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25302 		    "sr_sector_mode: Mode Select failed\n");
25303 	} else {
25304 		/*
25305 		 * Only update the softstate block size if we successfully
25306 		 * changed the device block mode.
25307 		 */
25308 		mutex_enter(SD_MUTEX(un));
25309 		sd_update_block_info(un, blksize, 0);
25310 		mutex_exit(SD_MUTEX(un));
25311 	}
25312 	kmem_free(sense, 20);
25313 	kmem_free(select, 20);
25314 	return (rval);
25315 }
25316 
25317 
25318 /*
25319  *    Function: sr_read_cdda()
25320  *
25321  * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests to return CD-DA or subcode data (CDROMCDDA).
 *		If the target supports reading CD-DA via the READ CD
 *		command (0xBE) that command is used; otherwise these
 *		requests are handled via the vendor specific READ CDDA
 *		command (0xD8).
25326  *
25327  *   Arguments: dev	- the device 'dev_t'
25328  *		data	- pointer to user provided CD-DA structure specifying
25329  *			  the track starting address, transfer length, and
25330  *			  subcode options.
25331  *		flag	- this argument is a pass through to ddi_copyxxx()
25332  *			  directly from the mode argument of ioctl().
25333  *
25334  * Return Code: the code returned by sd_send_scsi_cmd()
25335  *		EFAULT if ddi_copyxxx() fails
25336  *		ENXIO if fail ddi_get_soft_state
25337  *		EINVAL if invalid arguments are provided
25338  *		ENOTTY
25339  */
25340 
25341 static int
25342 sr_read_cdda(dev_t dev, caddr_t data, int flag)
25343 {
25344 	struct sd_lun			*un;
25345 	struct uscsi_cmd		*com;
25346 	struct cdrom_cdda		*cdda;
25347 	int				rval;
25348 	size_t				buflen;
25349 	char				cdb[CDB_GROUP5];
25350 
25351 #ifdef _MULTI_DATAMODEL
25352 	/* To support ILP32 applications in an LP64 world */
25353 	struct cdrom_cdda32	cdrom_cdda32;
25354 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
25355 #endif /* _MULTI_DATAMODEL */
25356 
25357 	if (data == NULL) {
25358 		return (EINVAL);
25359 	}
25360 
25361 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25362 		return (ENXIO);
25363 	}
25364 
25365 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
25366 
25367 #ifdef _MULTI_DATAMODEL
25368 	switch (ddi_model_convert_from(flag & FMODELS)) {
25369 	case DDI_MODEL_ILP32:
25370 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
25371 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25372 			    "sr_read_cdda: ddi_copyin Failed\n");
25373 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25374 			return (EFAULT);
25375 		}
25376 		/* Convert the ILP32 uscsi data from the application to LP64 */
25377 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
25378 		break;
25379 	case DDI_MODEL_NONE:
25380 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25381 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25382 			    "sr_read_cdda: ddi_copyin Failed\n");
25383 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25384 			return (EFAULT);
25385 		}
25386 		break;
25387 	}
25388 #else /* ! _MULTI_DATAMODEL */
25389 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
25390 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25391 		    "sr_read_cdda: ddi_copyin Failed\n");
25392 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25393 		return (EFAULT);
25394 	}
25395 #endif /* _MULTI_DATAMODEL */
25396 
25397 	/*
	 * Since MMC-2 allows at most 3 bytes for the transfer length,
	 * check that the requested length fits in 3 bytes.
25400 	 */
25401 	if ((cdda->cdda_length & 0xFF000000) != 0) {
25402 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
25403 		    "cdrom transfer length too large: %d (limit %d)\n",
25404 		    cdda->cdda_length, 0xFFFFFF);
25405 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25406 		return (EINVAL);
25407 	}
25408 
25409 	switch (cdda->cdda_subcode) {
25410 	case CDROM_DA_NO_SUBCODE:
25411 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
25412 		break;
25413 	case CDROM_DA_SUBQ:
25414 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
25415 		break;
25416 	case CDROM_DA_ALL_SUBCODE:
25417 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
25418 		break;
25419 	case CDROM_DA_SUBCODE_ONLY:
25420 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
25421 		break;
25422 	default:
25423 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25424 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
25425 		    cdda->cdda_subcode);
25426 		kmem_free(cdda, sizeof (struct cdrom_cdda));
25427 		return (EINVAL);
25428 	}
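	/*
	 * For example, a 10 block CDROM_DA_SUBQ request transfers
	 * 10 * 2368 bytes (presumably 2352 bytes of audio data plus
	 * 16 bytes of Q subcode per block).
	 */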
25429 
25430 	/* Build and send the command */
25431 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25432 	bzero(cdb, CDB_GROUP5);
25433 
25434 	if (un->un_f_cfg_cdda == TRUE) {
25435 		cdb[0] = (char)SCMD_READ_CD;
25436 		cdb[1] = 0x04;
25437 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25438 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25439 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25440 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25441 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25442 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25443 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
25444 		cdb[9] = 0x10;
25445 		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE:
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ:
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE:
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY:
			/* FALLTHROUGH */
		default:
25458 			kmem_free(cdda, sizeof (struct cdrom_cdda));
25459 			kmem_free(com, sizeof (*com));
25460 			return (ENOTTY);
25461 		}
25462 	} else {
25463 		cdb[0] = (char)SCMD_READ_CDDA;
25464 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
25465 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
25466 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
25467 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
25468 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
25469 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
25470 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
25471 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
25472 		cdb[10] = cdda->cdda_subcode;
25473 	}
25474 
25475 	com->uscsi_cdb = cdb;
25476 	com->uscsi_cdblen = CDB_GROUP5;
25477 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
25478 	com->uscsi_buflen = buflen;
25479 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25480 
25481 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25482 	    SD_PATH_STANDARD);
25483 
25484 	kmem_free(cdda, sizeof (struct cdrom_cdda));
25485 	kmem_free(com, sizeof (*com));
25486 	return (rval);
25487 }
25488 
25489 
25490 /*
25491  *    Function: sr_read_cdxa()
25492  *
25493  * Description: This routine is the driver entry point for handling CD-ROM
25494  *		ioctl requests to return CD-XA (Extended Architecture) data.
25495  *		(CDROMCDXA).
25496  *
25497  *   Arguments: dev	- the device 'dev_t'
25498  *		data	- pointer to user provided CD-XA structure specifying
25499  *			  the data starting address, transfer length, and format
25500  *		flag	- this argument is a pass through to ddi_copyxxx()
25501  *			  directly from the mode argument of ioctl().
25502  *
25503  * Return Code: the code returned by sd_send_scsi_cmd()
25504  *		EFAULT if ddi_copyxxx() fails
25505  *		ENXIO if fail ddi_get_soft_state
25506  *		EINVAL if data pointer is NULL
25507  */
25508 
25509 static int
25510 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
25511 {
25512 	struct sd_lun		*un;
25513 	struct uscsi_cmd	*com;
25514 	struct cdrom_cdxa	*cdxa;
25515 	int			rval;
25516 	size_t			buflen;
25517 	char			cdb[CDB_GROUP5];
25518 	uchar_t			read_flags;
25519 
25520 #ifdef _MULTI_DATAMODEL
25521 	/* To support ILP32 applications in an LP64 world */
25522 	struct cdrom_cdxa32		cdrom_cdxa32;
25523 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
25524 #endif /* _MULTI_DATAMODEL */
25525 
25526 	if (data == NULL) {
25527 		return (EINVAL);
25528 	}
25529 
25530 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25531 		return (ENXIO);
25532 	}
25533 
25534 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
25535 
25536 #ifdef _MULTI_DATAMODEL
25537 	switch (ddi_model_convert_from(flag & FMODELS)) {
25538 	case DDI_MODEL_ILP32:
25539 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
25540 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25541 			return (EFAULT);
25542 		}
25543 		/*
25544 		 * Convert the ILP32 uscsi data from the
25545 		 * application to LP64 for internal use.
25546 		 */
25547 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
25548 		break;
25549 	case DDI_MODEL_NONE:
25550 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25551 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25552 			return (EFAULT);
25553 		}
25554 		break;
25555 	}
25556 #else /* ! _MULTI_DATAMODEL */
25557 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
25558 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25559 		return (EFAULT);
25560 	}
25561 #endif /* _MULTI_DATAMODEL */
25562 
25563 	/*
	 * Since MMC-2 allows at most 3 bytes for the transfer length,
	 * check that the requested length fits in 3 bytes.
25566 	 */
25567 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
25568 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
25569 		    "cdrom transfer length too large: %d (limit %d)\n",
25570 		    cdxa->cdxa_length, 0xFFFFFF);
25571 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25572 		return (EINVAL);
25573 	}
25574 
25575 	switch (cdxa->cdxa_format) {
25576 	case CDROM_XA_DATA:
25577 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
25578 		read_flags = 0x10;
25579 		break;
25580 	case CDROM_XA_SECTOR_DATA:
25581 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
25582 		read_flags = 0xf8;
25583 		break;
25584 	case CDROM_XA_DATA_W_ERROR:
25585 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
25586 		read_flags = 0xfc;
25587 		break;
25588 	default:
25589 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25590 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
25591 		    cdxa->cdxa_format);
25592 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25593 		return (EINVAL);
25594 	}
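	/*
	 * The read_flags value chosen above goes into byte 9 of the
	 * READ CD CDB and selects which sector fields are returned:
	 * 0x10 returns the user data only (2048 bytes), 0xf8 the whole
	 * raw sector (2352 bytes), and 0xfc additionally includes the
	 * C2 error bits (2646 bytes in total).
	 */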
25595 
25596 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25597 	bzero(cdb, CDB_GROUP5);
25598 	if (un->un_f_mmc_cap == TRUE) {
25599 		cdb[0] = (char)SCMD_READ_CD;
25600 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25601 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25602 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25603 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25604 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25605 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25606 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
25607 		cdb[9] = (char)read_flags;
25608 	} else {
25609 		/*
		 * Note: A vendor specific command (0xDB) is being used here
		 * to request a read of all subcodes.
25612 		 */
25613 		cdb[0] = (char)SCMD_READ_CDXA;
25614 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
25615 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
25616 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
25617 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
25618 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
25619 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
25620 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
25621 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
25622 		cdb[10] = cdxa->cdxa_format;
25623 	}
25624 	com->uscsi_cdb	   = cdb;
25625 	com->uscsi_cdblen  = CDB_GROUP5;
25626 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
25627 	com->uscsi_buflen  = buflen;
25628 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
25629 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
25630 	    SD_PATH_STANDARD);
25631 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
25632 	kmem_free(com, sizeof (*com));
25633 	return (rval);
25634 }
25635 
25636 
25637 /*
25638  *    Function: sr_eject()
25639  *
25640  * Description: This routine is the driver entry point for handling CD-ROM
25641  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
25642  *
25643  *   Arguments: dev	- the device 'dev_t'
25644  *
25645  * Return Code: the code returned by sd_send_scsi_cmd()
25646  */
25647 
25648 static int
25649 sr_eject(dev_t dev)
25650 {
25651 	struct sd_lun	*un;
25652 	int		rval;
25653 
25654 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25655 	    (un->un_state == SD_STATE_OFFLINE)) {
25656 		return (ENXIO);
25657 	}
25658 
	/*
	 * To prevent race conditions with the eject command, keep track
	 * of an eject command as it progresses. If we are already
	 * handling an eject command in the driver for the given unit and
	 * another request to eject is received, immediately return
	 * EAGAIN so we don't lose the command if the current eject
	 * command fails.
	 */
25668 	mutex_enter(SD_MUTEX(un));
25669 	if (un->un_f_ejecting == TRUE) {
25670 		mutex_exit(SD_MUTEX(un));
25671 		return (EAGAIN);
25672 	}
25673 	un->un_f_ejecting = TRUE;
25674 	mutex_exit(SD_MUTEX(un));
25675 
25676 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
25677 	    SD_PATH_STANDARD)) != 0) {
25678 		mutex_enter(SD_MUTEX(un));
25679 		un->un_f_ejecting = FALSE;
25680 		mutex_exit(SD_MUTEX(un));
25681 		return (rval);
25682 	}
25683 
25684 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
25685 	    SD_PATH_STANDARD);
25686 
25687 	if (rval == 0) {
25688 		mutex_enter(SD_MUTEX(un));
25689 		sr_ejected(un);
25690 		un->un_mediastate = DKIO_EJECTED;
25691 		un->un_f_ejecting = FALSE;
25692 		cv_broadcast(&un->un_state_cv);
25693 		mutex_exit(SD_MUTEX(un));
25694 	} else {
25695 		mutex_enter(SD_MUTEX(un));
25696 		un->un_f_ejecting = FALSE;
25697 		mutex_exit(SD_MUTEX(un));
25698 	}
25699 	return (rval);
25700 }
25701 
25702 
25703 /*
25704  *    Function: sr_ejected()
25705  *
25706  * Description: This routine updates the soft state structure to invalidate the
25707  *		geometry information after the media has been ejected or a
25708  *		media eject has been detected.
25709  *
25710  *   Arguments: un - driver soft state (unit) structure
25711  */
25712 
25713 static void
25714 sr_ejected(struct sd_lun *un)
25715 {
25716 	struct sd_errstats *stp;
25717 
25718 	ASSERT(un != NULL);
25719 	ASSERT(mutex_owned(SD_MUTEX(un)));
25720 
25721 	un->un_f_blockcount_is_valid	= FALSE;
25722 	un->un_f_tgt_blocksize_is_valid	= FALSE;
25723 	mutex_exit(SD_MUTEX(un));
25724 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
25725 	mutex_enter(SD_MUTEX(un));
25726 
25727 	if (un->un_errstats != NULL) {
25728 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
25729 		stp->sd_capacity.value.ui64 = 0;
25730 	}
25731 
25732 	/* remove "capacity-of-device" properties */
25733 	(void) ddi_prop_remove(DDI_DEV_T_NONE, SD_DEVINFO(un),
25734 	    "device-nblocks");
25735 	(void) ddi_prop_remove(DDI_DEV_T_NONE, SD_DEVINFO(un),
25736 	    "device-blksize");
25737 }
25738 
25739 
25740 /*
25741  *    Function: sr_check_wp()
25742  *
 * Description: This routine checks the write protection of a removable
 *      media disk or hotpluggable device via the write protect bit of
 *      the Mode Page Header device specific field. Some devices choke
 *      on unsupported mode pages. To work around this issue, this
 *      routine uses the 0x3f mode page (request for all pages) for
 *      all device types.
25749  *
25750  *   Arguments: dev		- the device 'dev_t'
25751  *
25752  * Return Code: int indicating if the device is write protected (1) or not (0)
25753  *
25754  *     Context: Kernel thread.
25755  *
25756  */
25757 
25758 static int
25759 sr_check_wp(dev_t dev)
25760 {
25761 	struct sd_lun	*un;
25762 	uchar_t		device_specific;
25763 	uchar_t		*sense;
25764 	int		hdrlen;
25765 	int		rval = FALSE;
25766 
25767 	/*
25768 	 * Note: The return codes for this routine should be reworked to
25769 	 * properly handle the case of a NULL softstate.
25770 	 */
25771 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25772 		return (FALSE);
25773 	}
25774 
25775 	if (un->un_f_cfg_is_atapi == TRUE) {
25776 		/*
25777 		 * The mode page contents are not required; set the allocation
25778 		 * length for the mode page header only
25779 		 */
25780 		hdrlen = MODE_HEADER_LENGTH_GRP2;
25781 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25782 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
25783 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25784 			goto err_exit;
25785 		device_specific =
25786 		    ((struct mode_header_grp2 *)sense)->device_specific;
25787 	} else {
25788 		hdrlen = MODE_HEADER_LENGTH;
25789 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
25790 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
25791 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
25792 			goto err_exit;
25793 		device_specific =
25794 		    ((struct mode_header *)sense)->device_specific;
25795 	}
25796 
	/*
	 * The mode sense succeeded; report write protection from the
	 * device specific field. (When the mode sense fails above, we
	 * return FALSE: not all disks understand this query, and such
	 * devices are assumed to be writable.)
	 */
25802 	if (device_specific & WRITE_PROTECT) {
25803 		rval = TRUE;
25804 	}
25805 
25806 err_exit:
25807 	kmem_free(sense, hdrlen);
25808 	return (rval);
25809 }
25810 
25811 /*
25812  *    Function: sr_volume_ctrl()
25813  *
25814  * Description: This routine is the driver entry point for handling CD-ROM
25815  *		audio output volume ioctl requests. (CDROMVOLCTRL)
25816  *
25817  *   Arguments: dev	- the device 'dev_t'
25818  *		data	- pointer to user audio volume control structure
25819  *		flag	- this argument is a pass through to ddi_copyxxx()
25820  *			  directly from the mode argument of ioctl().
25821  *
25822  * Return Code: the code returned by sd_send_scsi_cmd()
25823  *		EFAULT if ddi_copyxxx() fails
25824  *		ENXIO if fail ddi_get_soft_state
25825  *		EINVAL if data pointer is NULL
25826  *
25827  */
25828 
25829 static int
25830 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
25831 {
25832 	struct sd_lun		*un;
25833 	struct cdrom_volctrl    volume;
25834 	struct cdrom_volctrl    *vol = &volume;
25835 	uchar_t			*sense_page;
25836 	uchar_t			*select_page;
25837 	uchar_t			*sense;
25838 	uchar_t			*select;
25839 	int			sense_buflen;
25840 	int			select_buflen;
25841 	int			rval;
25842 
25843 	if (data == NULL) {
25844 		return (EINVAL);
25845 	}
25846 
25847 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
25848 	    (un->un_state == SD_STATE_OFFLINE)) {
25849 		return (ENXIO);
25850 	}
25851 
25852 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
25853 		return (EFAULT);
25854 	}
25855 
25856 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25857 		struct mode_header_grp2		*sense_mhp;
25858 		struct mode_header_grp2		*select_mhp;
25859 		int				bd_len;
25860 
25861 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
25862 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
25863 		    MODEPAGE_AUDIO_CTRL_LEN;
25864 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25865 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25866 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
25867 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25868 		    SD_PATH_STANDARD)) != 0) {
25869 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
25870 			    "sr_volume_ctrl: Mode Sense Failed\n");
25871 			kmem_free(sense, sense_buflen);
25872 			kmem_free(select, select_buflen);
25873 			return (rval);
25874 		}
25875 		sense_mhp = (struct mode_header_grp2 *)sense;
25876 		select_mhp = (struct mode_header_grp2 *)select;
25877 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
25878 		    sense_mhp->bdesc_length_lo;
25879 		if (bd_len > MODE_BLK_DESC_LENGTH) {
25880 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25881 			    "sr_volume_ctrl: Mode Sense returned invalid "
25882 			    "block descriptor length\n");
25883 			kmem_free(sense, sense_buflen);
25884 			kmem_free(select, select_buflen);
25885 			return (EIO);
25886 		}
25887 		sense_page = (uchar_t *)
25888 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
25889 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
25890 		select_mhp->length_msb = 0;
25891 		select_mhp->length_lsb = 0;
25892 		select_mhp->bdesc_length_hi = 0;
25893 		select_mhp->bdesc_length_lo = 0;
25894 	} else {
25895 		struct mode_header		*sense_mhp, *select_mhp;
25896 
25897 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25898 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
25899 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
25900 		select = kmem_zalloc(select_buflen, KM_SLEEP);
25901 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
25902 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
25903 		    SD_PATH_STANDARD)) != 0) {
25904 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25905 			    "sr_volume_ctrl: Mode Sense Failed\n");
25906 			kmem_free(sense, sense_buflen);
25907 			kmem_free(select, select_buflen);
25908 			return (rval);
25909 		}
25910 		sense_mhp  = (struct mode_header *)sense;
25911 		select_mhp = (struct mode_header *)select;
25912 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
25913 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25914 			    "sr_volume_ctrl: Mode Sense returned invalid "
25915 			    "block descriptor length\n");
25916 			kmem_free(sense, sense_buflen);
25917 			kmem_free(select, select_buflen);
25918 			return (EIO);
25919 		}
25920 		sense_page = (uchar_t *)
25921 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25922 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
25923 		select_mhp->length = 0;
25924 		select_mhp->bdesc_length = 0;
25925 	}
25926 	/*
25927 	 * Note: An audio control data structure could be created and overlayed
25928 	 * on the following in place of the array indexing method implemented.
25929 	 */
25930 
25931 	/* Build the select data for the user volume data */
25932 	select_page[0] = MODEPAGE_AUDIO_CTRL;
25933 	select_page[1] = 0xE;
25934 	/* Set the immediate bit */
25935 	select_page[2] = 0x04;
25936 	/* Zero out reserved fields */
25937 	select_page[3] = 0x00;
25938 	select_page[4] = 0x00;
25939 	/* Return sense data for fields not to be modified */
25940 	select_page[5] = sense_page[5];
25941 	select_page[6] = sense_page[6];
25942 	select_page[7] = sense_page[7];
25943 	/* Set the user specified volume levels for channel 0 and 1 */
25944 	select_page[8] = 0x01;
25945 	select_page[9] = vol->channel0;
25946 	select_page[10] = 0x02;
25947 	select_page[11] = vol->channel1;
	/*
	 * Channels 2 and 3 are currently unsupported, so return the
	 * sense data.
	 */
25949 	select_page[12] = sense_page[12];
25950 	select_page[13] = sense_page[13];
25951 	select_page[14] = sense_page[14];
25952 	select_page[15] = sense_page[15];
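	/*
	 * The channel volume values come straight from the CDROMVOLCTRL
	 * caller; 0 is muted and 0xFF is maximum volume.
	 */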
25953 
25954 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
25955 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
25956 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25957 	} else {
25958 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
25959 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
25960 	}
25961 
25962 	kmem_free(sense, sense_buflen);
25963 	kmem_free(select, select_buflen);
25964 	return (rval);
25965 }
25966 
25967 
25968 /*
25969  *    Function: sr_read_sony_session_offset()
25970  *
25971  * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests for session offset information
 *		(CDROMREADOFFSET). The address of the first track in the
 *		last session of a multi-session CD-ROM is returned.
25975  *
25976  *		Note: This routine uses a vendor specific key value in the
25977  *		command control field without implementing any vendor check here
25978  *		or in the ioctl routine.
25979  *
25980  *   Arguments: dev	- the device 'dev_t'
25981  *		data	- pointer to an int to hold the requested address
25982  *		flag	- this argument is a pass through to ddi_copyxxx()
25983  *			  directly from the mode argument of ioctl().
25984  *
25985  * Return Code: the code returned by sd_send_scsi_cmd()
25986  *		EFAULT if ddi_copyxxx() fails
25987  *		ENXIO if fail ddi_get_soft_state
25988  *		EINVAL if data pointer is NULL
25989  */
25990 
25991 static int
25992 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
25993 {
25994 	struct sd_lun		*un;
25995 	struct uscsi_cmd	*com;
25996 	caddr_t			buffer;
25997 	char			cdb[CDB_GROUP1];
25998 	int			session_offset = 0;
25999 	int			rval;
26000 
26001 	if (data == NULL) {
26002 		return (EINVAL);
26003 	}
26004 
26005 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26006 	    (un->un_state == SD_STATE_OFFLINE)) {
26007 		return (ENXIO);
26008 	}
26009 
26010 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
26011 	bzero(cdb, CDB_GROUP1);
26012 	cdb[0] = SCMD_READ_TOC;
26013 	/*
	 * Bytes 7 & 8 are the allocation length (12 bytes) for a single
	 * entry: a 4 byte TOC response header plus 8 bytes of response
	 * data.
26016 	 */
26017 	cdb[8] = SONY_SESSION_OFFSET_LEN;
26018 	/* Byte 9 is the control byte. A vendor specific value is used */
26019 	cdb[9] = SONY_SESSION_OFFSET_KEY;
26020 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26021 	com->uscsi_cdb = cdb;
26022 	com->uscsi_cdblen = CDB_GROUP1;
26023 	com->uscsi_bufaddr = buffer;
26024 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
26025 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
26026 
26027 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26028 	    SD_PATH_STANDARD);
26029 	if (rval != 0) {
26030 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
26031 		kmem_free(com, sizeof (*com));
26032 		return (rval);
26033 	}
26034 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
26035 		session_offset =
26036 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
26037 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
26038 		/*
		 * The offset is returned in units of the current target
		 * block size. Convert it to 2K byte blocks before
		 * returning it to the user.
26041 		 */
26042 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
26043 			session_offset >>= 2;
26044 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
26045 			session_offset >>= 1;
26046 		}
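		/* For example, a 512 byte block offset of 11400 becomes 2850. */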
26047 	}
26048 
26049 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
26050 		rval = EFAULT;
26051 	}
26052 
26053 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
26054 	kmem_free(com, sizeof (*com));
26055 	return (rval);
26056 }
26057 
26058 
26059 /*
26060  *    Function: sd_wm_cache_constructor()
26061  *
26062  * Description: Cache Constructor for the wmap cache for the read/modify/write
26063  * 		devices.
26064  *
26065  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
26066  *		un	- sd_lun structure for the device.
 *		flags	- the kmem flags passed to the constructor
26068  *
26069  * Return Code: 0 on success.
26070  *		-1 on failure.
26071  */
26072 
26073 /*ARGSUSED*/
26074 static int
26075 sd_wm_cache_constructor(void *wm, void *un, int flags)
26076 {
26077 	bzero(wm, sizeof (struct sd_w_map));
26078 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
26079 	return (0);
26080 }
26081 
26082 
26083 /*
26084  *    Function: sd_wm_cache_destructor()
26085  *
26086  * Description: Cache destructor for the wmap cache for the read/modify/write
26087  * 		devices.
26088  *
 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
26090  *		un	- sd_lun structure for the device.
26091  */
26092 /*ARGSUSED*/
26093 static void
26094 sd_wm_cache_destructor(void *wm, void *un)
26095 {
26096 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
26097 }
26098 
26099 
26100 /*
26101  *    Function: sd_range_lock()
26102  *
 * Description: Lock the specified range of blocks to ensure that a
 *		read-modify-write is atomic and that no other I/O writes
 *		to the same location. The range is specified in terms
 *		of start and end blocks. Block numbers are the actual
 *		media block numbers, not system block numbers.
26108  *
26109  *   Arguments: un	- sd_lun structure for the device.
26110  *		startb - The starting block number
26111  *		endb - The end block number
26112  *		typ - type of i/o - simple/read_modify_write
26113  *
26114  * Return Code: wm  - pointer to the wmap structure.
26115  *
26116  *     Context: This routine can sleep.
26117  */
26118 
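/*
 * A sketch of the state machine implemented below:
 *
 *	SD_WM_CHK_LIST:   range busy -> SD_WM_WAIT_MAP
 *	                  range free -> SD_WM_LOCK_RANGE
 *	SD_WM_LOCK_RANGE: got a wmap -> SD_WM_DONE
 *	                  slept for a wmap (mutex dropped) -> SD_WM_CHK_LIST
 *	SD_WM_WAIT_MAP:   woken up -> SD_WM_CHK_LIST
 */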
26119 static struct sd_w_map *
26120 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
26121 {
26122 	struct sd_w_map *wmp = NULL;
26123 	struct sd_w_map *sl_wmp = NULL;
26124 	struct sd_w_map *tmp_wmp;
26125 	wm_state state = SD_WM_CHK_LIST;
26126 
26127 
26128 	ASSERT(un != NULL);
26129 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26130 
26131 	mutex_enter(SD_MUTEX(un));
26132 
26133 	while (state != SD_WM_DONE) {
26134 
26135 		switch (state) {
26136 		case SD_WM_CHK_LIST:
26137 			/*
26138 			 * This is the starting state. Check the wmap list
26139 			 * to see if the range is currently available.
26140 			 */
26141 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
26142 				/*
26143 				 * If this is a simple write and no rmw
26144 				 * i/o is pending then try to lock the
26145 				 * range as the range should be available.
26146 				 */
26147 				state = SD_WM_LOCK_RANGE;
26148 			} else {
26149 				tmp_wmp = sd_get_range(un, startb, endb);
26150 				if (tmp_wmp != NULL) {
26151 					if ((wmp != NULL) && ONLIST(un, wmp)) {
26152 						/*
					 * Should not keep an on-list
					 * wmp while waiting; this macro
					 * also sets wmp = NULL.
26156 						 */
26157 						FREE_ONLIST_WMAP(un, wmp);
26158 					}
26159 					/*
					 * sl_wmp is the wmap on which the
					 * wait is done. Since tmp_wmp points
					 * to the in-use wmap, set sl_wmp to
					 * tmp_wmp and change the state to
					 * wait.
26164 					 */
26165 					sl_wmp = tmp_wmp;
26166 					state = SD_WM_WAIT_MAP;
26167 				} else {
26168 					state = SD_WM_LOCK_RANGE;
26169 				}
26170 
26171 			}
26172 			break;
26173 
26174 		case SD_WM_LOCK_RANGE:
26175 			ASSERT(un->un_wm_cache);
26176 			/*
			 * The range needs to be locked; try to get a wmap.
			 * First attempt it with KM_NOSLEEP: we want to avoid
			 * sleeping if possible, since we would have to
			 * release the sd mutex in order to sleep.
26181 			 */
26182 			if (wmp == NULL)
26183 				wmp = kmem_cache_alloc(un->un_wm_cache,
26184 				    KM_NOSLEEP);
26185 			if (wmp == NULL) {
26186 				mutex_exit(SD_MUTEX(un));
26187 				_NOTE(DATA_READABLE_WITHOUT_LOCK
26188 				    (sd_lun::un_wm_cache))
26189 				wmp = kmem_cache_alloc(un->un_wm_cache,
26190 				    KM_SLEEP);
26191 				mutex_enter(SD_MUTEX(un));
26192 				/*
				 * We released the mutex, so recheck by going
				 * back to the check-list state.
26195 				 */
26196 				state = SD_WM_CHK_LIST;
26197 			} else {
26198 				/*
				 * We exit the state machine since we have
				 * the wmap. Do the housekeeping first: place
				 * the wmap on the wmap list if it is not on
				 * it already, and then set the state to done.
26203 				 */
26204 				wmp->wm_start = startb;
26205 				wmp->wm_end = endb;
26206 				wmp->wm_flags = typ | SD_WM_BUSY;
26207 				if (typ & SD_WTYPE_RMW) {
26208 					un->un_rmw_count++;
26209 				}
26210 				/*
26211 				 * If not already on the list then link
26212 				 */
26213 				if (!ONLIST(un, wmp)) {
26214 					wmp->wm_next = un->un_wm;
26215 					wmp->wm_prev = NULL;
26216 					if (wmp->wm_next)
26217 						wmp->wm_next->wm_prev = wmp;
26218 					un->un_wm = wmp;
26219 				}
26220 				state = SD_WM_DONE;
26221 			}
26222 			break;
26223 
26224 		case SD_WM_WAIT_MAP:
26225 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
26226 			/*
26227 			 * Wait is done on sl_wmp, which is set in the
26228 			 * check_list state.
26229 			 */
26230 			sl_wmp->wm_wanted_count++;
26231 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
26232 			sl_wmp->wm_wanted_count--;
26233 			/*
26234 			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
26236 			 * waiting for it.
26237 			 */
26238 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
26239 			if (sl_wmp->wm_wanted_count == 0) {
26240 				if (wmp != NULL)
26241 					CHK_N_FREEWMP(un, wmp);
26242 				wmp = sl_wmp;
26243 			}
26244 			sl_wmp = NULL;
26245 			/*
26246 			 * After waking up, need to recheck for availability of
26247 			 * range.
26248 			 */
26249 			state = SD_WM_CHK_LIST;
26250 			break;
26251 
26252 		default:
26253 			panic("sd_range_lock: "
26254 			    "Unknown state %d in sd_range_lock", state);
26255 			/*NOTREACHED*/
26256 		} /* switch(state) */
26257 
26258 	} /* while(state != SD_WM_DONE) */
26259 
26260 	mutex_exit(SD_MUTEX(un));
26261 
26262 	ASSERT(wmp != NULL);
26263 
26264 	return (wmp);
26265 }
26266 
26267 
26268 /*
26269  *    Function: sd_get_range()
26270  *
 * Description: Determine whether there is any I/O overlapping this one.
 *		Returns the write map of the first such I/O, NULL otherwise.
26273  *
26274  *   Arguments: un	- sd_lun structure for the device.
26275  *		startb - The starting block number
26276  *		endb - The end block number
26277  *
26278  * Return Code: wm  - pointer to the wmap structure.
26279  */
26280 
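/*
 * For example, a busy map covering blocks [100, 199] is found for
 * queries such as [50, 150], [150, 250], or [120, 130].
 */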
26281 static struct sd_w_map *
26282 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
26283 {
26284 	struct sd_w_map *wmp;
26285 
26286 	ASSERT(un != NULL);
26287 
26288 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
26289 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
26290 			continue;
26291 		}
26292 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
26293 			break;
26294 		}
		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
			break;
		}
		/*
		 * Also catch the case where the new range entirely
		 * contains this busy range.
		 */
		if ((startb < wmp->wm_start) && (endb > wmp->wm_end)) {
			break;
		}
26298 	}
26299 
26300 	return (wmp);
26301 }
26302 
26303 
26304 /*
26305  *    Function: sd_free_inlist_wmap()
26306  *
26307  * Description: Unlink and free a write map struct.
26308  *
26309  *   Arguments: un      - sd_lun structure for the device.
26310  *		wmp	- sd_w_map which needs to be unlinked.
26311  */
26312 
26313 static void
26314 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
26315 {
26316 	ASSERT(un != NULL);
26317 
26318 	if (un->un_wm == wmp) {
26319 		un->un_wm = wmp->wm_next;
26320 	} else {
26321 		wmp->wm_prev->wm_next = wmp->wm_next;
26322 	}
26323 
26324 	if (wmp->wm_next) {
26325 		wmp->wm_next->wm_prev = wmp->wm_prev;
26326 	}
26327 
26328 	wmp->wm_next = wmp->wm_prev = NULL;
26329 
26330 	kmem_cache_free(un->un_wm_cache, wmp);
26331 }
26332 
26333 
26334 /*
26335  *    Function: sd_range_unlock()
26336  *
26337  * Description: Unlock the range locked by wm.
26338  *		Free write map if nobody else is waiting on it.
26339  *
26340  *   Arguments: un      - sd_lun structure for the device.
 *              wm      - sd_w_map whose range is to be unlocked.
26342  */
26343 
26344 static void
26345 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
26346 {
26347 	ASSERT(un != NULL);
26348 	ASSERT(wm != NULL);
26349 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26350 
26351 	mutex_enter(SD_MUTEX(un));
26352 
26353 	if (wm->wm_flags & SD_WTYPE_RMW) {
26354 		un->un_rmw_count--;
26355 	}
26356 
26357 	if (wm->wm_wanted_count) {
26358 		wm->wm_flags = 0;
26359 		/*
26360 		 * Broadcast that the wmap is available now.
26361 		 */
26362 		cv_broadcast(&wm->wm_avail);
26363 	} else {
26364 		/*
		 * If no one is waiting on the map, it should be freed.
26366 		 */
26367 		sd_free_inlist_wmap(un, wm);
26368 	}
26369 
26370 	mutex_exit(SD_MUTEX(un));
26371 }
26372 
26373 
26374 /*
26375  *    Function: sd_read_modify_write_task
26376  *
26377  * Description: Called from a taskq thread to initiate the write phase of
26378  *		a read-modify-write request.  This is used for targets where
26379  *		un->un_sys_blocksize != un->un_tgt_blocksize.
26380  *
26381  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
26382  *
26383  *     Context: Called under taskq thread context.
26384  */
26385 
26386 static void
26387 sd_read_modify_write_task(void *arg)
26388 {
26389 	struct sd_mapblocksize_info	*bsp;
26390 	struct buf	*bp;
26391 	struct sd_xbuf	*xp;
26392 	struct sd_lun	*un;
26393 
26394 	bp = arg;	/* The bp is given in arg */
26395 	ASSERT(bp != NULL);
26396 
26397 	/* Get the pointer to the layer-private data struct */
26398 	xp = SD_GET_XBUF(bp);
26399 	ASSERT(xp != NULL);
26400 	bsp = xp->xb_private;
26401 	ASSERT(bsp != NULL);
26402 
26403 	un = SD_GET_UN(bp);
26404 	ASSERT(un != NULL);
26405 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26406 
26407 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26408 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
26409 
26410 	/*
26411 	 * This is the write phase of a read-modify-write request, called
	 * in the context of a taskq thread in response to the read portion
	 * of the rmw request completing under interrupt
26414 	 * context. The write request must be sent from here down the iostart
26415 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
26416 	 * we use the layer index saved in the layer-private data area.
26417 	 */
26418 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
26419 
26420 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
26421 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
26422 }
26423 
26424 
26425 /*
26426  *    Function: sddump_do_read_of_rmw()
26427  *
 * Description: This routine is called from sddump. If sddump is called
 *		with an I/O that is not aligned on a device blocksize
 *		boundary, the write has to be converted to a
 *		read-modify-write. Do the read part here in order to keep
 *		sddump simple. Note that the sd_mutex is held across the
 *		call to this routine.
26434  *
26435  *   Arguments: un	- sd_lun
26436  *		blkno	- block number in terms of media block size.
26437  *		nblk	- number of blocks.
26438  *		bpp	- pointer to pointer to the buf structure. On return
26439  *			from this function, *bpp points to the valid buffer
26440  *			to which the write has to be done.
26441  *
26442  * Return Code: 0 for success or errno-type return code
26443  */
26444 
26445 static int
26446 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
26447 	struct buf **bpp)
26448 {
26449 	int err;
26450 	int i;
26451 	int rval;
26452 	struct buf *bp;
26453 	struct scsi_pkt *pkt = NULL;
26454 	uint32_t target_blocksize;
26455 
26456 	ASSERT(un != NULL);
26457 	ASSERT(mutex_owned(SD_MUTEX(un)));
26458 
26459 	target_blocksize = un->un_tgt_blocksize;
26460 
26461 	mutex_exit(SD_MUTEX(un));
26462 
26463 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
26464 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
26465 	if (bp == NULL) {
26466 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26467 		    "no resources for dumping; giving up");
26468 		err = ENOMEM;
26469 		goto done;
26470 	}
26471 
26472 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
26473 	    blkno, nblk);
26474 	if (rval != 0) {
26475 		scsi_free_consistent_buf(bp);
26476 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26477 		    "no resources for dumping; giving up");
26478 		err = ENOMEM;
26479 		goto done;
26480 	}
26481 
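	/*
	 * In dump context interrupts may not be available, so the
	 * packet is marked FLAG_NOINTR and driven by sd_scsi_poll()
	 * below.
	 */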
26482 	pkt->pkt_flags |= FLAG_NOINTR;
26483 
26484 	err = EIO;
26485 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26486 
26487 		/*
26488 		 * Scsi_poll returns 0 (success) if the command completes and
26489 		 * the status block is STATUS_GOOD.  We should only check
26490 		 * errors if this condition is not true.  Even then we should
26491 		 * send our own request sense packet only if we have a check
26492 		 * condition and auto request sense has not been performed by
26493 		 * the hba.
26494 		 */
26495 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
26496 
26497 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
26498 			err = 0;
26499 			break;
26500 		}
26501 
26502 		/*
26503 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
26504 		 * no need to read RQS data.
26505 		 */
26506 		if (pkt->pkt_reason == CMD_DEV_GONE) {
26507 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26508 			    "Device is gone\n");
26509 			break;
26510 		}
26511 
26512 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
26513 			SD_INFO(SD_LOG_DUMP, un,
26514 			    "sddump: read failed with CHECK, try # %d\n", i);
26515 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
26516 				(void) sd_send_polled_RQS(un);
26517 			}
26518 
26519 			continue;
26520 		}
26521 
26522 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
26523 			int reset_retval = 0;
26524 
26525 			SD_INFO(SD_LOG_DUMP, un,
26526 			    "sddump: read failed with BUSY, try # %d\n", i);
26527 
26528 			if (un->un_f_lun_reset_enabled == TRUE) {
26529 				reset_retval = scsi_reset(SD_ADDRESS(un),
26530 				    RESET_LUN);
26531 			}
26532 			if (reset_retval == 0) {
26533 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26534 			}
26535 			(void) sd_send_polled_RQS(un);
26536 
26537 		} else {
26538 			SD_INFO(SD_LOG_DUMP, un,
26539 			    "sddump: read failed with 0x%x, try # %d\n",
26540 			    SD_GET_PKT_STATUS(pkt), i);
26541 			mutex_enter(SD_MUTEX(un));
26542 			sd_reset_target(un, pkt);
26543 			mutex_exit(SD_MUTEX(un));
26544 		}
26545 
26546 		/*
26547 		 * If we are not getting anywhere with lun/target resets,
26548 		 * let's reset the bus.
26549 		 */
26550 		if (i > SD_NDUMP_RETRIES/2) {
26551 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26552 			(void) sd_send_polled_RQS(un);
26553 		}
26554 
26555 	}
26556 	scsi_destroy_pkt(pkt);
26557 
26558 	if (err != 0) {
26559 		scsi_free_consistent_buf(bp);
26560 		*bpp = NULL;
26561 	} else {
26562 		*bpp = bp;
26563 	}
26564 
26565 done:
26566 	mutex_enter(SD_MUTEX(un));
26567 	return (err);
26568 }
26569 
26570 
26571 /*
26572  *    Function: sd_failfast_flushq
26573  *
26574  * Description: Take all bp's on the wait queue that have B_FAILFAST set
26575  *		in b_flags and move them onto the failfast queue, then kick
26576  *		off a thread to return all bp's on the failfast queue to
26577  *		their owners with an error set.
26578  *
26579  *   Arguments: un - pointer to the soft state struct for the instance.
26580  *
26581  *     Context: may execute in interrupt context.
26582  */
26583 
26584 static void
26585 sd_failfast_flushq(struct sd_lun *un)
26586 {
26587 	struct buf *bp;
26588 	struct buf *next_waitq_bp;
26589 	struct buf *prev_waitq_bp = NULL;
26590 
26591 	ASSERT(un != NULL);
26592 	ASSERT(mutex_owned(SD_MUTEX(un)));
26593 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
26594 	ASSERT(un->un_failfast_bp == NULL);
26595 
26596 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26597 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
26598 
26599 	/*
26600 	 * Check if we should flush all bufs when entering failfast state, or
26601 	 * just those with B_FAILFAST set.
26602 	 */
26603 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
26604 		/*
26605 		 * Move *all* bp's on the wait queue to the failfast flush
26606 		 * queue, including those that do NOT have B_FAILFAST set.
26607 		 */
26608 		if (un->un_failfast_headp == NULL) {
26609 			ASSERT(un->un_failfast_tailp == NULL);
26610 			un->un_failfast_headp = un->un_waitq_headp;
26611 		} else {
26612 			ASSERT(un->un_failfast_tailp != NULL);
26613 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
26614 		}
26615 
26616 		un->un_failfast_tailp = un->un_waitq_tailp;
26617 
26618 		/* update kstat for each bp moved out of the waitq */
26619 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
26620 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26621 		}
26622 
26623 		/* empty the waitq */
26624 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
26625 
26626 	} else {
26627 		/*
26628 		 * Go thru the wait queue, pick off all entries with
26629 		 * B_FAILFAST set, and move these onto the failfast queue.
26630 		 */
26631 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
26632 			/*
26633 			 * Save the pointer to the next bp on the wait queue,
26634 			 * so we get to it on the next iteration of this loop.
26635 			 */
26636 			next_waitq_bp = bp->av_forw;
26637 
26638 			/*
26639 			 * If this bp from the wait queue does NOT have
26640 			 * B_FAILFAST set, just move on to the next element
26641 			 * in the wait queue. Note, this is the only place
26642 			 * where it is correct to set prev_waitq_bp.
26643 			 */
26644 			if ((bp->b_flags & B_FAILFAST) == 0) {
26645 				prev_waitq_bp = bp;
26646 				continue;
26647 			}
26648 
26649 			/*
26650 			 * Remove the bp from the wait queue.
26651 			 */
26652 			if (bp == un->un_waitq_headp) {
26653 				/* The bp is the first element of the waitq. */
26654 				un->un_waitq_headp = next_waitq_bp;
26655 				if (un->un_waitq_headp == NULL) {
26656 					/* The wait queue is now empty */
26657 					un->un_waitq_tailp = NULL;
26658 				}
26659 			} else {
26660 				/*
26661 				 * The bp is either somewhere in the middle
26662 				 * or at the end of the wait queue.
26663 				 */
26664 				ASSERT(un->un_waitq_headp != NULL);
26665 				ASSERT(prev_waitq_bp != NULL);
26666 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
26667 				    == 0);
26668 				if (bp == un->un_waitq_tailp) {
26669 					/* bp is the last entry on the waitq. */
26670 					ASSERT(next_waitq_bp == NULL);
26671 					un->un_waitq_tailp = prev_waitq_bp;
26672 				}
26673 				prev_waitq_bp->av_forw = next_waitq_bp;
26674 			}
26675 			bp->av_forw = NULL;
26676 
26677 			/*
26678 			 * update kstat since the bp is moved out of
26679 			 * the waitq
26680 			 */
26681 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
26682 
26683 			/*
26684 			 * Now put the bp onto the failfast queue.
26685 			 */
26686 			if (un->un_failfast_headp == NULL) {
26687 				/* failfast queue is currently empty */
26688 				ASSERT(un->un_failfast_tailp == NULL);
26689 				un->un_failfast_headp =
26690 				    un->un_failfast_tailp = bp;
26691 			} else {
26692 				/* Add the bp to the end of the failfast q */
26693 				ASSERT(un->un_failfast_tailp != NULL);
26694 				ASSERT(un->un_failfast_tailp->b_flags &
26695 				    B_FAILFAST);
26696 				un->un_failfast_tailp->av_forw = bp;
26697 				un->un_failfast_tailp = bp;
26698 			}
26699 		}
26700 	}
26701 
26702 	/*
26703 	 * Now return all bp's on the failfast queue to their owners.
26704 	 */
26705 	while ((bp = un->un_failfast_headp) != NULL) {
26706 
26707 		un->un_failfast_headp = bp->av_forw;
26708 		if (un->un_failfast_headp == NULL) {
26709 			un->un_failfast_tailp = NULL;
26710 		}
26711 
26712 		/*
26713 		 * We want to return the bp with a failure error code, but
26714 		 * we do not want a call to sd_start_cmds() to occur here,
26715 		 * so use sd_return_failed_command_no_restart() instead of
26716 		 * sd_return_failed_command().
26717 		 */
26718 		sd_return_failed_command_no_restart(un, bp, EIO);
26719 	}
26720 
26721 	/* Flush the xbuf queues if required. */
26722 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
26723 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
26724 	}
26725 
26726 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
26727 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
26728 }
26729 
26730 
26731 /*
26732  *    Function: sd_failfast_flushq_callback
26733  *
26734  * Description: Return TRUE if the given bp meets the criteria for failfast
26735  *		flushing. Used with ddi_xbuf_flushq(9F).
26736  *
26737  *   Arguments: bp - ptr to buf struct to be examined.
26738  *
26739  *     Context: Any
26740  */
26741 
26742 static int
26743 sd_failfast_flushq_callback(struct buf *bp)
26744 {
26745 	/*
26746 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
26747 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
26748 	 */
26749 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
26750 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
26751 }
26752 
26753 
26754 
26755 #if defined(__i386) || defined(__amd64)
26756 /*
26757  * Function: sd_setup_next_xfer
26758  *
26759  * Description: Prepare next I/O operation using DMA_PARTIAL
26760  *
26761  */
26762 
26763 static int
26764 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
26765     struct scsi_pkt *pkt, struct sd_xbuf *xp)
26766 {
26767 	ssize_t	num_blks_not_xfered;
26768 	daddr_t	strt_blk_num;
26769 	ssize_t	bytes_not_xfered;
26770 	int	rval;
26771 
26772 	ASSERT(pkt->pkt_resid == 0);
26773 
26774 	/*
26775 	 * Calculate next block number and amount to be transferred.
26776 	 *
26777 	 * How much data NOT transfered to the HBA yet.
26778 	 */
26779 	bytes_not_xfered = xp->xb_dma_resid;
26780 
26781 	/*
26782 	 * figure how many blocks NOT transfered to the HBA yet.
26783 	 */
26784 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
26785 
26786 	/*
26787 	 * set starting block number to the end of what WAS transfered.
26788 	 */
26789 	strt_blk_num = xp->xb_blkno +
26790 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
26791 
26792 	/*
26793 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
26795 	 * the disk mutex here.
26796 	 */
26797 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
26798 	    strt_blk_num, num_blks_not_xfered);
26799 
26800 	if (rval == 0) {
26801 
26802 		/*
26803 		 * Success.
26804 		 *
26805 		 * Adjust things if there are still more blocks to be
		 * transferred.
26807 		 */
26808 		xp->xb_dma_resid = pkt->pkt_resid;
26809 		pkt->pkt_resid = 0;
26810 
26811 		return (1);
26812 	}
26813 
26814 	/*
	 * There's really only one possible error return from
	 * sd_setup_next_rw_pkt, and it occurs when scsi_init_pkt
26817 	 * returns NULL.
26818 	 */
26819 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
26820 
26821 	bp->b_resid = bp->b_bcount;
26822 	bp->b_flags |= B_ERROR;
26823 
26824 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26825 	    "Error setting up next portion of DMA transfer\n");
26826 
26827 	return (0);
26828 }
26829 #endif
26830 
26831 /*
26832  *    Function: sd_panic_for_res_conflict
26833  *
26834  * Description: Call panic with a string formatted with "Reservation Conflict"
26835  *		and a human readable identifier indicating the SD instance
26836  *		that experienced the reservation conflict.
26837  *
26838  *   Arguments: un - pointer to the soft state struct for the instance.
26839  *
26840  *     Context: may execute in interrupt context.
26841  */
26842 
26843 #define	SD_RESV_CONFLICT_FMT_LEN 40
26844 void
26845 sd_panic_for_res_conflict(struct sd_lun *un)
26846 {
	char panic_str[SD_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
26848 	char path_str[MAXPATHLEN];
26849 
26850 	(void) snprintf(panic_str, sizeof (panic_str),
26851 	    "Reservation Conflict\nDisk: %s",
26852 	    ddi_pathname(SD_DEVINFO(un), path_str));
26853 
26854 	panic(panic_str);
26855 }
26856 
26857 /*
26858  * Note: The following sd_faultinjection_ioctl( ) routines implement
26859  * driver support for handling fault injection for error analysis
26860  * causing faults in multiple layers of the driver.
26861  *
26862  */
26863 
26864 #ifdef SD_FAULT_INJECTION
26865 static uint_t   sd_fault_injection_on = 0;
26866 
26867 /*
26868  *    Function: sd_faultinjection_ioctl()
26869  *
 * Description: This routine is the driver entry point for handling
 *              fault injection ioctls to inject errors into the
 *              layered model.
 *
 *   Arguments: cmd	- the ioctl cmd received
 *		arg	- the ioctl argument from the user; also used to
 *			  return data
 *		un	- pointer to the soft state struct for the instance
26876  */
26877 
26878 static void
sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
{
26881 	uint_t i;
26882 	uint_t rval;
26883 
26884 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
26885 
26886 	mutex_enter(SD_MUTEX(un));
26887 
26888 	switch (cmd) {
26889 	case SDIOCRUN:
26890 		/* Allow pushed faults to be injected */
26891 		SD_INFO(SD_LOG_SDTEST, un,
26892 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
26893 
26894 		sd_fault_injection_on = 1;
26895 
26896 		SD_INFO(SD_LOG_IOERR, un,
26897 		    "sd_faultinjection_ioctl: run finished\n");
26898 		break;
26899 
26900 	case SDIOCSTART:
26901 		/* Start Injection Session */
26902 		SD_INFO(SD_LOG_SDTEST, un,
26903 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
26904 
26905 		sd_fault_injection_on = 0;
26906 		un->sd_injection_mask = 0xFFFFFFFF;
26907 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26908 			un->sd_fi_fifo_pkt[i] = NULL;
26909 			un->sd_fi_fifo_xb[i] = NULL;
26910 			un->sd_fi_fifo_un[i] = NULL;
26911 			un->sd_fi_fifo_arq[i] = NULL;
26912 		}
26913 		un->sd_fi_fifo_start = 0;
26914 		un->sd_fi_fifo_end = 0;
26915 
26916 		mutex_enter(&(un->un_fi_mutex));
26917 		un->sd_fi_log[0] = '\0';
26918 		un->sd_fi_buf_len = 0;
26919 		mutex_exit(&(un->un_fi_mutex));
26920 
26921 		SD_INFO(SD_LOG_IOERR, un,
26922 		    "sd_faultinjection_ioctl: start finished\n");
26923 		break;
26924 
26925 	case SDIOCSTOP:
26926 		/* Stop Injection Session */
26927 		SD_INFO(SD_LOG_SDTEST, un,
26928 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
26929 		sd_fault_injection_on = 0;
26930 		un->sd_injection_mask = 0x0;
26931 
		/* Empty stray or unused structs from the fifo */
26933 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
26934 			if (un->sd_fi_fifo_pkt[i] != NULL) {
26935 				kmem_free(un->sd_fi_fifo_pkt[i],
26936 				    sizeof (struct sd_fi_pkt));
26937 			}
26938 			if (un->sd_fi_fifo_xb[i] != NULL) {
26939 				kmem_free(un->sd_fi_fifo_xb[i],
26940 				    sizeof (struct sd_fi_xb));
26941 			}
26942 			if (un->sd_fi_fifo_un[i] != NULL) {
26943 				kmem_free(un->sd_fi_fifo_un[i],
26944 				    sizeof (struct sd_fi_un));
26945 			}
26946 			if (un->sd_fi_fifo_arq[i] != NULL) {
26947 				kmem_free(un->sd_fi_fifo_arq[i],
26948 				    sizeof (struct sd_fi_arq));
26949 			}
26950 			un->sd_fi_fifo_pkt[i] = NULL;
26951 			un->sd_fi_fifo_un[i] = NULL;
26952 			un->sd_fi_fifo_xb[i] = NULL;
26953 			un->sd_fi_fifo_arq[i] = NULL;
26954 		}
26955 		un->sd_fi_fifo_start = 0;
26956 		un->sd_fi_fifo_end = 0;
26957 
26958 		SD_INFO(SD_LOG_IOERR, un,
26959 		    "sd_faultinjection_ioctl: stop finished\n");
26960 		break;
26961 
26962 	case SDIOCINSERTPKT:
26963 		/* Store a packet struct to be pushed onto fifo */
26964 		SD_INFO(SD_LOG_SDTEST, un,
26965 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
26966 
26967 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
26968 
26969 		sd_fault_injection_on = 0;
26970 
		/* No more than SD_FI_MAX_ERROR allowed in the queue */
26972 		if (un->sd_fi_fifo_pkt[i] != NULL) {
26973 			kmem_free(un->sd_fi_fifo_pkt[i],
26974 			    sizeof (struct sd_fi_pkt));
26975 		}
26976 		if (arg != NULL) {
26977 			un->sd_fi_fifo_pkt[i] =
26978 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
26979 			if (un->sd_fi_fifo_pkt[i] == NULL) {
				/* Alloc failed; don't store anything */
26981 				break;
26982 			}
26983 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
26984 			    sizeof (struct sd_fi_pkt), 0);
26985 			if (rval == -1) {
26986 				kmem_free(un->sd_fi_fifo_pkt[i],
26987 				    sizeof (struct sd_fi_pkt));
26988 				un->sd_fi_fifo_pkt[i] = NULL;
26989 			}
26990 		} else {
26991 			SD_INFO(SD_LOG_IOERR, un,
26992 			    "sd_faultinjection_ioctl: pkt null\n");
26993 		}
26994 		break;
26995 
26996 	case SDIOCINSERTXB:
		/* Store an xb struct to be pushed onto the fifo */
26998 		SD_INFO(SD_LOG_SDTEST, un,
26999 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
27000 
27001 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
27002 
27003 		sd_fault_injection_on = 0;
27004 
27005 		if (un->sd_fi_fifo_xb[i] != NULL) {
27006 			kmem_free(un->sd_fi_fifo_xb[i],
27007 			    sizeof (struct sd_fi_xb));
27008 			un->sd_fi_fifo_xb[i] = NULL;
27009 		}
27010 		if (arg != NULL) {
27011 			un->sd_fi_fifo_xb[i] =
27012 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
27013 			if (un->sd_fi_fifo_xb[i] == NULL) {
				/* Alloc failed; don't store anything */
27015 				break;
27016 			}
27017 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
27018 			    sizeof (struct sd_fi_xb), 0);
27019 
27020 			if (rval == -1) {
27021 				kmem_free(un->sd_fi_fifo_xb[i],
27022 				    sizeof (struct sd_fi_xb));
27023 				un->sd_fi_fifo_xb[i] = NULL;
27024 			}
27025 		} else {
27026 			SD_INFO(SD_LOG_IOERR, un,
27027 			    "sd_faultinjection_ioctl: xb null\n");
27028 		}
27029 		break;
27030 
27031 	case SDIOCINSERTUN:
27032 		/* Store a un struct to be pushed onto fifo */
27033 		SD_INFO(SD_LOG_SDTEST, un,
27034 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
27035 
27036 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
27037 
27038 		sd_fault_injection_on = 0;
27039 
27040 		if (un->sd_fi_fifo_un[i] != NULL) {
27041 			kmem_free(un->sd_fi_fifo_un[i],
27042 			    sizeof (struct sd_fi_un));
27043 			un->sd_fi_fifo_un[i] = NULL;
27044 		}
27045 		if (arg != NULL) {
27046 			un->sd_fi_fifo_un[i] =
27047 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
27048 			if (un->sd_fi_fifo_un[i] == NULL) {
				/* Alloc failed; don't store anything */
27050 				break;
27051 			}
27052 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
27053 			    sizeof (struct sd_fi_un), 0);
27054 			if (rval == -1) {
27055 				kmem_free(un->sd_fi_fifo_un[i],
27056 				    sizeof (struct sd_fi_un));
27057 				un->sd_fi_fifo_un[i] = NULL;
27058 			}
27059 
27060 		} else {
27061 			SD_INFO(SD_LOG_IOERR, un,
27062 			    "sd_faultinjection_ioctl: un null\n");
27063 		}
27064 
27065 		break;
27066 
27067 	case SDIOCINSERTARQ:
		/* Store an arq struct to be pushed onto the fifo */
27069 		SD_INFO(SD_LOG_SDTEST, un,
27070 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
27071 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
27072 
27073 		sd_fault_injection_on = 0;
27074 
27075 		if (un->sd_fi_fifo_arq[i] != NULL) {
27076 			kmem_free(un->sd_fi_fifo_arq[i],
27077 			    sizeof (struct sd_fi_arq));
27078 			un->sd_fi_fifo_arq[i] = NULL;
27079 		}
27080 		if (arg != NULL) {
27081 			un->sd_fi_fifo_arq[i] =
27082 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
27083 			if (un->sd_fi_fifo_arq[i] == NULL) {
				/* Alloc failed; don't store anything */
27085 				break;
27086 			}
27087 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
27088 			    sizeof (struct sd_fi_arq), 0);
27089 			if (rval == -1) {
27090 				kmem_free(un->sd_fi_fifo_arq[i],
27091 				    sizeof (struct sd_fi_arq));
27092 				un->sd_fi_fifo_arq[i] = NULL;
27093 			}
27094 
27095 		} else {
27096 			SD_INFO(SD_LOG_IOERR, un,
27097 			    "sd_faultinjection_ioctl: arq null\n");
27098 		}
27099 
27100 		break;
27101 
27102 	case SDIOCPUSH:
27103 		/* Push stored xb, pkt, un, and arq onto fifo */
27104 		sd_fault_injection_on = 0;
27105 
27106 		if (arg != NULL) {
27107 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
27108 			if (rval != -1 &&
27109 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
27110 				un->sd_fi_fifo_end += i;
27111 			}
27112 		} else {
27113 			SD_INFO(SD_LOG_IOERR, un,
27114 			    "sd_faultinjection_ioctl: push arg null\n");
			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
27116 				un->sd_fi_fifo_end++;
27117 			}
27118 		}
27119 		SD_INFO(SD_LOG_IOERR, un,
27120 		    "sd_faultinjection_ioctl: push to end=%d\n",
27121 		    un->sd_fi_fifo_end);
27122 		break;
27123 
27124 	case SDIOCRETRIEVE:
27125 		/* Return buffer of log from Injection session */
		SD_INFO(SD_LOG_SDTEST, un,
		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
27128 
27129 		sd_fault_injection_on = 0;
27130 
27131 		mutex_enter(&(un->un_fi_mutex));
27132 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
27133 		    un->sd_fi_buf_len+1, 0);
27134 		mutex_exit(&(un->un_fi_mutex));
27135 
27136 		if (rval == -1) {
27137 			/*
27138 			 * arg is possibly invalid setting
27139 			 * it to NULL for return
27140 			 */
27141 			arg = NULL;
27142 		}
27143 		break;
27144 	}
27145 
27146 	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
27149 }
27150 
27151 
27152 /*
27153  *    Function: sd_injection_log()
27154  *
 * Description: This routine appends buf to the existing injection log,
 *              for later retrieval via sd_faultinjection_ioctl() for use
 *              in fault detection and recovery.
 *
 *   Arguments: buf - the string to add to the log
 *		un  - pointer to the soft state struct for the instance
27160  */
27161 
27162 static void
27163 sd_injection_log(char *buf, struct sd_lun *un)
27164 {
27165 	uint_t len;
27166 
27167 	ASSERT(un != NULL);
27168 	ASSERT(buf != NULL);
27169 
27170 	mutex_enter(&(un->un_fi_mutex));
27171 
27172 	len = min(strlen(buf), 255);
27173 	/* Add logged value to Injection log to be returned later */
27174 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
27175 		uint_t	offset = strlen((char *)un->sd_fi_log);
27176 		char *destp = (char *)un->sd_fi_log + offset;
27177 		int i;
27178 		for (i = 0; i < len; i++) {
27179 			*destp++ = *buf++;
27180 		}
27181 		un->sd_fi_buf_len += len;
27182 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
27183 	}
27184 
27185 	mutex_exit(&(un->un_fi_mutex));
27186 }
27187 
27188 
27189 /*
27190  *    Function: sd_faultinjection()
27191  *
27192  * Description: This routine takes the pkt and changes its
27193  *		content based on error injection scenerio.
27194  *
27195  *   Arguments: pktp	- packet to be changed
27196  */
27197 
27198 static void
27199 sd_faultinjection(struct scsi_pkt *pktp)
27200 {
27201 	uint_t i;
27202 	struct sd_fi_pkt *fi_pkt;
27203 	struct sd_fi_xb *fi_xb;
27204 	struct sd_fi_un *fi_un;
27205 	struct sd_fi_arq *fi_arq;
27206 	struct buf *bp;
27207 	struct sd_xbuf *xb;
27208 	struct sd_lun *un;
27209 
27210 	ASSERT(pktp != NULL);
27211 
	/* pull bp, xb, and un from pktp */
27213 	bp = (struct buf *)pktp->pkt_private;
27214 	xb = SD_GET_XBUF(bp);
27215 	un = SD_GET_UN(bp);
27216 
27217 	ASSERT(un != NULL);
27218 
27219 	mutex_enter(SD_MUTEX(un));
27220 
27221 	SD_TRACE(SD_LOG_SDTEST, un,
27222 	    "sd_faultinjection: entry Injection from sdintr\n");
27223 
27224 	/* if injection is off return */
27225 	if (sd_fault_injection_on == 0 ||
27226 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
27227 		mutex_exit(SD_MUTEX(un));
27228 		return;
27229 	}
27230 
27231 
27232 	/* take next set off fifo */
27233 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
27234 
27235 	fi_pkt = un->sd_fi_fifo_pkt[i];
27236 	fi_xb = un->sd_fi_fifo_xb[i];
27237 	fi_un = un->sd_fi_fifo_un[i];
27238 	fi_arq = un->sd_fi_fifo_arq[i];
27239 
27240 
27241 	/* set variables accordingly */
27242 	/* set pkt if it was on fifo */
27243 	if (fi_pkt != NULL) {
27244 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
27245 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
27246 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
27247 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
27248 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
27249 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
27250 
27251 	}
27252 
27253 	/* set xb if it was on fifo */
27254 	if (fi_xb != NULL) {
27255 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
27256 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
27257 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
27258 		SD_CONDSET(xb, xb, xb_victim_retry_count,
27259 		    "xb_victim_retry_count");
27260 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
27261 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
27262 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
27263 
27264 		/* copy in block data from sense */
27265 		if (fi_xb->xb_sense_data[0] != -1) {
27266 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
27267 			    SENSE_LENGTH);
27268 		}
27269 
27270 		/* copy in extended sense codes */
27271 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
27272 		    "es_code");
27273 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
27274 		    "es_key");
27275 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
27276 		    "es_add_code");
27277 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
27278 		    es_qual_code, "es_qual_code");
27279 	}
27280 
27281 	/* set un if it was on fifo */
27282 	if (fi_un != NULL) {
27283 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
27284 		SD_CONDSET(un, un, un_ctype, "un_ctype");
27285 		SD_CONDSET(un, un, un_reset_retry_count,
27286 		    "un_reset_retry_count");
27287 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
27288 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
27289 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
27290 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
27291 		    "un_f_allow_bus_device_reset");
27292 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
27293 
27294 	}
27295 
27296 	/* copy in auto request sense if it was on fifo */
27297 	if (fi_arq != NULL) {
27298 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
27299 	}
27300 
27301 	/* free structs */
27302 	if (un->sd_fi_fifo_pkt[i] != NULL) {
27303 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
27304 	}
27305 	if (un->sd_fi_fifo_xb[i] != NULL) {
27306 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
27307 	}
27308 	if (un->sd_fi_fifo_un[i] != NULL) {
27309 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
27310 	}
27311 	if (un->sd_fi_fifo_arq[i] != NULL) {
27312 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
27313 	}
27314 
27315 	/*
27316 	 * kmem_free does not gurantee to set to NULL
27317 	 * since we uses these to determine if we set
27318 	 * values or not lets confirm they are always
27319 	 * NULL after free
27320 	 */
27321 	un->sd_fi_fifo_pkt[i] = NULL;
27322 	un->sd_fi_fifo_un[i] = NULL;
27323 	un->sd_fi_fifo_xb[i] = NULL;
27324 	un->sd_fi_fifo_arq[i] = NULL;
27325 
27326 	un->sd_fi_fifo_start++;
27327 
27328 	mutex_exit(SD_MUTEX(un));
27329 
27330 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
27331 }
27332 
27333 #endif /* SD_FAULT_INJECTION */
27334 
27335 /*
 * This routine is invoked in sd_unit_attach(). Before calling it, the
 * properties in the conf file should already have been processed,
 * including the "hotpluggable" property.
 *
 * The sd driver distinguishes 3 different types of devices: removable
 * media, non-removable media, and hotpluggable. The differences are
 * defined below:
27342  *
27343  * 1. Device ID
27344  *
27345  *     The device ID of a device is used to identify this device. Refer to
27346  *     ddi_devid_register(9F).
27347  *
27348  *     For a non-removable media disk device which can provide 0x80 or 0x83
27349  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
27350  *     device ID is created to identify this device. For other non-removable
27351  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
27353  *
27354  *     -------------------------------------------------------
27355  *     removable media   hotpluggable  | Can Have Device ID
27356  *     -------------------------------------------------------
27357  *         false             false     |     Yes
27358  *         false             true      |     Yes
27359  *         true                x       |     No
27360  *     ------------------------------------------------------
27361  *
27362  *
27363  * 2. SCSI group 4 commands
27364  *
 *     In the SCSI specs, only some commands in the group 4 command set
 *     use 8-byte addresses, which are needed to access storage beyond
 *     2TB. Other commands have no such capability. Without group 4
 *     support, it is impossible to make full use of a disk with a
 *     capacity larger than 2TB.
27370  *
27371  *     -----------------------------------------------
27372  *     removable media   hotpluggable   LP64  |  Group
27373  *     -----------------------------------------------
27374  *           false          false       false |   1
27375  *           false          false       true  |   4
27376  *           false          true        false |   1
27377  *           false          true        true  |   4
27378  *           true             x           x   |   5
27379  *     -----------------------------------------------
27380  *
27381  *
27382  * 3. Check for VTOC Label
27383  *
27384  *     If a direct-access disk has no EFI label, sd will check if it has a
27385  *     valid VTOC label. Now, sd also does that check for removable media
27386  *     and hotpluggable devices.
27387  *
27388  *     --------------------------------------------------------------
27389  *     Direct-Access   removable media    hotpluggable |  Check Label
27390  *     -------------------------------------------------------------
27391  *         false          false           false        |   No
27392  *         false          false           true         |   No
27393  *         false          true            false        |   Yes
27394  *         false          true            true         |   Yes
27395  *         true            x                x          |   Yes
27396  *     --------------------------------------------------------------
27397  *
27398  *
27399  * 4. Building default VTOC label
27400  *
 *     As section 3 says, sd checks whether some kinds of devices have a
 *     VTOC label. If those devices have no valid VTOC label, sd(7d) will
 *     attempt to create a default VTOC for them. Currently sd creates a
 *     default VTOC label for all devices on the x86 platform (VTOC_16),
 *     but only for removable media devices on SPARC (VTOC_8).
27406  *
27407  *     -----------------------------------------------------------
27408  *       removable media hotpluggable platform   |   Default Label
27409  *     -----------------------------------------------------------
27410  *             false          false    sparc     |     No
27411  *             false          true      x86      |     Yes
27412  *             false          true     sparc     |     Yes
27413  *             true             x        x       |     Yes
27414  *     ----------------------------------------------------------
27415  *
27416  *
27417  * 5. Supported blocksizes of target devices
27418  *
 *     Sd supports non-512-byte blocksizes for removable media devices only.
 *     For other devices, only a 512-byte blocksize is supported. This may
 *     change in the near future because some RAID devices require a
 *     non-512-byte blocksize.
27423  *
27424  *     -----------------------------------------------------------
27425  *     removable media    hotpluggable    | non-512-byte blocksize
27426  *     -----------------------------------------------------------
27427  *           false          false         |   No
27428  *           false          true          |   No
27429  *           true             x           |   Yes
27430  *     -----------------------------------------------------------
27431  *
27432  *
27433  * 6. Automatic mount & unmount
27434  *
 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices, and 0 for others.
 *
 *     The automatic mounting subsystem should distinguish between the types
 *     of devices and apply automounting policies to each, as in the sketch
 *     below.
27441  *
27442  *
27443  * 7. fdisk partition management
27444  *
 *     Fdisk is the traditional partitioning method on the x86 platform.
 *     The sd(7d) driver supports fdisk partitions only on x86. On SPARC,
 *     sd doesn't support fdisk partitions at all. Note: pcfs(7fs) can
 *     recognize fdisk partitions on both the x86 and SPARC platforms.
27449  *
27450  *     -----------------------------------------------------------
27451  *       platform   removable media  USB/1394  |  fdisk supported
27452  *     -----------------------------------------------------------
27453  *        x86         X               X        |       true
27454  *     ------------------------------------------------------------
27455  *        sparc       X               X        |       false
27456  *     ------------------------------------------------------------
27457  *
27458  *
27459  * 8. MBOOT/MBR
27460  *
 *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
 *     support reading/writing the mboot for removable media and USB/1394
 *     devices on SPARC, as the table below shows.
27463  *
27464  *     -----------------------------------------------------------
27465  *       platform   removable media  USB/1394  |  mboot supported
27466  *     -----------------------------------------------------------
27467  *        x86         X               X        |       true
27468  *     ------------------------------------------------------------
27469  *        sparc      false           false     |       false
27470  *        sparc      false           true      |       true
27471  *        sparc      true            false     |       true
27472  *        sparc      true            true      |       true
27473  *     ------------------------------------------------------------
27474  *
27475  *
 * 9. Error handling when opening a device
27477  *
 *     If a disk device fails to open, an errno is returned. For some kinds
 *     of errors, a different errno is returned depending on whether this
 *     device is a removable media device. This brings USB/1394 hard disks
 *     in line with expected hard disk behavior. It is not expected that
 *     this breaks any application.
27483  *
27484  *     ------------------------------------------------------
27485  *       removable media    hotpluggable   |  errno
27486  *     ------------------------------------------------------
27487  *             false          false        |   EIO
27488  *             false          true         |   EIO
27489  *             true             x          |   ENXIO
27490  *     ------------------------------------------------------
27491  *
27492  *
27493  * 11. ioctls: DKIOCEJECT, CDROMEJECT
27494  *
27495  *     These IOCTLs are applicable only to removable media devices.
27496  *
27497  *     -----------------------------------------------------------
27498  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
27499  *     -----------------------------------------------------------
27500  *             false          false        |     No
27501  *             false          true         |     No
27502  *             true            x           |     Yes
27503  *     -----------------------------------------------------------
27504  *
27505  *
27506  * 12. Kstats for partitions
27507  *
 *     sd creates partition kstats for non-removable media devices. USB and
 *     Firewire hard disks now have partition kstats as well.
27510  *
27511  *      ------------------------------------------------------
27512  *       removable media    hotpluggable   |   kstat
27513  *      ------------------------------------------------------
27514  *             false          false        |    Yes
27515  *             false          true         |    Yes
27516  *             true             x          |    No
27517  *       ------------------------------------------------------
27518  *
27519  *
27520  * 13. Removable media & hotpluggable properties
27521  *
 *     The sd driver creates a "removable-media" property for removable
 *     media devices. A parent nexus driver creates a "hotpluggable"
 *     property if it supports hotplugging.
27525  *
27526  *     ---------------------------------------------------------------------
 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
27528  *     ---------------------------------------------------------------------
27529  *       false            false       |    No                   No
27530  *       false            true        |    No                   Yes
27531  *       true             false       |    Yes                  No
27532  *       true             true        |    Yes                  Yes
27533  *     ---------------------------------------------------------------------
27534  *
27535  *
27536  * 14. Power Management
27537  *
27538  *     sd only power manages removable media devices or devices that support
 *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
27540  *
27541  *     A parent nexus that supports hotplugging can also set "pm-capable"
27542  *     if the disk can be power managed.
27543  *
27544  *     ------------------------------------------------------------
27545  *       removable media hotpluggable pm-capable  |   power manage
27546  *     ------------------------------------------------------------
27547  *             false          false     false     |     No
27548  *             false          false     true      |     Yes
27549  *             false          true      false     |     No
27550  *             false          true      true      |     Yes
27551  *             true             x        x        |     Yes
27552  *     ------------------------------------------------------------
27553  *
27554  *      USB and firewire hard disks can now be power managed independently
 *      of the framebuffer.
27556  *
27557  *
27558  * 15. Support for USB disks with capacity larger than 1TB
27559  *
 *     Currently, sd doesn't permit a fixed disk device with a capacity
 *     larger than 1TB to be used in a 32-bit operating system environment.
 *     However, sd doesn't apply that restriction to removable media
 *     devices. Instead, it assumes that removable media devices cannot
 *     have a capacity larger than 1TB. Therefore, using those devices on
 *     a 32-bit system is only partially supported, which can cause some
 *     unexpected results.
27566  *
27567  *     ---------------------------------------------------------------------
27568  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
27569  *     ---------------------------------------------------------------------
27570  *             false          false  |   true         |     no
27571  *             false          true   |   true         |     no
27572  *             true           false  |   true         |     Yes
27573  *             true           true   |   true         |     Yes
27574  *     ---------------------------------------------------------------------
27575  *
27576  *
27577  * 16. Check write-protection at open time
27578  *
 *     When a removable media device is opened for writing without the
 *     NDELAY flag, sd will check whether the device is writable. If a
 *     write-protected device is opened for writing without the NDELAY
 *     flag, the open will fail.
27582  *
27583  *     ------------------------------------------------------------
27584  *       removable media    USB/1394   |   WP Check
27585  *     ------------------------------------------------------------
27586  *             false          false    |     No
27587  *             false          true     |     No
27588  *             true           false    |     Yes
27589  *             true           true     |     Yes
27590  *     ------------------------------------------------------------
27591  *
27592  *
27593  * 17. syslog when corrupted VTOC is encountered
27594  *
 *      Currently, if an invalid VTOC is encountered, sd only prints a
 *      syslog message for fixed SCSI disks.
27597  *     ------------------------------------------------------------
27598  *       removable media    USB/1394   |   print syslog
27599  *     ------------------------------------------------------------
27600  *             false          false    |     Yes
27601  *             false          true     |     No
27602  *             true           false    |     No
27603  *             true           true     |     No
27604  *     ------------------------------------------------------------
27605  */
27606 static void
27607 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
27608 {
27609 	int	pm_capable_prop;
27610 
27611 	ASSERT(un->un_sd);
27612 	ASSERT(un->un_sd->sd_inq);
27613 
27614 	/*
27615 	 * Enable SYNC CACHE support for all devices.
27616 	 */
27617 	un->un_f_sync_cache_supported = TRUE;
27618 
27619 	if (un->un_sd->sd_inq->inq_rmb) {
27620 		/*
27621 		 * The media of this device is removable. And for this kind
27622 		 * of devices, it is possible to change medium after opening
27623 		 * devices. Thus we should support this operation.
27624 		 */
27625 		un->un_f_has_removable_media = TRUE;
27626 
27627 		/*
27628 		 * support non-512-byte blocksize of removable media devices
27629 		 */
27630 		un->un_f_non_devbsize_supported = TRUE;
27631 
27632 		/*
27633 		 * Assume that all removable media devices support DOOR_LOCK
27634 		 */
27635 		un->un_f_doorlock_supported = TRUE;
27636 
27637 		/*
27638 		 * For a removable media device, it is possible to be opened
27639 		 * with NDELAY flag when there is no media in drive, in this
27640 		 * case we don't care if device is writable. But if without
27641 		 * NDELAY flag, we need to check if media is write-protected.
27642 		 */
27643 		un->un_f_chk_wp_open = TRUE;
27644 
27645 		/*
27646 		 * need to start a SCSI watch thread to monitor media state,
27647 		 * when media is being inserted or ejected, notify syseventd.
27648 		 */
27649 		un->un_f_monitor_media_state = TRUE;
27650 
27651 		/*
27652 		 * Some devices don't support START_STOP_UNIT command.
27653 		 * Therefore, we'd better check if a device supports it
27654 		 * before sending it.
27655 		 */
27656 		un->un_f_check_start_stop = TRUE;
27657 
27658 		/*
27659 		 * support eject media ioctl:
27660 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
27661 		 */
27662 		un->un_f_eject_media_supported = TRUE;
27663 
27664 		/*
27665 		 * Because many removable-media devices don't support
27666 		 * LOG_SENSE, we couldn't use this command to check if
27667 		 * a removable media device support power-management.
27668 		 * We assume that they support power-management via
27669 		 * START_STOP_UNIT command and can be spun up and down
27670 		 * without limitations.
27671 		 */
27672 		un->un_f_pm_supported = TRUE;
27673 
27674 		/*
27675 		 * Need to create a zero length (Boolean) property
27676 		 * removable-media for the removable media devices.
27677 		 * Note that the return value of the property is not being
27678 		 * checked, since if unable to create the property
27679 		 * then do not want the attach to fail altogether. Consistent
27680 		 * with other property creation in attach.
27681 		 */
27682 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
27683 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
27684 
27685 	} else {
27686 		/*
27687 		 * create device ID for device
27688 		 */
27689 		un->un_f_devid_supported = TRUE;
27690 
27691 		/*
27692 		 * Spin up non-removable-media devices once it is attached
27693 		 */
27694 		un->un_f_attach_spinup = TRUE;
27695 
27696 		/*
27697 		 * According to SCSI specification, Sense data has two kinds of
27698 		 * format: fixed format, and descriptor format. At present, we
27699 		 * don't support descriptor format sense data for removable
27700 		 * media.
27701 		 */
27702 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
27703 			un->un_f_descr_format_supported = TRUE;
27704 		}
27705 
27706 		/*
27707 		 * kstats are created only for non-removable media devices.
27708 		 *
27709 		 * Set this in sd.conf to 0 in order to disable kstats.  The
27710 		 * default is 1, so they are enabled by default.
27711 		 */
27712 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
27713 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
27714 		    "enable-partition-kstats", 1));
27715 
27716 		/*
27717 		 * Check if HBA has set the "pm-capable" property.
27718 		 * If "pm-capable" exists and is non-zero then we can
27719 		 * power manage the device without checking the start/stop
27720 		 * cycle count log sense page.
27721 		 *
27722 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
27723 		 * then we should not power manage the device.
27724 		 *
27725 		 * If "pm-capable" doesn't exist then pm_capable_prop will
27726 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
27727 		 * sd will check the start/stop cycle count log sense page
27728 		 * and power manage the device if the cycle count limit has
27729 		 * not been exceeded.
27730 		 */
27731 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
27732 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
27733 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
27734 			un->un_f_log_sense_supported = TRUE;
27735 		} else {
27736 			/*
27737 			 * pm-capable property exists.
27738 			 *
27739 			 * Convert "TRUE" values for pm_capable_prop to
27740 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
27741 			 * later. "TRUE" values are any values except
27742 			 * SD_PM_CAPABLE_FALSE (0) and
27743 			 * SD_PM_CAPABLE_UNDEFINED (-1)
27744 			 */
27745 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
27746 				un->un_f_log_sense_supported = FALSE;
27747 			} else {
27748 				un->un_f_pm_supported = TRUE;
27749 			}
27750 
27751 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
27752 			    "sd_unit_attach: un:0x%p pm-capable "
27753 			    "property set to %d.\n", un, un->un_f_pm_supported);
27754 		}
27755 	}
27756 
27757 	if (un->un_f_is_hotpluggable) {
27758 
27759 		/*
27760 		 * Have to watch hotpluggable devices as well, since
27761 		 * that's the only way for userland applications to
		 * detect hot removal while the device is busy/mounted.
27763 		 */
27764 		un->un_f_monitor_media_state = TRUE;
27765 
27766 		un->un_f_check_start_stop = TRUE;
27767 
27768 	}
27769 }
27770 
27771 /*
27772  * sd_tg_rdwr:
27773  * Provides rdwr access for cmlb via sd_tgops. The start_block is
27774  * in sys block size, req_length in bytes.
27775  *
27776  */
27777 static int
27778 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
27779     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
27780 {
27781 	struct sd_lun *un;
27782 	int path_flag = (int)(uintptr_t)tg_cookie;
27783 	char *dkl = NULL;
27784 	diskaddr_t real_addr = start_block;
27785 	diskaddr_t first_byte, end_block;
27786 
27787 	size_t	buffer_size = reqlength;
27788 	int rval;
27789 	diskaddr_t	cap;
27790 	uint32_t	lbasize;
27791 
27792 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27793 	if (un == NULL)
27794 		return (ENXIO);
27795 
27796 	if (cmd != TG_READ && cmd != TG_WRITE)
27797 		return (EINVAL);
27798 
27799 	mutex_enter(SD_MUTEX(un));
27800 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27801 		mutex_exit(SD_MUTEX(un));
27802 		rval = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27803 		    &lbasize, path_flag);
27804 		if (rval != 0)
27805 			return (rval);
27806 		mutex_enter(SD_MUTEX(un));
27807 		sd_update_block_info(un, lbasize, cap);
		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
27809 			mutex_exit(SD_MUTEX(un));
27810 			return (EIO);
27811 		}
27812 	}
27813 
27814 	if (NOT_DEVBSIZE(un)) {
27815 		/*
27816 		 * sys_blocksize != tgt_blocksize, need to re-adjust
27817 		 * blkno and save the index to beginning of dk_label
27818 		 */
27819 		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
27820 		real_addr = first_byte / un->un_tgt_blocksize;
27821 
27822 		end_block = (first_byte + reqlength +
27823 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
27824 
27825 		/* round up buffer size to multiple of target block size */
27826 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
27827 
27828 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
27829 		    "label_addr: 0x%x allocation size: 0x%x\n",
27830 		    real_addr, buffer_size);
27831 
27832 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
27833 		    (reqlength % un->un_tgt_blocksize) != 0)
27834 			/* the request is not aligned */
27835 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
27836 	}
27837 
27838 	/*
27839 	 * The MMC standard allows READ CAPACITY to be
27840 	 * inaccurate by a bounded amount (in the interest of
27841 	 * response latency).  As a result, failed READs are
27842 	 * commonplace (due to the reading of metadata and not
27843 	 * data). Depending on the per-Vendor/drive Sense data,
27844 	 * the failed READ can cause many (unnecessary) retries.
27845 	 */
27846 
27847 	if (ISCD(un) && (cmd == TG_READ) &&
27848 	    (un->un_f_blockcount_is_valid == TRUE) &&
	    ((start_block == (un->un_blockcount - 1)) ||
	    (start_block == (un->un_blockcount - 2)))) {
		path_flag = SD_PATH_DIRECT_PRIORITY;
27852 	}
27853 
27854 	mutex_exit(SD_MUTEX(un));
27855 	if (cmd == TG_READ) {
		rval = sd_send_scsi_READ(un, (dkl != NULL) ? dkl : bufaddr,
27857 		    buffer_size, real_addr, path_flag);
27858 		if (dkl != NULL)
27859 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
27860 			    real_addr), bufaddr, reqlength);
27861 	} else {
27862 		if (dkl) {
27863 			rval = sd_send_scsi_READ(un, dkl, buffer_size,
27864 			    real_addr, path_flag);
27865 			if (rval) {
27866 				kmem_free(dkl, buffer_size);
27867 				return (rval);
27868 			}
27869 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
27870 			    real_addr), reqlength);
27871 		}
		rval = sd_send_scsi_WRITE(un, (dkl != NULL) ? dkl : bufaddr,
27873 		    buffer_size, real_addr, path_flag);
27874 	}
27875 
27876 	if (dkl != NULL)
27877 		kmem_free(dkl, buffer_size);
27878 
27879 	return (rval);
27880 }
27881 
27882 
27883 static int
27884 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
27885 {
27886 
27887 	struct sd_lun *un;
27888 	diskaddr_t	cap;
27889 	uint32_t	lbasize;
27890 	int		path_flag = (int)(uintptr_t)tg_cookie;
27891 	int		ret = 0;
27892 
27893 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
27894 	if (un == NULL)
27895 		return (ENXIO);
27896 
27897 	switch (cmd) {
27898 	case TG_GETPHYGEOM:
27899 	case TG_GETVIRTGEOM:
27900 	case TG_GETCAPACITY:
	case TG_GETBLOCKSIZE:
27902 		mutex_enter(SD_MUTEX(un));
27903 
27904 		if ((un->un_f_blockcount_is_valid == TRUE) &&
27905 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
27906 			cap = un->un_blockcount;
27907 			lbasize = un->un_tgt_blocksize;
27908 			mutex_exit(SD_MUTEX(un));
27909 		} else {
27910 			mutex_exit(SD_MUTEX(un));
27911 			ret = sd_send_scsi_READ_CAPACITY(un, (uint64_t *)&cap,
27912 			    &lbasize, path_flag);
27913 			if (ret != 0)
27914 				return (ret);
27915 			mutex_enter(SD_MUTEX(un));
27916 			sd_update_block_info(un, lbasize, cap);
27917 			if ((un->un_f_blockcount_is_valid == FALSE) ||
27918 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
27919 				mutex_exit(SD_MUTEX(un));
27920 				return (EIO);
27921 			}
27922 			mutex_exit(SD_MUTEX(un));
27923 		}
27924 
27925 		if (cmd == TG_GETCAPACITY) {
27926 			*(diskaddr_t *)arg = cap;
27927 			return (0);
27928 		}
27929 
27930 		if (cmd == TG_GETBLOCKSIZE) {
27931 			*(uint32_t *)arg = lbasize;
27932 			return (0);
27933 		}
27934 
27935 		if (cmd == TG_GETPHYGEOM)
27936 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
27937 			    cap, lbasize, path_flag);
27938 		else
27939 			/* TG_GETVIRTGEOM */
27940 			ret = sd_get_virtual_geometry(un,
27941 			    (cmlb_geom_t *)arg, cap, lbasize);
27942 
27943 		return (ret);
27944 
27945 	case TG_GETATTR:
27946 		mutex_enter(SD_MUTEX(un));
27947 		((tg_attribute_t *)arg)->media_is_writable =
27948 		    un->un_f_mmc_writable_media;
27949 		mutex_exit(SD_MUTEX(un));
27950 		return (0);
	default:
		return (ENOTTY);
	}
}
27957