xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision 0173c38a73f34277e0c97a19fedfd25d81ba8380)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/vtrace.h>
44 #include <sys/note.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/efi_partition.h>
48 #include <sys/var.h>
49 #include <sys/aio_req.h>
50 
51 #ifdef __lock_lint
52 #define	_LP64
53 #define	__amd64
54 #endif
55 
56 #if (defined(__fibre))
57 /* Note: is there a leadville version of the following? */
58 #include <sys/fc4/fcal_linkapp.h>
59 #endif
60 #include <sys/taskq.h>
61 #include <sys/uuid.h>
62 #include <sys/byteorder.h>
63 #include <sys/sdt.h>
64 
65 #include "sd_xbuf.h"
66 
67 #include <sys/scsi/targets/sddef.h>
68 
69 
70 /*
71  * Loadable module info.
72  */
73 #if (defined(__fibre))
74 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
75 char _depends_on[]	= "misc/scsi drv/fcp";
76 #else
77 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
78 char _depends_on[]	= "misc/scsi";
79 #endif
80 
81 /*
82  * Define the interconnect type, to allow the driver to distinguish
83  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
84  *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
90  * if an FC HBA does not support the "interconnect-type" property.  The
91  * behavior of the driver will be to assume parallel SCSI behaviors unless
92  * the "interconnect-type" property is defined by the HBA **AND** has a
93  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
94  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
95  * Channel behaviors (as per the old ssd).  (Note that the
96  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
97  * will result in the driver assuming parallel SCSI behaviors.)
98  *
99  * (see common/sys/scsi/impl/services.h)
100  *
101  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
102  * since some FC HBAs may already support that, and there is some code in
103  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
104  * default would confuse that code, and besides things should work fine
105  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
106  * "interconnect_type" property.
107  *
108  * Notes for off-by-1 workaround:
109  * -----------------------------
110  *
111  *    SCSI READ_CAPACITY command returns the LBA number of the
112  *    last logical block, but sd once treated this number as
113  *    disks' capacity on x86 platform. And LBAs are addressed
114  *    based 0. So the last block was lost on x86 platform.
115  *
116  *    Now, we remove this workaround. In order for present sd
117  *    driver to work with disks which are labeled/partitioned
118  *    via previous sd, we add workaround as follows:
119  *
 *    1) Locate backup EFI label: sd searches the next to last
121  *       block for backup EFI label if it can't find it on the
122  *       last block;
123  *    2) Calculate geometry: refer to sd_convert_geometry(), If
124  *       capacity increasing by 1 causes disks' capacity to cross
125  *       over the limits in table CHS_values, geometry info will
126  *       change. This will raise an issue: In case that primary
127  *       VTOC label is destroyed, format commandline can restore
128  *       it via backup VTOC labels. And format locates backup VTOC
129  *       labels by use of geometry from sd driver. So changing
130  *       geometry will prevent format from finding backup VTOC
131  *       labels. To eliminate this side effect for compatibility,
132  *       sd uses (capacity -1) to calculate geometry;
133  *    3) 1TB disks: VTOC uses 32-bit signed int, thus sd doesn't
134  *       support VTOC for a disk which has more than DK_MAX_BLOCKS
135  *       LBAs. However, for exactly 1TB disk, it was treated as
136  *       (1T - 512)B in the past, and could have VTOC. To overcome
137  *       this, if an exactly 1TB disk has solaris fdisk partition,
138  *       it will be allowed to work with sd.
139  */
140 #if (defined(__fibre))
141 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
142 #else
143 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
144 #endif
145 
146 /*
147  * The name of the driver, established from the module name in _init.
148  */
static	char *sd_label			= NULL;	/* filled in from modldrv in _init */

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 * Under __fibre the globals are #defined to their ssd_-prefixed names
 * (debugger visibility), but the property-name strings themselves must
 * still carry the "ssd" prefix so existing driver.conf files keep working.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif
163 
164 /*
165  * Driver global variables
166  */
167 
168 #if (defined(__fibre))
169 /*
170  * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
172  * All global variables need to be treated this way (even if declared static)
173  * in order to allow the debugger to resolve the names properly.
174  * It is anticipated that in the near future the ssd module will be obsoleted,
175  * at which time this namespace issue should go away.
176  */
/* Driver tunable globals (definitions below) */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

/* Reservation-conflict handling */
#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

/* Debug logging controls (see "Global data for debug logging" below) */
#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

/* Throttle/timeout state and global locks */
#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

/* Static configuration tables (defined below) */
#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

/* Taskq-related globals */
#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif
241 
242 
#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;	/* debug-only override (SDDEBUG builds) */
#endif	/* SDDEBUG */

/*
 * Driver-wide tunables.  SD_* defaults come from sddef.h.  These are
 * plain globals so they can be patched at boot (NOTE(review): presumably
 * via /etc/system, per the usual Solaris convention -- confirm).
 */
void *sd_state				= NULL;	/* opaque per-driver state anchor */
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;	/* failfast enabled by default */
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;	/* "ua" = unit attention, presumably */
int sd_report_pfa			= 1;	/* report predictive-failure events */
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;	/* see sd_resv_conflict_name below */
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;	/* -1 = unset/default */
262 
263 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
264 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
265 
266 /*
267  * Global data for debug logging. To enable debug printing, sd_component_mask
268  * and sd_level_mask should be set to the desired bit patterns as outlined in
269  * sddef.h.
270  */
uint_t	sd_component_mask		= 0x0;	/* component bits to log (sddef.h) */
uint_t	sd_level_mask			= 0x0;	/* severity bits to log (sddef.h) */
struct	sd_lun *sd_debug_un		= NULL;	/* if set, presumably limits logging to one unit -- confirm */
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;	/* max concurrently active xbufs */
static uint32_t sd_xbuf_reserve_limit	= 16;	/* xbufs held back in reserve */

/* Reservation-reclaim request state; all-zero initial state */
static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
281 
/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 * NOTE(review): units are not stated here; presumably microseconds --
 * confirm against the scsi_watch submission call.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];	/* shared formatting buffer */
static kmutex_t	sd_log_mutex;		/* serializes use of sd_log_buf */
319 
320 
321 /*
322  * "Smart" Probe Caching structs, globals, #defines, etc.
323  * For parallel scsi and non-self-identify device only.
324  */
325 
326 /*
327  * The following resources and routines are implemented to support
328  * "smart" probing, which caches the scsi_probe() results in an array,
329  * in order to help avoid long probe times.
330  */
/* One cache node per HBA parent; caches scsi_probe() results per target. */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;	/* next node in the list */
	dev_info_t	*pdip;	/* parent (HBA) devinfo this cache is for */
	int		cache[NTARGETS_WIDE];	/* cached probe result per target */
};

static kmutex_t	sd_scsi_probe_cache_mutex;	/* protects list below (see _NOTEs) */
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
339 
340 /*
341  * Really we only need protection on the head of the linked list, but
342  * better safe than sorry.
343  */
344 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
345     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
346 
347 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
348     sd_scsi_probe_cache_head))
349 
350 
351 /*
352  * Vendor specific data name property declarations
353  */
354 
355 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
356 
/*
 * sd_tunables positional initializers for FC / x86 targets.
 * Field order, inferred from tst_properties and the SD_CONF_BSET_*
 * flags applied to each sd_disk_table entry below (confirm against
 * the sd_tunables declaration in sddef.h):
 *	throttle, ctype, not-ready retries, busy retries, reset retries,
 *	reserve release time, min throttle, disksort-disabled flag,
 *	LUN-reset-enabled flag.
 * Fields whose SD_CONF_BSET_* flag is not set for a given disk table
 * entry are left 0 and are not consumed.
 */
static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,		/* throttle only */
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,		/* throttle only */
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,		/* throttle only */
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};
441 
442 #endif
443 
444 #if (defined(__sparc) && !defined(__fibre)) || \
445 	(defined(__i386) || defined(__amd64))
446 
447 
/*
 * Throttle-only tunables (positional sd_tunables initializers; first
 * field is throttle -- see the SD_CONF_BSET_* usage in sd_disk_table).
 */
static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};
471 
472 #endif /* Fibre or not */
473 
/*
 * LSI/Symbios tunables: positional sd_tunables initializers where the
 * first field is throttle and the third is the not-ready retry count
 * (see the SD_CONF_BSET_THROTTLE / SD_CONF_BSET_NRR_COUNT flag usage
 * in sd_disk_table below).
 */
static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};
521 
522 
523 
524 #if (defined(SD_PROP_TST))
525 
/*
 * Test-only configuration, compiled in with -DSD_PROP_TST and applied to
 * the "VENDOR  PRODUCT " sd_disk_table entry below.  Because every field
 * is populated, this initializer also documents the sd_tunables field
 * order: throttle, ctype, not-ready retries, busy retries, reset
 * retries, reserve release time, then min throttle / disksort-disabled /
 * LUN-reset-enabled (unused here).
 */
#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
544 #endif
545 
/* This is similar to the ANSI toupper implementation */
/* NOTE: evaluates C more than once; do not pass an argument with side effects */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
548 
549 /*
550  * Static Driver Configuration Table
551  *
552  * This is the table of disks which need throttle adjustment (or, perhaps
553  * something else as defined by the flags at a future time.)  device_id
554  * is a string consisting of concatenated vid (vendor), pid (product/model)
555  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
556  * the parts of the string are as defined by the sizes in the scsi_inquiry
557  * structure.  Device type is searched as far as the device_id string is
558  * defined.  Flags defines which values are to be set in the driver from the
559  * properties list.
560  *
561  * Entries below which begin and end with a "*" are a special case.
562  * These do not have a specific vendor, and the string which follows
563  * can appear anywhere in the 16 byte PID portion of the inquiry data.
564  *
565  * Entries below which begin and end with a " " (blank) are a special
566  * case. The comparison function will treat multiple consecutive blanks
567  * as equivalent to a single blank. For example, this causes a
568  * sd_disk_table entry of " NEC CDROM " to match a device's id string
569  * of  "NEC       CDROM".
570  *
571  * Note: The MD21 controller type has been obsoleted.
572  *	 ST318202F is a Legacy device
573  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
574  *	 made with an FC connection. The entries here are a legacy.
575  */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	/* FC disks: throttle adjustment only */
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	/* LSI-based OEM arrays: not-ready retry count adjustment */
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	/* Sun storage arrays: several tunables set per entry */
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) ||\
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	/* "*..." patterns match anywhere in the 16-byte PID (see above) */
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	/* Blank-delimited entries: consecutive blanks compare as one (see above) */
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};
747 
748 static const int sd_disk_table_size =
749 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
750 
751 
/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

/*
 * Interconnect type values stored in un_interconnect_type (see the
 * SD_DEFAULT_INTERCONNECT_TYPE discussion at the top of this file).
 */
#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
/* NOTE: "serial" here currently means SATA only; FC types are excluded */
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */
775 
/*
 * NOTE(review): the {0} initializer is redundant for static storage and a
 * kernel mutex should be set up with mutex_init(9F) -- confirm it is
 * initialized before first use.
 */
static kmutex_t sd_sense_mutex = {0};
777 
/*
 * Macros for updates of the driver state
 *
 * New_state() records the current state in un_last_state before moving to
 * the new state s.  Restore_state() moves back to the recorded state
 * (swapping un_state and un_last_state).  Restore_state is wrapped in
 * do { } while (0) so it expands as a single statement; the previous bare
 * { } block broke callers of the form "if (x) Restore_state(un); else ..."
 * because the trailing semicolon ended the if before the else.
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	do { uchar_t tmp = (un)->un_last_state; New_state((un), tmp); } \
	while (0)
785 
/*
 * CDB group table, one row per CDB size class used by the driver.
 * Columns appear to be: CDB length, group opcode base, maximum
 * addressable LBA, and maximum transfer length for that group
 * (confirm against struct sd_cdbinfo in sddef.h).
 */
static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
792 
793 /*
794  * Specifies the number of seconds that must have elapsed since the last
795  * cmd. has completed for a device to be declared idle to the PM framework.
796  */
static int sd_pm_idletime = 1;	/* seconds (see comment above) */
798 
799 /*
800  * Internal function prototypes
801  */
802 
803 #if (defined(__fibre))
804 /*
805  * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
807  * All function names need to be treated this way (even if declared static)
808  * in order to allow the debugger to resolve the names properly.
809  * It is anticipated that in the near future the ssd module will be obsoleted,
810  * at which time this ugliness should go away.
811  */
812 #define	sd_log_trace			ssd_log_trace
813 #define	sd_log_info			ssd_log_info
814 #define	sd_log_err			ssd_log_err
815 #define	sdprobe				ssdprobe
816 #define	sdinfo				ssdinfo
817 #define	sd_prop_op			ssd_prop_op
818 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
819 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
820 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
821 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
822 #define	sd_spin_up_unit			ssd_spin_up_unit
823 #define	sd_enable_descr_sense		ssd_enable_descr_sense
824 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
825 #define	sd_set_mmc_caps			ssd_set_mmc_caps
826 #define	sd_read_unit_properties		ssd_read_unit_properties
827 #define	sd_process_sdconf_file		ssd_process_sdconf_file
828 #define	sd_process_sdconf_table		ssd_process_sdconf_table
829 #define	sd_sdconf_id_match		ssd_sdconf_id_match
830 #define	sd_blank_cmp			ssd_blank_cmp
831 #define	sd_chk_vers1_data		ssd_chk_vers1_data
832 #define	sd_set_vers1_properties		ssd_set_vers1_properties
833 #define	sd_validate_geometry		ssd_validate_geometry
834 
835 #if defined(_SUNOS_VTOC_16)
836 #define	sd_convert_geometry		ssd_convert_geometry
837 #endif
838 
839 #define	sd_resync_geom_caches		ssd_resync_geom_caches
840 #define	sd_read_fdisk			ssd_read_fdisk
841 #define	sd_get_physical_geometry	ssd_get_physical_geometry
842 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
843 #define	sd_update_block_info		ssd_update_block_info
844 #define	sd_swap_efi_gpt			ssd_swap_efi_gpt
845 #define	sd_swap_efi_gpe			ssd_swap_efi_gpe
846 #define	sd_validate_efi			ssd_validate_efi
847 #define	sd_use_efi			ssd_use_efi
848 #define	sd_uselabel			ssd_uselabel
849 #define	sd_build_default_label		ssd_build_default_label
850 #define	sd_has_max_chs_vals		ssd_has_max_chs_vals
851 #define	sd_inq_fill			ssd_inq_fill
852 #define	sd_register_devid		ssd_register_devid
853 #define	sd_get_devid_block		ssd_get_devid_block
854 #define	sd_get_devid			ssd_get_devid
855 #define	sd_create_devid			ssd_create_devid
856 #define	sd_write_deviceid		ssd_write_deviceid
857 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
858 #define	sd_setup_pm			ssd_setup_pm
859 #define	sd_create_pm_components		ssd_create_pm_components
860 #define	sd_ddi_suspend			ssd_ddi_suspend
861 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
862 #define	sd_ddi_resume			ssd_ddi_resume
863 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
864 #define	sdpower				ssdpower
865 #define	sdattach			ssdattach
866 #define	sddetach			ssddetach
867 #define	sd_unit_attach			ssd_unit_attach
868 #define	sd_unit_detach			ssd_unit_detach
869 #define	sd_set_unit_attributes		ssd_set_unit_attributes
870 #define	sd_create_minor_nodes		ssd_create_minor_nodes
871 #define	sd_create_errstats		ssd_create_errstats
872 #define	sd_set_errstats			ssd_set_errstats
873 #define	sd_set_pstats			ssd_set_pstats
874 #define	sddump				ssddump
875 #define	sd_scsi_poll			ssd_scsi_poll
876 #define	sd_send_polled_RQS		ssd_send_polled_RQS
877 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
878 #define	sd_init_event_callbacks		ssd_init_event_callbacks
879 #define	sd_event_callback		ssd_event_callback
880 #define	sd_cache_control		ssd_cache_control
881 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
882 #define	sd_make_device			ssd_make_device
883 #define	sdopen				ssdopen
884 #define	sdclose				ssdclose
885 #define	sd_ready_and_valid		ssd_ready_and_valid
886 #define	sdmin				ssdmin
887 #define	sdread				ssdread
888 #define	sdwrite				ssdwrite
889 #define	sdaread				ssdaread
890 #define	sdawrite			ssdawrite
891 #define	sdstrategy			ssdstrategy
892 #define	sdioctl				ssdioctl
893 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
894 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
895 #define	sd_checksum_iostart		ssd_checksum_iostart
896 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
897 #define	sd_pm_iostart			ssd_pm_iostart
898 #define	sd_core_iostart			ssd_core_iostart
899 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
900 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
901 #define	sd_checksum_iodone		ssd_checksum_iodone
902 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
903 #define	sd_pm_iodone			ssd_pm_iodone
904 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
905 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
906 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
907 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
908 #define	sd_buf_iodone			ssd_buf_iodone
909 #define	sd_uscsi_strategy		ssd_uscsi_strategy
910 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
911 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
912 #define	sd_uscsi_iodone			ssd_uscsi_iodone
913 #define	sd_xbuf_strategy		ssd_xbuf_strategy
914 #define	sd_xbuf_init			ssd_xbuf_init
915 #define	sd_pm_entry			ssd_pm_entry
916 #define	sd_pm_exit			ssd_pm_exit
917 
918 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
919 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
920 
921 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
922 #define	sdintr				ssdintr
923 #define	sd_start_cmds			ssd_start_cmds
924 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
925 #define	sd_bioclone_alloc		ssd_bioclone_alloc
926 #define	sd_bioclone_free		ssd_bioclone_free
927 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
928 #define	sd_shadow_buf_free		ssd_shadow_buf_free
929 #define	sd_print_transport_rejected_message	\
930 					ssd_print_transport_rejected_message
931 #define	sd_retry_command		ssd_retry_command
932 #define	sd_set_retry_bp			ssd_set_retry_bp
933 #define	sd_send_request_sense_command	ssd_send_request_sense_command
934 #define	sd_start_retry_command		ssd_start_retry_command
935 #define	sd_start_direct_priority_command	\
936 					ssd_start_direct_priority_command
937 #define	sd_return_failed_command	ssd_return_failed_command
938 #define	sd_return_failed_command_no_restart	\
939 					ssd_return_failed_command_no_restart
940 #define	sd_return_command		ssd_return_command
941 #define	sd_sync_with_callback		ssd_sync_with_callback
942 #define	sdrunout			ssdrunout
943 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
944 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
945 #define	sd_reduce_throttle		ssd_reduce_throttle
946 #define	sd_restore_throttle		ssd_restore_throttle
947 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
948 #define	sd_init_cdb_limits		ssd_init_cdb_limits
949 #define	sd_pkt_status_good		ssd_pkt_status_good
950 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
951 #define	sd_pkt_status_busy		ssd_pkt_status_busy
952 #define	sd_pkt_status_reservation_conflict	\
953 					ssd_pkt_status_reservation_conflict
954 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
955 #define	sd_handle_request_sense		ssd_handle_request_sense
956 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
957 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
958 #define	sd_validate_sense_data		ssd_validate_sense_data
959 #define	sd_decode_sense			ssd_decode_sense
960 #define	sd_print_sense_msg		ssd_print_sense_msg
961 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
962 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
963 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
964 #define	sd_sense_key_medium_or_hardware_error	\
965 					ssd_sense_key_medium_or_hardware_error
966 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
967 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
968 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
969 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
970 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
971 #define	sd_sense_key_default		ssd_sense_key_default
972 #define	sd_print_retry_msg		ssd_print_retry_msg
973 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
974 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
975 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
976 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
977 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
978 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
979 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
980 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
981 #define	sd_pkt_reason_default		ssd_pkt_reason_default
982 #define	sd_reset_target			ssd_reset_target
983 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
984 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
985 #define	sd_taskq_create			ssd_taskq_create
986 #define	sd_taskq_delete			ssd_taskq_delete
987 #define	sd_media_change_task		ssd_media_change_task
988 #define	sd_handle_mchange		ssd_handle_mchange
989 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
990 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
991 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
992 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
995 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
996 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
997 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
998 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
999 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1000 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1001 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1002 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1003 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1004 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1005 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1006 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1007 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1008 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1009 #define	sd_alloc_rqs			ssd_alloc_rqs
1010 #define	sd_free_rqs			ssd_free_rqs
1011 #define	sd_dump_memory			ssd_dump_memory
1012 #define	sd_uscsi_ioctl			ssd_uscsi_ioctl
1013 #define	sd_get_media_info		ssd_get_media_info
1014 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1015 #define	sd_dkio_get_geometry		ssd_dkio_get_geometry
1016 #define	sd_dkio_set_geometry		ssd_dkio_set_geometry
1017 #define	sd_dkio_get_partition		ssd_dkio_get_partition
1018 #define	sd_dkio_set_partition		ssd_dkio_set_partition
1019 #define	sd_dkio_partition		ssd_dkio_partition
1020 #define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
1021 #define	sd_dkio_get_efi			ssd_dkio_get_efi
1022 #define	sd_build_user_vtoc		ssd_build_user_vtoc
1023 #define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
1024 #define	sd_dkio_set_efi			ssd_dkio_set_efi
1025 #define	sd_build_label_vtoc		ssd_build_label_vtoc
1026 #define	sd_write_label			ssd_write_label
1027 #define	sd_clear_vtoc			ssd_clear_vtoc
1028 #define	sd_clear_efi			ssd_clear_efi
1029 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1030 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1031 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1032 #define	sd_dkio_get_mboot		ssd_dkio_get_mboot
1033 #define	sd_dkio_set_mboot		ssd_dkio_set_mboot
1034 #define	sd_setup_default_geometry	ssd_setup_default_geometry
1035 #define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
1036 #define	sd_check_mhd			ssd_check_mhd
1037 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1038 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1039 #define	sd_sname			ssd_sname
1040 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1041 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1042 #define	sd_take_ownership		ssd_take_ownership
1043 #define	sd_reserve_release		ssd_reserve_release
1044 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1045 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1046 #define	sd_persistent_reservation_in_read_keys	\
1047 					ssd_persistent_reservation_in_read_keys
1048 #define	sd_persistent_reservation_in_read_resv	\
1049 					ssd_persistent_reservation_in_read_resv
1050 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1051 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1052 #define	sd_mhdioc_release		ssd_mhdioc_release
1053 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1054 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1055 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1056 #define	sr_change_blkmode		ssr_change_blkmode
1057 #define	sr_change_speed			ssr_change_speed
1058 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1059 #define	sr_pause_resume			ssr_pause_resume
1060 #define	sr_play_msf			ssr_play_msf
1061 #define	sr_play_trkind			ssr_play_trkind
1062 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1063 #define	sr_read_subchannel		ssr_read_subchannel
1064 #define	sr_read_tocentry		ssr_read_tocentry
1065 #define	sr_read_tochdr			ssr_read_tochdr
1066 #define	sr_read_cdda			ssr_read_cdda
1067 #define	sr_read_cdxa			ssr_read_cdxa
1068 #define	sr_read_mode1			ssr_read_mode1
1069 #define	sr_read_mode2			ssr_read_mode2
1070 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1071 #define	sr_sector_mode			ssr_sector_mode
1072 #define	sr_eject			ssr_eject
1073 #define	sr_ejected			ssr_ejected
1074 #define	sr_check_wp			ssr_check_wp
1075 #define	sd_check_media			ssd_check_media
1076 #define	sd_media_watch_cb		ssd_media_watch_cb
1077 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1078 #define	sr_volume_ctrl			ssr_volume_ctrl
1079 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1080 #define	sd_log_page_supported		ssd_log_page_supported
1081 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1082 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1083 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1084 #define	sd_range_lock			ssd_range_lock
1085 #define	sd_get_range			ssd_get_range
1086 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1087 #define	sd_range_unlock			ssd_range_unlock
1088 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1089 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1090 
1091 #define	sd_iostart_chain		ssd_iostart_chain
1092 #define	sd_iodone_chain			ssd_iodone_chain
1093 #define	sd_initpkt_map			ssd_initpkt_map
1094 #define	sd_destroypkt_map		ssd_destroypkt_map
1095 #define	sd_chain_type_map		ssd_chain_type_map
1096 #define	sd_chain_index_map		ssd_chain_index_map
1097 
1098 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1099 #define	sd_failfast_flushq		ssd_failfast_flushq
1100 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1101 
1102 #define	sd_is_lsi			ssd_is_lsi
1103 
1104 #endif	/* #if (defined(__fibre)) */
1105 
1106 
1107 int _init(void);
1108 int _fini(void);
1109 int _info(struct modinfo *modinfop);
1110 
1111 /*PRINTFLIKE3*/
1112 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1113 /*PRINTFLIKE3*/
1114 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1115 /*PRINTFLIKE3*/
1116 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1117 
1118 static int sdprobe(dev_info_t *devi);
1119 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1120     void **result);
1121 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1122     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1123 
1124 /*
1125  * Smart probe for parallel scsi
1126  */
1127 static void sd_scsi_probe_cache_init(void);
1128 static void sd_scsi_probe_cache_fini(void);
1129 static void sd_scsi_clear_probe_cache(void);
1130 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1131 
1132 static int	sd_spin_up_unit(struct sd_lun *un);
1133 #ifdef _LP64
1134 static void	sd_enable_descr_sense(struct sd_lun *un);
1135 static void	sd_reenable_dsense_task(void *arg);
1136 #endif /* _LP64 */
1137 
1138 static void	sd_set_mmc_caps(struct sd_lun *un);
1139 
1140 static void sd_read_unit_properties(struct sd_lun *un);
1141 static int  sd_process_sdconf_file(struct sd_lun *un);
1142 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1143     int *data_list, sd_tunables *values);
1144 static void sd_process_sdconf_table(struct sd_lun *un);
1145 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1146 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1147 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1148 	int list_len, char *dataname_ptr);
1149 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1150     sd_tunables *prop_list);
1151 static int  sd_validate_geometry(struct sd_lun *un, int path_flag);
1152 
1153 #if defined(_SUNOS_VTOC_16)
1154 static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
1155 #endif
1156 
1157 static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
1158 	int path_flag);
1159 static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
1160 	int path_flag);
1161 static void sd_get_physical_geometry(struct sd_lun *un,
1162 	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
1163 static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
1164 	int lbasize);
1165 static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
1166 static void sd_swap_efi_gpt(efi_gpt_t *);
1167 static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
1168 static int sd_validate_efi(efi_gpt_t *);
1169 static int sd_use_efi(struct sd_lun *, int);
1170 static void sd_build_default_label(struct sd_lun *un);
1171 
1172 #if defined(_FIRMWARE_NEEDS_FDISK)
1173 static int  sd_has_max_chs_vals(struct ipart *fdp);
1174 #endif
1175 static void sd_inq_fill(char *p, int l, char *s);
1176 
1177 
1178 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1179     int reservation_flag);
1180 static daddr_t  sd_get_devid_block(struct sd_lun *un);
1181 static int  sd_get_devid(struct sd_lun *un);
1182 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1183 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1184 static int  sd_write_deviceid(struct sd_lun *un);
1185 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1186 static int  sd_check_vpd_page_support(struct sd_lun *un);
1187 
1188 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1189 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1190 
1191 static int  sd_ddi_suspend(dev_info_t *devi);
1192 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1193 static int  sd_ddi_resume(dev_info_t *devi);
1194 static int  sd_ddi_pm_resume(struct sd_lun *un);
1195 static int  sdpower(dev_info_t *devi, int component, int level);
1196 
1197 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1198 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1199 static int  sd_unit_attach(dev_info_t *devi);
1200 static int  sd_unit_detach(dev_info_t *devi);
1201 
1202 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1203 static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
1204 static void sd_create_errstats(struct sd_lun *un, int instance);
1205 static void sd_set_errstats(struct sd_lun *un);
1206 static void sd_set_pstats(struct sd_lun *un);
1207 
1208 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1209 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1210 static int  sd_send_polled_RQS(struct sd_lun *un);
1211 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1212 
1213 #if (defined(__fibre))
1214 /*
1215  * Event callbacks (photon)
1216  */
1217 static void sd_init_event_callbacks(struct sd_lun *un);
1218 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1219 #endif
1220 
1221 /*
1222  * Defines for sd_cache_control
1223  */
1224 
1225 #define	SD_CACHE_ENABLE		1
1226 #define	SD_CACHE_DISABLE	0
1227 #define	SD_CACHE_NOCHANGE	-1
1228 
1229 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1230 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1231 static dev_t sd_make_device(dev_info_t *devi);
1232 
1233 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1234 	uint64_t capacity);
1235 
1236 /*
1237  * Driver entry point functions.
1238  */
1239 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1240 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1241 static int  sd_ready_and_valid(struct sd_lun *un);
1242 
1243 static void sdmin(struct buf *bp);
1244 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1245 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1246 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1247 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1248 
1249 static int sdstrategy(struct buf *bp);
1250 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1251 
1252 /*
1253  * Function prototypes for layering functions in the iostart chain.
1254  */
1255 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1256 	struct buf *bp);
1257 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1258 	struct buf *bp);
1259 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1260 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1261 	struct buf *bp);
1262 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1263 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1264 
1265 /*
1266  * Function prototypes for layering functions in the iodone chain.
1267  */
1268 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1269 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1270 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1271 	struct buf *bp);
1272 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1273 	struct buf *bp);
1274 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1275 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1276 	struct buf *bp);
1277 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1278 
1279 /*
1280  * Prototypes for functions to support buf(9S) based IO.
1281  */
1282 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1283 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1284 static void sd_destroypkt_for_buf(struct buf *);
1285 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1286 	struct buf *bp, int flags,
1287 	int (*callback)(caddr_t), caddr_t callback_arg,
1288 	diskaddr_t lba, uint32_t blockcount);
1289 #if defined(__i386) || defined(__amd64)
1290 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1291 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1292 #endif /* defined(__i386) || defined(__amd64) */
1293 
1294 /*
1295  * Prototypes for functions to support USCSI IO.
1296  */
1297 static int sd_uscsi_strategy(struct buf *bp);
1298 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1299 static void sd_destroypkt_for_uscsi(struct buf *);
1300 
1301 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1302 	uchar_t chain_type, void *pktinfop);
1303 
1304 static int  sd_pm_entry(struct sd_lun *un);
1305 static void sd_pm_exit(struct sd_lun *un);
1306 
1307 static void sd_pm_idletimeout_handler(void *arg);
1308 
1309 /*
1310  * sd_core internal functions (used at the sd_core_io layer).
1311  */
1312 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1313 static void sdintr(struct scsi_pkt *pktp);
1314 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1315 
1316 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
1317 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
1318 	int path_flag);
1319 
1320 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1321 	daddr_t blkno, int (*func)(struct buf *));
1322 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1323 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1324 static void sd_bioclone_free(struct buf *bp);
1325 static void sd_shadow_buf_free(struct buf *bp);
1326 
1327 static void sd_print_transport_rejected_message(struct sd_lun *un,
1328 	struct sd_xbuf *xp, int code);
1329 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1330     void *arg, int code);
1331 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1332     void *arg, int code);
1333 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1334     void *arg, int code);
1335 
1336 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1337 	int retry_check_flag,
1338 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1339 		int c),
1340 	void *user_arg, int failure_code,  clock_t retry_delay,
1341 	void (*statp)(kstat_io_t *));
1342 
1343 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1344 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1345 
1346 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1347 	struct scsi_pkt *pktp);
1348 static void sd_start_retry_command(void *arg);
1349 static void sd_start_direct_priority_command(void *arg);
1350 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1351 	int errcode);
1352 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1353 	struct buf *bp, int errcode);
1354 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1355 static void sd_sync_with_callback(struct sd_lun *un);
1356 static int sdrunout(caddr_t arg);
1357 
1358 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1359 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1360 
1361 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1362 static void sd_restore_throttle(void *arg);
1363 
1364 static void sd_init_cdb_limits(struct sd_lun *un);
1365 
1366 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 
1369 /*
1370  * Error handling functions
1371  */
1372 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1377 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1382 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1383 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1384 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1385 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1386 	struct sd_xbuf *xp);
1387 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1388 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1389 
1390 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1391 	void *arg, int code);
1392 
1393 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1394 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1395 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1396 	uint8_t *sense_datap,
1397 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1398 static void sd_sense_key_not_ready(struct sd_lun *un,
1399 	uint8_t *sense_datap,
1400 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1401 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1402 	uint8_t *sense_datap,
1403 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1404 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1405 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1406 static void sd_sense_key_unit_attention(struct sd_lun *un,
1407 	uint8_t *sense_datap,
1408 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1409 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1410 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1411 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1412 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1413 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1414 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1415 static void sd_sense_key_default(struct sd_lun *un,
1416 	uint8_t *sense_datap,
1417 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1418 
1419 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1420 	void *arg, int flag);
1421 
1422 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1423 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1424 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1425 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1426 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1427 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1428 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1429 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1430 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1431 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1432 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1433 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1434 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1435 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1436 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1437 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1438 
1439 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1440 
1441 static void sd_start_stop_unit_callback(void *arg);
1442 static void sd_start_stop_unit_task(void *arg);
1443 
1444 static void sd_taskq_create(void);
1445 static void sd_taskq_delete(void);
1446 static void sd_media_change_task(void *arg);
1447 
1448 static int sd_handle_mchange(struct sd_lun *un);
1449 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1450 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1451 	uint32_t *lbap, int path_flag);
1452 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1453 	uint32_t *lbap, int path_flag);
1454 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1455 	int path_flag);
1456 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1457 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1458 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1459 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1460 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1461 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1462 	uchar_t usr_cmd, uchar_t *usr_bufp);
1463 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1464 	struct dk_callback *dkc);
1465 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1466 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1467 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1468 	uchar_t *bufaddr, uint_t buflen);
1469 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1470 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1471 	uchar_t *bufaddr, uint_t buflen, char feature);
1472 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1473 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1474 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1475 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1476 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1477 	size_t buflen, daddr_t start_block, int path_flag);
1478 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1479 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1480 	path_flag)
1481 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1482 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1483 	path_flag)
1484 
1485 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1486 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1487 	uint16_t param_ptr, int path_flag);
1488 
1489 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1490 static void sd_free_rqs(struct sd_lun *un);
1491 
1492 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1493 	uchar_t *data, int len, int fmt);
1494 static void sd_panic_for_res_conflict(struct sd_lun *un);
1495 
1496 /*
1497  * Disk Ioctl Function Prototypes
1498  */
1499 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1500 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1501 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1502 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1503 	int geom_validated);
1504 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1505 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1506 	int geom_validated);
1507 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1508 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1509 	int geom_validated);
1510 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1511 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1512 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1513 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1514 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1515 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1516 static int sd_write_label(dev_t dev);
1517 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1518 static void sd_clear_vtoc(struct sd_lun *un);
1519 static void sd_clear_efi(struct sd_lun *un);
1520 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1521 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1522 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1523 static void sd_setup_default_geometry(struct sd_lun *un);
1524 #if defined(__i386) || defined(__amd64)
1525 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1526 #endif
1527 
1528 /*
1529  * Multi-host Ioctl Prototypes
1530  */
1531 static int sd_check_mhd(dev_t dev, int interval);
1532 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1533 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1534 static char *sd_sname(uchar_t status);
1535 static void sd_mhd_resvd_recover(void *arg);
1536 static void sd_resv_reclaim_thread();
1537 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1538 static int sd_reserve_release(dev_t dev, int cmd);
1539 static void sd_rmv_resv_reclaim_req(dev_t dev);
1540 static void sd_mhd_reset_notify_cb(caddr_t arg);
1541 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1542 	mhioc_inkeys_t *usrp, int flag);
1543 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1544 	mhioc_inresvs_t *usrp, int flag);
1545 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1546 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1547 static int sd_mhdioc_release(dev_t dev);
1548 static int sd_mhdioc_register_devid(dev_t dev);
1549 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1550 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1551 
1552 /*
1553  * SCSI removable prototypes
1554  */
1555 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1556 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1557 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1558 static int sr_pause_resume(dev_t dev, int mode);
1559 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1560 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1561 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1562 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1563 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1564 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1565 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1567 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1568 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1569 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1570 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1571 static int sr_eject(dev_t dev);
1572 static void sr_ejected(register struct sd_lun *un);
1573 static int sr_check_wp(dev_t dev);
1574 static int sd_check_media(dev_t dev, enum dkio_state state);
1575 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576 static void sd_delayed_cv_broadcast(void *arg);
1577 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1578 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1579 
1580 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1581 
1582 /*
1583  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1584  */
1585 static void sd_check_for_writable_cd(struct sd_lun *un);
1586 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1587 static void sd_wm_cache_destructor(void *wm, void *un);
1588 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1589 	daddr_t endb, ushort_t typ);
1590 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1591 	daddr_t endb);
1592 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1593 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1594 static void sd_read_modify_write_task(void * arg);
1595 static int
1596 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1597 	struct buf **bpp);
1598 
1599 
1600 /*
1601  * Function prototypes for failfast support.
1602  */
1603 static void sd_failfast_flushq(struct sd_lun *un);
1604 static int sd_failfast_flushq_callback(struct buf *bp);
1605 
1606 /*
1607  * Function prototypes to check for lsi devices
1608  */
1609 static void sd_is_lsi(struct sd_lun *un);
1610 
1611 /*
1612  * Function prototypes for x86 support
1613  */
1614 #if defined(__i386) || defined(__amd64)
1615 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1616 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1617 #endif
1618 
1619 /*
1620  * Constants for failfast support:
1621  *
1622  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1623  * failfast processing being performed.
1624  *
1625  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1626  * failfast processing on all bufs with B_FAILFAST set.
1627  */
1628 
1629 #define	SD_FAILFAST_INACTIVE		0
1630 #define	SD_FAILFAST_ACTIVE		1
1631 
1632 /*
1633  * Bitmask to control behavior of buf(9S) flushes when a transition to
1634  * the failfast state occurs. Optional bits include:
1635  *
1636  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1637  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1638  * be flushed.
1639  *
1640  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1641  * driver, in addition to the regular wait queue. This includes the xbuf
1642  * queues. When clear, only the driver's wait queue will be flushed.
1643  */
1644 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1645 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1646 
1647 /*
1648  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1649  * to flush all queues within the driver.
1650  */
1651 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1652 
1653 
1654 /*
1655  * SD Testing Fault Injection
1656  */
1657 #ifdef SD_FAULT_INJECTION
1658 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1659 static void sd_faultinjection(struct scsi_pkt *pktp);
1660 static void sd_injection_log(char *buf, struct sd_lun *un);
1661 #endif
1662 
1663 /*
1664  * Device driver ops vector
1665  */
/*
 * Character/block device entry points (cb_ops). Entry points this driver
 * does not support are filled with nodev/nochpoll.
 */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab: not a STREAMS driver */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread, 		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1686 
/* Device operations (dev_ops); referenced by modldrv below. */
static struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	sdpower			/* power */
};
1700 
1701 
1702 /*
1703  * This is the loadable module wrapper.
1704  */
1705 #include <sys/modctl.h>
1706 
/* Loadable-module linkage for a driver module. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops */
};
1712 
1713 
/* Module linkage list: a single driver module, NULL-terminated. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
1719 
1720 
/*
 * Additional sense-code (ASC/ASCQ) to message-string mappings beyond the
 * generic tables. The list is terminated by the 0xffff/NULL sentinel entry.
 */
static struct scsi_asq_key_strings sd_additional_codes[] = {
	0x81, 0, "Logical Unit is Reserved",
	0x85, 0, "Audio Address Not Valid",
	0xb6, 0, "Media Load Mechanism Failed",
	0xB9, 0, "Audio Play Operation Aborted",
	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
	0x53, 2, "Medium removal prevented",
	0x6f, 0, "Authentication failed during key exchange",
	0x6f, 1, "Key not present",
	0x6f, 2, "Key not established",
	0x6f, 3, "Read without proper authentication",
	0x6f, 4, "Mismatched region to this logical unit",
	0x6f, 5, "Region reset count error",
	0xffff, 0x0, NULL	/* terminator */
};
1736 
1737 
1738 /*
1739  * Struct for passing printing information for sense data messages
1740  */
struct sd_sense_info {
	int	ssi_severity;	/* message severity */
	int	ssi_pfa_flag;	/* predictive-failure (PFA) indicator — */
				/* presumably; confirm at call sites */
};
1745 
1746 /*
1747  * Table of function pointers for iostart-side routines. Seperate "chains"
1748  * of layered function calls are formed by placing the function pointers
1749  * sequentially in the desired order. Functions are called according to an
1750  * incrementing table index ordering. The last function in each chain must
1751  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1752  * in the sd_iodone_chain[] array.
1753  *
1754  * Note: It may seem more natural to organize both the iostart and iodone
1755  * functions together, into an array of structures (or some similar
1756  * organization) with a common index, rather than two seperate arrays which
1757  * must be maintained in synchronization. The purpose of this division is
1758  * to achiece improved performance: individual arrays allows for more
1759  * effective cache line utilization on certain platforms.
1760  */
1761 
1762 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1763 
1764 
/*
 * Entry indices into this array are given by the SD_CHAIN_*_IOSTART
 * macros defined immediately below; keep the two in sync.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */
};
1813 
1814 /*
1815  * Macros to locate the first function of each iostart chain in the
1816  * sd_iostart_chain[] array. These are located by the index in the array.
1817  */
1818 #define	SD_CHAIN_DISK_IOSTART			0
1819 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1820 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1821 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1822 #define	SD_CHAIN_CHKSUM_IOSTART			12
1823 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1824 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1825 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1826 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1827 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1828 
1829 
1830 /*
1831  * Table of function pointers for the iodone-side routines for the driver-
1832  * internal layering mechanism.  The calling sequence for iodone routines
1833  * uses a decrementing table index, so the last routine called in a chain
1834  * must be at the lowest array index location for that chain.  The last
1835  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1836  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1837  * of the functions in an iodone side chain must correspond to the ordering
1838  * of the iostart routines for that chain.  Note that there is no iodone
1839  * side routine that corresponds to sd_core_iostart(), so there is no
1840  * entry in the table for this.
1841  */
1842 
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */
};
1891 
1892 
1893 /*
1894  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1895  * each iodone-side chain. These are located by the array index, but as the
1896  * iodone side functions are called in a decrementing-index order, the
1897  * highest index number in each chain must be specified (as these correspond
1898  * to the first function in the iodone chain that will be called by the core
1899  * at IO completion time).
1900  */
1901 
1902 #define	SD_CHAIN_DISK_IODONE			2
1903 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1904 #define	SD_CHAIN_RMMEDIA_IODONE			8
1905 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1906 #define	SD_CHAIN_CHKSUM_IODONE			15
1907 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1908 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1909 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1910 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1911 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1912 
1913 
1914 
1915 
1916 /*
1917  * Array to map a layering chain index to the appropriate initpkt routine.
1918  * The redundant entries are present so that the index used for accessing
1919  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1920  * with this table as well.
1921  */
1922 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1923 
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

};
1973 
1974 
1975 /*
1976  * Array to map a layering chain index to the appropriate destroypktpkt routine.
1977  * The redundant entries are present so that the index used for accessing
1978  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1979  * with this table as well.
1980  */
1981 typedef void (*sd_destroypkt_t)(struct buf *);
1982 
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

};
2032 
2033 
2034 
2035 /*
2036  * Array to map a layering chain index to the appropriate chain "type".
2037  * The chain type indicates a specific property/usage of the chain.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 
2043 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2044 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2045 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2046 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2047 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2048 						/* (for error recovery) */
2049 
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
};
2098 
2099 
2100 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2101 #define	SD_IS_BUFIO(xp)			\
2102 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2103 
2104 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2105 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2106 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2107 
2108 
2109 
2110 /*
2111  * Struct, array, and macros to map a specific chain to the appropriate
2112  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2113  *
2114  * The sd_chain_index_map[] array is used at attach time to set the various
2115  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2116  * chain to be used with the instance. This allows different instances to use
2117  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2118  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2119  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2120  * dynamically & without the use of locking; and (2) a layer may update the
2121  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2122  * to allow for deferred processing of an IO within the same chain from a
2123  * different execution context.
2124  */
2125 
struct sd_chain_index {
	int	sci_iostart_index;	/* entry index into sd_iostart_chain[] */
	int	sci_iodone_index;	/* entry index into sd_iodone_chain[] */
};

/* Pairs each iostart chain with its corresponding iodone chain. */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
};
2143 
2144 
2145 /*
2146  * The following are indexes into the sd_chain_index_map[] array.
2147  */
2148 
2149 /* un->un_buf_chain_type must be set to one of these */
2150 #define	SD_CHAIN_INFO_DISK		0
2151 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2152 #define	SD_CHAIN_INFO_RMMEDIA		2
2153 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2154 #define	SD_CHAIN_INFO_CHKSUM		4
2155 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2156 
2157 /* un->un_uscsi_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_USCSI_CMD		6
2159 /* USCSI with PM disabled is the same as DIRECT */
2160 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2161 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2162 
2163 /* un->un_direct_chain_type must be set to one of these */
2164 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2165 
2166 /* un->un_priority_chain_type must be set to one of these */
2167 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2168 
2169 /* size for devid inquiries */
2170 #define	MAX_INQUIRY_SIZE		0xF0
2171 
2172 /*
2173  * Macros used by functions to pass a given buf(9S) struct along to the
2174  * next function in the layering chain for further processing.
2175  *
2176  * In the following macros, passing more than three arguments to the called
2177  * routines causes the optimizer for the SPARC compiler to stop doing tail
2178  * call elimination which results in significant performance degradation.
2179  */
2180 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2181 	((*(sd_iostart_chain[index]))(index, un, bp))
2182 
2183 #define	SD_BEGIN_IODONE(index, un, bp)	\
2184 	((*(sd_iodone_chain[index]))(index, un, bp))
2185 
2186 #define	SD_NEXT_IOSTART(index, un, bp)				\
2187 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2188 
2189 #define	SD_NEXT_IODONE(index, un, bp)				\
2190 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2191 
2192 /*
2193  *    Function: _init
2194  *
2195  * Description: This is the driver _init(9E) entry point.
2196  *
2197  * Return Code: Returns the value from mod_install(9F) or
2198  *		ddi_soft_state_init(9F) as appropriate.
2199  *
2200  *     Context: Called when driver module loaded.
2201  */
2202 
2203 int
2204 _init(void)
2205 {
2206 	int	err;
2207 
2208 	/* establish driver name from module name */
2209 	sd_label = mod_modname(&modlinkage);
2210 
2211 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2212 		SD_MAXUNIT);
2213 
2214 	if (err != 0) {
2215 		return (err);
2216 	}
2217 
2218 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2219 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2220 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2221 
2222 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2223 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2224 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2225 
2226 	/*
2227 	 * it's ok to init here even for fibre device
2228 	 */
2229 	sd_scsi_probe_cache_init();
2230 
2231 	/*
2232 	 * Creating taskq before mod_install ensures that all callers (threads)
2233 	 * that enter the module after a successfull mod_install encounter
2234 	 * a valid taskq.
2235 	 */
2236 	sd_taskq_create();
2237 
2238 	err = mod_install(&modlinkage);
2239 	if (err != 0) {
2240 		/* delete taskq if install fails */
2241 		sd_taskq_delete();
2242 
2243 		mutex_destroy(&sd_detach_mutex);
2244 		mutex_destroy(&sd_log_mutex);
2245 		mutex_destroy(&sd_label_mutex);
2246 
2247 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2249 		cv_destroy(&sd_tr.srq_inprocess_cv);
2250 
2251 		sd_scsi_probe_cache_fini();
2252 
2253 		ddi_soft_state_fini(&sd_state);
2254 		return (err);
2255 	}
2256 
2257 	return (err);
2258 }
2259 
2260 
2261 /*
2262  *    Function: _fini
2263  *
2264  * Description: This is the driver _fini(9E) entry point.
2265  *
2266  * Return Code: Returns the value from mod_remove(9F)
2267  *
2268  *     Context: Called when driver module is unloaded.
2269  */
2270 
2271 int
2272 _fini(void)
2273 {
2274 	int err;
2275 
2276 	if ((err = mod_remove(&modlinkage)) != 0) {
2277 		return (err);
2278 	}
2279 
2280 	sd_taskq_delete();
2281 
2282 	mutex_destroy(&sd_detach_mutex);
2283 	mutex_destroy(&sd_log_mutex);
2284 	mutex_destroy(&sd_label_mutex);
2285 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2286 
2287 	sd_scsi_probe_cache_fini();
2288 
2289 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2290 	cv_destroy(&sd_tr.srq_inprocess_cv);
2291 
2292 	ddi_soft_state_fini(&sd_state);
2293 
2294 	return (err);
2295 }
2296 
2297 
2298 /*
2299  *    Function: _info
2300  *
2301  * Description: This is the driver _info(9E) entry point.
2302  *
2303  *   Arguments: modinfop - pointer to the driver modinfo structure
2304  *
2305  * Return Code: Returns the value from mod_info(9F).
2306  *
2307  *     Context: Kernel thread context
2308  */
2309 
int
_info(struct modinfo *modinfop)
{
	/* Delegate entirely to mod_info(9F). */
	return (mod_info(&modlinkage, modinfop));
}
2315 
2316 
2317 /*
2318  * The following routines implement the driver message logging facility.
2319  * They provide component- and level- based debug output filtering.
2320  * Output may also be restricted to messages for a single instance by
2321  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2322  * to NULL, then messages for all instances are printed.
2323  *
2324  * These routines have been cloned from each other due to the language
2325  * constraints of macros and variable argument list processing.
2326  */
2327 
2328 
2329 /*
2330  *    Function: sd_log_err
2331  *
2332  * Description: This routine is called by the SD_ERROR macro for debug
2333  *		logging of error conditions.
2334  *
2335  *   Arguments: comp - driver component being logged
2336  *		dev  - pointer to driver info structure
2337  *		fmt  - error string and format to be logged
2338  */
2339 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		/* sd_log_mutex serializes access to the shared sd_log_buf. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; this assumes
		 * sd_log_buf is large enough for any message — consider
		 * vsnprintf() with the buffer size (declared elsewhere).
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Mirror the message into the fault-injection log when enabled. */
	if (un->sd_injection_mask & comp) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2376 
2377 
2378 /*
2379  *    Function: sd_log_info
2380  *
2381  * Description: This routine is called by the SD_INFO macro for debug
2382  *		logging of general purpose informational conditions.
2383  *
2384  *   Arguments: comp - driver component being logged
2385  *		dev  - pointer to driver info structure
2386  *		fmt  - info string and format to be logged
2387  */
2388 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		/* sd_log_mutex serializes access to the shared sd_log_buf. */
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; assumes sd_log_buf
		 * is large enough for any message — see sd_log_err().
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Mirror the message into the fault-injection log when enabled. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2426 
2427 
2428 /*
2429  *    Function: sd_log_trace
2430  *
2431  * Description: This routine is called by the SD_TRACE macro for debug
2432  *		logging of trace conditions (i.e. function entry/exit).
2433  *
 *   Arguments: component - driver component being logged
 *		un        - driver soft state (unit) structure
2436  *		fmt  - trace string and format to be logged
2437  */
2438 
2439 static void
2440 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2441 {
2442 	va_list		ap;
2443 	dev_info_t	*dev;
2444 
2445 	ASSERT(un != NULL);
2446 	dev = SD_DEVINFO(un);
2447 	ASSERT(dev != NULL);
2448 
2449 	/*
2450 	 * Filter messages based on the global component and level masks.
2451 	 * Also print if un matches the value of sd_debug_un, or if
2452 	 * sd_debug_un is set to NULL.
2453 	 */
2454 	if ((sd_component_mask & component) &&
2455 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2456 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2457 		mutex_enter(&sd_log_mutex);
2458 		va_start(ap, fmt);
2459 		(void) vsprintf(sd_log_buf, fmt, ap);
2460 		va_end(ap);
2461 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2462 		mutex_exit(&sd_log_mutex);
2463 	}
2464 #ifdef SD_FAULT_INJECTION
2465 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2466 	if (un->sd_injection_mask & component) {
2467 		mutex_enter(&sd_log_mutex);
2468 		va_start(ap, fmt);
2469 		(void) vsprintf(sd_log_buf, fmt, ap);
2470 		va_end(ap);
2471 		sd_injection_log(sd_log_buf, un);
2472 		mutex_exit(&sd_log_mutex);
2473 	}
2474 #endif
2475 }
2476 
2477 
2478 /*
2479  *    Function: sdprobe
2480  *
2481  * Description: This is the driver probe(9e) entry point function.
2482  *
2483  *   Arguments: devi - opaque device info handle
2484  *
2485  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2486  *              DDI_PROBE_FAILURE: If the probe failed.
2487  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2488  *				   but may be present in the future.
2489  */
2490 
2491 static int
2492 sdprobe(dev_info_t *devi)
2493 {
2494 	struct scsi_device	*devp;
2495 	int			rval;
2496 	int			instance;
2497 
2498 	/*
2499 	 * if it wasn't for pln, sdprobe could actually be nulldev
2500 	 * in the "__fibre" case.
2501 	 */
2502 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2503 		return (DDI_PROBE_DONTCARE);
2504 	}
2505 
2506 	devp = ddi_get_driver_private(devi);
2507 
2508 	if (devp == NULL) {
2509 		/* Ooops... nexus driver is mis-configured... */
2510 		return (DDI_PROBE_FAILURE);
2511 	}
2512 
2513 	instance = ddi_get_instance(devi);
2514 
2515 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2516 		return (DDI_PROBE_PARTIAL);
2517 	}
2518 
2519 	/*
2520 	 * Call the SCSA utility probe routine to see if we actually
2521 	 * have a target at this SCSI nexus.
2522 	 */
2523 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2524 	case SCSIPROBE_EXISTS:
2525 		switch (devp->sd_inq->inq_dtype) {
2526 		case DTYPE_DIRECT:
2527 			rval = DDI_PROBE_SUCCESS;
2528 			break;
2529 		case DTYPE_RODIRECT:
2530 			/* CDs etc. Can be removable media */
2531 			rval = DDI_PROBE_SUCCESS;
2532 			break;
2533 		case DTYPE_OPTICAL:
2534 			/*
2535 			 * Rewritable optical driver HP115AA
2536 			 * Can also be removable media
2537 			 */
2538 
2539 			/*
2540 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2541 			 * pre solaris 9 sparc sd behavior is required
2542 			 *
2543 			 * If first time through and sd_dtype_optical_bind
2544 			 * has not been set in /etc/system check properties
2545 			 */
2546 
2547 			if (sd_dtype_optical_bind  < 0) {
2548 			    sd_dtype_optical_bind = ddi_prop_get_int
2549 				(DDI_DEV_T_ANY,	devi,	0,
2550 				"optical-device-bind",	1);
2551 			}
2552 
2553 			if (sd_dtype_optical_bind == 0) {
2554 				rval = DDI_PROBE_FAILURE;
2555 			} else {
2556 				rval = DDI_PROBE_SUCCESS;
2557 			}
2558 			break;
2559 
2560 		case DTYPE_NOTPRESENT:
2561 		default:
2562 			rval = DDI_PROBE_FAILURE;
2563 			break;
2564 		}
2565 		break;
2566 	default:
2567 		rval = DDI_PROBE_PARTIAL;
2568 		break;
2569 	}
2570 
2571 	/*
2572 	 * This routine checks for resource allocation prior to freeing,
2573 	 * so it will take care of the "smart probing" case where a
2574 	 * scsi_probe() may or may not have been issued and will *not*
2575 	 * free previously-freed resources.
2576 	 */
2577 	scsi_unprobe(devp);
2578 	return (rval);
2579 }
2580 
2581 
2582 /*
2583  *    Function: sdinfo
2584  *
2585  * Description: This is the driver getinfo(9e) entry point function.
2586  * 		Given the device number, return the devinfo pointer from
2587  *		the scsi_device structure or the instance number
2588  *		associated with the dev_t.
2589  *
2590  *   Arguments: dip     - pointer to device info structure
2591  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2592  *			  DDI_INFO_DEVT2INSTANCE)
2593  *		arg     - driver dev_t
2594  *		resultp - user buffer for request response
2595  *
2596  * Return Code: DDI_SUCCESS
2597  *              DDI_FAILURE
2598  */
2599 /* ARGSUSED */
2600 static int
2601 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2602 {
2603 	struct sd_lun	*un;
2604 	dev_t		dev;
2605 	int		instance;
2606 	int		error;
2607 
2608 	switch (infocmd) {
2609 	case DDI_INFO_DEVT2DEVINFO:
2610 		dev = (dev_t)arg;
2611 		instance = SDUNIT(dev);
2612 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2613 			return (DDI_FAILURE);
2614 		}
2615 		*result = (void *) SD_DEVINFO(un);
2616 		error = DDI_SUCCESS;
2617 		break;
2618 	case DDI_INFO_DEVT2INSTANCE:
2619 		dev = (dev_t)arg;
2620 		instance = SDUNIT(dev);
2621 		*result = (void *)(uintptr_t)instance;
2622 		error = DDI_SUCCESS;
2623 		break;
2624 	default:
2625 		error = DDI_FAILURE;
2626 	}
2627 	return (error);
2628 }
2629 
2630 /*
2631  *    Function: sd_prop_op
2632  *
2633  * Description: This is the driver prop_op(9e) entry point function.
2634  *		Return the number of blocks for the partition in question
2635  *		or forward the request to the property facilities.
2636  *
2637  *   Arguments: dev       - device number
2638  *		dip       - pointer to device info structure
2639  *		prop_op   - property operator
2640  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2641  *		name      - pointer to property name
2642  *		valuep    - pointer or address of the user buffer
2643  *		lengthp   - property length
2644  *
2645  * Return Code: DDI_PROP_SUCCESS
2646  *              DDI_PROP_NOT_FOUND
2647  *              DDI_PROP_UNDEFINED
2648  *              DDI_PROP_NO_MEMORY
2649  *              DDI_PROP_BUF_TOO_SMALL
2650  */
2651 
2652 static int
2653 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2654 	char *name, caddr_t valuep, int *lengthp)
2655 {
2656 	int		instance = ddi_get_instance(dip);
2657 	struct sd_lun	*un;
2658 	uint64_t	nblocks64;
2659 
2660 	/*
2661 	 * Our dynamic properties are all device specific and size oriented.
2662 	 * Requests issued under conditions where size is valid are passed
2663 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2664 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2665 	 */
2666 	un = ddi_get_soft_state(sd_state, instance);
2667 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2668 	    (un->un_f_geometry_is_valid == FALSE)) {
2669 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2670 		    name, valuep, lengthp));
2671 	} else {
2672 		/* get nblocks value */
2673 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2674 		mutex_enter(SD_MUTEX(un));
2675 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2676 		mutex_exit(SD_MUTEX(un));
2677 
2678 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2679 		    name, valuep, lengthp, nblocks64));
2680 	}
2681 }
2682 
2683 /*
2684  * The following functions are for smart probing:
2685  * sd_scsi_probe_cache_init()
2686  * sd_scsi_probe_cache_fini()
2687  * sd_scsi_clear_probe_cache()
2688  * sd_scsi_probe_with_cache()
2689  */
2690 
2691 /*
2692  *    Function: sd_scsi_probe_cache_init
2693  *
2694  * Description: Initializes the probe response cache mutex and head pointer.
2695  *
2696  *     Context: Kernel thread context
2697  */
2698 
2699 static void
2700 sd_scsi_probe_cache_init(void)
2701 {
2702 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2703 	sd_scsi_probe_cache_head = NULL;
2704 }
2705 
2706 
2707 /*
2708  *    Function: sd_scsi_probe_cache_fini
2709  *
2710  * Description: Frees all resources associated with the probe response cache.
2711  *
2712  *     Context: Kernel thread context
2713  */
2714 
2715 static void
2716 sd_scsi_probe_cache_fini(void)
2717 {
2718 	struct sd_scsi_probe_cache *cp;
2719 	struct sd_scsi_probe_cache *ncp;
2720 
2721 	/* Clean up our smart probing linked list */
2722 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2723 		ncp = cp->next;
2724 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2725 	}
2726 	sd_scsi_probe_cache_head = NULL;
2727 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_clear_probe_cache
2733  *
2734  * Description: This routine clears the probe response cache. This is
2735  *		done when open() returns ENXIO so that when deferred
2736  *		attach is attempted (possibly after a device has been
2737  *		turned on) we will retry the probe. Since we don't know
2738  *		which target we failed to open, we just clear the
2739  *		entire cache.
2740  *
2741  *     Context: Kernel thread context
2742  */
2743 
2744 static void
2745 sd_scsi_clear_probe_cache(void)
2746 {
2747 	struct sd_scsi_probe_cache	*cp;
2748 	int				i;
2749 
2750 	mutex_enter(&sd_scsi_probe_cache_mutex);
2751 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2752 		/*
2753 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2754 		 * force probing to be performed the next time
2755 		 * sd_scsi_probe_with_cache is called.
2756 		 */
2757 		for (i = 0; i < NTARGETS_WIDE; i++) {
2758 			cp->cache[i] = SCSIPROBE_EXISTS;
2759 		}
2760 	}
2761 	mutex_exit(&sd_scsi_probe_cache_mutex);
2762 }
2763 
2764 
2765 /*
2766  *    Function: sd_scsi_probe_with_cache
2767  *
2768  * Description: This routine implements support for a scsi device probe
2769  *		with cache. The driver maintains a cache of the target
2770  *		responses to scsi probes. If we get no response from a
2771  *		target during a probe inquiry, we remember that, and we
2772  *		avoid additional calls to scsi_probe on non-zero LUNs
2773  *		on the same target until the cache is cleared. By doing
2774  *		so we avoid the 1/4 sec selection timeout for nonzero
2775  *		LUNs. lun0 of a target is always probed.
2776  *
2777  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2778  *              waitfunc - indicates what the allocator routines should
2779  *			   do when resources are not available. This value
2780  *			   is passed on to scsi_probe() when that routine
2781  *			   is called.
2782  *
2783  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2784  *		otherwise the value returned by scsi_probe(9F).
2785  *
2786  *     Context: Kernel thread context
2787  */
2788 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	/* Resolve this device's lun/target from its node properties. */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance (keyed by parent dip) */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		/* All targets start out as "exists" so they get probed once. */
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	/*
	 * cp remains usable after dropping the mutex: list nodes are only
	 * freed in sd_scsi_probe_cache_fini().
	 * NOTE(review): cp->cache[tgt] is read and written below without
	 * the mutex held -- presumably concurrent probes of the same
	 * target are benign or serialized by the framework; confirm.
	 */
	mutex_exit(&sd_scsi_probe_cache_mutex);

	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
2845 
2846 
2847 /*
2848  *    Function: sd_spin_up_unit
2849  *
2850  * Description: Issues the following commands to spin-up the device:
2851  *		START STOP UNIT, and INQUIRY.
2852  *
2853  *   Arguments: un - driver soft state (unit) structure
2854  *
2855  * Return Code: 0 - success
2856  *		EIO - failure
2857  *		EACCES - reservation conflict
2858  *
2859  *     Context: Kernel thread context
2860  */
2861 
2862 static int
2863 sd_spin_up_unit(struct sd_lun *un)
2864 {
2865 	size_t	resid		= 0;
2866 	int	has_conflict	= FALSE;
2867 	uchar_t *bufaddr;
2868 
2869 	ASSERT(un != NULL);
2870 
2871 	/*
2872 	 * Send a throwaway START UNIT command.
2873 	 *
2874 	 * If we fail on this, we don't care presently what precisely
2875 	 * is wrong.  EMC's arrays will also fail this with a check
2876 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2877 	 * we don't want to fail the attach because it may become
2878 	 * "active" later.
2879 	 */
2880 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2881 	    == EACCES)
2882 		has_conflict = TRUE;
2883 
2884 	/*
2885 	 * Send another INQUIRY command to the target. This is necessary for
2886 	 * non-removable media direct access devices because their INQUIRY data
2887 	 * may not be fully qualified until they are spun up (perhaps via the
2888 	 * START command above).  Note: This seems to be needed for some
2889 	 * legacy devices only.) The INQUIRY command should succeed even if a
2890 	 * Reservation Conflict is present.
2891 	 */
2892 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2893 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2894 		kmem_free(bufaddr, SUN_INQSIZE);
2895 		return (EIO);
2896 	}
2897 
2898 	/*
2899 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2900 	 * Note that this routine does not return a failure here even if the
2901 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2902 	 */
2903 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2904 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2905 	}
2906 
2907 	kmem_free(bufaddr, SUN_INQSIZE);
2908 
2909 	/* If we hit a reservation conflict above, tell the caller. */
2910 	if (has_conflict == TRUE) {
2911 		return (EACCES);
2912 	}
2913 
2914 	return (0);
2915 }
2916 
2917 #ifdef _LP64
2918 /*
2919  *    Function: sd_enable_descr_sense
2920  *
2921  * Description: This routine attempts to select descriptor sense format
2922  *		using the Control mode page.  Devices that support 64 bit
2923  *		LBAs (for >2TB luns) should also implement descriptor
2924  *		sense data so we will call this function whenever we see
2925  *		a lun larger than 2TB.  If for some reason the device
2926  *		supports 64 bit LBAs but doesn't support descriptor sense
2927  *		presumably the mode select will fail.  Everything will
2928  *		continue to work normally except that we will not get
2929  *		complete sense data for commands that fail with an LBA
2930  *		larger than 32 bits.
2931  *
2932  *   Arguments: un - driver soft state (unit) structure
2933  *
2934  *     Context: Kernel thread context only
2935  */
2936 
static void
sd_enable_descr_sense(struct sd_lun *un)
{
	uchar_t			*header;
	struct mode_control_scsi3 *ctrl_bufp;
	size_t			buflen;
	size_t			bd_len;

	/*
	 * Read MODE SENSE page 0xA, Control Mode Page
	 */
	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_control_scsi3);
	header = kmem_zalloc(buflen, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
		goto eds_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 * NOTE(review): bd_len comes from the device and is not range
	 * checked here; a value larger than MODE_BLK_DESC_LENGTH would
	 * place ctrl_bufp past the end of 'header' -- confirm callers /
	 * devices cannot trigger this.
	 */
	bd_len  = ((struct mode_header *)header)->bdesc_length;

	/* The control mode page follows the header and block descriptors. */
	ctrl_bufp = (struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len);

	/*
	 * Clear PS bit for MODE SELECT (the PS bit is reserved in
	 * MODE SELECT data and must be zero when written back)
	 */
	ctrl_bufp->mode_page.ps = 0;

	/*
	 * Set D_SENSE to enable descriptor sense format.
	 */
	ctrl_bufp->d_sense = 1;

	/*
	 * Use MODE SELECT to commit the change to the D_SENSE bit
	 */
	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode select ctrl page failed\n");
		goto eds_exit;
	}

	/* Common exit: release the mode sense/select buffer. */
eds_exit:
	kmem_free(header, buflen);
}
2991 
2992 /*
2993  *    Function: sd_reenable_dsense_task
2994  *
2995  * Description: Re-enable descriptor sense after device or bus reset
2996  *
2997  *     Context: Executes in a taskq() thread context
2998  */
2999 static void
3000 sd_reenable_dsense_task(void *arg)
3001 {
3002 	struct	sd_lun	*un = arg;
3003 
3004 	ASSERT(un != NULL);
3005 	sd_enable_descr_sense(un);
3006 }
3007 #endif /* _LP64 */
3008 
3009 /*
3010  *    Function: sd_set_mmc_caps
3011  *
3012  * Description: This routine determines if the device is MMC compliant and if
3013  *		the device supports CDDA via a mode sense of the CDVD
3014  *		capabilities mode page. Also checks if the device is a
3015  *		dvdram writable device.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
static void
sd_set_mmc_caps(struct sd_lun *un)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;

	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Fetch the CDROM CAPABILITIES mode page (0x2A) via MODE SENSE. */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * See if read CDDA is supported.
	 * NOTE(review): the bit positions tested below (page bytes 5, 3,
	 * and 2) presumably correspond to the MMC CD-DA read, DVD-RAM
	 * write, and CD/DVD media-type capability fields -- confirm
	 * against the MMC specification for page 0x2A.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		/* Can't query RANDOM_WRITABLE; give up on RRD detection. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	/* Release all four GET CONFIGURATION buffers. */
	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3143 
3144 /*
3145  *    Function: sd_check_for_writable_cd
3146  *
3147  * Description: This routine determines if the media in the device is
3148  *		writable or not. It uses the get configuration command (0x46)
3149  *		to determine if the media is writable
3150  *
3151  *   Arguments: un - driver soft state (unit) structure
3152  *
3153  *     Context: Never called at interrupt context.
3154  */
3155 
static void
sd_check_for_writable_cd(struct sd_lun *un)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;

	/* Caller must hold the soft state mutex on entry (and exit). */
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	/*
	 * The mutex is dropped around each blocking SCSI command below and
	 * reacquired before un_f_mmc_writable_media is updated.
	 */
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Issue GET CONFIGURATION to read the current profile header. */
	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 * NOTE(review): bytes 6/7 == 0x0012 presumably identify the
		 * DVD-RAM profile in the GET CONFIGURATION header --
		 * confirm against the MMC specification.
		 */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		/* Reacquire the mutex before returning, per the contract. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	/* Release all four GET CONFIGURATION buffers. */
	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3288 
3289 /*
3290  *    Function: sd_read_unit_properties
3291  *
3292  * Description: The following implements a property lookup mechanism.
3293  *		Properties for particular disks (keyed on vendor, model
3294  *		and rev numbers) are sought in the sd.conf file via
3295  *		sd_process_sdconf_file(), and if not found there, are
3296  *		looked for in a list hardcoded in this driver via
3297  *		sd_process_sdconf_table() Once located the properties
3298  *		are used to update the driver unit structure.
3299  *
3300  *   Arguments: un - driver soft state (unit) structure
3301  */
3302 
3303 static void
3304 sd_read_unit_properties(struct sd_lun *un)
3305 {
3306 	/*
3307 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3308 	 * the "sd-config-list" property (from the sd.conf file) or if
3309 	 * there was not a match for the inquiry vid/pid. If this event
3310 	 * occurs the static driver configuration table is searched for
3311 	 * a match.
3312 	 */
3313 	ASSERT(un != NULL);
3314 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3315 		sd_process_sdconf_table(un);
3316 	}
3317 
3318 	/* check for LSI device */
3319 	sd_is_lsi(un);
3320 
3321 
3322 }
3323 
3324 
3325 /*
3326  *    Function: sd_process_sdconf_file
3327  *
3328  * Description: Use ddi_getlongprop to obtain the properties from the
3329  *		driver's config file (ie, sd.conf) and update the driver
3330  *		soft state structure accordingly.
3331  *
3332  *   Arguments: un - driver soft state (unit) structure
3333  *
3334  * Return Code: SD_SUCCESS - The properties were successfully set according
3335  *			     to the driver configuration file.
3336  *		SD_FAILURE - The driver config list was not obtained or
3337  *			     there was no vid/pid match. This indicates that
3338  *			     the static config table should be used.
3339  *
3340  * The config file has a property, "sd-config-list", which consists of
3341  * one or more duplets as follows:
3342  *
3343  *  sd-config-list=
3344  *	<duplet>,
3345  *	[<duplet>,]
3346  *	[<duplet>];
3347  *
3348  * The structure of each duplet is as follows:
3349  *
3350  *  <duplet>:= <vid+pid>,<data-property-name_list>
3351  *
3352  * The first entry of the duplet is the device ID string (the concatenated
3353  * vid & pid; not to be confused with a device_id).  This is defined in
3354  * the same way as in the sd_disk_table.
3355  *
3356  * The second part of the duplet is a string that identifies a
3357  * data-property-name-list. The data-property-name-list is defined as
3358  * follows:
3359  *
3360  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3361  *
3362  * The syntax of <data-property-name> depends on the <version> field.
3363  *
3364  * If version = SD_CONF_VERSION_1 we have the following syntax:
3365  *
3366  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3367  *
3368  * where the prop0 value will be used to set prop0 if bit0 set in the
3369  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3370  *
3371  */
3372 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	*config_list = NULL;	/* "sd-config-list" property value */
	int	config_list_len;
	int	len;
	int	dupletlen = 0;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	int	dnlist_len;
	int	dataname_len;
	int	*data_list;
	int	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
	    sd_config_list, (caddr_t)&config_list, &config_list_len)
	    != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Note: This algorithm is complex and difficult to maintain. It should
	 * be replaced with a more robust implementation.
	 */
	for (len = config_list_len, vidptr = config_list; len > 0;
	    vidptr += dupletlen, len -= dupletlen) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidlen = dupletlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			/* Step past this vid's NUL terminator and move on. */
			dupletlen++;
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = vidptr + vidlen + 1;
		dnlist_len = (int)strlen(dnlist_ptr);
		/*
		 * dupletlen now covers the vid string, the dnlist string,
		 * and the NUL terminator of each (hence the "+ 2").
		 */
		dupletlen += dnlist_len + 2;

		/*
		 * Set a pointer for the first data-property-name
		 * entry in the list
		 */
		dataname_ptr = dnlist_ptr;
		dataname_len = 0;

		/*
		 * Loop through all data-property-name entries in the
		 * data-property-name-list setting the properties for each.
		 */
		while (dataname_len < dnlist_len) {
			int version;

			/*
			 * Determine the length of the current
			 * data-property-name entry by indexing until a
			 * blank or NULL is encountered. When the space is
			 * encountered reset it to a NULL for compliance
			 * with ddi_getlongprop().
			 */
			for (i = 0; ((dataname_ptr[i] != ' ') &&
			    (dataname_ptr[i] != '\0')); i++) {
				;
			}

			dataname_len += i;
			/* If not null terminated, Make it so */
			if (dataname_ptr[i] == ' ') {
				dataname_ptr[i] = '\0';
			}
			/* Account for the separator/terminator just consumed */
			dataname_len++;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_process_sdconf_file: disk:%s, data:%s\n",
			    vidptr, dataname_ptr);

			/* Get the data list */
			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
			    != DDI_PROP_SUCCESS) {
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: data property (%s)"
				    " has no value\n", dataname_ptr);
				/* Advance to the next data-property-name */
				dataname_ptr = dnlist_ptr + dataname_len;
				continue;
			}

			/* The first word of the data list is the version */
			version = data_list[0];

			if (version == SD_CONF_VERSION_1) {
				sd_tunables values;

				/* Set the properties */
				if (sd_chk_vers1_data(un, data_list[1],
				    &data_list[2], data_list_len, dataname_ptr)
				    == SD_SUCCESS) {
					sd_get_tunables_from_conf(un,
					    data_list[1], &data_list[2],
					    &values);
					sd_set_vers1_properties(un,
					    data_list[1], &values);
					rval = SD_SUCCESS;
				} else {
					rval = SD_FAILURE;
				}
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "data property %s version 0x%x is invalid.",
				    dataname_ptr, version);
				rval = SD_FAILURE;
			}
			kmem_free(data_list, data_list_len);
			dataname_ptr = dnlist_ptr + dataname_len;
		}
	}

	/* free up the memory allocated by ddi_getlongprop */
	if (config_list) {
		kmem_free(config_list, config_list_len);
	}

	return (rval);
}
3512 
3513 /*
3514  *    Function: sd_get_tunables_from_conf()
3515  *
3516  *
 *    This function reads the data list from the sd.conf file and pulls
 *    the values that can have numeric values as arguments and places
 *    the values in the appropriate sd_tunables member.
 *    Since the order of the data list members varies across platforms,
 *    this function reads them from the data list in a platform-specific
 *    order and places them into the correct sd_tunables member that is
 *    consistent across all platforms.
3524  */
3525 static void
3526 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3527     sd_tunables *values)
3528 {
3529 	int i;
3530 	int mask;
3531 
3532 	bzero(values, sizeof (sd_tunables));
3533 
3534 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3535 
3536 		mask = 1 << i;
3537 		if (mask > flags) {
3538 			break;
3539 		}
3540 
3541 		switch (mask & flags) {
3542 		case 0:	/* This mask bit not set in flags */
3543 			continue;
3544 		case SD_CONF_BSET_THROTTLE:
3545 			values->sdt_throttle = data_list[i];
3546 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3547 			    "sd_get_tunables_from_conf: throttle = %d\n",
3548 			    values->sdt_throttle);
3549 			break;
3550 		case SD_CONF_BSET_CTYPE:
3551 			values->sdt_ctype = data_list[i];
3552 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 			    "sd_get_tunables_from_conf: ctype = %d\n",
3554 			    values->sdt_ctype);
3555 			break;
3556 		case SD_CONF_BSET_NRR_COUNT:
3557 			values->sdt_not_rdy_retries = data_list[i];
3558 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3559 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3560 			    values->sdt_not_rdy_retries);
3561 			break;
3562 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3563 			values->sdt_busy_retries = data_list[i];
3564 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3566 			    values->sdt_busy_retries);
3567 			break;
3568 		case SD_CONF_BSET_RST_RETRIES:
3569 			values->sdt_reset_retries = data_list[i];
3570 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3571 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3572 			    values->sdt_reset_retries);
3573 			break;
3574 		case SD_CONF_BSET_RSV_REL_TIME:
3575 			values->sdt_reserv_rel_time = data_list[i];
3576 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3577 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3578 			    values->sdt_reserv_rel_time);
3579 			break;
3580 		case SD_CONF_BSET_MIN_THROTTLE:
3581 			values->sdt_min_throttle = data_list[i];
3582 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3583 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3584 			    values->sdt_min_throttle);
3585 			break;
3586 		case SD_CONF_BSET_DISKSORT_DISABLED:
3587 			values->sdt_disk_sort_dis = data_list[i];
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3590 			    values->sdt_disk_sort_dis);
3591 			break;
3592 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3593 			values->sdt_lun_reset_enable = data_list[i];
3594 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3595 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3596 			    "\n", values->sdt_lun_reset_enable);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  *    Function: sd_process_sdconf_table
3604  *
3605  * Description: Search the static configuration table for a match on the
3606  *		inquiry vid/pid and update the driver soft state structure
3607  *		according to the table property values for the device.
3608  *
3609  *		The form of a configuration table entry is:
3610  *		  <vid+pid>,<flags>,<property-data>
3611  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3612  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3613  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3614  *
3615  *   Arguments: un - driver soft state (unit) structure
3616  */
3617 
3618 static void
3619 sd_process_sdconf_table(struct sd_lun *un)
3620 {
3621 	char	*id = NULL;
3622 	int	table_index;
3623 	int	idlen;
3624 
3625 	ASSERT(un != NULL);
3626 	for (table_index = 0; table_index < sd_disk_table_size;
3627 	    table_index++) {
3628 		id = sd_disk_table[table_index].device_id;
3629 		idlen = strlen(id);
3630 		if (idlen == 0) {
3631 			continue;
3632 		}
3633 
3634 		/*
3635 		 * The static configuration table currently does not
3636 		 * implement version 10 properties. Additionally,
3637 		 * multiple data-property-name entries are not
3638 		 * implemented in the static configuration table.
3639 		 */
3640 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_process_sdconf_table: disk %s\n", id);
3643 			sd_set_vers1_properties(un,
3644 			    sd_disk_table[table_index].flags,
3645 			    sd_disk_table[table_index].properties);
3646 			break;
3647 		}
3648 	}
3649 }
3650 
3651 
3652 /*
3653  *    Function: sd_sdconf_id_match
3654  *
3655  * Description: This local function implements a case sensitive vid/pid
3656  *		comparison as well as the boundary cases of wild card and
3657  *		multiple blanks.
3658  *
3659  *		Note: An implicit assumption made here is that the scsi
3660  *		inquiry structure will always keep the vid, pid and
3661  *		revision strings in consecutive sequence, so they can be
3662  *		read as a single string. If this assumption is not the
3663  *		case, a separate string, to be used for the check, needs
3664  *		to be built with these strings concatenated.
3665  *
3666  *   Arguments: un - driver soft state (unit) structure
3667  *		id - table or config file vid/pid
3668  *		idlen  - length of the vid/pid (bytes)
3669  *
3670  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3671  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3672  */
3673 
3674 static int
3675 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3676 {
3677 	struct scsi_inquiry	*sd_inq;
3678 	int 			rval = SD_SUCCESS;
3679 
3680 	ASSERT(un != NULL);
3681 	sd_inq = un->un_sd->sd_inq;
3682 	ASSERT(id != NULL);
3683 
3684 	/*
3685 	 * We use the inq_vid as a pointer to a buffer containing the
3686 	 * vid and pid and use the entire vid/pid length of the table
3687 	 * entry for the comparison. This works because the inq_pid
3688 	 * data member follows inq_vid in the scsi_inquiry structure.
3689 	 */
3690 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3691 		/*
3692 		 * The user id string is compared to the inquiry vid/pid
3693 		 * using a case insensitive comparison and ignoring
3694 		 * multiple spaces.
3695 		 */
3696 		rval = sd_blank_cmp(un, id, idlen);
3697 		if (rval != SD_SUCCESS) {
3698 			/*
3699 			 * User id strings that start and end with a "*"
3700 			 * are a special case. These do not have a
3701 			 * specific vendor, and the product string can
3702 			 * appear anywhere in the 16 byte PID portion of
3703 			 * the inquiry data. This is a simple strstr()
3704 			 * type search for the user id in the inquiry data.
3705 			 */
3706 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3707 				char	*pidptr = &id[1];
3708 				int	i;
3709 				int	j;
3710 				int	pidstrlen = idlen - 2;
3711 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3712 				    pidstrlen;
3713 
3714 				if (j < 0) {
3715 					return (SD_FAILURE);
3716 				}
3717 				for (i = 0; i < j; i++) {
3718 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3719 					    pidptr, pidstrlen) == 0) {
3720 						rval = SD_SUCCESS;
3721 						break;
3722 					}
3723 				}
3724 			}
3725 		}
3726 	}
3727 	return (rval);
3728 }
3729 
3730 
3731 /*
3732  *    Function: sd_blank_cmp
3733  *
3734  * Description: If the id string starts and ends with a space, treat
3735  *		multiple consecutive spaces as equivalent to a single
3736  *		space. For example, this causes a sd_disk_table entry
3737  *		of " NEC CDROM " to match a device's id string of
3738  *		"NEC       CDROM".
3739  *
3740  *		Note: The success exit condition for this routine is if
3741  *		the pointer to the table entry is '\0' and the cnt of
3742  *		the inquiry length is zero. This will happen if the inquiry
3743  *		string returned by the device is padded with spaces to be
3744  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3745  *		SCSI spec states that the inquiry string is to be padded with
3746  *		spaces.
3747  *
3748  *   Arguments: un - driver soft state (unit) structure
3749  *		id - table or config file vid/pid
3750  *		idlen  - length of the vid/pid (bytes)
3751  *
3752  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3753  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3754  */
3755 
static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	/*
	 * cnt is the total length of the concatenated inquiry vid/pid
	 * buffer.  It is computed via sizeof only, so no dereference of
	 * un occurs before the ASSERT below.
	 */
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	p1 = id;

	/*
	 * The blank-squashing comparison only applies when the table id
	 * starts and ends with a space; otherwise the loop is skipped and
	 * the return expression below yields SD_FAILURE (cnt is still
	 * nonzero).
	 */
	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/*
			 * compare the two strings; this is also the only
			 * exit from the outer loop, taken when the inquiry
			 * data is exhausted or a mismatch is found (a NUL
			 * in p1 never equals an inquiry byte, ending the
			 * loop at the end of the id string as well)
			 */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			/* advance both strings over the matching run */
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/* return SD_SUCCESS if both strings match */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}
3802 
3803 
3804 /*
3805  *    Function: sd_chk_vers1_data
3806  *
3807  * Description: Verify the version 1 device properties provided by the
3808  *		user via the configuration file
3809  *
3810  *   Arguments: un	     - driver soft state (unit) structure
3811  *		flags	     - integer mask indicating properties to be set
3812  *		prop_list    - integer list of property values
3813  *		list_len     - length of user provided data
3814  *
3815  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3816  *		SD_FAILURE - Indicates the user provided data is invalid
3817  */
3818 
3819 static int
3820 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3821     int list_len, char *dataname_ptr)
3822 {
3823 	int i;
3824 	int mask = 1;
3825 	int index = 0;
3826 
3827 	ASSERT(un != NULL);
3828 
3829 	/* Check for a NULL property name and list */
3830 	if (dataname_ptr == NULL) {
3831 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3832 		    "sd_chk_vers1_data: NULL data property name.");
3833 		return (SD_FAILURE);
3834 	}
3835 	if (prop_list == NULL) {
3836 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3837 		    "sd_chk_vers1_data: %s NULL data property list.",
3838 		    dataname_ptr);
3839 		return (SD_FAILURE);
3840 	}
3841 
3842 	/* Display a warning if undefined bits are set in the flags */
3843 	if (flags & ~SD_CONF_BIT_MASK) {
3844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3845 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3846 		    "Properties not set.",
3847 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3848 		return (SD_FAILURE);
3849 	}
3850 
3851 	/*
3852 	 * Verify the length of the list by identifying the highest bit set
3853 	 * in the flags and validating that the property list has a length
3854 	 * up to the index of this bit.
3855 	 */
3856 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3857 		if (flags & mask) {
3858 			index++;
3859 		}
3860 		mask = 1 << i;
3861 	}
3862 	if ((list_len / sizeof (int)) < (index + 2)) {
3863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3864 		    "sd_chk_vers1_data: "
3865 		    "Data property list %s size is incorrect. "
3866 		    "Properties not set.", dataname_ptr);
3867 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3868 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3869 		return (SD_FAILURE);
3870 	}
3871 	return (SD_SUCCESS);
3872 }
3873 
3874 
3875 /*
3876  *    Function: sd_set_vers1_properties
3877  *
3878  * Description: Set version 1 device properties based on a property list
3879  *		retrieved from the driver configuration file or static
3880  *		configuration table. Version 1 properties have the format:
3881  *
3882  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3883  *
3884  *		where the prop0 value will be used to set prop0 if bit0
3885  *		is set in the flags
3886  *
3887  *   Arguments: un	     - driver soft state (unit) structure
3888  *		flags	     - integer mask indicating properties to be set
3889  *		prop_list    - integer list of property values
3890  */
3891 
3892 static void
3893 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3894 {
3895 	ASSERT(un != NULL);
3896 
3897 	/*
3898 	 * Set the flag to indicate cache is to be disabled. An attempt
3899 	 * to disable the cache via sd_cache_control() will be made
3900 	 * later during attach once the basic initialization is complete.
3901 	 */
3902 	if (flags & SD_CONF_BSET_NOCACHE) {
3903 		un->un_f_opt_disable_cache = TRUE;
3904 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3905 		    "sd_set_vers1_properties: caching disabled flag set\n");
3906 	}
3907 
3908 	/* CD-specific configuration parameters */
3909 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3910 		un->un_f_cfg_playmsf_bcd = TRUE;
3911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3912 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3913 	}
3914 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3915 		un->un_f_cfg_readsub_bcd = TRUE;
3916 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3917 		    "sd_set_vers1_properties: readsub_bcd set\n");
3918 	}
3919 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3920 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3921 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3922 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3923 	}
3924 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3925 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3928 	}
3929 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3930 		un->un_f_cfg_no_read_header = TRUE;
3931 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3932 			    "sd_set_vers1_properties: no_read_header set\n");
3933 	}
3934 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3935 		un->un_f_cfg_read_cd_xd4 = TRUE;
3936 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3937 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3938 	}
3939 
3940 	/* Support for devices which do not have valid/unique serial numbers */
3941 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3942 		un->un_f_opt_fab_devid = TRUE;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: fab_devid bit set\n");
3945 	}
3946 
3947 	/* Support for user throttle configuration */
3948 	if (flags & SD_CONF_BSET_THROTTLE) {
3949 		ASSERT(prop_list != NULL);
3950 		un->un_saved_throttle = un->un_throttle =
3951 		    prop_list->sdt_throttle;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: throttle set to %d\n",
3954 		    prop_list->sdt_throttle);
3955 	}
3956 
3957 	/* Set the per disk retry count according to the conf file or table. */
3958 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3959 		ASSERT(prop_list != NULL);
3960 		if (prop_list->sdt_not_rdy_retries) {
3961 			un->un_notready_retry_count =
3962 				prop_list->sdt_not_rdy_retries;
3963 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 			    "sd_set_vers1_properties: not ready retry count"
3965 			    " set to %d\n", un->un_notready_retry_count);
3966 		}
3967 	}
3968 
3969 	/* The controller type is reported for generic disk driver ioctls */
3970 	if (flags & SD_CONF_BSET_CTYPE) {
3971 		ASSERT(prop_list != NULL);
3972 		switch (prop_list->sdt_ctype) {
3973 		case CTYPE_CDROM:
3974 			un->un_ctype = prop_list->sdt_ctype;
3975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3976 			    "sd_set_vers1_properties: ctype set to "
3977 			    "CTYPE_CDROM\n");
3978 			break;
3979 		case CTYPE_CCS:
3980 			un->un_ctype = prop_list->sdt_ctype;
3981 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3982 				"sd_set_vers1_properties: ctype set to "
3983 				"CTYPE_CCS\n");
3984 			break;
3985 		case CTYPE_ROD:		/* RW optical */
3986 			un->un_ctype = prop_list->sdt_ctype;
3987 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 			    "sd_set_vers1_properties: ctype set to "
3989 			    "CTYPE_ROD\n");
3990 			break;
3991 		default:
3992 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3993 			    "sd_set_vers1_properties: Could not set "
3994 			    "invalid ctype value (%d)",
3995 			    prop_list->sdt_ctype);
3996 		}
3997 	}
3998 
3999 	/* Purple failover timeout */
4000 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4001 		ASSERT(prop_list != NULL);
4002 		un->un_busy_retry_count =
4003 			prop_list->sdt_busy_retries;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: "
4006 		    "busy retry count set to %d\n",
4007 		    un->un_busy_retry_count);
4008 	}
4009 
4010 	/* Purple reset retry count */
4011 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4012 		ASSERT(prop_list != NULL);
4013 		un->un_reset_retry_count =
4014 			prop_list->sdt_reset_retries;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: "
4017 		    "reset retry count set to %d\n",
4018 		    un->un_reset_retry_count);
4019 	}
4020 
4021 	/* Purple reservation release timeout */
4022 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_reserve_release_time =
4025 			prop_list->sdt_reserv_rel_time;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: "
4028 		    "reservation release timeout set to %d\n",
4029 		    un->un_reserve_release_time);
4030 	}
4031 
4032 	/*
4033 	 * Driver flag telling the driver to verify that no commands are pending
4034 	 * for a device before issuing a Test Unit Ready. This is a workaround
4035 	 * for a firmware bug in some Seagate eliteI drives.
4036 	 */
4037 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4038 		un->un_f_cfg_tur_check = TRUE;
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4040 		    "sd_set_vers1_properties: tur queue check set\n");
4041 	}
4042 
4043 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4044 		un->un_min_throttle = prop_list->sdt_min_throttle;
4045 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4046 		    "sd_set_vers1_properties: min throttle set to %d\n",
4047 		    un->un_min_throttle);
4048 	}
4049 
4050 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4051 		un->un_f_disksort_disabled =
4052 		    (prop_list->sdt_disk_sort_dis != 0) ?
4053 		    TRUE : FALSE;
4054 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4055 		    "sd_set_vers1_properties: disksort disabled "
4056 		    "flag set to %d\n",
4057 		    prop_list->sdt_disk_sort_dis);
4058 	}
4059 
4060 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4061 		un->un_f_lun_reset_enabled =
4062 		    (prop_list->sdt_lun_reset_enable != 0) ?
4063 		    TRUE : FALSE;
4064 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 		    "sd_set_vers1_properties: lun reset enabled "
4066 		    "flag set to %d\n",
4067 		    prop_list->sdt_lun_reset_enable);
4068 	}
4069 
4070 	/*
4071 	 * Validate the throttle values.
4072 	 * If any of the numbers are invalid, set everything to defaults.
4073 	 */
4074 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4075 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4076 	    (un->un_min_throttle > un->un_throttle)) {
4077 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4078 		un->un_min_throttle = sd_min_throttle;
4079 	}
4080 }
4081 
4082 /*
4083  *   Function: sd_is_lsi()
4084  *
 *   Description: Check for LSI devices by stepping through the static
 *	device table to match vid/pid.
4087  *
4088  *   Args: un - ptr to sd_lun
4089  *
4090  *   Notes:  When creating new LSI property, need to add the new LSI property
4091  *		to this function.
4092  */
4093 static void
4094 sd_is_lsi(struct sd_lun *un)
4095 {
4096 	char	*id = NULL;
4097 	int	table_index;
4098 	int	idlen;
4099 	void	*prop;
4100 
4101 	ASSERT(un != NULL);
4102 	for (table_index = 0; table_index < sd_disk_table_size;
4103 	    table_index++) {
4104 		id = sd_disk_table[table_index].device_id;
4105 		idlen = strlen(id);
4106 		if (idlen == 0) {
4107 			continue;
4108 		}
4109 
4110 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4111 			prop = sd_disk_table[table_index].properties;
4112 			if (prop == &lsi_properties ||
4113 			    prop == &lsi_oem_properties ||
4114 			    prop == &lsi_properties_scsi ||
4115 			    prop == &symbios_properties) {
4116 				un->un_f_cfg_is_lsi = TRUE;
4117 			}
4118 			break;
4119 		}
4120 	}
4121 }
4122 
4123 
4124 /*
4125  * The following routines support reading and interpretation of disk labels,
4126  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4127  * fdisk tables.
4128  */
4129 
4130 /*
4131  *    Function: sd_validate_geometry
4132  *
4133  * Description: Read the label from the disk (if present). Update the unit's
4134  *		geometry and vtoc information from the data in the label.
4135  *		Verify that the label is valid.
4136  *
4137  *   Arguments: un - driver soft state (unit) structure
4138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4140  *			to use the USCSI "direct" chain and bypass the normal
4141  *			command waitq.
4142  *
4143  * Return Code: 0 - Successful completion
4144  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4145  *			  un->un_blockcount; or label on disk is corrupted
4146  *			  or unreadable.
4147  *		EACCES  - Reservation conflict at the device.
4148  *		ENOMEM  - Resource allocation error
4149  *		ENOTSUP - geometry not applicable
4150  *
4151  *     Context: Kernel thread only (can sleep).
4152  */
4153 
4154 static int
4155 sd_validate_geometry(struct sd_lun *un, int path_flag)
4156 {
4157 	static	char		labelstring[128];
4158 	static	char		buf[256];
4159 	char	*label		= NULL;
4160 	int	label_error = 0;
4161 	int	gvalid		= un->un_f_geometry_is_valid;
4162 	int	lbasize;
4163 	uint_t	capacity;
4164 	int	count;
4165 
4166 	ASSERT(un != NULL);
4167 	ASSERT(mutex_owned(SD_MUTEX(un)));
4168 
4169 	/*
4170 	 * If the required values are not valid, then try getting them
4171 	 * once via read capacity. If that fails, then fail this call.
4172 	 * This is necessary with the new mpxio failover behavior in
4173 	 * the T300 where we can get an attach for the inactive path
4174 	 * before the active path. The inactive path fails commands with
4175 	 * sense data of 02,04,88 which happens to the read capacity
4176 	 * before mpxio has had sufficient knowledge to know if it should
4177 	 * force a fail over or not. (Which it won't do at attach anyhow).
4178 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4179 	 * un_blockcount won't be valid.
4180 	 */
4181 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4182 	    (un->un_f_blockcount_is_valid != TRUE)) {
4183 		uint64_t	cap;
4184 		uint32_t	lbasz;
4185 		int		rval;
4186 
4187 		mutex_exit(SD_MUTEX(un));
4188 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4189 		    &lbasz, SD_PATH_DIRECT);
4190 		mutex_enter(SD_MUTEX(un));
4191 		if (rval == 0) {
4192 			/*
4193 			 * The following relies on
4194 			 * sd_send_scsi_READ_CAPACITY never
4195 			 * returning 0 for capacity and/or lbasize.
4196 			 */
4197 			sd_update_block_info(un, lbasz, cap);
4198 		}
4199 
4200 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4201 		    (un->un_f_blockcount_is_valid != TRUE)) {
4202 			return (EINVAL);
4203 		}
4204 	}
4205 
4206 	/*
4207 	 * Copy the lbasize and capacity so that if they're reset while we're
4208 	 * not holding the SD_MUTEX, we will continue to use valid values
4209 	 * after the SD_MUTEX is reacquired. (4119659)
4210 	 */
4211 	lbasize  = un->un_tgt_blocksize;
4212 	capacity = un->un_blockcount;
4213 
4214 #if defined(_SUNOS_VTOC_16)
4215 	/*
4216 	 * Set up the "whole disk" fdisk partition; this should always
4217 	 * exist, regardless of whether the disk contains an fdisk table
4218 	 * or vtoc.
4219 	 */
4220 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4221 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4222 #endif
4223 
4224 	/*
4225 	 * Refresh the logical and physical geometry caches.
4226 	 * (data from MODE SENSE format/rigid disk geometry pages,
4227 	 * and scsi_ifgetcap("geometry").
4228 	 */
4229 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4230 
4231 	label_error = sd_use_efi(un, path_flag);
4232 	if (label_error == 0) {
4233 		/* found a valid EFI label */
4234 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4235 			"sd_validate_geometry: found EFI label\n");
4236 		un->un_solaris_offset = 0;
4237 		un->un_solaris_size = capacity;
4238 		return (ENOTSUP);
4239 	}
4240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4241 		if (label_error == ESRCH) {
4242 			/*
4243 			 * they've configured a LUN over 1TB, but used
4244 			 * format.dat to restrict format's view of the
4245 			 * capacity to be under 1TB
4246 			 */
4247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4248 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4249 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4250 "size to be < 1TB or relabel the disk with an EFI label");
4251 		} else {
4252 			/* unlabeled disk over 1TB */
4253 #if defined(__i386) || defined(__amd64)
4254 			/*
4255 			 * Refer to comments on off-by-1 at the head of the file
4256 			 * A 1TB disk was treated as (1T - 512)B in the past,
4257 			 * thus, it might have valid solaris partition. We
4258 			 * will return ENOTSUP later only if this disk has no
4259 			 * valid solaris partition.
4260 			 */
4261 			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
4262 			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
4263 			    un->un_f_has_removable_media ||
4264 			    un->un_f_is_hotpluggable)
4265 #endif
4266 				return (ENOTSUP);
4267 		}
4268 	}
4269 	label_error = 0;
4270 
4271 	/*
4272 	 * at this point it is either labeled with a VTOC or it is
4273 	 * under 1TB (<= 1TB actually for off-by-1)
4274 	 */
4275 	if (un->un_f_vtoc_label_supported) {
4276 		struct	dk_label *dkl;
4277 		offset_t dkl1;
4278 		offset_t label_addr, real_addr;
4279 		int	rval;
4280 		size_t	buffer_size;
4281 
4282 		/*
4283 		 * Note: This will set up un->un_solaris_size and
4284 		 * un->un_solaris_offset.
4285 		 */
4286 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4287 		case SD_CMD_RESERVATION_CONFLICT:
4288 			ASSERT(mutex_owned(SD_MUTEX(un)));
4289 			return (EACCES);
4290 		case SD_CMD_FAILURE:
4291 			ASSERT(mutex_owned(SD_MUTEX(un)));
4292 			return (ENOMEM);
4293 		}
4294 
4295 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4296 
4297 #if defined(__i386) || defined(__amd64)
4298 			/*
4299 			 * Refer to comments on off-by-1 at the head of the file
4300 			 * This is for 1TB disk only. Since that there is no
4301 			 * solaris partitions, return ENOTSUP as we do for
4302 			 * >1TB disk.
4303 			 */
4304 			if (un->un_blockcount > DK_MAX_BLOCKS)
4305 				return (ENOTSUP);
4306 #endif
4307 			/*
4308 			 * Found fdisk table but no Solaris partition entry,
4309 			 * so don't call sd_uselabel() and don't create
4310 			 * a default label.
4311 			 */
4312 			label_error = 0;
4313 			un->un_f_geometry_is_valid = TRUE;
4314 			goto no_solaris_partition;
4315 		}
4316 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4317 
4318 #if defined(__i386) || defined(__amd64)
4319 		/*
4320 		 * Refer to comments on off-by-1 at the head of the file
4321 		 * Now, this 1TB disk has valid solaris partition. It
4322 		 * must be created by previous sd driver, we have to
4323 		 * treat it as (1T-512)B.
4324 		 */
4325 		if (un->un_blockcount > DK_MAX_BLOCKS) {
4326 			un->un_f_capacity_adjusted = 1;
4327 			un->un_blockcount = DK_MAX_BLOCKS;
4328 			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;
4329 
4330 			/*
4331 			 * Refer to sd_read_fdisk, when there is no
4332 			 * fdisk partition table, un_solaris_size is
4333 			 * set to disk's capacity. In this case, we
4334 			 * need to adjust it
4335 			 */
4336 			if (un->un_solaris_size > DK_MAX_BLOCKS)
4337 				un->un_solaris_size = DK_MAX_BLOCKS;
4338 			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
4339 			    lbasize, path_flag);
4340 		}
4341 #endif
4342 
4343 		/*
4344 		 * sys_blocksize != tgt_blocksize, need to re-adjust
4345 		 * blkno and save the index to beginning of dk_label
4346 		 */
4347 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4348 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4349 		    sizeof (struct dk_label));
4350 
4351 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4352 		    "label_addr: 0x%x allocation size: 0x%x\n",
4353 		    label_addr, buffer_size);
4354 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4355 		if (dkl == NULL) {
4356 			return (ENOMEM);
4357 		}
4358 
4359 		mutex_exit(SD_MUTEX(un));
4360 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4361 		    path_flag);
4362 		mutex_enter(SD_MUTEX(un));
4363 
4364 		switch (rval) {
4365 		case 0:
4366 			/*
4367 			 * sd_uselabel will establish that the geometry
4368 			 * is valid.
4369 			 * For sys_blocksize != tgt_blocksize, need
4370 			 * to index into the beginning of dk_label
4371 			 */
4372 			dkl1 = (daddr_t)dkl
4373 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4374 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4375 			    path_flag) != SD_LABEL_IS_VALID) {
4376 				label_error = EINVAL;
4377 			}
4378 			break;
4379 		case EACCES:
4380 			label_error = EACCES;
4381 			break;
4382 		default:
4383 			label_error = EINVAL;
4384 			break;
4385 		}
4386 
4387 		kmem_free(dkl, buffer_size);
4388 
4389 #if defined(_SUNOS_VTOC_8)
4390 		label = (char *)un->un_asciilabel;
4391 #elif defined(_SUNOS_VTOC_16)
4392 		label = (char *)un->un_vtoc.v_asciilabel;
4393 #else
4394 #error "No VTOC format defined."
4395 #endif
4396 	}
4397 
4398 	/*
4399 	 * If a valid label was not found, AND if no reservation conflict
4400 	 * was detected, then go ahead and create a default label (4069506).
4401 	 */
4402 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4403 		if (un->un_f_geometry_is_valid == FALSE) {
4404 			sd_build_default_label(un);
4405 		}
4406 		label_error = 0;
4407 	}
4408 
4409 no_solaris_partition:
4410 	if ((!un->un_f_has_removable_media ||
4411 	    (un->un_f_has_removable_media &&
4412 		un->un_mediastate == DKIO_EJECTED)) &&
4413 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
4414 		/*
4415 		 * Print out a message indicating who and what we are.
4416 		 * We do this only when we happen to really validate the
4417 		 * geometry. We may call sd_validate_geometry() at other
4418 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4419 		 * don't want to print the label.
4420 		 * If the geometry is valid, print the label string,
4421 		 * else print vendor and product info, if available
4422 		 */
4423 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4424 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4425 		} else {
4426 			mutex_enter(&sd_label_mutex);
4427 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4428 			    labelstring);
4429 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4430 			    &labelstring[64]);
4431 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4432 			    labelstring, &labelstring[64]);
4433 			if (un->un_f_blockcount_is_valid == TRUE) {
4434 				(void) sprintf(&buf[strlen(buf)],
4435 				    ", %llu %u byte blocks\n",
4436 				    (longlong_t)un->un_blockcount,
4437 				    un->un_tgt_blocksize);
4438 			} else {
4439 				(void) sprintf(&buf[strlen(buf)],
4440 				    ", (unknown capacity)\n");
4441 			}
4442 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4443 			mutex_exit(&sd_label_mutex);
4444 		}
4445 	}
4446 
4447 #if defined(_SUNOS_VTOC_16)
4448 	/*
4449 	 * If we have valid geometry, set up the remaining fdisk partitions.
4450 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4451 	 * we set it to an entirely bogus value.
4452 	 */
4453 	for (count = 0; count < FD_NUMPART; count++) {
4454 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4455 		un->un_map[FDISK_P1 + count].dkl_nblk =
4456 		    un->un_fmap[count].fmap_nblk;
4457 
4458 		un->un_offset[FDISK_P1 + count] =
4459 		    un->un_fmap[count].fmap_start;
4460 	}
4461 #endif
4462 
4463 	for (count = 0; count < NDKMAP; count++) {
4464 #if defined(_SUNOS_VTOC_8)
4465 		struct dk_map *lp  = &un->un_map[count];
4466 		un->un_offset[count] =
4467 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4468 #elif defined(_SUNOS_VTOC_16)
4469 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4470 
4471 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4472 #else
4473 #error "No VTOC format defined."
4474 #endif
4475 	}
4476 
4477 	return (label_error);
4478 }
4479 
4480 
4481 #if defined(_SUNOS_VTOC_16)
4482 /*
4483  * Macro: MAX_BLKS
4484  *
4485  *	This macro is used for table entries where we need to have the largest
4486  *	possible sector value for that head & SPT (sectors per track)
4487  *	combination.  Other entries for some smaller disk sizes are set by
4488  *	convention to match those used by X86 BIOS usage.
4489  */
4490 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
4491 
4492 /*
4493  *    Function: sd_convert_geometry
4494  *
4495  * Description: Convert physical geometry into a dk_geom structure. In
4496  *		other words, make sure we don't wrap 16-bit values.
4497  *		e.g. converting from geom_cache to dk_geom
4498  *
4499  *     Context: Kernel thread only
4500  */
4501 static void
4502 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4503 {
4504 	int i;
4505 	static const struct chs_values {
4506 		uint_t max_cap;		/* Max Capacity for this HS. */
4507 		uint_t nhead;		/* Heads to use. */
4508 		uint_t nsect;		/* SPT to use. */
4509 	} CHS_values[] = {
4510 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4511 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4512 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4513 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4514 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4515 	};
4516 
4517 	/* Unlabeled SCSI floppy device */
4518 	if (capacity <= 0x1000) {
4519 		un_g->dkg_nhead = 2;
4520 		un_g->dkg_ncyl = 80;
4521 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4522 		return;
4523 	}
4524 
4525 	/*
4526 	 * For all devices we calculate cylinders using the
4527 	 * heads and sectors we assign based on capacity of the
4528 	 * device.  The table is designed to be compatible with the
4529 	 * way other operating systems lay out fdisk tables for X86
4530 	 * and to insure that the cylinders never exceed 65535 to
4531 	 * prevent problems with X86 ioctls that report geometry.
4532 	 * We use SPT that are multiples of 63, since other OSes that
4533 	 * are not limited to 16-bits for cylinders stop at 63 SPT
4534 	 * we make do by using multiples of 63 SPT.
4535 	 *
4536 	 * Note than capacities greater than or equal to 1TB will simply
4537 	 * get the largest geometry from the table. This should be okay
4538 	 * since disks this large shouldn't be using CHS values anyway.
4539 	 */
4540 	for (i = 0; CHS_values[i].max_cap < capacity &&
4541 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4542 		;
4543 
4544 	un_g->dkg_nhead = CHS_values[i].nhead;
4545 	un_g->dkg_nsect = CHS_values[i].nsect;
4546 }
4547 #endif
4548 
4549 
4550 /*
4551  *    Function: sd_resync_geom_caches
4552  *
4553  * Description: (Re)initialize both geometry caches: the virtual geometry
4554  *		information is extracted from the HBA (the "geometry"
4555  *		capability), and the physical geometry cache data is
4556  *		generated by issuing MODE SENSE commands.
4557  *
4558  *   Arguments: un - driver soft state (unit) structure
4559  *		capacity - disk capacity in #blocks
4560  *		lbasize - disk block size in bytes
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  *     Context: Kernel thread only (can sleep).
4567  */
4568 
static void
sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag)
{
	struct 	geom_cache 	pgeom;
	struct 	geom_cache	*pgeom_p = &pgeom;
	int 	spc;
	unsigned short nhead;
	unsigned short nsect;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Ask the controller for its logical geometry.
	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
	 * then the lgeom cache will be invalid.
	 */
	sd_get_virtual_geometry(un, capacity, lbasize);

	/*
	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
	 */
	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
		/*
		 * Note: Perhaps this needs to be more adaptive? The rationale
		 * is that, if there's no HBA geometry from the HBA driver, any
		 * guess is good, since this is the physical geometry. If MODE
		 * SENSE fails this gives a max cylinder size for non-LBA access
		 */
		nhead = 255;
		nsect = 63;
	} else {
		nhead = un->un_lgeom.g_nhead;
		nsect = un->un_lgeom.g_nsect;
	}

	/*
	 * CD-ROMs are modeled as a single-headed device; fold the
	 * head count into the sectors-per-track value instead.
	 */
	if (ISCD(un)) {
		pgeom_p->g_nhead = 1;
		pgeom_p->g_nsect = nsect * nhead;
	} else {
		pgeom_p->g_nhead = nhead;
		pgeom_p->g_nsect = nsect;
	}

	/*
	 * Both branches above guarantee non-zero nhead and nsect
	 * (either the validated lgeom values or the 255/63 defaults),
	 * so spc is non-zero and the division below is safe.
	 */
	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
	pgeom_p->g_capacity = capacity;
	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
	pgeom_p->g_acyl = 0;

	/*
	 * Retrieve fresh geometry data from the hardware, stash it
	 * here temporarily before we rebuild the incore label.
	 *
	 * We want to use the MODE SENSE commands to derive the
	 * physical geometry of the device, but if either command
	 * fails, the logical geometry is used as the fallback for
	 * disk label geometry.
	 *
	 * The mutex must be dropped across the call because
	 * sd_get_physical_geometry() issues (blocking) USCSI commands
	 * and asserts that the mutex is NOT held on entry.  The local
	 * pgeom staging copy keeps un_pgeom consistent meanwhile.
	 */
	mutex_exit(SD_MUTEX(un));
	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
	mutex_enter(SD_MUTEX(un));

	/*
	 * Now update the real copy while holding the mutex. This
	 * way the global copy is never in an inconsistent state.
	 */
	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));

	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
	    "(cached from lgeom)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
	    un->un_pgeom.g_rpm);
}
4650 
4651 
4652 /*
4653  *    Function: sd_read_fdisk
4654  *
4655  * Description: utility routine to read the fdisk table.
4656  *
 *   Arguments: un - driver soft state (unit) structure
 *		capacity - disk capacity in #blocks
 *		lbasize - disk block size in bytes
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to use the USCSI "direct" chain and bypass the normal
 *			command waitq.
4662  *
4663  * Return Code: SD_CMD_SUCCESS
4664  *		SD_CMD_FAILURE
4665  *
4666  *     Context: Kernel thread only (can sleep).
4667  */
/* ARGSUSED */
static int
sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
{
#if defined(_NO_FDISK_PRESENT)

	/*
	 * This platform has no fdisk layer: the Solaris region spans the
	 * whole device and the fdisk partition map is simply cleared.
	 */
	un->un_solaris_offset = 0;
	un->un_solaris_size = capacity;
	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
	return (SD_CMD_SUCCESS);

#elif defined(_FIRMWARE_NEEDS_FDISK)

	struct ipart	*fdp;
	struct mboot	*mbp;
	struct ipart	fdisk[FD_NUMPART];
	int		i;
	char		sigbuf[2];
	caddr_t		bufp;
	int		uidx;
	int		rval;
	int		lba = 0;
	uint_t		solaris_offset;	/* offset to solaris part. */
	daddr_t		solaris_size;	/* size of solaris partition */
	uint32_t	blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);

	blocksize = un->un_tgt_blocksize;

	/*
	 * Start off assuming no fdisk table
	 */
	solaris_offset = 0;
	solaris_size   = capacity;

	/*
	 * Drop the mutex while blocking on the read of block 0 (the
	 * master boot record, which contains the fdisk table).
	 */
	mutex_exit(SD_MUTEX(un));
	bufp = kmem_zalloc(blocksize, KM_SLEEP);
	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
	mutex_enter(SD_MUTEX(un));

	if (rval != 0) {
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: fdisk read err\n");
		kmem_free(bufp, blocksize);
		return (SD_CMD_FAILURE);
	}

	mbp = (struct mboot *)bufp;

	/*
	 * The fdisk table does not begin on a 4-byte boundary within the
	 * master boot record, so we copy it to an aligned structure to avoid
	 * alignment exceptions on some processors.
	 */
	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));

	/*
	 * Check for lba support before verifying sig; sig might not be
	 * there, say on a blank disk, but the max_chs mark may still
	 * be present.
	 *
	 * Note: LBA support and BEFs are an x86-only concept but this
	 * code should work OK on SPARC as well.
	 */

	/*
	 * First, check for lba-access-ok on root node (or prom root node)
	 * if present there, don't need to search fdisk table.
	 */
	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
	    "lba-access-ok", 0) != 0) {
		/* All drives do LBA; don't search fdisk table */
		lba = 1;
	} else {
		/* Okay, look for mark in fdisk table */
		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
			/* accumulate "lba" value from all partitions */
			lba = (lba || sd_has_max_chs_vals(fdp));
		}
	}

	/*
	 * If LBA was detected, publish the "lba-access-ok" property on
	 * this device node (unless it is already present) so that later
	 * consumers don't have to repeat the fdisk-table scan.
	 */
	if (lba != 0) {
		dev_t dev = sd_make_device(SD_DEVINFO(un));

		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "lba-access-ok", 0) == 0) {
			/* not found; create it */
			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
			    "lba-access-ok", (caddr_t)NULL, 0) !=
			    DDI_PROP_SUCCESS) {
				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
				    "sd_read_fdisk: Can't create lba property "
				    "for instance %d\n",
				    ddi_get_instance(SD_DEVINFO(un)));
			}
		}
	}

	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));

	/*
	 * Endian-independent signature check
	 */
	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: no fdisk\n");
		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
		rval = SD_CMD_SUCCESS;
		goto done;
	}

#ifdef SDDEBUG
	if (sd_level_mask & SD_LOGMASK_INFO) {
		fdp = fdisk;
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
		    "numsect         sysid       bootid\n");
		for (i = 0; i < FD_NUMPART; i++, fdp++) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
			    i, fdp->relsect, fdp->numsect,
			    fdp->systid, fdp->bootid);
		}
	}
#endif

	/*
	 * Try to find the unix partition
	 */
	uidx = -1;
	solaris_offset = 0;
	solaris_size   = 0;

	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
		int	relsect;
		int	numsect;

		/* An empty entry clears the corresponding fmap slot */
		if (fdp->numsect == 0) {
			un->un_fmap[i].fmap_start = 0;
			un->un_fmap[i].fmap_nblk  = 0;
			continue;
		}

		/*
		 * Data in the fdisk table is little-endian.
		 */
		relsect = LE_32(fdp->relsect);
		numsect = LE_32(fdp->numsect);

		un->un_fmap[i].fmap_start = relsect;
		un->un_fmap[i].fmap_nblk  = numsect;

		/*
		 * Only Solaris partition ids (old and new) and the EFI
		 * protective MBR entry qualify as the "unix" partition.
		 */
		if (fdp->systid != SUNIXOS &&
		    fdp->systid != SUNIXOS2 &&
		    fdp->systid != EFI_PMBR) {
			continue;
		}

		/*
		 * use the last active solaris partition id found
		 * (there should only be 1 active partition id)
		 *
		 * if there are no active solaris partition id
		 * then use the first inactive solaris partition id
		 */
		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
			uidx = i;
			solaris_offset = relsect;
			solaris_size   = numsect;
		}
	}

	/*
	 * NOTE(review): this logs the softstate's previous values --
	 * un_solaris_offset/un_solaris_size are not updated until after
	 * the "done" label below -- not the offset/size just computed.
	 * Confirm this is intentional.
	 */
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
	    un->un_solaris_offset, un->un_solaris_size);

	rval = SD_CMD_SUCCESS;

done:

	/*
	 * Clear the VTOC info, only if the Solaris partition entry
	 * has moved, changed size, been deleted, or if the size of
	 * the partition is too small to even fit the label sector.
	 */
	if ((un->un_solaris_offset != solaris_offset) ||
	    (un->un_solaris_size != solaris_size) ||
	    solaris_size <= DK_LABEL_LOC) {
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
			solaris_offset, solaris_size);
		bzero(&un->un_g, sizeof (struct dk_geom));
		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
		un->un_f_geometry_is_valid = FALSE;
	}
	un->un_solaris_offset = solaris_offset;
	un->un_solaris_size = solaris_size;
	kmem_free(bufp, blocksize);
	return (rval);

#else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
#error "fdisk table presence undetermined for this platform."
#endif	/* #if defined(_NO_FDISK_PRESENT) */
}
4875 
4876 
4877 /*
4878  *    Function: sd_get_physical_geometry
4879  *
4880  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4881  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4882  *		target, and use this information to initialize the physical
4883  *		geometry cache specified by pgeom_p.
4884  *
4885  *		MODE SENSE is an optional command, so failure in this case
4886  *		does not necessarily denote an error. We want to use the
4887  *		MODE SENSE commands to derive the physical geometry of the
4888  *		device, but if either command fails, the logical geometry is
4889  *		used as the fallback for disk label geometry.
4890  *
4891  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4892  *		have already been initialized for the current target and
4893  *		that the current values be passed as args so that we don't
4894  *		end up ever trying to use -1 as a valid value. This could
4895  *		happen if either value is reset while we're not holding
4896  *		the mutex.
4897  *
 *   Arguments: un - driver soft state (unit) structure
 *		pgeom_p - physical geometry cache to be initialized
 *		capacity - disk capacity in #blocks (system blocks)
 *		lbasize - disk block size in bytes
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to use the USCSI "direct" chain and bypass the normal
 *			command waitq.
4903  *
4904  *     Context: Kernel thread only (can sleep).
4905  */
4906 
static void
sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
	int capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;
	int	modesense_capacity;
	int	rpm;
	int	bd_len;
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;

	ASSERT(un != NULL);
	/* Caller must NOT hold the softstate mutex: we block on USCSI I/O */
	ASSERT(!(mutex_owned(SD_MUTEX(un))));

	/* Nothing sensible can be derived without a valid block count ... */
	if (un->un_f_blockcount_is_valid != TRUE) {
		return;
	}

	/* ... or without a valid target block size */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return;
	}

	/*
	 * A zero lbasize means "unknown"; substitute the conventional
	 * default: 2048 bytes for CD media, the system block size otherwise.
	 */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/* ATAPI devices require the group-2 (10-byte) MODE SENSE CDB */
	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page3\n", bd_len);
		goto page3_exit;
	}

	/* Mode page data follows the header and any block descriptors */
	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg3 code mismatch %d\n",
		    page3p->mode_page.code);
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
	} else {
		/* Round down to a multiple of the system block size */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	/* Page 3 fields are big-endian on the wire */
	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));


	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page4\n", bd_len);
		goto page4_exit;
	}

	/*
	 * Note: mode_header_length still holds the value determined while
	 * parsing page 3 above; it is valid here because the transport
	 * type (and hence header format) cannot have changed in between.
	 */
	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg4 code mismatch %d\n",
		    page4p->mode_page.code);
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */

	mutex_enter(SD_MUTEX(un));

	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	/* Cylinder count is a 24-bit big-endian field split across 3 bytes */
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		/* Excess MODE SENSE cylinders become "alternate" cylinders */
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);

	mutex_exit(SD_MUTEX(un));

	/*
	 * Cleanup labels: falling through from success frees both buffers;
	 * a page-3 failure jumps past the page-4 free (p4bufp not yet
	 * allocated), and a page-4 failure frees both.
	 */
page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
}
5140 
5141 
5142 /*
5143  *    Function: sd_get_virtual_geometry
5144  *
5145  * Description: Ask the controller to tell us about the target device.
5146  *
5147  *   Arguments: un - pointer to softstate
5148  *		capacity - disk capacity in #blocks
5149  *		lbasize - disk block size in bytes
5150  *
5151  *     Context: Kernel thread only
5152  */
5153 
static void
sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
{
	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
	uint_t	geombuf;
	int	spc;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Drop the mutex across the (potentially blocking) HBA cap calls */
	mutex_exit(SD_MUTEX(un));

	/* Set sector size, and total number of sectors */
	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);

	/* Let the HBA tell us its geometry */
	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);

	mutex_enter(SD_MUTEX(un));

	/*
	 * A value of -1 indicates an undefined "geometry" property
	 * (geombuf is unsigned; -1 converts to UINT_MAX for the compare)
	 */
	if (geombuf == (-1)) {
		return;
	}

	/*
	 * Initialize the logical geometry cache.  The "geometry" capability
	 * packs the head count in the upper 16 bits and the sectors-per-
	 * track count in the lower 16 bits.
	 */
	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
	lgeom_p->g_nsect   = geombuf & 0xffff;
	lgeom_p->g_secsize = un->un_sys_blocksize;

	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;

	/*
	 * Note: The driver originally converted the capacity value from
	 * target blocks to system blocks. However, the capacity value passed
	 * to this routine is already in terms of system blocks (this scaling
	 * is done when the READ CAPACITY command is issued and processed).
	 * This 'error' may have gone undetected because the usage of g_ncyl
	 * (which is based upon g_capacity) is very limited within the driver
	 */
	lgeom_p->g_capacity = capacity;

	/*
	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
	 * hba may return zero values if the device has been removed.
	 */
	if (spc == 0) {
		lgeom_p->g_ncyl = 0;
	} else {
		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
	}
	lgeom_p->g_acyl = 0;

	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
}
5217 
5218 
5219 /*
5220  *    Function: sd_update_block_info
5221  *
5222  * Description: Calculate a byte count to sector count bitshift value
5223  *		from sector size.
5224  *
5225  *   Arguments: un: unit struct.
5226  *		lbasize: new target sector size
5227  *		capacity: new target capacity, ie. block count
5228  *
5229  *     Context: Kernel thread context
5230  */
5231 
5232 static void
5233 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5234 {
5235 	if (lbasize != 0) {
5236 		un->un_tgt_blocksize = lbasize;
5237 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5238 	}
5239 
5240 	if (capacity != 0) {
5241 		un->un_blockcount		= capacity;
5242 		un->un_f_blockcount_is_valid	= TRUE;
5243 	}
5244 }
5245 
5246 
/*
 * Byte-swap an EFI GPT header from its on-disk little-endian layout to
 * native byte order, in place.  (The LE_xx macros are no-ops on
 * little-endian hosts.)  Callers are expected to serialize access to *e.
 */
static void
sd_swap_efi_gpt(efi_gpt_t *e)
{
	_NOTE(ASSUMING_PROTECTED(*e))
	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
	e->efi_gpt_NumberOfPartitionEntries =
	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
	e->efi_gpt_SizeOfPartitionEntry =
	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
	e->efi_gpt_PartitionEntryArrayCRC32 =
	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
}
5268 
5269 static void
5270 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5271 {
5272 	int i;
5273 
5274 	_NOTE(ASSUMING_PROTECTED(*p))
5275 	for (i = 0; i < nparts; i++) {
5276 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5277 		    p[i].efi_gpe_PartitionTypeGUID);
5278 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5279 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5280 		/* PartitionAttrs */
5281 	}
5282 }
5283 
5284 static int
5285 sd_validate_efi(efi_gpt_t *labp)
5286 {
5287 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5288 		return (EINVAL);
5289 	/* at least 96 bytes in this version of the spec. */
5290 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5291 	    labp->efi_gpt_HeaderSize)
5292 		return (EINVAL);
5293 	/* this should be 128 bytes */
5294 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5295 		return (EINVAL);
5296 	return (0);
5297 }
5298 
/*
 *    Function: sd_use_efi
 *
 * Description: Look for an EFI (GPT) label on the device.  If a valid
 *		primary or backup GPT is found, fill in the unit's partition
 *		map (un_map/un_offset), remember the index of the EFI
 *		"reserved" partition in un_reserved, and mark the geometry
 *		valid.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain
 *			and the normal command waitq, or
 *			SD_PATH_DIRECT_PRIORITY to bypass the normal waitq.
 *
 * Return Code: 0 - valid EFI label found and applied
 *		ESRCH - a VTOC label (DKL_MAGIC) was found instead; not ours
 *		other errno values on read/validation failures
 *
 *     Context: Kernel thread only (can sleep).  Entered and exited with
 *		SD_MUTEX held; the mutex is dropped around the SCSI commands.
 */
static int
sd_use_efi(struct sd_lun *un, int path_flag)
{
	int		i;
	int		rval = 0;
	efi_gpe_t	*partitions;
	uchar_t		*buf;
	uint_t		lbasize;
	uint64_t	cap = 0;
	uint_t		nparts;
	diskaddr_t	gpe_lba;
	struct uuid	uuid_type_reserved = EFI_RESERVED;

	ASSERT(mutex_owned(SD_MUTEX(un)));
	lbasize = un->un_tgt_blocksize;
	un->un_reserved = -1;

	mutex_exit(SD_MUTEX(un));

	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);

	/* EFI labels are only handled at the system block size here */
	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
		rval = EINVAL;
		goto done_err;
	}

	/* Read block 0 first, to detect a VTOC (SMI) label */
	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
	if (rval) {
		goto done_err;
	}
	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
		/* not ours */
		rval = ESRCH;
		goto done_err;
	}

	/* The primary GPT header lives in block 1 */
	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
	if (rval) {
		goto done_err;
	}
	sd_swap_efi_gpt((efi_gpt_t *)buf);

	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
		/*
		 * Couldn't read the primary, try the backup.  Our
		 * capacity at this point could be based on CHS, so
		 * check what the device reports.
		 */
		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
		    path_flag);
		if (rval) {
			goto done_err;
		}

		/*
		 * The MMC standard allows READ CAPACITY to be
		 * inaccurate by a bounded amount (in the interest of
		 * response latency).  As a result, failed READs are
		 * commonplace (due to the reading of metadata and not
		 * data). Depending on the per-Vendor/drive Sense data,
		 * the failed READ can cause many (unnecessary) retries.
		 */
		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
			path_flag)) != 0) {
				goto done_err;
		}

		sd_swap_efi_gpt((efi_gpt_t *)buf);
		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {

			/*
			 * Refer to comments related to off-by-1 at the
			 * header of this file. Search the next to last
			 * block for backup EFI label.
			 */
			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
			    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
				path_flag)) != 0) {
					goto done_err;
			}
			sd_swap_efi_gpt((efi_gpt_t *)buf);
			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
				goto done_err;
		}
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "primary label corrupt; using backup\n");
	}

	/* cap is still 0 if we came through the primary-label path above */
	if (cap == 0)
		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
		    path_flag);

	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;

	/* Read the partition entry array itself */
	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
	    path_flag);
	if (rval) {
		goto done_err;
	}
	partitions = (efi_gpe_t *)buf;

	if (nparts > MAXPART) {
		nparts = MAXPART;
	}
	sd_swap_efi_gpe(nparts, partitions);

	mutex_enter(SD_MUTEX(un));

	/* Fill in partition table. */
	for (i = 0; i < nparts; i++) {
		if (partitions->efi_gpe_StartingLBA != 0 ||
		    partitions->efi_gpe_EndingLBA != 0) {
			un->un_map[i].dkl_cylno =
			    partitions->efi_gpe_StartingLBA;
			un->un_map[i].dkl_nblk =
			    partitions->efi_gpe_EndingLBA -
			    partitions->efi_gpe_StartingLBA + 1;
			un->un_offset[i] =
			    partitions->efi_gpe_StartingLBA;
		}
		/* Remember the first partition whose type is EFI_RESERVED */
		if (un->un_reserved == -1) {
			if (bcmp(&partitions->efi_gpe_PartitionTypeGUID,
			    &uuid_type_reserved, sizeof (struct uuid)) == 0) {
				un->un_reserved = i;
			}
		}
		if (i == WD_NODE) {
			/*
			 * minor number 7 corresponds to the whole disk
			 */
			un->un_map[i].dkl_cylno = 0;
			un->un_map[i].dkl_nblk  = un->un_blockcount;
			un->un_offset[i] = 0;
		}
		partitions++;
	}
	un->un_solaris_offset = 0;
	un->un_solaris_size = cap;
	un->un_f_geometry_is_valid = TRUE;

	/* clear the vtoc label */
	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));

	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
	return (0);

done_err:
	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
	mutex_enter(SD_MUTEX(un));
	/*
	 * if we didn't find something that could look like a VTOC
	 * and the disk is over 1TB, we know there isn't a valid label.
	 * Otherwise let sd_uselabel decide what to do.  We only
	 * want to invalidate this if we're certain the label isn't
	 * valid because sd_prop_op will now fail, which in turn
	 * causes things like opens and stats on the partition to fail.
	 */
	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
		un->un_f_geometry_is_valid = FALSE;
	}
	return (rval);
}
5463 
5464 
5465 /*
5466  *    Function: sd_uselabel
5467  *
5468  * Description: Validate the disk label and update the relevant data (geometry,
5469  *		partition, vtoc, and capacity data) in the sd_lun struct.
5470  *		Marks the geometry of the unit as being valid.
5471  *
5472  *   Arguments: un: unit struct.
5473  *		dk_label: disk label
5474  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5475  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5476  *			to use the USCSI "direct" chain and bypass the normal
5477  *			command waitq.
5478  *
5479  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5480  *		partition, vtoc, and capacity data are good.
5481  *
5482  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5483  *		label; or computed capacity does not jibe with capacity
5484  *		reported from the READ CAPACITY command.
5485  *
5486  *     Context: Kernel thread only (can sleep).
5487  */
5488 
static int
sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
{
	short	*sp;
	short	sum;
	short	count;
	int	label_error = SD_LABEL_IS_VALID;
	int	i;
	int	capacity;
	int	part_end;
	int	track_capacity;
	int	err;
#if defined(_SUNOS_VTOC_16)
	struct	dkl_partition	*vpartp;
#endif
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Validate the magic number of the label. */
	if (labp->dkl_magic != DKL_MAGIC) {
#if defined(__sparc)
		if ((un->un_state == SD_STATE_NORMAL) &&
			un->un_f_vtoc_errlog_supported) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label; wrong magic number\n");
		}
#endif
		return (SD_LABEL_IS_INVALID);
	}

	/*
	 * Validate the checksum of the label: a valid label XORs to
	 * zero over all of its 16-bit words (the stored checksum word
	 * cancels the rest).
	 */
	sp  = (short *)labp;
	sum = 0;
	count = sizeof (struct dk_label) / sizeof (short);
	while (count--)	 {
		sum ^= *sp++;
	}

	if (sum != 0) {
#if	defined(_SUNOS_VTOC_16)
		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
#elif defined(_SUNOS_VTOC_8)
		if ((un->un_state == SD_STATE_NORMAL) &&
		    un->un_f_vtoc_errlog_supported) {
#endif
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - label checksum failed\n");
		}
		return (SD_LABEL_IS_INVALID);
	}


	/*
	 * Fill in geometry structure with data from label.
	 */
	bzero(&un->un_g, sizeof (struct dk_geom));
	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
	un->un_g.dkg_acyl   = labp->dkl_acyl;
	un->un_g.dkg_bcyl   = 0;
	un->un_g.dkg_nhead  = labp->dkl_nhead;
	un->un_g.dkg_nsect  = labp->dkl_nsect;
	un->un_g.dkg_intrlv = labp->dkl_intrlv;

#if defined(_SUNOS_VTOC_8)
	un->un_g.dkg_gap1   = labp->dkl_gap1;
	un->un_g.dkg_gap2   = labp->dkl_gap2;
	un->un_g.dkg_bhead  = labp->dkl_bhead;
#endif
#if defined(_SUNOS_VTOC_16)
	un->un_dkg_skew = labp->dkl_skew;
#endif

#if defined(__i386) || defined(__amd64)
	un->un_g.dkg_apc = labp->dkl_apc;
#endif

	/*
	 * Currently we rely on the values in the label being accurate. If
	 * dlk_rpm or dlk_pcly are zero in the label, use a default value.
	 *
	 * Note: In the future a MODE SENSE may be used to retrieve this data,
	 * although this command is optional in SCSI-2.
	 */
	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);

	/*
	 * The Read and Write reinstruct values may not be valid
	 * for older disks.
	 */
	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;

	/* Fill in partition table. */
#if defined(_SUNOS_VTOC_8)
	for (i = 0; i < NDKMAP; i++) {
		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
	}
#endif
#if  defined(_SUNOS_VTOC_16)
	vpartp		= labp->dkl_vtoc.v_part;
	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;

	/* Prevent divide by zero */
	if (track_capacity == 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Corrupt label - zero nhead or nsect value\n");

		return (SD_LABEL_IS_INVALID);
	}

	for (i = 0; i < NDKMAP; i++, vpartp++) {
		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
		un->un_map[i].dkl_nblk  = vpartp->p_size;
	}
#endif

	/* Fill in VTOC Structure. */
	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
#if defined(_SUNOS_VTOC_8)
	/*
	 * The 8-slice vtoc does not include the ascii label; save it into
	 * the device's soft state structure here.
	 */
	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#endif

	/* Now look for a valid capacity. */
	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
	capacity	= (un->un_g.dkg_ncyl  * track_capacity);

	if (un->un_g.dkg_acyl) {
#if defined(__i386) || defined(__amd64)
		/* we may have > 1 alts cylinder */
		capacity += (track_capacity * un->un_g.dkg_acyl);
#else
		capacity += track_capacity;
#endif
	}

	/*
	 * Force check here to ensure the computed capacity is valid.
	 * If capacity is zero, it indicates an invalid label and
	 * we should abort updating the relevant data then.
	 */
	if (capacity == 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Corrupt label - no valid capacity could be retrieved\n");

		return (SD_LABEL_IS_INVALID);
	}

	/* Mark the geometry as valid. */
	un->un_f_geometry_is_valid = TRUE;

	/*
	 * At this point, un->un_blockcount should contain valid data from
	 * the READ CAPACITY command.
	 */
	if (un->un_f_blockcount_is_valid != TRUE) {
		/*
		 * We have a situation where the target didn't give us a good
		 * READ CAPACITY value, yet there appears to be a valid label.
		 * In this case, we'll fake the capacity.
		 */
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
		goto done;
	}


	/*
	 * The label-derived capacity does not exceed the cached drive
	 * capacity (or the unit is not in normal state): accept the label.
	 */
	if ((capacity <= un->un_blockcount) ||
	    (un->un_state != SD_STATE_NORMAL)) {
#if defined(_SUNOS_VTOC_8)
		/*
		 * We can't let this happen on drives that are subdivided
		 * into logical disks (i.e., that have an fdisk table).
		 * The un_blockcount field should always hold the full media
		 * size in sectors, period.  This code would overwrite
		 * un_blockcount with the size of the Solaris fdisk partition.
		 */
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
		    capacity, un->un_blockcount);
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
#endif	/* defined(_SUNOS_VTOC_8) */
		goto done;
	}

	if (ISCD(un)) {
		/* For CDROMs, we trust that the data in the label is OK. */
#if defined(_SUNOS_VTOC_8)
		for (i = 0; i < NDKMAP; i++) {
			part_end = labp->dkl_nhead * labp->dkl_nsect *
			    labp->dkl_map[i].dkl_cylno +
			    labp->dkl_map[i].dkl_nblk  - 1;

			if ((labp->dkl_map[i].dkl_nblk) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
#if defined(_SUNOS_VTOC_16)
		vpartp = &(labp->dkl_vtoc.v_part[0]);
		for (i = 0; i < NDKMAP; i++, vpartp++) {
			part_end = vpartp->p_start + vpartp->p_size;
			if ((vpartp->p_size > 0) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
	} else {
		uint64_t t_capacity;
		uint32_t t_lbasize;

		/*
		 * Label claims more blocks than the cached capacity;
		 * re-query the drive before deciding the label is bad.
		 */
		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
		    path_flag);
		ASSERT(t_capacity <= DK_MAX_BLOCKS);
		mutex_enter(SD_MUTEX(un));

		if (err == 0) {
			sd_update_block_info(un, t_lbasize, t_capacity);
		}

		if (capacity > un->un_blockcount) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - bad geometry\n");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Label says %u blocks; Drive says %llu blocks\n",
			    capacity, (unsigned long long)un->un_blockcount);
			un->un_f_geometry_is_valid = FALSE;
			label_error = SD_LABEL_IS_INVALID;
		}
	}

done:

	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
	    un->un_tgt_blocksize, un->un_blockcount,
	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);

	ASSERT(mutex_owned(SD_MUTEX(un)));

	return (label_error);
}
5750 
5751 
5752 /*
5753  *    Function: sd_build_default_label
5754  *
5755  * Description: Generate a default label for those devices that do not have
5756  *		one, e.g., new media, removable cartridges, etc..
5757  *
5758  *     Context: Kernel thread only
5759  */
5760 
static void
sd_build_default_label(struct sd_lun *un)
{
#if defined(_SUNOS_VTOC_16)
	uint_t	phys_spc;
	uint_t	disksize;
	struct	dk_geom un_g;
	uint64_t capacity;
#endif

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

#if defined(_SUNOS_VTOC_8)
	/*
	 * Note: This is a legacy check for non-removable devices on VTOC_8
	 * only. This may be a valid check for VTOC_16 as well.
	 * Once we understand why there is this difference between SPARC and
	 * x86 platform, we could remove this legacy check.
	 */
	ASSERT(un->un_f_default_vtoc_supported);
#endif

	/* Start from a clean geometry, vtoc, and partition map */
	bzero(&un->un_g, sizeof (struct dk_geom));
	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));

#if defined(_SUNOS_VTOC_8)

	/*
	 * It's a REMOVABLE media, therefore no label (on sparc, anyway).
	 * But it is still necessary to set up various geometry information,
	 * and we are doing this here.
	 */

	/*
	 * For the rpm, we use the minimum for the disk.  For the head, cyl,
	 * and number of sector per track, if the capacity <= 1GB, head = 64,
	 * sect = 32.  else head = 255, sect 63 Note: the capacity should be
	 * equal to C*H*S values.  This will cause some truncation of size due
	 * to round off errors. For CD-ROMs, this truncation can have adverse
	 * side effects, so returning ncyl and nhead as 1. The nsect will
	 * overflow for most of CD-ROMs as nsect is of type ushort. (4190569)
	 */
	if (ISCD(un)) {
		/*
		 * Preserve the old behavior for non-writable
		 * medias. Since dkg_nsect is a ushort, it
		 * will lose bits as cdroms have more than
		 * 65536 sectors. So if we recalculate
		 * capacity, it will become much shorter.
		 * But the dkg_* information is not
		 * used for CDROMs so it is OK. But for
		 * Writable CDs we need this information
		 * to be valid (for newfs say). So we
		 * make nsect and nhead > 1 that way
		 * nsect can still stay within ushort limit
		 * without losing any bits.
		 */
		if (un->un_f_mmc_writable_media == TRUE) {
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
			un->un_blockcount = un->un_g.dkg_ncyl *
			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
		} else {
			un->un_g.dkg_ncyl  = 1;
			un->un_g.dkg_nhead = 1;
			un->un_g.dkg_nsect = un->un_blockcount;
		}
	} else {
		if (un->un_blockcount <= 0x1000) {
			/* unlabeled SCSI floppy device */
			un->un_g.dkg_nhead = 2;
			un->un_g.dkg_ncyl = 80;
			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
		} else if (un->un_blockcount <= 0x200000) {
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
		} else {
			un->un_g.dkg_nhead = 255;
			un->un_g.dkg_nsect = 63;
			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
		}
		/* Truncate capacity to an exact C*H*S multiple */
		un->un_blockcount =
		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	un->un_g.dkg_acyl	= 0;
	un->un_g.dkg_bcyl	= 0;
	un->un_g.dkg_rpm	= 200;
	un->un_asciilabel[0]	= '\0';
	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;

	/* Slices 0 and 2 (backup) both map the entire media */
	un->un_map[0].dkl_cylno = 0;
	un->un_map[0].dkl_nblk  = un->un_blockcount;
	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = un->un_blockcount;

#elif defined(_SUNOS_VTOC_16)

	if (un->un_solaris_size == 0) {
		/*
		 * Got fdisk table but no solaris entry therefore
		 * don't create a default label
		 */
		un->un_f_geometry_is_valid = TRUE;
		return;
	}

	/*
	 * For CDs we continue to use the physical geometry to calculate
	 * number of cylinders. All other devices must convert the
	 * physical geometry (geom_cache) to values that will fit
	 * in a dk_geom structure.
	 */
	if (ISCD(un)) {
		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
	} else {
		/* Convert physical geometry to disk geometry */
		bzero(&un_g, sizeof (struct dk_geom));

		/*
		 * Refer to comments related to off-by-1 at the
		 * header of this file.
		 * Before caculating geometry, capacity should be
		 * decreased by 1. That un_f_capacity_adjusted is
		 * TRUE means that we are treating a 1TB disk as
		 * (1T - 512)B. And the capacity of disks is already
		 * decreased by 1.
		 */
		if (!un->un_f_capacity_adjusted &&
		    !un->un_f_has_removable_media &&
		    !un->un_f_is_hotpluggable &&
			un->un_tgt_blocksize == un->un_sys_blocksize)
			capacity = un->un_blockcount - 1;
		else
			capacity = un->un_blockcount;

		sd_convert_geometry(capacity, &un_g);
		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	/* Cylinder counts are derived from sectors-per-cylinder */
	ASSERT(phys_spc != 0);
	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
	un->un_g.dkg_acyl = DK_ACYL;
	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
	disksize = un->un_g.dkg_ncyl * phys_spc;

	if (ISCD(un)) {
		/*
		 * CD's don't use the "heads * sectors * cyls"-type of
		 * geometry, but instead use the entire capacity of the media.
		 */
		disksize = un->un_solaris_size;
		un->un_g.dkg_nhead = 1;
		un->un_g.dkg_nsect = 1;
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;

		un->un_vtoc.v_part[0].p_start = 0;
		un->un_vtoc.v_part[0].p_size  = disksize;
		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;

		un->un_map[0].dkl_cylno = 0;
		un->un_map[0].dkl_nblk  = disksize;
		un->un_offset[0] = 0;

	} else {
		/*
		 * Hard disks and removable media cartridges
		 */
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;

		/* Add boot slice */
		un->un_vtoc.v_part[8].p_start = 0;
		un->un_vtoc.v_part[8].p_size  = phys_spc;
		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;

		un->un_map[8].dkl_cylno = 0;
		un->un_map[8].dkl_nblk  = phys_spc;
		un->un_offset[8] = 0;
	}

	un->un_g.dkg_apc = 0;
	un->un_vtoc.v_nparts = V_NUMPAR;
	un->un_vtoc.v_version = V_VERSION;

	/* Add backup slice */
	un->un_vtoc.v_part[2].p_start = 0;
	un->un_vtoc.v_part[2].p_size  = disksize;
	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;

	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = disksize;
	un->un_offset[2] = 0;

	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);

#else
#error "No VTOC format defined."
#endif

	un->un_g.dkg_read_reinstruct  = 0;
	un->un_g.dkg_write_reinstruct = 0;

	un->un_g.dkg_intrlv = 1;

	un->un_vtoc.v_sanity  = VTOC_SANE;

	un->un_f_geometry_is_valid = TRUE;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_build_default_label: Default label created: "
	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
	    un->un_g.dkg_nsect, un->un_blockcount);
}
5988 
5989 
5990 #if defined(_FIRMWARE_NEEDS_FDISK)
5991 /*
5992  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5993  */
5994 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5995 #define	LBA_MAX_CYL	(1022 & 0xFF)
5996 #define	LBA_MAX_HEAD	(254)
5997 
5998 
5999 /*
6000  *    Function: sd_has_max_chs_vals
6001  *
6002  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
6003  *
6004  *   Arguments: fdp - ptr to CHS info
6005  *
6006  * Return Code: True or false
6007  *
6008  *     Context: Any.
6009  */
6010 
6011 static int
6012 sd_has_max_chs_vals(struct ipart *fdp)
6013 {
6014 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6015 	    (fdp->beghead == LBA_MAX_HEAD)	&&
6016 	    (fdp->begsect == LBA_MAX_SECT)	&&
6017 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6018 	    (fdp->endhead == LBA_MAX_HEAD)	&&
6019 	    (fdp->endsect == LBA_MAX_SECT));
6020 }
6021 #endif
6022 
6023 
6024 /*
6025  *    Function: sd_inq_fill
6026  *
6027  * Description: Print a piece of inquiry data, cleaned up for non-printable
6028  *		characters and stopping at the first space character after
6029  *		the beginning of the passed string;
6030  *
6031  *   Arguments: p - source string
6032  *		l - maximum length to copy
6033  *		s - destination string
6034  *
6035  *     Context: Any.
6036  */
6037 
static void
sd_inq_fill(char *p, int l, char *s)
{
	/*
	 * Copy at most l characters of inquiry data from p into s,
	 * replacing non-printable characters with '*' and stopping at
	 * the first space after the first character.  s is always
	 * NUL-terminated (so it must have room for l + 1 bytes).
	 *
	 * Note: i is an int (not unsigned) so that a non-positive l
	 * terminates the loop immediately; with an unsigned counter a
	 * negative l would be converted to a huge unsigned value and
	 * the loop would overrun the source buffer.
	 */
	int i = 0;
	char c;

	while (i++ < l) {
		if ((c = *p++) < ' ' || c >= 0x7F) {
			/* mask non-printable characters */
			c = '*';
		} else if (i != 1 && c == ' ') {
			/* stop at the first space after the beginning */
			break;
		}
		*s++ = c;
	}
	*s++ = 0;
}
6054 
6055 
6056 /*
6057  *    Function: sd_register_devid
6058  *
6059  * Description: This routine will obtain the device id information from the
6060  *		target, obtain the serial number, and register the device
6061  *		id with the ddi framework.
6062  *
6063  *   Arguments: devi - the system's dev_info_t for the device.
6064  *		un - driver soft state (unit) structure
6065  *		reservation_flag - indicates if a reservation conflict
6066  *		occurred during attach
6067  *
6068  *     Context: Kernel Thread
6069  */
static void
sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
{
	int		rval		= 0;
	uchar_t		*inq80		= NULL;
	size_t		inq80_len	= MAX_INQUIRY_SIZE;
	size_t		inq80_resid	= 0;
	uchar_t		*inq83		= NULL;
	size_t		inq83_len	= MAX_INQUIRY_SIZE;
	size_t		inq83_resid	= 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT((SD_DEVINFO(un)) == devi);

	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage the devid's by storing them in last 2 available sectors
	 * on the drive and have them fabricated by the ddi layer by calling
	 * ddi_devid_init and passing the DEVID_FAB flag.
	 */
	if (un->un_f_opt_fab_devid == TRUE) {
		/*
		 * Depending on EINVAL isn't reliable, since a reserved disk
		 * may result in invalid geometry, so check to make sure a
		 * reservation conflict did not occur during attach.
		 */
		if ((sd_get_devid(un) == EINVAL) &&
		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
			/*
			 * The devid is invalid AND there is no reservation
			 * conflict.  Fabricate a new devid.
			 */
			(void) sd_create_devid(un);
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: Devid Fabricated\n");
		}
		return;
	}

	/*
	 * We check the availibility of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
	 * 0x83 is availible, that is the best choice.  Our next choice is
	 * 0x80.  If neither are availible, we munge the devid from the device
	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
	 * to fabricate a devid for non-Sun qualified disks.
	 */
	if (sd_check_vpd_page_support(un) == 0) {
		/* collect page 80 data if available */
		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {

			/* SD_MUTEX is dropped across the blocking
			 * KM_SLEEP allocation and the INQUIRY command */
			mutex_exit(SD_MUTEX(un));
			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
			    0x01, 0x80, &inq80_resid);

			if (rval != 0) {
				kmem_free(inq80, inq80_len);
				inq80 = NULL;
				inq80_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}

		/* collect page 83 data if available */
		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
			mutex_exit(SD_MUTEX(un));
			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
			    0x01, 0x83, &inq83_resid);

			if (rval != 0) {
				kmem_free(inq83, inq83_len);
				inq83 = NULL;
				inq83_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/* encode best devid possible based on data available */
	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
	    (char *)ddi_driver_name(SD_DEVINFO(un)),
	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {

		/* devid successfully encoded, register devid */
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);

	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage the devid's by storing them in
		 * last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricate devid only if there's no
		 * fabricate devid existed.
		 */
		if (sd_get_devid(un) == EINVAL) {
			(void) sd_create_devid(un);
		}
		un->un_f_opt_fab_devid = TRUE;

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: devid fabricated using "
			    "ddi framework\n");
		}
	}

	/* clean up resources */
	if (inq80 != NULL) {
		kmem_free(inq80, inq80_len);
	}
	if (inq83 != NULL) {
		kmem_free(inq83, inq83_len);
	}
}
6207 
6208 static daddr_t
6209 sd_get_devid_block(struct sd_lun *un)
6210 {
6211 	daddr_t			spc, blk, head, cyl;
6212 
6213 	if ((un->un_f_geometry_is_valid == FALSE) ||
6214 	    (un->un_solaris_size < DK_LABEL_LOC))
6215 		return (-1);
6216 
6217 	if (un->un_vtoc.v_sanity != VTOC_SANE) {
6218 		/* EFI labeled */
6219 		if (un->un_reserved != -1) {
6220 			blk = un->un_map[un->un_reserved].dkl_cylno;
6221 		} else {
6222 			return (-1);
6223 		}
6224 	} else {
6225 		/* SMI labeled */
6226 		/* this geometry doesn't allow us to write a devid */
6227 		if (un->un_g.dkg_acyl < 2) {
6228 			return (-1);
6229 		}
6230 
6231 		/*
6232 		 * Subtract 2 guarantees that the next to last cylinder
6233 		 * is used
6234 		 */
6235 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6236 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6237 		head = un->un_g.dkg_nhead - 1;
6238 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6239 		    (head * un->un_g.dkg_nsect) + 1;
6240 	}
6241 	return (blk);
6242 }
6243 
6244 /*
6245  *    Function: sd_get_devid
6246  *
6247  * Description: This routine will return 0 if a valid device id has been
6248  *		obtained from the target and stored in the soft state. If a
6249  *		valid device id has not been previously read and stored, a
6250  *		read attempt will be made.
6251  *
6252  *   Arguments: un - driver soft state (unit) structure
6253  *
6254  * Return Code: 0 if we successfully get the device id
6255  *
6256  *     Context: Kernel Thread
6257  */
6258 
6259 static int
6260 sd_get_devid(struct sd_lun *un)
6261 {
6262 	struct dk_devid		*dkdevid;
6263 	ddi_devid_t		tmpid;
6264 	uint_t			*ip;
6265 	size_t			sz;
6266 	daddr_t			blk;
6267 	int			status;
6268 	int			chksum;
6269 	int			i;
6270 	size_t			buffer_size;
6271 
6272 	ASSERT(un != NULL);
6273 	ASSERT(mutex_owned(SD_MUTEX(un)));
6274 
6275 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6276 	    un);
6277 
6278 	if (un->un_devid != NULL) {
6279 		return (0);
6280 	}
6281 
6282 	blk = sd_get_devid_block(un);
6283 	if (blk < 0)
6284 		return (EINVAL);
6285 
6286 	/*
6287 	 * Read and verify device id, stored in the reserved cylinders at the
6288 	 * end of the disk. Backup label is on the odd sectors of the last
6289 	 * track of the last cylinder. Device id will be on track of the next
6290 	 * to last cylinder.
6291 	 */
6292 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6293 	mutex_exit(SD_MUTEX(un));
6294 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6295 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6296 	    SD_PATH_DIRECT);
6297 	if (status != 0) {
6298 		goto error;
6299 	}
6300 
6301 	/* Validate the revision */
6302 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6303 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6304 		status = EINVAL;
6305 		goto error;
6306 	}
6307 
6308 	/* Calculate the checksum */
6309 	chksum = 0;
6310 	ip = (uint_t *)dkdevid;
6311 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6312 	    i++) {
6313 		chksum ^= ip[i];
6314 	}
6315 
6316 	/* Compare the checksums */
6317 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6318 		status = EINVAL;
6319 		goto error;
6320 	}
6321 
6322 	/* Validate the device id */
6323 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6324 		status = EINVAL;
6325 		goto error;
6326 	}
6327 
6328 	/*
6329 	 * Store the device id in the driver soft state
6330 	 */
6331 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6332 	tmpid = kmem_alloc(sz, KM_SLEEP);
6333 
6334 	mutex_enter(SD_MUTEX(un));
6335 
6336 	un->un_devid = tmpid;
6337 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6338 
6339 	kmem_free(dkdevid, buffer_size);
6340 
6341 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6342 
6343 	return (status);
6344 error:
6345 	mutex_enter(SD_MUTEX(un));
6346 	kmem_free(dkdevid, buffer_size);
6347 	return (status);
6348 }
6349 
6350 
6351 /*
6352  *    Function: sd_create_devid
6353  *
6354  * Description: This routine will fabricate the device id and write it
6355  *		to the disk.
6356  *
6357  *   Arguments: un - driver soft state (unit) structure
6358  *
6359  * Return Code: value of the fabricated device id
6360  *
6361  *     Context: Kernel Thread
6362  */
6363 
6364 static ddi_devid_t
6365 sd_create_devid(struct sd_lun *un)
6366 {
6367 	ASSERT(un != NULL);
6368 
6369 	/* Fabricate the devid */
6370 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6371 	    == DDI_FAILURE) {
6372 		return (NULL);
6373 	}
6374 
6375 	/* Write the devid to disk */
6376 	if (sd_write_deviceid(un) != 0) {
6377 		ddi_devid_free(un->un_devid);
6378 		un->un_devid = NULL;
6379 	}
6380 
6381 	return (un->un_devid);
6382 }
6383 
6384 
6385 /*
6386  *    Function: sd_write_deviceid
6387  *
6388  * Description: This routine will write the device id to the disk
6389  *		reserved sector.
6390  *
6391  *   Arguments: un - driver soft state (unit) structure
6392  *
6393  * Return Code: EINVAL
6394  *		value returned by sd_send_scsi_cmd
6395  *
6396  *     Context: Kernel Thread
6397  */
6398 
6399 static int
6400 sd_write_deviceid(struct sd_lun *un)
6401 {
6402 	struct dk_devid		*dkdevid;
6403 	daddr_t			blk;
6404 	uint_t			*ip, chksum;
6405 	int			status;
6406 	int			i;
6407 
6408 	ASSERT(mutex_owned(SD_MUTEX(un)));
6409 
6410 	blk = sd_get_devid_block(un);
6411 	if (blk < 0)
6412 		return (-1);
6413 	mutex_exit(SD_MUTEX(un));
6414 
6415 	/* Allocate the buffer */
6416 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6417 
6418 	/* Fill in the revision */
6419 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6420 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6421 
6422 	/* Copy in the device id */
6423 	mutex_enter(SD_MUTEX(un));
6424 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6425 	    ddi_devid_sizeof(un->un_devid));
6426 	mutex_exit(SD_MUTEX(un));
6427 
6428 	/* Calculate the checksum */
6429 	chksum = 0;
6430 	ip = (uint_t *)dkdevid;
6431 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6432 	    i++) {
6433 		chksum ^= ip[i];
6434 	}
6435 
6436 	/* Fill-in checksum */
6437 	DKD_FORMCHKSUM(chksum, dkdevid);
6438 
6439 	/* Write the reserved sector */
6440 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6441 	    SD_PATH_DIRECT);
6442 
6443 	kmem_free(dkdevid, un->un_sys_blocksize);
6444 
6445 	mutex_enter(SD_MUTEX(un));
6446 	return (status);
6447 }
6448 
6449 
6450 /*
6451  *    Function: sd_check_vpd_page_support
6452  *
6453  * Description: This routine sends an inquiry command with the EVPD bit set and
6454  *		a page code of 0x00 to the device. It is used to determine which
6455  *		vital product pages are availible to find the devid. We are
6456  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
6457  *		device does not support that command.
6458  *
6459  *   Arguments: un  - driver soft state (unit) structure
6460  *
6461  * Return Code: 0 - success
6462  *		1 - check condition
6463  *
6464  *     Context: This routine can sleep.
6465  */
6466 
6467 static int
6468 sd_check_vpd_page_support(struct sd_lun *un)
6469 {
6470 	uchar_t	*page_list	= NULL;
6471 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6472 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6473 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6474 	int    	rval		= 0;
6475 	int	counter;
6476 
6477 	ASSERT(un != NULL);
6478 	ASSERT(mutex_owned(SD_MUTEX(un)));
6479 
6480 	mutex_exit(SD_MUTEX(un));
6481 
6482 	/*
6483 	 * We'll set the page length to the maximum to save figuring it out
6484 	 * with an additional call.
6485 	 */
6486 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6487 
6488 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6489 	    page_code, NULL);
6490 
6491 	mutex_enter(SD_MUTEX(un));
6492 
6493 	/*
6494 	 * Now we must validate that the device accepted the command, as some
6495 	 * drives do not support it.  If the drive does support it, we will
6496 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6497 	 * not, we return -1.
6498 	 */
6499 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6500 		/* Loop to find one of the 2 pages we need */
6501 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6502 
6503 		/*
6504 		 * Pages are returned in ascending order, and 0x83 is what we
6505 		 * are hoping for.
6506 		 */
6507 		while ((page_list[counter] <= 0x83) &&
6508 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6509 		    VPD_HEAD_OFFSET))) {
6510 			/*
6511 			 * Add 3 because page_list[3] is the number of
6512 			 * pages minus 3
6513 			 */
6514 
6515 			switch (page_list[counter]) {
6516 			case 0x00:
6517 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6518 				break;
6519 			case 0x80:
6520 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6521 				break;
6522 			case 0x81:
6523 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6524 				break;
6525 			case 0x82:
6526 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6527 				break;
6528 			case 0x83:
6529 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6530 				break;
6531 			}
6532 			counter++;
6533 		}
6534 
6535 	} else {
6536 		rval = -1;
6537 
6538 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6539 		    "sd_check_vpd_page_support: This drive does not implement "
6540 		    "VPD pages.\n");
6541 	}
6542 
6543 	kmem_free(page_list, page_length);
6544 
6545 	return (rval);
6546 }
6547 
6548 
6549 /*
6550  *    Function: sd_setup_pm
6551  *
6552  * Description: Initialize Power Management on the device
6553  *
6554  *     Context: Kernel Thread
6555  */
6556 
6557 static void
6558 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6559 {
6560 	uint_t	log_page_size;
6561 	uchar_t	*log_page_data;
6562 	int	rval;
6563 
6564 	/*
6565 	 * Since we are called from attach, holding a mutex for
6566 	 * un is unnecessary. Because some of the routines called
6567 	 * from here require SD_MUTEX to not be held, assert this
6568 	 * right up front.
6569 	 */
6570 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6571 	/*
6572 	 * Since the sd device does not have the 'reg' property,
6573 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6574 	 * The following code is to tell cpr that this device
6575 	 * DOES need to be suspended and resumed.
6576 	 */
6577 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6578 	    "pm-hardware-state", "needs-suspend-resume");
6579 
6580 	/*
6581 	 * This complies with the new power management framework
6582 	 * for certain desktop machines. Create the pm_components
6583 	 * property as a string array property.
6584 	 */
6585 	if (un->un_f_pm_supported) {
6586 		/*
6587 		 * not all devices have a motor, try it first.
6588 		 * some devices may return ILLEGAL REQUEST, some
6589 		 * will hang
6590 		 * The following START_STOP_UNIT is used to check if target
6591 		 * device has a motor.
6592 		 */
6593 		un->un_f_start_stop_supported = TRUE;
6594 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6595 		    SD_PATH_DIRECT) != 0) {
6596 			un->un_f_start_stop_supported = FALSE;
6597 		}
6598 
6599 		/*
6600 		 * create pm properties anyways otherwise the parent can't
6601 		 * go to sleep
6602 		 */
6603 		(void) sd_create_pm_components(devi, un);
6604 		un->un_f_pm_is_enabled = TRUE;
6605 		return;
6606 	}
6607 
6608 	if (!un->un_f_log_sense_supported) {
6609 		un->un_power_level = SD_SPINDLE_ON;
6610 		un->un_f_pm_is_enabled = FALSE;
6611 		return;
6612 	}
6613 
6614 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6615 
6616 #ifdef	SDDEBUG
6617 	if (sd_force_pm_supported) {
6618 		/* Force a successful result */
6619 		rval = 1;
6620 	}
6621 #endif
6622 
6623 	/*
6624 	 * If the start-stop cycle counter log page is not supported
6625 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6626 	 * then we should not create the pm_components property.
6627 	 */
6628 	if (rval == -1) {
6629 		/*
6630 		 * Error.
6631 		 * Reading log sense failed, most likely this is
6632 		 * an older drive that does not support log sense.
6633 		 * If this fails auto-pm is not supported.
6634 		 */
6635 		un->un_power_level = SD_SPINDLE_ON;
6636 		un->un_f_pm_is_enabled = FALSE;
6637 
6638 	} else if (rval == 0) {
6639 		/*
6640 		 * Page not found.
6641 		 * The start stop cycle counter is implemented as page
6642 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
6643 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6644 		 */
6645 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6646 			/*
6647 			 * Page found, use this one.
6648 			 */
6649 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6650 			un->un_f_pm_is_enabled = TRUE;
6651 		} else {
6652 			/*
6653 			 * Error or page not found.
6654 			 * auto-pm is not supported for this device.
6655 			 */
6656 			un->un_power_level = SD_SPINDLE_ON;
6657 			un->un_f_pm_is_enabled = FALSE;
6658 		}
6659 	} else {
6660 		/*
6661 		 * Page found, use it.
6662 		 */
6663 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6664 		un->un_f_pm_is_enabled = TRUE;
6665 	}
6666 
6667 
6668 	if (un->un_f_pm_is_enabled == TRUE) {
6669 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6670 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6671 
6672 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6673 		    log_page_size, un->un_start_stop_cycle_page,
6674 		    0x01, 0, SD_PATH_DIRECT);
6675 #ifdef	SDDEBUG
6676 		if (sd_force_pm_supported) {
6677 			/* Force a successful result */
6678 			rval = 0;
6679 		}
6680 #endif
6681 
6682 		/*
6683 		 * If the Log sense for Page( Start/stop cycle counter page)
6684 		 * succeeds, then power managment is supported and we can
6685 		 * enable auto-pm.
6686 		 */
6687 		if (rval == 0)  {
6688 			(void) sd_create_pm_components(devi, un);
6689 		} else {
6690 			un->un_power_level = SD_SPINDLE_ON;
6691 			un->un_f_pm_is_enabled = FALSE;
6692 		}
6693 
6694 		kmem_free(log_page_data, log_page_size);
6695 	}
6696 }
6697 
6698 
6699 /*
6700  *    Function: sd_create_pm_components
6701  *
6702  * Description: Initialize PM property.
6703  *
6704  *     Context: Kernel thread context
6705  */
6706 
6707 static void
6708 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6709 {
6710 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6711 
6712 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6713 
6714 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6715 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6716 		/*
6717 		 * When components are initially created they are idle,
6718 		 * power up any non-removables.
6719 		 * Note: the return value of pm_raise_power can't be used
6720 		 * for determining if PM should be enabled for this device.
6721 		 * Even if you check the return values and remove this
6722 		 * property created above, the PM framework will not honor the
6723 		 * change after the first call to pm_raise_power. Hence,
6724 		 * removal of that property does not help if pm_raise_power
6725 		 * fails. In the case of removable media, the start/stop
6726 		 * will fail if the media is not present.
6727 		 */
6728 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6729 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6730 			mutex_enter(SD_MUTEX(un));
6731 			un->un_power_level = SD_SPINDLE_ON;
6732 			mutex_enter(&un->un_pm_mutex);
6733 			/* Set to on and not busy. */
6734 			un->un_pm_count = 0;
6735 		} else {
6736 			mutex_enter(SD_MUTEX(un));
6737 			un->un_power_level = SD_SPINDLE_OFF;
6738 			mutex_enter(&un->un_pm_mutex);
6739 			/* Set to off. */
6740 			un->un_pm_count = -1;
6741 		}
6742 		mutex_exit(&un->un_pm_mutex);
6743 		mutex_exit(SD_MUTEX(un));
6744 	} else {
6745 		un->un_power_level = SD_SPINDLE_ON;
6746 		un->un_f_pm_is_enabled = FALSE;
6747 	}
6748 }
6749 
6750 
6751 /*
6752  *    Function: sd_ddi_suspend
6753  *
6754  * Description: Performs system power-down operations. This includes
6755  *		setting the drive state to indicate its suspended so
6756  *		that no new commands will be accepted. Also, wait for
6757  *		all commands that are in transport or queued to a timer
6758  *		for retry to complete. All timeout threads are cancelled.
6759  *
6760  * Return Code: DDI_FAILURE or DDI_SUCCESS
6761  *
6762  *     Context: Kernel thread context
6763  */
6764 
6765 static int
6766 sd_ddi_suspend(dev_info_t *devi)
6767 {
6768 	struct	sd_lun	*un;
6769 	clock_t		wait_cmds_complete;
6770 
6771 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6772 	if (un == NULL) {
6773 		return (DDI_FAILURE);
6774 	}
6775 
6776 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6777 
6778 	mutex_enter(SD_MUTEX(un));
6779 
6780 	/* Return success if the device is already suspended. */
6781 	if (un->un_state == SD_STATE_SUSPENDED) {
6782 		mutex_exit(SD_MUTEX(un));
6783 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6784 		    "device already suspended, exiting\n");
6785 		return (DDI_SUCCESS);
6786 	}
6787 
6788 	/* Return failure if the device is being used by HA */
6789 	if (un->un_resvd_status &
6790 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6791 		mutex_exit(SD_MUTEX(un));
6792 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6793 		    "device in use by HA, exiting\n");
6794 		return (DDI_FAILURE);
6795 	}
6796 
6797 	/*
6798 	 * Return failure if the device is in a resource wait
6799 	 * or power changing state.
6800 	 */
6801 	if ((un->un_state == SD_STATE_RWAIT) ||
6802 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6803 		mutex_exit(SD_MUTEX(un));
6804 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6805 		    "device in resource wait state, exiting\n");
6806 		return (DDI_FAILURE);
6807 	}
6808 
6809 
6810 	un->un_save_state = un->un_last_state;
6811 	New_state(un, SD_STATE_SUSPENDED);
6812 
6813 	/*
6814 	 * Wait for all commands that are in transport or queued to a timer
6815 	 * for retry to complete.
6816 	 *
6817 	 * While waiting, no new commands will be accepted or sent because of
6818 	 * the new state we set above.
6819 	 *
6820 	 * Wait till current operation has completed. If we are in the resource
6821 	 * wait state (with an intr outstanding) then we need to wait till the
6822 	 * intr completes and starts the next cmd. We want to wait for
6823 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6824 	 */
6825 	wait_cmds_complete = ddi_get_lbolt() +
6826 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6827 
6828 	while (un->un_ncmds_in_transport != 0) {
6829 		/*
6830 		 * Fail if commands do not finish in the specified time.
6831 		 */
6832 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6833 		    wait_cmds_complete) == -1) {
6834 			/*
6835 			 * Undo the state changes made above. Everything
6836 			 * must go back to it's original value.
6837 			 */
6838 			Restore_state(un);
6839 			un->un_last_state = un->un_save_state;
6840 			/* Wake up any threads that might be waiting. */
6841 			cv_broadcast(&un->un_suspend_cv);
6842 			mutex_exit(SD_MUTEX(un));
6843 			SD_ERROR(SD_LOG_IO_PM, un,
6844 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6845 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6846 			return (DDI_FAILURE);
6847 		}
6848 	}
6849 
6850 	/*
6851 	 * Cancel SCSI watch thread and timeouts, if any are active
6852 	 */
6853 
6854 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6855 		opaque_t temp_token = un->un_swr_token;
6856 		mutex_exit(SD_MUTEX(un));
6857 		scsi_watch_suspend(temp_token);
6858 		mutex_enter(SD_MUTEX(un));
6859 	}
6860 
6861 	if (un->un_reset_throttle_timeid != NULL) {
6862 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6863 		un->un_reset_throttle_timeid = NULL;
6864 		mutex_exit(SD_MUTEX(un));
6865 		(void) untimeout(temp_id);
6866 		mutex_enter(SD_MUTEX(un));
6867 	}
6868 
6869 	if (un->un_dcvb_timeid != NULL) {
6870 		timeout_id_t temp_id = un->un_dcvb_timeid;
6871 		un->un_dcvb_timeid = NULL;
6872 		mutex_exit(SD_MUTEX(un));
6873 		(void) untimeout(temp_id);
6874 		mutex_enter(SD_MUTEX(un));
6875 	}
6876 
6877 	mutex_enter(&un->un_pm_mutex);
6878 	if (un->un_pm_timeid != NULL) {
6879 		timeout_id_t temp_id = un->un_pm_timeid;
6880 		un->un_pm_timeid = NULL;
6881 		mutex_exit(&un->un_pm_mutex);
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) untimeout(temp_id);
6884 		mutex_enter(SD_MUTEX(un));
6885 	} else {
6886 		mutex_exit(&un->un_pm_mutex);
6887 	}
6888 
6889 	if (un->un_retry_timeid != NULL) {
6890 		timeout_id_t temp_id = un->un_retry_timeid;
6891 		un->un_retry_timeid = NULL;
6892 		mutex_exit(SD_MUTEX(un));
6893 		(void) untimeout(temp_id);
6894 		mutex_enter(SD_MUTEX(un));
6895 	}
6896 
6897 	if (un->un_direct_priority_timeid != NULL) {
6898 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6899 		un->un_direct_priority_timeid = NULL;
6900 		mutex_exit(SD_MUTEX(un));
6901 		(void) untimeout(temp_id);
6902 		mutex_enter(SD_MUTEX(un));
6903 	}
6904 
6905 	if (un->un_f_is_fibre == TRUE) {
6906 		/*
6907 		 * Remove callbacks for insert and remove events
6908 		 */
6909 		if (un->un_insert_event != NULL) {
6910 			mutex_exit(SD_MUTEX(un));
6911 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6912 			mutex_enter(SD_MUTEX(un));
6913 			un->un_insert_event = NULL;
6914 		}
6915 
6916 		if (un->un_remove_event != NULL) {
6917 			mutex_exit(SD_MUTEX(un));
6918 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6919 			mutex_enter(SD_MUTEX(un));
6920 			un->un_remove_event = NULL;
6921 		}
6922 	}
6923 
6924 	mutex_exit(SD_MUTEX(un));
6925 
6926 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6927 
6928 	return (DDI_SUCCESS);
6929 }
6930 
6931 
6932 /*
6933  *    Function: sd_ddi_pm_suspend
6934  *
6935  * Description: Set the drive state to low power.
6936  *		Someone else is required to actually change the drive
6937  *		power level.
6938  *
6939  *   Arguments: un - driver soft state (unit) structure
6940  *
6941  * Return Code: DDI_FAILURE or DDI_SUCCESS
6942  *
6943  *     Context: Kernel thread context
6944  */
6945 
6946 static int
6947 sd_ddi_pm_suspend(struct sd_lun *un)
6948 {
6949 	ASSERT(un != NULL);
6950 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6951 
6952 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6953 	mutex_enter(SD_MUTEX(un));
6954 
6955 	/*
6956 	 * Exit if power management is not enabled for this device, or if
6957 	 * the device is being used by HA.
6958 	 */
6959 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6960 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6961 		mutex_exit(SD_MUTEX(un));
6962 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6963 		return (DDI_SUCCESS);
6964 	}
6965 
6966 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6967 	    un->un_ncmds_in_driver);
6968 
6969 	/*
6970 	 * See if the device is not busy, ie.:
6971 	 *    - we have no commands in the driver for this device
6972 	 *    - not waiting for resources
6973 	 */
6974 	if ((un->un_ncmds_in_driver == 0) &&
6975 	    (un->un_state != SD_STATE_RWAIT)) {
6976 		/*
6977 		 * The device is not busy, so it is OK to go to low power state.
6978 		 * Indicate low power, but rely on someone else to actually
6979 		 * change it.
6980 		 */
6981 		mutex_enter(&un->un_pm_mutex);
6982 		un->un_pm_count = -1;
6983 		mutex_exit(&un->un_pm_mutex);
6984 		un->un_power_level = SD_SPINDLE_OFF;
6985 	}
6986 
6987 	mutex_exit(SD_MUTEX(un));
6988 
6989 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6990 
6991 	return (DDI_SUCCESS);
6992 }
6993 
6994 
6995 /*
6996  *    Function: sd_ddi_resume
6997  *
6998  * Description: Performs system power-up operations..
6999  *
7000  * Return Code: DDI_SUCCESS
7001  *		DDI_FAILURE
7002  *
7003  *     Context: Kernel thread context
7004  */
7005 
7006 static int
7007 sd_ddi_resume(dev_info_t *devi)
7008 {
7009 	struct	sd_lun	*un;
7010 
7011 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
7012 	if (un == NULL) {
7013 		return (DDI_FAILURE);
7014 	}
7015 
7016 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
7017 
7018 	mutex_enter(SD_MUTEX(un));
7019 	Restore_state(un);
7020 
7021 	/*
7022 	 * Restore the state which was saved to give the
7023 	 * the right state in un_last_state
7024 	 */
7025 	un->un_last_state = un->un_save_state;
7026 	/*
7027 	 * Note: throttle comes back at full.
7028 	 * Also note: this MUST be done before calling pm_raise_power
7029 	 * otherwise the system can get hung in biowait. The scenario where
7030 	 * this'll happen is under cpr suspend. Writing of the system
7031 	 * state goes through sddump, which writes 0 to un_throttle. If
7032 	 * writing the system state then fails, example if the partition is
7033 	 * too small, then cpr attempts a resume. If throttle isn't restored
7034 	 * from the saved value until after calling pm_raise_power then
7035 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
7036 	 * in biowait.
7037 	 */
7038 	un->un_throttle = un->un_saved_throttle;
7039 
7040 	/*
7041 	 * The chance of failure is very rare as the only command done in power
7042 	 * entry point is START command when you transition from 0->1 or
7043 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
7044 	 * which suspend was done. Ignore the return value as the resume should
7045 	 * not be failed. In the case of removable media the media need not be
7046 	 * inserted and hence there is a chance that raise power will fail with
7047 	 * media not present.
7048 	 */
7049 	if (un->un_f_attach_spinup) {
7050 		mutex_exit(SD_MUTEX(un));
7051 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7052 		mutex_enter(SD_MUTEX(un));
7053 	}
7054 
7055 	/*
7056 	 * Don't broadcast to the suspend cv and therefore possibly
7057 	 * start I/O until after power has been restored.
7058 	 */
7059 	cv_broadcast(&un->un_suspend_cv);
7060 	cv_broadcast(&un->un_state_cv);
7061 
7062 	/* restart thread */
7063 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7064 		scsi_watch_resume(un->un_swr_token);
7065 	}
7066 
7067 #if (defined(__fibre))
7068 	if (un->un_f_is_fibre == TRUE) {
7069 		/*
7070 		 * Add callbacks for insert and remove events
7071 		 */
7072 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7073 			sd_init_event_callbacks(un);
7074 		}
7075 	}
7076 #endif
7077 
7078 	/*
7079 	 * Transport any pending commands to the target.
7080 	 *
7081 	 * If this is a low-activity device commands in queue will have to wait
7082 	 * until new commands come in, which may take awhile. Also, we
7083 	 * specifically don't check un_ncmds_in_transport because we know that
7084 	 * there really are no commands in progress after the unit was
7085 	 * suspended and we could have reached the throttle level, been
7086 	 * suspended, and have no new commands coming in for awhile. Highly
7087 	 * unlikely, but so is the low-activity disk scenario.
7088 	 */
7089 	ddi_xbuf_dispatch(un->un_xbuf_attr);
7090 
7091 	sd_start_cmds(un, NULL);
7092 	mutex_exit(SD_MUTEX(un));
7093 
7094 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7095 
7096 	return (DDI_SUCCESS);
7097 }
7098 
7099 
7100 /*
7101  *    Function: sd_ddi_pm_resume
7102  *
7103  * Description: Set the drive state to powered on.
7104  *		Someone else is required to actually change the drive
7105  *		power level.
7106  *
7107  *   Arguments: un - driver soft state (unit) structure
7108  *
7109  * Return Code: DDI_SUCCESS
7110  *
7111  *     Context: Kernel thread context
7112  */
7113 
7114 static int
7115 sd_ddi_pm_resume(struct sd_lun *un)
7116 {
7117 	ASSERT(un != NULL);
7118 
7119 	ASSERT(!mutex_owned(SD_MUTEX(un)));
7120 	mutex_enter(SD_MUTEX(un));
7121 	un->un_power_level = SD_SPINDLE_ON;
7122 
7123 	ASSERT(!mutex_owned(&un->un_pm_mutex));
7124 	mutex_enter(&un->un_pm_mutex);
7125 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7126 		un->un_pm_count++;
7127 		ASSERT(un->un_pm_count == 0);
7128 		/*
7129 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7130 		 * un_suspend_cv is for a system resume, not a power management
7131 		 * device resume. (4297749)
7132 		 *	 cv_broadcast(&un->un_suspend_cv);
7133 		 */
7134 	}
7135 	mutex_exit(&un->un_pm_mutex);
7136 	mutex_exit(SD_MUTEX(un));
7137 
7138 	return (DDI_SUCCESS);
7139 }
7140 
7141 
7142 /*
7143  *    Function: sd_pm_idletimeout_handler
7144  *
7145  * Description: A timer routine that's active only while a device is busy.
7146  *		The purpose is to extend slightly the pm framework's busy
7147  *		view of the device to prevent busy/idle thrashing for
7148  *		back-to-back commands. Do this by comparing the current time
7149  *		to the time at which the last command completed and when the
7150  *		difference is greater than sd_pm_idletime, call
7151  *		pm_idle_component. In addition to indicating idle to the pm
7152  *		framework, update the chain type to again use the internal pm
7153  *		layers of the driver.
7154  *
7155  *   Arguments: arg - driver soft state (unit) structure
7156  *
7157  *     Context: Executes in a timeout(9F) thread context
7158  */
7159 
7160 static void
7161 sd_pm_idletimeout_handler(void *arg)
7162 {
7163 	struct sd_lun *un = arg;
7164 
7165 	time_t	now;
7166 
7167 	mutex_enter(&sd_detach_mutex);
7168 	if (un->un_detach_count != 0) {
7169 		/* Abort if the instance is detaching */
7170 		mutex_exit(&sd_detach_mutex);
7171 		return;
7172 	}
7173 	mutex_exit(&sd_detach_mutex);
7174 
7175 	now = ddi_get_time();
7176 	/*
7177 	 * Grab both mutexes, in the proper order, since we're accessing
7178 	 * both PM and softstate variables.
7179 	 */
7180 	mutex_enter(SD_MUTEX(un));
7181 	mutex_enter(&un->un_pm_mutex);
7182 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7183 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7184 		/*
7185 		 * Update the chain types.
7186 		 * This takes affect on the next new command received.
7187 		 */
7188 		if (un->un_f_non_devbsize_supported) {
7189 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7190 		} else {
7191 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7192 		}
7193 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7194 
7195 		SD_TRACE(SD_LOG_IO_PM, un,
7196 		    "sd_pm_idletimeout_handler: idling device\n");
7197 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7198 		un->un_pm_idle_timeid = NULL;
7199 	} else {
7200 		un->un_pm_idle_timeid =
7201 			timeout(sd_pm_idletimeout_handler, un,
7202 			(drv_usectohz((clock_t)300000))); /* 300 ms. */
7203 	}
7204 	mutex_exit(&un->un_pm_mutex);
7205 	mutex_exit(SD_MUTEX(un));
7206 }
7207 
7208 
7209 /*
7210  *    Function: sd_pm_timeout_handler
7211  *
7212  * Description: Callback to tell framework we are idle.
7213  *
7214  *     Context: timeout(9f) thread context.
7215  */
7216 
7217 static void
7218 sd_pm_timeout_handler(void *arg)
7219 {
7220 	struct sd_lun *un = arg;
7221 
7222 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7223 	mutex_enter(&un->un_pm_mutex);
7224 	un->un_pm_timeid = NULL;
7225 	mutex_exit(&un->un_pm_mutex);
7226 }
7227 
7228 
/*
 *    Function: sdpower
 *
 * Description: PM entry point.
 *
 *   Arguments: devi      - the system's dev_info_t for the device.
 *		component - the PM component being changed; only component 0
 *			    (the spindle) is supported.
 *		level     - the requested power level: SD_SPINDLE_OFF or
 *			    SD_SPINDLE_ON.
 *
 * Return Code: DDI_SUCCESS
 *		DDI_FAILURE
 *
 *     Context: Kernel thread context
 */

static int
sdpower(dev_info_t *devi, int component, int level)
{
	struct sd_lun	*un;
	int		instance;
	int		rval = DDI_SUCCESS;
	uint_t		i, log_page_size, maxcycles, ncycles;
	uchar_t		*log_page_data;
	int		log_sense_page;
	int		medium_present;
	time_t		intvlp;
	dev_t		dev;
	struct pm_trans_data	sd_pm_tran_data;
	/*
	 * save_state is only assigned (and later consulted) on the
	 * SD_SPINDLE_OFF path; it is never read otherwise.
	 */
	uchar_t		save_state;
	int		sval;
	uchar_t		state_before_pm;
	int		got_semaphore_here;

	instance = ddi_get_instance(devi);

	/* Validate the softstate, the component, and the requested level. */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
	    component != 0) {
		return (DDI_FAILURE);
	}

	dev = sd_make_device(SD_DEVINFO(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);

	/*
	 * Must synchronize power down with close.
	 * Attempt to decrement/acquire the open/close semaphore,
	 * but do NOT wait on it. If it's not greater than zero,
	 * ie. it can't be decremented without waiting, then
	 * someone else, either open or close, already has it
	 * and the try returns 0. Use that knowledge here to determine
	 * if it's OK to change the device power level.
	 * Also, only increment it on exit if it was decremented, ie. gotten,
	 * here.
	 */
	got_semaphore_here = sema_tryp(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * If un_ncmds_in_driver is non-zero it indicates commands are
	 * already being processed in the driver, or if the semaphore was
	 * not gotten here it indicates an open or close is being processed.
	 * At the same time somebody is requesting to go low power which
	 * can't happen, therefore we need to return failure.
	 */
	if ((level == SD_SPINDLE_OFF) &&
	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
		mutex_exit(SD_MUTEX(un));

		if (got_semaphore_here != 0) {
			sema_v(&un->un_semoclose);
		}
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sdpower: exit, device has queued cmds.\n");
		return (DDI_FAILURE);
	}

	/*
	 * if it is OFFLINE that means the disk is completely dead
	 * in our case we have to put the disk in on or off by sending commands
	 * Of course that will fail anyway so return back here.
	 *
	 * Power changes to a device that's OFFLINE or SUSPENDED
	 * are not allowed.
	 */
	if ((un->un_state == SD_STATE_OFFLINE) ||
	    (un->un_state == SD_STATE_SUSPENDED)) {
		mutex_exit(SD_MUTEX(un));

		if (got_semaphore_here != 0) {
			sema_v(&un->un_semoclose);
		}
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sdpower: exit, device is off-line.\n");
		return (DDI_FAILURE);
	}

	/*
	 * Change the device's state to indicate it's power level
	 * is being changed. Do this to prevent a power off in the
	 * middle of commands, which is especially bad on devices
	 * that are really powered off instead of just spun down.
	 */
	state_before_pm = un->un_state;
	un->un_state = SD_STATE_PM_CHANGING;

	mutex_exit(SD_MUTEX(un));

	/*
	 * If "pm-capable" property is set to TRUE by HBA drivers,
	 * bypass the following checking, otherwise, check the log
	 * sense information for this device
	 */
	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
		/*
		 * Get the log sense information to understand whether the
		 * the powercycle counts have gone beyond the threshhold.
		 */
		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);

		mutex_enter(SD_MUTEX(un));
		log_sense_page = un->un_start_stop_cycle_page;
		mutex_exit(SD_MUTEX(un));

		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 0;
		}
#endif
		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Log Sense Failed\n");
			kmem_free(log_page_data, log_page_size);
			/* Cannot support power management on those drives */

			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to it's original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, Log Sense Failed.\n");
			return (DDI_FAILURE);
		}

		/*
		 * From the page data - Convert the essential information to
		 * pm_trans_data
		 */
		/* Lifetime maximum cycles: big-endian 32-bit at offset 0x1c. */
		maxcycles =
		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];

		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;

		/* Accumulated cycles: big-endian 32-bit at offset 0x24. */
		ncycles =
		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
		    (log_page_data[0x26] << 8)  | log_page_data[0x27];

		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;

		/* Service date: DC_SCSI_MFR_LEN bytes starting at offset 8. */
		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
			    log_page_data[8+i];
		}

		kmem_free(log_page_data, log_page_size);

		/*
		 * Call pm_trans_check routine to get the Ok from
		 * the global policy
		 */

		sd_pm_tran_data.format = DC_SCSI_FORMAT;
		sd_pm_tran_data.un.scsi_cycles.flag = 0;

		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 1;
		}
#endif
		switch (rval) {
		case 0:
			/*
			 * Not Ok to Power cycle or error in parameters passed
			 * Would have given the advised time to consider power
			 * cycle. Based on the new intvlp parameter we are
			 * supposed to pretend we are busy so that pm framework
			 * will never call our power entry point. Because of
			 * that install a timeout handler and wait for the
			 * recommended time to elapse so that power management
			 * can be effective again.
			 *
			 * To effect this behavior, call pm_busy_component to
			 * indicate to the framework this device is busy.
			 * By not adjusting un_pm_count the rest of PM in
			 * the driver will function normally, and independant
			 * of this but because the framework is told the device
			 * is busy it won't attempt powering down until it gets
			 * a matching idle. The timeout handler sends this.
			 * Note: sd_pm_entry can't be called here to do this
			 * because sdpower may have been called as a result
			 * of a call to pm_raise_power from within sd_pm_entry.
			 *
			 * If a timeout handler is already active then
			 * don't install another.
			 */
			mutex_enter(&un->un_pm_mutex);
			if (un->un_pm_timeid == NULL) {
				/*
				 * Convert the advised interval (seconds) to
				 * clock ticks; drv_usectohz(1000000) yields
				 * ticks per second.
				 */
				un->un_pm_timeid =
				    timeout(sd_pm_timeout_handler,
				    un, intvlp * drv_usectohz(1000000));
				mutex_exit(&un->un_pm_mutex);
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			} else {
				mutex_exit(&un->un_pm_mutex);
			}
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to it's original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));

			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
			    "trans check Failed, not ok to power cycle.\n");
			return (DDI_FAILURE);

		case -1:
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to it's original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, trans check command Failed.\n");
			return (DDI_FAILURE);
		}
		/* Any other pm_trans_check() result falls through: power OK. */
	}

	if (level == SD_SPINDLE_OFF) {
		/*
		 * Save the last state... if the STOP FAILS we need it
		 * for restoring
		 */
		mutex_enter(SD_MUTEX(un));
		save_state = un->un_last_state;
		/*
		 * There must not be any cmds. getting processed
		 * in the driver when we get here. Power to the
		 * device is potentially going off.
		 */
		ASSERT(un->un_ncmds_in_driver == 0);
		mutex_exit(SD_MUTEX(un));

		/*
		 * For now suspend the device completely before spindle is
		 * turned off
		 */
		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to it's original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, PM suspend Failed.\n");
			return (DDI_FAILURE);
		}
	}

	/*
	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
	 * close, or strategy. Dump no longer uses this routine, it uses it's
	 * own code so it can be done in polled mode.
	 */

	/* Assume media is present until a command failure proves otherwise. */
	medium_present = TRUE;

	/*
	 * When powering up, issue a TUR in case the device is at unit
	 * attention.  Don't do retries. Bypass the PM layer, otherwise
	 * a deadlock on un_pm_busy_cv will occur.
	 */
	if (level == SD_SPINDLE_ON) {
		(void) sd_send_scsi_TEST_UNIT_READY(un,
		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));

	sval = sd_send_scsi_START_STOP_UNIT(un,
	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
	    SD_PATH_DIRECT);
	/* Command failed, check for media present. */
	if ((sval == ENXIO) && un->un_f_has_removable_media) {
		medium_present = FALSE;
	}

	/*
	 * The conditions of interest here are:
	 *   if a spindle off with media present fails,
	 *	then restore the state and return an error.
	 *   else if a spindle on fails,
	 *	then return an error (there's no state to restore).
	 * In all other cases we setup for the new state
	 * and return success.
	 */
	switch (level) {
	case SD_SPINDLE_OFF:
		if ((medium_present == TRUE) && (sval != 0)) {
			/* The stop command from above failed */
			rval = DDI_FAILURE;
			/*
			 * The stop command failed, and we have media
			 * present. Put the level back by calling the
			 * sd_ddi_pm_resume() and set the state back to
			 * it's previous value.
			 */
			(void) sd_ddi_pm_resume(un);
			mutex_enter(SD_MUTEX(un));
			un->un_last_state = save_state;
			mutex_exit(SD_MUTEX(un));
			break;
		}
		/*
		 * The stop command from above succeeded.
		 */
		if (un->un_f_monitor_media_state) {
			/*
			 * Terminate watch thread in case of removable media
			 * devices going into low power state. This is as per
			 * the requirements of pm framework, otherwise commands
			 * will be generated for the device (through watch
			 * thread), even when the device is in low power state.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_f_watcht_stopped = FALSE;
			if (un->un_swr_token != NULL) {
				opaque_t temp_token = un->un_swr_token;
				un->un_f_watcht_stopped = TRUE;
				un->un_swr_token = NULL;
				mutex_exit(SD_MUTEX(un));
				(void) scsi_watch_request_terminate(temp_token,
				    SCSI_WATCH_TERMINATE_WAIT);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
		}
		break;

	default:	/* The level requested is spindle on... */
		/*
		 * Legacy behavior: return success on a failed spinup
		 * if there is no media in the drive.
		 * Do this by looking at medium_present here.
		 */
		if ((sval != 0) && medium_present) {
			/* The start command from above failed */
			rval = DDI_FAILURE;
			break;
		}
		/*
		 * The start command from above succeeded
		 * Resume the devices now that we have
		 * started the disks
		 */
		(void) sd_ddi_pm_resume(un);

		/*
		 * Resume the watch thread since it was suspended
		 * when the device went into low power mode.
		 */
		if (un->un_f_monitor_media_state) {
			mutex_enter(SD_MUTEX(un));
			if (un->un_f_watcht_stopped == TRUE) {
				opaque_t temp_token;

				un->un_f_watcht_stopped = FALSE;
				mutex_exit(SD_MUTEX(un));
				temp_token = scsi_watch_request_submit(
				    SD_SCSI_DEVP(un),
				    sd_check_media_time,
				    SENSE_LENGTH, sd_media_watch_cb,
				    (caddr_t)dev);
				mutex_enter(SD_MUTEX(un));
				un->un_swr_token = temp_token;
			}
			mutex_exit(SD_MUTEX(un));
		}
	}
	if (got_semaphore_here != 0) {
		sema_v(&un->un_semoclose);
	}
	/*
	 * On exit put the state back to it's original value
	 * and broadcast to anyone waiting for the power
	 * change completion.
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_state = state_before_pm;
	cv_broadcast(&un->un_suspend_cv);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);

	return (rval);
}
7672 
7673 
7674 
7675 /*
7676  *    Function: sdattach
7677  *
7678  * Description: Driver's attach(9e) entry point function.
7679  *
7680  *   Arguments: devi - opaque device info handle
7681  *		cmd  - attach  type
7682  *
7683  * Return Code: DDI_SUCCESS
7684  *		DDI_FAILURE
7685  *
7686  *     Context: Kernel thread context
7687  */
7688 
7689 static int
7690 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7691 {
7692 	switch (cmd) {
7693 	case DDI_ATTACH:
7694 		return (sd_unit_attach(devi));
7695 	case DDI_RESUME:
7696 		return (sd_ddi_resume(devi));
7697 	default:
7698 		break;
7699 	}
7700 	return (DDI_FAILURE);
7701 }
7702 
7703 
7704 /*
7705  *    Function: sddetach
7706  *
7707  * Description: Driver's detach(9E) entry point function.
7708  *
7709  *   Arguments: devi - opaque device info handle
7710  *		cmd  - detach  type
7711  *
7712  * Return Code: DDI_SUCCESS
7713  *		DDI_FAILURE
7714  *
7715  *     Context: Kernel thread context
7716  */
7717 
7718 static int
7719 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7720 {
7721 	switch (cmd) {
7722 	case DDI_DETACH:
7723 		return (sd_unit_detach(devi));
7724 	case DDI_SUSPEND:
7725 		return (sd_ddi_suspend(devi));
7726 	default:
7727 		break;
7728 	}
7729 	return (DDI_FAILURE);
7730 }
7731 
7732 
7733 /*
7734  *     Function: sd_sync_with_callback
7735  *
7736  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7737  *		 state while the callback routine is active.
7738  *
7739  *    Arguments: un: softstate structure for the instance
7740  *
7741  *	Context: Kernel thread context
7742  */
7743 
7744 static void
7745 sd_sync_with_callback(struct sd_lun *un)
7746 {
7747 	ASSERT(un != NULL);
7748 
7749 	mutex_enter(SD_MUTEX(un));
7750 
7751 	ASSERT(un->un_in_callback >= 0);
7752 
7753 	while (un->un_in_callback > 0) {
7754 		mutex_exit(SD_MUTEX(un));
7755 		delay(2);
7756 		mutex_enter(SD_MUTEX(un));
7757 	}
7758 
7759 	mutex_exit(SD_MUTEX(un));
7760 }
7761 
7762 /*
7763  *    Function: sd_unit_attach
7764  *
7765  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7766  *		the soft state structure for the device and performs
7767  *		all necessary structure and device initializations.
7768  *
7769  *   Arguments: devi: the system's dev_info_t for the device.
7770  *
7771  * Return Code: DDI_SUCCESS if attach is successful.
7772  *		DDI_FAILURE if any part of the attach fails.
7773  *
7774  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7775  *		Kernel thread context only.  Can sleep.
7776  */
7777 
7778 static int
7779 sd_unit_attach(dev_info_t *devi)
7780 {
7781 	struct	scsi_device	*devp;
7782 	struct	sd_lun		*un;
7783 	char			*variantp;
7784 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7785 	int	instance;
7786 	int	rval;
7787 	int	wc_enabled;
7788 	uint64_t	capacity;
7789 	uint_t		lbasize;
7790 
7791 	/*
7792 	 * Retrieve the target driver's private data area. This was set
7793 	 * up by the HBA.
7794 	 */
7795 	devp = ddi_get_driver_private(devi);
7796 
7797 	/*
7798 	 * Since we have no idea what state things were left in by the last
7799 	 * user of the device, set up some 'default' settings, ie. turn 'em
7800 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7801 	 * Do this before the scsi_probe, which sends an inquiry.
7802 	 * This is a fix for bug (4430280).
7803 	 * Of special importance is wide-xfer. The drive could have been left
7804 	 * in wide transfer mode by the last driver to communicate with it,
7805 	 * this includes us. If that's the case, and if the following is not
7806 	 * setup properly or we don't re-negotiate with the drive prior to
7807 	 * transferring data to/from the drive, it causes bus parity errors,
7808 	 * data overruns, and unexpected interrupts. This first occurred when
7809 	 * the fix for bug (4378686) was made.
7810 	 */
7811 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7812 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7813 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7814 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7815 
7816 	/*
7817 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7818 	 * This call will allocate and fill in the scsi_inquiry structure
7819 	 * and point the sd_inq member of the scsi_device structure to it.
7820 	 * If the attach succeeds, then this memory will not be de-allocated
7821 	 * (via scsi_unprobe()) until the instance is detached.
7822 	 */
7823 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7824 		goto probe_failed;
7825 	}
7826 
7827 	/*
7828 	 * Check the device type as specified in the inquiry data and
7829 	 * claim it if it is of a type that we support.
7830 	 */
7831 	switch (devp->sd_inq->inq_dtype) {
7832 	case DTYPE_DIRECT:
7833 		break;
7834 	case DTYPE_RODIRECT:
7835 		break;
7836 	case DTYPE_OPTICAL:
7837 		break;
7838 	case DTYPE_NOTPRESENT:
7839 	default:
7840 		/* Unsupported device type; fail the attach. */
7841 		goto probe_failed;
7842 	}
7843 
7844 	/*
7845 	 * Allocate the soft state structure for this unit.
7846 	 *
7847 	 * We rely upon this memory being set to all zeroes by
7848 	 * ddi_soft_state_zalloc().  We assume that any member of the
7849 	 * soft state structure that is not explicitly initialized by
7850 	 * this routine will have a value of zero.
7851 	 */
7852 	instance = ddi_get_instance(devp->sd_dev);
7853 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7854 		goto probe_failed;
7855 	}
7856 
7857 	/*
7858 	 * Retrieve a pointer to the newly-allocated soft state.
7859 	 *
7860 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7861 	 * was successful, unless something has gone horribly wrong and the
7862 	 * ddi's soft state internals are corrupt (in which case it is
7863 	 * probably better to halt here than just fail the attach....)
7864 	 */
7865 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7866 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7867 		    instance);
7868 		/*NOTREACHED*/
7869 	}
7870 
7871 	/*
7872 	 * Link the back ptr of the driver soft state to the scsi_device
7873 	 * struct for this lun.
7874 	 * Save a pointer to the softstate in the driver-private area of
7875 	 * the scsi_device struct.
7876 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7877 	 * we first set un->un_sd below.
7878 	 */
7879 	un->un_sd = devp;
7880 	devp->sd_private = (opaque_t)un;
7881 
7882 	/*
7883 	 * The following must be after devp is stored in the soft state struct.
7884 	 */
7885 #ifdef SDDEBUG
7886 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7887 	    "%s_unit_attach: un:0x%p instance:%d\n",
7888 	    ddi_driver_name(devi), un, instance);
7889 #endif
7890 
7891 	/*
7892 	 * Set up the device type and node type (for the minor nodes).
7893 	 * By default we assume that the device can at least support the
7894 	 * Common Command Set. Call it a CD-ROM if it reports itself
7895 	 * as a RODIRECT device.
7896 	 */
7897 	switch (devp->sd_inq->inq_dtype) {
7898 	case DTYPE_RODIRECT:
7899 		un->un_node_type = DDI_NT_CD_CHAN;
7900 		un->un_ctype	 = CTYPE_CDROM;
7901 		break;
7902 	case DTYPE_OPTICAL:
7903 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7904 		un->un_ctype	 = CTYPE_ROD;
7905 		break;
7906 	default:
7907 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7908 		un->un_ctype	 = CTYPE_CCS;
7909 		break;
7910 	}
7911 
7912 	/*
7913 	 * Try to read the interconnect type from the HBA.
7914 	 *
7915 	 * Note: This driver is currently compiled as two binaries, a parallel
7916 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7917 	 * differences are determined at compile time. In the future a single
7918 	 * binary will be provided and the inteconnect type will be used to
7919 	 * differentiate between fibre and parallel scsi behaviors. At that time
7920 	 * it will be necessary for all fibre channel HBAs to support this
7921 	 * property.
7922 	 *
7923 	 * set un_f_is_fiber to TRUE ( default fiber )
7924 	 */
7925 	un->un_f_is_fibre = TRUE;
7926 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7927 	case INTERCONNECT_SSA:
7928 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7929 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7930 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7931 		break;
7932 	case INTERCONNECT_PARALLEL:
7933 		un->un_f_is_fibre = FALSE;
7934 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7935 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7936 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7937 		break;
7938 	case INTERCONNECT_SATA:
7939 		un->un_f_is_fibre = FALSE;
7940 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7941 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7942 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7943 		break;
7944 	case INTERCONNECT_FIBRE:
7945 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7946 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7947 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7948 		break;
7949 	case INTERCONNECT_FABRIC:
7950 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7951 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7953 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7954 		break;
7955 	default:
7956 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7957 		/*
7958 		 * The HBA does not support the "interconnect-type" property
7959 		 * (or did not provide a recognized type).
7960 		 *
7961 		 * Note: This will be obsoleted when a single fibre channel
7962 		 * and parallel scsi driver is delivered. In the meantime the
7963 		 * interconnect type will be set to the platform default.If that
7964 		 * type is not parallel SCSI, it means that we should be
7965 		 * assuming "ssd" semantics. However, here this also means that
7966 		 * the FC HBA is not supporting the "interconnect-type" property
7967 		 * like we expect it to, so log this occurrence.
7968 		 */
7969 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7970 		if (!SD_IS_PARALLEL_SCSI(un)) {
7971 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7972 			    "sd_unit_attach: un:0x%p Assuming "
7973 			    "INTERCONNECT_FIBRE\n", un);
7974 		} else {
7975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7976 			    "sd_unit_attach: un:0x%p Assuming "
7977 			    "INTERCONNECT_PARALLEL\n", un);
7978 			un->un_f_is_fibre = FALSE;
7979 		}
7980 #else
7981 		/*
7982 		 * Note: This source will be implemented when a single fibre
7983 		 * channel and parallel scsi driver is delivered. The default
7984 		 * will be to assume that if a device does not support the
7985 		 * "interconnect-type" property it is a parallel SCSI HBA and
7986 		 * we will set the interconnect type for parallel scsi.
7987 		 */
7988 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7989 		un->un_f_is_fibre = FALSE;
7990 #endif
7991 		break;
7992 	}
7993 
7994 	if (un->un_f_is_fibre == TRUE) {
7995 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7996 			SCSI_VERSION_3) {
7997 			switch (un->un_interconnect_type) {
7998 			case SD_INTERCONNECT_FIBRE:
7999 			case SD_INTERCONNECT_SSA:
8000 				un->un_node_type = DDI_NT_BLOCK_WWN;
8001 				break;
8002 			default:
8003 				break;
8004 			}
8005 		}
8006 	}
8007 
8008 	/*
8009 	 * Initialize the Request Sense command for the target
8010 	 */
8011 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
8012 		goto alloc_rqs_failed;
8013 	}
8014 
8015 	/*
8016 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
8017 	 * with seperate binary for sd and ssd.
8018 	 *
8019 	 * x86 has 1 binary, un_retry_count is set base on connection type.
8020 	 * The hardcoded values will go away when Sparc uses 1 binary
8021 	 * for sd and ssd.  This hardcoded values need to match
8022 	 * SD_RETRY_COUNT in sddef.h
8023 	 * The value used is base on interconnect type.
8024 	 * fibre = 3, parallel = 5
8025 	 */
8026 #if defined(__i386) || defined(__amd64)
8027 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8028 #else
8029 	un->un_retry_count = SD_RETRY_COUNT;
8030 #endif
8031 
8032 	/*
8033 	 * Set the per disk retry count to the default number of retries
8034 	 * for disks and CDROMs. This value can be overridden by the
8035 	 * disk property list or an entry in sd.conf.
8036 	 */
8037 	un->un_notready_retry_count =
8038 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
8039 			: DISK_NOT_READY_RETRY_COUNT(un);
8040 
8041 	/*
8042 	 * Set the busy retry count to the default value of un_retry_count.
8043 	 * This can be overridden by entries in sd.conf or the device
8044 	 * config table.
8045 	 */
8046 	un->un_busy_retry_count = un->un_retry_count;
8047 
8048 	/*
8049 	 * Init the reset threshold for retries.  This number determines
8050 	 * how many retries must be performed before a reset can be issued
8051 	 * (for certain error conditions). This can be overridden by entries
8052 	 * in sd.conf or the device config table.
8053 	 */
8054 	un->un_reset_retry_count = (un->un_retry_count / 2);
8055 
8056 	/*
8057 	 * Set the victim_retry_count to the default un_retry_count
8058 	 */
8059 	un->un_victim_retry_count = (2 * un->un_retry_count);
8060 
8061 	/*
8062 	 * Set the reservation release timeout to the default value of
8063 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8064 	 * device config table.
8065 	 */
8066 	un->un_reserve_release_time = 5;
8067 
8068 	/*
8069 	 * Set up the default maximum transfer size. Note that this may
8070 	 * get updated later in the attach, when setting up default wide
8071 	 * operations for disks.
8072 	 */
8073 #if defined(__i386) || defined(__amd64)
8074 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8075 #else
8076 	un->un_max_xfer_size = (uint_t)maxphys;
8077 #endif
8078 
8079 	/*
8080 	 * Get "allow bus device reset" property (defaults to "enabled" if
8081 	 * the property was not defined). This is to disable bus resets for
8082 	 * certain kinds of error recovery. Note: In the future when a run-time
8083 	 * fibre check is available the soft state flag should default to
8084 	 * enabled.
8085 	 */
8086 	if (un->un_f_is_fibre == TRUE) {
8087 		un->un_f_allow_bus_device_reset = TRUE;
8088 	} else {
8089 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8090 			"allow-bus-device-reset", 1) != 0) {
8091 			un->un_f_allow_bus_device_reset = TRUE;
8092 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8093 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
8094 				un);
8095 		} else {
8096 			un->un_f_allow_bus_device_reset = FALSE;
8097 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8098 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
8099 				un);
8100 		}
8101 	}
8102 
8103 	/*
8104 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8105 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8106 	 *
8107 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8108 	 * property. The new "variant" property with a value of "atapi" has been
8109 	 * introduced so that future 'variants' of standard SCSI behavior (like
8110 	 * atapi) could be specified by the underlying HBA drivers by supplying
8111 	 * a new value for the "variant" property, instead of having to define a
8112 	 * new property.
8113 	 */
8114 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8115 		un->un_f_cfg_is_atapi = TRUE;
8116 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8117 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8118 	}
8119 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8120 	    &variantp) == DDI_PROP_SUCCESS) {
8121 		if (strcmp(variantp, "atapi") == 0) {
8122 			un->un_f_cfg_is_atapi = TRUE;
8123 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8124 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8125 		}
8126 		ddi_prop_free(variantp);
8127 	}
8128 
8129 	un->un_cmd_timeout	= SD_IO_TIME;
8130 
8131 	/* Info on current states, statuses, etc. (Updated frequently) */
8132 	un->un_state		= SD_STATE_NORMAL;
8133 	un->un_last_state	= SD_STATE_NORMAL;
8134 
8135 	/* Control & status info for command throttling */
8136 	un->un_throttle		= sd_max_throttle;
8137 	un->un_saved_throttle	= sd_max_throttle;
8138 	un->un_min_throttle	= sd_min_throttle;
8139 
8140 	if (un->un_f_is_fibre == TRUE) {
8141 		un->un_f_use_adaptive_throttle = TRUE;
8142 	} else {
8143 		un->un_f_use_adaptive_throttle = FALSE;
8144 	}
8145 
8146 	/* Removable media support. */
8147 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8148 	un->un_mediastate		= DKIO_NONE;
8149 	un->un_specified_mediastate	= DKIO_NONE;
8150 
8151 	/* CVs for suspend/resume (PM or DR) */
8152 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8153 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8154 
8155 	/* Power management support. */
8156 	un->un_power_level = SD_SPINDLE_UNINIT;
8157 
8158 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8159 	un->un_f_wcc_inprog = 0;
8160 
8161 	/*
8162 	 * The open/close semaphore is used to serialize threads executing
8163 	 * in the driver's open & close entry point routines for a given
8164 	 * instance.
8165 	 */
8166 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8167 
8168 	/*
8169 	 * The conf file entry and softstate variable is a forceful override,
8170 	 * meaning a non-zero value must be entered to change the default.
8171 	 */
8172 	un->un_f_disksort_disabled = FALSE;
8173 
8174 	/*
8175 	 * Retrieve the properties from the static driver table or the driver
8176 	 * configuration file (.conf) for this unit and update the soft state
8177 	 * for the device as needed for the indicated properties.
8178 	 * Note: the property configuration needs to occur here as some of the
8179 	 * following routines may have dependencies on soft state flags set
8180 	 * as part of the driver property configuration.
8181 	 */
8182 	sd_read_unit_properties(un);
8183 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8184 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8185 
8186 	/*
8187 	 * Only if a device has "hotpluggable" property, it is
8188 	 * treated as hotpluggable device. Otherwise, it is
8189 	 * regarded as non-hotpluggable one.
8190 	 */
8191 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8192 	    -1) != -1) {
8193 		un->un_f_is_hotpluggable = TRUE;
8194 	}
8195 
8196 	/*
8197 	 * set unit's attributes(flags) according to "hotpluggable" and
8198 	 * RMB bit in INQUIRY data.
8199 	 */
8200 	sd_set_unit_attributes(un, devi);
8201 
8202 	/*
8203 	 * By default, we mark the capacity, lbasize, and geometry
8204 	 * as invalid. Only if we successfully read a valid capacity
8205 	 * will we update the un_blockcount and un_tgt_blocksize with the
8206 	 * valid values (the geometry will be validated later).
8207 	 */
8208 	un->un_f_blockcount_is_valid	= FALSE;
8209 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8210 	un->un_f_geometry_is_valid	= FALSE;
8211 
8212 	/*
8213 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8214 	 * otherwise.
8215 	 */
8216 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8217 	un->un_blockcount = 0;
8218 
8219 	/*
8220 	 * Set up the per-instance info needed to determine the correct
8221 	 * CDBs and other info for issuing commands to the target.
8222 	 */
8223 	sd_init_cdb_limits(un);
8224 
8225 	/*
8226 	 * Set up the IO chains to use, based upon the target type.
8227 	 */
8228 	if (un->un_f_non_devbsize_supported) {
8229 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8230 	} else {
8231 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8232 	}
8233 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8234 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8235 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8236 
8237 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8238 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8239 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8240 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8241 
8242 
8243 	if (ISCD(un)) {
8244 		un->un_additional_codes = sd_additional_codes;
8245 	} else {
8246 		un->un_additional_codes = NULL;
8247 	}
8248 
8249 	/*
8250 	 * Create the kstats here so they can be available for attach-time
8251 	 * routines that send commands to the unit (either polled or via
8252 	 * sd_send_scsi_cmd).
8253 	 *
8254 	 * Note: This is a critical sequence that needs to be maintained:
8255 	 *	1) Instantiate the kstats here, before any routines using the
8256 	 *	   iopath (i.e. sd_send_scsi_cmd).
8257 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8258 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8259 	 *	   sd_register_devid(), and sd_cache_control().
8260 	 */
8261 
8262 	un->un_stats = kstat_create(sd_label, instance,
8263 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8264 	if (un->un_stats != NULL) {
8265 		un->un_stats->ks_lock = SD_MUTEX(un);
8266 		kstat_install(un->un_stats);
8267 	}
8268 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8269 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8270 
8271 	sd_create_errstats(un, instance);
8272 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8273 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8274 
8275 	/*
8276 	 * The following if/else code was relocated here from below as part
8277 	 * of the fix for bug (4430280). However with the default setup added
8278 	 * on entry to this routine, it's no longer absolutely necessary for
8279 	 * this to be before the call to sd_spin_up_unit.
8280 	 */
8281 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8282 		/*
8283 		 * If SCSI-2 tagged queueing is supported by the target
8284 		 * and by the host adapter then we will enable it.
8285 		 */
8286 		un->un_tagflags = 0;
8287 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8288 		    (devp->sd_inq->inq_cmdque) &&
8289 		    (un->un_f_arq_enabled == TRUE)) {
8290 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8291 			    1, 1) == 1) {
8292 				un->un_tagflags = FLAG_STAG;
8293 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8294 				    "sd_unit_attach: un:0x%p tag queueing "
8295 				    "enabled\n", un);
8296 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8297 			    "untagged-qing", 0) == 1) {
8298 				un->un_f_opt_queueing = TRUE;
8299 				un->un_saved_throttle = un->un_throttle =
8300 				    min(un->un_throttle, 3);
8301 			} else {
8302 				un->un_f_opt_queueing = FALSE;
8303 				un->un_saved_throttle = un->un_throttle = 1;
8304 			}
8305 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8306 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8307 			/* The Host Adapter supports internal queueing. */
8308 			un->un_f_opt_queueing = TRUE;
8309 			un->un_saved_throttle = un->un_throttle =
8310 			    min(un->un_throttle, 3);
8311 		} else {
8312 			un->un_f_opt_queueing = FALSE;
8313 			un->un_saved_throttle = un->un_throttle = 1;
8314 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8315 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8316 		}
8317 
8318 		/*
8319 		 * Enable large transfers for SATA/SAS drives
8320 		 */
8321 		if (SD_IS_SERIAL(un)) {
8322 			un->un_max_xfer_size =
8323 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8324 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8325 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8326 			    "sd_unit_attach: un:0x%p max transfer "
8327 			    "size=0x%x\n", un, un->un_max_xfer_size);
8328 
8329 		}
8330 
8331 		/* Setup or tear down default wide operations for disks */
8332 
8333 		/*
8334 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8335 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8336 		 * system and be set to different values. In the future this
8337 		 * code may need to be updated when the ssd module is
8338 		 * obsoleted and removed from the system. (4299588)
8339 		 */
8340 		if (SD_IS_PARALLEL_SCSI(un) &&
8341 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8342 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8343 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8344 			    1, 1) == 1) {
8345 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8346 				    "sd_unit_attach: un:0x%p Wide Transfer "
8347 				    "enabled\n", un);
8348 			}
8349 
8350 			/*
8351 			 * If tagged queuing has also been enabled, then
8352 			 * enable large xfers
8353 			 */
8354 			if (un->un_saved_throttle == sd_max_throttle) {
8355 				un->un_max_xfer_size =
8356 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8357 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8358 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8359 				    "sd_unit_attach: un:0x%p max transfer "
8360 				    "size=0x%x\n", un, un->un_max_xfer_size);
8361 			}
8362 		} else {
8363 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8364 			    0, 1) == 1) {
8365 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8366 				    "sd_unit_attach: un:0x%p "
8367 				    "Wide Transfer disabled\n", un);
8368 			}
8369 		}
8370 	} else {
8371 		un->un_tagflags = FLAG_STAG;
8372 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8373 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8374 	}
8375 
8376 	/*
8377 	 * If this target supports LUN reset, try to enable it.
8378 	 */
8379 	if (un->un_f_lun_reset_enabled) {
8380 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8381 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8382 			    "un:0x%p lun_reset capability set\n", un);
8383 		} else {
8384 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8385 			    "un:0x%p lun-reset capability not set\n", un);
8386 		}
8387 	}
8388 
8389 	/*
8390 	 * At this point in the attach, we have enough info in the
8391 	 * soft state to be able to issue commands to the target.
8392 	 *
8393 	 * All command paths used below MUST issue their commands as
8394 	 * SD_PATH_DIRECT. This is important as intermediate layers
8395 	 * are not all initialized yet (such as PM).
8396 	 */
8397 
8398 	/*
8399 	 * Send a TEST UNIT READY command to the device. This should clear
8400 	 * any outstanding UNIT ATTENTION that may be present.
8401 	 *
8402 	 * Note: Don't check for success, just track if there is a reservation,
8403 	 * this is a throw away command to clear any unit attentions.
8404 	 *
8405 	 * Note: This MUST be the first command issued to the target during
8406 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8407 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8408 	 * with attempts at spinning up a device with no media.
8409 	 */
8410 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8411 		reservation_flag = SD_TARGET_IS_RESERVED;
8412 	}
8413 
8414 	/*
8415 	 * If the device is NOT a removable media device, attempt to spin
8416 	 * it up (using the START_STOP_UNIT command) and read its capacity
8417 	 * (using the READ CAPACITY command).  Note, however, that either
8418 	 * of these could fail and in some cases we would continue with
8419 	 * the attach despite the failure (see below).
8420 	 */
8421 	if (un->un_f_descr_format_supported) {
8422 		switch (sd_spin_up_unit(un)) {
8423 		case 0:
8424 			/*
8425 			 * Spin-up was successful; now try to read the
8426 			 * capacity.  If successful then save the results
8427 			 * and mark the capacity & lbasize as valid.
8428 			 */
8429 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8430 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8431 
8432 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8433 			    &lbasize, SD_PATH_DIRECT)) {
8434 			case 0: {
8435 				if (capacity > DK_MAX_BLOCKS) {
8436 #ifdef _LP64
8437 					if (capacity + 1 >
8438 					    SD_GROUP1_MAX_ADDRESS) {
8439 						/*
8440 						 * Enable descriptor format
8441 						 * sense data so that we can
8442 						 * get 64 bit sense data
8443 						 * fields.
8444 						 */
8445 						sd_enable_descr_sense(un);
8446 					}
8447 #else
8448 					/* 32-bit kernels can't handle this */
8449 					scsi_log(SD_DEVINFO(un),
8450 					    sd_label, CE_WARN,
8451 					    "disk has %llu blocks, which "
8452 					    "is too large for a 32-bit "
8453 					    "kernel", capacity);
8454 
8455 #if defined(__i386) || defined(__amd64)
8456 					/*
8457 					 * Refer to comments related to off-by-1
8458 					 * at the header of this file.
8459 					 * 1TB disk was treated as (1T - 512)B
8460 					 * in the past, so it might have a
8461 					 * valid VTOC and Solaris partitions;
8462 					 * we have to allow it to continue to
8463 					 * work.
8464 					 */
8465 					if (capacity -1 > DK_MAX_BLOCKS)
8466 #endif
8467 					goto spinup_failed;
8468 #endif
8469 				}
8470 
8471 				/*
8472 				 * Here it's not necessary to check the case:
8473 				 * the capacity of the device is bigger than
8474 				 * what the max hba cdb can support. Because
8475 				 * sd_send_scsi_READ_CAPACITY will retrieve
8476 				 * the capacity by sending USCSI command, which
8477 				 * is constrained by the max hba cdb. Actually,
8478 				 * sd_send_scsi_READ_CAPACITY will return
8479 				 * EINVAL when using bigger cdb than required
8480 				 * cdb length. Will handle this case in
8481 				 * "case EINVAL".
8482 				 */
8483 
8484 				/*
8485 				 * The following relies on
8486 				 * sd_send_scsi_READ_CAPACITY never
8487 				 * returning 0 for capacity and/or lbasize.
8488 				 */
8489 				sd_update_block_info(un, lbasize, capacity);
8490 
8491 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8492 				    "sd_unit_attach: un:0x%p capacity = %ld "
8493 				    "blocks; lbasize= %ld.\n", un,
8494 				    un->un_blockcount, un->un_tgt_blocksize);
8495 
8496 				break;
8497 			}
8498 			case EINVAL:
8499 				/*
8500 				 * In the case where the max-cdb-length property
8501 				 * is smaller than the required CDB length for
8502 				 * a SCSI device, a target driver can fail to
8503 				 * attach to that device.
8504 				 */
8505 				scsi_log(SD_DEVINFO(un),
8506 				    sd_label, CE_WARN,
8507 				    "disk capacity is too large "
8508 				    "for current cdb length");
8509 				goto spinup_failed;
8510 			case EACCES:
8511 				/*
8512 				 * Should never get here if the spin-up
8513 				 * succeeded, but code it in anyway.
8514 				 * From here, just continue with the attach...
8515 				 */
8516 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8517 				    "sd_unit_attach: un:0x%p "
8518 				    "sd_send_scsi_READ_CAPACITY "
8519 				    "returned reservation conflict\n", un);
8520 				reservation_flag = SD_TARGET_IS_RESERVED;
8521 				break;
8522 			default:
8523 				/*
8524 				 * Likewise, should never get here if the
8525 				 * spin-up succeeded. Just continue with
8526 				 * the attach...
8527 				 */
8528 				break;
8529 			}
8530 			break;
8531 		case EACCES:
8532 			/*
8533 			 * Device is reserved by another host.  In this case
8534 			 * we could not spin it up or read the capacity, but
8535 			 * we continue with the attach anyway.
8536 			 */
8537 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8538 			    "sd_unit_attach: un:0x%p spin-up reservation "
8539 			    "conflict.\n", un);
8540 			reservation_flag = SD_TARGET_IS_RESERVED;
8541 			break;
8542 		default:
8543 			/* Fail the attach if the spin-up failed. */
8544 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8545 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8546 			goto spinup_failed;
8547 		}
8548 	}
8549 
8550 	/*
8551 	 * Check to see if this is a MMC drive
8552 	 */
8553 	if (ISCD(un)) {
8554 		sd_set_mmc_caps(un);
8555 	}
8556 
8557 	/*
8558 	 * Create the minor nodes for the device.
8559 	 * Note: If we want to support fdisk on both sparc and intel, this will
8560 	 * have to separate out the notion that VTOC8 is always sparc, and
8561 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8562 	 * type will have to be determined at run-time, and the fdisk
8563 	 * partitioning will have to have been read & set up before we
8564 	 * create the minor nodes. (any other inits (such as kstats) that
8565 	 * also ought to be done before creating the minor nodes?) (Doesn't
8566 	 * setting up the minor nodes kind of imply that we're ready to
8567 	 * handle an open from userland?)
8568 	 */
8569 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8570 		goto create_minor_nodes_failed;
8571 	}
8572 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8573 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8574 
8575 	/*
8576 	 * Add a zero-length attribute to tell the world we support
8577 	 * kernel ioctls (for layered drivers)
8578 	 */
8579 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8580 	    DDI_KERNEL_IOCTL, NULL, 0);
8581 
8582 	/*
8583 	 * Add a boolean property to tell the world we support
8584 	 * the B_FAILFAST flag (for layered drivers)
8585 	 */
8586 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8587 	    "ddi-failfast-supported", NULL, 0);
8588 
8589 	/*
8590 	 * Initialize power management
8591 	 */
8592 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8593 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8594 	sd_setup_pm(un, devi);
8595 	if (un->un_f_pm_is_enabled == FALSE) {
8596 		/*
8597 		 * For performance, point to a jump table that does
8598 		 * not include pm.
8599 		 * The direct and priority chains don't change with PM.
8600 		 *
8601 		 * Note: this is currently done based on individual device
8602 		 * capabilities. When an interface for determining system
8603 		 * power enabled state becomes available, or when additional
8604 		 * layers are added to the command chain, these values will
8605 		 * have to be re-evaluated for correctness.
8606 		 */
8607 		if (un->un_f_non_devbsize_supported) {
8608 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8609 		} else {
8610 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8611 		}
8612 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8613 	}
8614 
8615 	/*
8616 	 * This property is set to 0 by HA software to avoid retries
8617 	 * on a reserved disk. (The preferred property name is
8618 	 * "retry-on-reservation-conflict") (1189689)
8619 	 *
8620 	 * Note: The use of a global here can have unintended consequences. A
8621 	 * per instance variable is preferable to match the capabilities of
8622 	 * different underlying hba's (4402600)
8623 	 */
8624 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8625 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8626 	    sd_retry_on_reservation_conflict);
8627 	if (sd_retry_on_reservation_conflict != 0) {
8628 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8629 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8630 		    sd_retry_on_reservation_conflict);
8631 	}
8632 
8633 	/* Set up options for QFULL handling. */
8634 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8635 	    "qfull-retries", -1)) != -1) {
8636 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8637 		    rval, 1);
8638 	}
8639 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8640 	    "qfull-retry-interval", -1)) != -1) {
8641 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8642 		    rval, 1);
8643 	}
8644 
8645 	/*
8646 	 * This just prints a message that announces the existence of the
8647 	 * device. The message is always printed in the system logfile, but
8648 	 * only appears on the console if the system is booted with the
8649 	 * -v (verbose) argument.
8650 	 */
8651 	ddi_report_dev(devi);
8652 
8653 	/*
8654 	 * The framework calls driver attach routines single-threaded
8655 	 * for a given instance.  However we still acquire SD_MUTEX here
8656 	 * because this is required for calling the sd_validate_geometry()
8657 	 * and sd_register_devid() functions.
8658 	 */
8659 	mutex_enter(SD_MUTEX(un));
8660 	un->un_f_geometry_is_valid = FALSE;
8661 	un->un_mediastate = DKIO_NONE;
8662 	un->un_reserved = -1;
8663 
8664 	/*
8665 	 * Read and validate the device's geometry (ie, disk label)
8666 	 * A new unformatted drive will not have a valid geometry, but
8667 	 * the driver needs to successfully attach to this device so
8668 	 * the drive can be formatted via ioctls.
8669 	 */
8670 	if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8671 	    ENOTSUP)) &&
8672 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8673 		/*
8674 		 * We found a small disk with an EFI label on it;
8675 		 * we need to fix up the minor nodes accordingly.
8676 		 */
8677 		ddi_remove_minor_node(devi, "h");
8678 		ddi_remove_minor_node(devi, "h,raw");
8679 		(void) ddi_create_minor_node(devi, "wd",
8680 		    S_IFBLK,
8681 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8682 		    un->un_node_type, NULL);
8683 		(void) ddi_create_minor_node(devi, "wd,raw",
8684 		    S_IFCHR,
8685 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8686 		    un->un_node_type, NULL);
8687 	}
8688 #if defined(__i386) || defined(__amd64)
8689 	else if (un->un_f_capacity_adjusted == 1) {
8690 		/*
8691 		 * Refer to comments related to off-by-1 at the
8692 		 * header of this file.
8693 		 * Adjust minor node for 1TB disk.
8694 		 */
8695 		ddi_remove_minor_node(devi, "wd");
8696 		ddi_remove_minor_node(devi, "wd,raw");
8697 		(void) ddi_create_minor_node(devi, "h",
8698 		    S_IFBLK,
8699 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8700 		    un->un_node_type, NULL);
8701 		(void) ddi_create_minor_node(devi, "h,raw",
8702 		    S_IFCHR,
8703 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8704 		    un->un_node_type, NULL);
8705 	}
8706 #endif
8707 	/*
8708 	 * Read and initialize the devid for the unit.
8709 	 */
8710 	ASSERT(un->un_errstats != NULL);
8711 	if (un->un_f_devid_supported) {
8712 		sd_register_devid(un, devi, reservation_flag);
8713 	}
8714 	mutex_exit(SD_MUTEX(un));
8715 
8716 #if (defined(__fibre))
8717 	/*
8718 	 * Register callbacks for fibre only.  You can't do this solely
8719 	 * on the basis of the devid_type because this is hba specific.
8720 	 * We need to query our hba capabilities to find out whether to
8721 	 * register or not.
8722 	 */
8723 	if (un->un_f_is_fibre) {
8724 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8725 		sd_init_event_callbacks(un);
8726 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8727 		    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8728 	    }
8729 	}
8730 #endif
8731 
8732 	if (un->un_f_opt_disable_cache == TRUE) {
8733 		/*
8734 		 * Disable both read cache and write cache.  This is
8735 		 * the historic behavior of the keywords in the config file.
8736 		 */
8737 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8738 		    0) {
8739 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8740 			    "sd_unit_attach: un:0x%p Could not disable "
8741 			    "caching", un);
8742 			goto devid_failed;
8743 		}
8744 	}
8745 
8746 	/*
8747 	 * Check the value of the WCE bit now and
8748 	 * set un_f_write_cache_enabled accordingly.
8749 	 */
8750 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8751 	mutex_enter(SD_MUTEX(un));
8752 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8753 	mutex_exit(SD_MUTEX(un));
8754 
8755 	/*
8756 	 * Set the pstat and error stat values here, so data obtained during the
8757 	 * previous attach-time routines is available.
8758 	 *
8759 	 * Note: This is a critical sequence that needs to be maintained:
8760 	 *	1) Instantiate the kstats before any routines using the iopath
8761 	 *	   (i.e. sd_send_scsi_cmd).
8762 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8763 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8764 	 *	   sd_register_devid(), and sd_cache_control().
8765 	 */
8766 	if (un->un_f_pkstats_enabled) {
8767 		sd_set_pstats(un);
8768 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8769 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8770 	}
8771 
8772 	sd_set_errstats(un);
8773 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8774 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8775 
8776 	/*
8777 	 * Find out what type of reservation this disk supports.
8778 	 */
8779 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8780 	case 0:
8781 		/*
8782 		 * SCSI-3 reservations are supported.
8783 		 */
8784 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8785 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8786 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8787 		break;
8788 	case ENOTSUP:
8789 		/*
8790 		 * The PERSISTENT RESERVE IN command would not be recognized by
8791 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8792 		 */
8793 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8794 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8795 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8796 		break;
8797 	default:
8798 		/*
8799 		 * default to SCSI-3 reservations
8800 		 */
8801 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8802 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8803 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8804 		break;
8805 	}
8806 
8807 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8808 	    "sd_unit_attach: un:0x%p exit success\n", un);
8809 
8810 	return (DDI_SUCCESS);
8811 
8812 	/*
8813 	 * An error occurred during the attach; clean up & return failure.
8814 	 */
8815 
8816 devid_failed:
8817 
8818 setup_pm_failed:
8819 	ddi_remove_minor_node(devi, NULL);
8820 
8821 create_minor_nodes_failed:
8822 	/*
8823 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8824 	 */
8825 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8826 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8827 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8828 
8829 	if (un->un_f_is_fibre == FALSE) {
8830 	    (void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8831 	}
8832 
8833 spinup_failed:
8834 
8835 	mutex_enter(SD_MUTEX(un));
8836 
8837 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8838 	if (un->un_direct_priority_timeid != NULL) {
8839 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8840 		un->un_direct_priority_timeid = NULL;
8841 		mutex_exit(SD_MUTEX(un));
8842 		(void) untimeout(temp_id);
8843 		mutex_enter(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Cancel any pending start/stop timeouts */
8847 	if (un->un_startstop_timeid != NULL) {
8848 		timeout_id_t temp_id = un->un_startstop_timeid;
8849 		un->un_startstop_timeid = NULL;
8850 		mutex_exit(SD_MUTEX(un));
8851 		(void) untimeout(temp_id);
8852 		mutex_enter(SD_MUTEX(un));
8853 	}
8854 
8855 	/* Cancel any pending reset-throttle timeouts */
8856 	if (un->un_reset_throttle_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8858 		un->un_reset_throttle_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	/* Cancel any pending retry timeouts */
8865 	if (un->un_retry_timeid != NULL) {
8866 		timeout_id_t temp_id = un->un_retry_timeid;
8867 		un->un_retry_timeid = NULL;
8868 		mutex_exit(SD_MUTEX(un));
8869 		(void) untimeout(temp_id);
8870 		mutex_enter(SD_MUTEX(un));
8871 	}
8872 
8873 	/* Cancel any pending delayed cv broadcast timeouts */
8874 	if (un->un_dcvb_timeid != NULL) {
8875 		timeout_id_t temp_id = un->un_dcvb_timeid;
8876 		un->un_dcvb_timeid = NULL;
8877 		mutex_exit(SD_MUTEX(un));
8878 		(void) untimeout(temp_id);
8879 		mutex_enter(SD_MUTEX(un));
8880 	}
8881 
8882 	mutex_exit(SD_MUTEX(un));
8883 
8884 	/* There should not be any in-progress I/O so ASSERT this check */
8885 	ASSERT(un->un_ncmds_in_transport == 0);
8886 	ASSERT(un->un_ncmds_in_driver == 0);
8887 
8888 	/* Do not free the softstate if the callback routine is active */
8889 	sd_sync_with_callback(un);
8890 
8891 	/*
8892 	 * Partition stats apparently are not used with removables. These would
8893 	 * not have been created during attach, so no need to clean them up...
8894 	 */
8895 	if (un->un_stats != NULL) {
8896 		kstat_delete(un->un_stats);
8897 		un->un_stats = NULL;
8898 	}
8899 	if (un->un_errstats != NULL) {
8900 		kstat_delete(un->un_errstats);
8901 		un->un_errstats = NULL;
8902 	}
8903 
8904 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8905 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8906 
8907 	ddi_prop_remove_all(devi);
8908 	sema_destroy(&un->un_semoclose);
8909 	cv_destroy(&un->un_state_cv);
8910 
8911 getrbuf_failed:
8912 
8913 	sd_free_rqs(un);
8914 
8915 alloc_rqs_failed:
8916 
8917 	devp->sd_private = NULL;
8918 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8919 
8920 get_softstate_failed:
8921 	/*
8922 	 * Note: the man pages are unclear as to whether or not doing a
8923 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8924 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8925 	 * ddi_get_soft_state() fails.  The implication seems to be
8926 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8927 	 */
8928 	ddi_soft_state_free(sd_state, instance);
8929 
8930 probe_failed:
8931 	scsi_unprobe(devp);
8932 #ifdef SDDEBUG
8933 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8934 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8935 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8936 		    (void *)un);
8937 	}
8938 #endif
8939 	return (DDI_FAILURE);
8940 }
8941 
8942 
8943 /*
8944  *    Function: sd_unit_detach
8945  *
8946  * Description: Performs DDI_DETACH processing for sddetach().
8947  *
8948  * Return Code: DDI_SUCCESS
8949  *		DDI_FAILURE
8950  *
8951  *     Context: Kernel thread context
8952  */
8953 
8954 static int
8955 sd_unit_detach(dev_info_t *devi)
8956 {
8957 	struct scsi_device	*devp;
8958 	struct sd_lun		*un;
8959 	int			i;
8960 	dev_t			dev;
8961 	int			instance = ddi_get_instance(devi);
8962 
8963 	mutex_enter(&sd_detach_mutex);
8964 
8965 	/*
8966 	 * Fail the detach for any of the following:
8967 	 *  - Unable to get the sd_lun struct for the instance
8968 	 *  - A layered driver has an outstanding open on the instance
8969 	 *  - Another thread is already detaching this instance
8970 	 *  - Another thread is currently performing an open
8971 	 */
8972 	devp = ddi_get_driver_private(devi);
8973 	if ((devp == NULL) ||
8974 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8975 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8976 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8977 		mutex_exit(&sd_detach_mutex);
8978 		return (DDI_FAILURE);
8979 	}
8980 
8981 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8982 
8983 	/*
8984 	 * Mark this instance as currently in a detach, to inhibit any
8985 	 * opens from a layered driver.
8986 	 */
8987 	un->un_detach_count++;
8988 	mutex_exit(&sd_detach_mutex);
8989 
8990 	dev = sd_make_device(SD_DEVINFO(un));
8991 
8992 	_NOTE(COMPETING_THREADS_NOW);
8993 
8994 	mutex_enter(SD_MUTEX(un));
8995 
8996 	/*
8997 	 * Fail the detach if there are any outstanding layered
8998 	 * opens on this device.
8999 	 */
9000 	for (i = 0; i < NDKMAP; i++) {
9001 		if (un->un_ocmap.lyropen[i] != 0) {
9002 			goto err_notclosed;
9003 		}
9004 	}
9005 
9006 	/*
9007 	 * Verify there are NO outstanding commands issued to this device.
9008 	 * ie, un_ncmds_in_transport == 0.
9009 	 * It's possible to have outstanding commands through the physio
9010 	 * code path, even though everything's closed.
9011 	 */
9012 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
9013 	    (un->un_direct_priority_timeid != NULL) ||
9014 	    (un->un_state == SD_STATE_RWAIT)) {
9015 		mutex_exit(SD_MUTEX(un));
9016 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9017 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9018 		goto err_stillbusy;
9019 	}
9020 
9021 	/*
9022 	 * If we have the device reserved, release the reservation.
9023 	 */
9024 	if ((un->un_resvd_status & SD_RESERVE) &&
9025 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9026 		mutex_exit(SD_MUTEX(un));
9027 		/*
9028 		 * Note: sd_reserve_release sends a command to the device
9029 		 * via the sd_ioctlcmd() path, and can sleep.
9030 		 */
9031 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9032 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9033 			    "sd_dr_detach: Cannot release reservation \n");
9034 		}
9035 	} else {
9036 		mutex_exit(SD_MUTEX(un));
9037 	}
9038 
9039 	/*
9040 	 * Untimeout any reserve recover, throttle reset, restart unit
9041 	 * and delayed broadcast timeout threads. Protect the timeout pointer
9042 	 * from getting nulled by their callback functions.
9043 	 */
9044 	mutex_enter(SD_MUTEX(un));
9045 	if (un->un_resvd_timeid != NULL) {
9046 		timeout_id_t temp_id = un->un_resvd_timeid;
9047 		un->un_resvd_timeid = NULL;
9048 		mutex_exit(SD_MUTEX(un));
9049 		(void) untimeout(temp_id);
9050 		mutex_enter(SD_MUTEX(un));
9051 	}
9052 
9053 	if (un->un_reset_throttle_timeid != NULL) {
9054 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9055 		un->un_reset_throttle_timeid = NULL;
9056 		mutex_exit(SD_MUTEX(un));
9057 		(void) untimeout(temp_id);
9058 		mutex_enter(SD_MUTEX(un));
9059 	}
9060 
9061 	if (un->un_startstop_timeid != NULL) {
9062 		timeout_id_t temp_id = un->un_startstop_timeid;
9063 		un->un_startstop_timeid = NULL;
9064 		mutex_exit(SD_MUTEX(un));
9065 		(void) untimeout(temp_id);
9066 		mutex_enter(SD_MUTEX(un));
9067 	}
9068 
9069 	if (un->un_dcvb_timeid != NULL) {
9070 		timeout_id_t temp_id = un->un_dcvb_timeid;
9071 		un->un_dcvb_timeid = NULL;
9072 		mutex_exit(SD_MUTEX(un));
9073 		(void) untimeout(temp_id);
9074 	} else {
9075 		mutex_exit(SD_MUTEX(un));
9076 	}
9077 
9078 	/* Remove any pending reservation reclaim requests for this device */
9079 	sd_rmv_resv_reclaim_req(dev);
9080 
9081 	mutex_enter(SD_MUTEX(un));
9082 
9083 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9084 	if (un->un_direct_priority_timeid != NULL) {
9085 		timeout_id_t temp_id = un->un_direct_priority_timeid;
9086 		un->un_direct_priority_timeid = NULL;
9087 		mutex_exit(SD_MUTEX(un));
9088 		(void) untimeout(temp_id);
9089 		mutex_enter(SD_MUTEX(un));
9090 	}
9091 
9092 	/* Cancel any active multi-host disk watch thread requests */
9093 	if (un->un_mhd_token != NULL) {
9094 		mutex_exit(SD_MUTEX(un));
9095 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9096 		if (scsi_watch_request_terminate(un->un_mhd_token,
9097 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9098 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9099 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9100 			/*
9101 			 * Note: We are returning here after having removed
9102 			 * some driver timeouts above. This is consistent with
9103 			 * the legacy implementation but perhaps the watch
9104 			 * terminate call should be made with the wait flag set.
9105 			 */
9106 			goto err_stillbusy;
9107 		}
9108 		mutex_enter(SD_MUTEX(un));
9109 		un->un_mhd_token = NULL;
9110 	}
9111 
9112 	if (un->un_swr_token != NULL) {
9113 		mutex_exit(SD_MUTEX(un));
9114 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9115 		if (scsi_watch_request_terminate(un->un_swr_token,
9116 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9117 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9118 			    "sd_dr_detach: Cannot cancel swr watch request\n");
9119 			/*
9120 			 * Note: We are returning here after having removed
9121 			 * some driver timeouts above. This is consistent with
9122 			 * the legacy implementation but perhaps the watch
9123 			 * terminate call should be made with the wait flag set.
9124 			 */
9125 			goto err_stillbusy;
9126 		}
9127 		mutex_enter(SD_MUTEX(un));
9128 		un->un_swr_token = NULL;
9129 	}
9130 
9131 	mutex_exit(SD_MUTEX(un));
9132 
9133 	/*
9134 	 * Clear any scsi_reset_notifies. We clear the reset notifies
9135 	 * if we have not registered one.
9136 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9137 	 */
9138 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9139 	    sd_mhd_reset_notify_cb, (caddr_t)un);
9140 
9141 	/*
9142 	 * protect the timeout pointers from getting nulled by
9143 	 * their callback functions during the cancellation process.
9144 	 * In such a scenario untimeout can be invoked with a null value.
9145 	 */
9146 	_NOTE(NO_COMPETING_THREADS_NOW);
9147 
9148 	mutex_enter(&un->un_pm_mutex);
9149 	if (un->un_pm_idle_timeid != NULL) {
9150 		timeout_id_t temp_id = un->un_pm_idle_timeid;
9151 		un->un_pm_idle_timeid = NULL;
9152 		mutex_exit(&un->un_pm_mutex);
9153 
9154 		/*
9155 		 * Timeout is active; cancel it.
9156 		 * Note that it'll never be active on a device
9157 		 * that does not support PM therefore we don't
9158 		 * have to check before calling pm_idle_component.
9159 		 */
9160 		(void) untimeout(temp_id);
9161 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9162 		mutex_enter(&un->un_pm_mutex);
9163 	}
9164 
9165 	/*
9166 	 * Check whether there is already a timeout scheduled for power
	 * management. If yes then don't lower the power here, that's
9168 	 * the timeout handler's job.
9169 	 */
9170 	if (un->un_pm_timeid != NULL) {
9171 		timeout_id_t temp_id = un->un_pm_timeid;
9172 		un->un_pm_timeid = NULL;
9173 		mutex_exit(&un->un_pm_mutex);
9174 		/*
9175 		 * Timeout is active; cancel it.
9176 		 * Note that it'll never be active on a device
9177 		 * that does not support PM therefore we don't
9178 		 * have to check before calling pm_idle_component.
9179 		 */
9180 		(void) untimeout(temp_id);
9181 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9182 
9183 	} else {
9184 		mutex_exit(&un->un_pm_mutex);
9185 		if ((un->un_f_pm_is_enabled == TRUE) &&
9186 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9187 		    DDI_SUCCESS)) {
9188 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9189 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
9190 			/*
9191 			 * Fix for bug: 4297749, item # 13
9192 			 * The above test now includes a check to see if PM is
9193 			 * supported by this device before call
9194 			 * pm_lower_power().
9195 			 * Note, the following is not dead code. The call to
9196 			 * pm_lower_power above will generate a call back into
9197 			 * our sdpower routine which might result in a timeout
9198 			 * handler getting activated. Therefore the following
9199 			 * code is valid and necessary.
9200 			 */
9201 			mutex_enter(&un->un_pm_mutex);
9202 			if (un->un_pm_timeid != NULL) {
9203 				timeout_id_t temp_id = un->un_pm_timeid;
9204 				un->un_pm_timeid = NULL;
9205 				mutex_exit(&un->un_pm_mutex);
9206 				(void) untimeout(temp_id);
9207 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9208 			} else {
9209 				mutex_exit(&un->un_pm_mutex);
9210 			}
9211 		}
9212 	}
9213 
9214 	/*
9215 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9216 	 * Relocated here from above to be after the call to
9217 	 * pm_lower_power, which was getting errors.
9218 	 */
9219 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9220 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9221 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9222 
9223 	if (un->un_f_is_fibre == FALSE) {
9224 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9225 	}
9226 
9227 	/*
9228 	 * Remove any event callbacks, fibre only
9229 	 */
9230 	if (un->un_f_is_fibre == TRUE) {
9231 		if ((un->un_insert_event != NULL) &&
9232 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9233 				DDI_SUCCESS)) {
9234 			/*
9235 			 * Note: We are returning here after having done
9236 			 * substantial cleanup above. This is consistent
9237 			 * with the legacy implementation but this may not
9238 			 * be the right thing to do.
9239 			 */
9240 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9241 				"sd_dr_detach: Cannot cancel insert event\n");
9242 			goto err_remove_event;
9243 		}
9244 		un->un_insert_event = NULL;
9245 
9246 		if ((un->un_remove_event != NULL) &&
9247 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9248 				DDI_SUCCESS)) {
9249 			/*
9250 			 * Note: We are returning here after having done
9251 			 * substantial cleanup above. This is consistent
9252 			 * with the legacy implementation but this may not
9253 			 * be the right thing to do.
9254 			 */
9255 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9256 				"sd_dr_detach: Cannot cancel remove event\n");
9257 			goto err_remove_event;
9258 		}
9259 		un->un_remove_event = NULL;
9260 	}
9261 
9262 	/* Do not free the softstate if the callback routine is active */
9263 	sd_sync_with_callback(un);
9264 
9265 	/*
9266 	 * Hold the detach mutex here, to make sure that no other threads ever
9267 	 * can access a (partially) freed soft state structure.
9268 	 */
9269 	mutex_enter(&sd_detach_mutex);
9270 
9271 	/*
9272 	 * Clean up the soft state struct.
9273 	 * Cleanup is done in reverse order of allocs/inits.
9274 	 * At this point there should be no competing threads anymore.
9275 	 */
9276 
9277 	/* Unregister and free device id. */
9278 	ddi_devid_unregister(devi);
9279 	if (un->un_devid) {
9280 		ddi_devid_free(un->un_devid);
9281 		un->un_devid = NULL;
9282 	}
9283 
9284 	/*
9285 	 * Destroy wmap cache if it exists.
9286 	 */
9287 	if (un->un_wm_cache != NULL) {
9288 		kmem_cache_destroy(un->un_wm_cache);
9289 		un->un_wm_cache = NULL;
9290 	}
9291 
9292 	/* Remove minor nodes */
9293 	ddi_remove_minor_node(devi, NULL);
9294 
9295 	/*
9296 	 * kstat cleanup is done in detach for all device types (4363169).
9297 	 * We do not want to fail detach if the device kstats are not deleted
9298 	 * since there is a confusion about the devo_refcnt for the device.
9299 	 * We just delete the kstats and let detach complete successfully.
9300 	 */
9301 	if (un->un_stats != NULL) {
9302 		kstat_delete(un->un_stats);
9303 		un->un_stats = NULL;
9304 	}
9305 	if (un->un_errstats != NULL) {
9306 		kstat_delete(un->un_errstats);
9307 		un->un_errstats = NULL;
9308 	}
9309 
9310 	/* Remove partition stats */
9311 	if (un->un_f_pkstats_enabled) {
9312 		for (i = 0; i < NSDMAP; i++) {
9313 			if (un->un_pstats[i] != NULL) {
9314 				kstat_delete(un->un_pstats[i]);
9315 				un->un_pstats[i] = NULL;
9316 			}
9317 		}
9318 	}
9319 
9320 	/* Remove xbuf registration */
9321 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9322 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9323 
9324 	/* Remove driver properties */
9325 	ddi_prop_remove_all(devi);
9326 
9327 	mutex_destroy(&un->un_pm_mutex);
9328 	cv_destroy(&un->un_pm_busy_cv);
9329 
9330 	cv_destroy(&un->un_wcc_cv);
9331 
9332 	/* Open/close semaphore */
9333 	sema_destroy(&un->un_semoclose);
9334 
9335 	/* Removable media condvar. */
9336 	cv_destroy(&un->un_state_cv);
9337 
9338 	/* Suspend/resume condvar. */
9339 	cv_destroy(&un->un_suspend_cv);
9340 	cv_destroy(&un->un_disk_busy_cv);
9341 
9342 	sd_free_rqs(un);
9343 
9344 	/* Free up soft state */
9345 	devp->sd_private = NULL;
9346 	bzero(un, sizeof (struct sd_lun));
9347 	ddi_soft_state_free(sd_state, instance);
9348 
9349 	mutex_exit(&sd_detach_mutex);
9350 
9351 	/* This frees up the INQUIRY data associated with the device. */
9352 	scsi_unprobe(devp);
9353 
9354 	return (DDI_SUCCESS);
9355 
9356 err_notclosed:
9357 	mutex_exit(SD_MUTEX(un));
9358 
9359 err_stillbusy:
9360 	_NOTE(NO_COMPETING_THREADS_NOW);
9361 
9362 err_remove_event:
9363 	mutex_enter(&sd_detach_mutex);
9364 	un->un_detach_count--;
9365 	mutex_exit(&sd_detach_mutex);
9366 
9367 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9368 	return (DDI_FAILURE);
9369 }
9370 
9371 
9372 /*
9373  * Driver minor node structure and data table
9374  */
struct driver_minor_data {
	char	*name;		/* minor node name, e.g. "a" or "a,raw" */
	minor_t	minor;		/* slice/partition number encoded in minor */
	int	type;		/* node type: S_IFBLK (block) or S_IFCHR (raw) */
};
9380 
/*
 * Minor node table for VTOC-labeled disks: block and raw nodes for
 * slices a-h, plus i-p on VTOC16 platforms and q-u fdisk partition
 * nodes on platforms where the firmware requires an fdisk table.
 * Terminated by a zeroed sentinel entry (name == NULL).
 */
static struct driver_minor_data sd_minor_data[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"h", 7, S_IFBLK},
#if defined(_SUNOS_VTOC_16)
	{"i", 8, S_IFBLK},
	{"j", 9, S_IFBLK},
	{"k", 10, S_IFBLK},
	{"l", 11, S_IFBLK},
	{"m", 12, S_IFBLK},
	{"n", 13, S_IFBLK},
	{"o", 14, S_IFBLK},
	{"p", 15, S_IFBLK},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"h,raw", 7, S_IFCHR},
#if defined(_SUNOS_VTOC_16)
	{"i,raw", 8, S_IFCHR},
	{"j,raw", 9, S_IFCHR},
	{"k,raw", 10, S_IFCHR},
	{"l,raw", 11, S_IFCHR},
	{"m,raw", 12, S_IFCHR},
	{"n,raw", 13, S_IFCHR},
	{"o,raw", 14, S_IFCHR},
	{"p,raw", 15, S_IFCHR},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9434 
/*
 * Minor node table for EFI-labeled (large) disks: slices a-g plus the
 * "wd" (whole disk) node at index 7, and the q-u fdisk partition nodes
 * where firmware requires an fdisk table. Terminated by a zeroed
 * sentinel entry (name == NULL).
 */
static struct driver_minor_data sd_minor_data_efi[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"wd", 7, S_IFBLK},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"wd,raw", 7, S_IFCHR},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9468 
9469 
9470 /*
9471  *    Function: sd_create_minor_nodes
9472  *
9473  * Description: Create the minor device nodes for the instance.
9474  *
9475  *   Arguments: un - driver soft state (unit) structure
9476  *		devi - pointer to device info structure
9477  *
9478  * Return Code: DDI_SUCCESS
9479  *		DDI_FAILURE
9480  *
9481  *     Context: Kernel thread context
9482  */
9483 
9484 static int
9485 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9486 {
9487 	struct driver_minor_data	*dmdp;
9488 	struct scsi_device		*devp;
9489 	int				instance;
9490 	char				name[48];
9491 
9492 	ASSERT(un != NULL);
9493 	devp = ddi_get_driver_private(devi);
9494 	instance = ddi_get_instance(devp->sd_dev);
9495 
9496 	/*
9497 	 * Create all the minor nodes for this target.
9498 	 */
9499 	if (un->un_blockcount > DK_MAX_BLOCKS)
9500 		dmdp = sd_minor_data_efi;
9501 	else
9502 		dmdp = sd_minor_data;
9503 	while (dmdp->name != NULL) {
9504 
9505 		(void) sprintf(name, "%s", dmdp->name);
9506 
9507 		if (ddi_create_minor_node(devi, name, dmdp->type,
9508 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9509 		    un->un_node_type, NULL) == DDI_FAILURE) {
9510 			/*
9511 			 * Clean up any nodes that may have been created, in
9512 			 * case this fails in the middle of the loop.
9513 			 */
9514 			ddi_remove_minor_node(devi, NULL);
9515 			return (DDI_FAILURE);
9516 		}
9517 		dmdp++;
9518 	}
9519 
9520 	return (DDI_SUCCESS);
9521 }
9522 
9523 
9524 /*
9525  *    Function: sd_create_errstats
9526  *
9527  * Description: This routine instantiates the device error stats.
9528  *
9529  *		Note: During attach the stats are instantiated first so they are
9530  *		available for attach-time routines that utilize the driver
9531  *		iopath to send commands to the device. The stats are initialized
9532  *		separately so data obtained during some attach-time routines is
9533  *		available. (4362483)
9534  *
9535  *   Arguments: un - driver soft state (unit) structure
9536  *		instance - driver instance
9537  *
9538  *     Context: Kernel thread context
9539  */
9540 
9541 static void
9542 sd_create_errstats(struct sd_lun *un, int instance)
9543 {
9544 	struct	sd_errstats	*stp;
9545 	char	kstatmodule_err[KSTAT_STRLEN];
9546 	char	kstatname[KSTAT_STRLEN];
9547 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9548 
9549 	ASSERT(un != NULL);
9550 
9551 	if (un->un_errstats != NULL) {
9552 		return;
9553 	}
9554 
9555 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9556 	    "%serr", sd_label);
9557 	(void) snprintf(kstatname, sizeof (kstatname),
9558 	    "%s%d,err", sd_label, instance);
9559 
9560 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9561 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9562 
9563 	if (un->un_errstats == NULL) {
9564 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9565 		    "sd_create_errstats: Failed kstat_create\n");
9566 		return;
9567 	}
9568 
9569 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9570 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9571 	    KSTAT_DATA_UINT32);
9572 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9573 	    KSTAT_DATA_UINT32);
9574 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9575 	    KSTAT_DATA_UINT32);
9576 	kstat_named_init(&stp->sd_vid,		"Vendor",
9577 	    KSTAT_DATA_CHAR);
9578 	kstat_named_init(&stp->sd_pid,		"Product",
9579 	    KSTAT_DATA_CHAR);
9580 	kstat_named_init(&stp->sd_revision,	"Revision",
9581 	    KSTAT_DATA_CHAR);
9582 	kstat_named_init(&stp->sd_serial,	"Serial No",
9583 	    KSTAT_DATA_CHAR);
9584 	kstat_named_init(&stp->sd_capacity,	"Size",
9585 	    KSTAT_DATA_ULONGLONG);
9586 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9587 	    KSTAT_DATA_UINT32);
9588 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9589 	    KSTAT_DATA_UINT32);
9590 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9591 	    KSTAT_DATA_UINT32);
9592 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9593 	    KSTAT_DATA_UINT32);
9594 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9595 	    KSTAT_DATA_UINT32);
9596 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9597 	    KSTAT_DATA_UINT32);
9598 
9599 	un->un_errstats->ks_private = un;
9600 	un->un_errstats->ks_update  = nulldev;
9601 
9602 	kstat_install(un->un_errstats);
9603 }
9604 
9605 
9606 /*
9607  *    Function: sd_set_errstats
9608  *
9609  * Description: This routine sets the value of the vendor id, product id,
9610  *		revision, serial number, and capacity device error stats.
9611  *
9612  *		Note: During attach the stats are instantiated first so they are
9613  *		available for attach-time routines that utilize the driver
9614  *		iopath to send commands to the device. The stats are initialized
9615  *		separately so data obtained during some attach-time routines is
9616  *		available. (4362483)
9617  *
9618  *   Arguments: un - driver soft state (unit) structure
9619  *
9620  *     Context: Kernel thread context
9621  */
9622 
static void
sd_set_errstats(struct sd_lun *un)
{
	struct	sd_errstats	*stp;

	ASSERT(un != NULL);
	ASSERT(un->un_errstats != NULL);
	stp = (struct sd_errstats *)un->un_errstats->ks_data;
	ASSERT(stp != NULL);
	/*
	 * Copy the INQUIRY identity fields into the kstat. These INQUIRY
	 * fields are fixed-width and not NUL-terminated, so strncpy with
	 * the exact field widths (8/16/4 bytes) is the intended copy here,
	 * not a truncation-safety measure.
	 */
	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
	(void) strncpy(stp->sd_revision.value.c,
	    un->un_sd->sd_inq->inq_revision, 4);

	/*
	 * All the errstats are persistent across detach/attach,
	 * so reset all the errstats here in case of the hot
	 * replacement of disk drives, except for not changed
	 * Sun qualified drives.
	 */
	/*
	 * A drive is considered "changed" if it is either not a Sun
	 * qualified drive (no "SUN" at pid bytes 9-11) or its serial
	 * number differs from the one stored in the persistent kstat.
	 */
	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
		stp->sd_softerrs.value.ui32 = 0;
		stp->sd_harderrs.value.ui32 = 0;
		stp->sd_transerrs.value.ui32 = 0;
		stp->sd_rq_media_err.value.ui32 = 0;
		stp->sd_rq_ntrdy_err.value.ui32 = 0;
		stp->sd_rq_nodev_err.value.ui32 = 0;
		stp->sd_rq_recov_err.value.ui32 = 0;
		stp->sd_rq_illrq_err.value.ui32 = 0;
		stp->sd_rq_pfa_err.value.ui32 = 0;
	}

	/*
	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
	 * (4376302))
	 */
	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
		    sizeof (SD_INQUIRY(un)->inq_serial));
	}

	if (un->un_f_blockcount_is_valid != TRUE) {
		/*
		 * Set capacity error stat to 0 for no media. This ensures
		 * a valid capacity is displayed in response to 'iostat -E'
		 * when no media is present in the device.
		 */
		stp->sd_capacity.value.ui64 = 0;
	} else {
		/*
		 * Multiply un_blockcount by un->un_sys_blocksize to get
		 * capacity.
		 *
		 * Note: for non-512 blocksize devices "un_blockcount" has been
		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
		 * (un_tgt_blocksize / un->un_sys_blocksize).
		 */
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
	}
}
9687 
9688 
9689 /*
9690  *    Function: sd_set_pstats
9691  *
9692  * Description: This routine instantiates and initializes the partition
9693  *              stats for each partition with more than zero blocks.
9694  *		(4363169)
9695  *
9696  *   Arguments: un - driver soft state (unit) structure
9697  *
9698  *     Context: Kernel thread context
9699  */
9700 
9701 static void
9702 sd_set_pstats(struct sd_lun *un)
9703 {
9704 	char	kstatname[KSTAT_STRLEN];
9705 	int	instance;
9706 	int	i;
9707 
9708 	ASSERT(un != NULL);
9709 
9710 	instance = ddi_get_instance(SD_DEVINFO(un));
9711 
9712 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9713 	for (i = 0; i < NSDMAP; i++) {
9714 		if ((un->un_pstats[i] == NULL) &&
9715 		    (un->un_map[i].dkl_nblk != 0)) {
9716 			(void) snprintf(kstatname, sizeof (kstatname),
9717 			    "%s%d,%s", sd_label, instance,
9718 			    sd_minor_data[i].name);
9719 			un->un_pstats[i] = kstat_create(sd_label,
9720 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9721 			    1, KSTAT_FLAG_PERSISTENT);
9722 			if (un->un_pstats[i] != NULL) {
9723 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9724 				kstat_install(un->un_pstats[i]);
9725 			}
9726 		}
9727 	}
9728 }
9729 
9730 
9731 #if (defined(__fibre))
9732 /*
9733  *    Function: sd_init_event_callbacks
9734  *
9735  * Description: This routine initializes the insertion and removal event
9736  *		callbacks. (fibre only)
9737  *
9738  *   Arguments: un - driver soft state (unit) structure
9739  *
9740  *     Context: Kernel thread context
9741  */
9742 
9743 static void
9744 sd_init_event_callbacks(struct sd_lun *un)
9745 {
9746 	ASSERT(un != NULL);
9747 
9748 	if ((un->un_insert_event == NULL) &&
9749 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9750 	    &un->un_insert_event) == DDI_SUCCESS)) {
9751 		/*
9752 		 * Add the callback for an insertion event
9753 		 */
9754 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9755 		    un->un_insert_event, sd_event_callback, (void *)un,
9756 		    &(un->un_insert_cb_id));
9757 	}
9758 
9759 	if ((un->un_remove_event == NULL) &&
9760 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9761 	    &un->un_remove_event) == DDI_SUCCESS)) {
9762 		/*
9763 		 * Add the callback for a removal event
9764 		 */
9765 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9766 		    un->un_remove_event, sd_event_callback, (void *)un,
9767 		    &(un->un_remove_cb_id));
9768 	}
9769 }
9770 
9771 
9772 /*
9773  *    Function: sd_event_callback
9774  *
9775  * Description: This routine handles insert/remove events (photon). The
 *		state is changed to OFFLINE which can be used to suppress
9777  *		error msgs. (fibre only)
9778  *
9779  *   Arguments: un - driver soft state (unit) structure
9780  *
9781  *     Context: Callout thread context
9782  */
/* ARGSUSED */
static void
sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
    void *bus_impldata)
{
	struct sd_lun *un = (struct sd_lun *)arg;

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
	if (event == un->un_insert_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
		mutex_enter(SD_MUTEX(un));
		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_last_state != SD_STATE_SUSPENDED) {
				/* Re-insertion: return to the prior state */
				un->un_state = un->un_last_state;
			} else {
				/*
				 * We have gone through SUSPEND/RESUME while
				 * we were offline. Restore the last state
				 */
				un->un_state = un->un_save_state;
			}
		}
		mutex_exit(SD_MUTEX(un));

	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
	} else if (event == un->un_remove_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
		mutex_enter(SD_MUTEX(un));
		/*
		 * We need to handle an event callback that occurs during
		 * the suspend operation, since we don't prevent it.
		 */
		if (un->un_state != SD_STATE_OFFLINE) {
			if (un->un_state != SD_STATE_SUSPENDED) {
				New_state(un, SD_STATE_OFFLINE);
			} else {
				/*
				 * Suspended: only record that the device
				 * went offline so resume restores it.
				 */
				un->un_last_state = SD_STATE_OFFLINE;
			}
		}
		mutex_exit(SD_MUTEX(un));
	} else {
		/* Cookie matches neither registered event; just log it. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "!Unknown event\n");
	}

}
9829 #endif
9830 
9831 /*
9832  *    Function: sd_cache_control()
9833  *
9834  * Description: This routine is the driver entry point for setting
9835  *		read and write caching by modifying the WCE (write cache
9836  *		enable) and RCD (read cache disable) bits of mode
9837  *		page 8 (MODEPAGE_CACHING).
9838  *
9839  *   Arguments: un - driver soft state (unit) structure
9840  *		rcd_flag - flag for controlling the read cache
9841  *		wce_flag - flag for controlling the write cache
9842  *
9843  * Return Code: EIO
9844  *		code returned by sd_send_scsi_MODE_SENSE and
9845  *		sd_send_scsi_MODE_SELECT
9846  *
9847  *     Context: Kernel Thread
9848  */
9849 
static int
sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
{
	struct mode_caching	*mode_caching_page;
	uchar_t			*header;
	size_t			buflen;
	int			hdrlen;
	int			bd_len;
	int			rval = 0;
	struct mode_header_grp2	*mhp;

	ASSERT(un != NULL);

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 */
	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);

	/* ATAPI devices use the group-2 (10-byte) mode header. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		hdrlen = MODE_HEADER_LENGTH_GRP2;
	} else {
		hdrlen = MODE_HEADER_LENGTH;
	}

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Use mode_cache_scsi3 to insure
	 * we get all of the mode sense data otherwise, the mode select
	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
	 */
	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
		sizeof (struct mode_cache_scsi3);

	header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the information from the device. */
	if (un->un_f_cfg_is_atapi == TRUE) {
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	} else {
		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
		    MODEPAGE_CACHING, SD_PATH_DIRECT);
	}
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
		    "sd_cache_control: Mode Sense Failed\n");
		kmem_free(header, buflen);
		return (rval);
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mhp	= (struct mode_header_grp2 *)header;
		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len  = ((struct mode_header *)header)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_cache_control: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(header, buflen);
		return (EIO);
	}

	/* The caching page follows the header and any block descriptors. */
	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
		    " caching page code mismatch %d\n",
		    mode_caching_page->mode_page.code);
		kmem_free(header, buflen);
		return (EIO);
	}

	/* Check the relevant bits on successful mode sense. */
	/*
	 * Only issue a MODE SELECT if the current RCD/WCE settings differ
	 * from what the caller requested; otherwise return with rval == 0.
	 */
	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {

		size_t sbuflen;
		uchar_t save_pg;

		/*
		 * Construct select buffer length based on the
		 * length of the sense data returned.
		 */
		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
				sizeof (struct mode_page) +
				(int)mode_caching_page->mode_page.length;

		/*
		 * Set the caching bits as requested.
		 * Note: RCD is "read cache disable", so enable == 0;
		 * WCE is "write cache enable", so enable == 1.
		 */
		if (rcd_flag == SD_CACHE_ENABLE)
			mode_caching_page->rcd = 0;
		else if (rcd_flag == SD_CACHE_DISABLE)
			mode_caching_page->rcd = 1;

		if (wce_flag == SD_CACHE_ENABLE)
			mode_caching_page->wce = 1;
		else if (wce_flag == SD_CACHE_DISABLE)
			mode_caching_page->wce = 0;

		/*
		 * Save the page if the mode sense says the
		 * drive supports it.
		 */
		save_pg = mode_caching_page->mode_page.ps ?
				SD_SAVE_PAGE : SD_DONTSAVE_PAGE;

		/* Clear reserved bits before mode select. */
		mode_caching_page->mode_page.ps = 0;

		/*
		 * Clear out mode header for mode select.
		 * The rest of the retrieved page will be reused.
		 */
		bzero(header, hdrlen);

		/* Restore the block descriptor length in the new header. */
		if (un->un_f_cfg_is_atapi == TRUE) {
			mhp = (struct mode_header_grp2 *)header;
			mhp->bdesc_length_hi = bd_len >> 8;
			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
		} else {
			((struct mode_header *)header)->bdesc_length = bd_len;
		}

		/* Issue mode select to change the cache settings */
		if (un->un_f_cfg_is_atapi == TRUE) {
			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
			    sbuflen, save_pg, SD_PATH_DIRECT);
		} else {
			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
			    sbuflen, save_pg, SD_PATH_DIRECT);
		}
	}

	kmem_free(header, buflen);
	return (rval);
}
9997 
9998 
9999 /*
10000  *    Function: sd_get_write_cache_enabled()
10001  *
10002  * Description: This routine is the driver entry point for determining if
10003  *		write caching is enabled.  It examines the WCE (write cache
10004  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
10005  *
10006  *   Arguments: un - driver soft state (unit) structure
10007  *   		is_enabled - pointer to int where write cache enabled state
10008  *   			is returned (non-zero -> write cache enabled)
10009  *
10010  *
10011  * Return Code: EIO
10012  *		code returned by sd_send_scsi_MODE_SENSE
10013  *
10014  *     Context: Kernel Thread
10015  *
10016  * NOTE: If ioctl is added to disable write cache, this sequence should
10017  * be followed so that no locking is required for accesses to
10018  * un->un_f_write_cache_enabled:
10019  * 	do mode select to clear wce
10020  * 	do synchronize cache to flush cache
10021  * 	set un->un_f_write_cache_enabled = FALSE
10022  *
10023  * Conversely, an ioctl to enable the write cache should be done
10024  * in this order:
10025  * 	set un->un_f_write_cache_enabled = TRUE
10026  * 	do mode select to set wce
10027  */
10028 
10029 static int
10030 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
10031 {
10032 	struct mode_caching	*mode_caching_page;
10033 	uchar_t			*header;
10034 	size_t			buflen;
10035 	int			hdrlen;
10036 	int			bd_len;
10037 	int			rval = 0;
10038 
10039 	ASSERT(un != NULL);
10040 	ASSERT(is_enabled != NULL);
10041 
10042 	/* in case of error, flag as enabled */
10043 	*is_enabled = TRUE;
10044 
10045 	/*
10046 	 * Do a test unit ready, otherwise a mode sense may not work if this
10047 	 * is the first command sent to the device after boot.
10048 	 */
10049 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10050 
10051 	if (un->un_f_cfg_is_atapi == TRUE) {
10052 		hdrlen = MODE_HEADER_LENGTH_GRP2;
10053 	} else {
10054 		hdrlen = MODE_HEADER_LENGTH;
10055 	}
10056 
10057 	/*
10058 	 * Allocate memory for the retrieved mode page and its headers.  Set
10059 	 * a pointer to the page itself.
10060 	 */
10061 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10062 	header = kmem_zalloc(buflen, KM_SLEEP);
10063 
10064 	/* Get the information from the device. */
10065 	if (un->un_f_cfg_is_atapi == TRUE) {
10066 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10067 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10068 	} else {
10069 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10070 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10071 	}
10072 	if (rval != 0) {
10073 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10074 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10075 		kmem_free(header, buflen);
10076 		return (rval);
10077 	}
10078 
10079 	/*
10080 	 * Determine size of Block Descriptors in order to locate
10081 	 * the mode page data. ATAPI devices return 0, SCSI devices
10082 	 * should return MODE_BLK_DESC_LENGTH.
10083 	 */
10084 	if (un->un_f_cfg_is_atapi == TRUE) {
10085 		struct mode_header_grp2	*mhp;
10086 		mhp	= (struct mode_header_grp2 *)header;
10087 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10088 	} else {
10089 		bd_len  = ((struct mode_header *)header)->bdesc_length;
10090 	}
10091 
10092 	if (bd_len > MODE_BLK_DESC_LENGTH) {
10093 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10094 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10095 		    "block descriptor length\n");
10096 		kmem_free(header, buflen);
10097 		return (EIO);
10098 	}
10099 
10100 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10101 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
10102 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
10103 		    " caching page code mismatch %d\n",
10104 		    mode_caching_page->mode_page.code);
10105 		kmem_free(header, buflen);
10106 		return (EIO);
10107 	}
10108 	*is_enabled = mode_caching_page->wce;
10109 
10110 	kmem_free(header, buflen);
10111 	return (0);
10112 }
10113 
10114 
10115 /*
10116  *    Function: sd_make_device
10117  *
10118  * Description: Utility routine to return the Solaris device number from
10119  *		the data in the device's dev_info structure.
10120  *
10121  * Return Code: The Solaris device number
10122  *
10123  *     Context: Any
10124  */
10125 
10126 static dev_t
10127 sd_make_device(dev_info_t *devi)
10128 {
10129 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10130 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10131 }
10132 
10133 
10134 /*
10135  *    Function: sd_pm_entry
10136  *
10137  * Description: Called at the start of a new command to manage power
10138  *		and busy status of a device. This includes determining whether
10139  *		the current power state of the device is sufficient for
10140  *		performing the command or whether it must be changed.
10141  *		The PM framework is notified appropriately.
10142  *		Only with a return status of DDI_SUCCESS will the
10143  *		component be busy to the framework.
10144  *
10145  *		All callers of sd_pm_entry must check the return status
 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
10147  *		of DDI_FAILURE indicates the device failed to power up.
10148  *		In this case un_pm_count has been adjusted so the result
10149  *		on exit is still powered down, ie. count is less than 0.
10150  *		Calling sd_pm_exit with this count value hits an ASSERT.
10151  *
10152  * Return Code: DDI_SUCCESS or DDI_FAILURE
10153  *
10154  *     Context: Kernel thread context.
10155  */
10156 
static int
sd_pm_entry(struct sd_lun *un)
{
	int return_status = DDI_SUCCESS;

	/* Caller must hold neither lock; both are acquired below. */
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");

	/* With PM disabled this routine is a no-op and always succeeds. */
	if (un->un_f_pm_is_enabled == FALSE) {
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_entry: exiting, PM not enabled\n");
		return (return_status);
	}

	/*
	 * Just increment a counter if PM is enabled. On the transition from
	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
	 * the count with each IO and mark the device as idle when the count
	 * hits 0.
	 *
	 * If the count is less than 0 the device is powered down. If a powered
	 * down device is successfully powered up then the count must be
	 * incremented to reflect the power up. Note that it'll get incremented
	 * a second time to become busy.
	 *
	 * Because the following has the potential to change the device state
	 * and must release the un_pm_mutex to do so, only one thread can be
	 * allowed through at a time.
	 */

	mutex_enter(&un->un_pm_mutex);
	/* un_pm_busy single-threads the power-state transition below. */
	while (un->un_pm_busy == TRUE) {
		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
	}
	un->un_pm_busy = TRUE;

	if (un->un_pm_count < 1) {

		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");

		/*
		 * Indicate we are now busy so the framework won't attempt to
		 * power down the device. This call will only fail if either
		 * we passed a bad component number or the device has no
		 * components. Neither of these should ever happen.
		 */
		mutex_exit(&un->un_pm_mutex);
		return_status = pm_busy_component(SD_DEVINFO(un), 0);
		ASSERT(return_status == DDI_SUCCESS);

		mutex_enter(&un->un_pm_mutex);

		if (un->un_pm_count < 0) {
			/* Negative count: device is powered down. */
			mutex_exit(&un->un_pm_mutex);

			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: power up component\n");

			/*
			 * pm_raise_power will cause sdpower to be called
			 * which brings the device power level to the
			 * desired state, ON in this case. If successful,
			 * un_pm_count and un_power_level will be updated
			 * appropriately.
			 */
			return_status = pm_raise_power(SD_DEVINFO(un), 0,
			    SD_SPINDLE_ON);

			mutex_enter(&un->un_pm_mutex);

			if (return_status != DDI_SUCCESS) {
				/*
				 * Power up failed.
				 * Idle the device and adjust the count
				 * so the result on exit is that we're
				 * still powered down, ie. count is less than 0.
				 */
				SD_TRACE(SD_LOG_IO_PM, un,
				    "sd_pm_entry: power up failed,"
				    " idle the component\n");

				(void) pm_idle_component(SD_DEVINFO(un), 0);
				un->un_pm_count--;
			} else {
				/*
				 * Device is powered up, verify the
				 * count is non-negative.
				 * This is debug only.
				 */
				ASSERT(un->un_pm_count == 0);
			}
		}

		if (return_status == DDI_SUCCESS) {
			/*
			 * For performance, now that the device has been tagged
			 * as busy, and it's known to be powered up, update the
			 * chain types to use jump tables that do not include
			 * pm. This significantly lowers the overhead and
			 * therefore improves performance.
			 */

			mutex_exit(&un->un_pm_mutex);
			mutex_enter(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
			    un->un_uscsi_chain_type);

			if (un->un_f_non_devbsize_supported) {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
			} else {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_DISK_NO_PM;
			}
			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;

			SD_TRACE(SD_LOG_IO_PM, un,
			    "             changed  uscsi_chain_type to   %d\n",
			    un->un_uscsi_chain_type);
			mutex_exit(SD_MUTEX(un));
			mutex_enter(&un->un_pm_mutex);

			if (un->un_pm_idle_timeid == NULL) {
				/* 300 ms. */
				un->un_pm_idle_timeid =
				    timeout(sd_pm_idletimeout_handler, un,
				    (drv_usectohz((clock_t)300000)));
				/*
				 * Include an extra call to busy which keeps the
				 * device busy with-respect-to the PM layer
				 * until the timer fires, at which time it'll
				 * get the extra idle call.
				 */
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			}
		}
	}
	/* Release the single-thread gate and wake the next waiter. */
	un->un_pm_busy = FALSE;
	/* Next... */
	cv_signal(&un->un_pm_busy_cv);

	/* Count this command as active; sd_pm_exit() decrements it. */
	un->un_pm_count++;

	SD_TRACE(SD_LOG_IO_PM, un,
	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);

	mutex_exit(&un->un_pm_mutex);

	return (return_status);
}
10310 
10311 
10312 /*
10313  *    Function: sd_pm_exit
10314  *
10315  * Description: Called at the completion of a command to manage busy
10316  *		status for the device. If the device becomes idle the
10317  *		PM framework is notified.
10318  *
10319  *     Context: Kernel thread context
10320  */
10321 
static void
sd_pm_exit(struct sd_lun *un)
{
	/* Caller must hold neither lock; un_pm_mutex is taken below. */
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");

	/*
	 * After attach the following flag is only read, so don't
	 * take the penalty of acquiring a mutex for it.
	 */
	if (un->un_f_pm_is_enabled == TRUE) {

		mutex_enter(&un->un_pm_mutex);
		/* Balance the increment made in sd_pm_entry(). */
		un->un_pm_count--;

		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);

		/*
		 * A negative count would mean sd_pm_exit() was called
		 * without a successful matching sd_pm_entry().
		 */
		ASSERT(un->un_pm_count >= 0);
		if (un->un_pm_count == 0) {
			/*
			 * Last active command: drop un_pm_mutex before
			 * calling into the PM framework, then tell it
			 * the component is idle.
			 */
			mutex_exit(&un->un_pm_mutex);

			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_exit: idle component\n");

			(void) pm_idle_component(SD_DEVINFO(un), 0);

		} else {
			mutex_exit(&un->un_pm_mutex);
		}
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
}
10358 
10359 
10360 /*
10361  *    Function: sdopen
10362  *
10363  * Description: Driver's open(9e) entry point function.
10364  *
 *   Arguments: dev_p   - pointer to device number
10366  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10367  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10368  *		cred_p  - user credential pointer
10369  *
10370  * Return Code: EINVAL
10371  *		ENXIO
10372  *		EIO
10373  *		EROFS
10374  *		EBUSY
10375  *
10376  *     Context: Kernel thread context
10377  */
/* ARGSUSED */
static int
sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	int		nodelay;
	int		part;
	uint64_t	partmask;
	int		instance;
	dev_t		dev;
	int		rval = EIO;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (EINVAL);
	}

	dev = *dev_p;
	instance = SDUNIT(dev);
	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the open if there is no softstate for the instance, or
	 * if another thread somewhere is trying to detach the instance.
	 */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (un->un_detach_count != 0)) {
		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open (9e) fails
		 * with ENXIO (4238046).
		 */
		/*
		 * un-conditionally clearing probe cache is ok with
		 * separate sd/ssd binaries
		 * x86 platform can be an issue with both parallel
		 * and fibre in 1 binary
		 */
		sd_scsi_clear_probe_cache();
		return (ENXIO);
	}

	/*
	 * The un_layer_count is to prevent another thread in specfs from
	 * trying to detach the instance, which can happen when we are
	 * called from a higher-layer driver instead of thru specfs.
	 * This will not be needed when DDI provides a layered driver
	 * interface that allows specfs to know that an instance is in
	 * use by a layered driver & should not be detached.
	 *
	 * Note: the semantics for layered driver opens are exactly one
	 * close for every open.
	 */
	if (otyp == OTYP_LYR) {
		un->un_layer_count++;
	}

	/*
	 * Keep a count of the current # of opens in progress. This is because
	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent, however by keeping this count
	 * we can at least keep our open and detach routines from racing against
	 * each other under such conditions.
	 */
	un->un_opens_in_progress++;
	mutex_exit(&sd_detach_mutex);

	/* Non-blocking open if either FNDELAY or FNONBLOCK was passed. */
	nodelay  = (flag & (FNDELAY | FNONBLOCK));
	part	 = SDPART(dev);
	partmask = 1 << part;

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/*
	 * All device accesses go thru sdstrategy() where we check
	 * on suspend status but there could be a scsi_poll command,
	 * which bypasses sdstrategy(), so we need to check pm
	 * status.
	 */

	if (!nodelay) {
		/* Wait out any suspend or power-level transition. */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}

		mutex_exit(SD_MUTEX(un));
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			rval = EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
			    "sdopen: sd_pm_entry failed\n");
			/* PM was never entered: skip the sd_pm_exit() path. */
			goto open_failed_with_pm;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* check for previous exclusive open */
	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);

	/* Fail if another open already holds this partition exclusively. */
	if (un->un_exclopen & (partmask)) {
		goto excl_open_fail;
	}

	/*
	 * For an FEXCL request, fail if the partition is open in any
	 * other way (layered or any regular open type).
	 */
	if (flag & FEXCL) {
		int i;
		if (un->un_ocmap.lyropen[part]) {
			goto excl_open_fail;
		}
		for (i = 0; i < (OTYPCNT - 1); i++) {
			if (un->un_ocmap.regopen[i] & (partmask)) {
				goto excl_open_fail;
			}
		}
	}

	/*
	 * Check the write permission if this is a removable media device,
	 * NDELAY has not been set, and writable permission is requested.
	 *
	 * Note: If NDELAY was set and this is write-protected media the WRITE
	 * attempt will fail with EIO as part of the I/O processing. This is a
	 * more permissive implementation that allows the open to succeed and
	 * WRITE attempts to fail when appropriate.
	 */
	if (un->un_f_chk_wp_open) {
		if ((flag & FWRITE) && (!nodelay)) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Defer the check for write permission on writable
			 * DVD drive till sdstrategy and will not fail open even
			 * if FWRITE is set as the device can be writable
			 * depending upon the media and the media can change
			 * after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
				rval = EROFS;
				mutex_enter(SD_MUTEX(un));
				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
				    "write to cd or write protected media\n");
				goto open_fail;
				}
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If opening in NDELAY/NONBLOCK mode, just return.
	 * Check if disk is ready and has a valid geometry later.
	 */
	if (!nodelay) {
		mutex_exit(SD_MUTEX(un));
		rval = sd_ready_and_valid(un);
		mutex_enter(SD_MUTEX(un));
		/*
		 * Fail if device is not ready or if the number of disk
		 * blocks is zero or negative for non CD devices.
		 */
		if ((rval != SD_READY_VALID) ||
		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
			rval = un->un_f_has_removable_media ? ENXIO : EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
			    "device not ready or invalid disk block value\n");
			goto open_fail;
		}
#if defined(__i386) || defined(__amd64)
	} else {
		uchar_t *cp;
		/*
		 * x86 requires special nodelay handling, so that p0 is
		 * always defined and accessible.
		 * Invalidate geometry only if device is not already open.
		 */
		cp = &un->un_ocmap.chkd[0];
		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
			if (*cp != (uchar_t)0) {
			    break;
			}
			cp++;
		}
		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
			un->un_f_geometry_is_valid = FALSE;
		}

#endif
	}

	/* Record this open in the open-counts map. */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part]++;
	} else {
		un->un_ocmap.regopen[otyp] |= partmask;
	}

	/* Set up open and exclusive open flags */
	if (flag & FEXCL) {
		un->un_exclopen |= (partmask);
	}

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
	    "open of part %d type %d\n", part, otyp);

	mutex_exit(SD_MUTEX(un));
	if (!nodelay) {
		sd_pm_exit(un);
	}

	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
	/*
	 * NOTE(review): open(9e) conventionally returns 0 on success;
	 * DDI_SUCCESS is used here instead — confirm they are equivalent
	 * in this build environment.
	 */
	return (DDI_SUCCESS);

excl_open_fail:
	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
	rval = EBUSY;

open_fail:
	mutex_exit(SD_MUTEX(un));

	/*
	 * On a failed open we must exit the pm management.
	 */
	if (!nodelay) {
		sd_pm_exit(un);
	}
open_failed_with_pm:
	sema_v(&un->un_semoclose);

	/* Undo the bookkeeping done on entry before returning failure. */
	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	if (otyp == OTYP_LYR) {
		un->un_layer_count--;
	}
	mutex_exit(&sd_detach_mutex);

	return (rval);
}
10628 
10629 
10630 /*
10631  *    Function: sdclose
10632  *
10633  * Description: Driver's close(9e) entry point function.
10634  *
10635  *   Arguments: dev    - device number
10636  *		flag   - file status flag, informational only
10637  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10638  *		cred_p - user credential pointer
10639  *
10640  * Return Code: ENXIO
10641  *
10642  *     Context: Kernel thread context
10643  */
10644 /* ARGSUSED */
10645 static int
10646 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10647 {
10648 	struct sd_lun	*un;
10649 	uchar_t		*cp;
10650 	int		part;
10651 	int		nodelay;
10652 	int		rval = 0;
10653 
10654 	/* Validate the open type */
10655 	if (otyp >= OTYPCNT) {
10656 		return (ENXIO);
10657 	}
10658 
10659 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10660 		return (ENXIO);
10661 	}
10662 
10663 	part = SDPART(dev);
10664 	nodelay = flag & (FNDELAY | FNONBLOCK);
10665 
10666 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10667 	    "sdclose: close of part %d type %d\n", part, otyp);
10668 
10669 	/*
10670 	 * We use a semaphore here in order to serialize
10671 	 * open and close requests on the device.
10672 	 */
10673 	sema_p(&un->un_semoclose);
10674 
10675 	mutex_enter(SD_MUTEX(un));
10676 
10677 	/* Don't proceed if power is being changed. */
10678 	while (un->un_state == SD_STATE_PM_CHANGING) {
10679 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10680 	}
10681 
10682 	if (un->un_exclopen & (1 << part)) {
10683 		un->un_exclopen &= ~(1 << part);
10684 	}
10685 
10686 	/* Update the open partition map */
10687 	if (otyp == OTYP_LYR) {
10688 		un->un_ocmap.lyropen[part] -= 1;
10689 	} else {
10690 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10691 	}
10692 
10693 	cp = &un->un_ocmap.chkd[0];
10694 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10695 		if (*cp != NULL) {
10696 			break;
10697 		}
10698 		cp++;
10699 	}
10700 
10701 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10702 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10703 
10704 		/*
10705 		 * We avoid persistance upon the last close, and set
10706 		 * the throttle back to the maximum.
10707 		 */
10708 		un->un_throttle = un->un_saved_throttle;
10709 
10710 		if (un->un_state == SD_STATE_OFFLINE) {
10711 			if (un->un_f_is_fibre == FALSE) {
10712 				scsi_log(SD_DEVINFO(un), sd_label,
10713 					CE_WARN, "offline\n");
10714 			}
10715 			un->un_f_geometry_is_valid = FALSE;
10716 
10717 		} else {
10718 			/*
10719 			 * Flush any outstanding writes in NVRAM cache.
10720 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10721 			 * cmd, it may not work for non-Pluto devices.
10722 			 * SYNCHRONIZE CACHE is not required for removables,
10723 			 * except DVD-RAM drives.
10724 			 *
10725 			 * Also note: because SYNCHRONIZE CACHE is currently
10726 			 * the only command issued here that requires the
10727 			 * drive be powered up, only do the power up before
10728 			 * sending the Sync Cache command. If additional
10729 			 * commands are added which require a powered up
10730 			 * drive, the following sequence may have to change.
10731 			 *
10732 			 * And finally, note that parallel SCSI on SPARC
10733 			 * only issues a Sync Cache to DVD-RAM, a newly
10734 			 * supported device.
10735 			 */
10736 #if defined(__i386) || defined(__amd64)
10737 			if (un->un_f_sync_cache_supported ||
10738 			    un->un_f_dvdram_writable_device == TRUE) {
10739 #else
10740 			if (un->un_f_dvdram_writable_device == TRUE) {
10741 #endif
10742 				mutex_exit(SD_MUTEX(un));
10743 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10744 					rval =
10745 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10746 					    NULL);
10747 					/* ignore error if not supported */
10748 					if (rval == ENOTSUP) {
10749 						rval = 0;
10750 					} else if (rval != 0) {
10751 						rval = EIO;
10752 					}
10753 					sd_pm_exit(un);
10754 				} else {
10755 					rval = EIO;
10756 				}
10757 				mutex_enter(SD_MUTEX(un));
10758 			}
10759 
10760 			/*
10761 			 * For devices which supports DOOR_LOCK, send an ALLOW
10762 			 * MEDIA REMOVAL command, but don't get upset if it
10763 			 * fails. We need to raise the power of the drive before
10764 			 * we can call sd_send_scsi_DOORLOCK()
10765 			 */
10766 			if (un->un_f_doorlock_supported) {
10767 				mutex_exit(SD_MUTEX(un));
10768 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10769 					rval = sd_send_scsi_DOORLOCK(un,
10770 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10771 
10772 					sd_pm_exit(un);
10773 					if (ISCD(un) && (rval != 0) &&
10774 					    (nodelay != 0)) {
10775 						rval = ENXIO;
10776 					}
10777 				} else {
10778 					rval = EIO;
10779 				}
10780 				mutex_enter(SD_MUTEX(un));
10781 			}
10782 
10783 			/*
10784 			 * If a device has removable media, invalidate all
10785 			 * parameters related to media, such as geometry,
10786 			 * blocksize, and blockcount.
10787 			 */
10788 			if (un->un_f_has_removable_media) {
10789 				sr_ejected(un);
10790 			}
10791 
10792 			/*
10793 			 * Destroy the cache (if it exists) which was
10794 			 * allocated for the write maps since this is
10795 			 * the last close for this media.
10796 			 */
10797 			if (un->un_wm_cache) {
10798 				/*
10799 				 * Check if there are pending commands.
10800 				 * and if there are give a warning and
10801 				 * do not destroy the cache.
10802 				 */
10803 				if (un->un_ncmds_in_driver > 0) {
10804 					scsi_log(SD_DEVINFO(un),
10805 					    sd_label, CE_WARN,
10806 					    "Unable to clean up memory "
10807 					    "because of pending I/O\n");
10808 				} else {
10809 					kmem_cache_destroy(
10810 					    un->un_wm_cache);
10811 					un->un_wm_cache = NULL;
10812 				}
10813 			}
10814 		}
10815 	}
10816 
10817 	mutex_exit(SD_MUTEX(un));
10818 	sema_v(&un->un_semoclose);
10819 
10820 	if (otyp == OTYP_LYR) {
10821 		mutex_enter(&sd_detach_mutex);
10822 		/*
10823 		 * The detach routine may run when the layer count
10824 		 * drops to zero.
10825 		 */
10826 		un->un_layer_count--;
10827 		mutex_exit(&sd_detach_mutex);
10828 	}
10829 
10830 	return (rval);
10831 }
10832 
10833 
10834 /*
10835  *    Function: sd_ready_and_valid
10836  *
10837  * Description: Test if device is ready and has a valid geometry.
10838  *
10839  *   Arguments: dev - device number
10840  *		un  - driver soft state (unit) structure
10841  *
10842  * Return Code: SD_READY_VALID		ready and valid label
10843  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10844  *		SD_NOT_READY_VALID	not ready, no label
10845  *
10846  *     Context: Never called at interrupt context.
10847  */
10848 
static int
sd_ready_and_valid(struct sd_lun *un)
{
	struct sd_errstats	*stp;
	uint64_t		capacity;
	uint_t			lbasize;
	int			rval = SD_READY_VALID;
	char			name_str[48];

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	/*
	 * If a device has removable media, we must check if media is
	 * ready when checking if this device is ready and valid.
	 */
	if (un->un_f_has_removable_media) {
		/* SD_MUTEX is dropped around each SCSI command below. */
		mutex_exit(SD_MUTEX(un));
		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		mutex_enter(SD_MUTEX(un));
		if ((un->un_f_geometry_is_valid == FALSE) ||
		    (un->un_f_blockcount_is_valid == FALSE) ||
		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {

			/* capacity has to be read every open. */
			mutex_exit(SD_MUTEX(un));
			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
			    &lbasize, SD_PATH_DIRECT) != 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_f_geometry_is_valid = FALSE;
				rval = SD_NOT_READY_VALID;
				goto done;
			} else {
				mutex_enter(SD_MUTEX(un));
				sd_update_block_info(un, lbasize, capacity);
			}
		}

		/*
		 * Check if the media in the device is writable or not.
		 */
		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
			sd_check_for_writable_cd(un);
		}

	} else {
		/*
		 * Do a test unit ready to clear any unit attention from non-cd
		 * devices.
		 */
		mutex_exit(SD_MUTEX(un));
		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
		mutex_enter(SD_MUTEX(un));
	}


	/*
	 * If this is a non 512 block device, allocate space for
	 * the wmap cache. This is being done here since every time
	 * a media is changed this routine will be called and the
	 * block size is a function of media rather than device.
	 */
	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
		if (!(un->un_wm_cache)) {
			(void) snprintf(name_str, sizeof (name_str),
			    "%s%d_cache",
			    ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			un->un_wm_cache = kmem_cache_create(
			    name_str, sizeof (struct sd_w_map),
			    8, sd_wm_cache_constructor,
			    sd_wm_cache_destructor, NULL,
			    (void *)un, NULL, 0);
			if (!(un->un_wm_cache)) {
					/*
					 * NOTE(review): ENOMEM is returned
					 * here although callers compare the
					 * result against the SD_* codes;
					 * it is treated as a generic failure
					 * (!= SD_READY_VALID) — confirm.
					 */
					rval = ENOMEM;
					goto done;
			}
		}
	}

	if (un->un_state == SD_STATE_NORMAL) {
		/*
		 * If the target is not yet ready here (defined by a TUR
		 * failure), invalidate the geometry and print an 'offline'
		 * message. This is a legacy message, as the state of the
		 * target is not actually changed to SD_STATE_OFFLINE.
		 *
		 * If the TUR fails for EACCES (Reservation Conflict), it
		 * means there actually is nothing wrong with the target that
		 * would require invalidating the geometry, so continue in
		 * that case as if the TUR was successful.
		 */
		int err;

		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
		mutex_enter(SD_MUTEX(un));

		if ((err != 0) && (err != EACCES)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "offline\n");
			un->un_f_geometry_is_valid = FALSE;
			rval = SD_NOT_READY_VALID;
			goto done;
		}
	}

	if (un->un_f_format_in_progress == FALSE) {
		/*
		 * Note: sd_validate_geometry may return TRUE, but that does
		 * not necessarily mean un_f_geometry_is_valid == TRUE!
		 */
		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
		if (rval == ENOTSUP) {
			if (un->un_f_geometry_is_valid == TRUE)
				rval = 0;
			else {
				rval = SD_READY_NOT_VALID;
				goto done;
			}
		}
		if (rval != 0) {
			/*
			 * We don't check the validity of geometry for
			 * CDROMs. Also we assume we have a good label
			 * even if sd_validate_geometry returned ENOMEM.
			 */
			if (!ISCD(un) && rval != ENOMEM) {
				rval = SD_NOT_READY_VALID;
				goto done;
			}
		}
	}

	/*
	 * If this device supports DOOR_LOCK command, try and send
	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
	 * if it fails. For a CD, however, it is an error
	 */
	if (un->un_f_doorlock_supported) {
		mutex_exit(SD_MUTEX(un));
		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* The state has changed, inform the media watch routines */
	un->un_mediastate = DKIO_INSERTED;
	cv_broadcast(&un->un_state_cv);
	rval = SD_READY_VALID;

done:

	/*
	 * Initialize the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_blockcount > 0).
	 */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}

	/* SD_MUTEX is held on every path that reaches 'done'. */
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
11030 
11031 
11032 /*
11033  *    Function: sdmin
11034  *
11035  * Description: Routine to limit the size of a data transfer. Used in
11036  *		conjunction with physio(9F).
11037  *
11038  *   Arguments: bp - pointer to the indicated buf(9S) struct.
11039  *
11040  *     Context: Kernel thread context.
11041  */
11042 
11043 static void
11044 sdmin(struct buf *bp)
11045 {
11046 	struct sd_lun	*un;
11047 	int		instance;
11048 
11049 	instance = SDUNIT(bp->b_edev);
11050 
11051 	un = ddi_get_soft_state(sd_state, instance);
11052 	ASSERT(un != NULL);
11053 
11054 	if (bp->b_bcount > un->un_max_xfer_size) {
11055 		bp->b_bcount = un->un_max_xfer_size;
11056 	}
11057 }
11058 
11059 
11060 /*
11061  *    Function: sdread
11062  *
11063  * Description: Driver's read(9e) entry point function.
11064  *
11065  *   Arguments: dev   - device number
11066  *		uio   - structure pointer describing where data is to be stored
11067  *			in user's space
11068  *		cred_p  - user credential pointer
11069  *
11070  * Return Code: ENXIO
11071  *		EIO
11072  *		EINVAL
11073  *		value returned by physio
11074  *
11075  *     Context: Kernel thread context.
11076  */
/* ARGSUSED */
static int
sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	int		secmask;
	int		err;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * For non-CD devices whose geometry is not (yet) valid, force a
	 * ready/validate pass before allowing the read to proceed.
	 */
	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/*
		 * Count the validation I/O so suspend/detach will wait for
		 * it to drain; the count is dropped again on both the
		 * failure and success paths below.
		 */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));
		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
			mutex_enter(SD_MUTEX(un));
			un->un_ncmds_in_driver--;
			ASSERT(un->un_ncmds_in_driver >= 0);
			mutex_exit(SD_MUTEX(un));
			return (EIO);
		}
		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Read requests are restricted to multiples of the system block size.
	 */
	secmask = un->un_sys_blocksize - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdread: file offset not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		/*
		 * NOTE(review): only the first iovec's length is checked
		 * here; presumably callers pass a single iovec -- confirm.
		 */
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdread: transfer length not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else {
		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
	}
	return (err);
}
11137 
11138 
11139 /*
11140  *    Function: sdwrite
11141  *
11142  * Description: Driver's write(9e) entry point function.
11143  *
11144  *   Arguments: dev   - device number
11145  *		uio   - structure pointer describing where data is stored in
11146  *			user's space
11147  *		cred_p  - user credential pointer
11148  *
11149  * Return Code: ENXIO
11150  *		EIO
11151  *		EINVAL
11152  *		value returned by physio
11153  *
11154  *     Context: Kernel thread context.
11155  */
/* ARGSUSED */
static int
sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	int		secmask;
	int		err;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * For non-CD devices whose geometry is not (yet) valid, force a
	 * ready/validate pass before allowing the write to proceed.
	 */
	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/*
		 * Count the validation I/O so suspend/detach will wait for
		 * it to drain; the count is dropped again on both the
		 * failure and success paths below.
		 */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));
		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
			mutex_enter(SD_MUTEX(un));
			un->un_ncmds_in_driver--;
			ASSERT(un->un_ncmds_in_driver >= 0);
			mutex_exit(SD_MUTEX(un));
			return (EIO);
		}
		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Write requests are restricted to multiples of the system block size.
	 */
	secmask = un->un_sys_blocksize - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdwrite: file offset not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		/*
		 * NOTE(review): only the first iovec's length is checked
		 * here; presumably callers pass a single iovec -- confirm.
		 */
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdwrite: transfer length not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else {
		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
	}
	return (err);
}
11216 
11217 
11218 /*
11219  *    Function: sdaread
11220  *
11221  * Description: Driver's aread(9e) entry point function.
11222  *
11223  *   Arguments: dev   - device number
11224  *		aio   - structure pointer describing where data is to be stored
11225  *		cred_p  - user credential pointer
11226  *
11227  * Return Code: ENXIO
11228  *		EIO
11229  *		EINVAL
11230  *		value returned by aphysio
11231  *
11232  *     Context: Kernel thread context.
11233  */
/* ARGSUSED */
static int
sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	struct uio	*uio = aio->aio_uio;
	int		secmask;
	int		err;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * For non-CD devices whose geometry is not (yet) valid, force a
	 * ready/validate pass before allowing the async read to proceed.
	 */
	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/*
		 * Count the validation I/O so suspend/detach will wait for
		 * it to drain; the count is dropped again on both the
		 * failure and success paths below.
		 */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));
		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
			mutex_enter(SD_MUTEX(un));
			un->un_ncmds_in_driver--;
			ASSERT(un->un_ncmds_in_driver >= 0);
			mutex_exit(SD_MUTEX(un));
			return (EIO);
		}
		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Read requests are restricted to multiples of the system block size.
	 */
	secmask = un->un_sys_blocksize - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdaread: file offset not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		/*
		 * NOTE(review): only the first iovec's length is checked
		 * here; presumably callers pass a single iovec -- confirm.
		 */
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdaread: transfer length not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else {
		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
	}
	return (err);
}
11295 
11296 
11297 /*
11298  *    Function: sdawrite
11299  *
11300  * Description: Driver's awrite(9e) entry point function.
11301  *
11302  *   Arguments: dev   - device number
11303  *		aio   - structure pointer describing where data is stored
11304  *		cred_p  - user credential pointer
11305  *
11306  * Return Code: ENXIO
11307  *		EIO
11308  *		EINVAL
11309  *		value returned by aphysio
11310  *
11311  *     Context: Kernel thread context.
11312  */
/* ARGSUSED */
static int
sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	struct uio	*uio = aio->aio_uio;
	int		secmask;
	int		err;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * For non-CD devices whose geometry is not (yet) valid, force a
	 * ready/validate pass before allowing the async write to proceed.
	 */
	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/*
		 * Count the validation I/O so suspend/detach will wait for
		 * it to drain; the count is dropped again on both the
		 * failure and success paths below.
		 */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));
		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
			mutex_enter(SD_MUTEX(un));
			un->un_ncmds_in_driver--;
			ASSERT(un->un_ncmds_in_driver >= 0);
			mutex_exit(SD_MUTEX(un));
			return (EIO);
		}
		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Write requests are restricted to multiples of the system block size.
	 */
	secmask = un->un_sys_blocksize - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdawrite: file offset not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		/*
		 * NOTE(review): only the first iovec's length is checked
		 * here; presumably callers pass a single iovec -- confirm.
		 */
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdawrite: transfer length not modulo %d\n",
		    un->un_sys_blocksize);
		err = EINVAL;
	} else {
		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
	}
	return (err);
}
11374 
11375 
11376 
11377 
11378 
11379 /*
11380  * Driver IO processing follows the following sequence:
11381  *
11382  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11383  *         |                |                     ^
11384  *         v                v                     |
11385  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11386  *         |                |                     |                   |
11387  *         v                |                     |                   |
11388  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11389  *         |                |                     ^                   ^
11390  *         v                v                     |                   |
11391  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11392  *         |                |                     |                   |
11393  *     +---+                |                     +------------+      +-------+
11394  *     |                    |                                  |              |
11395  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11396  *     |                    v                                  |              |
11397  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11398  *     |                    |                                  ^              |
11399  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11400  *     |                    v                                  |              |
11401  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11402  *     |                    |                                  ^              |
11403  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11404  *     |                    v                                  |              |
11405  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11406  *     |                    |                                  ^              |
11407  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11408  *     |                    v                                  |              |
11409  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11410  *     |                    |                                  ^              |
11411  *     |                    |                                  |              |
11412  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11413  *                          |                           ^
11414  *                          v                           |
11415  *                   sd_core_iostart()                  |
11416  *                          |                           |
11417  *                          |                           +------>(*destroypkt)()
11418  *                          +-> sd_start_cmds() <-+     |           |
11419  *                          |                     |     |           v
11420  *                          |                     |     |  scsi_destroy_pkt(9F)
11421  *                          |                     |     |
11422  *                          +->(*initpkt)()       +- sdintr()
11423  *                          |  |                        |  |
11424  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11425  *                          |  +-> scsi_setup_cdb(9F)   |
11426  *                          |                           |
11427  *                          +--> scsi_transport(9F)     |
11428  *                                     |                |
11429  *                                     +----> SCSA ---->+
11430  *
11431  *
 * This code is based upon the following presumptions:
11433  *
11434  *   - iostart and iodone functions operate on buf(9S) structures. These
11435  *     functions perform the necessary operations on the buf(9S) and pass
11436  *     them along to the next function in the chain by using the macros
11437  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11438  *     (for iodone side functions).
11439  *
11440  *   - The iostart side functions may sleep. The iodone side functions
11441  *     are called under interrupt context and may NOT sleep. Therefore
11442  *     iodone side functions also may not call iostart side functions.
11443  *     (NOTE: iostart side functions should NOT sleep for memory, as
11444  *     this could result in deadlock.)
11445  *
11446  *   - An iostart side function may call its corresponding iodone side
11447  *     function directly (if necessary).
11448  *
11449  *   - In the event of an error, an iostart side function can return a buf(9S)
11450  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11451  *     b_error in the usual way of course).
11452  *
11453  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11454  *     requests to the iostart side functions.  The iostart side functions in
11455  *     this case would be called under the context of a taskq thread, so it's
11456  *     OK for them to block/sleep/spin in this case.
11457  *
11458  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11459  *     pass them along to the next function in the chain.  The corresponding
11460  *     iodone side functions must coalesce the "shadow" bufs and return
11461  *     the "original" buf to the next higher layer.
11462  *
11463  *   - The b_private field of the buf(9S) struct holds a pointer to
11464  *     an sd_xbuf struct, which contains information needed to
11465  *     construct the scsi_pkt for the command.
11466  *
11467  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11468  *     layer must acquire & release the SD_MUTEX(un) as needed.
11469  */
11470 
11471 
11472 /*
11473  * Create taskq for all targets in the system. This is created at
11474  * _init(9E) and destroyed at _fini(9E).
11475  *
11476  * Note: here we set the minalloc to a reasonably high number to ensure that
11477  * we will have an adequate supply of task entries available at interrupt time.
11478  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11479  * sd_create_taskq().  Since we do not want to sleep for allocations at
11480  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11481  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11482  * requests any one instant in time.
11483  */
11484 #define	SD_TASKQ_NUMTHREADS	8
11485 #define	SD_TASKQ_MINALLOC	256
11486 #define	SD_TASKQ_MAXALLOC	256
11487 
11488 static taskq_t	*sd_tq = NULL;
11489 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11490 
11491 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11492 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11493 
11494 /*
11495  * The following task queue is being created for the write part of
11496  * read-modify-write of non-512 block size devices.
 * Limit the number of threads to 1 for now. This number has been chosen
 * considering the fact that it applies only to DVD-RAM drives/MO drives
 * currently, for which performance is not the main criterion at this stage.
11500  * Note: It needs to be explored if we can use a single taskq in future
11501  */
11502 #define	SD_WMR_TASKQ_NUMTHREADS	1
11503 static taskq_t	*sd_wmr_tq = NULL;
11504 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11505 
11506 /*
11507  *    Function: sd_taskq_create
11508  *
11509  * Description: Create taskq thread(s) and preallocate task entries
11510  *
11511  * Return Code: Returns a pointer to the allocated taskq_t.
11512  *
11513  *     Context: Can sleep. Requires blockable context.
11514  *
11515  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11517  *		- taskq_create() will block for memory, also it will panic
11518  *		  if it cannot create the requested number of threads.
11519  *		- Currently taskq_create() creates threads that cannot be
11520  *		  swapped.
11521  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11522  *		  supply of taskq entries at interrupt time (ie, so that we
11523  *		  do not have to sleep for memory)
11524  */
11525 
11526 static void
11527 sd_taskq_create(void)
11528 {
11529 	char	taskq_name[TASKQ_NAMELEN];
11530 
11531 	ASSERT(sd_tq == NULL);
11532 	ASSERT(sd_wmr_tq == NULL);
11533 
11534 	(void) snprintf(taskq_name, sizeof (taskq_name),
11535 	    "%s_drv_taskq", sd_label);
11536 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11537 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11538 	    TASKQ_PREPOPULATE));
11539 
11540 	(void) snprintf(taskq_name, sizeof (taskq_name),
11541 	    "%s_rmw_taskq", sd_label);
11542 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11543 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11544 	    TASKQ_PREPOPULATE));
11545 }
11546 
11547 
11548 /*
11549  *    Function: sd_taskq_delete
11550  *
11551  * Description: Complementary cleanup routine for sd_taskq_create().
11552  *
11553  *     Context: Kernel thread context.
11554  */
11555 
11556 static void
11557 sd_taskq_delete(void)
11558 {
11559 	ASSERT(sd_tq != NULL);
11560 	ASSERT(sd_wmr_tq != NULL);
11561 	taskq_destroy(sd_tq);
11562 	taskq_destroy(sd_wmr_tq);
11563 	sd_tq = NULL;
11564 	sd_wmr_tq = NULL;
11565 }
11566 
11567 
11568 /*
11569  *    Function: sdstrategy
11570  *
11571  * Description: Driver's strategy (9E) entry point function.
11572  *
11573  *   Arguments: bp - pointer to buf(9S)
11574  *
11575  * Return Code: Always returns zero
11576  *
11577  *     Context: Kernel thread context.
11578  */
11579 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	/* No soft state for this device: fail the buf immediately. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}
	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if it's power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/*
	 * NOTE(review): this count is presumably decremented on the iodone
	 * side (sd_buf_iodone) once the command leaves the driver -- confirm.
	 */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
11643 
11644 
11645 /*
11646  *    Function: sd_xbuf_strategy
11647  *
11648  * Description: Function for initiating IO operations via the
11649  *		ddi_xbuf_qstrategy() mechanism.
11650  *
11651  *     Context: Kernel thread context.
11652  */
11653 
11654 static void
11655 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11656 {
11657 	struct sd_lun *un = arg;
11658 
11659 	ASSERT(bp != NULL);
11660 	ASSERT(xp != NULL);
11661 	ASSERT(un != NULL);
11662 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11663 
11664 	/*
11665 	 * Initialize the fields in the xbuf and save a pointer to the
11666 	 * xbuf in bp->b_private.
11667 	 */
11668 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11669 
11670 	/* Send the buf down the iostart chain */
11671 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11672 }
11673 
11674 
11675 /*
11676  *    Function: sd_xbuf_init
11677  *
11678  * Description: Prepare the given sd_xbuf struct for use.
11679  *
11680  *   Arguments: un - ptr to softstate
11681  *		bp - ptr to associated buf(9S)
11682  *		xp - ptr to associated sd_xbuf
11683  *		chain_type - IO chain type to use:
11684  *			SD_CHAIN_NULL
11685  *			SD_CHAIN_BUFIO
11686  *			SD_CHAIN_USCSI
11687  *			SD_CHAIN_DIRECT
11688  *			SD_CHAIN_DIRECT_PRIORITY
11689  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11690  *			initialization; may be NULL if none.
11691  *
11692  *     Context: Kernel thread context
11693  */
11694 
11695 static void
11696 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11697 	uchar_t chain_type, void *pktinfop)
11698 {
11699 	int index;
11700 
11701 	ASSERT(un != NULL);
11702 	ASSERT(bp != NULL);
11703 	ASSERT(xp != NULL);
11704 
11705 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11706 	    bp, chain_type);
11707 
11708 	xp->xb_un	= un;
11709 	xp->xb_pktp	= NULL;
11710 	xp->xb_pktinfo	= pktinfop;
11711 	xp->xb_private	= bp->b_private;
11712 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11713 
11714 	/*
11715 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11716 	 * upon the specified chain type to use.
11717 	 */
11718 	switch (chain_type) {
11719 	case SD_CHAIN_NULL:
11720 		/*
11721 		 * Fall thru to just use the values for the buf type, even
11722 		 * tho for the NULL chain these values will never be used.
11723 		 */
11724 		/* FALLTHRU */
11725 	case SD_CHAIN_BUFIO:
11726 		index = un->un_buf_chain_type;
11727 		break;
11728 	case SD_CHAIN_USCSI:
11729 		index = un->un_uscsi_chain_type;
11730 		break;
11731 	case SD_CHAIN_DIRECT:
11732 		index = un->un_direct_chain_type;
11733 		break;
11734 	case SD_CHAIN_DIRECT_PRIORITY:
11735 		index = un->un_priority_chain_type;
11736 		break;
11737 	default:
11738 		/* We're really broken if we ever get here... */
11739 		panic("sd_xbuf_init: illegal chain type!");
11740 		/*NOTREACHED*/
11741 	}
11742 
11743 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11744 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11745 
11746 	/*
11747 	 * It might be a bit easier to simply bzero the entire xbuf above,
11748 	 * but it turns out that since we init a fair number of members anyway,
11749 	 * we save a fair number cycles by doing explicit assignment of zero.
11750 	 */
11751 	xp->xb_pkt_flags	= 0;
11752 	xp->xb_dma_resid	= 0;
11753 	xp->xb_retry_count	= 0;
11754 	xp->xb_victim_retry_count = 0;
11755 	xp->xb_ua_retry_count	= 0;
11756 	xp->xb_sense_bp		= NULL;
11757 	xp->xb_sense_status	= 0;
11758 	xp->xb_sense_state	= 0;
11759 	xp->xb_sense_resid	= 0;
11760 
11761 	bp->b_private	= xp;
11762 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11763 	bp->b_resid	= 0;
11764 	bp->av_forw	= NULL;
11765 	bp->av_back	= NULL;
11766 	bioerror(bp, 0);
11767 
11768 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11769 }
11770 
11771 
11772 /*
11773  *    Function: sd_uscsi_strategy
11774  *
11775  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11776  *
11777  *   Arguments: bp - buf struct ptr
11778  *
11779  * Return Code: Always returns 0
11780  *
11781  *     Context: Kernel thread context
11782  */
11783 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;

	ASSERT(bp != NULL);

	/* No soft state for this device: fail the buf immediately. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	/*
	 * NOTE(review): this count is presumably decremented on the iodone
	 * side (sd_uscsi_iodone) once the command completes -- confirm.
	 */
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);
	mutex_exit(SD_MUTEX(un));

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;

	/* Map the caller's path flag onto the corresponding IO chain. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/*
	 * NOTE(review): xp is presumably freed on the iodone side after the
	 * command completes -- confirm against sd_uscsi_iodone().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11850 
11851 
11852 /*
11853  * These routines perform raw i/o operations.
11854  */
/*ARGSUSED*/
static void
sduscsimin(struct buf *bp)
{
	/*
	 * Deliberately empty: a USCSI transfer must never be broken up,
	 * because splitting it would make the CDB transfer count wrong
	 * and cause data underruns (incomplete read/writes that would be
	 * retried and then failed; see sdintr()).
	 */
}
11866 
11867 
11868 
11869 /*
11870  *    Function: sd_send_scsi_cmd
11871  *
11872  * Description: Runs a USCSI command for user (when called thru sdioctl),
11873  *		or for the driver
11874  *
11875  *   Arguments: dev - the dev_t for the device
11876  *		incmd - ptr to a valid uscsi_cmd struct
11877  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11878  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11879  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11880  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11881  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11882  *			to use the USCSI "direct" chain and bypass the normal
11883  *			command waitq.
11884  *
11885  * Return Code: 0 -  successful completion of the given command
11886  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11887  *		ENXIO  - soft state not found for specified dev
11888  *		EINVAL
11889  *		EFAULT - copyin/copyout error
11890  *		return code of biowait(9F) or physio(9F):
11891  *			EIO - IO error, caller may check incmd->uscsi_status
11892  *			ENXIO
11893  *			EACCES - reservation conflict
11894  *
11895  *     Context: Waits for command to complete. Can sleep.
11896  */
11897 
11898 static int
11899 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11900 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11901 	int path_flag)
11902 {
11903 	struct sd_uscsi_info	*uip;
11904 	struct uscsi_cmd	*uscmd;
11905 	struct sd_lun	*un;
11906 	struct buf	*bp;
11907 	int	rval;
11908 	int	flags;
11909 
11910 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11911 	if (un == NULL) {
11912 		return (ENXIO);
11913 	}
11914 
11915 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11916 
11917 #ifdef SDDEBUG
11918 	switch (dataspace) {
11919 	case UIO_USERSPACE:
11920 		SD_TRACE(SD_LOG_IO, un,
11921 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11922 		break;
11923 	case UIO_SYSSPACE:
11924 		SD_TRACE(SD_LOG_IO, un,
11925 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11926 		break;
11927 	default:
11928 		SD_TRACE(SD_LOG_IO, un,
11929 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11930 		break;
11931 	}
11932 #endif
11933 
11934 	/*
11935 	 * Perform resets directly; no need to generate a command to do it.
11936 	 */
11937 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11938 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11939 		    RESET_ALL : RESET_TARGET;
11940 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11941 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11942 			/* Reset attempt was unsuccessful */
11943 			SD_TRACE(SD_LOG_IO, un,
11944 			    "sd_send_scsi_cmd: reset: failure\n");
11945 			return (EIO);
11946 		}
11947 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11948 		return (0);
11949 	}
11950 
11951 	/* Perfunctory sanity check... */
11952 	if (incmd->uscsi_cdblen <= 0) {
11953 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11954 		    "invalid uscsi_cdblen, returning EINVAL\n");
11955 		return (EINVAL);
11956 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11957 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11958 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11959 		return (EINVAL);
11960 	}
11961 
11962 	/*
11963 	 * In order to not worry about where the uscsi structure came from
11964 	 * (or where the cdb it points to came from) we're going to make
11965 	 * kmem_alloc'd copies of them here. This will also allow reference
11966 	 * to the data they contain long after this process has gone to
11967 	 * sleep and its kernel stack has been unmapped, etc.
11968 	 *
11969 	 * First get some memory for the uscsi_cmd struct and copy the
11970 	 * contents of the given uscsi_cmd struct into it.
11971 	 */
11972 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11973 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11974 
11975 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11976 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11977 
11978 	/*
11979 	 * Now get some space for the CDB, and copy the given CDB into
11980 	 * it. Use ddi_copyin() in case the data is in user space.
11981 	 */
11982 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11983 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11984 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11985 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11986 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11987 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11988 		return (EFAULT);
11989 	}
11990 
11991 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11992 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11993 
11994 	bp = getrbuf(KM_SLEEP);
11995 
11996 	/*
11997 	 * Allocate an sd_uscsi_info struct and fill it with the info
11998 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11999 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12000 	 * since we allocate the buf here in this function, we do not
12001 	 * need to preserve the prior contents of b_private.
12002 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12003 	 */
12004 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
12005 	uip->ui_flags = path_flag;
12006 	uip->ui_cmdp  = uscmd;
12007 	bp->b_private = uip;
12008 
12009 	/*
12010 	 * Initialize Request Sense buffering, if requested.
12011 	 */
12012 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12013 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12014 		/*
12015 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
12016 		 * buffer, but we replace this with a kernel buffer that
12017 		 * we allocate to use with the sense data. The sense data
12018 		 * (if present) gets copied into this new buffer before the
12019 		 * command is completed.  Then we copy the sense data from
12020 		 * our allocated buf into the caller's buffer below. Note
12021 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
12022 		 * below to perform the copy back to the caller's buf.
12023 		 */
12024 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
12025 		if (rqbufspace == UIO_USERSPACE) {
12026 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
12027 			uscmd->uscsi_rqresid = SENSE_LENGTH;
12028 		} else {
12029 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
12030 			uscmd->uscsi_rqlen   = rlen;
12031 			uscmd->uscsi_rqresid = rlen;
12032 		}
12033 	} else {
12034 		uscmd->uscsi_rqbuf = NULL;
12035 		uscmd->uscsi_rqlen   = 0;
12036 		uscmd->uscsi_rqresid = 0;
12037 	}
12038 
12039 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
12040 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
12041 
12042 	if (un->un_f_is_fibre == FALSE) {
12043 		/*
12044 		 * Force asynchronous mode, if necessary.  Doing this here
12045 		 * has the unfortunate effect of running other queued
12046 		 * commands async also, but since the main purpose of this
12047 		 * capability is downloading new drive firmware, we can
12048 		 * probably live with it.
12049 		 */
12050 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
12051 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12052 				== 1) {
12053 				if (scsi_ifsetcap(SD_ADDRESS(un),
12054 					    "synchronous", 0, 1) == 1) {
12055 					SD_TRACE(SD_LOG_IO, un,
12056 					"sd_send_scsi_cmd: forced async ok\n");
12057 				} else {
12058 					SD_TRACE(SD_LOG_IO, un,
12059 					"sd_send_scsi_cmd:\
12060 					forced async failed\n");
12061 					rval = EINVAL;
12062 					goto done;
12063 				}
12064 			}
12065 		}
12066 
12067 		/*
12068 		 * Re-enable synchronous mode, if requested
12069 		 */
12070 		if (uscmd->uscsi_flags & USCSI_SYNC) {
12071 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12072 				== 0) {
12073 				int i = scsi_ifsetcap(SD_ADDRESS(un),
12074 						"synchronous", 1, 1);
12075 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12076 					"re-enabled sync %s\n",
12077 					(i == 1) ? "ok" : "failed");
12078 			}
12079 		}
12080 	}
12081 
12082 	/*
12083 	 * Commands sent with priority are intended for error recovery
12084 	 * situations, and do not have retries performed.
12085 	 */
12086 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12087 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12088 	}
12089 
12090 	/*
12091 	 * If we're going to do actual I/O, let physio do all the right things
12092 	 */
12093 	if (uscmd->uscsi_buflen != 0) {
12094 		struct iovec	aiov;
12095 		struct uio	auio;
12096 		struct uio	*uio = &auio;
12097 
12098 		bzero(&auio, sizeof (struct uio));
12099 		bzero(&aiov, sizeof (struct iovec));
12100 		aiov.iov_base = uscmd->uscsi_bufaddr;
12101 		aiov.iov_len  = uscmd->uscsi_buflen;
12102 		uio->uio_iov  = &aiov;
12103 
12104 		uio->uio_iovcnt  = 1;
12105 		uio->uio_resid   = uscmd->uscsi_buflen;
12106 		uio->uio_segflg  = dataspace;
12107 
12108 		/*
12109 		 * physio() will block here until the command completes....
12110 		 */
12111 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12112 
12113 		rval = physio(sd_uscsi_strategy, bp, dev,
12114 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12115 		    sduscsimin, uio);
12116 
12117 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12118 		    "returned from physio with 0x%x\n", rval);
12119 
12120 	} else {
12121 		/*
12122 		 * We have to mimic what physio would do here! Argh!
12123 		 */
12124 		bp->b_flags  = B_BUSY |
12125 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12126 		bp->b_edev   = dev;
12127 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12128 		bp->b_bcount = 0;
12129 		bp->b_blkno  = 0;
12130 
12131 		SD_TRACE(SD_LOG_IO, un,
12132 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12133 
12134 		(void) sd_uscsi_strategy(bp);
12135 
12136 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12137 
12138 		rval = biowait(bp);
12139 
12140 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12141 		    "returned from  biowait with 0x%x\n", rval);
12142 	}
12143 
12144 done:
12145 
12146 #ifdef SDDEBUG
12147 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12148 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12149 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12150 	if (uscmd->uscsi_bufaddr != NULL) {
12151 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12152 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12153 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12154 		if (dataspace == UIO_SYSSPACE) {
12155 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12156 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12157 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12158 		}
12159 	}
12160 #endif
12161 
12162 	/*
12163 	 * Get the status and residual to return to the caller.
12164 	 */
12165 	incmd->uscsi_status = uscmd->uscsi_status;
12166 	incmd->uscsi_resid  = uscmd->uscsi_resid;
12167 
12168 	/*
12169 	 * If the caller wants sense data, copy back whatever sense data
12170 	 * we may have gotten, and update the relevant rqsense info.
12171 	 */
12172 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12173 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12174 
12175 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12176 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
12177 
12178 		/* Update the Request Sense status and resid */
12179 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12180 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12181 
12182 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12183 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12184 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12185 
12186 		/* Copy out the sense data for user processes */
12187 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12188 			int flags =
12189 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12190 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12191 			    rqlen, flags) != 0) {
12192 				rval = EFAULT;
12193 			}
12194 			/*
12195 			 * Note: Can't touch incmd->uscsi_rqbuf so use
12196 			 * uscmd->uscsi_rqbuf instead. They're the same.
12197 			 */
12198 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12199 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12200 			    incmd->uscsi_rqbuf, rqlen);
12201 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12202 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12203 		}
12204 	}
12205 
12206 	/*
12207 	 * Free allocated resources and return; mapout the buf in case it was
12208 	 * mapped in by a lower layer.
12209 	 */
12210 	bp_mapout(bp);
12211 	freerbuf(bp);
12212 	kmem_free(uip, sizeof (struct sd_uscsi_info));
12213 	if (uscmd->uscsi_rqbuf != NULL) {
12214 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12215 	}
12216 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12217 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12218 
12219 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12220 
12221 	return (rval);
12222 }
12223 
12224 
12225 /*
12226  *    Function: sd_buf_iodone
12227  *
12228  * Description: Frees the sd_xbuf & returns the buf to its originator.
12229  *
12230  *     Context: May be called from interrupt context.
12231  */
12232 /* ARGSUSED */
12233 static void
12234 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12235 {
12236 	struct sd_xbuf *xp;
12237 
12238 	ASSERT(un != NULL);
12239 	ASSERT(bp != NULL);
12240 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12241 
12242 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12243 
12244 	xp = SD_GET_XBUF(bp);
12245 	ASSERT(xp != NULL);
12246 
12247 	mutex_enter(SD_MUTEX(un));
12248 
12249 	/*
12250 	 * Grab time when the cmd completed.
12251 	 * This is used for determining if the system has been
12252 	 * idle long enough to make it idle to the PM framework.
12253 	 * This is for lowering the overhead, and therefore improving
12254 	 * performance per I/O operation.
12255 	 */
12256 	un->un_pm_idle_time = ddi_get_time();
12257 
12258 	un->un_ncmds_in_driver--;
12259 	ASSERT(un->un_ncmds_in_driver >= 0);
12260 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12261 	    un->un_ncmds_in_driver);
12262 
12263 	mutex_exit(SD_MUTEX(un));
12264 
12265 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12266 	biodone(bp);				/* bp is gone after this */
12267 
12268 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12269 }
12270 
12271 
12272 /*
12273  *    Function: sd_uscsi_iodone
12274  *
12275  * Description: Frees the sd_xbuf & returns the buf to its originator.
12276  *
12277  *     Context: May be called from interrupt context.
12278  */
12279 /* ARGSUSED */
12280 static void
12281 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12282 {
12283 	struct sd_xbuf *xp;
12284 
12285 	ASSERT(un != NULL);
12286 	ASSERT(bp != NULL);
12287 
12288 	xp = SD_GET_XBUF(bp);
12289 	ASSERT(xp != NULL);
12290 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12291 
12292 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12293 
12294 	bp->b_private = xp->xb_private;
12295 
12296 	mutex_enter(SD_MUTEX(un));
12297 
12298 	/*
12299 	 * Grab time when the cmd completed.
12300 	 * This is used for determining if the system has been
12301 	 * idle long enough to make it idle to the PM framework.
12302 	 * This is for lowering the overhead, and therefore improving
12303 	 * performance per I/O operation.
12304 	 */
12305 	un->un_pm_idle_time = ddi_get_time();
12306 
12307 	un->un_ncmds_in_driver--;
12308 	ASSERT(un->un_ncmds_in_driver >= 0);
12309 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12310 	    un->un_ncmds_in_driver);
12311 
12312 	mutex_exit(SD_MUTEX(un));
12313 
12314 	kmem_free(xp, sizeof (struct sd_xbuf));
12315 	biodone(bp);
12316 
12317 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12318 }
12319 
12320 
12321 /*
12322  *    Function: sd_mapblockaddr_iostart
12323  *
12324  * Description: Verify request lies withing the partition limits for
12325  *		the indicated minor device.  Issue "overrun" buf if
12326  *		request would exceed partition range.  Converts
12327  *		partition-relative block address to absolute.
12328  *
12329  *     Context: Can sleep
12330  *
12331  *      Issues: This follows what the old code did, in terms of accessing
12332  *		some of the partition info in the unit struct without holding
12333  *		the mutext.  This is a general issue, if the partition info
12334  *		can be altered while IO is in progress... as soon as we send
12335  *		a buf, its partitioning can be invalid before it gets to the
12336  *		device.  Probably the right fix is to move partitioning out
12337  *		of the driver entirely.
12338  */
12339 
12340 static void
12341 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12342 {
12343 	daddr_t	nblocks;	/* #blocks in the given partition */
12344 	daddr_t	blocknum;	/* Block number specified by the buf */
12345 	size_t	requested_nblocks;
12346 	size_t	available_nblocks;
12347 	int	partition;
12348 	diskaddr_t	partition_offset;
12349 	struct sd_xbuf *xp;
12350 
12351 
12352 	ASSERT(un != NULL);
12353 	ASSERT(bp != NULL);
12354 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12355 
12356 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12357 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12358 
12359 	xp = SD_GET_XBUF(bp);
12360 	ASSERT(xp != NULL);
12361 
12362 	/*
12363 	 * If the geometry is not indicated as valid, attempt to access
12364 	 * the unit & verify the geometry/label. This can be the case for
12365 	 * removable-media devices, of if the device was opened in
12366 	 * NDELAY/NONBLOCK mode.
12367 	 */
12368 	if ((un->un_f_geometry_is_valid != TRUE) &&
12369 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12370 		/*
12371 		 * For removable devices it is possible to start an I/O
12372 		 * without a media by opening the device in nodelay mode.
12373 		 * Also for writable CDs there can be many scenarios where
12374 		 * there is no geometry yet but volume manager is trying to
12375 		 * issue a read() just because it can see TOC on the CD. So
12376 		 * do not print a message for removables.
12377 		 */
12378 		if (!un->un_f_has_removable_media) {
12379 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12380 			    "i/o to invalid geometry\n");
12381 		}
12382 		bioerror(bp, EIO);
12383 		bp->b_resid = bp->b_bcount;
12384 		SD_BEGIN_IODONE(index, un, bp);
12385 		return;
12386 	}
12387 
12388 	partition = SDPART(bp->b_edev);
12389 
12390 	/* #blocks in partition */
12391 	nblocks = un->un_map[partition].dkl_nblk;    /* #blocks in partition */
12392 
12393 	/* Use of a local variable potentially improves performance slightly */
12394 	partition_offset = un->un_offset[partition];
12395 
12396 	/*
12397 	 * blocknum is the starting block number of the request. At this
12398 	 * point it is still relative to the start of the minor device.
12399 	 */
12400 	blocknum = xp->xb_blkno;
12401 
12402 	/*
12403 	 * Legacy: If the starting block number is one past the last block
12404 	 * in the partition, do not set B_ERROR in the buf.
12405 	 */
12406 	if (blocknum == nblocks)  {
12407 		goto error_exit;
12408 	}
12409 
12410 	/*
12411 	 * Confirm that the first block of the request lies within the
12412 	 * partition limits. Also the requested number of bytes must be
12413 	 * a multiple of the system block size.
12414 	 */
12415 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12416 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12417 		bp->b_flags |= B_ERROR;
12418 		goto error_exit;
12419 	}
12420 
12421 	/*
12422 	 * If the requsted # blocks exceeds the available # blocks, that
12423 	 * is an overrun of the partition.
12424 	 */
12425 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12426 	available_nblocks = (size_t)(nblocks - blocknum);
12427 	ASSERT(nblocks >= blocknum);
12428 
12429 	if (requested_nblocks > available_nblocks) {
12430 		/*
12431 		 * Allocate an "overrun" buf to allow the request to proceed
12432 		 * for the amount of space available in the partition. The
12433 		 * amount not transferred will be added into the b_resid
12434 		 * when the operation is complete. The overrun buf
12435 		 * replaces the original buf here, and the original buf
12436 		 * is saved inside the overrun buf, for later use.
12437 		 */
12438 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12439 		    (offset_t)(requested_nblocks - available_nblocks));
12440 		size_t count = bp->b_bcount - resid;
12441 		/*
12442 		 * Note: count is an unsigned entity thus it'll NEVER
12443 		 * be less than 0 so ASSERT the original values are
12444 		 * correct.
12445 		 */
12446 		ASSERT(bp->b_bcount >= resid);
12447 
12448 		bp = sd_bioclone_alloc(bp, count, blocknum,
12449 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12450 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12451 		ASSERT(xp != NULL);
12452 	}
12453 
12454 	/* At this point there should be no residual for this buf. */
12455 	ASSERT(bp->b_resid == 0);
12456 
12457 	/* Convert the block number to an absolute address. */
12458 	xp->xb_blkno += partition_offset;
12459 
12460 	SD_NEXT_IOSTART(index, un, bp);
12461 
12462 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12463 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12464 
12465 	return;
12466 
12467 error_exit:
12468 	bp->b_resid = bp->b_bcount;
12469 	SD_BEGIN_IODONE(index, un, bp);
12470 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12471 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12472 }
12473 
12474 
12475 /*
12476  *    Function: sd_mapblockaddr_iodone
12477  *
12478  * Description: Completion-side processing for partition management.
12479  *
12480  *     Context: May be called under interrupt context
12481  */
12482 
12483 static void
12484 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12485 {
12486 	/* int	partition; */	/* Not used, see below. */
12487 	ASSERT(un != NULL);
12488 	ASSERT(bp != NULL);
12489 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12490 
12491 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12492 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12493 
12494 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12495 		/*
12496 		 * We have an "overrun" buf to deal with...
12497 		 */
12498 		struct sd_xbuf	*xp;
12499 		struct buf	*obp;	/* ptr to the original buf */
12500 
12501 		xp = SD_GET_XBUF(bp);
12502 		ASSERT(xp != NULL);
12503 
12504 		/* Retrieve the pointer to the original buf */
12505 		obp = (struct buf *)xp->xb_private;
12506 		ASSERT(obp != NULL);
12507 
12508 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12509 		bioerror(obp, bp->b_error);
12510 
12511 		sd_bioclone_free(bp);
12512 
12513 		/*
12514 		 * Get back the original buf.
12515 		 * Note that since the restoration of xb_blkno below
12516 		 * was removed, the sd_xbuf is not needed.
12517 		 */
12518 		bp = obp;
12519 		/*
12520 		 * xp = SD_GET_XBUF(bp);
12521 		 * ASSERT(xp != NULL);
12522 		 */
12523 	}
12524 
12525 	/*
12526 	 * Convert sd->xb_blkno back to a minor-device relative value.
12527 	 * Note: this has been commented out, as it is not needed in the
12528 	 * current implementation of the driver (ie, since this function
12529 	 * is at the top of the layering chains, so the info will be
12530 	 * discarded) and it is in the "hot" IO path.
12531 	 *
12532 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12533 	 * xp->xb_blkno -= un->un_offset[partition];
12534 	 */
12535 
12536 	SD_NEXT_IODONE(index, un, bp);
12537 
12538 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12539 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12540 }
12541 
12542 
12543 /*
12544  *    Function: sd_mapblocksize_iostart
12545  *
12546  * Description: Convert between system block size (un->un_sys_blocksize)
12547  *		and target block size (un->un_tgt_blocksize).
12548  *
12549  *     Context: Can sleep to allocate resources.
12550  *
12551  * Assumptions: A higher layer has already performed any partition validation,
12552  *		and converted the xp->xb_blkno to an absolute value relative
12553  *		to the start of the device.
12554  *
12555  *		It is also assumed that the higher layer has implemented
12556  *		an "overrun" mechanism for the case where the request would
12557  *		read/write beyond the end of a partition.  In this case we
12558  *		assume (and ASSERT) that bp->b_resid == 0.
12559  *
12560  *		Note: The implementation for this routine assumes the target
12561  *		block size remains constant between allocation and transport.
12562  */
12563 
12564 static void
12565 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12566 {
12567 	struct sd_mapblocksize_info	*bsp;
12568 	struct sd_xbuf			*xp;
12569 	offset_t first_byte;
12570 	daddr_t	start_block, end_block;
12571 	daddr_t	request_bytes;
12572 	ushort_t is_aligned = FALSE;
12573 
12574 	ASSERT(un != NULL);
12575 	ASSERT(bp != NULL);
12576 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12577 	ASSERT(bp->b_resid == 0);
12578 
12579 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12580 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12581 
12582 	/*
12583 	 * For a non-writable CD, a write request is an error
12584 	 */
12585 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12586 	    (un->un_f_mmc_writable_media == FALSE)) {
12587 		bioerror(bp, EIO);
12588 		bp->b_resid = bp->b_bcount;
12589 		SD_BEGIN_IODONE(index, un, bp);
12590 		return;
12591 	}
12592 
12593 	/*
12594 	 * We do not need a shadow buf if the device is using
12595 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12596 	 * In this case there is no layer-private data block allocated.
12597 	 */
12598 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12599 	    (bp->b_bcount == 0)) {
12600 		goto done;
12601 	}
12602 
12603 #if defined(__i386) || defined(__amd64)
12604 	/* We do not support non-block-aligned transfers for ROD devices */
12605 	ASSERT(!ISROD(un));
12606 #endif
12607 
12608 	xp = SD_GET_XBUF(bp);
12609 	ASSERT(xp != NULL);
12610 
12611 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12612 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12613 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12614 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12615 	    "request start block:0x%x\n", xp->xb_blkno);
12616 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12617 	    "request len:0x%x\n", bp->b_bcount);
12618 
12619 	/*
12620 	 * Allocate the layer-private data area for the mapblocksize layer.
12621 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12622 	 * struct to store the pointer to their layer-private data block, but
12623 	 * each layer also has the responsibility of restoring the prior
12624 	 * contents of xb_private before returning the buf/xbuf to the
12625 	 * higher layer that sent it.
12626 	 *
12627 	 * Here we save the prior contents of xp->xb_private into the
12628 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12629 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12630 	 * the layer-private area and returning the buf/xbuf to the layer
12631 	 * that sent it.
12632 	 *
12633 	 * Note that here we use kmem_zalloc for the allocation as there are
12634 	 * parts of the mapblocksize code that expect certain fields to be
12635 	 * zero unless explicitly set to a required value.
12636 	 */
12637 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12638 	bsp->mbs_oprivate = xp->xb_private;
12639 	xp->xb_private = bsp;
12640 
12641 	/*
12642 	 * This treats the data on the disk (target) as an array of bytes.
12643 	 * first_byte is the byte offset, from the beginning of the device,
12644 	 * to the location of the request. This is converted from a
12645 	 * un->un_sys_blocksize block address to a byte offset, and then back
12646 	 * to a block address based upon a un->un_tgt_blocksize block size.
12647 	 *
12648 	 * xp->xb_blkno should be absolute upon entry into this function,
12649 	 * but, but it is based upon partitions that use the "system"
12650 	 * block size. It must be adjusted to reflect the block size of
12651 	 * the target.
12652 	 *
12653 	 * Note that end_block is actually the block that follows the last
12654 	 * block of the request, but that's what is needed for the computation.
12655 	 */
12656 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12657 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12658 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12659 	    un->un_tgt_blocksize;
12660 
12661 	/* request_bytes is rounded up to a multiple of the target block size */
12662 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12663 
12664 	/*
12665 	 * See if the starting address of the request and the request
12666 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12667 	 * then we do not need to allocate a shadow buf to handle the request.
12668 	 */
12669 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12670 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12671 		is_aligned = TRUE;
12672 	}
12673 
12674 	if ((bp->b_flags & B_READ) == 0) {
12675 		/*
12676 		 * Lock the range for a write operation. An aligned request is
12677 		 * considered a simple write; otherwise the request must be a
12678 		 * read-modify-write.
12679 		 */
12680 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12681 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12682 	}
12683 
12684 	/*
12685 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12686 	 * where the READ command is generated for a read-modify-write. (The
12687 	 * write phase is deferred until after the read completes.)
12688 	 */
12689 	if (is_aligned == FALSE) {
12690 
12691 		struct sd_mapblocksize_info	*shadow_bsp;
12692 		struct sd_xbuf	*shadow_xp;
12693 		struct buf	*shadow_bp;
12694 
12695 		/*
12696 		 * Allocate the shadow buf and it associated xbuf. Note that
12697 		 * after this call the xb_blkno value in both the original
12698 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12699 		 * same: absolute relative to the start of the device, and
12700 		 * adjusted for the target block size. The b_blkno in the
12701 		 * shadow buf will also be set to this value. We should never
12702 		 * change b_blkno in the original bp however.
12703 		 *
12704 		 * Note also that the shadow buf will always need to be a
12705 		 * READ command, regardless of whether the incoming command
12706 		 * is a READ or a WRITE.
12707 		 */
12708 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12709 		    xp->xb_blkno,
12710 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12711 
12712 		shadow_xp = SD_GET_XBUF(shadow_bp);
12713 
12714 		/*
12715 		 * Allocate the layer-private data for the shadow buf.
12716 		 * (No need to preserve xb_private in the shadow xbuf.)
12717 		 */
12718 		shadow_xp->xb_private = shadow_bsp =
12719 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12720 
12721 		/*
12722 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12723 		 * to figure out where the start of the user data is (based upon
12724 		 * the system block size) in the data returned by the READ
12725 		 * command (which will be based upon the target blocksize). Note
12726 		 * that this is only really used if the request is unaligned.
12727 		 */
12728 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12729 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12730 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12731 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12732 
12733 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12734 
12735 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12736 
12737 		/* Transfer the wmap (if any) to the shadow buf */
12738 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12739 		bsp->mbs_wmp = NULL;
12740 
12741 		/*
12742 		 * The shadow buf goes on from here in place of the
12743 		 * original buf.
12744 		 */
12745 		shadow_bsp->mbs_orig_bp = bp;
12746 		bp = shadow_bp;
12747 	}
12748 
12749 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12750 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12751 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12752 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12753 	    request_bytes);
12754 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12755 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12756 
12757 done:
12758 	SD_NEXT_IOSTART(index, un, bp);
12759 
12760 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12761 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12762 }
12763 
12764 
12765 /*
12766  *    Function: sd_mapblocksize_iodone
12767  *
12768  * Description: Completion side processing for block-size mapping.
12769  *
12770  *     Context: May be called under interrupt context
12771  */
12772 
12773 static void
12774 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12775 {
12776 	struct sd_mapblocksize_info	*bsp;
12777 	struct sd_xbuf	*xp;
12778 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12779 	struct buf	*orig_bp;	/* ptr to the original buf */
12780 	offset_t	shadow_end;
12781 	offset_t	request_end;
12782 	offset_t	shadow_start;
12783 	ssize_t		copy_offset;
12784 	size_t		copy_length;
12785 	size_t		shortfall;
12786 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12787 	uint_t		has_wmap;	/* TRUE is this bp has a wmap */
12788 
12789 	ASSERT(un != NULL);
12790 	ASSERT(bp != NULL);
12791 
12792 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12793 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12794 
12795 	/*
12796 	 * There is no shadow buf or layer-private data if the target is
12797 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12798 	 */
12799 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12800 	    (bp->b_bcount == 0)) {
12801 		goto exit;
12802 	}
12803 
12804 	xp = SD_GET_XBUF(bp);
12805 	ASSERT(xp != NULL);
12806 
12807 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12808 	bsp = xp->xb_private;
12809 
12810 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12811 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12812 
12813 	if (is_write) {
12814 		/*
12815 		 * For a WRITE request we must free up the block range that
12816 		 * we have locked up.  This holds regardless of whether this is
12817 		 * an aligned write request or a read-modify-write request.
12818 		 */
12819 		sd_range_unlock(un, bsp->mbs_wmp);
12820 		bsp->mbs_wmp = NULL;
12821 	}
12822 
12823 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12824 		/*
12825 		 * An aligned read or write command will have no shadow buf;
12826 		 * there is not much else to do with it.
12827 		 */
12828 		goto done;
12829 	}
12830 
12831 	orig_bp = bsp->mbs_orig_bp;
12832 	ASSERT(orig_bp != NULL);
12833 	orig_xp = SD_GET_XBUF(orig_bp);
12834 	ASSERT(orig_xp != NULL);
12835 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12836 
12837 	if (!is_write && has_wmap) {
12838 		/*
12839 		 * A READ with a wmap means this is the READ phase of a
12840 		 * read-modify-write. If an error occurred on the READ then
12841 		 * we do not proceed with the WRITE phase or copy any data.
12842 		 * Just release the write maps and return with an error.
12843 		 */
12844 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12845 			orig_bp->b_resid = orig_bp->b_bcount;
12846 			bioerror(orig_bp, bp->b_error);
12847 			sd_range_unlock(un, bsp->mbs_wmp);
12848 			goto freebuf_done;
12849 		}
12850 	}
12851 
12852 	/*
12853 	 * Here is where we set up to copy the data from the shadow buf
12854 	 * into the space associated with the original buf.
12855 	 *
12856 	 * To deal with the conversion between block sizes, these
12857 	 * computations treat the data as an array of bytes, with the
12858 	 * first byte (byte 0) corresponding to the first byte in the
12859 	 * first block on the disk.
12860 	 */
12861 
12862 	/*
12863 	 * shadow_start and shadow_len indicate the location and size of
12864 	 * the data returned with the shadow IO request.
12865 	 */
12866 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12867 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12868 
12869 	/*
12870 	 * copy_offset gives the offset (in bytes) from the start of the first
12871 	 * block of the READ request to the beginning of the data.  We retrieve
12872 	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
	 * there by sd_mapblocksize_iostart(). copy_length gives the amount of
12874 	 * data to be copied (in bytes).
12875 	 */
12876 	copy_offset  = bsp->mbs_copy_offset;
12877 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12878 	copy_length  = orig_bp->b_bcount;
12879 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12880 
12881 	/*
12882 	 * Set up the resid and error fields of orig_bp as appropriate.
12883 	 */
12884 	if (shadow_end >= request_end) {
12885 		/* We got all the requested data; set resid to zero */
12886 		orig_bp->b_resid = 0;
12887 	} else {
12888 		/*
12889 		 * We failed to get enough data to fully satisfy the original
12890 		 * request. Just copy back whatever data we got and set
12891 		 * up the residual and error code as required.
12892 		 *
12893 		 * 'shortfall' is the amount by which the data received with the
12894 		 * shadow buf has "fallen short" of the requested amount.
12895 		 */
12896 		shortfall = (size_t)(request_end - shadow_end);
12897 
12898 		if (shortfall > orig_bp->b_bcount) {
12899 			/*
12900 			 * We did not get enough data to even partially
12901 			 * fulfill the original request.  The residual is
12902 			 * equal to the amount requested.
12903 			 */
12904 			orig_bp->b_resid = orig_bp->b_bcount;
12905 		} else {
12906 			/*
12907 			 * We did not get all the data that we requested
12908 			 * from the device, but we will try to return what
12909 			 * portion we did get.
12910 			 */
12911 			orig_bp->b_resid = shortfall;
12912 		}
12913 		ASSERT(copy_length >= orig_bp->b_resid);
12914 		copy_length  -= orig_bp->b_resid;
12915 	}
12916 
12917 	/* Propagate the error code from the shadow buf to the original buf */
12918 	bioerror(orig_bp, bp->b_error);
12919 
12920 	if (is_write) {
12921 		goto freebuf_done;	/* No data copying for a WRITE */
12922 	}
12923 
12924 	if (has_wmap) {
12925 		/*
12926 		 * This is a READ command from the READ phase of a
12927 		 * read-modify-write request. We have to copy the data given
12928 		 * by the user OVER the data returned by the READ command,
12929 		 * then convert the command from a READ to a WRITE and send
12930 		 * it back to the target.
12931 		 */
12932 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12933 		    copy_length);
12934 
12935 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12936 
12937 		/*
12938 		 * Dispatch the WRITE command to the taskq thread, which
12939 		 * will in turn send the command to the target. When the
12940 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12941 		 * will get called again as part of the iodone chain
12942 		 * processing for it. Note that we will still be dealing
12943 		 * with the shadow buf at that point.
12944 		 */
12945 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12946 		    KM_NOSLEEP) != 0) {
12947 			/*
12948 			 * Dispatch was successful so we are done. Return
12949 			 * without going any higher up the iodone chain. Do
12950 			 * not free up any layer-private data until after the
12951 			 * WRITE completes.
12952 			 */
12953 			return;
12954 		}
12955 
12956 		/*
12957 		 * Dispatch of the WRITE command failed; set up the error
12958 		 * condition and send this IO back up the iodone chain.
12959 		 */
12960 		bioerror(orig_bp, EIO);
12961 		orig_bp->b_resid = orig_bp->b_bcount;
12962 
12963 	} else {
12964 		/*
12965 		 * This is a regular READ request (ie, not a RMW). Copy the
12966 		 * data from the shadow buf into the original buf. The
12967 		 * copy_offset compensates for any "misalignment" between the
12968 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12969 		 * original buf (with its un->un_sys_blocksize blocks).
12970 		 */
12971 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12972 		    copy_length);
12973 	}
12974 
12975 freebuf_done:
12976 
12977 	/*
12978 	 * At this point we still have both the shadow buf AND the original
12979 	 * buf to deal with, as well as the layer-private data area in each.
12980 	 * Local variables are as follows:
12981 	 *
12982 	 * bp -- points to shadow buf
12983 	 * xp -- points to xbuf of shadow buf
12984 	 * bsp -- points to layer-private data area of shadow buf
12985 	 * orig_bp -- points to original buf
12986 	 *
12987 	 * First free the shadow buf and its associated xbuf, then free the
12988 	 * layer-private data area from the shadow buf. There is no need to
12989 	 * restore xb_private in the shadow xbuf.
12990 	 */
12991 	sd_shadow_buf_free(bp);
12992 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12993 
12994 	/*
12995 	 * Now update the local variables to point to the original buf, xbuf,
12996 	 * and layer-private area.
12997 	 */
12998 	bp = orig_bp;
12999 	xp = SD_GET_XBUF(bp);
13000 	ASSERT(xp != NULL);
13001 	ASSERT(xp == orig_xp);
13002 	bsp = xp->xb_private;
13003 	ASSERT(bsp != NULL);
13004 
13005 done:
13006 	/*
13007 	 * Restore xb_private to whatever it was set to by the next higher
13008 	 * layer in the chain, then free the layer-private data area.
13009 	 */
13010 	xp->xb_private = bsp->mbs_oprivate;
13011 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13012 
13013 exit:
13014 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
13015 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
13016 
13017 	SD_NEXT_IODONE(index, un, bp);
13018 }
13019 
13020 
13021 /*
13022  *    Function: sd_checksum_iostart
13023  *
13024  * Description: A stub function for a layer that's currently not used.
13025  *		For now just a placeholder.
13026  *
13027  *     Context: Kernel thread context
13028  */
13029 
13030 static void
13031 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13032 {
13033 	ASSERT(un != NULL);
13034 	ASSERT(bp != NULL);
13035 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13036 	SD_NEXT_IOSTART(index, un, bp);
13037 }
13038 
13039 
13040 /*
13041  *    Function: sd_checksum_iodone
13042  *
13043  * Description: A stub function for a layer that's currently not used.
13044  *		For now just a placeholder.
13045  *
13046  *     Context: May be called under interrupt context
13047  */
13048 
13049 static void
13050 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13051 {
13052 	ASSERT(un != NULL);
13053 	ASSERT(bp != NULL);
13054 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13055 	SD_NEXT_IODONE(index, un, bp);
13056 }
13057 
13058 
13059 /*
13060  *    Function: sd_checksum_uscsi_iostart
13061  *
13062  * Description: A stub function for a layer that's currently not used.
13063  *		For now just a placeholder.
13064  *
13065  *     Context: Kernel thread context
13066  */
13067 
13068 static void
13069 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13070 {
13071 	ASSERT(un != NULL);
13072 	ASSERT(bp != NULL);
13073 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13074 	SD_NEXT_IOSTART(index, un, bp);
13075 }
13076 
13077 
13078 /*
13079  *    Function: sd_checksum_uscsi_iodone
13080  *
13081  * Description: A stub function for a layer that's currently not used.
13082  *		For now just a placeholder.
13083  *
13084  *     Context: May be called under interrupt context
13085  */
13086 
13087 static void
13088 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13089 {
13090 	ASSERT(un != NULL);
13091 	ASSERT(bp != NULL);
13092 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13093 	SD_NEXT_IODONE(index, un, bp);
13094 }
13095 
13096 
13097 /*
13098  *    Function: sd_pm_iostart
13099  *
13100  * Description: iostart-side routine for Power mangement.
13101  *
13102  *     Context: Kernel thread context
13103  */
13104 
13105 static void
13106 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13107 {
13108 	ASSERT(un != NULL);
13109 	ASSERT(bp != NULL);
13110 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13111 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13112 
13113 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13114 
13115 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13116 		/*
13117 		 * Set up to return the failed buf back up the 'iodone'
13118 		 * side of the calling chain.
13119 		 */
13120 		bioerror(bp, EIO);
13121 		bp->b_resid = bp->b_bcount;
13122 
13123 		SD_BEGIN_IODONE(index, un, bp);
13124 
13125 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13126 		return;
13127 	}
13128 
13129 	SD_NEXT_IOSTART(index, un, bp);
13130 
13131 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13132 }
13133 
13134 
13135 /*
13136  *    Function: sd_pm_iodone
13137  *
13138  * Description: iodone-side routine for power mangement.
13139  *
13140  *     Context: may be called from interrupt context
13141  */
13142 
13143 static void
13144 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13145 {
13146 	ASSERT(un != NULL);
13147 	ASSERT(bp != NULL);
13148 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13149 
13150 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13151 
13152 	/*
13153 	 * After attach the following flag is only read, so don't
13154 	 * take the penalty of acquiring a mutex for it.
13155 	 */
13156 	if (un->un_f_pm_is_enabled == TRUE) {
13157 		sd_pm_exit(un);
13158 	}
13159 
13160 	SD_NEXT_IODONE(index, un, bp);
13161 
13162 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13163 }
13164 
13165 
13166 /*
13167  *    Function: sd_core_iostart
13168  *
13169  * Description: Primary driver function for enqueuing buf(9S) structs from
13170  *		the system and initiating IO to the target device
13171  *
13172  *     Context: Kernel thread context. Can sleep.
13173  *
13174  * Assumptions:  - The given xp->xb_blkno is absolute
13175  *		   (ie, relative to the start of the device).
13176  *		 - The IO is to be done using the native blocksize of
13177  *		   the device, as specified in un->un_tgt_blocksize.
13178  */
13179 /* ARGSUSED */
13180 static void
13181 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13182 {
13183 	struct sd_xbuf *xp;
13184 
13185 	ASSERT(un != NULL);
13186 	ASSERT(bp != NULL);
13187 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13188 	ASSERT(bp->b_resid == 0);
13189 
13190 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13191 
13192 	xp = SD_GET_XBUF(bp);
13193 	ASSERT(xp != NULL);
13194 
13195 	mutex_enter(SD_MUTEX(un));
13196 
13197 	/*
13198 	 * If we are currently in the failfast state, fail any new IO
13199 	 * that has B_FAILFAST set, then return.
13200 	 */
13201 	if ((bp->b_flags & B_FAILFAST) &&
13202 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13203 		mutex_exit(SD_MUTEX(un));
13204 		bioerror(bp, EIO);
13205 		bp->b_resid = bp->b_bcount;
13206 		SD_BEGIN_IODONE(index, un, bp);
13207 		return;
13208 	}
13209 
13210 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13211 		/*
13212 		 * Priority command -- transport it immediately.
13213 		 *
13214 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13215 		 * because all direct priority commands should be associated
13216 		 * with error recovery actions which we don't want to retry.
13217 		 */
13218 		sd_start_cmds(un, bp);
13219 	} else {
13220 		/*
13221 		 * Normal command -- add it to the wait queue, then start
13222 		 * transporting commands from the wait queue.
13223 		 */
13224 		sd_add_buf_to_waitq(un, bp);
13225 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13226 		sd_start_cmds(un, NULL);
13227 	}
13228 
13229 	mutex_exit(SD_MUTEX(un));
13230 
13231 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13232 }
13233 
13234 
13235 /*
13236  *    Function: sd_init_cdb_limits
13237  *
13238  * Description: This is to handle scsi_pkt initialization differences
13239  *		between the driver platforms.
13240  *
13241  *		Legacy behaviors:
13242  *
13243  *		If the block number or the sector count exceeds the
13244  *		capabilities of a Group 0 command, shift over to a
13245  *		Group 1 command. We don't blindly use Group 1
13246  *		commands because a) some drives (CDC Wren IVs) get a
13247  *		bit confused, and b) there is probably a fair amount
13248  *		of speed difference for a target to receive and decode
13249  *		a 10 byte command instead of a 6 byte command.
13250  *
13251  *		The xfer time difference of 6 vs 10 byte CDBs is
13252  *		still significant so this code is still worthwhile.
13253  *		10 byte CDBs are very inefficient with the fas HBA driver
13254  *		and older disks. Each CDB byte took 1 usec with some
13255  *		popular disks.
13256  *
13257  *     Context: Must be called at attach time
13258  */
13259 
13260 static void
13261 sd_init_cdb_limits(struct sd_lun *un)
13262 {
13263 	int hba_cdb_limit;
13264 
13265 	/*
13266 	 * Use CDB_GROUP1 commands for most devices except for
13267 	 * parallel SCSI fixed drives in which case we get better
13268 	 * performance using CDB_GROUP0 commands (where applicable).
13269 	 */
13270 	un->un_mincdb = SD_CDB_GROUP1;
13271 #if !defined(__fibre)
13272 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13273 	    !un->un_f_has_removable_media) {
13274 		un->un_mincdb = SD_CDB_GROUP0;
13275 	}
13276 #endif
13277 
13278 	/*
13279 	 * Try to read the max-cdb-length supported by HBA.
13280 	 */
13281 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13282 	if (0 >= un->un_max_hba_cdb) {
13283 		un->un_max_hba_cdb = CDB_GROUP4;
13284 		hba_cdb_limit = SD_CDB_GROUP4;
13285 	} else if (0 < un->un_max_hba_cdb &&
13286 	    un->un_max_hba_cdb < CDB_GROUP1) {
13287 		hba_cdb_limit = SD_CDB_GROUP0;
13288 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13289 	    un->un_max_hba_cdb < CDB_GROUP5) {
13290 		hba_cdb_limit = SD_CDB_GROUP1;
13291 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13292 	    un->un_max_hba_cdb < CDB_GROUP4) {
13293 		hba_cdb_limit = SD_CDB_GROUP5;
13294 	} else {
13295 		hba_cdb_limit = SD_CDB_GROUP4;
13296 	}
13297 
13298 	/*
13299 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13300 	 * commands for fixed disks unless we are building for a 32 bit
13301 	 * kernel.
13302 	 */
13303 #ifdef _LP64
13304 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13305 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13306 #else
13307 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13308 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13309 #endif
13310 
13311 	/*
13312 	 * x86 systems require the PKT_DMA_PARTIAL flag
13313 	 */
13314 #if defined(__x86)
13315 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13316 #else
13317 	un->un_pkt_flags = 0;
13318 #endif
13319 
13320 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13321 	    ? sizeof (struct scsi_arq_status) : 1);
13322 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13323 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13324 }
13325 
13326 
13327 /*
13328  *    Function: sd_initpkt_for_buf
13329  *
13330  * Description: Allocate and initialize for transport a scsi_pkt struct,
13331  *		based upon the info specified in the given buf struct.
13332  *
13333  *		Assumes the xb_blkno in the request is absolute (ie,
13334  *		relative to the start of the device (NOT partition!).
13335  *		Also assumes that the request is using the native block
13336  *		size of the device (as returned by the READ CAPACITY
13337  *		command).
13338  *
13339  * Return Code: SD_PKT_ALLOC_SUCCESS
13340  *		SD_PKT_ALLOC_FAILURE
13341  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13342  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13343  *
13344  *     Context: Kernel thread and may be called from software interrupt context
13345  *		as part of a sdrunout callback. This function may not block or
13346  *		call routines that block
13347  */
13348 
13349 static int
13350 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13351 {
13352 	struct sd_xbuf	*xp;
13353 	struct scsi_pkt *pktp = NULL;
13354 	struct sd_lun	*un;
13355 	size_t		blockcount;
13356 	daddr_t		startblock;
13357 	int		rval;
13358 	int		cmd_flags;
13359 
13360 	ASSERT(bp != NULL);
13361 	ASSERT(pktpp != NULL);
13362 	xp = SD_GET_XBUF(bp);
13363 	ASSERT(xp != NULL);
13364 	un = SD_GET_UN(bp);
13365 	ASSERT(un != NULL);
13366 	ASSERT(mutex_owned(SD_MUTEX(un)));
13367 	ASSERT(bp->b_resid == 0);
13368 
13369 	SD_TRACE(SD_LOG_IO_CORE, un,
13370 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13371 
13372 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13373 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13374 		/*
13375 		 * Already have a scsi_pkt -- just need DMA resources.
13376 		 * We must recompute the CDB in case the mapping returns
13377 		 * a nonzero pkt_resid.
13378 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13379 		 * that is being retried, the unmap/remap of the DMA resouces
13380 		 * will result in the entire transfer starting over again
13381 		 * from the very first block.
13382 		 */
13383 		ASSERT(xp->xb_pktp != NULL);
13384 		pktp = xp->xb_pktp;
13385 	} else {
13386 		pktp = NULL;
13387 	}
13388 #endif /* __i386 || __amd64 */
13389 
13390 	startblock = xp->xb_blkno;	/* Absolute block num. */
13391 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13392 
13393 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13394 
13395 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13396 
13397 #else
13398 
13399 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13400 
13401 #endif
13402 
13403 	/*
13404 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13405 	 * call scsi_init_pkt, and build the CDB.
13406 	 */
13407 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13408 	    cmd_flags, sdrunout, (caddr_t)un,
13409 	    startblock, blockcount);
13410 
13411 	if (rval == 0) {
13412 		/*
13413 		 * Success.
13414 		 *
13415 		 * If partial DMA is being used and required for this transfer.
13416 		 * set it up here.
13417 		 */
13418 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13419 		    (pktp->pkt_resid != 0)) {
13420 
13421 			/*
13422 			 * Save the CDB length and pkt_resid for the
13423 			 * next xfer
13424 			 */
13425 			xp->xb_dma_resid = pktp->pkt_resid;
13426 
13427 			/* rezero resid */
13428 			pktp->pkt_resid = 0;
13429 
13430 		} else {
13431 			xp->xb_dma_resid = 0;
13432 		}
13433 
13434 		pktp->pkt_flags = un->un_tagflags;
13435 		pktp->pkt_time  = un->un_cmd_timeout;
13436 		pktp->pkt_comp  = sdintr;
13437 
13438 		pktp->pkt_private = bp;
13439 		*pktpp = pktp;
13440 
13441 		SD_TRACE(SD_LOG_IO_CORE, un,
13442 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13443 
13444 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13445 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13446 #endif
13447 
13448 		return (SD_PKT_ALLOC_SUCCESS);
13449 
13450 	}
13451 
13452 	/*
13453 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13454 	 * from sd_setup_rw_pkt.
13455 	 */
13456 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13457 
13458 	if (rval == SD_PKT_ALLOC_FAILURE) {
13459 		*pktpp = NULL;
13460 		/*
13461 		 * Set the driver state to RWAIT to indicate the driver
13462 		 * is waiting on resource allocations. The driver will not
13463 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
13464 		 */
13465 		New_state(un, SD_STATE_RWAIT);
13466 
13467 		SD_ERROR(SD_LOG_IO_CORE, un,
13468 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13469 
13470 		if ((bp->b_flags & B_ERROR) != 0) {
13471 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13472 		}
13473 		return (SD_PKT_ALLOC_FAILURE);
13474 	} else {
13475 		/*
13476 		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13477 		 *
13478 		 * This should never happen.  Maybe someone messed with the
13479 		 * kernel's minphys?
13480 		 */
13481 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13482 		    "Request rejected: too large for CDB: "
13483 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13484 		SD_ERROR(SD_LOG_IO_CORE, un,
13485 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13486 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13487 
13488 	}
13489 }
13490 
13491 
13492 /*
13493  *    Function: sd_destroypkt_for_buf
13494  *
13495  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13496  *
13497  *     Context: Kernel thread or interrupt context
13498  */
13499 
13500 static void
13501 sd_destroypkt_for_buf(struct buf *bp)
13502 {
13503 	ASSERT(bp != NULL);
13504 	ASSERT(SD_GET_UN(bp) != NULL);
13505 
13506 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13507 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13508 
13509 	ASSERT(SD_GET_PKTP(bp) != NULL);
13510 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13511 
13512 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13513 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13514 }
13515 
13516 /*
13517  *    Function: sd_setup_rw_pkt
13518  *
13519  * Description: Determines appropriate CDB group for the requested LBA
13520  *		and transfer length, calls scsi_init_pkt, and builds
13521  *		the CDB.  Do not use for partial DMA transfers except
13522  *		for the initial transfer since the CDB size must
13523  *		remain constant.
13524  *
13525  *     Context: Kernel thread and may be called from software interrupt
13526  *		context as part of a sdrunout callback. This function may not
13527  *		block or call routines that block
13528  */
13529 
13530 
13531 int
13532 sd_setup_rw_pkt(struct sd_lun *un,
13533     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13534     int (*callback)(caddr_t), caddr_t callback_arg,
13535     diskaddr_t lba, uint32_t blockcount)
13536 {
13537 	struct scsi_pkt *return_pktp;
13538 	union scsi_cdb *cdbp;
13539 	struct sd_cdbinfo *cp = NULL;
13540 	int i;
13541 
13542 	/*
13543 	 * See which size CDB to use, based upon the request.
13544 	 */
13545 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13546 
13547 		/*
13548 		 * Check lba and block count against sd_cdbtab limits.
13549 		 * In the partial DMA case, we have to use the same size
13550 		 * CDB for all the transfers.  Check lba + blockcount
13551 		 * against the max LBA so we know that segment of the
13552 		 * transfer can use the CDB we select.
13553 		 */
13554 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13555 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13556 
13557 			/*
13558 			 * The command will fit into the CDB type
13559 			 * specified by sd_cdbtab[i].
13560 			 */
13561 			cp = sd_cdbtab + i;
13562 
13563 			/*
13564 			 * Call scsi_init_pkt so we can fill in the
13565 			 * CDB.
13566 			 */
13567 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13568 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13569 			    flags, callback, callback_arg);
13570 
13571 			if (return_pktp != NULL) {
13572 
13573 				/*
13574 				 * Return new value of pkt
13575 				 */
13576 				*pktpp = return_pktp;
13577 
13578 				/*
13579 				 * To be safe, zero the CDB insuring there is
13580 				 * no leftover data from a previous command.
13581 				 */
13582 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13583 
13584 				/*
13585 				 * Handle partial DMA mapping
13586 				 */
13587 				if (return_pktp->pkt_resid != 0) {
13588 
13589 					/*
13590 					 * Not going to xfer as many blocks as
13591 					 * originally expected
13592 					 */
13593 					blockcount -=
13594 					    SD_BYTES2TGTBLOCKS(un,
13595 						return_pktp->pkt_resid);
13596 				}
13597 
13598 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13599 
13600 				/*
13601 				 * Set command byte based on the CDB
13602 				 * type we matched.
13603 				 */
13604 				cdbp->scc_cmd = cp->sc_grpmask |
13605 				    ((bp->b_flags & B_READ) ?
13606 					SCMD_READ : SCMD_WRITE);
13607 
13608 				SD_FILL_SCSI1_LUN(un, return_pktp);
13609 
13610 				/*
13611 				 * Fill in LBA and length
13612 				 */
13613 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13614 				    (cp->sc_grpcode == CDB_GROUP4) ||
13615 				    (cp->sc_grpcode == CDB_GROUP0) ||
13616 				    (cp->sc_grpcode == CDB_GROUP5));
13617 
13618 				if (cp->sc_grpcode == CDB_GROUP1) {
13619 					FORMG1ADDR(cdbp, lba);
13620 					FORMG1COUNT(cdbp, blockcount);
13621 					return (0);
13622 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13623 					FORMG4LONGADDR(cdbp, lba);
13624 					FORMG4COUNT(cdbp, blockcount);
13625 					return (0);
13626 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13627 					FORMG0ADDR(cdbp, lba);
13628 					FORMG0COUNT(cdbp, blockcount);
13629 					return (0);
13630 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13631 					FORMG5ADDR(cdbp, lba);
13632 					FORMG5COUNT(cdbp, blockcount);
13633 					return (0);
13634 				}
13635 
13636 				/*
13637 				 * It should be impossible to not match one
13638 				 * of the CDB types above, so we should never
13639 				 * reach this point.  Set the CDB command byte
13640 				 * to test-unit-ready to avoid writing
13641 				 * to somewhere we don't intend.
13642 				 */
13643 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13644 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13645 			} else {
13646 				/*
13647 				 * Couldn't get scsi_pkt
13648 				 */
13649 				return (SD_PKT_ALLOC_FAILURE);
13650 			}
13651 		}
13652 	}
13653 
13654 	/*
13655 	 * None of the available CDB types were suitable.  This really
13656 	 * should never happen:  on a 64 bit system we support
13657 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13658 	 * and on a 32 bit system we will refuse to bind to a device
13659 	 * larger than 2TB so addresses will never be larger than 32 bits.
13660 	 */
13661 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13662 }
13663 
13664 #if defined(__i386) || defined(__amd64)
13665 /*
13666  *    Function: sd_setup_next_rw_pkt
13667  *
13668  * Description: Setup packet for partial DMA transfers, except for the
13669  * 		initial transfer.  sd_setup_rw_pkt should be used for
13670  *		the initial transfer.
13671  *
13672  *     Context: Kernel thread and may be called from interrupt context.
13673  */
13674 
13675 int
13676 sd_setup_next_rw_pkt(struct sd_lun *un,
13677     struct scsi_pkt *pktp, struct buf *bp,
13678     diskaddr_t lba, uint32_t blockcount)
13679 {
13680 	uchar_t com;
13681 	union scsi_cdb *cdbp;
13682 	uchar_t cdb_group_id;
13683 
13684 	ASSERT(pktp != NULL);
13685 	ASSERT(pktp->pkt_cdbp != NULL);
13686 
13687 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13688 	com = cdbp->scc_cmd;
13689 	cdb_group_id = CDB_GROUPID(com);
13690 
13691 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13692 	    (cdb_group_id == CDB_GROUPID_1) ||
13693 	    (cdb_group_id == CDB_GROUPID_4) ||
13694 	    (cdb_group_id == CDB_GROUPID_5));
13695 
13696 	/*
13697 	 * Move pkt to the next portion of the xfer.
13698 	 * func is NULL_FUNC so we do not have to release
13699 	 * the disk mutex here.
13700 	 */
13701 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13702 	    NULL_FUNC, NULL) == pktp) {
13703 		/* Success.  Handle partial DMA */
13704 		if (pktp->pkt_resid != 0) {
13705 			blockcount -=
13706 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13707 		}
13708 
13709 		cdbp->scc_cmd = com;
13710 		SD_FILL_SCSI1_LUN(un, pktp);
13711 		if (cdb_group_id == CDB_GROUPID_1) {
13712 			FORMG1ADDR(cdbp, lba);
13713 			FORMG1COUNT(cdbp, blockcount);
13714 			return (0);
13715 		} else if (cdb_group_id == CDB_GROUPID_4) {
13716 			FORMG4LONGADDR(cdbp, lba);
13717 			FORMG4COUNT(cdbp, blockcount);
13718 			return (0);
13719 		} else if (cdb_group_id == CDB_GROUPID_0) {
13720 			FORMG0ADDR(cdbp, lba);
13721 			FORMG0COUNT(cdbp, blockcount);
13722 			return (0);
13723 		} else if (cdb_group_id == CDB_GROUPID_5) {
13724 			FORMG5ADDR(cdbp, lba);
13725 			FORMG5COUNT(cdbp, blockcount);
13726 			return (0);
13727 		}
13728 
13729 		/* Unreachable */
13730 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13731 	}
13732 
13733 	/*
13734 	 * Error setting up next portion of cmd transfer.
13735 	 * Something is definitely very wrong and this
13736 	 * should not happen.
13737 	 */
13738 	return (SD_PKT_ALLOC_FAILURE);
13739 }
13740 #endif /* defined(__i386) || defined(__amd64) */
13741 
13742 /*
13743  *    Function: sd_initpkt_for_uscsi
13744  *
13745  * Description: Allocate and initialize for transport a scsi_pkt struct,
13746  *		based upon the info specified in the given uscsi_cmd struct.
13747  *
13748  * Return Code: SD_PKT_ALLOC_SUCCESS
13749  *		SD_PKT_ALLOC_FAILURE
13750  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13751  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13752  *
13753  *     Context: Kernel thread and may be called from software interrupt context
13754  *		as part of a sdrunout callback. This function may not block or
13755  *		call routines that block
13756  */
13757 
13758 static int
13759 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13760 {
13761 	struct uscsi_cmd *uscmd;
13762 	struct sd_xbuf	*xp;
13763 	struct scsi_pkt	*pktp;
13764 	struct sd_lun	*un;
13765 	uint32_t	flags = 0;
13766 
13767 	ASSERT(bp != NULL);
13768 	ASSERT(pktpp != NULL);
13769 	xp = SD_GET_XBUF(bp);
13770 	ASSERT(xp != NULL);
13771 	un = SD_GET_UN(bp);
13772 	ASSERT(un != NULL);
13773 	ASSERT(mutex_owned(SD_MUTEX(un)));
13774 
13775 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13776 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13777 	ASSERT(uscmd != NULL);
13778 
13779 	SD_TRACE(SD_LOG_IO_CORE, un,
13780 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13781 
13782 	/*
13783 	 * Allocate the scsi_pkt for the command.
13784 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13785 	 *	 during scsi_init_pkt time and will continue to use the
13786 	 *	 same path as long as the same scsi_pkt is used without
13787 	 *	 intervening scsi_dma_free(). Since uscsi command does
13788 	 *	 not call scsi_dmafree() before retry failed command, it
13789 	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
13790 	 *	 set such that scsi_vhci can use other available path for
	 *	 retry. Besides, uscsi command does not allow DMA breakup,
13792 	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
13793 	 */
13794 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13795 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13796 	    sizeof (struct scsi_arq_status), 0,
13797 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13798 	    sdrunout, (caddr_t)un);
13799 
13800 	if (pktp == NULL) {
13801 		*pktpp = NULL;
13802 		/*
13803 		 * Set the driver state to RWAIT to indicate the driver
13804 		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
13806 		 */
13807 		New_state(un, SD_STATE_RWAIT);
13808 
13809 		SD_ERROR(SD_LOG_IO_CORE, un,
13810 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13811 
13812 		if ((bp->b_flags & B_ERROR) != 0) {
13813 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13814 		}
13815 		return (SD_PKT_ALLOC_FAILURE);
13816 	}
13817 
13818 	/*
13819 	 * We do not do DMA breakup for USCSI commands, so return failure
13820 	 * here if all the needed DMA resources were not allocated.
13821 	 */
13822 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13823 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13824 		scsi_destroy_pkt(pktp);
13825 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13826 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13827 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13828 	}
13829 
13830 	/* Init the cdb from the given uscsi struct */
13831 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13832 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13833 
13834 	SD_FILL_SCSI1_LUN(un, pktp);
13835 
13836 	/*
13837 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
13838 	 * for listing of the supported flags.
13839 	 */
13840 
13841 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13842 		flags |= FLAG_SILENT;
13843 	}
13844 
13845 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13846 		flags |= FLAG_DIAGNOSE;
13847 	}
13848 
13849 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13850 		flags |= FLAG_ISOLATE;
13851 	}
13852 
13853 	if (un->un_f_is_fibre == FALSE) {
13854 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13855 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13856 		}
13857 	}
13858 
13859 	/*
13860 	 * Set the pkt flags here so we save time later.
13861 	 * Note: These flags are NOT in the uscsi man page!!!
13862 	 */
13863 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13864 		flags |= FLAG_HEAD;
13865 	}
13866 
13867 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13868 		flags |= FLAG_NOINTR;
13869 	}
13870 
13871 	/*
13872 	 * For tagged queueing, things get a bit complicated.
13873 	 * Check first for head of queue and last for ordered queue.
13874 	 * If neither head nor order, use the default driver tag flags.
13875 	 */
13876 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13877 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13878 			flags |= FLAG_HTAG;
13879 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13880 			flags |= FLAG_OTAG;
13881 		} else {
13882 			flags |= un->un_tagflags & FLAG_TAGMASK;
13883 		}
13884 	}
13885 
13886 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13887 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13888 	}
13889 
13890 	pktp->pkt_flags = flags;
13891 
13892 	/* Copy the caller's CDB into the pkt... */
13893 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13894 
13895 	if (uscmd->uscsi_timeout == 0) {
13896 		pktp->pkt_time = un->un_uscsi_timeout;
13897 	} else {
13898 		pktp->pkt_time = uscmd->uscsi_timeout;
13899 	}
13900 
13901 	/* need it later to identify USCSI request in sdintr */
13902 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13903 
13904 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13905 
13906 	pktp->pkt_private = bp;
13907 	pktp->pkt_comp = sdintr;
13908 	*pktpp = pktp;
13909 
13910 	SD_TRACE(SD_LOG_IO_CORE, un,
13911 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13912 
13913 	return (SD_PKT_ALLOC_SUCCESS);
13914 }
13915 
13916 
13917 /*
13918  *    Function: sd_destroypkt_for_uscsi
13919  *
13920  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
 *		IOs.  Also saves relevant info into the associated uscsi_cmd
13922  *		struct.
13923  *
13924  *     Context: May be called under interrupt context
13925  */
13926 
13927 static void
13928 sd_destroypkt_for_uscsi(struct buf *bp)
13929 {
13930 	struct uscsi_cmd *uscmd;
13931 	struct sd_xbuf	*xp;
13932 	struct scsi_pkt	*pktp;
13933 	struct sd_lun	*un;
13934 
13935 	ASSERT(bp != NULL);
13936 	xp = SD_GET_XBUF(bp);
13937 	ASSERT(xp != NULL);
13938 	un = SD_GET_UN(bp);
13939 	ASSERT(un != NULL);
13940 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13941 	pktp = SD_GET_PKTP(bp);
13942 	ASSERT(pktp != NULL);
13943 
13944 	SD_TRACE(SD_LOG_IO_CORE, un,
13945 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13946 
13947 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13948 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13949 	ASSERT(uscmd != NULL);
13950 
13951 	/* Save the status and the residual into the uscsi_cmd struct */
13952 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13953 	uscmd->uscsi_resid  = bp->b_resid;
13954 
13955 	/*
13956 	 * If enabled, copy any saved sense data into the area specified
13957 	 * by the uscsi command.
13958 	 */
13959 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13960 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13961 		/*
13962 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13963 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13964 		 */
13965 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13966 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13967 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13968 	}
13969 
13970 	/* We are done with the scsi_pkt; free it now */
13971 	ASSERT(SD_GET_PKTP(bp) != NULL);
13972 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13973 
13974 	SD_TRACE(SD_LOG_IO_CORE, un,
13975 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13976 }
13977 
13978 
13979 /*
13980  *    Function: sd_bioclone_alloc
13981  *
13982  * Description: Allocate a buf(9S) and init it as per the given buf
13983  *		and the various arguments.  The associated sd_xbuf
13984  *		struct is (nearly) duplicated.  The struct buf *bp
13985  *		argument is saved in new_xp->xb_private.
13986  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13988  *		datalen - size of data area for the shadow bp
13989  *		blkno - starting LBA
13990  *		func - function pointer for b_iodone in the shadow buf. (May
13991  *			be NULL if none.)
13992  *
 * Return Code: Pointer to allocated buf(9S) struct
13994  *
13995  *     Context: Can sleep.
13996  */
13997 
13998 static struct buf *
13999 sd_bioclone_alloc(struct buf *bp, size_t datalen,
14000 	daddr_t blkno, int (*func)(struct buf *))
14001 {
14002 	struct	sd_lun	*un;
14003 	struct	sd_xbuf	*xp;
14004 	struct	sd_xbuf	*new_xp;
14005 	struct	buf	*new_bp;
14006 
14007 	ASSERT(bp != NULL);
14008 	xp = SD_GET_XBUF(bp);
14009 	ASSERT(xp != NULL);
14010 	un = SD_GET_UN(bp);
14011 	ASSERT(un != NULL);
14012 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14013 
14014 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14015 	    NULL, KM_SLEEP);
14016 
14017 	new_bp->b_lblkno	= blkno;
14018 
14019 	/*
14020 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14021 	 * original xbuf into it.
14022 	 */
14023 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14024 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14025 
14026 	/*
14027 	 * The given bp is automatically saved in the xb_private member
14028 	 * of the new xbuf.  Callers are allowed to depend on this.
14029 	 */
14030 	new_xp->xb_private = bp;
14031 
14032 	new_bp->b_private  = new_xp;
14033 
14034 	return (new_bp);
14035 }
14036 
14037 /*
14038  *    Function: sd_shadow_buf_alloc
14039  *
14040  * Description: Allocate a buf(9S) and init it as per the given buf
14041  *		and the various arguments.  The associated sd_xbuf
14042  *		struct is (nearly) duplicated.  The struct buf *bp
14043  *		argument is saved in new_xp->xb_private.
14044  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14046  *		datalen - size of data area for the shadow bp
14047  *		bflags - B_READ or B_WRITE (pseudo flag)
14048  *		blkno - starting LBA
14049  *		func - function pointer for b_iodone in the shadow buf. (May
14050  *			be NULL if none.)
14051  *
 * Return Code: Pointer to allocated buf(9S) struct
14053  *
14054  *     Context: Can sleep.
14055  */
14056 
static struct buf *
sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
	daddr_t blkno, int (*func)(struct buf *))
{
	struct	sd_lun	*un;
	struct	sd_xbuf	*xp;
	struct	sd_xbuf	*new_xp;
	struct	buf	*new_bp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Map in the original buf's data if it is unmapped (paged or
	 * physio I/O) so it can later be copied to/from the shadow buf.
	 */
	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
		bp_mapin(bp);
	}

	/* Only the transfer direction is honored from the caller's bflags. */
	bflags &= (B_READ | B_WRITE);
#if defined(__i386) || defined(__amd64)
	/* x86: allocate a buf header and a separate zeroed data area. */
	new_bp = getrbuf(KM_SLEEP);
	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
	new_bp->b_bcount = datalen;
	new_bp->b_flags = bflags |
	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
#else
	/* Other platforms: get a DMA-consistent buffer via the HBA. */
	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
	    datalen, bflags, SLEEP_FUNC, NULL);
#endif
	new_bp->av_forw	= NULL;
	new_bp->av_back	= NULL;
	new_bp->b_dev	= bp->b_dev;
	new_bp->b_blkno	= blkno;
	new_bp->b_iodone = func;
	new_bp->b_edev	= bp->b_edev;
	new_bp->b_resid	= 0;

	/* We need to preserve the B_FAILFAST flag */
	if (bp->b_flags & B_FAILFAST) {
		new_bp->b_flags |= B_FAILFAST;
	}

	/*
	 * Allocate an xbuf for the shadow bp and copy the contents of the
	 * original xbuf into it.
	 */
	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	bcopy(xp, new_xp, sizeof (struct sd_xbuf));

	/* Need later to copy data between the shadow buf & original buf! */
	new_xp->xb_pkt_flags |= PKT_CONSISTENT;

	/*
	 * The given bp is automatically saved in the xb_private member
	 * of the new xbuf.  Callers are allowed to depend on this.
	 */
	new_xp->xb_private = bp;

	new_bp->b_private  = new_xp;

	return (new_bp);
}
14121 
14122 /*
14123  *    Function: sd_bioclone_free
14124  *
14125  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14126  *		in the larger than partition operation.
14127  *
14128  *     Context: May be called under interrupt context
14129  */
14130 
14131 static void
14132 sd_bioclone_free(struct buf *bp)
14133 {
14134 	struct sd_xbuf	*xp;
14135 
14136 	ASSERT(bp != NULL);
14137 	xp = SD_GET_XBUF(bp);
14138 	ASSERT(xp != NULL);
14139 
14140 	/*
14141 	 * Call bp_mapout() before freeing the buf,  in case a lower
14142 	 * layer or HBA  had done a bp_mapin().  we must do this here
14143 	 * as we are the "originator" of the shadow buf.
14144 	 */
14145 	bp_mapout(bp);
14146 
14147 	/*
14148 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14149 	 * never gets confused by a stale value in this field. (Just a little
14150 	 * extra defensiveness here.)
14151 	 */
14152 	bp->b_iodone = NULL;
14153 
14154 	freerbuf(bp);
14155 
14156 	kmem_free(xp, sizeof (struct sd_xbuf));
14157 }
14158 
14159 /*
14160  *    Function: sd_shadow_buf_free
14161  *
14162  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14163  *
14164  *     Context: May be called under interrupt context
14165  */
14166 
14167 static void
14168 sd_shadow_buf_free(struct buf *bp)
14169 {
14170 	struct sd_xbuf	*xp;
14171 
14172 	ASSERT(bp != NULL);
14173 	xp = SD_GET_XBUF(bp);
14174 	ASSERT(xp != NULL);
14175 
14176 #if defined(__sparc)
14177 	/*
14178 	 * Call bp_mapout() before freeing the buf,  in case a lower
14179 	 * layer or HBA  had done a bp_mapin().  we must do this here
14180 	 * as we are the "originator" of the shadow buf.
14181 	 */
14182 	bp_mapout(bp);
14183 #endif
14184 
14185 	/*
14186 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14187 	 * never gets confused by a stale value in this field. (Just a little
14188 	 * extra defensiveness here.)
14189 	 */
14190 	bp->b_iodone = NULL;
14191 
14192 #if defined(__i386) || defined(__amd64)
14193 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14194 	freerbuf(bp);
14195 #else
14196 	scsi_free_consistent_buf(bp);
14197 #endif
14198 
14199 	kmem_free(xp, sizeof (struct sd_xbuf));
14200 }
14201 
14202 
14203 /*
14204  *    Function: sd_print_transport_rejected_message
14205  *
14206  * Description: This implements the ludicrously complex rules for printing
14207  *		a "transport rejected" message.  This is to address the
14208  *		specific problem of having a flood of this error message
14209  *		produced when a failover occurs.
14210  *
14211  *     Context: Any.
14212  */
14213 
14214 static void
14215 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14216 	int code)
14217 {
14218 	ASSERT(un != NULL);
14219 	ASSERT(mutex_owned(SD_MUTEX(un)));
14220 	ASSERT(xp != NULL);
14221 
14222 	/*
14223 	 * Print the "transport rejected" message under the following
14224 	 * conditions:
14225 	 *
14226 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14227 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14228 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14229 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14230 	 *   scsi_transport(9F) (which indicates that the target might have
14231 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14232 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14233 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
14234 	 *   from scsi_transport().
14235 	 *
14236 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14237 	 * the preceeding cases in order for the message to be printed.
14238 	 */
14239 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14240 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14241 		    (code != TRAN_FATAL_ERROR) ||
14242 		    (un->un_tran_fatal_count == 1)) {
14243 			switch (code) {
14244 			case TRAN_BADPKT:
14245 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14246 				    "transport rejected bad packet\n");
14247 				break;
14248 			case TRAN_FATAL_ERROR:
14249 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14250 				    "transport rejected fatal error\n");
14251 				break;
14252 			default:
14253 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14254 				    "transport rejected (%d)\n", code);
14255 				break;
14256 			}
14257 		}
14258 	}
14259 }
14260 
14261 
14262 /*
14263  *    Function: sd_add_buf_to_waitq
14264  *
14265  * Description: Add the given buf(9S) struct to the wait queue for the
14266  *		instance.  If sorting is enabled, then the buf is added
14267  *		to the queue via an elevator sort algorithm (a la
14268  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14269  *		If sorting is not enabled, then the buf is just added
14270  *		to the end of the wait queue.
14271  *
14272  * Return Code: void
14273  *
14274  *     Context: Does not sleep/block, therefore technically can be called
14275  *		from any context.  However if sorting is enabled then the
14276  *		execution time is indeterminate, and may take long if
14277  *		the wait queue grows large.
14278  */
14279 
static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return.
	 */
	if (un->un_f_disksort_disabled) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 */
	ap = un->un_waitq_headp;
	/* bp sorts before the head request: it belongs in the second list. */
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Link bp into the chain immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
14397 
14398 
14399 /*
14400  *    Function: sd_start_cmds
14401  *
14402  * Description: Remove and transport cmds from the driver queues.
14403  *
14404  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14405  *
14406  *		immed_bp - ptr to a buf to be transported immediately. Only
14407  *		the immed_bp is transported; bufs on the waitq are not
14408  *		processed and the un_retry_bp is not checked.  If immed_bp is
14409  *		NULL, then normal queue processing is performed.
14410  *
14411  *     Context: May be called from kernel thread context, interrupt context,
14412  *		or runout callback context. This function may not block or
14413  *		call routines that block.
14414  */
14415 
14416 static void
14417 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14418 {
14419 	struct	sd_xbuf	*xp;
14420 	struct	buf	*bp;
14421 	void	(*statp)(kstat_io_t *);
14422 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14423 	void	(*saved_statp)(kstat_io_t *);
14424 #endif
14425 	int	rval;
14426 
14427 	ASSERT(un != NULL);
14428 	ASSERT(mutex_owned(SD_MUTEX(un)));
14429 	ASSERT(un->un_ncmds_in_transport >= 0);
14430 	ASSERT(un->un_throttle >= 0);
14431 
14432 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14433 
14434 	do {
14435 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14436 		saved_statp = NULL;
14437 #endif
14438 
14439 		/*
14440 		 * If we are syncing or dumping, fail the command to
14441 		 * avoid recursively calling back into scsi_transport().
14442 		 * The dump I/O itself uses a separate code path so this
14443 		 * only prevents non-dump I/O from being sent while dumping.
14444 		 * File system sync takes place before dumping begins.
14445 		 * During panic, filesystem I/O is allowed provided
14446 		 * un_in_callback is <= 1.  This is to prevent recursion
14447 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14448 		 * sd_start_cmds and so on.  See panic.c for more information
14449 		 * about the states the system can be in during panic.
14450 		 */
14451 		if ((un->un_state == SD_STATE_DUMPING) ||
14452 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14453 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14454 			    "sd_start_cmds: panicking\n");
14455 			goto exit;
14456 		}
14457 
14458 		if ((bp = immed_bp) != NULL) {
14459 			/*
14460 			 * We have a bp that must be transported immediately.
14461 			 * It's OK to transport the immed_bp here without doing
14462 			 * the throttle limit check because the immed_bp is
14463 			 * always used in a retry/recovery case. This means
14464 			 * that we know we are not at the throttle limit by
14465 			 * virtue of the fact that to get here we must have
14466 			 * already gotten a command back via sdintr(). This also
14467 			 * relies on (1) the command on un_retry_bp preventing
14468 			 * further commands from the waitq from being issued;
14469 			 * and (2) the code in sd_retry_command checking the
14470 			 * throttle limit before issuing a delayed or immediate
14471 			 * retry. This holds even if the throttle limit is
14472 			 * currently ratcheted down from its maximum value.
14473 			 */
14474 			statp = kstat_runq_enter;
14475 			if (bp == un->un_retry_bp) {
14476 				ASSERT((un->un_retry_statp == NULL) ||
14477 				    (un->un_retry_statp == kstat_waitq_enter) ||
14478 				    (un->un_retry_statp ==
14479 				    kstat_runq_back_to_waitq));
14480 				/*
14481 				 * If the waitq kstat was incremented when
14482 				 * sd_set_retry_bp() queued this bp for a retry,
14483 				 * then we must set up statp so that the waitq
14484 				 * count will get decremented correctly below.
14485 				 * Also we must clear un->un_retry_statp to
14486 				 * ensure that we do not act on a stale value
14487 				 * in this field.
14488 				 */
14489 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14490 				    (un->un_retry_statp ==
14491 				    kstat_runq_back_to_waitq)) {
14492 					statp = kstat_waitq_to_runq;
14493 				}
14494 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14495 				saved_statp = un->un_retry_statp;
14496 #endif
14497 				un->un_retry_statp = NULL;
14498 
14499 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14500 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14501 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14502 				    un, un->un_retry_bp, un->un_throttle,
14503 				    un->un_ncmds_in_transport);
14504 			} else {
14505 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14506 				    "processing priority bp:0x%p\n", bp);
14507 			}
14508 
14509 		} else if ((bp = un->un_waitq_headp) != NULL) {
14510 			/*
14511 			 * A command on the waitq is ready to go, but do not
14512 			 * send it if:
14513 			 *
14514 			 * (1) the throttle limit has been reached, or
14515 			 * (2) a retry is pending, or
14516 			 * (3) a START_STOP_UNIT callback pending, or
14517 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14518 			 *	command is pending.
14519 			 *
14520 			 * For all of these conditions, IO processing will
14521 			 * restart after the condition is cleared.
14522 			 */
14523 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14524 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14525 				    "sd_start_cmds: exiting, "
14526 				    "throttle limit reached!\n");
14527 				goto exit;
14528 			}
14529 			if (un->un_retry_bp != NULL) {
14530 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14531 				    "sd_start_cmds: exiting, retry pending!\n");
14532 				goto exit;
14533 			}
14534 			if (un->un_startstop_timeid != NULL) {
14535 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14536 				    "sd_start_cmds: exiting, "
14537 				    "START_STOP pending!\n");
14538 				goto exit;
14539 			}
14540 			if (un->un_direct_priority_timeid != NULL) {
14541 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14542 				    "sd_start_cmds: exiting, "
14543 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14544 				goto exit;
14545 			}
14546 
14547 			/* Dequeue the command */
14548 			un->un_waitq_headp = bp->av_forw;
14549 			if (un->un_waitq_headp == NULL) {
14550 				un->un_waitq_tailp = NULL;
14551 			}
14552 			bp->av_forw = NULL;
14553 			statp = kstat_waitq_to_runq;
14554 			SD_TRACE(SD_LOG_IO_CORE, un,
14555 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14556 
14557 		} else {
14558 			/* No work to do so bail out now */
14559 			SD_TRACE(SD_LOG_IO_CORE, un,
14560 			    "sd_start_cmds: no more work, exiting!\n");
14561 			goto exit;
14562 		}
14563 
14564 		/*
14565 		 * Reset the state to normal. This is the mechanism by which
14566 		 * the state transitions from either SD_STATE_RWAIT or
14567 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14568 		 * If state is SD_STATE_PM_CHANGING then this command is
14569 		 * part of the device power control and the state must
14570 		 * not be put back to normal. Doing so would would
14571 		 * allow new commands to proceed when they shouldn't,
14572 		 * the device may be going off.
14573 		 */
14574 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14575 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14576 			New_state(un, SD_STATE_NORMAL);
14577 		    }
14578 
14579 		xp = SD_GET_XBUF(bp);
14580 		ASSERT(xp != NULL);
14581 
14582 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14583 		/*
14584 		 * Allocate the scsi_pkt if we need one, or attach DMA
14585 		 * resources if we have a scsi_pkt that needs them. The
14586 		 * latter should only occur for commands that are being
14587 		 * retried.
14588 		 */
14589 		if ((xp->xb_pktp == NULL) ||
14590 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14591 #else
14592 		if (xp->xb_pktp == NULL) {
14593 #endif
14594 			/*
14595 			 * There is no scsi_pkt allocated for this buf. Call
14596 			 * the initpkt function to allocate & init one.
14597 			 *
14598 			 * The scsi_init_pkt runout callback functionality is
14599 			 * implemented as follows:
14600 			 *
14601 			 * 1) The initpkt function always calls
14602 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14603 			 *    callback routine.
14604 			 * 2) A successful packet allocation is initialized and
14605 			 *    the I/O is transported.
14606 			 * 3) The I/O associated with an allocation resource
14607 			 *    failure is left on its queue to be retried via
14608 			 *    runout or the next I/O.
14609 			 * 4) The I/O associated with a DMA error is removed
14610 			 *    from the queue and failed with EIO. Processing of
14611 			 *    the transport queues is also halted to be
14612 			 *    restarted via runout or the next I/O.
14613 			 * 5) The I/O associated with a CDB size or packet
14614 			 *    size error is removed from the queue and failed
14615 			 *    with EIO. Processing of the transport queues is
14616 			 *    continued.
14617 			 *
14618 			 * Note: there is no interface for canceling a runout
14619 			 * callback. To prevent the driver from detaching or
14620 			 * suspending while a runout is pending the driver
14621 			 * state is set to SD_STATE_RWAIT
14622 			 *
14623 			 * Note: using the scsi_init_pkt callback facility can
14624 			 * result in an I/O request persisting at the head of
14625 			 * the list which cannot be satisfied even after
14626 			 * multiple retries. In the future the driver may
14627 			 * implement some kind of maximum runout count before
14628 			 * failing an I/O.
14629 			 *
14630 			 * Note: the use of funcp below may seem superfluous,
14631 			 * but it helps warlock figure out the correct
14632 			 * initpkt function calls (see [s]sd.wlcmd).
14633 			 */
14634 			struct scsi_pkt	*pktp;
14635 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14636 
14637 			ASSERT(bp != un->un_rqs_bp);
14638 
14639 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14640 			switch ((*funcp)(bp, &pktp)) {
14641 			case  SD_PKT_ALLOC_SUCCESS:
14642 				xp->xb_pktp = pktp;
14643 				SD_TRACE(SD_LOG_IO_CORE, un,
14644 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14645 				    pktp);
14646 				goto got_pkt;
14647 
14648 			case SD_PKT_ALLOC_FAILURE:
14649 				/*
14650 				 * Temporary (hopefully) resource depletion.
14651 				 * Since retries and RQS commands always have a
14652 				 * scsi_pkt allocated, these cases should never
14653 				 * get here. So the only cases this needs to
14654 				 * handle is a bp from the waitq (which we put
14655 				 * back onto the waitq for sdrunout), or a bp
14656 				 * sent as an immed_bp (which we just fail).
14657 				 */
14658 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14659 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14660 
14661 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14662 
14663 				if (bp == immed_bp) {
14664 					/*
14665 					 * If SD_XB_DMA_FREED is clear, then
14666 					 * this is a failure to allocate a
14667 					 * scsi_pkt, and we must fail the
14668 					 * command.
14669 					 */
14670 					if ((xp->xb_pkt_flags &
14671 					    SD_XB_DMA_FREED) == 0) {
14672 						break;
14673 					}
14674 
14675 					/*
14676 					 * If this immediate command is NOT our
14677 					 * un_retry_bp, then we must fail it.
14678 					 */
14679 					if (bp != un->un_retry_bp) {
14680 						break;
14681 					}
14682 
14683 					/*
14684 					 * We get here if this cmd is our
14685 					 * un_retry_bp that was DMAFREED, but
14686 					 * scsi_init_pkt() failed to reallocate
14687 					 * DMA resources when we attempted to
14688 					 * retry it. This can happen when an
14689 					 * mpxio failover is in progress, but
14690 					 * we don't want to just fail the
14691 					 * command in this case.
14692 					 *
14693 					 * Use timeout(9F) to restart it after
14694 					 * a 100ms delay.  We don't want to
14695 					 * let sdrunout() restart it, because
14696 					 * sdrunout() is just supposed to start
14697 					 * commands that are sitting on the
14698 					 * wait queue.  The un_retry_bp stays
14699 					 * set until the command completes, but
14700 					 * sdrunout can be called many times
14701 					 * before that happens.  Since sdrunout
14702 					 * cannot tell if the un_retry_bp is
14703 					 * already in the transport, it could
14704 					 * end up calling scsi_transport() for
14705 					 * the un_retry_bp multiple times.
14706 					 *
14707 					 * Also: don't schedule the callback
14708 					 * if some other callback is already
14709 					 * pending.
14710 					 */
14711 					if (un->un_retry_statp == NULL) {
14712 						/*
14713 						 * restore the kstat pointer to
14714 						 * keep kstat counts coherent
14715 						 * when we do retry the command.
14716 						 */
14717 						un->un_retry_statp =
14718 						    saved_statp;
14719 					}
14720 
14721 					if ((un->un_startstop_timeid == NULL) &&
14722 					    (un->un_retry_timeid == NULL) &&
14723 					    (un->un_direct_priority_timeid ==
14724 					    NULL)) {
14725 
14726 						un->un_retry_timeid =
14727 						    timeout(
14728 						    sd_start_retry_command,
14729 						    un, SD_RESTART_TIMEOUT);
14730 					}
14731 					goto exit;
14732 				}
14733 
14734 #else
14735 				if (bp == immed_bp) {
14736 					break;	/* Just fail the command */
14737 				}
14738 #endif
14739 
14740 				/* Add the buf back to the head of the waitq */
14741 				bp->av_forw = un->un_waitq_headp;
14742 				un->un_waitq_headp = bp;
14743 				if (un->un_waitq_tailp == NULL) {
14744 					un->un_waitq_tailp = bp;
14745 				}
14746 				goto exit;
14747 
14748 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14749 				/*
14750 				 * HBA DMA resource failure. Fail the command
14751 				 * and continue processing of the queues.
14752 				 */
14753 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14754 				    "sd_start_cmds: "
14755 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14756 				break;
14757 
14758 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14759 				/*
14760 				 * Note:x86: Partial DMA mapping not supported
14761 				 * for USCSI commands, and all the needed DMA
14762 				 * resources were not allocated.
14763 				 */
14764 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14765 				    "sd_start_cmds: "
14766 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14767 				break;
14768 
14769 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14770 				/*
14771 				 * Note:x86: Request cannot fit into CDB based
14772 				 * on lba and len.
14773 				 */
14774 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14775 				    "sd_start_cmds: "
14776 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14777 				break;
14778 
14779 			default:
14780 				/* Should NEVER get here! */
14781 				panic("scsi_initpkt error");
14782 				/*NOTREACHED*/
14783 			}
14784 
14785 			/*
14786 			 * Fatal error in allocating a scsi_pkt for this buf.
14787 			 * Update kstats & return the buf with an error code.
14788 			 * We must use sd_return_failed_command_no_restart() to
14789 			 * avoid a recursive call back into sd_start_cmds().
14790 			 * However this also means that we must keep processing
14791 			 * the waitq here in order to avoid stalling.
14792 			 */
14793 			if (statp == kstat_waitq_to_runq) {
14794 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14795 			}
14796 			sd_return_failed_command_no_restart(un, bp, EIO);
14797 			if (bp == immed_bp) {
14798 				/* immed_bp is gone by now, so clear this */
14799 				immed_bp = NULL;
14800 			}
14801 			continue;
14802 		}
14803 got_pkt:
14804 		if (bp == immed_bp) {
14805 			/* goto the head of the class.... */
14806 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14807 		}
14808 
14809 		un->un_ncmds_in_transport++;
14810 		SD_UPDATE_KSTATS(un, statp, bp);
14811 
14812 		/*
14813 		 * Call scsi_transport() to send the command to the target.
14814 		 * According to SCSA architecture, we must drop the mutex here
14815 		 * before calling scsi_transport() in order to avoid deadlock.
14816 		 * Note that the scsi_pkt's completion routine can be executed
14817 		 * (from interrupt context) even before the call to
14818 		 * scsi_transport() returns.
14819 		 */
14820 		SD_TRACE(SD_LOG_IO_CORE, un,
14821 		    "sd_start_cmds: calling scsi_transport()\n");
14822 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14823 
14824 		mutex_exit(SD_MUTEX(un));
14825 		rval = scsi_transport(xp->xb_pktp);
14826 		mutex_enter(SD_MUTEX(un));
14827 
14828 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14829 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14830 
14831 		switch (rval) {
14832 		case TRAN_ACCEPT:
14833 			/* Clear this with every pkt accepted by the HBA */
14834 			un->un_tran_fatal_count = 0;
14835 			break;	/* Success; try the next cmd (if any) */
14836 
14837 		case TRAN_BUSY:
14838 			un->un_ncmds_in_transport--;
14839 			ASSERT(un->un_ncmds_in_transport >= 0);
14840 
14841 			/*
14842 			 * Don't retry request sense, the sense data
14843 			 * is lost when another request is sent.
14844 			 * Free up the rqs buf and retry
14845 			 * the original failed cmd.  Update kstat.
14846 			 */
14847 			if (bp == un->un_rqs_bp) {
14848 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14849 				bp = sd_mark_rqs_idle(un, xp);
14850 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14851 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14852 					kstat_waitq_enter);
14853 				goto exit;
14854 			}
14855 
14856 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14857 			/*
14858 			 * Free the DMA resources for the  scsi_pkt. This will
14859 			 * allow mpxio to select another path the next time
14860 			 * we call scsi_transport() with this scsi_pkt.
14861 			 * See sdintr() for the rationalization behind this.
14862 			 */
14863 			if ((un->un_f_is_fibre == TRUE) &&
14864 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14865 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14866 				scsi_dmafree(xp->xb_pktp);
14867 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14868 			}
14869 #endif
14870 
14871 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14872 				/*
14873 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14874 				 * are for error recovery situations. These do
14875 				 * not use the normal command waitq, so if they
14876 				 * get a TRAN_BUSY we cannot put them back onto
14877 				 * the waitq for later retry. One possible
14878 				 * problem is that there could already be some
14879 				 * other command on un_retry_bp that is waiting
14880 				 * for this one to complete, so we would be
14881 				 * deadlocked if we put this command back onto
14882 				 * the waitq for later retry (since un_retry_bp
14883 				 * must complete before the driver gets back to
14884 				 * commands on the waitq).
14885 				 *
14886 				 * To avoid deadlock we must schedule a callback
14887 				 * that will restart this command after a set
14888 				 * interval.  This should keep retrying for as
14889 				 * long as the underlying transport keeps
14890 				 * returning TRAN_BUSY (just like for other
14891 				 * commands).  Use the same timeout interval as
14892 				 * for the ordinary TRAN_BUSY retry.
14893 				 */
14894 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14895 				    "sd_start_cmds: scsi_transport() returned "
14896 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14897 
14898 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14899 				un->un_direct_priority_timeid =
14900 				    timeout(sd_start_direct_priority_command,
14901 				    bp, SD_BSY_TIMEOUT / 500);
14902 
14903 				goto exit;
14904 			}
14905 
14906 			/*
14907 			 * For TRAN_BUSY, we want to reduce the throttle value,
14908 			 * unless we are retrying a command.
14909 			 */
14910 			if (bp != un->un_retry_bp) {
14911 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14912 			}
14913 
14914 			/*
14915 			 * Set up the bp to be tried again 10 ms later.
14916 			 * Note:x86: Is there a timeout value in the sd_lun
14917 			 * for this condition?
14918 			 */
14919 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14920 				kstat_runq_back_to_waitq);
14921 			goto exit;
14922 
14923 		case TRAN_FATAL_ERROR:
14924 			un->un_tran_fatal_count++;
14925 			/* FALLTHRU */
14926 
14927 		case TRAN_BADPKT:
14928 		default:
14929 			un->un_ncmds_in_transport--;
14930 			ASSERT(un->un_ncmds_in_transport >= 0);
14931 
14932 			/*
14933 			 * If this is our REQUEST SENSE command with a
14934 			 * transport error, we must get back the pointers
14935 			 * to the original buf, and mark the REQUEST
14936 			 * SENSE command as "available".
14937 			 */
14938 			if (bp == un->un_rqs_bp) {
14939 				bp = sd_mark_rqs_idle(un, xp);
14940 				xp = SD_GET_XBUF(bp);
14941 			} else {
14942 				/*
14943 				 * Legacy behavior: do not update transport
14944 				 * error count for request sense commands.
14945 				 */
14946 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14947 			}
14948 
14949 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14950 			sd_print_transport_rejected_message(un, xp, rval);
14951 
14952 			/*
14953 			 * We must use sd_return_failed_command_no_restart() to
14954 			 * avoid a recursive call back into sd_start_cmds().
14955 			 * However this also means that we must keep processing
14956 			 * the waitq here in order to avoid stalling.
14957 			 */
14958 			sd_return_failed_command_no_restart(un, bp, EIO);
14959 
14960 			/*
14961 			 * Notify any threads waiting in sd_ddi_suspend() that
14962 			 * a command completion has occurred.
14963 			 */
14964 			if (un->un_state == SD_STATE_SUSPENDED) {
14965 				cv_broadcast(&un->un_disk_busy_cv);
14966 			}
14967 
14968 			if (bp == immed_bp) {
14969 				/* immed_bp is gone by now, so clear this */
14970 				immed_bp = NULL;
14971 			}
14972 			break;
14973 		}
14974 
14975 	} while (immed_bp == NULL);
14976 
14977 exit:
14978 	ASSERT(mutex_owned(SD_MUTEX(un)));
14979 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14980 }
14981 
14982 
14983 /*
14984  *    Function: sd_return_command
14985  *
14986  * Description: Returns a command to its originator (with or without an
14987  *		error).  Also starts commands waiting to be transported
14988  *		to the target.
14989  *
14990  *     Context: May be called from interrupt, kernel, or timeout context
14991  */
14992 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;
#if defined(__i386) || defined(__amd64)
	struct scsi_pkt *pktp;
#endif

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	/* The internal REQUEST SENSE buf is never returned through here. */
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)
	pktp = SD_GET_PKTP(bp);
#endif

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

#if defined(__i386) || defined(__amd64)
	/*
	 * Note:x86: check for the "sdrestart failed" case: a non-USCSI
	 * command completed without error but with untransferred DMA
	 * residual (xb_dma_resid != 0) while the pkt itself reports no
	 * residual; attempt to set up and send the next partial transfer.
	 */
	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
		(xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}
#endif

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		/* Restart processing of any commands on the wait queue. */
		sd_start_cmds(un, NULL);
		break;
	}

	/* Return this command up the iodone chain to its originator. */
	mutex_exit(SD_MUTEX(un));

	/* Destroy the scsi_pkt via the per-iodone-chain destroy routine. */
	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	/* iodone chain processing runs without SD_MUTEX held. */
	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
15097 
15098 
15099 /*
15100  *    Function: sd_return_failed_command
15101  *
15102  * Description: Command completion when an error occurred.
15103  *
15104  *     Context: May be called from interrupt context
15105  */
15106 
15107 static void
15108 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15109 {
15110 	ASSERT(bp != NULL);
15111 	ASSERT(un != NULL);
15112 	ASSERT(mutex_owned(SD_MUTEX(un)));
15113 
15114 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15115 	    "sd_return_failed_command: entry\n");
15116 
15117 	/*
15118 	 * b_resid could already be nonzero due to a partial data
15119 	 * transfer, so do not change it here.
15120 	 */
15121 	SD_BIOERROR(bp, errcode);
15122 
15123 	sd_return_command(un, bp);
15124 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15125 	    "sd_return_failed_command: exit\n");
15126 }
15127 
15128 
15129 /*
15130  *    Function: sd_return_failed_command_no_restart
15131  *
15132  * Description: Same as sd_return_failed_command, but ensures that no
15133  *		call back into sd_start_cmds will be issued.
15134  *
15135  *     Context: May be called from interrupt context
15136  */
15137 
static void
sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
	int errcode)
{
	struct sd_xbuf *xp;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(errcode != 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: entry\n");

	/*
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	/*
	 * If this is the failfast bp, clear it. This can happen if the
	 * failfast bp encountered a fatal error when we attempted to
	 * re-try it (such as a scsi_transport(9F) failure).  However
	 * we should NOT be in an active failfast state if the failfast
	 * bp is not NULL.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	if (bp == un->un_retry_bp) {
		/*
		 * This command was retried one or more times. Show that we are
		 * done with it, and allow processing of the waitq to resume.
		 */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_failed_command_no_restart: "
		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	/*
	 * Unlike sd_return_command(), we deliberately do NOT call
	 * sd_start_cmds() here; the caller (e.g. the pkt-allocation
	 * failure path in sd_start_cmds itself) continues processing
	 * the waitq and must avoid a recursive restart.
	 */
	mutex_exit(SD_MUTEX(un));

	/*
	 * The pkt may never have been (re)allocated for this buf (e.g.
	 * a scsi_init_pkt() failure), so only destroy it if present.
	 */
	if (xp->xb_pktp != NULL) {
		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
		xp->xb_pktp = NULL;
	}

	/* iodone chain processing runs without SD_MUTEX held. */
	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: exit\n");
}
15201 
15202 
15203 /*
15204  *    Function: sd_retry_command
15205  *
15206  * Description: queue up a command for retry, or (optionally) fail it
15207  *		if retry counts are exhausted.
15208  *
15209  *   Arguments: un - Pointer to the sd_lun struct for the target.
15210  *
15211  *		bp - Pointer to the buf for the command to be retried.
15212  *
15213  *		retry_check_flag - Flag to see which (if any) of the retry
15214  *		   counts should be decremented/checked. If the indicated
15215  *		   retry count is exhausted, then the command will not be
15216  *		   retried; it will be failed instead. This should use a
15217  *		   value equal to one of the following:
15218  *
 *			SD_RETRIES_NOCHECK
 *			SD_RETRIES_STANDARD
 *			SD_RETRIES_VICTIM
 *			SD_RETRIES_UA
 *			SD_RETRIES_BUSY
15222  *
15223  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15224  *		   if the check should be made to see of FLAG_ISOLATE is set
 *		   if the check should be made to see if FLAG_ISOLATE is set
15226  *		   not retried, it is simply failed.
15227  *
15228  *		user_funcp - Ptr to function to call before dispatching the
15229  *		   command. May be NULL if no action needs to be performed.
15230  *		   (Primarily intended for printing messages.)
15231  *
15232  *		user_arg - Optional argument to be passed along to
15233  *		   the user_funcp call.
15234  *
15235  *		failure_code - errno return code to set in the bp if the
15236  *		   command is going to be failed.
15237  *
15238  *		retry_delay - Retry delay interval in (clock_t) units. May
15239  *		   be zero which indicates that the retry should be retried
15240  *		   immediately (ie, without an intervening delay).
15241  *
15242  *		statp - Ptr to kstat function to be updated if the command
15243  *		   is queued for a delayed retry. May be NULL if no kstat
15244  *		   update is desired.
15245  *
15246  *     Context: May be called from interupt context.
 *     Context: May be called from interrupt context.
15248 
static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
	code), void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport().
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error and fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended or dumping, then put the command onto the
	 * head of the wait queue since we don't want to start more commands.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			un->un_waitq_tailp = bp;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even though this
			 * will take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				/* mask off the group bits of the opcode */
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
			xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		/*
		 * Note: busy retries share xb_retry_count with the
		 * standard retries, but are limited by the (typically
		 * smaller) un_busy_retry_count.
		 */
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/*
	 * Mark the pkt FLAG_HEAD so the retry is transported ahead of
	 * other commands queued at the HBA.
	 */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry.
			 */
			retry_delay = SD_BSY_TIMEOUT;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit "
			    "throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#ifdef __lock_lint
				sd_print_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_cmd_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_sense_failed_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#endif
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:

	/* Give the caller a chance to log a "command failed" message. */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
15576 
15577 
15578 /*
15579  *    Function: sd_set_retry_bp
15580  *
15581  * Description: Set up the given bp for retry.
15582  *
15583  *   Arguments: un - ptr to associated softstate
15584  *		bp - ptr to buf(9S) for the command
15585  *		retry_delay - time interval before issuing retry (may be 0)
15586  *		statp - optional pointer to kstat function
15587  *
15588  *     Context: May be called under interrupt context
15589  */
15590 
static void
sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
	void (*statp)(kstat_io_t *))
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);

	/*
	 * Indicate that the command is being retried. This will not allow any
	 * other commands on the wait queue to be transported to the target
	 * until this command has been completed (success or failure). The
	 * "retry command" is not transported to the target until the given
	 * time delay expires, unless the user specified a 0 retry_delay.
	 *
	 * Note: the timeout(9F) callback routine is what actually calls
	 * sd_start_cmds() to transport the command, with the exception of a
	 * zero retry_delay. The only current implementor of a zero retry delay
	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
	 */
	if (un->un_retry_bp == NULL) {
		/* No retry currently pending; claim this bp as the retry cmd */
		ASSERT(un->un_retry_statp == NULL);
		un->un_retry_bp = bp;

		/*
		 * If the user has not specified a delay the command should
		 * be queued and no timeout should be scheduled.
		 */
		if (retry_delay == 0) {
			/*
			 * Save the kstat pointer that will be used in the
			 * call to SD_UPDATE_KSTATS() below, so that
			 * sd_start_cmds() can correctly decrement the waitq
			 * count when it is time to transport this command.
			 */
			un->un_retry_statp = statp;
			goto done;
		}
	}

	if (un->un_retry_bp == bp) {
		/*
		 * Save the kstat pointer that will be used in the call to
		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
		 * correctly decrement the waitq count when it is time to
		 * transport this command.
		 */
		un->un_retry_statp = statp;

		/*
		 * Schedule a timeout if:
		 *   1) The user has specified a delay.
		 *   2) There is not a START_STOP_UNIT callback pending.
		 *
		 * If no delay has been specified, then it is up to the caller
		 * to ensure that IO processing continues without stalling.
		 * Effectively, this means that the caller will issue the
		 * required call to sd_start_cmds(). The START_STOP_UNIT
		 * callback does this after the START STOP UNIT command has
		 * completed. In either of these cases we should not schedule
		 * a timeout callback here.  Also don't schedule the timeout if
		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
		 */
		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
		    (un->un_direct_priority_timeid == NULL)) {
			un->un_retry_timeid =
			    timeout(sd_start_retry_command, un, retry_delay);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_set_retry_bp: setting timeout: un: 0x%p"
			    " bp:0x%p un_retry_timeid:0x%p\n",
			    un, bp, un->un_retry_timeid);
		}
	} else {
		/*
		 * We only get in here if there is already another command
		 * waiting to be retried.  In this case, we just put the
		 * given command onto the wait queue, so it can be transported
		 * after the current retry command has completed.
		 *
		 * Also we have to make sure that if the command at the head
		 * of the wait queue is the un_failfast_bp, that we do not
		 * put ahead of it any other commands that are to be retried.
		 */
		if ((un->un_failfast_bp != NULL) &&
		    (un->un_failfast_bp == un->un_waitq_headp)) {
			/*
			 * Enqueue this command AFTER the first command on
			 * the wait queue (which is also un_failfast_bp).
			 */
			bp->av_forw = un->un_waitq_headp->av_forw;
			un->un_waitq_headp->av_forw = bp;
			if (un->un_waitq_headp == un->un_waitq_tailp) {
				un->un_waitq_tailp = bp;
			}
		} else {
			/* Enqueue this command at the head of the waitq. */
			bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = bp;
			}
		}

		if (statp == NULL) {
			/*
			 * No kstat routine was provided, but this bp just
			 * went onto the wait queue, so account for it there.
			 */
			statp = kstat_waitq_enter;
		}
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
	}

done:
	/* Update the kstat waitq/runq accounting for this command (if any) */
	if (statp != NULL) {
		SD_UPDATE_KSTATS(un, statp, bp);
	}

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: exit un:0x%p\n", un);
}
15712 
15713 
15714 /*
15715  *    Function: sd_start_retry_command
15716  *
15717  * Description: Start the command that has been waiting on the target's
15718  *		retry queue.  Called from timeout(9F) context after the
15719  *		retry delay interval has expired.
15720  *
15721  *   Arguments: arg - pointer to associated softstate for the device.
15722  *
15723  *     Context: timeout(9F) thread context.  May not sleep.
15724  */
15725 
15726 static void
15727 sd_start_retry_command(void *arg)
15728 {
15729 	struct sd_lun *un = arg;
15730 
15731 	ASSERT(un != NULL);
15732 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15733 
15734 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15735 	    "sd_start_retry_command: entry\n");
15736 
15737 	mutex_enter(SD_MUTEX(un));
15738 
15739 	un->un_retry_timeid = NULL;
15740 
15741 	if (un->un_retry_bp != NULL) {
15742 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15743 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15744 		    un, un->un_retry_bp);
15745 		sd_start_cmds(un, un->un_retry_bp);
15746 	}
15747 
15748 	mutex_exit(SD_MUTEX(un));
15749 
15750 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15751 	    "sd_start_retry_command: exit\n");
15752 }
15753 
15754 
15755 /*
15756  *    Function: sd_start_direct_priority_command
15757  *
15758  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15759  *		received TRAN_BUSY when we called scsi_transport() to send it
15760  *		to the underlying HBA. This function is called from timeout(9F)
15761  *		context after the delay interval has expired.
15762  *
15763  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15764  *
15765  *     Context: timeout(9F) thread context.  May not sleep.
15766  */
15767 
15768 static void
15769 sd_start_direct_priority_command(void *arg)
15770 {
15771 	struct buf	*priority_bp = arg;
15772 	struct sd_lun	*un;
15773 
15774 	ASSERT(priority_bp != NULL);
15775 	un = SD_GET_UN(priority_bp);
15776 	ASSERT(un != NULL);
15777 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15778 
15779 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15780 	    "sd_start_direct_priority_command: entry\n");
15781 
15782 	mutex_enter(SD_MUTEX(un));
15783 	un->un_direct_priority_timeid = NULL;
15784 	sd_start_cmds(un, priority_bp);
15785 	mutex_exit(SD_MUTEX(un));
15786 
15787 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15788 	    "sd_start_direct_priority_command: exit\n");
15789 }
15790 
15791 
15792 /*
15793  *    Function: sd_send_request_sense_command
15794  *
15795  * Description: Sends a REQUEST SENSE command to the target
15796  *
15797  *     Context: May be called from interrupt context.
15798  */
15799 
static void
sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp)
{
	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
	    "entry: buf:0x%p\n", bp);

	/*
	 * If we are syncing or dumping, then fail the command to avoid a
	 * recursive callback into scsi_transport(). Also fail the command
	 * if we are suspended (legacy behavior).
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING)) {
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_send_request_sense_command: syncing/dumping, exit\n");
		return;
	}

	/*
	 * Retry the failed command and don't issue the request sense if:
	 *    1) the sense buf is busy
	 *    2) we have 1 or more outstanding commands on the target
	 *    (the sense data will be cleared or invalidated any way)
	 *
	 * Note: There could be an issue with not checking a retry limit here,
	 * the problem is determining which retry limit to check.
	 */
	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
		/* Don't retry if the command is flagged as non-retryable */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, retrying exit\n");
		} else {
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, non-retryable exit\n");
		}
		return;
	}

	/*
	 * Claim the unit's RQS resources on behalf of bp, then transport
	 * the REQUEST SENSE command (un_rqs_bp) to the target.
	 */
	sd_mark_rqs_busy(un, bp);
	sd_start_cmds(un, un->un_rqs_bp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_send_request_sense_command: exit\n");
}
15856 
15857 
15858 /*
15859  *    Function: sd_mark_rqs_busy
15860  *
15861  * Description: Indicate that the request sense bp for this instance is
15862  *		in use.
15863  *
15864  *     Context: May be called under interrupt context
15865  */
15866 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Claim the RQS resources and reset the reusable pkt/buf state. */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	/* Clear any stale sense data before transporting the new request. */
	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Reset the retry counters for the (reused) RQS command. */
	sense_xp->xb_retry_count	= 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count	= 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state  = 0;
	sense_xp->xb_sense_resid  = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
15919 
15920 
15921 /*
15922  *    Function: sd_mark_rqs_idle
15923  *
15924  * Description: SD_MUTEX must be held continuously through this routine
15925  *		to prevent reuse of the rqs struct before the caller can
 *		complete its processing.
15927  *
15928  * Return Code: Pointer to the RQS buf
15929  *
15930  *     Context: May be called under interrupt context
15931  */
15932 
15933 static struct buf *
15934 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15935 {
15936 	struct buf *bp;
15937 	ASSERT(un != NULL);
15938 	ASSERT(sense_xp != NULL);
15939 	ASSERT(mutex_owned(SD_MUTEX(un)));
15940 	ASSERT(un->un_sense_isbusy != 0);
15941 
15942 	un->un_sense_isbusy = 0;
15943 	bp = sense_xp->xb_sense_bp;
15944 	sense_xp->xb_sense_bp = NULL;
15945 
15946 	/* This pkt is no longer interested in getting sense data */
15947 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15948 
15949 	return (bp);
15950 }
15951 
15952 
15953 
15954 /*
15955  *    Function: sd_alloc_rqs
15956  *
15957  * Description: Set up the unit to receive auto request sense data
15958  *
15959  * Return Code: DDI_SUCCESS or DDI_FAILURE
15960  *
15961  *     Context: Called under attach(9E) context
15962  */
15963 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		/* Undo the consistent buf allocation above before bailing. */
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */

	/*
	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
	 * provide any initpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is the HBA (adp) always return enabled on
	 * scsi_ifgetcap even when it's not enabled; the best approach
	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs are always treated as ARQ-enabled. */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__i386) || defined(__amd64)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
				== 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
					"sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
16095 
16096 
16097 /*
16098  *    Function: sd_free_rqs
16099  *
16100  * Description: Cleanup for the pre-instance RQS command.
16101  *
16102  *     Context: Kernel thread context
16103  */
16104 
static void
sd_free_rqs(struct sd_lun *un)
{
	ASSERT(un != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");

	/*
	 * If consistent memory is bound to a scsi_pkt, the pkt
	 * has to be destroyed *before* freeing the consistent memory.
	 * Do not change the sequence of these operations.
	 * scsi_destroy_pkt() might access memory, which isn't allowed,
	 * after it was freed in scsi_free_consistent_buf().
	 */
	if (un->un_rqs_pktp != NULL) {
		scsi_destroy_pkt(un->un_rqs_pktp);
		un->un_rqs_pktp = NULL;
	}

	if (un->un_rqs_bp != NULL) {
		/* Free the sd_xbuf first, then the consistent buf itself. */
		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
		scsi_free_consistent_buf(un->un_rqs_bp);
		un->un_rqs_bp = NULL;
	}
	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
}
16131 
16132 
16133 
16134 /*
16135  *    Function: sd_reduce_throttle
16136  *
 * Description: Reduces the maximum # of outstanding commands on a
 *		target to the current number of outstanding commands.
 *		Queues a timeout(9F) callback to restore the limit
 *		after a specified interval has elapsed.
16141  *		Typically used when we get a TRAN_BUSY return code
16142  *		back from scsi_transport().
16143  *
16144  *   Arguments: un - ptr to the sd_lun softstate struct
16145  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16146  *
16147  *     Context: May be called from interrupt context
16148  */
16149 
16150 static void
16151 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16152 {
16153 	ASSERT(un != NULL);
16154 	ASSERT(mutex_owned(SD_MUTEX(un)));
16155 	ASSERT(un->un_ncmds_in_transport >= 0);
16156 
16157 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16158 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16159 	    un, un->un_throttle, un->un_ncmds_in_transport);
16160 
16161 	if (un->un_throttle > 1) {
16162 		if (un->un_f_use_adaptive_throttle == TRUE) {
16163 			switch (throttle_type) {
16164 			case SD_THROTTLE_TRAN_BUSY:
16165 				if (un->un_busy_throttle == 0) {
16166 					un->un_busy_throttle = un->un_throttle;
16167 				}
16168 				break;
16169 			case SD_THROTTLE_QFULL:
16170 				un->un_busy_throttle = 0;
16171 				break;
16172 			default:
16173 				ASSERT(FALSE);
16174 			}
16175 
16176 			if (un->un_ncmds_in_transport > 0) {
16177 			    un->un_throttle = un->un_ncmds_in_transport;
16178 			}
16179 
16180 		} else {
16181 			if (un->un_ncmds_in_transport == 0) {
16182 				un->un_throttle = 1;
16183 			} else {
16184 				un->un_throttle = un->un_ncmds_in_transport;
16185 			}
16186 		}
16187 	}
16188 
16189 	/* Reschedule the timeout if none is currently active */
16190 	if (un->un_reset_throttle_timeid == NULL) {
16191 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16192 		    un, SD_THROTTLE_RESET_INTERVAL);
16193 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16194 		    "sd_reduce_throttle: timeout scheduled!\n");
16195 	}
16196 
16197 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16198 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16199 }
16200 
16201 
16202 
16203 /*
16204  *    Function: sd_restore_throttle
16205  *
16206  * Description: Callback function for timeout(9F).  Resets the current
16207  *		value of un->un_throttle to its default.
16208  *
16209  *   Arguments: arg - pointer to associated softstate for the device.
16210  *
16211  *     Context: May be called from interrupt context
16212  */
16213 
static void
sd_restore_throttle(void *arg)
{
	struct sd_lun	*un = arg;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);

	/* The timeout that invoked this callback has now fired. */
	un->un_reset_throttle_timeid = NULL;

	if (un->un_f_use_adaptive_throttle == TRUE) {
		/*
		 * If un_busy_throttle is nonzero, then it contains the
		 * value that un_throttle was when we got a TRAN_BUSY back
		 * from scsi_transport(). We want to revert back to this
		 * value.
		 *
		 * In the QFULL case, the throttle limit will incrementally
		 * increase until it reaches max throttle.
		 */
		if (un->un_busy_throttle > 0) {
			un->un_throttle = un->un_busy_throttle;
			un->un_busy_throttle = 0;
		} else {
			/*
			 * increase throttle by 10% open gate slowly, schedule
			 * another restore if saved throttle has not been
			 * reached
			 */
			short throttle;
			if (sd_qfull_throttle_enable) {
				throttle = un->un_throttle +
				    max((un->un_throttle / 10), 1);
				un->un_throttle =
				    (throttle < un->un_saved_throttle) ?
				    throttle : un->un_saved_throttle;
				if (un->un_throttle < un->un_saved_throttle) {
				    un->un_reset_throttle_timeid =
					timeout(sd_restore_throttle,
					un, SD_QFULL_THROTTLE_RESET_INTERVAL);
				}
			}
		}

		/*
		 * If un_throttle has fallen below the low-water mark, we
		 * restore the maximum value here (and allow it to ratchet
		 * down again if necessary).
		 */
		if (un->un_throttle < un->un_min_throttle) {
			un->un_throttle = un->un_saved_throttle;
		}
	} else {
		/* Non-adaptive mode: restore the saved limit directly. */
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
		    "restoring limit from 0x%x to 0x%x\n",
		    un->un_throttle, un->un_saved_throttle);
		un->un_throttle = un->un_saved_throttle;
	}

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: calling sd_start_cmds!\n");

	/* Restart any commands that were waiting for the throttle to open. */
	sd_start_cmds(un, NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
	    un, un->un_throttle);

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
}
16291 
16292 /*
16293  *    Function: sdrunout
16294  *
16295  * Description: Callback routine for scsi_init_pkt when a resource allocation
16296  *		fails.
16297  *
16298  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16299  *		soft state instance.
16300  *
16301  * Return Code: The scsi_init_pkt routine allows for the callback function to
16302  *		return a 0 indicating the callback should be rescheduled or a 1
16303  *		indicating not to reschedule. This routine always returns 1
16304  *		because the driver always provides a callback function to
16305  *		scsi_init_pkt. This results in a callback always being scheduled
16306  *		(via the scsi_init_pkt callback implementation) if a resource
16307  *		failure occurs.
16308  *
16309  *     Context: This callback function may not block or call routines that block
16310  *
16311  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16312  *		request persisting at the head of the list which cannot be
16313  *		satisfied even after multiple retries. In the future the driver
16314  *		may implement some time of maximum runout count before failing
16315  *		an I/O.
16316  */
16317 
16318 static int
16319 sdrunout(caddr_t arg)
16320 {
16321 	struct sd_lun	*un = (struct sd_lun *)arg;
16322 
16323 	ASSERT(un != NULL);
16324 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16325 
16326 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16327 
16328 	mutex_enter(SD_MUTEX(un));
16329 	sd_start_cmds(un, NULL);
16330 	mutex_exit(SD_MUTEX(un));
16331 	/*
16332 	 * This callback routine always returns 1 (i.e. do not reschedule)
16333 	 * because we always specify sdrunout as the callback handler for
16334 	 * scsi_init_pkt inside the call to sd_start_cmds.
16335 	 */
16336 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16337 	return (1);
16338 }
16339 
16340 
16341 /*
16342  *    Function: sdintr
16343  *
16344  * Description: Completion callback routine for scsi_pkt(9S) structs
16345  *		sent to the HBA driver via scsi_transport(9F).
16346  *
16347  *     Context: Interrupt context
16348  */
16349 
16350 static void
16351 sdintr(struct scsi_pkt *pktp)
16352 {
16353 	struct buf	*bp;
16354 	struct sd_xbuf	*xp;
16355 	struct sd_lun	*un;
16356 
16357 	ASSERT(pktp != NULL);
16358 	bp = (struct buf *)pktp->pkt_private;
16359 	ASSERT(bp != NULL);
16360 	xp = SD_GET_XBUF(bp);
16361 	ASSERT(xp != NULL);
16362 	ASSERT(xp->xb_pktp != NULL);
16363 	un = SD_GET_UN(bp);
16364 	ASSERT(un != NULL);
16365 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16366 
16367 #ifdef SD_FAULT_INJECTION
16368 
16369 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16370 	/* SD FaultInjection */
16371 	sd_faultinjection(pktp);
16372 
16373 #endif /* SD_FAULT_INJECTION */
16374 
16375 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16376 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16377 
16378 	mutex_enter(SD_MUTEX(un));
16379 
16380 	/* Reduce the count of the #commands currently in transport */
16381 	un->un_ncmds_in_transport--;
16382 	ASSERT(un->un_ncmds_in_transport >= 0);
16383 
16384 	/* Increment counter to indicate that the callback routine is active */
16385 	un->un_in_callback++;
16386 
16387 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16388 
16389 #ifdef	SDDEBUG
16390 	if (bp == un->un_retry_bp) {
16391 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16392 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16393 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16394 	}
16395 #endif
16396 
16397 	/*
16398 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16399 	 */
16400 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16401 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16402 			    "Device is gone\n");
16403 		sd_return_failed_command(un, bp, EIO);
16404 		goto exit;
16405 	}
16406 
16407 	/*
16408 	 * First see if the pkt has auto-request sense data with it....
16409 	 * Look at the packet state first so we don't take a performance
16410 	 * hit looking at the arq enabled flag unless absolutely necessary.
16411 	 */
16412 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16413 	    (un->un_f_arq_enabled == TRUE)) {
16414 		/*
16415 		 * The HBA did an auto request sense for this command so check
16416 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16417 		 * driver command that should not be retried.
16418 		 */
16419 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16420 			/*
16421 			 * Save the relevant sense info into the xp for the
16422 			 * original cmd.
16423 			 */
16424 			struct scsi_arq_status *asp;
16425 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16426 			xp->xb_sense_status =
16427 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16428 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16429 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16430 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16431 			    min(sizeof (struct scsi_extended_sense),
16432 			    SENSE_LENGTH));
16433 
16434 			/* fail the command */
16435 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16436 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16437 			sd_return_failed_command(un, bp, EIO);
16438 			goto exit;
16439 		}
16440 
16441 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16442 		/*
16443 		 * We want to either retry or fail this command, so free
16444 		 * the DMA resources here.  If we retry the command then
16445 		 * the DMA resources will be reallocated in sd_start_cmds().
16446 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16447 		 * causes the *entire* transfer to start over again from the
16448 		 * beginning of the request, even for PARTIAL chunks that
16449 		 * have already transferred successfully.
16450 		 */
16451 		if ((un->un_f_is_fibre == TRUE) &&
16452 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16453 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16454 			scsi_dmafree(pktp);
16455 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16456 		}
16457 #endif
16458 
16459 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16460 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16461 
16462 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16463 		goto exit;
16464 	}
16465 
16466 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16467 	if (pktp->pkt_flags & FLAG_SENSING)  {
16468 		/* This pktp is from the unit's REQUEST_SENSE command */
16469 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16470 		    "sdintr: sd_handle_request_sense\n");
16471 		sd_handle_request_sense(un, bp, xp, pktp);
16472 		goto exit;
16473 	}
16474 
16475 	/*
16476 	 * Check to see if the command successfully completed as requested;
16477 	 * this is the most common case (and also the hot performance path).
16478 	 *
16479 	 * Requirements for successful completion are:
16480 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16481 	 * In addition:
16482 	 * - A residual of zero indicates successful completion no matter what
16483 	 *   the command is.
16484 	 * - If the residual is not zero and the command is not a read or
16485 	 *   write, then it's still defined as successful completion. In other
16486 	 *   words, if the command is a read or write the residual must be
16487 	 *   zero for successful completion.
16488 	 * - If the residual is not zero and the command is a read or
16489 	 *   write, and it's a USCSICMD, then it's still defined as
16490 	 *   successful completion.
16491 	 */
16492 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16493 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16494 
16495 		/*
16496 		 * Since this command is returned with a good status, we
16497 		 * can reset the count for Sonoma failover.
16498 		 */
16499 		un->un_sonoma_failure_count = 0;
16500 
16501 		/*
16502 		 * Return all USCSI commands on good status
16503 		 */
16504 		if (pktp->pkt_resid == 0) {
16505 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16506 			    "sdintr: returning command for resid == 0\n");
16507 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16508 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16509 			SD_UPDATE_B_RESID(bp, pktp);
16510 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16511 			    "sdintr: returning command for resid != 0\n");
16512 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16513 			SD_UPDATE_B_RESID(bp, pktp);
16514 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16515 				"sdintr: returning uscsi command\n");
16516 		} else {
16517 			goto not_successful;
16518 		}
16519 		sd_return_command(un, bp);
16520 
16521 		/*
16522 		 * Decrement counter to indicate that the callback routine
16523 		 * is done.
16524 		 */
16525 		un->un_in_callback--;
16526 		ASSERT(un->un_in_callback >= 0);
16527 		mutex_exit(SD_MUTEX(un));
16528 
16529 		return;
16530 	}
16531 
16532 not_successful:
16533 
16534 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16535 	/*
16536 	 * The following is based upon knowledge of the underlying transport
16537 	 * and its use of DMA resources.  This code should be removed when
16538 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16539 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16540 	 * and sd_start_cmds().
16541 	 *
16542 	 * Free any DMA resources associated with this command if there
16543 	 * is a chance it could be retried or enqueued for later retry.
16544 	 * If we keep the DMA binding then mpxio cannot reissue the
16545 	 * command on another path whenever a path failure occurs.
16546 	 *
16547 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16548 	 * causes the *entire* transfer to start over again from the
16549 	 * beginning of the request, even for PARTIAL chunks that
16550 	 * have already transferred successfully.
16551 	 *
16552 	 * This is only done for non-uscsi commands (and also skipped for the
16553 	 * driver's internal RQS command). Also just do this for Fibre Channel
16554 	 * devices as these are the only ones that support mpxio.
16555 	 */
16556 	if ((un->un_f_is_fibre == TRUE) &&
16557 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16558 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16559 		scsi_dmafree(pktp);
16560 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16561 	}
16562 #endif
16563 
16564 	/*
16565 	 * The command did not successfully complete as requested so check
16566 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16567 	 * driver command that should not be retried so just return. If
16568 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16569 	 */
16570 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16571 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16572 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16573 		/*
16574 		 * Issue a request sense if a check condition caused the error
16575 		 * (we handle the auto request sense case above), otherwise
16576 		 * just fail the command.
16577 		 */
16578 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16579 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16580 			sd_send_request_sense_command(un, bp, pktp);
16581 		} else {
16582 			sd_return_failed_command(un, bp, EIO);
16583 		}
16584 		goto exit;
16585 	}
16586 
16587 	/*
16588 	 * The command did not successfully complete as requested so process
16589 	 * the error, retry, and/or attempt recovery.
16590 	 */
16591 	switch (pktp->pkt_reason) {
16592 	case CMD_CMPLT:
16593 		switch (SD_GET_PKT_STATUS(pktp)) {
16594 		case STATUS_GOOD:
16595 			/*
16596 			 * The command completed successfully with a non-zero
16597 			 * residual
16598 			 */
16599 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16600 			    "sdintr: STATUS_GOOD \n");
16601 			sd_pkt_status_good(un, bp, xp, pktp);
16602 			break;
16603 
16604 		case STATUS_CHECK:
16605 		case STATUS_TERMINATED:
16606 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16607 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16608 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16609 			break;
16610 
16611 		case STATUS_BUSY:
16612 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16613 			    "sdintr: STATUS_BUSY\n");
16614 			sd_pkt_status_busy(un, bp, xp, pktp);
16615 			break;
16616 
16617 		case STATUS_RESERVATION_CONFLICT:
16618 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16619 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16620 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16621 			break;
16622 
16623 		case STATUS_QFULL:
16624 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16625 			    "sdintr: STATUS_QFULL\n");
16626 			sd_pkt_status_qfull(un, bp, xp, pktp);
16627 			break;
16628 
16629 		case STATUS_MET:
16630 		case STATUS_INTERMEDIATE:
16631 		case STATUS_SCSI2:
16632 		case STATUS_INTERMEDIATE_MET:
16633 		case STATUS_ACA_ACTIVE:
16634 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16635 			    "Unexpected SCSI status received: 0x%x\n",
16636 			    SD_GET_PKT_STATUS(pktp));
16637 			sd_return_failed_command(un, bp, EIO);
16638 			break;
16639 
16640 		default:
16641 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16642 			    "Invalid SCSI status received: 0x%x\n",
16643 			    SD_GET_PKT_STATUS(pktp));
16644 			sd_return_failed_command(un, bp, EIO);
16645 			break;
16646 
16647 		}
16648 		break;
16649 
16650 	case CMD_INCOMPLETE:
16651 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16652 		    "sdintr:  CMD_INCOMPLETE\n");
16653 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16654 		break;
16655 	case CMD_TRAN_ERR:
16656 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16657 		    "sdintr: CMD_TRAN_ERR\n");
16658 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16659 		break;
16660 	case CMD_RESET:
16661 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16662 		    "sdintr: CMD_RESET \n");
16663 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16664 		break;
16665 	case CMD_ABORTED:
16666 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16667 		    "sdintr: CMD_ABORTED \n");
16668 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16669 		break;
16670 	case CMD_TIMEOUT:
16671 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16672 		    "sdintr: CMD_TIMEOUT\n");
16673 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16674 		break;
16675 	case CMD_UNX_BUS_FREE:
16676 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16677 		    "sdintr: CMD_UNX_BUS_FREE \n");
16678 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16679 		break;
16680 	case CMD_TAG_REJECT:
16681 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16682 		    "sdintr: CMD_TAG_REJECT\n");
16683 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16684 		break;
16685 	default:
16686 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16687 		    "sdintr: default\n");
16688 		sd_pkt_reason_default(un, bp, xp, pktp);
16689 		break;
16690 	}
16691 
16692 exit:
16693 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16694 
16695 	/* Decrement counter to indicate that the callback routine is done. */
16696 	un->un_in_callback--;
16697 	ASSERT(un->un_in_callback >= 0);
16698 
16699 	/*
16700 	 * At this point, the pkt has been dispatched, ie, it is either
16701 	 * being re-tried or has been returned to its caller and should
16702 	 * not be referenced.
16703 	 */
16704 
16705 	mutex_exit(SD_MUTEX(un));
16706 }
16707 
16708 
16709 /*
16710  *    Function: sd_print_incomplete_msg
16711  *
16712  * Description: Prints the error message for a CMD_INCOMPLETE error.
16713  *
16714  *   Arguments: un - ptr to associated softstate for the device.
16715  *		bp - ptr to the buf(9S) for the command.
16716  *		arg - message string ptr
16717  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16718  *			or SD_NO_RETRY_ISSUED.
16719  *
16720  *     Context: May be called under interrupt context
16721  */
16722 
16723 static void
16724 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16725 {
16726 	struct scsi_pkt	*pktp;
16727 	char	*msgp;
16728 	char	*cmdp = arg;
16729 
16730 	ASSERT(un != NULL);
16731 	ASSERT(mutex_owned(SD_MUTEX(un)));
16732 	ASSERT(bp != NULL);
16733 	ASSERT(arg != NULL);
16734 	pktp = SD_GET_PKTP(bp);
16735 	ASSERT(pktp != NULL);
16736 
16737 	switch (code) {
16738 	case SD_DELAYED_RETRY_ISSUED:
16739 	case SD_IMMEDIATE_RETRY_ISSUED:
16740 		msgp = "retrying";
16741 		break;
16742 	case SD_NO_RETRY_ISSUED:
16743 	default:
16744 		msgp = "giving up";
16745 		break;
16746 	}
16747 
16748 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16749 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16750 		    "incomplete %s- %s\n", cmdp, msgp);
16751 	}
16752 }
16753 
16754 
16755 
16756 /*
16757  *    Function: sd_pkt_status_good
16758  *
16759  * Description: Processing for a STATUS_GOOD code in pkt_status.
16760  *
16761  *     Context: May be called under interrupt context
16762  */
16763 
16764 static void
16765 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16766 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16767 {
16768 	char	*cmdp;
16769 
16770 	ASSERT(un != NULL);
16771 	ASSERT(mutex_owned(SD_MUTEX(un)));
16772 	ASSERT(bp != NULL);
16773 	ASSERT(xp != NULL);
16774 	ASSERT(pktp != NULL);
16775 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16776 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16777 	ASSERT(pktp->pkt_resid != 0);
16778 
16779 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16780 
16781 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16782 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16783 	case SCMD_READ:
16784 		cmdp = "read";
16785 		break;
16786 	case SCMD_WRITE:
16787 		cmdp = "write";
16788 		break;
16789 	default:
16790 		SD_UPDATE_B_RESID(bp, pktp);
16791 		sd_return_command(un, bp);
16792 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16793 		return;
16794 	}
16795 
16796 	/*
16797 	 * See if we can retry the read/write, preferrably immediately.
16798 	 * If retries are exhaused, then sd_retry_command() will update
16799 	 * the b_resid count.
16800 	 */
16801 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16802 	    cmdp, EIO, (clock_t)0, NULL);
16803 
16804 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16805 }
16806 
16807 
16808 
16809 
16810 
16811 /*
16812  *    Function: sd_handle_request_sense
16813  *
16814  * Description: Processing for non-auto Request Sense command.
16815  *
16816  *   Arguments: un - ptr to associated softstate
16817  *		sense_bp - ptr to buf(9S) for the RQS command
16818  *		sense_xp - ptr to the sd_xbuf for the RQS command
16819  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16820  *
16821  *     Context: May be called under interrupt context
16822  */
16823 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	/*
	 * The original command's pkt must still be marked FLAG_SENSING,
	 * i.e. it is the command this RQS was issued on behalf of.
	 */
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
		/*
		 * NOTE(review): with FLAG_DIAGNOSE set we fall through
		 * here, save the (per the comment below, zeroed) sense
		 * state, and then fail the command in the else clause at
		 * the bottom of this function.
		 */
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		/* Non-retryable: log the CDB and sense data, fail with EIO. */
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
16912 
16913 
16914 
16915 
16916 /*
16917  *    Function: sd_handle_auto_request_sense
16918  *
16919  * Description: Processing for auto-request sense information.
16920  *
16921  *   Arguments: un - ptr to associated softstate
16922  *		bp - ptr to buf(9S) for the command
16923  *		xp - ptr to the sd_xbuf for the command
16924  *		pktp - ptr to the scsi_pkt(9S) for the command
16925  *
16926  *     Context: May be called under interrupt context
16927  */
16928 
16929 static void
16930 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16931 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16932 {
16933 	struct scsi_arq_status *asp;
16934 
16935 	ASSERT(un != NULL);
16936 	ASSERT(mutex_owned(SD_MUTEX(un)));
16937 	ASSERT(bp != NULL);
16938 	ASSERT(xp != NULL);
16939 	ASSERT(pktp != NULL);
16940 	ASSERT(pktp != un->un_rqs_pktp);
16941 	ASSERT(bp   != un->un_rqs_bp);
16942 
16943 	/*
16944 	 * For auto-request sense, we get a scsi_arq_status back from
16945 	 * the HBA, with the sense data in the sts_sensedata member.
16946 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16947 	 */
16948 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16949 
16950 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16951 		/*
16952 		 * The auto REQUEST SENSE failed; see if we can re-try
16953 		 * the original command.
16954 		 */
16955 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16956 		    "auto request sense failed (reason=%s)\n",
16957 		    scsi_rname(asp->sts_rqpkt_reason));
16958 
16959 		sd_reset_target(un, pktp);
16960 
16961 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16962 		    NULL, NULL, EIO, (clock_t)0, NULL);
16963 		return;
16964 	}
16965 
16966 	/* Save the relevant sense info into the xp for the original cmd. */
16967 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16968 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16969 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16970 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16971 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16972 
16973 	/*
16974 	 * See if we have valid sense data, if so then turn it over to
16975 	 * sd_decode_sense() to figure out the right course of action.
16976 	 */
16977 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16978 		sd_decode_sense(un, bp, xp, pktp);
16979 	}
16980 }
16981 
16982 
16983 /*
16984  *    Function: sd_print_sense_failed_msg
16985  *
16986  * Description: Print log message when RQS has failed.
16987  *
16988  *   Arguments: un - ptr to associated softstate
16989  *		bp - ptr to buf(9S) for the command
16990  *		arg - generic message string ptr
16991  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16992  *			or SD_NO_RETRY_ISSUED
16993  *
16994  *     Context: May be called from interrupt context
16995  */
16996 
16997 static void
16998 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16999 	int code)
17000 {
17001 	char	*msgp = arg;
17002 
17003 	ASSERT(un != NULL);
17004 	ASSERT(mutex_owned(SD_MUTEX(un)));
17005 	ASSERT(bp != NULL);
17006 
17007 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17008 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
17009 	}
17010 }
17011 
17012 
17013 /*
17014  *    Function: sd_validate_sense_data
17015  *
17016  * Description: Check the given sense data for validity.
17017  *		If the sense data is not valid, the command will
17018  *		be either failed or retried!
17019  *
17020  * Return Code: SD_SENSE_DATA_IS_INVALID
17021  *		SD_SENSE_DATA_IS_VALID
17022  *
17023  *     Context: May be called from interrupt context
17024  */
17025 
static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	size_t	actual_len;
	char	*msgp = NULL;	/* failure message for sd_print_sense_failed_msg */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		/* Reservation conflicts have their own dedicated handling. */
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		/* Device busy: retry the original command after a delay. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		/* Queue full: retry with the standard retry count. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	/*
	 * NOTE(review): actual_len is a size_t assigned from an (int)
	 * subtraction; if xb_sense_resid ever exceeded SENSE_LENGTH the
	 * value would wrap to a huge unsigned number — relies on the
	 * assumption stated above. Verify against the RQS allocation.
	 */
	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			/*
			 * Dump the raw (undecodable) sense bytes in hex.
			 * The static tmp/buf scratch buffers are shared, so
			 * serialize access with sd_sense_mutex.
			 */
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
			mutex_exit(&sd_sense_mutex);
		}
		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * make up the "response code" field.  es_class will always be 7, so
	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
	 * format.
	 */
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
#if defined(__i386) || defined(__amd64)
	/*
	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
	 * sddef.h for Sparc platform, and x86 uses 1 binary
	 * for both SCSI/FC.
	 * The SD_RETRY_DELAY value need to be adjusted here
	 * when SD_RETRY_DELAY change in sddef.h
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
		un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
#else
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
#endif

	return (SD_SENSE_DATA_IS_INVALID);
}
17161 
17162 
17163 
17164 /*
17165  *    Function: sd_decode_sense
17166  *
17167  * Description: Take recovery action(s) when SCSI Sense Data is received.
17168  *
17169  *     Context: Interrupt context.
17170  */
17171 
17172 static void
17173 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17174 	struct scsi_pkt *pktp)
17175 {
17176 	uint8_t sense_key;
17177 
17178 	ASSERT(un != NULL);
17179 	ASSERT(mutex_owned(SD_MUTEX(un)));
17180 	ASSERT(bp != NULL);
17181 	ASSERT(bp != un->un_rqs_bp);
17182 	ASSERT(xp != NULL);
17183 	ASSERT(pktp != NULL);
17184 
17185 	sense_key = scsi_sense_key(xp->xb_sense_data);
17186 
17187 	switch (sense_key) {
17188 	case KEY_NO_SENSE:
17189 		sd_sense_key_no_sense(un, bp, xp, pktp);
17190 		break;
17191 	case KEY_RECOVERABLE_ERROR:
17192 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17193 		    bp, xp, pktp);
17194 		break;
17195 	case KEY_NOT_READY:
17196 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17197 		    bp, xp, pktp);
17198 		break;
17199 	case KEY_MEDIUM_ERROR:
17200 	case KEY_HARDWARE_ERROR:
17201 		sd_sense_key_medium_or_hardware_error(un,
17202 		    xp->xb_sense_data, bp, xp, pktp);
17203 		break;
17204 	case KEY_ILLEGAL_REQUEST:
17205 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17206 		break;
17207 	case KEY_UNIT_ATTENTION:
17208 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17209 		    bp, xp, pktp);
17210 		break;
17211 	case KEY_WRITE_PROTECT:
17212 	case KEY_VOLUME_OVERFLOW:
17213 	case KEY_MISCOMPARE:
17214 		sd_sense_key_fail_command(un, bp, xp, pktp);
17215 		break;
17216 	case KEY_BLANK_CHECK:
17217 		sd_sense_key_blank_check(un, bp, xp, pktp);
17218 		break;
17219 	case KEY_ABORTED_COMMAND:
17220 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17221 		break;
17222 	case KEY_VENDOR_UNIQUE:
17223 	case KEY_COPY_ABORTED:
17224 	case KEY_EQUAL:
17225 	case KEY_RESERVED:
17226 	default:
17227 		sd_sense_key_default(un, xp->xb_sense_data,
17228 		    bp, xp, pktp);
17229 		break;
17230 	}
17231 }
17232 
17233 
17234 /*
17235  *    Function: sd_dump_memory
17236  *
17237  * Description: Debug logging routine to print the contents of a user provided
17238  *		buffer. The output of the buffer is broken up into 256 byte
17239  *		segments due to a size constraint of the scsi_log.
17240  *		implementation.
17241  *
17242  *   Arguments: un - ptr to softstate
17243  *		comp - component mask
17244  *		title - "title" string to preceed data when printed
17245  *		data - ptr to data block to be printed
17246  *		len - size of data block to be printed
17247  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17248  *
17249  *     Context: May be called from interrupt context
17250  */
17251 
#define	SD_DUMP_MEMORY_BUF_SIZE	256

/* Per-element formats: index 0 for SD_LOG_HEX, index 1 for SD_LOG_CHAR. */
static char *sd_dump_format_string[] = {
		" 0x%02x",
		" %c"
};

static void
sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
    int len, int fmt)
{
	int	i, j;
	int	avail_count;
	int	start_offset;
	int	end_offset;
	size_t	entry_len;
	char	*bufp;
	char	*local_buf;
	char	*format_string;

	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));

	/*
	 * In the debug version of the driver, this function is called from a
	 * number of places which are NOPs in the release driver.
	 * The debug driver therefore has additional methods of filtering
	 * debug output.
	 */
#ifdef SDDEBUG
	/*
	 * In the debug version of the driver we can reduce the amount of debug
	 * messages by setting sd_error_level to something other than
	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
	 * sd_component_mask.
	 */
	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
	if (((sd_component_mask & comp) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
#else
	if (sd_error_level != SCSI_ERR_ALL) {
		return;
	}
#endif

	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
	bufp = local_buf;
	/*
	 * Available length is the length of local_buf[], minus the
	 * length of the title string, minus one for the ":", minus
	 * one for the newline, minus one for the NULL terminator.
	 * This gives the #bytes available for holding the printed
	 * values from the given data buffer.
	 */
	if (fmt == SD_LOG_HEX) {
		format_string = sd_dump_format_string[0];
	} else /* SD_LOG_CHAR */ {
		format_string = sd_dump_format_string[1];
	}
	/*
	 * Available count is the number of elements from the given
	 * data buffer that we can fit into the available length.
	 * This is based upon the size of the format string used.
	 * Make one entry and find it's size.
	 */
	/*
	 * Note: every element formats to the same width (" 0x%02x" is
	 * always 5 chars for a byte, " %c" always 2), so measuring the
	 * first entry is sufficient.
	 */
	(void) sprintf(bufp, format_string, data[0]);
	entry_len = strlen(bufp);
	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;

	/* Emit one scsi_log() line per avail_count elements. */
	j = 0;
	while (j < len) {
		bufp = local_buf;
		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
		start_offset = j;

		end_offset = start_offset + avail_count;

		(void) sprintf(bufp, "%s:", title);
		bufp += strlen(bufp);
		/* Fill this line; j advances globally, i within the line. */
		for (i = start_offset; ((i < end_offset) && (j < len));
		    i++, j++) {
			(void) sprintf(bufp, format_string, data[i]);
			bufp += entry_len;
		}
		(void) sprintf(bufp, "\n");

		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
	}
	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
}
17346 
17347 /*
17348  *    Function: sd_print_sense_msg
17349  *
17350  * Description: Log a message based upon the given sense data.
17351  *
17352  *   Arguments: un - ptr to associated softstate
17353  *		bp - ptr to buf(9S) for the command
17354  *		arg - ptr to associate sd_sense_info struct
17355  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17356  *			or SD_NO_RETRY_ISSUED
17357  *
17358  *     Context: May be called from interrupt context
17359  */
17360 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	uint8_t *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	/* Pull severity and PFA flag out of the caller-supplied info. */
	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A retry in progress downgrades the severity to "retryable". */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = xp->xb_sense_data;

	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
		(uint64_t *)&err_blkno)) {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	} else {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	/*
	 * PFA (predictive failure) messages are always logged; otherwise
	 * apply the silence and verbosity filters below.
	 */
	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}

	/*
	 * Check for Sonoma Failover and keep a count of how many failed I/O's
	 */
	/*
	 * NOTE(review): asc 0x94 / ascq 0x01 on an LSI device is treated
	 * here as the failover indication; only the first such failure is
	 * reported, subsequent ones are suppressed via the counter.
	 */
	if ((SD_IS_LSI(un)) &&
	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
	    (scsi_sense_asc(sensep) == 0x94) &&
	    (scsi_sense_ascq(sensep) == 0x01)) {
		un->un_sonoma_failure_count++;
		if (un->un_sonoma_failure_count > 1) {
			return;
		}
	}

	/* Emit the decoded error message via the common SCSI facility. */
	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
	    request_blkno, err_blkno, scsi_cmds,
	    (struct scsi_extended_sense *)sensep,
	    un->un_additional_codes, NULL);
}
17463 
17464 /*
17465  *    Function: sd_sense_key_no_sense
17466  *
17467  * Description: Recovery action when sense data was not received.
17468  *
17469  *     Context: May be called from interrupt context
17470  */
17471 
17472 static void
17473 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17474 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17475 {
17476 	struct sd_sense_info	si;
17477 
17478 	ASSERT(un != NULL);
17479 	ASSERT(mutex_owned(SD_MUTEX(un)));
17480 	ASSERT(bp != NULL);
17481 	ASSERT(xp != NULL);
17482 	ASSERT(pktp != NULL);
17483 
17484 	si.ssi_severity = SCSI_ERR_FATAL;
17485 	si.ssi_pfa_flag = FALSE;
17486 
17487 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17488 
17489 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17490 		&si, EIO, (clock_t)0, NULL);
17491 }
17492 
17493 
17494 /*
17495  *    Function: sd_sense_key_recoverable_error
17496  *
17497  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17498  *
17499  *     Context: May be called from interrupt context
17500  */
17501 
17502 static void
17503 sd_sense_key_recoverable_error(struct sd_lun *un,
17504 	uint8_t *sense_datap,
17505 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17506 {
17507 	struct sd_sense_info	si;
17508 	uint8_t asc = scsi_sense_asc(sense_datap);
17509 
17510 	ASSERT(un != NULL);
17511 	ASSERT(mutex_owned(SD_MUTEX(un)));
17512 	ASSERT(bp != NULL);
17513 	ASSERT(xp != NULL);
17514 	ASSERT(pktp != NULL);
17515 
17516 	/*
17517 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17518 	 */
17519 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17520 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17521 		si.ssi_severity = SCSI_ERR_INFO;
17522 		si.ssi_pfa_flag = TRUE;
17523 	} else {
17524 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17525 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17526 		si.ssi_severity = SCSI_ERR_RECOVERED;
17527 		si.ssi_pfa_flag = FALSE;
17528 	}
17529 
17530 	if (pktp->pkt_resid == 0) {
17531 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17532 		sd_return_command(un, bp);
17533 		return;
17534 	}
17535 
17536 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17537 	    &si, EIO, (clock_t)0, NULL);
17538 }
17539 
17540 
17541 
17542 
17543 /*
17544  *    Function: sd_sense_key_not_ready
17545  *
17546  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17547  *
17548  *     Context: May be called from interrupt context
17549  */
17550 
static void
sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	/* Additional sense code / qualifier select the recovery action. */
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Default disposition; overridden on the paths that retry. */
	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/*
	 * Update error stats after first NOT READY error. Disks may have
	 * been powered down and may need to be restarted.  For CDROMs,
	 * report NOT READY errors only if media is present.
	 */
	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
	    (xp->xb_retry_count > 0)) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
	}

	/*
	 * Just fail if the "not ready" retry limit has been reached.
	 */
	if (xp->xb_retry_count >= un->un_notready_retry_count) {
		/* Special check for error message printing for removables. */
		if (un->un_f_has_removable_media && (asc == 0x04) &&
		    (ascq >= 0x04)) {
			si.ssi_severity = SCSI_ERR_ALL;
		}
		goto fail_command;
	}

	/*
	 * Check the ASC and ASCQ in the sense data as needed, to determine
	 * what to do.
	 */
	switch (asc) {
	case 0x04:	/* LOGICAL UNIT NOT READY */
		/*
		 * disk drives that don't spin up result in a very long delay
		 * in format without warning messages. We will log a message
		 * if the error level is set to verbose.
		 */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "logical unit not ready, resetting disk\n");
		}

		/*
		 * There are different requirements for CDROMs and disks for
		 * the number of retries.  If a CD-ROM is giving this, it is
		 * probably reading TOC and is in the process of getting
		 * ready, so we should keep on trying for a long time to make
		 * sure that all types of media are taken into account (for
		 * some media the drive takes a long time to read TOC).  For
		 * disks we do not want to retry this too many times as this
		 * can cause a long hang in format when the drive refuses to
		 * spin up (a very common failure).
		 */
		switch (ascq) {
		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
			/*
			 * Disk drives frequently refuse to spin up which
			 * results in a very long hang in format without
			 * warning messages.
			 *
			 * Note: This code preserves the legacy behavior of
			 * comparing xb_retry_count against zero for fibre
			 * channel targets instead of comparing against the
			 * un_reset_retry_count value.  The reason for this
			 * discrepancy has been so utterly lost beneath the
			 * Sands of Time that even Indiana Jones could not
			 * find it.
			 */
			if (un->un_f_is_fibre == TRUE) {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
					(xp->xb_retry_count > 0)) &&
					(un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					CE_WARN, "logical unit not ready, "
					"resetting disk\n");
					sd_reset_target(un, pktp);
				}
			} else {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
					(xp->xb_retry_count >
					un->un_reset_retry_count)) &&
					(un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					CE_WARN, "logical unit not ready, "
					"resetting disk\n");
					sd_reset_target(un, pktp);
				}
			}
			break;

		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
			/*
			 * If the target is in the process of becoming
			 * ready, just proceed with the retry. This can
			 * happen with CD-ROMs that take a long time to
			 * read TOC after a power cycle or reset.
			 */
			goto do_retry;

		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
			/* Falls through to the START STOP UNIT issue below. */
			break;

		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
			/*
			 * Retries cannot help here so just fail right away.
			 */
			goto fail_command;

		case 0x88:
			/*
			 * Vendor-unique code for T3/T4: it indicates a
			 * path problem in a multipathed config, but as far as
			 * the target driver is concerned it equates to a fatal
			 * error, so we should just fail the command right away
			 * (without printing anything to the console). If this
			 * is not a T3/T4, fall thru to the default recovery
			 * action.
			 * T3/T4 is FC only, don't need to check is_fibre
			 */
			if (SD_IS_T3(un) || SD_IS_T4(un)) {
				sd_return_failed_command(un, bp, EIO);
				return;
			}
			/* FALLTHRU */

		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
		default:    /* Possible future codes in SCSI spec? */
			/*
			 * For removable-media devices, do not retry if
			 * ASCQ > 2 as these result mostly from USCSI commands
			 * on MMC devices issued to check status of an
			 * operation initiated in immediate mode.  Also for
			 * ASCQ >= 4 do not print console messages as these
			 * mainly represent a user-initiated operation
			 * instead of a system failure.
			 */
			if (un->un_f_has_removable_media) {
				si.ssi_severity = SCSI_ERR_ALL;
				goto fail_command;
			}
			break;
		}

		/*
		 * As part of our recovery attempt for the NOT READY
		 * condition, we issue a START STOP UNIT command. However
		 * we want to wait for a short delay before attempting this
		 * as there may still be more commands coming back from the
		 * target with the check condition. To do this we use
		 * timeout(9F) to call sd_start_stop_unit_callback() after
		 * the delay interval expires. (sd_start_stop_unit_callback()
		 * dispatches sd_start_stop_unit_task(), which will issue
		 * the actual START STOP UNIT command. The delay interval
		 * is one-half of the delay that we will use to retry the
		 * command that generated the NOT READY condition.
		 *
		 * Note that we could just dispatch sd_start_stop_unit_task()
		 * from here and allow it to sleep for the delay interval,
		 * but then we would be tying up the taskq thread
		 * unnecessarily for the duration of the delay.
		 *
		 * Do not issue the START STOP UNIT if the current command
		 * is already a START STOP UNIT.
		 */
		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
			break;
		}

		/*
		 * Do not schedule the timeout if one is already pending.
		 */
		if (un->un_startstop_timeid != NULL) {
			SD_INFO(SD_LOG_ERROR, un,
			    "sd_sense_key_not_ready: restart already issued to"
			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			break;
		}

		/*
		 * Schedule the START STOP UNIT command, then queue the command
		 * for a retry.
		 *
		 * Note: A timeout is not scheduled for this retry because we
		 * want the retry to be serial with the START_STOP_UNIT. The
		 * retry will be started when the START_STOP_UNIT is completed
		 * in sd_start_stop_unit_task.
		 */
		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
		    un, SD_BSY_TIMEOUT / 2);
		xp->xb_retry_count++;
		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
		return;

	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "unit does not respond to selection\n");
		}
		break;

	case 0x3A:	/* MEDIUM NOT PRESENT */
		if (sd_error_level >= SCSI_ERR_FATAL) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Caddy not inserted in drive\n");
		}

		sr_ejected(un);
		un->un_mediastate = DKIO_EJECTED;
		/* The state has changed, inform the media watch routines */
		cv_broadcast(&un->un_state_cv);
		/* Just fail if no media is present in the drive. */
		goto fail_command;

	default:
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
			    "Unit not Ready. Additional sense code 0x%x\n",
			    asc);
		}
		break;
	}

do_retry:

	/*
	 * Retry the command, as some targets may report NOT READY for
	 * several seconds after being reset.
	 */
	xp->xb_retry_count++;
	si.ssi_severity = SCSI_ERR_RETRYABLE;
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
	    &si, EIO, SD_BSY_TIMEOUT, NULL);

	return;

fail_command:
	/* Terminal disposition: log per si and fail the command with EIO. */
	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
	sd_return_failed_command(un, bp, EIO);
}
17810 
17811 
17812 
17813 /*
17814  *    Function: sd_sense_key_medium_or_hardware_error
17815  *
17816  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17817  *		sense key.
17818  *
17819  *     Context: May be called from interrupt context
17820  */
17821 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/* Medium errors get their own kstat bucket in addition to harderrs. */
	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * Attempt a reset at most once: only on the retry whose count equals
	 * un_reset_retry_count (and never when that limit is zero).  The
	 * softstate mutex is dropped across the reset calls and reacquired
	 * afterwards.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC when we are
			 * handling a KEY_HARDWARE_ERROR. In particular
			 * taking the default action of resetting the target may
			 * not be the appropriate way to attempt recovery.
			 * Resetting a target because of a single LUN failure
			 * victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays, if an LSI
			 * array controller returns an ASC of 0x84 (LUN Dead) we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				/* Prefer a LUN reset; fall back to target. */
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
17908 
17909 
17910 
17911 /*
17912  *    Function: sd_sense_key_illegal_request
17913  *
17914  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17915  *
17916  *     Context: May be called from interrupt context
17917  */
17918 
17919 static void
17920 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17921 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17922 {
17923 	struct sd_sense_info	si;
17924 
17925 	ASSERT(un != NULL);
17926 	ASSERT(mutex_owned(SD_MUTEX(un)));
17927 	ASSERT(bp != NULL);
17928 	ASSERT(xp != NULL);
17929 	ASSERT(pktp != NULL);
17930 
17931 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17932 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17933 
17934 	si.ssi_severity = SCSI_ERR_INFO;
17935 	si.ssi_pfa_flag = FALSE;
17936 
17937 	/* Pointless to retry if the target thinks it's an illegal request */
17938 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17939 	sd_return_failed_command(un, bp, EIO);
17940 }
17941 
17942 
17943 
17944 
17945 /*
17946  *    Function: sd_sense_key_unit_attention
17947  *
17948  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17949  *
17950  *     Context: May be called from interrupt context
17951  */
17952 
static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/* Report a predictive failure only if so configured. */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/* A reset means any reservation we held has been lost. */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
#ifdef _LP64
		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
			    un, KM_NOSLEEP) == 0) {
				/*
				 * If we can't dispatch the task we'll just
				 * live without descriptor sense.  We can
				 * try again on the next "unit attention"
				 */
				SD_ERROR(SD_LOG_ERROR, un,
				    "sd_sense_key_unit_attention: "
				    "Could not dispatch "
				    "sd_reenable_dsense_task\n");
			}
		}
#endif /* _LP64 */
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		/* Media-change handling applies to removable devices only. */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == 0) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	/* Retry with a delay; retry_check_flag selects the retry policy. */
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
18073 
18074 
18075 
18076 /*
18077  *    Function: sd_sense_key_fail_command
18078  *
18079  * Description: Use to fail a command when we don't like the sense key that
18080  *		was returned.
18081  *
18082  *     Context: May be called from interrupt context
18083  */
18084 
18085 static void
18086 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18087 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18088 {
18089 	struct sd_sense_info	si;
18090 
18091 	ASSERT(un != NULL);
18092 	ASSERT(mutex_owned(SD_MUTEX(un)));
18093 	ASSERT(bp != NULL);
18094 	ASSERT(xp != NULL);
18095 	ASSERT(pktp != NULL);
18096 
18097 	si.ssi_severity = SCSI_ERR_FATAL;
18098 	si.ssi_pfa_flag = FALSE;
18099 
18100 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18101 	sd_return_failed_command(un, bp, EIO);
18102 }
18103 
18104 
18105 
18106 /*
18107  *    Function: sd_sense_key_blank_check
18108  *
18109  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18110  *		Has no monetary connotation.
18111  *
18112  *     Context: May be called from interrupt context
18113  */
18114 
18115 static void
18116 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18117 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18118 {
18119 	struct sd_sense_info	si;
18120 
18121 	ASSERT(un != NULL);
18122 	ASSERT(mutex_owned(SD_MUTEX(un)));
18123 	ASSERT(bp != NULL);
18124 	ASSERT(xp != NULL);
18125 	ASSERT(pktp != NULL);
18126 
18127 	/*
18128 	 * Blank check is not fatal for removable devices, therefore
18129 	 * it does not require a console message.
18130 	 */
18131 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18132 	    SCSI_ERR_FATAL;
18133 	si.ssi_pfa_flag = FALSE;
18134 
18135 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18136 	sd_return_failed_command(un, bp, EIO);
18137 }
18138 
18139 
18140 
18141 
18142 /*
18143  *    Function: sd_sense_key_aborted_command
18144  *
18145  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18146  *
18147  *     Context: May be called from interrupt context
18148  */
18149 
18150 static void
18151 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18152 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18153 {
18154 	struct sd_sense_info	si;
18155 
18156 	ASSERT(un != NULL);
18157 	ASSERT(mutex_owned(SD_MUTEX(un)));
18158 	ASSERT(bp != NULL);
18159 	ASSERT(xp != NULL);
18160 	ASSERT(pktp != NULL);
18161 
18162 	si.ssi_severity = SCSI_ERR_FATAL;
18163 	si.ssi_pfa_flag = FALSE;
18164 
18165 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18166 
18167 	/*
18168 	 * This really ought to be a fatal error, but we will retry anyway
18169 	 * as some drives report this as a spurious error.
18170 	 */
18171 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18172 	    &si, EIO, (clock_t)0, NULL);
18173 }
18174 
18175 
18176 
18177 /*
18178  *    Function: sd_sense_key_default
18179  *
18180  * Description: Default recovery action for several SCSI sense keys (basically
18181  *		attempts a retry).
18182  *
18183  *     Context: May be called from interrupt context
18184  */
18185 
18186 static void
18187 sd_sense_key_default(struct sd_lun *un,
18188 	uint8_t *sense_datap,
18189 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18190 {
18191 	struct sd_sense_info	si;
18192 	uint8_t sense_key = scsi_sense_key(sense_datap);
18193 
18194 	ASSERT(un != NULL);
18195 	ASSERT(mutex_owned(SD_MUTEX(un)));
18196 	ASSERT(bp != NULL);
18197 	ASSERT(xp != NULL);
18198 	ASSERT(pktp != NULL);
18199 
18200 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18201 
18202 	/*
18203 	 * Undecoded sense key.	Attempt retries and hope that will fix
18204 	 * the problem.  Otherwise, we're dead.
18205 	 */
18206 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18207 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18208 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18209 	}
18210 
18211 	si.ssi_severity = SCSI_ERR_FATAL;
18212 	si.ssi_pfa_flag = FALSE;
18213 
18214 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18215 	    &si, EIO, (clock_t)0, NULL);
18216 }
18217 
18218 
18219 
18220 /*
18221  *    Function: sd_print_retry_msg
18222  *
18223  * Description: Print a message indicating the retry action being taken.
18224  *
18225  *   Arguments: un - ptr to associated softstate
18226  *		bp - ptr to buf(9S) for the command
18227  *		arg - not used.
18228  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18229  *			or SD_NO_RETRY_ISSUED
18230  *
18231  *     Context: May be called from interrupt context
18232  */
18233 /* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * Stay silent while suspended, in low power, or when the command
	 * itself requested silence (FLAG_SILENT); still record pkt_reason.
	 */
	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		/* Returning here deliberately skips update_pkt_reason. */
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		goto update_pkt_reason;
	}

	/* A parity error is reported from pkt_statistics, not pkt_reason. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
18300 
18301 
18302 /*
18303  *    Function: sd_print_cmd_incomplete_msg
18304  *
18305  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18306  *
18307  *   Arguments: un - ptr to associated softstate
18308  *		bp - ptr to buf(9S) for the command
18309  *		arg - passed to sd_print_retry_msg()
18310  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18311  *			or SD_NO_RETRY_ISSUED
18312  *
18313  *     Context: May be called from interrupt context
18314  */
18315 
18316 static void
18317 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18318 	int code)
18319 {
18320 	dev_info_t	*dip;
18321 
18322 	ASSERT(un != NULL);
18323 	ASSERT(mutex_owned(SD_MUTEX(un)));
18324 	ASSERT(bp != NULL);
18325 
18326 	switch (code) {
18327 	case SD_NO_RETRY_ISSUED:
18328 		/* Command was failed. Someone turned off this target? */
18329 		if (un->un_state != SD_STATE_OFFLINE) {
18330 			/*
18331 			 * Suppress message if we are detaching and
18332 			 * device has been disconnected
18333 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18334 			 * private interface and not part of the DDI
18335 			 */
18336 			dip = un->un_sd->sd_dev;
18337 			if (!(DEVI_IS_DETACHING(dip) &&
18338 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18339 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18340 				"disk not responding to selection\n");
18341 			}
18342 			New_state(un, SD_STATE_OFFLINE);
18343 		}
18344 		break;
18345 
18346 	case SD_DELAYED_RETRY_ISSUED:
18347 	case SD_IMMEDIATE_RETRY_ISSUED:
18348 	default:
18349 		/* Command was successfully queued for retry */
18350 		sd_print_retry_msg(un, bp, arg, code);
18351 		break;
18352 	}
18353 }
18354 
18355 
18356 /*
18357  *    Function: sd_pkt_reason_cmd_incomplete
18358  *
18359  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18360  *
18361  *     Context: May be called from interrupt context
18362  */
18363 
18364 static void
18365 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18366 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18367 {
18368 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18369 
18370 	ASSERT(un != NULL);
18371 	ASSERT(mutex_owned(SD_MUTEX(un)));
18372 	ASSERT(bp != NULL);
18373 	ASSERT(xp != NULL);
18374 	ASSERT(pktp != NULL);
18375 
18376 	/* Do not do a reset if selection did not complete */
18377 	/* Note: Should this not just check the bit? */
18378 	if (pktp->pkt_state != STATE_GOT_BUS) {
18379 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18380 		sd_reset_target(un, pktp);
18381 	}
18382 
18383 	/*
18384 	 * If the target was not successfully selected, then set
18385 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18386 	 * with the target, and further retries and/or commands are
18387 	 * likely to take a long time.
18388 	 */
18389 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18390 		flag |= SD_RETRIES_FAILFAST;
18391 	}
18392 
18393 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18394 
18395 	sd_retry_command(un, bp, flag,
18396 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18397 }
18398 
18399 
18400 
18401 /*
18402  *    Function: sd_pkt_reason_cmd_tran_err
18403  *
18404  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18405  *
18406  *     Context: May be called from interrupt context
18407  */
18408 
18409 static void
18410 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18411 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18412 {
18413 	ASSERT(un != NULL);
18414 	ASSERT(mutex_owned(SD_MUTEX(un)));
18415 	ASSERT(bp != NULL);
18416 	ASSERT(xp != NULL);
18417 	ASSERT(pktp != NULL);
18418 
18419 	/*
18420 	 * Do not reset if we got a parity error, or if
18421 	 * selection did not complete.
18422 	 */
18423 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18424 	/* Note: Should this not just check the bit for pkt_state? */
18425 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18426 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18427 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18428 		sd_reset_target(un, pktp);
18429 	}
18430 
18431 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18432 
18433 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18434 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18435 }
18436 
18437 
18438 
18439 /*
18440  *    Function: sd_pkt_reason_cmd_reset
18441  *
18442  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18443  *
18444  *     Context: May be called from interrupt context
18445  */
18446 
18447 static void
18448 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18449 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18450 {
18451 	ASSERT(un != NULL);
18452 	ASSERT(mutex_owned(SD_MUTEX(un)));
18453 	ASSERT(bp != NULL);
18454 	ASSERT(xp != NULL);
18455 	ASSERT(pktp != NULL);
18456 
18457 	/* The target may still be running the command, so try to reset. */
18458 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18459 	sd_reset_target(un, pktp);
18460 
18461 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18462 
18463 	/*
18464 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18465 	 * reset because another target on this bus caused it. The target
18466 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18467 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18468 	 */
18469 
18470 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18471 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18472 }
18473 
18474 
18475 
18476 
18477 /*
18478  *    Function: sd_pkt_reason_cmd_aborted
18479  *
18480  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18481  *
18482  *     Context: May be called from interrupt context
18483  */
18484 
18485 static void
18486 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18487 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18488 {
18489 	ASSERT(un != NULL);
18490 	ASSERT(mutex_owned(SD_MUTEX(un)));
18491 	ASSERT(bp != NULL);
18492 	ASSERT(xp != NULL);
18493 	ASSERT(pktp != NULL);
18494 
18495 	/* The target may still be running the command, so try to reset. */
18496 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18497 	sd_reset_target(un, pktp);
18498 
18499 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18500 
18501 	/*
18502 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18503 	 * aborted because another target on this bus caused it. The target
18504 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18505 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18506 	 */
18507 
18508 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18509 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18510 }
18511 
18512 
18513 
18514 /*
18515  *    Function: sd_pkt_reason_cmd_timeout
18516  *
18517  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18518  *
18519  *     Context: May be called from interrupt context
18520  */
18521 
18522 static void
18523 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18524 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18525 {
18526 	ASSERT(un != NULL);
18527 	ASSERT(mutex_owned(SD_MUTEX(un)));
18528 	ASSERT(bp != NULL);
18529 	ASSERT(xp != NULL);
18530 	ASSERT(pktp != NULL);
18531 
18532 
18533 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18534 	sd_reset_target(un, pktp);
18535 
18536 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18537 
18538 	/*
18539 	 * A command timeout indicates that we could not establish
18540 	 * communication with the target, so set SD_RETRIES_FAILFAST
18541 	 * as further retries/commands are likely to take a long time.
18542 	 */
18543 	sd_retry_command(un, bp,
18544 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18545 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18546 }
18547 
18548 
18549 
18550 /*
18551  *    Function: sd_pkt_reason_cmd_unx_bus_free
18552  *
18553  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18554  *
18555  *     Context: May be called from interrupt context
18556  */
18557 
18558 static void
18559 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18560 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18561 {
18562 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18563 
18564 	ASSERT(un != NULL);
18565 	ASSERT(mutex_owned(SD_MUTEX(un)));
18566 	ASSERT(bp != NULL);
18567 	ASSERT(xp != NULL);
18568 	ASSERT(pktp != NULL);
18569 
18570 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18571 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18572 
18573 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18574 	    sd_print_retry_msg : NULL;
18575 
18576 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18577 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18578 }
18579 
18580 
18581 /*
18582  *    Function: sd_pkt_reason_cmd_tag_reject
18583  *
18584  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18585  *
18586  *     Context: May be called from interrupt context
18587  */
18588 
18589 static void
18590 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18591 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18592 {
18593 	ASSERT(un != NULL);
18594 	ASSERT(mutex_owned(SD_MUTEX(un)));
18595 	ASSERT(bp != NULL);
18596 	ASSERT(xp != NULL);
18597 	ASSERT(pktp != NULL);
18598 
18599 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18600 	pktp->pkt_flags = 0;
18601 	un->un_tagflags = 0;
18602 	if (un->un_f_opt_queueing == TRUE) {
18603 		un->un_throttle = min(un->un_throttle, 3);
18604 	} else {
18605 		un->un_throttle = 1;
18606 	}
18607 	mutex_exit(SD_MUTEX(un));
18608 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18609 	mutex_enter(SD_MUTEX(un));
18610 
18611 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18612 
18613 	/* Legacy behavior not to check retry counts here. */
18614 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18615 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18616 }
18617 
18618 
18619 /*
18620  *    Function: sd_pkt_reason_default
18621  *
18622  * Description: Default recovery actions for SCSA pkt_reason values that
18623  *		do not have more explicit recovery actions.
18624  *
18625  *     Context: May be called from interrupt context
18626  */
18627 
18628 static void
18629 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18630 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18631 {
18632 	ASSERT(un != NULL);
18633 	ASSERT(mutex_owned(SD_MUTEX(un)));
18634 	ASSERT(bp != NULL);
18635 	ASSERT(xp != NULL);
18636 	ASSERT(pktp != NULL);
18637 
18638 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18639 	sd_reset_target(un, pktp);
18640 
18641 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18642 
18643 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18644 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18645 }
18646 
18647 
18648 
18649 /*
18650  *    Function: sd_pkt_status_check_condition
18651  *
18652  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18653  *
18654  *     Context: May be called from interrupt context
18655  */
18656 
18657 static void
18658 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18659 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18660 {
18661 	ASSERT(un != NULL);
18662 	ASSERT(mutex_owned(SD_MUTEX(un)));
18663 	ASSERT(bp != NULL);
18664 	ASSERT(xp != NULL);
18665 	ASSERT(pktp != NULL);
18666 
18667 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18668 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18669 
18670 	/*
18671 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18672 	 * command will be retried after the request sense). Otherwise, retry
18673 	 * the command. Note: we are issuing the request sense even though the
18674 	 * retry limit may have been reached for the failed command.
18675 	 */
18676 	if (un->un_f_arq_enabled == FALSE) {
18677 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18678 		    "no ARQ, sending request sense command\n");
18679 		sd_send_request_sense_command(un, bp, pktp);
18680 	} else {
18681 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18682 		    "ARQ,retrying request sense command\n");
18683 #if defined(__i386) || defined(__amd64)
18684 		/*
18685 		 * The SD_RETRY_DELAY value need to be adjusted here
18686 		 * when SD_RETRY_DELAY change in sddef.h
18687 		 */
18688 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18689 			un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
18690 			NULL);
18691 #else
18692 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18693 		    EIO, SD_RETRY_DELAY, NULL);
18694 #endif
18695 	}
18696 
18697 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18698 }
18699 
18700 
18701 /*
18702  *    Function: sd_pkt_status_busy
18703  *
18704  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18705  *
18706  *     Context: May be called from interrupt context
18707  */
18708 
18709 static void
18710 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18711 	struct scsi_pkt *pktp)
18712 {
18713 	ASSERT(un != NULL);
18714 	ASSERT(mutex_owned(SD_MUTEX(un)));
18715 	ASSERT(bp != NULL);
18716 	ASSERT(xp != NULL);
18717 	ASSERT(pktp != NULL);
18718 
18719 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18720 	    "sd_pkt_status_busy: entry\n");
18721 
18722 	/* If retries are exhausted, just fail the command. */
18723 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18724 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18725 		    "device busy too long\n");
18726 		sd_return_failed_command(un, bp, EIO);
18727 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18728 		    "sd_pkt_status_busy: exit\n");
18729 		return;
18730 	}
18731 	xp->xb_retry_count++;
18732 
18733 	/*
18734 	 * Try to reset the target. However, we do not want to perform
18735 	 * more than one reset if the device continues to fail. The reset
18736 	 * will be performed when the retry count reaches the reset
18737 	 * threshold.  This threshold should be set such that at least
18738 	 * one retry is issued before the reset is performed.
18739 	 */
18740 	if (xp->xb_retry_count ==
18741 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18742 		int rval = 0;
18743 		mutex_exit(SD_MUTEX(un));
18744 		if (un->un_f_allow_bus_device_reset == TRUE) {
18745 			/*
18746 			 * First try to reset the LUN; if we cannot then
18747 			 * try to reset the target.
18748 			 */
18749 			if (un->un_f_lun_reset_enabled == TRUE) {
18750 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18751 				    "sd_pkt_status_busy: RESET_LUN\n");
18752 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18753 			}
18754 			if (rval == 0) {
18755 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18756 				    "sd_pkt_status_busy: RESET_TARGET\n");
18757 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18758 			}
18759 		}
18760 		if (rval == 0) {
18761 			/*
18762 			 * If the RESET_LUN and/or RESET_TARGET failed,
18763 			 * try RESET_ALL
18764 			 */
18765 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18766 			    "sd_pkt_status_busy: RESET_ALL\n");
18767 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18768 		}
18769 		mutex_enter(SD_MUTEX(un));
18770 		if (rval == 0) {
18771 			/*
18772 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18773 			 * At this point we give up & fail the command.
18774 			 */
18775 			sd_return_failed_command(un, bp, EIO);
18776 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18777 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18778 			return;
18779 		}
18780 	}
18781 
18782 	/*
18783 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18784 	 * we have already checked the retry counts above.
18785 	 */
18786 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18787 	    EIO, SD_BSY_TIMEOUT, NULL);
18788 
18789 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18790 	    "sd_pkt_status_busy: exit\n");
18791 }
18792 
18793 
18794 /*
18795  *    Function: sd_pkt_status_reservation_conflict
18796  *
18797  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18798  *		command status.
18799  *
18800  *     Context: May be called from interrupt context
18801  */
18802 
18803 static void
18804 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18805 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18806 {
18807 	ASSERT(un != NULL);
18808 	ASSERT(mutex_owned(SD_MUTEX(un)));
18809 	ASSERT(bp != NULL);
18810 	ASSERT(xp != NULL);
18811 	ASSERT(pktp != NULL);
18812 
18813 	/*
18814 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
18815 	 * conflict could be due to various reasons like incorrect keys, not
18816 	 * registered or not reserved etc. So, we return EACCES to the caller.
18817 	 */
18818 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18819 		int cmd = SD_GET_PKT_OPCODE(pktp);
18820 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18821 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18822 			sd_return_failed_command(un, bp, EACCES);
18823 			return;
18824 		}
18825 	}
18826 
18827 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18828 
18829 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18830 		if (sd_failfast_enable != 0) {
18831 			/* By definition, we must panic here.... */
18832 			sd_panic_for_res_conflict(un);
18833 			/*NOTREACHED*/
18834 		}
18835 		SD_ERROR(SD_LOG_IO, un,
18836 		    "sd_handle_resv_conflict: Disk Reserved\n");
18837 		sd_return_failed_command(un, bp, EACCES);
18838 		return;
18839 	}
18840 
18841 	/*
18842 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18843 	 * property is set (default is 1). Retries will not succeed
18844 	 * on a disk reserved by another initiator. HA systems
18845 	 * may reset this via sd.conf to avoid these retries.
18846 	 *
18847 	 * Note: The legacy return code for this failure is EIO, however EACCES
18848 	 * seems more appropriate for a reservation conflict.
18849 	 */
18850 	if (sd_retry_on_reservation_conflict == 0) {
18851 		SD_ERROR(SD_LOG_IO, un,
18852 		    "sd_handle_resv_conflict: Device Reserved\n");
18853 		sd_return_failed_command(un, bp, EIO);
18854 		return;
18855 	}
18856 
18857 	/*
18858 	 * Retry the command if we can.
18859 	 *
18860 	 * Note: The legacy return code for this failure is EIO, however EACCES
18861 	 * seems more appropriate for a reservation conflict.
18862 	 */
18863 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18864 	    (clock_t)2, NULL);
18865 }
18866 
18867 
18868 
18869 /*
18870  *    Function: sd_pkt_status_qfull
18871  *
18872  * Description: Handle a QUEUE FULL condition from the target.  This can
18873  *		occur if the HBA does not handle the queue full condition.
18874  *		(Basically this means third-party HBAs as Sun HBAs will
18875  *		handle the queue full condition.)  Note that if there are
18876  *		some commands already in the transport, then the queue full
18877  *		has occurred because the queue for this nexus is actually
18878  *		full. If there are no commands in the transport, then the
18879  *		queue full is resulting from some other initiator or lun
18880  *		consuming all the resources at the target.
18881  *
18882  *     Context: May be called from interrupt context
18883  */
18884 
18885 static void
18886 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18887 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18888 {
18889 	ASSERT(un != NULL);
18890 	ASSERT(mutex_owned(SD_MUTEX(un)));
18891 	ASSERT(bp != NULL);
18892 	ASSERT(xp != NULL);
18893 	ASSERT(pktp != NULL);
18894 
18895 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18896 	    "sd_pkt_status_qfull: entry\n");
18897 
18898 	/*
18899 	 * Just lower the QFULL throttle and retry the command.  Note that
18900 	 * we do not limit the number of retries here.
18901 	 */
18902 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18903 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18904 	    SD_RESTART_TIMEOUT, NULL);
18905 
18906 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18907 	    "sd_pkt_status_qfull: exit\n");
18908 }
18909 
18910 
18911 /*
18912  *    Function: sd_reset_target
18913  *
18914  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18915  *		RESET_TARGET, or RESET_ALL.
18916  *
18917  *     Context: May be called under interrupt context.
18918  */
18919 
18920 static void
18921 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18922 {
18923 	int rval = 0;
18924 
18925 	ASSERT(un != NULL);
18926 	ASSERT(mutex_owned(SD_MUTEX(un)));
18927 	ASSERT(pktp != NULL);
18928 
18929 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18930 
18931 	/*
18932 	 * No need to reset if the transport layer has already done so.
18933 	 */
18934 	if ((pktp->pkt_statistics &
18935 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18936 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18937 		    "sd_reset_target: no reset\n");
18938 		return;
18939 	}
18940 
18941 	mutex_exit(SD_MUTEX(un));
18942 
18943 	if (un->un_f_allow_bus_device_reset == TRUE) {
18944 		if (un->un_f_lun_reset_enabled == TRUE) {
18945 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18946 			    "sd_reset_target: RESET_LUN\n");
18947 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18948 		}
18949 		if (rval == 0) {
18950 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18951 			    "sd_reset_target: RESET_TARGET\n");
18952 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18953 		}
18954 	}
18955 
18956 	if (rval == 0) {
18957 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18958 		    "sd_reset_target: RESET_ALL\n");
18959 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18960 	}
18961 
18962 	mutex_enter(SD_MUTEX(un));
18963 
18964 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18965 }
18966 
18967 
18968 /*
18969  *    Function: sd_media_change_task
18970  *
18971  * Description: Recovery action for CDROM to become available.
18972  *
18973  *     Context: Executes in a taskq() thread context
18974  */
18975 
18976 static void
18977 sd_media_change_task(void *arg)
18978 {
18979 	struct	scsi_pkt	*pktp = arg;
18980 	struct	sd_lun		*un;
18981 	struct	buf		*bp;
18982 	struct	sd_xbuf		*xp;
18983 	int	err		= 0;
18984 	int	retry_count	= 0;
18985 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18986 	struct	sd_sense_info	si;
18987 
18988 	ASSERT(pktp != NULL);
18989 	bp = (struct buf *)pktp->pkt_private;
18990 	ASSERT(bp != NULL);
18991 	xp = SD_GET_XBUF(bp);
18992 	ASSERT(xp != NULL);
18993 	un = SD_GET_UN(bp);
18994 	ASSERT(un != NULL);
18995 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18996 	ASSERT(un->un_f_monitor_media_state);
18997 
18998 	si.ssi_severity = SCSI_ERR_INFO;
18999 	si.ssi_pfa_flag = FALSE;
19000 
19001 	/*
19002 	 * When a reset is issued on a CDROM, it takes a long time to
19003 	 * recover. First few attempts to read capacity and other things
19004 	 * related to handling unit attention fail (with a ASC 0x4 and
19005 	 * ASCQ 0x1). In that case we want to do enough retries and we want
19006 	 * to limit the retries in other cases of genuine failures like
19007 	 * no media in drive.
19008 	 */
19009 	while (retry_count++ < retry_limit) {
19010 		if ((err = sd_handle_mchange(un)) == 0) {
19011 			break;
19012 		}
19013 		if (err == EAGAIN) {
19014 			retry_limit = SD_UNIT_ATTENTION_RETRY;
19015 		}
19016 		/* Sleep for 0.5 sec. & try again */
19017 		delay(drv_usectohz(500000));
19018 	}
19019 
19020 	/*
19021 	 * Dispatch (retry or fail) the original command here,
19022 	 * along with appropriate console messages....
19023 	 *
19024 	 * Must grab the mutex before calling sd_retry_command,
19025 	 * sd_print_sense_msg and sd_return_failed_command.
19026 	 */
19027 	mutex_enter(SD_MUTEX(un));
19028 	if (err != SD_CMD_SUCCESS) {
19029 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19030 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19031 		si.ssi_severity = SCSI_ERR_FATAL;
19032 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19033 		sd_return_failed_command(un, bp, EIO);
19034 	} else {
19035 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19036 		    &si, EIO, (clock_t)0, NULL);
19037 	}
19038 	mutex_exit(SD_MUTEX(un));
19039 }
19040 
19041 
19042 
19043 /*
19044  *    Function: sd_handle_mchange
19045  *
19046  * Description: Perform geometry validation & other recovery when CDROM
19047  *		has been removed from drive.
19048  *
19049  * Return Code: 0 for success
19050  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19051  *		sd_send_scsi_READ_CAPACITY()
19052  *
19053  *     Context: Executes in a taskq() thread context
19054  */
19055 
19056 static int
19057 sd_handle_mchange(struct sd_lun *un)
19058 {
19059 	uint64_t	capacity;
19060 	uint32_t	lbasize;
19061 	int		rval;
19062 
19063 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19064 	ASSERT(un->un_f_monitor_media_state);
19065 
19066 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19067 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19068 		return (rval);
19069 	}
19070 
19071 	mutex_enter(SD_MUTEX(un));
19072 	sd_update_block_info(un, lbasize, capacity);
19073 
19074 	if (un->un_errstats != NULL) {
19075 		struct	sd_errstats *stp =
19076 		    (struct sd_errstats *)un->un_errstats->ks_data;
19077 		stp->sd_capacity.value.ui64 = (uint64_t)
19078 		    ((uint64_t)un->un_blockcount *
19079 		    (uint64_t)un->un_tgt_blocksize);
19080 	}
19081 
19082 	/*
19083 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19084 	 * valid geometry.
19085 	 */
19086 	un->un_f_geometry_is_valid = FALSE;
19087 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19088 	if (un->un_f_geometry_is_valid == FALSE) {
19089 		mutex_exit(SD_MUTEX(un));
19090 		return (EIO);
19091 	}
19092 
19093 	mutex_exit(SD_MUTEX(un));
19094 
19095 	/*
19096 	 * Try to lock the door
19097 	 */
19098 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19099 	    SD_PATH_DIRECT_PRIORITY));
19100 }
19101 
19102 
19103 /*
19104  *    Function: sd_send_scsi_DOORLOCK
19105  *
19106  * Description: Issue the scsi DOOR LOCK command
19107  *
19108  *   Arguments: un    - pointer to driver soft state (unit) structure for
19109  *			this target.
19110  *		flag  - SD_REMOVAL_ALLOW
19111  *			SD_REMOVAL_PREVENT
19112  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19113  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19114  *			to use the USCSI "direct" chain and bypass the normal
19115  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19116  *			command is issued as part of an error recovery action.
19117  *
19118  * Return Code: 0   - Success
19119  *		errno return code from sd_send_scsi_cmd()
19120  *
19121  *     Context: Can sleep.
19122  */
19123 
19124 static int
19125 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19126 {
19127 	union scsi_cdb		cdb;
19128 	struct uscsi_cmd	ucmd_buf;
19129 	struct scsi_extended_sense	sense_buf;
19130 	int			status;
19131 
19132 	ASSERT(un != NULL);
19133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19134 
19135 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19136 
19137 	/* already determined doorlock is not supported, fake success */
19138 	if (un->un_f_doorlock_supported == FALSE) {
19139 		return (0);
19140 	}
19141 
19142 	bzero(&cdb, sizeof (cdb));
19143 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19144 
19145 	cdb.scc_cmd = SCMD_DOORLOCK;
19146 	cdb.cdb_opaque[4] = (uchar_t)flag;
19147 
19148 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19149 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19150 	ucmd_buf.uscsi_bufaddr	= NULL;
19151 	ucmd_buf.uscsi_buflen	= 0;
19152 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19153 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19154 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19155 	ucmd_buf.uscsi_timeout	= 15;
19156 
19157 	SD_TRACE(SD_LOG_IO, un,
19158 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19159 
19160 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19161 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19162 
19163 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19164 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19165 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19166 		/* fake success and skip subsequent doorlock commands */
19167 		un->un_f_doorlock_supported = FALSE;
19168 		return (0);
19169 	}
19170 
19171 	return (status);
19172 }
19173 
19174 /*
19175  *    Function: sd_send_scsi_READ_CAPACITY
19176  *
19177  * Description: This routine uses the scsi READ CAPACITY command to determine
19178  *		the device capacity in number of blocks and the device native
19179  *		block size. If this function returns a failure, then the
19180  *		values in *capp and *lbap are undefined.  If the capacity
19181  *		returned is 0xffffffff then the lun is too large for a
19182  *		normal READ CAPACITY command and the results of a
19183  *		READ CAPACITY 16 will be used instead.
19184  *
19185  *   Arguments: un   - ptr to soft state struct for the target
19186  *		capp - ptr to unsigned 64-bit variable to receive the
19187  *			capacity value from the command.
19188  *		lbap - ptr to unsigned 32-bit varaible to receive the
19189  *			block size value from the command
19190  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19191  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19192  *			to use the USCSI "direct" chain and bypass the normal
19193  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19194  *			command is issued as part of an error recovery action.
19195  *
19196  * Return Code: 0   - Success
19197  *		EIO - IO error
19198  *		EACCES - Reservation conflict detected
19199  *		EAGAIN - Device is becoming ready
19200  *		errno return code from sd_send_scsi_cmd()
19201  *
19202  *     Context: Can sleep.  Blocks until command completes.
19203  */
19204 
19205 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19206 
19207 static int
19208 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19209 	int path_flag)
19210 {
19211 	struct	scsi_extended_sense	sense_buf;
19212 	struct	uscsi_cmd	ucmd_buf;
19213 	union	scsi_cdb	cdb;
19214 	uint32_t		*capacity_buf;
19215 	uint64_t		capacity;
19216 	uint32_t		lbasize;
19217 	int			status;
19218 
19219 	ASSERT(un != NULL);
19220 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19221 	ASSERT(capp != NULL);
19222 	ASSERT(lbap != NULL);
19223 
19224 	SD_TRACE(SD_LOG_IO, un,
19225 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19226 
19227 	/*
19228 	 * First send a READ_CAPACITY command to the target.
19229 	 * (This command is mandatory under SCSI-2.)
19230 	 *
19231 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19232 	 * Medium Indicator bit is cleared.  The address field must be
19233 	 * zero if the PMI bit is zero.
19234 	 */
19235 	bzero(&cdb, sizeof (cdb));
19236 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19237 
19238 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19239 
19240 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19241 
19242 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19243 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19244 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19245 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19246 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19247 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19248 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19249 	ucmd_buf.uscsi_timeout	= 60;
19250 
19251 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19252 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19253 
19254 	switch (status) {
19255 	case 0:
19256 		/* Return failure if we did not get valid capacity data. */
19257 		if (ucmd_buf.uscsi_resid != 0) {
19258 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19259 			return (EIO);
19260 		}
19261 
19262 		/*
19263 		 * Read capacity and block size from the READ CAPACITY 10 data.
19264 		 * This data may be adjusted later due to device specific
19265 		 * issues.
19266 		 *
19267 		 * According to the SCSI spec, the READ CAPACITY 10
19268 		 * command returns the following:
19269 		 *
19270 		 *  bytes 0-3: Maximum logical block address available.
19271 		 *		(MSB in byte:0 & LSB in byte:3)
19272 		 *
19273 		 *  bytes 4-7: Block length in bytes
19274 		 *		(MSB in byte:4 & LSB in byte:7)
19275 		 *
19276 		 */
19277 		capacity = BE_32(capacity_buf[0]);
19278 		lbasize = BE_32(capacity_buf[1]);
19279 
19280 		/*
19281 		 * Done with capacity_buf
19282 		 */
19283 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19284 
19285 		/*
19286 		 * if the reported capacity is set to all 0xf's, then
19287 		 * this disk is too large and requires SBC-2 commands.
19288 		 * Reissue the request using READ CAPACITY 16.
19289 		 */
19290 		if (capacity == 0xffffffff) {
19291 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19292 			    &lbasize, path_flag);
19293 			if (status != 0) {
19294 				return (status);
19295 			}
19296 		}
19297 		break;	/* Success! */
19298 	case EIO:
19299 		switch (ucmd_buf.uscsi_status) {
19300 		case STATUS_RESERVATION_CONFLICT:
19301 			status = EACCES;
19302 			break;
19303 		case STATUS_CHECK:
19304 			/*
19305 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19306 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19307 			 */
19308 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19309 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19310 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19311 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19312 				return (EAGAIN);
19313 			}
19314 			break;
19315 		default:
19316 			break;
19317 		}
19318 		/* FALLTHRU */
19319 	default:
19320 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19321 		return (status);
19322 	}
19323 
19324 	/*
19325 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19326 	 * (2352 and 0 are common) so for these devices always force the value
19327 	 * to 2048 as required by the ATAPI specs.
19328 	 */
19329 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19330 		lbasize = 2048;
19331 	}
19332 
19333 	/*
19334 	 * Get the maximum LBA value from the READ CAPACITY data.
19335 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19336 	 * was cleared when issuing the command. This means that the LBA
19337 	 * returned from the device is the LBA of the last logical block
19338 	 * on the logical unit.  The actual logical block count will be
19339 	 * this value plus one.
19340 	 *
19341 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19342 	 * so scale the capacity value to reflect this.
19343 	 */
19344 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
19345 
19346 #if defined(__i386) || defined(__amd64)
19347 	/*
19348 	 * Refer to comments related to off-by-1 at the
19349 	 * header of this file.
19350 	 * Treat 1TB disk as (1T - 512)B.
19351 	 */
19352 	if (un->un_f_capacity_adjusted == 1)
19353 	    capacity = DK_MAX_BLOCKS;
19354 #endif
19355 
19356 	/*
19357 	 * Copy the values from the READ CAPACITY command into the space
19358 	 * provided by the caller.
19359 	 */
19360 	*capp = capacity;
19361 	*lbap = lbasize;
19362 
19363 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19364 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19365 
19366 	/*
19367 	 * Both the lbasize and capacity from the device must be nonzero,
19368 	 * otherwise we assume that the values are not valid and return
19369 	 * failure to the caller. (4203735)
19370 	 */
19371 	if ((capacity == 0) || (lbasize == 0)) {
19372 		return (EIO);
19373 	}
19374 
19375 	return (0);
19376 }
19377 
19378 /*
19379  *    Function: sd_send_scsi_READ_CAPACITY_16
19380  *
19381  * Description: This routine uses the scsi READ CAPACITY 16 command to
19382  *		determine the device capacity in number of blocks and the
19383  *		device native block size.  If this function returns a failure,
19384  *		then the values in *capp and *lbap are undefined.
19385  *		This routine should always be called by
19386  *		sd_send_scsi_READ_CAPACITY which will appy any device
19387  *		specific adjustments to capacity and lbasize.
19388  *
19389  *   Arguments: un   - ptr to soft state struct for the target
19390  *		capp - ptr to unsigned 64-bit variable to receive the
19391  *			capacity value from the command.
19392  *		lbap - ptr to unsigned 32-bit varaible to receive the
19393  *			block size value from the command
19394  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19395  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19396  *			to use the USCSI "direct" chain and bypass the normal
19397  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19398  *			this command is issued as part of an error recovery
19399  *			action.
19400  *
19401  * Return Code: 0   - Success
19402  *		EIO - IO error
19403  *		EACCES - Reservation conflict detected
19404  *		EAGAIN - Device is becoming ready
19405  *		errno return code from sd_send_scsi_cmd()
19406  *
19407  *     Context: Can sleep.  Blocks until command completes.
19408  */
19409 
19410 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19411 
static int
sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint64_t		*capacity16_buf;	/* response buffer; freed on every exit path */
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY_16 command to the target.
	 *
	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Read Capacity (16) is a Service Action In command.  One
	 * command byte (0x9E) is overloaded for multiple operations,
	 * with the second CDB byte specifying the desired operation
	 */
	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;

	/*
	 * Fill in allocation length field
	 */
	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/*
		 * Return failure if we did not get valid capacity data.
		 * A residual larger than 20 means the target transferred
		 * too few bytes to cover the capacity and block-length
		 * fields at the front of the response buffer.
		 */
		if (ucmd_buf.uscsi_resid > 20) {
			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 16
		 * response data.  This data may be adjusted later due to
		 * device specific issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 16
		 * parameter data begins with:
		 *
		 *  bytes 0-7: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:7)
		 *
		 *  bytes 8-11: Block length in bytes
		 *		(MSB in byte:8 & LSB in byte:11)
		 *
		 */
		capacity = BE_64(capacity16_buf[0]);
		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);

		/*
		 * Done with capacity16_buf
		 */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits which
		 * are not defined by any current T10 standards.
		 */
		if (capacity == 0xffffffffffffffff) {
			return (EIO);
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Mapped to EACCES by the fall-through below */
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/* All error exits free the response buffer here */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
		return (status);
	}

	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	return (0);
}
19544 
19545 
19546 /*
19547  *    Function: sd_send_scsi_START_STOP_UNIT
19548  *
19549  * Description: Issue a scsi START STOP UNIT command to the target.
19550  *
19551  *   Arguments: un    - pointer to driver soft state (unit) structure for
19552  *			this target.
19553  *		flag  - SD_TARGET_START
19554  *			SD_TARGET_STOP
19555  *			SD_TARGET_EJECT
19556  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19557  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19558  *			to use the USCSI "direct" chain and bypass the normal
19559  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19560  *			command is issued as part of an error recovery action.
19561  *
19562  * Return Code: 0   - Success
19563  *		EIO - IO error
19564  *		EACCES - Reservation conflict detected
19565  *		ENXIO  - Not Ready, medium not present
19566  *		errno return code from sd_send_scsi_cmd()
19567  *
19568  *     Context: Can sleep.
19569  */
19570 
19571 static int
19572 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19573 {
19574 	struct	scsi_extended_sense	sense_buf;
19575 	union scsi_cdb		cdb;
19576 	struct uscsi_cmd	ucmd_buf;
19577 	int			status;
19578 
19579 	ASSERT(un != NULL);
19580 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19581 
19582 	SD_TRACE(SD_LOG_IO, un,
19583 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19584 
19585 	if (un->un_f_check_start_stop &&
19586 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19587 	    (un->un_f_start_stop_supported != TRUE)) {
19588 		return (0);
19589 	}
19590 
19591 	bzero(&cdb, sizeof (cdb));
19592 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19593 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19594 
19595 	cdb.scc_cmd = SCMD_START_STOP;
19596 	cdb.cdb_opaque[4] = (uchar_t)flag;
19597 
19598 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19599 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19600 	ucmd_buf.uscsi_bufaddr	= NULL;
19601 	ucmd_buf.uscsi_buflen	= 0;
19602 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19603 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19604 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19605 	ucmd_buf.uscsi_timeout	= 200;
19606 
19607 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19608 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19609 
19610 	switch (status) {
19611 	case 0:
19612 		break;	/* Success! */
19613 	case EIO:
19614 		switch (ucmd_buf.uscsi_status) {
19615 		case STATUS_RESERVATION_CONFLICT:
19616 			status = EACCES;
19617 			break;
19618 		case STATUS_CHECK:
19619 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19620 				switch (scsi_sense_key(
19621 						(uint8_t *)&sense_buf)) {
19622 				case KEY_ILLEGAL_REQUEST:
19623 					status = ENOTSUP;
19624 					break;
19625 				case KEY_NOT_READY:
19626 					if (scsi_sense_asc(
19627 						    (uint8_t *)&sense_buf)
19628 					    == 0x3A) {
19629 						status = ENXIO;
19630 					}
19631 					break;
19632 				default:
19633 					break;
19634 				}
19635 			}
19636 			break;
19637 		default:
19638 			break;
19639 		}
19640 		break;
19641 	default:
19642 		break;
19643 	}
19644 
19645 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19646 
19647 	return (status);
19648 }
19649 
19650 
19651 /*
19652  *    Function: sd_start_stop_unit_callback
19653  *
19654  * Description: timeout(9F) callback to begin recovery process for a
19655  *		device that has spun down.
19656  *
19657  *   Arguments: arg - pointer to associated softstate struct.
19658  *
19659  *     Context: Executes in a timeout(9F) thread context
19660  */
19661 
19662 static void
19663 sd_start_stop_unit_callback(void *arg)
19664 {
19665 	struct sd_lun	*un = arg;
19666 	ASSERT(un != NULL);
19667 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19668 
19669 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19670 
19671 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19672 }
19673 
19674 
19675 /*
19676  *    Function: sd_start_stop_unit_task
19677  *
19678  * Description: Recovery procedure when a drive is spun down.
19679  *
19680  *   Arguments: arg - pointer to associated softstate struct.
19681  *
19682  *     Context: Executes in a taskq() thread context
19683  */
19684 
19685 static void
19686 sd_start_stop_unit_task(void *arg)
19687 {
19688 	struct sd_lun	*un = arg;
19689 
19690 	ASSERT(un != NULL);
19691 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19692 
19693 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19694 
19695 	/*
19696 	 * Some unformatted drives report not ready error, no need to
19697 	 * restart if format has been initiated.
19698 	 */
19699 	mutex_enter(SD_MUTEX(un));
19700 	if (un->un_f_format_in_progress == TRUE) {
19701 		mutex_exit(SD_MUTEX(un));
19702 		return;
19703 	}
19704 	mutex_exit(SD_MUTEX(un));
19705 
19706 	/*
19707 	 * When a START STOP command is issued from here, it is part of a
19708 	 * failure recovery operation and must be issued before any other
19709 	 * commands, including any pending retries. Thus it must be sent
19710 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19711 	 * succeeds or not, we will start I/O after the attempt.
19712 	 */
19713 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19714 	    SD_PATH_DIRECT_PRIORITY);
19715 
19716 	/*
19717 	 * The above call blocks until the START_STOP_UNIT command completes.
19718 	 * Now that it has completed, we must re-try the original IO that
19719 	 * received the NOT READY condition in the first place. There are
19720 	 * three possible conditions here:
19721 	 *
19722 	 *  (1) The original IO is on un_retry_bp.
19723 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19724 	 *	is NULL.
19725 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19726 	 *	points to some other, unrelated bp.
19727 	 *
19728 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19729 	 * as the argument. If un_retry_bp is NULL, this will initiate
19730 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19731 	 * then this will process the bp on un_retry_bp. That may or may not
19732 	 * be the original IO, but that does not matter: the important thing
19733 	 * is to keep the IO processing going at this point.
19734 	 *
19735 	 * Note: This is a very specific error recovery sequence associated
19736 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19737 	 * serialize the I/O with completion of the spin-up.
19738 	 */
19739 	mutex_enter(SD_MUTEX(un));
19740 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19741 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19742 	    un, un->un_retry_bp);
19743 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19744 	sd_start_cmds(un, un->un_retry_bp);
19745 	mutex_exit(SD_MUTEX(un));
19746 
19747 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19748 }
19749 
19750 
19751 /*
19752  *    Function: sd_send_scsi_INQUIRY
19753  *
19754  * Description: Issue the scsi INQUIRY command.
19755  *
19756  *   Arguments: un
19757  *		bufaddr
19758  *		buflen
19759  *		evpd
19760  *		page_code
19761  *		page_length
19762  *
19763  * Return Code: 0   - Success
19764  *		errno return code from sd_send_scsi_cmd()
19765  *
19766  *     Context: Can sleep. Does not return until command is completed.
19767  */
19768 
19769 static int
19770 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19771 	uchar_t evpd, uchar_t page_code, size_t *residp)
19772 {
19773 	union scsi_cdb		cdb;
19774 	struct uscsi_cmd	ucmd_buf;
19775 	int			status;
19776 
19777 	ASSERT(un != NULL);
19778 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19779 	ASSERT(bufaddr != NULL);
19780 
19781 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19782 
19783 	bzero(&cdb, sizeof (cdb));
19784 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19785 	bzero(bufaddr, buflen);
19786 
19787 	cdb.scc_cmd = SCMD_INQUIRY;
19788 	cdb.cdb_opaque[1] = evpd;
19789 	cdb.cdb_opaque[2] = page_code;
19790 	FORMG0COUNT(&cdb, buflen);
19791 
19792 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19793 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19794 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19795 	ucmd_buf.uscsi_buflen	= buflen;
19796 	ucmd_buf.uscsi_rqbuf	= NULL;
19797 	ucmd_buf.uscsi_rqlen	= 0;
19798 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19799 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19800 
19801 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19802 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19803 
19804 	if ((status == 0) && (residp != NULL)) {
19805 		*residp = ucmd_buf.uscsi_resid;
19806 	}
19807 
19808 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19809 
19810 	return (status);
19811 }
19812 
19813 
19814 /*
19815  *    Function: sd_send_scsi_TEST_UNIT_READY
19816  *
19817  * Description: Issue the scsi TEST UNIT READY command.
19818  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19819  *		prevent retrying failed commands. Use this when the intent
19820  *		is either to check for device readiness, to clear a Unit
19821  *		Attention, or to clear any outstanding sense data.
19822  *		However under specific conditions the expected behavior
19823  *		is for retries to bring a device ready, so use the flag
19824  *		with caution.
19825  *
19826  *   Arguments: un
19827  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19828  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19829  *			0: dont check for media present, do retries on cmd.
19830  *
19831  * Return Code: 0   - Success
19832  *		EIO - IO error
19833  *		EACCES - Reservation conflict detected
19834  *		ENXIO  - Not Ready, medium not present
19835  *		errno return code from sd_send_scsi_cmd()
19836  *
19837  *     Context: Can sleep. Does not return until command is completed.
19838  */
19839 
19840 static int
19841 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19842 {
19843 	struct	scsi_extended_sense	sense_buf;
19844 	union scsi_cdb		cdb;
19845 	struct uscsi_cmd	ucmd_buf;
19846 	int			status;
19847 
19848 	ASSERT(un != NULL);
19849 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19850 
19851 	SD_TRACE(SD_LOG_IO, un,
19852 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19853 
19854 	/*
19855 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19856 	 * timeouts when they receive a TUR and the queue is not empty. Check
19857 	 * the configuration flag set during attach (indicating the drive has
19858 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19859 	 * TUR. If there are
19860 	 * pending commands return success, this is a bit arbitrary but is ok
19861 	 * for non-removables (i.e. the eliteI disks) and non-clustering
19862 	 * configurations.
19863 	 */
19864 	if (un->un_f_cfg_tur_check == TRUE) {
19865 		mutex_enter(SD_MUTEX(un));
19866 		if (un->un_ncmds_in_transport != 0) {
19867 			mutex_exit(SD_MUTEX(un));
19868 			return (0);
19869 		}
19870 		mutex_exit(SD_MUTEX(un));
19871 	}
19872 
19873 	bzero(&cdb, sizeof (cdb));
19874 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19875 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19876 
19877 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19878 
19879 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19880 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19881 	ucmd_buf.uscsi_bufaddr	= NULL;
19882 	ucmd_buf.uscsi_buflen	= 0;
19883 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19884 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19885 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19886 
19887 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19888 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19889 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19890 	}
19891 	ucmd_buf.uscsi_timeout	= 60;
19892 
19893 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19894 	    UIO_SYSSPACE, UIO_SYSSPACE,
19895 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19896 
19897 	switch (status) {
19898 	case 0:
19899 		break;	/* Success! */
19900 	case EIO:
19901 		switch (ucmd_buf.uscsi_status) {
19902 		case STATUS_RESERVATION_CONFLICT:
19903 			status = EACCES;
19904 			break;
19905 		case STATUS_CHECK:
19906 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19907 				break;
19908 			}
19909 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19910 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19911 				KEY_NOT_READY) &&
19912 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
19913 				status = ENXIO;
19914 			}
19915 			break;
19916 		default:
19917 			break;
19918 		}
19919 		break;
19920 	default:
19921 		break;
19922 	}
19923 
19924 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19925 
19926 	return (status);
19927 }
19928 
19929 
19930 /*
19931  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19932  *
19933  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19934  *
19935  *   Arguments: un
19936  *
19937  * Return Code: 0   - Success
19938  *		EACCES
19939  *		ENOTSUP
19940  *		errno return code from sd_send_scsi_cmd()
19941  *
19942  *     Context: Can sleep. Does not return until command is completed.
19943  */
19944 
19945 static int
19946 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19947 	uint16_t data_len, uchar_t *data_bufp)
19948 {
19949 	struct scsi_extended_sense	sense_buf;
19950 	union scsi_cdb		cdb;
19951 	struct uscsi_cmd	ucmd_buf;
19952 	int			status;
19953 	int			no_caller_buf = FALSE;
19954 
19955 	ASSERT(un != NULL);
19956 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19957 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19958 
19959 	SD_TRACE(SD_LOG_IO, un,
19960 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19961 
19962 	bzero(&cdb, sizeof (cdb));
19963 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19964 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19965 	if (data_bufp == NULL) {
19966 		/* Allocate a default buf if the caller did not give one */
19967 		ASSERT(data_len == 0);
19968 		data_len  = MHIOC_RESV_KEY_SIZE;
19969 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19970 		no_caller_buf = TRUE;
19971 	}
19972 
19973 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19974 	cdb.cdb_opaque[1] = usr_cmd;
19975 	FORMG1COUNT(&cdb, data_len);
19976 
19977 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19978 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19979 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19980 	ucmd_buf.uscsi_buflen	= data_len;
19981 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19982 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19983 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19984 	ucmd_buf.uscsi_timeout	= 60;
19985 
19986 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19987 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19988 
19989 	switch (status) {
19990 	case 0:
19991 		break;	/* Success! */
19992 	case EIO:
19993 		switch (ucmd_buf.uscsi_status) {
19994 		case STATUS_RESERVATION_CONFLICT:
19995 			status = EACCES;
19996 			break;
19997 		case STATUS_CHECK:
19998 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19999 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20000 				KEY_ILLEGAL_REQUEST)) {
20001 				status = ENOTSUP;
20002 			}
20003 			break;
20004 		default:
20005 			break;
20006 		}
20007 		break;
20008 	default:
20009 		break;
20010 	}
20011 
20012 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20013 
20014 	if (no_caller_buf == TRUE) {
20015 		kmem_free(data_bufp, data_len);
20016 	}
20017 
20018 	return (status);
20019 }
20020 
20021 
20022 /*
20023  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20024  *
20025  * Description: This routine is the driver entry point for handling CD-ROM
20026  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
20027  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
20028  *		device.
20029  *
20030  *   Arguments: un  -   Pointer to soft state struct for the target.
20031  *		usr_cmd SCSI-3 reservation facility command (one of
20032  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20033  *			SD_SCSI3_PREEMPTANDABORT)
20034  *		usr_bufp - user provided pointer register, reserve descriptor or
20035  *			preempt and abort structure (mhioc_register_t,
20036  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20037  *
20038  * Return Code: 0   - Success
20039  *		EACCES
20040  *		ENOTSUP
20041  *		errno return code from sd_send_scsi_cmd()
20042  *
20043  *     Context: Can sleep. Does not return until command is completed.
20044  */
20045 
static int
sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
	uchar_t	*usr_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	uchar_t			data_len = sizeof (sd_prout_t);
	sd_prout_t		*prp;	/* PROUT parameter list sent to the device */

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(data_len == 24);	/* required by scsi spec */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);

	if (usr_bufp == NULL) {
		return (EINVAL);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	prp = kmem_zalloc(data_len, KM_SLEEP);

	/* PERSISTENT RESERVE OUT: the service action goes in CDB byte 1 */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
	cdb.cdb_opaque[1] = usr_cmd;
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Marshal the caller's mhioc_* structure into the PROUT parameter
	 * list; the fields used depend on the requested service action.
	 */
	switch (usr_cmd) {
	case SD_SCSI3_REGISTER: {
		/* REGISTER: current key in res_key, new key in service_key */
		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;

		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->newkey.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	case SD_SCSI3_RESERVE:
	case SD_SCSI3_RELEASE: {
		/* RESERVE/RELEASE: reservation type goes in CDB byte 2 */
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->type;
		break;
	}
	case SD_SCSI3_PREEMPTANDABORT: {
		/*
		 * PREEMPT AND ABORT: our key in res_key, the victim's key
		 * in service_key.  USCSI_HEAD puts this command at the
		 * front of the queue since it is part of takeover.
		 */
		mhioc_preemptandabort_t *ptr =
		    (mhioc_preemptandabort_t *)usr_bufp;

		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->victim_key.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->resvdesc.type;
		ucmd_buf.uscsi_flags |= USCSI_HEAD;
		break;
	}
	case SD_SCSI3_REGISTERANDIGNOREKEY:
	{
		/* REGISTER AND IGNORE EXISTING KEY: only the new key is sent */
		mhioc_registerandignorekey_t *ptr;
		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
		bcopy(ptr->newkey.key,
		    prp->service_key, MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	default:
		ASSERT(FALSE);
		break;
	}

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);

	/* Map transport-level EIO onto a more specific errno where possible */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
				KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	kmem_free(prp, data_len);
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
	return (status);
}
20161 
20162 
20163 /*
20164  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20165  *
20166  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20167  *
20168  *   Arguments: un - pointer to the target's soft state struct
20169  *
20170  * Return Code: 0 - success
20171  *		errno-type error code
20172  *
20173  *     Context: kernel thread context only.
20174  */
20175 
static int
sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
{
	struct sd_uscsi_info	*uip;
	struct uscsi_cmd	*uscmd;
	union scsi_cdb		*cdb;
	struct buf		*bp;
	int			rval = 0;

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;

	/*
	 * First get some memory for the uscsi_cmd struct and cdb
	 * and initialize for SYNCHRONIZE_CACHE cmd.
	 * All of these allocations (cdb, uscmd, rqbuf, uip, bp) are
	 * released by sd_send_scsi_SYNCHRONIZE_CACHE_biodone(), which
	 * runs on both the sync and async completion paths.
	 */
	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	uscmd->uscsi_cdblen = CDB_GROUP1;
	uscmd->uscsi_cdb = (caddr_t)cdb;
	uscmd->uscsi_bufaddr = NULL;
	uscmd->uscsi_buflen = 0;
	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	uscmd->uscsi_rqlen = SENSE_LENGTH;
	uscmd->uscsi_rqresid = SENSE_LENGTH;
	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
	uscmd->uscsi_timeout = sd_io_time;

	/*
	 * Allocate an sd_uscsi_info struct and fill it with the info
	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
	 * since we allocate the buf here in this function, we do not
	 * need to preserve the prior contents of b_private.
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 */
	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = SD_PATH_DIRECT;
	uip->ui_cmdp  = uscmd;

	bp = getrbuf(KM_SLEEP);
	bp->b_private = uip;

	/*
	 * Setup buffer to carry uscsi request.
	 */
	bp->b_flags  = B_BUSY;
	bp->b_bcount = 0;
	bp->b_blkno  = 0;

	/*
	 * Non-NULL dkc selects asynchronous operation: biodone() will
	 * invoke our completion routine, which in turn calls the caller's
	 * dk_callback and frees everything.
	 */
	if (dkc != NULL) {
		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
		uip->ui_dkc = *dkc;
	}

	bp->b_edev = SD_GET_DEV(un);
	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */

	(void) sd_uscsi_strategy(bp);

	/*
	 * If synchronous request, wait for completion
	 * If async just return and let b_iodone callback
	 * cleanup.
	 * NOTE: On return, u_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
	if (dkc == NULL) {
		(void) biowait(bp);
		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
	}

	return (rval);
}
20256 
20257 
/*
 * Completion routine for SYNCHRONIZE CACHE: classifies the command status,
 * invokes the caller's dk_callback (if any), and frees every resource
 * allocated by sd_send_scsi_SYNCHRONIZE_CACHE().  Runs either from
 * biodone() (async) or directly after biowait() (sync).
 */
static int
sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
{
	struct sd_uscsi_info *uip;
	struct uscsi_cmd *uscmd;
	uint8_t *sense_buf;
	struct sd_lun *un;
	int status;

	uip = (struct sd_uscsi_info *)(bp->b_private);
	ASSERT(uip != NULL);

	uscmd = uip->ui_cmdp;
	ASSERT(uscmd != NULL);

	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
	ASSERT(sense_buf != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	ASSERT(un != NULL);

	status = geterror(bp);
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (uscmd->uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Ignore reservation conflict */
			status = 0;
			goto done;

		case STATUS_CHECK:
			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key(sense_buf) ==
				KEY_ILLEGAL_REQUEST)) {
				/*
				 * Ignore Illegal Request error, but remember
				 * that this device does not support the
				 * command so we never issue it again.
				 */
				mutex_enter(SD_MUTEX(un));
				un->un_f_sync_cache_supported = FALSE;
				mutex_exit(SD_MUTEX(un));
				status = ENOTSUP;
				goto done;
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/* Ignore error if the media is not present */
		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
			status = 0;
			goto done;
		}
		/* If we reach this, we had an error */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
		break;
	}

done:
	/* Deliver the final status to the caller's async callback, if any */
	if (uip->ui_dkc.dkc_callback != NULL) {
		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
	}

	/* Release everything allocated by sd_send_scsi_SYNCHRONIZE_CACHE() */
	ASSERT((bp->b_flags & B_REMAPPED) == 0);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	return (status);
}
20332 
20333 
20334 /*
20335  *    Function: sd_send_scsi_GET_CONFIGURATION
20336  *
20337  * Description: Issues the get configuration command to the device.
20338  *		Called from sd_check_for_writable_cd & sd_get_media_info
20339  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
20340  *   Arguments: un
20341  *		ucmdbuf
20342  *		rqbuf
20343  *		rqbuflen
20344  *		bufaddr
20345  *		buflen
20346  *
20347  * Return Code: 0   - Success
20348  *		errno return code from sd_send_scsi_cmd()
20349  *
20350  *     Context: Can sleep. Does not return until command is completed.
20351  *
20352  */
20353 
20354 static int
20355 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20356 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20357 {
20358 	char	cdb[CDB_GROUP1];
20359 	int	status;
20360 
20361 	ASSERT(un != NULL);
20362 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20363 	ASSERT(bufaddr != NULL);
20364 	ASSERT(ucmdbuf != NULL);
20365 	ASSERT(rqbuf != NULL);
20366 
20367 	SD_TRACE(SD_LOG_IO, un,
20368 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20369 
20370 	bzero(cdb, sizeof (cdb));
20371 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20372 	bzero(rqbuf, rqbuflen);
20373 	bzero(bufaddr, buflen);
20374 
20375 	/*
20376 	 * Set up cdb field for the get configuration command.
20377 	 */
20378 	cdb[0] = SCMD_GET_CONFIGURATION;
20379 	cdb[1] = 0x02;  /* Requested Type */
20380 	cdb[8] = SD_PROFILE_HEADER_LEN;
20381 	ucmdbuf->uscsi_cdb = cdb;
20382 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20383 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20384 	ucmdbuf->uscsi_buflen = buflen;
20385 	ucmdbuf->uscsi_timeout = sd_io_time;
20386 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20387 	ucmdbuf->uscsi_rqlen = rqbuflen;
20388 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20389 
20390 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20391 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20392 
20393 	switch (status) {
20394 	case 0:
20395 		break;  /* Success! */
20396 	case EIO:
20397 		switch (ucmdbuf->uscsi_status) {
20398 		case STATUS_RESERVATION_CONFLICT:
20399 			status = EACCES;
20400 			break;
20401 		default:
20402 			break;
20403 		}
20404 		break;
20405 	default:
20406 		break;
20407 	}
20408 
20409 	if (status == 0) {
20410 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20411 		    "sd_send_scsi_GET_CONFIGURATION: data",
20412 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20413 	}
20414 
20415 	SD_TRACE(SD_LOG_IO, un,
20416 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20417 
20418 	return (status);
20419 }
20420 
20421 /*
20422  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20423  *
20424  * Description: Issues the get configuration command to the device to
 *              retrieve a specific feature. Called from
20426  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20427  *   Arguments: un
20428  *              ucmdbuf
20429  *              rqbuf
20430  *              rqbuflen
20431  *              bufaddr
20432  *              buflen
20433  *		feature
20434  *
20435  * Return Code: 0   - Success
20436  *              errno return code from sd_send_scsi_cmd()
20437  *
20438  *     Context: Can sleep. Does not return until command is completed.
20439  *
20440  */
20441 static int
20442 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20443 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20444 	uchar_t *bufaddr, uint_t buflen, char feature)
20445 {
20446 	char    cdb[CDB_GROUP1];
20447 	int	status;
20448 
20449 	ASSERT(un != NULL);
20450 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20451 	ASSERT(bufaddr != NULL);
20452 	ASSERT(ucmdbuf != NULL);
20453 	ASSERT(rqbuf != NULL);
20454 
20455 	SD_TRACE(SD_LOG_IO, un,
20456 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20457 
20458 	bzero(cdb, sizeof (cdb));
20459 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20460 	bzero(rqbuf, rqbuflen);
20461 	bzero(bufaddr, buflen);
20462 
20463 	/*
20464 	 * Set up cdb field for the get configuration command.
20465 	 */
20466 	cdb[0] = SCMD_GET_CONFIGURATION;
20467 	cdb[1] = 0x02;  /* Requested Type */
20468 	cdb[3] = feature;
20469 	cdb[8] = buflen;
20470 	ucmdbuf->uscsi_cdb = cdb;
20471 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20472 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20473 	ucmdbuf->uscsi_buflen = buflen;
20474 	ucmdbuf->uscsi_timeout = sd_io_time;
20475 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20476 	ucmdbuf->uscsi_rqlen = rqbuflen;
20477 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20478 
20479 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20480 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20481 
20482 	switch (status) {
20483 	case 0:
20484 		break;  /* Success! */
20485 	case EIO:
20486 		switch (ucmdbuf->uscsi_status) {
20487 		case STATUS_RESERVATION_CONFLICT:
20488 			status = EACCES;
20489 			break;
20490 		default:
20491 			break;
20492 		}
20493 		break;
20494 	default:
20495 		break;
20496 	}
20497 
20498 	if (status == 0) {
20499 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20500 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20501 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20502 	}
20503 
20504 	SD_TRACE(SD_LOG_IO, un,
20505 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20506 
20507 	return (status);
20508 }
20509 
20510 
20511 /*
20512  *    Function: sd_send_scsi_MODE_SENSE
20513  *
20514  * Description: Utility function for issuing a scsi MODE SENSE command.
20515  *		Note: This routine uses a consistent implementation for Group0,
20516  *		Group1, and Group2 commands across all platforms. ATAPI devices
20517  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20518  *
20519  *   Arguments: un - pointer to the softstate struct for the target.
20520  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20521  *			  CDB_GROUP[1|2] (10 byte).
20522  *		bufaddr - buffer for page data retrieved from the target.
20523  *		buflen - size of page to be retrieved.
20524  *		page_code - page code of data to be retrieved from the target.
20525  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20526  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20527  *			to use the USCSI "direct" chain and bypass the normal
20528  *			command waitq.
20529  *
20530  * Return Code: 0   - Success
20531  *		errno return code from sd_send_scsi_cmd()
20532  *
20533  *     Context: Can sleep. Does not return until command is completed.
20534  */
20535 
20536 static int
20537 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20538 	size_t buflen,  uchar_t page_code, int path_flag)
20539 {
20540 	struct	scsi_extended_sense	sense_buf;
20541 	union scsi_cdb		cdb;
20542 	struct uscsi_cmd	ucmd_buf;
20543 	int			status;
20544 	int			headlen;
20545 
20546 	ASSERT(un != NULL);
20547 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20548 	ASSERT(bufaddr != NULL);
20549 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20550 	    (cdbsize == CDB_GROUP2));
20551 
20552 	SD_TRACE(SD_LOG_IO, un,
20553 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20554 
20555 	bzero(&cdb, sizeof (cdb));
20556 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20557 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20558 	bzero(bufaddr, buflen);
20559 
20560 	if (cdbsize == CDB_GROUP0) {
20561 		cdb.scc_cmd = SCMD_MODE_SENSE;
20562 		cdb.cdb_opaque[2] = page_code;
20563 		FORMG0COUNT(&cdb, buflen);
20564 		headlen = MODE_HEADER_LENGTH;
20565 	} else {
20566 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20567 		cdb.cdb_opaque[2] = page_code;
20568 		FORMG1COUNT(&cdb, buflen);
20569 		headlen = MODE_HEADER_LENGTH_GRP2;
20570 	}
20571 
20572 	ASSERT(headlen <= buflen);
20573 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20574 
20575 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20576 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20577 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20578 	ucmd_buf.uscsi_buflen	= buflen;
20579 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20580 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20581 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20582 	ucmd_buf.uscsi_timeout	= 60;
20583 
20584 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20585 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20586 
20587 	switch (status) {
20588 	case 0:
20589 		/*
20590 		 * sr_check_wp() uses 0x3f page code and check the header of
20591 		 * mode page to determine if target device is write-protected.
20592 		 * But some USB devices return 0 bytes for 0x3f page code. For
20593 		 * this case, make sure that mode page header is returned at
20594 		 * least.
20595 		 */
20596 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20597 			status = EIO;
20598 		break;	/* Success! */
20599 	case EIO:
20600 		switch (ucmd_buf.uscsi_status) {
20601 		case STATUS_RESERVATION_CONFLICT:
20602 			status = EACCES;
20603 			break;
20604 		default:
20605 			break;
20606 		}
20607 		break;
20608 	default:
20609 		break;
20610 	}
20611 
20612 	if (status == 0) {
20613 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20614 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20615 	}
20616 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20617 
20618 	return (status);
20619 }
20620 
20621 
20622 /*
20623  *    Function: sd_send_scsi_MODE_SELECT
20624  *
20625  * Description: Utility function for issuing a scsi MODE SELECT command.
20626  *		Note: This routine uses a consistent implementation for Group0,
20627  *		Group1, and Group2 commands across all platforms. ATAPI devices
20628  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20629  *
20630  *   Arguments: un - pointer to the softstate struct for the target.
20631  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20632  *			  CDB_GROUP[1|2] (10 byte).
20633  *		bufaddr - buffer for page data retrieved from the target.
20634  *		buflen - size of page to be retrieved.
20635  *		save_page - boolean to determin if SP bit should be set.
20636  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20637  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20638  *			to use the USCSI "direct" chain and bypass the normal
20639  *			command waitq.
20640  *
20641  * Return Code: 0   - Success
20642  *		errno return code from sd_send_scsi_cmd()
20643  *
20644  *     Context: Can sleep. Does not return until command is completed.
20645  */
20646 
20647 static int
20648 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20649 	size_t buflen,  uchar_t save_page, int path_flag)
20650 {
20651 	struct	scsi_extended_sense	sense_buf;
20652 	union scsi_cdb		cdb;
20653 	struct uscsi_cmd	ucmd_buf;
20654 	int			status;
20655 
20656 	ASSERT(un != NULL);
20657 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20658 	ASSERT(bufaddr != NULL);
20659 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20660 	    (cdbsize == CDB_GROUP2));
20661 
20662 	SD_TRACE(SD_LOG_IO, un,
20663 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20664 
20665 	bzero(&cdb, sizeof (cdb));
20666 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20667 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20668 
20669 	/* Set the PF bit for many third party drives */
20670 	cdb.cdb_opaque[1] = 0x10;
20671 
20672 	/* Set the savepage(SP) bit if given */
20673 	if (save_page == SD_SAVE_PAGE) {
20674 		cdb.cdb_opaque[1] |= 0x01;
20675 	}
20676 
20677 	if (cdbsize == CDB_GROUP0) {
20678 		cdb.scc_cmd = SCMD_MODE_SELECT;
20679 		FORMG0COUNT(&cdb, buflen);
20680 	} else {
20681 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20682 		FORMG1COUNT(&cdb, buflen);
20683 	}
20684 
20685 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20686 
20687 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20688 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20689 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20690 	ucmd_buf.uscsi_buflen	= buflen;
20691 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20692 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20693 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20694 	ucmd_buf.uscsi_timeout	= 60;
20695 
20696 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20697 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20698 
20699 	switch (status) {
20700 	case 0:
20701 		break;	/* Success! */
20702 	case EIO:
20703 		switch (ucmd_buf.uscsi_status) {
20704 		case STATUS_RESERVATION_CONFLICT:
20705 			status = EACCES;
20706 			break;
20707 		default:
20708 			break;
20709 		}
20710 		break;
20711 	default:
20712 		break;
20713 	}
20714 
20715 	if (status == 0) {
20716 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20717 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20718 	}
20719 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20720 
20721 	return (status);
20722 }
20723 
20724 
20725 /*
20726  *    Function: sd_send_scsi_RDWR
20727  *
20728  * Description: Issue a scsi READ or WRITE command with the given parameters.
20729  *
20730  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20731  *		cmd:	 SCMD_READ or SCMD_WRITE
20732  *		bufaddr: Address of caller's buffer to receive the RDWR data
20733  *		buflen:  Length of caller's buffer receive the RDWR data.
20734  *		start_block: Block number for the start of the RDWR operation.
20735  *			 (Assumes target-native block size.)
20736  *		residp:  Pointer to variable to receive the redisual of the
20737  *			 RDWR operation (may be NULL of no residual requested).
20738  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20739  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20740  *			to use the USCSI "direct" chain and bypass the normal
20741  *			command waitq.
20742  *
20743  * Return Code: 0   - Success
20744  *		errno return code from sd_send_scsi_cmd()
20745  *
20746  *     Context: Can sleep. Does not return until command is completed.
20747  */
20748 
20749 static int
20750 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20751 	size_t buflen, daddr_t start_block, int path_flag)
20752 {
20753 	struct	scsi_extended_sense	sense_buf;
20754 	union scsi_cdb		cdb;
20755 	struct uscsi_cmd	ucmd_buf;
20756 	uint32_t		block_count;
20757 	int			status;
20758 	int			cdbsize;
20759 	uchar_t			flag;
20760 
20761 	ASSERT(un != NULL);
20762 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20763 	ASSERT(bufaddr != NULL);
20764 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20765 
20766 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20767 
20768 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20769 		return (EINVAL);
20770 	}
20771 
20772 	mutex_enter(SD_MUTEX(un));
20773 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20774 	mutex_exit(SD_MUTEX(un));
20775 
20776 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20777 
20778 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20779 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20780 	    bufaddr, buflen, start_block, block_count);
20781 
20782 	bzero(&cdb, sizeof (cdb));
20783 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20784 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20785 
20786 	/* Compute CDB size to use */
20787 	if (start_block > 0xffffffff)
20788 		cdbsize = CDB_GROUP4;
20789 	else if ((start_block & 0xFFE00000) ||
20790 	    (un->un_f_cfg_is_atapi == TRUE))
20791 		cdbsize = CDB_GROUP1;
20792 	else
20793 		cdbsize = CDB_GROUP0;
20794 
20795 	switch (cdbsize) {
20796 	case CDB_GROUP0:	/* 6-byte CDBs */
20797 		cdb.scc_cmd = cmd;
20798 		FORMG0ADDR(&cdb, start_block);
20799 		FORMG0COUNT(&cdb, block_count);
20800 		break;
20801 	case CDB_GROUP1:	/* 10-byte CDBs */
20802 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20803 		FORMG1ADDR(&cdb, start_block);
20804 		FORMG1COUNT(&cdb, block_count);
20805 		break;
20806 	case CDB_GROUP4:	/* 16-byte CDBs */
20807 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20808 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20809 		FORMG4COUNT(&cdb, block_count);
20810 		break;
20811 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20812 	default:
20813 		/* All others reserved */
20814 		return (EINVAL);
20815 	}
20816 
20817 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20818 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20819 
20820 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20821 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20822 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20823 	ucmd_buf.uscsi_buflen	= buflen;
20824 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20825 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20826 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20827 	ucmd_buf.uscsi_timeout	= 60;
20828 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20829 				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20830 	switch (status) {
20831 	case 0:
20832 		break;	/* Success! */
20833 	case EIO:
20834 		switch (ucmd_buf.uscsi_status) {
20835 		case STATUS_RESERVATION_CONFLICT:
20836 			status = EACCES;
20837 			break;
20838 		default:
20839 			break;
20840 		}
20841 		break;
20842 	default:
20843 		break;
20844 	}
20845 
20846 	if (status == 0) {
20847 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20848 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20849 	}
20850 
20851 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20852 
20853 	return (status);
20854 }
20855 
20856 
20857 /*
20858  *    Function: sd_send_scsi_LOG_SENSE
20859  *
20860  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20861  *
20862  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20863  *
20864  * Return Code: 0   - Success
20865  *		errno return code from sd_send_scsi_cmd()
20866  *
20867  *     Context: Can sleep. Does not return until command is completed.
20868  */
20869 
static int
sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)

{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Build the 10-byte LOG SENSE CDB: byte 2 carries the page
	 * control field in its top two bits OR'ed with the page code;
	 * bytes 5-6 carry the big-endian parameter pointer; the
	 * allocation length is filled in by FORMG1COUNT.
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		/* Map specific check conditions to more useful errnos */
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* A reservation conflict is reported as EACCES */
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
				KEY_ILLEGAL_REQUEST) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					/*
					 * ucmd_buf.uscsi_cdb still points
					 * at the local cdb, so the page
					 * code patched in above is picked
					 * up by this retry.
					 */
					status = sd_send_scsi_cmd(
					    SD_GET_DEV(un), &ucmd_buf,
					    UIO_SYSSPACE, UIO_SYSSPACE,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* No temperature log page support */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
20981 
20982 
20983 /*
20984  *    Function: sdioctl
20985  *
20986  * Description: Driver's ioctl(9e) entry point function.
20987  *
20988  *   Arguments: dev     - device number
20989  *		cmd     - ioctl operation to be performed
20990  *		arg     - user argument, contains data to be set or reference
20991  *			  parameter for get
20992  *		flag    - bit flag, indicating open settings, 32/64 bit type
20993  *		cred_p  - user credential pointer
20994  *		rval_p  - calling process return value (OPT)
20995  *
20996  * Return Code: EINVAL
20997  *		ENOTTY
20998  *		ENXIO
20999  *		EIO
21000  *		EFAULT
21001  *		ENOTSUP
21002  *		EPERM
21003  *
21004  *     Context: Called from the device switch at normal priority.
21005  */
21006 
21007 static int
21008 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21009 {
21010 	struct sd_lun	*un = NULL;
21011 	int		geom_validated = FALSE;
21012 	int		err = 0;
21013 	int		i = 0;
21014 	cred_t		*cr;
21015 
21016 	/*
21017 	 * All device accesses go thru sdstrategy where we check on suspend
21018 	 * status
21019 	 */
21020 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21021 		return (ENXIO);
21022 	}
21023 
21024 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21025 
21026 	/*
21027 	 * Moved this wait from sd_uscsi_strategy to here for
21028 	 * reasons of deadlock prevention. Internal driver commands,
21029 	 * specifically those to change a devices power level, result
21030 	 * in a call to sd_uscsi_strategy.
21031 	 */
21032 	mutex_enter(SD_MUTEX(un));
21033 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21034 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21035 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21036 	}
21037 	/*
21038 	 * Twiddling the counter here protects commands from now
21039 	 * through to the top of sd_uscsi_strategy. Without the
21040 	 * counter inc. a power down, for example, could get in
21041 	 * after the above check for state is made and before
21042 	 * execution gets to the top of sd_uscsi_strategy.
21043 	 * That would cause problems.
21044 	 */
21045 	un->un_ncmds_in_driver++;
21046 
21047 	if ((un->un_f_geometry_is_valid == FALSE) &&
21048 	    (flag & (FNDELAY | FNONBLOCK))) {
21049 		switch (cmd) {
21050 		case CDROMPAUSE:
21051 		case CDROMRESUME:
21052 		case CDROMPLAYMSF:
21053 		case CDROMPLAYTRKIND:
21054 		case CDROMREADTOCHDR:
21055 		case CDROMREADTOCENTRY:
21056 		case CDROMSTOP:
21057 		case CDROMSTART:
21058 		case CDROMVOLCTRL:
21059 		case CDROMSUBCHNL:
21060 		case CDROMREADMODE2:
21061 		case CDROMREADMODE1:
21062 		case CDROMREADOFFSET:
21063 		case CDROMSBLKMODE:
21064 		case CDROMGBLKMODE:
21065 		case CDROMGDRVSPEED:
21066 		case CDROMSDRVSPEED:
21067 		case CDROMCDDA:
21068 		case CDROMCDXA:
21069 		case CDROMSUBCODE:
21070 			if (!ISCD(un)) {
21071 				un->un_ncmds_in_driver--;
21072 				ASSERT(un->un_ncmds_in_driver >= 0);
21073 				mutex_exit(SD_MUTEX(un));
21074 				return (ENOTTY);
21075 			}
21076 			break;
21077 		case FDEJECT:
21078 		case DKIOCEJECT:
21079 		case CDROMEJECT:
21080 			if (!un->un_f_eject_media_supported) {
21081 				un->un_ncmds_in_driver--;
21082 				ASSERT(un->un_ncmds_in_driver >= 0);
21083 				mutex_exit(SD_MUTEX(un));
21084 				return (ENOTTY);
21085 			}
21086 			break;
21087 		case DKIOCSVTOC:
21088 		case DKIOCSETEFI:
21089 		case DKIOCSMBOOT:
21090 		case DKIOCFLUSHWRITECACHE:
21091 			mutex_exit(SD_MUTEX(un));
21092 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21093 			if (err != 0) {
21094 				mutex_enter(SD_MUTEX(un));
21095 				un->un_ncmds_in_driver--;
21096 				ASSERT(un->un_ncmds_in_driver >= 0);
21097 				mutex_exit(SD_MUTEX(un));
21098 				return (EIO);
21099 			}
21100 			mutex_enter(SD_MUTEX(un));
21101 			/* FALLTHROUGH */
21102 		case DKIOCREMOVABLE:
21103 		case DKIOCHOTPLUGGABLE:
21104 		case DKIOCINFO:
21105 		case DKIOCGMEDIAINFO:
21106 		case MHIOCENFAILFAST:
21107 		case MHIOCSTATUS:
21108 		case MHIOCTKOWN:
21109 		case MHIOCRELEASE:
21110 		case MHIOCGRP_INKEYS:
21111 		case MHIOCGRP_INRESV:
21112 		case MHIOCGRP_REGISTER:
21113 		case MHIOCGRP_RESERVE:
21114 		case MHIOCGRP_PREEMPTANDABORT:
21115 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21116 		case CDROMCLOSETRAY:
21117 		case USCSICMD:
21118 			goto skip_ready_valid;
21119 		default:
21120 			break;
21121 		}
21122 
21123 		mutex_exit(SD_MUTEX(un));
21124 		err = sd_ready_and_valid(un);
21125 		mutex_enter(SD_MUTEX(un));
21126 		if (err == SD_READY_NOT_VALID) {
21127 			switch (cmd) {
21128 			case DKIOCGAPART:
21129 			case DKIOCGGEOM:
21130 			case DKIOCSGEOM:
21131 			case DKIOCGVTOC:
21132 			case DKIOCSVTOC:
21133 			case DKIOCSAPART:
21134 			case DKIOCG_PHYGEOM:
21135 			case DKIOCG_VIRTGEOM:
21136 				err = ENOTSUP;
21137 				un->un_ncmds_in_driver--;
21138 				ASSERT(un->un_ncmds_in_driver >= 0);
21139 				mutex_exit(SD_MUTEX(un));
21140 				return (err);
21141 			}
21142 		}
21143 		if (err != SD_READY_VALID) {
21144 			switch (cmd) {
21145 			case DKIOCSTATE:
21146 			case CDROMGDRVSPEED:
21147 			case CDROMSDRVSPEED:
21148 			case FDEJECT:	/* for eject command */
21149 			case DKIOCEJECT:
21150 			case CDROMEJECT:
21151 			case DKIOCGETEFI:
21152 			case DKIOCSGEOM:
21153 			case DKIOCREMOVABLE:
21154 			case DKIOCHOTPLUGGABLE:
21155 			case DKIOCSAPART:
21156 			case DKIOCSETEFI:
21157 				break;
21158 			default:
21159 				if (un->un_f_has_removable_media) {
21160 					err = ENXIO;
21161 				} else {
21162 					/* Do not map EACCES to EIO */
21163 					if (err != EACCES)
21164 						err = EIO;
21165 				}
21166 				un->un_ncmds_in_driver--;
21167 				ASSERT(un->un_ncmds_in_driver >= 0);
21168 				mutex_exit(SD_MUTEX(un));
21169 				return (err);
21170 			}
21171 		}
21172 		geom_validated = TRUE;
21173 	}
21174 	if ((un->un_f_geometry_is_valid == TRUE) &&
21175 	    (un->un_solaris_size > 0)) {
21176 		/*
21177 		 * the "geometry_is_valid" flag could be true if we
21178 		 * have an fdisk table but no Solaris partition
21179 		 */
21180 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21181 			/* it is EFI, so return ENOTSUP for these */
21182 			switch (cmd) {
21183 			case DKIOCGAPART:
21184 			case DKIOCGGEOM:
21185 			case DKIOCGVTOC:
21186 			case DKIOCSVTOC:
21187 			case DKIOCSAPART:
21188 				err = ENOTSUP;
21189 				un->un_ncmds_in_driver--;
21190 				ASSERT(un->un_ncmds_in_driver >= 0);
21191 				mutex_exit(SD_MUTEX(un));
21192 				return (err);
21193 			}
21194 		}
21195 	}
21196 
21197 skip_ready_valid:
21198 	mutex_exit(SD_MUTEX(un));
21199 
21200 	switch (cmd) {
21201 	case DKIOCINFO:
21202 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21203 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21204 		break;
21205 
21206 	case DKIOCGMEDIAINFO:
21207 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21208 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21209 		break;
21210 
21211 	case DKIOCGGEOM:
21212 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21213 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21214 		    geom_validated);
21215 		break;
21216 
21217 	case DKIOCSGEOM:
21218 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21219 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21220 		break;
21221 
21222 	case DKIOCGAPART:
21223 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21224 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21225 		    geom_validated);
21226 		break;
21227 
21228 	case DKIOCSAPART:
21229 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21230 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21231 		break;
21232 
21233 	case DKIOCGVTOC:
21234 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21235 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21236 		    geom_validated);
21237 		break;
21238 
21239 	case DKIOCGETEFI:
21240 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21241 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21242 		break;
21243 
21244 	case DKIOCPARTITION:
21245 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21246 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21247 		break;
21248 
21249 	case DKIOCSVTOC:
21250 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21251 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21252 		break;
21253 
21254 	case DKIOCSETEFI:
21255 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21256 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21257 		break;
21258 
21259 	case DKIOCGMBOOT:
21260 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21261 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21262 		break;
21263 
21264 	case DKIOCSMBOOT:
21265 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21266 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21267 		break;
21268 
21269 	case DKIOCLOCK:
21270 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21271 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21272 		    SD_PATH_STANDARD);
21273 		break;
21274 
21275 	case DKIOCUNLOCK:
21276 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21277 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21278 		    SD_PATH_STANDARD);
21279 		break;
21280 
21281 	case DKIOCSTATE: {
21282 		enum dkio_state		state;
21283 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21284 
21285 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21286 			err = EFAULT;
21287 		} else {
21288 			err = sd_check_media(dev, state);
21289 			if (err == 0) {
21290 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21291 				    sizeof (int), flag) != 0)
21292 					err = EFAULT;
21293 			}
21294 		}
21295 		break;
21296 	}
21297 
21298 	case DKIOCREMOVABLE:
21299 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21300 		/*
21301 		 * At present, vold only does automount for removable-media
21302 		 * devices, in order not to break current applications, we
		 * still let hotpluggable devices pretend to be removable media
21304 		 * devices for vold. In the near future, once vold is EOL'ed,
21305 		 * we should remove this workaround.
21306 		 */
21307 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21308 			i = 1;
21309 		} else {
21310 			i = 0;
21311 		}
21312 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21313 			err = EFAULT;
21314 		} else {
21315 			err = 0;
21316 		}
21317 		break;
21318 
21319 	case DKIOCHOTPLUGGABLE:
21320 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21321 		if (un->un_f_is_hotpluggable) {
21322 			i = 1;
21323 		} else {
21324 			i = 0;
21325 		}
21326 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21327 			err = EFAULT;
21328 		} else {
21329 			err = 0;
21330 		}
21331 		break;
21332 
21333 	case DKIOCGTEMPERATURE:
21334 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21335 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21336 		break;
21337 
21338 	case MHIOCENFAILFAST:
21339 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21340 		if ((err = drv_priv(cred_p)) == 0) {
21341 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21342 		}
21343 		break;
21344 
21345 	case MHIOCTKOWN:
21346 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21347 		if ((err = drv_priv(cred_p)) == 0) {
21348 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21349 		}
21350 		break;
21351 
21352 	case MHIOCRELEASE:
21353 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21354 		if ((err = drv_priv(cred_p)) == 0) {
21355 			err = sd_mhdioc_release(dev);
21356 		}
21357 		break;
21358 
21359 	case MHIOCSTATUS:
21360 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21361 		if ((err = drv_priv(cred_p)) == 0) {
21362 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21363 			case 0:
21364 				err = 0;
21365 				break;
21366 			case EACCES:
21367 				*rval_p = 1;
21368 				err = 0;
21369 				break;
21370 			default:
21371 				err = EIO;
21372 				break;
21373 			}
21374 		}
21375 		break;
21376 
21377 	case MHIOCQRESERVE:
21378 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21379 		if ((err = drv_priv(cred_p)) == 0) {
21380 			err = sd_reserve_release(dev, SD_RESERVE);
21381 		}
21382 		break;
21383 
21384 	case MHIOCREREGISTERDEVID:
21385 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21386 		if (drv_priv(cred_p) == EPERM) {
21387 			err = EPERM;
21388 		} else if (!un->un_f_devid_supported) {
21389 			err = ENOTTY;
21390 		} else {
21391 			err = sd_mhdioc_register_devid(dev);
21392 		}
21393 		break;
21394 
21395 	case MHIOCGRP_INKEYS:
21396 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21397 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21398 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21399 				err = ENOTSUP;
21400 			} else {
21401 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21402 				    flag);
21403 			}
21404 		}
21405 		break;
21406 
21407 	case MHIOCGRP_INRESV:
21408 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21409 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21410 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21411 				err = ENOTSUP;
21412 			} else {
21413 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21414 			}
21415 		}
21416 		break;
21417 
21418 	case MHIOCGRP_REGISTER:
21419 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21420 		if ((err = drv_priv(cred_p)) != EPERM) {
21421 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21422 				err = ENOTSUP;
21423 			} else if (arg != NULL) {
21424 				mhioc_register_t reg;
21425 				if (ddi_copyin((void *)arg, &reg,
21426 				    sizeof (mhioc_register_t), flag) != 0) {
21427 					err = EFAULT;
21428 				} else {
21429 					err =
21430 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21431 					    un, SD_SCSI3_REGISTER,
21432 					    (uchar_t *)&reg);
21433 				}
21434 			}
21435 		}
21436 		break;
21437 
21438 	case MHIOCGRP_RESERVE:
21439 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21440 		if ((err = drv_priv(cred_p)) != EPERM) {
21441 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21442 				err = ENOTSUP;
21443 			} else if (arg != NULL) {
21444 				mhioc_resv_desc_t resv_desc;
21445 				if (ddi_copyin((void *)arg, &resv_desc,
21446 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21447 					err = EFAULT;
21448 				} else {
21449 					err =
21450 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21451 					    un, SD_SCSI3_RESERVE,
21452 					    (uchar_t *)&resv_desc);
21453 				}
21454 			}
21455 		}
21456 		break;
21457 
21458 	case MHIOCGRP_PREEMPTANDABORT:
21459 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21460 		if ((err = drv_priv(cred_p)) != EPERM) {
21461 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21462 				err = ENOTSUP;
21463 			} else if (arg != NULL) {
21464 				mhioc_preemptandabort_t preempt_abort;
21465 				if (ddi_copyin((void *)arg, &preempt_abort,
21466 				    sizeof (mhioc_preemptandabort_t),
21467 				    flag) != 0) {
21468 					err = EFAULT;
21469 				} else {
21470 					err =
21471 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21472 					    un, SD_SCSI3_PREEMPTANDABORT,
21473 					    (uchar_t *)&preempt_abort);
21474 				}
21475 			}
21476 		}
21477 		break;
21478 
21479 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21480 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21481 		if ((err = drv_priv(cred_p)) != EPERM) {
21482 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21483 				err = ENOTSUP;
21484 			} else if (arg != NULL) {
21485 				mhioc_registerandignorekey_t r_and_i;
21486 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21487 				    sizeof (mhioc_registerandignorekey_t),
21488 				    flag) != 0) {
21489 					err = EFAULT;
21490 				} else {
21491 					err =
21492 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21493 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21494 					    (uchar_t *)&r_and_i);
21495 				}
21496 			}
21497 		}
21498 		break;
21499 
21500 	case USCSICMD:
21501 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21502 		cr = ddi_get_cred();
21503 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21504 			err = EPERM;
21505 		} else {
21506 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21507 		}
21508 		break;
21509 
21510 	case CDROMPAUSE:
21511 	case CDROMRESUME:
21512 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21513 		if (!ISCD(un)) {
21514 			err = ENOTTY;
21515 		} else {
21516 			err = sr_pause_resume(dev, cmd);
21517 		}
21518 		break;
21519 
21520 	case CDROMPLAYMSF:
21521 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21522 		if (!ISCD(un)) {
21523 			err = ENOTTY;
21524 		} else {
21525 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21526 		}
21527 		break;
21528 
21529 	case CDROMPLAYTRKIND:
21530 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21531 #if defined(__i386) || defined(__amd64)
21532 		/*
21533 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21534 		 */
21535 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21536 #else
21537 		if (!ISCD(un)) {
21538 #endif
21539 			err = ENOTTY;
21540 		} else {
21541 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21542 		}
21543 		break;
21544 
21545 	case CDROMREADTOCHDR:
21546 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21547 		if (!ISCD(un)) {
21548 			err = ENOTTY;
21549 		} else {
21550 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21551 		}
21552 		break;
21553 
21554 	case CDROMREADTOCENTRY:
21555 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21556 		if (!ISCD(un)) {
21557 			err = ENOTTY;
21558 		} else {
21559 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21560 		}
21561 		break;
21562 
21563 	case CDROMSTOP:
21564 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21565 		if (!ISCD(un)) {
21566 			err = ENOTTY;
21567 		} else {
21568 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21569 			    SD_PATH_STANDARD);
21570 		}
21571 		break;
21572 
21573 	case CDROMSTART:
21574 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21575 		if (!ISCD(un)) {
21576 			err = ENOTTY;
21577 		} else {
21578 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21579 			    SD_PATH_STANDARD);
21580 		}
21581 		break;
21582 
21583 	case CDROMCLOSETRAY:
21584 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21585 		if (!ISCD(un)) {
21586 			err = ENOTTY;
21587 		} else {
21588 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21589 			    SD_PATH_STANDARD);
21590 		}
21591 		break;
21592 
21593 	case FDEJECT:	/* for eject command */
21594 	case DKIOCEJECT:
21595 	case CDROMEJECT:
21596 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21597 		if (!un->un_f_eject_media_supported) {
21598 			err = ENOTTY;
21599 		} else {
21600 			err = sr_eject(dev);
21601 		}
21602 		break;
21603 
21604 	case CDROMVOLCTRL:
21605 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21606 		if (!ISCD(un)) {
21607 			err = ENOTTY;
21608 		} else {
21609 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21610 		}
21611 		break;
21612 
21613 	case CDROMSUBCHNL:
21614 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21615 		if (!ISCD(un)) {
21616 			err = ENOTTY;
21617 		} else {
21618 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21619 		}
21620 		break;
21621 
21622 	case CDROMREADMODE2:
21623 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21624 		if (!ISCD(un)) {
21625 			err = ENOTTY;
21626 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21627 			/*
21628 			 * If the drive supports READ CD, use that instead of
21629 			 * switching the LBA size via a MODE SELECT
21630 			 * Block Descriptor
21631 			 */
21632 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21633 		} else {
21634 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21635 		}
21636 		break;
21637 
21638 	case CDROMREADMODE1:
21639 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21640 		if (!ISCD(un)) {
21641 			err = ENOTTY;
21642 		} else {
21643 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21644 		}
21645 		break;
21646 
21647 	case CDROMREADOFFSET:
21648 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21649 		if (!ISCD(un)) {
21650 			err = ENOTTY;
21651 		} else {
21652 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21653 			    flag);
21654 		}
21655 		break;
21656 
21657 	case CDROMSBLKMODE:
21658 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21659 		/*
21660 		 * There is no means of changing block size in case of atapi
21661 		 * drives, thus return ENOTTY if drive type is atapi
21662 		 */
21663 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21664 			err = ENOTTY;
21665 		} else if (un->un_f_mmc_cap == TRUE) {
21666 
21667 			/*
21668 			 * MMC Devices do not support changing the
21669 			 * logical block size
21670 			 *
21671 			 * Note: EINVAL is being returned instead of ENOTTY to
21672 			 * maintain consistancy with the original mmc
21673 			 * driver update.
21674 			 */
21675 			err = EINVAL;
21676 		} else {
21677 			mutex_enter(SD_MUTEX(un));
21678 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21679 			    (un->un_ncmds_in_transport > 0)) {
21680 				mutex_exit(SD_MUTEX(un));
21681 				err = EINVAL;
21682 			} else {
21683 				mutex_exit(SD_MUTEX(un));
21684 				err = sr_change_blkmode(dev, cmd, arg, flag);
21685 			}
21686 		}
21687 		break;
21688 
21689 	case CDROMGBLKMODE:
21690 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21691 		if (!ISCD(un)) {
21692 			err = ENOTTY;
21693 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21694 		    (un->un_f_blockcount_is_valid != FALSE)) {
21695 			/*
21696 			 * Drive is an ATAPI drive so return target block
21697 			 * size for ATAPI drives since we cannot change the
21698 			 * blocksize on ATAPI drives. Used primarily to detect
21699 			 * if an ATAPI cdrom is present.
21700 			 */
21701 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21702 			    sizeof (int), flag) != 0) {
21703 				err = EFAULT;
21704 			} else {
21705 				err = 0;
21706 			}
21707 
21708 		} else {
21709 			/*
21710 			 * Drive supports changing block sizes via a Mode
21711 			 * Select.
21712 			 */
21713 			err = sr_change_blkmode(dev, cmd, arg, flag);
21714 		}
21715 		break;
21716 
21717 	case CDROMGDRVSPEED:
21718 	case CDROMSDRVSPEED:
21719 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21720 		if (!ISCD(un)) {
21721 			err = ENOTTY;
21722 		} else if (un->un_f_mmc_cap == TRUE) {
21723 			/*
21724 			 * Note: In the future the driver implementation
21725 			 * for getting and
21726 			 * setting cd speed should entail:
21727 			 * 1) If non-mmc try the Toshiba mode page
21728 			 *    (sr_change_speed)
21729 			 * 2) If mmc but no support for Real Time Streaming try
21730 			 *    the SET CD SPEED (0xBB) command
21731 			 *   (sr_atapi_change_speed)
21732 			 * 3) If mmc and support for Real Time Streaming
21733 			 *    try the GET PERFORMANCE and SET STREAMING
21734 			 *    commands (not yet implemented, 4380808)
21735 			 */
21736 			/*
21737 			 * As per recent MMC spec, CD-ROM speed is variable
21738 			 * and changes with LBA. Since there is no such
21739 			 * things as drive speed now, fail this ioctl.
21740 			 *
21741 			 * Note: EINVAL is returned for consistancy of original
21742 			 * implementation which included support for getting
21743 			 * the drive speed of mmc devices but not setting
21744 			 * the drive speed. Thus EINVAL would be returned
21745 			 * if a set request was made for an mmc device.
21746 			 * We no longer support get or set speed for
21747 			 * mmc but need to remain consistant with regard
21748 			 * to the error code returned.
21749 			 */
21750 			err = EINVAL;
21751 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21752 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21753 		} else {
21754 			err = sr_change_speed(dev, cmd, arg, flag);
21755 		}
21756 		break;
21757 
21758 	case CDROMCDDA:
21759 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21760 		if (!ISCD(un)) {
21761 			err = ENOTTY;
21762 		} else {
21763 			err = sr_read_cdda(dev, (void *)arg, flag);
21764 		}
21765 		break;
21766 
21767 	case CDROMCDXA:
21768 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21769 		if (!ISCD(un)) {
21770 			err = ENOTTY;
21771 		} else {
21772 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21773 		}
21774 		break;
21775 
21776 	case CDROMSUBCODE:
21777 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21778 		if (!ISCD(un)) {
21779 			err = ENOTTY;
21780 		} else {
21781 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21782 		}
21783 		break;
21784 
21785 	case DKIOCPARTINFO: {
21786 		/*
21787 		 * Return parameters describing the selected disk slice.
21788 		 * Note: this ioctl is for the intel platform only
21789 		 */
21790 #if defined(__i386) || defined(__amd64)
21791 		int part;
21792 
21793 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21794 		part = SDPART(dev);
21795 
21796 		/* don't check un_solaris_size for pN */
21797 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21798 			err = EIO;
21799 		} else {
21800 			struct part_info p;
21801 
21802 			p.p_start = (daddr_t)un->un_offset[part];
21803 			p.p_length = (int)un->un_map[part].dkl_nblk;
21804 #ifdef _MULTI_DATAMODEL
21805 			switch (ddi_model_convert_from(flag & FMODELS)) {
21806 			case DDI_MODEL_ILP32:
21807 			{
21808 				struct part_info32 p32;
21809 
21810 				p32.p_start = (daddr32_t)p.p_start;
21811 				p32.p_length = p.p_length;
21812 				if (ddi_copyout(&p32, (void *)arg,
21813 				    sizeof (p32), flag))
21814 					err = EFAULT;
21815 				break;
21816 			}
21817 
21818 			case DDI_MODEL_NONE:
21819 			{
21820 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21821 				    flag))
21822 					err = EFAULT;
21823 				break;
21824 			}
21825 			}
21826 #else /* ! _MULTI_DATAMODEL */
21827 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21828 				err = EFAULT;
21829 #endif /* _MULTI_DATAMODEL */
21830 		}
21831 #else
21832 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21833 		err = ENOTTY;
21834 #endif
21835 		break;
21836 	}
21837 
21838 	case DKIOCG_PHYGEOM: {
21839 		/* Return the driver's notion of the media physical geometry */
21840 #if defined(__i386) || defined(__amd64)
21841 		uint64_t	capacity;
21842 		struct dk_geom	disk_geom;
21843 		struct dk_geom	*dkgp = &disk_geom;
21844 
21845 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21846 		mutex_enter(SD_MUTEX(un));
21847 
21848 		if (un->un_g.dkg_nhead != 0 &&
21849 		    un->un_g.dkg_nsect != 0) {
21850 			/*
21851 			 * We succeeded in getting a geometry, but
21852 			 * right now it is being reported as just the
21853 			 * Solaris fdisk partition, just like for
21854 			 * DKIOCGGEOM. We need to change that to be
21855 			 * correct for the entire disk now.
21856 			 */
21857 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21858 			dkgp->dkg_acyl = 0;
21859 			dkgp->dkg_ncyl = un->un_blockcount /
21860 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21861 		} else {
21862 			bzero(dkgp, sizeof (struct dk_geom));
21863 			/*
21864 			 * This disk does not have a Solaris VTOC
21865 			 * so we must present a physical geometry
21866 			 * that will remain consistent regardless
21867 			 * of how the disk is used. This will ensure
21868 			 * that the geometry does not change regardless
21869 			 * of the fdisk partition type (ie. EFI, FAT32,
21870 			 * Solaris, etc).
21871 			 */
21872 			if (ISCD(un)) {
21873 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21874 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21875 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21876 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21877 			} else {
21878 				/*
21879 				 * Invalid un_blockcount can generate invalid
21880 				 * dk_geom and may result in division by zero
21881 				 * system failure. Should make sure blockcount
21882 				 * is valid before using it here.
21883 				 */
21884 				if (un->un_f_blockcount_is_valid == FALSE) {
21885 					mutex_exit(SD_MUTEX(un));
21886 					err = EIO;
21887 
21888 					break;
21889 				}
21890 
21891 				/*
21892 				 * Refer to comments related to off-by-1 at the
21893 				 * header of this file
21894 				 */
21895 				if (!un->un_f_capacity_adjusted &&
21896 					!un->un_f_has_removable_media &&
21897 				    !un->un_f_is_hotpluggable &&
21898 					(un->un_tgt_blocksize ==
21899 					un->un_sys_blocksize))
21900 					capacity = un->un_blockcount - 1;
21901 				else
21902 					capacity = un->un_blockcount;
21903 
21904 				sd_convert_geometry(capacity, dkgp);
21905 				dkgp->dkg_acyl = 0;
21906 				dkgp->dkg_ncyl = capacity /
21907 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21908 			}
21909 		}
21910 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21911 
21912 		if (ddi_copyout(dkgp, (void *)arg,
21913 		    sizeof (struct dk_geom), flag)) {
21914 			mutex_exit(SD_MUTEX(un));
21915 			err = EFAULT;
21916 		} else {
21917 			mutex_exit(SD_MUTEX(un));
21918 			err = 0;
21919 		}
21920 #else
21921 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21922 		err = ENOTTY;
21923 #endif
21924 		break;
21925 	}
21926 
21927 	case DKIOCG_VIRTGEOM: {
21928 		/* Return the driver's notion of the media's logical geometry */
21929 #if defined(__i386) || defined(__amd64)
21930 		struct dk_geom	disk_geom;
21931 		struct dk_geom	*dkgp = &disk_geom;
21932 
21933 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21934 		mutex_enter(SD_MUTEX(un));
21935 		/*
21936 		 * If there is no HBA geometry available, or
21937 		 * if the HBA returned us something that doesn't
21938 		 * really fit into an Int 13/function 8 geometry
21939 		 * result, just fail the ioctl.  See PSARC 1998/313.
21940 		 */
21941 		if (un->un_lgeom.g_nhead == 0 ||
21942 		    un->un_lgeom.g_nsect == 0 ||
21943 		    un->un_lgeom.g_ncyl > 1024) {
21944 			mutex_exit(SD_MUTEX(un));
21945 			err = EINVAL;
21946 		} else {
21947 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21948 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21949 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21950 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21951 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21952 
21953 			if (ddi_copyout(dkgp, (void *)arg,
21954 			    sizeof (struct dk_geom), flag)) {
21955 				mutex_exit(SD_MUTEX(un));
21956 				err = EFAULT;
21957 			} else {
21958 				mutex_exit(SD_MUTEX(un));
21959 				err = 0;
21960 			}
21961 		}
21962 #else
21963 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21964 		err = ENOTTY;
21965 #endif
21966 		break;
21967 	}
21968 #ifdef SDDEBUG
21969 /* RESET/ABORTS testing ioctls */
21970 	case DKIOCRESET: {
21971 		int	reset_level;
21972 
21973 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21974 			err = EFAULT;
21975 		} else {
21976 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21977 			    "reset_level = 0x%lx\n", reset_level);
21978 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21979 				err = 0;
21980 			} else {
21981 				err = EIO;
21982 			}
21983 		}
21984 		break;
21985 	}
21986 
21987 	case DKIOCABORT:
21988 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21989 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21990 			err = 0;
21991 		} else {
21992 			err = EIO;
21993 		}
21994 		break;
21995 #endif
21996 
21997 #ifdef SD_FAULT_INJECTION
21998 /* SDIOC FaultInjection testing ioctls */
21999 	case SDIOCSTART:
22000 	case SDIOCSTOP:
22001 	case SDIOCINSERTPKT:
22002 	case SDIOCINSERTXB:
22003 	case SDIOCINSERTUN:
22004 	case SDIOCINSERTARQ:
22005 	case SDIOCPUSH:
22006 	case SDIOCRETRIEVE:
22007 	case SDIOCRUN:
22008 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
22009 		    "SDIOC detected cmd:0x%X:\n", cmd);
22010 		/* call error generator */
22011 		sd_faultinjection_ioctl(cmd, arg, un);
22012 		err = 0;
22013 		break;
22014 
22015 #endif /* SD_FAULT_INJECTION */
22016 
22017 	case DKIOCFLUSHWRITECACHE:
22018 		{
22019 			struct dk_callback *dkc = (struct dk_callback *)arg;
22020 
22021 			mutex_enter(SD_MUTEX(un));
22022 			if (!un->un_f_sync_cache_supported ||
22023 			    !un->un_f_write_cache_enabled) {
22024 				err = un->un_f_sync_cache_supported ?
22025 					0 : ENOTSUP;
22026 				mutex_exit(SD_MUTEX(un));
22027 				if ((flag & FKIOCTL) && dkc != NULL &&
22028 				    dkc->dkc_callback != NULL) {
22029 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22030 					    err);
22031 					/*
22032 					 * Did callback and reported error.
22033 					 * Since we did a callback, ioctl
22034 					 * should return 0.
22035 					 */
22036 					err = 0;
22037 				}
22038 				break;
22039 			}
22040 			mutex_exit(SD_MUTEX(un));
22041 
22042 			if ((flag & FKIOCTL) && dkc != NULL &&
22043 			    dkc->dkc_callback != NULL) {
22044 				/* async SYNC CACHE request */
22045 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22046 			} else {
22047 				/* synchronous SYNC CACHE request */
22048 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22049 			}
22050 		}
22051 		break;
22052 
22053 	case DKIOCGETWCE: {
22054 
22055 		int wce;
22056 
22057 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22058 			break;
22059 		}
22060 
22061 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22062 			err = EFAULT;
22063 		}
22064 		break;
22065 	}
22066 
22067 	case DKIOCSETWCE: {
22068 
22069 		int wce, sync_supported;
22070 
22071 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22072 			err = EFAULT;
22073 			break;
22074 		}
22075 
22076 		/*
22077 		 * Synchronize multiple threads trying to enable
22078 		 * or disable the cache via the un_f_wcc_cv
22079 		 * condition variable.
22080 		 */
22081 		mutex_enter(SD_MUTEX(un));
22082 
22083 		/*
22084 		 * Don't allow the cache to be enabled if the
22085 		 * config file has it disabled.
22086 		 */
22087 		if (un->un_f_opt_disable_cache && wce) {
22088 			mutex_exit(SD_MUTEX(un));
22089 			err = EINVAL;
22090 			break;
22091 		}
22092 
22093 		/*
22094 		 * Wait for write cache change in progress
22095 		 * bit to be clear before proceeding.
22096 		 */
22097 		while (un->un_f_wcc_inprog)
22098 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22099 
22100 		un->un_f_wcc_inprog = 1;
22101 
22102 		if (un->un_f_write_cache_enabled && wce == 0) {
22103 			/*
22104 			 * Disable the write cache.  Don't clear
22105 			 * un_f_write_cache_enabled until after
22106 			 * the mode select and flush are complete.
22107 			 */
22108 			sync_supported = un->un_f_sync_cache_supported;
22109 			mutex_exit(SD_MUTEX(un));
22110 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22111 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22112 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22113 			}
22114 
22115 			mutex_enter(SD_MUTEX(un));
22116 			if (err == 0) {
22117 				un->un_f_write_cache_enabled = 0;
22118 			}
22119 
22120 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22121 			/*
22122 			 * Set un_f_write_cache_enabled first, so there is
22123 			 * no window where the cache is enabled, but the
22124 			 * bit says it isn't.
22125 			 */
22126 			un->un_f_write_cache_enabled = 1;
22127 			mutex_exit(SD_MUTEX(un));
22128 
22129 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22130 				SD_CACHE_ENABLE);
22131 
22132 			mutex_enter(SD_MUTEX(un));
22133 
22134 			if (err) {
22135 				un->un_f_write_cache_enabled = 0;
22136 			}
22137 		}
22138 
22139 		un->un_f_wcc_inprog = 0;
22140 		cv_broadcast(&un->un_wcc_cv);
22141 		mutex_exit(SD_MUTEX(un));
22142 		break;
22143 	}
22144 
22145 	default:
22146 		err = ENOTTY;
22147 		break;
22148 	}
22149 	mutex_enter(SD_MUTEX(un));
22150 	un->un_ncmds_in_driver--;
22151 	ASSERT(un->un_ncmds_in_driver >= 0);
22152 	mutex_exit(SD_MUTEX(un));
22153 
22154 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22155 	return (err);
22156 }
22157 
22158 
22159 /*
22160  *    Function: sd_uscsi_ioctl
22161  *
22162  * Description: This routine is the driver entry point for handling USCSI ioctl
22163  *		requests (USCSICMD).
22164  *
22165  *   Arguments: dev	- the device number
22166  *		arg	- user provided scsi command
22167  *		flag	- this argument is a pass through to ddi_copyxxx()
22168  *			  directly from the mode argument of ioctl().
22169  *
22170  * Return Code: code returned by sd_send_scsi_cmd
22171  *		ENXIO
22172  *		EFAULT
22173  *		EAGAIN
22174  */
22175 
22176 static int
22177 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22178 {
22179 #ifdef _MULTI_DATAMODEL
22180 	/*
22181 	 * For use when a 32 bit app makes a call into a
22182 	 * 64 bit ioctl
22183 	 */
22184 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22185 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22186 	model_t			model;
22187 #endif /* _MULTI_DATAMODEL */
22188 	struct uscsi_cmd	*scmd = NULL;
22189 	struct sd_lun		*un = NULL;
22190 	enum uio_seg		uioseg;
22191 	char			cdb[CDB_GROUP0];
22192 	int			rval = 0;
22193 
22194 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22195 		return (ENXIO);
22196 	}
22197 
22198 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22199 
22200 	scmd = (struct uscsi_cmd *)
22201 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22202 
22203 #ifdef _MULTI_DATAMODEL
22204 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22205 	case DDI_MODEL_ILP32:
22206 	{
22207 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22208 			rval = EFAULT;
22209 			goto done;
22210 		}
22211 		/*
22212 		 * Convert the ILP32 uscsi data from the
22213 		 * application to LP64 for internal use.
22214 		 */
22215 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22216 		break;
22217 	}
22218 	case DDI_MODEL_NONE:
22219 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22220 			rval = EFAULT;
22221 			goto done;
22222 		}
22223 		break;
22224 	}
22225 #else /* ! _MULTI_DATAMODEL */
22226 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22227 		rval = EFAULT;
22228 		goto done;
22229 	}
22230 #endif /* _MULTI_DATAMODEL */
22231 
22232 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22233 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22234 	if (un->un_f_format_in_progress == TRUE) {
22235 		rval = EAGAIN;
22236 		goto done;
22237 	}
22238 
22239 	/*
22240 	 * Gotta do the ddi_copyin() here on the uscsi_cdb so that
22241 	 * we will have a valid cdb[0] to test.
22242 	 */
22243 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22244 	    (cdb[0] == SCMD_FORMAT)) {
22245 		SD_TRACE(SD_LOG_IOCTL, un,
22246 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22247 		mutex_enter(SD_MUTEX(un));
22248 		un->un_f_format_in_progress = TRUE;
22249 		mutex_exit(SD_MUTEX(un));
22250 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22251 		    SD_PATH_STANDARD);
22252 		mutex_enter(SD_MUTEX(un));
22253 		un->un_f_format_in_progress = FALSE;
22254 		mutex_exit(SD_MUTEX(un));
22255 	} else {
22256 		SD_TRACE(SD_LOG_IOCTL, un,
22257 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22258 		/*
22259 		 * It's OK to fall into here even if the ddi_copyin()
22260 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22261 		 * does this same copyin and will return the EFAULT
22262 		 * if it fails.
22263 		 */
22264 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22265 		    SD_PATH_STANDARD);
22266 	}
22267 #ifdef _MULTI_DATAMODEL
22268 	switch (model) {
22269 	case DDI_MODEL_ILP32:
22270 		/*
22271 		 * Convert back to ILP32 before copyout to the
22272 		 * application
22273 		 */
22274 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22275 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22276 			if (rval != 0) {
22277 				rval = EFAULT;
22278 			}
22279 		}
22280 		break;
22281 	case DDI_MODEL_NONE:
22282 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22283 			if (rval != 0) {
22284 				rval = EFAULT;
22285 			}
22286 		}
22287 		break;
22288 	}
22289 #else /* ! _MULTI_DATAMODE */
22290 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22291 		if (rval != 0) {
22292 			rval = EFAULT;
22293 		}
22294 	}
22295 #endif /* _MULTI_DATAMODE */
22296 done:
22297 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22298 
22299 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22300 
22301 	return (rval);
22302 }
22303 
22304 
22305 /*
22306  *    Function: sd_dkio_ctrl_info
22307  *
22308  * Description: This routine is the driver entry point for handling controller
22309  *		information ioctl requests (DKIOCINFO).
22310  *
22311  *   Arguments: dev  - the device number
22312  *		arg  - pointer to user provided dk_cinfo structure
22313  *		       specifying the controller type and attributes.
22314  *		flag - this argument is a pass through to ddi_copyxxx()
22315  *		       directly from the mode argument of ioctl().
22316  *
22317  * Return Code: 0
22318  *		EFAULT
22319  *		ENXIO
22320  */
22321 
22322 static int
22323 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22324 {
22325 	struct sd_lun	*un = NULL;
22326 	struct dk_cinfo	*info;
22327 	dev_info_t	*pdip;
22328 	int		lun, tgt;
22329 
22330 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22331 		return (ENXIO);
22332 	}
22333 
22334 	info = (struct dk_cinfo *)
22335 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22336 
22337 	switch (un->un_ctype) {
22338 	case CTYPE_CDROM:
22339 		info->dki_ctype = DKC_CDROM;
22340 		break;
22341 	default:
22342 		info->dki_ctype = DKC_SCSI_CCS;
22343 		break;
22344 	}
22345 	pdip = ddi_get_parent(SD_DEVINFO(un));
22346 	info->dki_cnum = ddi_get_instance(pdip);
22347 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22348 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22349 	} else {
22350 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22351 		    DK_DEVLEN - 1);
22352 	}
22353 
22354 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22355 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22356 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22357 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22358 
22359 	/* Unit Information */
22360 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22361 	info->dki_slave = ((tgt << 3) | lun);
22362 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22363 	    DK_DEVLEN - 1);
22364 	info->dki_flags = DKI_FMTVOL;
22365 	info->dki_partition = SDPART(dev);
22366 
22367 	/* Max Transfer size of this device in blocks */
22368 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22369 	info->dki_addr = 0;
22370 	info->dki_space = 0;
22371 	info->dki_prio = 0;
22372 	info->dki_vec = 0;
22373 
22374 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22375 		kmem_free(info, sizeof (struct dk_cinfo));
22376 		return (EFAULT);
22377 	} else {
22378 		kmem_free(info, sizeof (struct dk_cinfo));
22379 		return (0);
22380 	}
22381 }
22382 
22383 
22384 /*
22385  *    Function: sd_get_media_info
22386  *
22387  * Description: This routine is the driver entry point for handling ioctl
22388  *		requests for the media type or command set profile used by the
22389  *		drive to operate on the media (DKIOCGMEDIAINFO).
22390  *
22391  *   Arguments: dev	- the device number
22392  *		arg	- pointer to user provided dk_minfo structure
22393  *			  specifying the media type, logical block size and
22394  *			  drive capacity.
22395  *		flag	- this argument is a pass through to ddi_copyxxx()
22396  *			  directly from the mode argument of ioctl().
22397  *
22398  * Return Code: 0
22399  *		EACCESS
22400  *		EFAULT
22401  *		ENXIO
22402  *		EIO
22403  */
22404 
static int
sd_get_media_info(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	struct dk_minfo		media_info;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uchar_t			*out_data;
	uchar_t			*rqbuf;
	int			rval = 0;
	int			rtn;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");

	/* Buffers for the GET CONFIGURATION response and request sense data */
	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Issue a TUR to determine if the drive is ready with media present */
	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	}

	/*
	 * Note that TUR failures other than ENXIO are deliberately ignored
	 * here; the media type can still be determined below.
	 *
	 * Now get configuration data
	 */
	if (ISCD(un)) {
		media_info.dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);

			if (rtn) {
				/*
				 * Failed for other than an illegal request
				 * or command not supported.
				 * (rqbuf[2] is the sense key, rqbuf[12] the
				 * ASC; 0x20 == invalid command operation code)
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto done;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data: bytes 6-7 of the feature
				 * header hold the 16-bit current profile.
				 */
				media_info.dki_media_type = out_data[6];
				media_info.dki_media_type <<= 8;
				media_info.dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if (sinq->inq_qual == 0) {
			/* This is a direct access device */
			media_info.dki_media_type = DK_FIXED_DISK;

			/* Special-case Iomega ZIP/JAZ removable media */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					media_info.dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					media_info.dki_media_type = DK_JAZ;
				}
			}
		} else {
			/* Not a CD or direct access so return unknown media */
			media_info.dki_media_type = DK_UNKNOWN;
		}
	}

	/* Now read the capacity so we can provide the lbasize and capacity */
	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
	    SD_PATH_DIRECT)) {
	case 0:
		break;
	case EACCES:
		/* Reservation conflict */
		rval = EACCES;
		goto done;
	default:
		rval = EIO;
		goto done;
	}

	media_info.dki_lbsize = lbasize;
	media_capacity = capacity;

	/*
	 * sd_send_scsi_READ_CAPACITY() reports capacity in
	 * un->un_sys_blocksize chunks. So we need to convert it into
	 * cap.lbasize chunks.
	 */
	media_capacity *= un->un_sys_blocksize;
	media_capacity /= lbasize;
	media_info.dki_capacity = media_capacity;

	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
		rval = EFAULT;
		/*
		 * Jump to the common cleanup label rather than falling
		 * through, so any code added before "done" in the future
		 * is skipped on this error path as well.
		 */
		goto done;
	}
done:
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
22529 
22530 
22531 /*
22532  *    Function: sd_dkio_get_geometry
22533  *
22534  * Description: This routine is the driver entry point for handling user
22535  *		requests to get the device geometry (DKIOCGGEOM).
22536  *
22537  *   Arguments: dev  - the device number
22538  *		arg  - pointer to user provided dk_geom structure specifying
22539  *			the controller's notion of the current geometry.
22540  *		flag - this argument is a pass through to ddi_copyxxx()
22541  *		       directly from the mode argument of ioctl().
22542  *		geom_validated - flag indicating if the device geometry has been
22543  *				 previously validated in the sdioctl routine.
22544  *
22545  * Return Code: 0
22546  *		EFAULT
22547  *		ENXIO
22548  *		EIO
22549  */
22550 
22551 static int
22552 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22553 {
22554 	struct sd_lun	*un = NULL;
22555 	struct dk_geom	*tmp_geom = NULL;
22556 	int		rval = 0;
22557 
22558 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22559 		return (ENXIO);
22560 	}
22561 
22562 	if (geom_validated == FALSE) {
22563 		/*
22564 		 * sd_validate_geometry does not spin a disk up
22565 		 * if it was spun down. We need to make sure it
22566 		 * is ready.
22567 		 */
22568 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22569 			return (rval);
22570 		}
22571 		mutex_enter(SD_MUTEX(un));
22572 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22573 		mutex_exit(SD_MUTEX(un));
22574 	}
22575 	if (rval)
22576 		return (rval);
22577 
22578 	/*
22579 	 * It is possible that un_solaris_size is 0(uninitialized)
22580 	 * after sd_unit_attach. Reservation conflict may cause the
22581 	 * above situation. Thus, the zero check of un_solaris_size
22582 	 * should occur after the sd_validate_geometry() call.
22583 	 */
22584 #if defined(__i386) || defined(__amd64)
22585 	if (un->un_solaris_size == 0) {
22586 		return (EIO);
22587 	}
22588 #endif
22589 
22590 	/*
22591 	 * Make a local copy of the soft state geometry to avoid some potential
22592 	 * race conditions associated with holding the mutex and updating the
22593 	 * write_reinstruct value
22594 	 */
22595 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22596 	mutex_enter(SD_MUTEX(un));
22597 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22598 	mutex_exit(SD_MUTEX(un));
22599 
22600 	if (tmp_geom->dkg_write_reinstruct == 0) {
22601 		tmp_geom->dkg_write_reinstruct =
22602 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22603 		    sd_rot_delay) / (int)60000);
22604 	}
22605 
22606 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22607 	    flag);
22608 	if (rval != 0) {
22609 		rval = EFAULT;
22610 	}
22611 
22612 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22613 	return (rval);
22614 
22615 }
22616 
22617 
22618 /*
22619  *    Function: sd_dkio_set_geometry
22620  *
22621  * Description: This routine is the driver entry point for handling user
22622  *		requests to set the device geometry (DKIOCSGEOM). The actual
22623  *		device geometry is not updated, just the driver "notion" of it.
22624  *
22625  *   Arguments: dev  - the device number
22626  *		arg  - pointer to user provided dk_geom structure used to set
22627  *			the controller's notion of the current geometry.
22628  *		flag - this argument is a pass through to ddi_copyxxx()
22629  *		       directly from the mode argument of ioctl().
22630  *
22631  * Return Code: 0
22632  *		EFAULT
22633  *		ENXIO
22634  *		EIO
22635  */
22636 
22637 static int
22638 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22639 {
22640 	struct sd_lun	*un = NULL;
22641 	struct dk_geom	*tmp_geom;
22642 	struct dk_map	*lp;
22643 	int		rval = 0;
22644 	int		i;
22645 
22646 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22647 		return (ENXIO);
22648 	}
22649 
22650 	/*
22651 	 * Make sure there is no reservation conflict on the lun.
22652 	 */
22653 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22654 		return (EACCES);
22655 	}
22656 
22657 #if defined(__i386) || defined(__amd64)
22658 	if (un->un_solaris_size == 0) {
22659 		return (EIO);
22660 	}
22661 #endif
22662 
22663 	/*
22664 	 * We need to copy the user specified geometry into local
22665 	 * storage and then update the softstate. We don't want to hold
22666 	 * the mutex and copyin directly from the user to the soft state
22667 	 */
22668 	tmp_geom = (struct dk_geom *)
22669 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22670 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22671 	if (rval != 0) {
22672 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22673 		return (EFAULT);
22674 	}
22675 
22676 	mutex_enter(SD_MUTEX(un));
22677 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22678 	for (i = 0; i < NDKMAP; i++) {
22679 		lp  = &un->un_map[i];
22680 		un->un_offset[i] =
22681 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22682 #if defined(__i386) || defined(__amd64)
22683 		un->un_offset[i] += un->un_solaris_offset;
22684 #endif
22685 	}
22686 	un->un_f_geometry_is_valid = FALSE;
22687 	mutex_exit(SD_MUTEX(un));
22688 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22689 
22690 	return (rval);
22691 }
22692 
22693 
22694 /*
22695  *    Function: sd_dkio_get_partition
22696  *
22697  * Description: This routine is the driver entry point for handling user
22698  *		requests to get the partition table (DKIOCGAPART).
22699  *
22700  *   Arguments: dev  - the device number
22701  *		arg  - pointer to user provided dk_allmap structure specifying
22702  *			the controller's notion of the current partition table.
22703  *		flag - this argument is a pass through to ddi_copyxxx()
22704  *		       directly from the mode argument of ioctl().
22705  *		geom_validated - flag indicating if the device geometry has been
22706  *				 previously validated in the sdioctl routine.
22707  *
22708  * Return Code: 0
22709  *		EFAULT
22710  *		ENXIO
22711  *		EIO
22712  */
22713 
static int
sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
{
	struct sd_lun	*un = NULL;
	int		rval = 0;
	int		size;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Make sure the geometry is valid before getting the partition
	 * information.
	 */
	mutex_enter(SD_MUTEX(un));
	if (geom_validated == FALSE) {
		/*
		 * sd_validate_geometry does not spin a disk up
		 * if it was spun down. We need to make sure it
		 * is ready before validating the geometry.
		 */
		mutex_exit(SD_MUTEX(un));
		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
			return (rval);
		}
		mutex_enter(SD_MUTEX(un));

		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
			mutex_exit(SD_MUTEX(un));
			return (rval);
		}
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * It is possible that un_solaris_size is 0(uninitialized)
	 * after sd_unit_attach. Reservation conflict may cause the
	 * above situation. Thus, the zero check of un_solaris_size
	 * should occur after the sd_validate_geometry() call.
	 */
#if defined(__i386) || defined(__amd64)
	if (un->un_solaris_size == 0) {
		return (EIO);
	}
#endif

	/*
	 * Copy the partition map out in the caller's data model.
	 * NOTE(review): un_map is read below without holding SD_MUTEX;
	 * presumably a possibly-stale snapshot is acceptable here —
	 * confirm against the driver's locking conventions.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		/* 32-bit caller: convert each entry to the dk_map32 layout */
		struct dk_map32 dk_map32[NDKMAP];
		int		i;

		for (i = 0; i < NDKMAP; i++) {
			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
		}
		size = NDKMAP * sizeof (struct dk_map32);
		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
		if (rval != 0) {
			rval = EFAULT;
		}
		break;
	}
	case DDI_MODEL_NONE:
		/* Native caller: copy the map out directly */
		size = NDKMAP * sizeof (struct dk_map);
		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
		if (rval != 0) {
			rval = EFAULT;
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	size = NDKMAP * sizeof (struct dk_map);
	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
	if (rval != 0) {
		rval = EFAULT;
	}
#endif /* _MULTI_DATAMODEL */
	return (rval);
}
22795 
22796 
22797 /*
22798  *    Function: sd_dkio_set_partition
22799  *
22800  * Description: This routine is the driver entry point for handling user
22801  *		requests to set the partition table (DKIOCSAPART). The actual
22802  *		device partition is not updated.
22803  *
22804  *   Arguments: dev  - the device number
22805  *		arg  - pointer to user provided dk_allmap structure used to set
22806  *			the controller's notion of the partition table.
22807  *		flag - this argument is a pass through to ddi_copyxxx()
22808  *		       directly from the mode argument of ioctl().
22809  *
22810  * Return Code: 0
22811  *		EINVAL
22812  *		EFAULT
22813  *		ENXIO
22814  *		EIO
22815  */
22816 
static int
sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct dk_map	dk_map[NDKMAP];
	struct dk_map	*lp;
	int		rval = 0;
	int		size;
	int		i;
#if defined(_SUNOS_VTOC_16)
	struct dkl_partition	*vp;
#endif

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Set the map for all logical partitions.  We lock
	 * the priority just to make sure an interrupt doesn't
	 * come in while the map is half updated.
	 */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
	mutex_enter(SD_MUTEX(un));
	/* Disks beyond DK_MAX_BLOCKS use EFI labels; no dk_map to set */
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		mutex_exit(SD_MUTEX(un));
		return (ENOTSUP);
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Make sure there is no reservation conflict on the lun.
	 */
	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
		return (EACCES);
	}

#if defined(__i386) || defined(__amd64)
	if (un->un_solaris_size == 0) {
		return (EIO);
	}
#endif

	/* Copy the new partition map in using the caller's data model */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct dk_map32 dk_map32[NDKMAP];

		size = NDKMAP * sizeof (struct dk_map32);
		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
		if (rval != 0) {
			return (EFAULT);
		}
		/* Widen each ILP32 entry to the native dk_map layout */
		for (i = 0; i < NDKMAP; i++) {
			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
		}
		break;
	}
	case DDI_MODEL_NONE:
		size = NDKMAP * sizeof (struct dk_map);
		rval = ddi_copyin((void *)arg, dk_map, size, flag);
		if (rval != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	size = NDKMAP * sizeof (struct dk_map);
	rval = ddi_copyin((void *)arg, dk_map, size, flag);
	if (rval != 0) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	mutex_enter(SD_MUTEX(un));
	/*
	 * Note: The size used in this bcopy is set based upon the data model.
	 * NOTE(review): for a 32-bit caller on a 64-bit kernel, size here is
	 * NDKMAP * sizeof (struct dk_map32), which can be smaller than the
	 * native un_map array; verify this bcopy covers all NDKMAP entries
	 * of un_map in that case.
	 */
	bcopy(dk_map, un->un_map, size);
#if defined(_SUNOS_VTOC_16)
	/* On VTOC_16 platforms, mirror the map into the embedded vtoc */
	vp = (struct dkl_partition *)&(un->un_vtoc);
#endif	/* defined(_SUNOS_VTOC_16) */
	for (i = 0; i < NDKMAP; i++) {
		lp  = &un->un_map[i];
		/* Recompute the absolute starting block from the cylinder */
		un->un_offset[i] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#if defined(_SUNOS_VTOC_16)
		vp->p_start = un->un_offset[i];
		vp->p_size = lp->dkl_nblk;
		vp++;
#endif	/* defined(_SUNOS_VTOC_16) */
#if defined(__i386) || defined(__amd64)
		un->un_offset[i] += un->un_solaris_offset;
#endif
	}
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
22914 
22915 
22916 /*
22917  *    Function: sd_dkio_get_vtoc
22918  *
22919  * Description: This routine is the driver entry point for handling user
22920  *		requests to get the current volume table of contents
22921  *		(DKIOCGVTOC).
22922  *
22923  *   Arguments: dev  - the device number
22924  *		arg  - pointer to user provided vtoc structure specifying
22925  *			the current vtoc.
22926  *		flag - this argument is a pass through to ddi_copyxxx()
22927  *		       directly from the mode argument of ioctl().
22928  *		geom_validated - flag indicating if the device geometry has been
22929  *				 previously validated in the sdioctl routine.
22930  *
22931  * Return Code: 0
22932  *		EFAULT
22933  *		ENXIO
22934  *		EIO
22935  */
22936 
static int
sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
{
	struct sd_lun	*un = NULL;
#if defined(_SUNOS_VTOC_8)
	struct vtoc	user_vtoc;
#endif	/* defined(_SUNOS_VTOC_8) */
	int		rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	mutex_enter(SD_MUTEX(un));
	if (geom_validated == FALSE) {
		/*
		 * sd_validate_geometry does not spin a disk up
		 * if it was spun down. We need to make sure it
		 * is ready.
		 */
		mutex_exit(SD_MUTEX(un));
		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
			return (rval);
		}
		mutex_enter(SD_MUTEX(un));
		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
			mutex_exit(SD_MUTEX(un));
			return (rval);
		}
	}

#if defined(_SUNOS_VTOC_8)
	/* Build a native vtoc snapshot while still holding the mutex */
	sd_build_user_vtoc(un, &user_vtoc);
	mutex_exit(SD_MUTEX(un));

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct vtoc32 user_vtoc32;

		/* 32-bit caller: narrow the native vtoc to vtoc32 */
		vtoctovtoc32(user_vtoc, user_vtoc32);
		if (ddi_copyout(&user_vtoc32, (void *)arg,
		    sizeof (struct vtoc32), flag)) {
			return (EFAULT);
		}
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyout(&user_vtoc, (void *)arg,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

#elif defined(_SUNOS_VTOC_16)
	mutex_exit(SD_MUTEX(un));

#ifdef _MULTI_DATAMODEL
	/*
	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
	 * 32-bit to maintain compatibility with existing on-disk
	 * structures.  Thus, we need to convert the structure when copying
	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
	 * program.  If the target is a 32-bit program, then no conversion
	 * is necessary.
	 */
	/* LINTED: logical expression always true: op "||" */
	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/* 32-bit caller: the on-disk layout matches; copy directly */
		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
		    sizeof (un->un_vtoc), flag)) {
			return (EFAULT);
		}
		break;

	case DDI_MODEL_NONE: {
		struct vtoc user_vtoc;

		/* 64-bit caller: widen the on-disk vtoc32 to a native vtoc */
		vtoc32tovtoc(un->un_vtoc, user_vtoc);
		if (ddi_copyout(&user_vtoc, (void *)arg,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
	    flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */
#else
#error "No VTOC format defined."
#endif

	return (rval);
}
23043 
23044 static int
23045 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
23046 {
23047 	struct sd_lun	*un = NULL;
23048 	dk_efi_t	user_efi;
23049 	int		rval = 0;
23050 	void		*buffer;
23051 
23052 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23053 		return (ENXIO);
23054 
23055 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23056 		return (EFAULT);
23057 
23058 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23059 
23060 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23061 	    (user_efi.dki_length > un->un_max_xfer_size))
23062 		return (EINVAL);
23063 
23064 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23065 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23066 	    user_efi.dki_lba, SD_PATH_DIRECT);
23067 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23068 	    user_efi.dki_length, flag) != 0)
23069 		rval = EFAULT;
23070 
23071 	kmem_free(buffer, user_efi.dki_length);
23072 	return (rval);
23073 }
23074 
23075 /*
23076  *    Function: sd_build_user_vtoc
23077  *
23078  * Description: This routine populates a pass by reference variable with the
23079  *		current volume table of contents.
23080  *
23081  *   Arguments: un - driver soft state (unit) structure
23082  *		user_vtoc - pointer to vtoc structure to be populated
23083  */
23084 
23085 static void
23086 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23087 {
23088 	struct dk_map2		*lpart;
23089 	struct dk_map		*lmap;
23090 	struct partition	*vpart;
23091 	int			nblks;
23092 	int			i;
23093 
23094 	ASSERT(mutex_owned(SD_MUTEX(un)));
23095 
23096 	/*
23097 	 * Return vtoc structure fields in the provided VTOC area, addressed
23098 	 * by *vtoc.
23099 	 */
23100 	bzero(user_vtoc, sizeof (struct vtoc));
23101 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23102 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23103 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23104 	user_vtoc->v_sanity	= VTOC_SANE;
23105 	user_vtoc->v_version	= un->un_vtoc.v_version;
23106 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23107 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23108 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23109 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23110 	    sizeof (un->un_vtoc.v_reserved));
23111 	/*
23112 	 * Convert partitioning information.
23113 	 *
23114 	 * Note the conversion from starting cylinder number
23115 	 * to starting sector number.
23116 	 */
23117 	lmap = un->un_map;
23118 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23119 	vpart = user_vtoc->v_part;
23120 
23121 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23122 
23123 	for (i = 0; i < V_NUMPAR; i++) {
23124 		vpart->p_tag	= lpart->p_tag;
23125 		vpart->p_flag	= lpart->p_flag;
23126 		vpart->p_start	= lmap->dkl_cylno * nblks;
23127 		vpart->p_size	= lmap->dkl_nblk;
23128 		lmap++;
23129 		lpart++;
23130 		vpart++;
23131 
23132 		/* (4364927) */
23133 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23134 	}
23135 
23136 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23137 }
23138 
/*
 * Handle DKIOCPARTITION: return a partition64 description of one entry
 * in the disk's EFI GPT partition entry array.
 */
static int
sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct partition64	p64;
	int			rval = 0;
	uint_t			nparts;
	efi_gpe_t		*partitions;
	efi_gpt_t		*buffer;
	diskaddr_t		gpe_lba;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	if (ddi_copyin((const void *)arg, &p64,
	    sizeof (struct partition64), flag)) {
		return (EFAULT);
	}

	/* Read the GPT header from LBA 1 and convert to native endianness */
	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
		1, SD_PATH_DIRECT);
	if (rval != 0)
		goto done_error;

	sd_swap_efi_gpt(buffer);

	if ((rval = sd_validate_efi(buffer)) != 0)
		goto done_error;

	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
	/*
	 * NOTE(review): p_partno appears to be a 0-based index, in which
	 * case this boundary check should arguably be ">= nparts";
	 * confirm against the DKIOCPARTITION consumers before changing.
	 */
	if (p64.p_partno > nparts) {
		/* couldn't find it */
		rval = ESRCH;
		goto done_error;
	}
	/*
	 * if we're dealing with a partition that's out of the normal
	 * 16K block, adjust accordingly
	 *
	 * NOTE(review): this adjustment divides by sizeof (efi_gpe_t)
	 * rather than by the number of entries per block, and the
	 * "partitions += p_partno" below indexes from the adjusted LBA;
	 * verify the combined arithmetic addresses the intended entry
	 * and stays within the EFI_MIN_ARRAY_SIZE buffer.
	 */
	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
			gpe_lba, SD_PATH_DIRECT);
	if (rval) {
		goto done_error;
	}
	partitions = (efi_gpe_t *)buffer;

	sd_swap_efi_gpe(nparts, partitions);

	/* Extract type GUID, start, and size of the requested entry */
	partitions += p64.p_partno;
	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
	    sizeof (struct uuid));
	p64.p_start = partitions->efi_gpe_StartingLBA;
	p64.p_size = partitions->efi_gpe_EndingLBA -
			p64.p_start + 1;

	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
		rval = EFAULT;

done_error:
	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
	return (rval);
}
23205 
23206 
23207 /*
23208  *    Function: sd_dkio_set_vtoc
23209  *
23210  * Description: This routine is the driver entry point for handling user
23211  *		requests to set the current volume table of contents
23212  *		(DKIOCSVTOC).
23213  *
23214  *   Arguments: dev  - the device number
23215  *		arg  - pointer to user provided vtoc structure used to set the
23216  *			current vtoc.
23217  *		flag - this argument is a pass through to ddi_copyxxx()
23218  *		       directly from the mode argument of ioctl().
23219  *
23220  * Return Code: 0
23221  *		EFAULT
23222  *		ENXIO
23223  *		EINVAL
23224  *		ENOTSUP
23225  */
23226 
static int
sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct vtoc	user_vtoc;
	int		rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#if defined(__i386) || defined(__amd64)
	/* A vtoc label can only be written when target == system blocksize */
	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
		return (EINVAL);
	}
#endif

	/* Copy the user's vtoc in, honoring the caller's data model */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct vtoc32 user_vtoc32;

		if (ddi_copyin((const void *)arg, &user_vtoc32,
		    sizeof (struct vtoc32), flag)) {
			return (EFAULT);
		}
		/* Widen the ILP32 vtoc32 to the native vtoc layout */
		vtoc32tovtoc(user_vtoc32, user_vtoc);
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyin((const void *)arg, &user_vtoc,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin((const void *)arg, &user_vtoc,
	    sizeof (struct vtoc), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	mutex_enter(SD_MUTEX(un));
	/* Disks beyond DK_MAX_BLOCKS must use an EFI label, not a vtoc */
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		mutex_exit(SD_MUTEX(un));
		return (ENOTSUP);
	}
	/* A zero cylinder count means there is no valid geometry to label */
	if (un->un_g.dkg_ncyl == 0) {
		mutex_exit(SD_MUTEX(un));
		return (EINVAL);
	}

	/*
	 * Clear any existing EFI label and swap the EFI-style "wd" minor
	 * nodes for the vtoc-style "h" nodes, since the disk is being
	 * relabeled with a vtoc.  Done outside the mutex because the
	 * ddi calls may block.
	 */
	mutex_exit(SD_MUTEX(un));
	sd_clear_efi(un);
	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	mutex_enter(SD_MUTEX(un));

	/* Install the new vtoc, write the label, and re-validate */
	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
		if ((rval = sd_write_label(dev)) == 0) {
			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
			    != 0) {
				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
				    "sd_dkio_set_vtoc: "
				    "Failed validate geometry\n");
			}
		}
	}

	/*
	 * If sd_build_label_vtoc, or sd_write_label failed above write the
	 * devid anyway, what can it hurt? Also preserve the device id by
	 * writing to the disk acyl for the case where a devid has been
	 * fabricated.
	 */
	if (un->un_f_devid_supported &&
	    (un->un_f_opt_fab_devid == TRUE)) {
		if (un->un_devid == NULL) {
			sd_register_devid(un, SD_DEVINFO(un),
			    SD_TARGET_IS_UNRESERVED);
		} else {
			/*
			 * The device id for this disk has been
			 * fabricated. Fabricated device id's are
			 * managed by storing them in the last 2
			 * available sectors on the drive. The device
			 * id must be preserved by writing it back out
			 * to this location.
			 */
			if (sd_write_deviceid(un) != 0) {
				ddi_devid_free(un->un_devid);
				un->un_devid = NULL;
			}
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
23333 
23334 
23335 /*
23336  *    Function: sd_build_label_vtoc
23337  *
23338  * Description: This routine updates the driver soft state current volume table
23339  *		of contents based on a user specified vtoc.
23340  *
23341  *   Arguments: un - driver soft state (unit) structure
23342  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23343  *			    to update the driver soft state.
23344  *
23345  * Return Code: 0
23346  *		EINVAL
23347  */
23348 
static int
sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
{
	struct dk_map		*lmap;
	struct partition	*vpart;
	int			nblks;
#if defined(_SUNOS_VTOC_8)
	int			ncyl;
	struct dk_map2		*lpart;
#endif	/* defined(_SUNOS_VTOC_8) */
	int			i;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Sanity-check the vtoc */
	if (user_vtoc->v_sanity != VTOC_SANE ||
	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
	    user_vtoc->v_nparts != V_NUMPAR) {
		return (EINVAL);
	}

	/* nblks is the number of sectors per cylinder */
	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
	if (nblks == 0) {
		return (EINVAL);
	}

#if defined(_SUNOS_VTOC_8)
	/*
	 * Each partition must start on a cylinder boundary and must not
	 * extend beyond the last cylinder of the disk.
	 */
	vpart = user_vtoc->v_part;
	for (i = 0; i < V_NUMPAR; i++) {
		if ((vpart->p_start % nblks) != 0) {
			return (EINVAL);
		}
		ncyl = vpart->p_start / nblks;
		ncyl += vpart->p_size / nblks;
		if ((vpart->p_size % nblks) != 0) {
			/* A partial trailing cylinder still occupies one */
			ncyl++;
		}
		if (ncyl > (int)un->un_g.dkg_ncyl) {
			return (EINVAL);
		}
		vpart++;
	}
#endif	/* defined(_SUNOS_VTOC_8) */

	/* Put appropriate vtoc structure fields into the disk label */
#if defined(_SUNOS_VTOC_16)
	/*
	 * The vtoc is always a 32bit data structure to maintain the
	 * on-disk format. Convert "in place" instead of bcopying it.
	 */
	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));

	/*
	 * in the 16-slice vtoc, starting sectors are expressed in
	 * numbers *relative* to the start of the Solaris fdisk partition.
	 */
	lmap = un->un_map;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;
	}

#elif defined(_SUNOS_VTOC_8)

	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];

	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;

	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);

	un->un_vtoc.v_nparts = user_vtoc->v_nparts;

	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
	    sizeof (un->un_vtoc.v_reserved));

	/*
	 * Note the conversion from starting sector number
	 * to starting cylinder number.
	 * Return error if division results in a remainder.
	 */
	lmap = un->un_map;
	lpart = un->un_vtoc.v_part;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
		lpart->p_tag  = vpart->p_tag;
		lpart->p_flag = vpart->p_flag;
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;

		lmap++;
		lpart++;
		vpart++;

		/*
		 * Clamp the 64-bit timestamp to what the 32-bit label
		 * field can hold (bug 4387723).
		 */
#ifdef _LP64
		if (user_vtoc->timestamp[i] > TIME32_MAX) {
			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
		} else {
			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
		}
#else
		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
#endif
	}

	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#else
#error "No VTOC format defined."
#endif
	return (0);
}
23466 
23467 /*
23468  *    Function: sd_clear_efi
23469  *
23470  * Description: This routine clears all EFI labels.
23471  *
23472  *   Arguments: un - driver soft state (unit) structure
23473  *
23474  * Return Code: void
23475  */
23476 
23477 static void
23478 sd_clear_efi(struct sd_lun *un)
23479 {
23480 	efi_gpt_t	*gpt;
23481 	uint_t		lbasize;
23482 	uint64_t	cap;
23483 	int rval;
23484 
23485 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23486 
23487 	mutex_enter(SD_MUTEX(un));
23488 	un->un_reserved = -1;
23489 	mutex_exit(SD_MUTEX(un));
23490 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23491 
23492 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23493 		goto done;
23494 	}
23495 
23496 	sd_swap_efi_gpt(gpt);
23497 	rval = sd_validate_efi(gpt);
23498 	if (rval == 0) {
23499 		/* clear primary */
23500 		bzero(gpt, sizeof (efi_gpt_t));
23501 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23502 			SD_PATH_DIRECT))) {
23503 			SD_INFO(SD_LOG_IO_PARTITION, un,
23504 				"sd_clear_efi: clear primary label failed\n");
23505 		}
23506 	}
23507 	/* the backup */
23508 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23509 	    SD_PATH_DIRECT);
23510 	if (rval) {
23511 		goto done;
23512 	}
23513 	/*
23514 	 * The MMC standard allows READ CAPACITY to be
23515 	 * inaccurate by a bounded amount (in the interest of
23516 	 * response latency).  As a result, failed READs are
23517 	 * commonplace (due to the reading of metadata and not
23518 	 * data). Depending on the per-Vendor/drive Sense data,
23519 	 * the failed READ can cause many (unnecessary) retries.
23520 	 */
23521 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23522 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23523 		SD_PATH_DIRECT)) != 0) {
23524 		goto done;
23525 	}
23526 	sd_swap_efi_gpt(gpt);
23527 	rval = sd_validate_efi(gpt);
23528 	if (rval == 0) {
23529 		/* clear backup */
23530 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23531 			cap-1);
23532 		bzero(gpt, sizeof (efi_gpt_t));
23533 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23534 		    cap-1, SD_PATH_DIRECT))) {
23535 			SD_INFO(SD_LOG_IO_PARTITION, un,
23536 				"sd_clear_efi: clear backup label failed\n");
23537 		}
23538 	} else {
23539 		/*
23540 		 * Refer to comments related to off-by-1 at the
23541 		 * header of this file
23542 		 */
23543 		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23544 		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23545 			SD_PATH_DIRECT)) != 0) {
23546 			goto done;
23547 		}
23548 		sd_swap_efi_gpt(gpt);
23549 		rval = sd_validate_efi(gpt);
23550 		if (rval == 0) {
23551 			/* clear legacy backup EFI label */
23552 			SD_TRACE(SD_LOG_IOCTL, un,
23553 			    "sd_clear_efi clear backup@%lu\n", cap-2);
23554 			bzero(gpt, sizeof (efi_gpt_t));
23555 			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23556 			    cap-2, SD_PATH_DIRECT))) {
23557 				SD_INFO(SD_LOG_IO_PARTITION,
23558 				    un, "sd_clear_efi: "
23559 				    " clear legacy backup label failed\n");
23560 			}
23561 		}
23562 	}
23563 
23564 done:
23565 	kmem_free(gpt, sizeof (efi_gpt_t));
23566 }
23567 
23568 /*
23569  *    Function: sd_set_vtoc
23570  *
23571  * Description: This routine writes data to the appropriate positions
23572  *
23573  *   Arguments: un - driver soft state (unit) structure
23574  *              dkl  - the data to be written
23575  *
23576  * Return: void
23577  */
23578 
23579 static int
23580 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23581 {
23582 	void			*shadow_buf;
23583 	uint_t			label_addr;
23584 	int			sec;
23585 	int			blk;
23586 	int			head;
23587 	int			cyl;
23588 	int			rval;
23589 
23590 #if defined(__i386) || defined(__amd64)
23591 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23592 #else
23593 	/* Write the primary label at block 0 of the solaris partition. */
23594 	label_addr = 0;
23595 #endif
23596 
23597 	if (NOT_DEVBSIZE(un)) {
23598 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23599 		/*
23600 		 * Read the target's first block.
23601 		 */
23602 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23603 		    un->un_tgt_blocksize, label_addr,
23604 		    SD_PATH_STANDARD)) != 0) {
23605 			goto exit;
23606 		}
23607 		/*
23608 		 * Copy the contents of the label into the shadow buffer
23609 		 * which is of the size of target block size.
23610 		 */
23611 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23612 	}
23613 
23614 	/* Write the primary label */
23615 	if (NOT_DEVBSIZE(un)) {
23616 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23617 		    label_addr, SD_PATH_STANDARD);
23618 	} else {
23619 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23620 		    label_addr, SD_PATH_STANDARD);
23621 	}
23622 	if (rval != 0) {
23623 		return (rval);
23624 	}
23625 
23626 	/*
23627 	 * Calculate where the backup labels go.  They are always on
23628 	 * the last alternate cylinder, but some older drives put them
23629 	 * on head 2 instead of the last head.	They are always on the
23630 	 * first 5 odd sectors of the appropriate track.
23631 	 *
23632 	 * We have no choice at this point, but to believe that the
23633 	 * disk label is valid.	 Use the geometry of the disk
23634 	 * as described in the label.
23635 	 */
23636 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23637 	head = dkl->dkl_nhead - 1;
23638 
23639 	/*
23640 	 * Write and verify the backup labels. Make sure we don't try to
23641 	 * write past the last cylinder.
23642 	 */
23643 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23644 		blk = (daddr_t)(
23645 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23646 		    (head * dkl->dkl_nsect) + sec);
23647 #if defined(__i386) || defined(__amd64)
23648 		blk += un->un_solaris_offset;
23649 #endif
23650 		if (NOT_DEVBSIZE(un)) {
23651 			uint64_t	tblk;
23652 			/*
23653 			 * Need to read the block first for read modify write.
23654 			 */
23655 			tblk = (uint64_t)blk;
23656 			blk = (int)((tblk * un->un_sys_blocksize) /
23657 			    un->un_tgt_blocksize);
23658 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23659 			    un->un_tgt_blocksize, blk,
23660 			    SD_PATH_STANDARD)) != 0) {
23661 				goto exit;
23662 			}
23663 			/*
23664 			 * Modify the shadow buffer with the label.
23665 			 */
23666 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23667 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23668 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23669 		} else {
23670 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23671 			    blk, SD_PATH_STANDARD);
23672 			SD_INFO(SD_LOG_IO_PARTITION, un,
23673 			"sd_set_vtoc: wrote backup label %d\n", blk);
23674 		}
23675 		if (rval != 0) {
23676 			goto exit;
23677 		}
23678 	}
23679 exit:
23680 	if (NOT_DEVBSIZE(un)) {
23681 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23682 	}
23683 	return (rval);
23684 }
23685 
23686 /*
23687  *    Function: sd_clear_vtoc
23688  *
23689  * Description: This routine clears out the VTOC labels.
23690  *
23691  *   Arguments: un - driver soft state (unit) structure
23692  *
23693  * Return: void
23694  */
23695 
23696 static void
23697 sd_clear_vtoc(struct sd_lun *un)
23698 {
23699 	struct dk_label		*dkl;
23700 
23701 	mutex_exit(SD_MUTEX(un));
23702 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23703 	mutex_enter(SD_MUTEX(un));
23704 	/*
23705 	 * sd_set_vtoc uses these fields in order to figure out
23706 	 * where to overwrite the backup labels
23707 	 */
23708 	dkl->dkl_apc    = un->un_g.dkg_apc;
23709 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23710 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23711 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23712 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23713 	mutex_exit(SD_MUTEX(un));
23714 	(void) sd_set_vtoc(un, dkl);
23715 	kmem_free(dkl, sizeof (struct dk_label));
23716 
23717 	mutex_enter(SD_MUTEX(un));
23718 }
23719 
23720 /*
23721  *    Function: sd_write_label
23722  *
23723  * Description: This routine will validate and write the driver soft state vtoc
23724  *		contents to the device.
23725  *
23726  *   Arguments: dev - the device number
23727  *
23728  * Return Code: the code returned by sd_send_scsi_cmd()
23729  *		0
23730  *		EINVAL
23731  *		ENXIO
23732  *		ENOMEM
23733  */
23734 
23735 static int
23736 sd_write_label(dev_t dev)
23737 {
23738 	struct sd_lun		*un;
23739 	struct dk_label		*dkl;
23740 	short			sum;
23741 	short			*sp;
23742 	int			i;
23743 	int			rval;
23744 
23745 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23746 	    (un->un_state == SD_STATE_OFFLINE)) {
23747 		return (ENXIO);
23748 	}
23749 	ASSERT(mutex_owned(SD_MUTEX(un)));
23750 	mutex_exit(SD_MUTEX(un));
23751 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23752 	mutex_enter(SD_MUTEX(un));
23753 
23754 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23755 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23756 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23757 	dkl->dkl_apc	= un->un_g.dkg_apc;
23758 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23759 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23760 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23761 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23762 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23763 
23764 #if defined(_SUNOS_VTOC_8)
23765 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23766 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23767 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23768 	for (i = 0; i < NDKMAP; i++) {
23769 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23770 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23771 	}
23772 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23773 #elif defined(_SUNOS_VTOC_16)
23774 	dkl->dkl_skew	= un->un_dkg_skew;
23775 #else
23776 #error "No VTOC format defined."
23777 #endif
23778 
23779 	dkl->dkl_magic			= DKL_MAGIC;
23780 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23781 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23782 
23783 	/* Construct checksum for the new disk label */
23784 	sum = 0;
23785 	sp = (short *)dkl;
23786 	i = sizeof (struct dk_label) / sizeof (short);
23787 	while (i--) {
23788 		sum ^= *sp++;
23789 	}
23790 	dkl->dkl_cksum = sum;
23791 
23792 	mutex_exit(SD_MUTEX(un));
23793 
23794 	rval = sd_set_vtoc(un, dkl);
23795 exit:
23796 	kmem_free(dkl, sizeof (struct dk_label));
23797 	mutex_enter(SD_MUTEX(un));
23798 	return (rval);
23799 }
23800 
23801 static int
23802 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23803 {
23804 	struct sd_lun	*un = NULL;
23805 	dk_efi_t	user_efi;
23806 	int		rval = 0;
23807 	void		*buffer;
23808 	int		valid_efi;
23809 
23810 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23811 		return (ENXIO);
23812 
23813 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23814 		return (EFAULT);
23815 
23816 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23817 
23818 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23819 	    (user_efi.dki_length > un->un_max_xfer_size))
23820 		return (EINVAL);
23821 
23822 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23823 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23824 		rval = EFAULT;
23825 	} else {
23826 		/*
23827 		 * let's clear the vtoc labels and clear the softstate
23828 		 * vtoc.
23829 		 */
23830 		mutex_enter(SD_MUTEX(un));
23831 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23832 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23833 				"sd_dkio_set_efi: CLEAR VTOC\n");
23834 			sd_clear_vtoc(un);
23835 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23836 			mutex_exit(SD_MUTEX(un));
23837 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23838 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23839 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23840 			    S_IFBLK,
23841 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23842 			    un->un_node_type, NULL);
23843 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23844 			    S_IFCHR,
23845 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23846 			    un->un_node_type, NULL);
23847 		} else
23848 			mutex_exit(SD_MUTEX(un));
23849 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23850 		    user_efi.dki_lba, SD_PATH_DIRECT);
23851 		if (rval == 0) {
23852 			mutex_enter(SD_MUTEX(un));
23853 
23854 			/*
23855 			 * Set the un_reserved for valid efi label.
23856 			 * Function clear_efi in fdisk and efi_write in
23857 			 * libefi both change efi label on disk in 3 steps
23858 			 * 1. Change primary gpt and gpe
23859 			 * 2. Change backup gpe
23860 			 * 3. Change backup gpt, which is one block
23861 			 * We only reread the efi label after the 3rd step,
23862 			 * or there will be warning "primary label corrupt".
23863 			 */
23864 			if (user_efi.dki_length == un->un_tgt_blocksize) {
23865 				un->un_f_geometry_is_valid = FALSE;
23866 				valid_efi = sd_use_efi(un, SD_PATH_DIRECT);
23867 				if ((valid_efi == 0) &&
23868 				    un->un_f_devid_supported &&
23869 				    (un->un_f_opt_fab_devid == TRUE)) {
23870 					if (un->un_devid == NULL) {
23871 						sd_register_devid(un,
23872 						    SD_DEVINFO(un),
23873 						    SD_TARGET_IS_UNRESERVED);
23874 					} else {
23875 						/*
23876 						 * The device id for this disk
23877 						 * has been fabricated. The
23878 						 * device id must be preserved
23879 						 * by writing it back out to
23880 						 * disk.
23881 						 */
23882 						if (sd_write_deviceid(un)
23883 						    != 0) {
23884 							ddi_devid_free(
23885 							    un->un_devid);
23886 							un->un_devid = NULL;
23887 						}
23888 					}
23889 				}
23890 			}
23891 
23892 			mutex_exit(SD_MUTEX(un));
23893 		}
23894 	}
23895 	kmem_free(buffer, user_efi.dki_length);
23896 	return (rval);
23897 }
23898 
23899 /*
23900  *    Function: sd_dkio_get_mboot
23901  *
23902  * Description: This routine is the driver entry point for handling user
23903  *		requests to get the current device mboot (DKIOCGMBOOT)
23904  *
23905  *   Arguments: dev  - the device number
23906  *		arg  - pointer to user provided mboot structure specifying
23907  *			the current mboot.
23908  *		flag - this argument is a pass through to ddi_copyxxx()
23909  *		       directly from the mode argument of ioctl().
23910  *
23911  * Return Code: 0
23912  *		EINVAL
23913  *		EFAULT
23914  *		ENXIO
23915  */
23916 
23917 static int
23918 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23919 {
23920 	struct sd_lun	*un;
23921 	struct mboot	*mboot;
23922 	int		rval;
23923 	size_t		buffer_size;
23924 
23925 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23926 	    (un->un_state == SD_STATE_OFFLINE)) {
23927 		return (ENXIO);
23928 	}
23929 
23930 	if (!un->un_f_mboot_supported || arg == NULL) {
23931 		return (EINVAL);
23932 	}
23933 
23934 	/*
23935 	 * Read the mboot block, located at absolute block 0 on the target.
23936 	 */
23937 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23938 
23939 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23940 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23941 
23942 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23943 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23944 	    SD_PATH_STANDARD)) == 0) {
23945 		if (ddi_copyout(mboot, (void *)arg,
23946 		    sizeof (struct mboot), flag) != 0) {
23947 			rval = EFAULT;
23948 		}
23949 	}
23950 	kmem_free(mboot, buffer_size);
23951 	return (rval);
23952 }
23953 
23954 
23955 /*
23956  *    Function: sd_dkio_set_mboot
23957  *
23958  * Description: This routine is the driver entry point for handling user
23959  *		requests to validate and set the device master boot
23960  *		(DKIOCSMBOOT).
23961  *
23962  *   Arguments: dev  - the device number
23963  *		arg  - pointer to user provided mboot structure used to set the
23964  *			master boot.
23965  *		flag - this argument is a pass through to ddi_copyxxx()
23966  *		       directly from the mode argument of ioctl().
23967  *
23968  * Return Code: 0
23969  *		EINVAL
23970  *		EFAULT
23971  *		ENXIO
23972  */
23973 
23974 static int
23975 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23976 {
23977 	struct sd_lun	*un = NULL;
23978 	struct mboot	*mboot = NULL;
23979 	int		rval;
23980 	ushort_t	magic;
23981 
23982 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23983 		return (ENXIO);
23984 	}
23985 
23986 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23987 
23988 	if (!un->un_f_mboot_supported) {
23989 		return (EINVAL);
23990 	}
23991 
23992 	if (arg == NULL) {
23993 		return (EINVAL);
23994 	}
23995 
23996 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23997 
23998 	if (ddi_copyin((const void *)arg, mboot,
23999 	    sizeof (struct mboot), flag) != 0) {
24000 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24001 		return (EFAULT);
24002 	}
24003 
24004 	/* Is this really a master boot record? */
24005 	magic = LE_16(mboot->signature);
24006 	if (magic != MBB_MAGIC) {
24007 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24008 		return (EINVAL);
24009 	}
24010 
24011 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
24012 	    SD_PATH_STANDARD);
24013 
24014 	mutex_enter(SD_MUTEX(un));
24015 #if defined(__i386) || defined(__amd64)
24016 	if (rval == 0) {
24017 		/*
24018 		 * mboot has been written successfully.
24019 		 * update the fdisk and vtoc tables in memory
24020 		 */
24021 		rval = sd_update_fdisk_and_vtoc(un);
24022 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
24023 			mutex_exit(SD_MUTEX(un));
24024 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24025 			return (rval);
24026 		}
24027 	}
24028 
24029 #ifdef __lock_lint
24030 	sd_setup_default_geometry(un);
24031 #endif
24032 
24033 #else
24034 	if (rval == 0) {
24035 		/*
24036 		 * mboot has been written successfully.
24037 		 * set up the default geometry and VTOC
24038 		 */
24039 		if (un->un_blockcount <= DK_MAX_BLOCKS)
24040 			sd_setup_default_geometry(un);
24041 	}
24042 #endif
24043 	mutex_exit(SD_MUTEX(un));
24044 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24045 	return (rval);
24046 }
24047 
24048 
24049 /*
24050  *    Function: sd_setup_default_geometry
24051  *
24052  * Description: This local utility routine sets the default geometry as part of
24053  *		setting the device mboot.
24054  *
24055  *   Arguments: un - driver soft state (unit) structure
24056  *
24057  * Note: This may be redundant with sd_build_default_label.
24058  */
24059 
24060 static void
24061 sd_setup_default_geometry(struct sd_lun *un)
24062 {
24063 	/* zero out the soft state geometry and partition table. */
24064 	bzero(&un->un_g, sizeof (struct dk_geom));
24065 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24066 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
24067 	un->un_asciilabel[0] = '\0';
24068 
24069 	/*
24070 	 * For the rpm, we use the minimum for the disk.
24071 	 * For the head, cyl and number of sector per track,
24072 	 * if the capacity <= 1GB, head = 64, sect = 32.
24073 	 * else head = 255, sect 63
24074 	 * Note: the capacity should be equal to C*H*S values.
24075 	 * This will cause some truncation of size due to
24076 	 * round off errors. For CD-ROMs, this truncation can
24077 	 * have adverse side effects, so returning ncyl and
24078 	 * nhead as 1. The nsect will overflow for most of
24079 	 * CD-ROMs as nsect is of type ushort.
24080 	 */
24081 	if (ISCD(un)) {
24082 		un->un_g.dkg_ncyl = 1;
24083 		un->un_g.dkg_nhead = 1;
24084 		un->un_g.dkg_nsect = un->un_blockcount;
24085 	} else {
24086 		if (un->un_blockcount <= 0x1000) {
24087 			/* Needed for unlabeled SCSI floppies. */
24088 			un->un_g.dkg_nhead = 2;
24089 			un->un_g.dkg_ncyl = 80;
24090 			un->un_g.dkg_pcyl = 80;
24091 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24092 		} else if (un->un_blockcount <= 0x200000) {
24093 			un->un_g.dkg_nhead = 64;
24094 			un->un_g.dkg_nsect = 32;
24095 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24096 		} else {
24097 			un->un_g.dkg_nhead = 255;
24098 			un->un_g.dkg_nsect = 63;
24099 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24100 		}
24101 		un->un_blockcount = un->un_g.dkg_ncyl *
24102 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
24103 	}
24104 	un->un_g.dkg_acyl = 0;
24105 	un->un_g.dkg_bcyl = 0;
24106 	un->un_g.dkg_intrlv = 1;
24107 	un->un_g.dkg_rpm = 200;
24108 	un->un_g.dkg_read_reinstruct = 0;
24109 	un->un_g.dkg_write_reinstruct = 0;
24110 	if (un->un_g.dkg_pcyl == 0) {
24111 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24112 	}
24113 
24114 	un->un_map['a'-'a'].dkl_cylno = 0;
24115 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24116 	un->un_map['c'-'a'].dkl_cylno = 0;
24117 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24118 	un->un_f_geometry_is_valid = FALSE;
24119 }
24120 
24121 
24122 #if defined(__i386) || defined(__amd64)
24123 /*
24124  *    Function: sd_update_fdisk_and_vtoc
24125  *
24126  * Description: This local utility routine updates the device fdisk and vtoc
24127  *		as part of setting the device mboot.
24128  *
24129  *   Arguments: un - driver soft state (unit) structure
24130  *
24131  * Return Code: 0 for success or errno-type return code.
24132  *
24133  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
24134  *		these did exist seperately in x86 sd.c!!!
24135  */
24136 
24137 static int
24138 sd_update_fdisk_and_vtoc(struct sd_lun *un)
24139 {
24140 	static char	labelstring[128];
24141 	static char	buf[256];
24142 	char		*label = 0;
24143 	int		count;
24144 	int		label_rc = 0;
24145 	int		gvalid = un->un_f_geometry_is_valid;
24146 	int		fdisk_rval;
24147 	int		lbasize;
24148 	int		capacity;
24149 
24150 	ASSERT(mutex_owned(SD_MUTEX(un)));
24151 
24152 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24153 		return (EINVAL);
24154 	}
24155 
24156 	if (un->un_f_blockcount_is_valid == FALSE) {
24157 		return (EINVAL);
24158 	}
24159 
24160 #if defined(_SUNOS_VTOC_16)
24161 	/*
24162 	 * Set up the "whole disk" fdisk partition; this should always
24163 	 * exist, regardless of whether the disk contains an fdisk table
24164 	 * or vtoc.
24165 	 */
24166 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24167 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24168 #endif	/* defined(_SUNOS_VTOC_16) */
24169 
24170 	/*
24171 	 * copy the lbasize and capacity so that if they're
24172 	 * reset while we're not holding the SD_MUTEX(un), we will
24173 	 * continue to use valid values after the SD_MUTEX(un) is
24174 	 * reacquired.
24175 	 */
24176 	lbasize  = un->un_tgt_blocksize;
24177 	capacity = un->un_blockcount;
24178 
24179 	/*
24180 	 * refresh the logical and physical geometry caches.
24181 	 * (data from mode sense format/rigid disk geometry pages,
24182 	 * and scsi_ifgetcap("geometry").
24183 	 */
24184 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24185 
24186 	/*
24187 	 * Only DIRECT ACCESS devices will have Sun labels.
24188 	 * CD's supposedly have a Sun label, too
24189 	 */
24190 	if (un->un_f_vtoc_label_supported) {
24191 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24192 		    SD_PATH_DIRECT);
24193 		if (fdisk_rval == SD_CMD_FAILURE) {
24194 			ASSERT(mutex_owned(SD_MUTEX(un)));
24195 			return (EIO);
24196 		}
24197 
24198 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24199 			ASSERT(mutex_owned(SD_MUTEX(un)));
24200 			return (EACCES);
24201 		}
24202 
24203 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24204 			/*
24205 			 * Found fdisk table but no Solaris partition entry,
24206 			 * so don't call sd_uselabel() and don't create
24207 			 * a default label.
24208 			 */
24209 			label_rc = 0;
24210 			un->un_f_geometry_is_valid = TRUE;
24211 			goto no_solaris_partition;
24212 		}
24213 
24214 #if defined(_SUNOS_VTOC_8)
24215 		label = (char *)un->un_asciilabel;
24216 #elif defined(_SUNOS_VTOC_16)
24217 		label = (char *)un->un_vtoc.v_asciilabel;
24218 #else
24219 #error "No VTOC format defined."
24220 #endif
24221 	} else if (capacity < 0) {
24222 		ASSERT(mutex_owned(SD_MUTEX(un)));
24223 		return (EINVAL);
24224 	}
24225 
24226 	/*
24227 	 * For Removable media We reach here if we have found a
24228 	 * SOLARIS PARTITION.
24229 	 * If un_f_geometry_is_valid is FALSE it indicates that the SOLARIS
24230 	 * PARTITION has changed from the previous one, hence we will setup a
24231 	 * default VTOC in this case.
24232 	 */
24233 	if (un->un_f_geometry_is_valid == FALSE) {
24234 		sd_build_default_label(un);
24235 		label_rc = 0;
24236 	}
24237 
24238 no_solaris_partition:
24239 	if ((!un->un_f_has_removable_media ||
24240 	    (un->un_f_has_removable_media &&
24241 	    un->un_mediastate == DKIO_EJECTED)) &&
24242 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
24243 		/*
24244 		 * Print out a message indicating who and what we are.
24245 		 * We do this only when we happen to really validate the
24246 		 * geometry. We may call sd_validate_geometry() at other
24247 		 * times, ioctl()'s like Get VTOC in which case we
24248 		 * don't want to print the label.
24249 		 * If the geometry is valid, print the label string,
24250 		 * else print vendor and product info, if available
24251 		 */
24252 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24253 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24254 		} else {
24255 			mutex_enter(&sd_label_mutex);
24256 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24257 			    labelstring);
24258 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24259 			    &labelstring[64]);
24260 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24261 			    labelstring, &labelstring[64]);
24262 			if (un->un_f_blockcount_is_valid == TRUE) {
24263 				(void) sprintf(&buf[strlen(buf)],
24264 				    ", %" PRIu64 " %u byte blocks\n",
24265 				    un->un_blockcount,
24266 				    un->un_tgt_blocksize);
24267 			} else {
24268 				(void) sprintf(&buf[strlen(buf)],
24269 				    ", (unknown capacity)\n");
24270 			}
24271 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24272 			mutex_exit(&sd_label_mutex);
24273 		}
24274 	}
24275 
24276 #if defined(_SUNOS_VTOC_16)
24277 	/*
24278 	 * If we have valid geometry, set up the remaining fdisk partitions.
24279 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24280 	 * we set it to an entirely bogus value.
24281 	 */
24282 	for (count = 0; count < FD_NUMPART; count++) {
24283 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24284 		un->un_map[FDISK_P1 + count].dkl_nblk =
24285 		    un->un_fmap[count].fmap_nblk;
24286 		un->un_offset[FDISK_P1 + count] =
24287 		    un->un_fmap[count].fmap_start;
24288 	}
24289 #endif
24290 
24291 	for (count = 0; count < NDKMAP; count++) {
24292 #if defined(_SUNOS_VTOC_8)
24293 		struct dk_map *lp  = &un->un_map[count];
24294 		un->un_offset[count] =
24295 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24296 #elif defined(_SUNOS_VTOC_16)
24297 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24298 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24299 #else
24300 #error "No VTOC format defined."
24301 #endif
24302 	}
24303 
24304 	ASSERT(mutex_owned(SD_MUTEX(un)));
24305 	return (label_rc);
24306 }
24307 #endif
24308 
24309 
24310 /*
24311  *    Function: sd_check_media
24312  *
24313  * Description: This utility routine implements the functionality for the
24314  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24315  *		driver state changes from that specified by the user
24316  *		(inserted or ejected). For example, if the user specifies
24317  *		DKIO_EJECTED and the current media state is inserted this
24318  *		routine will immediately return DKIO_INSERTED. However, if the
24319  *		current media state is not inserted the user thread will be
24320  *		blocked until the drive state changes. If DKIO_NONE is specified
24321  *		the user thread will block until a drive state change occurs.
24322  *
24323  *   Arguments: dev  - the device number
24324  *		state  - user pointer to a dkio_state, updated with the current
24325  *			drive state at return.
24326  *
24327  * Return Code: ENXIO
24328  *		EIO
24329  *		EAGAIN
24330  *		EINTR
24331  */
24332 
24333 static int
24334 sd_check_media(dev_t dev, enum dkio_state state)
24335 {
24336 	struct sd_lun		*un = NULL;
24337 	enum dkio_state		prev_state;
24338 	opaque_t		token = NULL;
24339 	int			rval = 0;
24340 
24341 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24342 		return (ENXIO);
24343 	}
24344 
24345 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24346 
24347 	mutex_enter(SD_MUTEX(un));
24348 
24349 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24350 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24351 
24352 	prev_state = un->un_mediastate;
24353 
24354 	/* is there anything to do? */
24355 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24356 		/*
24357 		 * submit the request to the scsi_watch service;
24358 		 * scsi_media_watch_cb() does the real work
24359 		 */
24360 		mutex_exit(SD_MUTEX(un));
24361 
24362 		/*
24363 		 * This change handles the case where a scsi watch request is
24364 		 * added to a device that is powered down. To accomplish this
24365 		 * we power up the device before adding the scsi watch request,
24366 		 * since the scsi watch sends a TUR directly to the device
24367 		 * which the device cannot handle if it is powered down.
24368 		 */
24369 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24370 			mutex_enter(SD_MUTEX(un));
24371 			goto done;
24372 		}
24373 
24374 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24375 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24376 		    (caddr_t)dev);
24377 
24378 		sd_pm_exit(un);
24379 
24380 		mutex_enter(SD_MUTEX(un));
24381 		if (token == NULL) {
24382 			rval = EAGAIN;
24383 			goto done;
24384 		}
24385 
24386 		/*
24387 		 * This is a special case IOCTL that doesn't return
24388 		 * until the media state changes. Routine sdpower
24389 		 * knows about and handles this so don't count it
24390 		 * as an active cmd in the driver, which would
24391 		 * keep the device busy to the pm framework.
24392 		 * If the count isn't decremented the device can't
24393 		 * be powered down.
24394 		 */
24395 		un->un_ncmds_in_driver--;
24396 		ASSERT(un->un_ncmds_in_driver >= 0);
24397 
24398 		/*
24399 		 * if a prior request had been made, this will be the same
24400 		 * token, as scsi_watch was designed that way.
24401 		 */
24402 		un->un_swr_token = token;
24403 		un->un_specified_mediastate = state;
24404 
24405 		/*
24406 		 * now wait for media change
24407 		 * we will not be signalled unless mediastate == state but it is
24408 		 * still better to test for this condition, since there is a
24409 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24410 		 */
24411 		SD_TRACE(SD_LOG_COMMON, un,
24412 		    "sd_check_media: waiting for media state change\n");
24413 		while (un->un_mediastate == state) {
24414 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24415 				SD_TRACE(SD_LOG_COMMON, un,
24416 				    "sd_check_media: waiting for media state "
24417 				    "was interrupted\n");
24418 				un->un_ncmds_in_driver++;
24419 				rval = EINTR;
24420 				goto done;
24421 			}
24422 			SD_TRACE(SD_LOG_COMMON, un,
24423 			    "sd_check_media: received signal, state=%x\n",
24424 			    un->un_mediastate);
24425 		}
24426 		/*
24427 		 * Inc the counter to indicate the device once again
24428 		 * has an active outstanding cmd.
24429 		 */
24430 		un->un_ncmds_in_driver++;
24431 	}
24432 
24433 	/* invalidate geometry */
24434 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24435 		sr_ejected(un);
24436 	}
24437 
24438 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24439 		uint64_t	capacity;
24440 		uint_t		lbasize;
24441 
24442 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24443 		mutex_exit(SD_MUTEX(un));
24444 		/*
24445 		 * Since the following routines use SD_PATH_DIRECT, we must
24446 		 * call PM directly before the upcoming disk accesses. This
24447 		 * may cause the disk to be power/spin up.
24448 		 */
24449 
24450 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24451 			rval = sd_send_scsi_READ_CAPACITY(un,
24452 			    &capacity,
24453 			    &lbasize, SD_PATH_DIRECT);
24454 			if (rval != 0) {
24455 				sd_pm_exit(un);
24456 				mutex_enter(SD_MUTEX(un));
24457 				goto done;
24458 			}
24459 		} else {
24460 			rval = EIO;
24461 			mutex_enter(SD_MUTEX(un));
24462 			goto done;
24463 		}
24464 		mutex_enter(SD_MUTEX(un));
24465 
24466 		sd_update_block_info(un, lbasize, capacity);
24467 
24468 		un->un_f_geometry_is_valid	= FALSE;
24469 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24470 
24471 		mutex_exit(SD_MUTEX(un));
24472 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24473 		    SD_PATH_DIRECT);
24474 		sd_pm_exit(un);
24475 
24476 		mutex_enter(SD_MUTEX(un));
24477 	}
24478 done:
24479 	un->un_f_watcht_stopped = FALSE;
24480 	if (un->un_swr_token) {
24481 		/*
24482 		 * Use of this local token and the mutex ensures that we avoid
24483 		 * some race conditions associated with terminating the
24484 		 * scsi watch.
24485 		 */
24486 		token = un->un_swr_token;
24487 		un->un_swr_token = (opaque_t)NULL;
24488 		mutex_exit(SD_MUTEX(un));
24489 		(void) scsi_watch_request_terminate(token,
24490 		    SCSI_WATCH_TERMINATE_WAIT);
24491 		mutex_enter(SD_MUTEX(un));
24492 	}
24493 
24494 	/*
24495 	 * Update the capacity kstat value, if no media previously
24496 	 * (capacity kstat is 0) and a media has been inserted
24497 	 * (un_f_blockcount_is_valid == TRUE)
24498 	 */
24499 	if (un->un_errstats) {
24500 		struct sd_errstats	*stp = NULL;
24501 
24502 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24503 		if ((stp->sd_capacity.value.ui64 == 0) &&
24504 		    (un->un_f_blockcount_is_valid == TRUE)) {
24505 			stp->sd_capacity.value.ui64 =
24506 			    (uint64_t)((uint64_t)un->un_blockcount *
24507 			    un->un_sys_blocksize);
24508 		}
24509 	}
24510 	mutex_exit(SD_MUTEX(un));
24511 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24512 	return (rval);
24513 }
24514 
24515 
24516 /*
24517  *    Function: sd_delayed_cv_broadcast
24518  *
24519  * Description: Delayed cv_broadcast to allow for target to recover from media
24520  *		insertion.
24521  *
24522  *   Arguments: arg - driver soft state (unit) structure
24523  */
24524 
24525 static void
24526 sd_delayed_cv_broadcast(void *arg)
24527 {
24528 	struct sd_lun *un = arg;
24529 
24530 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24531 
24532 	mutex_enter(SD_MUTEX(un));
24533 	un->un_dcvb_timeid = NULL;
24534 	cv_broadcast(&un->un_state_cv);
24535 	mutex_exit(SD_MUTEX(un));
24536 }
24537 
24538 
24539 /*
24540  *    Function: sd_media_watch_cb
24541  *
24542  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24543  *		routine processes the TUR sense data and updates the driver
24544  *		state if a transition has occurred. The user thread
24545  *		(sd_check_media) is then signalled.
24546  *
24547  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24548  *			among multiple watches that share this callback function
24549  *		resultp - scsi watch facility result packet containing scsi
24550  *			  packet, status byte and sense data
24551  *
24552  * Return Code: 0 for success, -1 for failure
24553  */
24554 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;
	uint8_t				skey, asc, ascq;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/*
	 * If the transport reports the device as gone, record that state
	 * and wake any thread blocked in sd_check_media().
	 */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	/*
	 * If there was a check condition then sensep points to valid sense data
	 * If status was not a check condition but a reservation or busy status
	 * then the new state is DKIO_NONE
	 */
	if (sensep != NULL) {
		skey = scsi_sense_key(sensep);
		asc = scsi_sense_asc(sensep);
		ascq = scsi_sense_ascq(sensep);

		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    skey, asc, ascq);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			if (skey == KEY_UNIT_ATTENTION) {
				/*
				 * UNIT ATTENTION with ASC 0x28 ("not ready
				 * to ready change, medium may have changed")
				 * indicates media has been inserted.
				 */
				if (asc == 0x28) {
					state = DKIO_INSERTED;
				}
			} else {
				/*
				 * NOT READY with ASC 0x3a ("medium not
				 * present") means the media was ejected.
				 *
				 * Note that sense 02/04/02 means that the
				 * host should send a start command.
				 * Explicitly leave the media state as is
				 * (inserted), as the media is inserted and
				 * the host has stopped the device for PM
				 * reasons. The next true read/write to this
				 * media will bring the device to the right
				 * state, good for media access.
				 */
				if ((skey == KEY_NOT_READY) &&
				    (asc == 0x3a)) {
					state = DKIO_EJECTED;
				}

				/*
				 * If the drive is busy with an operation
				 * or long write, keep the media in an
				 * inserted state.
				 */

				if ((skey == KEY_NOT_READY) &&
				    (asc == 0x04) &&
				    ((ascq == 0x02) ||
				    (ascq == 0x07) ||
				    (ascq == 0x08))) {
					state = DKIO_INSERTED;
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* TUR completed cleanly with no sense: media is present. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			/* only one delayed broadcast may be pending at once */
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24672 
24673 
24674 /*
24675  *    Function: sd_dkio_get_temp
24676  *
24677  * Description: This routine is the driver entry point for handling ioctl
24678  *		requests to get the disk temperature.
24679  *
24680  *   Arguments: dev  - the device number
24681  *		arg  - pointer to user provided dk_temperature structure.
24682  *		flag - this argument is a pass through to ddi_copyxxx()
24683  *		       directly from the mode argument of ioctl().
24684  *
24685  * Return Code: 0
24686  *		EFAULT
24687  *		ENXIO
24688  *		EAGAIN
24689  */
24690 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	int			path_flag = SD_PATH_STANDARD;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	/* Retrieve the temperature log page from the device. */
	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
		goto done2;
	}

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		/* 0xFF in the value byte means the reading is not valid. */
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
	}

done2:
	/*
	 * path_flag == SD_PATH_DIRECT here implies sd_pm_entry() succeeded
	 * above (failures in the DKT_BYPASS_PM branch jump to "done"
	 * instead), so the sd_pm_entry/sd_pm_exit calls stay paired.
	 */
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	/* Defensive check; kmem_zalloc(KM_SLEEP) does not return NULL. */
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
24802 
24803 
24804 /*
24805  *    Function: sd_log_page_supported
24806  *
24807  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24808  *		supported log pages.
24809  *
24810  *   Arguments: un -
24811  *		log_page -
24812  *
24813  * Return Code: -1 - on error (log sense is optional and may not be supported).
24814  *		0  - log page not found.
24815  *  		1  - log page found.
24816  */
24817 
24818 static int
24819 sd_log_page_supported(struct sd_lun *un, int log_page)
24820 {
24821 	uchar_t *log_page_data;
24822 	int	i;
24823 	int	match = 0;
24824 	int	log_size;
24825 
24826 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24827 
24828 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24829 	    SD_PATH_DIRECT) != 0) {
24830 		SD_ERROR(SD_LOG_COMMON, un,
24831 		    "sd_log_page_supported: failed log page retrieval\n");
24832 		kmem_free(log_page_data, 0xFF);
24833 		return (-1);
24834 	}
24835 	log_size = log_page_data[3];
24836 
24837 	/*
24838 	 * The list of supported log pages start from the fourth byte. Check
24839 	 * until we run out of log pages or a match is found.
24840 	 */
24841 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24842 		if (log_page_data[i] == log_page) {
24843 			match++;
24844 		}
24845 	}
24846 	kmem_free(log_page_data, 0xFF);
24847 	return (match);
24848 }
24849 
24850 
24851 /*
24852  *    Function: sd_mhdioc_failfast
24853  *
24854  * Description: This routine is the driver entry point for handling ioctl
24855  *		requests to enable/disable the multihost failfast option.
24856  *		(MHIOCENFAILFAST)
24857  *
24858  *   Arguments: dev	- the device number
24859  *		arg	- user specified probing interval.
24860  *		flag	- this argument is a pass through to ddi_copyxxx()
24861  *			  directly from the mode argument of ioctl().
24862  *
24863  * Return Code: 0
24864  *		EFAULT
24865  *		ENXIO
24866  */
24867 
24868 static int
24869 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24870 {
24871 	struct sd_lun	*un = NULL;
24872 	int		mh_time;
24873 	int		rval = 0;
24874 
24875 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24876 		return (ENXIO);
24877 	}
24878 
24879 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24880 		return (EFAULT);
24881 
24882 	if (mh_time) {
24883 		mutex_enter(SD_MUTEX(un));
24884 		un->un_resvd_status |= SD_FAILFAST;
24885 		mutex_exit(SD_MUTEX(un));
24886 		/*
24887 		 * If mh_time is INT_MAX, then this ioctl is being used for
24888 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24889 		 */
24890 		if (mh_time != INT_MAX) {
24891 			rval = sd_check_mhd(dev, mh_time);
24892 		}
24893 	} else {
24894 		(void) sd_check_mhd(dev, 0);
24895 		mutex_enter(SD_MUTEX(un));
24896 		un->un_resvd_status &= ~SD_FAILFAST;
24897 		mutex_exit(SD_MUTEX(un));
24898 	}
24899 	return (rval);
24900 }
24901 
24902 
24903 /*
24904  *    Function: sd_mhdioc_takeown
24905  *
24906  * Description: This routine is the driver entry point for handling ioctl
24907  *		requests to forcefully acquire exclusive access rights to the
24908  *		multihost disk (MHIOCTKOWN).
24909  *
24910  *   Arguments: dev	- the device number
24911  *		arg	- user provided structure specifying the delay
24912  *			  parameters in milliseconds
24913  *		flag	- this argument is a pass through to ddi_copyxxx()
24914  *			  directly from the mode argument of ioctl().
24915  *
24916  * Return Code: 0
24917  *		EFAULT
24918  *		ENXIO
24919  */
24920 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* A NULL arg means the caller accepts the default delay values. */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		un->un_resvd_status |= SD_RESERVE;
		/*
		 * reinstate_resv_delay is supplied in milliseconds and
		 * stored in microseconds (hence the * 1000 here and the
		 * / 1000 when passed to sd_check_mhd below).
		 */
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Register for reset notification while reservation held. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/* Takeover failed: make sure the reserve flag is clear. */
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
24978 
24979 
24980 /*
24981  *    Function: sd_mhdioc_release
24982  *
24983  * Description: This routine is the driver entry point for handling ioctl
24984  *		requests to release exclusive access rights to the multihost
24985  *		disk (MHIOCRELEASE).
24986  *
24987  *   Arguments: dev	- the device number
24988  *
24989  * Return Code: 0
24990  *		ENXIO
24991  */
24992 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Save the current reservation status so it can be restored if
	 * the release fails below, then clear the reservation flags and
	 * cancel any pending reservation-reclaim timeout.
	 */
	mutex_enter(SD_MUTEX(un));
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		/* untimeout() may block; must not hold SD_MUTEX here. */
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		/*
		 * Release succeeded: stop the watch thread unless failfast
		 * still needs it, and cancel the reset notification.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
25045 
25046 
25047 /*
25048  *    Function: sd_mhdioc_register_devid
25049  *
25050  * Description: This routine is the driver entry point for handling ioctl
25051  *		requests to register the device id (MHIOCREREGISTERDEVID).
25052  *
25053  *		Note: The implementation for this ioctl has been updated to
25054  *		be consistent with the original PSARC case (1999/357)
25055  *		(4375899, 4241671, 4220005)
25056  *
25057  *   Arguments: dev	- the device number
25058  *
25059  * Return Code: 0
25060  *		ENXIO
25061  */
25062 
25063 static int
25064 sd_mhdioc_register_devid(dev_t dev)
25065 {
25066 	struct sd_lun	*un = NULL;
25067 	int		rval = 0;
25068 
25069 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25070 		return (ENXIO);
25071 	}
25072 
25073 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25074 
25075 	mutex_enter(SD_MUTEX(un));
25076 
25077 	/* If a devid already exists, de-register it */
25078 	if (un->un_devid != NULL) {
25079 		ddi_devid_unregister(SD_DEVINFO(un));
25080 		/*
25081 		 * After unregister devid, needs to free devid memory
25082 		 */
25083 		ddi_devid_free(un->un_devid);
25084 		un->un_devid = NULL;
25085 	}
25086 
25087 	/* Check for reservation conflict */
25088 	mutex_exit(SD_MUTEX(un));
25089 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25090 	mutex_enter(SD_MUTEX(un));
25091 
25092 	switch (rval) {
25093 	case 0:
25094 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25095 		break;
25096 	case EACCES:
25097 		break;
25098 	default:
25099 		rval = EIO;
25100 	}
25101 
25102 	mutex_exit(SD_MUTEX(un));
25103 	return (rval);
25104 }
25105 
25106 
25107 /*
25108  *    Function: sd_mhdioc_inkeys
25109  *
25110  * Description: This routine is the driver entry point for handling ioctl
25111  *		requests to issue the SCSI-3 Persistent In Read Keys command
25112  *		to the device (MHIOCGRP_INKEYS).
25113  *
25114  *   Arguments: dev	- the device number
25115  *		arg	- user provided in_keys structure
25116  *		flag	- this argument is a pass through to ddi_copyxxx()
25117  *			  directly from the mode argument of ioctl().
25118  *
25119  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25120  *		ENXIO
25121  *		EFAULT
25122  */
25123 
static int
sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inkeys_t		inkeys;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct mhioc_inkeys32	inkeys32;

		if (ddi_copyin(arg, &inkeys32,
		    sizeof (struct mhioc_inkeys32), flag) != 0) {
			return (EFAULT);
		}
		/* Widen the 32-bit key-list pointer for the common routine. */
		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		/* Copy the updated generation count back to the caller. */
		inkeys32.generation = inkeys.generation;
		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}

#endif /* _MULTI_DATAMODEL */

	return (rval);
}
25189 
25190 
25191 /*
25192  *    Function: sd_mhdioc_inresv
25193  *
25194  * Description: This routine is the driver entry point for handling ioctl
25195  *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
25197  *
25198  *   Arguments: dev	- the device number
25199  *		arg	- user provided in_resv structure
25200  *		flag	- this argument is a pass through to ddi_copyxxx()
25201  *			  directly from the mode argument of ioctl().
25202  *
25203  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25204  *		ENXIO
25205  *		EFAULT
25206  */
25207 
25208 static int
25209 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25210 {
25211 	struct sd_lun		*un;
25212 	mhioc_inresvs_t		inresvs;
25213 	int			rval = 0;
25214 
25215 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25216 		return (ENXIO);
25217 	}
25218 
25219 #ifdef _MULTI_DATAMODEL
25220 
25221 	switch (ddi_model_convert_from(flag & FMODELS)) {
25222 	case DDI_MODEL_ILP32: {
25223 		struct mhioc_inresvs32	inresvs32;
25224 
25225 		if (ddi_copyin(arg, &inresvs32,
25226 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25227 			return (EFAULT);
25228 		}
25229 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25230 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25231 		    &inresvs, flag)) != 0) {
25232 			return (rval);
25233 		}
25234 		inresvs32.generation = inresvs.generation;
25235 		if (ddi_copyout(&inresvs32, arg,
25236 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25237 			return (EFAULT);
25238 		}
25239 		break;
25240 	}
25241 	case DDI_MODEL_NONE:
25242 		if (ddi_copyin(arg, &inresvs,
25243 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25244 			return (EFAULT);
25245 		}
25246 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25247 		    &inresvs, flag)) != 0) {
25248 			return (rval);
25249 		}
25250 		if (ddi_copyout(&inresvs, arg,
25251 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25252 			return (EFAULT);
25253 		}
25254 		break;
25255 	}
25256 
25257 #else /* ! _MULTI_DATAMODEL */
25258 
25259 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25260 		return (EFAULT);
25261 	}
25262 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25263 	if (rval != 0) {
25264 		return (rval);
25265 	}
25266 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25267 		return (EFAULT);
25268 	}
25269 
25270 #endif /* ! _MULTI_DATAMODEL */
25271 
25272 	return (rval);
25273 }
25274 
25275 
25276 /*
25277  * The following routines support the clustering functionality described below
25278  * and implement lost reservation reclaim functionality.
25279  *
25280  * Clustering
25281  * ----------
25282  * The clustering code uses two different, independent forms of SCSI
25283  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25284  * Persistent Group Reservations. For any particular disk, it will use either
25285  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25286  *
25287  * SCSI-2
25288  * The cluster software takes ownership of a multi-hosted disk by issuing the
25289  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
 * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a
 * cluster,
25291  * just after taking ownership of the disk with the MHIOCTKOWN ioctl then issues
25292  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
25293  * meaning of failfast is that if the driver (on this host) ever encounters the
25294  * scsi error return code RESERVATION_CONFLICT from the device, it should
25295  * immediately panic the host. The motivation for this ioctl is that if this
25296  * host does encounter reservation conflict, the underlying cause is that some
25297  * other host of the cluster has decided that this host is no longer in the
25298  * cluster and has seized control of the disks for itself. Since this host is no
25299  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25300  * does two things:
25301  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25302  *      error to panic the host
25303  *      (b) it sets up a periodic timer to test whether this host still has
25304  *      "access" (in that no other host has reserved the device):  if the
25305  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25306  *      purpose of that periodic timer is to handle scenarios where the host is
25307  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25308  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25309  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25310  * the device itself.
25311  *
25312  * SCSI-3 PGR
25313  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25314  * facility is supported through the shared multihost disk ioctls
25315  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25316  * MHIOCGRP_PREEMPTANDABORT)
25317  *
25318  * Reservation Reclaim:
25319  * --------------------
25320  * To support the lost reservation reclaim operations this driver creates a
25321  * single thread to handle reinstating reservations on all devices that have
25322  * lost reservations sd_resv_reclaim_requests are logged for all devices that
25323  * have LOST RESERVATIONS when the scsi watch facility callsback sd_mhd_watch_cb
25324  * and the reservation reclaim thread loops through the requests to regain the
25325  * lost reservations.
25326  */
25327 
25328 /*
25329  *    Function: sd_check_mhd()
25330  *
25331  * Description: This function sets up and submits a scsi watch request or
25332  *		terminates an existing watch request. This routine is used in
25333  *		support of reservation reclaim.
25334  *
25335  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25336  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
25338  *			   interval for issuing TEST UNIT READY commands. If
25339  *			   set to 0 the watch should be terminated. If the
25340  *			   interval is set to 0 and if the device is required
25341  *			   to hold reservation while disabling failfast, the
25342  *			   watch is restarted with an interval of
25343  *			   reinstate_resv_delay.
25344  *
25345  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25346  *		ENXIO      - Indicates an invalid device was specified
25347  *		EAGAIN     - Unable to submit the scsi watch request
25348  */
25349 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * NOTE(review): the header comment above says "interval" is in
	 * microseconds, but the msecs-to-usecs conversion below indicates
	 * the parameter is actually in milliseconds.
	 */

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			mutex_exit(SD_MUTEX(un));
			/* terminate without holding SD_MUTEX; it may block */
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			interval = sd_reinstate_resv_delay/1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
25422 
25423 
25424 /*
25425  *    Function: sd_mhd_watch_cb()
25426  *
25427  * Description: This function is the call back function used by the scsi watch
25428  *		facility. The scsi watch facility sends the "Test Unit Ready"
25429  *		and processes the status. If applicable (i.e. a "Unit Attention"
25430  *		status and automatic "Request Sense" not used) the scsi watch
25431  *		facility will send a "Request Sense" and retrieve the sense data
25432  *		to be passed to this callback function. In either case the
25433  *		automatic "Request Sense" or the facility submitting one, this
25434  *		callback is passed the status and sense data.
25435  *
25436  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25437  *			among multiple watches that share this callback function
25438  *		resultp - scsi watch facility result packet containing scsi
25439  *			  packet, status byte and sense data
25440  *
25441  * Return Code: 0 - continue the watch task
25442  *		non-zero - terminate the watch task
25443  */
25444 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	uint8_t				*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t  				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= (uint8_t *)resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	/* A non-zero return terminates the watch if the unit has detached. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Lock-state note: every path below must reach the SD_RESERVE
	 * check at the bottom with SD_MUTEX held; a short sense buffer
	 * is the only early return.
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((scsi_sense_asc(sensep) ==
			    SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* Sense data too short to interpret; keep watching. */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				/*
				 * Drop the mutex around untimeout() since the
				 * timeout handler itself takes SD_MUTEX.
				 */
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
25544 
25545 
25546 /*
25547  *    Function: sd_mhd_watch_incomplete()
25548  *
25549  * Description: This function is used to find out why a scsi pkt sent by the
25550  *		scsi watch facility was not completed. Under some scenarios this
25551  *		routine will return. Otherwise it will send a bus reset to see
25552  *		if the drive is still online.
25553  *
25554  *   Arguments: un  - driver soft state (unit) structure
25555  *		pkt - incomplete scsi pkt
25556  */
25557 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;
	int	perr;

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	/* Honor FLAG_SILENT; remember parity errors for special-casing. */
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	/* While a crash dump is in progress do not reset or change state. */
	if (un->un_state == SD_STATE_DUMPING) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled are read after dropping
			 * SD_MUTEX below -- presumably these flags are
			 * stable once the unit is attached; confirm.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
25692 
25693 
25694 /*
25695  *    Function: sd_sname()
25696  *
25697  * Description: This is a simple little routine to return a string containing
25698  *		a printable description of command status byte for use in
25699  *		logging.
25700  *
25701  *   Arguments: status - pointer to a status byte
25702  *
25703  * Return Code: char * - string containing status description.
25704  */
25705 
25706 static char *
25707 sd_sname(uchar_t status)
25708 {
25709 	switch (status & STATUS_MASK) {
25710 	case STATUS_GOOD:
25711 		return ("good status");
25712 	case STATUS_CHECK:
25713 		return ("check condition");
25714 	case STATUS_MET:
25715 		return ("condition met");
25716 	case STATUS_BUSY:
25717 		return ("busy");
25718 	case STATUS_INTERMEDIATE:
25719 		return ("intermediate");
25720 	case STATUS_INTERMEDIATE_MET:
25721 		return ("intermediate - condition met");
25722 	case STATUS_RESERVATION_CONFLICT:
25723 		return ("reservation_conflict");
25724 	case STATUS_TERMINATED:
25725 		return ("command terminated");
25726 	case STATUS_QFULL:
25727 		return ("queue full");
25728 	default:
25729 		return ("<unknown status>");
25730 	}
25731 }
25732 
25733 
25734 /*
25735  *    Function: sd_mhd_resvd_recover()
25736  *
25737  * Description: This function adds a reservation entry to the
25738  *		sd_resv_reclaim_request list and signals the reservation
25739  *		reclaim thread that there is work pending. If the reservation
25740  *		reclaim thread has not been previously created this function
25741  *		will kick it off.
25742  *
25743  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25744  *			among multiple watches that share this callback function
25745  *
25746  *     Context: This routine is called by timeout() and is run in interrupt
25747  *		context. It must not sleep or call other functions which may
25748  *		sleep.
25749  */
25750 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	/* Unit may have detached since the timeout was scheduled. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	mutex_enter(SD_MUTEX(un));
	/* This timeout has now fired; clear the pending id. */
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/* Walk the queue; enqueue at the tail unless dev is present. */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			/* Duplicate request; discard the new entry. */
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
25833 
25834 /*
25835  *    Function: sd_resv_reclaim_thread()
25836  *
25837  * Description: This function implements the reservation reclaim operations
25838  *
25839  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
25840  *		      among multiple watches that share this callback function
25841  */
25842 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/* Wait for work */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		/*
		 * NOTE(review): an 'if' (not 'while') guards this cv_wait;
		 * a wakeup that finds the queue still empty simply skips the
		 * loop below, so the thread cleans up and exits, to be
		 * recreated by sd_mhd_resvd_recover on the next request.
		 */
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* Reclaim failed; restore the lost flag. */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
25935 
25936 
25937 /*
25938  *    Function: sd_rmv_resv_reclaim_req()
25939  *
25940  * Description: This function removes any pending reservation reclaim requests
25941  *		for the specified device.
25942  *
25943  *   Arguments: dev - the device 'dev_t'
25944  */
25945 
25946 static void
25947 sd_rmv_resv_reclaim_req(dev_t dev)
25948 {
25949 	struct sd_thr_request *sd_mhreq;
25950 	struct sd_thr_request *sd_prev;
25951 
25952 	/* Remove a reservation reclaim request from the list */
25953 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25954 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25955 		/*
25956 		 * We are attempting to reinstate reservation for
25957 		 * this device. We wait for sd_reserve_release()
25958 		 * to return before we return.
25959 		 */
25960 		cv_wait(&sd_tr.srq_inprocess_cv,
25961 		    &sd_tr.srq_resv_reclaim_mutex);
25962 	} else {
25963 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25964 		if (sd_mhreq && sd_mhreq->dev == dev) {
25965 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25966 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25967 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25968 			return;
25969 		}
25970 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25971 			if (sd_mhreq && sd_mhreq->dev == dev) {
25972 				break;
25973 			}
25974 			sd_prev = sd_mhreq;
25975 		}
25976 		if (sd_mhreq != NULL) {
25977 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25978 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25979 		}
25980 	}
25981 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25982 }
25983 
25984 
25985 /*
25986  *    Function: sd_mhd_reset_notify_cb()
25987  *
25988  * Description: This is a call back function for scsi_reset_notify. This
25989  *		function updates the softstate reserved status and logs the
25990  *		reset. The driver scsi watch facility callback function
25991  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25992  *		will reclaim the reservation.
25993  *
25994  *   Arguments: arg  - driver soft state (unit) structure
25995  */
25996 
25997 static void
25998 sd_mhd_reset_notify_cb(caddr_t arg)
25999 {
26000 	struct sd_lun *un = (struct sd_lun *)arg;
26001 
26002 	mutex_enter(SD_MUTEX(un));
26003 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
26004 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
26005 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26006 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
26007 	}
26008 	mutex_exit(SD_MUTEX(un));
26009 }
26010 
26011 
26012 /*
26013  *    Function: sd_take_ownership()
26014  *
26015  * Description: This routine implements an algorithm to achieve a stable
26016  *		reservation on disks which don't implement priority reserve,
 *		and makes sure that other hosts lose re-reservation attempts.
 *		This algorithm consists of a loop that keeps issuing the RESERVE
26019  *		for some period of time (min_ownership_delay, default 6 seconds)
26020  *		During that loop, it looks to see if there has been a bus device
26021  *		reset or bus reset (both of which cause an existing reservation
26022  *		to be lost). If the reservation is lost issue RESERVE until a
26023  *		period of min_ownership_delay with no resets has gone by, or
26024  *		until max_ownership_delay has expired. This loop ensures that
26025  *		the host really did manage to reserve the device, in spite of
26026  *		resets. The looping for min_ownership_delay (default six
26027  *		seconds) is important to early generation clustering products,
26028  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
26029  *		MHIOCENFAILFAST periodic timer of two seconds. By having
26030  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
26031  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
26032  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
26033  *		have already noticed, via the MHIOCENFAILFAST polling, that it
26034  *		no longer "owns" the disk and will have panicked itself.  Thus,
26035  *		the host issuing the MHIOCTKOWN is assured (with timing
26036  *		dependencies) that by the time it actually starts to use the
26037  *		disk for real work, the old owner is no longer accessing it.
26038  *
26039  *		min_ownership_delay is the minimum amount of time for which the
26040  *		disk must be reserved continuously devoid of resets before the
26041  *		MHIOCTKOWN ioctl will return success.
26042  *
26043  *		max_ownership_delay indicates the amount of time by which the
26044  *		take ownership should succeed or timeout with an error.
26045  *
26046  *   Arguments: dev - the device 'dev_t'
26047  *		*p  - struct containing timing info.
26048  *
26049  * Return Code: 0 for success or error code
26050  */
26051 
static int
sd_take_ownership(dev_t dev, struct mhioctkown *p)
{
	struct sd_lun	*un;
	int		rval;
	int		err;
	int		reservation_count   = 0;
	int		min_ownership_delay =  6000000; /* in usec */
	int		max_ownership_delay = 30000000; /* in usec */
	clock_t		start_time;	/* starting time of this algorithm */
	clock_t		end_time;	/* time limit for giving up */
	clock_t		ownership_time;	/* time limit for stable ownership */
	clock_t		current_time;
	clock_t		previous_current_time;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Attempt a device reservation. A priority reservation is requested.
	 */
	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
	    != SD_SUCCESS) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: return(1)=%d\n", rval);
		return (rval);
	}

	/* Update the softstate reserved status to indicate the reservation */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_status |= SD_RESERVE;
	un->un_resvd_status &=
	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
	mutex_exit(SD_MUTEX(un));

	/* Caller-supplied delays (in msecs) override the defaults. */
	if (p != NULL) {
		if (p->min_ownership_delay != 0) {
			min_ownership_delay = p->min_ownership_delay * 1000;
		}
		if (p->max_ownership_delay != 0) {
			max_ownership_delay = p->max_ownership_delay * 1000;
		}
	}
	SD_INFO(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: min, max delays: %d, %d\n",
	    min_ownership_delay, max_ownership_delay);

	start_time = ddi_get_lbolt();
	current_time	= start_time;
	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
	end_time	= start_time + drv_usectohz(max_ownership_delay);

	/*
	 * Poll every 500ms, re-issuing RESERVE each time.  All of the
	 * time-limit comparisons use signed differences of clock_t so
	 * they remain correct across lbolt wraparound.  The loop only
	 * exits via one of the break statements, each of which sets rval.
	 */
	while (current_time - end_time < 0) {
		delay(drv_usectohz(500000));

		/* On a reserve failure, retry once before giving up. */
		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
				mutex_enter(SD_MUTEX(un));
				rval = (un->un_resvd_status &
				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
				mutex_exit(SD_MUTEX(un));
				break;
			}
		}
		previous_current_time = current_time;
		current_time = ddi_get_lbolt();
		mutex_enter(SD_MUTEX(un));
		/*
		 * A failed first attempt (err != 0, even if the retry
		 * succeeded) or a lost reservation restarts the stable
		 * ownership window.
		 */
		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
			ownership_time = ddi_get_lbolt() +
			    drv_usectohz(min_ownership_delay);
			reservation_count = 0;
		} else {
			reservation_count++;
		}
		un->un_resvd_status |= SD_RESERVE;
		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
		mutex_exit(SD_MUTEX(un));

		SD_INFO(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: ticks for loop iteration=%ld, "
		    "reservation=%s\n", (current_time - previous_current_time),
		    reservation_count ? "ok" : "reclaimed");

		/* Require >= 4 clean 500ms polls AND min_ownership_delay. */
		if (current_time - ownership_time >= 0 &&
		    reservation_count >= 4) {
			rval = 0; /* Achieved a stable ownership */
			break;
		}
		if (current_time - end_time >= 0) {
			rval = EACCES; /* No ownership in max possible time */
			break;
		}
	}
	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: return(2)=%d\n", rval);
	return (rval);
}
26150 
26151 
26152 /*
26153  *    Function: sd_reserve_release()
26154  *
26155  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26156  *		PRIORITY RESERVE commands based on a user specified command type
26157  *
26158  *   Arguments: dev - the device 'dev_t'
26159  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26160  *		      SD_RESERVE, SD_RELEASE
26161  *
26162  * Return Code: 0 or Error Code
26163  */
26164 
static int
sd_reserve_release(dev_t dev, int cmd)
{
	struct uscsi_cmd	*com = NULL;
	struct sd_lun		*un = NULL;
	char			cdb[CDB_GROUP0];
	int			rval;

	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
	    (cmd == SD_PRIORITY_RESERVE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* instantiate and initialize the command and cdb */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP0);
	com->uscsi_flags   = USCSI_SILENT;
	com->uscsi_timeout = un->un_reserve_release_time;
	com->uscsi_cdblen  = CDB_GROUP0;
	/* uscsi_cdb points at the stack cdb buffer for the call's duration */
	com->uscsi_cdb	   = cdb;
	if (cmd == SD_RELEASE) {
		cdb[0] = SCMD_RELEASE;
	} else {
		/* SD_RESERVE and SD_PRIORITY_RESERVE both issue RESERVE */
		cdb[0] = SCMD_RESERVE;
	}

	/* Send the command. */
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * "break" a reservation that is held by another host, by issuing a
	 * reset if priority reserve is desired, and we could not get the
	 * device.
	 */
	if ((cmd == SD_PRIORITY_RESERVE) &&
	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		/*
		 * First try to reset the LUN. If we cannot, then try a target
		 * reset, followed by a bus reset if the target reset fails.
		 */
		int reset_retval = 0;
		if (un->un_f_lun_reset_enabled == TRUE) {
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (reset_retval == 0) {
			/* The LUN reset either failed or was not issued */
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
		if ((reset_retval == 0) &&
		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
			/* All three reset levels failed; give up. */
			rval = EIO;
			kmem_free(com, sizeof (*com));
			return (rval);
		}

		/* Rebuild the command from scratch after the reset. */
		bzero(com, sizeof (struct uscsi_cmd));
		com->uscsi_flags   = USCSI_SILENT;
		com->uscsi_cdb	   = cdb;
		com->uscsi_cdblen  = CDB_GROUP0;
		com->uscsi_timeout = 5;

		/*
		 * Reissue the last reserve command, this time without request
		 * sense.  Assume that it is just a regular reserve command.
		 */
		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
		    UIO_SYSSPACE, SD_PATH_STANDARD);
	}

	/* Return an error if still getting a reservation conflict. */
	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		rval = EACCES;
	}

	kmem_free(com, sizeof (*com));
	return (rval);
}
26245 
26246 
#define	SD_NDUMP_RETRIES	12
/*
 *    Function: sddump
 *
 * Description: System Crash Dump routine.  Writes nblk system-size blocks
 *		from the kernel buffer at addr to partition-relative block
 *		blkno on this disk.  All commands are issued in polled mode
 *		(FLAG_NOINTR) since interrupts may not be available while the
 *		system is dumping.  Handles powering up a spun-down device,
 *		resetting the target on first entry, read-modify-write for
 *		devices whose sector size differs from the system block size,
 *		and (on x86/amd64) partial DMA transfers.
 *
 *   Arguments: dev   - dev_t of the dump device
 *		addr  - kernel virtual address of the data to be written
 *		blkno - starting block number, relative to the partition
 *		nblk  - number of system (DEV_BSIZE) blocks to write
 *
 * Return Code: 0 on success; ENXIO, EINVAL, or EIO on failure.
 */

static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct dk_map	*lp;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
#if defined(__i386) || defined(__amd64)
	ssize_t dma_resid;
	daddr_t oblkno;
#endif

	/* Refuse unknown units, unlabeled disks, and CD-ROM devices. */
	instance = SDUNIT(dev);
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	/* Validate blocks to dump at against partition size. */
	lp = &un->un_map[partition];
	if ((blkno + nblk) > lp->dkl_nblk) {
		SD_TRACE(SD_LOG_DUMP, un,
		    "sddump: dump range larger than partition: "
		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
		    blkno, nblk, lp->dkl_nblk);
		return (EINVAL);
	}

	/*
	 * If the device is spun down, power it up in-line (polled) rather
	 * than through the normal sdpower path, which may rely on interrupts.
	 */
	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);

		/*
		 * Dump no longer uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		/* Build a polled START STOP UNIT (start) command by hand. */
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_ddi_pm_resume(un);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	/* Zero throttle: no new ordinary I/O may start while dumping. */
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * a good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
					"sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	blkno += un->un_offset[partition];
	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		/*
		 * If the request is not aligned on target-block boundaries,
		 * a read-modify-write cycle is needed: read the bracketing
		 * target blocks, overlay the dump data, write them back.
		 */
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
				(un->un_tgt_blocksize - 1)) /
				un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT((io_start_offset >= 0) &&
			    (io_start_offset < un->un_tgt_blocksize));
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			/* Aligned request: a straight unit conversion. */
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* 512-byte sectors: build a minimal stack buf for the write. */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

#if defined(__i386) || defined(__amd64)
	/*
	 * Partial-DMA loop: if the HBA cannot map the whole transfer at
	 * once, keep issuing writes until the residual count reaches zero.
	 * Note the matching close brace inside the #if block at the bottom.
	 */
	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;
	while (dma_resid != 0) {
#endif

	/* Retry packet allocation; resources may be scarce at dump time. */
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

#if defined(__i386) || defined(__amd64)
		blkno = oblkno +
			((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
		nblk = dma_resid / un->un_tgt_blocksize;

		if (wr_pktp) {
			/* Partial DMA transfers after initial transfer */
			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
			    blkno, nblk);
		} else {
			/* Initial transfer */
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    un->un_pkt_flags, NULL_FUNC, NULL,
			    blkno, nblk);
		}
#else
		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
		    0, NULL_FUNC, NULL, blkno, nblk);
#endif

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			/* Final retry exhausted: restore state and give up. */
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			/* Free the RMW buffer only if one was allocated. */
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

#if defined(__i386) || defined(__amd64)
	/*
	 * save the resid from PARTIAL_DMA
	 */
	dma_resid = wr_pktp->pkt_resid;
	if (dma_resid != 0)
		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
	wr_pktp->pkt_resid = 0;
#endif

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset when enabled; else the target. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
#if defined(__i386) || defined(__amd64)
	}	/* dma_resid */
#endif

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	/* Free the RMW staging buffer if one was allocated above. */
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		mutex_exit(SD_MUTEX(un));
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
26658 
26659 /*
26660  *    Function: sd_scsi_poll()
26661  *
26662  * Description: This is a wrapper for the scsi_poll call.
26663  *
26664  *   Arguments: sd_lun - The unit structure
26665  *              scsi_pkt - The scsi packet being sent to the device.
26666  *
26667  * Return Code: 0 - Command completed successfully with good status
26668  *             -1 - Command failed.  This could indicate a check condition
26669  *                  or other status value requiring recovery action.
26670  *
26671  */
26672 
26673 static int
26674 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26675 {
26676 	int status;
26677 
26678 	ASSERT(un != NULL);
26679 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26680 	ASSERT(pktp != NULL);
26681 
26682 	status = SD_SUCCESS;
26683 
26684 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26685 		pktp->pkt_flags |= un->un_tagflags;
26686 		pktp->pkt_flags &= ~FLAG_NODISCON;
26687 	}
26688 
26689 	status = sd_ddi_scsi_poll(pktp);
26690 	/*
26691 	 * Scsi_poll returns 0 (success) if the command completes and the
26692 	 * status block is STATUS_GOOD.  We should only check errors if this
26693 	 * condition is not true.  Even then we should send our own request
26694 	 * sense packet only if we have a check condition and auto
26695 	 * request sense has not been performed by the hba.
26696 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26697 	 */
26698 	if ((status != SD_SUCCESS) &&
26699 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26700 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26701 	    (pktp->pkt_reason != CMD_DEV_GONE))
26702 		(void) sd_send_polled_RQS(un);
26703 
26704 	return (status);
26705 }
26706 
26707 /*
26708  *    Function: sd_send_polled_RQS()
26709  *
26710  * Description: This sends the request sense command to a device.
26711  *
26712  *   Arguments: sd_lun - The unit structure
26713  *
26714  * Return Code: 0 - Command completed successfully with good status
26715  *             -1 - Command failed.
26716  *
26717  */
26718 
26719 static int
26720 sd_send_polled_RQS(struct sd_lun *un)
26721 {
26722 	int	ret_val;
26723 	struct	scsi_pkt	*rqs_pktp;
26724 	struct	buf		*rqs_bp;
26725 
26726 	ASSERT(un != NULL);
26727 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26728 
26729 	ret_val = SD_SUCCESS;
26730 
26731 	rqs_pktp = un->un_rqs_pktp;
26732 	rqs_bp	 = un->un_rqs_bp;
26733 
26734 	mutex_enter(SD_MUTEX(un));
26735 
26736 	if (un->un_sense_isbusy) {
26737 		ret_val = SD_FAILURE;
26738 		mutex_exit(SD_MUTEX(un));
26739 		return (ret_val);
26740 	}
26741 
26742 	/*
26743 	 * If the request sense buffer (and packet) is not in use,
26744 	 * let's set the un_sense_isbusy and send our packet
26745 	 */
26746 	un->un_sense_isbusy 	= 1;
26747 	rqs_pktp->pkt_resid  	= 0;
26748 	rqs_pktp->pkt_reason 	= 0;
26749 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26750 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26751 
26752 	mutex_exit(SD_MUTEX(un));
26753 
26754 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26755 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26756 
26757 	/*
26758 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26759 	 * axle - it has a call into us!
26760 	 */
26761 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26762 		SD_INFO(SD_LOG_COMMON, un,
26763 		    "sd_send_polled_RQS: RQS failed\n");
26764 	}
26765 
26766 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26767 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26768 
26769 	mutex_enter(SD_MUTEX(un));
26770 	un->un_sense_isbusy = 0;
26771 	mutex_exit(SD_MUTEX(un));
26772 
26773 	return (ret_val);
26774 }
26775 
26776 /*
26777  * Defines needed for localized version of the scsi_poll routine.
26778  */
26779 #define	SD_CSEC		10000			/* usecs */
26780 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
26781 
26782 
26783 /*
26784  *    Function: sd_ddi_scsi_poll()
26785  *
26786  * Description: Localized version of the scsi_poll routine.  The purpose is to
26787  *		send a scsi_pkt to a device as a polled command.  This version
26788  *		is to ensure more robust handling of transport errors.
26789  *		Specifically this routine cures not ready, coming ready
26790  *		transition for power up and reset of sonoma's.  This can take
26791  *		up to 45 seconds for power-on and 20 seconds for reset of a
26792  * 		sonoma lun.
26793  *
26794  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26795  *
26796  * Return Code: 0 - Command completed successfully with good status
26797  *             -1 - Command failed.
26798  *
26799  */
26800 
26801 static int
26802 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26803 {
26804 	int busy_count;
26805 	int timeout;
26806 	int rval = SD_FAILURE;
26807 	int savef;
26808 	uint8_t *sensep;
26809 	long savet;
26810 	void (*savec)();
26811 	/*
26812 	 * The following is defined in machdep.c and is used in determining if
26813 	 * the scsi transport system will do polled I/O instead of interrupt
26814 	 * I/O when called from xx_dump().
26815 	 */
26816 	extern int do_polled_io;
26817 
26818 	/*
26819 	 * save old flags in pkt, to restore at end
26820 	 */
26821 	savef = pkt->pkt_flags;
26822 	savec = pkt->pkt_comp;
26823 	savet = pkt->pkt_time;
26824 
26825 	pkt->pkt_flags |= FLAG_NOINTR;
26826 
26827 	/*
26828 	 * XXX there is nothing in the SCSA spec that states that we should not
26829 	 * do a callback for polled cmds; however, removing this will break sd
26830 	 * and probably other target drivers
26831 	 */
26832 	pkt->pkt_comp = NULL;
26833 
26834 	/*
26835 	 * we don't like a polled command without timeout.
26836 	 * 60 seconds seems long enough.
26837 	 */
26838 	if (pkt->pkt_time == 0) {
26839 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26840 	}
26841 
26842 	/*
26843 	 * Send polled cmd.
26844 	 *
26845 	 * We do some error recovery for various errors.  Tran_busy,
26846 	 * queue full, and non-dispatched commands are retried every 10 msec.
26847 	 * as they are typically transient failures.  Busy status and Not
26848 	 * Ready are retried every second as this status takes a while to
26849 	 * change.  Unit attention is retried for pkt_time (60) times
26850 	 * with no delay.
26851 	 */
26852 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26853 
26854 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26855 		int rc;
26856 		int poll_delay;
26857 
26858 		/*
26859 		 * Initialize pkt status variables.
26860 		 */
26861 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26862 
26863 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26864 			if (rc != TRAN_BUSY) {
26865 				/* Transport failed - give up. */
26866 				break;
26867 			} else {
26868 				/* Transport busy - try again. */
26869 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26870 			}
26871 		} else {
26872 			/*
26873 			 * Transport accepted - check pkt status.
26874 			 */
26875 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26876 			if (pkt->pkt_reason == CMD_CMPLT &&
26877 			    rc == STATUS_CHECK &&
26878 			    pkt->pkt_state & STATE_ARQ_DONE) {
26879 				struct scsi_arq_status *arqstat =
26880 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26881 
26882 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26883 			} else {
26884 				sensep = NULL;
26885 			}
26886 
26887 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26888 			    (rc == STATUS_GOOD)) {
26889 				/* No error - we're done */
26890 				rval = SD_SUCCESS;
26891 				break;
26892 
26893 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26894 				/* Lost connection - give up */
26895 				break;
26896 
26897 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26898 			    (pkt->pkt_state == 0)) {
26899 				/* Pkt not dispatched - try again. */
26900 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26901 
26902 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26903 			    (rc == STATUS_QFULL)) {
26904 				/* Queue full - try again. */
26905 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26906 
26907 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26908 			    (rc == STATUS_BUSY)) {
26909 				/* Busy - try again. */
26910 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26911 				busy_count += (SD_SEC_TO_CSEC - 1);
26912 
26913 			} else if ((sensep != NULL) &&
26914 			    (scsi_sense_key(sensep) ==
26915 				KEY_UNIT_ATTENTION)) {
26916 				/* Unit Attention - try again */
26917 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
26918 				continue;
26919 
26920 			} else if ((sensep != NULL) &&
26921 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26922 			    (scsi_sense_asc(sensep) == 0x04) &&
26923 			    (scsi_sense_ascq(sensep) == 0x01)) {
26924 				/* Not ready -> ready - try again. */
26925 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26926 				busy_count += (SD_SEC_TO_CSEC - 1);
26927 
26928 			} else {
26929 				/* BAD status - give up. */
26930 				break;
26931 			}
26932 		}
26933 
26934 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26935 		    !do_polled_io) {
26936 			delay(drv_usectohz(poll_delay));
26937 		} else {
26938 			/* we busy wait during cpr_dump or interrupt threads */
26939 			drv_usecwait(poll_delay);
26940 		}
26941 	}
26942 
26943 	pkt->pkt_flags = savef;
26944 	pkt->pkt_comp = savec;
26945 	pkt->pkt_time = savet;
26946 	return (rval);
26947 }
26948 
26949 
26950 /*
26951  *    Function: sd_persistent_reservation_in_read_keys
26952  *
26953  * Description: This routine is the driver entry point for handling CD-ROM
26954  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26955  *		by sending the SCSI-3 PRIN commands to the device.
26956  *		Processes the read keys command response by copying the
26957  *		reservation key information into the user provided buffer.
26958  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26959  *
26960  *   Arguments: un   -  Pointer to soft state struct for the target.
26961  *		usrp -	user provided pointer to multihost Persistent In Read
26962  *			Keys structure (mhioc_inkeys_t)
26963  *		flag -	this argument is a pass through to ddi_copyxxx()
26964  *			directly from the mode argument of ioctl().
26965  *
26966  * Return Code: 0   - Success
26967  *		EACCES
26968  *		ENOTSUP
26969  *		errno return code from sd_send_scsi_cmd()
26970  *
26971  *     Context: Can sleep. Does not return until command is completed.
26972  */
26973 
26974 static int
26975 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26976     mhioc_inkeys_t *usrp, int flag)
26977 {
26978 #ifdef _MULTI_DATAMODEL
26979 	struct mhioc_key_list32	li32;
26980 #endif
26981 	sd_prin_readkeys_t	*in;
26982 	mhioc_inkeys_t		*ptr;
26983 	mhioc_key_list_t	li;
26984 	uchar_t			*data_bufp;
26985 	int 			data_len;
26986 	int			rval;
26987 	size_t			copysz;
26988 
26989 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26990 		return (EINVAL);
26991 	}
26992 	bzero(&li, sizeof (mhioc_key_list_t));
26993 
26994 	/*
26995 	 * Get the listsize from user
26996 	 */
26997 #ifdef _MULTI_DATAMODEL
26998 
26999 	switch (ddi_model_convert_from(flag & FMODELS)) {
27000 	case DDI_MODEL_ILP32:
27001 		copysz = sizeof (struct mhioc_key_list32);
27002 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
27003 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27004 			    "sd_persistent_reservation_in_read_keys: "
27005 			    "failed ddi_copyin: mhioc_key_list32_t\n");
27006 			rval = EFAULT;
27007 			goto done;
27008 		}
27009 		li.listsize = li32.listsize;
27010 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
27011 		break;
27012 
27013 	case DDI_MODEL_NONE:
27014 		copysz = sizeof (mhioc_key_list_t);
27015 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27016 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27017 			    "sd_persistent_reservation_in_read_keys: "
27018 			    "failed ddi_copyin: mhioc_key_list_t\n");
27019 			rval = EFAULT;
27020 			goto done;
27021 		}
27022 		break;
27023 	}
27024 
27025 #else /* ! _MULTI_DATAMODEL */
27026 	copysz = sizeof (mhioc_key_list_t);
27027 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27028 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27029 		    "sd_persistent_reservation_in_read_keys: "
27030 		    "failed ddi_copyin: mhioc_key_list_t\n");
27031 		rval = EFAULT;
27032 		goto done;
27033 	}
27034 #endif
27035 
27036 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
27037 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
27038 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27039 
27040 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
27041 	    data_len, data_bufp)) != 0) {
27042 		goto done;
27043 	}
27044 	in = (sd_prin_readkeys_t *)data_bufp;
27045 	ptr->generation = BE_32(in->generation);
27046 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
27047 
27048 	/*
27049 	 * Return the min(listsize, listlen) keys
27050 	 */
27051 #ifdef _MULTI_DATAMODEL
27052 
27053 	switch (ddi_model_convert_from(flag & FMODELS)) {
27054 	case DDI_MODEL_ILP32:
27055 		li32.listlen = li.listlen;
27056 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27057 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27058 			    "sd_persistent_reservation_in_read_keys: "
27059 			    "failed ddi_copyout: mhioc_key_list32_t\n");
27060 			rval = EFAULT;
27061 			goto done;
27062 		}
27063 		break;
27064 
27065 	case DDI_MODEL_NONE:
27066 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27067 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27068 			    "sd_persistent_reservation_in_read_keys: "
27069 			    "failed ddi_copyout: mhioc_key_list_t\n");
27070 			rval = EFAULT;
27071 			goto done;
27072 		}
27073 		break;
27074 	}
27075 
27076 #else /* ! _MULTI_DATAMODEL */
27077 
27078 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27079 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27080 		    "sd_persistent_reservation_in_read_keys: "
27081 		    "failed ddi_copyout: mhioc_key_list_t\n");
27082 		rval = EFAULT;
27083 		goto done;
27084 	}
27085 
27086 #endif /* _MULTI_DATAMODEL */
27087 
27088 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27089 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27090 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27091 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27092 		    "sd_persistent_reservation_in_read_keys: "
27093 		    "failed ddi_copyout: keylist\n");
27094 		rval = EFAULT;
27095 	}
27096 done:
27097 	kmem_free(data_bufp, data_len);
27098 	return (rval);
27099 }
27100 
27101 
27102 /*
27103  *    Function: sd_persistent_reservation_in_read_resv
27104  *
27105  * Description: This routine is the driver entry point for handling CD-ROM
27106  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27107  *		by sending the SCSI-3 PRIN commands to the device.
27108  *		Process the read persistent reservations command response by
27109  *		copying the reservation information into the user provided
27110  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27111  *
27112  *   Arguments: un   -  Pointer to soft state struct for the target.
 *		usrp -	user provided pointer to multihost Persistent In Read
 *			Reservations structure (mhioc_inresvs_t)
27115  *		flag -	this argument is a pass through to ddi_copyxxx()
27116  *			directly from the mode argument of ioctl().
27117  *
27118  * Return Code: 0   - Success
27119  *		EACCES
27120  *		ENOTSUP
27121  *		errno return code from sd_send_scsi_cmd()
27122  *
27123  *     Context: Can sleep. Does not return until command is completed.
27124  */
27125 
27126 static int
27127 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27128     mhioc_inresvs_t *usrp, int flag)
27129 {
27130 #ifdef _MULTI_DATAMODEL
27131 	struct mhioc_resv_desc_list32 resvlist32;
27132 #endif
27133 	sd_prin_readresv_t	*in;
27134 	mhioc_inresvs_t		*ptr;
27135 	sd_readresv_desc_t	*readresv_ptr;
27136 	mhioc_resv_desc_list_t	resvlist;
27137 	mhioc_resv_desc_t 	resvdesc;
27138 	uchar_t			*data_bufp;
27139 	int 			data_len;
27140 	int			rval;
27141 	int			i;
27142 	size_t			copysz;
27143 	mhioc_resv_desc_t	*bufp;
27144 
27145 	if ((ptr = usrp) == NULL) {
27146 		return (EINVAL);
27147 	}
27148 
27149 	/*
27150 	 * Get the listsize from user
27151 	 */
27152 #ifdef _MULTI_DATAMODEL
27153 	switch (ddi_model_convert_from(flag & FMODELS)) {
27154 	case DDI_MODEL_ILP32:
27155 		copysz = sizeof (struct mhioc_resv_desc_list32);
27156 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27157 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27158 			    "sd_persistent_reservation_in_read_resv: "
27159 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27160 			rval = EFAULT;
27161 			goto done;
27162 		}
27163 		resvlist.listsize = resvlist32.listsize;
27164 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27165 		break;
27166 
27167 	case DDI_MODEL_NONE:
27168 		copysz = sizeof (mhioc_resv_desc_list_t);
27169 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27170 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27171 			    "sd_persistent_reservation_in_read_resv: "
27172 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27173 			rval = EFAULT;
27174 			goto done;
27175 		}
27176 		break;
27177 	}
27178 #else /* ! _MULTI_DATAMODEL */
27179 	copysz = sizeof (mhioc_resv_desc_list_t);
27180 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27181 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27182 		    "sd_persistent_reservation_in_read_resv: "
27183 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27184 		rval = EFAULT;
27185 		goto done;
27186 	}
27187 #endif /* ! _MULTI_DATAMODEL */
27188 
27189 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27190 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27191 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27192 
27193 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27194 	    data_len, data_bufp)) != 0) {
27195 		goto done;
27196 	}
27197 	in = (sd_prin_readresv_t *)data_bufp;
27198 	ptr->generation = BE_32(in->generation);
27199 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27200 
27201 	/*
27202 	 * Return the min(listsize, listlen( keys
27203 	 */
27204 #ifdef _MULTI_DATAMODEL
27205 
27206 	switch (ddi_model_convert_from(flag & FMODELS)) {
27207 	case DDI_MODEL_ILP32:
27208 		resvlist32.listlen = resvlist.listlen;
27209 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27210 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27211 			    "sd_persistent_reservation_in_read_resv: "
27212 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27213 			rval = EFAULT;
27214 			goto done;
27215 		}
27216 		break;
27217 
27218 	case DDI_MODEL_NONE:
27219 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27220 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27221 			    "sd_persistent_reservation_in_read_resv: "
27222 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27223 			rval = EFAULT;
27224 			goto done;
27225 		}
27226 		break;
27227 	}
27228 
27229 #else /* ! _MULTI_DATAMODEL */
27230 
27231 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27232 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27233 		    "sd_persistent_reservation_in_read_resv: "
27234 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27235 		rval = EFAULT;
27236 		goto done;
27237 	}
27238 
27239 #endif /* ! _MULTI_DATAMODEL */
27240 
27241 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27242 	bufp = resvlist.list;
27243 	copysz = sizeof (mhioc_resv_desc_t);
27244 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27245 	    i++, readresv_ptr++, bufp++) {
27246 
27247 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27248 		    MHIOC_RESV_KEY_SIZE);
27249 		resvdesc.type  = readresv_ptr->type;
27250 		resvdesc.scope = readresv_ptr->scope;
27251 		resvdesc.scope_specific_addr =
27252 		    BE_32(readresv_ptr->scope_specific_addr);
27253 
27254 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27255 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27256 			    "sd_persistent_reservation_in_read_resv: "
27257 			    "failed ddi_copyout: resvlist\n");
27258 			rval = EFAULT;
27259 			goto done;
27260 		}
27261 	}
27262 done:
27263 	kmem_free(data_bufp, data_len);
27264 	return (rval);
27265 }
27266 
27267 
27268 /*
27269  *    Function: sr_change_blkmode()
27270  *
27271  * Description: This routine is the driver entry point for handling CD-ROM
27272  *		block mode ioctl requests. Support for returning and changing
27273  *		the current block size in use by the device is implemented. The
27274  *		LBA size is changed via a MODE SELECT Block Descriptor.
27275  *
27276  *		This routine issues a mode sense with an allocation length of
27277  *		12 bytes for the mode page header and a single block descriptor.
27278  *
27279  *   Arguments: dev - the device 'dev_t'
27280  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27281  *		      CDROMSBLKMODE (set)
27282  *		data - current block size or requested block size
27283  *		flag - this argument is a pass through to ddi_copyxxx() directly
27284  *		       from the mode argument of ioctl().
27285  *
27286  * Return Code: the code returned by sd_send_scsi_cmd()
27287  *		EINVAL if invalid arguments are provided
27288  *		EFAULT if ddi_copyxxx() fails
27289  *		ENXIO if fail ddi_get_soft_state
27290  *		EIO if invalid mode sense block descriptor length
27291  *
27292  */
27293 
static int
sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un = NULL;
	struct mode_header		*sense_mhp, *select_mhp;
	struct block_descriptor		*sense_desc, *select_desc;
	int				current_bsize;
	int				rval = EINVAL;
	uchar_t				*sense = NULL;
	uchar_t				*select = NULL;

	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The block length is changed via the Mode Select block descriptor, the
	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
	 * required as part of this routine. Therefore the mode sense allocation
	 * length is specified to be the length of a mode page header and a
	 * block descriptor.
	 */
	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);

	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header *)sense;
	if ((sense_mhp->bdesc_length == 0) ||
	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense returned invalid block"
		    " descriptor length\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (EIO);
	}
	/*
	 * Extract the current LBA size from the block descriptor that
	 * immediately follows the mode parameter header (3 bytes, MSB first).
	 */
	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
	current_bsize = ((sense_desc->blksize_hi << 16) |
	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);

	/* Process command */
	switch (cmd) {
	case CDROMGBLKMODE:
		/*
		 * Return the block size obtained during the mode sense.
		 * rval is 0 here (set by the successful mode sense above),
		 * so a successful copyout returns 0.
		 */
		if (ddi_copyout(&current_bsize, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSBLKMODE:
		/* Validate the requested block size */
		switch (data) {
		case CDROM_BLK_512:
		case CDROM_BLK_1024:
		case CDROM_BLK_2048:
		case CDROM_BLK_2056:
		case CDROM_BLK_2336:
		case CDROM_BLK_2340:
		case CDROM_BLK_2352:
		case CDROM_BLK_2368:
		case CDROM_BLK_2448:
		case CDROM_BLK_2646:
		case CDROM_BLK_2647:
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: "
			    "Block Size '%ld' Not Supported\n", data);
			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
			return (EINVAL);
		}

		/*
		 * The current block size matches the requested block size so
		 * there is no need to send the mode select to change the size
		 */
		if (current_bsize == data) {
			break;
		}

		/* Build the select data for the requested block size */
		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
		select_mhp = (struct mode_header *)select;
		select_desc =
		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
		/*
		 * The LBA size is changed via the block descriptor, so the
		 * descriptor is built according to the user data
		 */
		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
		select_desc->blksize_lo  = (char)((data) & 0x000000ff);

		/* Send the mode select for the requested block size */
		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
		    SD_PATH_STANDARD)) != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: Mode Select Failed\n");
			/*
			 * The mode select failed for the requested block size,
			 * so reset the data for the original block size and
			 * send it to the target. The error is indicated by the
			 * return value for the failed mode select.
			 */
			select_desc->blksize_hi  = sense_desc->blksize_hi;
			select_desc->blksize_mid = sense_desc->blksize_mid;
			select_desc->blksize_lo  = sense_desc->blksize_lo;
			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
			    SD_PATH_STANDARD);
		} else {
			/*
			 * The new block size took effect; record it in the
			 * per-unit soft state under the unit mutex.
			 */
			ASSERT(!mutex_owned(SD_MUTEX(un)));
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, (uint32_t)data, 0);

			mutex_exit(SD_MUTEX(un));
		}
		break;
	default:
		/* should not reach here, but check anyway */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
		break;
	}

	if (select) {
		kmem_free(select, BUFLEN_CHG_BLK_MODE);
	}
	if (sense) {
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
	}
	return (rval);
}
27437 
27438 
27439 /*
27440  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27441  * implement driver support for getting and setting the CD speed. The command
27442  * set used will be based on the device type. If the device has not been
27443  * identified as MMC the Toshiba vendor specific mode page will be used. If
27444  * the device is MMC but does not support the Real Time Streaming feature
27445  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27446  * be used to read the speed.
27447  */
27448 
27449 /*
27450  *    Function: sr_change_speed()
27451  *
27452  * Description: This routine is the driver entry point for handling CD-ROM
27453  *		drive speed ioctl requests for devices supporting the Toshiba
27454  *		vendor specific drive speed mode page. Support for returning
27455  *		and changing the current drive speed in use by the device is
27456  *		implemented.
27457  *
27458  *   Arguments: dev - the device 'dev_t'
27459  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27460  *		      CDROMSDRVSPEED (set)
27461  *		data - current drive speed or requested drive speed
27462  *		flag - this argument is a pass through to ddi_copyxxx() directly
27463  *		       from the mode argument of ioctl().
27464  *
27465  * Return Code: the code returned by sd_send_scsi_cmd()
27466  *		EINVAL if invalid arguments are provided
27467  *		EFAULT if ddi_copyxxx() fails
27468  *		ENXIO if fail ddi_get_soft_state
27469  *		EIO if invalid mode sense block descriptor length
27470  */
27471 
27472 static int
27473 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27474 {
27475 	struct sd_lun			*un = NULL;
27476 	struct mode_header		*sense_mhp, *select_mhp;
27477 	struct mode_speed		*sense_page, *select_page;
27478 	int				current_speed;
27479 	int				rval = EINVAL;
27480 	int				bd_len;
27481 	uchar_t				*sense = NULL;
27482 	uchar_t				*select = NULL;
27483 
27484 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27485 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27486 		return (ENXIO);
27487 	}
27488 
27489 	/*
27490 	 * Note: The drive speed is being modified here according to a Toshiba
27491 	 * vendor specific mode page (0x31).
27492 	 */
27493 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27494 
27495 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27496 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27497 		SD_PATH_STANDARD)) != 0) {
27498 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27499 		    "sr_change_speed: Mode Sense Failed\n");
27500 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27501 		return (rval);
27502 	}
27503 	sense_mhp  = (struct mode_header *)sense;
27504 
27505 	/* Check the block descriptor len to handle only 1 block descriptor */
27506 	bd_len = sense_mhp->bdesc_length;
27507 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27508 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27509 		    "sr_change_speed: Mode Sense returned invalid block "
27510 		    "descriptor length\n");
27511 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27512 		return (EIO);
27513 	}
27514 
27515 	sense_page = (struct mode_speed *)
27516 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27517 	current_speed = sense_page->speed;
27518 
27519 	/* Process command */
27520 	switch (cmd) {
27521 	case CDROMGDRVSPEED:
27522 		/* Return the drive speed obtained during the mode sense */
27523 		if (current_speed == 0x2) {
27524 			current_speed = CDROM_TWELVE_SPEED;
27525 		}
27526 		if (ddi_copyout(&current_speed, (void *)data,
27527 		    sizeof (int), flag) != 0) {
27528 			rval = EFAULT;
27529 		}
27530 		break;
27531 	case CDROMSDRVSPEED:
27532 		/* Validate the requested drive speed */
27533 		switch ((uchar_t)data) {
27534 		case CDROM_TWELVE_SPEED:
27535 			data = 0x2;
27536 			/*FALLTHROUGH*/
27537 		case CDROM_NORMAL_SPEED:
27538 		case CDROM_DOUBLE_SPEED:
27539 		case CDROM_QUAD_SPEED:
27540 		case CDROM_MAXIMUM_SPEED:
27541 			break;
27542 		default:
27543 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27544 			    "sr_change_speed: "
27545 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27546 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27547 			return (EINVAL);
27548 		}
27549 
27550 		/*
27551 		 * The current drive speed matches the requested drive speed so
27552 		 * there is no need to send the mode select to change the speed
27553 		 */
27554 		if (current_speed == data) {
27555 			break;
27556 		}
27557 
27558 		/* Build the select data for the requested drive speed */
27559 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27560 		select_mhp = (struct mode_header *)select;
27561 		select_mhp->bdesc_length = 0;
27562 		select_page =
27563 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27564 		select_page =
27565 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27566 		select_page->mode_page.code = CDROM_MODE_SPEED;
27567 		select_page->mode_page.length = 2;
27568 		select_page->speed = (uchar_t)data;
27569 
27570 		/* Send the mode select for the requested block size */
27571 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27572 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27573 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27574 			/*
27575 			 * The mode select failed for the requested drive speed,
27576 			 * so reset the data for the original drive speed and
27577 			 * send it to the target. The error is indicated by the
27578 			 * return value for the failed mode select.
27579 			 */
27580 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27581 			    "sr_drive_speed: Mode Select Failed\n");
27582 			select_page->speed = sense_page->speed;
27583 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27584 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27585 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27586 		}
27587 		break;
27588 	default:
27589 		/* should not reach here, but check anyway */
27590 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27591 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27592 		rval = EINVAL;
27593 		break;
27594 	}
27595 
27596 	if (select) {
27597 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27598 	}
27599 	if (sense) {
27600 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27601 	}
27602 
27603 	return (rval);
27604 }
27605 
27606 
27607 /*
27608  *    Function: sr_atapi_change_speed()
27609  *
27610  * Description: This routine is the driver entry point for handling CD-ROM
27611  *		drive speed ioctl requests for MMC devices that do not support
27612  *		the Real Time Streaming feature (0x107).
27613  *
27614  *		Note: This routine will use the SET SPEED command which may not
27615  *		be supported by all devices.
27616  *
 *   Arguments: dev - the device 'dev_t'
 *		cmd - the request type; one of CDROMGDRVSPEED (get) or
 *		      CDROMSDRVSPEED (set)
 *		data - current drive speed or requested drive speed
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
27623  *
27624  * Return Code: the code returned by sd_send_scsi_cmd()
27625  *		EINVAL if invalid arguments are provided
27626  *		EFAULT if ddi_copyxxx() fails
27627  *		ENXIO if fail ddi_get_soft_state
27628  *		EIO if invalid mode sense block descriptor length
27629  */
27630 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Fetch the CD Capabilities mode page to learn current/max speeds */
	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD)) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/* Calculate the current and maximum drive speeds */
	/* The speed fields are big-endian 16-bit values within the page */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/*
		 * Convert the speed (KB/sec per the mode page) to the
		 * CDROM_*_SPEED multiplier returned to the caller. rval is
		 * 0 here (from the successful mode sense above).
		 */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		/* Note: current_speed is reused to hold the requested speed */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			/* 0xffff requests the drive's maximum speed */
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		/* Bytes 2-3 carry the read speed in KB/sec, MSB first */
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
		    UIO_SYSSPACE, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
27755 
27756 
27757 /*
27758  *    Function: sr_pause_resume()
27759  *
27760  * Description: This routine is the driver entry point for handling CD-ROM
27761  *		pause/resume ioctl requests. This only affects the audio play
27762  *		operation.
27763  *
27764  *   Arguments: dev - the device 'dev_t'
27765  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27766  *		      for setting the resume bit of the cdb.
27767  *
27768  * Return Code: the code returned by sd_send_scsi_cmd()
27769  *		EINVAL if invalid mode specified
27770  *
27771  */
27772 
27773 static int
27774 sr_pause_resume(dev_t dev, int cmd)
27775 {
27776 	struct sd_lun		*un;
27777 	struct uscsi_cmd	*com;
27778 	char			cdb[CDB_GROUP1];
27779 	int			rval;
27780 
27781 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27782 		return (ENXIO);
27783 	}
27784 
27785 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27786 	bzero(cdb, CDB_GROUP1);
27787 	cdb[0] = SCMD_PAUSE_RESUME;
27788 	switch (cmd) {
27789 	case CDROMRESUME:
27790 		cdb[8] = 1;
27791 		break;
27792 	case CDROMPAUSE:
27793 		cdb[8] = 0;
27794 		break;
27795 	default:
27796 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27797 		    " Command '%x' Not Supported\n", cmd);
27798 		rval = EINVAL;
27799 		goto done;
27800 	}
27801 
27802 	com->uscsi_cdb    = cdb;
27803 	com->uscsi_cdblen = CDB_GROUP1;
27804 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27805 
27806 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27807 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27808 
27809 done:
27810 	kmem_free(com, sizeof (*com));
27811 	return (rval);
27812 }
27813 
27814 
27815 /*
27816  *    Function: sr_play_msf()
27817  *
27818  * Description: This routine is the driver entry point for handling CD-ROM
27819  *		ioctl requests to output the audio signals at the specified
27820  *		starting address and continue the audio play until the specified
27821  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27822  *		Frame (MSF) format.
27823  *
27824  *   Arguments: dev	- the device 'dev_t'
27825  *		data	- pointer to user provided audio msf structure,
27826  *		          specifying start/end addresses.
27827  *		flag	- this argument is a pass through to ddi_copyxxx()
27828  *		          directly from the mode argument of ioctl().
27829  *
27830  * Return Code: the code returned by sd_send_scsi_cmd()
27831  *		EFAULT if ddi_copyxxx() fails
27832  *		ENXIO if fail ddi_get_soft_state
27833  *		EINVAL if data pointer is NULL
27834  */
27835 
27836 static int
27837 sr_play_msf(dev_t dev, caddr_t data, int flag)
27838 {
27839 	struct sd_lun		*un;
27840 	struct uscsi_cmd	*com;
27841 	struct cdrom_msf	msf_struct;
27842 	struct cdrom_msf	*msf = &msf_struct;
27843 	char			cdb[CDB_GROUP1];
27844 	int			rval;
27845 
27846 	if (data == NULL) {
27847 		return (EINVAL);
27848 	}
27849 
27850 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27851 		return (ENXIO);
27852 	}
27853 
27854 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27855 		return (EFAULT);
27856 	}
27857 
27858 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27859 	bzero(cdb, CDB_GROUP1);
27860 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27861 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27862 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27863 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27864 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27865 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27866 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27867 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27868 	} else {
27869 		cdb[3] = msf->cdmsf_min0;
27870 		cdb[4] = msf->cdmsf_sec0;
27871 		cdb[5] = msf->cdmsf_frame0;
27872 		cdb[6] = msf->cdmsf_min1;
27873 		cdb[7] = msf->cdmsf_sec1;
27874 		cdb[8] = msf->cdmsf_frame1;
27875 	}
27876 	com->uscsi_cdb    = cdb;
27877 	com->uscsi_cdblen = CDB_GROUP1;
27878 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27879 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27880 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27881 	kmem_free(com, sizeof (*com));
27882 	return (rval);
27883 }
27884 
27885 
27886 /*
27887  *    Function: sr_play_trkind()
27888  *
27889  * Description: This routine is the driver entry point for handling CD-ROM
27890  *		ioctl requests to output the audio signals at the specified
27891  *		starting address and continue the audio play until the specified
27892  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27893  *		format.
27894  *
27895  *   Arguments: dev	- the device 'dev_t'
27896  *		data	- pointer to user provided audio track/index structure,
27897  *		          specifying start/end addresses.
27898  *		flag	- this argument is a pass through to ddi_copyxxx()
27899  *		          directly from the mode argument of ioctl().
27900  *
27901  * Return Code: the code returned by sd_send_scsi_cmd()
27902  *		EFAULT if ddi_copyxxx() fails
27903  *		ENXIO if fail ddi_get_soft_state
27904  *		EINVAL if data pointer is NULL
27905  */
27906 
27907 static int
27908 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27909 {
27910 	struct cdrom_ti		ti_struct;
27911 	struct cdrom_ti		*ti = &ti_struct;
27912 	struct uscsi_cmd	*com = NULL;
27913 	char			cdb[CDB_GROUP1];
27914 	int			rval;
27915 
27916 	if (data == NULL) {
27917 		return (EINVAL);
27918 	}
27919 
27920 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27921 		return (EFAULT);
27922 	}
27923 
27924 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27925 	bzero(cdb, CDB_GROUP1);
27926 	cdb[0] = SCMD_PLAYAUDIO_TI;
27927 	cdb[4] = ti->cdti_trk0;
27928 	cdb[5] = ti->cdti_ind0;
27929 	cdb[7] = ti->cdti_trk1;
27930 	cdb[8] = ti->cdti_ind1;
27931 	com->uscsi_cdb    = cdb;
27932 	com->uscsi_cdblen = CDB_GROUP1;
27933 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27934 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27935 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27936 	kmem_free(com, sizeof (*com));
27937 	return (rval);
27938 }
27939 
27940 
27941 /*
27942  *    Function: sr_read_all_subcodes()
27943  *
27944  * Description: This routine is the driver entry point for handling CD-ROM
27945  *		ioctl requests to return raw subcode data while the target is
27946  *		playing audio (CDROMSUBCODE).
27947  *
27948  *   Arguments: dev	- the device 'dev_t'
27949  *		data	- pointer to user provided cdrom subcode structure,
27950  *		          specifying the transfer length and address.
27951  *		flag	- this argument is a pass through to ddi_copyxxx()
27952  *		          directly from the mode argument of ioctl().
27953  *
27954  * Return Code: the code returned by sd_send_scsi_cmd()
27955  *		EFAULT if ddi_copyxxx() fails
27956  *		ENXIO if fail ddi_get_soft_state
27957  *		EINVAL if data pointer is NULL
27958  */
27959 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Transfer size: cdsc_length blocks of raw subcode data */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/* MMC device: use READ CD with the sub-channel-only form */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	/* Data lands directly in the caller's buffer (UIO_USERSPACE) */
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28065 
28066 
28067 /*
28068  *    Function: sr_read_subchannel()
28069  *
28070  * Description: This routine is the driver entry point for handling CD-ROM
28071  *		ioctl requests to return the Q sub-channel data of the CD
28072  *		current position block. (CDROMSUBCHNL) The data includes the
28073  *		track number, index number, absolute CD-ROM address (LBA or MSF
 *		format per the user), track relative CD-ROM address (LBA or MSF
28075  *		format per the user), control data and audio status.
28076  *
28077  *   Arguments: dev	- the device 'dev_t'
28078  *		data	- pointer to user provided cdrom sub-channel structure
28079  *		flag	- this argument is a pass through to ddi_copyxxx()
28080  *		          directly from the mode argument of ioctl().
28081  *
28082  * Return Code: the code returned by sd_send_scsi_cmd()
28083  *		EFAULT if ddi_copyxxx() fails
28084  *		ENXIO if fail ddi_get_soft_state
28085  *		EINVAL if data pointer is NULL
28086  */
28087 
static int
sr_read_subchannel(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchanel;
	struct cdrom_subchnl	*subchnl = &subchanel;
	char			cdb[CDB_GROUP1];
	caddr_t			buffer;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the request to learn the desired address format (cdsc_format) */
	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
		return (EFAULT);
	}

	/* 16 bytes: Q sub-channel current-position data block */
	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_SUBCHANNEL;
	/* Set the MSF bit based on the user requested address format */
	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
	/*
	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
	 * returned
	 */
	cdb[2] = 0x40;
	/*
	 * Set byte 3 to specify the return data format. A value of 0x01
	 * indicates that the CD-ROM current position should be returned.
	 */
	cdb[3] = 0x01;
	cdb[8] = 0x10;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen  = 16;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, 16);
		kmem_free(com, sizeof (*com));
		return (rval);
	}

	/* Process the returned Q sub-channel data */
	subchnl->cdsc_audiostatus = buffer[1];
	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
	subchnl->cdsc_trk	= buffer[6];
	subchnl->cdsc_ind	= buffer[7];
	if (subchnl->cdsc_format & CDROM_LBA) {
		/* Absolute and relative addresses as 32-bit big-endian LBAs */
		subchnl->cdsc_absaddr.lba =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		subchnl->cdsc_reladdr.lba =
		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
		/* This target returns MSF values in BCD; decode them */
		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
	} else {
		subchnl->cdsc_absaddr.msf.minute = buffer[9];
		subchnl->cdsc_absaddr.msf.second = buffer[10];
		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
		subchnl->cdsc_reladdr.msf.minute = buffer[13];
		subchnl->cdsc_reladdr.msf.second = buffer[14];
		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
	}
	kmem_free(buffer, 16);
	kmem_free(com, sizeof (*com));
	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
	    != 0) {
		return (EFAULT);
	}
	return (rval);
}
28178 
28179 
28180 /*
28181  *    Function: sr_read_tocentry()
28182  *
28183  * Description: This routine is the driver entry point for handling CD-ROM
28184  *		ioctl requests to read from the Table of Contents (TOC)
28185  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28186  *		fields, the starting address (LBA or MSF format per the user)
28187  *		and the data mode if the user specified track is a data track.
28188  *
28189  *		Note: The READ HEADER (0x44) command used in this routine is
28190  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
28192  *		therefore the command is still implemented in this routine.
28193  *
28194  *   Arguments: dev	- the device 'dev_t'
28195  *		data	- pointer to user provided toc entry structure,
28196  *			  specifying the track # and the address format
28197  *			  (LBA or MSF).
28198  *		flag	- this argument is a pass through to ddi_copyxxx()
28199  *		          directly from the mode argument of ioctl().
28200  *
28201  * Return Code: the code returned by sd_send_scsi_cmd()
28202  *		EFAULT if ddi_copyxxx() fails
28203  *		ENXIO if fail ddi_get_soft_state
28204  *		EINVAL if data pointer is NULL
28205  */
28206 
28207 static int
28208 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28209 {
28210 	struct sd_lun		*un = NULL;
28211 	struct uscsi_cmd	*com;
28212 	struct cdrom_tocentry	toc_entry;
28213 	struct cdrom_tocentry	*entry = &toc_entry;
28214 	caddr_t			buffer;
28215 	int			rval;
28216 	char			cdb[CDB_GROUP1];
28217 
28218 	if (data == NULL) {
28219 		return (EINVAL);
28220 	}
28221 
28222 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28223 	    (un->un_state == SD_STATE_OFFLINE)) {
28224 		return (ENXIO);
28225 	}
28226 
28227 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28228 		return (EFAULT);
28229 	}
28230 
28231 	/* Validate the requested track and address format */
28232 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28233 		return (EINVAL);
28234 	}
28235 
28236 	if (entry->cdte_track == 0) {
28237 		return (EINVAL);
28238 	}
28239 
28240 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28241 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28242 	bzero(cdb, CDB_GROUP1);
28243 
28244 	cdb[0] = SCMD_READ_TOC;
28245 	/* Set the MSF bit based on the user requested address format  */
28246 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28247 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28248 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28249 	} else {
28250 		cdb[6] = entry->cdte_track;
28251 	}
28252 
28253 	/*
28254 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28255 	 * (4 byte TOC response header + 8 byte track descriptor)
28256 	 */
28257 	cdb[8] = 12;
28258 	com->uscsi_cdb	   = cdb;
28259 	com->uscsi_cdblen  = CDB_GROUP1;
28260 	com->uscsi_bufaddr = buffer;
28261 	com->uscsi_buflen  = 0x0C;
28262 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28263 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28264 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28265 	if (rval != 0) {
28266 		kmem_free(buffer, 12);
28267 		kmem_free(com, sizeof (*com));
28268 		return (rval);
28269 	}
28270 
28271 	/* Process the toc entry */
28272 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28273 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28274 	if (entry->cdte_format & CDROM_LBA) {
28275 		entry->cdte_addr.lba =
28276 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28277 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28278 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28279 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28280 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28281 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28282 		/*
28283 		 * Send a READ TOC command using the LBA address format to get
28284 		 * the LBA for the track requested so it can be used in the
28285 		 * READ HEADER request
28286 		 *
28287 		 * Note: The MSF bit of the READ HEADER command specifies the
28288 		 * output format. The block address specified in that command
28289 		 * must be in LBA format.
28290 		 */
28291 		cdb[1] = 0;
28292 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28293 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28294 		if (rval != 0) {
28295 			kmem_free(buffer, 12);
28296 			kmem_free(com, sizeof (*com));
28297 			return (rval);
28298 		}
28299 	} else {
28300 		entry->cdte_addr.msf.minute	= buffer[9];
28301 		entry->cdte_addr.msf.second	= buffer[10];
28302 		entry->cdte_addr.msf.frame	= buffer[11];
28303 		/*
28304 		 * Send a READ TOC command using the LBA address format to get
28305 		 * the LBA for the track requested so it can be used in the
28306 		 * READ HEADER request
28307 		 *
28308 		 * Note: The MSF bit of the READ HEADER command specifies the
28309 		 * output format. The block address specified in that command
28310 		 * must be in LBA format.
28311 		 */
28312 		cdb[1] = 0;
28313 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28314 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28315 		if (rval != 0) {
28316 			kmem_free(buffer, 12);
28317 			kmem_free(com, sizeof (*com));
28318 			return (rval);
28319 		}
28320 	}
28321 
28322 	/*
28323 	 * Build and send the READ HEADER command to determine the data mode of
28324 	 * the user specified track.
28325 	 */
28326 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28327 	    (entry->cdte_track != CDROM_LEADOUT)) {
28328 		bzero(cdb, CDB_GROUP1);
28329 		cdb[0] = SCMD_READ_HEADER;
28330 		cdb[2] = buffer[8];
28331 		cdb[3] = buffer[9];
28332 		cdb[4] = buffer[10];
28333 		cdb[5] = buffer[11];
28334 		cdb[8] = 0x08;
28335 		com->uscsi_buflen = 0x08;
28336 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28337 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28338 		if (rval == 0) {
28339 			entry->cdte_datamode = buffer[0];
28340 		} else {
28341 			/*
28342 			 * READ HEADER command failed, since this is
28343 			 * obsoleted in one spec, its better to return
28344 			 * -1 for an invlid track so that we can still
28345 			 * recieve the rest of the TOC data.
28346 			 */
28347 			entry->cdte_datamode = (uchar_t)-1;
28348 		}
28349 	} else {
28350 		entry->cdte_datamode = (uchar_t)-1;
28351 	}
28352 
28353 	kmem_free(buffer, 12);
28354 	kmem_free(com, sizeof (*com));
28355 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28356 		return (EFAULT);
28357 
28358 	return (rval);
28359 }
28360 
28361 
28362 /*
28363  *    Function: sr_read_tochdr()
28364  *
28365  * Description: This routine is the driver entry point for handling CD-ROM
28366  * 		ioctl requests to read the Table of Contents (TOC) header
 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28368  *		and ending track numbers
28369  *
28370  *   Arguments: dev	- the device 'dev_t'
28371  *		data	- pointer to user provided toc header structure,
28372  *			  specifying the starting and ending track numbers.
28373  *		flag	- this argument is a pass through to ddi_copyxxx()
28374  *			  directly from the mode argument of ioctl().
28375  *
28376  * Return Code: the code returned by sd_send_scsi_cmd()
28377  *		EFAULT if ddi_copyxxx() fails
28378  *		ENXIO if fail ddi_get_soft_state
28379  *		EINVAL if data pointer is NULL
28380  */
28381 
28382 static int
28383 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28384 {
28385 	struct sd_lun		*un;
28386 	struct uscsi_cmd	*com;
28387 	struct cdrom_tochdr	toc_header;
28388 	struct cdrom_tochdr	*hdr = &toc_header;
28389 	char			cdb[CDB_GROUP1];
28390 	int			rval;
28391 	caddr_t			buffer;
28392 
28393 	if (data == NULL) {
28394 		return (EINVAL);
28395 	}
28396 
28397 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28398 	    (un->un_state == SD_STATE_OFFLINE)) {
28399 		return (ENXIO);
28400 	}
28401 
28402 	buffer = kmem_zalloc(4, KM_SLEEP);
28403 	bzero(cdb, CDB_GROUP1);
28404 	cdb[0] = SCMD_READ_TOC;
28405 	/*
28406 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28407 	 * that the TOC header should be returned
28408 	 */
28409 	cdb[6] = 0x00;
28410 	/*
28411 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28412 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28413 	 */
28414 	cdb[8] = 0x04;
28415 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28416 	com->uscsi_cdb	   = cdb;
28417 	com->uscsi_cdblen  = CDB_GROUP1;
28418 	com->uscsi_bufaddr = buffer;
28419 	com->uscsi_buflen  = 0x04;
28420 	com->uscsi_timeout = 300;
28421 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28422 
28423 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28424 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28425 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28426 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28427 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28428 	} else {
28429 		hdr->cdth_trk0 = buffer[2];
28430 		hdr->cdth_trk1 = buffer[3];
28431 	}
28432 	kmem_free(buffer, 4);
28433 	kmem_free(com, sizeof (*com));
28434 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28435 		return (EFAULT);
28436 	}
28437 	return (rval);
28438 }
28439 
28440 
28441 /*
28442  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28443  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28444  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28445  * digital audio and extended architecture digital audio. These modes are
28446  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28447  * MMC specs.
28448  *
28449  * In addition to support for the various data formats these routines also
28450  * include support for devices that implement only the direct access READ
28451  * commands (0x08, 0x28), devices that implement the READ_CD commands
28452  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28453  * READ CDXA commands (0xD8, 0xDB)
28454  */
28455 
28456 /*
28457  *    Function: sr_read_mode1()
28458  *
28459  * Description: This routine is the driver entry point for handling CD-ROM
28460  *		ioctl read mode1 requests (CDROMREADMODE1).
28461  *
28462  *   Arguments: dev	- the device 'dev_t'
28463  *		data	- pointer to user provided cd read structure specifying
28464  *			  the lba buffer address and length.
28465  *		flag	- this argument is a pass through to ddi_copyxxx()
28466  *			  directly from the mode argument of ioctl().
28467  *
28468  * Return Code: the code returned by sd_send_scsi_cmd()
28469  *		EFAULT if ddi_copyxxx() fails
28470  *		ENXIO if fail ddi_get_soft_state
28471  *		EINVAL if data pointer is NULL
28472  */
28473 
28474 static int
28475 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28476 {
28477 	struct sd_lun		*un;
28478 	struct cdrom_read	mode1_struct;
28479 	struct cdrom_read	*mode1 = &mode1_struct;
28480 	int			rval;
28481 #ifdef _MULTI_DATAMODEL
28482 	/* To support ILP32 applications in an LP64 world */
28483 	struct cdrom_read32	cdrom_read32;
28484 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28485 #endif /* _MULTI_DATAMODEL */
28486 
28487 	if (data == NULL) {
28488 		return (EINVAL);
28489 	}
28490 
28491 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28492 	    (un->un_state == SD_STATE_OFFLINE)) {
28493 		return (ENXIO);
28494 	}
28495 
28496 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28497 	    "sd_read_mode1: entry: un:0x%p\n", un);
28498 
28499 #ifdef _MULTI_DATAMODEL
28500 	switch (ddi_model_convert_from(flag & FMODELS)) {
28501 	case DDI_MODEL_ILP32:
28502 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28503 			return (EFAULT);
28504 		}
28505 		/* Convert the ILP32 uscsi data from the application to LP64 */
28506 		cdrom_read32tocdrom_read(cdrd32, mode1);
28507 		break;
28508 	case DDI_MODEL_NONE:
28509 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28510 			return (EFAULT);
28511 		}
28512 	}
28513 #else /* ! _MULTI_DATAMODEL */
28514 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28515 		return (EFAULT);
28516 	}
28517 #endif /* _MULTI_DATAMODEL */
28518 
28519 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28520 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28521 
28522 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28523 	    "sd_read_mode1: exit: un:0x%p\n", un);
28524 
28525 	return (rval);
28526 }
28527 
28528 
28529 /*
28530  *    Function: sr_read_cd_mode2()
28531  *
28532  * Description: This routine is the driver entry point for handling CD-ROM
28533  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28534  *		support the READ CD (0xBE) command or the 1st generation
28535  *		READ CD (0xD4) command.
28536  *
28537  *   Arguments: dev	- the device 'dev_t'
28538  *		data	- pointer to user provided cd read structure specifying
28539  *			  the lba buffer address and length.
28540  *		flag	- this argument is a pass through to ddi_copyxxx()
28541  *			  directly from the mode argument of ioctl().
28542  *
28543  * Return Code: the code returned by sd_send_scsi_cmd()
28544  *		EFAULT if ddi_copyxxx() fails
28545  *		ENXIO if fail ddi_get_soft_state
28546  *		EINVAL if data pointer is NULL
28547  */
28548 
28549 static int
28550 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28551 {
28552 	struct sd_lun		*un;
28553 	struct uscsi_cmd	*com;
28554 	struct cdrom_read	mode2_struct;
28555 	struct cdrom_read	*mode2 = &mode2_struct;
28556 	uchar_t			cdb[CDB_GROUP5];
28557 	int			nblocks;
28558 	int			rval;
28559 #ifdef _MULTI_DATAMODEL
28560 	/*  To support ILP32 applications in an LP64 world */
28561 	struct cdrom_read32	cdrom_read32;
28562 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28563 #endif /* _MULTI_DATAMODEL */
28564 
28565 	if (data == NULL) {
28566 		return (EINVAL);
28567 	}
28568 
28569 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28570 	    (un->un_state == SD_STATE_OFFLINE)) {
28571 		return (ENXIO);
28572 	}
28573 
28574 #ifdef _MULTI_DATAMODEL
28575 	switch (ddi_model_convert_from(flag & FMODELS)) {
28576 	case DDI_MODEL_ILP32:
28577 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28578 			return (EFAULT);
28579 		}
28580 		/* Convert the ILP32 uscsi data from the application to LP64 */
28581 		cdrom_read32tocdrom_read(cdrd32, mode2);
28582 		break;
28583 	case DDI_MODEL_NONE:
28584 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28585 			return (EFAULT);
28586 		}
28587 		break;
28588 	}
28589 
28590 #else /* ! _MULTI_DATAMODEL */
28591 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28592 		return (EFAULT);
28593 	}
28594 #endif /* _MULTI_DATAMODEL */
28595 
28596 	bzero(cdb, sizeof (cdb));
28597 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28598 		/* Read command supported by 1st generation atapi drives */
28599 		cdb[0] = SCMD_READ_CDD4;
28600 	} else {
28601 		/* Universal CD Access Command */
28602 		cdb[0] = SCMD_READ_CD;
28603 	}
28604 
28605 	/*
28606 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
28607 	 */
28608 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28609 
28610 	/* set the start address */
28611 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28612 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28613 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28614 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28615 
28616 	/* set the transfer length */
28617 	nblocks = mode2->cdread_buflen / 2336;
28618 	cdb[6] = (uchar_t)(nblocks >> 16);
28619 	cdb[7] = (uchar_t)(nblocks >> 8);
28620 	cdb[8] = (uchar_t)nblocks;
28621 
28622 	/* set the filter bits */
28623 	cdb[9] = CDROM_READ_CD_USERDATA;
28624 
28625 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28626 	com->uscsi_cdb = (caddr_t)cdb;
28627 	com->uscsi_cdblen = sizeof (cdb);
28628 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28629 	com->uscsi_buflen = mode2->cdread_buflen;
28630 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28631 
28632 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28633 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28634 	kmem_free(com, sizeof (*com));
28635 	return (rval);
28636 }
28637 
28638 
28639 /*
28640  *    Function: sr_read_mode2()
28641  *
28642  * Description: This routine is the driver entry point for handling CD-ROM
28643  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28644  *		do not support the READ CD (0xBE) command.
28645  *
28646  *   Arguments: dev	- the device 'dev_t'
28647  *		data	- pointer to user provided cd read structure specifying
28648  *			  the lba buffer address and length.
28649  *		flag	- this argument is a pass through to ddi_copyxxx()
28650  *			  directly from the mode argument of ioctl().
28651  *
28652  * Return Code: the code returned by sd_send_scsi_cmd()
28653  *		EFAULT if ddi_copyxxx() fails
28654  *		ENXIO if fail ddi_get_soft_state
28655  *		EINVAL if data pointer is NULL
28656  *		EIO if fail to reset block size
28657  *		EAGAIN if commands are in progress in the driver
28658  */
28659 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	/* Fail if the unit is detached or offline */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

	/* Copy in the user's read request (lba, buffer, length) */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/*
	 * Change the device and soft state target block size to 2336.
	 * Note: if this fails the block size was never changed, so the
	 * 'done' path below correctly skips the restore step.
	 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/* set the start address */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If the read command above succeeded we still need to
		 * report an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
28795 
28796 
28797 /*
28798  *    Function: sr_sector_mode()
28799  *
28800  * Description: This utility function is used by sr_read_mode2 to set the target
28801  *		block size based on the user specified size. This is a legacy
28802  *		implementation based upon a vendor specific mode page
28803  *
 *   Arguments: dev	- the device 'dev_t'
 *		blksize	- the block size being requested; either 2336
 *			  (SD_MODE2_BLKSIZE) or 512.
 *
 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
 *		sd_send_scsi_MODE_SELECT()
 *		ENXIO if fail ddi_get_soft_state
28812  */
28813 
28814 static int
28815 sr_sector_mode(dev_t dev, uint32_t blksize)
28816 {
28817 	struct sd_lun	*un;
28818 	uchar_t		*sense;
28819 	uchar_t		*select;
28820 	int		rval;
28821 
28822 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28823 	    (un->un_state == SD_STATE_OFFLINE)) {
28824 		return (ENXIO);
28825 	}
28826 
28827 	sense = kmem_zalloc(20, KM_SLEEP);
28828 
28829 	/* Note: This is a vendor specific mode page (0x81) */
28830 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28831 	    SD_PATH_STANDARD)) != 0) {
28832 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28833 		    "sr_sector_mode: Mode Sense failed\n");
28834 		kmem_free(sense, 20);
28835 		return (rval);
28836 	}
28837 	select = kmem_zalloc(20, KM_SLEEP);
28838 	select[3] = 0x08;
28839 	select[10] = ((blksize >> 8) & 0xff);
28840 	select[11] = (blksize & 0xff);
28841 	select[12] = 0x01;
28842 	select[13] = 0x06;
28843 	select[14] = sense[14];
28844 	select[15] = sense[15];
28845 	if (blksize == SD_MODE2_BLKSIZE) {
28846 		select[14] |= 0x01;
28847 	}
28848 
28849 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28850 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28851 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28852 		    "sr_sector_mode: Mode Select failed\n");
28853 	} else {
28854 		/*
28855 		 * Only update the softstate block size if we successfully
28856 		 * changed the device block mode.
28857 		 */
28858 		mutex_enter(SD_MUTEX(un));
28859 		sd_update_block_info(un, blksize, 0);
28860 		mutex_exit(SD_MUTEX(un));
28861 	}
28862 	kmem_free(sense, 20);
28863 	kmem_free(select, 20);
28864 	return (rval);
28865 }
28866 
28867 
28868 /*
28869  *    Function: sr_read_cdda()
28870  *
28871  * Description: This routine is the driver entry point for handling CD-ROM
28872  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
28873  *		the target supports CDDA these requests are handled via a vendor
28874  *		specific command (0xD8) If the target does not support CDDA
28875  *		these requests are handled via the READ CD command (0xBE).
28876  *
28877  *   Arguments: dev	- the device 'dev_t'
28878  *		data	- pointer to user provided CD-DA structure specifying
28879  *			  the track starting address, transfer length, and
28880  *			  subcode options.
28881  *		flag	- this argument is a pass through to ddi_copyxxx()
28882  *			  directly from the mode argument of ioctl().
28883  *
28884  * Return Code: the code returned by sd_send_scsi_cmd()
28885  *		EFAULT if ddi_copyxxx() fails
28886  *		ENXIO if fail ddi_get_soft_state
28887  *		EINVAL if invalid arguments are provided
28888  *		ENOTTY
28889  */
28890 
static int
sr_read_cdda(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com;
	struct cdrom_cdda		*cdda;
	int				rval;
	size_t				buflen;
	char				cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdda32	cdrom_cdda32;
	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);

	/* Copy in the user's CD-DA request (addr, length, subcode options) */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: ddi_copyin Failed\n");
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdda->cdda_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdda->cdda_length, 0xFFFFFF);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/*
	 * Compute the total transfer size from the per-sector size implied
	 * by the requested subcode option and the sector count.
	 */
	switch (cdda->cdda_subcode) {
	case CDROM_DA_NO_SUBCODE:
		buflen = CDROM_BLK_2352 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBQ:
		buflen = CDROM_BLK_2368 * cdda->cdda_length;
		break;
	case CDROM_DA_ALL_SUBCODE:
		buflen = CDROM_BLK_2448 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBCODE_ONLY:
		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
		    cdda->cdda_subcode);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/* Build and send the command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_cfg_cdda == TRUE) {
		/* Drive supports the MMC READ CD (0xBE) command */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[1] = 0x04;
		/* Bytes 2-5: starting LBA; bytes 6-8: 3-byte sector count */
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
		cdb[9] = 0x10;
		/* Map the requested subcode option to READ CD byte 10 */
		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE :
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ :
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE :
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY :
			/* FALLTHROUGH */
		default :
			/* READ CD cannot return subcode-only data */
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			kmem_free(com, sizeof (*com));
			return (ENOTTY);
		}
	} else {
		/* Vendor specific READ CDDA (0xD8): 4-byte length field */
		cdb[0] = (char)SCMD_READ_CDDA;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
		cdb[10] = cdda->cdda_subcode;
	}

	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
	com->uscsi_buflen = buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	kmem_free(cdda, sizeof (struct cdrom_cdda));
	kmem_free(com, sizeof (*com));
	return (rval);
}
29038 
29039 
29040 /*
29041  *    Function: sr_read_cdxa()
29042  *
29043  * Description: This routine is the driver entry point for handling CD-ROM
29044  *		ioctl requests to return CD-XA (Extended Architecture) data.
29045  *		(CDROMCDXA).
29046  *
29047  *   Arguments: dev	- the device 'dev_t'
29048  *		data	- pointer to user provided CD-XA structure specifying
29049  *			  the data starting address, transfer length, and format
29050  *		flag	- this argument is a pass through to ddi_copyxxx()
29051  *			  directly from the mode argument of ioctl().
29052  *
29053  * Return Code: the code returned by sd_send_scsi_cmd()
29054  *		EFAULT if ddi_copyxxx() fails
29055  *		ENXIO if fail ddi_get_soft_state
29056  *		EINVAL if data pointer is NULL
29057  */
29058 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

	/*
	 * Copy in the user's CD-XA request, converting from the ILP32
	 * layout when the caller is a 32-bit application on an LP64
	 * kernel.  On any copyin failure the zalloc'ed cdxa is freed
	 * before returning.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * Size the transfer buffer from the requested format: each format
	 * implies a fixed per-block size, and read_flags selects the
	 * corresponding sector-type/field bits for the MMC READ CD CDB.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable drive: use the standard READ CD command with
		 * a 4-byte starting LBA (bytes 2-5) and a 3-byte transfer
		 * length (bytes 6-8).
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here to
		 * request a read of all subcodes.  This CDB carries a 4-byte
		 * transfer length (bytes 6-9) instead of MMC's 3-byte one.
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
29185 
29186 
29187 /*
29188  *    Function: sr_eject()
29189  *
29190  * Description: This routine is the driver entry point for handling CD-ROM
29191  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29192  *
29193  *   Arguments: dev	- the device 'dev_t'
29194  *
29195  * Return Code: the code returned by sd_send_scsi_cmd()
29196  */
29197 
29198 static int
29199 sr_eject(dev_t dev)
29200 {
29201 	struct sd_lun	*un;
29202 	int		rval;
29203 
29204 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29205 	    (un->un_state == SD_STATE_OFFLINE)) {
29206 		return (ENXIO);
29207 	}
29208 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29209 	    SD_PATH_STANDARD)) != 0) {
29210 		return (rval);
29211 	}
29212 
29213 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29214 	    SD_PATH_STANDARD);
29215 
29216 	if (rval == 0) {
29217 		mutex_enter(SD_MUTEX(un));
29218 		sr_ejected(un);
29219 		un->un_mediastate = DKIO_EJECTED;
29220 		cv_broadcast(&un->un_state_cv);
29221 		mutex_exit(SD_MUTEX(un));
29222 	}
29223 	return (rval);
29224 }
29225 
29226 
29227 /*
29228  *    Function: sr_ejected()
29229  *
29230  * Description: This routine updates the soft state structure to invalidate the
29231  *		geometry information after the media has been ejected or a
29232  *		media eject has been detected.
29233  *
29234  *   Arguments: un - driver soft state (unit) structure
29235  */
29236 
29237 static void
29238 sr_ejected(struct sd_lun *un)
29239 {
29240 	struct sd_errstats *stp;
29241 
29242 	ASSERT(un != NULL);
29243 	ASSERT(mutex_owned(SD_MUTEX(un)));
29244 
29245 	un->un_f_blockcount_is_valid	= FALSE;
29246 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29247 	un->un_f_geometry_is_valid	= FALSE;
29248 
29249 	if (un->un_errstats != NULL) {
29250 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29251 		stp->sd_capacity.value.ui64 = 0;
29252 	}
29253 }
29254 
29255 
29256 /*
29257  *    Function: sr_check_wp()
29258  *
29259  * Description: This routine checks the write protection of a removable
29260  *      media disk and hotpluggable devices via the write protect bit of
29261  *      the Mode Page Header device specific field. Some devices choke
29262  *      on unsupported mode page. In order to workaround this issue,
29263  *      this routine has been implemented to use 0x3f mode page(request
29264  *      for all pages) for all device types.
29265  *
29266  *   Arguments: dev		- the device 'dev_t'
29267  *
29268  * Return Code: int indicating if the device is write protected (1) or not (0)
29269  *
29270  *     Context: Kernel thread.
29271  *
29272  */
29273 
static int
sr_check_wp(dev_t dev)
{
	struct sd_lun	*un;
	uchar_t		device_specific;
	uchar_t		*sense;
	int		hdrlen;
	int		rval = FALSE;

	/*
	 * Note: The return codes for this routine should be reworked to
	 * properly handle the case of a NULL softstate.
	 */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (FALSE);
	}

	if (un->un_f_cfg_is_atapi == TRUE) {
		/*
		 * The mode page contents are not required; set the allocation
		 * length for the mode page header only.  ATAPI devices use
		 * the 10-byte (group 1) MODE SENSE with the larger header.
		 */
		hdrlen = MODE_HEADER_LENGTH_GRP2;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header_grp2 *)sense)->device_specific;
	} else {
		hdrlen = MODE_HEADER_LENGTH;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header *)sense)->device_specific;
	}

	/*
	 * The mode sense succeeded: report the WP bit from the device
	 * specific field of the mode parameter header.  If the mode
	 * sense failed above (not all devices understand this query)
	 * we jumped past this check and rval remains FALSE, i.e. the
	 * device is reported as not write protected.
	 */
	if (device_specific & WRITE_PROTECT) {
		rval = TRUE;
	}

err_exit:
	kmem_free(sense, hdrlen);
	return (rval);
}
29326 
29327 /*
29328  *    Function: sr_volume_ctrl()
29329  *
29330  * Description: This routine is the driver entry point for handling CD-ROM
29331  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29332  *
29333  *   Arguments: dev	- the device 'dev_t'
29334  *		data	- pointer to user audio volume control structure
29335  *		flag	- this argument is a pass through to ddi_copyxxx()
29336  *			  directly from the mode argument of ioctl().
29337  *
29338  * Return Code: the code returned by sd_send_scsi_cmd()
29339  *		EFAULT if ddi_copyxxx() fails
29340  *		ENXIO if fail ddi_get_soft_state
29341  *		EINVAL if data pointer is NULL
29342  *
29343  */
29344 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	/*
	 * Read the current audio control mode page so that fields the
	 * user does not control can be copied back unchanged in the
	 * subsequent mode select.  ATAPI/MMC devices use the 10-byte
	 * (group 1) commands and the grp2 header; legacy SCSI devices
	 * use the 6-byte (group 0) commands and the small header.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* Block descriptor length is split across two header bytes */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* Page data follows the header and any block descriptor */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* Mode select data carries no length or block descriptor */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the assembled page back with the command group that matches */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
29482 
29483 
29484 /*
29485  *    Function: sr_read_sony_session_offset()
29486  *
29487  * Description: This routine is the driver entry point for handling CD-ROM
29488  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29489  *		The address of the first track in the last session of a
29490  *		multi-session CD-ROM is returned
29491  *
29492  *		Note: This routine uses a vendor specific key value in the
29493  *		command control field without implementing any vendor check here
29494  *		or in the ioctl routine.
29495  *
29496  *   Arguments: dev	- the device 'dev_t'
29497  *		data	- pointer to an int to hold the requested address
29498  *		flag	- this argument is a pass through to ddi_copyxxx()
29499  *			  directly from the mode argument of ioctl().
29500  *
29501  * Return Code: the code returned by sd_send_scsi_cmd()
29502  *		EFAULT if ddi_copyxxx() fails
29503  *		ENXIO if fail ddi_get_soft_state
29504  *		EINVAL if data pointer is NULL
29505  */
29506 
static int
sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	caddr_t			buffer;
	char			cdb[CDB_GROUP1];
	int			session_offset = 0;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_TOC;
	/*
	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
	 * (4 byte TOC response header + 8 byte response data)
	 */
	cdb[8] = SONY_SESSION_OFFSET_LEN;
	/* Byte 9 is the control byte. A vendor specific value is used */
	cdb[9] = SONY_SESSION_OFFSET_KEY;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
		kmem_free(com, sizeof (*com));
		return (rval);
	}
	/*
	 * Only decode the response when the device marks it valid;
	 * otherwise the initialized offset of 0 is returned to the user.
	 * Bytes 8-11 hold the big-endian start address of the first
	 * track in the last session.
	 */
	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
		session_offset =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * The device reports the offset in units of the current
		 * target block size.  Convert to 2k blocks before
		 * returning it to the user.
		 */
		if (un->un_tgt_blocksize == CDROM_BLK_512) {
			session_offset >>= 2;
		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
			session_offset >>= 1;
		}
	}

	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
		rval = EFAULT;
	}

	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
	kmem_free(com, sizeof (*com));
	return (rval);
}
29573 
29574 
29575 /*
29576  *    Function: sd_wm_cache_constructor()
29577  *
29578  * Description: Cache Constructor for the wmap cache for the read/modify/write
29579  * 		devices.
29580  *
29581  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29582  *		un	- sd_lun structure for the device.
29583  *		flag	- the km flags passed to constructor
29584  *
29585  * Return Code: 0 on success.
29586  *		-1 on failure.
29587  */
29588 
29589 /*ARGSUSED*/
29590 static int
29591 sd_wm_cache_constructor(void *wm, void *un, int flags)
29592 {
29593 	bzero(wm, sizeof (struct sd_w_map));
29594 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29595 	return (0);
29596 }
29597 
29598 
29599 /*
29600  *    Function: sd_wm_cache_destructor()
29601  *
29602  * Description: Cache destructor for the wmap cache for the read/modify/write
29603  * 		devices.
29604  *
 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29606  *		un	- sd_lun structure for the device.
29607  */
29608 /*ARGSUSED*/
29609 static void
29610 sd_wm_cache_destructor(void *wm, void *un)
29611 {
29612 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29613 }
29614 
29615 
29616 /*
29617  *    Function: sd_range_lock()
29618  *
29619  * Description: Lock the range of blocks specified as parameter to ensure
29620  *		that read, modify write is atomic and no other i/o writes
29621  *		to the same location. The range is specified in terms
29622  *		of start and end blocks. Block numbers are the actual
29623  *		media block numbers and not system.
29624  *
29625  *   Arguments: un	- sd_lun structure for the device.
29626  *		startb - The starting block number
29627  *		endb - The end block number
29628  *		typ - type of i/o - simple/read_modify_write
29629  *
29630  * Return Code: wm  - pointer to the wmap structure.
29631  *
29632  *     Context: This routine can sleep.
29633  */
29634 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;
	struct sd_w_map *sl_wmp = NULL;
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Simple three-state machine, run entirely under SD_MUTEX except
	 * for the blocking kmem_cache_alloc below:
	 *   SD_WM_CHK_LIST   - decide whether the range is free;
	 *   SD_WM_LOCK_RANGE - obtain a wmap and claim the range;
	 *   SD_WM_WAIT_MAP   - sleep until a conflicting wmap is released.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting; this macro
						 * will also do wmp = NULL.
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if noone is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL)
					CHK_N_FREEWMP(un, wmp);
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
29782 
29783 
29784 /*
29785  *    Function: sd_get_range()
29786  *
29787  * Description: Find if there any overlapping I/O to this one
29788  *		Returns the write-map of 1st such I/O, NULL otherwise.
29789  *
29790  *   Arguments: un	- sd_lun structure for the device.
29791  *		startb - The starting block number
29792  *		endb - The end block number
29793  *
29794  * Return Code: wm  - pointer to the wmap structure.
29795  */
29796 
29797 static struct sd_w_map *
29798 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29799 {
29800 	struct sd_w_map *wmp;
29801 
29802 	ASSERT(un != NULL);
29803 
29804 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29805 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29806 			continue;
29807 		}
29808 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29809 			break;
29810 		}
29811 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29812 			break;
29813 		}
29814 	}
29815 
29816 	return (wmp);
29817 }
29818 
29819 
29820 /*
29821  *    Function: sd_free_inlist_wmap()
29822  *
29823  * Description: Unlink and free a write map struct.
29824  *
29825  *   Arguments: un      - sd_lun structure for the device.
29826  *		wmp	- sd_w_map which needs to be unlinked.
29827  */
29828 
29829 static void
29830 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29831 {
29832 	ASSERT(un != NULL);
29833 
29834 	if (un->un_wm == wmp) {
29835 		un->un_wm = wmp->wm_next;
29836 	} else {
29837 		wmp->wm_prev->wm_next = wmp->wm_next;
29838 	}
29839 
29840 	if (wmp->wm_next) {
29841 		wmp->wm_next->wm_prev = wmp->wm_prev;
29842 	}
29843 
29844 	wmp->wm_next = wmp->wm_prev = NULL;
29845 
29846 	kmem_cache_free(un->un_wm_cache, wmp);
29847 }
29848 
29849 
29850 /*
29851  *    Function: sd_range_unlock()
29852  *
29853  * Description: Unlock the range locked by wm.
29854  *		Free write map if nobody else is waiting on it.
29855  *
29856  *   Arguments: un      - sd_lun structure for the device.
29857  *              wmp     - sd_w_map which needs to be unlinked.
29858  */
29859 
29860 static void
29861 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29862 {
29863 	ASSERT(un != NULL);
29864 	ASSERT(wm != NULL);
29865 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29866 
29867 	mutex_enter(SD_MUTEX(un));
29868 
29869 	if (wm->wm_flags & SD_WTYPE_RMW) {
29870 		un->un_rmw_count--;
29871 	}
29872 
29873 	if (wm->wm_wanted_count) {
29874 		wm->wm_flags = 0;
29875 		/*
29876 		 * Broadcast that the wmap is available now.
29877 		 */
29878 		cv_broadcast(&wm->wm_avail);
29879 	} else {
29880 		/*
29881 		 * If no one is waiting on the map, it should be free'ed.
29882 		 */
29883 		sd_free_inlist_wmap(un, wm);
29884 	}
29885 
29886 	mutex_exit(SD_MUTEX(un));
29887 }
29888 
29889 
29890 /*
29891  *    Function: sd_read_modify_write_task
29892  *
29893  * Description: Called from a taskq thread to initiate the write phase of
29894  *		a read-modify-write request.  This is used for targets where
29895  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29896  *
29897  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29898  *
29899  *     Context: Called under taskq thread context.
29900  */
29901 
29902 static void
29903 sd_read_modify_write_task(void *arg)
29904 {
29905 	struct sd_mapblocksize_info	*bsp;
29906 	struct buf	*bp;
29907 	struct sd_xbuf	*xp;
29908 	struct sd_lun	*un;
29909 
29910 	bp = arg;	/* The bp is given in arg */
29911 	ASSERT(bp != NULL);
29912 
29913 	/* Get the pointer to the layer-private data struct */
29914 	xp = SD_GET_XBUF(bp);
29915 	ASSERT(xp != NULL);
29916 	bsp = xp->xb_private;
29917 	ASSERT(bsp != NULL);
29918 
29919 	un = SD_GET_UN(bp);
29920 	ASSERT(un != NULL);
29921 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29922 
29923 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29924 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29925 
29926 	/*
29927 	 * This is the write phase of a read-modify-write request, called
29928 	 * under the context of a taskq thread in response to the completion
29929 	 * of the read portion of the rmw request completing under interrupt
29930 	 * context. The write request must be sent from here down the iostart
29931 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29932 	 * we use the layer index saved in the layer-private data area.
29933 	 */
29934 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29935 
29936 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29937 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29938 }
29939 
29940 
29941 /*
29942  *    Function: sddump_do_read_of_rmw()
29943  *
29944  * Description: This routine will be called from sddump, If sddump is called
29945  *		with an I/O which not aligned on device blocksize boundary
29946  *		then the write has to be converted to read-modify-write.
29947  *		Do the read part here in order to keep sddump simple.
29948  *		Note - That the sd_mutex is held across the call to this
29949  *		routine.
29950  *
29951  *   Arguments: un	- sd_lun
29952  *		blkno	- block number in terms of media block size.
29953  *		nblk	- number of blocks.
29954  *		bpp	- pointer to pointer to the buf structure. On return
29955  *			from this function, *bpp points to the valid buffer
29956  *			to which the write has to be done.
29957  *
29958  * Return Code: 0 for success or errno-type return code
29959  */
29960 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
	struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Snapshot the target block size before dropping the mutex. */
	target_blocksize = un->un_tgt_blocksize;

	/*
	 * Drop SD_MUTEX for the buffer allocation and the polled I/O
	 * below.  It is re-acquired at "done" before returning, so the
	 * caller's locking contract (mutex held across this call) holds.
	 */
	mutex_exit(SD_MUTEX(un));

	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context: the command must complete by polling, not intr. */
	pkt->pkt_flags |= FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			/* Fetch sense ourselves only if the HBA did not. */
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to a target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			/* sd_reset_target() requires SD_MUTEX to be held. */
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/* On failure release the buffer; on success hand it to the caller. */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	mutex_enter(SD_MUTEX(un));
	return (err);
}
30085 
30086 
30087 /*
30088  *    Function: sd_failfast_flushq
30089  *
30090  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30091  *		in b_flags and move them onto the failfast queue, then kick
30092  *		off a thread to return all bp's on the failfast queue to
30093  *		their owners with an error set.
30094  *
30095  *   Arguments: un - pointer to the soft state struct for the instance.
30096  *
30097  *     Context: may execute in interrupt context.
30098  */
30099 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;	/* trails bp in the waitq scan */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 *
	 * Both the wait queue and the failfast queue are singly linked
	 * lists chained through bp->av_forw with explicit head and tail
	 * pointers in the soft state.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 * This is done by splicing the whole waitq chain onto the
		 * tail of the failfast queue in one operation.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* update kstat for each bp moved out of the waitq */
		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
		}

		/* empty the waitq */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			bp->av_forw = NULL;

			/*
			 * update kstat since the bp is moved out of
			 * the waitq
			 */
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 * Each bp is popped off the head and completed with EIO.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
30245 
30246 
30247 /*
30248  *    Function: sd_failfast_flushq_callback
30249  *
30250  * Description: Return TRUE if the given bp meets the criteria for failfast
30251  *		flushing. Used with ddi_xbuf_flushq(9F).
30252  *
30253  *   Arguments: bp - ptr to buf struct to be examined.
30254  *
30255  *     Context: Any
30256  */
30257 
30258 static int
30259 sd_failfast_flushq_callback(struct buf *bp)
30260 {
30261 	/*
30262 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30263 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30264 	 */
30265 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30266 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30267 }
30268 
30269 
30270 
30271 #if defined(__i386) || defined(__amd64)
30272 /*
30273  * Function: sd_setup_next_xfer
30274  *
30275  * Description: Prepare next I/O operation using DMA_PARTIAL
30276  *
30277  */
30278 
30279 static int
30280 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30281     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30282 {
30283 	ssize_t	num_blks_not_xfered;
30284 	daddr_t	strt_blk_num;
30285 	ssize_t	bytes_not_xfered;
30286 	int	rval;
30287 
30288 	ASSERT(pkt->pkt_resid == 0);
30289 
30290 	/*
30291 	 * Calculate next block number and amount to be transferred.
30292 	 *
30293 	 * How much data NOT transfered to the HBA yet.
30294 	 */
30295 	bytes_not_xfered = xp->xb_dma_resid;
30296 
30297 	/*
30298 	 * figure how many blocks NOT transfered to the HBA yet.
30299 	 */
30300 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30301 
30302 	/*
30303 	 * set starting block number to the end of what WAS transfered.
30304 	 */
30305 	strt_blk_num = xp->xb_blkno +
30306 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30307 
30308 	/*
30309 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30310 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
30311 	 * the disk mutex here.
30312 	 */
30313 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30314 	    strt_blk_num, num_blks_not_xfered);
30315 
30316 	if (rval == 0) {
30317 
30318 		/*
30319 		 * Success.
30320 		 *
30321 		 * Adjust things if there are still more blocks to be
30322 		 * transfered.
30323 		 */
30324 		xp->xb_dma_resid = pkt->pkt_resid;
30325 		pkt->pkt_resid = 0;
30326 
30327 		return (1);
30328 	}
30329 
30330 	/*
30331 	 * There's really only one possible return value from
30332 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
30333 	 * returns NULL.
30334 	 */
30335 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30336 
30337 	bp->b_resid = bp->b_bcount;
30338 	bp->b_flags |= B_ERROR;
30339 
30340 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30341 	    "Error setting up next portion of DMA transfer\n");
30342 
30343 	return (0);
30344 }
30345 #endif
30346 
30347 /*
30348  *    Function: sd_panic_for_res_conflict
30349  *
 * Description: Call panic with a string formatted with "Reservation Conflict"
30351  *		and a human readable identifier indicating the SD instance
30352  *		that experienced the reservation conflict.
30353  *
30354  *   Arguments: un - pointer to the soft state struct for the instance.
30355  *
30356  *     Context: may execute in interrupt context.
30357  */
30358 
30359 #define	SD_RESV_CONFLICT_FMT_LEN 40
30360 void
30361 sd_panic_for_res_conflict(struct sd_lun *un)
30362 {
30363 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30364 	char path_str[MAXPATHLEN];
30365 
30366 	(void) snprintf(panic_str, sizeof (panic_str),
30367 	    "Reservation Conflict\nDisk: %s",
30368 	    ddi_pathname(SD_DEVINFO(un), path_str));
30369 
30370 	panic(panic_str);
30371 }
30372 
30373 /*
30374  * Note: The following sd_faultinjection_ioctl( ) routines implement
30375  * driver support for handling fault injection for error analysis
30376  * causing faults in multiple layers of the driver.
30377  *
30378  */
30379 
30380 #ifdef SD_FAULT_INJECTION
30381 static uint_t   sd_fault_injection_on = 0;
30382 
30383 /*
30384  *    Function: sd_faultinjection_ioctl()
30385  *
30386  * Description: This routine is the driver entry point for handling
30387  *              faultinjection ioctls to inject errors into the
30388  *              layer model
30389  *
 *   Arguments: cmd	- the ioctl cmd received
30391  *		arg	- the arguments from user and returns
30392  */
30393 
30394 static void
30395 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
30396 
30397 	uint_t i;
30398 	uint_t rval;
30399 
30400 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30401 
30402 	mutex_enter(SD_MUTEX(un));
30403 
30404 	switch (cmd) {
30405 	case SDIOCRUN:
30406 		/* Allow pushed faults to be injected */
30407 		SD_INFO(SD_LOG_SDTEST, un,
30408 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30409 
30410 		sd_fault_injection_on = 1;
30411 
30412 		SD_INFO(SD_LOG_IOERR, un,
30413 		    "sd_faultinjection_ioctl: run finished\n");
30414 		break;
30415 
30416 	case SDIOCSTART:
30417 		/* Start Injection Session */
30418 		SD_INFO(SD_LOG_SDTEST, un,
30419 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30420 
30421 		sd_fault_injection_on = 0;
30422 		un->sd_injection_mask = 0xFFFFFFFF;
30423 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30424 			un->sd_fi_fifo_pkt[i] = NULL;
30425 			un->sd_fi_fifo_xb[i] = NULL;
30426 			un->sd_fi_fifo_un[i] = NULL;
30427 			un->sd_fi_fifo_arq[i] = NULL;
30428 		}
30429 		un->sd_fi_fifo_start = 0;
30430 		un->sd_fi_fifo_end = 0;
30431 
30432 		mutex_enter(&(un->un_fi_mutex));
30433 		un->sd_fi_log[0] = '\0';
30434 		un->sd_fi_buf_len = 0;
30435 		mutex_exit(&(un->un_fi_mutex));
30436 
30437 		SD_INFO(SD_LOG_IOERR, un,
30438 		    "sd_faultinjection_ioctl: start finished\n");
30439 		break;
30440 
30441 	case SDIOCSTOP:
30442 		/* Stop Injection Session */
30443 		SD_INFO(SD_LOG_SDTEST, un,
30444 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30445 		sd_fault_injection_on = 0;
30446 		un->sd_injection_mask = 0x0;
30447 
30448 		/* Empty stray or unuseds structs from fifo */
30449 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30450 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30451 				kmem_free(un->sd_fi_fifo_pkt[i],
30452 				    sizeof (struct sd_fi_pkt));
30453 			}
30454 			if (un->sd_fi_fifo_xb[i] != NULL) {
30455 				kmem_free(un->sd_fi_fifo_xb[i],
30456 				    sizeof (struct sd_fi_xb));
30457 			}
30458 			if (un->sd_fi_fifo_un[i] != NULL) {
30459 				kmem_free(un->sd_fi_fifo_un[i],
30460 				    sizeof (struct sd_fi_un));
30461 			}
30462 			if (un->sd_fi_fifo_arq[i] != NULL) {
30463 				kmem_free(un->sd_fi_fifo_arq[i],
30464 				    sizeof (struct sd_fi_arq));
30465 			}
30466 			un->sd_fi_fifo_pkt[i] = NULL;
30467 			un->sd_fi_fifo_un[i] = NULL;
30468 			un->sd_fi_fifo_xb[i] = NULL;
30469 			un->sd_fi_fifo_arq[i] = NULL;
30470 		}
30471 		un->sd_fi_fifo_start = 0;
30472 		un->sd_fi_fifo_end = 0;
30473 
30474 		SD_INFO(SD_LOG_IOERR, un,
30475 		    "sd_faultinjection_ioctl: stop finished\n");
30476 		break;
30477 
30478 	case SDIOCINSERTPKT:
30479 		/* Store a packet struct to be pushed onto fifo */
30480 		SD_INFO(SD_LOG_SDTEST, un,
30481 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30482 
30483 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30484 
30485 		sd_fault_injection_on = 0;
30486 
30487 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
30488 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30489 			kmem_free(un->sd_fi_fifo_pkt[i],
30490 			    sizeof (struct sd_fi_pkt));
30491 		}
30492 		if (arg != NULL) {
30493 			un->sd_fi_fifo_pkt[i] =
30494 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30495 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30496 				/* Alloc failed don't store anything */
30497 				break;
30498 			}
30499 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30500 			    sizeof (struct sd_fi_pkt), 0);
30501 			if (rval == -1) {
30502 				kmem_free(un->sd_fi_fifo_pkt[i],
30503 				    sizeof (struct sd_fi_pkt));
30504 				un->sd_fi_fifo_pkt[i] = NULL;
30505 			}
30506 		} else {
30507 			SD_INFO(SD_LOG_IOERR, un,
30508 			    "sd_faultinjection_ioctl: pkt null\n");
30509 		}
30510 		break;
30511 
30512 	case SDIOCINSERTXB:
30513 		/* Store a xb struct to be pushed onto fifo */
30514 		SD_INFO(SD_LOG_SDTEST, un,
30515 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30516 
30517 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30518 
30519 		sd_fault_injection_on = 0;
30520 
30521 		if (un->sd_fi_fifo_xb[i] != NULL) {
30522 			kmem_free(un->sd_fi_fifo_xb[i],
30523 			    sizeof (struct sd_fi_xb));
30524 			un->sd_fi_fifo_xb[i] = NULL;
30525 		}
30526 		if (arg != NULL) {
30527 			un->sd_fi_fifo_xb[i] =
30528 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30529 			if (un->sd_fi_fifo_xb[i] == NULL) {
30530 				/* Alloc failed don't store anything */
30531 				break;
30532 			}
30533 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30534 			    sizeof (struct sd_fi_xb), 0);
30535 
30536 			if (rval == -1) {
30537 				kmem_free(un->sd_fi_fifo_xb[i],
30538 				    sizeof (struct sd_fi_xb));
30539 				un->sd_fi_fifo_xb[i] = NULL;
30540 			}
30541 		} else {
30542 			SD_INFO(SD_LOG_IOERR, un,
30543 			    "sd_faultinjection_ioctl: xb null\n");
30544 		}
30545 		break;
30546 
30547 	case SDIOCINSERTUN:
30548 		/* Store a un struct to be pushed onto fifo */
30549 		SD_INFO(SD_LOG_SDTEST, un,
30550 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30551 
30552 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30553 
30554 		sd_fault_injection_on = 0;
30555 
30556 		if (un->sd_fi_fifo_un[i] != NULL) {
30557 			kmem_free(un->sd_fi_fifo_un[i],
30558 			    sizeof (struct sd_fi_un));
30559 			un->sd_fi_fifo_un[i] = NULL;
30560 		}
30561 		if (arg != NULL) {
30562 			un->sd_fi_fifo_un[i] =
30563 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30564 			if (un->sd_fi_fifo_un[i] == NULL) {
30565 				/* Alloc failed don't store anything */
30566 				break;
30567 			}
30568 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30569 			    sizeof (struct sd_fi_un), 0);
30570 			if (rval == -1) {
30571 				kmem_free(un->sd_fi_fifo_un[i],
30572 				    sizeof (struct sd_fi_un));
30573 				un->sd_fi_fifo_un[i] = NULL;
30574 			}
30575 
30576 		} else {
30577 			SD_INFO(SD_LOG_IOERR, un,
30578 			    "sd_faultinjection_ioctl: un null\n");
30579 		}
30580 
30581 		break;
30582 
30583 	case SDIOCINSERTARQ:
30584 		/* Store a arq struct to be pushed onto fifo */
30585 		SD_INFO(SD_LOG_SDTEST, un,
30586 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30587 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30588 
30589 		sd_fault_injection_on = 0;
30590 
30591 		if (un->sd_fi_fifo_arq[i] != NULL) {
30592 			kmem_free(un->sd_fi_fifo_arq[i],
30593 			    sizeof (struct sd_fi_arq));
30594 			un->sd_fi_fifo_arq[i] = NULL;
30595 		}
30596 		if (arg != NULL) {
30597 			un->sd_fi_fifo_arq[i] =
30598 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30599 			if (un->sd_fi_fifo_arq[i] == NULL) {
30600 				/* Alloc failed don't store anything */
30601 				break;
30602 			}
30603 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30604 			    sizeof (struct sd_fi_arq), 0);
30605 			if (rval == -1) {
30606 				kmem_free(un->sd_fi_fifo_arq[i],
30607 				    sizeof (struct sd_fi_arq));
30608 				un->sd_fi_fifo_arq[i] = NULL;
30609 			}
30610 
30611 		} else {
30612 			SD_INFO(SD_LOG_IOERR, un,
30613 			    "sd_faultinjection_ioctl: arq null\n");
30614 		}
30615 
30616 		break;
30617 
30618 	case SDIOCPUSH:
30619 		/* Push stored xb, pkt, un, and arq onto fifo */
30620 		sd_fault_injection_on = 0;
30621 
30622 		if (arg != NULL) {
30623 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30624 			if (rval != -1 &&
30625 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30626 				un->sd_fi_fifo_end += i;
30627 			}
30628 		} else {
30629 			SD_INFO(SD_LOG_IOERR, un,
30630 			    "sd_faultinjection_ioctl: push arg null\n");
30631 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30632 				un->sd_fi_fifo_end++;
30633 			}
30634 		}
30635 		SD_INFO(SD_LOG_IOERR, un,
30636 		    "sd_faultinjection_ioctl: push to end=%d\n",
30637 		    un->sd_fi_fifo_end);
30638 		break;
30639 
30640 	case SDIOCRETRIEVE:
30641 		/* Return buffer of log from Injection session */
30642 		SD_INFO(SD_LOG_SDTEST, un,
30643 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30644 
30645 		sd_fault_injection_on = 0;
30646 
30647 		mutex_enter(&(un->un_fi_mutex));
30648 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30649 		    un->sd_fi_buf_len+1, 0);
30650 		mutex_exit(&(un->un_fi_mutex));
30651 
30652 		if (rval == -1) {
30653 			/*
30654 			 * arg is possibly invalid setting
30655 			 * it to NULL for return
30656 			 */
30657 			arg = NULL;
30658 		}
30659 		break;
30660 	}
30661 
30662 	mutex_exit(SD_MUTEX(un));
30663 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
30664 			    " exit\n");
30665 }
30666 
30667 
30668 /*
30669  *    Function: sd_injection_log()
30670  *
30671  * Description: This routine adds buff to the already existing injection log
30672  *              for retrieval via faultinjection_ioctl for use in fault
30673  *              detection and recovery
30674  *
30675  *   Arguments: buf - the string to add to the log
30676  */
30677 
30678 static void
30679 sd_injection_log(char *buf, struct sd_lun *un)
30680 {
30681 	uint_t len;
30682 
30683 	ASSERT(un != NULL);
30684 	ASSERT(buf != NULL);
30685 
30686 	mutex_enter(&(un->un_fi_mutex));
30687 
30688 	len = min(strlen(buf), 255);
30689 	/* Add logged value to Injection log to be returned later */
30690 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30691 		uint_t	offset = strlen((char *)un->sd_fi_log);
30692 		char *destp = (char *)un->sd_fi_log + offset;
30693 		int i;
30694 		for (i = 0; i < len; i++) {
30695 			*destp++ = *buf++;
30696 		}
30697 		un->sd_fi_buf_len += len;
30698 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30699 	}
30700 
30701 	mutex_exit(&(un->un_fi_mutex));
30702 }
30703 
30704 
30705 /*
30706  *    Function: sd_faultinjection()
30707  *
30708  * Description: This routine takes the pkt and changes its
30709  *		content based on error injection scenerio.
30710  *
30711  *   Arguments: pktp	- packet to be changed
30712  */
30713 
static void
sd_faultinjection(struct scsi_pkt *pktp)
{
	uint_t i;
	struct sd_fi_pkt *fi_pkt;
	struct sd_fi_xb *fi_xb;
	struct sd_fi_un *fi_un;
	struct sd_fi_arq *fi_arq;
	struct buf *bp;
	struct sd_xbuf *xb;
	struct sd_lun *un;

	ASSERT(pktp != NULL);

	/* pull bp xb and un from pktp */
	bp = (struct buf *)pktp->pkt_private;
	xb = SD_GET_XBUF(bp);
	un = SD_GET_UN(bp);

	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un,
	    "sd_faultinjection: entry Injection from sdintr\n");

	/* if injection is off, or the fifo is empty, return */
	if (sd_fault_injection_on == 0 ||
		un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
		mutex_exit(SD_MUTEX(un));
		return;
	}


	/* take next set off fifo */
	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;

	fi_pkt = un->sd_fi_fifo_pkt[i];
	fi_xb = un->sd_fi_fifo_xb[i];
	fi_un = un->sd_fi_fifo_un[i];
	fi_arq = un->sd_fi_fifo_arq[i];


	/* set variables accordingly */
	/* set pkt if it was on fifo */
	if (fi_pkt != NULL) {
		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");

	}

	/* set xb if it was on fifo */
	if (fi_xb != NULL) {
		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
		SD_CONDSET(xb, xb, xb_victim_retry_count,
		    "xb_victim_retry_count");
		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");

		/* copy in block data from sense */
		if (fi_xb->xb_sense_data[0] != -1) {
			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
			    SENSE_LENGTH);
		}

		/*
		 * copy in extended sense codes
		 *
		 * NOTE(review): these SD_CONDSET calls cast the sd_xbuf
		 * pointer itself to struct scsi_extended_sense; presumably
		 * the intent is to patch the sense bytes carried by the
		 * xbuf -- confirm against the SD_CONDSET macro definition.
		 */
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
		    "es_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
		    "es_key");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
		    "es_add_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
		    es_qual_code, "es_qual_code");
	}

	/* set un if it was on fifo */
	if (fi_un != NULL) {
		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
		SD_CONDSET(un, un, un_ctype, "un_ctype");
		SD_CONDSET(un, un, un_reset_retry_count,
		    "un_reset_retry_count");
		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
		SD_CONDSET(un, un, un_f_geometry_is_valid,
		    "un_f_geometry_is_valid");
		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
		    "un_f_allow_bus_device_reset");
		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");

	}

	/* copy in auto request sense if it was on fifo */
	if (fi_arq != NULL) {
		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
	}

	/* free structs */
	if (un->sd_fi_fifo_pkt[i] != NULL) {
		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
	}
	if (un->sd_fi_fifo_xb[i] != NULL) {
		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
	}
	if (un->sd_fi_fifo_un[i] != NULL) {
		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
	}
	if (un->sd_fi_fifo_arq[i] != NULL) {
		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
	}

	/*
	 * kmem_free does not gurantee to set to NULL
	 * since we uses these to determine if we set
	 * values or not lets confirm they are always
	 * NULL after free
	 */
	un->sd_fi_fifo_pkt[i] = NULL;
	un->sd_fi_fifo_un[i] = NULL;
	un->sd_fi_fifo_xb[i] = NULL;
	un->sd_fi_fifo_arq[i] = NULL;

	/* consume this fifo entry */
	un->sd_fi_fifo_start++;

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
}
30850 
30851 #endif /* SD_FAULT_INJECTION */
30852 
30853 /*
30854  * This routine is invoked in sd_unit_attach(). Before calling it, the
30855  * properties in conf file should be processed already, and "hotpluggable"
30856  * property was processed also.
30857  *
30858  * The sd driver distinguishes 3 different type of devices: removable media,
30859  * non-removable media, and hotpluggable. Below the differences are defined:
30860  *
30861  * 1. Device ID
30862  *
30863  *     The device ID of a device is used to identify this device. Refer to
30864  *     ddi_devid_register(9F).
30865  *
30866  *     For a non-removable media disk device which can provide 0x80 or 0x83
30867  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30868  *     device ID is created to identify this device. For other non-removable
30869  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30871  *
30872  *     -------------------------------------------------------
30873  *     removable media   hotpluggable  | Can Have Device ID
30874  *     -------------------------------------------------------
30875  *         false             false     |     Yes
30876  *         false             true      |     Yes
30877  *         true                x       |     No
30878  *     ------------------------------------------------------
30879  *
30880  *
30881  * 2. SCSI group 4 commands
30882  *
30883  *     In SCSI specs, only some commands in group 4 command set can use
30884  *     8-byte addresses that can be used to access >2TB storage spaces.
30885  *     Other commands have no such capability. Without supporting group4,
30886  *     it is impossible to make full use of storage spaces of a disk with
30887  *     capacity larger than 2TB.
30888  *
30889  *     -----------------------------------------------
30890  *     removable media   hotpluggable   LP64  |  Group
30891  *     -----------------------------------------------
30892  *           false          false       false |   1
30893  *           false          false       true  |   4
30894  *           false          true        false |   1
30895  *           false          true        true  |   4
30896  *           true             x           x   |   5
30897  *     -----------------------------------------------
30898  *
30899  *
30900  * 3. Check for VTOC Label
30901  *
30902  *     If a direct-access disk has no EFI label, sd will check if it has a
30903  *     valid VTOC label. Now, sd also does that check for removable media
30904  *     and hotpluggable devices.
30905  *
30906  *     --------------------------------------------------------------
30907  *     Direct-Access   removable media    hotpluggable |  Check Label
30908  *     -------------------------------------------------------------
30909  *         false          false           false        |   No
30910  *         false          false           true         |   No
30911  *         false          true            false        |   Yes
30912  *         false          true            true         |   Yes
30913  *         true            x                x          |   Yes
30914  *     --------------------------------------------------------------
30915  *
30916  *
30917  * 4. Building default VTOC label
30918  *
30919  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30920  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30921  *     create default VTOC for them. Currently sd creates default VTOC label
30922  *     for all devices on x86 platform (VTOC_16), but only for removable
30923  *     media devices on SPARC (VTOC_8).
30924  *
30925  *     -----------------------------------------------------------
30926  *       removable media hotpluggable platform   |   Default Label
30927  *     -----------------------------------------------------------
30928  *             false          false    sparc     |     No
30929  *             false          true      x86      |     Yes
30930  *             false          true     sparc     |     Yes
30931  *             true             x        x       |     Yes
30932  *     ----------------------------------------------------------
30933  *
30934  *
30935  * 5. Supported blocksizes of target devices
30936  *
30937  *     Sd supports non-512-byte blocksize for removable media devices only.
30938  *     For other devices, only 512-byte blocksize is supported. This may be
30939  *     changed in near future because some RAID devices require non-512-byte
30940  *     blocksize
30941  *
30942  *     -----------------------------------------------------------
30943  *     removable media    hotpluggable    | non-512-byte blocksize
30944  *     -----------------------------------------------------------
30945  *           false          false         |   No
30946  *           false          true          |   No
30947  *           true             x           |   Yes
30948  *     -----------------------------------------------------------
30949  *
30950  *
30951  * 6. Automatic mount & unmount (i.e. vold)
30952  *
30953  *     Sd(7d) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
30954  *     if a device is a removable media device. It returns 1 for removable
30955  *     media devices, and 0 for others.
30956  *
30957  *     Vold treats a device as a removable one only if DKIOCREMOVABLE returns 1.
30958  *     And it does automounting only for removable media devices. In order to
30959  *     preserve users' experience and let vold continue to do automounting for
30960  *     USB disk devices, DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk
30961  *     devices.
30962  *
30963  *      ------------------------------------------------------
30964  *       removable media    hotpluggable   |  automatic mount
30965  *      ------------------------------------------------------
30966  *             false          false        |   No
30967  *             false          true         |   Yes
30968  *             true             x          |   Yes
30969  *      ------------------------------------------------------
30970  *
30971  *
30972  * 7. fdisk partition management
30973  *
30974  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
30975  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
30976  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30977  *     fdisk partitions on both x86 and SPARC platform.
30978  *
30979  *     -----------------------------------------------------------
30980  *       platform   removable media  USB/1394  |  fdisk supported
30981  *     -----------------------------------------------------------
30982  *        x86         X               X        |       true
30983  *     ------------------------------------------------------------
30984  *        sparc       X               X        |       false
30985  *     ------------------------------------------------------------
30986  *
30987  *
30988  * 8. MBOOT/MBR
30989  *
30990  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
30991  *     read/write mboot for removable media devices on sparc platform.
30992  *
30993  *     -----------------------------------------------------------
30994  *       platform   removable media  USB/1394  |  mboot supported
30995  *     -----------------------------------------------------------
30996  *        x86         X               X        |       true
30997  *     ------------------------------------------------------------
30998  *        sparc      false           false     |       false
30999  *        sparc      false           true      |       true
31000  *        sparc      true            false     |       true
31001  *        sparc      true            true      |       true
31002  *     ------------------------------------------------------------
31003  *
31004  *
31005  * 9.  error handling during opening device
31006  *
31007  *     If failed to open a disk device, an errno is returned. For some kinds
31008  *     of errors, different errno is returned depending on if this device is
31009  *     a removable media device. This brings USB/1394 hard disks in line with
31010  *     expected hard disk behavior. It is not expected that this breaks any
31011  *     application.
31012  *
31013  *     ------------------------------------------------------
31014  *       removable media    hotpluggable   |  errno
31015  *     ------------------------------------------------------
31016  *             false          false        |   EIO
31017  *             false          true         |   EIO
31018  *             true             x          |   ENXIO
31019  *     ------------------------------------------------------
31020  *
31021  *
31022  * 11. ioctls: DKIOCEJECT, CDROMEJECT
31023  *
31024  *     These IOCTLs are applicable only to removable media devices.
31025  *
31026  *     -----------------------------------------------------------
31027  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
31028  *     -----------------------------------------------------------
31029  *             false          false        |     No
31030  *             false          true         |     No
31031  *             true            x           |     Yes
31032  *     -----------------------------------------------------------
31033  *
31034  *
31035  * 12. Kstats for partitions
31036  *
31037  *     sd creates partition kstat for non-removable media devices. USB and
31038  *     Firewire hard disks now have partition kstats
31039  *
31040  *      ------------------------------------------------------
31041  *       removable media    hotpluggable   |   kstat
31042  *      ------------------------------------------------------
31043  *             false          false        |    Yes
31044  *             false          true         |    Yes
31045  *             true             x          |    No
31046  *       ------------------------------------------------------
31047  *
31048  *
31049  * 13. Removable media & hotpluggable properties
31050  *
31051  *     Sd driver creates a "removable-media" property for removable media
31052  *     devices. Parent nexus drivers create a "hotpluggable" property if
31053  *     it supports hotplugging.
31054  *
31055  *     ---------------------------------------------------------------------
31056  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
31057  *     ---------------------------------------------------------------------
31058  *       false            false       |    No                   No
31059  *       false            true        |    No                   Yes
31060  *       true             false       |    Yes                  No
31061  *       true             true        |    Yes                  Yes
31062  *     ---------------------------------------------------------------------
31063  *
31064  *
31065  * 14. Power Management
31066  *
31067  *     sd only power manages removable media devices or devices that support
31068  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
31069  *
31070  *     A parent nexus that supports hotplugging can also set "pm-capable"
31071  *     if the disk can be power managed.
31072  *
31073  *     ------------------------------------------------------------
31074  *       removable media hotpluggable pm-capable  |   power manage
31075  *     ------------------------------------------------------------
31076  *             false          false     false     |     No
31077  *             false          false     true      |     Yes
31078  *             false          true      false     |     No
31079  *             false          true      true      |     Yes
31080  *             true             x        x        |     Yes
31081  *     ------------------------------------------------------------
31082  *
31083  *      USB and firewire hard disks can now be power managed independently
31084  *      of the framebuffer
31085  *
31086  *
31087  * 15. Support for USB disks with capacity larger than 1TB
31088  *
31089  *     Currently, sd doesn't permit a fixed disk device with capacity
31090  *     larger than 1TB to be used in a 32-bit operating system environment.
31091  *     However, sd doesn't do that for removable media devices. Instead, it
31092  *     assumes that removable media devices cannot have a capacity larger
31093  *     than 1TB. Therefore, using those devices on 32-bit system is partially
31094  *     supported, which can cause some unexpected results.
31095  *
31096  *     ---------------------------------------------------------------------
31097  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31098  *     ---------------------------------------------------------------------
31099  *             false          false  |   true         |     no
31100  *             false          true   |   true         |     no
31101  *             true           false  |   true         |     Yes
31102  *             true           true   |   true         |     Yes
31103  *     ---------------------------------------------------------------------
31104  *
31105  *
31106  * 16. Check write-protection at open time
31107  *
31108  *     When a removable media device is being opened for writing without NDELAY
31109  *     flag, sd will check if this device is writable. If attempting to open
31110  *     without NDELAY flag a write-protected device, this operation will abort.
31111  *
31112  *     ------------------------------------------------------------
31113  *       removable media    USB/1394   |   WP Check
31114  *     ------------------------------------------------------------
31115  *             false          false    |     No
31116  *             false          true     |     No
31117  *             true           false    |     Yes
31118  *             true           true     |     Yes
31119  *     ------------------------------------------------------------
31120  *
31121  *
31122  * 17. syslog when corrupted VTOC is encountered
31123  *
31124  *      Currently, if an invalid VTOC is encountered, sd only prints a
31125  *      syslog message for fixed SCSI disks.
31126  *     ------------------------------------------------------------
31127  *       removable media    USB/1394   |   print syslog
31128  *     ------------------------------------------------------------
31129  *             false          false    |     Yes
31130  *             false          true     |     No
31131  *             true           false    |     No
31132  *             true           true     |     No
31133  *     ------------------------------------------------------------
31134  */
31135 static void
31136 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31137 {
31138 	int	pm_capable_prop;
31139 
31140 	ASSERT(un->un_sd);
31141 	ASSERT(un->un_sd->sd_inq);
31142 
31143 #if defined(_SUNOS_VTOC_16)
31144 	/*
31145 	 * For VTOC_16 devices, the default label will be created for all
31146 	 * devices. (see sd_build_default_label)
31147 	 */
31148 	un->un_f_default_vtoc_supported = TRUE;
31149 #endif
31150 
31151 	if (un->un_sd->sd_inq->inq_rmb) {
31152 		/*
31153 		 * The media of this device is removable. And for this kind
31154 		 * of devices, it is possible to change medium after openning
31155 		 * devices. Thus we should support this operation.
31156 		 */
31157 		un->un_f_has_removable_media = TRUE;
31158 
31159 #if defined(_SUNOS_VTOC_8)
31160 		/*
31161 		 * Note: currently, for VTOC_8 devices, default label is
31162 		 * created for removable and hotpluggable devices only.
31163 		 */
31164 		un->un_f_default_vtoc_supported = TRUE;
31165 #endif
31166 		/*
31167 		 * support non-512-byte blocksize of removable media devices
31168 		 */
31169 		un->un_f_non_devbsize_supported = TRUE;
31170 
31171 		/*
31172 		 * Assume that all removable media devices support DOOR_LOCK
31173 		 */
31174 		un->un_f_doorlock_supported = TRUE;
31175 
31176 		/*
31177 		 * For a removable media device, it is possible to be opened
31178 		 * with NDELAY flag when there is no media in drive, in this
31179 		 * case we don't care if device is writable. But if without
31180 		 * NDELAY flag, we need to check if media is write-protected.
31181 		 */
31182 		un->un_f_chk_wp_open = TRUE;
31183 
31184 		/*
31185 		 * need to start a SCSI watch thread to monitor media state,
31186 		 * when media is being inserted or ejected, notify syseventd.
31187 		 */
31188 		un->un_f_monitor_media_state = TRUE;
31189 
31190 		/*
31191 		 * Some devices don't support START_STOP_UNIT command.
31192 		 * Therefore, we'd better check if a device supports it
31193 		 * before sending it.
31194 		 */
31195 		un->un_f_check_start_stop = TRUE;
31196 
31197 		/*
31198 		 * support eject media ioctl:
31199 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31200 		 */
31201 		un->un_f_eject_media_supported = TRUE;
31202 
31203 		/*
31204 		 * Because many removable-media devices don't support
31205 		 * LOG_SENSE, we couldn't use this command to check if
31206 		 * a removable media device support power-management.
31207 		 * We assume that they support power-management via
31208 		 * START_STOP_UNIT command and can be spun up and down
31209 		 * without limitations.
31210 		 */
31211 		un->un_f_pm_supported = TRUE;
31212 
31213 		/*
31214 		 * Need to create a zero length (Boolean) property
31215 		 * removable-media for the removable media devices.
31216 		 * Note that the return value of the property is not being
31217 		 * checked, since if unable to create the property
31218 		 * then do not want the attach to fail altogether. Consistent
31219 		 * with other property creation in attach.
31220 		 */
31221 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31222 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31223 
31224 	} else {
31225 		/*
31226 		 * create device ID for device
31227 		 */
31228 		un->un_f_devid_supported = TRUE;
31229 
31230 		/*
31231 		 * Spin up non-removable-media devices once it is attached
31232 		 */
31233 		un->un_f_attach_spinup = TRUE;
31234 
31235 		/*
31236 		 * According to SCSI specification, Sense data has two kinds of
31237 		 * format: fixed format, and descriptor format. At present, we
31238 		 * don't support descriptor format sense data for removable
31239 		 * media.
31240 		 */
31241 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31242 			un->un_f_descr_format_supported = TRUE;
31243 		}
31244 
31245 		/*
31246 		 * kstats are created only for non-removable media devices.
31247 		 *
31248 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31249 		 * default is 1, so they are enabled by default.
31250 		 */
31251 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31252 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31253 			"enable-partition-kstats", 1));
31254 
31255 		/*
31256 		 * Check if HBA has set the "pm-capable" property.
31257 		 * If "pm-capable" exists and is non-zero then we can
31258 		 * power manage the device without checking the start/stop
31259 		 * cycle count log sense page.
31260 		 *
31261 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31262 		 * then we should not power manage the device.
31263 		 *
31264 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31265 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31266 		 * sd will check the start/stop cycle count log sense page
31267 		 * and power manage the device if the cycle count limit has
31268 		 * not been exceeded.
31269 		 */
31270 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31271 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31272 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31273 			un->un_f_log_sense_supported = TRUE;
31274 		} else {
31275 			/*
31276 			 * pm-capable property exists.
31277 			 *
31278 			 * Convert "TRUE" values for pm_capable_prop to
31279 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31280 			 * later. "TRUE" values are any values except
31281 			 * SD_PM_CAPABLE_FALSE (0) and
31282 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31283 			 */
31284 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31285 				un->un_f_log_sense_supported = FALSE;
31286 			} else {
31287 				un->un_f_pm_supported = TRUE;
31288 			}
31289 
31290 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31291 			    "sd_unit_attach: un:0x%p pm-capable "
31292 			    "property set to %d.\n", un, un->un_f_pm_supported);
31293 		}
31294 	}
31295 
31296 	if (un->un_f_is_hotpluggable) {
31297 #if defined(_SUNOS_VTOC_8)
31298 		/*
31299 		 * Note: currently, for VTOC_8 devices, default label is
31300 		 * created for removable and hotpluggable devices only.
31301 		 */
31302 		un->un_f_default_vtoc_supported = TRUE;
31303 #endif
31304 
31305 		/*
31306 		 * Temporarily, let hotpluggable devices pretend to be
31307 		 * removable-media devices for vold.
31308 		 */
31309 		un->un_f_monitor_media_state = TRUE;
31310 
31311 		un->un_f_check_start_stop = TRUE;
31312 
31313 	}
31314 
31315 	/*
31316 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31317 	 * labels.
31318 	 */
31319 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31320 	    (un->un_sd->sd_inq->inq_rmb)) {
31321 		/*
31322 		 * Direct access devices have disk label
31323 		 */
31324 		un->un_f_vtoc_label_supported = TRUE;
31325 	}
31326 
31327 	/*
31328 	 * Fdisk partitions are supported for all direct access devices on
31329 	 * x86 platform, and just for removable media and hotpluggable
31330 	 * devices on SPARC platform. Later, we will set the following flag
31331 	 * to FALSE if current device is not removable media or hotpluggable
31332 	 * device and if sd works on SAPRC platform.
31333 	 */
31334 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31335 		un->un_f_mboot_supported = TRUE;
31336 	}
31337 
31338 	if (!un->un_f_is_hotpluggable &&
31339 	    !un->un_sd->sd_inq->inq_rmb) {
31340 
31341 #if defined(_SUNOS_VTOC_8)
31342 		/*
31343 		 * Don't support fdisk on fixed disk
31344 		 */
31345 		un->un_f_mboot_supported = FALSE;
31346 #endif
31347 
31348 		/*
31349 		 * Fixed disk support SYNC CACHE
31350 		 */
31351 		un->un_f_sync_cache_supported = TRUE;
31352 
31353 		/*
31354 		 * For fixed disk, if its VTOC is not valid, we will write
31355 		 * errlog into system log
31356 		 */
31357 		if (un->un_f_vtoc_label_supported)
31358 			un->un_f_vtoc_errlog_supported = TRUE;
31359 	}
31360 }
31361