xref: /titanic_50/usr/src/uts/common/io/scsi/targets/sd.c (revision c7bb2ee82f92ee64ea8b84f5253552f3c2b251d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/vtrace.h>
44 #include <sys/note.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/efi_partition.h>
48 #include <sys/var.h>
49 #include <sys/aio_req.h>
50 
51 #ifdef __lock_lint
52 #define	_LP64
53 #define	__amd64
54 #endif
55 
56 #if (defined(__fibre))
57 /* Note: is there a leadville version of the following? */
58 #include <sys/fc4/fcal_linkapp.h>
59 #endif
60 #include <sys/taskq.h>
61 #include <sys/uuid.h>
62 #include <sys/byteorder.h>
63 #include <sys/sdt.h>
64 
65 #include "sd_xbuf.h"
66 
67 #include <sys/scsi/targets/sddef.h>
68 
69 
70 /*
71  * Loadable module info.
72  */
73 #if (defined(__fibre))
74 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
75 char _depends_on[]	= "misc/scsi drv/fcp";
76 #else
77 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
78 char _depends_on[]	= "misc/scsi";
79 #endif
80 
81 /*
82  * Define the interconnect type, to allow the driver to distinguish
83  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
84  *
85  * This is really for backward compatibility. In the future, the driver
86  * should actually check the "interconnect-type" property as reported by
87  * the HBA; however at present this property is not defined by all HBAs,
88  * so we will use this #define (1) to permit the driver to run in
89  * backward-compatibility mode; and (2) to print a notification message
90  * if an FC HBA does not support the "interconnect-type" property.  The
91  * behavior of the driver will be to assume parallel SCSI behaviors unless
92  * the "interconnect-type" property is defined by the HBA **AND** has a
93  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
94  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
95  * Channel behaviors (as per the old ssd).  (Note that the
96  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
97  * will result in the driver assuming parallel SCSI behaviors.)
98  *
99  * (see common/sys/scsi/impl/services.h)
100  *
101  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
102  * since some FC HBAs may already support that, and there is some code in
103  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
104  * default would confuse that code, and besides, things should work fine
105  * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
106  * "interconnect-type" property.
107  *
108  * Notes for off-by-1 workaround:
109  * -----------------------------
110  *
111  *    The SCSI READ_CAPACITY command returns the LBA of the last
112  *    logical block, but sd once treated this number as the disk's
113  *    capacity on the x86 platform. Since LBAs are zero-based, the
114  *    last block was lost on x86.
115  *
116  *    That workaround has now been removed. So that the present sd
117  *    driver still works with disks that were labeled/partitioned
118  *    by the previous sd, the following workarounds are added:
119  *
120  *    1) Locate backup EFI label: sd searches the next-to-last
121  *       block for the backup EFI label if it cannot be found on
122  *       the last block;
123  *    2) Calculate geometry: refer to sd_convert_geometry(). If
124  *       increasing the capacity by 1 causes the disk's capacity
125  *       to cross one of the limits in the CHS_values table, the
126  *       geometry info changes. This raises an issue: if the
127  *       primary VTOC label is destroyed, the format(1M) utility
128  *       can restore it from the backup VTOC labels, and format
129  *       locates those backups using the geometry reported by the
130  *       sd driver. A changed geometry would therefore prevent
131  *       format from finding the backup VTOC labels. To eliminate
132  *       this side effect, sd uses (capacity - 1) for the geometry;
133  *    3) 1TB disks: the VTOC uses a 32-bit signed int, so sd does
134  *       not support a VTOC on a disk with more than DK_MAX_BLOCKS
135  *       LBAs. However, an exactly-1TB disk was treated as
136  *       (1TB - 512B) in the past and could have a VTOC. To cope
137  *       with this, an exactly-1TB disk with a Solaris fdisk
138  *       partition is still allowed to work with sd.
139  */
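
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * the backup-EFI-label search described in item 1 above.  A correctly
 * written backup GPT header lives in the last LBA (capacity - 1); disks
 * labeled by the old off-by-one sd have it one block earlier
 * (capacity - 2), so both candidate locations are probed.
 */
#ifdef SD_NOTES_SKETCH
static diskaddr_t
sd_sketch_backup_gpt_lba(uint64_t capacity, int (*is_gpt)(diskaddr_t lba))
{
	if (is_gpt(capacity - 1))	/* expected location: last LBA */
		return (capacity - 1);
	if (is_gpt(capacity - 2))	/* legacy off-by-one location */
		return (capacity - 2);
	return ((diskaddr_t)-1);	/* no backup label found */
}
#endif	/* SD_NOTES_SKETCH */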
140 #if (defined(__fibre))
141 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
142 #else
143 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
144 #endif
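
/*
 * Illustrative sketch only: one plausible way to apply the policy
 * described above, querying the HBA's "interconnect-type" capability
 * via scsi_ifgetcap(9F) and falling back when it is undefined or
 * unsupported.  The helper name is hypothetical, and the
 * SD_INTERCONNECT_* values it returns are defined later in this file.
 */
#ifdef SD_NOTES_SKETCH
static int
sd_sketch_interconnect_type(struct scsi_address *ap)
{
	int it = scsi_ifgetcap(ap, "interconnect-type", 1);

	if (it == -1)		/* property not defined by the HBA */
		return (SD_DEFAULT_INTERCONNECT_TYPE);

	switch (it) {
	case INTERCONNECT_FIBRE:
		return (SD_INTERCONNECT_FIBRE);
	case INTERCONNECT_SSA:
		return (SD_INTERCONNECT_SSA);
	case INTERCONNECT_FABRIC:
		return (SD_INTERCONNECT_FABRIC);
	default:	/* e.g. INTERCONNECT_1394, INTERCONNECT_USB */
		return (SD_INTERCONNECT_PARALLEL);
	}
}
#endif	/* SD_NOTES_SKETCH */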
145 
146 /*
147  * The name of the driver, established from the module name in _init.
148  */
149 static	char *sd_label			= NULL;
150 
151 /*
152  * The driver name is unfortunately prefixed to some driver.conf properties.
153  */
154 #if (defined(__fibre))
155 #define	sd_max_xfer_size		ssd_max_xfer_size
156 #define	sd_config_list			ssd_config_list
157 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
158 static	char *sd_config_list		= "ssd-config-list";
159 #else
160 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
161 static	char *sd_config_list		= "sd-config-list";
162 #endif
163 
164 /*
165  * Driver global variables
166  */
167 
168 #if (defined(__fibre))
169 /*
170  * These #defines are to avoid namespace collisions that occur because this
171  * code is currently used to compile two separate driver modules: sd and ssd.
172  * All global variables need to be treated this way (even if declared static)
173  * in order to allow the debugger to resolve the names properly.
174  * It is anticipated that in the near future the ssd module will be obsoleted,
175  * at which time this namespace issue should go away.
176  */
177 #define	sd_state			ssd_state
178 #define	sd_io_time			ssd_io_time
179 #define	sd_failfast_enable		ssd_failfast_enable
180 #define	sd_ua_retry_count		ssd_ua_retry_count
181 #define	sd_report_pfa			ssd_report_pfa
182 #define	sd_max_throttle			ssd_max_throttle
183 #define	sd_min_throttle			ssd_min_throttle
184 #define	sd_rot_delay			ssd_rot_delay
185 
186 #define	sd_retry_on_reservation_conflict	\
187 					ssd_retry_on_reservation_conflict
188 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
189 #define	sd_resv_conflict_name		ssd_resv_conflict_name
190 
191 #define	sd_component_mask		ssd_component_mask
192 #define	sd_level_mask			ssd_level_mask
193 #define	sd_debug_un			ssd_debug_un
194 #define	sd_error_level			ssd_error_level
195 
196 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
197 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
198 
199 #define	sd_tr				ssd_tr
200 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
201 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
202 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
203 #define	sd_check_media_time		ssd_check_media_time
204 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
205 #define	sd_label_mutex			ssd_label_mutex
206 #define	sd_detach_mutex			ssd_detach_mutex
207 #define	sd_log_buf			ssd_log_buf
208 #define	sd_log_mutex			ssd_log_mutex
209 
210 #define	sd_disk_table			ssd_disk_table
211 #define	sd_disk_table_size		ssd_disk_table_size
212 #define	sd_sense_mutex			ssd_sense_mutex
213 #define	sd_cdbtab			ssd_cdbtab
214 
215 #define	sd_cb_ops			ssd_cb_ops
216 #define	sd_ops				ssd_ops
217 #define	sd_additional_codes		ssd_additional_codes
218 
219 #define	sd_minor_data			ssd_minor_data
220 #define	sd_minor_data_efi		ssd_minor_data_efi
221 
222 #define	sd_tq				ssd_tq
223 #define	sd_wmr_tq			ssd_wmr_tq
224 #define	sd_taskq_name			ssd_taskq_name
225 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
226 #define	sd_taskq_minalloc		ssd_taskq_minalloc
227 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
228 
229 #define	sd_dump_format_string		ssd_dump_format_string
230 
231 #define	sd_iostart_chain		ssd_iostart_chain
232 #define	sd_iodone_chain			ssd_iodone_chain
233 
234 #define	sd_pm_idletime			ssd_pm_idletime
235 
236 #define	sd_force_pm_supported		ssd_force_pm_supported
237 
238 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
239 
240 #endif
241 
242 
243 #ifdef	SDDEBUG
244 int	sd_force_pm_supported		= 0;
245 #endif	/* SDDEBUG */
246 
247 void *sd_state				= NULL;
248 int sd_io_time				= SD_IO_TIME;
249 int sd_failfast_enable			= 1;
250 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
251 int sd_report_pfa			= 1;
252 int sd_max_throttle			= SD_MAX_THROTTLE;
253 int sd_min_throttle			= SD_MIN_THROTTLE;
254 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
255 int sd_qfull_throttle_enable		= TRUE;
256 
257 int sd_retry_on_reservation_conflict	= 1;
258 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
259 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
260 
261 static int sd_dtype_optical_bind	= -1;
262 
263 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
264 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
265 
266 /*
267  * Global data for debug logging. To enable debug printing, sd_component_mask
268  * and sd_level_mask should be set to the desired bit patterns as outlined in
269  * sddef.h.
270  */
271 uint_t	sd_component_mask		= 0x0;
272 uint_t	sd_level_mask			= 0x0;
273 struct	sd_lun *sd_debug_un		= NULL;
274 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
275 
276 /* Note: these may go away in the future... */
277 static uint32_t	sd_xbuf_active_limit	= 512;
278 static uint32_t sd_xbuf_reserve_limit	= 16;
279 
280 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
281 
282 /*
283  * Timer value used to reset the throttle after it has been reduced
284  * (typically in response to TRAN_BUSY or STATUS_QFULL)
285  */
286 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
287 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
288 
289 /*
290  * Interval value associated with the media change scsi watch.
291  */
292 static int sd_check_media_time		= 3000000;
293 
294 /*
295  * Wait value used for in progress operations during a DDI_SUSPEND
296  */
297 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
298 
299 /*
300  * sd_label_mutex protects a static buffer used in the disk label
301  * component of the driver
302  */
303 static kmutex_t sd_label_mutex;
304 
305 /*
306  * sd_detach_mutex protects un_layer_count, un_detach_count, and
307  * un_opens_in_progress in the sd_lun structure.
308  */
309 static kmutex_t sd_detach_mutex;
310 
311 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
312 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
313 
314 /*
315  * Global buffer and mutex for debug logging
316  */
317 static char	sd_log_buf[1024];
318 static kmutex_t	sd_log_mutex;
319 
320 
321 /*
322  * "Smart" Probe Caching structs, globals, #defines, etc.
323  * For parallel SCSI and non-self-identifying devices only.
324  */
325 
326 /*
327  * The following resources and routines are implemented to support
328  * "smart" probing, which caches the scsi_probe() results in an array,
329  * in order to help avoid long probe times.
330  */
331 struct sd_scsi_probe_cache {
332 	struct	sd_scsi_probe_cache	*next;
333 	dev_info_t	*pdip;
334 	int		cache[NTARGETS_WIDE];
335 };
336 
337 static kmutex_t	sd_scsi_probe_cache_mutex;
338 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
339 
340 /*
341  * Really we only need protection on the head of the linked list, but
342  * better safe than sorry.
343  */
344 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
345     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
346 
347 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
348     sd_scsi_probe_cache_head))
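
/*
 * Illustrative sketch only (hypothetical helper): the caching idea behind
 * sd_scsi_probe_with_cache().  Probe results are remembered per parent
 * HBA node and per target so that a repeated probe of a known-absent
 * target can return immediately rather than timing out on the bus again.
 */
#ifdef SD_NOTES_SKETCH
static int
sd_sketch_cached_probe(dev_info_t *pdip, int tgt, int (*do_probe)(void))
{
	struct sd_scsi_probe_cache *cp;

	if (tgt < 0 || tgt >= NTARGETS_WIDE)
		return (do_probe());		/* outside cache range */

	mutex_enter(&sd_scsi_probe_cache_mutex);
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip)
			break;			/* entry for this HBA */
	}
	if (cp != NULL && cp->cache[tgt] == SCSIPROBE_NORESP) {
		mutex_exit(&sd_scsi_probe_cache_mutex);
		return (SCSIPROBE_NORESP);	/* cached "no device" */
	}
	mutex_exit(&sd_scsi_probe_cache_mutex);
	return (do_probe());		/* fall through to a real probe */
}
#endif	/* SD_NOTES_SKETCH */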
349 
350 
351 /*
352  * Vendor specific data name property declarations
353  */
354 
355 #if defined(__fibre) || defined(__i386) || defined(__amd64)
356 
357 static sd_tunables seagate_properties = {
358 	SEAGATE_THROTTLE_VALUE,
359 	0,
360 	0,
361 	0,
362 	0,
363 	0,
364 	0,
365 	0,
366 	0
367 };
368 
369 
370 static sd_tunables fujitsu_properties = {
371 	FUJITSU_THROTTLE_VALUE,
372 	0,
373 	0,
374 	0,
375 	0,
376 	0,
377 	0,
378 	0,
379 	0
380 };
381 
382 static sd_tunables ibm_properties = {
383 	IBM_THROTTLE_VALUE,
384 	0,
385 	0,
386 	0,
387 	0,
388 	0,
389 	0,
390 	0,
391 	0
392 };
393 
394 static sd_tunables purple_properties = {
395 	PURPLE_THROTTLE_VALUE,
396 	0,
397 	0,
398 	PURPLE_BUSY_RETRIES,
399 	PURPLE_RESET_RETRY_COUNT,
400 	PURPLE_RESERVE_RELEASE_TIME,
401 	0,
402 	0,
403 	0
404 };
405 
406 static sd_tunables sve_properties = {
407 	SVE_THROTTLE_VALUE,
408 	0,
409 	0,
410 	SVE_BUSY_RETRIES,
411 	SVE_RESET_RETRY_COUNT,
412 	SVE_RESERVE_RELEASE_TIME,
413 	SVE_MIN_THROTTLE_VALUE,
414 	SVE_DISKSORT_DISABLED_FLAG,
415 	0
416 };
417 
418 static sd_tunables maserati_properties = {
419 	0,
420 	0,
421 	0,
422 	0,
423 	0,
424 	0,
425 	0,
426 	MASERATI_DISKSORT_DISABLED_FLAG,
427 	MASERATI_LUN_RESET_ENABLED_FLAG
428 };
429 
430 static sd_tunables pirus_properties = {
431 	PIRUS_THROTTLE_VALUE,
432 	0,
433 	PIRUS_NRR_COUNT,
434 	PIRUS_BUSY_RETRIES,
435 	PIRUS_RESET_RETRY_COUNT,
436 	0,
437 	PIRUS_MIN_THROTTLE_VALUE,
438 	PIRUS_DISKSORT_DISABLED_FLAG,
439 	PIRUS_LUN_RESET_ENABLED_FLAG
440 };
441 
442 #endif
443 
444 #if (defined(__sparc) && !defined(__fibre)) || \
445 	(defined(__i386) || defined(__amd64))
446 
447 
448 static sd_tunables elite_properties = {
449 	ELITE_THROTTLE_VALUE,
450 	0,
451 	0,
452 	0,
453 	0,
454 	0,
455 	0,
456 	0,
457 	0
458 };
459 
460 static sd_tunables st31200n_properties = {
461 	ST31200N_THROTTLE_VALUE,
462 	0,
463 	0,
464 	0,
465 	0,
466 	0,
467 	0,
468 	0,
469 	0
470 };
471 
472 #endif /* Fibre or not */
473 
474 static sd_tunables lsi_properties_scsi = {
475 	LSI_THROTTLE_VALUE,
476 	0,
477 	LSI_NOTREADY_RETRIES,
478 	0,
479 	0,
480 	0,
481 	0,
482 	0,
483 	0
484 };
485 
486 static sd_tunables symbios_properties = {
487 	SYMBIOS_THROTTLE_VALUE,
488 	0,
489 	SYMBIOS_NOTREADY_RETRIES,
490 	0,
491 	0,
492 	0,
493 	0,
494 	0,
495 	0
496 };
497 
498 static sd_tunables lsi_properties = {
499 	0,
500 	0,
501 	LSI_NOTREADY_RETRIES,
502 	0,
503 	0,
504 	0,
505 	0,
506 	0,
507 	0
508 };
509 
510 static sd_tunables lsi_oem_properties = {
511 	0,
512 	0,
513 	LSI_OEM_NOTREADY_RETRIES,
514 	0,
515 	0,
516 	0,
517 	0,
518 	0,
519 	0
520 };
521 
522 
523 
524 #if (defined(SD_PROP_TST))
525 
526 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
527 #define	SD_TST_THROTTLE_VAL	16
528 #define	SD_TST_NOTREADY_VAL	12
529 #define	SD_TST_BUSY_VAL		60
530 #define	SD_TST_RST_RETRY_VAL	36
531 #define	SD_TST_RSV_REL_TIME	60
532 
533 static sd_tunables tst_properties = {
534 	SD_TST_THROTTLE_VAL,
535 	SD_TST_CTYPE_VAL,
536 	SD_TST_NOTREADY_VAL,
537 	SD_TST_BUSY_VAL,
538 	SD_TST_RST_RETRY_VAL,
539 	SD_TST_RSV_REL_TIME,
540 	0,
541 	0,
542 	0
543 };
544 #endif
545 
546 /* Similar to the ANSI toupper; note that (C) is evaluated more than once */
547 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
548 
549 /*
550  * Static Driver Configuration Table
551  *
552  * This is the table of disks which need throttle adjustment (or, perhaps
553  * something else as defined by the flags at a future time.)  device_id
554  * is a string consisting of concatenated vid (vendor), pid (product/model)
555  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
556  * the parts of the string are as defined by the sizes in the scsi_inquiry
557  * structure.  Device type is searched as far as the device_id string is
558  * defined.  Flags defines which values are to be set in the driver from the
559  * properties list.
560  *
561  * Entries below which begin and end with a "*" are a special case.
562  * These do not have a specific vendor, and the string which follows
563  * can appear anywhere in the 16 byte PID portion of the inquiry data.
564  *
565  * Entries below which begin and end with a " " (blank) are a special
566  * case. The comparison function treats multiple consecutive blanks as
567  * equivalent to a single blank. For example, this causes a sd_disk_table
568  * entry of " NEC CDROM " to match a device's id string of
569  * "NEC       CDROM".  (A sketch of this rule follows the table.)
570  *
571  * Note: The MD21 controller type has been obsoleted.
572  *	 ST318202F is a Legacy device
573  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
574  *	 made with an FC connection. The entries here are a legacy.
575  */
576 static sd_disk_config_t sd_disk_table[] = {
577 #if defined(__fibre) || defined(__i386) || defined(__amd64)
578 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
579 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
580 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
581 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
582 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
583 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
584 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
585 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
586 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
589 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
590 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
591 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
592 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
593 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
594 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
595 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
596 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
597 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
598 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
599 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
600 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
601 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
602 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
603 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
604 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
605 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
606 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
607 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
608 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
609 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
610 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
611 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
612 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
613 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
620 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
621 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
622 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
623 			SD_CONF_BSET_BSY_RETRY_COUNT|
624 			SD_CONF_BSET_RST_RETRIES|
625 			SD_CONF_BSET_RSV_REL_TIME,
626 		&purple_properties },
627 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
628 		SD_CONF_BSET_BSY_RETRY_COUNT|
629 		SD_CONF_BSET_RST_RETRIES|
630 		SD_CONF_BSET_RSV_REL_TIME|
631 		SD_CONF_BSET_MIN_THROTTLE|
632 		SD_CONF_BSET_DISKSORT_DISABLED,
633 		&sve_properties },
634 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
635 			SD_CONF_BSET_BSY_RETRY_COUNT|
636 			SD_CONF_BSET_RST_RETRIES|
637 			SD_CONF_BSET_RSV_REL_TIME,
638 		&purple_properties },
639 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
640 		SD_CONF_BSET_LUN_RESET_ENABLED,
641 		&maserati_properties },
642 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
643 		SD_CONF_BSET_NRR_COUNT|
644 		SD_CONF_BSET_BSY_RETRY_COUNT|
645 		SD_CONF_BSET_RST_RETRIES|
646 		SD_CONF_BSET_MIN_THROTTLE|
647 		SD_CONF_BSET_DISKSORT_DISABLED|
648 		SD_CONF_BSET_LUN_RESET_ENABLED,
649 		&pirus_properties },
650 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
651 		SD_CONF_BSET_NRR_COUNT|
652 		SD_CONF_BSET_BSY_RETRY_COUNT|
653 		SD_CONF_BSET_RST_RETRIES|
654 		SD_CONF_BSET_MIN_THROTTLE|
655 		SD_CONF_BSET_DISKSORT_DISABLED|
656 		SD_CONF_BSET_LUN_RESET_ENABLED,
657 		&pirus_properties },
658 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
659 		SD_CONF_BSET_NRR_COUNT|
660 		SD_CONF_BSET_BSY_RETRY_COUNT|
661 		SD_CONF_BSET_RST_RETRIES|
662 		SD_CONF_BSET_MIN_THROTTLE|
663 		SD_CONF_BSET_DISKSORT_DISABLED|
664 		SD_CONF_BSET_LUN_RESET_ENABLED,
665 		&pirus_properties },
666 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
667 		SD_CONF_BSET_NRR_COUNT|
668 		SD_CONF_BSET_BSY_RETRY_COUNT|
669 		SD_CONF_BSET_RST_RETRIES|
670 		SD_CONF_BSET_MIN_THROTTLE|
671 		SD_CONF_BSET_DISKSORT_DISABLED|
672 		SD_CONF_BSET_LUN_RESET_ENABLED,
673 		&pirus_properties },
674 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
675 		SD_CONF_BSET_NRR_COUNT|
676 		SD_CONF_BSET_BSY_RETRY_COUNT|
677 		SD_CONF_BSET_RST_RETRIES|
678 		SD_CONF_BSET_MIN_THROTTLE|
679 		SD_CONF_BSET_DISKSORT_DISABLED|
680 		SD_CONF_BSET_LUN_RESET_ENABLED,
681 		&pirus_properties },
682 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
683 		SD_CONF_BSET_NRR_COUNT|
684 		SD_CONF_BSET_BSY_RETRY_COUNT|
685 		SD_CONF_BSET_RST_RETRIES|
686 		SD_CONF_BSET_MIN_THROTTLE|
687 		SD_CONF_BSET_DISKSORT_DISABLED|
688 		SD_CONF_BSET_LUN_RESET_ENABLED,
689 		&pirus_properties },
690 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
691 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
692 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
693 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
694 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
695 #endif /* fibre or NON-sparc platforms */
696 #if ((defined(__sparc) && !defined(__fibre)) || \
697 	(defined(__i386) || defined(__amd64)))
698 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
699 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
700 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
701 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
702 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
703 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
704 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
705 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
706 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
707 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
708 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
709 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
710 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
711 	    &symbios_properties },
712 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
713 	    &lsi_properties_scsi },
714 #if defined(__i386) || defined(__amd64)
715 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
716 				    | SD_CONF_BSET_READSUB_BCD
717 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
718 				    | SD_CONF_BSET_NO_READ_HEADER
719 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
720 
721 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
722 				    | SD_CONF_BSET_READSUB_BCD
723 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
724 				    | SD_CONF_BSET_NO_READ_HEADER
725 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
726 #endif /* __i386 || __amd64 */
727 #endif /* sparc NON-fibre or NON-sparc platforms */
728 
729 #if (defined(SD_PROP_TST))
730 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
731 				| SD_CONF_BSET_CTYPE
732 				| SD_CONF_BSET_NRR_COUNT
733 				| SD_CONF_BSET_FAB_DEVID
734 				| SD_CONF_BSET_NOCACHE
735 				| SD_CONF_BSET_BSY_RETRY_COUNT
736 				| SD_CONF_BSET_PLAYMSF_BCD
737 				| SD_CONF_BSET_READSUB_BCD
738 				| SD_CONF_BSET_READ_TOC_TRK_BCD
739 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
740 				| SD_CONF_BSET_NO_READ_HEADER
741 				| SD_CONF_BSET_READ_CD_XD4
742 				| SD_CONF_BSET_RST_RETRIES
743 				| SD_CONF_BSET_RSV_REL_TIME
744 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
745 #endif
746 };
747 
748 static const int sd_disk_table_size =
749 	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
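
/*
 * Illustrative sketch only (hypothetical helper): the blank-compression
 * rule described in the comment above sd_disk_table.  Both the table
 * entry and the device's inquiry id can be normalized by dropping
 * leading blanks and collapsing each internal run of blanks to a single
 * blank (case-folded via SD_TOUPPER); two ids then match when the
 * normalized strings compare equal with strcmp().
 */
#ifdef SD_NOTES_SKETCH
static void
sd_sketch_squash_blanks(const char *src, char *dst)
{
	while (*src == ' ')			/* drop leading blanks */
		src++;
	while (*src != '\0') {
		if (*src == ' ') {
			while (*src == ' ')	/* collapse a blank run... */
				src++;
			if (*src != '\0')
				*dst++ = ' ';	/* ...to a single blank */
		} else {
			char c = *src++;	/* avoid side effects in */

			*dst++ = SD_TOUPPER(c);	/* the SD_TOUPPER macro */
		}
	}
	*dst = '\0';
}
#endif	/* SD_NOTES_SKETCH */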
750 
751 
752 /*
753  * Return codes of sd_uselabel().
754  */
755 #define	SD_LABEL_IS_VALID		0
756 #define	SD_LABEL_IS_INVALID		1
757 
758 #define	SD_INTERCONNECT_PARALLEL	0
759 #define	SD_INTERCONNECT_FABRIC		1
760 #define	SD_INTERCONNECT_FIBRE		2
761 #define	SD_INTERCONNECT_SSA		3
762 #define	SD_INTERCONNECT_SATA		4
763 #define	SD_IS_PARALLEL_SCSI(un)		\
764 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
765 #define	SD_IS_SERIAL(un)		\
766 	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)
767 
768 /*
769  * Definitions used by device id registration routines
770  */
771 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
772 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
773 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
774 #define	WD_NODE			7	/* the whole disk minor */
775 
776 static kmutex_t sd_sense_mutex = {0};
777 
778 /*
779  * Macros for updates of the driver state
780  */
781 #define	New_state(un, s)        \
782 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
783 #define	Restore_state(un)	\
784 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
785 
786 static struct sd_cdbinfo sd_cdbtab[] = {
787 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
788 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
789 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
790 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
791 };
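
/*
 * Illustrative sketch only: the selection rule sd_cdbtab[] supports.
 * Each row gives the largest LBA and transfer length a CDB group can
 * encode (Group 0: 21-bit LBA/8-bit count; Groups 1 and 5: 32-bit LBA;
 * Group 4: 64-bit LBA).  A request is assigned the first (smallest)
 * group whose limits cover it.  The struct and helper below are
 * hypothetical stand-ins for the real sd_cdbinfo definition in sddef.h.
 */
#ifdef SD_NOTES_SKETCH
struct sd_sketch_cdblim {
	uint64_t max_lba;	/* largest addressable LBA */
	uint32_t max_len;	/* largest block count per command */
};

static int
sd_sketch_pick_cdb_group(const struct sd_sketch_cdblim *tab, int ntab,
    uint64_t lba, uint32_t nblks)
{
	int i;

	for (i = 0; i < ntab; i++) {
		if (lba <= tab[i].max_lba && nblks <= tab[i].max_len)
			return (i);	/* index of first usable group */
	}
	return (-1);			/* request cannot be encoded */
}
#endif	/* SD_NOTES_SKETCH */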
792 
793 /*
794  * Specifies the number of seconds that must have elapsed since the last
795  * command completed for a device to be declared idle to the PM framework.
796  */
797 static int sd_pm_idletime = 1;
798 
799 /*
800  * Internal function prototypes
801  */
802 
803 #if (defined(__fibre))
804 /*
805  * These #defines are to avoid namespace collisions that occur because this
806  * code is currently used to compile two separate driver modules: sd and ssd.
807  * All function names need to be treated this way (even if declared static)
808  * in order to allow the debugger to resolve the names properly.
809  * It is anticipated that in the near future the ssd module will be obsoleted,
810  * at which time this ugliness should go away.
811  */
812 #define	sd_log_trace			ssd_log_trace
813 #define	sd_log_info			ssd_log_info
814 #define	sd_log_err			ssd_log_err
815 #define	sdprobe				ssdprobe
816 #define	sdinfo				ssdinfo
817 #define	sd_prop_op			ssd_prop_op
818 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
819 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
820 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
821 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
822 #define	sd_spin_up_unit			ssd_spin_up_unit
823 #define	sd_enable_descr_sense		ssd_enable_descr_sense
824 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
825 #define	sd_set_mmc_caps			ssd_set_mmc_caps
826 #define	sd_read_unit_properties		ssd_read_unit_properties
827 #define	sd_process_sdconf_file		ssd_process_sdconf_file
828 #define	sd_process_sdconf_table		ssd_process_sdconf_table
829 #define	sd_sdconf_id_match		ssd_sdconf_id_match
830 #define	sd_blank_cmp			ssd_blank_cmp
831 #define	sd_chk_vers1_data		ssd_chk_vers1_data
832 #define	sd_set_vers1_properties		ssd_set_vers1_properties
833 #define	sd_validate_geometry		ssd_validate_geometry
834 
835 #if defined(_SUNOS_VTOC_16)
836 #define	sd_convert_geometry		ssd_convert_geometry
837 #endif
838 
839 #define	sd_resync_geom_caches		ssd_resync_geom_caches
840 #define	sd_read_fdisk			ssd_read_fdisk
841 #define	sd_get_physical_geometry	ssd_get_physical_geometry
842 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
843 #define	sd_update_block_info		ssd_update_block_info
844 #define	sd_swap_efi_gpt			ssd_swap_efi_gpt
845 #define	sd_swap_efi_gpe			ssd_swap_efi_gpe
846 #define	sd_validate_efi			ssd_validate_efi
847 #define	sd_use_efi			ssd_use_efi
848 #define	sd_uselabel			ssd_uselabel
849 #define	sd_build_default_label		ssd_build_default_label
850 #define	sd_has_max_chs_vals		ssd_has_max_chs_vals
851 #define	sd_inq_fill			ssd_inq_fill
852 #define	sd_register_devid		ssd_register_devid
853 #define	sd_get_devid_block		ssd_get_devid_block
854 #define	sd_get_devid			ssd_get_devid
855 #define	sd_create_devid			ssd_create_devid
856 #define	sd_write_deviceid		ssd_write_deviceid
857 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
858 #define	sd_setup_pm			ssd_setup_pm
859 #define	sd_create_pm_components		ssd_create_pm_components
860 #define	sd_ddi_suspend			ssd_ddi_suspend
861 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
862 #define	sd_ddi_resume			ssd_ddi_resume
863 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
864 #define	sdpower				ssdpower
865 #define	sdattach			ssdattach
866 #define	sddetach			ssddetach
867 #define	sd_unit_attach			ssd_unit_attach
868 #define	sd_unit_detach			ssd_unit_detach
869 #define	sd_set_unit_attributes		ssd_set_unit_attributes
870 #define	sd_create_minor_nodes		ssd_create_minor_nodes
871 #define	sd_create_errstats		ssd_create_errstats
872 #define	sd_set_errstats			ssd_set_errstats
873 #define	sd_set_pstats			ssd_set_pstats
874 #define	sddump				ssddump
875 #define	sd_scsi_poll			ssd_scsi_poll
876 #define	sd_send_polled_RQS		ssd_send_polled_RQS
877 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
878 #define	sd_init_event_callbacks		ssd_init_event_callbacks
879 #define	sd_event_callback		ssd_event_callback
880 #define	sd_cache_control		ssd_cache_control
881 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
882 #define	sd_make_device			ssd_make_device
883 #define	sdopen				ssdopen
884 #define	sdclose				ssdclose
885 #define	sd_ready_and_valid		ssd_ready_and_valid
886 #define	sdmin				ssdmin
887 #define	sdread				ssdread
888 #define	sdwrite				ssdwrite
889 #define	sdaread				ssdaread
890 #define	sdawrite			ssdawrite
891 #define	sdstrategy			ssdstrategy
892 #define	sdioctl				ssdioctl
893 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
894 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
895 #define	sd_checksum_iostart		ssd_checksum_iostart
896 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
897 #define	sd_pm_iostart			ssd_pm_iostart
898 #define	sd_core_iostart			ssd_core_iostart
899 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
900 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
901 #define	sd_checksum_iodone		ssd_checksum_iodone
902 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
903 #define	sd_pm_iodone			ssd_pm_iodone
904 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
905 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
906 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
907 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
908 #define	sd_buf_iodone			ssd_buf_iodone
909 #define	sd_uscsi_strategy		ssd_uscsi_strategy
910 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
911 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
912 #define	sd_uscsi_iodone			ssd_uscsi_iodone
913 #define	sd_xbuf_strategy		ssd_xbuf_strategy
914 #define	sd_xbuf_init			ssd_xbuf_init
915 #define	sd_pm_entry			ssd_pm_entry
916 #define	sd_pm_exit			ssd_pm_exit
917 
918 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
919 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
920 
921 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
922 #define	sdintr				ssdintr
923 #define	sd_start_cmds			ssd_start_cmds
924 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
925 #define	sd_bioclone_alloc		ssd_bioclone_alloc
926 #define	sd_bioclone_free		ssd_bioclone_free
927 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
928 #define	sd_shadow_buf_free		ssd_shadow_buf_free
929 #define	sd_print_transport_rejected_message	\
930 					ssd_print_transport_rejected_message
931 #define	sd_retry_command		ssd_retry_command
932 #define	sd_set_retry_bp			ssd_set_retry_bp
933 #define	sd_send_request_sense_command	ssd_send_request_sense_command
934 #define	sd_start_retry_command		ssd_start_retry_command
935 #define	sd_start_direct_priority_command	\
936 					ssd_start_direct_priority_command
937 #define	sd_return_failed_command	ssd_return_failed_command
938 #define	sd_return_failed_command_no_restart	\
939 					ssd_return_failed_command_no_restart
940 #define	sd_return_command		ssd_return_command
941 #define	sd_sync_with_callback		ssd_sync_with_callback
942 #define	sdrunout			ssdrunout
943 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
944 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
945 #define	sd_reduce_throttle		ssd_reduce_throttle
946 #define	sd_restore_throttle		ssd_restore_throttle
947 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
948 #define	sd_init_cdb_limits		ssd_init_cdb_limits
949 #define	sd_pkt_status_good		ssd_pkt_status_good
950 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
951 #define	sd_pkt_status_busy		ssd_pkt_status_busy
952 #define	sd_pkt_status_reservation_conflict	\
953 					ssd_pkt_status_reservation_conflict
954 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
955 #define	sd_handle_request_sense		ssd_handle_request_sense
956 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
957 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
958 #define	sd_validate_sense_data		ssd_validate_sense_data
959 #define	sd_decode_sense			ssd_decode_sense
960 #define	sd_print_sense_msg		ssd_print_sense_msg
961 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
962 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
963 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
964 #define	sd_sense_key_medium_or_hardware_error	\
965 					ssd_sense_key_medium_or_hardware_error
966 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
967 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
968 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
969 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
970 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
971 #define	sd_sense_key_default		ssd_sense_key_default
972 #define	sd_print_retry_msg		ssd_print_retry_msg
973 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
974 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
975 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
976 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
977 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
978 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
979 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
980 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
981 #define	sd_pkt_reason_default		ssd_pkt_reason_default
982 #define	sd_reset_target			ssd_reset_target
983 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
984 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
985 #define	sd_taskq_create			ssd_taskq_create
986 #define	sd_taskq_delete			ssd_taskq_delete
987 #define	sd_media_change_task		ssd_media_change_task
988 #define	sd_handle_mchange		ssd_handle_mchange
989 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
990 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
991 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
992 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
993 #define	sd_send_scsi_feature_GET_CONFIGURATION	\
994 					ssd_send_scsi_feature_GET_CONFIGURATION
995 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
996 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
997 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
998 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
999 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1000 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1001 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1002 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1003 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1004 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1005 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1006 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1007 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1008 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1009 #define	sd_alloc_rqs			ssd_alloc_rqs
1010 #define	sd_free_rqs			ssd_free_rqs
1011 #define	sd_dump_memory			ssd_dump_memory
1012 #define	sd_uscsi_ioctl			ssd_uscsi_ioctl
1013 #define	sd_get_media_info		ssd_get_media_info
1014 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1015 #define	sd_dkio_get_geometry		ssd_dkio_get_geometry
1016 #define	sd_dkio_set_geometry		ssd_dkio_set_geometry
1017 #define	sd_dkio_get_partition		ssd_dkio_get_partition
1018 #define	sd_dkio_set_partition		ssd_dkio_set_partition
1019 #define	sd_dkio_partition		ssd_dkio_partition
1020 #define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
1021 #define	sd_dkio_get_efi			ssd_dkio_get_efi
1022 #define	sd_build_user_vtoc		ssd_build_user_vtoc
1023 #define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
1024 #define	sd_dkio_set_efi			ssd_dkio_set_efi
1025 #define	sd_build_label_vtoc		ssd_build_label_vtoc
1026 #define	sd_write_label			ssd_write_label
1027 #define	sd_clear_vtoc			ssd_clear_vtoc
1028 #define	sd_clear_efi			ssd_clear_efi
1029 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1030 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1031 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1032 #define	sd_dkio_get_mboot		ssd_dkio_get_mboot
1033 #define	sd_dkio_set_mboot		ssd_dkio_set_mboot
1034 #define	sd_setup_default_geometry	ssd_setup_default_geometry
1035 #define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
1036 #define	sd_check_mhd			ssd_check_mhd
1037 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1038 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1039 #define	sd_sname			ssd_sname
1040 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1041 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1042 #define	sd_take_ownership		ssd_take_ownership
1043 #define	sd_reserve_release		ssd_reserve_release
1044 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1045 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1046 #define	sd_persistent_reservation_in_read_keys	\
1047 					ssd_persistent_reservation_in_read_keys
1048 #define	sd_persistent_reservation_in_read_resv	\
1049 					ssd_persistent_reservation_in_read_resv
1050 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1051 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1052 #define	sd_mhdioc_release		ssd_mhdioc_release
1053 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1054 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1055 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1056 #define	sr_change_blkmode		ssr_change_blkmode
1057 #define	sr_change_speed			ssr_change_speed
1058 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1059 #define	sr_pause_resume			ssr_pause_resume
1060 #define	sr_play_msf			ssr_play_msf
1061 #define	sr_play_trkind			ssr_play_trkind
1062 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1063 #define	sr_read_subchannel		ssr_read_subchannel
1064 #define	sr_read_tocentry		ssr_read_tocentry
1065 #define	sr_read_tochdr			ssr_read_tochdr
1066 #define	sr_read_cdda			ssr_read_cdda
1067 #define	sr_read_cdxa			ssr_read_cdxa
1068 #define	sr_read_mode1			ssr_read_mode1
1069 #define	sr_read_mode2			ssr_read_mode2
1070 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1071 #define	sr_sector_mode			ssr_sector_mode
1072 #define	sr_eject			ssr_eject
1073 #define	sr_ejected			ssr_ejected
1074 #define	sr_check_wp			ssr_check_wp
1075 #define	sd_check_media			ssd_check_media
1076 #define	sd_media_watch_cb		ssd_media_watch_cb
1077 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1078 #define	sr_volume_ctrl			ssr_volume_ctrl
1079 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1080 #define	sd_log_page_supported		ssd_log_page_supported
1081 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1082 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1083 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1084 #define	sd_range_lock			ssd_range_lock
1085 #define	sd_get_range			ssd_get_range
1086 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1087 #define	sd_range_unlock			ssd_range_unlock
1088 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1089 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1090 
1091 #define	sd_iostart_chain		ssd_iostart_chain
1092 #define	sd_iodone_chain			ssd_iodone_chain
1093 #define	sd_initpkt_map			ssd_initpkt_map
1094 #define	sd_destroypkt_map		ssd_destroypkt_map
1095 #define	sd_chain_type_map		ssd_chain_type_map
1096 #define	sd_chain_index_map		ssd_chain_index_map
1097 
1098 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1099 #define	sd_failfast_flushq		ssd_failfast_flushq
1100 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1101 
1102 #define	sd_is_lsi			ssd_is_lsi
1103 
1104 #endif	/* #if (defined(__fibre)) */
1105 
1106 
1107 int _init(void);
1108 int _fini(void);
1109 int _info(struct modinfo *modinfop);
1110 
1111 /*PRINTFLIKE3*/
1112 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1113 /*PRINTFLIKE3*/
1114 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1115 /*PRINTFLIKE3*/
1116 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1117 
1118 static int sdprobe(dev_info_t *devi);
1119 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1120     void **result);
1121 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1122     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1123 
1124 /*
1125  * Smart probe for parallel scsi
1126  */
1127 static void sd_scsi_probe_cache_init(void);
1128 static void sd_scsi_probe_cache_fini(void);
1129 static void sd_scsi_clear_probe_cache(void);
1130 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1131 
1132 static int	sd_spin_up_unit(struct sd_lun *un);
1133 #ifdef _LP64
1134 static void	sd_enable_descr_sense(struct sd_lun *un);
1135 static void	sd_reenable_dsense_task(void *arg);
1136 #endif /* _LP64 */
1137 
1138 static void	sd_set_mmc_caps(struct sd_lun *un);
1139 
1140 static void sd_read_unit_properties(struct sd_lun *un);
1141 static int  sd_process_sdconf_file(struct sd_lun *un);
1142 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1143     int *data_list, sd_tunables *values);
1144 static void sd_process_sdconf_table(struct sd_lun *un);
1145 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1146 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1147 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1148 	int list_len, char *dataname_ptr);
1149 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1150     sd_tunables *prop_list);
1151 static int  sd_validate_geometry(struct sd_lun *un, int path_flag);
1152 
1153 #if defined(_SUNOS_VTOC_16)
1154 static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
1155 #endif
1156 
1157 static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
1158 	int path_flag);
1159 static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
1160 	int path_flag);
1161 static void sd_get_physical_geometry(struct sd_lun *un,
1162 	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
1163 static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
1164 	int lbasize);
1165 static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
1166 static void sd_swap_efi_gpt(efi_gpt_t *);
1167 static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
1168 static int sd_validate_efi(efi_gpt_t *);
1169 static int sd_use_efi(struct sd_lun *, int);
1170 static void sd_build_default_label(struct sd_lun *un);
1171 
1172 #if defined(_FIRMWARE_NEEDS_FDISK)
1173 static int  sd_has_max_chs_vals(struct ipart *fdp);
1174 #endif
1175 static void sd_inq_fill(char *p, int l, char *s);
1176 
1177 
1178 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1179     int reservation_flag);
1180 static daddr_t  sd_get_devid_block(struct sd_lun *un);
1181 static int  sd_get_devid(struct sd_lun *un);
1182 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1183 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1184 static int  sd_write_deviceid(struct sd_lun *un);
1185 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1186 static int  sd_check_vpd_page_support(struct sd_lun *un);
1187 
1188 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1189 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1190 
1191 static int  sd_ddi_suspend(dev_info_t *devi);
1192 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1193 static int  sd_ddi_resume(dev_info_t *devi);
1194 static int  sd_ddi_pm_resume(struct sd_lun *un);
1195 static int  sdpower(dev_info_t *devi, int component, int level);
1196 
1197 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1198 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1199 static int  sd_unit_attach(dev_info_t *devi);
1200 static int  sd_unit_detach(dev_info_t *devi);
1201 
1202 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1203 static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
1204 static void sd_create_errstats(struct sd_lun *un, int instance);
1205 static void sd_set_errstats(struct sd_lun *un);
1206 static void sd_set_pstats(struct sd_lun *un);
1207 
1208 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1209 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1210 static int  sd_send_polled_RQS(struct sd_lun *un);
1211 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1212 
1213 #if (defined(__fibre))
1214 /*
1215  * Event callbacks (photon)
1216  */
1217 static void sd_init_event_callbacks(struct sd_lun *un);
1218 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1219 #endif
1220 
1221 /*
1222  * Defines for sd_cache_control
1223  */
1224 
1225 #define	SD_CACHE_ENABLE		1
1226 #define	SD_CACHE_DISABLE	0
1227 #define	SD_CACHE_NOCHANGE	-1
1228 
1229 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1230 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1231 static dev_t sd_make_device(dev_info_t *devi);
1232 
1233 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1234 	uint64_t capacity);
1235 
1236 /*
1237  * Driver entry point functions.
1238  */
1239 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1240 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1241 static int  sd_ready_and_valid(struct sd_lun *un);
1242 
1243 static void sdmin(struct buf *bp);
1244 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1245 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1246 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1247 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1248 
1249 static int sdstrategy(struct buf *bp);
1250 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1251 
1252 /*
1253  * Function prototypes for layering functions in the iostart chain.
1254  */
1255 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1256 	struct buf *bp);
1257 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1258 	struct buf *bp);
1259 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1260 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1261 	struct buf *bp);
1262 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1263 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1264 
1265 /*
1266  * Function prototypes for layering functions in the iodone chain.
1267  */
1268 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1269 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1270 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1271 	struct buf *bp);
1272 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1273 	struct buf *bp);
1274 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1275 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1276 	struct buf *bp);
1277 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1278 
1279 /*
1280  * Prototypes for functions to support buf(9S) based IO.
1281  */
1282 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1283 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1284 static void sd_destroypkt_for_buf(struct buf *);
1285 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1286 	struct buf *bp, int flags,
1287 	int (*callback)(caddr_t), caddr_t callback_arg,
1288 	diskaddr_t lba, uint32_t blockcount);
1289 #if defined(__i386) || defined(__amd64)
1290 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1291 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1292 #endif /* defined(__i386) || defined(__amd64) */
1293 
1294 /*
1295  * Prototypes for functions to support USCSI IO.
1296  */
1297 static int sd_uscsi_strategy(struct buf *bp);
1298 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1299 static void sd_destroypkt_for_uscsi(struct buf *);
1300 
1301 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1302 	uchar_t chain_type, void *pktinfop);
1303 
1304 static int  sd_pm_entry(struct sd_lun *un);
1305 static void sd_pm_exit(struct sd_lun *un);
1306 
1307 static void sd_pm_idletimeout_handler(void *arg);
1308 
1309 /*
1310  * sd_core internal functions (used at the sd_core_io layer).
1311  */
1312 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1313 static void sdintr(struct scsi_pkt *pktp);
1314 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1315 
1316 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
1317 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
1318 	int path_flag);
1319 
1320 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1321 	daddr_t blkno, int (*func)(struct buf *));
1322 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1323 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1324 static void sd_bioclone_free(struct buf *bp);
1325 static void sd_shadow_buf_free(struct buf *bp);
1326 
1327 static void sd_print_transport_rejected_message(struct sd_lun *un,
1328 	struct sd_xbuf *xp, int code);
1329 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1330     void *arg, int code);
1331 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1332     void *arg, int code);
1333 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1334     void *arg, int code);
1335 
1336 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1337 	int retry_check_flag,
1338 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1339 		int c),
1340 	void *user_arg, int failure_code,  clock_t retry_delay,
1341 	void (*statp)(kstat_io_t *));
1342 
1343 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1344 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1345 
1346 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1347 	struct scsi_pkt *pktp);
1348 static void sd_start_retry_command(void *arg);
1349 static void sd_start_direct_priority_command(void *arg);
1350 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1351 	int errcode);
1352 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1353 	struct buf *bp, int errcode);
1354 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1355 static void sd_sync_with_callback(struct sd_lun *un);
1356 static int sdrunout(caddr_t arg);
1357 
1358 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1359 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1360 
1361 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1362 static void sd_restore_throttle(void *arg);
1363 
1364 static void sd_init_cdb_limits(struct sd_lun *un);
1365 
1366 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 
1369 /*
1370  * Error handling functions
1371  */
1372 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1377 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1382 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1383 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1384 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1385 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1386 	struct sd_xbuf *xp);
1387 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1388 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1389 
1390 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1391 	void *arg, int code);
1392 
1393 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1394 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1395 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1396 	uint8_t *sense_datap,
1397 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1398 static void sd_sense_key_not_ready(struct sd_lun *un,
1399 	uint8_t *sense_datap,
1400 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1401 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1402 	uint8_t *sense_datap,
1403 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1404 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1405 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1406 static void sd_sense_key_unit_attention(struct sd_lun *un,
1407 	uint8_t *sense_datap,
1408 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1409 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1410 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1411 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1412 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1413 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1414 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1415 static void sd_sense_key_default(struct sd_lun *un,
1416 	uint8_t *sense_datap,
1417 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1418 
1419 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1420 	void *arg, int flag);
1421 
1422 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1423 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1424 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1425 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1426 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1427 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1428 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1429 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1430 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1431 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1432 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1433 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1434 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1435 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1436 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1437 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1438 
1439 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1440 
1441 static void sd_start_stop_unit_callback(void *arg);
1442 static void sd_start_stop_unit_task(void *arg);
1443 
1444 static void sd_taskq_create(void);
1445 static void sd_taskq_delete(void);
1446 static void sd_media_change_task(void *arg);
1447 
1448 static int sd_handle_mchange(struct sd_lun *un);
1449 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1450 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1451 	uint32_t *lbap, int path_flag);
1452 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1453 	uint32_t *lbap, int path_flag);
1454 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1455 	int path_flag);
1456 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1457 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1458 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1459 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1460 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1461 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1462 	uchar_t usr_cmd, uchar_t *usr_bufp);
1463 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1464 	struct dk_callback *dkc);
1465 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1466 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1467 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1468 	uchar_t *bufaddr, uint_t buflen);
1469 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1470 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1471 	uchar_t *bufaddr, uint_t buflen, char feature);
1472 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1473 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1474 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1475 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1476 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1477 	size_t buflen, daddr_t start_block, int path_flag);
1478 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1479 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1480 	path_flag)
1481 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1482 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1483 	path_flag)
1484 
1485 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1486 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1487 	uint16_t param_ptr, int path_flag);
1488 
1489 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1490 static void sd_free_rqs(struct sd_lun *un);
1491 
1492 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1493 	uchar_t *data, int len, int fmt);
1494 static void sd_panic_for_res_conflict(struct sd_lun *un);
1495 
1496 /*
1497  * Disk Ioctl Function Prototypes
1498  */
1499 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1500 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1501 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1502 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1503 	int geom_validated);
1504 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1505 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1506 	int geom_validated);
1507 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1508 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1509 	int geom_validated);
1510 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1511 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1512 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1513 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1514 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1515 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1516 static int sd_write_label(dev_t dev);
1517 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1518 static void sd_clear_vtoc(struct sd_lun *un);
1519 static void sd_clear_efi(struct sd_lun *un);
1520 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1521 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1522 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1523 static void sd_setup_default_geometry(struct sd_lun *un);
1524 #if defined(__i386) || defined(__amd64)
1525 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1526 #endif
1527 
1528 /*
1529  * Multi-host Ioctl Prototypes
1530  */
1531 static int sd_check_mhd(dev_t dev, int interval);
1532 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1533 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1534 static char *sd_sname(uchar_t status);
1535 static void sd_mhd_resvd_recover(void *arg);
1536 static void sd_resv_reclaim_thread(void);
1537 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1538 static int sd_reserve_release(dev_t dev, int cmd);
1539 static void sd_rmv_resv_reclaim_req(dev_t dev);
1540 static void sd_mhd_reset_notify_cb(caddr_t arg);
1541 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1542 	mhioc_inkeys_t *usrp, int flag);
1543 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1544 	mhioc_inresvs_t *usrp, int flag);
1545 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1546 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1547 static int sd_mhdioc_release(dev_t dev);
1548 static int sd_mhdioc_register_devid(dev_t dev);
1549 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1550 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1551 
1552 /*
1553  * SCSI removable prototypes
1554  */
1555 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1556 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1557 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1558 static int sr_pause_resume(dev_t dev, int mode);
1559 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1560 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1561 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1562 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1563 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1564 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1565 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1567 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1568 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1569 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1570 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1571 static int sr_eject(dev_t dev);
1572 static void sr_ejected(register struct sd_lun *un);
1573 static int sr_check_wp(dev_t dev);
1574 static int sd_check_media(dev_t dev, enum dkio_state state);
1575 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576 static void sd_delayed_cv_broadcast(void *arg);
1577 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1578 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1579 
1580 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1581 
1582 /*
1583  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1584  */
1585 static void sd_check_for_writable_cd(struct sd_lun *un);
1586 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1587 static void sd_wm_cache_destructor(void *wm, void *un);
1588 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1589 	daddr_t endb, ushort_t typ);
1590 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1591 	daddr_t endb);
1592 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1593 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1594 static void sd_read_modify_write_task(void * arg);
1595 static int
1596 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1597 	struct buf **bpp);
1598 
1599 
1600 /*
1601  * Function prototypes for failfast support.
1602  */
1603 static void sd_failfast_flushq(struct sd_lun *un);
1604 static int sd_failfast_flushq_callback(struct buf *bp);
1605 
1606 /*
1607  * Function prototypes to check for lsi devices
1608  */
1609 static void sd_is_lsi(struct sd_lun *un);
1610 
1611 /*
1612  * Function prototypes for x86 support
1613  */
1614 #if defined(__i386) || defined(__amd64)
1615 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1616 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1617 #endif
1618 
1619 /*
1620  * Constants for failfast support:
1621  *
1622  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1623  * failfast processing being performed.
1624  *
1625  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1626  * failfast processing on all bufs with B_FAILFAST set.
1627  */
1628 
1629 #define	SD_FAILFAST_INACTIVE		0
1630 #define	SD_FAILFAST_ACTIVE		1
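
/*
 * Sketch (illustrative, not from the original source): assuming the soft
 * state carries an un_failfast_state member, a transition into the
 * failfast state might look like:
 *
 *	un->un_failfast_state = SD_FAILFAST_ACTIVE;
 *	sd_failfast_flushq(un);
 */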
1631 
1632 /*
1633  * Bitmask to control behavior of buf(9S) flushes when a transition to
1634  * the failfast state occurs. Optional bits include:
1635  *
1636  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1637  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1638  * be flushed.
1639  *
1640  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1641  * driver, in addition to the regular wait queue. This includes the xbuf
1642  * queues. When clear, only the driver's wait queue will be flushed.
1643  */
1644 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1645 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1646 
1647 /*
1648  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1649  * to flush all queues within the driver.
1650  */
1651 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
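
/*
 * Tuning sketch (illustrative): sd_failfast_flushctl is a module global,
 * so its default can be overridden at boot time from /etc/system, e.g.
 * to flush ALL bufs as well as all driver queues:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */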
1652 
1653 
1654 /*
1655  * SD Testing Fault Injection
1656  */
1657 #ifdef SD_FAULT_INJECTION
1658 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1659 static void sd_faultinjection(struct scsi_pkt *pktp);
1660 static void sd_injection_log(char *buf, struct sd_lun *un);
1661 #endif
1662 
1663 /*
1664  * Device driver ops vector
1665  */
1666 static struct cb_ops sd_cb_ops = {
1667 	sdopen,			/* open */
1668 	sdclose,		/* close */
1669 	sdstrategy,		/* strategy */
1670 	nodev,			/* print */
1671 	sddump,			/* dump */
1672 	sdread,			/* read */
1673 	sdwrite,		/* write */
1674 	sdioctl,		/* ioctl */
1675 	nodev,			/* devmap */
1676 	nodev,			/* mmap */
1677 	nodev,			/* segmap */
1678 	nochpoll,		/* poll */
1679 	sd_prop_op,		/* cb_prop_op */
1680 	0,			/* streamtab  */
1681 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1682 	CB_REV,			/* cb_rev */
1683 	sdaread, 		/* async I/O read entry point */
1684 	sdawrite		/* async I/O write entry point */
1685 };
1686 
1687 static struct dev_ops sd_ops = {
1688 	DEVO_REV,		/* devo_rev, */
1689 	0,			/* refcnt  */
1690 	sdinfo,			/* info */
1691 	nulldev,		/* identify */
1692 	sdprobe,		/* probe */
1693 	sdattach,		/* attach */
1694 	sddetach,		/* detach */
1695 	nodev,			/* reset */
1696 	&sd_cb_ops,		/* driver operations */
1697 	NULL,			/* bus operations */
1698 	sdpower			/* power */
1699 };
1700 
1701 
1702 /*
1703  * This is the loadable module wrapper.
1704  */
1705 #include <sys/modctl.h>
1706 
1707 static struct modldrv modldrv = {
1708 	&mod_driverops,		/* Type of module. This one is a driver */
1709 	SD_MODULE_NAME,		/* Module name. */
1710 	&sd_ops			/* driver ops */
1711 };
1712 
1713 
1714 static struct modlinkage modlinkage = {
1715 	MODREV_1,
1716 	&modldrv,
1717 	NULL
1718 };
1719 
1720 
1721 static struct scsi_asq_key_strings sd_additional_codes[] = {
1722 	0x81, 0, "Logical Unit is Reserved",
1723 	0x85, 0, "Audio Address Not Valid",
1724 	0xb6, 0, "Media Load Mechanism Failed",
1725 	0xB9, 0, "Audio Play Operation Aborted",
1726 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1727 	0x53, 2, "Medium removal prevented",
1728 	0x6f, 0, "Authentication failed during key exchange",
1729 	0x6f, 1, "Key not present",
1730 	0x6f, 2, "Key not established",
1731 	0x6f, 3, "Read without proper authentication",
1732 	0x6f, 4, "Mismatched region to this logical unit",
1733 	0x6f, 5, "Region reset count error",
1734 	0xffff, 0x0, NULL
1735 };
1736 
1737 
1738 /*
1739  * Struct for passing printing information for sense data messages
1740  */
1741 struct sd_sense_info {
1742 	int	ssi_severity;
1743 	int	ssi_pfa_flag;
1744 };
1745 
1746 /*
1747  * Table of function pointers for iostart-side routines. Separate "chains"
1748  * of layered function calls are formed by placing the function pointers
1749  * sequentially in the desired order. Functions are called according to an
1750  * incrementing table index ordering. The last function in each chain must
1751  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1752  * in the sd_iodone_chain[] array.
1753  *
1754  * Note: It may seem more natural to organize both the iostart and iodone
1755  * functions together, into an array of structures (or some similar
1756  * organization) with a common index, rather than two separate arrays which
1757  * must be maintained in synchronization. The purpose of this division is
1758  * to achieve improved performance: individual arrays allow for more
1759  * effective cache line utilization on certain platforms.
1760  */
1761 
1762 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1763 
1764 
1765 static sd_chain_t sd_iostart_chain[] = {
1766 
1767 	/* Chain for buf IO for disk drive targets (PM enabled) */
1768 	sd_mapblockaddr_iostart,	/* Index: 0 */
1769 	sd_pm_iostart,			/* Index: 1 */
1770 	sd_core_iostart,		/* Index: 2 */
1771 
1772 	/* Chain for buf IO for disk drive targets (PM disabled) */
1773 	sd_mapblockaddr_iostart,	/* Index: 3 */
1774 	sd_core_iostart,		/* Index: 4 */
1775 
1776 	/* Chain for buf IO for removable-media targets (PM enabled) */
1777 	sd_mapblockaddr_iostart,	/* Index: 5 */
1778 	sd_mapblocksize_iostart,	/* Index: 6 */
1779 	sd_pm_iostart,			/* Index: 7 */
1780 	sd_core_iostart,		/* Index: 8 */
1781 
1782 	/* Chain for buf IO for removable-media targets (PM disabled) */
1783 	sd_mapblockaddr_iostart,	/* Index: 9 */
1784 	sd_mapblocksize_iostart,	/* Index: 10 */
1785 	sd_core_iostart,		/* Index: 11 */
1786 
1787 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1788 	sd_mapblockaddr_iostart,	/* Index: 12 */
1789 	sd_checksum_iostart,		/* Index: 13 */
1790 	sd_pm_iostart,			/* Index: 14 */
1791 	sd_core_iostart,		/* Index: 15 */
1792 
1793 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1794 	sd_mapblockaddr_iostart,	/* Index: 16 */
1795 	sd_checksum_iostart,		/* Index: 17 */
1796 	sd_core_iostart,		/* Index: 18 */
1797 
1798 	/* Chain for USCSI commands (all targets) */
1799 	sd_pm_iostart,			/* Index: 19 */
1800 	sd_core_iostart,		/* Index: 20 */
1801 
1802 	/* Chain for checksumming USCSI commands (all targets) */
1803 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1804 	sd_pm_iostart,			/* Index: 22 */
1805 	sd_core_iostart,		/* Index: 23 */
1806 
1807 	/* Chain for "direct" USCSI commands (all targets) */
1808 	sd_core_iostart,		/* Index: 24 */
1809 
1810 	/* Chain for "direct priority" USCSI commands (all targets) */
1811 	sd_core_iostart,		/* Index: 25 */
1812 };
1813 
1814 /*
1815  * Macros to locate the first function of each iostart chain in the
1816  * sd_iostart_chain[] array. These are located by the index in the array.
1817  */
1818 #define	SD_CHAIN_DISK_IOSTART			0
1819 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1820 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1821 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1822 #define	SD_CHAIN_CHKSUM_IOSTART			12
1823 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1824 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1825 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1826 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1827 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
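
/*
 * Worked example (illustrative): an IO entering the disk chain with PM
 * enabled begins at SD_CHAIN_DISK_IOSTART (index 0) and proceeds with an
 * incrementing index:
 *
 *	sd_mapblockaddr_iostart (0) -> sd_pm_iostart (1) ->
 *	    sd_core_iostart (2)
 */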
1828 
1829 
1830 /*
1831  * Table of function pointers for the iodone-side routines for the driver-
1832  * internal layering mechanism.  The calling sequence for iodone routines
1833  * uses a decrementing table index, so the last routine called in a chain
1834  * must be at the lowest array index location for that chain.  The last
1835  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1836  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1837  * of the functions in an iodone side chain must correspond to the ordering
1838  * of the iostart routines for that chain.  Note that there is no iodone
1839  * side routine that corresponds to sd_core_iostart(), so there is no
1840  * entry in the table for this.
1841  */
1842 
1843 static sd_chain_t sd_iodone_chain[] = {
1844 
1845 	/* Chain for buf IO for disk drive targets (PM enabled) */
1846 	sd_buf_iodone,			/* Index: 0 */
1847 	sd_mapblockaddr_iodone,		/* Index: 1 */
1848 	sd_pm_iodone,			/* Index: 2 */
1849 
1850 	/* Chain for buf IO for disk drive targets (PM disabled) */
1851 	sd_buf_iodone,			/* Index: 3 */
1852 	sd_mapblockaddr_iodone,		/* Index: 4 */
1853 
1854 	/* Chain for buf IO for removable-media targets (PM enabled) */
1855 	sd_buf_iodone,			/* Index: 5 */
1856 	sd_mapblockaddr_iodone,		/* Index: 6 */
1857 	sd_mapblocksize_iodone,		/* Index: 7 */
1858 	sd_pm_iodone,			/* Index: 8 */
1859 
1860 	/* Chain for buf IO for removable-media targets (PM disabled) */
1861 	sd_buf_iodone,			/* Index: 9 */
1862 	sd_mapblockaddr_iodone,		/* Index: 10 */
1863 	sd_mapblocksize_iodone,		/* Index: 11 */
1864 
1865 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1866 	sd_buf_iodone,			/* Index: 12 */
1867 	sd_mapblockaddr_iodone,		/* Index: 13 */
1868 	sd_checksum_iodone,		/* Index: 14 */
1869 	sd_pm_iodone,			/* Index: 15 */
1870 
1871 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1872 	sd_buf_iodone,			/* Index: 16 */
1873 	sd_mapblockaddr_iodone,		/* Index: 17 */
1874 	sd_checksum_iodone,		/* Index: 18 */
1875 
1876 	/* Chain for USCSI commands (non-checksum targets) */
1877 	sd_uscsi_iodone,		/* Index: 19 */
1878 	sd_pm_iodone,			/* Index: 20 */
1879 
1880 	/* Chain for USCSI commands (checksum targets) */
1881 	sd_uscsi_iodone,		/* Index: 21 */
1882 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1883 	sd_pm_iodone,			/* Index: 23 */
1884 
1885 	/* Chain for "direct" USCSI commands (all targets) */
1886 	sd_uscsi_iodone,		/* Index: 24 */
1887 
1888 	/* Chain for "direct priority" USCSI commands (all targets) */
1889 	sd_uscsi_iodone,		/* Index: 25 */
1890 };
1891 
1892 
1893 /*
1894  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1895  * each iodone-side chain. These are located by the array index, but as the
1896  * iodone side functions are called in a decrementing-index order, the
1897  * highest index number in each chain must be specified (as these correspond
1898  * to the first function in the iodone chain that will be called by the core
1899  * at IO completion time).
1900  */
1901 
1902 #define	SD_CHAIN_DISK_IODONE			2
1903 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1904 #define	SD_CHAIN_RMMEDIA_IODONE			8
1905 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1906 #define	SD_CHAIN_CHKSUM_IODONE			15
1907 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1908 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1909 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1910 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1911 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
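
/*
 * Worked example (illustrative): at IO completion time the disk chain
 * (PM enabled) is entered at SD_CHAIN_DISK_IODONE (index 2) and proceeds
 * with a decrementing index:
 *
 *	sd_pm_iodone (2) -> sd_mapblockaddr_iodone (1) -> sd_buf_iodone (0)
 */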
1912 
1913 
1914 
1915 
1916 /*
1917  * Array to map a layering chain index to the appropriate initpkt routine.
1918  * The redundant entries are present so that the index used for accessing
1919  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1920  * with this table as well.
1921  */
1922 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1923 
1924 static sd_initpkt_t	sd_initpkt_map[] = {
1925 
1926 	/* Chain for buf IO for disk drive targets (PM enabled) */
1927 	sd_initpkt_for_buf,		/* Index: 0 */
1928 	sd_initpkt_for_buf,		/* Index: 1 */
1929 	sd_initpkt_for_buf,		/* Index: 2 */
1930 
1931 	/* Chain for buf IO for disk drive targets (PM disabled) */
1932 	sd_initpkt_for_buf,		/* Index: 3 */
1933 	sd_initpkt_for_buf,		/* Index: 4 */
1934 
1935 	/* Chain for buf IO for removable-media targets (PM enabled) */
1936 	sd_initpkt_for_buf,		/* Index: 5 */
1937 	sd_initpkt_for_buf,		/* Index: 6 */
1938 	sd_initpkt_for_buf,		/* Index: 7 */
1939 	sd_initpkt_for_buf,		/* Index: 8 */
1940 
1941 	/* Chain for buf IO for removable-media targets (PM disabled) */
1942 	sd_initpkt_for_buf,		/* Index: 9 */
1943 	sd_initpkt_for_buf,		/* Index: 10 */
1944 	sd_initpkt_for_buf,		/* Index: 11 */
1945 
1946 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1947 	sd_initpkt_for_buf,		/* Index: 12 */
1948 	sd_initpkt_for_buf,		/* Index: 13 */
1949 	sd_initpkt_for_buf,		/* Index: 14 */
1950 	sd_initpkt_for_buf,		/* Index: 15 */
1951 
1952 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1953 	sd_initpkt_for_buf,		/* Index: 16 */
1954 	sd_initpkt_for_buf,		/* Index: 17 */
1955 	sd_initpkt_for_buf,		/* Index: 18 */
1956 
1957 	/* Chain for USCSI commands (non-checksum targets) */
1958 	sd_initpkt_for_uscsi,		/* Index: 19 */
1959 	sd_initpkt_for_uscsi,		/* Index: 20 */
1960 
1961 	/* Chain for USCSI commands (checksum targets) */
1962 	sd_initpkt_for_uscsi,		/* Index: 21 */
1963 	sd_initpkt_for_uscsi,		/* Index: 22 */
1964 	sd_initpkt_for_uscsi,		/* Index: 23 */
1965 
1966 	/* Chain for "direct" USCSI commands (all targets) */
1967 	sd_initpkt_for_uscsi,		/* Index: 24 */
1968 
1969 	/* Chain for "direct priority" USCSI commands (all targets) */
1970 	sd_initpkt_for_uscsi,		/* Index: 25 */
1971 
1972 };
1973 
1974 
1975 /*
1976  * Array to map a layering chain index to the appropriate destroypkt routine.
1977  * The redundant entries are present so that the index used for accessing
1978  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1979  * with this table as well.
1980  */
1981 typedef void (*sd_destroypkt_t)(struct buf *);
1982 
1983 static sd_destroypkt_t	sd_destroypkt_map[] = {
1984 
1985 	/* Chain for buf IO for disk drive targets (PM enabled) */
1986 	sd_destroypkt_for_buf,		/* Index: 0 */
1987 	sd_destroypkt_for_buf,		/* Index: 1 */
1988 	sd_destroypkt_for_buf,		/* Index: 2 */
1989 
1990 	/* Chain for buf IO for disk drive targets (PM disabled) */
1991 	sd_destroypkt_for_buf,		/* Index: 3 */
1992 	sd_destroypkt_for_buf,		/* Index: 4 */
1993 
1994 	/* Chain for buf IO for removable-media targets (PM enabled) */
1995 	sd_destroypkt_for_buf,		/* Index: 5 */
1996 	sd_destroypkt_for_buf,		/* Index: 6 */
1997 	sd_destroypkt_for_buf,		/* Index: 7 */
1998 	sd_destroypkt_for_buf,		/* Index: 8 */
1999 
2000 	/* Chain for buf IO for removable-media targets (PM disabled) */
2001 	sd_destroypkt_for_buf,		/* Index: 9 */
2002 	sd_destroypkt_for_buf,		/* Index: 10 */
2003 	sd_destroypkt_for_buf,		/* Index: 11 */
2004 
2005 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2006 	sd_destroypkt_for_buf,		/* Index: 12 */
2007 	sd_destroypkt_for_buf,		/* Index: 13 */
2008 	sd_destroypkt_for_buf,		/* Index: 14 */
2009 	sd_destroypkt_for_buf,		/* Index: 15 */
2010 
2011 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2012 	sd_destroypkt_for_buf,		/* Index: 16 */
2013 	sd_destroypkt_for_buf,		/* Index: 17 */
2014 	sd_destroypkt_for_buf,		/* Index: 18 */
2015 
2016 	/* Chain for USCSI commands (non-checksum targets) */
2017 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2018 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2019 
2020 	/* Chain for USCSI commands (checksum targets) */
2021 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2022 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2023 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2024 
2025 	/* Chain for "direct" USCSI commands (all targets) */
2026 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2027 
2028 	/* Chain for "direct priority" USCSI commands (all targets) */
2029 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2030 
2031 };
2032 
2033 
2034 
2035 /*
2036  * Array to map a layering chain index to the appropriate chain "type".
2037  * The chain type indicates a specific property/usage of the chain.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 
2043 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2044 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2045 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2046 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2047 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2048 						/* (for error recovery) */
2049 
2050 static int sd_chain_type_map[] = {
2051 
2052 	/* Chain for buf IO for disk drive targets (PM enabled) */
2053 	SD_CHAIN_BUFIO,			/* Index: 0 */
2054 	SD_CHAIN_BUFIO,			/* Index: 1 */
2055 	SD_CHAIN_BUFIO,			/* Index: 2 */
2056 
2057 	/* Chain for buf IO for disk drive targets (PM disabled) */
2058 	SD_CHAIN_BUFIO,			/* Index: 3 */
2059 	SD_CHAIN_BUFIO,			/* Index: 4 */
2060 
2061 	/* Chain for buf IO for removable-media targets (PM enabled) */
2062 	SD_CHAIN_BUFIO,			/* Index: 5 */
2063 	SD_CHAIN_BUFIO,			/* Index: 6 */
2064 	SD_CHAIN_BUFIO,			/* Index: 7 */
2065 	SD_CHAIN_BUFIO,			/* Index: 8 */
2066 
2067 	/* Chain for buf IO for removable-media targets (PM disabled) */
2068 	SD_CHAIN_BUFIO,			/* Index: 9 */
2069 	SD_CHAIN_BUFIO,			/* Index: 10 */
2070 	SD_CHAIN_BUFIO,			/* Index: 11 */
2071 
2072 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2073 	SD_CHAIN_BUFIO,			/* Index: 12 */
2074 	SD_CHAIN_BUFIO,			/* Index: 13 */
2075 	SD_CHAIN_BUFIO,			/* Index: 14 */
2076 	SD_CHAIN_BUFIO,			/* Index: 15 */
2077 
2078 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2079 	SD_CHAIN_BUFIO,			/* Index: 16 */
2080 	SD_CHAIN_BUFIO,			/* Index: 17 */
2081 	SD_CHAIN_BUFIO,			/* Index: 18 */
2082 
2083 	/* Chain for USCSI commands (non-checksum targets) */
2084 	SD_CHAIN_USCSI,			/* Index: 19 */
2085 	SD_CHAIN_USCSI,			/* Index: 20 */
2086 
2087 	/* Chain for USCSI commands (checksum targets) */
2088 	SD_CHAIN_USCSI,			/* Index: 21 */
2089 	SD_CHAIN_USCSI,			/* Index: 22 */
2090 	SD_CHAIN_USCSI,			/* Index: 23 */
2091 
2092 	/* Chain for "direct" USCSI commands (all targets) */
2093 	SD_CHAIN_DIRECT,		/* Index: 24 */
2094 
2095 	/* Chain for "direct priority" USCSI commands (all targets) */
2096 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2097 };
2098 
2099 
2100 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2101 #define	SD_IS_BUFIO(xp)			\
2102 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2103 
2104 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2105 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2106 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
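
/*
 * Usage sketch (illustrative): these predicates let common code branch on
 * the chain type of an xbuf without examining the raw index, e.g.:
 *
 *	if (SD_IS_BUFIO(xp)) {
 *		(normal buf(9S) completion accounting)
 *	}
 */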
2107 
2108 
2109 
2110 /*
2111  * Struct, array, and macros to map a specific chain to the appropriate
2112  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2113  *
2114  * The sd_chain_index_map[] array is used at attach time to set the various
2115  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2116  * chain to be used with the instance. This allows different instances to use
2117  * different chains for buf IO, uscsi IO, etc. Also, the xb_chain_iostart
2118  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2119  * values at sd_xbuf init time; this allows (1) layering chains to be changed
2120  * dynamically and without the use of locking; and (2) a layer to update the
2121  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2122  * to allow for deferred processing of an IO within the same chain from a
2123  * different execution context.
2124  */
2125 
2126 struct sd_chain_index {
2127 	int	sci_iostart_index;
2128 	int	sci_iodone_index;
2129 };
2130 
2131 static struct sd_chain_index	sd_chain_index_map[] = {
2132 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2133 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2134 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2135 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2136 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2137 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2138 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2139 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2140 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2141 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2142 };
2143 
2144 
2145 /*
2146  * The following are indexes into the sd_chain_index_map[] array.
2147  */
2148 
2149 /* un->un_buf_chain_type must be set to one of these */
2150 #define	SD_CHAIN_INFO_DISK		0
2151 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2152 #define	SD_CHAIN_INFO_RMMEDIA		2
2153 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2154 #define	SD_CHAIN_INFO_CHKSUM		4
2155 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2156 
2157 /* un->un_uscsi_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_USCSI_CMD		6
2159 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2160 /* USCSI with PM disabled is the same as DIRECT */
2161 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2162 
2163 /* un->un_direct_chain_type must be set to one of these */
2164 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2165 
2166 /* un->un_priority_chain_type must be set to one of these */
2167 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
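
/*
 * Usage sketch (illustrative): at attach time an instance could be set up
 * for regular disk IO, with each sd_xbuf then seeded from the map:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */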
2168 
2169 /* size for devid inquiries */
2170 #define	MAX_INQUIRY_SIZE		0xF0
2171 
2172 /*
2173  * Macros used by functions to pass a given buf(9S) struct along to the
2174  * next function in the layering chain for further processing.
2175  *
2176  * In the following macros, passing more than three arguments to the called
2177  * routines causes the optimizer for the SPARC compiler to stop doing tail
2178  * call elimination, which results in significant performance degradation.
2179  */
2180 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2181 	((*(sd_iostart_chain[index]))(index, un, bp))
2182 
2183 #define	SD_BEGIN_IODONE(index, un, bp)	\
2184 	((*(sd_iodone_chain[index]))(index, un, bp))
2185 
2186 #define	SD_NEXT_IOSTART(index, un, bp)				\
2187 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2188 
2189 #define	SD_NEXT_IODONE(index, un, bp)				\
2190 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
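
/*
 * Sketch (illustrative): a hypothetical mid-chain iostart routine would
 * match the sd_chain_t signature, do its layer-specific work, and then
 * forward the buf using its own index:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(layer-specific processing of bp)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */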
2191 
2192 /*
2193  *    Function: _init
2194  *
2195  * Description: This is the driver _init(9E) entry point.
2196  *
2197  * Return Code: Returns the value from mod_install(9F) or
2198  *		ddi_soft_state_init(9F) as appropriate.
2199  *
2200  *     Context: Called when driver module loaded.
2201  */
2202 
2203 int
2204 _init(void)
2205 {
2206 	int	err;
2207 
2208 	/* establish driver name from module name */
2209 	sd_label = mod_modname(&modlinkage);
2210 
2211 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2212 		SD_MAXUNIT);
2213 
2214 	if (err != 0) {
2215 		return (err);
2216 	}
2217 
2218 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2219 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2220 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2221 
2222 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2223 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2224 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2225 
2226 	/*
2227 	 * it's ok to init here even for fibre devices
2228 	 */
2229 	sd_scsi_probe_cache_init();
2230 
2231 	/*
2232 	 * Creating taskq before mod_install ensures that all callers (threads)
2233 	 * that enter the module after a successful mod_install encounter
2234 	 * a valid taskq.
2235 	 */
2236 	sd_taskq_create();
2237 
2238 	err = mod_install(&modlinkage);
2239 	if (err != 0) {
2240 		/* delete taskq if install fails */
2241 		sd_taskq_delete();
2242 
2243 		mutex_destroy(&sd_detach_mutex);
2244 		mutex_destroy(&sd_log_mutex);
2245 		mutex_destroy(&sd_label_mutex);
2246 
2247 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2249 		cv_destroy(&sd_tr.srq_inprocess_cv);
2250 
2251 		sd_scsi_probe_cache_fini();
2252 
2253 		ddi_soft_state_fini(&sd_state);
2254 		return (err);
2255 	}
2256 
2257 	return (err);
2258 }
2259 
2260 
2261 /*
2262  *    Function: _fini
2263  *
2264  * Description: This is the driver _fini(9E) entry point.
2265  *
2266  * Return Code: Returns the value from mod_remove(9F)
2267  *
2268  *     Context: Called when driver module is unloaded.
2269  */
2270 
2271 int
2272 _fini(void)
2273 {
2274 	int err;
2275 
2276 	if ((err = mod_remove(&modlinkage)) != 0) {
2277 		return (err);
2278 	}
2279 
2280 	sd_taskq_delete();
2281 
2282 	mutex_destroy(&sd_detach_mutex);
2283 	mutex_destroy(&sd_log_mutex);
2284 	mutex_destroy(&sd_label_mutex);
2285 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2286 
2287 	sd_scsi_probe_cache_fini();
2288 
2289 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2290 	cv_destroy(&sd_tr.srq_inprocess_cv);
2291 
2292 	ddi_soft_state_fini(&sd_state);
2293 
2294 	return (err);
2295 }
2296 
2297 
2298 /*
2299  *    Function: _info
2300  *
2301  * Description: This is the driver _info(9E) entry point.
2302  *
2303  *   Arguments: modinfop - pointer to the driver modinfo structure
2304  *
2305  * Return Code: Returns the value from mod_info(9F).
2306  *
2307  *     Context: Kernel thread context
2308  */
2309 
2310 int
2311 _info(struct modinfo *modinfop)
2312 {
2313 	return (mod_info(&modlinkage, modinfop));
2314 }
2315 
2316 
2317 /*
2318  * The following routines implement the driver message logging facility.
2319  * They provide component- and level-based debug output filtering.
2320  * Output may also be restricted to messages for a single instance by
2321  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2322  * to NULL, then messages for all instances are printed.
2323  *
2324  * These routines have been cloned from each other due to the language
2325  * constraints of macros and variable argument list processing.
2326  */
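
/*
 * Tuning sketch (illustrative; the mask values themselves live in sddef.h
 * and are assumed here): output can be narrowed at boot time via
 * /etc/system, e.g.:
 *
 *	set sd:sd_component_mask = ...	(components of interest)
 *	set sd:sd_level_mask = ...	(SD_LOGMASK_ERROR, etc.)
 *
 * and sd_debug_un may be pointed at a single instance's soft state (for
 * example with a kernel debugger) to silence all other instances.
 */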
2327 
2328 
2329 /*
2330  *    Function: sd_log_err
2331  *
2332  * Description: This routine is called by the SD_ERROR macro for debug
2333  *		logging of error conditions.
2334  *
2335  *   Arguments: comp - driver component being logged
2336  *		un   - pointer to driver soft state (unit) structure
2337  *		fmt  - error string and format to be logged
2338  */
2339 
2340 static void
2341 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2342 {
2343 	va_list		ap;
2344 	dev_info_t	*dev;
2345 
2346 	ASSERT(un != NULL);
2347 	dev = SD_DEVINFO(un);
2348 	ASSERT(dev != NULL);
2349 
2350 	/*
2351 	 * Filter messages based on the global component and level masks.
2352 	 * Also print if un matches the value of sd_debug_un, or if
2353 	 * sd_debug_un is set to NULL.
2354 	 */
2355 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2356 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2357 		mutex_enter(&sd_log_mutex);
2358 		va_start(ap, fmt);
2359 		(void) vsprintf(sd_log_buf, fmt, ap);
2360 		va_end(ap);
2361 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2362 		mutex_exit(&sd_log_mutex);
2363 	}
2364 #ifdef SD_FAULT_INJECTION
2365 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2366 	if (un->sd_injection_mask & comp) {
2367 		mutex_enter(&sd_log_mutex);
2368 		va_start(ap, fmt);
2369 		(void) vsprintf(sd_log_buf, fmt, ap);
2370 		va_end(ap);
2371 		sd_injection_log(sd_log_buf, un);
2372 		mutex_exit(&sd_log_mutex);
2373 	}
2374 #endif
2375 }
2376 
2377 
2378 /*
2379  *    Function: sd_log_info
2380  *
2381  * Description: This routine is called by the SD_INFO macro for debug
2382  *		logging of general purpose informational conditions.
2383  *
2384  *   Arguments: component - driver component being logged
2385  *		un  - pointer to driver soft state (unit) structure
2386  *		fmt  - info string and format to be logged
2387  */
2388 
2389 static void
2390 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2391 {
2392 	va_list		ap;
2393 	dev_info_t	*dev;
2394 
2395 	ASSERT(un != NULL);
2396 	dev = SD_DEVINFO(un);
2397 	ASSERT(dev != NULL);
2398 
2399 	/*
2400 	 * Filter messages based on the global component and level masks.
2401 	 * Also print if un matches the value of sd_debug_un, or if
2402 	 * sd_debug_un is set to NULL.
2403 	 */
2404 	if ((sd_component_mask & component) &&
2405 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2406 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2407 		mutex_enter(&sd_log_mutex);
2408 		va_start(ap, fmt);
2409 		(void) vsprintf(sd_log_buf, fmt, ap);
2410 		va_end(ap);
2411 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2412 		mutex_exit(&sd_log_mutex);
2413 	}
2414 #ifdef SD_FAULT_INJECTION
2415 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2416 	if (un->sd_injection_mask & component) {
2417 		mutex_enter(&sd_log_mutex);
2418 		va_start(ap, fmt);
2419 		(void) vsprintf(sd_log_buf, fmt, ap);
2420 		va_end(ap);
2421 		sd_injection_log(sd_log_buf, un);
2422 		mutex_exit(&sd_log_mutex);
2423 	}
2424 #endif
2425 }
2426 
2427 
2428 /*
2429  *    Function: sd_log_trace
2430  *
2431  * Description: This routine is called by the SD_TRACE macro for debug
2432  *		logging of trace conditions (i.e. function entry/exit).
2433  *
2434  *   Arguments: component - driver component being logged
2435  *		un  - pointer to driver soft state (unit) structure
2436  *		fmt  - trace string and format to be logged
2437  */
2438 
2439 static void
2440 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2441 {
2442 	va_list		ap;
2443 	dev_info_t	*dev;
2444 
2445 	ASSERT(un != NULL);
2446 	dev = SD_DEVINFO(un);
2447 	ASSERT(dev != NULL);
2448 
2449 	/*
2450 	 * Filter messages based on the global component and level masks.
2451 	 * Also print if un matches the value of sd_debug_un, or if
2452 	 * sd_debug_un is set to NULL.
2453 	 */
2454 	if ((sd_component_mask & component) &&
2455 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2456 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2457 		mutex_enter(&sd_log_mutex);
2458 		va_start(ap, fmt);
2459 		(void) vsprintf(sd_log_buf, fmt, ap);
2460 		va_end(ap);
2461 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2462 		mutex_exit(&sd_log_mutex);
2463 	}
2464 #ifdef SD_FAULT_INJECTION
2465 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2466 	if (un->sd_injection_mask & component) {
2467 		mutex_enter(&sd_log_mutex);
2468 		va_start(ap, fmt);
2469 		(void) vsprintf(sd_log_buf, fmt, ap);
2470 		va_end(ap);
2471 		sd_injection_log(sd_log_buf, un);
2472 		mutex_exit(&sd_log_mutex);
2473 	}
2474 #endif
2475 }
2476 
2477 
2478 /*
2479  *    Function: sdprobe
2480  *
2481  * Description: This is the driver probe(9e) entry point function.
2482  *
2483  *   Arguments: devi - opaque device info handle
2484  *
2485  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2486  *              DDI_PROBE_FAILURE: If the probe failed.
2487  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2488  *				   but may be present in the future.
2489  */
2490 
2491 static int
2492 sdprobe(dev_info_t *devi)
2493 {
2494 	struct scsi_device	*devp;
2495 	int			rval;
2496 	int			instance;
2497 
2498 	/*
2499 	 * if it wasn't for pln, sdprobe could actually be nulldev
2500 	 * in the "__fibre" case.
2501 	 */
2502 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2503 		return (DDI_PROBE_DONTCARE);
2504 	}
2505 
2506 	devp = ddi_get_driver_private(devi);
2507 
2508 	if (devp == NULL) {
2509 		/* Ooops... nexus driver is mis-configured... */
2510 		return (DDI_PROBE_FAILURE);
2511 	}
2512 
2513 	instance = ddi_get_instance(devi);
2514 
2515 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2516 		return (DDI_PROBE_PARTIAL);
2517 	}
2518 
2519 	/*
2520 	 * Call the SCSA utility probe routine to see if we actually
2521 	 * have a target at this SCSI nexus.
2522 	 */
2523 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2524 	case SCSIPROBE_EXISTS:
2525 		switch (devp->sd_inq->inq_dtype) {
2526 		case DTYPE_DIRECT:
2527 			rval = DDI_PROBE_SUCCESS;
2528 			break;
2529 		case DTYPE_RODIRECT:
2530 			/* CDs etc. Can be removable media */
2531 			rval = DDI_PROBE_SUCCESS;
2532 			break;
2533 		case DTYPE_OPTICAL:
2534 			/*
2535 			 * Rewritable optical drive HP115AA
2536 			 * Can also be removable media
2537 			 */
2538 
2539 			/*
2540 			 * Do not attempt to bind to DTYPE_OPTICAL if
2541 			 * pre-Solaris 9 SPARC sd behavior is required.
2542 			 *
2543 			 * If first time through and sd_dtype_optical_bind
2544 			 * has not been set in /etc/system, check properties.
2545 			 */
2546 
2547 			if (sd_dtype_optical_bind < 0) {
2548 				sd_dtype_optical_bind = ddi_prop_get_int(
2549 				    DDI_DEV_T_ANY, devi, 0,
2550 				    "optical-device-bind", 1);
2551 			}
2552 
2553 			if (sd_dtype_optical_bind == 0) {
2554 				rval = DDI_PROBE_FAILURE;
2555 			} else {
2556 				rval = DDI_PROBE_SUCCESS;
2557 			}
2558 			break;
2559 
2560 		case DTYPE_NOTPRESENT:
2561 		default:
2562 			rval = DDI_PROBE_FAILURE;
2563 			break;
2564 		}
2565 		break;
2566 	default:
2567 		rval = DDI_PROBE_PARTIAL;
2568 		break;
2569 	}
2570 
2571 	/*
2572 	 * This routine checks for resource allocation prior to freeing,
2573 	 * so it will take care of the "smart probing" case where a
2574 	 * scsi_probe() may or may not have been issued and will *not*
2575 	 * free previously-freed resources.
2576 	 */
2577 	scsi_unprobe(devp);
2578 	return (rval);
2579 }
2580 
2581 
2582 /*
2583  *    Function: sdinfo
2584  *
2585  * Description: This is the driver getinfo(9e) entry point function.
2586  * 		Given the device number, return the devinfo pointer from
2587  *		the scsi_device structure or the instance number
2588  *		associated with the dev_t.
2589  *
2590  *   Arguments: dip     - pointer to device info structure
2591  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2592  *			  DDI_INFO_DEVT2INSTANCE)
2593  *		arg     - driver dev_t
2594  *		resultp - user buffer for request response
2595  *
2596  * Return Code: DDI_SUCCESS
2597  *              DDI_FAILURE
2598  */
2599 /* ARGSUSED */
2600 static int
2601 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2602 {
2603 	struct sd_lun	*un;
2604 	dev_t		dev;
2605 	int		instance;
2606 	int		error;
2607 
2608 	switch (infocmd) {
2609 	case DDI_INFO_DEVT2DEVINFO:
2610 		dev = (dev_t)arg;
2611 		instance = SDUNIT(dev);
2612 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2613 			return (DDI_FAILURE);
2614 		}
2615 		*result = (void *) SD_DEVINFO(un);
2616 		error = DDI_SUCCESS;
2617 		break;
2618 	case DDI_INFO_DEVT2INSTANCE:
2619 		dev = (dev_t)arg;
2620 		instance = SDUNIT(dev);
2621 		*result = (void *)(uintptr_t)instance;
2622 		error = DDI_SUCCESS;
2623 		break;
2624 	default:
2625 		error = DDI_FAILURE;
2626 	}
2627 	return (error);
2628 }
2629 
2630 /*
2631  *    Function: sd_prop_op
2632  *
2633  * Description: This is the driver prop_op(9e) entry point function.
2634  *		Return the number of blocks for the partition in question
2635  *		or forward the request to the property facilities.
2636  *
2637  *   Arguments: dev       - device number
2638  *		dip       - pointer to device info structure
2639  *		prop_op   - property operator
2640  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2641  *		name      - pointer to property name
2642  *		valuep    - pointer or address of the user buffer
2643  *		lengthp   - property length
2644  *
2645  * Return Code: DDI_PROP_SUCCESS
2646  *              DDI_PROP_NOT_FOUND
2647  *              DDI_PROP_UNDEFINED
2648  *              DDI_PROP_NO_MEMORY
2649  *              DDI_PROP_BUF_TOO_SMALL
2650  */
2651 
2652 static int
2653 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2654 	char *name, caddr_t valuep, int *lengthp)
2655 {
2656 	int		instance = ddi_get_instance(dip);
2657 	struct sd_lun	*un;
2658 	uint64_t	nblocks64;
2659 
2660 	/*
2661 	 * Our dynamic properties are all device specific and size oriented.
2662 	 * Requests issued under conditions where size is valid are passed
2663  * to ddi_prop_op_nblocks with the size information; otherwise the
2664 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2665 	 */
2666 	un = ddi_get_soft_state(sd_state, instance);
2667 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2668 	    (un->un_f_geometry_is_valid == FALSE)) {
2669 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2670 		    name, valuep, lengthp));
2671 	} else {
2672 		/* get nblocks value */
2673 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2674 		mutex_enter(SD_MUTEX(un));
2675 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2676 		mutex_exit(SD_MUTEX(un));
2677 
2678 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2679 		    name, valuep, lengthp, nblocks64));
2680 	}
2681 }
2682 
2683 /*
2684  * The following functions are for smart probing:
2685  * sd_scsi_probe_cache_init()
2686  * sd_scsi_probe_cache_fini()
2687  * sd_scsi_clear_probe_cache()
2688  * sd_scsi_probe_with_cache()
2689  */
2690 
2691 /*
2692  *    Function: sd_scsi_probe_cache_init
2693  *
2694  * Description: Initializes the probe response cache mutex and head pointer.
2695  *
2696  *     Context: Kernel thread context
2697  */
2698 
2699 static void
2700 sd_scsi_probe_cache_init(void)
2701 {
2702 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2703 	sd_scsi_probe_cache_head = NULL;
2704 }
2705 
2706 
2707 /*
2708  *    Function: sd_scsi_probe_cache_fini
2709  *
2710  * Description: Frees all resources associated with the probe response cache.
2711  *
2712  *     Context: Kernel thread context
2713  */
2714 
2715 static void
2716 sd_scsi_probe_cache_fini(void)
2717 {
2718 	struct sd_scsi_probe_cache *cp;
2719 	struct sd_scsi_probe_cache *ncp;
2720 
2721 	/* Clean up our smart probing linked list */
2722 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2723 		ncp = cp->next;
2724 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2725 	}
2726 	sd_scsi_probe_cache_head = NULL;
2727 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_clear_probe_cache
2733  *
2734  * Description: This routine clears the probe response cache. This is
2735  *		done when open() returns ENXIO so that when deferred
2736  *		attach is attempted (possibly after a device has been
2737  *		turned on) we will retry the probe. Since we don't know
2738  *		which target we failed to open, we just clear the
2739  *		entire cache.
2740  *
2741  *     Context: Kernel thread context
2742  */
2743 
2744 static void
2745 sd_scsi_clear_probe_cache(void)
2746 {
2747 	struct sd_scsi_probe_cache	*cp;
2748 	int				i;
2749 
2750 	mutex_enter(&sd_scsi_probe_cache_mutex);
2751 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2752 		/*
2753 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2754 		 * force probing to be performed the next time
2755 		 * sd_scsi_probe_with_cache is called.
2756 		 */
2757 		for (i = 0; i < NTARGETS_WIDE; i++) {
2758 			cp->cache[i] = SCSIPROBE_EXISTS;
2759 		}
2760 	}
2761 	mutex_exit(&sd_scsi_probe_cache_mutex);
2762 }
2763 
2764 
2765 /*
2766  *    Function: sd_scsi_probe_with_cache
2767  *
2768  * Description: This routine implements support for a scsi device probe
2769  *		with cache. The driver maintains a cache of the target
2770  *		responses to scsi probes. If we get no response from a
2771  *		target during a probe inquiry, we remember that, and we
2772  *		avoid additional calls to scsi_probe on non-zero LUNs
2773  *		on the same target until the cache is cleared. By doing
2774  *		so we avoid the 1/4 sec selection timeout for nonzero
2775  *		LUNs. lun0 of a target is always probed.
2776  *
2777  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2778  *              waitfunc - indicates what the allocator routines should
2779  *			   do when resources are not available. This value
2780  *			   is passed on to scsi_probe() when that routine
2781  *			   is called.
2782  *
2783  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2784  *		otherwise the value returned by scsi_probe(9F).
2785  *
2786  *     Context: Kernel thread context
2787  */
2788 
2789 static int
2790 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2791 {
2792 	struct sd_scsi_probe_cache	*cp;
2793 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2794 	int		lun, tgt;
2795 
2796 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2797 	    SCSI_ADDR_PROP_LUN, 0);
2798 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2799 	    SCSI_ADDR_PROP_TARGET, -1);
2800 
2801 	/* Make sure caching enabled and target in range */
2802 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2803 		/* do it the old way (no cache) */
2804 		return (scsi_probe(devp, waitfn));
2805 	}
2806 
2807 	mutex_enter(&sd_scsi_probe_cache_mutex);
2808 
2809 	/* Find the cache for this scsi bus instance */
2810 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2811 		if (cp->pdip == pdip) {
2812 			break;
2813 		}
2814 	}
2815 
2816 	/* If we can't find a cache for this pdip, create one */
2817 	if (cp == NULL) {
2818 		int i;
2819 
2820 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2821 		    KM_SLEEP);
2822 		cp->pdip = pdip;
2823 		cp->next = sd_scsi_probe_cache_head;
2824 		sd_scsi_probe_cache_head = cp;
2825 		for (i = 0; i < NTARGETS_WIDE; i++) {
2826 			cp->cache[i] = SCSIPROBE_EXISTS;
2827 		}
2828 	}
2829 
2830 	mutex_exit(&sd_scsi_probe_cache_mutex);
2831 
2832 	/* Recompute the cache for this target if LUN zero */
2833 	if (lun == 0) {
2834 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2835 	}
2836 
2837 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2838 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2839 		return (SCSIPROBE_NORESP);
2840 	}
2841 
2842 	/* Do the actual probe; save & return the result */
2843 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2844 }
2845 
2846 
2847 /*
2848  *    Function: sd_spin_up_unit
2849  *
2850  * Description: Issues the following commands to spin-up the device:
2851  *		START STOP UNIT, and INQUIRY.
2852  *
2853  *   Arguments: un - driver soft state (unit) structure
2854  *
2855  * Return Code: 0 - success
2856  *		EIO - failure
2857  *		EACCES - reservation conflict
2858  *
2859  *     Context: Kernel thread context
2860  */
2861 
2862 static int
2863 sd_spin_up_unit(struct sd_lun *un)
2864 {
2865 	size_t	resid		= 0;
2866 	int	has_conflict	= FALSE;
2867 	uchar_t *bufaddr;
2868 
2869 	ASSERT(un != NULL);
2870 
2871 	/*
2872 	 * Send a throwaway START UNIT command.
2873 	 *
2874 	 * If we fail on this, we don't care presently what precisely
2875 	 * is wrong.  EMC's arrays will also fail this with a check
2876 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2877 	 * we don't want to fail the attach because it may become
2878 	 * "active" later.
2879 	 */
2880 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2881 	    == EACCES)
2882 		has_conflict = TRUE;
2883 
2884 	/*
2885 	 * Send another INQUIRY command to the target. This is necessary for
2886 	 * non-removable media direct access devices because their INQUIRY data
2887 	 * may not be fully qualified until they are spun up (perhaps via the
2888 	 * START command above).  (Note: This seems to be needed for some
2889 	 * legacy devices only.)  The INQUIRY command should succeed even if a
2890 	 * Reservation Conflict is present.
2891 	 */
2892 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2893 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2894 		kmem_free(bufaddr, SUN_INQSIZE);
2895 		return (EIO);
2896 	}
2897 
2898 	/*
2899 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2900 	 * Note that this routine does not return a failure here even if the
2901 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2902 	 */
2903 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2904 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2905 	}
2906 
2907 	kmem_free(bufaddr, SUN_INQSIZE);
2908 
2909 	/* If we hit a reservation conflict above, tell the caller. */
2910 	if (has_conflict == TRUE) {
2911 		return (EACCES);
2912 	}
2913 
2914 	return (0);
2915 }
2916 
2917 #ifdef _LP64
2918 /*
2919  *    Function: sd_enable_descr_sense
2920  *
2921  * Description: This routine attempts to select descriptor sense format
2922  *		using the Control mode page.  Devices that support 64 bit
2923  *		LBAs (for >2TB luns) should also implement descriptor
2924  *		sense data so we will call this function whenever we see
2925  *		a lun larger than 2TB.  If for some reason the device
2926  *		supports 64 bit LBAs but doesn't support descriptor sense
2927  *		presumably the mode select will fail.  Everything will
2928  *		continue to work normally except that we will not get
2929  *		complete sense data for commands that fail with an LBA
2930  *		larger than 32 bits.
2931  *
2932  *   Arguments: un - driver soft state (unit) structure
2933  *
2934  *     Context: Kernel thread context only
2935  */
2936 
2937 static void
2938 sd_enable_descr_sense(struct sd_lun *un)
2939 {
2940 	uchar_t			*header;
2941 	struct mode_control_scsi3 *ctrl_bufp;
2942 	size_t			buflen;
2943 	size_t			bd_len;
2944 
2945 	/*
2946 	 * Read MODE SENSE page 0xA, Control Mode Page
2947 	 */
2948 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2949 	    sizeof (struct mode_control_scsi3);
2950 	header = kmem_zalloc(buflen, KM_SLEEP);
2951 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2952 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2953 		SD_ERROR(SD_LOG_COMMON, un,
2954 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2955 		goto eds_exit;
2956 	}
2957 
2958 	/*
2959 	 * Determine size of Block Descriptors in order to locate
2960 	 * the mode page data. ATAPI devices return 0, SCSI devices
2961 	 * should return MODE_BLK_DESC_LENGTH.
2962 	 */
2963 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2964 
2965 	ctrl_bufp = (struct mode_control_scsi3 *)
2966 	    (header + MODE_HEADER_LENGTH + bd_len);
2967 
2968 	/*
2969 	 * Clear PS bit for MODE SELECT
2970 	 */
2971 	ctrl_bufp->mode_page.ps = 0;
2972 
2973 	/*
2974 	 * Set D_SENSE to enable descriptor sense format.
2975 	 */
2976 	ctrl_bufp->d_sense = 1;
2977 
2978 	/*
2979 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2980 	 */
2981 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2982 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2983 		SD_INFO(SD_LOG_COMMON, un,
2984 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2985 		goto eds_exit;
2986 	}
2987 
2988 eds_exit:
2989 	kmem_free(header, buflen);
2990 }
2991 
2992 /*
2993  *    Function: sd_reenable_dsense_task
2994  *
2995  * Description: Re-enable descriptor sense after device or bus reset
2996  *
2997  *     Context: Executes in a taskq() thread context
2998  */
2999 static void
3000 sd_reenable_dsense_task(void *arg)
3001 {
3002 	struct	sd_lun	*un = arg;
3003 
3004 	ASSERT(un != NULL);
3005 	sd_enable_descr_sense(un);
3006 }
3007 #endif /* _LP64 */
3008 
3009 /*
3010  *    Function: sd_set_mmc_caps
3011  *
 * Description: This routine determines if the device is MMC compliant and
 *		if it supports CDDA, via a mode sense of the CD/DVD
 *		capabilities mode page. It also checks if the device is a
 *		writable DVD-RAM device.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
3022 static void
3023 sd_set_mmc_caps(struct sd_lun *un)
3024 {
3025 	struct mode_header_grp2		*sense_mhp;
3026 	uchar_t				*sense_page;
3027 	caddr_t				buf;
3028 	int				bd_len;
3029 	int				status;
3030 	struct uscsi_cmd		com;
3031 	int				rtn;
3032 	uchar_t				*out_data_rw, *out_data_hd;
3033 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3034 
3035 	ASSERT(un != NULL);
3036 
3037 	/*
	 * The flags which may be set in this function are: MMC compliant,
	 * DVD-RAM writable device, and CDDA support. Initialize them to
	 * FALSE; if a capability is detected, the corresponding flag will
	 * be set to TRUE.
3041 	 */
3042 	un->un_f_mmc_cap = FALSE;
3043 	un->un_f_dvdram_writable_device = FALSE;
3044 	un->un_f_cfg_cdda = FALSE;
3045 
3046 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3047 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3048 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3049 
3050 	if (status != 0) {
3051 		/* command failed; just return */
3052 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3053 		return;
3054 	}
3055 	/*
3056 	 * If the mode sense request for the CDROM CAPABILITIES
3057 	 * page (0x2A) succeeds the device is assumed to be MMC.
3058 	 */
3059 	un->un_f_mmc_cap = TRUE;
3060 
3061 	/* Get to the page data */
3062 	sense_mhp = (struct mode_header_grp2 *)buf;
3063 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3064 	    sense_mhp->bdesc_length_lo;
3065 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3066 		/*
3067 		 * We did not get back the expected block descriptor
3068 		 * length so we cannot determine if the device supports
3069 		 * CDDA. However, we still indicate the device is MMC
3070 		 * according to the successful response to the page
3071 		 * 0x2A mode sense request.
3072 		 */
3073 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3074 		    "sd_set_mmc_caps: Mode Sense returned "
3075 		    "invalid block descriptor length\n");
3076 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3077 		return;
3078 	}
3079 
3080 	/* See if read CDDA is supported */
3081 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3082 	    bd_len);
3083 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3084 
3085 	/* See if writing DVD RAM is supported. */
3086 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3087 	if (un->un_f_dvdram_writable_device == TRUE) {
3088 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3089 		return;
3090 	}
3091 
3092 	/*
	 * page, we can return here since an RRD will not have
3094 	 * page, we can return here since a RRD will not have
3095 	 * these capabilities.
3096 	 */
3097 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3098 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3099 		return;
3100 	}
3101 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3102 
3103 	/*
3104 	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  An RRD
3106 	 * device is identified by the features RANDOM_WRITABLE and
3107 	 * HARDWARE_DEFECT_MANAGEMENT.
3108 	 */
3109 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3110 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3111 
3112 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3113 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3114 	    RANDOM_WRITABLE);
3115 	if (rtn != 0) {
3116 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3117 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3118 		return;
3119 	}
3120 
3121 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3122 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3123 
3124 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3125 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3126 	    HARDWARE_DEFECT_MANAGEMENT);
3127 	if (rtn == 0) {
3128 		/*
3129 		 * We have good information, check for random writable
3130 		 * and hardware defect features.
3131 		 */
3132 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3133 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3134 			un->un_f_dvdram_writable_device = TRUE;
3135 		}
3136 	}
3137 
3138 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3139 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3140 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3141 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3142 }
3143 
3144 /*
3145  *    Function: sd_check_for_writable_cd
3146  *
 * Description: This routine determines whether the media in the device is
 *		writable. It uses the GET CONFIGURATION command (0x46)
 *		to make that determination.
3150  *
3151  *   Arguments: un - driver soft state (unit) structure
3152  *
3153  *     Context: Never called at interrupt context.
3154  */
3155 
3156 static void
3157 sd_check_for_writable_cd(struct sd_lun *un)
3158 {
3159 	struct uscsi_cmd		com;
3160 	uchar_t				*out_data;
3161 	uchar_t				*rqbuf;
3162 	int				rtn;
3163 	uchar_t				*out_data_rw, *out_data_hd;
3164 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3165 	struct mode_header_grp2		*sense_mhp;
3166 	uchar_t				*sense_page;
3167 	caddr_t				buf;
3168 	int				bd_len;
3169 	int				status;
3170 
3171 	ASSERT(un != NULL);
3172 	ASSERT(mutex_owned(SD_MUTEX(un)));
3173 
3174 	/*
	 * Initialize the writable media flag to FALSE; it will be set
	 * only if the configuration info tells us otherwise.
3177 	 */
3178 	un->un_f_mmc_writable_media = FALSE;
3179 	mutex_exit(SD_MUTEX(un));
3180 
3181 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3182 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3183 
3184 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3185 	    out_data, SD_PROFILE_HEADER_LEN);
3186 
3187 	mutex_enter(SD_MUTEX(un));
3188 	if (rtn == 0) {
3189 		/*
3190 		 * We have good information, check for writable DVD.
3191 		 */
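		/*
		 * Bytes 6-7 of the GET CONFIGURATION header hold the
		 * current profile; profile 0x0012 is DVD-RAM, whose
		 * media is writable.
		 */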
3192 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3193 			un->un_f_mmc_writable_media = TRUE;
3194 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3195 			kmem_free(rqbuf, SENSE_LENGTH);
3196 			return;
3197 		}
3198 	}
3199 
3200 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3201 	kmem_free(rqbuf, SENSE_LENGTH);
3202 
3203 	/*
3204 	 * Determine if this is a RRD type device.
3205 	 */
3206 	mutex_exit(SD_MUTEX(un));
3207 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3208 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3209 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3210 	mutex_enter(SD_MUTEX(un));
3211 	if (status != 0) {
3212 		/* command failed; just return */
3213 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3214 		return;
3215 	}
3216 
3217 	/* Get to the page data */
3218 	sense_mhp = (struct mode_header_grp2 *)buf;
3219 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3220 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3221 		/*
3222 		 * We did not get back the expected block descriptor length so
3223 		 * we cannot check the mode page.
3224 		 */
3225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3226 		    "sd_check_for_writable_cd: Mode Sense returned "
3227 		    "invalid block descriptor length\n");
3228 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3229 		return;
3230 	}
3231 
3232 	/*
3233 	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD device will not have
3235 	 * these capabilities.
3236 	 */
3237 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3238 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3239 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3240 		return;
3241 	}
3242 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3243 
3244 	/*
3245 	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  An RRD device is identified
3247 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3248 	 */
3249 	mutex_exit(SD_MUTEX(un));
3250 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3251 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3252 
3253 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3254 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3255 	    RANDOM_WRITABLE);
3256 	if (rtn != 0) {
3257 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3258 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3259 		mutex_enter(SD_MUTEX(un));
3260 		return;
3261 	}
3262 
3263 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3264 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3265 
3266 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3267 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3268 	    HARDWARE_DEFECT_MANAGEMENT);
3269 	mutex_enter(SD_MUTEX(un));
3270 	if (rtn == 0) {
3271 		/*
		 * We have good information; check that the random writable
		 * and hardware defect features are reported as current.
3274 		 */
3275 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3276 		    (out_data_rw[10] & 0x1) &&
3277 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3278 		    (out_data_hd[10] & 0x1)) {
3279 			un->un_f_mmc_writable_media = TRUE;
3280 		}
3281 	}
3282 
3283 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3284 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3285 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3286 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3287 }
3288 
3289 /*
3290  *    Function: sd_read_unit_properties
3291  *
3292  * Description: The following implements a property lookup mechanism.
3293  *		Properties for particular disks (keyed on vendor, model
3294  *		and rev numbers) are sought in the sd.conf file via
3295  *		sd_process_sdconf_file(), and if not found there, are
3296  *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table(). Once located, the properties
 *		are used to update the driver unit structure.
3299  *
3300  *   Arguments: un - driver soft state (unit) structure
3301  */
3302 
3303 static void
3304 sd_read_unit_properties(struct sd_lun *un)
3305 {
3306 	/*
3307 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3308 	 * the "sd-config-list" property (from the sd.conf file) or if
3309 	 * there was not a match for the inquiry vid/pid. If this event
3310 	 * occurs the static driver configuration table is searched for
3311 	 * a match.
3312 	 */
3313 	ASSERT(un != NULL);
3314 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3315 		sd_process_sdconf_table(un);
3316 	}
3317 
3318 	/* check for LSI device */
3319 	sd_is_lsi(un);
}
3323 
3324 
3325 /*
3326  *    Function: sd_process_sdconf_file
3327  *
3328  * Description: Use ddi_getlongprop to obtain the properties from the
 *		driver's config file (i.e., sd.conf) and update the driver
3330  *		soft state structure accordingly.
3331  *
3332  *   Arguments: un - driver soft state (unit) structure
3333  *
3334  * Return Code: SD_SUCCESS - The properties were successfully set according
3335  *			     to the driver configuration file.
3336  *		SD_FAILURE - The driver config list was not obtained or
3337  *			     there was no vid/pid match. This indicates that
3338  *			     the static config table should be used.
3339  *
3340  * The config file has a property, "sd-config-list", which consists of
3341  * one or more duplets as follows:
3342  *
3343  *  sd-config-list=
3344  *	<duplet>,
3345  *	[<duplet>,]
3346  *	[<duplet>];
3347  *
3348  * The structure of each duplet is as follows:
3349  *
3350  *  <duplet>:= <vid+pid>,<data-property-name_list>
3351  *
3352  * The first entry of the duplet is the device ID string (the concatenated
3353  * vid & pid; not to be confused with a device_id).  This is defined in
3354  * the same way as in the sd_disk_table.
3355  *
3356  * The second part of the duplet is a string that identifies a
3357  * data-property-name-list. The data-property-name-list is defined as
3358  * follows:
3359  *
3360  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3361  *
3362  * The syntax of <data-property-name> depends on the <version> field.
3363  *
3364  * If version = SD_CONF_VERSION_1 we have the following syntax:
3365  *
3366  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3367  *
 * where the prop0 value will be used to set prop0 if bit0 is set in the
 * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3370  *
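 * For example, a hypothetical sd.conf fragment (the vid/pid and the
 * property name "acme-data" below are illustrative only) might be:
 *
 *  sd-config-list = "ACME    SUPERDISK", "acme-data";
 *  acme-data = 1,0x1,32;
 *
 * Here <version> is 1 and <flags> is 0x1, so only prop0 is taken from
 * the list; assuming bit0 is SD_CONF_BSET_THROTTLE, the throttle for
 * matching devices would be set to 32.
 *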
3371  */
3372 
3373 static int
3374 sd_process_sdconf_file(struct sd_lun *un)
3375 {
3376 	char	*config_list = NULL;
3377 	int	config_list_len;
3378 	int	len;
3379 	int	dupletlen = 0;
3380 	char	*vidptr;
3381 	int	vidlen;
3382 	char	*dnlist_ptr;
3383 	char	*dataname_ptr;
3384 	int	dnlist_len;
3385 	int	dataname_len;
3386 	int	*data_list;
3387 	int	data_list_len;
3388 	int	rval = SD_FAILURE;
3389 	int	i;
3390 
3391 	ASSERT(un != NULL);
3392 
3393 	/* Obtain the configuration list associated with the .conf file */
3394 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3395 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3396 	    != DDI_PROP_SUCCESS) {
3397 		return (SD_FAILURE);
3398 	}
3399 
3400 	/*
3401 	 * Compare vids in each duplet to the inquiry vid - if a match is
3402 	 * made, get the data value and update the soft state structure
3403 	 * accordingly.
3404 	 *
3405 	 * Note: This algorithm is complex and difficult to maintain. It should
3406 	 * be replaced with a more robust implementation.
3407 	 */
3408 	for (len = config_list_len, vidptr = config_list; len > 0;
3409 	    vidptr += dupletlen, len -= dupletlen) {
3410 		/*
3411 		 * Note: The assumption here is that each vid entry is on
3412 		 * a unique line from its associated duplet.
3413 		 */
3414 		vidlen = dupletlen = (int)strlen(vidptr);
3415 		if ((vidlen == 0) ||
3416 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3417 			dupletlen++;
3418 			continue;
3419 		}
3420 
3421 		/*
3422 		 * dnlist contains 1 or more blank separated
3423 		 * data-property-name entries
3424 		 */
3425 		dnlist_ptr = vidptr + vidlen + 1;
3426 		dnlist_len = (int)strlen(dnlist_ptr);
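		/*
		 * The +2 below accounts for the NUL bytes that terminate
		 * the vid and dnlist strings; together with the vid length
		 * already in dupletlen, this spans the entire duplet.
		 */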
3427 		dupletlen += dnlist_len + 2;
3428 
3429 		/*
3430 		 * Set a pointer for the first data-property-name
3431 		 * entry in the list
3432 		 */
3433 		dataname_ptr = dnlist_ptr;
3434 		dataname_len = 0;
3435 
3436 		/*
3437 		 * Loop through all data-property-name entries in the
3438 		 * data-property-name-list setting the properties for each.
3439 		 */
3440 		while (dataname_len < dnlist_len) {
3441 			int version;
3442 
3443 			/*
3444 			 * Determine the length of the current
3445 			 * data-property-name entry by indexing until a
			 * blank or NUL is encountered. When a space is
			 * encountered, reset it to a NUL for compliance
3448 			 * with ddi_getlongprop().
3449 			 */
3450 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3451 			    (dataname_ptr[i] != '\0')); i++) {
3452 				;
3453 			}
3454 
3455 			dataname_len += i;
			/* If not NUL terminated, make it so */
3457 			if (dataname_ptr[i] == ' ') {
3458 				dataname_ptr[i] = '\0';
3459 			}
3460 			dataname_len++;
3461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3462 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3463 			    vidptr, dataname_ptr);
3464 
3465 			/* Get the data list */
3466 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3467 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3468 			    != DDI_PROP_SUCCESS) {
3469 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3470 				    "sd_process_sdconf_file: data property (%s)"
3471 				    " has no value\n", dataname_ptr);
3472 				dataname_ptr = dnlist_ptr + dataname_len;
3473 				continue;
3474 			}
3475 
3476 			version = data_list[0];
3477 
3478 			if (version == SD_CONF_VERSION_1) {
3479 				sd_tunables values;
3480 
3481 				/* Set the properties */
3482 				if (sd_chk_vers1_data(un, data_list[1],
3483 				    &data_list[2], data_list_len, dataname_ptr)
3484 				    == SD_SUCCESS) {
3485 					sd_get_tunables_from_conf(un,
3486 					    data_list[1], &data_list[2],
3487 					    &values);
3488 					sd_set_vers1_properties(un,
3489 					    data_list[1], &values);
3490 					rval = SD_SUCCESS;
3491 				} else {
3492 					rval = SD_FAILURE;
3493 				}
3494 			} else {
3495 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3496 				    "data property %s version 0x%x is invalid.",
3497 				    dataname_ptr, version);
3498 				rval = SD_FAILURE;
3499 			}
3500 			kmem_free(data_list, data_list_len);
3501 			dataname_ptr = dnlist_ptr + dataname_len;
3502 		}
3503 	}
3504 
3505 	/* free up the memory allocated by ddi_getlongprop */
3506 	if (config_list) {
3507 		kmem_free(config_list, config_list_len);
3508 	}
3509 
3510 	return (rval);
3511 }
3512 
3513 /*
3514  *    Function: sd_get_tunables_from_conf()
3515  *
3516  *
 *    This function reads the data list from the sd.conf file and pulls
 *    out the values that can have numeric values as arguments, placing
 *    each value in the appropriate sd_tunables member.
 *    Since the order of the data list members varies across platforms,
 *    this function reads them from the data list in a platform-specific
 *    order and places them into the correct sd_tunables member, which
 *    is consistent across all platforms.
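 *
 *    For example (assuming SD_CONF_BSET_THROTTLE is the 0x1 bit of the
 *    flag word), a call with flags == 0x1 and data_list[0] == 32 would
 *    set values->sdt_throttle to 32 and leave the other members zeroed.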
3524  */
3525 static void
3526 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3527     sd_tunables *values)
3528 {
3529 	int i;
3530 	int mask;
3531 
3532 	bzero(values, sizeof (sd_tunables));
3533 
3534 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3535 
3536 		mask = 1 << i;
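		/* No set bits remain at or above this position; done. */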
3537 		if (mask > flags) {
3538 			break;
3539 		}
3540 
3541 		switch (mask & flags) {
3542 		case 0:	/* This mask bit not set in flags */
3543 			continue;
3544 		case SD_CONF_BSET_THROTTLE:
3545 			values->sdt_throttle = data_list[i];
3546 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3547 			    "sd_get_tunables_from_conf: throttle = %d\n",
3548 			    values->sdt_throttle);
3549 			break;
3550 		case SD_CONF_BSET_CTYPE:
3551 			values->sdt_ctype = data_list[i];
3552 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 			    "sd_get_tunables_from_conf: ctype = %d\n",
3554 			    values->sdt_ctype);
3555 			break;
3556 		case SD_CONF_BSET_NRR_COUNT:
3557 			values->sdt_not_rdy_retries = data_list[i];
3558 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3559 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3560 			    values->sdt_not_rdy_retries);
3561 			break;
3562 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3563 			values->sdt_busy_retries = data_list[i];
3564 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3566 			    values->sdt_busy_retries);
3567 			break;
3568 		case SD_CONF_BSET_RST_RETRIES:
3569 			values->sdt_reset_retries = data_list[i];
3570 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3571 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3572 			    values->sdt_reset_retries);
3573 			break;
3574 		case SD_CONF_BSET_RSV_REL_TIME:
3575 			values->sdt_reserv_rel_time = data_list[i];
3576 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3577 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3578 			    values->sdt_reserv_rel_time);
3579 			break;
3580 		case SD_CONF_BSET_MIN_THROTTLE:
3581 			values->sdt_min_throttle = data_list[i];
3582 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3583 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3584 			    values->sdt_min_throttle);
3585 			break;
3586 		case SD_CONF_BSET_DISKSORT_DISABLED:
3587 			values->sdt_disk_sort_dis = data_list[i];
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3590 			    values->sdt_disk_sort_dis);
3591 			break;
3592 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3593 			values->sdt_lun_reset_enable = data_list[i];
3594 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3595 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3596 			    "\n", values->sdt_lun_reset_enable);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  *    Function: sd_process_sdconf_table
3604  *
3605  * Description: Search the static configuration table for a match on the
3606  *		inquiry vid/pid and update the driver soft state structure
3607  *		according to the table property values for the device.
3608  *
3609  *		The form of a configuration table entry is:
3610  *		  <vid+pid>,<flags>,<property-data>
3611  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3612  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3613  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3614  *
3615  *   Arguments: un - driver soft state (unit) structure
3616  */
3617 
3618 static void
3619 sd_process_sdconf_table(struct sd_lun *un)
3620 {
3621 	char	*id = NULL;
3622 	int	table_index;
3623 	int	idlen;
3624 
3625 	ASSERT(un != NULL);
3626 	for (table_index = 0; table_index < sd_disk_table_size;
3627 	    table_index++) {
3628 		id = sd_disk_table[table_index].device_id;
3629 		idlen = strlen(id);
3630 		if (idlen == 0) {
3631 			continue;
3632 		}
3633 
3634 		/*
3635 		 * The static configuration table currently does not
3636 		 * implement version 10 properties. Additionally,
3637 		 * multiple data-property-name entries are not
3638 		 * implemented in the static configuration table.
3639 		 */
3640 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_process_sdconf_table: disk %s\n", id);
3643 			sd_set_vers1_properties(un,
3644 			    sd_disk_table[table_index].flags,
3645 			    sd_disk_table[table_index].properties);
3646 			break;
3647 		}
3648 	}
3649 }
3650 
3651 
3652 /*
3653  *    Function: sd_sdconf_id_match
3654  *
 * Description: This local function implements a case insensitive vid/pid
 *		comparison, as well as the boundary cases of wild cards
 *		and multiple blanks.
3658  *
3659  *		Note: An implicit assumption made here is that the scsi
3660  *		inquiry structure will always keep the vid, pid and
3661  *		revision strings in consecutive sequence, so they can be
3662  *		read as a single string. If this assumption is not the
3663  *		case, a separate string, to be used for the check, needs
3664  *		to be built with these strings concatenated.
3665  *
3666  *   Arguments: un - driver soft state (unit) structure
3667  *		id - table or config file vid/pid
3668  *		idlen  - length of the vid/pid (bytes)
3669  *
3670  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3671  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3672  */
3673 
3674 static int
3675 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3676 {
3677 	struct scsi_inquiry	*sd_inq;
3678 	int 			rval = SD_SUCCESS;
3679 
3680 	ASSERT(un != NULL);
3681 	sd_inq = un->un_sd->sd_inq;
3682 	ASSERT(id != NULL);
3683 
3684 	/*
3685 	 * We use the inq_vid as a pointer to a buffer containing the
3686 	 * vid and pid and use the entire vid/pid length of the table
3687 	 * entry for the comparison. This works because the inq_pid
3688 	 * data member follows inq_vid in the scsi_inquiry structure.
3689 	 */
3690 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3691 		/*
3692 		 * The user id string is compared to the inquiry vid/pid
3693 		 * using a case insensitive comparison and ignoring
3694 		 * multiple spaces.
3695 		 */
3696 		rval = sd_blank_cmp(un, id, idlen);
3697 		if (rval != SD_SUCCESS) {
3698 			/*
3699 			 * User id strings that start and end with a "*"
3700 			 * are a special case. These do not have a
3701 			 * specific vendor, and the product string can
3702 			 * appear anywhere in the 16 byte PID portion of
3703 			 * the inquiry data. This is a simple strstr()
3704 			 * type search for the user id in the inquiry data.
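			 * For example, a hypothetical table entry of
			 * "*CDROM*" would match any device whose inquiry
			 * PID contains the substring "CDROM".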
3705 			 */
3706 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3707 				char	*pidptr = &id[1];
3708 				int	i;
3709 				int	j;
3710 				int	pidstrlen = idlen - 2;
3711 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3712 				    pidstrlen;
3713 
3714 				if (j < 0) {
3715 					return (SD_FAILURE);
3716 				}
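				/*
				 * Check every start offset up to and
				 * including j, so a pattern as long as
				 * inq_pid itself can still match at the
				 * final position.
				 */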
				for (i = 0; i <= j; i++) {
3718 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3719 					    pidptr, pidstrlen) == 0) {
3720 						rval = SD_SUCCESS;
3721 						break;
3722 					}
3723 				}
3724 			}
3725 		}
3726 	}
3727 	return (rval);
3728 }
3729 
3730 
3731 /*
3732  *    Function: sd_blank_cmp
3733  *
3734  * Description: If the id string starts and ends with a space, treat
3735  *		multiple consecutive spaces as equivalent to a single
3736  *		space. For example, this causes a sd_disk_table entry
3737  *		of " NEC CDROM " to match a device's id string of
3738  *		"NEC       CDROM".
3739  *
 *		Note: The success exit condition for this routine is that
 *		the character at the table entry pointer is '\0' and the
 *		count (cnt) of remaining inquiry bytes is zero. This will
 *		happen if the inquiry
3743  *		string returned by the device is padded with spaces to be
3744  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3745  *		SCSI spec states that the inquiry string is to be padded with
3746  *		spaces.
3747  *
3748  *   Arguments: un - driver soft state (unit) structure
3749  *		id - table or config file vid/pid
3750  *		idlen  - length of the vid/pid (bytes)
3751  *
3752  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3753  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3754  */
3755 
3756 static int
3757 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3758 {
3759 	char		*p1;
3760 	char		*p2;
3761 	int		cnt;
3762 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3763 	    sizeof (SD_INQUIRY(un)->inq_pid);
3764 
3765 	ASSERT(un != NULL);
3766 	p2 = un->un_sd->sd_inq->inq_vid;
3767 	ASSERT(id != NULL);
3768 	p1 = id;
3769 
3770 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3771 		/*
3772 		 * Note: string p1 is terminated by a NUL but string p2
3773 		 * isn't.  The end of p2 is determined by cnt.
3774 		 */
3775 		for (;;) {
3776 			/* skip over any extra blanks in both strings */
3777 			while ((*p1 != '\0') && (*p1 == ' ')) {
3778 				p1++;
3779 			}
3780 			while ((cnt != 0) && (*p2 == ' ')) {
3781 				p2++;
3782 				cnt--;
3783 			}
3784 
3785 			/* compare the two strings */
3786 			if ((cnt == 0) ||
3787 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3788 				break;
3789 			}
3790 			while ((cnt > 0) &&
3791 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3792 				p1++;
3793 				p2++;
3794 				cnt--;
3795 			}
3796 		}
3797 	}
3798 
3799 	/* return SD_SUCCESS if both strings match */
3800 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3801 }
3802 
3803 
3804 /*
3805  *    Function: sd_chk_vers1_data
3806  *
3807  * Description: Verify the version 1 device properties provided by the
3808  *		user via the configuration file
3809  *
3810  *   Arguments: un	     - driver soft state (unit) structure
3811  *		flags	     - integer mask indicating properties to be set
3812  *		prop_list    - integer list of property values
3813  *		list_len     - length of user provided data
3814  *
3815  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3816  *		SD_FAILURE - Indicates the user provided data is invalid
3817  */
3818 
3819 static int
3820 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3821     int list_len, char *dataname_ptr)
3822 {
3823 	int i;
3824 	int mask = 1;
3825 	int index = 0;
3826 
3827 	ASSERT(un != NULL);
3828 
3829 	/* Check for a NULL property name and list */
3830 	if (dataname_ptr == NULL) {
3831 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3832 		    "sd_chk_vers1_data: NULL data property name.");
3833 		return (SD_FAILURE);
3834 	}
3835 	if (prop_list == NULL) {
3836 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3837 		    "sd_chk_vers1_data: %s NULL data property list.",
3838 		    dataname_ptr);
3839 		return (SD_FAILURE);
3840 	}
3841 
3842 	/* Display a warning if undefined bits are set in the flags */
3843 	if (flags & ~SD_CONF_BIT_MASK) {
3844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3845 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3846 		    "Properties not set.",
3847 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3848 		return (SD_FAILURE);
3849 	}
3850 
3851 	/*
3852 	 * Verify the length of the list by identifying the highest bit set
3853 	 * in the flags and validating that the property list has a length
	 * up to the index of this bit. Note: the mask is advanced after
	 * each bit is tested, so every flag bit is examined exactly once
	 * and index ends up one past the highest set bit.
	 */
	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
		if (flags & mask) {
			index = i + 1;
		}
		mask <<= 1;
3861 	}
3862 	if ((list_len / sizeof (int)) < (index + 2)) {
3863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3864 		    "sd_chk_vers1_data: "
3865 		    "Data property list %s size is incorrect. "
3866 		    "Properties not set.", dataname_ptr);
3867 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3868 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3869 		return (SD_FAILURE);
3870 	}
3871 	return (SD_SUCCESS);
3872 }
3873 
3874 
3875 /*
3876  *    Function: sd_set_vers1_properties
3877  *
3878  * Description: Set version 1 device properties based on a property list
3879  *		retrieved from the driver configuration file or static
3880  *		configuration table. Version 1 properties have the format:
3881  *
3882  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3883  *
3884  *		where the prop0 value will be used to set prop0 if bit0
3885  *		is set in the flags
3886  *
3887  *   Arguments: un	     - driver soft state (unit) structure
3888  *		flags	     - integer mask indicating properties to be set
3889  *		prop_list    - integer list of property values
3890  */
3891 
3892 static void
3893 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3894 {
3895 	ASSERT(un != NULL);
3896 
3897 	/*
3898 	 * Set the flag to indicate cache is to be disabled. An attempt
3899 	 * to disable the cache via sd_cache_control() will be made
3900 	 * later during attach once the basic initialization is complete.
3901 	 */
3902 	if (flags & SD_CONF_BSET_NOCACHE) {
3903 		un->un_f_opt_disable_cache = TRUE;
3904 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3905 		    "sd_set_vers1_properties: caching disabled flag set\n");
3906 	}
3907 
3908 	/* CD-specific configuration parameters */
3909 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3910 		un->un_f_cfg_playmsf_bcd = TRUE;
3911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3912 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3913 	}
3914 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3915 		un->un_f_cfg_readsub_bcd = TRUE;
3916 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3917 		    "sd_set_vers1_properties: readsub_bcd set\n");
3918 	}
3919 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3920 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3921 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3922 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3923 	}
3924 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3925 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3928 	}
3929 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3930 		un->un_f_cfg_no_read_header = TRUE;
3931 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: no_read_header set\n");
3933 	}
3934 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3935 		un->un_f_cfg_read_cd_xd4 = TRUE;
3936 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3937 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3938 	}
3939 
3940 	/* Support for devices which do not have valid/unique serial numbers */
3941 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3942 		un->un_f_opt_fab_devid = TRUE;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: fab_devid bit set\n");
3945 	}
3946 
3947 	/* Support for user throttle configuration */
3948 	if (flags & SD_CONF_BSET_THROTTLE) {
3949 		ASSERT(prop_list != NULL);
3950 		un->un_saved_throttle = un->un_throttle =
3951 		    prop_list->sdt_throttle;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: throttle set to %d\n",
3954 		    prop_list->sdt_throttle);
3955 	}
3956 
3957 	/* Set the per disk retry count according to the conf file or table. */
3958 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3959 		ASSERT(prop_list != NULL);
3960 		if (prop_list->sdt_not_rdy_retries) {
3961 			un->un_notready_retry_count =
			    prop_list->sdt_not_rdy_retries;
3963 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 			    "sd_set_vers1_properties: not ready retry count"
3965 			    " set to %d\n", un->un_notready_retry_count);
3966 		}
3967 	}
3968 
3969 	/* The controller type is reported for generic disk driver ioctls */
3970 	if (flags & SD_CONF_BSET_CTYPE) {
3971 		ASSERT(prop_list != NULL);
3972 		switch (prop_list->sdt_ctype) {
3973 		case CTYPE_CDROM:
3974 			un->un_ctype = prop_list->sdt_ctype;
3975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3976 			    "sd_set_vers1_properties: ctype set to "
3977 			    "CTYPE_CDROM\n");
3978 			break;
3979 		case CTYPE_CCS:
3980 			un->un_ctype = prop_list->sdt_ctype;
3981 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: ctype set to "
			    "CTYPE_CCS\n");
3984 			break;
3985 		case CTYPE_ROD:		/* RW optical */
3986 			un->un_ctype = prop_list->sdt_ctype;
3987 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 			    "sd_set_vers1_properties: ctype set to "
3989 			    "CTYPE_ROD\n");
3990 			break;
3991 		default:
3992 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3993 			    "sd_set_vers1_properties: Could not set "
3994 			    "invalid ctype value (%d)",
3995 			    prop_list->sdt_ctype);
3996 		}
3997 	}
3998 
3999 	/* Purple failover timeout */
4000 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4001 		ASSERT(prop_list != NULL);
4002 		un->un_busy_retry_count =
		    prop_list->sdt_busy_retries;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: "
4006 		    "busy retry count set to %d\n",
4007 		    un->un_busy_retry_count);
4008 	}
4009 
4010 	/* Purple reset retry count */
4011 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4012 		ASSERT(prop_list != NULL);
4013 		un->un_reset_retry_count =
		    prop_list->sdt_reset_retries;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: "
4017 		    "reset retry count set to %d\n",
4018 		    un->un_reset_retry_count);
4019 	}
4020 
4021 	/* Purple reservation release timeout */
4022 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_reserve_release_time =
		    prop_list->sdt_reserv_rel_time;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: "
4028 		    "reservation release timeout set to %d\n",
4029 		    un->un_reserve_release_time);
4030 	}
4031 
4032 	/*
	 * Flag telling the driver to verify that no commands are pending
4034 	 * for a device before issuing a Test Unit Ready. This is a workaround
4035 	 * for a firmware bug in some Seagate eliteI drives.
4036 	 */
4037 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4038 		un->un_f_cfg_tur_check = TRUE;
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4040 		    "sd_set_vers1_properties: tur queue check set\n");
4041 	}
4042 
4043 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4044 		un->un_min_throttle = prop_list->sdt_min_throttle;
4045 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4046 		    "sd_set_vers1_properties: min throttle set to %d\n",
4047 		    un->un_min_throttle);
4048 	}
4049 
4050 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4051 		un->un_f_disksort_disabled =
4052 		    (prop_list->sdt_disk_sort_dis != 0) ?
4053 		    TRUE : FALSE;
4054 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4055 		    "sd_set_vers1_properties: disksort disabled "
4056 		    "flag set to %d\n",
4057 		    prop_list->sdt_disk_sort_dis);
4058 	}
4059 
4060 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4061 		un->un_f_lun_reset_enabled =
4062 		    (prop_list->sdt_lun_reset_enable != 0) ?
4063 		    TRUE : FALSE;
4064 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 		    "sd_set_vers1_properties: lun reset enabled "
4066 		    "flag set to %d\n",
4067 		    prop_list->sdt_lun_reset_enable);
4068 	}
4069 
4070 	/*
4071 	 * Validate the throttle values.
4072 	 * If any of the numbers are invalid, set everything to defaults.
4073 	 */
4074 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4075 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4076 	    (un->un_min_throttle > un->un_throttle)) {
4077 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4078 		un->un_min_throttle = sd_min_throttle;
4079 	}
4080 }
4081 
4082 /*
4083  *   Function: sd_is_lsi()
4084  *
 *   Description: Check for LSI devices by stepping through the static
 *	device table to match vid/pid.
4087  *
4088  *   Args: un - ptr to sd_lun
4089  *
 *   Notes:  When creating a new LSI property, the new property must also
 *		be added to this function.
4092  */
4093 static void
4094 sd_is_lsi(struct sd_lun *un)
4095 {
4096 	char	*id = NULL;
4097 	int	table_index;
4098 	int	idlen;
4099 	void	*prop;
4100 
4101 	ASSERT(un != NULL);
4102 	for (table_index = 0; table_index < sd_disk_table_size;
4103 	    table_index++) {
4104 		id = sd_disk_table[table_index].device_id;
4105 		idlen = strlen(id);
4106 		if (idlen == 0) {
4107 			continue;
4108 		}
4109 
4110 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4111 			prop = sd_disk_table[table_index].properties;
4112 			if (prop == &lsi_properties ||
4113 			    prop == &lsi_oem_properties ||
4114 			    prop == &lsi_properties_scsi ||
4115 			    prop == &symbios_properties) {
4116 				un->un_f_cfg_is_lsi = TRUE;
4117 			}
4118 			break;
4119 		}
4120 	}
4121 }
4122 
4123 
4124 /*
4125  * The following routines support reading and interpretation of disk labels,
4126  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4127  * fdisk tables.
4128  */
4129 
4130 /*
4131  *    Function: sd_validate_geometry
4132  *
4133  * Description: Read the label from the disk (if present). Update the unit's
4134  *		geometry and vtoc information from the data in the label.
4135  *		Verify that the label is valid.
4136  *
4137  *   Arguments: un - driver soft state (unit) structure
4138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4140  *			to use the USCSI "direct" chain and bypass the normal
4141  *			command waitq.
4142  *
4143  * Return Code: 0 - Successful completion
4144  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4145  *			  un->un_blockcount; or label on disk is corrupted
4146  *			  or unreadable.
4147  *		EACCES  - Reservation conflict at the device.
4148  *		ENOMEM  - Resource allocation error
4149  *		ENOTSUP - geometry not applicable
4150  *
4151  *     Context: Kernel thread only (can sleep).
4152  */
4153 
4154 static int
4155 sd_validate_geometry(struct sd_lun *un, int path_flag)
4156 {
4157 	static	char		labelstring[128];
4158 	static	char		buf[256];
4159 	char	*label		= NULL;
4160 	int	label_error = 0;
4161 	int	gvalid		= un->un_f_geometry_is_valid;
4162 	int	lbasize;
4163 	uint_t	capacity;
4164 	int	count;
4165 
4166 	ASSERT(un != NULL);
4167 	ASSERT(mutex_owned(SD_MUTEX(un)));
4168 
4169 	/*
4170 	 * If the required values are not valid, then try getting them
4171 	 * once via read capacity. If that fails, then fail this call.
	 * This is necessary with the new mpxio failover behavior in
	 * the T300, where we can get an attach for the inactive path
	 * before the active path. The inactive path fails commands with
	 * sense data of 02,04,88; this happens to the read capacity
	 * issued before mpxio has sufficient knowledge to know whether
	 * it should force a failover or not (which it won't do at attach
	 * anyhow).
4178 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4179 	 * un_blockcount won't be valid.
4180 	 */
4181 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4182 	    (un->un_f_blockcount_is_valid != TRUE)) {
4183 		uint64_t	cap;
4184 		uint32_t	lbasz;
4185 		int		rval;
4186 
4187 		mutex_exit(SD_MUTEX(un));
4188 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4189 		    &lbasz, SD_PATH_DIRECT);
4190 		mutex_enter(SD_MUTEX(un));
4191 		if (rval == 0) {
4192 			/*
4193 			 * The following relies on
4194 			 * sd_send_scsi_READ_CAPACITY never
4195 			 * returning 0 for capacity and/or lbasize.
4196 			 */
4197 			sd_update_block_info(un, lbasz, cap);
4198 		}
4199 
4200 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4201 		    (un->un_f_blockcount_is_valid != TRUE)) {
4202 			return (EINVAL);
4203 		}
4204 	}
4205 
4206 	/*
4207 	 * Copy the lbasize and capacity so that if they're reset while we're
4208 	 * not holding the SD_MUTEX, we will continue to use valid values
4209 	 * after the SD_MUTEX is reacquired. (4119659)
4210 	 */
4211 	lbasize  = un->un_tgt_blocksize;
4212 	capacity = un->un_blockcount;
4213 
4214 #if defined(_SUNOS_VTOC_16)
4215 	/*
4216 	 * Set up the "whole disk" fdisk partition; this should always
4217 	 * exist, regardless of whether the disk contains an fdisk table
4218 	 * or vtoc.
4219 	 */
4220 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4221 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4222 #endif
4223 
4224 	/*
4225 	 * Refresh the logical and physical geometry caches.
	 * (Data from MODE SENSE format/rigid disk geometry pages,
	 * and from scsi_ifgetcap("geometry").)
4228 	 */
4229 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4230 
4231 	label_error = sd_use_efi(un, path_flag);
4232 	if (label_error == 0) {
4233 		/* found a valid EFI label */
4234 		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_validate_geometry: found EFI label\n");
4236 		un->un_solaris_offset = 0;
4237 		un->un_solaris_size = capacity;
4238 		return (ENOTSUP);
4239 	}
4240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4241 		if (label_error == ESRCH) {
4242 			/*
4243 			 * they've configured a LUN over 1TB, but used
4244 			 * format.dat to restrict format's view of the
4245 			 * capacity to be under 1TB
4246 			 */
4247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4248 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4249 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4250 "size to be < 1TB or relabel the disk with an EFI label");
4251 		} else {
4252 			/* unlabeled disk over 1TB */
4253 #if defined(__i386) || defined(__amd64)
4254 			/*
			 * Refer to the comments on the off-by-1 issue at the
			 * head of the file. A 1TB disk was treated as
			 * (1T - 512)B in the past, so it might have a valid
			 * Solaris partition. We will return ENOTSUP later
			 * only if this disk has no valid Solaris partition.
4260 			 */
4261 			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
4262 			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
4263 			    un->un_f_has_removable_media ||
4264 			    un->un_f_is_hotpluggable)
4265 #endif
4266 				return (ENOTSUP);
4267 		}
4268 	}
4269 	label_error = 0;
4270 
4271 	/*
	 * At this point the disk is either labeled with a VTOC or it is
	 * under 1TB (<= 1TB actually, due to the off-by-1 issue).
4274 	 */
4275 	if (un->un_f_vtoc_label_supported) {
4276 		struct	dk_label *dkl;
4277 		offset_t dkl1;
4278 		offset_t label_addr, real_addr;
4279 		int	rval;
4280 		size_t	buffer_size;
4281 
4282 		/*
4283 		 * Note: This will set up un->un_solaris_size and
4284 		 * un->un_solaris_offset.
4285 		 */
4286 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4287 		case SD_CMD_RESERVATION_CONFLICT:
4288 			ASSERT(mutex_owned(SD_MUTEX(un)));
4289 			return (EACCES);
4290 		case SD_CMD_FAILURE:
4291 			ASSERT(mutex_owned(SD_MUTEX(un)));
4292 			return (ENOMEM);
4293 		}
4294 
4295 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4296 
4297 #if defined(__i386) || defined(__amd64)
4298 			/*
			 * Refer to the comments on the off-by-1 issue at the
			 * head of the file. This applies to a 1TB disk only.
			 * Since there are no Solaris partitions, return
			 * ENOTSUP as we do for a >1TB disk.
4303 			 */
4304 			if (un->un_blockcount > DK_MAX_BLOCKS)
4305 				return (ENOTSUP);
4306 #endif
4307 			/*
4308 			 * Found fdisk table but no Solaris partition entry,
4309 			 * so don't call sd_uselabel() and don't create
4310 			 * a default label.
4311 			 */
4312 			label_error = 0;
4313 			un->un_f_geometry_is_valid = TRUE;
4314 			goto no_solaris_partition;
4315 		}
4316 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4317 
4318 #if defined(__i386) || defined(__amd64)
		 * Refer to the comments on the off-by-1 issue at the head
		 * of the file. This 1TB disk has a valid Solaris partition,
		 * which must have been created by a previous sd driver, so
		 * we have to treat it as (1T-512)B.
4323 		 * treat it as (1T-512)B.
4324 		 */
4325 		if (un->un_blockcount > DK_MAX_BLOCKS) {
4326 			un->un_f_capacity_adjusted = 1;
4327 			un->un_blockcount = DK_MAX_BLOCKS;
4328 			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;
4329 
			 * Refer to sd_read_fdisk: when there is no
			 * fdisk partition table, un_solaris_size is
			 * set to the disk's capacity. In this case, we
			 * need to adjust it.
4334 			 * need to adjust it
4335 			 */
4336 			if (un->un_solaris_size > DK_MAX_BLOCKS)
4337 				un->un_solaris_size = DK_MAX_BLOCKS;
4338 			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
4339 			    lbasize, path_flag);
4340 		}
4341 #endif
4342 
4343 		/*
		 * If sys_blocksize != tgt_blocksize, we need to re-adjust
		 * blkno and save the index to the beginning of dk_label.
4346 		 */
4347 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4348 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4349 		    sizeof (struct dk_label));
4350 
4351 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4352 		    "label_addr: 0x%x allocation size: 0x%x\n",
4353 		    label_addr, buffer_size);
4354 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4355 		if (dkl == NULL) {
4356 			return (ENOMEM);
4357 		}
4358 
4359 		mutex_exit(SD_MUTEX(un));
4360 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4361 		    path_flag);
4362 		mutex_enter(SD_MUTEX(un));
4363 
4364 		switch (rval) {
4365 		case 0:
4366 			/*
4367 			 * sd_uselabel will establish that the geometry
4368 			 * is valid.
			 * For sys_blocksize != tgt_blocksize, we need
			 * to index to the beginning of dk_label.
4371 			 */
			dkl1 = (daddr_t)dkl +
			    SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4374 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4375 			    path_flag) != SD_LABEL_IS_VALID) {
4376 				label_error = EINVAL;
4377 			}
4378 			break;
4379 		case EACCES:
4380 			label_error = EACCES;
4381 			break;
4382 		default:
4383 			label_error = EINVAL;
4384 			break;
4385 		}
4386 
4387 		kmem_free(dkl, buffer_size);
4388 
4389 #if defined(_SUNOS_VTOC_8)
4390 		label = (char *)un->un_asciilabel;
4391 #elif defined(_SUNOS_VTOC_16)
4392 		label = (char *)un->un_vtoc.v_asciilabel;
4393 #else
4394 #error "No VTOC format defined."
4395 #endif
4396 	}
4397 
4398 	/*
4399 	 * If a valid label was not found, AND if no reservation conflict
4400 	 * was detected, then go ahead and create a default label (4069506).
4401 	 */
4402 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4403 		if (un->un_f_geometry_is_valid == FALSE) {
4404 			sd_build_default_label(un);
4405 		}
4406 		label_error = 0;
4407 	}
4408 
4409 no_solaris_partition:
4410 	if ((!un->un_f_has_removable_media ||
4411 	    (un->un_f_has_removable_media &&
	    un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
4414 		/*
4415 		 * Print out a message indicating who and what we are.
4416 		 * We do this only when we happen to really validate the
4417 		 * geometry. We may call sd_validate_geometry() at other
		 * times, e.g., from ioctl()s like Get VTOC, in which case
		 * we don't want to print the label.
		 * If the geometry is valid, print the label string;
		 * else print vendor and product info, if available.
4422 		 */
4423 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4424 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4425 		} else {
4426 			mutex_enter(&sd_label_mutex);
4427 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4428 			    labelstring);
4429 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4430 			    &labelstring[64]);
4431 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4432 			    labelstring, &labelstring[64]);
4433 			if (un->un_f_blockcount_is_valid == TRUE) {
4434 				(void) sprintf(&buf[strlen(buf)],
4435 				    ", %llu %u byte blocks\n",
4436 				    (longlong_t)un->un_blockcount,
4437 				    un->un_tgt_blocksize);
4438 			} else {
4439 				(void) sprintf(&buf[strlen(buf)],
4440 				    ", (unknown capacity)\n");
4441 			}
4442 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4443 			mutex_exit(&sd_label_mutex);
4444 		}
4445 	}
4446 
4447 #if defined(_SUNOS_VTOC_16)
4448 	/*
4449 	 * If we have valid geometry, set up the remaining fdisk partitions.
4450 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4451 	 * we set it to an entirely bogus value.
4452 	 */
4453 	for (count = 0; count < FD_NUMPART; count++) {
4454 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4455 		un->un_map[FDISK_P1 + count].dkl_nblk =
4456 		    un->un_fmap[count].fmap_nblk;
4457 
4458 		un->un_offset[FDISK_P1 + count] =
4459 		    un->un_fmap[count].fmap_start;
4460 	}
4461 #endif
4462 
4463 	for (count = 0; count < NDKMAP; count++) {
4464 #if defined(_SUNOS_VTOC_8)
4465 		struct dk_map *lp  = &un->un_map[count];
4466 		un->un_offset[count] =
4467 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4468 #elif defined(_SUNOS_VTOC_16)
4469 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4470 
4471 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4472 #else
4473 #error "No VTOC format defined."
4474 #endif
4475 	}
4476 
4477 	return (label_error);
4478 }
4479 
4480 
4481 #if defined(_SUNOS_VTOC_16)
4482 /*
4483  * Macro: MAX_BLKS
4484  *
4485  *	This macro is used for table entries where we need to have the largest
4486  *	possible sector value for that head & SPT (sectors per track)
4487  *	combination.  Other entries for some smaller disk sizes are set by
 *	convention to match those used by X86 BIOSes.
4489  */
#define	MAX_BLKS(heads, spt)	(UINT16_MAX * (heads) * (spt)), (heads), (spt)
4491 
4492 /*
4493  *    Function: sd_convert_geometry
4494  *
4495  * Description: Convert physical geometry into a dk_geom structure. In
 *		other words, make sure we don't wrap 16-bit values, e.g.,
 *		when converting from geom_cache to dk_geom.
4498  *
4499  *     Context: Kernel thread only
4500  */
4501 static void
4502 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4503 {
4504 	int i;
4505 	static const struct chs_values {
4506 		uint_t max_cap;		/* Max Capacity for this HS. */
4507 		uint_t nhead;		/* Heads to use. */
4508 		uint_t nsect;		/* SPT to use. */
4509 	} CHS_values[] = {
4510 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4511 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4512 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4513 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4514 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4515 	};
4516 
4517 	/* Unlabeled SCSI floppy device */
4518 	if (capacity <= 0x1000) {
4519 		un_g->dkg_nhead = 2;
4520 		un_g->dkg_ncyl = 80;
4521 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4522 		return;
4523 	}
4524 
4525 	/*
	 * For all devices we calculate cylinders using the heads and
	 * sectors we assign based on the capacity of the device.  The
	 * table is designed to be compatible with the way other operating
	 * systems lay out fdisk tables for X86, and to ensure that the
	 * cylinders never exceed 65535, to prevent problems with X86
	 * ioctls that report geometry.
	 * Other OSes that are not limited to 16 bits for cylinders stop
	 * at 63 SPT; we make do by using SPT values that are multiples
	 * of 63.
	 *
	 * Note that capacities greater than or equal to 1TB will simply
	 * get the largest geometry from the table. This should be okay
	 * since disks this large shouldn't be using CHS values anyway.
4539 	 */
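	/*
	 * Worked example (values from the table above): a 4GB disk has
	 * 0x00800000 blocks, which exceeds the first entry's max_cap
	 * (0x00200000) but fits the second (0x01000000), so it gets 128
	 * heads and 32 SPT; the cylinder count can then be derived as
	 * capacity / (128 * 32).
	 */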
4540 	for (i = 0; CHS_values[i].max_cap < capacity &&
4541 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4542 		;
4543 
4544 	un_g->dkg_nhead = CHS_values[i].nhead;
4545 	un_g->dkg_nsect = CHS_values[i].nsect;
4546 }
4547 #endif
4548 
4549 
4550 /*
4551  *    Function: sd_resync_geom_caches
4552  *
4553  * Description: (Re)initialize both geometry caches: the virtual geometry
4554  *		information is extracted from the HBA (the "geometry"
4555  *		capability), and the physical geometry cache data is
4556  *		generated by issuing MODE SENSE commands.
4557  *
4558  *   Arguments: un - driver soft state (unit) structure
4559  *		capacity - disk capacity in #blocks
4560  *		lbasize - disk block size in bytes
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  *     Context: Kernel thread only (can sleep).
4567  */
4568 
4569 static void
4570 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4571 	int path_flag)
4572 {
4573 	struct 	geom_cache 	pgeom;
4574 	struct 	geom_cache	*pgeom_p = &pgeom;
4575 	int 	spc;
4576 	unsigned short nhead;
4577 	unsigned short nsect;
4578 
4579 	ASSERT(un != NULL);
4580 	ASSERT(mutex_owned(SD_MUTEX(un)));
4581 
4582 	/*
4583 	 * Ask the controller for its logical geometry.
4584 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4585 	 * then the lgeom cache will be invalid.
4586 	 */
4587 	sd_get_virtual_geometry(un, capacity, lbasize);
4588 
4589 	/*
4590 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4591 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4592 	 */
4593 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4594 		/*
4595 		 * Note: Perhaps this needs to be more adaptive? The rationale
4596 		 * is that, if there's no HBA geometry from the HBA driver, any
4597 		 * guess is good, since this is the physical geometry. If MODE
		 * SENSE fails, this gives a max cylinder size for non-LBA
		 * access.
4599 		 */
4600 		nhead = 255;
4601 		nsect = 63;
4602 	} else {
4603 		nhead = un->un_lgeom.g_nhead;
4604 		nsect = un->un_lgeom.g_nsect;
4605 	}
4606 
4607 	if (ISCD(un)) {
4608 		pgeom_p->g_nhead = 1;
4609 		pgeom_p->g_nsect = nsect * nhead;
4610 	} else {
4611 		pgeom_p->g_nhead = nhead;
4612 		pgeom_p->g_nsect = nsect;
4613 	}
4614 
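	/* Sectors per cylinder = heads * sectors per track. */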
4615 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4616 	pgeom_p->g_capacity = capacity;
4617 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4618 	pgeom_p->g_acyl = 0;
4619 
4620 	/*
4621 	 * Retrieve fresh geometry data from the hardware, stash it
4622 	 * here temporarily before we rebuild the incore label.
4623 	 *
4624 	 * We want to use the MODE SENSE commands to derive the
4625 	 * physical geometry of the device, but if either command
4626 	 * fails, the logical geometry is used as the fallback for
4627 	 * disk label geometry.
4628 	 */
4629 	mutex_exit(SD_MUTEX(un));
4630 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4631 	mutex_enter(SD_MUTEX(un));
4632 
4633 	/*
4634 	 * Now update the real copy while holding the mutex. This
4635 	 * way the global copy is never in an inconsistent state.
4636 	 */
4637 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4638 
4639 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4640 	    "(cached from lgeom)\n");
4641 	SD_INFO(SD_LOG_COMMON, un,
4642 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4643 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4644 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4645 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4646 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4647 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4648 	    un->un_pgeom.g_rpm);
4649 }
4650 
4651 
4652 /*
4653  *    Function: sd_read_fdisk
4654  *
4655  * Description: utility routine to read the fdisk table.
4656  *
4657  *   Arguments: un - driver soft state (unit) structure
 *		capacity - disk capacity in #blocks
 *		lbasize - disk block size in bytes
4658  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4659  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4660  *			to use the USCSI "direct" chain and bypass the normal
4661  *			command waitq.
4662  *
4663  * Return Code: SD_CMD_SUCCESS
4664  *		SD_CMD_FAILURE
4665  *
4666  *     Context: Kernel thread only (can sleep).
4667  */
4668 /* ARGSUSED */
4669 static int
4670 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4671 {
4672 #if defined(_NO_FDISK_PRESENT)
4673 
4674 	un->un_solaris_offset = 0;
4675 	un->un_solaris_size = capacity;
4676 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4677 	return (SD_CMD_SUCCESS);
4678 
4679 #elif defined(_FIRMWARE_NEEDS_FDISK)
4680 
4681 	struct ipart	*fdp;
4682 	struct mboot	*mbp;
4683 	struct ipart	fdisk[FD_NUMPART];
4684 	int		i;
4685 	char		sigbuf[2];
4686 	caddr_t		bufp;
4687 	int		uidx;
4688 	int		rval;
4689 	int		lba = 0;
4690 	uint_t		solaris_offset;	/* offset to solaris part. */
4691 	daddr_t		solaris_size;	/* size of solaris partition */
4692 	uint32_t	blocksize;
4693 
4694 	ASSERT(un != NULL);
4695 	ASSERT(mutex_owned(SD_MUTEX(un)));
4696 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4697 
4698 	blocksize = un->un_tgt_blocksize;
4699 
4700 	/*
4701 	 * Start off assuming no fdisk table
4702 	 */
4703 	solaris_offset = 0;
4704 	solaris_size   = capacity;
4705 
4706 	mutex_exit(SD_MUTEX(un));
4707 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4708 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4709 	mutex_enter(SD_MUTEX(un));
4710 
4711 	if (rval != 0) {
4712 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4713 		    "sd_read_fdisk: fdisk read err\n");
4714 		kmem_free(bufp, blocksize);
4715 		return (SD_CMD_FAILURE);
4716 	}
4717 
4718 	mbp = (struct mboot *)bufp;
4719 
4720 	/*
4721 	 * The fdisk table does not begin on a 4-byte boundary within the
4722 	 * master boot record, so we copy it to an aligned structure to avoid
4723 	 * alignment exceptions on some processors.
4724 	 */
4725 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4726 
4727 	/*
4728 	 * Check for lba support before verifying sig; sig might not be
4729 	 * there, say on a blank disk, but the max_chs mark may still
4730 	 * be present.
4731 	 *
4732 	 * Note: LBA support and BEFs are an x86-only concept but this
4733 	 * code should work OK on SPARC as well.
4734 	 */
4735 
4736 	/*
4737 	 * First, check for lba-access-ok on the root node (or prom root node);
4738 	 * if it is present there, we don't need to search the fdisk table.
4739 	 */
4740 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4741 	    "lba-access-ok", 0) != 0) {
4742 		/* All drives do LBA; don't search fdisk table */
4743 		lba = 1;
4744 	} else {
4745 		/* Okay, look for mark in fdisk table */
4746 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4747 			/* accumulate "lba" value from all partitions */
4748 			lba = (lba || sd_has_max_chs_vals(fdp));
4749 		}
4750 	}
4751 
4752 	if (lba != 0) {
4753 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4754 
4755 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4756 		    "lba-access-ok", 0) == 0) {
4757 			/* not found; create it */
4758 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4759 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4760 			    DDI_PROP_SUCCESS) {
4761 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4762 				    "sd_read_fdisk: Can't create lba property "
4763 				    "for instance %d\n",
4764 				    ddi_get_instance(SD_DEVINFO(un)));
4765 			}
4766 		}
4767 	}
4768 
4769 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4770 
4771 	/*
4772 	 * Endian-independent signature check
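	 * (MBB_MAGIC is 0xAA55, stored little-endian on disk as the byte
	 * sequence 0x55 0xAA, so the two bytes are compared individually
	 * rather than as a single 16-bit load.)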
4773 	 */
4774 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4775 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4776 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4777 		    "sd_read_fdisk: no fdisk\n");
4778 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4779 		rval = SD_CMD_SUCCESS;
4780 		goto done;
4781 	}
4782 
4783 #ifdef SDDEBUG
4784 	if (sd_level_mask & SD_LOGMASK_INFO) {
4785 		fdp = fdisk;
4786 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4787 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4788 		    "numsect         sysid       bootid\n");
4789 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4790 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4791 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4792 			    i, fdp->relsect, fdp->numsect,
4793 			    fdp->systid, fdp->bootid);
4794 		}
4795 	}
4796 #endif
4797 
4798 	/*
4799 	 * Try to find the unix partition
4800 	 */
4801 	uidx = -1;
4802 	solaris_offset = 0;
4803 	solaris_size   = 0;
4804 
4805 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4806 		int	relsect;
4807 		int	numsect;
4808 
4809 		if (fdp->numsect == 0) {
4810 			un->un_fmap[i].fmap_start = 0;
4811 			un->un_fmap[i].fmap_nblk  = 0;
4812 			continue;
4813 		}
4814 
4815 		/*
4816 		 * Data in the fdisk table is little-endian.
4817 		 */
4818 		relsect = LE_32(fdp->relsect);
4819 		numsect = LE_32(fdp->numsect);
4820 
4821 		un->un_fmap[i].fmap_start = relsect;
4822 		un->un_fmap[i].fmap_nblk  = numsect;
4823 
4824 		if (fdp->systid != SUNIXOS &&
4825 		    fdp->systid != SUNIXOS2 &&
4826 		    fdp->systid != EFI_PMBR) {
4827 			continue;
4828 		}
4829 
4830 		/*
4831 		 * Use the last active Solaris partition id found
4832 		 * (there should be only one active partition id).
4833 		 *
4834 		 * If there is no active Solaris partition id,
4835 		 * then use the first inactive Solaris partition id.
4836 		 */
4837 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4838 			uidx = i;
4839 			solaris_offset = relsect;
4840 			solaris_size   = numsect;
4841 		}
4842 	}
4843 
4844 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4845 	    un->un_solaris_offset, un->un_solaris_size);
4846 
4847 	rval = SD_CMD_SUCCESS;
4848 
4849 done:
4850 
4851 	/*
4852 	 * Clear the VTOC info, only if the Solaris partition entry
4853 	 * has moved, changed size, been deleted, or if the size of
4854 	 * the partition is too small to even fit the label sector.
4855 	 */
4856 	if ((un->un_solaris_offset != solaris_offset) ||
4857 	    (un->un_solaris_size != solaris_size) ||
4858 	    (solaris_size <= DK_LABEL_LOC)) {
4859 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4860 		    solaris_offset, solaris_size);
4861 		bzero(&un->un_g, sizeof (struct dk_geom));
4862 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4863 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4864 		un->un_f_geometry_is_valid = FALSE;
4865 	}
4866 	un->un_solaris_offset = solaris_offset;
4867 	un->un_solaris_size = solaris_size;
4868 	kmem_free(bufp, blocksize);
4869 	return (rval);
4870 
4871 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4872 #error "fdisk table presence undetermined for this platform."
4873 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4874 }
4875 
4876 
4877 /*
4878  *    Function: sd_get_physical_geometry
4879  *
4880  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4881  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4882  *		target, and use this information to initialize the physical
4883  *		geometry cache specified by pgeom_p.
4884  *
4885  *		MODE SENSE is an optional command, so failure in this case
4886  *		does not necessarily denote an error. We want to use the
4887  *		MODE SENSE commands to derive the physical geometry of the
4888  *		device, but if either command fails, the logical geometry is
4889  *		used as the fallback for disk label geometry.
4890  *
4891  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4892  *		have already been initialized for the current target and
4893  *		that the current values be passed as args so that we don't
4894  *		end up ever trying to use -1 as a valid value. This could
4895  *		happen if either value is reset while we're not holding
4896  *		the mutex.
4897  *
4898  *   Arguments: un - driver soft state (unit) structure
 *		pgeom_p - ptr to the physical geometry cache to initialize
 *		capacity - disk capacity in #blocks
 *		lbasize - disk block size in bytes
4899  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4900  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4901  *			to use the USCSI "direct" chain and bypass the normal
4902  *			command waitq.
4903  *
4904  *     Context: Kernel thread only (can sleep).
4905  */
4906 
4907 static void
4908 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4909 	int capacity, int lbasize, int path_flag)
4910 {
4911 	struct	mode_format	*page3p;
4912 	struct	mode_geometry	*page4p;
4913 	struct	mode_header	*headerp;
4914 	int	sector_size;
4915 	int	nsect;
4916 	int	nhead;
4917 	int	ncyl;
4918 	int	intrlv;
4919 	int	spc;
4920 	int	modesense_capacity;
4921 	int	rpm;
4922 	int	bd_len;
4923 	int	mode_header_length;
4924 	uchar_t	*p3bufp;
4925 	uchar_t	*p4bufp;
4926 	int	cdbsize;
4927 
4928 	ASSERT(un != NULL);
4929 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4930 
4931 	if (un->un_f_blockcount_is_valid != TRUE) {
4932 		return;
4933 	}
4934 
4935 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4936 		return;
4937 	}
4938 
4939 	if (lbasize == 0) {
4940 		if (ISCD(un)) {
4941 			lbasize = 2048;
4942 		} else {
4943 			lbasize = un->un_sys_blocksize;
4944 		}
4945 	}
4946 	pgeom_p->g_secsize = (unsigned short)lbasize;
4947 
4948 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4949 
4950 	/*
4951 	 * Retrieve MODE SENSE page 3 - Format Device Page
4952 	 */
4953 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4954 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4955 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4956 	    != 0) {
4957 		SD_ERROR(SD_LOG_COMMON, un,
4958 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4959 		goto page3_exit;
4960 	}
4961 
4962 	/*
4963 	 * Determine size of Block Descriptors in order to locate the mode
4964 	 * page data.  ATAPI devices return 0, SCSI devices should return
4965 	 * MODE_BLK_DESC_LENGTH.
4966 	 */
4967 	headerp = (struct mode_header *)p3bufp;
4968 	if (un->un_f_cfg_is_atapi == TRUE) {
4969 		struct mode_header_grp2 *mhp =
4970 		    (struct mode_header_grp2 *)headerp;
4971 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4972 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4973 	} else {
4974 		mode_header_length = MODE_HEADER_LENGTH;
4975 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4976 	}
4977 
4978 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4979 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4980 		    "received unexpected bd_len of %d, page3\n", bd_len);
4981 		goto page3_exit;
4982 	}
4983 
4984 	page3p = (struct mode_format *)
4985 	    ((caddr_t)headerp + mode_header_length + bd_len);
4986 
4987 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4988 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4989 		    "mode sense pg3 code mismatch %d\n",
4990 		    page3p->mode_page.code);
4991 		goto page3_exit;
4992 	}
4993 
4994 	/*
4995 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4996 	 * complete successfully; otherwise, revert to the logical geometry.
4997 	 * So, we need to save everything in temporary variables.
4998 	 */
4999 	sector_size = BE_16(page3p->data_bytes_sect);
5000 
5001 	/*
5002 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5003 	 */
5004 	if (sector_size == 0) {
5005 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
5006 	} else {
5007 		sector_size &= ~(un->un_sys_blocksize - 1);
5008 	}
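	/*
	 * The mask above rounds sector_size down to a multiple of
	 * un_sys_blocksize, which is assumed to be a power of two.
	 */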
5009 
5010 	nsect  = BE_16(page3p->sect_track);
5011 	intrlv = BE_16(page3p->interleave);
5012 
5013 	SD_INFO(SD_LOG_COMMON, un,
5014 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5015 	SD_INFO(SD_LOG_COMMON, un,
5016 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5017 	    page3p->mode_page.code, nsect, sector_size);
5018 	SD_INFO(SD_LOG_COMMON, un,
5019 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5020 	    BE_16(page3p->track_skew),
5021 	    BE_16(page3p->cylinder_skew));
5022 
5023 
5024 	/*
5025 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5026 	 */
5027 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5028 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
5029 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
5030 	    != 0) {
5031 		SD_ERROR(SD_LOG_COMMON, un,
5032 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5033 		goto page4_exit;
5034 	}
5035 
5036 	/*
5037 	 * Determine size of Block Descriptors in order to locate the mode
5038 	 * page data.  ATAPI devices return 0, SCSI devices should return
5039 	 * MODE_BLK_DESC_LENGTH.
5040 	 */
5041 	headerp = (struct mode_header *)p4bufp;
5042 	if (un->un_f_cfg_is_atapi == TRUE) {
5043 		struct mode_header_grp2 *mhp =
5044 		    (struct mode_header_grp2 *)headerp;
5045 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5046 	} else {
5047 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5048 	}
5049 
5050 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5051 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5052 		    "received unexpected bd_len of %d, page4\n", bd_len);
5053 		goto page4_exit;
5054 	}
5055 
5056 	page4p = (struct mode_geometry *)
5057 	    ((caddr_t)headerp + mode_header_length + bd_len);
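	/*
	 * Note: mode_header_length was computed during the page 3 pass
	 * above and is reused here, since cdbsize (and thus the mode
	 * header format) is the same for both MODE SENSE commands.
	 */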
5058 
5059 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5060 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5061 		    "mode sense pg4 code mismatch %d\n",
5062 		    page4p->mode_page.code);
5063 		goto page4_exit;
5064 	}
5065 
5066 	/*
5067 	 * Stash the data now, after we know that both commands completed.
5068 	 */
5069 
5070 	mutex_enter(SD_MUTEX(un));
5071 
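	/*
	 * The mode page stores the cylinder count as three separate bytes
	 * (upper/middle/lower); reassemble them into a 24-bit value below.
	 */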
5072 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5073 	spc   = nhead * nsect;
5074 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5075 	rpm   = BE_16(page4p->rpm);
5076 
5077 	modesense_capacity = spc * ncyl;
5078 
5079 	SD_INFO(SD_LOG_COMMON, un,
5080 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5081 	SD_INFO(SD_LOG_COMMON, un,
5082 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5083 	SD_INFO(SD_LOG_COMMON, un,
5084 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5085 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5086 	    (void *)pgeom_p, capacity);
5087 
5088 	/*
5089 	 * Compensate if the drive's geometry is not rectangular, i.e.,
5090 	 * the product of C * H * S returned by MODE SENSE >= that returned
5091 	 * by read capacity. This is an idiosyncrasy of the original x86
5092 	 * disk subsystem.
5093 	 */
5094 	if (modesense_capacity >= capacity) {
5095 		SD_INFO(SD_LOG_COMMON, un,
5096 		    "sd_get_physical_geometry: adjusting acyl; "
5097 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5098 		    (modesense_capacity - capacity + spc - 1) / spc);
5099 		if (sector_size != 0) {
5100 			/* 1243403: NEC D38x7 drives don't support sec size */
5101 			pgeom_p->g_secsize = (unsigned short)sector_size;
5102 		}
5103 		pgeom_p->g_nsect    = (unsigned short)nsect;
5104 		pgeom_p->g_nhead    = (unsigned short)nhead;
5105 		pgeom_p->g_capacity = capacity;
5106 		pgeom_p->g_acyl	    =
5107 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5108 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5109 	}
5110 
5111 	pgeom_p->g_rpm    = (unsigned short)rpm;
5112 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5113 
5114 	SD_INFO(SD_LOG_COMMON, un,
5115 	    "sd_get_physical_geometry: mode sense geometry:\n");
5116 	SD_INFO(SD_LOG_COMMON, un,
5117 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5118 	    nsect, sector_size, intrlv);
5119 	SD_INFO(SD_LOG_COMMON, un,
5120 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5121 	    nhead, ncyl, rpm, modesense_capacity);
5122 	SD_INFO(SD_LOG_COMMON, un,
5123 	    "sd_get_physical_geometry: (cached)\n");
5124 	SD_INFO(SD_LOG_COMMON, un,
5125 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5126 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5127 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5128 	SD_INFO(SD_LOG_COMMON, un,
5129 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5130 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5131 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5132 
5133 	mutex_exit(SD_MUTEX(un));
5134 
5135 page4_exit:
5136 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5137 page3_exit:
5138 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5139 }
5140 
5141 
5142 /*
5143  *    Function: sd_get_virtual_geometry
5144  *
5145  * Description: Ask the controller to tell us about the target device.
5146  *
5147  *   Arguments: un - pointer to softstate
5148  *		capacity - disk capacity in #blocks
5149  *		lbasize - disk block size in bytes
5150  *
5151  *     Context: Kernel thread only
5152  */
5153 
5154 static void
5155 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5156 {
5157 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5158 	uint_t	geombuf;
5159 	int	spc;
5160 
5161 	ASSERT(un != NULL);
5162 	ASSERT(mutex_owned(SD_MUTEX(un)));
5163 
5164 	mutex_exit(SD_MUTEX(un));
5165 
5166 	/* Set sector size, and total number of sectors */
5167 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5168 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5169 
5170 	/* Let the HBA tell us its geometry */
5171 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5172 
5173 	mutex_enter(SD_MUTEX(un));
5174 
5175 	/* A value of -1 indicates an undefined "geometry" property */
5176 	if (geombuf == (-1)) {
5177 		return;
5178 	}
5179 
5180 	/* Initialize the logical geometry cache. */
5181 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5182 	lgeom_p->g_nsect   = geombuf & 0xffff;
5183 	lgeom_p->g_secsize = un->un_sys_blocksize;
5184 
5185 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5186 
5187 	/*
5188 	 * Note: The driver originally converted the capacity value from
5189 	 * target blocks to system blocks. However, the capacity value passed
5190 	 * to this routine is already in terms of system blocks (this scaling
5191 	 * is done when the READ CAPACITY command is issued and processed).
5192 	 * This 'error' may have gone undetected because the usage of g_ncyl
5193 	 * (which is based upon g_capacity) is very limited within the driver.
5194 	 */
5195 	lgeom_p->g_capacity = capacity;
5196 
5197 	/*
5198 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
5199 	 * the HBA may return zero values if the device has been removed.
5200 	 */
5201 	if (spc == 0) {
5202 		lgeom_p->g_ncyl = 0;
5203 	} else {
5204 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5205 	}
5206 	lgeom_p->g_acyl = 0;
5207 
5208 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5209 	SD_INFO(SD_LOG_COMMON, un,
5210 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5211 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5212 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5213 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5214 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5215 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5216 }
5217 
5218 
5219 /*
5220  *    Function: sd_update_block_info
5221  *
5222  * Description: Update the driver soft state with the new target sector
5223  *		size and/or capacity, marking each updated value as valid.
5224  *
5225  *   Arguments: un: unit struct.
5226  *		lbasize: new target sector size
5227  *		capacity: new target capacity, i.e., block count
5228  *
5229  *     Context: Kernel thread context
5230  */
5231 
5232 static void
5233 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5234 {
5235 	if (lbasize != 0) {
5236 		un->un_tgt_blocksize = lbasize;
5237 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5238 	}
5239 
5240 	if (capacity != 0) {
5241 		un->un_blockcount		= capacity;
5242 		un->un_f_blockcount_is_valid	= TRUE;
5243 	}
5244 }
5245 
5246 
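/*
 * The GPT header and partition entries are little-endian on disk.
 * LE_64/LE_32 and UUID_LE_CONVERT are no-ops on little-endian hosts
 * and byte-swaps on big-endian (e.g. SPARC) hosts, so after conversion
 * the structures can be used in native byte order.
 */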
5247 static void
5248 sd_swap_efi_gpt(efi_gpt_t *e)
5249 {
5250 	_NOTE(ASSUMING_PROTECTED(*e))
5251 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5252 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5253 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5254 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5255 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5256 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5257 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5258 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5259 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5260 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5261 	e->efi_gpt_NumberOfPartitionEntries =
5262 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5263 	e->efi_gpt_SizeOfPartitionEntry =
5264 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5265 	e->efi_gpt_PartitionEntryArrayCRC32 =
5266 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5267 }
5268 
5269 static void
5270 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5271 {
5272 	int i;
5273 
5274 	_NOTE(ASSUMING_PROTECTED(*p))
5275 	for (i = 0; i < nparts; i++) {
5276 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5277 		    p[i].efi_gpe_PartitionTypeGUID);
5278 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5279 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5280 		/* PartitionAttrs */
5281 	}
5282 }
5283 
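/*
 * Sanity-check a (byte-swapped) GPT header: the EFI signature must
 * match, the advertised header size must cover at least the fixed
 * fields, and the partition entry size must match the efi_gpe_t
 * structure this driver uses.
 */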
5284 static int
5285 sd_validate_efi(efi_gpt_t *labp)
5286 {
5287 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5288 		return (EINVAL);
5289 	/* at least 96 bytes in this version of the spec. */
5290 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5291 	    labp->efi_gpt_HeaderSize)
5292 		return (EINVAL);
5293 	/* this should be 128 bytes */
5294 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5295 		return (EINVAL);
5296 	return (0);
5297 }
5298 
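/*
 *    Function: sd_use_efi
 *
 * Description: Check for an EFI (GPT) label and, if one is found, use it
 *		to fill in the partition map in the driver soft state.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to use the USCSI "direct" chain and bypass the normal
 *			command waitq.
 *
 * Return Code: 0 - EFI label found and partition data cached
 *		ESRCH - a VTOC label was found instead
 *		other errno values - neither the primary nor the backup
 *			GPT could be read and validated
 *
 *     Context: Kernel thread only (can sleep).
 */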
5299 static int
5300 sd_use_efi(struct sd_lun *un, int path_flag)
5301 {
5302 	int		i;
5303 	int		rval = 0;
5304 	efi_gpe_t	*partitions;
5305 	uchar_t		*buf;
5306 	uint_t		lbasize;
5307 	uint64_t	cap;
5308 	uint_t		nparts;
5309 	diskaddr_t	gpe_lba;
5310 
5311 	ASSERT(mutex_owned(SD_MUTEX(un)));
5312 	lbasize = un->un_tgt_blocksize;
	cap = un->un_blockcount;	/* default; the backup-label path refreshes this */
5313 
5314 	mutex_exit(SD_MUTEX(un));
5315 
5316 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5317 
5318 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5319 		rval = EINVAL;
5320 		goto done_err;
5321 	}
5322 
5323 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5324 	if (rval) {
5325 		goto done_err;
5326 	}
5327 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5328 		/* not ours */
5329 		rval = ESRCH;
5330 		goto done_err;
5331 	}
5332 
5333 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5334 	if (rval) {
5335 		goto done_err;
5336 	}
5337 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5338 
5339 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5340 		/*
5341 		 * Couldn't read the primary, try the backup.  Our
5342 		 * capacity at this point could be based on CHS, so
5343 		 * check what the device reports.
5344 		 */
5345 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5346 		    path_flag);
5347 		if (rval) {
5348 			goto done_err;
5349 		}
5350 
5351 		/*
5352 		 * The MMC standard allows READ CAPACITY to be
5353 		 * inaccurate by a bounded amount (in the interest of
5354 		 * response latency).  As a result, failed READs are
5355 		 * commonplace (due to the reading of metadata and not
5356 		 * data). Depending on the per-Vendor/drive Sense data,
5357 		 * the failed READ can cause many (unnecessary) retries.
5358 		 */
5359 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5360 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5361 		    path_flag)) != 0) {
5362 			goto done_err;
5363 		}
5364 
5365 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5366 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5367 
5368 			/*
5369 			 * Refer to comments related to off-by-1 at the
5370 			 * header of this file. Search the next to last
5371 			 * block for backup EFI label.
5372 			 */
5373 			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5374 			    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5375 			    path_flag)) != 0) {
5376 				goto done_err;
5377 			}
5378 			sd_swap_efi_gpt((efi_gpt_t *)buf);
5379 			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5380 				goto done_err;
5381 		}
5382 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5383 		    "primary label corrupt; using backup\n");
5384 	}
5385 
5386 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5387 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5388 
5389 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5390 	    path_flag);
5391 	if (rval) {
5392 		goto done_err;
5393 	}
5394 	partitions = (efi_gpe_t *)buf;
5395 
5396 	if (nparts > MAXPART) {
5397 		nparts = MAXPART;
5398 	}
5399 	sd_swap_efi_gpe(nparts, partitions);
5400 
5401 	mutex_enter(SD_MUTEX(un));
5402 
5403 	/* Fill in partition table. */
5404 	for (i = 0; i < nparts; i++) {
5405 		if (partitions->efi_gpe_StartingLBA != 0 ||
5406 		    partitions->efi_gpe_EndingLBA != 0) {
5407 			un->un_map[i].dkl_cylno =
5408 			    partitions->efi_gpe_StartingLBA;
5409 			un->un_map[i].dkl_nblk =
5410 			    partitions->efi_gpe_EndingLBA -
5411 			    partitions->efi_gpe_StartingLBA + 1;
5412 			un->un_offset[i] =
5413 			    partitions->efi_gpe_StartingLBA;
5414 		}
5415 		if (i == WD_NODE) {
5416 			/*
5417 			 * minor number 7 corresponds to the whole disk
5418 			 */
5419 			un->un_map[i].dkl_cylno = 0;
5420 			un->un_map[i].dkl_nblk = un->un_blockcount;
5421 			un->un_offset[i] = 0;
5422 		}
5423 		partitions++;
5424 	}
5425 	un->un_solaris_offset = 0;
5426 	un->un_solaris_size = cap;
5427 	un->un_f_geometry_is_valid = TRUE;
5428 
5429 	/* clear the vtoc label */
5430 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5431 
5432 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5433 	return (0);
5434 
5435 done_err:
5436 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5437 	mutex_enter(SD_MUTEX(un));
5438 	/*
5439 	 * if we didn't find something that could look like a VTOC
5440 	 * and the disk is over 1TB, we know there isn't a valid label.
5441 	 * Otherwise let sd_uselabel decide what to do.  We only
5442 	 * want to invalidate this if we're certain the label isn't
5443 	 * valid because sd_prop_op will now fail, which in turn
5444 	 * causes things like opens and stats on the partition to fail.
5445 	 */
5446 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5447 		un->un_f_geometry_is_valid = FALSE;
5448 	}
5449 	return (rval);
5450 }
5451 
5452 
5453 /*
5454  *    Function: sd_uselabel
5455  *
5456  * Description: Validate the disk label and update the relevant data (geometry,
5457  *		partition, vtoc, and capacity data) in the sd_lun struct.
5458  *		Marks the geometry of the unit as being valid.
5459  *
5460  *   Arguments: un: unit struct.
5461  *		dk_label: disk label
5462  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5463  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5464  *			to use the USCSI "direct" chain and bypass the normal
5465  *			command waitq.
5466  *
5467  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5468  *		partition, vtoc, and capacity data are good.
5469  *
5470  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5471  *		label; or computed capacity does not jibe with capacity
5472  *		reported from the READ CAPACITY command.
5473  *
5474  *     Context: Kernel thread only (can sleep).
5475  */
5476 
5477 static int
5478 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5479 {
5480 	short	*sp;
5481 	short	sum;
5482 	short	count;
5483 	int	label_error = SD_LABEL_IS_VALID;
5484 	int	i;
5485 	int	capacity;
5486 	int	part_end;
5487 	int	track_capacity;
5488 	int	err;
5489 #if defined(_SUNOS_VTOC_16)
5490 	struct	dkl_partition	*vpartp;
5491 #endif
5492 	ASSERT(un != NULL);
5493 	ASSERT(mutex_owned(SD_MUTEX(un)));
5494 
5495 	/* Validate the magic number of the label. */
5496 	if (labp->dkl_magic != DKL_MAGIC) {
5497 #if defined(__sparc)
5498 		if ((un->un_state == SD_STATE_NORMAL) &&
5499 		    un->un_f_vtoc_errlog_supported) {
5500 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5501 			    "Corrupt label; wrong magic number\n");
5502 		}
5503 #endif
5504 		return (SD_LABEL_IS_INVALID);
5505 	}
5506 
5507 	/* Validate the checksum of the label. */
5508 	sp  = (short *)labp;
5509 	sum = 0;
5510 	count = sizeof (struct dk_label) / sizeof (short);
5511 	while (count--) {
5512 		sum ^= *sp++;
5513 	}
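	/*
	 * The label is written with dkl_cksum chosen so that the XOR of
	 * all the shorts in the sector is zero; a nonzero sum here means
	 * the label is corrupt.
	 */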
5514 
5515 	if (sum != 0) {
5516 #if	defined(_SUNOS_VTOC_16)
5517 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5518 #elif defined(_SUNOS_VTOC_8)
5519 		if ((un->un_state == SD_STATE_NORMAL) &&
5520 		    un->un_f_vtoc_errlog_supported) {
5521 #endif
5522 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5523 			    "Corrupt label - label checksum failed\n");
5524 		}
5525 		return (SD_LABEL_IS_INVALID);
5526 	}
5527 
5528 
5529 	/*
5530 	 * Fill in geometry structure with data from label.
5531 	 */
5532 	bzero(&un->un_g, sizeof (struct dk_geom));
5533 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5534 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5535 	un->un_g.dkg_bcyl   = 0;
5536 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5537 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5538 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5539 
5540 #if defined(_SUNOS_VTOC_8)
5541 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5542 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5543 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5544 #endif
5545 #if defined(_SUNOS_VTOC_16)
5546 	un->un_dkg_skew = labp->dkl_skew;
5547 #endif
5548 
5549 #if defined(__i386) || defined(__amd64)
5550 	un->un_g.dkg_apc = labp->dkl_apc;
5551 #endif
5552 
5553 	/*
5554 	 * Currently we rely on the values in the label being accurate. If
5555 	 * dkl_rpm or dkl_pcyl is zero in the label, use a default value.
5556 	 *
5557 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5558 	 * although this command is optional in SCSI-2.
5559 	 */
5560 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5561 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5562 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5563 
5564 	/*
5565 	 * The Read and Write reinstruct values may not be valid
5566 	 * for older disks.
5567 	 */
5568 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5569 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5570 
5571 	/* Fill in partition table. */
5572 #if defined(_SUNOS_VTOC_8)
5573 	for (i = 0; i < NDKMAP; i++) {
5574 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5575 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5576 	}
5577 #endif
5578 #if  defined(_SUNOS_VTOC_16)
5579 	vpartp		= labp->dkl_vtoc.v_part;
5580 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5581 
5582 	/* Prevent divide by zero */
5583 	if (track_capacity == 0) {
5584 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5585 		    "Corrupt label - zero nhead or nsect value\n");
5586 
5587 		return (SD_LABEL_IS_INVALID);
5588 	}
5589 
5590 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5591 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5592 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5593 	}
5594 #endif
5595 
5596 	/* Fill in VTOC Structure. */
5597 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5598 #if defined(_SUNOS_VTOC_8)
5599 	/*
5600 	 * The 8-slice vtoc does not include the ascii label; save it into
5601 	 * the device's soft state structure here.
5602 	 */
5603 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5604 #endif
5605 
5606 	/* Now look for a valid capacity. */
5607 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5608 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5609 
5610 	if (un->un_g.dkg_acyl) {
5611 #if defined(__i386) || defined(__amd64)
5612 		/* we may have > 1 alts cylinder */
5613 		capacity += (track_capacity * un->un_g.dkg_acyl);
5614 #else
5615 		capacity += track_capacity;
5616 #endif
5617 	}
5618 
5619 	/*
5620 	 * Force check here to ensure the computed capacity is valid.
5621 	 * If capacity is zero, it indicates an invalid label and
5622 	 * we should abort updating the relevant data then.
5623 	 */
5624 	if (capacity == 0) {
5625 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5626 		    "Corrupt label - no valid capacity could be retrieved\n");
5627 
5628 		return (SD_LABEL_IS_INVALID);
5629 	}
5630 
5631 	/* Mark the geometry as valid. */
5632 	un->un_f_geometry_is_valid = TRUE;
5633 
5634 	/*
5635 	 * At this point, un->un_blockcount should contain valid data from
5636 	 * the READ CAPACITY command.
5637 	 */
5638 	if (un->un_f_blockcount_is_valid != TRUE) {
5639 		/*
5640 		 * We have a situation where the target didn't give us a good
5641 		 * READ CAPACITY value, yet there appears to be a valid label.
5642 		 * In this case, we'll fake the capacity.
5643 		 */
5644 		un->un_blockcount = capacity;
5645 		un->un_f_blockcount_is_valid = TRUE;
5646 		goto done;
5647 	}
5648 
5649 
5650 	if ((capacity <= un->un_blockcount) ||
5651 	    (un->un_state != SD_STATE_NORMAL)) {
5652 #if defined(_SUNOS_VTOC_8)
5653 		/*
5654 		 * We can't let this happen on drives that are subdivided
5655 		 * into logical disks (i.e., that have an fdisk table).
5656 		 * The un_blockcount field should always hold the full media
5657 		 * size in sectors, period.  This code would overwrite
5658 		 * un_blockcount with the size of the Solaris fdisk partition.
5659 		 */
5660 		SD_ERROR(SD_LOG_COMMON, un,
5661 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5662 		    capacity, un->un_blockcount);
5663 		un->un_blockcount = capacity;
5664 		un->un_f_blockcount_is_valid = TRUE;
5665 #endif	/* defined(_SUNOS_VTOC_8) */
5666 		goto done;
5667 	}
5668 
5669 	if (ISCD(un)) {
5670 		/* For CDROMs, we trust that the data in the label is OK. */
5671 #if defined(_SUNOS_VTOC_8)
5672 		for (i = 0; i < NDKMAP; i++) {
5673 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5674 			    labp->dkl_map[i].dkl_cylno +
5675 			    labp->dkl_map[i].dkl_nblk  - 1;
5676 
5677 			if ((labp->dkl_map[i].dkl_nblk) &&
5678 			    (part_end > un->un_blockcount)) {
5679 				un->un_f_geometry_is_valid = FALSE;
5680 				break;
5681 			}
5682 		}
5683 #endif
5684 #if defined(_SUNOS_VTOC_16)
5685 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5686 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5687 			part_end = vpartp->p_start + vpartp->p_size;
5688 			if ((vpartp->p_size > 0) &&
5689 			    (part_end > un->un_blockcount)) {
5690 				un->un_f_geometry_is_valid = FALSE;
5691 				break;
5692 			}
5693 		}
5694 #endif
5695 	} else {
5696 		uint64_t t_capacity;
5697 		uint32_t t_lbasize;
5698 
5699 		mutex_exit(SD_MUTEX(un));
5700 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5701 		    path_flag);
5702 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5703 		mutex_enter(SD_MUTEX(un));
5704 
5705 		if (err == 0) {
5706 			sd_update_block_info(un, t_lbasize, t_capacity);
5707 		}
5708 
5709 		if (capacity > un->un_blockcount) {
5710 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5711 			    "Corrupt label - bad geometry\n");
5712 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5713 			    "Label says %u blocks; Drive says %llu blocks\n",
5714 			    capacity, (unsigned long long)un->un_blockcount);
5715 			un->un_f_geometry_is_valid = FALSE;
5716 			label_error = SD_LABEL_IS_INVALID;
5717 		}
5718 	}
5719 
5720 done:
5721 
5722 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5723 	SD_INFO(SD_LOG_COMMON, un,
5724 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5725 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5726 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5727 	SD_INFO(SD_LOG_COMMON, un,
5728 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5729 	    un->un_tgt_blocksize, un->un_blockcount,
5730 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5731 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5732 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5733 
5734 	ASSERT(mutex_owned(SD_MUTEX(un)));
5735 
5736 	return (label_error);
5737 }
5738 
5739 
5740 /*
5741  *    Function: sd_build_default_label
5742  *
5743  * Description: Generate a default label for those devices that do not have
5744  *		one, e.g., new media, removable cartridges, etc..
5745  *
5746  *     Context: Kernel thread only
5747  */
5748 
5749 static void
5750 sd_build_default_label(struct sd_lun *un)
5751 {
5752 #if defined(_SUNOS_VTOC_16)
5753 	uint_t	phys_spc;
5754 	uint_t	disksize;
5755 	struct	dk_geom un_g;
5756 	uint64_t capacity;
5757 #endif
5758 
5759 	ASSERT(un != NULL);
5760 	ASSERT(mutex_owned(SD_MUTEX(un)));
5761 
5762 #if defined(_SUNOS_VTOC_8)
5763 	/*
5764 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5765 	 * only. This may be a valid check for VTOC_16 as well.
5766 	 * Once we understand why there is this difference between SPARC and
5767 	 * x86 platforms, we could remove this legacy check.
5768 	 */
5769 	ASSERT(un->un_f_default_vtoc_supported);
5770 #endif
5771 
5772 	bzero(&un->un_g, sizeof (struct dk_geom));
5773 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5774 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5775 
5776 #if defined(_SUNOS_VTOC_8)
5777 
5778 	/*
5779 	 * It's REMOVABLE media, and therefore has no label (on sparc, anyway).
5780 	 * But it is still necessary to set up various geometry information,
5781 	 * and we are doing this here.
5782 	 */
5783 
5784 	/*
5785 	 * For the rpm, we use the minimum for the disk.  For heads and
5786 	 * sectors per track: if the capacity is <= 1GB, use head = 64 and
5787 	 * sect = 32; else use head = 255 and sect = 63.  Note: the capacity
5788 	 * should equal the C*H*S product, so some truncation of size occurs
5789 	 * due to round-off errors.  For CD-ROMs this truncation can have
5790 	 * adverse side effects, so we return ncyl and nhead as 1, since
5791 	 * nsect would overflow for most CD-ROMs (nsect is a ushort). (4190569)
5792 	 */
5793 	if (ISCD(un)) {
5794 		/*
5795 		 * Preserve the old behavior for non-writable
5796 		 * media. Since dkg_nsect is a ushort, it
5797 		 * loses bits when a CD-ROM has more than
5798 		 * 65536 sectors; if we recalculated the
5799 		 * capacity it would become much smaller.
5800 		 * That is acceptable because the dkg_*
5801 		 * information is not used for CD-ROMs. For
5802 		 * writable CDs, however, we need this
5803 		 * information to be valid (for newfs, say),
5804 		 * so we make nsect and nhead > 1; that way
5805 		 * nsect still stays within the ushort limit
5806 		 * without losing any bits.
5807 		 */
5808 		if (un->un_f_mmc_writable_media == TRUE) {
5809 			un->un_g.dkg_nhead = 64;
5810 			un->un_g.dkg_nsect = 32;
5811 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5812 			un->un_blockcount = un->un_g.dkg_ncyl *
5813 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5814 		} else {
5815 			un->un_g.dkg_ncyl  = 1;
5816 			un->un_g.dkg_nhead = 1;
5817 			un->un_g.dkg_nsect = un->un_blockcount;
5818 		}
5819 	} else {
5820 		if (un->un_blockcount <= 0x1000) {
5821 			/* unlabeled SCSI floppy device */
5822 			un->un_g.dkg_nhead = 2;
5823 			un->un_g.dkg_ncyl = 80;
5824 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5825 		} else if (un->un_blockcount <= 0x200000) {
5826 			un->un_g.dkg_nhead = 64;
5827 			un->un_g.dkg_nsect = 32;
5828 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5829 		} else {
5830 			un->un_g.dkg_nhead = 255;
5831 			un->un_g.dkg_nsect = 63;
5832 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5833 		}
5834 		un->un_blockcount =
5835 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5836 	}
5837 
5838 	un->un_g.dkg_acyl	= 0;
5839 	un->un_g.dkg_bcyl	= 0;
5840 	un->un_g.dkg_rpm	= 200;
5841 	un->un_asciilabel[0]	= '\0';
5842 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5843 
5844 	un->un_map[0].dkl_cylno = 0;
5845 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5846 	un->un_map[2].dkl_cylno = 0;
5847 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5848 
5849 #elif defined(_SUNOS_VTOC_16)
5850 
5851 	if (un->un_solaris_size == 0) {
5852 		/*
5853 		 * Got an fdisk table but no Solaris entry;
5854 		 * therefore don't create a default label.
5855 		 */
5856 		un->un_f_geometry_is_valid = TRUE;
5857 		return;
5858 	}
5859 
5860 	/*
5861 	 * For CDs we continue to use the physical geometry to calculate
5862 	 * number of cylinders. All other devices must convert the
5863 	 * physical geometry (geom_cache) to values that will fit
5864 	 * in a dk_geom structure.
5865 	 */
5866 	if (ISCD(un)) {
5867 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5868 	} else {
5869 		/* Convert physical geometry to disk geometry */
5870 		bzero(&un_g, sizeof (struct dk_geom));
5871 
5872 		/*
5873 		 * Refer to comments related to off-by-1 at the
5874 		 * header of this file.
5875 		 * Before calculating geometry, the capacity should be
5876 		 * decreased by 1. If un_f_capacity_adjusted is TRUE,
5877 		 * we are already treating a 1TB disk as (1T - 512)B
5878 		 * and the capacity of the disk has already been
5879 		 * decreased by 1, so no further adjustment is needed.
5880 		 */
5881 		if (!un->un_f_capacity_adjusted &&
5882 		    !un->un_f_has_removable_media &&
5883 		    !un->un_f_is_hotpluggable &&
5884 		    un->un_tgt_blocksize == un->un_sys_blocksize)
5885 			capacity = un->un_blockcount - 1;
5886 		else
5887 			capacity = un->un_blockcount;
5888 
5889 		sd_convert_geometry(capacity, &un_g);
5890 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5891 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5892 	}
5893 
5894 	ASSERT(phys_spc != 0);
5895 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5896 	un->un_g.dkg_acyl = DK_ACYL;
5897 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5898 	disksize = un->un_g.dkg_ncyl * phys_spc;
5899 
5900 	if (ISCD(un)) {
5901 		/*
5902 		 * CDs don't use the "heads * sectors * cyls" type of
5903 		 * geometry, but instead use the entire capacity of the media.
5904 		 */
5905 		disksize = un->un_solaris_size;
5906 		un->un_g.dkg_nhead = 1;
5907 		un->un_g.dkg_nsect = 1;
5908 		un->un_g.dkg_rpm =
5909 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5910 
5911 		un->un_vtoc.v_part[0].p_start = 0;
5912 		un->un_vtoc.v_part[0].p_size  = disksize;
5913 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5914 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5915 
5916 		un->un_map[0].dkl_cylno = 0;
5917 		un->un_map[0].dkl_nblk  = disksize;
5918 		un->un_offset[0] = 0;
5919 
5920 	} else {
5921 		/*
5922 		 * Hard disks and removable media cartridges
5923 		 */
5924 		un->un_g.dkg_rpm =
5925 		    (un->un_pgeom.g_rpm == 0) ? 3600 : un->un_pgeom.g_rpm;
5926 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5927 
5928 		/* Add boot slice */
5929 		un->un_vtoc.v_part[8].p_start = 0;
5930 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5931 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5932 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5933 
5934 		un->un_map[8].dkl_cylno = 0;
5935 		un->un_map[8].dkl_nblk  = phys_spc;
5936 		un->un_offset[8] = 0;
5937 	}
5938 
5939 	un->un_g.dkg_apc = 0;
5940 	un->un_vtoc.v_nparts = V_NUMPAR;
5941 	un->un_vtoc.v_version = V_VERSION;
5942 
5943 	/* Add backup slice */
5944 	un->un_vtoc.v_part[2].p_start = 0;
5945 	un->un_vtoc.v_part[2].p_size  = disksize;
5946 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5947 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5948 
5949 	un->un_map[2].dkl_cylno = 0;
5950 	un->un_map[2].dkl_nblk  = disksize;
5951 	un->un_offset[2] = 0;
5952 
5953 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5954 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5955 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5956 
5957 #else
5958 #error "No VTOC format defined."
5959 #endif
5960 
5961 	un->un_g.dkg_read_reinstruct  = 0;
5962 	un->un_g.dkg_write_reinstruct = 0;
5963 
5964 	un->un_g.dkg_intrlv = 1;
5965 
5966 	un->un_vtoc.v_sanity  = VTOC_SANE;
5967 
5968 	un->un_f_geometry_is_valid = TRUE;
5969 
5970 	SD_INFO(SD_LOG_COMMON, un,
5971 	    "sd_build_default_label: Default label created: "
5972 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5973 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5974 	    un->un_g.dkg_nsect, un->un_blockcount);
5975 }
5976 
5977 
5978 #if defined(_FIRMWARE_NEEDS_FDISK)
5979 /*
5980  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5981  */
5982 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5983 #define	LBA_MAX_CYL	(1022 & 0xFF)
5984 #define	LBA_MAX_HEAD	(254)
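/*
 * In an fdisk entry, the sector byte holds the sector number in bits 0-5
 * and cylinder bits 8-9 in bits 6-7; the cylinder byte holds cylinder
 * bits 0-7.  For C/H/S = 1022/254/63 this encodes as:
 *	sect byte: 63 | ((1022 & 0x300) >> 2) = 0x3f | 0xc0 = 0xff
 *	cyl byte:  1022 & 0xff = 0xfe
 *	head byte: 254 = 0xfe
 */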
5985 
5986 
5987 /*
5988  *    Function: sd_has_max_chs_vals
5989  *
5990  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5991  *
5992  *   Arguments: fdp - ptr to CHS info
5993  *
5994  * Return Code: True or false
5995  *
5996  *     Context: Any.
5997  */
5998 
5999 static int
6000 sd_has_max_chs_vals(struct ipart *fdp)
6001 {
6002 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6003 	    (fdp->beghead == LBA_MAX_HEAD)	&&
6004 	    (fdp->begsect == LBA_MAX_SECT)	&&
6005 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6006 	    (fdp->endhead == LBA_MAX_HEAD)	&&
6007 	    (fdp->endsect == LBA_MAX_SECT));
6008 }
6009 #endif
6010 
6011 
6012 /*
6013  *    Function: sd_inq_fill
6014  *
6015  * Description: Copy a piece of inquiry data into the supplied buffer,
6016  *		replacing non-printable characters with '*' and stopping at
6017  *		the first space character after the beginning of the string.
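 *		For example, an INQUIRY vendor field of "SEAGATE " would be
 *		copied as "SEAGATE", with any unprintable bytes shown as '*'.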
6018  *
6019  *   Arguments: p - source string
6020  *		l - maximum length to copy
6021  *		s - destination string
6022  *
6023  *     Context: Any.
6024  */
6025 
6026 static void
6027 sd_inq_fill(char *p, int l, char *s)
6028 {
6029 	unsigned i = 0;
6030 	char c;
6031 
6032 	while (i++ < l) {
6033 		if ((c = *p++) < ' ' || c >= 0x7F) {
6034 			c = '*';
6035 		} else if (i != 1 && c == ' ') {
6036 			break;
6037 		}
6038 		*s++ = c;
6039 	}
6040 	*s++ = 0;
6041 }
6042 
6043 
6044 /*
6045  *    Function: sd_register_devid
6046  *
6047  * Description: This routine will obtain the device id information from the
6048  *		target, obtain the serial number, and register the device
6049  *		id with the ddi framework.
6050  *
6051  *   Arguments: devi - the system's dev_info_t for the device.
6052  *		un - driver soft state (unit) structure
6053  *		reservation_flag - indicates if a reservation conflict
6054  *		occurred during attach
6055  *
6056  *     Context: Kernel Thread
6057  */
6058 static void
6059 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
6060 {
6061 	int		rval		= 0;
6062 	uchar_t		*inq80		= NULL;
6063 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
6064 	size_t		inq80_resid	= 0;
6065 	uchar_t		*inq83		= NULL;
6066 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
6067 	size_t		inq83_resid	= 0;
6068 
6069 	ASSERT(un != NULL);
6070 	ASSERT(mutex_owned(SD_MUTEX(un)));
6071 	ASSERT((SD_DEVINFO(un)) == devi);
6072 
6073 	/*
6074 	 * This is the case of antiquated Sun disk drives that have the
6075 	 * FAB_DEVID property set in the disk_table.  These drives
6076 	 * manage their devids by storing them in the last 2 available sectors
6077 	 * on the drive and have them fabricated by the ddi layer by calling
6078 	 * ddi_devid_init and passing the DEVID_FAB flag.
6079 	 */
6080 	if (un->un_f_opt_fab_devid == TRUE) {
6081 		/*
6082 		 * Depending on EINVAL isn't reliable, since a reserved disk
6083 		 * may result in invalid geometry, so check to make sure a
6084 		 * reservation conflict did not occur during attach.
6085 		 */
6086 		if ((sd_get_devid(un) == EINVAL) &&
6087 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
6088 			/*
6089 			 * The devid is invalid AND there is no reservation
6090 			 * conflict.  Fabricate a new devid.
6091 			 */
6092 			(void) sd_create_devid(un);
6093 		}
6094 
6095 		/* Register the devid if it exists */
6096 		if (un->un_devid != NULL) {
6097 			(void) ddi_devid_register(SD_DEVINFO(un),
6098 			    un->un_devid);
6099 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6100 			    "sd_register_devid: Devid Fabricated\n");
6101 		}
6102 		return;
6103 	}
6104 
6105 	/*
6106 	 * We check the availability of the World Wide Name (0x83) and Unit
6107 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
6108 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
6109 	 * 0x83 is available, that is the best choice.  Our next choice is
6110 	 * 0x80.  If neither is available, we munge the devid from the device
6111 	 * vid/pid/serial # for Sun-qualified disks, or use the ddi framework
6112 	 * to fabricate a devid for non-Sun-qualified disks.
6113 	 */
6114 	if (sd_check_vpd_page_support(un) == 0) {
6115 		/* collect page 80 data if available */
6116 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
6117 
6118 			mutex_exit(SD_MUTEX(un));
6119 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
6120 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
6121 			    0x01, 0x80, &inq80_resid);
6122 
6123 			if (rval != 0) {
6124 				kmem_free(inq80, inq80_len);
6125 				inq80 = NULL;
6126 				inq80_len = 0;
6127 			}
6128 			mutex_enter(SD_MUTEX(un));
6129 		}
6130 
6131 		/* collect page 83 data if available */
6132 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6133 			mutex_exit(SD_MUTEX(un));
6134 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6135 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6136 			    0x01, 0x83, &inq83_resid);
6137 
6138 			if (rval != 0) {
6139 				kmem_free(inq83, inq83_len);
6140 				inq83 = NULL;
6141 				inq83_len = 0;
6142 			}
6143 			mutex_enter(SD_MUTEX(un));
6144 		}
6145 	}
6146 
6147 	/* encode best devid possible based on data available */
6148 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6149 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6150 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6151 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6152 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6153 
6154 		/* devid successfully encoded, register devid */
6155 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6156 
6157 	} else {
6158 		/*
6159 		 * Unable to encode a devid based on data available.
6160 		 * This is not a Sun-qualified disk.  Older Sun disk
6161 		 * drives that have the SD_FAB_DEVID property
6162 		 * set in the disk_table and non-Sun-qualified
6163 		 * disks are treated in the same manner.  These
6164 		 * drives manage their devids by storing them in
6165 		 * the last 2 available sectors on the drive and
6166 		 * have them fabricated by the ddi layer by
6167 		 * calling ddi_devid_init and passing the
6168 		 * DEVID_FAB flag.
6169 		 * Create a fabricated devid only if no
6170 		 * fabricated devid already exists.
6171 		 */
6172 		if (sd_get_devid(un) == EINVAL) {
6173 			(void) sd_create_devid(un);
6174 			un->un_f_opt_fab_devid = TRUE;
6175 		}
6176 
6177 		/* Register the devid if it exists */
6178 		if (un->un_devid != NULL) {
6179 			(void) ddi_devid_register(SD_DEVINFO(un),
6180 			    un->un_devid);
6181 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6182 			    "sd_register_devid: devid fabricated using "
6183 			    "ddi framework\n");
6184 		}
6185 	}
6186 
6187 	/* clean up resources */
6188 	if (inq80 != NULL) {
6189 		kmem_free(inq80, inq80_len);
6190 	}
6191 	if (inq83 != NULL) {
6192 		kmem_free(inq83, inq83_len);
6193 	}
6194 }
6195 
6196 static daddr_t
6197 sd_get_devid_block(struct sd_lun *un)
6198 {
6199 	daddr_t			spc, blk, head, cyl;
6200 
6201 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6202 		/* this geometry doesn't allow us to write a devid */
6203 		if (un->un_g.dkg_acyl < 2) {
6204 			return (-1);
6205 		}
6206 
6207 		/*
6208 		 * Subtracting 2 guarantees that the next-to-last cylinder
6209 		 * is used.
6210 		 */
6211 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6212 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6213 		head = un->un_g.dkg_nhead - 1;
6214 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6215 		    (head * un->un_g.dkg_nsect) + 1;
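		/*
		 * cyl * (spc - apc) converts the cylinder number into a
		 * block offset (excluding any alternate sectors per
		 * cylinder), head * nsect selects the start of the last
		 * track, and the final + 1 skips that track's first block.
		 */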
6216 	} else {
6217 		if (un->un_reserved != -1) {
6218 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
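			/*
			 * For EFI labels, dkl_cylno holds the reserved
			 * partition's starting LBA (see sd_use_efi), so
			 * this is the second block of that partition.
			 */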
6219 		} else {
6220 			return (-1);
6221 		}
6222 	}
6223 	return (blk);
6224 }
6225 
6226 /*
6227  *    Function: sd_get_devid
6228  *
6229  * Description: This routine will return 0 if a valid device id has been
6230  *		obtained from the target and stored in the soft state. If a
6231  *		valid device id has not been previously read and stored, a
6232  *		read attempt will be made.
6233  *
6234  *   Arguments: un - driver soft state (unit) structure
6235  *
6236  * Return Code: 0 if we successfully get the device id
6237  *
6238  *     Context: Kernel Thread
6239  */
6240 
6241 static int
6242 sd_get_devid(struct sd_lun *un)
6243 {
6244 	struct dk_devid		*dkdevid;
6245 	ddi_devid_t		tmpid;
6246 	uint_t			*ip;
6247 	size_t			sz;
6248 	daddr_t			blk;
6249 	int			status;
6250 	int			chksum;
6251 	int			i;
6252 	size_t			buffer_size;
6253 
6254 	ASSERT(un != NULL);
6255 	ASSERT(mutex_owned(SD_MUTEX(un)));
6256 
6257 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6258 	    un);
6259 
6260 	if (un->un_devid != NULL) {
6261 		return (0);
6262 	}
6263 
6264 	blk = sd_get_devid_block(un);
6265 	if (blk < 0)
6266 		return (EINVAL);
6267 
6268 	/*
6269 	 * Read and verify device id, stored in the reserved cylinders at the
6270 	 * end of the disk. Backup label is on the odd sectors of the last
6271 	 * track of the last cylinder. The device id will be on the last track
6272 	 * of the next-to-last cylinder.
6273 	 */
6274 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6275 	mutex_exit(SD_MUTEX(un));
6276 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6277 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6278 	    SD_PATH_DIRECT);
6279 	if (status != 0) {
6280 		goto error;
6281 	}
6282 
6283 	/* Validate the revision */
6284 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6285 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6286 		status = EINVAL;
6287 		goto error;
6288 	}
6289 
6290 	/* Calculate the checksum */
6291 	chksum = 0;
6292 	ip = (uint_t *)dkdevid;
6293 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6294 	    i++) {
6295 		chksum ^= ip[i];
6296 	}
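	/*
	 * The loop above XORs every 32-bit word of the devid sector except
	 * the last one, which holds the stored checksum read back below
	 * via DKD_GETCHKSUM().
	 */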
6297 
6298 	/* Compare the checksums */
6299 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6300 		status = EINVAL;
6301 		goto error;
6302 	}
6303 
6304 	/* Validate the device id */
6305 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6306 		status = EINVAL;
6307 		goto error;
6308 	}
6309 
6310 	/*
6311 	 * Store the device id in the driver soft state
6312 	 */
6313 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6314 	tmpid = kmem_alloc(sz, KM_SLEEP);
6315 
6316 	mutex_enter(SD_MUTEX(un));
6317 
6318 	un->un_devid = tmpid;
6319 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6320 
6321 	kmem_free(dkdevid, buffer_size);
6322 
6323 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6324 
6325 	return (status);
6326 error:
6327 	mutex_enter(SD_MUTEX(un));
6328 	kmem_free(dkdevid, buffer_size);
6329 	return (status);
6330 }
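
/*
 * The read-and-verify sequence above (revision check, XOR checksum,
 * devid validation) reduces to the following self-contained sketch.
 * Illustrative only, not compiled into the driver; "blocksize" stands
 * in for un_sys_blocksize.
 */
#if 0
static int
sd_devid_block_ok(struct dk_devid *dkdevid, size_t blocksize)
{
	uint_t	*ip = (uint_t *)dkdevid;
	uint_t	chksum = 0;
	size_t	i;

	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		return (EINVAL);
	}

	/* XOR every word in the block except the trailing checksum word */
	for (i = 0; i < ((blocksize - sizeof (int)) / sizeof (int)); i++) {
		chksum ^= ip[i];
	}

	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		return (EINVAL);
	}

	return ((ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) ==
	    DDI_SUCCESS) ? 0 : EINVAL);
}
#endif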
6331 
6332 
6333 /*
6334  *    Function: sd_create_devid
6335  *
6336  * Description: This routine will fabricate the device id and write it
6337  *		to the disk.
6338  *
6339  *   Arguments: un - driver soft state (unit) structure
6340  *
6341  * Return Code: value of the fabricated device id
6342  *
6343  *     Context: Kernel Thread
6344  */
6345 
6346 static ddi_devid_t
6347 sd_create_devid(struct sd_lun *un)
6348 {
6349 	ASSERT(un != NULL);
6350 
6351 	/* Fabricate the devid */
6352 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6353 	    == DDI_FAILURE) {
6354 		return (NULL);
6355 	}
6356 
6357 	/* Write the devid to disk */
6358 	if (sd_write_deviceid(un) != 0) {
6359 		ddi_devid_free(un->un_devid);
6360 		un->un_devid = NULL;
6361 	}
6362 
6363 	return (un->un_devid);
6364 }
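
/*
 * Typical attach-time usage of the two routines above (sketch only; in
 * the driver proper the devid registration path also handles the
 * ddi_devid_register() call and its error cases):
 */
#if 0
	mutex_enter(SD_MUTEX(un));
	if (sd_get_devid(un) != 0) {
		/* No valid devid on disk; fabricate one and write it out. */
		(void) sd_create_devid(un);	/* may leave un_devid NULL */
	}
	mutex_exit(SD_MUTEX(un));

	if (un->un_devid != NULL) {
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
	}
#endif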
6365 
6366 
6367 /*
6368  *    Function: sd_write_deviceid
6369  *
6370  * Description: This routine will write the device id to the disk
6371  *		reserved sector.
6372  *
6373  *   Arguments: un - driver soft state (unit) structure
6374  *
6375  * Return Code: EINVAL
6376  *		value returned by sd_send_scsi_cmd
6377  *
6378  *     Context: Kernel Thread
6379  */
6380 
6381 static int
6382 sd_write_deviceid(struct sd_lun *un)
6383 {
6384 	struct dk_devid		*dkdevid;
6385 	daddr_t			blk;
6386 	uint_t			*ip, chksum;
6387 	int			status;
6388 	int			i;
6389 
6390 	ASSERT(mutex_owned(SD_MUTEX(un)));
6391 
6392 	blk = sd_get_devid_block(un);
6393 	if (blk < 0)
6394 		return (-1);
6395 	mutex_exit(SD_MUTEX(un));
6396 
6397 	/* Allocate the buffer */
6398 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6399 
6400 	/* Fill in the revision */
6401 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6402 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6403 
6404 	/* Copy in the device id */
6405 	mutex_enter(SD_MUTEX(un));
6406 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6407 	    ddi_devid_sizeof(un->un_devid));
6408 	mutex_exit(SD_MUTEX(un));
6409 
6410 	/* Calculate the checksum */
6411 	chksum = 0;
6412 	ip = (uint_t *)dkdevid;
6413 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6414 	    i++) {
6415 		chksum ^= ip[i];
6416 	}
6417 
6418 	/* Fill-in checksum */
6419 	DKD_FORMCHKSUM(chksum, dkdevid);
6420 
6421 	/* Write the reserved sector */
6422 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6423 	    SD_PATH_DIRECT);
6424 
6425 	kmem_free(dkdevid, un->un_sys_blocksize);
6426 
6427 	mutex_enter(SD_MUTEX(un));
6428 	return (status);
6429 }
6430 
6431 
6432 /*
6433  *    Function: sd_check_vpd_page_support
6434  *
6435  * Description: This routine sends an inquiry command with the EVPD bit set and
6436  *		a page code of 0x00 to the device. It is used to determine which
6437  *		vital product pages are available to find the devid. We are
6438  *		looking for pages 0x83 or 0x80.  If we return -1, the
6439  *		device does not support that command.
6440  *
6441  *   Arguments: un  - driver soft state (unit) structure
6442  *
6443  * Return Code: 0 - success
6444  *		1 - check condition; -1 - VPD pages not supported
6445  *
6446  *     Context: This routine can sleep.
6447  */
6448 
6449 static int
6450 sd_check_vpd_page_support(struct sd_lun *un)
6451 {
6452 	uchar_t	*page_list	= NULL;
6453 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6454 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6455 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6456 	int    	rval		= 0;
6457 	int	counter;
6458 
6459 	ASSERT(un != NULL);
6460 	ASSERT(mutex_owned(SD_MUTEX(un)));
6461 
6462 	mutex_exit(SD_MUTEX(un));
6463 
6464 	/*
6465 	 * We'll set the page length to the maximum to save figuring it out
6466 	 * with an additional call.
6467 	 */
6468 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6469 
6470 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6471 	    page_code, NULL);
6472 
6473 	mutex_enter(SD_MUTEX(un));
6474 
6475 	/*
6476 	 * Now we must validate that the device accepted the command, as some
6477 	 * drives do not support it.  If the drive does support it, we will
6478 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6479 	 * not, we return -1.
6480 	 */
6481 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6482 		/* Loop to find one of the 2 pages we need */
6483 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6484 
6485 		/*
6486 		 * Pages are returned in ascending order, and 0x83 is what we
6487 		 * are hoping for.
6488 		 */
6489 		while ((page_list[counter] <= 0x83) &&
6490 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6491 		    VPD_HEAD_OFFSET))) {
6492 			/*
6493 			 * The loop bound adds VPD_HEAD_OFFSET because the list
6494 			 * counted by page_list[VPD_PAGE_LENGTH] starts at byte 4.
6495 			 */
6496 
6497 			switch (page_list[counter]) {
6498 			case 0x00:
6499 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6500 				break;
6501 			case 0x80:
6502 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6503 				break;
6504 			case 0x81:
6505 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6506 				break;
6507 			case 0x82:
6508 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6509 				break;
6510 			case 0x83:
6511 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6512 				break;
6513 			}
6514 			counter++;
6515 		}
6516 
6517 	} else {
6518 		rval = -1;
6519 
6520 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6521 		    "sd_check_vpd_page_support: This drive does not implement "
6522 		    "VPD pages.\n");
6523 	}
6524 
6525 	kmem_free(page_list, page_length);
6526 
6527 	return (rval);
6528 }
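
/*
 * For reference, a minimal standalone scan of an EVPD page 0x00
 * response looks like the sketch below. Per SPC, byte 3 of the
 * response holds the length of the supported-page list and the list
 * itself starts at byte 4; VPD_PAGE_LENGTH and VPD_HEAD_OFFSET above
 * encode those same offsets. "vpd_has_page" is a hypothetical helper,
 * not part of this driver.
 */
#if 0
static int
vpd_has_page(uchar_t *page_list, uchar_t wanted)
{
	int	i;
	int	nbytes = page_list[3];	/* length of the page list */

	for (i = 0; i < nbytes; i++) {
		if (page_list[4 + i] == wanted) {
			return (1);
		}
	}
	return (0);
}
#endif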
6529 
6530 
6531 /*
6532  *    Function: sd_setup_pm
6533  *
6534  * Description: Initialize Power Management on the device
6535  *
6536  *     Context: Kernel Thread
6537  */
6538 
6539 static void
6540 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6541 {
6542 	uint_t	log_page_size;
6543 	uchar_t	*log_page_data;
6544 	int	rval;
6545 
6546 	/*
6547 	 * Since we are called from attach, holding a mutex for
6548 	 * un is unnecessary. Because some of the routines called
6549 	 * from here require SD_MUTEX to not be held, assert this
6550 	 * right up front.
6551 	 */
6552 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6553 	/*
6554 	 * Since the sd device does not have the 'reg' property,
6555 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6556 	 * The following code is to tell cpr that this device
6557 	 * DOES need to be suspended and resumed.
6558 	 */
6559 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6560 	    "pm-hardware-state", "needs-suspend-resume");
6561 
6562 	/*
6563 	 * This complies with the new power management framework
6564 	 * for certain desktop machines. Create the pm_components
6565 	 * property as a string array property.
6566 	 */
6567 	if (un->un_f_pm_supported) {
6568 		/*
6569 		 * Not all devices have a motor, so try it first.
6570 		 * Some devices may return ILLEGAL REQUEST and some
6571 		 * may hang.
6572 		 * The following START_STOP_UNIT is used to check whether
6573 		 * the target device has a motor.
6574 		 */
6575 		un->un_f_start_stop_supported = TRUE;
6576 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6577 		    SD_PATH_DIRECT) != 0) {
6578 			un->un_f_start_stop_supported = FALSE;
6579 		}
6580 
6581 		/*
6582 		 * Create the pm properties anyway; otherwise the parent
6583 		 * can't go to sleep.
6584 		 */
6585 		(void) sd_create_pm_components(devi, un);
6586 		un->un_f_pm_is_enabled = TRUE;
6587 		return;
6588 	}
6589 
6590 	if (!un->un_f_log_sense_supported) {
6591 		un->un_power_level = SD_SPINDLE_ON;
6592 		un->un_f_pm_is_enabled = FALSE;
6593 		return;
6594 	}
6595 
6596 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6597 
6598 #ifdef	SDDEBUG
6599 	if (sd_force_pm_supported) {
6600 		/* Force a successful result */
6601 		rval = 1;
6602 	}
6603 #endif
6604 
6605 	/*
6606 	 * If the start-stop cycle counter log page is not supported
6607 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6608 	 * then we should not create the pm_components property.
6609 	 */
6610 	if (rval == -1) {
6611 		/*
6612 		 * Error.
6613 		 * Reading log sense failed, most likely this is
6614 		 * an older drive that does not support log sense.
6615 		 * If this fails auto-pm is not supported.
6616 		 */
6617 		un->un_power_level = SD_SPINDLE_ON;
6618 		un->un_f_pm_is_enabled = FALSE;
6619 
6620 	} else if (rval == 0) {
6621 		/*
6622 		 * Page not found.
6623 		 * The start/stop cycle counter is implemented as page
6624 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6625 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6626 		 */
6627 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6628 			/*
6629 			 * Page found, use this one.
6630 			 */
6631 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6632 			un->un_f_pm_is_enabled = TRUE;
6633 		} else {
6634 			/*
6635 			 * Error or page not found.
6636 			 * auto-pm is not supported for this device.
6637 			 */
6638 			un->un_power_level = SD_SPINDLE_ON;
6639 			un->un_f_pm_is_enabled = FALSE;
6640 		}
6641 	} else {
6642 		/*
6643 		 * Page found, use it.
6644 		 */
6645 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6646 		un->un_f_pm_is_enabled = TRUE;
6647 	}
6648 
6649 
6650 	if (un->un_f_pm_is_enabled == TRUE) {
6651 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6652 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6653 
6654 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6655 		    log_page_size, un->un_start_stop_cycle_page,
6656 		    0x01, 0, SD_PATH_DIRECT);
6657 #ifdef	SDDEBUG
6658 		if (sd_force_pm_supported) {
6659 			/* Force a successful result */
6660 			rval = 0;
6661 		}
6662 #endif
6663 
6664 		/*
6665 		 * If the log sense for the start/stop cycle counter page
6666 		 * succeeds, then power management is supported and we can
6667 		 * enable auto-pm.
6668 		 */
6669 		if (rval == 0)  {
6670 			(void) sd_create_pm_components(devi, un);
6671 		} else {
6672 			un->un_power_level = SD_SPINDLE_ON;
6673 			un->un_f_pm_is_enabled = FALSE;
6674 		}
6675 
6676 		kmem_free(log_page_data, log_page_size);
6677 	}
6678 }
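
/*
 * Decision summary for sd_setup_pm() (a recap of the logic above):
 *
 *	sd_log_page_supported(0x0E)	resulting action
 *	---------------------------	-----------------------------------
 *	-1 (log sense failed)		pm disabled, spindle left on
 *	 0 (page not in list)		probe VU page 0x31; enable pm only
 *					if that page is supported
 *	 1 (page supported)		enable pm using page 0x0E
 *
 * Even when a page is found, the subsequent LOG SENSE of that page must
 * succeed before pm-components is actually created.
 */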
6679 
6680 
6681 /*
6682  *    Function: sd_create_pm_components
6683  *
6684  * Description: Initialize PM property.
6685  *
6686  *     Context: Kernel thread context
6687  */
6688 
6689 static void
6690 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6691 {
6692 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6693 
6694 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6695 
6696 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6697 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6698 		/*
6699 		 * When components are initially created they are idle,
6700 		 * power up any non-removables.
6701 		 * Note: the return value of pm_raise_power can't be used
6702 		 * for determining if PM should be enabled for this device.
6703 		 * Even if you check the return values and remove this
6704 		 * property created above, the PM framework will not honor the
6705 		 * change after the first call to pm_raise_power. Hence,
6706 		 * removal of that property does not help if pm_raise_power
6707 		 * fails. In the case of removable media, the start/stop
6708 		 * will fail if the media is not present.
6709 		 */
6710 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6711 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6712 			mutex_enter(SD_MUTEX(un));
6713 			un->un_power_level = SD_SPINDLE_ON;
6714 			mutex_enter(&un->un_pm_mutex);
6715 			/* Set to on and not busy. */
6716 			un->un_pm_count = 0;
6717 		} else {
6718 			mutex_enter(SD_MUTEX(un));
6719 			un->un_power_level = SD_SPINDLE_OFF;
6720 			mutex_enter(&un->un_pm_mutex);
6721 			/* Set to off. */
6722 			un->un_pm_count = -1;
6723 		}
6724 		mutex_exit(&un->un_pm_mutex);
6725 		mutex_exit(SD_MUTEX(un));
6726 	} else {
6727 		un->un_power_level = SD_SPINDLE_ON;
6728 		un->un_f_pm_is_enabled = FALSE;
6729 	}
6730 }
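
/*
 * The "pm-components" property created above declares a single
 * component (component 0, the spindle motor) with levels 0=off and
 * 1=on. The rest of the driver then brackets device activity with
 * busy/idle notifications, roughly as in the sketch below (in sd this
 * bookkeeping is centralized in sd_pm_entry()/sd_pm_exit() together
 * with un_pm_count):
 */
#if 0
	(void) pm_busy_component(SD_DEVINFO(un), 0);
	if (un->un_power_level != SD_SPINDLE_ON) {
		/* Request spin-up; the framework calls sdpower(). */
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
	}
	/* ... transport the command ... */
	(void) pm_idle_component(SD_DEVINFO(un), 0);
#endif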
6731 
6732 
6733 /*
6734  *    Function: sd_ddi_suspend
6735  *
6736  * Description: Performs system power-down operations. This includes
6737  *		setting the drive state to indicate it is suspended so
6738  *		that no new commands will be accepted. Also, wait for
6739  *		all commands that are in transport or queued to a timer
6740  *		for retry to complete. All timeout threads are cancelled.
6741  *
6742  * Return Code: DDI_FAILURE or DDI_SUCCESS
6743  *
6744  *     Context: Kernel thread context
6745  */
6746 
6747 static int
6748 sd_ddi_suspend(dev_info_t *devi)
6749 {
6750 	struct	sd_lun	*un;
6751 	clock_t		wait_cmds_complete;
6752 
6753 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6754 	if (un == NULL) {
6755 		return (DDI_FAILURE);
6756 	}
6757 
6758 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6759 
6760 	mutex_enter(SD_MUTEX(un));
6761 
6762 	/* Return success if the device is already suspended. */
6763 	if (un->un_state == SD_STATE_SUSPENDED) {
6764 		mutex_exit(SD_MUTEX(un));
6765 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6766 		    "device already suspended, exiting\n");
6767 		return (DDI_SUCCESS);
6768 	}
6769 
6770 	/* Return failure if the device is being used by HA */
6771 	if (un->un_resvd_status &
6772 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6773 		mutex_exit(SD_MUTEX(un));
6774 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6775 		    "device in use by HA, exiting\n");
6776 		return (DDI_FAILURE);
6777 	}
6778 
6779 	/*
6780 	 * Return failure if the device is in a resource wait
6781 	 * or power changing state.
6782 	 */
6783 	if ((un->un_state == SD_STATE_RWAIT) ||
6784 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6785 		mutex_exit(SD_MUTEX(un));
6786 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6787 		    "device in resource wait state, exiting\n");
6788 		return (DDI_FAILURE);
6789 	}
6790 
6791 
6792 	un->un_save_state = un->un_last_state;
6793 	New_state(un, SD_STATE_SUSPENDED);
6794 
6795 	/*
6796 	 * Wait for all commands that are in transport or queued to a timer
6797 	 * for retry to complete.
6798 	 *
6799 	 * While waiting, no new commands will be accepted or sent because of
6800 	 * the new state we set above.
6801 	 *
6802 	 * Wait till current operation has completed. If we are in the resource
6803 	 * wait state (with an intr outstanding) then we need to wait till the
6804 	 * intr completes and starts the next cmd. We want to wait for
6805 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6806 	 */
6807 	wait_cmds_complete = ddi_get_lbolt() +
6808 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6809 
6810 	while (un->un_ncmds_in_transport != 0) {
6811 		/*
6812 		 * Fail if commands do not finish in the specified time.
6813 		 */
6814 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6815 		    wait_cmds_complete) == -1) {
6816 			/*
6817 			 * Undo the state changes made above. Everything
6818 			 * must go back to its original value.
6819 			 */
6820 			Restore_state(un);
6821 			un->un_last_state = un->un_save_state;
6822 			/* Wake up any threads that might be waiting. */
6823 			cv_broadcast(&un->un_suspend_cv);
6824 			mutex_exit(SD_MUTEX(un));
6825 			SD_ERROR(SD_LOG_IO_PM, un,
6826 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6827 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6828 			return (DDI_FAILURE);
6829 		}
6830 	}
6831 
6832 	/*
6833 	 * Cancel SCSI watch thread and timeouts, if any are active
6834 	 */
6835 
6836 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6837 		opaque_t temp_token = un->un_swr_token;
6838 		mutex_exit(SD_MUTEX(un));
6839 		scsi_watch_suspend(temp_token);
6840 		mutex_enter(SD_MUTEX(un));
6841 	}
6842 
6843 	if (un->un_reset_throttle_timeid != NULL) {
6844 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6845 		un->un_reset_throttle_timeid = NULL;
6846 		mutex_exit(SD_MUTEX(un));
6847 		(void) untimeout(temp_id);
6848 		mutex_enter(SD_MUTEX(un));
6849 	}
6850 
6851 	if (un->un_dcvb_timeid != NULL) {
6852 		timeout_id_t temp_id = un->un_dcvb_timeid;
6853 		un->un_dcvb_timeid = NULL;
6854 		mutex_exit(SD_MUTEX(un));
6855 		(void) untimeout(temp_id);
6856 		mutex_enter(SD_MUTEX(un));
6857 	}
6858 
6859 	mutex_enter(&un->un_pm_mutex);
6860 	if (un->un_pm_timeid != NULL) {
6861 		timeout_id_t temp_id = un->un_pm_timeid;
6862 		un->un_pm_timeid = NULL;
6863 		mutex_exit(&un->un_pm_mutex);
6864 		mutex_exit(SD_MUTEX(un));
6865 		(void) untimeout(temp_id);
6866 		mutex_enter(SD_MUTEX(un));
6867 	} else {
6868 		mutex_exit(&un->un_pm_mutex);
6869 	}
6870 
6871 	if (un->un_retry_timeid != NULL) {
6872 		timeout_id_t temp_id = un->un_retry_timeid;
6873 		un->un_retry_timeid = NULL;
6874 		mutex_exit(SD_MUTEX(un));
6875 		(void) untimeout(temp_id);
6876 		mutex_enter(SD_MUTEX(un));
6877 	}
6878 
6879 	if (un->un_direct_priority_timeid != NULL) {
6880 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6881 		un->un_direct_priority_timeid = NULL;
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) untimeout(temp_id);
6884 		mutex_enter(SD_MUTEX(un));
6885 	}
6886 
6887 	if (un->un_f_is_fibre == TRUE) {
6888 		/*
6889 		 * Remove callbacks for insert and remove events
6890 		 */
6891 		if (un->un_insert_event != NULL) {
6892 			mutex_exit(SD_MUTEX(un));
6893 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6894 			mutex_enter(SD_MUTEX(un));
6895 			un->un_insert_event = NULL;
6896 		}
6897 
6898 		if (un->un_remove_event != NULL) {
6899 			mutex_exit(SD_MUTEX(un));
6900 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6901 			mutex_enter(SD_MUTEX(un));
6902 			un->un_remove_event = NULL;
6903 		}
6904 	}
6905 
6906 	mutex_exit(SD_MUTEX(un));
6907 
6908 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6909 
6910 	return (DDI_SUCCESS);
6911 }
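
/*
 * A note on the cancellation idiom used repeatedly above: each timeout
 * id is captured and cleared while SD_MUTEX is held, and untimeout(9F)
 * is then called with the mutex dropped. untimeout() does not return
 * while the timeout handler is executing, and these handlers themselves
 * acquire SD_MUTEX, so calling untimeout() with the mutex held could
 * deadlock. The soft state field is cleared first so that no other
 * thread uses the stale id while the mutex is dropped.
 */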
6912 
6913 
6914 /*
6915  *    Function: sd_ddi_pm_suspend
6916  *
6917  * Description: Set the drive state to low power.
6918  *		Someone else is required to actually change the drive
6919  *		power level.
6920  *
6921  *   Arguments: un - driver soft state (unit) structure
6922  *
6923  * Return Code: DDI_FAILURE or DDI_SUCCESS
6924  *
6925  *     Context: Kernel thread context
6926  */
6927 
6928 static int
6929 sd_ddi_pm_suspend(struct sd_lun *un)
6930 {
6931 	ASSERT(un != NULL);
6932 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6933 
6934 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6935 	mutex_enter(SD_MUTEX(un));
6936 
6937 	/*
6938 	 * Exit if power management is not enabled for this device, or if
6939 	 * the device is being used by HA.
6940 	 */
6941 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6942 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6943 		mutex_exit(SD_MUTEX(un));
6944 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6945 		return (DDI_SUCCESS);
6946 	}
6947 
6948 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6949 	    un->un_ncmds_in_driver);
6950 
6951 	/*
6952 	 * See if the device is not busy, i.e.:
6953 	 *    - we have no commands in the driver for this device
6954 	 *    - not waiting for resources
6955 	 */
6956 	if ((un->un_ncmds_in_driver == 0) &&
6957 	    (un->un_state != SD_STATE_RWAIT)) {
6958 		/*
6959 		 * The device is not busy, so it is OK to go to low power state.
6960 		 * Indicate low power, but rely on someone else to actually
6961 		 * change it.
6962 		 */
6963 		mutex_enter(&un->un_pm_mutex);
6964 		un->un_pm_count = -1;
6965 		mutex_exit(&un->un_pm_mutex);
6966 		un->un_power_level = SD_SPINDLE_OFF;
6967 	}
6968 
6969 	mutex_exit(SD_MUTEX(un));
6970 
6971 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6972 
6973 	return (DDI_SUCCESS);
6974 }
6975 
6976 
6977 /*
6978  *    Function: sd_ddi_resume
6979  *
6980  * Description: Performs system power-up operations.
6981  *
6982  * Return Code: DDI_SUCCESS
6983  *		DDI_FAILURE
6984  *
6985  *     Context: Kernel thread context
6986  */
6987 
6988 static int
6989 sd_ddi_resume(dev_info_t *devi)
6990 {
6991 	struct	sd_lun	*un;
6992 
6993 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6994 	if (un == NULL) {
6995 		return (DDI_FAILURE);
6996 	}
6997 
6998 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6999 
7000 	mutex_enter(SD_MUTEX(un));
7001 	Restore_state(un);
7002 
7003 	/*
7004 	 * Restore the state which was saved to give
7005 	 * the right state in un_last_state.
7006 	 */
7007 	un->un_last_state = un->un_save_state;
7008 	/*
7009 	 * Note: throttle comes back at full.
7010 	 * Also note: this MUST be done before calling pm_raise_power
7011 	 * otherwise the system can get hung in biowait. The scenario where
7012 	 * this'll happen is under cpr suspend. Writing of the system
7013 	 * state goes through sddump, which writes 0 to un_throttle. If
7014 	 * writing the system state then fails, example if the partition is
7015 	 * too small, then cpr attempts a resume. If throttle isn't restored
7016 	 * from the saved value until after calling pm_raise_power then
7017 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
7018 	 * in biowait.
7019 	 */
7020 	un->un_throttle = un->un_saved_throttle;
7021 
7022 	/*
7023 	 * The chance of failure is very rare as the only command done in power
7024 	 * entry point is START command when you transition from 0->1 or
7025 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
7026 	 * which suspend was done. Ignore the return value as the resume should
7027 	 * not be failed. In the case of removable media the media need not be
7028 	 * inserted and hence there is a chance that raise power will fail with
7029 	 * media not present.
7030 	 */
7031 	if (un->un_f_attach_spinup) {
7032 		mutex_exit(SD_MUTEX(un));
7033 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7034 		mutex_enter(SD_MUTEX(un));
7035 	}
7036 
7037 	/*
7038 	 * Don't broadcast to the suspend cv and therefore possibly
7039 	 * start I/O until after power has been restored.
7040 	 */
7041 	cv_broadcast(&un->un_suspend_cv);
7042 	cv_broadcast(&un->un_state_cv);
7043 
7044 	/* restart thread */
7045 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7046 		scsi_watch_resume(un->un_swr_token);
7047 	}
7048 
7049 #if (defined(__fibre))
7050 	if (un->un_f_is_fibre == TRUE) {
7051 		/*
7052 		 * Add callbacks for insert and remove events
7053 		 */
7054 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7055 			sd_init_event_callbacks(un);
7056 		}
7057 	}
7058 #endif
7059 
7060 	/*
7061 	 * Transport any pending commands to the target.
7062 	 *
7063 	 * If this is a low-activity device, commands in queue will have to wait
7064 	 * until new commands come in, which may take a while. Also, we
7065 	 * specifically don't check un_ncmds_in_transport because we know that
7066 	 * there really are no commands in progress after the unit was
7067 	 * suspended and we could have reached the throttle level, been
7068 	 * suspended, and have no new commands coming in for a while. Highly
7069 	 * unlikely, but so is the low-activity disk scenario.
7070 	 */
7071 	ddi_xbuf_dispatch(un->un_xbuf_attr);
7072 
7073 	sd_start_cmds(un, NULL);
7074 	mutex_exit(SD_MUTEX(un));
7075 
7076 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7077 
7078 	return (DDI_SUCCESS);
7079 }
7080 
7081 
7082 /*
7083  *    Function: sd_ddi_pm_resume
7084  *
7085  * Description: Set the drive state to powered on.
7086  *		Someone else is required to actually change the drive
7087  *		power level.
7088  *
7089  *   Arguments: un - driver soft state (unit) structure
7090  *
7091  * Return Code: DDI_SUCCESS
7092  *
7093  *     Context: Kernel thread context
7094  */
7095 
7096 static int
7097 sd_ddi_pm_resume(struct sd_lun *un)
7098 {
7099 	ASSERT(un != NULL);
7100 
7101 	ASSERT(!mutex_owned(SD_MUTEX(un)));
7102 	mutex_enter(SD_MUTEX(un));
7103 	un->un_power_level = SD_SPINDLE_ON;
7104 
7105 	ASSERT(!mutex_owned(&un->un_pm_mutex));
7106 	mutex_enter(&un->un_pm_mutex);
7107 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7108 		un->un_pm_count++;
7109 		ASSERT(un->un_pm_count == 0);
7110 		/*
7111 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7112 		 * un_suspend_cv is for a system resume, not a power management
7113 		 * device resume. (4297749)
7114 		 *	 cv_broadcast(&un->un_suspend_cv);
7115 		 */
7116 	}
7117 	mutex_exit(&un->un_pm_mutex);
7118 	mutex_exit(SD_MUTEX(un));
7119 
7120 	return (DDI_SUCCESS);
7121 }
7122 
7123 
7124 /*
7125  *    Function: sd_pm_idletimeout_handler
7126  *
7127  * Description: A timer routine that's active only while a device is busy.
7128  *		The purpose is to extend slightly the pm framework's busy
7129  *		view of the device to prevent busy/idle thrashing for
7130  *		back-to-back commands. Do this by comparing the current time
7131  *		to the time at which the last command completed and when the
7132  *		difference is greater than sd_pm_idletime, call
7133  *		pm_idle_component. In addition to indicating idle to the pm
7134  *		framework, update the chain type to again use the internal pm
7135  *		layers of the driver.
7136  *
7137  *   Arguments: arg - driver soft state (unit) structure
7138  *
7139  *     Context: Executes in a timeout(9F) thread context
7140  */
7141 
7142 static void
7143 sd_pm_idletimeout_handler(void *arg)
7144 {
7145 	struct sd_lun *un = arg;
7146 
7147 	time_t	now;
7148 
7149 	mutex_enter(&sd_detach_mutex);
7150 	if (un->un_detach_count != 0) {
7151 		/* Abort if the instance is detaching */
7152 		mutex_exit(&sd_detach_mutex);
7153 		return;
7154 	}
7155 	mutex_exit(&sd_detach_mutex);
7156 
7157 	now = ddi_get_time();
7158 	/*
7159 	 * Grab both mutexes, in the proper order, since we're accessing
7160 	 * both PM and softstate variables.
7161 	 */
7162 	mutex_enter(SD_MUTEX(un));
7163 	mutex_enter(&un->un_pm_mutex);
7164 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7165 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7166 		/*
7167 		 * Update the chain types.
7168 		 * This takes effect on the next new command received.
7169 		 */
7170 		if (un->un_f_non_devbsize_supported) {
7171 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7172 		} else {
7173 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7174 		}
7175 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7176 
7177 		SD_TRACE(SD_LOG_IO_PM, un,
7178 		    "sd_pm_idletimeout_handler: idling device\n");
7179 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7180 		un->un_pm_idle_timeid = NULL;
7181 	} else {
7182 		un->un_pm_idle_timeid =
7183 		    timeout(sd_pm_idletimeout_handler, un,
7184 		    drv_usectohz((clock_t)300000));	/* 300 ms. */
7185 	}
7186 	mutex_exit(&un->un_pm_mutex);
7187 	mutex_exit(SD_MUTEX(un));
7188 }
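
/*
 * Timing sketch for the handler above: un_pm_idle_time is stamped when
 * a command completes, and this handler re-checks every 300 ms. With an
 * sd_pm_idletime of, say, one second, a burst of back-to-back commands
 * keeps refreshing the stamp, and pm_idle_component() is called roughly
 * 1.0 to 1.3 seconds after the last command completes, which is what
 * prevents busy/idle thrashing.
 */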
7189 
7190 
7191 /*
7192  *    Function: sd_pm_timeout_handler
7193  *
7194  * Description: Callback to tell framework we are idle.
7195  *
7196  *     Context: timeout(9f) thread context.
7197  */
7198 
7199 static void
7200 sd_pm_timeout_handler(void *arg)
7201 {
7202 	struct sd_lun *un = arg;
7203 
7204 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7205 	mutex_enter(&un->un_pm_mutex);
7206 	un->un_pm_timeid = NULL;
7207 	mutex_exit(&un->un_pm_mutex);
7208 }
7209 
7210 
7211 /*
7212  *    Function: sdpower
7213  *
7214  * Description: PM entry point.
7215  *
7216  * Return Code: DDI_SUCCESS
7217  *		DDI_FAILURE
7218  *
7219  *     Context: Kernel thread context
7220  */
7221 
7222 static int
7223 sdpower(dev_info_t *devi, int component, int level)
7224 {
7225 	struct sd_lun	*un;
7226 	int		instance;
7227 	int		rval = DDI_SUCCESS;
7228 	uint_t		i, log_page_size, maxcycles, ncycles;
7229 	uchar_t		*log_page_data;
7230 	int		log_sense_page;
7231 	int		medium_present;
7232 	time_t		intvlp;
7233 	dev_t		dev;
7234 	struct pm_trans_data	sd_pm_tran_data;
7235 	uchar_t		save_state;
7236 	int		sval;
7237 	uchar_t		state_before_pm;
7238 	int		got_semaphore_here;
7239 
7240 	instance = ddi_get_instance(devi);
7241 
7242 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7243 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7244 	    component != 0) {
7245 		return (DDI_FAILURE);
7246 	}
7247 
7248 	dev = sd_make_device(SD_DEVINFO(un));
7249 
7250 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7251 
7252 	/*
7253 	 * Must synchronize power down with close.
7254 	 * Attempt to decrement/acquire the open/close semaphore,
7255 	 * but do NOT wait on it. If it's not greater than zero,
7256 	 * ie. it can't be decremented without waiting, then
7257 	 * i.e. it can't be decremented without waiting, then
7258 	 * and the try returns 0. Use that knowledge here to determine
7259 	 * if it's OK to change the device power level.
7260 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7261 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
7262 	 */
7263 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7264 
7265 	mutex_enter(SD_MUTEX(un));
7266 
7267 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7268 	    un->un_ncmds_in_driver);
7269 
7270 	/*
7271 	 * If un_ncmds_in_driver is non-zero it indicates commands are
7272 	 * already being processed in the driver, or if the semaphore was
7273 	 * not gotten here it indicates an open or close is being processed.
7274 	 * At the same time somebody is requesting to go low power which
7275 	 * can't happen, therefore we need to return failure.
7276 	 */
7277 	if ((level == SD_SPINDLE_OFF) &&
7278 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7279 		mutex_exit(SD_MUTEX(un));
7280 
7281 		if (got_semaphore_here != 0) {
7282 			sema_v(&un->un_semoclose);
7283 		}
7284 		SD_TRACE(SD_LOG_IO_PM, un,
7285 		    "sdpower: exit, device has queued cmds.\n");
7286 		return (DDI_FAILURE);
7287 	}
7288 
7289 	/*
7290 	 * If the state is OFFLINE the disk is completely dead; in our case
7291 	 * we would have to power the disk on or off by sending commands,
7292 	 * which would fail anyway, so just return here.
7293 	 *
7294 	 * Power changes to a device that's OFFLINE or SUSPENDED
7295 	 * are not allowed.
7296 	 */
7297 	if ((un->un_state == SD_STATE_OFFLINE) ||
7298 	    (un->un_state == SD_STATE_SUSPENDED)) {
7299 		mutex_exit(SD_MUTEX(un));
7300 
7301 		if (got_semaphore_here != 0) {
7302 			sema_v(&un->un_semoclose);
7303 		}
7304 		SD_TRACE(SD_LOG_IO_PM, un,
7305 		    "sdpower: exit, device is off-line.\n");
7306 		return (DDI_FAILURE);
7307 	}
7308 
7309 	/*
7310 	 * Change the device's state to indicate its power level
7311 	 * is being changed. Do this to prevent a power off in the
7312 	 * middle of commands, which is especially bad on devices
7313 	 * that are really powered off instead of just spun down.
7314 	 */
7315 	state_before_pm = un->un_state;
7316 	un->un_state = SD_STATE_PM_CHANGING;
7317 
7318 	mutex_exit(SD_MUTEX(un));
7319 
7320 	/*
7321 	 * If "pm-capable" property is set to TRUE by HBA drivers,
7322 	 * bypass the following checking, otherwise, check the log
7323 	 * sense information for this device
7324 	 */
7325 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7326 		/*
7327 		 * Get the log sense information to understand whether the
7328 		 * power cycle counts have gone beyond the threshold.
7329 		 */
7330 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7331 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7332 
7333 		mutex_enter(SD_MUTEX(un));
7334 		log_sense_page = un->un_start_stop_cycle_page;
7335 		mutex_exit(SD_MUTEX(un));
7336 
7337 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7338 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7339 #ifdef	SDDEBUG
7340 		if (sd_force_pm_supported) {
7341 			/* Force a successful result */
7342 			rval = 0;
7343 		}
7344 #endif
7345 		if (rval != 0) {
7346 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7347 			    "Log Sense Failed\n");
7348 			kmem_free(log_page_data, log_page_size);
7349 			/* Cannot support power management on those drives */
7350 
7351 			if (got_semaphore_here != 0) {
7352 				sema_v(&un->un_semoclose);
7353 			}
7354 			/*
7355 			 * On exit put the state back to its original value
7356 			 * and broadcast to anyone waiting for the power
7357 			 * change completion.
7358 			 */
7359 			mutex_enter(SD_MUTEX(un));
7360 			un->un_state = state_before_pm;
7361 			cv_broadcast(&un->un_suspend_cv);
7362 			mutex_exit(SD_MUTEX(un));
7363 			SD_TRACE(SD_LOG_IO_PM, un,
7364 			    "sdpower: exit, Log Sense Failed.\n");
7365 			return (DDI_FAILURE);
7366 		}
7367 
7368 		/*
7369 		 * From the page data - Convert the essential information to
7370 		 * pm_trans_data
7371 		 */
7372 		maxcycles =
7373 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7374 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7375 
7376 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7377 
7378 		ncycles =
7379 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7380 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7381 
7382 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7383 
7384 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7385 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7386 			    log_page_data[8+i];
7387 		}
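
		/*
		 * Layout recap (start/stop cycle counter log page, all
		 * fields big-endian): bytes 0x1C-0x1F hold the recommended
		 * maximum start/stop cycles over the device lifetime,
		 * bytes 0x24-0x27 hold the accumulated cycle count, and
		 * the date-of-manufacture parameter begins at byte 8.
		 */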
7388 
7389 		kmem_free(log_page_data, log_page_size);
7390 
7391 		/*
7392 		 * Call pm_trans_check routine to get the Ok from
7393 		 * the global policy
7394 		 */
7395 
7396 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7397 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7398 
7399 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7400 #ifdef	SDDEBUG
7401 		if (sd_force_pm_supported) {
7402 			/* Force a successful result */
7403 			rval = 1;
7404 		}
7405 #endif
7406 		switch (rval) {
7407 		case 0:
7408 			/*
7409 			 * Not OK to power cycle, or an error in the parameters
7410 			 * passed. pm_trans_check() has returned the advised
7411 			 * wait time in intvlp. Based on intvlp we are supposed
7412 			 * to pretend we are busy so that the pm framework will
7413 			 * not call our power entry point. To do that, install
7414 			 * a timeout handler and wait for the recommended time
7415 			 * to elapse so that power management can be effective
7416 			 * again.
7417 			 *
7418 			 * To effect this behavior, call pm_busy_component to
7419 			 * indicate to the framework this device is busy.
7420 			 * By not adjusting un_pm_count the rest of PM in
7421 			 * the driver will function normally, independent of
7422 			 * this; but because the framework is told the device
7423 			 * is busy it won't attempt powering down until it gets
7424 			 * a matching idle. The timeout handler sends this.
7425 			 * Note: sd_pm_entry can't be called here to do this
7426 			 * because sdpower may have been called as a result
7427 			 * of a call to pm_raise_power from within sd_pm_entry.
7428 			 *
7429 			 * If a timeout handler is already active then
7430 			 * don't install another.
7431 			 */
7432 			mutex_enter(&un->un_pm_mutex);
7433 			if (un->un_pm_timeid == NULL) {
7434 				un->un_pm_timeid =
7435 				    timeout(sd_pm_timeout_handler,
7436 				    un, intvlp * drv_usectohz(1000000));
7437 				mutex_exit(&un->un_pm_mutex);
7438 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7439 			} else {
7440 				mutex_exit(&un->un_pm_mutex);
7441 			}
7442 			if (got_semaphore_here != 0) {
7443 				sema_v(&un->un_semoclose);
7444 			}
7445 			/*
7446 			 * On exit put the state back to its original value
7447 			 * and broadcast to anyone waiting for the power
7448 			 * change completion.
7449 			 */
7450 			mutex_enter(SD_MUTEX(un));
7451 			un->un_state = state_before_pm;
7452 			cv_broadcast(&un->un_suspend_cv);
7453 			mutex_exit(SD_MUTEX(un));
7454 
7455 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7456 			    "trans check Failed, not ok to power cycle.\n");
7457 			return (DDI_FAILURE);
7458 
7459 		case -1:
7460 			if (got_semaphore_here != 0) {
7461 				sema_v(&un->un_semoclose);
7462 			}
7463 			/*
7464 			 * On exit put the state back to its original value
7465 			 * and broadcast to anyone waiting for the power
7466 			 * change completion.
7467 			 */
7468 			mutex_enter(SD_MUTEX(un));
7469 			un->un_state = state_before_pm;
7470 			cv_broadcast(&un->un_suspend_cv);
7471 			mutex_exit(SD_MUTEX(un));
7472 			SD_TRACE(SD_LOG_IO_PM, un,
7473 			    "sdpower: exit, trans check command Failed.\n");
7474 			return (DDI_FAILURE);
7475 		}
7476 	}
7477 
7478 	if (level == SD_SPINDLE_OFF) {
7479 		/*
7480 		 * Save the last state... if the STOP FAILS we need it
7481 		 * for restoring
7482 		 */
7483 		mutex_enter(SD_MUTEX(un));
7484 		save_state = un->un_last_state;
7485 		/*
7486 		 * There must not be any cmds getting processed
7487 		 * in the driver when we get here. Power to the
7488 		 * device is potentially going off.
7489 		 */
7490 		ASSERT(un->un_ncmds_in_driver == 0);
7491 		mutex_exit(SD_MUTEX(un));
7492 
7493 		/*
7494 		 * For now suspend the device completely before spindle is
7495 		 * turned off
7496 		 */
7497 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7498 			if (got_semaphore_here != 0) {
7499 				sema_v(&un->un_semoclose);
7500 			}
7501 			/*
7502 			 * On exit put the state back to its original value
7503 			 * and broadcast to anyone waiting for the power
7504 			 * change completion.
7505 			 */
7506 			mutex_enter(SD_MUTEX(un));
7507 			un->un_state = state_before_pm;
7508 			cv_broadcast(&un->un_suspend_cv);
7509 			mutex_exit(SD_MUTEX(un));
7510 			SD_TRACE(SD_LOG_IO_PM, un,
7511 			    "sdpower: exit, PM suspend Failed.\n");
7512 			return (DDI_FAILURE);
7513 		}
7514 	}
7515 
7516 	/*
7517 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7518 	 * close, or strategy. Dump no longer uses this routine; it uses its
7519 	 * own code so it can be done in polled mode.
7520 	 */
7521 
7522 	medium_present = TRUE;
7523 
7524 	/*
7525 	 * When powering up, issue a TUR in case the device is at unit
7526 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7527 	 * a deadlock on un_pm_busy_cv will occur.
7528 	 */
7529 	if (level == SD_SPINDLE_ON) {
7530 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7531 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7532 	}
7533 
7534 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7535 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7536 
7537 	sval = sd_send_scsi_START_STOP_UNIT(un,
7538 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7539 	    SD_PATH_DIRECT);
7540 	/* Command failed, check for media present. */
7541 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7542 		medium_present = FALSE;
7543 	}
7544 
7545 	/*
7546 	 * The conditions of interest here are:
7547 	 *   if a spindle off with media present fails,
7548 	 *	then restore the state and return an error.
7549 	 *   else if a spindle on fails,
7550 	 *	then return an error (there's no state to restore).
7551 	 * In all other cases we setup for the new state
7552 	 * and return success.
7553 	 */
7554 	switch (level) {
7555 	case SD_SPINDLE_OFF:
7556 		if ((medium_present == TRUE) && (sval != 0)) {
7557 			/* The stop command from above failed */
7558 			rval = DDI_FAILURE;
7559 			/*
7560 			 * The stop command failed, and we have media
7561 			 * present. Put the level back by calling the
7562 			 * present. Put the level back by calling
7563 			 * sd_ddi_pm_resume() and set the state back to
7564 			 * its previous value.
7565 			(void) sd_ddi_pm_resume(un);
7566 			mutex_enter(SD_MUTEX(un));
7567 			un->un_last_state = save_state;
7568 			mutex_exit(SD_MUTEX(un));
7569 			break;
7570 		}
7571 		/*
7572 		 * The stop command from above succeeded.
7573 		 */
7574 		if (un->un_f_monitor_media_state) {
7575 			/*
7576 			 * Terminate watch thread in case of removable media
7577 			 * devices going into low power state. This is as per
7578 			 * the requirements of pm framework, otherwise commands
7579 			 * will be generated for the device (through watch
7580 			 * thread), even when the device is in low power state.
7581 			 */
7582 			mutex_enter(SD_MUTEX(un));
7583 			un->un_f_watcht_stopped = FALSE;
7584 			if (un->un_swr_token != NULL) {
7585 				opaque_t temp_token = un->un_swr_token;
7586 				un->un_f_watcht_stopped = TRUE;
7587 				un->un_swr_token = NULL;
7588 				mutex_exit(SD_MUTEX(un));
7589 				(void) scsi_watch_request_terminate(temp_token,
7590 				    SCSI_WATCH_TERMINATE_WAIT);
7591 			} else {
7592 				mutex_exit(SD_MUTEX(un));
7593 			}
7594 		}
7595 		break;
7596 
7597 	default:	/* The level requested is spindle on... */
7598 		/*
7599 		 * Legacy behavior: return success on a failed spinup
7600 		 * if there is no media in the drive.
7601 		 * Do this by looking at medium_present here.
7602 		 */
7603 		if ((sval != 0) && medium_present) {
7604 			/* The start command from above failed */
7605 			rval = DDI_FAILURE;
7606 			break;
7607 		}
7608 		/*
7609 		 * The start command from above succeeded
7610 		 * Resume the devices now that we have
7611 		 * started the disks
7612 		 */
7613 		(void) sd_ddi_pm_resume(un);
7614 
7615 		/*
7616 		 * Resume the watch thread since it was suspended
7617 		 * when the device went into low power mode.
7618 		 */
7619 		if (un->un_f_monitor_media_state) {
7620 			mutex_enter(SD_MUTEX(un));
7621 			if (un->un_f_watcht_stopped == TRUE) {
7622 				opaque_t temp_token;
7623 
7624 				un->un_f_watcht_stopped = FALSE;
7625 				mutex_exit(SD_MUTEX(un));
7626 				temp_token = scsi_watch_request_submit(
7627 				    SD_SCSI_DEVP(un),
7628 				    sd_check_media_time,
7629 				    SENSE_LENGTH, sd_media_watch_cb,
7630 				    (caddr_t)dev);
7631 				mutex_enter(SD_MUTEX(un));
7632 				un->un_swr_token = temp_token;
7633 			}
7634 			mutex_exit(SD_MUTEX(un));
7635 		}
7636 	}
7637 	if (got_semaphore_here != 0) {
7638 		sema_v(&un->un_semoclose);
7639 	}
7640 	/*
7641 	 * On exit put the state back to its original value
7642 	 * and broadcast to anyone waiting for the power
7643 	 * change completion.
7644 	 */
7645 	mutex_enter(SD_MUTEX(un));
7646 	un->un_state = state_before_pm;
7647 	cv_broadcast(&un->un_suspend_cv);
7648 	mutex_exit(SD_MUTEX(un));
7649 
7650 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7651 
7652 	return (rval);
7653 }
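
/*
 * sdpower() is the driver's power(9e) entry point and is not called
 * directly; level transitions are requested through the pm framework,
 * e.g. (sketch):
 */
#if 0
	/* Spin the disk up (component 0, level SD_SPINDLE_ON). */
	(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);

	/* Explicitly lower the power level, e.g. at detach time. */
	(void) pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF);
#endif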
7654 
7655 
7656 
7657 /*
7658  *    Function: sdattach
7659  *
7660  * Description: Driver's attach(9e) entry point function.
7661  *
7662  *   Arguments: devi - opaque device info handle
7663  *		cmd  - attach  type
7664  *
7665  * Return Code: DDI_SUCCESS
7666  *		DDI_FAILURE
7667  *
7668  *     Context: Kernel thread context
7669  */
7670 
7671 static int
7672 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7673 {
7674 	switch (cmd) {
7675 	case DDI_ATTACH:
7676 		return (sd_unit_attach(devi));
7677 	case DDI_RESUME:
7678 		return (sd_ddi_resume(devi));
7679 	default:
7680 		break;
7681 	}
7682 	return (DDI_FAILURE);
7683 }
7684 
7685 
7686 /*
7687  *    Function: sddetach
7688  *
7689  * Description: Driver's detach(9E) entry point function.
7690  *
7691  *   Arguments: devi - opaque device info handle
7692  *		cmd  - detach  type
7693  *
7694  * Return Code: DDI_SUCCESS
7695  *		DDI_FAILURE
7696  *
7697  *     Context: Kernel thread context
7698  */
7699 
7700 static int
7701 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7702 {
7703 	switch (cmd) {
7704 	case DDI_DETACH:
7705 		return (sd_unit_detach(devi));
7706 	case DDI_SUSPEND:
7707 		return (sd_ddi_suspend(devi));
7708 	default:
7709 		break;
7710 	}
7711 	return (DDI_FAILURE);
7712 }
7713 
7714 
7715 /*
7716  *     Function: sd_sync_with_callback
7717  *
7718  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7719  *		 state while the callback routine is active.
7720  *
7721  *    Arguments: un: softstate structure for the instance
7722  *
7723  *	Context: Kernel thread context
7724  */
7725 
7726 static void
7727 sd_sync_with_callback(struct sd_lun *un)
7728 {
7729 	ASSERT(un != NULL);
7730 
7731 	mutex_enter(SD_MUTEX(un));
7732 
7733 	ASSERT(un->un_in_callback >= 0);
7734 
7735 	while (un->un_in_callback > 0) {
7736 		mutex_exit(SD_MUTEX(un));
7737 		delay(2);
7738 		mutex_enter(SD_MUTEX(un));
7739 	}
7740 
7741 	mutex_exit(SD_MUTEX(un));
7742 }
7743 
7744 /*
7745  *    Function: sd_unit_attach
7746  *
7747  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7748  *		the soft state structure for the device and performs
7749  *		all necessary structure and device initializations.
7750  *
7751  *   Arguments: devi: the system's dev_info_t for the device.
7752  *
7753  * Return Code: DDI_SUCCESS if attach is successful.
7754  *		DDI_FAILURE if any part of the attach fails.
7755  *
7756  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7757  *		Kernel thread context only.  Can sleep.
7758  */
7759 
7760 static int
7761 sd_unit_attach(dev_info_t *devi)
7762 {
7763 	struct	scsi_device	*devp;
7764 	struct	sd_lun		*un;
7765 	char			*variantp;
7766 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7767 	int	instance;
7768 	int	rval;
7769 	int	wc_enabled;
7770 	uint64_t	capacity;
7771 	uint_t		lbasize;
7772 
7773 	/*
7774 	 * Retrieve the target driver's private data area. This was set
7775 	 * up by the HBA.
7776 	 */
7777 	devp = ddi_get_driver_private(devi);
7778 
7779 	/*
7780 	 * Since we have no idea what state things were left in by the last
7781 	 * user of the device, set up some 'default' settings, i.e. turn them
7782 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7783 	 * Do this before the scsi_probe, which sends an inquiry.
7784 	 * This is a fix for bug (4430280).
7785 	 * Of special importance is wide-xfer. The drive could have been left
7786 	 * in wide transfer mode by the last driver to communicate with it,
7787 	 * this includes us. If that's the case, and if the following is not
7788 	 * setup properly or we don't re-negotiate with the drive prior to
7789 	 * transferring data to/from the drive, it causes bus parity errors,
7790 	 * data overruns, and unexpected interrupts. This first occurred when
7791 	 * the fix for bug (4378686) was made.
7792 	 */
7793 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7794 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7795 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7796 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7797 
7798 	/*
7799 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7800 	 * This call will allocate and fill in the scsi_inquiry structure
7801 	 * and point the sd_inq member of the scsi_device structure to it.
7802 	 * If the attach succeeds, then this memory will not be de-allocated
7803 	 * (via scsi_unprobe()) until the instance is detached.
7804 	 */
7805 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7806 		goto probe_failed;
7807 	}
7808 
7809 	/*
7810 	 * Check the device type as specified in the inquiry data and
7811 	 * claim it if it is of a type that we support.
7812 	 */
7813 	switch (devp->sd_inq->inq_dtype) {
7814 	case DTYPE_DIRECT:
7815 		break;
7816 	case DTYPE_RODIRECT:
7817 		break;
7818 	case DTYPE_OPTICAL:
7819 		break;
7820 	case DTYPE_NOTPRESENT:
7821 	default:
7822 		/* Unsupported device type; fail the attach. */
7823 		goto probe_failed;
7824 	}
7825 
7826 	/*
7827 	 * Allocate the soft state structure for this unit.
7828 	 *
7829 	 * We rely upon this memory being set to all zeroes by
7830 	 * ddi_soft_state_zalloc().  We assume that any member of the
7831 	 * soft state structure that is not explicitly initialized by
7832 	 * this routine will have a value of zero.
7833 	 */
7834 	instance = ddi_get_instance(devp->sd_dev);
7835 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7836 		goto probe_failed;
7837 	}
7838 
7839 	/*
7840 	 * Retrieve a pointer to the newly-allocated soft state.
7841 	 *
7842 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7843 	 * was successful, unless something has gone horribly wrong and the
7844 	 * ddi's soft state internals are corrupt (in which case it is
7845 	 * probably better to halt here than just fail the attach....)
7846 	 */
7847 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7848 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7849 		    instance);
7850 		/*NOTREACHED*/
7851 	}
7852 
7853 	/*
7854 	 * Link the back ptr of the driver soft state to the scsi_device
7855 	 * struct for this lun.
7856 	 * Save a pointer to the softstate in the driver-private area of
7857 	 * the scsi_device struct.
7858 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7859 	 * we first set un->un_sd below.
7860 	 */
7861 	un->un_sd = devp;
7862 	devp->sd_private = (opaque_t)un;
7863 
7864 	/*
7865 	 * The following must be after devp is stored in the soft state struct.
7866 	 */
7867 #ifdef SDDEBUG
7868 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7869 	    "%s_unit_attach: un:0x%p instance:%d\n",
7870 	    ddi_driver_name(devi), un, instance);
7871 #endif
7872 
7873 	/*
7874 	 * Set up the device type and node type (for the minor nodes).
7875 	 * By default we assume that the device can at least support the
7876 	 * Common Command Set. Call it a CD-ROM if it reports itself
7877 	 * as a RODIRECT device.
7878 	 */
7879 	switch (devp->sd_inq->inq_dtype) {
7880 	case DTYPE_RODIRECT:
7881 		un->un_node_type = DDI_NT_CD_CHAN;
7882 		un->un_ctype	 = CTYPE_CDROM;
7883 		break;
7884 	case DTYPE_OPTICAL:
7885 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7886 		un->un_ctype	 = CTYPE_ROD;
7887 		break;
7888 	default:
7889 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7890 		un->un_ctype	 = CTYPE_CCS;
7891 		break;
7892 	}
7893 
7894 	/*
7895 	 * Try to read the interconnect type from the HBA.
7896 	 *
7897 	 * Note: This driver is currently compiled as two binaries, a parallel
7898 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7899 	 * differences are determined at compile time. In the future a single
7900 	 * binary will be provided and the interconnect type will be used to
7901 	 * differentiate between fibre and parallel scsi behaviors. At that time
7902 	 * it will be necessary for all fibre channel HBAs to support this
7903 	 * property.
7904 	 *
7905 	 * Set un_f_is_fibre to TRUE (default fibre).
7906 	 */
7907 	un->un_f_is_fibre = TRUE;
7908 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7909 	case INTERCONNECT_SSA:
7910 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7912 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7913 		break;
7914 	case INTERCONNECT_PARALLEL:
7915 		un->un_f_is_fibre = FALSE;
7916 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7917 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7918 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7919 		break;
7920 	case INTERCONNECT_SATA:
7921 		un->un_f_is_fibre = FALSE;
7922 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7923 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7924 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7925 		break;
7926 	case INTERCONNECT_FIBRE:
7927 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7928 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7929 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7930 		break;
7931 	case INTERCONNECT_FABRIC:
7932 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7933 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7934 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7935 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7936 		break;
7937 	default:
7938 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7939 		/*
7940 		 * The HBA does not support the "interconnect-type" property
7941 		 * (or did not provide a recognized type).
7942 		 *
7943 		 * Note: This will be obsoleted when a single fibre channel
7944 		 * and parallel scsi driver is delivered. In the meantime the
7945 	 * interconnect type will be set to the platform default. If that
7946 		 * type is not parallel SCSI, it means that we should be
7947 		 * assuming "ssd" semantics. However, here this also means that
7948 		 * the FC HBA is not supporting the "interconnect-type" property
7949 		 * like we expect it to, so log this occurrence.
7950 		 */
7951 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7952 		if (!SD_IS_PARALLEL_SCSI(un)) {
7953 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7954 			    "sd_unit_attach: un:0x%p Assuming "
7955 			    "INTERCONNECT_FIBRE\n", un);
7956 		} else {
7957 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7958 			    "sd_unit_attach: un:0x%p Assuming "
7959 			    "INTERCONNECT_PARALLEL\n", un);
7960 			un->un_f_is_fibre = FALSE;
7961 		}
7962 #else
7963 		/*
7964 		 * Note: This source will be implemented when a single fibre
7965 		 * channel and parallel scsi driver is delivered. The default
7966 		 * will be to assume that if a device does not support the
7967 		 * "interconnect-type" property it is a parallel SCSI HBA and
7968 		 * we will set the interconnect type for parallel scsi.
7969 		 */
7970 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7971 		un->un_f_is_fibre = FALSE;
7972 #endif
7973 		break;
7974 	}
7975 
7976 	if (un->un_f_is_fibre == TRUE) {
7977 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7978 			SCSI_VERSION_3) {
7979 			switch (un->un_interconnect_type) {
7980 			case SD_INTERCONNECT_FIBRE:
7981 			case SD_INTERCONNECT_SSA:
7982 				un->un_node_type = DDI_NT_BLOCK_WWN;
7983 				break;
7984 			default:
7985 				break;
7986 			}
7987 		}
7988 	}
7989 
7990 	/*
7991 	 * Initialize the Request Sense command for the target
7992 	 */
7993 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7994 		goto alloc_rqs_failed;
7995 	}
7996 
7997 	/*
7998 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for Sparc
7999 	 * with separate binaries for sd and ssd.
8000 	 *
8001 	 * x86 has one binary, so un_retry_count is set based on the
8002 	 * interconnect type. The hardcoded values will go away when
8003 	 * Sparc uses one binary for sd and ssd. These hardcoded values
8004 	 * need to match SD_RETRY_COUNT in sddef.h.
8005 	 * The value used is based on interconnect type:
8006 	 * fibre = 3, parallel = 5.
8007 	 */
8008 #if defined(__i386) || defined(__amd64)
8009 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8010 #else
8011 	un->un_retry_count = SD_RETRY_COUNT;
8012 #endif
8013 
8014 	/*
8015 	 * Set the per disk retry count to the default number of retries
8016 	 * for disks and CDROMs. This value can be overridden by the
8017 	 * disk property list or an entry in sd.conf.
8018 	 */
8019 	un->un_notready_retry_count =
8020 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
8021 			: DISK_NOT_READY_RETRY_COUNT(un);
8022 
8023 	/*
8024 	 * Set the busy retry count to the default value of un_retry_count.
8025 	 * This can be overridden by entries in sd.conf or the device
8026 	 * config table.
8027 	 */
8028 	un->un_busy_retry_count = un->un_retry_count;
8029 
8030 	/*
8031 	 * Init the reset threshold for retries.  This number determines
8032 	 * how many retries must be performed before a reset can be issued
8033 	 * (for certain error conditions). This can be overridden by entries
8034 	 * in sd.conf or the device config table.
8035 	 */
8036 	un->un_reset_retry_count = (un->un_retry_count / 2);
8037 
8038 	/*
8039 	 * Set the victim_retry_count to the default un_retry_count
8040 	 */
8041 	un->un_victim_retry_count = (2 * un->un_retry_count);
8042 
8043 	/*
8044 	 * Set the reservation release timeout to the default value of
8045 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8046 	 * device config table.
8047 	 */
8048 	un->un_reserve_release_time = 5;
8049 
8050 	/*
8051 	 * Set up the default maximum transfer size. Note that this may
8052 	 * get updated later in the attach, when setting up default wide
8053 	 * operations for disks.
8054 	 */
8055 #if defined(__i386) || defined(__amd64)
8056 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8057 #else
8058 	un->un_max_xfer_size = (uint_t)maxphys;
8059 #endif
8060 
8061 	/*
8062 	 * Get "allow bus device reset" property (defaults to "enabled" if
8063 	 * the property was not defined). This is to disable bus resets for
8064 	 * certain kinds of error recovery. Note: In the future when a run-time
8065 	 * fibre check is available the soft state flag should default to
8066 	 * enabled.
8067 	 */
8068 	if (un->un_f_is_fibre == TRUE) {
8069 		un->un_f_allow_bus_device_reset = TRUE;
8070 	} else {
8071 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8072 			"allow-bus-device-reset", 1) != 0) {
8073 			un->un_f_allow_bus_device_reset = TRUE;
8074 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8075 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
8076 				un);
8077 		} else {
8078 			un->un_f_allow_bus_device_reset = FALSE;
8079 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8080 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
8081 				un);
8082 		}
8083 	}
8084 
8085 	/*
8086 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8087 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8088 	 *
8089 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8090 	 * property. The new "variant" property with a value of "atapi" has been
8091 	 * introduced so that future 'variants' of standard SCSI behavior (like
8092 	 * atapi) could be specified by the underlying HBA drivers by supplying
8093 	 * a new value for the "variant" property, instead of having to define a
8094 	 * new property.
8095 	 */
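	/*
	 * Illustrative sketch (an assumption, not code from this driver):
	 * an HBA driver would typically export the "variant" property on
	 * its child device node before this target driver attaches, e.g.:
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 *
	 * where "child_dip" is a hypothetical dev_info pointer for the
	 * target node; the lookup below would then find the property.
	 */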
8096 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8097 		un->un_f_cfg_is_atapi = TRUE;
8098 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8099 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8100 	}
8101 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8102 	    &variantp) == DDI_PROP_SUCCESS) {
8103 		if (strcmp(variantp, "atapi") == 0) {
8104 			un->un_f_cfg_is_atapi = TRUE;
8105 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8106 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8107 		}
8108 		ddi_prop_free(variantp);
8109 	}
8110 
8111 	un->un_cmd_timeout	= SD_IO_TIME;
8112 
8113 	/* Info on current states, statuses, etc. (Updated frequently) */
8114 	un->un_state		= SD_STATE_NORMAL;
8115 	un->un_last_state	= SD_STATE_NORMAL;
8116 
8117 	/* Control & status info for command throttling */
8118 	un->un_throttle		= sd_max_throttle;
8119 	un->un_saved_throttle	= sd_max_throttle;
8120 	un->un_min_throttle	= sd_min_throttle;
8121 
8122 	if (un->un_f_is_fibre == TRUE) {
8123 		un->un_f_use_adaptive_throttle = TRUE;
8124 	} else {
8125 		un->un_f_use_adaptive_throttle = FALSE;
8126 	}
8127 
8128 	/* Removable media support. */
8129 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8130 	un->un_mediastate		= DKIO_NONE;
8131 	un->un_specified_mediastate	= DKIO_NONE;
8132 
8133 	/* CVs for suspend/resume (PM or DR) */
8134 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8135 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8136 
8137 	/* Power management support. */
8138 	un->un_power_level = SD_SPINDLE_UNINIT;
8139 
8140 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8141 	un->un_f_wcc_inprog = 0;
8142 
8143 	/*
8144 	 * The open/close semaphore is used to serialize threads executing
8145 	 * in the driver's open & close entry point routines for a given
8146 	 * instance.
8147 	 */
8148 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
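	/*
	 * Sketch of the expected usage pattern (for illustration only; see
	 * the actual open/close entry points for the real code):
	 *
	 *	sema_p(&un->un_semoclose);
	 *	... perform open or close processing ...
	 *	sema_v(&un->un_semoclose);
	 *
	 * sema_p(9F) may block, so this is only done from kernel thread
	 * context.
	 */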
8149 
8150 	/*
8151 	 * The conf file entry and softstate variable act as a forceful override,
8152 	 * meaning a non-zero value must be entered to change the default.
8153 	 */
8154 	un->un_f_disksort_disabled = FALSE;
8155 
8156 	/*
8157 	 * Retrieve the properties from the static driver table or the driver
8158 	 * configuration file (.conf) for this unit and update the soft state
8159 	 * for the device as needed for the indicated properties.
8160 	 * Note: the property configuration needs to occur here as some of the
8161 	 * following routines may have dependencies on soft state flags set
8162 	 * as part of the driver property configuration.
8163 	 */
8164 	sd_read_unit_properties(un);
8165 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8166 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8167 
8168 	/*
8169 	 * A device is treated as hotpluggable only if it has the
8170 	 * "hotpluggable" property; otherwise it is regarded as
8171 	 * non-hotpluggable.
8172 	 */
8173 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8174 	    -1) != -1) {
8175 		un->un_f_is_hotpluggable = TRUE;
8176 	}
8177 
8178 	/*
8179 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8180 	 * the RMB bit in the INQUIRY data.
8181 	 */
8182 	sd_set_unit_attributes(un, devi);
8183 
8184 	/*
8185 	 * By default, we mark the capacity, lbasize, and geometry
8186 	 * as invalid. Only if we successfully read a valid capacity
8187 	 * will we update the un_blockcount and un_tgt_blocksize with the
8188 	 * valid values (the geometry will be validated later).
8189 	 */
8190 	un->un_f_blockcount_is_valid	= FALSE;
8191 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8192 	un->un_f_geometry_is_valid	= FALSE;
8193 
8194 	/*
8195 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8196 	 * otherwise.
8197 	 */
8198 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8199 	un->un_blockcount = 0;
8200 
8201 	/*
8202 	 * Set up the per-instance info needed to determine the correct
8203 	 * CDBs and other info for issuing commands to the target.
8204 	 */
8205 	sd_init_cdb_limits(un);
8206 
8207 	/*
8208 	 * Set up the IO chains to use, based upon the target type.
8209 	 */
8210 	if (un->un_f_non_devbsize_supported) {
8211 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8212 	} else {
8213 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8214 	}
8215 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8216 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8217 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8218 
8219 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8220 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8221 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8222 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8223 
8224 
8225 	if (ISCD(un)) {
8226 		un->un_additional_codes = sd_additional_codes;
8227 	} else {
8228 		un->un_additional_codes = NULL;
8229 	}
8230 
8231 	/*
8232 	 * Create the kstats here so they can be available for attach-time
8233 	 * routines that send commands to the unit (either polled or via
8234 	 * sd_send_scsi_cmd).
8235 	 *
8236 	 * Note: This is a critical sequence that needs to be maintained:
8237 	 *	1) Instantiate the kstats here, before any routines using the
8238 	 *	   iopath (i.e. sd_send_scsi_cmd).
8239 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8240 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8241 	 *	   sd_register_devid(), and sd_cache_control().
8242 	 */
8243 
8244 	un->un_stats = kstat_create(sd_label, instance,
8245 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8246 	if (un->un_stats != NULL) {
8247 		un->un_stats->ks_lock = SD_MUTEX(un);
8248 		kstat_install(un->un_stats);
8249 	}
8250 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8251 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8252 
8253 	sd_create_errstats(un, instance);
8254 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8255 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8256 
8257 	/*
8258 	 * The following if/else code was relocated here from below as part
8259 	 * of the fix for bug (4430280). However with the default setup added
8260 	 * on entry to this routine, it's no longer absolutely necessary for
8261 	 * this to be before the call to sd_spin_up_unit.
8262 	 */
8263 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8264 		/*
8265 		 * If SCSI-2 tagged queueing is supported by the target
8266 		 * and by the host adapter then we will enable it.
8267 		 */
8268 		un->un_tagflags = 0;
8269 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8270 		    (devp->sd_inq->inq_cmdque) &&
8271 		    (un->un_f_arq_enabled == TRUE)) {
8272 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8273 			    1, 1) == 1) {
8274 				un->un_tagflags = FLAG_STAG;
8275 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8276 				    "sd_unit_attach: un:0x%p tag queueing "
8277 				    "enabled\n", un);
8278 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8279 			    "untagged-qing", 0) == 1) {
8280 				un->un_f_opt_queueing = TRUE;
8281 				un->un_saved_throttle = un->un_throttle =
8282 				    min(un->un_throttle, 3);
8283 			} else {
8284 				un->un_f_opt_queueing = FALSE;
8285 				un->un_saved_throttle = un->un_throttle = 1;
8286 			}
8287 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8288 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8289 			/* The Host Adapter supports internal queueing. */
8290 			un->un_f_opt_queueing = TRUE;
8291 			un->un_saved_throttle = un->un_throttle =
8292 			    min(un->un_throttle, 3);
8293 		} else {
8294 			un->un_f_opt_queueing = FALSE;
8295 			un->un_saved_throttle = un->un_throttle = 1;
8296 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8297 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8298 		}
8299 
8300 		/*
8301 		 * Enable large transfers for SATA/SAS drives
8302 		 */
8303 		if (SD_IS_SERIAL(un)) {
8304 			un->un_max_xfer_size =
8305 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8306 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8307 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8308 			    "sd_unit_attach: un:0x%p max transfer "
8309 			    "size=0x%x\n", un, un->un_max_xfer_size);
8310 
8311 		}
8312 
8313 		/* Setup or tear down default wide operations for disks */
8314 
8315 		/*
8316 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8317 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8318 		 * system and be set to different values. In the future this
8319 		 * code may need to be updated when the ssd module is
8320 		 * obsoleted and removed from the system. (4299588)
8321 		 */
8322 		if (SD_IS_PARALLEL_SCSI(un) &&
8323 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8324 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8325 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8326 			    1, 1) == 1) {
8327 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8328 				    "sd_unit_attach: un:0x%p Wide Transfer "
8329 				    "enabled\n", un);
8330 			}
8331 
8332 			/*
8333 			 * If tagged queuing has also been enabled, then
8334 			 * enable large xfers
8335 			 */
8336 			if (un->un_saved_throttle == sd_max_throttle) {
8337 				un->un_max_xfer_size =
8338 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8339 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8340 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8341 				    "sd_unit_attach: un:0x%p max transfer "
8342 				    "size=0x%x\n", un, un->un_max_xfer_size);
8343 			}
8344 		} else {
8345 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8346 			    0, 1) == 1) {
8347 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8348 				    "sd_unit_attach: un:0x%p "
8349 				    "Wide Transfer disabled\n", un);
8350 			}
8351 		}
8352 	} else {
8353 		un->un_tagflags = FLAG_STAG;
8354 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8355 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8356 	}
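	/*
	 * Summary of the queueing negotiation above (descriptive comment
	 * only, added for clarity):
	 *
	 *	target and HBA support tagged queueing -> FLAG_STAG,
	 *	    throttle left at sd_max_throttle
	 *	HBA supports only untagged (internal) queueing -> throttle
	 *	    capped at min(un_throttle, 3)
	 *	no queueing support -> throttle forced to 1
	 *
	 * Interconnects that are neither parallel SCSI nor serial (the
	 * else clause above) skip the negotiation and assume tagged
	 * queueing with the property-derived max transfer size.
	 */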
8357 
8358 	/*
8359 	 * If this target supports LUN reset, try to enable it.
8360 	 */
8361 	if (un->un_f_lun_reset_enabled) {
8362 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8363 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8364 			    "un:0x%p lun_reset capability set\n", un);
8365 		} else {
8366 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8367 			    "un:0x%p lun-reset capability not set\n", un);
8368 		}
8369 	}
8370 
8371 	/*
8372 	 * At this point in the attach, we have enough info in the
8373 	 * soft state to be able to issue commands to the target.
8374 	 *
8375 	 * All command paths used below MUST issue their commands as
8376 	 * SD_PATH_DIRECT. This is important as intermediate layers
8377 	 * are not all initialized yet (such as PM).
8378 	 */
8379 
8380 	/*
8381 	 * Send a TEST UNIT READY command to the device. This should clear
8382 	 * any outstanding UNIT ATTENTION that may be present.
8383 	 *
8384 	 * Note: Don't check for success, just track if there is a reservation,
8385 	 * Note: Don't check for success; just track whether there is a
8386 	 * reservation. This is a throw-away command to clear any unit attentions.
8387 	 * Note: This MUST be the first command issued to the target during
8388 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8389 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8390 	 * with attempts at spinning up a device with no media.
8391 	 */
8392 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8393 		reservation_flag = SD_TARGET_IS_RESERVED;
8394 	}
8395 
8396 	/*
8397 	 * If the device is NOT a removable media device, attempt to spin
8398 	 * it up (using the START_STOP_UNIT command) and read its capacity
8399 	 * (using the READ CAPACITY command).  Note, however, that either
8400 	 * of these could fail and in some cases we would continue with
8401 	 * the attach despite the failure (see below).
8402 	 */
8403 	if (un->un_f_descr_format_supported) {
8404 		switch (sd_spin_up_unit(un)) {
8405 		case 0:
8406 			/*
8407 			 * Spin-up was successful; now try to read the
8408 			 * capacity.  If successful then save the results
8409 			 * and mark the capacity & lbasize as valid.
8410 			 */
8411 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8412 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8413 
8414 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8415 			    &lbasize, SD_PATH_DIRECT)) {
8416 			case 0: {
8417 				if (capacity > DK_MAX_BLOCKS) {
8418 #ifdef _LP64
8419 					if (capacity + 1 >
8420 					    SD_GROUP1_MAX_ADDRESS) {
8421 						/*
8422 						 * Enable descriptor format
8423 						 * sense data so that we can
8424 						 * get 64 bit sense data
8425 						 * fields.
8426 						 */
8427 						sd_enable_descr_sense(un);
8428 					}
8429 #else
8430 					/* 32-bit kernels can't handle this */
8431 					scsi_log(SD_DEVINFO(un),
8432 					    sd_label, CE_WARN,
8433 					    "disk has %llu blocks, which "
8434 					    "is too large for a 32-bit "
8435 					    "kernel", capacity);
8436 
8437 #if defined(__i386) || defined(__amd64)
8438 					/*
8439 					 * Refer to comments related to off-by-1
8440 					 * at the header of this file.
8441 					 * A 1TB disk was treated as
8442 					 * (1T - 512)B in the past, so it
8443 					 * may have a valid VTOC and Solaris
8444 					 * partitions; we have to allow it to
8445 					 * continue to work.
8446 					 */
8447 					if (capacity - 1 > DK_MAX_BLOCKS)
8448 #endif
8449 					goto spinup_failed;
8450 #endif
8451 				}
8452 
8453 				/*
8454 				 * It is not necessary to check here whether
8455 				 * the device capacity is bigger than what
8456 				 * the maximum HBA CDB can support, because
8457 				 * sd_send_scsi_READ_CAPACITY retrieves the
8458 				 * capacity via a USCSI command, which is
8459 				 * itself constrained by the maximum HBA CDB.
8460 				 * sd_send_scsi_READ_CAPACITY returns EINVAL
8461 				 * when a CDB bigger than the required CDB
8462 				 * length is used; that case is handled
8463 				 * below in "case EINVAL".
8464 				 */
8465 
8466 				/*
8467 				 * The following relies on
8468 				 * sd_send_scsi_READ_CAPACITY never
8469 				 * returning 0 for capacity and/or lbasize.
8470 				 */
8471 				sd_update_block_info(un, lbasize, capacity);
8472 
8473 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8474 				    "sd_unit_attach: un:0x%p capacity = %ld "
8475 				    "blocks; lbasize= %ld.\n", un,
8476 				    un->un_blockcount, un->un_tgt_blocksize);
8477 
8478 				break;
8479 			}
8480 			case EINVAL:
8481 				/*
8482 				 * In the case where the max-cdb-length property
8483 				 * is smaller than the required CDB length for
8484 				 * a SCSI device, a target driver can fail to
8485 				 * attach to that device.
8486 				 */
8487 				scsi_log(SD_DEVINFO(un),
8488 				    sd_label, CE_WARN,
8489 				    "disk capacity is too large "
8490 				    "for current cdb length");
8491 				goto spinup_failed;
8492 			case EACCES:
8493 				/*
8494 				 * Should never get here if the spin-up
8495 				 * succeeded, but code it in anyway.
8496 				 * From here, just continue with the attach...
8497 				 */
8498 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8499 				    "sd_unit_attach: un:0x%p "
8500 				    "sd_send_scsi_READ_CAPACITY "
8501 				    "returned reservation conflict\n", un);
8502 				reservation_flag = SD_TARGET_IS_RESERVED;
8503 				break;
8504 			default:
8505 				/*
8506 				 * Likewise, should never get here if the
8507 				 * spin-up succeeded. Just continue with
8508 				 * the attach...
8509 				 */
8510 				break;
8511 			}
8512 			break;
8513 		case EACCES:
8514 			/*
8515 			 * Device is reserved by another host.  In this case
8516 			 * we could not spin it up or read the capacity, but
8517 			 * we continue with the attach anyway.
8518 			 */
8519 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8520 			    "sd_unit_attach: un:0x%p spin-up reservation "
8521 			    "conflict.\n", un);
8522 			reservation_flag = SD_TARGET_IS_RESERVED;
8523 			break;
8524 		default:
8525 			/* Fail the attach if the spin-up failed. */
8526 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8527 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8528 			goto spinup_failed;
8529 		}
8530 	}
8531 
8532 	/*
8533 	 * Check to see if this is a MMC drive
8534 	 */
8535 	if (ISCD(un)) {
8536 		sd_set_mmc_caps(un);
8537 	}
8538 
8539 	/*
8540 	 * Create the minor nodes for the device.
8541 	 * Note: If we want to support fdisk on both sparc and intel, this will
8542 	 * have to separate out the notion that VTOC8 is always sparc, and
8543 	 * VTOC16 is always intel (tho these can be the defaults).  The vtoc
8544 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8545 	 * partitioning will have to have been read & set up before we
8546 	 * create the minor nodes. (any other inits (such as kstats) that
8547 	 * also ought to be done before creating the minor nodes?) (Doesn't
8548 	 * setting up the minor nodes kind of imply that we're ready to
8549 	 * handle an open from userland?)
8550 	 */
8551 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8552 		goto create_minor_nodes_failed;
8553 	}
8554 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8555 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8556 
8557 	/*
8558 	 * Add a zero-length attribute to tell the world we support
8559 	 * kernel ioctls (for layered drivers)
8560 	 */
8561 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8562 	    DDI_KERNEL_IOCTL, NULL, 0);
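	/*
	 * Sketch (an assumption, not code from this file): a layered
	 * driver can test for this zero-length property with
	 * ddi_prop_exists(9F) before issuing kernel ioctls:
	 *
	 *	if (ddi_prop_exists(DDI_DEV_T_ANY, target_dip,
	 *	    DDI_PROP_DONTPASS, DDI_KERNEL_IOCTL)) {
	 *		// kernel ioctls are supported by this target
	 *	}
	 *
	 * where "target_dip" is a hypothetical handle to this device node.
	 */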
8563 
8564 	/*
8565 	 * Add a boolean property to tell the world we support
8566 	 * the B_FAILFAST flag (for layered drivers)
8567 	 */
8568 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8569 	    "ddi-failfast-supported", NULL, 0);
8570 
8571 	/*
8572 	 * Initialize power management
8573 	 */
8574 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8575 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8576 	sd_setup_pm(un, devi);
8577 	if (un->un_f_pm_is_enabled == FALSE) {
8578 		/*
8579 		 * For performance, point to a jump table that does
8580 		 * not include pm.
8581 		 * The direct and priority chains don't change with PM.
8582 		 *
8583 		 * Note: this is currently done based on individual device
8584 		 * capabilities. When an interface for determining system
8585 		 * power enabled state becomes available, or when additional
8586 		 * layers are added to the command chain, these values will
8587 		 * have to be re-evaluated for correctness.
8588 		 */
8589 		if (un->un_f_non_devbsize_supported) {
8590 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8591 		} else {
8592 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8593 		}
8594 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8595 	}
8596 
8597 	/*
8598 	 * This property is set to 0 by HA software to avoid retries
8599 	 * on a reserved disk. (The preferred property name is
8600 	 * "retry-on-reservation-conflict") (1189689)
8601 	 *
8602 	 * Note: The use of a global here can have unintended consequences. A
8603 	 * per-instance variable is preferable to match the capabilities of
8604 	 * different underlying HBAs. (4402600)
8605 	 */
8606 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8607 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8608 	    sd_retry_on_reservation_conflict);
8609 	if (sd_retry_on_reservation_conflict != 0) {
8610 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8611 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8612 		    sd_retry_on_reservation_conflict);
8613 	}
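	/*
	 * Hypothetical driver.conf fragment (illustration only, not from
	 * this source) showing how HA software could disable retries on a
	 * reserved disk via the preferred property name:
	 *
	 *	retry-on-reservation-conflict=0;
	 */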
8614 
8615 	/* Set up options for QFULL handling. */
8616 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8617 	    "qfull-retries", -1)) != -1) {
8618 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8619 		    rval, 1);
8620 	}
8621 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8622 	    "qfull-retry-interval", -1)) != -1) {
8623 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8624 		    rval, 1);
8625 	}
8626 
8627 	/*
8628 	 * This just prints a message that announces the existence of the
8629 	 * device. The message is always printed in the system logfile, but
8630 	 * only appears on the console if the system is booted with the
8631 	 * -v (verbose) argument.
8632 	 */
8633 	ddi_report_dev(devi);
8634 
8635 	/*
8636 	 * The framework calls driver attach routines single-threaded
8637 	 * for a given instance.  However, we still acquire SD_MUTEX here
8638 	 * because it is required for calling the sd_validate_geometry()
8639 	 * and sd_register_devid() functions.
8640 	 */
8641 	mutex_enter(SD_MUTEX(un));
8642 	un->un_f_geometry_is_valid = FALSE;
8643 	un->un_mediastate = DKIO_NONE;
8644 	un->un_reserved = -1;
8645 
8646 	/*
8647 	 * Read and validate the device's geometry (ie, disk label)
8648 	 * A new unformatted drive will not have a valid geometry, but
8649 	 * the driver needs to successfully attach to this device so
8650 	 * the drive can be formatted via ioctls.
8651 	 */
8652 	if ((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8653 	    ENOTSUP) &&
8654 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8655 		/*
8656 		 * We found a small disk with an EFI label on it;
8657 		 * we need to fix up the minor nodes accordingly.
8658 		 */
8659 		ddi_remove_minor_node(devi, "h");
8660 		ddi_remove_minor_node(devi, "h,raw");
8661 		(void) ddi_create_minor_node(devi, "wd",
8662 		    S_IFBLK,
8663 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8664 		    un->un_node_type, NULL);
8665 		(void) ddi_create_minor_node(devi, "wd,raw",
8666 		    S_IFCHR,
8667 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8668 		    un->un_node_type, NULL);
8669 	}
8670 #if defined(__i386) || defined(__amd64)
8671 	else if (un->un_f_capacity_adjusted == 1) {
8672 		/*
8673 		 * Refer to comments related to off-by-1 at the
8674 		 * header of this file.
8675 		 * Adjust minor node for 1TB disk.
8676 		 */
8677 		ddi_remove_minor_node(devi, "wd");
8678 		ddi_remove_minor_node(devi, "wd,raw");
8679 		(void) ddi_create_minor_node(devi, "h",
8680 		    S_IFBLK,
8681 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8682 		    un->un_node_type, NULL);
8683 		(void) ddi_create_minor_node(devi, "h,raw",
8684 		    S_IFCHR,
8685 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8686 		    un->un_node_type, NULL);
8687 	}
8688 #endif
8689 	/*
8690 	 * Read and initialize the devid for the unit.
8691 	 */
8692 	ASSERT(un->un_errstats != NULL);
8693 	if (un->un_f_devid_supported) {
8694 		sd_register_devid(un, devi, reservation_flag);
8695 	}
8696 	mutex_exit(SD_MUTEX(un));
8697 
8698 #if (defined(__fibre))
8699 	/*
8700 	 * Register callbacks for fibre only.  You can't do this solely
8701 	 * on the basis of the devid_type because this is HBA specific.
8702 	 * We need to query our hba capabilities to find out whether to
8703 	 * register or not.
8704 	 */
8705 	if (un->un_f_is_fibre) {
8706 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN) != 0) {
8707 			sd_init_event_callbacks(un);
8708 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8709 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8710 		}
8711 	}
8712 #endif
8713 
8714 	if (un->un_f_opt_disable_cache == TRUE) {
8715 		/*
8716 		 * Disable both read cache and write cache.  This is
8717 		 * the historic behavior of the keywords in the config file.
8718 		 */
8719 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8720 		    0) {
8721 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8722 			    "sd_unit_attach: un:0x%p Could not disable "
8723 			    "caching", un);
8724 			goto devid_failed;
8725 		}
8726 	}
8727 
8728 	/*
8729 	 * Check the value of the WCE bit now and
8730 	 * set un_f_write_cache_enabled accordingly.
8731 	 */
8732 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8733 	mutex_enter(SD_MUTEX(un));
8734 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8735 	mutex_exit(SD_MUTEX(un));
8736 
8737 	/*
8738 	 * Set the pstat and error stat values here, so data obtained during the
8739 	 * previous attach-time routines is available.
8740 	 *
8741 	 * Note: This is a critical sequence that needs to be maintained:
8742 	 *	1) Instantiate the kstats before any routines using the iopath
8743 	 *	   (i.e. sd_send_scsi_cmd).
8744 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8745 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8746 	 *	   sd_register_devid(), and sd_cache_control().
8747 	 */
8748 	if (un->un_f_pkstats_enabled) {
8749 		sd_set_pstats(un);
8750 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8751 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8752 	}
8753 
8754 	sd_set_errstats(un);
8755 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8756 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8757 
8758 	/*
8759 	 * Find out what type of reservation this disk supports.
8760 	 */
8761 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8762 	case 0:
8763 		/*
8764 		 * SCSI-3 reservations are supported.
8765 		 */
8766 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8767 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8768 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8769 		break;
8770 	case ENOTSUP:
8771 		/*
8772 		 * The PERSISTENT RESERVE IN command would not be recognized by
8773 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8774 		 */
8775 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8776 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8777 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8778 		break;
8779 	default:
8780 		/*
8781 		 * default to SCSI-3 reservations
8782 		 */
8783 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8784 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8785 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8786 		break;
8787 	}
8788 
8789 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8790 	    "sd_unit_attach: un:0x%p exit success\n", un);
8791 
8792 	return (DDI_SUCCESS);
8793 
8794 	/*
8795 	 * An error occurred during the attach; clean up & return failure.
8796 	 */
8797 
8798 devid_failed:
8799 
8800 setup_pm_failed:
8801 	ddi_remove_minor_node(devi, NULL);
8802 
8803 create_minor_nodes_failed:
8804 	/*
8805 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8806 	 */
8807 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8808 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8809 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8810 
8811 	if (un->un_f_is_fibre == FALSE) {
8812 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8813 	}
8814 
8815 spinup_failed:
8816 
8817 	mutex_enter(SD_MUTEX(un));
8818 
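	/*
	 * Note on the cancellation pattern used below: each timeout id is
	 * copied and its soft state field NULLed while SD_MUTEX is held,
	 * and the mutex is then dropped around the untimeout(9F) call.
	 * untimeout() waits for a running callback to complete, and the
	 * callbacks acquire SD_MUTEX themselves, so calling untimeout()
	 * with the mutex held could deadlock.
	 */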
8819 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd restart */
8820 	if (un->un_direct_priority_timeid != NULL) {
8821 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8822 		un->un_direct_priority_timeid = NULL;
8823 		mutex_exit(SD_MUTEX(un));
8824 		(void) untimeout(temp_id);
8825 		mutex_enter(SD_MUTEX(un));
8826 	}
8827 
8828 	/* Cancel any pending start/stop timeouts */
8829 	if (un->un_startstop_timeid != NULL) {
8830 		timeout_id_t temp_id = un->un_startstop_timeid;
8831 		un->un_startstop_timeid = NULL;
8832 		mutex_exit(SD_MUTEX(un));
8833 		(void) untimeout(temp_id);
8834 		mutex_enter(SD_MUTEX(un));
8835 	}
8836 
8837 	/* Cancel any pending reset-throttle timeouts */
8838 	if (un->un_reset_throttle_timeid != NULL) {
8839 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8840 		un->un_reset_throttle_timeid = NULL;
8841 		mutex_exit(SD_MUTEX(un));
8842 		(void) untimeout(temp_id);
8843 		mutex_enter(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Cancel any pending retry timeouts */
8847 	if (un->un_retry_timeid != NULL) {
8848 		timeout_id_t temp_id = un->un_retry_timeid;
8849 		un->un_retry_timeid = NULL;
8850 		mutex_exit(SD_MUTEX(un));
8851 		(void) untimeout(temp_id);
8852 		mutex_enter(SD_MUTEX(un));
8853 	}
8854 
8855 	/* Cancel any pending delayed cv broadcast timeouts */
8856 	if (un->un_dcvb_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_dcvb_timeid;
8858 		un->un_dcvb_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	mutex_exit(SD_MUTEX(un));
8865 
8866 	/* There should not be any in-progress I/O so ASSERT this check */
8867 	ASSERT(un->un_ncmds_in_transport == 0);
8868 	ASSERT(un->un_ncmds_in_driver == 0);
8869 
8870 	/* Do not free the softstate if the callback routine is active */
8871 	sd_sync_with_callback(un);
8872 
8873 	/*
8874 	 * Partition stats apparently are not used with removables. These would
8875 	 * not have been created during attach, so no need to clean them up...
8876 	 */
8877 	if (un->un_stats != NULL) {
8878 		kstat_delete(un->un_stats);
8879 		un->un_stats = NULL;
8880 	}
8881 	if (un->un_errstats != NULL) {
8882 		kstat_delete(un->un_errstats);
8883 		un->un_errstats = NULL;
8884 	}
8885 
8886 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8887 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8888 
8889 	ddi_prop_remove_all(devi);
8890 	sema_destroy(&un->un_semoclose);
8891 	cv_destroy(&un->un_state_cv);
8892 
8893 getrbuf_failed:
8894 
8895 	sd_free_rqs(un);
8896 
8897 alloc_rqs_failed:
8898 
8899 	devp->sd_private = NULL;
8900 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8901 
8902 get_softstate_failed:
8903 	/*
8904 	 * Note: the man pages are unclear as to whether or not doing a
8905 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8906 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8907 	 * ddi_get_soft_state() fails.  The implication seems to be
8908 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8909 	 */
8910 	ddi_soft_state_free(sd_state, instance);
8911 
8912 probe_failed:
8913 	scsi_unprobe(devp);
8914 #ifdef SDDEBUG
8915 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8916 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8917 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8918 		    (void *)un);
8919 	}
8920 #endif
8921 	return (DDI_FAILURE);
8922 }
8923 
8924 
8925 /*
8926  *    Function: sd_unit_detach
8927  *
8928  * Description: Performs DDI_DETACH processing for sddetach().
8929  *
8930  * Return Code: DDI_SUCCESS
8931  *		DDI_FAILURE
8932  *
8933  *     Context: Kernel thread context
8934  */
8935 
8936 static int
8937 sd_unit_detach(dev_info_t *devi)
8938 {
8939 	struct scsi_device	*devp;
8940 	struct sd_lun		*un;
8941 	int			i;
8942 	dev_t			dev;
8943 	int			instance = ddi_get_instance(devi);
8944 
8945 	mutex_enter(&sd_detach_mutex);
8946 
8947 	/*
8948 	 * Fail the detach for any of the following:
8949 	 *  - Unable to get the sd_lun struct for the instance
8950 	 *  - A layered driver has an outstanding open on the instance
8951 	 *  - Another thread is already detaching this instance
8952 	 *  - Another thread is currently performing an open
8953 	 */
8954 	devp = ddi_get_driver_private(devi);
8955 	if ((devp == NULL) ||
8956 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8957 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8958 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8959 		mutex_exit(&sd_detach_mutex);
8960 		return (DDI_FAILURE);
8961 	}
8962 
8963 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8964 
8965 	/*
8966 	 * Mark this instance as currently in a detach, to inhibit any
8967 	 * opens from a layered driver.
8968 	 */
8969 	un->un_detach_count++;
8970 	mutex_exit(&sd_detach_mutex);
8971 
8972 	dev = sd_make_device(SD_DEVINFO(un));
8973 
8974 	_NOTE(COMPETING_THREADS_NOW);
8975 
8976 	mutex_enter(SD_MUTEX(un));
8977 
8978 	/*
8979 	 * Fail the detach if there are any outstanding layered
8980 	 * opens on this device.
8981 	 */
8982 	for (i = 0; i < NDKMAP; i++) {
8983 		if (un->un_ocmap.lyropen[i] != 0) {
8984 			goto err_notclosed;
8985 		}
8986 	}
8987 
8988 	/*
8989 	 * Verify there are NO outstanding commands issued to this device.
8990 	 * ie, un_ncmds_in_transport == 0.
8991 	 * It's possible to have outstanding commands through the physio
8992 	 * code path, even though everything's closed.
8993 	 */
8994 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8995 	    (un->un_direct_priority_timeid != NULL) ||
8996 	    (un->un_state == SD_STATE_RWAIT)) {
8997 		mutex_exit(SD_MUTEX(un));
8998 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8999 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9000 		goto err_stillbusy;
9001 	}
9002 
9003 	/*
9004 	 * If we have the device reserved, release the reservation.
9005 	 */
9006 	if ((un->un_resvd_status & SD_RESERVE) &&
9007 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9008 		mutex_exit(SD_MUTEX(un));
9009 		/*
9010 		 * Note: sd_reserve_release sends a command to the device
9011 		 * via the sd_ioctlcmd() path, and can sleep.
9012 		 */
9013 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9014 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9015 			    "sd_dr_detach: Cannot release reservation \n");
9016 		}
9017 	} else {
9018 		mutex_exit(SD_MUTEX(un));
9019 	}
9020 
9021 	/*
9022 	 * Untimeout any reserve recover, throttle reset, restart unit
9023 	 * and delayed broadcast timeout threads. Protect the timeout pointers
9024 	 * from getting nulled by their callback functions.
9025 	 */
9026 	mutex_enter(SD_MUTEX(un));
9027 	if (un->un_resvd_timeid != NULL) {
9028 		timeout_id_t temp_id = un->un_resvd_timeid;
9029 		un->un_resvd_timeid = NULL;
9030 		mutex_exit(SD_MUTEX(un));
9031 		(void) untimeout(temp_id);
9032 		mutex_enter(SD_MUTEX(un));
9033 	}
9034 
9035 	if (un->un_reset_throttle_timeid != NULL) {
9036 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9037 		un->un_reset_throttle_timeid = NULL;
9038 		mutex_exit(SD_MUTEX(un));
9039 		(void) untimeout(temp_id);
9040 		mutex_enter(SD_MUTEX(un));
9041 	}
9042 
9043 	if (un->un_startstop_timeid != NULL) {
9044 		timeout_id_t temp_id = un->un_startstop_timeid;
9045 		un->un_startstop_timeid = NULL;
9046 		mutex_exit(SD_MUTEX(un));
9047 		(void) untimeout(temp_id);
9048 		mutex_enter(SD_MUTEX(un));
9049 	}
9050 
9051 	if (un->un_dcvb_timeid != NULL) {
9052 		timeout_id_t temp_id = un->un_dcvb_timeid;
9053 		un->un_dcvb_timeid = NULL;
9054 		mutex_exit(SD_MUTEX(un));
9055 		(void) untimeout(temp_id);
9056 	} else {
9057 		mutex_exit(SD_MUTEX(un));
9058 	}
9059 
9060 	/* Remove any pending reservation reclaim requests for this device */
9061 	sd_rmv_resv_reclaim_req(dev);
9062 
9063 	mutex_enter(SD_MUTEX(un));
9064 
9065 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9066 	if (un->un_direct_priority_timeid != NULL) {
9067 		timeout_id_t temp_id = un->un_direct_priority_timeid;
9068 		un->un_direct_priority_timeid = NULL;
9069 		mutex_exit(SD_MUTEX(un));
9070 		(void) untimeout(temp_id);
9071 		mutex_enter(SD_MUTEX(un));
9072 	}
9073 
9074 	/* Cancel any active multi-host disk watch thread requests */
9075 	if (un->un_mhd_token != NULL) {
9076 		mutex_exit(SD_MUTEX(un));
9077 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9078 		if (scsi_watch_request_terminate(un->un_mhd_token,
9079 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9080 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9081 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9082 			/*
9083 			 * Note: We are returning here after having removed
9084 			 * some driver timeouts above. This is consistent with
9085 			 * the legacy implementation but perhaps the watch
9086 			 * terminate call should be made with the wait flag set.
9087 			 */
9088 			goto err_stillbusy;
9089 		}
9090 		mutex_enter(SD_MUTEX(un));
9091 		un->un_mhd_token = NULL;
9092 	}
9093 
9094 	if (un->un_swr_token != NULL) {
9095 		mutex_exit(SD_MUTEX(un));
9096 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9097 		if (scsi_watch_request_terminate(un->un_swr_token,
9098 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9099 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9100 			    "sd_dr_detach: Cannot cancel swr watch request\n");
9101 			/*
9102 			 * Note: We are returning here after having removed
9103 			 * some driver timeouts above. This is consistent with
9104 			 * the legacy implementation but perhaps the watch
9105 			 * terminate call should be made with the wait flag set.
9106 			 */
9107 			goto err_stillbusy;
9108 		}
9109 		mutex_enter(SD_MUTEX(un));
9110 		un->un_swr_token = NULL;
9111 	}
9112 
9113 	mutex_exit(SD_MUTEX(un));
9114 
9115 	/*
9116 	 * Clear any scsi_reset_notifies. We clear the reset notifies
9117 	 * even if we have not registered one.
9118 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9119 	 */
9120 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9121 	    sd_mhd_reset_notify_cb, (caddr_t)un);
9122 
9123 	/*
9124 	 * Protect the timeout pointers from getting nulled by
9125 	 * their callback functions during the cancellation process.
9126 	 * In such a scenario, untimeout could be invoked with a null value.
9127 	 */
9128 	_NOTE(NO_COMPETING_THREADS_NOW);
9129 
9130 	mutex_enter(&un->un_pm_mutex);
9131 	if (un->un_pm_idle_timeid != NULL) {
9132 		timeout_id_t temp_id = un->un_pm_idle_timeid;
9133 		un->un_pm_idle_timeid = NULL;
9134 		mutex_exit(&un->un_pm_mutex);
9135 
9136 		/*
9137 		 * Timeout is active; cancel it.
9138 		 * Note that it'll never be active on a device
9139 		 * that does not support PM therefore we don't
9140 		 * have to check before calling pm_idle_component.
9141 		 */
9142 		(void) untimeout(temp_id);
9143 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9144 		mutex_enter(&un->un_pm_mutex);
9145 	}
9146 
9147 	/*
9148 	 * Check whether there is already a timeout scheduled for power
9149 	 * management. If so, don't lower the power here; that's
9150 	 * the timeout handler's job.
9151 	 */
9152 	if (un->un_pm_timeid != NULL) {
9153 		timeout_id_t temp_id = un->un_pm_timeid;
9154 		un->un_pm_timeid = NULL;
9155 		mutex_exit(&un->un_pm_mutex);
9156 		/*
9157 		 * Timeout is active; cancel it.
9158 		 * Note that it'll never be active on a device
9159 		 * that does not support PM therefore we don't
9160 		 * have to check before calling pm_idle_component.
9161 		 */
9162 		(void) untimeout(temp_id);
9163 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9164 
9165 	} else {
9166 		mutex_exit(&un->un_pm_mutex);
9167 		if ((un->un_f_pm_is_enabled == TRUE) &&
9168 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9169 		    DDI_SUCCESS)) {
9170 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9171 			    "sd_dr_detach: Lower power request failed, ignoring.\n");
9172 			/*
9173 			 * Fix for bug: 4297749, item # 13
9174 			 * The above test now includes a check to see if PM is
9175 			 * supported by this device before calling
9176 			 * pm_lower_power().
9177 			 * Note, the following is not dead code. The call to
9178 			 * pm_lower_power above will generate a call back into
9179 			 * our sdpower routine which might result in a timeout
9180 			 * handler getting activated. Therefore the following
9181 			 * code is valid and necessary.
9182 			 */
9183 			mutex_enter(&un->un_pm_mutex);
9184 			if (un->un_pm_timeid != NULL) {
9185 				timeout_id_t temp_id = un->un_pm_timeid;
9186 				un->un_pm_timeid = NULL;
9187 				mutex_exit(&un->un_pm_mutex);
9188 				(void) untimeout(temp_id);
9189 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9190 			} else {
9191 				mutex_exit(&un->un_pm_mutex);
9192 			}
9193 		}
9194 	}
9195 
9196 	/*
9197 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9198 	 * Relocated here from above to be after the call to
9199 	 * pm_lower_power, which was getting errors.
9200 	 */
9201 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9202 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9203 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9204 
9205 	if (un->un_f_is_fibre == FALSE) {
9206 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9207 	}
9208 
9209 	/*
9210 	 * Remove any event callbacks, fibre only
9211 	 */
9212 	if (un->un_f_is_fibre == TRUE) {
9213 		if ((un->un_insert_event != NULL) &&
9214 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9215 				DDI_SUCCESS)) {
9216 			/*
9217 			 * Note: We are returning here after having done
9218 			 * substantial cleanup above. This is consistent
9219 			 * with the legacy implementation but this may not
9220 			 * be the right thing to do.
9221 			 */
9222 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9223 				"sd_dr_detach: Cannot cancel insert event\n");
9224 			goto err_remove_event;
9225 		}
9226 		un->un_insert_event = NULL;
9227 
9228 		if ((un->un_remove_event != NULL) &&
9229 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9230 				DDI_SUCCESS)) {
9231 			/*
9232 			 * Note: We are returning here after having done
9233 			 * substantial cleanup above. This is consistent
9234 			 * with the legacy implementation but this may not
9235 			 * be the right thing to do.
9236 			 */
9237 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9238 				"sd_dr_detach: Cannot cancel remove event\n");
9239 			goto err_remove_event;
9240 		}
9241 		un->un_remove_event = NULL;
9242 	}
9243 
9244 	/* Do not free the softstate if the callback routine is active */
9245 	sd_sync_with_callback(un);
9246 
9247 	/*
9248 	 * Hold the detach mutex here, to make sure that no other threads ever
9249 	 * can access a (partially) freed soft state structure.
9250 	 */
9251 	mutex_enter(&sd_detach_mutex);
9252 
9253 	/*
9254 	 * Clean up the soft state struct.
9255 	 * Cleanup is done in reverse order of allocs/inits.
9256 	 * At this point there should be no competing threads anymore.
9257 	 */
9258 
9259 	/* Unregister and free device id. */
9260 	ddi_devid_unregister(devi);
9261 	if (un->un_devid) {
9262 		ddi_devid_free(un->un_devid);
9263 		un->un_devid = NULL;
9264 	}
9265 
9266 	/*
9267 	 * Destroy wmap cache if it exists.
9268 	 */
9269 	if (un->un_wm_cache != NULL) {
9270 		kmem_cache_destroy(un->un_wm_cache);
9271 		un->un_wm_cache = NULL;
9272 	}
9273 
9274 	/* Remove minor nodes */
9275 	ddi_remove_minor_node(devi, NULL);
9276 
9277 	/*
9278 	 * kstat cleanup is done in detach for all device types (4363169).
9279 	 * We do not want to fail detach if the device kstats are not deleted
9280 	 * since there is a confusion about the devo_refcnt for the device.
9281 	 * We just delete the kstats and let detach complete successfully.
9282 	 */
9283 	if (un->un_stats != NULL) {
9284 		kstat_delete(un->un_stats);
9285 		un->un_stats = NULL;
9286 	}
9287 	if (un->un_errstats != NULL) {
9288 		kstat_delete(un->un_errstats);
9289 		un->un_errstats = NULL;
9290 	}
9291 
9292 	/* Remove partition stats */
9293 	if (un->un_f_pkstats_enabled) {
9294 		for (i = 0; i < NSDMAP; i++) {
9295 			if (un->un_pstats[i] != NULL) {
9296 				kstat_delete(un->un_pstats[i]);
9297 				un->un_pstats[i] = NULL;
9298 			}
9299 		}
9300 	}
9301 
9302 	/* Remove xbuf registration */
9303 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9304 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9305 
9306 	/* Remove driver properties */
9307 	ddi_prop_remove_all(devi);
9308 
9309 	mutex_destroy(&un->un_pm_mutex);
9310 	cv_destroy(&un->un_pm_busy_cv);
9311 
9312 	cv_destroy(&un->un_wcc_cv);
9313 
9314 	/* Open/close semaphore */
9315 	sema_destroy(&un->un_semoclose);
9316 
9317 	/* Removable media condvar. */
9318 	cv_destroy(&un->un_state_cv);
9319 
9320 	/* Suspend/resume condvar. */
9321 	cv_destroy(&un->un_suspend_cv);
9322 	cv_destroy(&un->un_disk_busy_cv);
9323 
9324 	sd_free_rqs(un);
9325 
9326 	/* Free up soft state */
9327 	devp->sd_private = NULL;
9328 	bzero(un, sizeof (struct sd_lun));
9329 	ddi_soft_state_free(sd_state, instance);
9330 
9331 	mutex_exit(&sd_detach_mutex);
9332 
9333 	/* This frees up the INQUIRY data associated with the device. */
9334 	scsi_unprobe(devp);
9335 
9336 	return (DDI_SUCCESS);
9337 
9338 err_notclosed:
9339 	mutex_exit(SD_MUTEX(un));
9340 
9341 err_stillbusy:
9342 	_NOTE(NO_COMPETING_THREADS_NOW);
9343 
9344 err_remove_event:
9345 	mutex_enter(&sd_detach_mutex);
9346 	un->un_detach_count--;
9347 	mutex_exit(&sd_detach_mutex);
9348 
9349 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9350 	return (DDI_FAILURE);
9351 }
9352 
9353 
9354 /*
9355  * Driver minor node structure and data table
9356  */
9357 struct driver_minor_data {
9358 	char	*name;
9359 	minor_t	minor;
9360 	int	type;
9361 };
9362 
9363 static struct driver_minor_data sd_minor_data[] = {
9364 	{"a", 0, S_IFBLK},
9365 	{"b", 1, S_IFBLK},
9366 	{"c", 2, S_IFBLK},
9367 	{"d", 3, S_IFBLK},
9368 	{"e", 4, S_IFBLK},
9369 	{"f", 5, S_IFBLK},
9370 	{"g", 6, S_IFBLK},
9371 	{"h", 7, S_IFBLK},
9372 #if defined(_SUNOS_VTOC_16)
9373 	{"i", 8, S_IFBLK},
9374 	{"j", 9, S_IFBLK},
9375 	{"k", 10, S_IFBLK},
9376 	{"l", 11, S_IFBLK},
9377 	{"m", 12, S_IFBLK},
9378 	{"n", 13, S_IFBLK},
9379 	{"o", 14, S_IFBLK},
9380 	{"p", 15, S_IFBLK},
9381 #endif			/* defined(_SUNOS_VTOC_16) */
9382 #if defined(_FIRMWARE_NEEDS_FDISK)
9383 	{"q", 16, S_IFBLK},
9384 	{"r", 17, S_IFBLK},
9385 	{"s", 18, S_IFBLK},
9386 	{"t", 19, S_IFBLK},
9387 	{"u", 20, S_IFBLK},
9388 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9389 	{"a,raw", 0, S_IFCHR},
9390 	{"b,raw", 1, S_IFCHR},
9391 	{"c,raw", 2, S_IFCHR},
9392 	{"d,raw", 3, S_IFCHR},
9393 	{"e,raw", 4, S_IFCHR},
9394 	{"f,raw", 5, S_IFCHR},
9395 	{"g,raw", 6, S_IFCHR},
9396 	{"h,raw", 7, S_IFCHR},
9397 #if defined(_SUNOS_VTOC_16)
9398 	{"i,raw", 8, S_IFCHR},
9399 	{"j,raw", 9, S_IFCHR},
9400 	{"k,raw", 10, S_IFCHR},
9401 	{"l,raw", 11, S_IFCHR},
9402 	{"m,raw", 12, S_IFCHR},
9403 	{"n,raw", 13, S_IFCHR},
9404 	{"o,raw", 14, S_IFCHR},
9405 	{"p,raw", 15, S_IFCHR},
9406 #endif			/* defined(_SUNOS_VTOC_16) */
9407 #if defined(_FIRMWARE_NEEDS_FDISK)
9408 	{"q,raw", 16, S_IFCHR},
9409 	{"r,raw", 17, S_IFCHR},
9410 	{"s,raw", 18, S_IFCHR},
9411 	{"t,raw", 19, S_IFCHR},
9412 	{"u,raw", 20, S_IFCHR},
9413 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9414 	{0}
9415 };
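/*
 * Each minor number in the tables above encodes the instance and the
 * partition: minor = (instance << SDUNIT_SHIFT) | partition.  A sketch
 * of the decoding (illustrative only; the mask expression is an
 * assumption, not a macro from this file):
 *
 *	int instance  = getminor(dev) >> SDUNIT_SHIFT;
 *	int partition = getminor(dev) & ((1 << SDUNIT_SHIFT) - 1);
 *
 * so slice "a" of instance 0 and slice "a" of instance 1 yield
 * distinct dev_t values.
 */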
9416 
9417 static struct driver_minor_data sd_minor_data_efi[] = {
9418 	{"a", 0, S_IFBLK},
9419 	{"b", 1, S_IFBLK},
9420 	{"c", 2, S_IFBLK},
9421 	{"d", 3, S_IFBLK},
9422 	{"e", 4, S_IFBLK},
9423 	{"f", 5, S_IFBLK},
9424 	{"g", 6, S_IFBLK},
9425 	{"wd", 7, S_IFBLK},
9426 #if defined(_FIRMWARE_NEEDS_FDISK)
9427 	{"q", 16, S_IFBLK},
9428 	{"r", 17, S_IFBLK},
9429 	{"s", 18, S_IFBLK},
9430 	{"t", 19, S_IFBLK},
9431 	{"u", 20, S_IFBLK},
9432 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9433 	{"a,raw", 0, S_IFCHR},
9434 	{"b,raw", 1, S_IFCHR},
9435 	{"c,raw", 2, S_IFCHR},
9436 	{"d,raw", 3, S_IFCHR},
9437 	{"e,raw", 4, S_IFCHR},
9438 	{"f,raw", 5, S_IFCHR},
9439 	{"g,raw", 6, S_IFCHR},
9440 	{"wd,raw", 7, S_IFCHR},
9441 #if defined(_FIRMWARE_NEEDS_FDISK)
9442 	{"q,raw", 16, S_IFCHR},
9443 	{"r,raw", 17, S_IFCHR},
9444 	{"s,raw", 18, S_IFCHR},
9445 	{"t,raw", 19, S_IFCHR},
9446 	{"u,raw", 20, S_IFCHR},
9447 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9448 	{0}
9449 };
9450 
9451 
9452 /*
9453  *    Function: sd_create_minor_nodes
9454  *
9455  * Description: Create the minor device nodes for the instance.
9456  *
9457  *   Arguments: un - driver soft state (unit) structure
9458  *		devi - pointer to device info structure
9459  *
9460  * Return Code: DDI_SUCCESS
9461  *		DDI_FAILURE
9462  *
9463  *     Context: Kernel thread context
9464  */
9465 
9466 static int
9467 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9468 {
9469 	struct driver_minor_data	*dmdp;
9470 	struct scsi_device		*devp;
9471 	int				instance;
9472 	char				name[48];
9473 
9474 	ASSERT(un != NULL);
9475 	devp = ddi_get_driver_private(devi);
9476 	instance = ddi_get_instance(devp->sd_dev);
9477 
9478 	/*
9479 	 * Create all the minor nodes for this target.
9480 	 */
9481 	if (un->un_blockcount > DK_MAX_BLOCKS)
9482 		dmdp = sd_minor_data_efi;
9483 	else
9484 		dmdp = sd_minor_data;
9485 	while (dmdp->name != NULL) {
9486 
9487 		(void) sprintf(name, "%s", dmdp->name);
9488 		(void) snprintf(name, sizeof (name), "%s", dmdp->name);
9489 		if (ddi_create_minor_node(devi, name, dmdp->type,
9490 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9491 		    un->un_node_type, NULL) == DDI_FAILURE) {
9492 			/*
9493 			 * Clean up any nodes that may have been created, in
9494 			 * case this fails in the middle of the loop.
9495 			 */
9496 			ddi_remove_minor_node(devi, NULL);
9497 			return (DDI_FAILURE);
9498 		}
9499 		dmdp++;
9500 	}
9501 
9502 	return (DDI_SUCCESS);
9503 }
9504 
9505 
9506 /*
9507  *    Function: sd_create_errstats
9508  *
9509  * Description: This routine instantiates the device error stats.
9510  *
9511  *		Note: During attach the stats are instantiated first so they are
9512  *		available for attach-time routines that utilize the driver
9513  *		iopath to send commands to the device. The stats are initialized
9514  *		separately so data obtained during some attach-time routines is
9515  *		available. (4362483)
9516  *
9517  *   Arguments: un - driver soft state (unit) structure
9518  *		instance - driver instance
9519  *
9520  *     Context: Kernel thread context
9521  */
9522 
9523 static void
9524 sd_create_errstats(struct sd_lun *un, int instance)
9525 {
9526 	struct	sd_errstats	*stp;
9527 	char	kstatmodule_err[KSTAT_STRLEN];
9528 	char	kstatname[KSTAT_STRLEN];
9529 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9530 
9531 	ASSERT(un != NULL);
9532 
9533 	if (un->un_errstats != NULL) {
9534 		return;
9535 	}
9536 
9537 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9538 	    "%serr", sd_label);
9539 	(void) snprintf(kstatname, sizeof (kstatname),
9540 	    "%s%d,err", sd_label, instance);
9541 
9542 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9543 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9544 
9545 	if (un->un_errstats == NULL) {
9546 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9547 		    "sd_create_errstats: Failed kstat_create\n");
9548 		return;
9549 	}
9550 
9551 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9552 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9553 	    KSTAT_DATA_UINT32);
9554 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9555 	    KSTAT_DATA_UINT32);
9556 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9557 	    KSTAT_DATA_UINT32);
9558 	kstat_named_init(&stp->sd_vid,		"Vendor",
9559 	    KSTAT_DATA_CHAR);
9560 	kstat_named_init(&stp->sd_pid,		"Product",
9561 	    KSTAT_DATA_CHAR);
9562 	kstat_named_init(&stp->sd_revision,	"Revision",
9563 	    KSTAT_DATA_CHAR);
9564 	kstat_named_init(&stp->sd_serial,	"Serial No",
9565 	    KSTAT_DATA_CHAR);
9566 	kstat_named_init(&stp->sd_capacity,	"Size",
9567 	    KSTAT_DATA_ULONGLONG);
9568 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9569 	    KSTAT_DATA_UINT32);
9570 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9571 	    KSTAT_DATA_UINT32);
9572 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9573 	    KSTAT_DATA_UINT32);
9574 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9575 	    KSTAT_DATA_UINT32);
9576 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9577 	    KSTAT_DATA_UINT32);
9578 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9579 	    KSTAT_DATA_UINT32);
9580 
9581 	un->un_errstats->ks_private = un;
9582 	un->un_errstats->ks_update  = nulldev;
9583 
9584 	kstat_install(un->un_errstats);
9585 }
9586 
9587 
9588 /*
9589  *    Function: sd_set_errstats
9590  *
9591  * Description: This routine sets the value of the vendor id, product id,
9592  *		revision, serial number, and capacity device error stats.
9593  *
9594  *		Note: During attach the stats are instantiated first so they are
9595  *		available for attach-time routines that utilize the driver
9596  *		iopath to send commands to the device. The stats are initialized
9597  *		separately so data obtained during some attach-time routines is
9598  *		available. (4362483)
9599  *
9600  *   Arguments: un - driver soft state (unit) structure
9601  *
9602  *     Context: Kernel thread context
9603  */
9604 
9605 static void
9606 sd_set_errstats(struct sd_lun *un)
9607 {
9608 	struct	sd_errstats	*stp;
9609 
9610 	ASSERT(un != NULL);
9611 	ASSERT(un->un_errstats != NULL);
9612 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9613 	ASSERT(stp != NULL);
9614 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9615 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9616 	(void) strncpy(stp->sd_revision.value.c,
9617 	    un->un_sd->sd_inq->inq_revision, 4);
9618 
9619 	/*
9620 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9621 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9622 	 * (4376302))
9623 	 */
9624 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9625 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9626 		    sizeof (SD_INQUIRY(un)->inq_serial));
9627 	}
9628 
9629 	if (un->un_f_blockcount_is_valid != TRUE) {
9630 		/*
9631 		 * Set capacity error stat to 0 for no media. This ensures
9632 		 * a valid capacity is displayed in response to 'iostat -E'
9633 		 * when no media is present in the device.
9634 		 */
9635 		stp->sd_capacity.value.ui64 = 0;
9636 	} else {
9637 		/*
9638 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9639 		 * capacity.
9640 		 *
9641 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9642 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9643 		 * (un_tgt_blocksize / un->un_sys_blocksize).
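		 *
		 * Worked example (illustrative): a device with 10000
		 * 2048-byte target blocks and a 512-byte system blocksize
		 * has un_blockcount scaled to 10000 * (2048 / 512) = 40000,
		 * so the capacity reported here is 40000 * 512 =
		 * 20480000 bytes.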
9644 		 */
9645 		stp->sd_capacity.value.ui64 = (uint64_t)
9646 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9647 	}
9648 }
9649 
9650 
9651 /*
9652  *    Function: sd_set_pstats
9653  *
9654  * Description: This routine instantiates and initializes the partition
9655  *              stats for each partition with more than zero blocks.
9656  *		(4363169)
9657  *
9658  *   Arguments: un - driver soft state (unit) structure
9659  *
9660  *     Context: Kernel thread context
9661  */
9662 
9663 static void
9664 sd_set_pstats(struct sd_lun *un)
9665 {
9666 	char	kstatname[KSTAT_STRLEN];
9667 	int	instance;
9668 	int	i;
9669 
9670 	ASSERT(un != NULL);
9671 
9672 	instance = ddi_get_instance(SD_DEVINFO(un));
9673 
9674 	/* Note: x86: is this a VTOC8/VTOC16 difference? */
9675 	for (i = 0; i < NSDMAP; i++) {
9676 		if ((un->un_pstats[i] == NULL) &&
9677 		    (un->un_map[i].dkl_nblk != 0)) {
9678 			(void) snprintf(kstatname, sizeof (kstatname),
9679 			    "%s%d,%s", sd_label, instance,
9680 			    sd_minor_data[i].name);
9681 			un->un_pstats[i] = kstat_create(sd_label,
9682 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9683 			    1, KSTAT_FLAG_PERSISTENT);
9684 			if (un->un_pstats[i] != NULL) {
9685 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9686 				kstat_install(un->un_pstats[i]);
9687 			}
9688 		}
9689 	}
9690 }
9691 
9692 
9693 #if (defined(__fibre))
9694 /*
9695  *    Function: sd_init_event_callbacks
9696  *
9697  * Description: This routine initializes the insertion and removal event
9698  *		callbacks. (fibre only)
9699  *
9700  *   Arguments: un - driver soft state (unit) structure
9701  *
9702  *     Context: Kernel thread context
9703  */
9704 
9705 static void
9706 sd_init_event_callbacks(struct sd_lun *un)
9707 {
9708 	ASSERT(un != NULL);
9709 
9710 	if ((un->un_insert_event == NULL) &&
9711 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9712 	    &un->un_insert_event) == DDI_SUCCESS)) {
9713 		/*
9714 		 * Add the callback for an insertion event
9715 		 */
9716 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9717 		    un->un_insert_event, sd_event_callback, (void *)un,
9718 		    &(un->un_insert_cb_id));
9719 	}
9720 
9721 	if ((un->un_remove_event == NULL) &&
9722 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9723 	    &un->un_remove_event) == DDI_SUCCESS)) {
9724 		/*
9725 		 * Add the callback for a removal event
9726 		 */
9727 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9728 		    un->un_remove_event, sd_event_callback, (void *)un,
9729 		    &(un->un_remove_cb_id));
9730 	}
9731 }
9732 
9733 
9734 /*
9735  *    Function: sd_event_callback
9736  *
9737  * Description: This routine handles insert/remove events (photon). The
9738  *		state is changed to OFFLINE, which can be used to suppress
9739  *		error messages. (fibre only)
9740  *
9741  *   Arguments: un - driver soft state (unit) structure
9742  *
9743  *     Context: Callout thread context
9744  */
9745 /* ARGSUSED */
9746 static void
9747 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9748     void *bus_impldata)
9749 {
9750 	struct sd_lun *un = (struct sd_lun *)arg;
9751 
9752 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9753 	if (event == un->un_insert_event) {
9754 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9755 		mutex_enter(SD_MUTEX(un));
9756 		if (un->un_state == SD_STATE_OFFLINE) {
9757 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9758 				un->un_state = un->un_last_state;
9759 			} else {
9760 				/*
9761 				 * We have gone through SUSPEND/RESUME while
9762 				 * we were offline. Restore the last state
9763 				 */
9764 				un->un_state = un->un_save_state;
9765 			}
9766 		}
9767 		mutex_exit(SD_MUTEX(un));
9768 
9769 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9770 	} else if (event == un->un_remove_event) {
9771 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9772 		mutex_enter(SD_MUTEX(un));
9773 		/*
9774 		 * We need to handle an event callback that occurs during
9775 		 * the suspend operation, since we don't prevent it.
9776 		 */
9777 		if (un->un_state != SD_STATE_OFFLINE) {
9778 			if (un->un_state != SD_STATE_SUSPENDED) {
9779 				New_state(un, SD_STATE_OFFLINE);
9780 			} else {
9781 				un->un_last_state = SD_STATE_OFFLINE;
9782 			}
9783 		}
9784 		mutex_exit(SD_MUTEX(un));
9785 	} else {
9786 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9787 		    "!Unknown event\n");
9788 	}
9789 
9790 }
9791 #endif
9792 
9793 /*
9794  *    Function: sd_cache_control()
9795  *
9796  * Description: This routine is the driver entry point for setting
9797  *		read and write caching by modifying the WCE (write cache
9798  *		enable) and RCD (read cache disable) bits of mode
9799  *		page 8 (MODEPAGE_CACHING).
9800  *
9801  *   Arguments: un - driver soft state (unit) structure
9802  *		rcd_flag - flag for controlling the read cache
9803  *		wce_flag - flag for controlling the write cache
9804  *
9805  * Return Code: EIO
9806  *		code returned by sd_send_scsi_MODE_SENSE and
9807  *		sd_send_scsi_MODE_SELECT
9808  *
9809  *     Context: Kernel Thread
9810  */
9811 
9812 static int
9813 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9814 {
9815 	struct mode_caching	*mode_caching_page;
9816 	uchar_t			*header;
9817 	size_t			buflen;
9818 	int			hdrlen;
9819 	int			bd_len;
9820 	int			rval = 0;
9821 	struct mode_header_grp2	*mhp;
9822 
9823 	ASSERT(un != NULL);
9824 
9825 	/*
9826 	 * Do a test unit ready, otherwise a mode sense may not work if this
9827 	 * is the first command sent to the device after boot.
9828 	 */
9829 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9830 
9831 	if (un->un_f_cfg_is_atapi == TRUE) {
9832 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9833 	} else {
9834 		hdrlen = MODE_HEADER_LENGTH;
9835 	}
9836 
9837 	/*
9838 	 * Allocate memory for the retrieved mode page and its headers.  Set
9839 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9840 	 * we get all of the mode sense data; otherwise, the mode select
9841 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9842 	 */
9843 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9844 		sizeof (struct mode_cache_scsi3);
9845 
9846 	header = kmem_zalloc(buflen, KM_SLEEP);
9847 
9848 	/* Get the information from the device. */
9849 	if (un->un_f_cfg_is_atapi == TRUE) {
9850 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9851 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9852 	} else {
9853 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9854 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9855 	}
9856 	if (rval != 0) {
9857 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9858 		    "sd_cache_control: Mode Sense Failed\n");
9859 		kmem_free(header, buflen);
9860 		return (rval);
9861 	}
9862 
9863 	/*
9864 	 * Determine size of Block Descriptors in order to locate
9865 	 * the mode page data. ATAPI devices return 0, SCSI devices
9866 	 * should return MODE_BLK_DESC_LENGTH.
9867 	 */
9868 	if (un->un_f_cfg_is_atapi == TRUE) {
9869 		mhp	= (struct mode_header_grp2 *)header;
9870 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9871 	} else {
9872 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9873 	}
9874 
9875 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9876 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9877 		    "sd_cache_control: Mode Sense returned invalid "
9878 		    "block descriptor length\n");
9879 		kmem_free(header, buflen);
9880 		return (EIO);
9881 	}
9882 
9883 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9884 
9885 	/* Check the relevant bits on successful mode sense. */
9886 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9887 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9888 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9889 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9890 
9891 		size_t sbuflen;
9892 		uchar_t save_pg;
9893 
9894 		/*
9895 		 * Construct select buffer length based on the
9896 		 * length of the sense data returned.
9897 		 */
9898 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9899 		    sizeof (struct mode_page) +
9900 		    (int)mode_caching_page->mode_page.length;
9901 
9902 		/*
9903 		 * Set the caching bits as requested.
9904 		 */
9905 		if (rcd_flag == SD_CACHE_ENABLE)
9906 			mode_caching_page->rcd = 0;
9907 		else if (rcd_flag == SD_CACHE_DISABLE)
9908 			mode_caching_page->rcd = 1;
9909 
9910 		if (wce_flag == SD_CACHE_ENABLE)
9911 			mode_caching_page->wce = 1;
9912 		else if (wce_flag == SD_CACHE_DISABLE)
9913 			mode_caching_page->wce = 0;
9914 
9915 		/*
9916 		 * Save the page if the mode sense says the
9917 		 * drive supports it.
9918 		 */
9919 		save_pg = mode_caching_page->mode_page.ps ?
9920 				SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9921 
9922 		/* Clear reserved bits before mode select. */
9923 		mode_caching_page->mode_page.ps = 0;
9924 
9925 		/*
9926 		 * Clear out mode header for mode select.
9927 		 * The rest of the retrieved page will be reused.
9928 		 */
9929 		bzero(header, hdrlen);
9930 
9931 		if (un->un_f_cfg_is_atapi == TRUE) {
9932 			mhp = (struct mode_header_grp2 *)header;
9933 			mhp->bdesc_length_hi = bd_len >> 8;
9934 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9935 		} else {
9936 			((struct mode_header *)header)->bdesc_length = bd_len;
9937 		}
9938 
9939 		/* Issue mode select to change the cache settings */
9940 		if (un->un_f_cfg_is_atapi == TRUE) {
9941 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9942 			    sbuflen, save_pg, SD_PATH_DIRECT);
9943 		} else {
9944 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9945 			    sbuflen, save_pg, SD_PATH_DIRECT);
9946 		}
9947 	}
9948 
9949 	kmem_free(header, buflen);
9950 	return (rval);
9951 }
9952 
9953 
9954 /*
9955  *    Function: sd_get_write_cache_enabled()
9956  *
9957  * Description: This routine is the driver entry point for determining if
9958  *		write caching is enabled.  It examines the WCE (write cache
9959  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9960  *
9961  *   Arguments: un - driver soft state (unit) structure
9962  *		is_enabled - pointer to int where write cache enabled state
9963  *			is returned (non-zero -> write cache enabled)
9964  *
9965  *
9966  * Return Code: EIO
9967  *		code returned by sd_send_scsi_MODE_SENSE
9968  *
9969  *     Context: Kernel Thread
9970  *
9971  * NOTE: If ioctl is added to disable write cache, this sequence should
9972  * be followed so that no locking is required for accesses to
9973  * un->un_f_write_cache_enabled:
9974  * 	do mode select to clear wce
9975  * 	do synchronize cache to flush cache
9976  * 	set un->un_f_write_cache_enabled = FALSE
9977  *
9978  * Conversely, an ioctl to enable the write cache should be done
9979  * in this order:
9980  * 	set un->un_f_write_cache_enabled = TRUE
9981  * 	do mode select to set wce
9982  */
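
/*
 * A sketch of the disable sequence above (illustrative only; passing
 * SD_CACHE_ENABLE for the rcd flag is an assumption made here to keep
 * the read cache on, and error handling is elided):
 *
 *	if (sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_DISABLE) == 0 &&
 *	    sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL) == 0) {
 *		un->un_f_write_cache_enabled = FALSE;
 *	}
 */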
9983 
9984 static int
9985 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
9986 {
9987 	struct mode_caching	*mode_caching_page;
9988 	uchar_t			*header;
9989 	size_t			buflen;
9990 	int			hdrlen;
9991 	int			bd_len;
9992 	int			rval = 0;
9993 
9994 	ASSERT(un != NULL);
9995 	ASSERT(is_enabled != NULL);
9996 
9997 	/* in case of error, flag as enabled */
9998 	*is_enabled = TRUE;
9999 
10000 	/*
10001 	 * Do a test unit ready, otherwise a mode sense may not work if this
10002 	 * is the first command sent to the device after boot.
10003 	 */
10004 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10005 
10006 	if (un->un_f_cfg_is_atapi == TRUE) {
10007 		hdrlen = MODE_HEADER_LENGTH_GRP2;
10008 	} else {
10009 		hdrlen = MODE_HEADER_LENGTH;
10010 	}
10011 
10012 	/*
10013 	 * Allocate memory for the retrieved mode page and its headers.  Set
10014 	 * a pointer to the page itself.
10015 	 */
10016 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10017 	header = kmem_zalloc(buflen, KM_SLEEP);
10018 
10019 	/* Get the information from the device. */
10020 	if (un->un_f_cfg_is_atapi == TRUE) {
10021 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10022 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10023 	} else {
10024 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10025 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10026 	}
10027 	if (rval != 0) {
10028 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10029 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10030 		kmem_free(header, buflen);
10031 		return (rval);
10032 	}
10033 
10034 	/*
10035 	 * Determine size of Block Descriptors in order to locate
10036 	 * the mode page data. ATAPI devices return 0, SCSI devices
10037 	 * should return MODE_BLK_DESC_LENGTH.
10038 	 */
10039 	if (un->un_f_cfg_is_atapi == TRUE) {
10040 		struct mode_header_grp2	*mhp;
10041 		mhp	= (struct mode_header_grp2 *)header;
10042 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10043 	} else {
10044 		bd_len  = ((struct mode_header *)header)->bdesc_length;
10045 	}
10046 
10047 	if (bd_len > MODE_BLK_DESC_LENGTH) {
10048 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10049 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10050 		    "block descriptor length\n");
10051 		kmem_free(header, buflen);
10052 		return (EIO);
10053 	}
10054 
10055 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10056 	*is_enabled = mode_caching_page->wce;
10057 
10058 	kmem_free(header, buflen);
10059 	return (0);
10060 }
10061 
10062 
10063 /*
10064  *    Function: sd_make_device
10065  *
10066  * Description: Utility routine to return the Solaris device number from
10067  *		the data in the device's dev_info structure.
10068  *
10069  * Return Code: The Solaris device number
10070  *
10071  *     Context: Any
10072  */
10073 
10074 static dev_t
10075 sd_make_device(dev_info_t *devi)
10076 {
10077 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10078 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10079 }
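
/*
 * For example (illustrative arithmetic only, assuming an SDUNIT_SHIFT
 * of 3): instance 2 yields a base minor number of 2 << 3 == 16, and the
 * partition minors 16..23 then map back to instance 2 via SDUNIT() and
 * to partitions 0..7 via SDPART().
 */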
10080 
10081 
10082 /*
10083  *    Function: sd_pm_entry
10084  *
10085  * Description: Called at the start of a new command to manage power
10086  *		and busy status of a device. This includes determining whether
10087  *		the current power state of the device is sufficient for
10088  *		performing the command or whether it must be changed.
10089  *		The PM framework is notified appropriately.
10090  *		Only with a return status of DDI_SUCCESS will the
10091  *		component be marked busy to the framework.
10092  *
10093  *		All callers of sd_pm_entry must check the return status
10094  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
10095  *		of DDI_FAILURE indicates the device failed to power up.
10096  *		In this case un_pm_count has been adjusted so the result
10097  *		on exit is still powered down, i.e., count is less than 0.
10098  *		Calling sd_pm_exit with this count value hits an ASSERT.
10099  *
10100  * Return Code: DDI_SUCCESS or DDI_FAILURE
10101  *
10102  *     Context: Kernel thread context.
10103  */
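
/*
 * The expected caller pattern (as used by sdopen() and sdclose() below):
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		(issue the command(s) here)
 *		sd_pm_exit(un);
 *	} else {
 *		(fail the request; do NOT call sd_pm_exit())
 *	}
 */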
10104 
10105 static int
10106 sd_pm_entry(struct sd_lun *un)
10107 {
10108 	int return_status = DDI_SUCCESS;
10109 
10110 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10111 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10112 
10113 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
10114 
10115 	if (un->un_f_pm_is_enabled == FALSE) {
10116 		SD_TRACE(SD_LOG_IO_PM, un,
10117 		    "sd_pm_entry: exiting, PM not enabled\n");
10118 		return (return_status);
10119 	}
10120 
10121 	/*
10122 	 * Just increment a counter if PM is enabled. On the transition from
10123 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
10124 	 * the count with each IO and mark the device as idle when the count
10125 	 * hits 0.
10126 	 *
10127 	 * If the count is less than 0 the device is powered down. If a powered
10128 	 * down device is successfully powered up then the count must be
10129 	 * incremented to reflect the power up. Note that it'll get incremented
10130 	 * a second time to become busy.
10131 	 *
10132 	 * Because the following has the potential to change the device state
10133 	 * and must release the un_pm_mutex to do so, only one thread can be
10134 	 * allowed through at a time.
10135 	 */
10136 
10137 	mutex_enter(&un->un_pm_mutex);
10138 	while (un->un_pm_busy == TRUE) {
10139 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10140 	}
10141 	un->un_pm_busy = TRUE;
10142 
10143 	if (un->un_pm_count < 1) {
10144 
10145 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10146 
10147 		/*
10148 		 * Indicate we are now busy so the framework won't attempt to
10149 		 * power down the device. This call will only fail if either
10150 		 * we passed a bad component number or the device has no
10151 		 * components. Neither of these should ever happen.
10152 		 */
10153 		mutex_exit(&un->un_pm_mutex);
10154 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10155 		ASSERT(return_status == DDI_SUCCESS);
10156 
10157 		mutex_enter(&un->un_pm_mutex);
10158 
10159 		if (un->un_pm_count < 0) {
10160 			mutex_exit(&un->un_pm_mutex);
10161 
10162 			SD_TRACE(SD_LOG_IO_PM, un,
10163 			    "sd_pm_entry: power up component\n");
10164 
10165 			/*
10166 			 * pm_raise_power will cause sdpower to be called
10167 			 * which brings the device power level to the
10168 			 * desired state, ON in this case. If successful,
10169 			 * un_pm_count and un_power_level will be updated
10170 			 * appropriately.
10171 			 */
10172 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10173 			    SD_SPINDLE_ON);
10174 
10175 			mutex_enter(&un->un_pm_mutex);
10176 
10177 			if (return_status != DDI_SUCCESS) {
10178 				/*
10179 				 * Power up failed.
10180 				 * Idle the device and adjust the count
10181 				 * so the result on exit is that we're
10182 				 * still powered down, i.e., count is less than 0.
10183 				 */
10184 				SD_TRACE(SD_LOG_IO_PM, un,
10185 				    "sd_pm_entry: power up failed,"
10186 				    " idle the component\n");
10187 
10188 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10189 				un->un_pm_count--;
10190 			} else {
10191 				/*
10192 				 * Device is powered up, verify the
10193 				 * count is non-negative.
10194 				 * This is debug only.
10195 				 */
10196 				ASSERT(un->un_pm_count == 0);
10197 			}
10198 		}
10199 
10200 		if (return_status == DDI_SUCCESS) {
10201 			/*
10202 			 * For performance, now that the device has been tagged
10203 			 * as busy, and it's known to be powered up, update the
10204 			 * chain types to use jump tables that do not include
10205 			 * pm. This significantly lowers the overhead and
10206 			 * therefore improves performance.
10207 			 */
10208 
10209 			mutex_exit(&un->un_pm_mutex);
10210 			mutex_enter(SD_MUTEX(un));
10211 			SD_TRACE(SD_LOG_IO_PM, un,
10212 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10213 			    un->un_uscsi_chain_type);
10214 
10215 			if (un->un_f_non_devbsize_supported) {
10216 				un->un_buf_chain_type =
10217 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10218 			} else {
10219 				un->un_buf_chain_type =
10220 				    SD_CHAIN_INFO_DISK_NO_PM;
10221 			}
10222 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10223 
10224 			SD_TRACE(SD_LOG_IO_PM, un,
10225 			    "             changed  uscsi_chain_type to   %d\n",
10226 			    un->un_uscsi_chain_type);
10227 			mutex_exit(SD_MUTEX(un));
10228 			mutex_enter(&un->un_pm_mutex);
10229 
10230 			if (un->un_pm_idle_timeid == NULL) {
10231 				/* 300 ms. */
10232 				un->un_pm_idle_timeid =
10233 				    timeout(sd_pm_idletimeout_handler, un,
10234 				    (drv_usectohz((clock_t)300000)));
10235 				/*
10236 				 * Include an extra call to busy which keeps the
10237 				 * device busy with respect to the PM layer
10238 				 * until the timer fires, at which time it'll
10239 				 * get the extra idle call.
10240 				 */
10241 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10242 			}
10243 		}
10244 	}
10245 	un->un_pm_busy = FALSE;
10246 	/* Next... */
10247 	cv_signal(&un->un_pm_busy_cv);
10248 
10249 	un->un_pm_count++;
10250 
10251 	SD_TRACE(SD_LOG_IO_PM, un,
10252 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10253 
10254 	mutex_exit(&un->un_pm_mutex);
10255 
10256 	return (return_status);
10257 }
10258 
10259 
10260 /*
10261  *    Function: sd_pm_exit
10262  *
10263  * Description: Called at the completion of a command to manage busy
10264  *		status for the device. If the device becomes idle the
10265  *		PM framework is notified.
10266  *
10267  *     Context: Kernel thread context
10268  */
10269 
10270 static void
10271 sd_pm_exit(struct sd_lun *un)
10272 {
10273 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10274 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10275 
10276 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10277 
10278 	/*
10279 	 * After attach the following flag is only read, so don't
10280 	 * take the penalty of acquiring a mutex for it.
10281 	 */
10282 	if (un->un_f_pm_is_enabled == TRUE) {
10283 
10284 		mutex_enter(&un->un_pm_mutex);
10285 		un->un_pm_count--;
10286 
10287 		SD_TRACE(SD_LOG_IO_PM, un,
10288 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10289 
10290 		ASSERT(un->un_pm_count >= 0);
10291 		if (un->un_pm_count == 0) {
10292 			mutex_exit(&un->un_pm_mutex);
10293 
10294 			SD_TRACE(SD_LOG_IO_PM, un,
10295 			    "sd_pm_exit: idle component\n");
10296 
10297 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10298 
10299 		} else {
10300 			mutex_exit(&un->un_pm_mutex);
10301 		}
10302 	}
10303 
10304 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10305 }
10306 
10307 
10308 /*
10309  *    Function: sdopen
10310  *
10311  * Description: Driver's open(9e) entry point function.
10312  *
10313  *   Arguments: dev_p   - pointer to device number
10314  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10315  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10316  *		cred_p  - user credential pointer
10317  *
10318  * Return Code: EINVAL
10319  *		ENXIO
10320  *		EIO
10321  *		EROFS
10322  *		EBUSY
10323  *
10324  *     Context: Kernel thread context
10325  */
10326 /* ARGSUSED */
10327 static int
10328 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10329 {
10330 	struct sd_lun	*un;
10331 	int		nodelay;
10332 	int		part;
10333 	uint64_t	partmask;
10334 	int		instance;
10335 	dev_t		dev;
10336 	int		rval = EIO;
10337 
10338 	/* Validate the open type */
10339 	if (otyp >= OTYPCNT) {
10340 		return (EINVAL);
10341 	}
10342 
10343 	dev = *dev_p;
10344 	instance = SDUNIT(dev);
10345 	mutex_enter(&sd_detach_mutex);
10346 
10347 	/*
10348 	 * Fail the open if there is no softstate for the instance, or
10349 	 * if another thread somewhere is trying to detach the instance.
10350 	 */
10351 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10352 	    (un->un_detach_count != 0)) {
10353 		mutex_exit(&sd_detach_mutex);
10354 		/*
10355 		 * The probe cache only needs to be cleared when open (9e) fails
10356 		 * with ENXIO (4238046).
10357 		 */
10358 		/*
10359 		 * Unconditionally clearing the probe cache is ok with
10360 		 * separate sd/ssd binaries; on the x86 platform, where
10361 		 * both parallel and fibre can be in one binary, it can
10362 		 * be an issue.
10363 		 */
10364 		sd_scsi_clear_probe_cache();
10365 		return (ENXIO);
10366 	}
10367 
10368 	/*
10369 	 * The un_layer_count is to prevent another thread in specfs from
10370 	 * trying to detach the instance, which can happen when we are
10371 	 * called from a higher-layer driver instead of thru specfs.
10372 	 * This will not be needed when DDI provides a layered driver
10373 	 * interface that allows specfs to know that an instance is in
10374 	 * use by a layered driver & should not be detached.
10375 	 *
10376 	 * Note: the semantics for layered driver opens are exactly one
10377 	 * close for every open.
10378 	 */
10379 	if (otyp == OTYP_LYR) {
10380 		un->un_layer_count++;
10381 	}
10382 
10383 	/*
10384 	 * Keep a count of the current # of opens in progress. This is because
10385 	 * some layered drivers try to call us as a regular open. This can
10386 	 * cause problems that we cannot prevent; however, by keeping this count
10387 	 * we can at least keep our open and detach routines from racing against
10388 	 * each other under such conditions.
10389 	 */
10390 	un->un_opens_in_progress++;
10391 	mutex_exit(&sd_detach_mutex);
10392 
10393 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10394 	part	 = SDPART(dev);
10395 	partmask = 1 << part;
10396 
10397 	/*
10398 	 * We use a semaphore here in order to serialize
10399 	 * open and close requests on the device.
10400 	 */
10401 	sema_p(&un->un_semoclose);
10402 
10403 	mutex_enter(SD_MUTEX(un));
10404 
10405 	/*
10406 	 * All device accesses go thru sdstrategy() where we check
10407 	 * on suspend status but there could be a scsi_poll command,
10408 	 * which bypasses sdstrategy(), so we need to check pm
10409 	 * status.
10410 	 */
10411 
10412 	if (!nodelay) {
10413 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10414 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10415 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10416 		}
10417 
10418 		mutex_exit(SD_MUTEX(un));
10419 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10420 			rval = EIO;
10421 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10422 			    "sdopen: sd_pm_entry failed\n");
10423 			goto open_failed_with_pm;
10424 		}
10425 		mutex_enter(SD_MUTEX(un));
10426 	}
10427 
10428 	/* check for previous exclusive open */
10429 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10430 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10431 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10432 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10433 
10434 	if (un->un_exclopen & (partmask)) {
10435 		goto excl_open_fail;
10436 	}
10437 
10438 	if (flag & FEXCL) {
10439 		int i;
10440 		if (un->un_ocmap.lyropen[part]) {
10441 			goto excl_open_fail;
10442 		}
10443 		for (i = 0; i < (OTYPCNT - 1); i++) {
10444 			if (un->un_ocmap.regopen[i] & (partmask)) {
10445 				goto excl_open_fail;
10446 			}
10447 		}
10448 	}
10449 
10450 	/*
10451 	 * Check the write permission if this is a removable media device,
10452 	 * NDELAY has not been set, and writable permission is requested.
10453 	 *
10454 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10455 	 * attempt will fail with EIO as part of the I/O processing. This is a
10456 	 * more permissive implementation that allows the open to succeed and
10457 	 * WRITE attempts to fail when appropriate.
10458 	 */
10459 	if (un->un_f_chk_wp_open) {
10460 		if ((flag & FWRITE) && (!nodelay)) {
10461 			mutex_exit(SD_MUTEX(un));
10462 			/*
10463 			 * Defer the check for write permission on a writable
10464 			 * DVD drive until sdstrategy; do not fail the open even
10465 			 * if FWRITE is set, as the device can be writable
10466 			 * depending upon the media, and the media can change
10467 			 * after the call to open().
10468 			 */
10469 			if (un->un_f_dvdram_writable_device == FALSE) {
10470 				if (ISCD(un) || sr_check_wp(dev)) {
10471 					rval = EROFS;
10472 					mutex_enter(SD_MUTEX(un));
10473 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10474 					    "write to cd or write protected media\n");
10475 					goto open_fail;
10476 				}
10477 			}
10478 			mutex_enter(SD_MUTEX(un));
10479 		}
10480 	}
10481 
10482 	/*
10483 	 * If opening in NDELAY/NONBLOCK mode, just return.
10484 	 * Check if disk is ready and has a valid geometry later.
10485 	 */
10486 	if (!nodelay) {
10487 		mutex_exit(SD_MUTEX(un));
10488 		rval = sd_ready_and_valid(un);
10489 		mutex_enter(SD_MUTEX(un));
10490 		/*
10491 		 * Fail if device is not ready or if the number of disk
10492 		 * blocks is zero or negative for non-CD devices.
10493 		 */
10494 		if ((rval != SD_READY_VALID) ||
10495 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10496 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10497 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10498 			    "device not ready or invalid disk block value\n");
10499 			goto open_fail;
10500 		}
10501 #if defined(__i386) || defined(__amd64)
10502 	} else {
10503 		uchar_t *cp;
10504 		/*
10505 		 * x86 requires special nodelay handling, so that p0 is
10506 		 * always defined and accessible.
10507 		 * Invalidate geometry only if device is not already open.
10508 		 */
10509 		cp = &un->un_ocmap.chkd[0];
10510 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10511 			if (*cp != (uchar_t)0) {
10512 				break;
10513 			}
10514 			cp++;
10515 		}
10516 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10517 			un->un_f_geometry_is_valid = FALSE;
10518 		}
10519 
10520 #endif
10521 	}
10522 
10523 	if (otyp == OTYP_LYR) {
10524 		un->un_ocmap.lyropen[part]++;
10525 	} else {
10526 		un->un_ocmap.regopen[otyp] |= partmask;
10527 	}
10528 
10529 	/* Set up open and exclusive open flags */
10530 	if (flag & FEXCL) {
10531 		un->un_exclopen |= (partmask);
10532 	}
10533 
10534 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10535 	    "open of part %d type %d\n", part, otyp);
10536 
10537 	mutex_exit(SD_MUTEX(un));
10538 	if (!nodelay) {
10539 		sd_pm_exit(un);
10540 	}
10541 
10542 	sema_v(&un->un_semoclose);
10543 
10544 	mutex_enter(&sd_detach_mutex);
10545 	un->un_opens_in_progress--;
10546 	mutex_exit(&sd_detach_mutex);
10547 
10548 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10549 	return (DDI_SUCCESS);
10550 
10551 excl_open_fail:
10552 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10553 	rval = EBUSY;
10554 
10555 open_fail:
10556 	mutex_exit(SD_MUTEX(un));
10557 
10558 	/*
10559 	 * On a failed open we must exit the pm management.
10560 	 */
10561 	if (!nodelay) {
10562 		sd_pm_exit(un);
10563 	}
10564 open_failed_with_pm:
10565 	sema_v(&un->un_semoclose);
10566 
10567 	mutex_enter(&sd_detach_mutex);
10568 	un->un_opens_in_progress--;
10569 	if (otyp == OTYP_LYR) {
10570 		un->un_layer_count--;
10571 	}
10572 	mutex_exit(&sd_detach_mutex);
10573 
10574 	return (rval);
10575 }
10576 
10577 
10578 /*
10579  *    Function: sdclose
10580  *
10581  * Description: Driver's close(9e) entry point function.
10582  *
10583  *   Arguments: dev    - device number
10584  *		flag   - file status flag, informational only
10585  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10586  *		cred_p - user credential pointer
10587  *
10588  * Return Code: ENXIO
10589  *
10590  *     Context: Kernel thread context
10591  */
10592 /* ARGSUSED */
10593 static int
10594 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10595 {
10596 	struct sd_lun	*un;
10597 	uchar_t		*cp;
10598 	int		part;
10599 	int		nodelay;
10600 	int		rval = 0;
10601 
10602 	/* Validate the open type */
10603 	if (otyp >= OTYPCNT) {
10604 		return (ENXIO);
10605 	}
10606 
10607 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10608 		return (ENXIO);
10609 	}
10610 
10611 	part = SDPART(dev);
10612 	nodelay = flag & (FNDELAY | FNONBLOCK);
10613 
10614 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10615 	    "sdclose: close of part %d type %d\n", part, otyp);
10616 
10617 	/*
10618 	 * We use a semaphore here in order to serialize
10619 	 * open and close requests on the device.
10620 	 */
10621 	sema_p(&un->un_semoclose);
10622 
10623 	mutex_enter(SD_MUTEX(un));
10624 
10625 	/* Don't proceed if power is being changed. */
10626 	while (un->un_state == SD_STATE_PM_CHANGING) {
10627 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10628 	}
10629 
10630 	if (un->un_exclopen & (1 << part)) {
10631 		un->un_exclopen &= ~(1 << part);
10632 	}
10633 
10634 	/* Update the open partition map */
10635 	if (otyp == OTYP_LYR) {
10636 		un->un_ocmap.lyropen[part] -= 1;
10637 	} else {
10638 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10639 	}
10640 
10641 	cp = &un->un_ocmap.chkd[0];
10642 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10643 		if (*cp != 0) {
10644 			break;
10645 		}
10646 		cp++;
10647 	}
10648 
10649 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10650 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10651 
10652 		/*
10653 		 * We avoid persistence upon the last close, and set
10654 		 * the throttle back to the maximum.
10655 		 */
10656 		un->un_throttle = un->un_saved_throttle;
10657 
10658 		if (un->un_state == SD_STATE_OFFLINE) {
10659 			if (un->un_f_is_fibre == FALSE) {
10660 				scsi_log(SD_DEVINFO(un), sd_label,
10661 				    CE_WARN, "offline\n");
10662 			}
10663 			un->un_f_geometry_is_valid = FALSE;
10664 
10665 		} else {
10666 			/*
10667 			 * Flush any outstanding writes in NVRAM cache.
10668 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10669 			 * cmd, it may not work for non-Pluto devices.
10670 			 * SYNCHRONIZE CACHE is not required for removables,
10671 			 * except DVD-RAM drives.
10672 			 *
10673 			 * Also note: because SYNCHRONIZE CACHE is currently
10674 			 * the only command issued here that requires the
10675 			 * drive be powered up, only do the power up before
10676 			 * sending the Sync Cache command. If additional
10677 			 * commands are added which require a powered up
10678 			 * drive, the following sequence may have to change.
10679 			 *
10680 			 * And finally, note that parallel SCSI on SPARC
10681 			 * only issues a Sync Cache to DVD-RAM, a newly
10682 			 * supported device.
10683 			 */
10684 #if defined(__i386) || defined(__amd64)
10685 			if (un->un_f_sync_cache_supported ||
10686 			    un->un_f_dvdram_writable_device == TRUE) {
10687 #else
10688 			if (un->un_f_dvdram_writable_device == TRUE) {
10689 #endif
10690 				mutex_exit(SD_MUTEX(un));
10691 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10692 					rval =
10693 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10694 					    NULL);
10695 					/* ignore error if not supported */
10696 					if (rval == ENOTSUP) {
10697 						rval = 0;
10698 					} else if (rval != 0) {
10699 						rval = EIO;
10700 					}
10701 					sd_pm_exit(un);
10702 				} else {
10703 					rval = EIO;
10704 				}
10705 				mutex_enter(SD_MUTEX(un));
10706 			}
10707 
10708 			/*
10709 			 * For devices which support DOOR_LOCK, send an ALLOW
10710 			 * MEDIA REMOVAL command, but don't get upset if it
10711 			 * fails. We need to raise the power of the drive before
10712 			 * we can call sd_send_scsi_DOORLOCK().
10713 			 */
10714 			if (un->un_f_doorlock_supported) {
10715 				mutex_exit(SD_MUTEX(un));
10716 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10717 					rval = sd_send_scsi_DOORLOCK(un,
10718 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10719 
10720 					sd_pm_exit(un);
10721 					if (ISCD(un) && (rval != 0) &&
10722 					    (nodelay != 0)) {
10723 						rval = ENXIO;
10724 					}
10725 				} else {
10726 					rval = EIO;
10727 				}
10728 				mutex_enter(SD_MUTEX(un));
10729 			}
10730 
10731 			/*
10732 			 * If a device has removable media, invalidate all
10733 			 * parameters related to media, such as geometry,
10734 			 * blocksize, and blockcount.
10735 			 */
10736 			if (un->un_f_has_removable_media) {
10737 				sr_ejected(un);
10738 			}
10739 
10740 			/*
10741 			 * Destroy the cache (if it exists) which was
10742 			 * allocated for the write maps since this is
10743 			 * the last close for this media.
10744 			 */
10745 			if (un->un_wm_cache) {
10746 				/*
10747 				 * Check if there are pending commands;
10748 				 * if there are, give a warning and
10749 				 * do not destroy the cache.
10750 				 */
10751 				if (un->un_ncmds_in_driver > 0) {
10752 					scsi_log(SD_DEVINFO(un),
10753 					    sd_label, CE_WARN,
10754 					    "Unable to clean up memory "
10755 					    "because of pending I/O\n");
10756 				} else {
10757 					kmem_cache_destroy(
10758 					    un->un_wm_cache);
10759 					un->un_wm_cache = NULL;
10760 				}
10761 			}
10762 		}
10763 	}
10764 
10765 	mutex_exit(SD_MUTEX(un));
10766 	sema_v(&un->un_semoclose);
10767 
10768 	if (otyp == OTYP_LYR) {
10769 		mutex_enter(&sd_detach_mutex);
10770 		/*
10771 		 * The detach routine may run when the layer count
10772 		 * drops to zero.
10773 		 */
10774 		un->un_layer_count--;
10775 		mutex_exit(&sd_detach_mutex);
10776 	}
10777 
10778 	return (rval);
10779 }
10780 
10781 
10782 /*
10783  *    Function: sd_ready_and_valid
10784  *
10785  * Description: Test if device is ready and has a valid geometry.
10786  *
10787  *   Arguments: un - driver soft state (unit) structure
10789  *
10790  * Return Code: SD_READY_VALID		ready and valid label
10791  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10792  *		SD_NOT_READY_VALID	not ready, no label
10793  *
10794  *     Context: Never called at interrupt context.
10795  */
10796 
10797 static int
10798 sd_ready_and_valid(struct sd_lun *un)
10799 {
10800 	struct sd_errstats	*stp;
10801 	uint64_t		capacity;
10802 	uint_t			lbasize;
10803 	int			rval = SD_READY_VALID;
10804 	char			name_str[48];
10805 
10806 	ASSERT(un != NULL);
10807 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10808 
10809 	mutex_enter(SD_MUTEX(un));
10810 	/*
10811 	 * If a device has removable media, we must check if media is
10812 	 * ready when checking if this device is ready and valid.
10813 	 */
10814 	if (un->un_f_has_removable_media) {
10815 		mutex_exit(SD_MUTEX(un));
10816 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10817 			rval = SD_NOT_READY_VALID;
10818 			mutex_enter(SD_MUTEX(un));
10819 			goto done;
10820 		}
10821 
10822 		mutex_enter(SD_MUTEX(un));
10823 		if ((un->un_f_geometry_is_valid == FALSE) ||
10824 		    (un->un_f_blockcount_is_valid == FALSE) ||
10825 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10826 
10827 			/* capacity has to be read every open. */
10828 			mutex_exit(SD_MUTEX(un));
10829 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10830 			    &lbasize, SD_PATH_DIRECT) != 0) {
10831 				mutex_enter(SD_MUTEX(un));
10832 				un->un_f_geometry_is_valid = FALSE;
10833 				rval = SD_NOT_READY_VALID;
10834 				goto done;
10835 			} else {
10836 				mutex_enter(SD_MUTEX(un));
10837 				sd_update_block_info(un, lbasize, capacity);
10838 			}
10839 		}
10840 
10841 		/*
10842 		 * Check if the media in the device is writable or not.
10843 		 */
10844 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10845 			sd_check_for_writable_cd(un);
10846 		}
10847 
10848 	} else {
10849 		/*
10850 		 * Do a test unit ready to clear any unit attention from non-cd
10851 		 * devices.
10852 		 */
10853 		mutex_exit(SD_MUTEX(un));
10854 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10855 		mutex_enter(SD_MUTEX(un));
10856 	}
10857 
10858 
10859 	/*
10860 	 * If this is a non 512 block device, allocate space for
10861 	 * the wmap cache. This is being done here since every time
10862 	 * a media is changed this routine will be called and the
10863 	 * block size is a function of media rather than device.
10864 	 */
10865 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10866 		if (!(un->un_wm_cache)) {
10867 			(void) snprintf(name_str, sizeof (name_str),
10868 			    "%s%d_cache",
10869 			    ddi_driver_name(SD_DEVINFO(un)),
10870 			    ddi_get_instance(SD_DEVINFO(un)));
10871 			un->un_wm_cache = kmem_cache_create(
10872 			    name_str, sizeof (struct sd_w_map),
10873 			    8, sd_wm_cache_constructor,
10874 			    sd_wm_cache_destructor, NULL,
10875 			    (void *)un, NULL, 0);
10876 			if (!(un->un_wm_cache)) {
10877 				rval = ENOMEM;
10878 				goto done;
10879 			}
10880 		}
10881 	}
10882 
10883 	if (un->un_state == SD_STATE_NORMAL) {
10884 		/*
10885 		 * If the target is not yet ready here (defined by a TUR
10886 		 * failure), invalidate the geometry and print an 'offline'
10887 		 * message. This is a legacy message, as the state of the
10888 		 * target is not actually changed to SD_STATE_OFFLINE.
10889 		 *
10890 		 * If the TUR fails for EACCES (Reservation Conflict), it
10891 		 * means there actually is nothing wrong with the target that
10892 		 * would require invalidating the geometry, so continue in
10893 		 * that case as if the TUR was successful.
10894 		 */
10895 		int err;
10896 
10897 		mutex_exit(SD_MUTEX(un));
10898 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10899 		mutex_enter(SD_MUTEX(un));
10900 
10901 		if ((err != 0) && (err != EACCES)) {
10902 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10903 			    "offline\n");
10904 			un->un_f_geometry_is_valid = FALSE;
10905 			rval = SD_NOT_READY_VALID;
10906 			goto done;
10907 		}
10908 	}
10909 
10910 	if (un->un_f_format_in_progress == FALSE) {
10911 		/*
10912 		 * Note: sd_validate_geometry may return TRUE, but that does
10913 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10914 		 */
10915 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10916 		if (rval == ENOTSUP) {
10917 			if (un->un_f_geometry_is_valid == TRUE)
10918 				rval = 0;
10919 			else {
10920 				rval = SD_READY_NOT_VALID;
10921 				goto done;
10922 			}
10923 		}
10924 		if (rval != 0) {
10925 			/*
10926 			 * We don't check the validity of geometry for
10927 			 * CDROMs. Also we assume we have a good label
10928 			 * even if sd_validate_geometry returned ENOMEM.
10929 			 */
10930 			if (!ISCD(un) && rval != ENOMEM) {
10931 				rval = SD_NOT_READY_VALID;
10932 				goto done;
10933 			}
10934 		}
10935 	}
10936 
10937 	/*
10938 	 * If this device supports DOOR_LOCK command, try and send
10939 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10940 	 * if it fails. For a CD, however, it is an error
10941 	 */
10942 	if (un->un_f_doorlock_supported) {
10943 		mutex_exit(SD_MUTEX(un));
10944 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10945 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10946 			rval = SD_NOT_READY_VALID;
10947 			mutex_enter(SD_MUTEX(un));
10948 			goto done;
10949 		}
10950 		mutex_enter(SD_MUTEX(un));
10951 	}
10952 
10953 	/* The state has changed, inform the media watch routines */
10954 	un->un_mediastate = DKIO_INSERTED;
10955 	cv_broadcast(&un->un_state_cv);
10956 	rval = SD_READY_VALID;
10957 
10958 done:
10959 
10960 	/*
10961 	 * Initialize the capacity kstat value, if no media previously
10962 	 * (capacity kstat is 0) and a media has been inserted
10963 	 * (un_blockcount > 0).
10964 	 */
10965 	if (un->un_errstats != NULL) {
10966 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10967 		if ((stp->sd_capacity.value.ui64 == 0) &&
10968 		    (un->un_f_blockcount_is_valid == TRUE)) {
10969 			stp->sd_capacity.value.ui64 =
10970 			    (uint64_t)((uint64_t)un->un_blockcount *
10971 			    un->un_sys_blocksize);
10972 		}
10973 	}
10974 
10975 	mutex_exit(SD_MUTEX(un));
10976 	return (rval);
10977 }
10978 
10979 
10980 /*
10981  *    Function: sdmin
10982  *
10983  * Description: Routine to limit the size of a data transfer. Used in
10984  *		conjunction with physio(9F).
10985  *
10986  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10987  *
10988  *     Context: Kernel thread context.
10989  */
10990 
10991 static void
10992 sdmin(struct buf *bp)
10993 {
10994 	struct sd_lun	*un;
10995 	int		instance;
10996 
10997 	instance = SDUNIT(bp->b_edev);
10998 
10999 	un = ddi_get_soft_state(sd_state, instance);
11000 	ASSERT(un != NULL);
11001 
11002 	if (bp->b_bcount > un->un_max_xfer_size) {
11003 		bp->b_bcount = un->un_max_xfer_size;
11004 	}
11005 }
11006 
11007 
11008 /*
11009  *    Function: sdread
11010  *
11011  * Description: Driver's read(9e) entry point function.
11012  *
11013  *   Arguments: dev   - device number
11014  *		uio   - structure pointer describing where data is to be stored
11015  *			in user's space
11016  *		cred_p  - user credential pointer
11017  *
11018  * Return Code: ENXIO
11019  *		EIO
11020  *		EINVAL
11021  *		value returned by physio
11022  *
11023  *     Context: Kernel thread context.
11024  */
11025 /* ARGSUSED */
11026 static int
11027 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
11028 {
11029 	struct sd_lun	*un = NULL;
11030 	int		secmask;
11031 	int		err;
11032 
11033 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11034 		return (ENXIO);
11035 	}
11036 
11037 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11038 
11039 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11040 		mutex_enter(SD_MUTEX(un));
11041 		/*
11042 		 * Because the call to sd_ready_and_valid will issue I/O we
11043 		 * must wait here if either the device is suspended or
11044 		 * its power level is changing.
11045 		 */
11046 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11047 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11048 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11049 		}
11050 		un->un_ncmds_in_driver++;
11051 		mutex_exit(SD_MUTEX(un));
11052 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11053 			mutex_enter(SD_MUTEX(un));
11054 			un->un_ncmds_in_driver--;
11055 			ASSERT(un->un_ncmds_in_driver >= 0);
11056 			mutex_exit(SD_MUTEX(un));
11057 			return (EIO);
11058 		}
11059 		mutex_enter(SD_MUTEX(un));
11060 		un->un_ncmds_in_driver--;
11061 		ASSERT(un->un_ncmds_in_driver >= 0);
11062 		mutex_exit(SD_MUTEX(un));
11063 	}
11064 
11065 	/*
11066 	 * Read requests are restricted to multiples of the system block size.
11067 	 */
11068 	secmask = un->un_sys_blocksize - 1;
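	/*
	 * For illustration (assuming the common 512-byte un_sys_blocksize):
	 * secmask is then 0x1ff, and an offset or length is acceptable
	 * only when (value & secmask) == 0; e.g. offset 1024 passes but
	 * 1030 fails. This test relies on un_sys_blocksize being a power
	 * of two.
	 */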
11069 
11070 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11071 		SD_ERROR(SD_LOG_READ_WRITE, un,
11072 		    "sdread: file offset not modulo %d\n",
11073 		    un->un_sys_blocksize);
11074 		err = EINVAL;
11075 	} else if (uio->uio_iov->iov_len & (secmask)) {
11076 		SD_ERROR(SD_LOG_READ_WRITE, un,
11077 		    "sdread: transfer length not modulo %d\n",
11078 		    un->un_sys_blocksize);
11079 		err = EINVAL;
11080 	} else {
11081 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11082 	}
11083 	return (err);
11084 }
11085 
11086 
11087 /*
11088  *    Function: sdwrite
11089  *
11090  * Description: Driver's write(9e) entry point function.
11091  *
11092  *   Arguments: dev   - device number
11093  *		uio   - structure pointer describing where data is stored in
11094  *			user's space
11095  *		cred_p  - user credential pointer
11096  *
11097  * Return Code: ENXIO
11098  *		EIO
11099  *		EINVAL
11100  *		value returned by physio
11101  *
11102  *     Context: Kernel thread context.
11103  */
11104 /* ARGSUSED */
11105 static int
11106 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11107 {
11108 	struct sd_lun	*un = NULL;
11109 	int		secmask;
11110 	int		err;
11111 
11112 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11113 		return (ENXIO);
11114 	}
11115 
11116 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11117 
11118 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11119 		mutex_enter(SD_MUTEX(un));
11120 		/*
11121 		 * Because the call to sd_ready_and_valid will issue I/O we
11122 		 * must wait here if either the device is suspended or
11123 		 * its power level is changing.
11124 		 */
11125 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11126 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11127 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11128 		}
11129 		un->un_ncmds_in_driver++;
11130 		mutex_exit(SD_MUTEX(un));
11131 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11132 			mutex_enter(SD_MUTEX(un));
11133 			un->un_ncmds_in_driver--;
11134 			ASSERT(un->un_ncmds_in_driver >= 0);
11135 			mutex_exit(SD_MUTEX(un));
11136 			return (EIO);
11137 		}
11138 		mutex_enter(SD_MUTEX(un));
11139 		un->un_ncmds_in_driver--;
11140 		ASSERT(un->un_ncmds_in_driver >= 0);
11141 		mutex_exit(SD_MUTEX(un));
11142 	}
11143 
11144 	/*
11145 	 * Write requests are restricted to multiples of the system block size.
11146 	 */
11147 	secmask = un->un_sys_blocksize - 1;
11148 
11149 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11150 		SD_ERROR(SD_LOG_READ_WRITE, un,
11151 		    "sdwrite: file offset not modulo %d\n",
11152 		    un->un_sys_blocksize);
11153 		err = EINVAL;
11154 	} else if (uio->uio_iov->iov_len & (secmask)) {
11155 		SD_ERROR(SD_LOG_READ_WRITE, un,
11156 		    "sdwrite: transfer length not modulo %d\n",
11157 		    un->un_sys_blocksize);
11158 		err = EINVAL;
11159 	} else {
11160 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11161 	}
11162 	return (err);
11163 }
11164 
11165 
11166 /*
11167  *    Function: sdaread
11168  *
11169  * Description: Driver's aread(9e) entry point function.
11170  *
11171  *   Arguments: dev   - device number
11172  *		aio   - structure pointer describing where data is to be stored
11173  *		cred_p  - user credential pointer
11174  *
11175  * Return Code: ENXIO
11176  *		EIO
11177  *		EINVAL
11178  *		value returned by aphysio
11179  *
11180  *     Context: Kernel thread context.
11181  */
11182 /* ARGSUSED */
11183 static int
11184 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11185 {
11186 	struct sd_lun	*un = NULL;
11187 	struct uio	*uio = aio->aio_uio;
11188 	int		secmask;
11189 	int		err;
11190 
11191 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11192 		return (ENXIO);
11193 	}
11194 
11195 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11196 
11197 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11198 		mutex_enter(SD_MUTEX(un));
11199 		/*
11200 		 * Because the call to sd_ready_and_valid will issue I/O we
11201 		 * must wait here if either the device is suspended or
11202 		 * its power level is changing.
11203 		 */
11204 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11205 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11206 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11207 		}
11208 		un->un_ncmds_in_driver++;
11209 		mutex_exit(SD_MUTEX(un));
11210 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11211 			mutex_enter(SD_MUTEX(un));
11212 			un->un_ncmds_in_driver--;
11213 			ASSERT(un->un_ncmds_in_driver >= 0);
11214 			mutex_exit(SD_MUTEX(un));
11215 			return (EIO);
11216 		}
11217 		mutex_enter(SD_MUTEX(un));
11218 		un->un_ncmds_in_driver--;
11219 		ASSERT(un->un_ncmds_in_driver >= 0);
11220 		mutex_exit(SD_MUTEX(un));
11221 	}
11222 
11223 	/*
11224 	 * Read requests are restricted to multiples of the system block size.
11225 	 */
11226 	secmask = un->un_sys_blocksize - 1;
11227 
11228 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11229 		SD_ERROR(SD_LOG_READ_WRITE, un,
11230 		    "sdaread: file offset not modulo %d\n",
11231 		    un->un_sys_blocksize);
11232 		err = EINVAL;
11233 	} else if (uio->uio_iov->iov_len & (secmask)) {
11234 		SD_ERROR(SD_LOG_READ_WRITE, un,
11235 		    "sdaread: transfer length not modulo %d\n",
11236 		    un->un_sys_blocksize);
11237 		err = EINVAL;
11238 	} else {
11239 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11240 	}
11241 	return (err);
11242 }
11243 
11244 
11245 /*
11246  *    Function: sdawrite
11247  *
11248  * Description: Driver's awrite(9e) entry point function.
11249  *
11250  *   Arguments: dev   - device number
11251  *		aio   - structure pointer describing where data is stored
11252  *		cred_p  - user credential pointer
11253  *
11254  * Return Code: ENXIO
11255  *		EIO
11256  *		EINVAL
11257  *		value returned by aphysio
11258  *
11259  *     Context: Kernel thread context.
11260  */
11261 /* ARGSUSED */
11262 static int
11263 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11264 {
11265 	struct sd_lun	*un = NULL;
11266 	struct uio	*uio = aio->aio_uio;
11267 	int		secmask;
11268 	int		err;
11269 
11270 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11271 		return (ENXIO);
11272 	}
11273 
11274 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11275 
11276 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11277 		mutex_enter(SD_MUTEX(un));
11278 		/*
11279 		 * Because the call to sd_ready_and_valid will issue I/O we
11280 		 * must wait here if either the device is suspended or
11281 		 * its power level is changing.
11282 		 */
11283 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11284 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11285 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11286 		}
11287 		un->un_ncmds_in_driver++;
11288 		mutex_exit(SD_MUTEX(un));
11289 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11290 			mutex_enter(SD_MUTEX(un));
11291 			un->un_ncmds_in_driver--;
11292 			ASSERT(un->un_ncmds_in_driver >= 0);
11293 			mutex_exit(SD_MUTEX(un));
11294 			return (EIO);
11295 		}
11296 		mutex_enter(SD_MUTEX(un));
11297 		un->un_ncmds_in_driver--;
11298 		ASSERT(un->un_ncmds_in_driver >= 0);
11299 		mutex_exit(SD_MUTEX(un));
11300 	}
11301 
11302 	/*
11303 	 * Write requests are restricted to multiples of the system block size.
11304 	 */
11305 	secmask = un->un_sys_blocksize - 1;
11306 
11307 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11308 		SD_ERROR(SD_LOG_READ_WRITE, un,
11309 		    "sdawrite: file offset not modulo %d\n",
11310 		    un->un_sys_blocksize);
11311 		err = EINVAL;
11312 	} else if (uio->uio_iov->iov_len & (secmask)) {
11313 		SD_ERROR(SD_LOG_READ_WRITE, un,
11314 		    "sdawrite: transfer length not modulo %d\n",
11315 		    un->un_sys_blocksize);
11316 		err = EINVAL;
11317 	} else {
11318 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11319 	}
11320 	return (err);
11321 }
11322 
11323 
11324 
11325 
11326 
11327 /*
11328  * Driver IO processing follows the following sequence:
11329  *
11330  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11331  *         |                |                     ^
11332  *         v                v                     |
11333  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11334  *         |                |                     |                   |
11335  *         v                |                     |                   |
11336  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11337  *         |                |                     ^                   ^
11338  *         v                v                     |                   |
11339  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11340  *         |                |                     |                   |
11341  *     +---+                |                     +------------+      +-------+
11342  *     |                    |                                  |              |
11343  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11344  *     |                    v                                  |              |
11345  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11346  *     |                    |                                  ^              |
11347  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11348  *     |                    v                                  |              |
11349  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11350  *     |                    |                                  ^              |
11351  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11352  *     |                    v                                  |              |
11353  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11354  *     |                    |                                  ^              |
11355  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11356  *     |                    v                                  |              |
11357  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11358  *     |                    |                                  ^              |
11359  *     |                    |                                  |              |
11360  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11361  *                          |                           ^
11362  *                          v                           |
11363  *                   sd_core_iostart()                  |
11364  *                          |                           |
11365  *                          |                           +------>(*destroypkt)()
11366  *                          +-> sd_start_cmds() <-+     |           |
11367  *                          |                     |     |           v
11368  *                          |                     |     |  scsi_destroy_pkt(9F)
11369  *                          |                     |     |
11370  *                          +->(*initpkt)()       +- sdintr()
11371  *                          |  |                        |  |
11372  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11373  *                          |  +-> scsi_setup_cdb(9F)   |
11374  *                          |                           |
11375  *                          +--> scsi_transport(9F)     |
11376  *                                     |                |
11377  *                                     +----> SCSA ---->+
11378  *
11379  *
11380  * This code is based upon the following presumptions:
11381  *
11382  *   - iostart and iodone functions operate on buf(9S) structures. These
11383  *     functions perform the necessary operations on the buf(9S) and pass
11384  *     them along to the next function in the chain by using the macros
11385  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11386  *     (for iodone side functions).
11387  *
11388  *   - The iostart side functions may sleep. The iodone side functions
11389  *     are called under interrupt context and may NOT sleep. Therefore
11390  *     iodone side functions also may not call iostart side functions.
11391  *     (NOTE: iostart side functions should NOT sleep for memory, as
11392  *     this could result in deadlock.)
11393  *
11394  *   - An iostart side function may call its corresponding iodone side
11395  *     function directly (if necessary).
11396  *
11397  *   - In the event of an error, an iostart side function can return a buf(9S)
11398  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11399  *     b_error in the usual way of course).
11400  *
11401  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11402  *     requests to the iostart side functions.  The iostart side functions in
11403  *     this case would be called under the context of a taskq thread, so it's
11404  *     OK for them to block/sleep/spin in this case.
11405  *
11406  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11407  *     pass them along to the next function in the chain.  The corresponding
11408  *     iodone side functions must coalesce the "shadow" bufs and return
11409  *     the "original" buf to the next higher layer.
11410  *
11411  *   - The b_private field of the buf(9S) struct holds a pointer to
11412  *     an sd_xbuf struct, which contains information needed to
11413  *     construct the scsi_pkt for the command.
11414  *
11415  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11416  *     layer must acquire & release the SD_MUTEX(un) as needed.
11417  */
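
/*
 * A minimal iostart/iodone layer pair, as a sketch only (the names
 * sd_example_iostart/sd_example_iodone are hypothetical; the argument
 * pattern and macro usage follow the real layers such as sd_pm_iostart()
 * and sd_pm_iodone()):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(transform bp here; may block, but must not sleep for memory)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 *	static void
 *	sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(undo the iostart transform; interrupt context, may not sleep)
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */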
11418 
11419 
11420 /*
11421  * Create taskq for all targets in the system. This is created at
11422  * _init(9E) and destroyed at _fini(9E).
11423  *
11424  * Note: here we set the minalloc to a reasonably high number to ensure that
11425  * we will have an adequate supply of task entries available at interrupt time.
11426  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11427  * sd_taskq_create().  Since we do not want to sleep for allocations at
11428  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11429  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11430  * requests at any one instant in time.
11431  */
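
/*
 * Illustrative sketch only (never compiled): with maxalloc == minalloc
 * and TASKQ_PREPOPULATE, a KM_NOSLEEP dispatch never blocks for memory;
 * once the preallocated entries are exhausted, taskq_dispatch() simply
 * returns 0 (failure) and the caller must fail the request rather than
 * sleep at interrupt time. (some_task_func and arg are hypothetical
 * placeholders; see the real taskq_dispatch() call in
 * sd_mapblocksize_iodone() below for this pattern.)
 *
 *	if (taskq_dispatch(sd_tq, some_task_func, arg, KM_NOSLEEP) == 0) {
 *		bioerror(bp, EIO);
 *		bp->b_resid = bp->b_bcount;
 *	}
 */
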
11432 #define	SD_TASKQ_NUMTHREADS	8
11433 #define	SD_TASKQ_MINALLOC	256
11434 #define	SD_TASKQ_MAXALLOC	256
11435 
11436 static taskq_t	*sd_tq = NULL;
11437 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11438 
11439 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11440 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11441 
11442 /*
11443  * The following task queue is being created for the write part of
11444  * read-modify-write of non-512 block size devices.
11445  * Limit the number of threads to 1 for now. This number was chosen
11446  * because the queue currently applies only to DVD-RAM and MO drives,
11447  * for which performance is not the main criterion at this stage.
11448  * Note: whether a single taskq could be used instead remains to be explored.
11449  */
11450 #define	SD_WMR_TASKQ_NUMTHREADS	1
11451 static taskq_t	*sd_wmr_tq = NULL;
11452 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11453 
11454 /*
11455  *    Function: sd_taskq_create
11456  *
11457  * Description: Create taskq thread(s) and preallocate task entries
11458  *
11459  * Return Code: Returns a pointer to the allocated taskq_t.
11460  *
11461  *     Context: Can sleep. Requires blockable context.
11462  *
11463  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11464  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11465  *		- taskq_create() will block for memory; it will also panic
11466  *		  if it cannot create the requested number of threads.
11467  *		- Currently taskq_create() creates threads that cannot be
11468  *		  swapped.
11469  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11470  *		  supply of taskq entries at interrupt time (ie, so that we
11471  *		  do not have to sleep for memory)
11472  */
11473 
11474 static void
11475 sd_taskq_create(void)
11476 {
11477 	char	taskq_name[TASKQ_NAMELEN];
11478 
11479 	ASSERT(sd_tq == NULL);
11480 	ASSERT(sd_wmr_tq == NULL);
11481 
11482 	(void) snprintf(taskq_name, sizeof (taskq_name),
11483 	    "%s_drv_taskq", sd_label);
11484 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11485 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11486 	    TASKQ_PREPOPULATE));
11487 
11488 	(void) snprintf(taskq_name, sizeof (taskq_name),
11489 	    "%s_rmw_taskq", sd_label);
11490 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11491 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11492 	    TASKQ_PREPOPULATE));
11493 }
11494 
11495 
11496 /*
11497  *    Function: sd_taskq_delete
11498  *
11499  * Description: Complementary cleanup routine for sd_taskq_create().
11500  *
11501  *     Context: Kernel thread context.
11502  */
11503 
11504 static void
11505 sd_taskq_delete(void)
11506 {
11507 	ASSERT(sd_tq != NULL);
11508 	ASSERT(sd_wmr_tq != NULL);
11509 	taskq_destroy(sd_tq);
11510 	taskq_destroy(sd_wmr_tq);
11511 	sd_tq = NULL;
11512 	sd_wmr_tq = NULL;
11513 }
11514 
11515 
11516 /*
11517  *    Function: sdstrategy
11518  *
11519  * Description: Driver's strategy (9E) entry point function.
11520  *
11521  *   Arguments: bp - pointer to buf(9S)
11522  *
11523  * Return Code: Always returns zero
11524  *
11525  *     Context: Kernel thread context.
11526  */
11527 
11528 static int
11529 sdstrategy(struct buf *bp)
11530 {
11531 	struct sd_lun *un;
11532 
11533 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11534 	if (un == NULL) {
11535 		bioerror(bp, EIO);
11536 		bp->b_resid = bp->b_bcount;
11537 		biodone(bp);
11538 		return (0);
11539 	}
11540 	/* As was done in the past, fail new cmds if the state is dumping. */
11541 	if (un->un_state == SD_STATE_DUMPING) {
11542 		bioerror(bp, ENXIO);
11543 		bp->b_resid = bp->b_bcount;
11544 		biodone(bp);
11545 		return (0);
11546 	}
11547 
11548 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11549 
11550 	/*
11551 	 * Commands may sneak in while we released the mutex in
11552 	 * DDI_SUSPEND, so we should block new commands. However, old
11553 	 * commands that are still in the driver at this point should
11554 	 * still be allowed to drain.
11555 	 */
11556 	mutex_enter(SD_MUTEX(un));
11557 	/*
11558 	 * Must wait here if either the device is suspended or
11559 	 * its power level is changing.
11560 	 */
11561 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11562 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11563 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11564 	}
11565 
11566 	un->un_ncmds_in_driver++;
11567 
11568 	/*
11569 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11570 	 * call bp_mapin here to avoid having bp_mapin called in interrupt
11571 	 * context from the HBA's init_pkt routine.
11572 	 */
11573 	if (un->un_f_cfg_is_atapi == TRUE) {
11574 		mutex_exit(SD_MUTEX(un));
11575 		bp_mapin(bp);
11576 		mutex_enter(SD_MUTEX(un));
11577 	}
11578 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11579 	    un->un_ncmds_in_driver);
11580 
11581 	mutex_exit(SD_MUTEX(un));
11582 
11583 	/*
11584 	 * This will (eventually) allocate the sd_xbuf area and
11585 	 * call sd_xbuf_strategy().  We just want to return the
11586 	 * result of ddi_xbuf_qstrategy so that we have an
11587 	 * optimized tail call, which saves us a stack frame.
11588 	 */
11589 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11590 }
11591 
11592 
11593 /*
11594  *    Function: sd_xbuf_strategy
11595  *
11596  * Description: Function for initiating IO operations via the
11597  *		ddi_xbuf_qstrategy() mechanism.
11598  *
11599  *     Context: Kernel thread context.
11600  */
11601 
11602 static void
11603 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11604 {
11605 	struct sd_lun *un = arg;
11606 
11607 	ASSERT(bp != NULL);
11608 	ASSERT(xp != NULL);
11609 	ASSERT(un != NULL);
11610 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11611 
11612 	/*
11613 	 * Initialize the fields in the xbuf and save a pointer to the
11614 	 * xbuf in bp->b_private.
11615 	 */
11616 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11617 
11618 	/* Send the buf down the iostart chain */
11619 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11620 }
11621 
11622 
11623 /*
11624  *    Function: sd_xbuf_init
11625  *
11626  * Description: Prepare the given sd_xbuf struct for use.
11627  *
11628  *   Arguments: un - ptr to softstate
11629  *		bp - ptr to associated buf(9S)
11630  *		xp - ptr to associated sd_xbuf
11631  *		chain_type - IO chain type to use:
11632  *			SD_CHAIN_NULL
11633  *			SD_CHAIN_BUFIO
11634  *			SD_CHAIN_USCSI
11635  *			SD_CHAIN_DIRECT
11636  *			SD_CHAIN_DIRECT_PRIORITY
11637  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11638  *			initialization; may be NULL if none.
11639  *
11640  *     Context: Kernel thread context
11641  */
11642 
11643 static void
11644 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11645 	uchar_t chain_type, void *pktinfop)
11646 {
11647 	int index;
11648 
11649 	ASSERT(un != NULL);
11650 	ASSERT(bp != NULL);
11651 	ASSERT(xp != NULL);
11652 
11653 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11654 	    bp, chain_type);
11655 
11656 	xp->xb_un	= un;
11657 	xp->xb_pktp	= NULL;
11658 	xp->xb_pktinfo	= pktinfop;
11659 	xp->xb_private	= bp->b_private;
11660 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11661 
11662 	/*
11663 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11664 	 * upon the specified chain type to use.
11665 	 */
11666 	switch (chain_type) {
11667 	case SD_CHAIN_NULL:
11668 		/*
11669 		 * Fall thru to just use the values for the buf type, even
11670 		 * tho for the NULL chain these values will never be used.
11671 		 */
11672 		/* FALLTHRU */
11673 	case SD_CHAIN_BUFIO:
11674 		index = un->un_buf_chain_type;
11675 		break;
11676 	case SD_CHAIN_USCSI:
11677 		index = un->un_uscsi_chain_type;
11678 		break;
11679 	case SD_CHAIN_DIRECT:
11680 		index = un->un_direct_chain_type;
11681 		break;
11682 	case SD_CHAIN_DIRECT_PRIORITY:
11683 		index = un->un_priority_chain_type;
11684 		break;
11685 	default:
11686 		/* We're really broken if we ever get here... */
11687 		panic("sd_xbuf_init: illegal chain type!");
11688 		/*NOTREACHED*/
11689 	}
11690 
11691 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11692 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11693 
11694 	/*
11695 	 * It might be a bit easier to simply bzero the entire xbuf above,
11696 	 * but it turns out that since we init a fair number of members anyway,
11697 	 * we save a fair number of cycles by doing explicit assignment of zero.
11698 	 */
11699 	xp->xb_pkt_flags	= 0;
11700 	xp->xb_dma_resid	= 0;
11701 	xp->xb_retry_count	= 0;
11702 	xp->xb_victim_retry_count = 0;
11703 	xp->xb_ua_retry_count	= 0;
11704 	xp->xb_sense_bp		= NULL;
11705 	xp->xb_sense_status	= 0;
11706 	xp->xb_sense_state	= 0;
11707 	xp->xb_sense_resid	= 0;
11708 
11709 	bp->b_private	= xp;
11710 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11711 	bp->b_resid	= 0;
11712 	bp->av_forw	= NULL;
11713 	bp->av_back	= NULL;
11714 	bioerror(bp, 0);
11715 
11716 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11717 }
11718 
11719 
11720 /*
11721  *    Function: sd_uscsi_strategy
11722  *
11723  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11724  *
11725  *   Arguments: bp - buf struct ptr
11726  *
11727  * Return Code: Always returns 0
11728  *
11729  *     Context: Kernel thread context
11730  */
11731 
11732 static int
11733 sd_uscsi_strategy(struct buf *bp)
11734 {
11735 	struct sd_lun		*un;
11736 	struct sd_uscsi_info	*uip;
11737 	struct sd_xbuf		*xp;
11738 	uchar_t			chain_type;
11739 
11740 	ASSERT(bp != NULL);
11741 
11742 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11743 	if (un == NULL) {
11744 		bioerror(bp, EIO);
11745 		bp->b_resid = bp->b_bcount;
11746 		biodone(bp);
11747 		return (0);
11748 	}
11749 
11750 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11751 
11752 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11753 
11754 	mutex_enter(SD_MUTEX(un));
11755 	/*
11756 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11757 	 * call bp_mapin here to avoid having bp_mapin called in interrupt
11758 	 * context from the HBA's init_pkt routine.
11759 	 */
11760 	if (un->un_f_cfg_is_atapi == TRUE) {
11761 		mutex_exit(SD_MUTEX(un));
11762 		bp_mapin(bp);
11763 		mutex_enter(SD_MUTEX(un));
11764 	}
11765 	un->un_ncmds_in_driver++;
11766 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11767 	    un->un_ncmds_in_driver);
11768 	mutex_exit(SD_MUTEX(un));
11769 
11770 	/*
11771 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11772 	 */
11773 	ASSERT(bp->b_private != NULL);
11774 	uip = (struct sd_uscsi_info *)bp->b_private;
11775 
11776 	switch (uip->ui_flags) {
11777 	case SD_PATH_DIRECT:
11778 		chain_type = SD_CHAIN_DIRECT;
11779 		break;
11780 	case SD_PATH_DIRECT_PRIORITY:
11781 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11782 		break;
11783 	default:
11784 		chain_type = SD_CHAIN_USCSI;
11785 		break;
11786 	}
11787 
11788 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11789 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11790 
11791 	/* Use the index obtained within xbuf_init */
11792 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11793 
11794 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11795 
11796 	return (0);
11797 }
11798 
11799 
11800 /*
11801  * These routines perform raw i/o operations.
11802  */
11803 /*ARGSUSED*/
11804 static void
11805 sduscsimin(struct buf *bp)
11806 {
11807 	/*
11808 	 * Do not break up the transfer, because the CDB count would then
11809 	 * be incorrect and data underruns would result (incomplete
11810 	 * read/writes, which would be retried and then fail; see
11811 	 * sdintr()).
11812 	 */
11813 }
11814 
11815 
11816 
11817 /*
11818  *    Function: sd_send_scsi_cmd
11819  *
11820  * Description: Runs a USCSI command for user (when called thru sdioctl),
11821  *		or for the driver
11822  *
11823  *   Arguments: dev - the dev_t for the device
11824  *		incmd - ptr to a valid uscsi_cmd struct
11825  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11826  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11827  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11828  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11829  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11830  *			to use the USCSI "direct" chain and bypass the normal
11831  *			command waitq.
11832  *
11833  * Return Code: 0 -  successful completion of the given command
11834  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11835  *		ENXIO  - soft state not found for specified dev
11836  *		EINVAL
11837  *		EFAULT - copyin/copyout error
11838  *		return code of biowait(9F) or physio(9F):
11839  *			EIO - IO error, caller may check incmd->uscsi_status
11840  *			ENXIO
11841  *			EACCES - reservation conflict
11842  *
11843  *     Context: Waits for command to complete. Can sleep.
11844  */
11845 
11846 static int
11847 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11848 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11849 	int path_flag)
11850 {
11851 	struct sd_uscsi_info	*uip;
11852 	struct uscsi_cmd	*uscmd;
11853 	struct sd_lun	*un;
11854 	struct buf	*bp;
11855 	int	rval;
11856 	int	flags;
11857 
11858 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11859 	if (un == NULL) {
11860 		return (ENXIO);
11861 	}
11862 
11863 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11864 
11865 #ifdef SDDEBUG
11866 	switch (dataspace) {
11867 	case UIO_USERSPACE:
11868 		SD_TRACE(SD_LOG_IO, un,
11869 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11870 		break;
11871 	case UIO_SYSSPACE:
11872 		SD_TRACE(SD_LOG_IO, un,
11873 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11874 		break;
11875 	default:
11876 		SD_TRACE(SD_LOG_IO, un,
11877 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11878 		break;
11879 	}
11880 #endif
11881 
11882 	/*
11883 	 * Perform resets directly; no need to generate a command to do it.
11884 	 */
11885 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11886 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11887 		    RESET_ALL : RESET_TARGET;
11888 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11889 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11890 			/* Reset attempt was unsuccessful */
11891 			SD_TRACE(SD_LOG_IO, un,
11892 			    "sd_send_scsi_cmd: reset: failure\n");
11893 			return (EIO);
11894 		}
11895 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11896 		return (0);
11897 	}
11898 
11899 	/* Perfunctory sanity check... */
11900 	if (incmd->uscsi_cdblen <= 0) {
11901 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11902 		    "invalid uscsi_cdblen, returning EINVAL\n");
11903 		return (EINVAL);
11904 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11905 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11906 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11907 		return (EINVAL);
11908 	}
11909 
11910 	/*
11911 	 * In order to not worry about where the uscsi structure came from
11912 	 * (or where the cdb it points to came from) we're going to make
11913 	 * kmem_alloc'd copies of them here. This will also allow reference
11914 	 * to the data they contain long after this process has gone to
11915 	 * sleep and its kernel stack has been unmapped, etc.
11916 	 *
11917 	 * First get some memory for the uscsi_cmd struct and copy the
11918 	 * contents of the given uscsi_cmd struct into it.
11919 	 */
11920 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11921 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11922 
11923 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11924 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11925 
11926 	/*
11927 	 * Now get some space for the CDB, and copy the given CDB into
11928 	 * it. Use ddi_copyin() in case the data is in user space.
11929 	 */
11930 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11931 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11932 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11933 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11934 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11935 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11936 		return (EFAULT);
11937 	}
11938 
11939 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11940 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11941 
11942 	bp = getrbuf(KM_SLEEP);
11943 
11944 	/*
11945 	 * Allocate an sd_uscsi_info struct and fill it with the info
11946 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11947 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11948 	 * since we allocate the buf here in this function, we do not
11949 	 * need to preserve the prior contents of b_private.
11950 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11951 	 */
11952 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11953 	uip->ui_flags = path_flag;
11954 	uip->ui_cmdp  = uscmd;
11955 	bp->b_private = uip;
11956 
11957 	/*
11958 	 * Initialize Request Sense buffering, if requested.
11959 	 */
11960 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11961 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11962 		/*
11963 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11964 		 * buffer, but we replace this with a kernel buffer that
11965 		 * we allocate to use with the sense data. The sense data
11966 		 * (if present) gets copied into this new buffer before the
11967 		 * command is completed.  Then we copy the sense data from
11968 		 * our allocated buf into the caller's buffer below. Note
11969 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
11970 		 * below to perform the copy back to the caller's buf.
11971 		 */
11972 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
11973 		if (rqbufspace == UIO_USERSPACE) {
11974 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
11975 			uscmd->uscsi_rqresid = SENSE_LENGTH;
11976 		} else {
11977 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
11978 			uscmd->uscsi_rqlen   = rlen;
11979 			uscmd->uscsi_rqresid = rlen;
11980 		}
11981 	} else {
11982 		uscmd->uscsi_rqbuf = NULL;
11983 		uscmd->uscsi_rqlen   = 0;
11984 		uscmd->uscsi_rqresid = 0;
11985 	}
11986 
11987 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
11988 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
11989 
11990 	if (un->un_f_is_fibre == FALSE) {
11991 		/*
11992 		 * Force asynchronous mode, if necessary.  Doing this here
11993 		 * has the unfortunate effect of running other queued
11994 		 * commands async also, but since the main purpose of this
11995 		 * capability is downloading new drive firmware, we can
11996 		 * probably live with it.
11997 		 */
11998 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
11999 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12000 				== 1) {
12001 				if (scsi_ifsetcap(SD_ADDRESS(un),
12002 					    "synchronous", 0, 1) == 1) {
12003 					SD_TRACE(SD_LOG_IO, un,
12004 					"sd_send_scsi_cmd: forced async ok\n");
12005 				} else {
12006 					SD_TRACE(SD_LOG_IO, un,
12007 					"sd_send_scsi_cmd: "
12008 					"forced async failed\n");
12009 					rval = EINVAL;
12010 					goto done;
12011 				}
12012 			}
12013 		}
12014 
12015 		/*
12016 		 * Re-enable synchronous mode, if requested
12017 		 */
12018 		if (uscmd->uscsi_flags & USCSI_SYNC) {
12019 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12020 				== 0) {
12021 				int i = scsi_ifsetcap(SD_ADDRESS(un),
12022 						"synchronous", 1, 1);
12023 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12024 					"re-enabled sync %s\n",
12025 					(i == 1) ? "ok" : "failed");
12026 			}
12027 		}
12028 	}
12029 
12030 	/*
12031 	 * Commands sent with priority are intended for error recovery
12032 	 * situations, and do not have retries performed.
12033 	 */
12034 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12035 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12036 	}
12037 
12038 	/*
12039 	 * If we're going to do actual I/O, let physio do all the right things
12040 	 */
12041 	if (uscmd->uscsi_buflen != 0) {
12042 		struct iovec	aiov;
12043 		struct uio	auio;
12044 		struct uio	*uio = &auio;
12045 
12046 		bzero(&auio, sizeof (struct uio));
12047 		bzero(&aiov, sizeof (struct iovec));
12048 		aiov.iov_base = uscmd->uscsi_bufaddr;
12049 		aiov.iov_len  = uscmd->uscsi_buflen;
12050 		uio->uio_iov  = &aiov;
12051 
12052 		uio->uio_iovcnt  = 1;
12053 		uio->uio_resid   = uscmd->uscsi_buflen;
12054 		uio->uio_segflg  = dataspace;
12055 
12056 		/*
12057 		 * physio() will block here until the command completes....
12058 		 */
12059 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12060 
12061 		rval = physio(sd_uscsi_strategy, bp, dev,
12062 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12063 		    sduscsimin, uio);
12064 
12065 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12066 		    "returned from physio with 0x%x\n", rval);
12067 
12068 	} else {
12069 		/*
12070 		 * We have to mimic what physio would do here! Argh!
12071 		 */
12072 		bp->b_flags  = B_BUSY |
12073 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12074 		bp->b_edev   = dev;
12075 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12076 		bp->b_bcount = 0;
12077 		bp->b_blkno  = 0;
12078 
12079 		SD_TRACE(SD_LOG_IO, un,
12080 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12081 
12082 		(void) sd_uscsi_strategy(bp);
12083 
12084 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12085 
12086 		rval = biowait(bp);
12087 
12088 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12089 		    "returned from  biowait with 0x%x\n", rval);
12090 	}
12091 
12092 done:
12093 
12094 #ifdef SDDEBUG
12095 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12096 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12097 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12098 	if (uscmd->uscsi_bufaddr != NULL) {
12099 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12100 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12101 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12102 		if (dataspace == UIO_SYSSPACE) {
12103 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12104 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12105 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12106 		}
12107 	}
12108 #endif
12109 
12110 	/*
12111 	 * Get the status and residual to return to the caller.
12112 	 */
12113 	incmd->uscsi_status = uscmd->uscsi_status;
12114 	incmd->uscsi_resid  = uscmd->uscsi_resid;
12115 
12116 	/*
12117 	 * If the caller wants sense data, copy back whatever sense data
12118 	 * we may have gotten, and update the relevant rqsense info.
12119 	 */
12120 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12121 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12122 
12123 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12124 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
12125 
12126 		/* Update the Request Sense status and resid */
12127 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12128 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12129 
12130 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12131 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12132 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12133 
12134 		/* Copy out the sense data for user processes */
12135 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12136 			int flags =
12137 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12138 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12139 			    rqlen, flags) != 0) {
12140 				rval = EFAULT;
12141 			}
12142 			/*
12143 			 * Note: Can't touch incmd->uscsi_rqbuf so use
12144 			 * uscmd->uscsi_rqbuf instead. They're the same.
12145 			 */
12146 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12147 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12148 			    incmd->uscsi_rqbuf, rqlen);
12149 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12150 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12151 		}
12152 	}
12153 
12154 	/*
12155 	 * Free allocated resources and return; mapout the buf in case it was
12156 	 * mapped in by a lower layer.
12157 	 */
12158 	bp_mapout(bp);
12159 	freerbuf(bp);
12160 	kmem_free(uip, sizeof (struct sd_uscsi_info));
12161 	if (uscmd->uscsi_rqbuf != NULL) {
12162 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12163 	}
12164 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12165 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12166 
12167 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12168 
12169 	return (rval);
12170 }
12171 
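/*
 * Illustrative sketch only (never compiled): a hypothetical in-kernel
 * caller of sd_send_scsi_cmd(), issuing a TEST UNIT READY through the
 * "direct" chain. The CDB shown is the standard 6-byte TEST UNIT READY;
 * since everything lives in kernel space, all three space arguments are
 * UIO_SYSSPACE.
 *
 *	union scsi_cdb		cdb;
 *	struct uscsi_cmd	ucmd;
 *	int			rval;
 *
 *	bzero(&cdb, sizeof (cdb));
 *	bzero(&ucmd, sizeof (ucmd));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb    = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags  = USCSI_SILENT;
 *	rval = sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE, UIO_SYSSPACE,
 *	    UIO_SYSSPACE, SD_PATH_DIRECT);
 */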
12172 
12173 /*
12174  *    Function: sd_buf_iodone
12175  *
12176  * Description: Frees the sd_xbuf & returns the buf to its originator.
12177  *
12178  *     Context: May be called from interrupt context.
12179  */
12180 /* ARGSUSED */
12181 static void
12182 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12183 {
12184 	struct sd_xbuf *xp;
12185 
12186 	ASSERT(un != NULL);
12187 	ASSERT(bp != NULL);
12188 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12189 
12190 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12191 
12192 	xp = SD_GET_XBUF(bp);
12193 	ASSERT(xp != NULL);
12194 
12195 	mutex_enter(SD_MUTEX(un));
12196 
12197 	/*
12198 	 * Record the time at which the command completed.
12199 	 * This is used to determine whether the device has been
12200 	 * idle long enough to be declared idle to the PM framework,
12201 	 * which lowers overhead and therefore improves performance
12202 	 * per I/O operation.
12203 	 */
12204 	un->un_pm_idle_time = ddi_get_time();
12205 
12206 	un->un_ncmds_in_driver--;
12207 	ASSERT(un->un_ncmds_in_driver >= 0);
12208 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12209 	    un->un_ncmds_in_driver);
12210 
12211 	mutex_exit(SD_MUTEX(un));
12212 
12213 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12214 	biodone(bp);				/* bp is gone after this */
12215 
12216 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12217 }
12218 
12219 
12220 /*
12221  *    Function: sd_uscsi_iodone
12222  *
12223  * Description: Frees the sd_xbuf & returns the buf to its originator.
12224  *
12225  *     Context: May be called from interrupt context.
12226  */
12227 /* ARGSUSED */
12228 static void
12229 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12230 {
12231 	struct sd_xbuf *xp;
12232 
12233 	ASSERT(un != NULL);
12234 	ASSERT(bp != NULL);
12235 
12236 	xp = SD_GET_XBUF(bp);
12237 	ASSERT(xp != NULL);
12238 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12239 
12240 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12241 
12242 	bp->b_private = xp->xb_private;
12243 
12244 	mutex_enter(SD_MUTEX(un));
12245 
12246 	/*
12247 	 * Record the time at which the command completed.
12248 	 * This is used to determine whether the device has been
12249 	 * idle long enough to be declared idle to the PM framework,
12250 	 * which lowers overhead and therefore improves performance
12251 	 * per I/O operation.
12252 	 */
12253 	un->un_pm_idle_time = ddi_get_time();
12254 
12255 	un->un_ncmds_in_driver--;
12256 	ASSERT(un->un_ncmds_in_driver >= 0);
12257 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12258 	    un->un_ncmds_in_driver);
12259 
12260 	mutex_exit(SD_MUTEX(un));
12261 
12262 	kmem_free(xp, sizeof (struct sd_xbuf));
12263 	biodone(bp);
12264 
12265 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12266 }
12267 
12268 
12269 /*
12270  *    Function: sd_mapblockaddr_iostart
12271  *
12272  * Description: Verify the request lies within the partition limits for
12273  *		the indicated minor device.  Issue "overrun" buf if
12274  *		request would exceed partition range.  Converts
12275  *		partition-relative block address to absolute.
12276  *
12277  *     Context: Can sleep
12278  *
12279  *      Issues: This follows what the old code did, in terms of accessing
12280  *		some of the partition info in the unit struct without holding
12281  *		the mutex.  This is a general issue: if the partition info
12282  *		can be altered while IO is in progress... as soon as we send
12283  *		a buf, its partitioning can be invalid before it gets to the
12284  *		device.  Probably the right fix is to move partitioning out
12285  *		of the driver entirely.
12286  */
12287 
12288 static void
12289 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12290 {
12291 	daddr_t	nblocks;	/* #blocks in the given partition */
12292 	daddr_t	blocknum;	/* Block number specified by the buf */
12293 	size_t	requested_nblocks;
12294 	size_t	available_nblocks;
12295 	int	partition;
12296 	diskaddr_t	partition_offset;
12297 	struct sd_xbuf *xp;
12298 
12299 
12300 	ASSERT(un != NULL);
12301 	ASSERT(bp != NULL);
12302 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12303 
12304 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12305 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12306 
12307 	xp = SD_GET_XBUF(bp);
12308 	ASSERT(xp != NULL);
12309 
12310 	/*
12311 	 * If the geometry is not indicated as valid, attempt to access
12312 	 * the unit & verify the geometry/label. This can be the case for
12313 	 * removable-media devices, or if the device was opened in
12314 	 * NDELAY/NONBLOCK mode.
12315 	 */
12316 	if ((un->un_f_geometry_is_valid != TRUE) &&
12317 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12318 		/*
12319 		 * For removable devices it is possible to start an I/O
12320 		 * without media present by opening the device in nodelay mode.
12321 		 * Also for writable CDs there can be many scenarios where
12322 		 * there is no geometry yet but volume manager is trying to
12323 		 * issue a read() just because it can see TOC on the CD. So
12324 		 * do not print a message for removables.
12325 		 */
12326 		if (!un->un_f_has_removable_media) {
12327 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12328 			    "i/o to invalid geometry\n");
12329 		}
12330 		bioerror(bp, EIO);
12331 		bp->b_resid = bp->b_bcount;
12332 		SD_BEGIN_IODONE(index, un, bp);
12333 		return;
12334 	}
12335 
12336 	partition = SDPART(bp->b_edev);
12337 
12339 	nblocks = un->un_map[partition].dkl_nblk;    /* #blocks in partition */
12340 
12341 	/* Use of a local variable potentially improves performance slightly */
12342 	partition_offset = un->un_offset[partition];
12343 
12344 	/*
12345 	 * blocknum is the starting block number of the request. At this
12346 	 * point it is still relative to the start of the minor device.
12347 	 */
12348 	blocknum = xp->xb_blkno;
12349 
12350 	/*
12351 	 * Legacy: If the starting block number is one past the last block
12352 	 * in the partition, do not set B_ERROR in the buf.
12353 	 */
12354 	if (blocknum == nblocks)  {
12355 		goto error_exit;
12356 	}
12357 
12358 	/*
12359 	 * Confirm that the first block of the request lies within the
12360 	 * partition limits. Also the requested number of bytes must be
12361 	 * a multiple of the system block size.
12362 	 */
12363 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12364 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12365 		bp->b_flags |= B_ERROR;
12366 		goto error_exit;
12367 	}
12368 
12369 	/*
12370 	 * If the requested # blocks exceeds the available # blocks, that
12371 	 * is an overrun of the partition.
12372 	 */
12373 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12374 	available_nblocks = (size_t)(nblocks - blocknum);
12375 	ASSERT(nblocks >= blocknum);
12376 
12377 	if (requested_nblocks > available_nblocks) {
12378 		/*
12379 		 * Allocate an "overrun" buf to allow the request to proceed
12380 		 * for the amount of space available in the partition. The
12381 		 * amount not transferred will be added into the b_resid
12382 		 * when the operation is complete. The overrun buf
12383 		 * replaces the original buf here, and the original buf
12384 		 * is saved inside the overrun buf, for later use.
12385 		 */
12386 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12387 		    (offset_t)(requested_nblocks - available_nblocks));
12388 		size_t count = bp->b_bcount - resid;
12389 		/*
12390 		 * Note: count is unsigned and thus can never be negative,
12391 		 * so instead ASSERT that the original values are
12392 		 * consistent.
12393 		 */
12394 		ASSERT(bp->b_bcount >= resid);
12395 
12396 		bp = sd_bioclone_alloc(bp, count, blocknum,
12397 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12398 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12399 		ASSERT(xp != NULL);
12400 	}
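
	/*
	 * Worked example with illustrative numbers only: given a 512-byte
	 * system blocksize, nblocks = 100, blocknum = 90, and a request
	 * for 20 blocks (b_bcount = 10240), then:
	 *	available_nblocks = 100 - 90 = 10
	 *	resid = (20 - 10) * 512 = 5120 bytes
	 *	count = 10240 - 5120 = 5120 bytes
	 * The overrun buf carries the first 10 blocks to the device; the
	 * remaining 5120 bytes show up in b_resid when the original buf
	 * is completed in sd_mapblockaddr_iodone().
	 */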
12401 
12402 	/* At this point there should be no residual for this buf. */
12403 	ASSERT(bp->b_resid == 0);
12404 
12405 	/* Convert the block number to an absolute address. */
12406 	xp->xb_blkno += partition_offset;
12407 
12408 	SD_NEXT_IOSTART(index, un, bp);
12409 
12410 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12411 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12412 
12413 	return;
12414 
12415 error_exit:
12416 	bp->b_resid = bp->b_bcount;
12417 	SD_BEGIN_IODONE(index, un, bp);
12418 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12419 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12420 }
12421 
12422 
12423 /*
12424  *    Function: sd_mapblockaddr_iodone
12425  *
12426  * Description: Completion-side processing for partition management.
12427  *
12428  *     Context: May be called under interrupt context
12429  */
12430 
12431 static void
12432 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12433 {
12434 	/* int	partition; */	/* Not used, see below. */
12435 	ASSERT(un != NULL);
12436 	ASSERT(bp != NULL);
12437 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12438 
12439 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12440 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12441 
12442 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12443 		/*
12444 		 * We have an "overrun" buf to deal with...
12445 		 */
12446 		struct sd_xbuf	*xp;
12447 		struct buf	*obp;	/* ptr to the original buf */
12448 
12449 		xp = SD_GET_XBUF(bp);
12450 		ASSERT(xp != NULL);
12451 
12452 		/* Retrieve the pointer to the original buf */
12453 		obp = (struct buf *)xp->xb_private;
12454 		ASSERT(obp != NULL);
12455 
12456 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12457 		bioerror(obp, bp->b_error);
12458 
12459 		sd_bioclone_free(bp);
12460 
12461 		/*
12462 		 * Get back the original buf.
12463 		 * Note that since the restoration of xb_blkno below
12464 		 * was removed, the sd_xbuf is not needed.
12465 		 */
12466 		bp = obp;
12467 		/*
12468 		 * xp = SD_GET_XBUF(bp);
12469 		 * ASSERT(xp != NULL);
12470 		 */
12471 	}
12472 
12473 	/*
12474 	 * Convert xp->xb_blkno back to a minor-device relative value.
12475 	 * Note: this has been commented out, as it is not needed in the
12476 	 * current implementation of the driver (ie, since this function
12477 	 * is at the top of the layering chains, so the info will be
12478 	 * discarded) and it is in the "hot" IO path.
12479 	 *
12480 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12481 	 * xp->xb_blkno -= un->un_offset[partition];
12482 	 */
12483 
12484 	SD_NEXT_IODONE(index, un, bp);
12485 
12486 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12487 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12488 }
12489 
12490 
12491 /*
12492  *    Function: sd_mapblocksize_iostart
12493  *
12494  * Description: Convert between system block size (un->un_sys_blocksize)
12495  *		and target block size (un->un_tgt_blocksize).
12496  *
12497  *     Context: Can sleep to allocate resources.
12498  *
12499  * Assumptions: A higher layer has already performed any partition validation,
12500  *		and converted the xp->xb_blkno to an absolute value relative
12501  *		to the start of the device.
12502  *
12503  *		It is also assumed that the higher layer has implemented
12504  *		an "overrun" mechanism for the case where the request would
12505  *		read/write beyond the end of a partition.  In this case we
12506  *		assume (and ASSERT) that bp->b_resid == 0.
12507  *
12508  *		Note: The implementation for this routine assumes the target
12509  *		block size remains constant between allocation and transport.
12510  */
12511 
12512 static void
12513 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12514 {
12515 	struct sd_mapblocksize_info	*bsp;
12516 	struct sd_xbuf			*xp;
12517 	offset_t first_byte;
12518 	daddr_t	start_block, end_block;
12519 	daddr_t	request_bytes;
12520 	ushort_t is_aligned = FALSE;
12521 
12522 	ASSERT(un != NULL);
12523 	ASSERT(bp != NULL);
12524 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12525 	ASSERT(bp->b_resid == 0);
12526 
12527 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12528 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12529 
12530 	/*
12531 	 * For a non-writable CD, a write request is an error
12532 	 */
12533 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12534 	    (un->un_f_mmc_writable_media == FALSE)) {
12535 		bioerror(bp, EIO);
12536 		bp->b_resid = bp->b_bcount;
12537 		SD_BEGIN_IODONE(index, un, bp);
12538 		return;
12539 	}
12540 
12541 	/*
12542 	 * We do not need a shadow buf if the device is using
12543 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12544 	 * In this case there is no layer-private data block allocated.
12545 	 */
12546 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12547 	    (bp->b_bcount == 0)) {
12548 		goto done;
12549 	}
12550 
12551 #if defined(__i386) || defined(__amd64)
12552 	/* We do not support non-block-aligned transfers for ROD devices */
12553 	ASSERT(!ISROD(un));
12554 #endif
12555 
12556 	xp = SD_GET_XBUF(bp);
12557 	ASSERT(xp != NULL);
12558 
12559 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12560 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12561 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12562 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12563 	    "request start block:0x%x\n", xp->xb_blkno);
12564 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12565 	    "request len:0x%x\n", bp->b_bcount);
12566 
12567 	/*
12568 	 * Allocate the layer-private data area for the mapblocksize layer.
12569 	 * Layers are allowed to use the xb_private member of the sd_xbuf
12570 	 * struct to store the pointer to their layer-private data block, but
12571 	 * each layer also has the responsibility of restoring the prior
12572 	 * contents of xb_private before returning the buf/xbuf to the
12573 	 * higher layer that sent it.
12574 	 *
12575 	 * Here we save the prior contents of xp->xb_private into the
12576 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12577 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12578 	 * the layer-private area and returning the buf/xbuf to the layer
12579 	 * that sent it.
12580 	 *
12581 	 * Note that here we use kmem_zalloc for the allocation as there are
12582 	 * parts of the mapblocksize code that expect certain fields to be
12583 	 * zero unless explicitly set to a required value.
12584 	 */
12585 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12586 	bsp->mbs_oprivate = xp->xb_private;
12587 	xp->xb_private = bsp;
12588 
12589 	/*
12590 	 * This treats the data on the disk (target) as an array of bytes.
12591 	 * first_byte is the byte offset, from the beginning of the device,
12592 	 * to the location of the request. This is converted from a
12593 	 * un->un_sys_blocksize block address to a byte offset, and then back
12594 	 * to a block address based upon a un->un_tgt_blocksize block size.
12595 	 *
12596 	 * xp->xb_blkno should be absolute upon entry into this function,
12597 	 * but it is based upon partitions that use the "system"
12598 	 * block size. It must be adjusted to reflect the block size of
12599 	 * the target.
12600 	 *
12601 	 * Note that end_block is actually the block that follows the last
12602 	 * block of the request, but that's what is needed for the computation.
12603 	 */
12604 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12605 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12606 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12607 	    un->un_tgt_blocksize;
12608 
12609 	/* request_bytes is rounded up to a multiple of the target block size */
12610 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
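
	/*
	 * Worked example with illustrative numbers only: with
	 * un_sys_blocksize = 512, un_tgt_blocksize = 2048, and a
	 * 1024-byte request at system block 5:
	 *	first_byte    = 5 * 512 = 2560
	 *	start_block   = 2560 / 2048 = 1	(target blocks)
	 *	end_block     = (2560 + 1024 + 2047) / 2048 = 2
	 *	request_bytes = (2 - 1) * 2048 = 2048
	 * Since 2560 is not a multiple of 2048 the request is unaligned,
	 * so a 2048-byte shadow buf is allocated below.
	 */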
12611 
12612 	/*
12613 	 * See if the starting address of the request and the request
12614 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12615 	 * then we do not need to allocate a shadow buf to handle the request.
12616 	 */
12617 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12618 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12619 		is_aligned = TRUE;
12620 	}
12621 
12622 	if ((bp->b_flags & B_READ) == 0) {
12623 		/*
12624 		 * Lock the range for a write operation. An aligned request is
12625 		 * considered a simple write; otherwise the request must be a
12626 		 * read-modify-write.
12627 		 */
12628 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12629 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12630 	}
12631 
12632 	/*
12633 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12634 	 * where the READ command is generated for a read-modify-write. (The
12635 	 * write phase is deferred until after the read completes.)
12636 	 */
12637 	if (is_aligned == FALSE) {
12638 
12639 		struct sd_mapblocksize_info	*shadow_bsp;
12640 		struct sd_xbuf	*shadow_xp;
12641 		struct buf	*shadow_bp;
12642 
12643 		/*
12644 		 * Allocate the shadow buf and its associated xbuf. Note that
12645 		 * after this call the xb_blkno value in both the original
12646 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12647 		 * same: absolute relative to the start of the device, and
12648 		 * adjusted for the target block size. The b_blkno in the
12649 		 * shadow buf will also be set to this value. We should never
12650 		 * change b_blkno in the original bp however.
12651 		 *
12652 		 * Note also that the shadow buf will always need to be a
12653 		 * READ command, regardless of whether the incoming command
12654 		 * is a READ or a WRITE.
12655 		 */
12656 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12657 		    xp->xb_blkno,
12658 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12659 
12660 		shadow_xp = SD_GET_XBUF(shadow_bp);
12661 
12662 		/*
12663 		 * Allocate the layer-private data for the shadow buf.
12664 		 * (No need to preserve xb_private in the shadow xbuf.)
12665 		 */
12666 		shadow_xp->xb_private = shadow_bsp =
12667 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12668 
12669 		/*
12670 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12671 		 * to figure out where the start of the user data is (based upon
12672 		 * the system block size) in the data returned by the READ
12673 		 * command (which will be based upon the target blocksize). Note
12674 		 * that this is only really used if the request is unaligned.
12675 		 */
12676 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12677 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12678 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12679 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12680 
12681 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12682 
12683 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12684 
12685 		/* Transfer the wmap (if any) to the shadow buf */
12686 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12687 		bsp->mbs_wmp = NULL;
12688 
12689 		/*
12690 		 * The shadow buf goes on from here in place of the
12691 		 * original buf.
12692 		 */
12693 		shadow_bsp->mbs_orig_bp = bp;
12694 		bp = shadow_bp;
12695 	}
12696 
12697 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12698 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12699 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12700 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12701 	    request_bytes);
12702 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12703 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
12704 
12705 done:
12706 	SD_NEXT_IOSTART(index, un, bp);
12707 
12708 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12709 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12710 }
12711 
12712 
12713 /*
12714  *    Function: sd_mapblocksize_iodone
12715  *
12716  * Description: Completion side processing for block-size mapping.
12717  *
12718  *     Context: May be called under interrupt context
12719  */
12720 
12721 static void
12722 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12723 {
12724 	struct sd_mapblocksize_info	*bsp;
12725 	struct sd_xbuf	*xp;
12726 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12727 	struct buf	*orig_bp;	/* ptr to the original buf */
12728 	offset_t	shadow_end;
12729 	offset_t	request_end;
12730 	offset_t	shadow_start;
12731 	ssize_t		copy_offset;
12732 	size_t		copy_length;
12733 	size_t		shortfall;
12734 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12735 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12736 
12737 	ASSERT(un != NULL);
12738 	ASSERT(bp != NULL);
12739 
12740 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12741 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12742 
12743 	/*
12744 	 * There is no shadow buf or layer-private data if the target is
12745 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12746 	 */
12747 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12748 	    (bp->b_bcount == 0)) {
12749 		goto exit;
12750 	}
12751 
12752 	xp = SD_GET_XBUF(bp);
12753 	ASSERT(xp != NULL);
12754 
12755 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12756 	bsp = xp->xb_private;
12757 
12758 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12759 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12760 
12761 	if (is_write) {
12762 		/*
12763 		 * For a WRITE request we must free up the block range that
12764 		 * we have locked up.  This holds regardless of whether this is
12765 		 * an aligned write request or a read-modify-write request.
12766 		 */
12767 		sd_range_unlock(un, bsp->mbs_wmp);
12768 		bsp->mbs_wmp = NULL;
12769 	}
12770 
12771 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12772 		/*
12773 		 * An aligned read or write command will have no shadow buf;
12774 		 * there is not much else to do with it.
12775 		 */
12776 		goto done;
12777 	}
12778 
12779 	orig_bp = bsp->mbs_orig_bp;
12780 	ASSERT(orig_bp != NULL);
12781 	orig_xp = SD_GET_XBUF(orig_bp);
12782 	ASSERT(orig_xp != NULL);
12783 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12784 
12785 	if (!is_write && has_wmap) {
12786 		/*
12787 		 * A READ with a wmap means this is the READ phase of a
12788 		 * read-modify-write. If an error occurred on the READ then
12789 		 * we do not proceed with the WRITE phase or copy any data.
12790 		 * Just release the write maps and return with an error.
12791 		 */
12792 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12793 			orig_bp->b_resid = orig_bp->b_bcount;
12794 			bioerror(orig_bp, bp->b_error);
12795 			sd_range_unlock(un, bsp->mbs_wmp);
12796 			goto freebuf_done;
12797 		}
12798 	}
12799 
12800 	/*
12801 	 * Here is where we set up to copy the data from the shadow buf
12802 	 * into the space associated with the original buf.
12803 	 *
12804 	 * To deal with the conversion between block sizes, these
12805 	 * computations treat the data as an array of bytes, with the
12806 	 * first byte (byte 0) corresponding to the first byte in the
12807 	 * first block on the disk.
12808 	 */
12809 
12810 	/*
12811 	 * shadow_start and shadow_end give the location and extent of
12812 	 * the data returned with the shadow IO request.
12813 	 */
12814 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12815 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12816 
12817 	/*
12818 	 * copy_offset gives the offset (in bytes) from the start of the first
12819 	 * block of the READ request to the beginning of the data.  We retrieve
12820 	 * this value from the layer-private data area (mbs_copy_offset),
12821 	 * where it was saved by sd_mapblocksize_iostart(). copy_length gives
12822 	 * the amount of data to be copied (in bytes).
12823 	 */
12824 	copy_offset  = bsp->mbs_copy_offset;
12825 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12826 	copy_length  = orig_bp->b_bcount;
12827 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
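
	/*
	 * Worked example with illustrative numbers only, continuing the
	 * 512/2048 case from sd_mapblocksize_iostart(): the 1024-byte
	 * request at byte offset 2560 was served by a 2048-byte shadow
	 * READ of target block 1, so shadow_start = 2048 and
	 * copy_offset = 2560 - 2048 = 512. If the shadow READ returned
	 * all 2048 bytes (b_resid == 0), then shadow_end = 4096 and
	 * request_end = 2048 + 512 + 1024 = 3584 <= shadow_end, so
	 * orig_bp->b_resid is set to 0 and 1024 bytes are copied from
	 * offset 512 of the shadow buf.
	 */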
12828 
12829 	/*
12830 	 * Set up the resid and error fields of orig_bp as appropriate.
12831 	 */
12832 	if (shadow_end >= request_end) {
12833 		/* We got all the requested data; set resid to zero */
12834 		orig_bp->b_resid = 0;
12835 	} else {
12836 		/*
12837 		 * We failed to get enough data to fully satisfy the original
12838 		 * request. Just copy back whatever data we got and set
12839 		 * up the residual and error code as required.
12840 		 *
12841 		 * 'shortfall' is the amount by which the data received with the
12842 		 * shadow buf has "fallen short" of the requested amount.
12843 		 */
12844 		shortfall = (size_t)(request_end - shadow_end);
12845 
12846 		if (shortfall > orig_bp->b_bcount) {
12847 			/*
12848 			 * We did not get enough data to even partially
12849 			 * fulfill the original request.  The residual is
12850 			 * equal to the amount requested.
12851 			 */
12852 			orig_bp->b_resid = orig_bp->b_bcount;
12853 		} else {
12854 			/*
12855 			 * We did not get all the data that we requested
12856 			 * from the device, but we will try to return what
12857 			 * portion we did get.
12858 			 */
12859 			orig_bp->b_resid = shortfall;
12860 		}
12861 		ASSERT(copy_length >= orig_bp->b_resid);
12862 		copy_length  -= orig_bp->b_resid;
12863 	}
12864 
12865 	/* Propagate the error code from the shadow buf to the original buf */
12866 	bioerror(orig_bp, bp->b_error);
12867 
12868 	if (is_write) {
12869 		goto freebuf_done;	/* No data copying for a WRITE */
12870 	}
12871 
12872 	if (has_wmap) {
12873 		/*
12874 		 * This is a READ command from the READ phase of a
12875 		 * read-modify-write request. We have to copy the data given
12876 		 * by the user OVER the data returned by the READ command,
12877 		 * then convert the command from a READ to a WRITE and send
12878 		 * it back to the target.
12879 		 */
12880 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12881 		    copy_length);
12882 
12883 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12884 
12885 		/*
12886 		 * Dispatch the WRITE command to the taskq thread, which
12887 		 * will in turn send the command to the target. When the
12888 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12889 		 * will get called again as part of the iodone chain
12890 		 * processing for it. Note that we will still be dealing
12891 		 * with the shadow buf at that point.
12892 		 */
12893 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12894 		    KM_NOSLEEP) != 0) {
12895 			/*
12896 			 * Dispatch was successful so we are done. Return
12897 			 * without going any higher up the iodone chain. Do
12898 			 * not free up any layer-private data until after the
12899 			 * WRITE completes.
12900 			 */
12901 			return;
12902 		}
12903 
12904 		/*
12905 		 * Dispatch of the WRITE command failed; set up the error
12906 		 * condition and send this IO back up the iodone chain.
12907 		 */
12908 		bioerror(orig_bp, EIO);
12909 		orig_bp->b_resid = orig_bp->b_bcount;
12910 
12911 	} else {
12912 		/*
12913 		 * This is a regular READ request (ie, not a RMW). Copy the
12914 		 * data from the shadow buf into the original buf. The
12915 		 * copy_offset compensates for any "misalignment" between the
12916 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12917 		 * original buf (with its un->un_sys_blocksize blocks).
12918 		 */
12919 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12920 		    copy_length);
12921 	}
12922 
12923 freebuf_done:
12924 
12925 	/*
12926 	 * At this point we still have both the shadow buf AND the original
12927 	 * buf to deal with, as well as the layer-private data area in each.
12928 	 * Local variables are as follows:
12929 	 *
12930 	 * bp -- points to shadow buf
12931 	 * xp -- points to xbuf of shadow buf
12932 	 * bsp -- points to layer-private data area of shadow buf
12933 	 * orig_bp -- points to original buf
12934 	 *
12935 	 * First free the shadow buf and its associated xbuf, then free the
12936 	 * layer-private data area from the shadow buf. There is no need to
12937 	 * restore xb_private in the shadow xbuf.
12938 	 */
12939 	sd_shadow_buf_free(bp);
12940 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12941 
12942 	/*
12943 	 * Now update the local variables to point to the original buf, xbuf,
12944 	 * and layer-private area.
12945 	 */
12946 	bp = orig_bp;
12947 	xp = SD_GET_XBUF(bp);
12948 	ASSERT(xp != NULL);
12949 	ASSERT(xp == orig_xp);
12950 	bsp = xp->xb_private;
12951 	ASSERT(bsp != NULL);
12952 
12953 done:
12954 	/*
12955 	 * Restore xb_private to whatever it was set to by the next higher
12956 	 * layer in the chain, then free the layer-private data area.
12957 	 */
12958 	xp->xb_private = bsp->mbs_oprivate;
12959 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12960 
12961 exit:
12962 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12963 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12964 
12965 	SD_NEXT_IODONE(index, un, bp);
12966 }
12967 
12968 
12969 /*
12970  *    Function: sd_checksum_iostart
12971  *
12972  * Description: A stub function for a layer that's currently not used.
12973  *		For now just a placeholder.
12974  *
12975  *     Context: Kernel thread context
12976  */
12977 
12978 static void
12979 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12980 {
12981 	ASSERT(un != NULL);
12982 	ASSERT(bp != NULL);
12983 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12984 	SD_NEXT_IOSTART(index, un, bp);
12985 }
12986 
12987 
12988 /*
12989  *    Function: sd_checksum_iodone
12990  *
12991  * Description: A stub function for a layer that's currently not used.
12992  *		For now just a placeholder.
12993  *
12994  *     Context: May be called under interrupt context
12995  */
12996 
12997 static void
12998 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12999 {
13000 	ASSERT(un != NULL);
13001 	ASSERT(bp != NULL);
13002 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13003 	SD_NEXT_IODONE(index, un, bp);
13004 }
13005 
13006 
13007 /*
13008  *    Function: sd_checksum_uscsi_iostart
13009  *
13010  * Description: A stub function for a layer that's currently not used.
13011  *		For now just a placeholder.
13012  *
13013  *     Context: Kernel thread context
13014  */
13015 
13016 static void
13017 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13018 {
13019 	ASSERT(un != NULL);
13020 	ASSERT(bp != NULL);
13021 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13022 	SD_NEXT_IOSTART(index, un, bp);
13023 }
13024 
13025 
13026 /*
13027  *    Function: sd_checksum_uscsi_iodone
13028  *
13029  * Description: A stub function for a layer that's currently not used.
13030  *		For now just a placeholder.
13031  *
13032  *     Context: May be called under interrupt context
13033  */
13034 
13035 static void
13036 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13037 {
13038 	ASSERT(un != NULL);
13039 	ASSERT(bp != NULL);
13040 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13041 	SD_NEXT_IODONE(index, un, bp);
13042 }
13043 
13044 
13045 /*
13046  *    Function: sd_pm_iostart
13047  *
13048  * Description: iostart-side routine for power management.
13049  *
13050  *     Context: Kernel thread context
13051  */
13052 
13053 static void
13054 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13055 {
13056 	ASSERT(un != NULL);
13057 	ASSERT(bp != NULL);
13058 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13059 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13060 
13061 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13062 
13063 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13064 		/*
13065 		 * Set up to return the failed buf back up the 'iodone'
13066 		 * side of the calling chain.
13067 		 */
13068 		bioerror(bp, EIO);
13069 		bp->b_resid = bp->b_bcount;
13070 
13071 		SD_BEGIN_IODONE(index, un, bp);
13072 
13073 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13074 		return;
13075 	}
13076 
13077 	SD_NEXT_IOSTART(index, un, bp);
13078 
13079 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13080 }
13081 
13082 
13083 /*
13084  *    Function: sd_pm_iodone
13085  *
13086  * Description: iodone-side routine for power management.
13087  *
13088  *     Context: May be called from interrupt context
13089  */
13090 
13091 static void
13092 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13093 {
13094 	ASSERT(un != NULL);
13095 	ASSERT(bp != NULL);
13096 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13097 
13098 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13099 
13100 	/*
13101 	 * After attach the following flag is only read, so don't
13102 	 * take the penalty of acquiring a mutex for it.
13103 	 */
13104 	if (un->un_f_pm_is_enabled == TRUE) {
13105 		sd_pm_exit(un);
13106 	}
13107 
13108 	SD_NEXT_IODONE(index, un, bp);
13109 
13110 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13111 }
13112 
13113 
13114 /*
13115  *    Function: sd_core_iostart
13116  *
13117  * Description: Primary driver function for enqueuing buf(9S) structs from
13118  *		the system and initiating IO to the target device
13119  *
13120  *     Context: Kernel thread context. Can sleep.
13121  *
13122  * Assumptions:  - The given xp->xb_blkno is absolute
13123  *		   (i.e., relative to the start of the device).
13124  *		 - The IO is to be done using the native blocksize of
13125  *		   the device, as specified in un->un_tgt_blocksize.
13126  */
13127 /* ARGSUSED */
13128 static void
13129 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13130 {
13131 	struct sd_xbuf *xp;
13132 
13133 	ASSERT(un != NULL);
13134 	ASSERT(bp != NULL);
13135 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13136 	ASSERT(bp->b_resid == 0);
13137 
13138 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13139 
13140 	xp = SD_GET_XBUF(bp);
13141 	ASSERT(xp != NULL);
13142 
13143 	mutex_enter(SD_MUTEX(un));
13144 
13145 	/*
13146 	 * If we are currently in the failfast state, fail any new IO
13147 	 * that has B_FAILFAST set, then return.
13148 	 */
13149 	if ((bp->b_flags & B_FAILFAST) &&
13150 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13151 		mutex_exit(SD_MUTEX(un));
13152 		bioerror(bp, EIO);
13153 		bp->b_resid = bp->b_bcount;
13154 		SD_BEGIN_IODONE(index, un, bp);
13155 		return;
13156 	}
13157 
13158 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13159 		/*
13160 		 * Priority command -- transport it immediately.
13161 		 *
13162 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13163 		 * because all direct priority commands should be associated
13164 		 * with error recovery actions which we don't want to retry.
13165 		 */
13166 		sd_start_cmds(un, bp);
13167 	} else {
13168 		/*
13169 		 * Normal command -- add it to the wait queue, then start
13170 		 * transporting commands from the wait queue.
13171 		 */
13172 		sd_add_buf_to_waitq(un, bp);
13173 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13174 		sd_start_cmds(un, NULL);
13175 	}
13176 
13177 	mutex_exit(SD_MUTEX(un));
13178 
13179 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13180 }
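
/*
 * Example: a minimal sketch (not driver code) of how the failfast check
 * in sd_core_iostart() behaves for a caller that opts in via B_FAILFAST:
 *
 *	bp->b_flags |= B_READ | B_FAILFAST;
 *
 * If un->un_failfast_state == SD_FAILFAST_ACTIVE when such a buf arrives,
 * it is completed immediately with bioerror(bp, EIO) and b_resid set to
 * b_bcount; it never reaches the wait queue.  Otherwise, non-priority
 * bufs are queued via sd_add_buf_to_waitq(), while direct-priority bufs
 * bypass the queue and are handed to sd_start_cmds() at once.
 */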
13181 
13182 
13183 /*
13184  *    Function: sd_init_cdb_limits
13185  *
13186  * Description: This is to handle scsi_pkt initialization differences
13187  *		between the driver platforms.
13188  *
13189  *		Legacy behaviors:
13190  *
13191  *		If the block number or the sector count exceeds the
13192  *		capabilities of a Group 0 command, shift over to a
13193  *		Group 1 command. We don't blindly use Group 1
13194  *		commands because a) some drives (CDC Wren IVs) get a
13195  *		bit confused, and b) there is probably a fair amount
13196  *		of speed difference for a target to receive and decode
13197  *		a 10 byte command instead of a 6 byte command.
13198  *
13199  *		The xfer time difference of 6 vs 10 byte CDBs is
13200  *		still significant, so this code is still worthwhile.
13201  *		10 byte CDBs are very inefficient with the fas HBA driver
13202  *		and older disks. Each CDB byte took 1 usec with some
13203  *		popular disks.
13204  *
13205  *     Context: Must be called at attach time
13206  */
13207 
13208 static void
13209 sd_init_cdb_limits(struct sd_lun *un)
13210 {
13211 	int hba_cdb_limit;
13212 
13213 	/*
13214 	 * Use CDB_GROUP1 commands for most devices except for
13215 	 * parallel SCSI fixed drives, in which case we get better
13216 	 * performance using CDB_GROUP0 commands (where applicable).
13217 	 */
13218 	un->un_mincdb = SD_CDB_GROUP1;
13219 #if !defined(__fibre)
13220 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13221 	    !un->un_f_has_removable_media) {
13222 		un->un_mincdb = SD_CDB_GROUP0;
13223 	}
13224 #endif
13225 
13226 	/*
13227 	 * Try to read the max-cdb-length supported by HBA.
13228 	 */
13229 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13230 	if (0 >= un->un_max_hba_cdb) {
13231 		un->un_max_hba_cdb = CDB_GROUP4;
13232 		hba_cdb_limit = SD_CDB_GROUP4;
13233 	} else if (0 < un->un_max_hba_cdb &&
13234 	    un->un_max_hba_cdb < CDB_GROUP1) {
13235 		hba_cdb_limit = SD_CDB_GROUP0;
13236 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13237 	    un->un_max_hba_cdb < CDB_GROUP5) {
13238 		hba_cdb_limit = SD_CDB_GROUP1;
13239 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13240 	    un->un_max_hba_cdb < CDB_GROUP4) {
13241 		hba_cdb_limit = SD_CDB_GROUP5;
13242 	} else {
13243 		hba_cdb_limit = SD_CDB_GROUP4;
13244 	}
13245 
13246 	/*
13247 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13248 	 * commands for fixed disks unless we are building for a 32 bit
13249 	 * kernel.
13250 	 */
13251 #ifdef _LP64
13252 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13253 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13254 #else
13255 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13256 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13257 #endif
13258 
13259 	/*
13260 	 * x86 systems require the PKT_DMA_PARTIAL flag
13261 	 */
13262 #if defined(__x86)
13263 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13264 #else
13265 	un->un_pkt_flags = 0;
13266 #endif
13267 
13268 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13269 	    ? sizeof (struct scsi_arq_status) : 1);
13270 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13271 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13272 }
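
/*
 * Example: how the "max-cdb-length" capability reported by the HBA maps
 * to hba_cdb_limit above, assuming the usual CDB group sizes of 6
 * (CDB_GROUP0), 10 (CDB_GROUP1), 12 (CDB_GROUP5) and 16 (CDB_GROUP4)
 * bytes:
 *
 *	reported max-cdb-length		hba_cdb_limit
 *	-----------------------		-------------
 *	<= 0 (cap not supported)	SD_CDB_GROUP4
 *	 1 ..  9			SD_CDB_GROUP0
 *	10 .. 11			SD_CDB_GROUP1
 *	12 .. 15			SD_CDB_GROUP5
 *	>= 16				SD_CDB_GROUP4
 *
 * un_maxcdb is then this limit clamped to SD_CDB_GROUP4 (LP64 kernels)
 * or SD_CDB_GROUP1 (32-bit kernels), with removable media always using
 * SD_CDB_GROUP5.
 */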
13273 
13274 
13275 /*
13276  *    Function: sd_initpkt_for_buf
13277  *
13278  * Description: Allocate and initialize for transport a scsi_pkt struct,
13279  *		based upon the info specified in the given buf struct.
13280  *
13281  *		Assumes the xb_blkno in the request is absolute (i.e.,
13282  *		relative to the start of the device, NOT the partition).
13283  *		Also assumes that the request is using the native block
13284  *		size of the device (as returned by the READ CAPACITY
13285  *		command).
13286  *
13287  * Return Code: SD_PKT_ALLOC_SUCCESS
13288  *		SD_PKT_ALLOC_FAILURE
13289  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13290  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13291  *
13292  *     Context: Kernel thread and may be called from software interrupt context
13293  *		as part of a sdrunout callback. This function may not block or
13294  *		call routines that block
13295  */
13296 
13297 static int
13298 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13299 {
13300 	struct sd_xbuf	*xp;
13301 	struct scsi_pkt *pktp = NULL;
13302 	struct sd_lun	*un;
13303 	size_t		blockcount;
13304 	daddr_t		startblock;
13305 	int		rval;
13306 	int		cmd_flags;
13307 
13308 	ASSERT(bp != NULL);
13309 	ASSERT(pktpp != NULL);
13310 	xp = SD_GET_XBUF(bp);
13311 	ASSERT(xp != NULL);
13312 	un = SD_GET_UN(bp);
13313 	ASSERT(un != NULL);
13314 	ASSERT(mutex_owned(SD_MUTEX(un)));
13315 	ASSERT(bp->b_resid == 0);
13316 
13317 	SD_TRACE(SD_LOG_IO_CORE, un,
13318 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13319 
13320 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13321 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13322 		/*
13323 		 * Already have a scsi_pkt -- just need DMA resources.
13324 		 * We must recompute the CDB in case the mapping returns
13325 		 * a nonzero pkt_resid.
13326 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13327 		 * that is being retried, the unmap/remap of the DMA resources
13328 		 * will result in the entire transfer starting over again
13329 		 * from the very first block.
13330 		 */
13331 		ASSERT(xp->xb_pktp != NULL);
13332 		pktp = xp->xb_pktp;
13333 	} else {
13334 		pktp = NULL;
13335 	}
13336 #endif /* __i386 || __amd64 */
13337 
13338 	startblock = xp->xb_blkno;	/* Absolute block num. */
13339 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13340 
13341 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13342 
13343 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13344 
13345 #else
13346 
13347 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13348 
13349 #endif
13350 
13351 	/*
13352 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13353 	 * call scsi_init_pkt, and build the CDB.
13354 	 */
13355 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13356 	    cmd_flags, sdrunout, (caddr_t)un,
13357 	    startblock, blockcount);
13358 
13359 	if (rval == 0) {
13360 		/*
13361 		 * Success.
13362 		 *
13363 		 * If partial DMA is being used and required for this transfer,
13364 		 * set it up here.
13365 		 */
13366 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13367 		    (pktp->pkt_resid != 0)) {
13368 
13369 			/*
13370 			 * Save the residual (pkt_resid) so the next
13371 			 * transfer can pick up where this one left off.
13372 			 */
13373 			xp->xb_dma_resid = pktp->pkt_resid;
13374 
13375 			/* rezero resid */
13376 			pktp->pkt_resid = 0;
13377 
13378 		} else {
13379 			xp->xb_dma_resid = 0;
13380 		}
13381 
13382 		pktp->pkt_flags = un->un_tagflags;
13383 		pktp->pkt_time  = un->un_cmd_timeout;
13384 		pktp->pkt_comp  = sdintr;
13385 
13386 		pktp->pkt_private = bp;
13387 		*pktpp = pktp;
13388 
13389 		SD_TRACE(SD_LOG_IO_CORE, un,
13390 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13391 
13392 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13393 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13394 #endif
13395 
13396 		return (SD_PKT_ALLOC_SUCCESS);
13397 
13398 	}
13399 
13400 	/*
13401 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13402 	 * from sd_setup_rw_pkt.
13403 	 */
13404 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13405 
13406 	if (rval == SD_PKT_ALLOC_FAILURE) {
13407 		*pktpp = NULL;
13408 		/*
13409 		 * Set the driver state to RWAIT to indicate the driver
13410 		 * is waiting on resource allocations. The driver will not
13411 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13412 		 */
13413 		New_state(un, SD_STATE_RWAIT);
13414 
13415 		SD_ERROR(SD_LOG_IO_CORE, un,
13416 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13417 
13418 		if ((bp->b_flags & B_ERROR) != 0) {
13419 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13420 		}
13421 		return (SD_PKT_ALLOC_FAILURE);
13422 	} else {
13423 		/*
13424 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13425 		 *
13426 		 * This should never happen.  Maybe someone messed with the
13427 		 * kernel's minphys?
13428 		 */
13429 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13430 		    "Request rejected: too large for CDB: "
13431 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13432 		SD_ERROR(SD_LOG_IO_CORE, un,
13433 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13434 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13435 
13436 	}
13437 }
13438 
13439 
13440 /*
13441  *    Function: sd_destroypkt_for_buf
13442  *
13443  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13444  *
13445  *     Context: Kernel thread or interrupt context
13446  */
13447 
13448 static void
13449 sd_destroypkt_for_buf(struct buf *bp)
13450 {
13451 	ASSERT(bp != NULL);
13452 	ASSERT(SD_GET_UN(bp) != NULL);
13453 
13454 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13455 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13456 
13457 	ASSERT(SD_GET_PKTP(bp) != NULL);
13458 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13459 
13460 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13461 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13462 }
13463 
13464 /*
13465  *    Function: sd_setup_rw_pkt
13466  *
13467  * Description: Determines appropriate CDB group for the requested LBA
13468  *		and transfer length, calls scsi_init_pkt, and builds
13469  *		the CDB.  Do not use for partial DMA transfers except
13470  *		for the initial transfer since the CDB size must
13471  *		remain constant.
13472  *
13473  *     Context: Kernel thread and may be called from software interrupt
13474  *		context as part of a sdrunout callback. This function may not
13475  *		block or call routines that block
13476  */
13477 
13478 
13479 int
13480 sd_setup_rw_pkt(struct sd_lun *un,
13481     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13482     int (*callback)(caddr_t), caddr_t callback_arg,
13483     diskaddr_t lba, uint32_t blockcount)
13484 {
13485 	struct scsi_pkt *return_pktp;
13486 	union scsi_cdb *cdbp;
13487 	struct sd_cdbinfo *cp = NULL;
13488 	int i;
13489 
13490 	/*
13491 	 * See which size CDB to use, based upon the request.
13492 	 */
13493 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13494 
13495 		/*
13496 		 * Check lba and block count against sd_cdbtab limits.
13497 		 * In the partial DMA case, we have to use the same size
13498 		 * CDB for all the transfers.  Check lba + blockcount
13499 		 * against the max LBA so we know that segment of the
13500 		 * transfer can use the CDB we select.
13501 		 */
13502 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13503 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13504 
13505 			/*
13506 			 * The command will fit into the CDB type
13507 			 * specified by sd_cdbtab[i].
13508 			 */
13509 			cp = sd_cdbtab + i;
13510 
13511 			/*
13512 			 * Call scsi_init_pkt so we can fill in the
13513 			 * CDB.
13514 			 */
13515 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13516 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13517 			    flags, callback, callback_arg);
13518 
13519 			if (return_pktp != NULL) {
13520 
13521 				/*
13522 				 * Return new value of pkt
13523 				 */
13524 				*pktpp = return_pktp;
13525 
13526 				/*
13527 				 * To be safe, zero the CDB, ensuring there is
13528 				 * no leftover data from a previous command.
13529 				 */
13530 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13531 
13532 				/*
13533 				 * Handle partial DMA mapping
13534 				 */
13535 				if (return_pktp->pkt_resid != 0) {
13536 
13537 					/*
13538 					 * Not going to xfer as many blocks as
13539 					 * originally expected
13540 					 */
13541 					blockcount -=
13542 					    SD_BYTES2TGTBLOCKS(un,
13543 						return_pktp->pkt_resid);
13544 				}
13545 
13546 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13547 
13548 				/*
13549 				 * Set command byte based on the CDB
13550 				 * type we matched.
13551 				 */
13552 				cdbp->scc_cmd = cp->sc_grpmask |
13553 				    ((bp->b_flags & B_READ) ?
13554 					SCMD_READ : SCMD_WRITE);
13555 
13556 				SD_FILL_SCSI1_LUN(un, return_pktp);
13557 
13558 				/*
13559 				 * Fill in LBA and length
13560 				 */
13561 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13562 				    (cp->sc_grpcode == CDB_GROUP4) ||
13563 				    (cp->sc_grpcode == CDB_GROUP0) ||
13564 				    (cp->sc_grpcode == CDB_GROUP5));
13565 
13566 				if (cp->sc_grpcode == CDB_GROUP1) {
13567 					FORMG1ADDR(cdbp, lba);
13568 					FORMG1COUNT(cdbp, blockcount);
13569 					return (0);
13570 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13571 					FORMG4LONGADDR(cdbp, lba);
13572 					FORMG4COUNT(cdbp, blockcount);
13573 					return (0);
13574 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13575 					FORMG0ADDR(cdbp, lba);
13576 					FORMG0COUNT(cdbp, blockcount);
13577 					return (0);
13578 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13579 					FORMG5ADDR(cdbp, lba);
13580 					FORMG5COUNT(cdbp, blockcount);
13581 					return (0);
13582 				}
13583 
13584 				/*
13585 				 * It should be impossible to not match one
13586 				 * of the CDB types above, so we should never
13587 				 * reach this point.  Set the CDB command byte
13588 				 * to test-unit-ready to avoid writing
13589 				 * to somewhere we don't intend.
13590 				 */
13591 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13592 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13593 			} else {
13594 				/*
13595 				 * Couldn't get scsi_pkt
13596 				 */
13597 				return (SD_PKT_ALLOC_FAILURE);
13598 			}
13599 		}
13600 	}
13601 
13602 	/*
13603 	 * None of the available CDB types were suitable.  This really
13604 	 * should never happen:  on a 64 bit system we support
13605 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13606 	 * and on a 32 bit system we will refuse to bind to a device
13607 	 * larger than 2TB so addresses will never be larger than 32 bits.
13608 	 */
13609 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13610 }
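
/*
 * Example: CDB group selection in sd_setup_rw_pkt(), assuming the
 * conventional SCSI addressing limits encoded in sd_cdbtab (READ(6)/
 * WRITE(6): 21-bit LBA, 8-bit count; READ(10)/WRITE(10): 32-bit LBA,
 * 16-bit count; READ(16)/WRITE(16): 64-bit LBA, 32-bit count):
 *
 *	lba = 0x00100000, blockcount = 128   -> fits Group 0 (if allowed
 *						by un_mincdb)
 *	lba = 0x00300000, blockcount = 128   -> LBA exceeds 21 bits, so
 *						Group 1 (READ/WRITE(10))
 *	lba = 0x180000000, blockcount = 128  -> LBA exceeds 32 bits, so
 *						Group 4 (READ/WRITE(16))
 *
 * The first entry of sd_cdbtab within [un_mincdb, un_maxcdb] whose
 * sc_maxlba/sc_maxlen cover (lba + blockcount - 1) and blockcount wins.
 */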
13611 
13612 #if defined(__i386) || defined(__amd64)
13613 /*
13614  *    Function: sd_setup_next_rw_pkt
13615  *
13616  * Description: Setup packet for partial DMA transfers, except for the
13617  * 		initial transfer.  sd_setup_rw_pkt should be used for
13618  *		the initial transfer.
13619  *
13620  *     Context: Kernel thread and may be called from interrupt context.
13621  */
13622 
13623 int
13624 sd_setup_next_rw_pkt(struct sd_lun *un,
13625     struct scsi_pkt *pktp, struct buf *bp,
13626     diskaddr_t lba, uint32_t blockcount)
13627 {
13628 	uchar_t com;
13629 	union scsi_cdb *cdbp;
13630 	uchar_t cdb_group_id;
13631 
13632 	ASSERT(pktp != NULL);
13633 	ASSERT(pktp->pkt_cdbp != NULL);
13634 
13635 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13636 	com = cdbp->scc_cmd;
13637 	cdb_group_id = CDB_GROUPID(com);
13638 
13639 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13640 	    (cdb_group_id == CDB_GROUPID_1) ||
13641 	    (cdb_group_id == CDB_GROUPID_4) ||
13642 	    (cdb_group_id == CDB_GROUPID_5));
13643 
13644 	/*
13645 	 * Move pkt to the next portion of the xfer.
13646 	 * func is NULL_FUNC so we do not have to release
13647 	 * the disk mutex here.
13648 	 */
13649 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13650 	    NULL_FUNC, NULL) == pktp) {
13651 		/* Success.  Handle partial DMA */
13652 		if (pktp->pkt_resid != 0) {
13653 			blockcount -=
13654 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13655 		}
13656 
13657 		cdbp->scc_cmd = com;
13658 		SD_FILL_SCSI1_LUN(un, pktp);
13659 		if (cdb_group_id == CDB_GROUPID_1) {
13660 			FORMG1ADDR(cdbp, lba);
13661 			FORMG1COUNT(cdbp, blockcount);
13662 			return (0);
13663 		} else if (cdb_group_id == CDB_GROUPID_4) {
13664 			FORMG4LONGADDR(cdbp, lba);
13665 			FORMG4COUNT(cdbp, blockcount);
13666 			return (0);
13667 		} else if (cdb_group_id == CDB_GROUPID_0) {
13668 			FORMG0ADDR(cdbp, lba);
13669 			FORMG0COUNT(cdbp, blockcount);
13670 			return (0);
13671 		} else if (cdb_group_id == CDB_GROUPID_5) {
13672 			FORMG5ADDR(cdbp, lba);
13673 			FORMG5COUNT(cdbp, blockcount);
13674 			return (0);
13675 		}
13676 
13677 		/* Unreachable */
13678 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13679 	}
13680 
13681 	/*
13682 	 * Error setting up next portion of cmd transfer.
13683 	 * Something is definitely very wrong and this
13684 	 * should not happen.
13685 	 */
13686 	return (SD_PKT_ALLOC_FAILURE);
13687 }
13688 #endif /* defined(__i386) || defined(__amd64) */
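
/*
 * Example: a partial-DMA transfer walked across sd_setup_rw_pkt() and
 * sd_setup_next_rw_pkt() (a sketch; the sizes are illustrative).
 * Suppose a 1 MB read at lba 1000 on a 512-byte target, where the HBA
 * can only map 256 KB (512 blocks) per binding:
 *
 *	pass 1: sd_setup_rw_pkt(lba 1000, blockcount 2048)
 *		pkt_resid leaves 1536 blocks unmapped -> CDB built for
 *		lba 1000, count 512
 *	pass 2: sd_setup_next_rw_pkt(lba 1512, blockcount 1536)
 *		-> CDB rebuilt in place for lba 1512, count 512
 *	... and so on until the residual is exhausted.
 *
 * The CDB group (and thus CDB size) chosen on pass 1 is reused verbatim
 * on every later pass, which is why the initial selection must cover
 * the full [lba, lba + blockcount) range.
 */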
13689 
13690 /*
13691  *    Function: sd_initpkt_for_uscsi
13692  *
13693  * Description: Allocate and initialize for transport a scsi_pkt struct,
13694  *		based upon the info specified in the given uscsi_cmd struct.
13695  *
13696  * Return Code: SD_PKT_ALLOC_SUCCESS
13697  *		SD_PKT_ALLOC_FAILURE
13698  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13699  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13700  *
13701  *     Context: Kernel thread and may be called from software interrupt context
13702  *		as part of a sdrunout callback. This function may not block or
13703  *		call routines that block
13704  */
13705 
13706 static int
13707 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13708 {
13709 	struct uscsi_cmd *uscmd;
13710 	struct sd_xbuf	*xp;
13711 	struct scsi_pkt	*pktp;
13712 	struct sd_lun	*un;
13713 	uint32_t	flags = 0;
13714 
13715 	ASSERT(bp != NULL);
13716 	ASSERT(pktpp != NULL);
13717 	xp = SD_GET_XBUF(bp);
13718 	ASSERT(xp != NULL);
13719 	un = SD_GET_UN(bp);
13720 	ASSERT(un != NULL);
13721 	ASSERT(mutex_owned(SD_MUTEX(un)));
13722 
13723 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13724 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13725 	ASSERT(uscmd != NULL);
13726 
13727 	SD_TRACE(SD_LOG_IO_CORE, un,
13728 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13729 
13730 	/*
13731 	 * Allocate the scsi_pkt for the command.
13732 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13733 	 *	 during scsi_init_pkt time and will continue to use the
13734 	 *	 same path as long as the same scsi_pkt is used without
13735 	 *	 intervening scsi_dmafree(). Since a uscsi command does
13736 	 *	 not call scsi_dmafree() before retrying a failed command,
13737 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13738 	 *	 NOT set, so that scsi_vhci can use another available path
13739 	 *	 for the retry. Besides, uscsi commands do not allow DMA
13740 	 *	 breakup, so there is no need to set PKT_DMA_PARTIAL.
13741 	 */
13742 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13743 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13744 	    sizeof (struct scsi_arq_status), 0,
13745 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13746 	    sdrunout, (caddr_t)un);
13747 
13748 	if (pktp == NULL) {
13749 		*pktpp = NULL;
13750 		/*
13751 		 * Set the driver state to RWAIT to indicate the driver
13752 		 * is waiting on resource allocations. The driver will not
13753 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13754 		 */
13755 		New_state(un, SD_STATE_RWAIT);
13756 
13757 		SD_ERROR(SD_LOG_IO_CORE, un,
13758 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13759 
13760 		if ((bp->b_flags & B_ERROR) != 0) {
13761 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13762 		}
13763 		return (SD_PKT_ALLOC_FAILURE);
13764 	}
13765 
13766 	/*
13767 	 * We do not do DMA breakup for USCSI commands, so return failure
13768 	 * here if all the needed DMA resources were not allocated.
13769 	 */
13770 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13771 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13772 		scsi_destroy_pkt(pktp);
13773 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13774 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13775 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13776 	}
13777 
13778 	/* Init the cdb from the given uscsi struct */
13779 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13780 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13781 
13782 	SD_FILL_SCSI1_LUN(un, pktp);
13783 
13784 	/*
13785 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
13786 	 * for a listing of the supported flags.
13787 	 */
13788 
13789 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13790 		flags |= FLAG_SILENT;
13791 	}
13792 
13793 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13794 		flags |= FLAG_DIAGNOSE;
13795 	}
13796 
13797 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13798 		flags |= FLAG_ISOLATE;
13799 	}
13800 
13801 	if (un->un_f_is_fibre == FALSE) {
13802 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13803 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13804 		}
13805 	}
13806 
13807 	/*
13808 	 * Set the pkt flags here so we save time later.
13809 	 * Note: These flags are NOT in the uscsi man page!!!
13810 	 */
13811 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13812 		flags |= FLAG_HEAD;
13813 	}
13814 
13815 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13816 		flags |= FLAG_NOINTR;
13817 	}
13818 
13819 	/*
13820 	 * For tagged queueing, things get a bit complicated.
13821 	 * Check first for head of queue and last for ordered queue.
13822 	 * If neither head nor ordered, use the default driver tag flags.
13823 	 */
13824 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13825 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13826 			flags |= FLAG_HTAG;
13827 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13828 			flags |= FLAG_OTAG;
13829 		} else {
13830 			flags |= un->un_tagflags & FLAG_TAGMASK;
13831 		}
13832 	}
13833 
13834 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13835 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13836 	}
13837 
13838 	pktp->pkt_flags = flags;
13839 
13840 	/* Copy the caller's CDB into the pkt... */
13841 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13842 
13843 	if (uscmd->uscsi_timeout == 0) {
13844 		pktp->pkt_time = un->un_uscsi_timeout;
13845 	} else {
13846 		pktp->pkt_time = uscmd->uscsi_timeout;
13847 	}
13848 
13849 	/* need it later to identify USCSI request in sdintr */
13850 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13851 
13852 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13853 
13854 	pktp->pkt_private = bp;
13855 	pktp->pkt_comp = sdintr;
13856 	*pktpp = pktp;
13857 
13858 	SD_TRACE(SD_LOG_IO_CORE, un,
13859 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13860 
13861 	return (SD_PKT_ALLOC_SUCCESS);
13862 }
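
/*
 * Example: how a caller's uscsi_flags translate to pkt_flags in
 * sd_initpkt_for_uscsi().  For a hypothetical ioctl(USCSICMD) request
 * built as:
 *
 *	ucmd.uscsi_flags = USCSI_READ | USCSI_SILENT | USCSI_DIAGNOSE |
 *	    USCSI_RQENABLE;
 *	ucmd.uscsi_timeout = 0;
 *
 * the resulting packet gets FLAG_SILENT | FLAG_DIAGNOSE plus the
 * driver's default tag flags (un_tagflags & FLAG_TAGMASK, since none of
 * USCSI_NOTAG/USCSI_HTAG/USCSI_OTAG is set), and pkt_time falls back to
 * un->un_uscsi_timeout because the caller supplied no timeout.
 * USCSI_RQENABLE has no pkt_flags effect here; it governs the sense
 * copy-out in sd_destroypkt_for_uscsi().
 */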
13863 
13864 
13865 /*
13866  *    Function: sd_destroypkt_for_uscsi
13867  *
13868  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13869  *		IOs. Also saves relevant info into the associated uscsi_cmd
13870  *		struct.
13871  *
13872  *     Context: May be called under interrupt context
13873  */
13874 
13875 static void
13876 sd_destroypkt_for_uscsi(struct buf *bp)
13877 {
13878 	struct uscsi_cmd *uscmd;
13879 	struct sd_xbuf	*xp;
13880 	struct scsi_pkt	*pktp;
13881 	struct sd_lun	*un;
13882 
13883 	ASSERT(bp != NULL);
13884 	xp = SD_GET_XBUF(bp);
13885 	ASSERT(xp != NULL);
13886 	un = SD_GET_UN(bp);
13887 	ASSERT(un != NULL);
13888 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13889 	pktp = SD_GET_PKTP(bp);
13890 	ASSERT(pktp != NULL);
13891 
13892 	SD_TRACE(SD_LOG_IO_CORE, un,
13893 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13894 
13895 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13896 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13897 	ASSERT(uscmd != NULL);
13898 
13899 	/* Save the status and the residual into the uscsi_cmd struct */
13900 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13901 	uscmd->uscsi_resid  = bp->b_resid;
13902 
13903 	/*
13904 	 * If enabled, copy any saved sense data into the area specified
13905 	 * by the uscsi command.
13906 	 */
13907 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13908 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13909 		/*
13910 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13911 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13912 		 */
13913 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13914 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13915 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13916 	}
13917 
13918 	/* We are done with the scsi_pkt; free it now */
13919 	ASSERT(SD_GET_PKTP(bp) != NULL);
13920 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13921 
13922 	SD_TRACE(SD_LOG_IO_CORE, un,
13923 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13924 }
13925 
13926 
13927 /*
13928  *    Function: sd_bioclone_alloc
13929  *
13930  * Description: Allocate a buf(9S) and init it as per the given buf
13931  *		and the various arguments.  The associated sd_xbuf
13932  *		struct is (nearly) duplicated.  The struct buf *bp
13933  *		argument is saved in new_xp->xb_private.
13934  *
13935  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13936  *		datalen - size of data area for the shadow bp
13937  *		blkno - starting LBA
13938  *		func - function pointer for b_iodone in the shadow buf. (May
13939  *			be NULL if none.)
13940  *
13941  * Return Code: Pointer to the allocated buf(9S) struct
13942  *
13943  *     Context: Can sleep.
13944  */
13945 
13946 static struct buf *
13947 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13948 	daddr_t blkno, int (*func)(struct buf *))
13949 {
13950 	struct	sd_lun	*un;
13951 	struct	sd_xbuf	*xp;
13952 	struct	sd_xbuf	*new_xp;
13953 	struct	buf	*new_bp;
13954 
13955 	ASSERT(bp != NULL);
13956 	xp = SD_GET_XBUF(bp);
13957 	ASSERT(xp != NULL);
13958 	un = SD_GET_UN(bp);
13959 	ASSERT(un != NULL);
13960 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13961 
13962 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13963 	    NULL, KM_SLEEP);
13964 
13965 	new_bp->b_lblkno	= blkno;
13966 
13967 	/*
13968 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13969 	 * original xbuf into it.
13970 	 */
13971 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13972 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13973 
13974 	/*
13975 	 * The given bp is automatically saved in the xb_private member
13976 	 * of the new xbuf.  Callers are allowed to depend on this.
13977 	 */
13978 	new_xp->xb_private = bp;
13979 
13980 	new_bp->b_private  = new_xp;
13981 
13982 	return (new_bp);
13983 }
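
/*
 * Example: a minimal usage sketch for sd_bioclone_alloc() (hypothetical
 * caller; sd_example_iodone and start_blkno are made-up names).  To
 * issue the first 16 blocks of a larger request:
 *
 *	new_bp = sd_bioclone_alloc(bp, 16 * DEV_BSIZE, start_blkno,
 *	    sd_example_iodone);
 *
 * The clone shares the original data pages (via bioclone(9F)), carries
 * a copy of the original sd_xbuf, and records the original bp in
 * new_xp->xb_private, so sd_example_iodone() can recover it with
 * SD_GET_XBUF(new_bp)->xb_private when the shadow IO completes.
 */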
13984 
13985 /*
13986  *    Function: sd_shadow_buf_alloc
13987  *
13988  * Description: Allocate a buf(9S) and init it as per the given buf
13989  *		and the various arguments.  The associated sd_xbuf
13990  *		struct is (nearly) duplicated.  The struct buf *bp
13991  *		argument is saved in new_xp->xb_private.
13992  *
13993  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13994  *		datalen - size of data area for the shadow bp
13995  *		bflags - B_READ or B_WRITE (pseudo flag)
13996  *		blkno - starting LBA
13997  *		func - function pointer for b_iodone in the shadow buf. (May
13998  *			be NULL if none.)
13999  *
14000  * Return Code: Pointer to the allocated buf(9S) struct
14001  *
14002  *     Context: Can sleep.
14003  */
14004 
14005 static struct buf *
14006 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14007 	daddr_t blkno, int (*func)(struct buf *))
14008 {
14009 	struct	sd_lun	*un;
14010 	struct	sd_xbuf	*xp;
14011 	struct	sd_xbuf	*new_xp;
14012 	struct	buf	*new_bp;
14013 
14014 	ASSERT(bp != NULL);
14015 	xp = SD_GET_XBUF(bp);
14016 	ASSERT(xp != NULL);
14017 	un = SD_GET_UN(bp);
14018 	ASSERT(un != NULL);
14019 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14020 
14021 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14022 		bp_mapin(bp);
14023 	}
14024 
14025 	bflags &= (B_READ | B_WRITE);
14026 #if defined(__i386) || defined(__amd64)
14027 	new_bp = getrbuf(KM_SLEEP);
14028 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14029 	new_bp->b_bcount = datalen;
14030 	new_bp->b_flags = bflags |
14031 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14032 #else
14033 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14034 	    datalen, bflags, SLEEP_FUNC, NULL);
14035 #endif
14036 	new_bp->av_forw	= NULL;
14037 	new_bp->av_back	= NULL;
14038 	new_bp->b_dev	= bp->b_dev;
14039 	new_bp->b_blkno	= blkno;
14040 	new_bp->b_iodone = func;
14041 	new_bp->b_edev	= bp->b_edev;
14042 	new_bp->b_resid	= 0;
14043 
14044 	/* We need to preserve the B_FAILFAST flag */
14045 	if (bp->b_flags & B_FAILFAST) {
14046 		new_bp->b_flags |= B_FAILFAST;
14047 	}
14048 
14049 	/*
14050 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14051 	 * original xbuf into it.
14052 	 */
14053 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14054 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14055 
14056 	/* Need later to copy data between the shadow buf & original buf! */
14057 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14058 
14059 	/*
14060 	 * The given bp is automatically saved in the xb_private member
14061 	 * of the new xbuf.  Callers are allowed to depend on this.
14062 	 */
14063 	new_xp->xb_private = bp;
14064 
14065 	new_bp->b_private  = new_xp;
14066 
14067 	return (new_bp);
14068 }
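
/*
 * Example: the read-modify-write pattern this helper supports (a sketch;
 * the sizes are illustrative).  For a 512-byte write to a device with
 * 2048-byte sectors, the blocksize layer allocates a shadow buf covering
 * the whole target block:
 *
 *	shadow_bp = sd_shadow_buf_alloc(bp, 2048, B_READ, tgt_blkno,
 *	    sd_mapblocksize_iodone);
 *
 * The target block is read into the shadow buffer, the caller's 512
 * bytes are merged into it, and the block is written back.  On x86 the
 * data area comes from kmem_zalloc(); on other platforms it is
 * DMA-consistent memory from scsi_alloc_consistent_buf(9F).
 */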
14069 
14070 /*
14071  *    Function: sd_bioclone_free
14072  *
14073  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14074  *		in the larger-than-partition case.
14075  *
14076  *     Context: May be called under interrupt context
14077  */
14078 
14079 static void
14080 sd_bioclone_free(struct buf *bp)
14081 {
14082 	struct sd_xbuf	*xp;
14083 
14084 	ASSERT(bp != NULL);
14085 	xp = SD_GET_XBUF(bp);
14086 	ASSERT(xp != NULL);
14087 
14088 	/*
14089 	 * Call bp_mapout() before freeing the buf, in case a lower
14090 	 * layer or HBA had done a bp_mapin(). We must do this here
14091 	 * as we are the "originator" of the shadow buf.
14092 	 */
14093 	bp_mapout(bp);
14094 
14095 	/*
14096 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14097 	 * never gets confused by a stale value in this field. (Just a little
14098 	 * extra defensiveness here.)
14099 	 */
14100 	bp->b_iodone = NULL;
14101 
14102 	freerbuf(bp);
14103 
14104 	kmem_free(xp, sizeof (struct sd_xbuf));
14105 }
14106 
14107 /*
14108  *    Function: sd_shadow_buf_free
14109  *
14110  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14111  *
14112  *     Context: May be called under interrupt context
14113  */
14114 
14115 static void
14116 sd_shadow_buf_free(struct buf *bp)
14117 {
14118 	struct sd_xbuf	*xp;
14119 
14120 	ASSERT(bp != NULL);
14121 	xp = SD_GET_XBUF(bp);
14122 	ASSERT(xp != NULL);
14123 
14124 #if defined(__sparc)
14125 	/*
14126 	 * Call bp_mapout() before freeing the buf, in case a lower
14127 	 * layer or HBA had done a bp_mapin(). We must do this here
14128 	 * as we are the "originator" of the shadow buf.
14129 	 */
14130 	bp_mapout(bp);
14131 #endif
14132 
14133 	/*
14134 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14135 	 * never gets confused by a stale value in this field. (Just a little
14136 	 * extra defensiveness here.)
14137 	 */
14138 	bp->b_iodone = NULL;
14139 
14140 #if defined(__i386) || defined(__amd64)
14141 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14142 	freerbuf(bp);
14143 #else
14144 	scsi_free_consistent_buf(bp);
14145 #endif
14146 
14147 	kmem_free(xp, sizeof (struct sd_xbuf));
14148 }
14149 
14150 
14151 /*
14152  *    Function: sd_print_transport_rejected_message
14153  *
14154  * Description: This implements the ludicrously complex rules for printing
14155  *		a "transport rejected" message.  This is to address the
14156  *		specific problem of having a flood of this error message
14157  *		produced when a failover occurs.
14158  *
14159  *     Context: Any.
14160  */
14161 
14162 static void
14163 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14164 	int code)
14165 {
14166 	ASSERT(un != NULL);
14167 	ASSERT(mutex_owned(SD_MUTEX(un)));
14168 	ASSERT(xp != NULL);
14169 
14170 	/*
14171 	 * Print the "transport rejected" message under the following
14172 	 * conditions:
14173 	 *
14174 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14175 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14176 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14177 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14178 	 *   scsi_transport(9F) (which indicates that the target might have
14179 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14180 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14181 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14182 	 *   from scsi_transport().
14183 	 *
14184 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14185 	 * the preceding cases in order for the message to be printed.
14186 	 */
14187 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14188 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14189 		    (code != TRAN_FATAL_ERROR) ||
14190 		    (un->un_tran_fatal_count == 1)) {
14191 			switch (code) {
14192 			case TRAN_BADPKT:
14193 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14194 				    "transport rejected bad packet\n");
14195 				break;
14196 			case TRAN_FATAL_ERROR:
14197 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14198 				    "transport rejected fatal error\n");
14199 				break;
14200 			default:
14201 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14202 				    "transport rejected (%d)\n", code);
14203 				break;
14204 			}
14205 		}
14206 	}
14207 }
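
/*
 * Example: when the message above actually prints, assuming FLAG_SILENT
 * is clear in the scsi_pkt:
 *
 *	code != TRAN_FATAL_ERROR		-> printed
 *	code == TRAN_FATAL_ERROR,
 *	    un_tran_fatal_count == 1		-> printed (first fatal
 *						   error only)
 *	code == TRAN_FATAL_ERROR,
 *	    un_tran_fatal_count > 1		-> suppressed, unless the
 *						   SD_LOGMASK_DIAG bit is
 *						   set in sd_level_mask
 */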
14208 
14209 
14210 /*
14211  *    Function: sd_add_buf_to_waitq
14212  *
14213  * Description: Add the given buf(9S) struct to the wait queue for the
14214  *		instance.  If sorting is enabled, then the buf is added
14215  *		to the queue via an elevator sort algorithm (a la
14216  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14217  *		If sorting is not enabled, then the buf is just added
14218  *		to the end of the wait queue.
14219  *
14220  * Return Code: void
14221  *
14222  *     Context: Does not sleep/block, therefore technically can be called
14223  *		from any context.  However if sorting is enabled then the
14224  *		execution time is indeterminate and may become long if
14225  *		the wait queue grows large.
14226  */
14227 
14228 static void
14229 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14230 {
14231 	struct buf *ap;
14232 
14233 	ASSERT(bp != NULL);
14234 	ASSERT(un != NULL);
14235 	ASSERT(mutex_owned(SD_MUTEX(un)));
14236 
14237 	/* If the queue is empty, add the buf as the only entry & return. */
14238 	if (un->un_waitq_headp == NULL) {
14239 		ASSERT(un->un_waitq_tailp == NULL);
14240 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14241 		bp->av_forw = NULL;
14242 		return;
14243 	}
14244 
14245 	ASSERT(un->un_waitq_tailp != NULL);
14246 
14247 	/*
14248 	 * If sorting is disabled, just add the buf to the tail end of
14249 	 * the wait queue and return.
14250 	 */
14251 	if (un->un_f_disksort_disabled) {
14252 		un->un_waitq_tailp->av_forw = bp;
14253 		un->un_waitq_tailp = bp;
14254 		bp->av_forw = NULL;
14255 		return;
14256 	}
14257 
14258 	/*
14259 	 * Sort thru the list of requests currently on the wait queue
14260 	 * and add the new buf request at the appropriate position.
14261 	 *
14262 	 * The un->un_waitq_headp is an activity chain pointer on which
14263 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14264 	 * first queue holds those requests which are positioned after
14265 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14266 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14267 	 * Thus we implement a one way scan, retracting after reaching
14268 	 * the end of the drive to the first request on the second
14269 	 * queue, at which time it becomes the first queue.
14270 	 * A one-way scan is natural because of the way UNIX read-ahead
14271 	 * blocks are allocated.
14272 	 *
14273 	 * If we lie after the first request, then we must locate the
14274 	 * second request list and add ourselves to it.
14275 	 */
14276 	ap = un->un_waitq_headp;
14277 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14278 		while (ap->av_forw != NULL) {
14279 			/*
14280 			 * Look for an "inversion" in the (normally
14281 			 * ascending) block numbers. This indicates
14282 			 * the start of the second request list.
14283 			 */
14284 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14285 				/*
14286 				 * Search the second request list for the
14287 				 * first request at a larger block number.
14288 				 * We go before that; however if there is
14289 				 * no such request, we go at the end.
14290 				 */
14291 				do {
14292 					if (SD_GET_BLKNO(bp) <
14293 					    SD_GET_BLKNO(ap->av_forw)) {
14294 						goto insert;
14295 					}
14296 					ap = ap->av_forw;
14297 				} while (ap->av_forw != NULL);
14298 				goto insert;		/* after last */
14299 			}
14300 			ap = ap->av_forw;
14301 		}
14302 
14303 		/*
14304 		 * No inversions... we will go after the last, and
14305 		 * be the first request in the second request list.
14306 		 */
14307 		goto insert;
14308 	}
14309 
14310 	/*
14311 	 * Request is at/after the current request...
14312 	 * sort in the first request list.
14313 	 */
14314 	while (ap->av_forw != NULL) {
14315 		/*
14316 		 * We want to go after the current request (1) if
14317 		 * there is an inversion after it (i.e. it is the end
14318 		 * of the first request list), or (2) if the next
14319 		 * request is a larger block no. than our request.
14320 		 */
14321 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14322 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14323 			goto insert;
14324 		}
14325 		ap = ap->av_forw;
14326 	}
14327 
14328 	/*
14329 	 * Neither a second list nor a larger request, therefore
14330 	 * we go at the end of the first list (which is the same
14331 	 * as the end of the whole shebang).
14332 	 */
14333 insert:
14334 	bp->av_forw = ap->av_forw;
14335 	ap->av_forw = bp;
14336 
14337 	/*
14338 	 * If we inserted onto the tail end of the waitq, make sure the
14339 	 * tail pointer is updated.
14340 	 */
14341 	if (ap == un->un_waitq_tailp) {
14342 		un->un_waitq_tailp = bp;
14343 	}
14344 }
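
/*
 * Example: the one-way elevator sort above, worked on a small queue.
 * Suppose the waitq holds bufs at blocks 100, 400, 900 | 40, 70 (the
 * "|" marks the inversion separating the two sorted sublists):
 *
 *	insert blkno 500  -> 100, 400, 500, 900 | 40, 70
 *	insert blkno  60  -> 100, 400, 900 | 40, 60, 70
 *	insert blkno  10  -> 100, 400, 900 | 10, 40, 70
 *
 * A new request behind the current head (blkno < SD_GET_BLKNO(headp))
 * is filed into the second sublist; everything else is filed into the
 * first, keeping each sublist in ascending SD_GET_BLKNO() order.
 */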
14345 
14346 
14347 /*
14348  *    Function: sd_start_cmds
14349  *
14350  * Description: Remove and transport cmds from the driver queues.
14351  *
14352  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14353  *
14354  *		immed_bp - ptr to a buf to be transported immediately. Only
14355  *		the immed_bp is transported; bufs on the waitq are not
14356  *		processed and the un_retry_bp is not checked.  If immed_bp is
14357  *		NULL, then normal queue processing is performed.
14358  *
14359  *     Context: May be called from kernel thread context, interrupt context,
14360  *		or runout callback context. This function may not block or
14361  *		call routines that block.
14362  */
14363 
14364 static void
14365 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14366 {
14367 	struct	sd_xbuf	*xp;
14368 	struct	buf	*bp;
14369 	void	(*statp)(kstat_io_t *);
14370 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14371 	void	(*saved_statp)(kstat_io_t *);
14372 #endif
14373 	int	rval;
14374 
14375 	ASSERT(un != NULL);
14376 	ASSERT(mutex_owned(SD_MUTEX(un)));
14377 	ASSERT(un->un_ncmds_in_transport >= 0);
14378 	ASSERT(un->un_throttle >= 0);
14379 
14380 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14381 
14382 	do {
14383 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14384 		saved_statp = NULL;
14385 #endif
14386 
14387 		/*
14388 		 * If we are syncing or dumping, fail the command to
14389 		 * avoid recursively calling back into scsi_transport().
14390 		 * The dump I/O itself uses a separate code path so this
14391 		 * only prevents non-dump I/O from being sent while dumping.
14392 		 * File system sync takes place before dumping begins.
14393 		 * During panic, filesystem I/O is allowed provided
14394 		 * un_in_callback is <= 1.  This is to prevent recursion
14395 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14396 		 * sd_start_cmds and so on.  See panic.c for more information
14397 		 * about the states the system can be in during panic.
14398 		 */
14399 		if ((un->un_state == SD_STATE_DUMPING) ||
14400 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14401 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14402 			    "sd_start_cmds: panicking\n");
14403 			goto exit;
14404 		}
14405 
14406 		if ((bp = immed_bp) != NULL) {
14407 			/*
14408 			 * We have a bp that must be transported immediately.
14409 			 * It's OK to transport the immed_bp here without doing
14410 			 * the throttle limit check because the immed_bp is
14411 			 * always used in a retry/recovery case. This means
14412 			 * that we know we are not at the throttle limit by
14413 			 * virtue of the fact that to get here we must have
14414 			 * already gotten a command back via sdintr(). This also
14415 			 * relies on (1) the command on un_retry_bp preventing
14416 			 * further commands from the waitq from being issued;
14417 			 * and (2) the code in sd_retry_command checking the
14418 			 * throttle limit before issuing a delayed or immediate
14419 			 * retry. This holds even if the throttle limit is
14420 			 * currently ratcheted down from its maximum value.
14421 			 */
14422 			statp = kstat_runq_enter;
14423 			if (bp == un->un_retry_bp) {
14424 				ASSERT((un->un_retry_statp == NULL) ||
14425 				    (un->un_retry_statp == kstat_waitq_enter) ||
14426 				    (un->un_retry_statp ==
14427 				    kstat_runq_back_to_waitq));
14428 				/*
14429 				 * If the waitq kstat was incremented when
14430 				 * sd_set_retry_bp() queued this bp for a retry,
14431 				 * then we must set up statp so that the waitq
14432 				 * count will get decremented correctly below.
14433 				 * Also we must clear un->un_retry_statp to
14434 				 * ensure that we do not act on a stale value
14435 				 * in this field.
14436 				 */
14437 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14438 				    (un->un_retry_statp ==
14439 				    kstat_runq_back_to_waitq)) {
14440 					statp = kstat_waitq_to_runq;
14441 				}
14442 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14443 				saved_statp = un->un_retry_statp;
14444 #endif
14445 				un->un_retry_statp = NULL;
14446 
14447 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14448 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14449 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14450 				    un, un->un_retry_bp, un->un_throttle,
14451 				    un->un_ncmds_in_transport);
14452 			} else {
14453 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14454 				    "processing priority bp:0x%p\n", bp);
14455 			}
14456 
14457 		} else if ((bp = un->un_waitq_headp) != NULL) {
14458 			/*
14459 			 * A command on the waitq is ready to go, but do not
14460 			 * send it if:
14461 			 *
14462 			 * (1) the throttle limit has been reached, or
14463 			 * (2) a retry is pending, or
14464 			 * (3) a START_STOP_UNIT callback is pending, or
14465 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14466 			 *	command is pending.
14467 			 *
14468 			 * For all of these conditions, IO processing will
14469 			 * restart after the condition is cleared.
14470 			 */
14471 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14472 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14473 				    "sd_start_cmds: exiting, "
14474 				    "throttle limit reached!\n");
14475 				goto exit;
14476 			}
14477 			if (un->un_retry_bp != NULL) {
14478 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14479 				    "sd_start_cmds: exiting, retry pending!\n");
14480 				goto exit;
14481 			}
14482 			if (un->un_startstop_timeid != NULL) {
14483 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14484 				    "sd_start_cmds: exiting, "
14485 				    "START_STOP pending!\n");
14486 				goto exit;
14487 			}
14488 			if (un->un_direct_priority_timeid != NULL) {
14489 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14490 				    "sd_start_cmds: exiting, "
14491 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14492 				goto exit;
14493 			}
14494 
14495 			/* Dequeue the command */
14496 			un->un_waitq_headp = bp->av_forw;
14497 			if (un->un_waitq_headp == NULL) {
14498 				un->un_waitq_tailp = NULL;
14499 			}
14500 			bp->av_forw = NULL;
14501 			statp = kstat_waitq_to_runq;
14502 			SD_TRACE(SD_LOG_IO_CORE, un,
14503 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14504 
14505 		} else {
14506 			/* No work to do so bail out now */
14507 			SD_TRACE(SD_LOG_IO_CORE, un,
14508 			    "sd_start_cmds: no more work, exiting!\n");
14509 			goto exit;
14510 		}
14511 
14512 		/*
14513 		 * Reset the state to normal. This is the mechanism by which
14514 		 * the state transitions from either SD_STATE_RWAIT or
14515 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14516 		 * If state is SD_STATE_PM_CHANGING then this command is
14517 		 * part of the device power control and the state must
14518 		 * not be put back to normal. Doing so would
14519 		 * allow new commands to proceed when they shouldn't;
14520 		 * the device may be going off.
14521 		 */
14522 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14523 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14524 			New_state(un, SD_STATE_NORMAL);
14525 		}
14526 
14527 		xp = SD_GET_XBUF(bp);
14528 		ASSERT(xp != NULL);
14529 
14530 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14531 		/*
14532 		 * Allocate the scsi_pkt if we need one, or attach DMA
14533 		 * resources if we have a scsi_pkt that needs them. The
14534 		 * latter should only occur for commands that are being
14535 		 * retried.
14536 		 */
14537 		if ((xp->xb_pktp == NULL) ||
14538 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14539 #else
14540 		if (xp->xb_pktp == NULL) {
14541 #endif
14542 			/*
14543 			 * There is no scsi_pkt allocated for this buf. Call
14544 			 * the initpkt function to allocate & init one.
14545 			 *
14546 			 * The scsi_init_pkt runout callback functionality is
14547 			 * implemented as follows:
14548 			 *
14549 			 * 1) The initpkt function always calls
14550 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14551 			 *    callback routine.
14552 			 * 2) A successfully allocated packet is initialized
14553 			 *    and the I/O is transported.
14554 			 * 3) The I/O associated with an allocation resource
14555 			 *    failure is left on its queue to be retried via
14556 			 *    runout or the next I/O.
14557 			 * 4) The I/O associated with a DMA error is removed
14558 			 *    from the queue and failed with EIO. Processing of
14559 			 *    the transport queues is also halted to be
14560 			 *    restarted via runout or the next I/O.
14561 			 * 5) The I/O associated with a CDB size or packet
14562 			 *    size error is removed from the queue and failed
14563 			 *    with EIO. Processing of the transport queues is
14564 			 *    continued.
14565 			 *
14566 			 * Note: there is no interface for canceling a runout
14567 			 * callback. To prevent the driver from detaching or
14568 			 * suspending while a runout is pending, the driver
14569 			 * state is set to SD_STATE_RWAIT.
14570 			 *
14571 			 * Note: using the scsi_init_pkt callback facility can
14572 			 * result in an I/O request persisting at the head of
14573 			 * the list which cannot be satisfied even after
14574 			 * multiple retries. In the future the driver may
14575 			 * implement some kind of maximum runout count before
14576 			 * failing an I/O.
14577 			 *
14578 			 * Note: the use of funcp below may seem superfluous,
14579 			 * but it helps warlock figure out the correct
14580 			 * initpkt function calls (see [s]sd.wlcmd).
14581 			 */
14582 			struct scsi_pkt	*pktp;
14583 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14584 
14585 			ASSERT(bp != un->un_rqs_bp);
14586 
14587 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14588 			switch ((*funcp)(bp, &pktp)) {
14589 			case  SD_PKT_ALLOC_SUCCESS:
14590 				xp->xb_pktp = pktp;
14591 				SD_TRACE(SD_LOG_IO_CORE, un,
14592 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14593 				    pktp);
14594 				goto got_pkt;
14595 
14596 			case SD_PKT_ALLOC_FAILURE:
14597 				/*
14598 				 * Temporary (hopefully) resource depletion.
14599 				 * Since retries and RQS commands always have a
14600 				 * scsi_pkt allocated, these cases should never
14601 				 * get here. So the only cases this needs to
14602 				 * handle are a bp from the waitq (which we put
14603 				 * back onto the waitq for sdrunout), or a bp
14604 				 * sent as an immed_bp (which we just fail).
14605 				 */
14606 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14607 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14608 
14609 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14610 
14611 				if (bp == immed_bp) {
14612 					/*
14613 					 * If SD_XB_DMA_FREED is clear, then
14614 					 * this is a failure to allocate a
14615 					 * scsi_pkt, and we must fail the
14616 					 * command.
14617 					 */
14618 					if ((xp->xb_pkt_flags &
14619 					    SD_XB_DMA_FREED) == 0) {
14620 						break;
14621 					}
14622 
14623 					/*
14624 					 * If this immediate command is NOT our
14625 					 * un_retry_bp, then we must fail it.
14626 					 */
14627 					if (bp != un->un_retry_bp) {
14628 						break;
14629 					}
14630 
14631 					/*
14632 					 * We get here if this cmd is our
14633 					 * un_retry_bp that was DMAFREED, but
14634 					 * scsi_init_pkt() failed to reallocate
14635 					 * DMA resources when we attempted to
14636 					 * retry it. This can happen when an
14637 					 * mpxio failover is in progress, but
14638 					 * we don't want to just fail the
14639 					 * command in this case.
14640 					 *
14641 					 * Use timeout(9F) to restart it after
14642 					 * a 100ms delay.  We don't want to
14643 					 * let sdrunout() restart it, because
14644 					 * sdrunout() is just supposed to start
14645 					 * commands that are sitting on the
14646 					 * wait queue.  The un_retry_bp stays
14647 					 * set until the command completes, but
14648 					 * sdrunout can be called many times
14649 					 * before that happens.  Since sdrunout
14650 					 * cannot tell if the un_retry_bp is
14651 					 * already in the transport, it could
14652 					 * end up calling scsi_transport() for
14653 					 * the un_retry_bp multiple times.
14654 					 *
14655 					 * Also: don't schedule the callback
14656 					 * if some other callback is already
14657 					 * pending.
14658 					 */
14659 					if (un->un_retry_statp == NULL) {
14660 						/*
14661 						 * restore the kstat pointer to
14662 						 * keep kstat counts coherent
14663 						 * when we do retry the command.
14664 						 */
14665 						un->un_retry_statp =
14666 						    saved_statp;
14667 					}
14668 
14669 					if ((un->un_startstop_timeid == NULL) &&
14670 					    (un->un_retry_timeid == NULL) &&
14671 					    (un->un_direct_priority_timeid ==
14672 					    NULL)) {
14673 
14674 						un->un_retry_timeid =
14675 						    timeout(
14676 						    sd_start_retry_command,
14677 						    un, SD_RESTART_TIMEOUT);
14678 					}
14679 					goto exit;
14680 				}
14681 
14682 #else
14683 				if (bp == immed_bp) {
14684 					break;	/* Just fail the command */
14685 				}
14686 #endif
14687 
14688 				/* Add the buf back to the head of the waitq */
14689 				bp->av_forw = un->un_waitq_headp;
14690 				un->un_waitq_headp = bp;
14691 				if (un->un_waitq_tailp == NULL) {
14692 					un->un_waitq_tailp = bp;
14693 				}
14694 				goto exit;
14695 
14696 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14697 				/*
14698 				 * HBA DMA resource failure. Fail the command
14699 				 * and continue processing of the queues.
14700 				 */
14701 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14702 				    "sd_start_cmds: "
14703 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14704 				break;
14705 
14706 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14707 				/*
14708 				 * Note:x86: Partial DMA mapping not supported
14709 				 * for USCSI commands, and all the needed DMA
14710 				 * resources were not allocated.
14711 				 */
14712 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14713 				    "sd_start_cmds: "
14714 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14715 				break;
14716 
14717 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14718 				/*
14719 				 * Note:x86: Request cannot fit into CDB based
14720 				 * on lba and len.
14721 				 */
14722 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14723 				    "sd_start_cmds: "
14724 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14725 				break;
14726 
14727 			default:
14728 				/* Should NEVER get here! */
14729 				panic("scsi_initpkt error");
14730 				/*NOTREACHED*/
14731 			}
14732 
14733 			/*
14734 			 * Fatal error in allocating a scsi_pkt for this buf.
14735 			 * Update kstats & return the buf with an error code.
14736 			 * We must use sd_return_failed_command_no_restart() to
14737 			 * avoid a recursive call back into sd_start_cmds().
14738 			 * However this also means that we must keep processing
14739 			 * the waitq here in order to avoid stalling.
14740 			 */
14741 			if (statp == kstat_waitq_to_runq) {
14742 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14743 			}
14744 			sd_return_failed_command_no_restart(un, bp, EIO);
14745 			if (bp == immed_bp) {
14746 				/* immed_bp is gone by now, so clear this */
14747 				immed_bp = NULL;
14748 			}
14749 			continue;
14750 		}
14751 got_pkt:
14752 		if (bp == immed_bp) {
14753 			/* goto the head of the class.... */
14754 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14755 		}
14756 
14757 		un->un_ncmds_in_transport++;
14758 		SD_UPDATE_KSTATS(un, statp, bp);
14759 
14760 		/*
14761 		 * Call scsi_transport() to send the command to the target.
14762 		 * According to SCSA architecture, we must drop the mutex here
14763 		 * before calling scsi_transport() in order to avoid deadlock.
14764 		 * Note that the scsi_pkt's completion routine can be executed
14765 		 * (from interrupt context) even before the call to
14766 		 * scsi_transport() returns.
14767 		 */
14768 		SD_TRACE(SD_LOG_IO_CORE, un,
14769 		    "sd_start_cmds: calling scsi_transport()\n");
14770 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14771 
14772 		mutex_exit(SD_MUTEX(un));
14773 		rval = scsi_transport(xp->xb_pktp);
14774 		mutex_enter(SD_MUTEX(un));
14775 
14776 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14777 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14778 
14779 		switch (rval) {
14780 		case TRAN_ACCEPT:
14781 			/* Clear this with every pkt accepted by the HBA */
14782 			un->un_tran_fatal_count = 0;
14783 			break;	/* Success; try the next cmd (if any) */
14784 
14785 		case TRAN_BUSY:
14786 			un->un_ncmds_in_transport--;
14787 			ASSERT(un->un_ncmds_in_transport >= 0);
14788 
14789 			/*
14790 			 * Don't retry request sense; the sense data
14791 			 * is lost when another request is sent.
14792 			 * Free up the rqs buf and retry
14793 			 * the original failed cmd.  Update kstat.
14794 			 */
14795 			if (bp == un->un_rqs_bp) {
14796 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14797 				bp = sd_mark_rqs_idle(un, xp);
14798 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14799 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14800 					kstat_waitq_enter);
14801 				goto exit;
14802 			}
14803 
14804 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14805 			/*
14806 			 * Free the DMA resources for the  scsi_pkt. This will
14807 			 * allow mpxio to select another path the next time
14808 			 * we call scsi_transport() with this scsi_pkt.
14809 			 * See sdintr() for the rationale behind this.
14810 			 */
14811 			if ((un->un_f_is_fibre == TRUE) &&
14812 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14813 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14814 				scsi_dmafree(xp->xb_pktp);
14815 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14816 			}
14817 #endif
14818 
14819 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14820 				/*
14821 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14822 				 * are for error recovery situations. These do
14823 				 * not use the normal command waitq, so if they
14824 				 * get a TRAN_BUSY we cannot put them back onto
14825 				 * the waitq for later retry. One possible
14826 				 * problem is that there could already be some
14827 				 * other command on un_retry_bp that is waiting
14828 				 * for this one to complete, so we would be
14829 				 * deadlocked if we put this command back onto
14830 				 * the waitq for later retry (since un_retry_bp
14831 				 * must complete before the driver gets back to
14832 				 * commands on the waitq).
14833 				 *
14834 				 * To avoid deadlock we must schedule a callback
14835 				 * that will restart this command after a set
14836 				 * interval.  This should keep retrying for as
14837 				 * long as the underlying transport keeps
14838 				 * returning TRAN_BUSY (just like for other
14839 				 * commands).  Use the same timeout interval as
14840 				 * for the ordinary TRAN_BUSY retry.
14841 				 */
14842 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14843 				    "sd_start_cmds: scsi_transport() returned "
14844 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14845 
14846 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14847 				un->un_direct_priority_timeid =
14848 				    timeout(sd_start_direct_priority_command,
14849 				    bp, SD_BSY_TIMEOUT / 500);
14850 
14851 				goto exit;
14852 			}
14853 
14854 			/*
14855 			 * For TRAN_BUSY, we want to reduce the throttle value,
14856 			 * unless we are retrying a command.
14857 			 */
14858 			if (bp != un->un_retry_bp) {
14859 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14860 			}
14861 
14862 			/*
14863 			 * Set up the bp to be tried again 10 ms later.
14864 			 * Note:x86: Is there a timeout value in the sd_lun
14865 			 * for this condition?
14866 			 */
14867 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14868 				kstat_runq_back_to_waitq);
14869 			goto exit;
14870 
14871 		case TRAN_FATAL_ERROR:
14872 			un->un_tran_fatal_count++;
14873 			/* FALLTHRU */
14874 
14875 		case TRAN_BADPKT:
14876 		default:
14877 			un->un_ncmds_in_transport--;
14878 			ASSERT(un->un_ncmds_in_transport >= 0);
14879 
14880 			/*
14881 			 * If this is our REQUEST SENSE command with a
14882 			 * transport error, we must get back the pointers
14883 			 * to the original buf, and mark the REQUEST
14884 			 * SENSE command as "available".
14885 			 */
14886 			if (bp == un->un_rqs_bp) {
14887 				bp = sd_mark_rqs_idle(un, xp);
14888 				xp = SD_GET_XBUF(bp);
14889 			} else {
14890 				/*
14891 				 * Legacy behavior: do not update transport
14892 				 * error count for request sense commands.
14893 				 */
14894 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14895 			}
14896 
14897 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14898 			sd_print_transport_rejected_message(un, xp, rval);
14899 
14900 			/*
14901 			 * We must use sd_return_failed_command_no_restart() to
14902 			 * avoid a recursive call back into sd_start_cmds().
14903 			 * However this also means that we must keep processing
14904 			 * the waitq here in order to avoid stalling.
14905 			 */
14906 			sd_return_failed_command_no_restart(un, bp, EIO);
14907 
14908 			/*
14909 			 * Notify any threads waiting in sd_ddi_suspend() that
14910 			 * a command completion has occurred.
14911 			 */
14912 			if (un->un_state == SD_STATE_SUSPENDED) {
14913 				cv_broadcast(&un->un_disk_busy_cv);
14914 			}
14915 
14916 			if (bp == immed_bp) {
14917 				/* immed_bp is gone by now, so clear this */
14918 				immed_bp = NULL;
14919 			}
14920 			break;
14921 		}
14922 
14923 	} while (immed_bp == NULL);
14924 
14925 exit:
14926 	ASSERT(mutex_owned(SD_MUTEX(un)));
14927 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14928 }
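
/*
 * Editorial sketch (hypothetical, compiled out): the wait queue handled
 * above is a singly-linked list threaded through the buf's av_forw
 * pointer, with un_waitq_headp/un_waitq_tailp tracking the ends. The
 * head-insertion used when a command must be requeued, reduced to a
 * stand-alone helper. (The SD_EXAMPLE_SKETCH guard is not a real build
 * flag; this code never compiles into the driver.)
 */
#ifdef SD_EXAMPLE_SKETCH
static void
sd_example_waitq_push_head(struct sd_lun *un, struct buf *bp)
{
	ASSERT(mutex_owned(SD_MUTEX(un)));
	bp->av_forw = un->un_waitq_headp;
	un->un_waitq_headp = bp;
	if (un->un_waitq_tailp == NULL) {
		un->un_waitq_tailp = bp;	/* list was empty */
	}
}
#endif	/* SD_EXAMPLE_SKETCH */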
14929 
14930 
14931 /*
14932  *    Function: sd_return_command
14933  *
14934  * Description: Returns a command to its originator (with or without an
14935  *		error).  Also starts commands waiting to be transported
14936  *		to the target.
14937  *
14938  *     Context: May be called from interrupt, kernel, or timeout context
14939  */
14940 
14941 static void
14942 sd_return_command(struct sd_lun *un, struct buf *bp)
14943 {
14944 	struct sd_xbuf *xp;
14945 #if defined(__i386) || defined(__amd64)
14946 	struct scsi_pkt *pktp;
14947 #endif
14948 
14949 	ASSERT(bp != NULL);
14950 	ASSERT(un != NULL);
14951 	ASSERT(mutex_owned(SD_MUTEX(un)));
14952 	ASSERT(bp != un->un_rqs_bp);
14953 	xp = SD_GET_XBUF(bp);
14954 	ASSERT(xp != NULL);
14955 
14956 #if defined(__i386) || defined(__amd64)
14957 	pktp = SD_GET_PKTP(bp);
14958 #endif
14959 
14960 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14961 
14962 #if defined(__i386) || defined(__amd64)
14963 	/*
14964 	 * Note:x86: check for the "sdrestart failed" case.
14965 	 */
14966 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14967 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14968 		(xp->xb_pktp->pkt_resid == 0)) {
14969 
14970 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14971 			/*
14972 			 * Successfully set up next portion of cmd
14973 			 * transfer, try sending it
14974 			 */
14975 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14976 			    NULL, NULL, 0, (clock_t)0, NULL);
14977 			sd_start_cmds(un, NULL);
14978 			return;	/* Note:x86: need a return here? */
14979 		}
14980 	}
14981 #endif
14982 
14983 	/*
14984 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14985 	 * can happen if upon being re-tried the failfast bp either
14986 	 * succeeded or encountered another error (possibly even a different
14987 	 * error than the one that precipitated the failfast state, but in
14988 	 * that case it would have had to exhaust retries as well). Regardless,
14989 	 * this should never occur while the instance is in the active
14990 	 * failfast state.
14991 	 */
14992 	if (bp == un->un_failfast_bp) {
14993 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14994 		un->un_failfast_bp = NULL;
14995 	}
14996 
14997 	/*
14998 	 * Clear the failfast state upon successful completion of ANY cmd.
14999 	 */
15000 	if (bp->b_error == 0) {
15001 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15002 	}
15003 
15004 	/*
15005 	 * This is used if the command was retried one or more times. Show that
15006 	 * we are done with it, and allow processing of the waitq to resume.
15007 	 */
15008 	if (bp == un->un_retry_bp) {
15009 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15010 		    "sd_return_command: un:0x%p: "
15011 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15012 		un->un_retry_bp = NULL;
15013 		un->un_retry_statp = NULL;
15014 	}
15015 
15016 	SD_UPDATE_RDWR_STATS(un, bp);
15017 	SD_UPDATE_PARTITION_STATS(un, bp);
15018 
15019 	switch (un->un_state) {
15020 	case SD_STATE_SUSPENDED:
15021 		/*
15022 		 * Notify any threads waiting in sd_ddi_suspend() that
15023 		 * a command completion has occurred.
15024 		 */
15025 		cv_broadcast(&un->un_disk_busy_cv);
15026 		break;
15027 	default:
15028 		sd_start_cmds(un, NULL);
15029 		break;
15030 	}
15031 
15032 	/* Return this command up the iodone chain to its originator. */
15033 	mutex_exit(SD_MUTEX(un));
15034 
15035 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15036 	xp->xb_pktp = NULL;
15037 
15038 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15039 
15040 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15041 	mutex_enter(SD_MUTEX(un));
15042 
15043 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15044 }
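
/*
 * Editorial sketch (hypothetical, compiled out): the iodone dispatch in
 * sd_return_command() above indexes an array of function pointers
 * (sd_destroypkt_map[]) by the xbuf's chain index. The pattern, reduced
 * to stand-alone form with a made-up handler and table:
 */
#ifdef SD_EXAMPLE_SKETCH
typedef void (*sd_example_destroypkt_t)(struct buf *);

static void
sd_example_destroypkt_std(struct buf *bp)
{
	/* hypothetical: pkt teardown for one iodone chain */
}

static sd_example_destroypkt_t sd_example_map[] = {
	sd_example_destroypkt_std	/* one entry per iodone chain */
};

static void
sd_example_dispatch(struct buf *bp, int chain_index)
{
	/* same form as (*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp) */
	(*(sd_example_map[chain_index]))(bp);
}
#endif	/* SD_EXAMPLE_SKETCH */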
15045 
15046 
15047 /*
15048  *    Function: sd_return_failed_command
15049  *
15050  * Description: Command completion when an error occurred.
15051  *
15052  *     Context: May be called from interrupt context
15053  */
15054 
15055 static void
15056 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15057 {
15058 	ASSERT(bp != NULL);
15059 	ASSERT(un != NULL);
15060 	ASSERT(mutex_owned(SD_MUTEX(un)));
15061 
15062 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15063 	    "sd_return_failed_command: entry\n");
15064 
15065 	/*
15066 	 * b_resid could already be nonzero due to a partial data
15067 	 * transfer, so do not change it here.
15068 	 */
15069 	SD_BIOERROR(bp, errcode);
15070 
15071 	sd_return_command(un, bp);
15072 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15073 	    "sd_return_failed_command: exit\n");
15074 }
15075 
15076 
15077 /*
15078  *    Function: sd_return_failed_command_no_restart
15079  *
15080  * Description: Same as sd_return_failed_command, but ensures that no
15081  *		call back into sd_start_cmds will be issued.
15082  *
15083  *     Context: May be called from interrupt context
15084  */
15085 
15086 static void
15087 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15088 	int errcode)
15089 {
15090 	struct sd_xbuf *xp;
15091 
15092 	ASSERT(bp != NULL);
15093 	ASSERT(un != NULL);
15094 	ASSERT(mutex_owned(SD_MUTEX(un)));
15095 	xp = SD_GET_XBUF(bp);
15096 	ASSERT(xp != NULL);
15097 	ASSERT(errcode != 0);
15098 
15099 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15100 	    "sd_return_failed_command_no_restart: entry\n");
15101 
15102 	/*
15103 	 * b_resid could already be nonzero due to a partial data
15104 	 * transfer, so do not change it here.
15105 	 */
15106 	SD_BIOERROR(bp, errcode);
15107 
15108 	/*
15109 	 * If this is the failfast bp, clear it. This can happen if the
15110 	 * failfast bp encountered a fatal error when we attempted to
15111 	 * re-try it (such as a scsi_transport(9F) failure).  However
15112 	 * we should NOT be in an active failfast state if the failfast
15113 	 * bp is not NULL.
15114 	 */
15115 	if (bp == un->un_failfast_bp) {
15116 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15117 		un->un_failfast_bp = NULL;
15118 	}
15119 
15120 	if (bp == un->un_retry_bp) {
15121 		/*
15122 		 * This command was retried one or more times. Show that we are
15123 		 * done with it, and allow processing of the waitq to resume.
15124 		 */
15125 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15126 		    "sd_return_failed_command_no_restart: "
15127 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15128 		un->un_retry_bp = NULL;
15129 		un->un_retry_statp = NULL;
15130 	}
15131 
15132 	SD_UPDATE_RDWR_STATS(un, bp);
15133 	SD_UPDATE_PARTITION_STATS(un, bp);
15134 
15135 	mutex_exit(SD_MUTEX(un));
15136 
15137 	if (xp->xb_pktp != NULL) {
15138 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15139 		xp->xb_pktp = NULL;
15140 	}
15141 
15142 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15143 
15144 	mutex_enter(SD_MUTEX(un));
15145 
15146 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15147 	    "sd_return_failed_command_no_restart: exit\n");
15148 }
15149 
15150 
15151 /*
15152  *    Function: sd_retry_command
15153  *
15154  * Description: queue up a command for retry, or (optionally) fail it
15155  *		if retry counts are exhausted.
15156  *
15157  *   Arguments: un - Pointer to the sd_lun struct for the target.
15158  *
15159  *		bp - Pointer to the buf for the command to be retried.
15160  *
15161  *		retry_check_flag - Flag to see which (if any) of the retry
15162  *		   counts should be decremented/checked. If the indicated
15163  *		   retry count is exhausted, then the command will not be
15164  *		   retried; it will be failed instead. This should use a
15165  *		   value equal to one of the following:
15166  *
15167  *			SD_RETRIES_NOCHECK
15168  *			SD_RETRIES_STANDARD
15169  *			SD_RETRIES_VICTIM
15170  *
15171  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15172  *		   if the check should be made to see if FLAG_ISOLATE is set
15173  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15174  *		   not retried; it is simply failed.
15175  *
15176  *		user_funcp - Ptr to function to call before dispatching the
15177  *		   command. May be NULL if no action needs to be performed.
15178  *		   (Primarily intended for printing messages.)
15179  *
15180  *		user_arg - Optional argument to be passed along to
15181  *		   the user_funcp call.
15182  *
15183  *		failure_code - errno return code to set in the bp if the
15184  *		   command is going to be failed.
15185  *
15186  *		retry_delay - Retry delay interval in (clock_t) units. May
15187  *		   be zero, which indicates that the command should be retried
15188  *		   immediately (ie, without an intervening delay).
15189  *
15190  *		statp - Ptr to kstat function to be updated if the command
15191  *		   is queued for a delayed retry. May be NULL if no kstat
15192  *		   update is desired.
15193  *
15194  *     Context: May be called from interrupt context.
15195  */
15196 
15197 static void
15198 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15199 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15200 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15201 	void (*statp)(kstat_io_t *))
15202 {
15203 	struct sd_xbuf	*xp;
15204 	struct scsi_pkt	*pktp;
15205 
15206 	ASSERT(un != NULL);
15207 	ASSERT(mutex_owned(SD_MUTEX(un)));
15208 	ASSERT(bp != NULL);
15209 	xp = SD_GET_XBUF(bp);
15210 	ASSERT(xp != NULL);
15211 	pktp = SD_GET_PKTP(bp);
15212 	ASSERT(pktp != NULL);
15213 
15214 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15215 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15216 
15217 	/*
15218 	 * If we are syncing or dumping, fail the command to avoid
15219 	 * recursively calling back into scsi_transport().
15220 	 */
15221 	if (ddi_in_panic()) {
15222 		goto fail_command_no_log;
15223 	}
15224 
15225 	/*
15226 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15227 	 * log an error and fail the command.
15228 	 */
15229 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15230 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15231 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15232 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15233 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15234 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15235 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15236 		goto fail_command;
15237 	}
15238 
15239 	/*
15240 	 * If we are suspended, then put the command onto head of the
15241 	 * wait queue since we don't want to start more commands.
15242 	 */
15243 	switch (un->un_state) {
15244 	case SD_STATE_SUSPENDED:
15245 	case SD_STATE_DUMPING:
15246 		bp->av_forw = un->un_waitq_headp;
15247 		un->un_waitq_headp = bp;
15248 		if (un->un_waitq_tailp == NULL) {
15249 			un->un_waitq_tailp = bp;
15250 		}
15251 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15252 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15253 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15254 		return;
15255 	default:
15256 		break;
15257 	}
15258 
15259 	/*
15260 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15261 	 * is set; if it is then we do not want to retry the command.
15262 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15263 	 */
15264 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15265 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15266 			goto fail_command;
15267 		}
15268 	}
15269 
15270 
15271 	/*
15272 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15273 	 * command timeout or a selection timeout has occurred. This means
15274 	 * that we were unable to establish any kind of communication with
15275 	 * the target, and subsequent retries and/or commands are likely
15276 	 * to encounter similar results and take a long time to complete.
15277 	 *
15278 	 * If this is a failfast error condition, we need to update the
15279 	 * failfast state, even if this bp does not have B_FAILFAST set.
15280 	 */
15281 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15282 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15283 			ASSERT(un->un_failfast_bp == NULL);
15284 			/*
15285 			 * If we are already in the active failfast state, and
15286 			 * another failfast error condition has been detected,
15287 			 * then fail this command if it has B_FAILFAST set.
15288 			 * If B_FAILFAST is clear, then maintain the legacy
15289 	 * behavior of retrying heroically, even though this will
15290 			 * take a lot more time to fail the command.
15291 			 */
15292 			if (bp->b_flags & B_FAILFAST) {
15293 				goto fail_command;
15294 			}
15295 		} else {
15296 			/*
15297 			 * We're not in the active failfast state, but we
15298 			 * have a failfast error condition, so we must begin
15299 			 * transition to the next state. We do this regardless
15300 			 * of whether or not this bp has B_FAILFAST set.
15301 			 */
15302 			if (un->un_failfast_bp == NULL) {
15303 				/*
15304 				 * This is the first bp to meet a failfast
15305 				 * condition so save it on un_failfast_bp &
15306 				 * do normal retry processing. Do not enter
15307 				 * active failfast state yet. This marks
15308 				 * entry into the "failfast pending" state.
15309 				 */
15310 				un->un_failfast_bp = bp;
15311 
15312 			} else if (un->un_failfast_bp == bp) {
15313 				/*
15314 				 * This is the second time *this* bp has
15315 				 * encountered a failfast error condition,
15316 				 * so enter active failfast state & flush
15317 				 * queues as appropriate.
15318 				 */
15319 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15320 				un->un_failfast_bp = NULL;
15321 				sd_failfast_flushq(un);
15322 
15323 				/*
15324 				 * Fail this bp now if B_FAILFAST set;
15325 				 * otherwise continue with retries. (It would
15326 				 * be pretty ironic if this bp succeeded on a
15327 				 * subsequent retry after we just flushed all
15328 				 * the queues).
15329 				 */
15330 				if (bp->b_flags & B_FAILFAST) {
15331 					goto fail_command;
15332 				}
15333 
15334 #if !defined(lint) && !defined(__lint)
15335 			} else {
15336 				/*
15337 	 * If neither of the preceding conditionals
15338 				 * was true, it means that there is some
15339 	 * *other* bp that has met an initial failfast
15340 				 * condition and is currently either being
15341 				 * retried or is waiting to be retried. In
15342 				 * that case we should perform normal retry
15343 				 * processing on *this* bp, since there is a
15344 				 * chance that the current failfast condition
15345 				 * is transient and recoverable. If that does
15346 				 * not turn out to be the case, then retries
15347 				 * will be cleared when the wait queue is
15348 				 * flushed anyway.
15349 				 */
15350 #endif
15351 			}
15352 		}
15353 	} else {
15354 		/*
15355 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15356 		 * likely were able to at least establish some level of
15357 		 * communication with the target and subsequent commands
15358 	 * and/or retries are likely to get through to the target.
15359 		 * In this case we want to be aggressive about clearing
15360 		 * the failfast state. Note that this does not affect
15361 		 * the "failfast pending" condition.
15362 		 */
15363 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15364 	}
15365 
15366 
15367 	/*
15368 	 * Check the specified retry count to see if we can still do
15369 	 * any retries with this pkt before we should fail it.
15370 	 */
15371 	switch (retry_check_flag & SD_RETRIES_MASK) {
15372 	case SD_RETRIES_VICTIM:
15373 		/*
15374 		 * Check the victim retry count. If exhausted, then fall
15375 		 * thru & check against the standard retry count.
15376 		 */
15377 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15378 			/* Increment count & proceed with the retry */
15379 			xp->xb_victim_retry_count++;
15380 			break;
15381 		}
15382 		/* Victim retries exhausted, fall back to std. retries... */
15383 		/* FALLTHRU */
15384 
15385 	case SD_RETRIES_STANDARD:
15386 		if (xp->xb_retry_count >= un->un_retry_count) {
15387 			/* Retries exhausted, fail the command */
15388 			SD_TRACE(SD_LOG_IO_CORE, un,
15389 			    "sd_retry_command: retries exhausted!\n");
15390 			/*
15391 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15392 			 * commands with nonzero pkt_resid.
15393 			 */
15394 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15395 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15396 			    (pktp->pkt_resid != 0)) {
15397 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15398 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15399 					SD_UPDATE_B_RESID(bp, pktp);
15400 				}
15401 			}
15402 			goto fail_command;
15403 		}
15404 		xp->xb_retry_count++;
15405 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15406 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15407 		break;
15408 
15409 	case SD_RETRIES_UA:
15410 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15411 			/* Retries exhausted, fail the command */
15412 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15413 			    "Unit Attention retries exhausted. "
15414 			    "Check the target.\n");
15415 			goto fail_command;
15416 		}
15417 		xp->xb_ua_retry_count++;
15418 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15419 		    "sd_retry_command: retry count:%d\n",
15420 			xp->xb_ua_retry_count);
15421 		break;
15422 
15423 	case SD_RETRIES_BUSY:
15424 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15425 			/* Retries exhausted, fail the command */
15426 			SD_TRACE(SD_LOG_IO_CORE, un,
15427 			    "sd_retry_command: retries exhausted!\n");
15428 			goto fail_command;
15429 		}
15430 		xp->xb_retry_count++;
15431 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15432 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15433 		break;
15434 
15435 	case SD_RETRIES_NOCHECK:
15436 	default:
15437 		/* No retry count to check. Just proceed with the retry */
15438 		break;
15439 	}
15440 
15441 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15442 
15443 	/*
15444 	 * If we were given a zero timeout, we must attempt to retry the
15445 	 * command immediately (ie, without a delay).
15446 	 */
15447 	if (retry_delay == 0) {
15448 		/*
15449 		 * Check some limiting conditions to see if we can actually
15450 		 * do the immediate retry.  If we cannot, then we must
15451 		 * fall back to queueing up a delayed retry.
15452 		 */
15453 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15454 			/*
15455 			 * We are at the throttle limit for the target,
15456 			 * fall back to delayed retry.
15457 			 */
15458 			retry_delay = SD_BSY_TIMEOUT;
15459 			statp = kstat_waitq_enter;
15460 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15461 			    "sd_retry_command: immed. retry hit "
15462 			    "throttle!\n");
15463 		} else {
15464 			/*
15465 			 * We're clear to proceed with the immediate retry.
15466 			 * First call the user-provided function (if any)
15467 			 */
15468 			if (user_funcp != NULL) {
15469 				(*user_funcp)(un, bp, user_arg,
15470 				    SD_IMMEDIATE_RETRY_ISSUED);
15471 #ifdef __lock_lint
15472 				sd_print_incomplete_msg(un, bp, user_arg,
15473 				    SD_IMMEDIATE_RETRY_ISSUED);
15474 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15475 				    SD_IMMEDIATE_RETRY_ISSUED);
15476 				sd_print_sense_failed_msg(un, bp, user_arg,
15477 				    SD_IMMEDIATE_RETRY_ISSUED);
15478 #endif
15479 			}
15480 
15481 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15482 			    "sd_retry_command: issuing immediate retry\n");
15483 
15484 			/*
15485 			 * Call sd_start_cmds() to transport the command to
15486 			 * the target.
15487 			 */
15488 			sd_start_cmds(un, bp);
15489 
15490 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15491 			    "sd_retry_command exit\n");
15492 			return;
15493 		}
15494 	}
15495 
15496 	/*
15497 	 * Set up to retry the command after a delay.
15498 	 * First call the user-provided function (if any)
15499 	 */
15500 	if (user_funcp != NULL) {
15501 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15502 	}
15503 
15504 	sd_set_retry_bp(un, bp, retry_delay, statp);
15505 
15506 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15507 	return;
15508 
15509 fail_command:
15510 
15511 	if (user_funcp != NULL) {
15512 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15513 	}
15514 
15515 fail_command_no_log:
15516 
15517 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15518 	    "sd_retry_command: returning failed command\n");
15519 
15520 	sd_return_failed_command(un, bp, failure_code);
15521 
15522 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15523 }
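
/*
 * Editorial sketch (hypothetical, compiled out): a typical delayed-retry
 * request as issued elsewhere in this file -- check the standard retry
 * count, fail with EIO once it is exhausted, delay by
 * SD_BSY_TIMEOUT / 500 ticks, and account the wait in the waitq kstat:
 */
#ifdef SD_EXAMPLE_SKETCH
static void
sd_example_delayed_retry(struct sd_lun *un, struct buf *bp)
{
	/* hypothetical helper, for illustration only */
	ASSERT(mutex_owned(SD_MUTEX(un)));
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
	    EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
}
#endif	/* SD_EXAMPLE_SKETCH */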
15524 
15525 
15526 /*
15527  *    Function: sd_set_retry_bp
15528  *
15529  * Description: Set up the given bp for retry.
15530  *
15531  *   Arguments: un - ptr to associated softstate
15532  *		bp - ptr to buf(9S) for the command
15533  *		retry_delay - time interval before issuing retry (may be 0)
15534  *		statp - optional pointer to kstat function
15535  *
15536  *     Context: May be called under interrupt context
15537  */
15538 
15539 static void
15540 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15541 	void (*statp)(kstat_io_t *))
15542 {
15543 	ASSERT(un != NULL);
15544 	ASSERT(mutex_owned(SD_MUTEX(un)));
15545 	ASSERT(bp != NULL);
15546 
15547 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15548 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15549 
15550 	/*
15551 	 * Indicate that the command is being retried. This will not allow any
15552 	 * other commands on the wait queue to be transported to the target
15553 	 * until this command has been completed (success or failure). The
15554 	 * "retry command" is not transported to the target until the given
15555 	 * time delay expires, unless the user specified a 0 retry_delay.
15556 	 *
15557 	 * Note: the timeout(9F) callback routine is what actually calls
15558 	 * sd_start_cmds() to transport the command, with the exception of a
15559 	 * zero retry_delay. The only current implementor of a zero retry delay
15560 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15561 	 */
15562 	if (un->un_retry_bp == NULL) {
15563 		ASSERT(un->un_retry_statp == NULL);
15564 		un->un_retry_bp = bp;
15565 
15566 		/*
15567 		 * If the user has not specified a delay, the command should
15568 		 * be queued and no timeout should be scheduled.
15569 		 */
15570 		if (retry_delay == 0) {
15571 			/*
15572 			 * Save the kstat pointer that will be used in the
15573 			 * call to SD_UPDATE_KSTATS() below, so that
15574 			 * sd_start_cmds() can correctly decrement the waitq
15575 			 * count when it is time to transport this command.
15576 			 */
15577 			un->un_retry_statp = statp;
15578 			goto done;
15579 		}
15580 	}
15581 
15582 	if (un->un_retry_bp == bp) {
15583 		/*
15584 		 * Save the kstat pointer that will be used in the call to
15585 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15586 		 * correctly decrement the waitq count when it is time to
15587 		 * transport this command.
15588 		 */
15589 		un->un_retry_statp = statp;
15590 
15591 		/*
15592 		 * Schedule a timeout if:
15593 		 *   1) The user has specified a delay.
15594 		 *   2) There is not a START_STOP_UNIT callback pending.
15595 		 *
15596 		 * If no delay has been specified, then it is up to the caller
15597 		 * to ensure that IO processing continues without stalling.
15598 		 * Effectively, this means that the caller will issue the
15599 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15600 		 * callback does this after the START STOP UNIT command has
15601 		 * completed. In either of these cases we should not schedule
15602 		 * a timeout callback here.  Also don't schedule the timeout if
15603 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15604 		 */
15605 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15606 		    (un->un_direct_priority_timeid == NULL)) {
15607 			un->un_retry_timeid =
15608 			    timeout(sd_start_retry_command, un, retry_delay);
15609 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15610 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15611 			    " bp:0x%p un_retry_timeid:0x%p\n",
15612 			    un, bp, un->un_retry_timeid);
15613 		}
15614 	} else {
15615 		/*
15616 		 * We only get in here if there is already another command
15617 		 * waiting to be retried.  In this case, we just put the
15618 		 * given command onto the wait queue, so it can be transported
15619 		 * after the current retry command has completed.
15620 		 *
15621 		 * Also we have to make sure that if the command at the head
15622 		 * of the wait queue is the un_failfast_bp, that we do not
15623 		 * put ahead of it any other commands that are to be retried.
15624 		 */
15625 		if ((un->un_failfast_bp != NULL) &&
15626 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15627 			/*
15628 			 * Enqueue this command AFTER the first command on
15629 			 * the wait queue (which is also un_failfast_bp).
15630 			 */
15631 			bp->av_forw = un->un_waitq_headp->av_forw;
15632 			un->un_waitq_headp->av_forw = bp;
15633 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15634 				un->un_waitq_tailp = bp;
15635 			}
15636 		} else {
15637 			/* Enqueue this command at the head of the waitq. */
15638 			bp->av_forw = un->un_waitq_headp;
15639 			un->un_waitq_headp = bp;
15640 			if (un->un_waitq_tailp == NULL) {
15641 				un->un_waitq_tailp = bp;
15642 			}
15643 		}
15644 
15645 		if (statp == NULL) {
15646 			statp = kstat_waitq_enter;
15647 		}
15648 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15649 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15650 	}
15651 
15652 done:
15653 	if (statp != NULL) {
15654 		SD_UPDATE_KSTATS(un, statp, bp);
15655 	}
15656 
15657 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15658 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15659 }
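
/*
 * Editorial sketch (hypothetical, compiled out): the timeout-id handshake
 * used with timeout(9F) throughout this file. The id returned by
 * timeout(9F) is saved in the softstate; the callback clears it under
 * SD_MUTEX before doing its work, and only one callback of a given kind
 * is ever scheduled at a time:
 */
#ifdef SD_EXAMPLE_SKETCH
static void
sd_example_timeout_cb(void *arg)
{
	struct sd_lun *un = arg;

	mutex_enter(SD_MUTEX(un));
	un->un_retry_timeid = NULL;	/* mark the callback as fired */
	/* ... perform the deferred work, e.g. sd_start_cmds() ... */
	mutex_exit(SD_MUTEX(un));
}

static void
sd_example_schedule(struct sd_lun *un, clock_t delay)
{
	/* hypothetical helper, for illustration only */
	ASSERT(mutex_owned(SD_MUTEX(un)));
	if (un->un_retry_timeid == NULL) {
		un->un_retry_timeid =
		    timeout(sd_example_timeout_cb, un, delay);
	}
}
#endif	/* SD_EXAMPLE_SKETCH */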
15660 
15661 
15662 /*
15663  *    Function: sd_start_retry_command
15664  *
15665  * Description: Start the command that has been waiting on the target's
15666  *		retry queue.  Called from timeout(9F) context after the
15667  *		retry delay interval has expired.
15668  *
15669  *   Arguments: arg - pointer to associated softstate for the device.
15670  *
15671  *     Context: timeout(9F) thread context.  May not sleep.
15672  */
15673 
15674 static void
15675 sd_start_retry_command(void *arg)
15676 {
15677 	struct sd_lun *un = arg;
15678 
15679 	ASSERT(un != NULL);
15680 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15681 
15682 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15683 	    "sd_start_retry_command: entry\n");
15684 
15685 	mutex_enter(SD_MUTEX(un));
15686 
15687 	un->un_retry_timeid = NULL;
15688 
15689 	if (un->un_retry_bp != NULL) {
15690 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15691 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15692 		    un, un->un_retry_bp);
15693 		sd_start_cmds(un, un->un_retry_bp);
15694 	}
15695 
15696 	mutex_exit(SD_MUTEX(un));
15697 
15698 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15699 	    "sd_start_retry_command: exit\n");
15700 }
15701 
15702 
15703 /*
15704  *    Function: sd_start_direct_priority_command
15705  *
15706  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15707  *		received TRAN_BUSY when we called scsi_transport() to send it
15708  *		to the underlying HBA. This function is called from timeout(9F)
15709  *		context after the delay interval has expired.
15710  *
15711  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15712  *
15713  *     Context: timeout(9F) thread context.  May not sleep.
15714  */
15715 
15716 static void
15717 sd_start_direct_priority_command(void *arg)
15718 {
15719 	struct buf	*priority_bp = arg;
15720 	struct sd_lun	*un;
15721 
15722 	ASSERT(priority_bp != NULL);
15723 	un = SD_GET_UN(priority_bp);
15724 	ASSERT(un != NULL);
15725 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15726 
15727 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15728 	    "sd_start_direct_priority_command: entry\n");
15729 
15730 	mutex_enter(SD_MUTEX(un));
15731 	un->un_direct_priority_timeid = NULL;
15732 	sd_start_cmds(un, priority_bp);
15733 	mutex_exit(SD_MUTEX(un));
15734 
15735 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15736 	    "sd_start_direct_priority_command: exit\n");
15737 }
15738 
15739 
15740 /*
15741  *    Function: sd_send_request_sense_command
15742  *
15743  * Description: Sends a REQUEST SENSE command to the target
15744  *
15745  *     Context: May be called from interrupt context.
15746  */
15747 
15748 static void
15749 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15750 	struct scsi_pkt *pktp)
15751 {
15752 	ASSERT(bp != NULL);
15753 	ASSERT(un != NULL);
15754 	ASSERT(mutex_owned(SD_MUTEX(un)));
15755 
15756 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15757 	    "entry: buf:0x%p\n", bp);
15758 
15759 	/*
15760 	 * If we are syncing or dumping, then fail the command to avoid a
15761 	 * recursive callback into scsi_transport(). Also fail the command
15762 	 * if we are suspended (legacy behavior).
15763 	 */
15764 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15765 	    (un->un_state == SD_STATE_DUMPING)) {
15766 		sd_return_failed_command(un, bp, EIO);
15767 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15768 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15769 		return;
15770 	}
15771 
15772 	/*
15773 	 * Retry the failed command and don't issue the request sense if:
15774 	 *    1) the sense buf is busy
15775 	 *    2) we have 1 or more outstanding commands on the target
15776 	 *    (the sense data will be cleared or invalidated anyway)
15777 	 *
15778 	 * Note: There could be an issue with not checking a retry limit here;
15779 	 * the problem is determining which retry limit to check.
15780 	 */
15781 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15782 		/* Don't retry if the command is flagged as non-retryable */
15783 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15784 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15785 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15786 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15787 			    "sd_send_request_sense_command: "
15788 			    "at full throttle, retrying exit\n");
15789 		} else {
15790 			sd_return_failed_command(un, bp, EIO);
15791 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15792 			    "sd_send_request_sense_command: "
15793 			    "at full throttle, non-retryable exit\n");
15794 		}
15795 		return;
15796 	}
15797 
15798 	sd_mark_rqs_busy(un, bp);
15799 	sd_start_cmds(un, un->un_rqs_bp);
15800 
15801 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15802 	    "sd_send_request_sense_command: exit\n");
15803 }
15804 
15805 
15806 /*
15807  *    Function: sd_mark_rqs_busy
15808  *
15809  * Description: Indicate that the request sense bp for this instance is
15810  *		in use.
15811  *
15812  *     Context: May be called under interrupt context
15813  */
15814 
15815 static void
15816 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15817 {
15818 	struct sd_xbuf	*sense_xp;
15819 
15820 	ASSERT(un != NULL);
15821 	ASSERT(bp != NULL);
15822 	ASSERT(mutex_owned(SD_MUTEX(un)));
15823 	ASSERT(un->un_sense_isbusy == 0);
15824 
15825 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15826 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15827 
15828 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15829 	ASSERT(sense_xp != NULL);
15830 
15831 	SD_INFO(SD_LOG_IO, un,
15832 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15833 
15834 	ASSERT(sense_xp->xb_pktp != NULL);
15835 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15836 	    == (FLAG_SENSING | FLAG_HEAD));
15837 
15838 	un->un_sense_isbusy = 1;
15839 	un->un_rqs_bp->b_resid = 0;
15840 	sense_xp->xb_pktp->pkt_resid  = 0;
15841 	sense_xp->xb_pktp->pkt_reason = 0;
15842 
15843 	/* So we can get back the bp at interrupt time! */
15844 	sense_xp->xb_sense_bp = bp;
15845 
15846 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15847 
15848 	/*
15849 	 * Mark this buf as awaiting sense data. (This is already set in
15850 	 * the pkt_flags for the RQS packet.)
15851 	 */
15852 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15853 
15854 	sense_xp->xb_retry_count	= 0;
15855 	sense_xp->xb_victim_retry_count = 0;
15856 	sense_xp->xb_ua_retry_count	= 0;
15857 	sense_xp->xb_dma_resid  = 0;
15858 
15859 	/* Clean up the fields for auto-request sense */
15860 	sense_xp->xb_sense_status = 0;
15861 	sense_xp->xb_sense_state  = 0;
15862 	sense_xp->xb_sense_resid  = 0;
15863 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15864 
15865 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15866 }
15867 
15868 
15869 /*
15870  *    Function: sd_mark_rqs_idle
15871  *
15872  * Description: SD_MUTEX must be held continuously through this routine
15873  *		to prevent reuse of the rqs struct before the caller can
15874  *		complete its processing.
15875  *
15876  * Return Code: Pointer to the RQS buf
15877  *
15878  *     Context: May be called under interrupt context
15879  */
15880 
15881 static struct buf *
15882 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15883 {
15884 	struct buf *bp;
15885 	ASSERT(un != NULL);
15886 	ASSERT(sense_xp != NULL);
15887 	ASSERT(mutex_owned(SD_MUTEX(un)));
15888 	ASSERT(un->un_sense_isbusy != 0);
15889 
15890 	un->un_sense_isbusy = 0;
15891 	bp = sense_xp->xb_sense_bp;
15892 	sense_xp->xb_sense_bp = NULL;
15893 
15894 	/* This pkt is no longer interested in getting sense data */
15895 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15896 
15897 	return (bp);
15898 }
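
/*
 * Editorial sketch (hypothetical, compiled out): the busy/idle handshake
 * on the single per-instance RQS packet. Under SD_MUTEX exactly one
 * command may own the sense resources at a time; a caller claims them
 * with sd_mark_rqs_busy() only if un_sense_isbusy is clear:
 */
#ifdef SD_EXAMPLE_SKETCH
static int
sd_example_try_claim_rqs(struct sd_lun *un, struct buf *bp)
{
	/* hypothetical helper, for illustration only */
	ASSERT(mutex_owned(SD_MUTEX(un)));
	if (un->un_sense_isbusy != 0) {
		return (0);	/* another command owns the RQS resources */
	}
	sd_mark_rqs_busy(un, bp);
	return (1);
}
#endif	/* SD_EXAMPLE_SKETCH */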
15899 
15900 
15901 
15902 /*
15903  *    Function: sd_alloc_rqs
15904  *
15905  * Description: Set up the unit to receive auto request sense data
15906  *
15907  * Return Code: DDI_SUCCESS or DDI_FAILURE
15908  *
15909  *     Context: Called under attach(9E) context
15910  */
15911 
15912 static int
15913 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15914 {
15915 	struct sd_xbuf *xp;
15916 
15917 	ASSERT(un != NULL);
15918 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15919 	ASSERT(un->un_rqs_bp == NULL);
15920 	ASSERT(un->un_rqs_pktp == NULL);
15921 
15922 	/*
15923 	 * First allocate the required buf and scsi_pkt structs, then set up
15924 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15925 	 */
15926 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15927 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15928 	if (un->un_rqs_bp == NULL) {
15929 		return (DDI_FAILURE);
15930 	}
15931 
15932 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15933 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15934 
15935 	if (un->un_rqs_pktp == NULL) {
15936 		sd_free_rqs(un);
15937 		return (DDI_FAILURE);
15938 	}
15939 
15940 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15941 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15942 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15943 
15944 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15945 
15946 	/* Set up the other needed members in the ARQ scsi_pkt. */
15947 	un->un_rqs_pktp->pkt_comp   = sdintr;
15948 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15949 	un->un_rqs_pktp->pkt_flags |=
15950 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15951 
15952 	/*
15953 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
15954 	 * provide any initpkt or destroypkt routines, as we take care of
15955 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15956 	 */
15957 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15958 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15959 	xp->xb_pktp = un->un_rqs_pktp;
15960 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15961 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15962 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15963 
15964 	/*
15965 	 * Save the pointer to the request sense private bp so it can
15966 	 * be retrieved in sdintr.
15967 	 */
15968 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15969 	ASSERT(un->un_rqs_bp->b_private == xp);
15970 
15971 	/*
15972 	 * See if the HBA supports auto-request sense for the specified
15973 	 * target/lun. If it does, then try to enable it (if not already
15974 	 * enabled).
15975 	 *
15976 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15977 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15978 	 * return success.  However, in both of these cases ARQ is always
15979 	 * enabled and scsi_ifgetcap will always return true. The best approach
15980 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15981 	 *
15982 	 * The third case is an HBA (adp) that always returns enabled on
15983 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
15984 	 * there is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
15985 	 * Note: this case exists to circumvent the Adaptec bug. (x86 only)
15986 	 */
15987 
15988 	if (un->un_f_is_fibre == TRUE) {
15989 		un->un_f_arq_enabled = TRUE;
15990 	} else {
15991 #if defined(__i386) || defined(__amd64)
15992 		/*
15993 		 * Circumvent the Adaptec bug, remove this code when
15994 		 * the bug is fixed
15995 		 */
15996 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15997 #endif
15998 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15999 		case 0:
16000 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16001 				"sd_alloc_rqs: HBA supports ARQ\n");
16002 			/*
16003 			 * ARQ is supported by this HBA but currently is not
16004 			 * enabled. Attempt to enable it and if successful then
16005 			 * mark this instance as ARQ enabled.
16006 			 */
16007 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16008 				== 1) {
16009 				/* Successfully enabled ARQ in the HBA */
16010 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16011 					"sd_alloc_rqs: ARQ enabled\n");
16012 				un->un_f_arq_enabled = TRUE;
16013 			} else {
16014 				/* Could not enable ARQ in the HBA */
16015 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16016 				"sd_alloc_rqs: failed ARQ enable\n");
16017 				un->un_f_arq_enabled = FALSE;
16018 			}
16019 			break;
16020 		case 1:
16021 			/*
16022 			 * ARQ is supported by this HBA and is already enabled.
16023 			 * Just mark ARQ as enabled for this instance.
16024 			 */
16025 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16026 				"sd_alloc_rqs: ARQ already enabled\n");
16027 			un->un_f_arq_enabled = TRUE;
16028 			break;
16029 		default:
16030 			/*
16031 			 * ARQ is not supported by this HBA; disable it for this
16032 			 * instance.
16033 			 */
16034 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16035 				"sd_alloc_rqs: HBA does not support ARQ\n");
16036 			un->un_f_arq_enabled = FALSE;
16037 			break;
16038 		}
16039 	}
16040 
16041 	return (DDI_SUCCESS);
16042 }
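
/*
 * Editorial sketch (hypothetical, compiled out): the get-then-set
 * capability probe described above, reduced to its essentials.
 * scsi_ifgetcap(9F) returns 1 if "auto-rqsense" is already enabled,
 * 0 if it is supported but disabled, and -1 if it is not supported;
 * scsi_ifsetcap(9F) returns 1 on a successful enable:
 */
#ifdef SD_EXAMPLE_SKETCH
static boolean_t
sd_example_enable_arq(struct sd_lun *un)
{
	/* hypothetical helper, for illustration only */
	switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
	case 1:
		return (B_TRUE);	/* already enabled */
	case 0:
		/* Supported but off; attempt to enable it */
		return (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense",
		    1, 1) == 1);
	default:
		return (B_FALSE);	/* ARQ not supported by the HBA */
	}
}
#endif	/* SD_EXAMPLE_SKETCH */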
16043 
16044 
16045 /*
16046  *    Function: sd_free_rqs
16047  *
16048  * Description: Cleanup for the per-instance RQS command.
16049  *
16050  *     Context: Kernel thread context
16051  */
16052 
16053 static void
16054 sd_free_rqs(struct sd_lun *un)
16055 {
16056 	ASSERT(un != NULL);
16057 
16058 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16059 
16060 	/*
16061 	 * If consistent memory is bound to a scsi_pkt, the pkt
16062 	 * has to be destroyed *before* freeing the consistent memory.
16063 	 * Do not change the sequence of these operations:
16064 	 * scsi_destroy_pkt() might access memory after it was freed in
16065 	 * scsi_free_consistent_buf(), which isn't allowed.
16066 	 */
16067 	if (un->un_rqs_pktp != NULL) {
16068 		scsi_destroy_pkt(un->un_rqs_pktp);
16069 		un->un_rqs_pktp = NULL;
16070 	}
16071 
16072 	if (un->un_rqs_bp != NULL) {
16073 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
16074 		scsi_free_consistent_buf(un->un_rqs_bp);
16075 		un->un_rqs_bp = NULL;
16076 	}
16077 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16078 }
16079 
16080 
16081 
16082 /*
16083  *    Function: sd_reduce_throttle
16084  *
16085  * Description: Reduces the maximum # of outstanding commands on a
16086  *		target to the current number of outstanding commands.
16087  *		Queues a timeout(9F) callback to restore the limit
16088  *		after a specified interval has elapsed.
16089  *		Typically used when we get a TRAN_BUSY return code
16090  *		back from scsi_transport().
16091  *
16092  *   Arguments: un - ptr to the sd_lun softstate struct
16093  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16094  *
16095  *     Context: May be called from interrupt context
16096  */
16097 
16098 static void
16099 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16100 {
16101 	ASSERT(un != NULL);
16102 	ASSERT(mutex_owned(SD_MUTEX(un)));
16103 	ASSERT(un->un_ncmds_in_transport >= 0);
16104 
16105 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16106 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16107 	    un, un->un_throttle, un->un_ncmds_in_transport);
16108 
16109 	if (un->un_throttle > 1) {
16110 		if (un->un_f_use_adaptive_throttle == TRUE) {
16111 			switch (throttle_type) {
16112 			case SD_THROTTLE_TRAN_BUSY:
16113 				if (un->un_busy_throttle == 0) {
16114 					un->un_busy_throttle = un->un_throttle;
16115 				}
16116 				break;
16117 			case SD_THROTTLE_QFULL:
16118 				un->un_busy_throttle = 0;
16119 				break;
16120 			default:
16121 				ASSERT(FALSE);
16122 			}
16123 
16124 			if (un->un_ncmds_in_transport > 0) {
16125 				un->un_throttle = un->un_ncmds_in_transport;
16126 			}
16127 
16128 		} else {
16129 			if (un->un_ncmds_in_transport == 0) {
16130 				un->un_throttle = 1;
16131 			} else {
16132 				un->un_throttle = un->un_ncmds_in_transport;
16133 			}
16134 		}
16135 	}
16136 
16137 	/* Reschedule the timeout if none is currently active */
16138 	if (un->un_reset_throttle_timeid == NULL) {
16139 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16140 		    un, SD_THROTTLE_RESET_INTERVAL);
16141 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16142 		    "sd_reduce_throttle: timeout scheduled!\n");
16143 	}
16144 
16145 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16146 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16147 }
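
/*
 * Editorial note (worked example): with adaptive throttling enabled, if
 * un_throttle is 16 and 5 commands are in transport when TRAN_BUSY
 * arrives, the code above saves 16 in un_busy_throttle and drops
 * un_throttle to 5. The timeout scheduled for SD_THROTTLE_RESET_INTERVAL
 * later restores the limit in sd_restore_throttle().
 */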
16148 
16149 
16150 
16151 /*
16152  *    Function: sd_restore_throttle
16153  *
16154  * Description: Callback function for timeout(9F).  Resets the current
16155  *		value of un->un_throttle to its default.
16156  *
16157  *   Arguments: arg - pointer to associated softstate for the device.
16158  *
16159  *     Context: May be called from interrupt context
16160  */
16161 
16162 static void
16163 sd_restore_throttle(void *arg)
16164 {
16165 	struct sd_lun	*un = arg;
16166 
16167 	ASSERT(un != NULL);
16168 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16169 
16170 	mutex_enter(SD_MUTEX(un));
16171 
16172 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16173 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16174 
16175 	un->un_reset_throttle_timeid = NULL;
16176 
16177 	if (un->un_f_use_adaptive_throttle == TRUE) {
16178 		/*
16179 		 * If un_busy_throttle is nonzero, then it contains the
16180 		 * value that un_throttle was when we got a TRAN_BUSY back
16181 		 * from scsi_transport(). We want to revert back to this
16182 		 * value.
16183 		 *
16184 		 * In the QFULL case, the throttle limit will incrementally
16185 		 * increase until it reaches max throttle.
16186 		 */
16187 		if (un->un_busy_throttle > 0) {
16188 			un->un_throttle = un->un_busy_throttle;
16189 			un->un_busy_throttle = 0;
16190 		} else {
16191 			/*
16192 			 * Increase the throttle by 10% to open the gate
16193 			 * slowly; schedule another restore if the saved
16194 			 * throttle has not yet been reached.
16195 			 */
16196 			short throttle;
16197 			if (sd_qfull_throttle_enable) {
16198 				throttle = un->un_throttle +
16199 				    max((un->un_throttle / 10), 1);
16200 				un->un_throttle =
16201 				    (throttle < un->un_saved_throttle) ?
16202 				    throttle : un->un_saved_throttle;
16203 				if (un->un_throttle < un->un_saved_throttle) {
16204 					un->un_reset_throttle_timeid =
16205 					    timeout(sd_restore_throttle, un,
16206 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16207 				}
16208 			}
16209 		}
16210 
16211 		/*
16212 		 * If un_throttle has fallen below the low-water mark, we
16213 		 * restore the maximum value here (and allow it to ratchet
16214 		 * down again if necessary).
16215 		 */
16216 		if (un->un_throttle < un->un_min_throttle) {
16217 			un->un_throttle = un->un_saved_throttle;
16218 		}
16219 	} else {
16220 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16221 		    "restoring limit from 0x%x to 0x%x\n",
16222 		    un->un_throttle, un->un_saved_throttle);
16223 		un->un_throttle = un->un_saved_throttle;
16224 	}
16225 
16226 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16227 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16228 
16229 	sd_start_cmds(un, NULL);
16230 
16231 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16232 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16233 	    un, un->un_throttle);
16234 
16235 	mutex_exit(SD_MUTEX(un));
16236 
16237 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16238 }
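
/*
 * Editorial note (worked example): in the QFULL case the ramp above adds
 * max(un_throttle / 10, 1) per interval (integer division). Starting
 * from un_throttle == 4 with un_saved_throttle == 16, successive
 * restores yield 4 -> 5 -> 6 -> ... -> 16, one unit per interval, since
 * un_throttle / 10 stays below 2 until un_throttle reaches 20. Each
 * step reschedules the timeout until the saved throttle is reached.
 */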
16239 
16240 /*
16241  *    Function: sdrunout
16242  *
16243  * Description: Callback routine for scsi_init_pkt when a resource allocation
16244  *		fails.
16245  *
16246  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16247  *		soft state instance.
16248  *
16249  * Return Code: The scsi_init_pkt routine allows for the callback function to
16250  *		return a 0 indicating the callback should be rescheduled or a 1
16251  *		indicating not to reschedule. This routine always returns 1
16252  *		because the driver always provides a callback function to
16253  *		scsi_init_pkt. This results in a callback always being scheduled
16254  *		(via the scsi_init_pkt callback implementation) if a resource
16255  *		failure occurs.
16256  *
16257  *     Context: This callback function may not block or call routines that block
16258  *
16259  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16260  *		request persisting at the head of the list which cannot be
16261  *		satisfied even after multiple retries. In the future the driver
16262  *		may implement some type of maximum runout count before failing
16263  *		an I/O.
16264  */
16265 
16266 static int
16267 sdrunout(caddr_t arg)
16268 {
16269 	struct sd_lun	*un = (struct sd_lun *)arg;
16270 
16271 	ASSERT(un != NULL);
16272 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16273 
16274 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16275 
16276 	mutex_enter(SD_MUTEX(un));
16277 	sd_start_cmds(un, NULL);
16278 	mutex_exit(SD_MUTEX(un));
16279 	/*
16280 	 * This callback routine always returns 1 (i.e. do not reschedule)
16281 	 * because we always specify sdrunout as the callback handler for
16282 	 * scsi_init_pkt inside the call to sd_start_cmds.
16283 	 */
16284 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16285 	return (1);
16286 }
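
/*
 * Editorial sketch (hypothetical, compiled out): how sdrunout() gets
 * wired up. Passing a callback function (rather than SLEEP_FUNC or
 * NULL_FUNC) makes scsi_init_pkt(9F) return NULL without blocking on a
 * resource shortage and arranges for the callback to run when resources
 * may again be available; the argument list below is a sketch, not the
 * driver's actual allocation path:
 */
#ifdef SD_EXAMPLE_SKETCH
static struct scsi_pkt *
sd_example_init_pkt(struct sd_lun *un, struct buf *bp)
{
	/* hypothetical helper, for illustration only */
	return (scsi_init_pkt(SD_ADDRESS(un), NULL, bp, CDB_GROUP1,
	    1, 0, 0, sdrunout, (caddr_t)un));
}
#endif	/* SD_EXAMPLE_SKETCH */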
16287 
16288 
16289 /*
16290  *    Function: sdintr
16291  *
16292  * Description: Completion callback routine for scsi_pkt(9S) structs
16293  *		sent to the HBA driver via scsi_transport(9F).
16294  *
16295  *     Context: Interrupt context
16296  */
16297 
16298 static void
16299 sdintr(struct scsi_pkt *pktp)
16300 {
16301 	struct buf	*bp;
16302 	struct sd_xbuf	*xp;
16303 	struct sd_lun	*un;
16304 
16305 	ASSERT(pktp != NULL);
16306 	bp = (struct buf *)pktp->pkt_private;
16307 	ASSERT(bp != NULL);
16308 	xp = SD_GET_XBUF(bp);
16309 	ASSERT(xp != NULL);
16310 	ASSERT(xp->xb_pktp != NULL);
16311 	un = SD_GET_UN(bp);
16312 	ASSERT(un != NULL);
16313 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16314 
16315 #ifdef SD_FAULT_INJECTION
16316 
16317 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16318 	/* SD FaultInjection */
16319 	sd_faultinjection(pktp);
16320 
16321 #endif /* SD_FAULT_INJECTION */
16322 
16323 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16324 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16325 
16326 	mutex_enter(SD_MUTEX(un));
16327 
16328 	/* Reduce the count of the #commands currently in transport */
16329 	un->un_ncmds_in_transport--;
16330 	ASSERT(un->un_ncmds_in_transport >= 0);
16331 
16332 	/* Increment counter to indicate that the callback routine is active */
16333 	un->un_in_callback++;
16334 
16335 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16336 
16337 #ifdef	SDDEBUG
16338 	if (bp == un->un_retry_bp) {
16339 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16340 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16341 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16342 	}
16343 #endif
16344 
16345 	/*
16346 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16347 	 */
16348 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16349 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16350 			    "Device is gone\n");
16351 		sd_return_failed_command(un, bp, EIO);
16352 		goto exit;
16353 	}
16354 
16355 	/*
16356 	 * First see if the pkt has auto-request sense data with it....
16357 	 * Look at the packet state first so we don't take a performance
16358 	 * hit looking at the arq enabled flag unless absolutely necessary.
16359 	 */
16360 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16361 	    (un->un_f_arq_enabled == TRUE)) {
16362 		/*
16363 		 * The HBA did an auto request sense for this command so check
16364 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16365 		 * driver command that should not be retried.
16366 		 */
16367 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16368 			/*
16369 			 * Save the relevant sense info into the xp for the
16370 			 * original cmd.
16371 			 */
16372 			struct scsi_arq_status *asp;
16373 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16374 			xp->xb_sense_status =
16375 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16376 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16377 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16378 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16379 			    min(sizeof (struct scsi_extended_sense),
16380 			    SENSE_LENGTH));
16381 
16382 			/* fail the command */
16383 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16384 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16385 			sd_return_failed_command(un, bp, EIO);
16386 			goto exit;
16387 		}
16388 
16389 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16390 		/*
16391 		 * We want to either retry or fail this command, so free
16392 		 * the DMA resources here.  If we retry the command then
16393 		 * the DMA resources will be reallocated in sd_start_cmds().
16394 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16395 		 * causes the *entire* transfer to start over again from the
16396 		 * beginning of the request, even for PARTIAL chunks that
16397 		 * have already transferred successfully.
16398 		 */
16399 		if ((un->un_f_is_fibre == TRUE) &&
16400 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16401 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16402 			scsi_dmafree(pktp);
16403 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16404 		}
16405 #endif
16406 
16407 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16408 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16409 
16410 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16411 		goto exit;
16412 	}
16413 
16414 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16415 	if (pktp->pkt_flags & FLAG_SENSING)  {
16416 		/* This pktp is from the unit's REQUEST_SENSE command */
16417 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16418 		    "sdintr: sd_handle_request_sense\n");
16419 		sd_handle_request_sense(un, bp, xp, pktp);
16420 		goto exit;
16421 	}
16422 
16423 	/*
16424 	 * Check to see if the command successfully completed as requested;
16425 	 * this is the most common case (and also the hot performance path).
16426 	 *
16427 	 * Requirements for successful completion are:
16428 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16429 	 * In addition:
16430 	 * - A residual of zero indicates successful completion no matter what
16431 	 *   the command is.
16432 	 * - If the residual is not zero and the command is not a read or
16433 	 *   write, then it's still defined as successful completion. In other
16434 	 *   words, if the command is a read or write the residual must be
16435 	 *   zero for successful completion.
16436 	 * - If the residual is not zero and the command is a read or
16437 	 *   write, and it's a USCSICMD, then it's still defined as
16438 	 *   successful completion.
16439 	 */
16440 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16441 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16442 
16443 		/*
16444 		 * Since this command is returned with a good status, we
16445 		 * can reset the count for Sonoma failover.
16446 		 */
16447 		un->un_sonoma_failure_count = 0;
16448 
16449 		/*
16450 		 * Return all USCSI commands on good status
16451 		 */
16452 		if (pktp->pkt_resid == 0) {
16453 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16454 			    "sdintr: returning command for resid == 0\n");
16455 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16456 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16457 			SD_UPDATE_B_RESID(bp, pktp);
16458 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16459 			    "sdintr: returning command for resid != 0\n");
16460 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16461 			SD_UPDATE_B_RESID(bp, pktp);
16462 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16463 			    "sdintr: returning uscsi command\n");
16464 		} else {
16465 			goto not_successful;
16466 		}
16467 		sd_return_command(un, bp);
16468 
16469 		/*
16470 		 * Decrement counter to indicate that the callback routine
16471 		 * is done.
16472 		 */
16473 		un->un_in_callback--;
16474 		ASSERT(un->un_in_callback >= 0);
16475 		mutex_exit(SD_MUTEX(un));
16476 
16477 		return;
16478 	}
16479 
16480 not_successful:
16481 
16482 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16483 	/*
16484 	 * The following is based upon knowledge of the underlying transport
16485 	 * and its use of DMA resources.  This code should be removed when
16486 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16487 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16488 	 * and sd_start_cmds().
16489 	 *
16490 	 * Free any DMA resources associated with this command if there
16491 	 * is a chance it could be retried or enqueued for later retry.
16492 	 * If we keep the DMA binding then mpxio cannot reissue the
16493 	 * command on another path whenever a path failure occurs.
16494 	 *
16495 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16496 	 * causes the *entire* transfer to start over again from the
16497 	 * beginning of the request, even for PARTIAL chunks that
16498 	 * have already transferred successfully.
16499 	 *
16500 	 * This is only done for non-uscsi commands (and also skipped for the
16501 	 * driver's internal RQS command). Also just do this for Fibre Channel
16502 	 * devices as these are the only ones that support mpxio.
16503 	 */
16504 	if ((un->un_f_is_fibre == TRUE) &&
16505 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16506 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16507 		scsi_dmafree(pktp);
16508 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16509 	}
16510 #endif
16511 
16512 	/*
16513 	 * The command did not successfully complete as requested so check
16514 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16515 	 * driver command that should not be retried so just return. If
16516 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16517 	 */
16518 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16519 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16520 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16521 		/*
16522 		 * Issue a request sense if a check condition caused the error
16523 		 * (we handle the auto request sense case above), otherwise
16524 		 * just fail the command.
16525 		 */
16526 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16527 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16528 			sd_send_request_sense_command(un, bp, pktp);
16529 		} else {
16530 			sd_return_failed_command(un, bp, EIO);
16531 		}
16532 		goto exit;
16533 	}
16534 
16535 	/*
16536 	 * The command did not successfully complete as requested so process
16537 	 * the error, retry, and/or attempt recovery.
16538 	 */
16539 	switch (pktp->pkt_reason) {
16540 	case CMD_CMPLT:
16541 		switch (SD_GET_PKT_STATUS(pktp)) {
16542 		case STATUS_GOOD:
16543 			/*
16544 			 * The command completed successfully with a non-zero
16545 			 * residual
16546 			 */
16547 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16548 			    "sdintr: STATUS_GOOD \n");
16549 			sd_pkt_status_good(un, bp, xp, pktp);
16550 			break;
16551 
16552 		case STATUS_CHECK:
16553 		case STATUS_TERMINATED:
16554 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16555 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16556 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16557 			break;
16558 
16559 		case STATUS_BUSY:
16560 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16561 			    "sdintr: STATUS_BUSY\n");
16562 			sd_pkt_status_busy(un, bp, xp, pktp);
16563 			break;
16564 
16565 		case STATUS_RESERVATION_CONFLICT:
16566 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16567 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16568 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16569 			break;
16570 
16571 		case STATUS_QFULL:
16572 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16573 			    "sdintr: STATUS_QFULL\n");
16574 			sd_pkt_status_qfull(un, bp, xp, pktp);
16575 			break;
16576 
16577 		case STATUS_MET:
16578 		case STATUS_INTERMEDIATE:
16579 		case STATUS_SCSI2:
16580 		case STATUS_INTERMEDIATE_MET:
16581 		case STATUS_ACA_ACTIVE:
16582 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16583 			    "Unexpected SCSI status received: 0x%x\n",
16584 			    SD_GET_PKT_STATUS(pktp));
16585 			sd_return_failed_command(un, bp, EIO);
16586 			break;
16587 
16588 		default:
16589 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16590 			    "Invalid SCSI status received: 0x%x\n",
16591 			    SD_GET_PKT_STATUS(pktp));
16592 			sd_return_failed_command(un, bp, EIO);
16593 			break;
16594 
16595 		}
16596 		break;
16597 
16598 	case CMD_INCOMPLETE:
16599 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16600 		    "sdintr:  CMD_INCOMPLETE\n");
16601 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16602 		break;
16603 	case CMD_TRAN_ERR:
16604 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16605 		    "sdintr: CMD_TRAN_ERR\n");
16606 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16607 		break;
16608 	case CMD_RESET:
16609 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16610 		    "sdintr: CMD_RESET \n");
16611 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16612 		break;
16613 	case CMD_ABORTED:
16614 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16615 		    "sdintr: CMD_ABORTED \n");
16616 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16617 		break;
16618 	case CMD_TIMEOUT:
16619 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16620 		    "sdintr: CMD_TIMEOUT\n");
16621 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16622 		break;
16623 	case CMD_UNX_BUS_FREE:
16624 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16625 		    "sdintr: CMD_UNX_BUS_FREE \n");
16626 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16627 		break;
16628 	case CMD_TAG_REJECT:
16629 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16630 		    "sdintr: CMD_TAG_REJECT\n");
16631 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16632 		break;
16633 	default:
16634 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16635 		    "sdintr: default\n");
16636 		sd_pkt_reason_default(un, bp, xp, pktp);
16637 		break;
16638 	}
16639 
16640 exit:
16641 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16642 
16643 	/* Decrement counter to indicate that the callback routine is done. */
16644 	un->un_in_callback--;
16645 	ASSERT(un->un_in_callback >= 0);
16646 
16647 	/*
16648 	 * At this point, the pkt has been dispatched, ie, it is either
16649 	 * being re-tried or has been returned to its caller and should
16650 	 * not be referenced.
16651 	 */
16652 
16653 	mutex_exit(SD_MUTEX(un));
16654 }
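/*
 * For reference, a sketch of the dispatch order implemented by sdintr()
 * above:
 *
 *	1. pkt_reason == CMD_DEV_GONE	-> fail the command with EIO
 *	2. STATE_ARQ_DONE (arq enabled)	-> sd_handle_auto_request_sense()
 *	3. FLAG_SENSING			-> sd_handle_request_sense()
 *	4. CMD_CMPLT + STATUS_GOOD	-> sd_return_command() (hot path)
 *	5. everything else		-> per-pkt_reason/pkt_status recovery
 */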
16655 
16656 
16657 /*
16658  *    Function: sd_print_incomplete_msg
16659  *
16660  * Description: Prints the error message for a CMD_INCOMPLETE error.
16661  *
16662  *   Arguments: un - ptr to associated softstate for the device.
16663  *		bp - ptr to the buf(9S) for the command.
16664  *		arg - message string ptr
16665  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16666  *			or SD_NO_RETRY_ISSUED.
16667  *
16668  *     Context: May be called under interrupt context
16669  */
16670 
16671 static void
16672 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16673 {
16674 	struct scsi_pkt	*pktp;
16675 	char	*msgp;
16676 	char	*cmdp = arg;
16677 
16678 	ASSERT(un != NULL);
16679 	ASSERT(mutex_owned(SD_MUTEX(un)));
16680 	ASSERT(bp != NULL);
16681 	ASSERT(arg != NULL);
16682 	pktp = SD_GET_PKTP(bp);
16683 	ASSERT(pktp != NULL);
16684 
16685 	switch (code) {
16686 	case SD_DELAYED_RETRY_ISSUED:
16687 	case SD_IMMEDIATE_RETRY_ISSUED:
16688 		msgp = "retrying";
16689 		break;
16690 	case SD_NO_RETRY_ISSUED:
16691 	default:
16692 		msgp = "giving up";
16693 		break;
16694 	}
16695 
16696 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16697 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16698 		    "incomplete %s- %s\n", cmdp, msgp);
16699 	}
16700 }
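/*
 * Example (illustrative): for a failed read with a retry issued, the
 * scsi_log() call above emits a line of roughly this form (the exact
 * prefix depends on the device path and instance):
 *
 *	WARNING: ... (sd0):	incomplete read- retrying
 */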
16701 
16702 
16703 
16704 /*
16705  *    Function: sd_pkt_status_good
16706  *
16707  * Description: Processing for a STATUS_GOOD code in pkt_status.
16708  *
16709  *     Context: May be called under interrupt context
16710  */
16711 
16712 static void
16713 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16714 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16715 {
16716 	char	*cmdp;
16717 
16718 	ASSERT(un != NULL);
16719 	ASSERT(mutex_owned(SD_MUTEX(un)));
16720 	ASSERT(bp != NULL);
16721 	ASSERT(xp != NULL);
16722 	ASSERT(pktp != NULL);
16723 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16724 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16725 	ASSERT(pktp->pkt_resid != 0);
16726 
16727 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16728 
16729 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16730 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16731 	case SCMD_READ:
16732 		cmdp = "read";
16733 		break;
16734 	case SCMD_WRITE:
16735 		cmdp = "write";
16736 		break;
16737 	default:
16738 		SD_UPDATE_B_RESID(bp, pktp);
16739 		sd_return_command(un, bp);
16740 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16741 		return;
16742 	}
16743 
16744 	/*
16745 	 * See if we can retry the read/write, preferably immediately.
16746 	 * If retries are exhausted, then sd_retry_command() will update
16747 	 * the b_resid count.
16748 	 */
16749 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16750 	    cmdp, EIO, (clock_t)0, NULL);
16751 
16752 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16753 }
16754 
16755 
16756 
16757 
16758 
16759 /*
16760  *    Function: sd_handle_request_sense
16761  *
16762  * Description: Processing for non-auto Request Sense command.
16763  *
16764  *   Arguments: un - ptr to associated softstate
16765  *		sense_bp - ptr to buf(9S) for the RQS command
16766  *		sense_xp - ptr to the sd_xbuf for the RQS command
16767  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16768  *
16769  *     Context: May be called under interrupt context
16770  */
16771 
16772 static void
16773 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16774 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16775 {
16776 	struct buf	*cmd_bp;	/* buf for the original command */
16777 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16778 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16779 
16780 	ASSERT(un != NULL);
16781 	ASSERT(mutex_owned(SD_MUTEX(un)));
16782 	ASSERT(sense_bp != NULL);
16783 	ASSERT(sense_xp != NULL);
16784 	ASSERT(sense_pktp != NULL);
16785 
16786 	/*
16787 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16788 	 * RQS command and not the original command.
16789 	 */
16790 	ASSERT(sense_pktp == un->un_rqs_pktp);
16791 	ASSERT(sense_bp   == un->un_rqs_bp);
16792 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16793 	    (FLAG_SENSING | FLAG_HEAD));
16794 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16795 	    FLAG_SENSING) == FLAG_SENSING);
16796 
16797 	/* These are the bp, xp, and pktp for the original command */
16798 	cmd_bp = sense_xp->xb_sense_bp;
16799 	cmd_xp = SD_GET_XBUF(cmd_bp);
16800 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16801 
16802 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16803 		/*
16804 		 * The REQUEST SENSE command failed.  Release the REQUEST
16805 		 * SENSE command for re-use, get back the bp for the original
16806 		 * command, and attempt to re-try the original command if
16807 		 * FLAG_DIAGNOSE is not set in the original packet.
16808 		 */
16809 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16810 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16811 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16812 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16813 			    NULL, NULL, EIO, (clock_t)0, NULL);
16814 			return;
16815 		}
16816 	}
16817 
16818 	/*
16819 	 * Save the relevant sense info into the xp for the original cmd.
16820 	 *
16821 	 * Note: if the request sense failed the state info will be zero
16822 	 * as set in sd_mark_rqs_busy()
16823 	 */
16824 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16825 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16826 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16827 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16828 
16829 	/*
16830 	 *  Free up the RQS command....
16831 	 *  NOTE:
16832 	 *	Must do this BEFORE calling sd_validate_sense_data!
16833 	 *	sd_validate_sense_data may return the original command in
16834 	 *	which case the pkt will be freed and the flags can no
16835 	 *	longer be touched.
16836 	 *	SD_MUTEX is held through this process until the command
16837 	 *	is dispatched based upon the sense data, so there are
16838 	 *	no race conditions.
16839 	 */
16840 	(void) sd_mark_rqs_idle(un, sense_xp);
16841 
16842 	/*
16843 	 * For a retryable command see if we have valid sense data, if so then
16844 	 * turn it over to sd_decode_sense() to figure out the right course of
16845 	 * action. Just fail a non-retryable command.
16846 	 */
16847 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16848 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16849 		    SD_SENSE_DATA_IS_VALID) {
16850 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16851 		}
16852 	} else {
16853 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16854 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16855 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16856 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16857 		sd_return_failed_command(un, cmd_bp, EIO);
16858 	}
16859 }
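/*
 * Conceptual sketch of the bookkeeping used above: the unit has a single
 * pre-allocated RQS packet, and its xbuf points back at the command
 * being sensed.
 *
 *	un->un_rqs_bp --SD_GET_XBUF()--> sense_xp
 *	sense_xp->xb_sense_bp ---------> cmd_bp (original command)
 *	SD_GET_XBUF(cmd_bp) -----------> cmd_xp
 *	SD_GET_PKTP(cmd_bp) -----------> cmd_pktp
 */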
16860 
16861 
16862 
16863 
16864 /*
16865  *    Function: sd_handle_auto_request_sense
16866  *
16867  * Description: Processing for auto-request sense information.
16868  *
16869  *   Arguments: un - ptr to associated softstate
16870  *		bp - ptr to buf(9S) for the command
16871  *		xp - ptr to the sd_xbuf for the command
16872  *		pktp - ptr to the scsi_pkt(9S) for the command
16873  *
16874  *     Context: May be called under interrupt context
16875  */
16876 
16877 static void
16878 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16879 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16880 {
16881 	struct scsi_arq_status *asp;
16882 
16883 	ASSERT(un != NULL);
16884 	ASSERT(mutex_owned(SD_MUTEX(un)));
16885 	ASSERT(bp != NULL);
16886 	ASSERT(xp != NULL);
16887 	ASSERT(pktp != NULL);
16888 	ASSERT(pktp != un->un_rqs_pktp);
16889 	ASSERT(bp   != un->un_rqs_bp);
16890 
16891 	/*
16892 	 * For auto-request sense, we get a scsi_arq_status back from
16893 	 * the HBA, with the sense data in the sts_sensedata member.
16894 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16895 	 */
16896 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16897 
16898 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16899 		/*
16900 		 * The auto REQUEST SENSE failed; see if we can re-try
16901 		 * the original command.
16902 		 */
16903 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16904 		    "auto request sense failed (reason=%s)\n",
16905 		    scsi_rname(asp->sts_rqpkt_reason));
16906 
16907 		sd_reset_target(un, pktp);
16908 
16909 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16910 		    NULL, NULL, EIO, (clock_t)0, NULL);
16911 		return;
16912 	}
16913 
16914 	/* Save the relevant sense info into the xp for the original cmd. */
16915 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16916 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16917 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16918 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16919 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16920 
16921 	/*
16922 	 * See if we have valid sense data, if so then turn it over to
16923 	 * sd_decode_sense() to figure out the right course of action.
16924 	 */
16925 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16926 		sd_decode_sense(un, bp, xp, pktp);
16927 	}
16928 }
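/*
 * For reference, the scsi_arq_status(9S) layout consumed above
 * (simplified; the authoritative definition is in the scsi impl headers):
 *
 *	struct scsi_arq_status {
 *		struct scsi_status	sts_status;	  original cmd status
 *		struct scsi_status	sts_rqpkt_status; auto-RQS status
 *		uchar_t			sts_rqpkt_reason; auto-RQS pkt_reason
 *		uchar_t			sts_rqpkt_resid;  auto-RQS residual
 *		uint_t			sts_rqpkt_state;  auto-RQS pkt_state
 *		uint_t			sts_rqpkt_statistics;
 *		struct scsi_extended_sense sts_sensedata; the sense bytes
 *	};
 */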
16929 
16930 
16931 /*
16932  *    Function: sd_print_sense_failed_msg
16933  *
16934  * Description: Print log message when RQS has failed.
16935  *
16936  *   Arguments: un - ptr to associated softstate
16937  *		bp - ptr to buf(9S) for the command
16938  *		arg - generic message string ptr
16939  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16940  *			or SD_NO_RETRY_ISSUED
16941  *
16942  *     Context: May be called from interrupt context
16943  */
16944 
16945 static void
16946 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16947 	int code)
16948 {
16949 	char	*msgp = arg;
16950 
16951 	ASSERT(un != NULL);
16952 	ASSERT(mutex_owned(SD_MUTEX(un)));
16953 	ASSERT(bp != NULL);
16954 
16955 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16956 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16957 	}
16958 }
16959 
16960 
16961 /*
16962  *    Function: sd_validate_sense_data
16963  *
16964  * Description: Check the given sense data for validity.
16965  *		If the sense data is not valid, the command will
16966  *		be either failed or retried!
16967  *
16968  * Return Code: SD_SENSE_DATA_IS_INVALID
16969  *		SD_SENSE_DATA_IS_VALID
16970  *
16971  *     Context: May be called from interrupt context
16972  */
16973 
16974 static int
16975 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16976 {
16977 	struct scsi_extended_sense *esp;
16978 	struct	scsi_pkt *pktp;
16979 	size_t	actual_len;
16980 	char	*msgp = NULL;
16981 
16982 	ASSERT(un != NULL);
16983 	ASSERT(mutex_owned(SD_MUTEX(un)));
16984 	ASSERT(bp != NULL);
16985 	ASSERT(bp != un->un_rqs_bp);
16986 	ASSERT(xp != NULL);
16987 
16988 	pktp = SD_GET_PKTP(bp);
16989 	ASSERT(pktp != NULL);
16990 
16991 	/*
16992 	 * Check the status of the RQS command (auto or manual).
16993 	 */
16994 	switch (xp->xb_sense_status & STATUS_MASK) {
16995 	case STATUS_GOOD:
16996 		break;
16997 
16998 	case STATUS_RESERVATION_CONFLICT:
16999 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17000 		return (SD_SENSE_DATA_IS_INVALID);
17001 
17002 	case STATUS_BUSY:
17003 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17004 		    "Busy Status on REQUEST SENSE\n");
17005 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17006 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17007 		return (SD_SENSE_DATA_IS_INVALID);
17008 
17009 	case STATUS_QFULL:
17010 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17011 		    "QFULL Status on REQUEST SENSE\n");
17012 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17013 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17014 		return (SD_SENSE_DATA_IS_INVALID);
17015 
17016 	case STATUS_CHECK:
17017 	case STATUS_TERMINATED:
17018 		msgp = "Check Condition on REQUEST SENSE\n";
17019 		goto sense_failed;
17020 
17021 	default:
17022 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17023 		goto sense_failed;
17024 	}
17025 
17026 	/*
17027 	 * See if we got the minimum required amount of sense data.
17028 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17029 	 * or less.
17030 	 */
17031 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
17032 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17033 	    (actual_len == 0)) {
17034 		msgp = "Request Sense couldn't get sense data\n";
17035 		goto sense_failed;
17036 	}
17037 
17038 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17039 		msgp = "Not enough sense information\n";
17040 		goto sense_failed;
17041 	}
17042 
17043 	/*
17044 	 * We require the extended sense data
17045 	 */
17046 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17047 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17048 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17049 			static char tmp[8];
17050 			static char buf[148];
17051 			char *p = (char *)(xp->xb_sense_data);
17052 			int i;
17053 
17054 			mutex_enter(&sd_sense_mutex);
17055 			(void) strcpy(buf, "undecodable sense information:");
17056 			for (i = 0; i < actual_len; i++) {
17057 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
17058 				(void) strcpy(&buf[strlen(buf)], tmp);
17059 			}
17060 			i = strlen(buf);
17061 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17062 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
17063 			mutex_exit(&sd_sense_mutex);
17064 		}
17065 		/* Note: Legacy behavior, fail the command with no retry */
17066 		sd_return_failed_command(un, bp, EIO);
17067 		return (SD_SENSE_DATA_IS_INVALID);
17068 	}
17069 
17070 	/*
17071 	 * Check that es_code is valid (es_class concatenated with es_code
17072 	 * make up the "response code" field).  es_class will always be 7, so
17073 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
17074 	 * format.
17075 	 */
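	/*
	 * For reference, the standard response-code encodings in sense
	 * byte 0, i.e. (es_class << 4) | es_code, per the SCSI spec:
	 *
	 *	0x70	fixed format, current error
	 *	0x71	fixed format, deferred error
	 *	0x72	descriptor format, current error
	 *	0x73	descriptor format, deferred error
	 *	0x7F	vendor specific
	 */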
17076 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17077 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17078 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17079 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17080 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17081 		goto sense_failed;
17082 	}
17083 
17084 	return (SD_SENSE_DATA_IS_VALID);
17085 
17086 sense_failed:
17087 	/*
17088 	 * If the request sense failed (for whatever reason), attempt
17089 	 * to retry the original command.
17090 	 */
17091 #if defined(__i386) || defined(__amd64)
17092 	/*
17093 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17094 	 * sddef.h for the SPARC platform, while x86 uses one binary
17095 	 * for both SCSI and FC.
17096 	 * The SD_RETRY_DELAY value used here needs to be adjusted
17097 	 * whenever SD_RETRY_DELAY changes in sddef.h.
17098 	 */
17099 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17100 	    sd_print_sense_failed_msg, msgp, EIO,
17101 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17102 #else
17103 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17104 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17105 #endif
17106 
17107 	return (SD_SENSE_DATA_IS_INVALID);
17108 }
17109 
17110 
17111 
17112 /*
17113  *    Function: sd_decode_sense
17114  *
17115  * Description: Take recovery action(s) when SCSI Sense Data is received.
17116  *
17117  *     Context: Interrupt context.
17118  */
17119 
17120 static void
17121 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17122 	struct scsi_pkt *pktp)
17123 {
17124 	uint8_t sense_key;
17125 
17126 	ASSERT(un != NULL);
17127 	ASSERT(mutex_owned(SD_MUTEX(un)));
17128 	ASSERT(bp != NULL);
17129 	ASSERT(bp != un->un_rqs_bp);
17130 	ASSERT(xp != NULL);
17131 	ASSERT(pktp != NULL);
17132 
17133 	sense_key = scsi_sense_key(xp->xb_sense_data);
17134 
17135 	switch (sense_key) {
17136 	case KEY_NO_SENSE:
17137 		sd_sense_key_no_sense(un, bp, xp, pktp);
17138 		break;
17139 	case KEY_RECOVERABLE_ERROR:
17140 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17141 		    bp, xp, pktp);
17142 		break;
17143 	case KEY_NOT_READY:
17144 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17145 		    bp, xp, pktp);
17146 		break;
17147 	case KEY_MEDIUM_ERROR:
17148 	case KEY_HARDWARE_ERROR:
17149 		sd_sense_key_medium_or_hardware_error(un,
17150 		    xp->xb_sense_data, bp, xp, pktp);
17151 		break;
17152 	case KEY_ILLEGAL_REQUEST:
17153 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17154 		break;
17155 	case KEY_UNIT_ATTENTION:
17156 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17157 		    bp, xp, pktp);
17158 		break;
17159 	case KEY_WRITE_PROTECT:
17160 	case KEY_VOLUME_OVERFLOW:
17161 	case KEY_MISCOMPARE:
17162 		sd_sense_key_fail_command(un, bp, xp, pktp);
17163 		break;
17164 	case KEY_BLANK_CHECK:
17165 		sd_sense_key_blank_check(un, bp, xp, pktp);
17166 		break;
17167 	case KEY_ABORTED_COMMAND:
17168 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17169 		break;
17170 	case KEY_VENDOR_UNIQUE:
17171 	case KEY_COPY_ABORTED:
17172 	case KEY_EQUAL:
17173 	case KEY_RESERVED:
17174 	default:
17175 		sd_sense_key_default(un, xp->xb_sense_data,
17176 		    bp, xp, pktp);
17177 		break;
17178 	}
17179 }
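/*
 * Illustrative sketch (not from the driver source): the scsi_sense_*(9F)
 * accessors used above handle both fixed- and descriptor-format sense
 * data, e.g.:
 *
 *	uint8_t key  = scsi_sense_key(xp->xb_sense_data);
 *	uint8_t asc  = scsi_sense_asc(xp->xb_sense_data);
 *	uint8_t ascq = scsi_sense_ascq(xp->xb_sense_data);
 *
 * A NOT READY condition with no media present would decode as key 0x02,
 * asc 0x3A, ascq 0x00 ("medium not present").
 */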
17180 
17181 
17182 /*
17183  *    Function: sd_dump_memory
17184  *
17185  * Description: Debug logging routine to print the contents of a user provided
17186  *		buffer. The output of the buffer is broken up into 256 byte
17187  *		segments due to a size constraint of the scsi_log()
17188  *		implementation.
17189  *
17190  *   Arguments: un - ptr to softstate
17191  *		comp - component mask
17192  *		title - "title" string to precede data when printed
17193  *		data - ptr to data block to be printed
17194  *		len - size of data block to be printed
17195  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17196  *
17197  *     Context: May be called from interrupt context
17198  */
17199 
17200 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17201 
17202 static char *sd_dump_format_string[] = {
17203 		" 0x%02x",
17204 		" %c"
17205 };
17206 
17207 static void
17208 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17209     int len, int fmt)
17210 {
17211 	int	i, j;
17212 	int	avail_count;
17213 	int	start_offset;
17214 	int	end_offset;
17215 	size_t	entry_len;
17216 	char	*bufp;
17217 	char	*local_buf;
17218 	char	*format_string;
17219 
17220 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17221 
17222 	/*
17223 	 * In the debug version of the driver, this function is called from a
17224 	 * number of places which are NOPs in the release driver.
17225 	 * The debug driver therefore has additional methods of filtering
17226 	 * debug output.
17227 	 */
17228 #ifdef SDDEBUG
17229 	/*
17230 	 * In the debug version of the driver we can reduce the amount of debug
17231 	 * messages by setting sd_error_level to something other than
17232 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17233 	 * sd_component_mask.
17234 	 */
17235 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17236 	    (sd_error_level != SCSI_ERR_ALL)) {
17237 		return;
17238 	}
17239 	if (((sd_component_mask & comp) == 0) ||
17240 	    (sd_error_level != SCSI_ERR_ALL)) {
17241 		return;
17242 	}
17243 #else
17244 	if (sd_error_level != SCSI_ERR_ALL) {
17245 		return;
17246 	}
17247 #endif
17248 
17249 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17250 	bufp = local_buf;
17251 	/*
17252 	 * Available length is the length of local_buf[], minus the
17253 	 * length of the title string, minus one for the ":", minus
17254 	 * one for the newline, minus one for the NULL terminator.
17255 	 * This gives the #bytes available for holding the printed
17256 	 * values from the given data buffer.
17257 	 */
17258 	if (fmt == SD_LOG_HEX) {
17259 		format_string = sd_dump_format_string[0];
17260 	} else /* SD_LOG_CHAR */ {
17261 		format_string = sd_dump_format_string[1];
17262 	}
17263 	/*
17264 	 * Available count is the number of elements from the given
17265 	 * data buffer that we can fit into the available length.
17266 	 * This is based upon the size of the format string used.
17267 	 * Make one entry and find it's size.
17268 	 * Make one entry and find its size.
17269 	(void) sprintf(bufp, format_string, data[0]);
17270 	entry_len = strlen(bufp);
17271 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17272 
17273 	j = 0;
17274 	while (j < len) {
17275 		bufp = local_buf;
17276 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17277 		start_offset = j;
17278 
17279 		end_offset = start_offset + avail_count;
17280 
17281 		(void) sprintf(bufp, "%s:", title);
17282 		bufp += strlen(bufp);
17283 		for (i = start_offset; ((i < end_offset) && (j < len));
17284 		    i++, j++) {
17285 			(void) sprintf(bufp, format_string, data[i]);
17286 			bufp += entry_len;
17287 		}
17288 		(void) sprintf(bufp, "\n");
17289 
17290 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17291 	}
17292 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17293 }
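/*
 * Example (illustrative): a typical call, as made from
 * sd_print_sense_msg() below,
 *
 *	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
 *	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
 *
 * produces log lines of the form
 *
 *	Sense Data: 0x70 0x00 0x05 0x00 0x00 0x00 0x00 0x0a ...
 *
 * broken into multiple lines when the data exceeds one 256-byte buffer.
 */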
17294 
17295 /*
17296  *    Function: sd_print_sense_msg
17297  *
17298  * Description: Log a message based upon the given sense data.
17299  *
17300  *   Arguments: un - ptr to associated softstate
17301  *		bp - ptr to buf(9S) for the command
17302  *		arg - ptr to associate sd_sense_info struct
17303  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17304  *			or SD_NO_RETRY_ISSUED
17305  *
17306  *     Context: May be called from interrupt context
17307  */
17308 
17309 static void
17310 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17311 {
17312 	struct sd_xbuf	*xp;
17313 	struct scsi_pkt	*pktp;
17314 	uint8_t *sensep;
17315 	daddr_t request_blkno;
17316 	diskaddr_t err_blkno;
17317 	int severity;
17318 	int pfa_flag;
17319 	extern struct scsi_key_strings scsi_cmds[];
17320 
17321 	ASSERT(un != NULL);
17322 	ASSERT(mutex_owned(SD_MUTEX(un)));
17323 	ASSERT(bp != NULL);
17324 	xp = SD_GET_XBUF(bp);
17325 	ASSERT(xp != NULL);
17326 	pktp = SD_GET_PKTP(bp);
17327 	ASSERT(pktp != NULL);
17328 	ASSERT(arg != NULL);
17329 
17330 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17331 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17332 
17333 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17334 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17335 		severity = SCSI_ERR_RETRYABLE;
17336 	}
17337 
17338 	/* Use absolute block number for the request block number */
17339 	request_blkno = xp->xb_blkno;
17340 
17341 	/*
17342 	 * Now try to get the error block number from the sense data
17343 	 */
17344 	sensep = xp->xb_sense_data;
17345 
17346 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17347 		(uint64_t *)&err_blkno)) {
17348 		/*
17349 		 * We retrieved the error block number from the information
17350 		 * portion of the sense data.
17351 		 *
17352 		 * For USCSI commands we are better off using the error
17353 		 * block no. as the requested block no. (This is the best
17354 		 * we can estimate.)
17355 		 */
17356 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17357 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17358 			request_blkno = err_blkno;
17359 		}
17360 	} else {
17361 		/*
17362 		 * Without the es_valid bit set (for fixed format) or an
17363 		 * information descriptor (for descriptor format) we cannot
17364 		 * be certain of the error blkno, so just use the
17365 		 * request_blkno.
17366 		 */
17367 		err_blkno = (diskaddr_t)request_blkno;
17368 	}
17369 
17370 	/*
17371 	 * The following will log the buffer contents for the release driver
17372 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17373 	 * level is set to verbose.
17374 	 */
17375 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17376 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17377 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17378 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17379 
17380 	if (pfa_flag == FALSE) {
17381 		/* This is normally only set for USCSI */
17382 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17383 			return;
17384 		}
17385 
17386 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17387 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17388 		    (severity < sd_error_level))) {
17389 			return;
17390 		}
17391 	}
17392 
17393 	/*
17394 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
17395 	 */
17396 	if ((SD_IS_LSI(un)) &&
17397 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17398 	    (scsi_sense_asc(sensep) == 0x94) &&
17399 	    (scsi_sense_ascq(sensep) == 0x01)) {
17400 		un->un_sonoma_failure_count++;
17401 		if (un->un_sonoma_failure_count > 1) {
17402 			return;
17403 		}
17404 	}
17405 
17406 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17407 	    request_blkno, err_blkno, scsi_cmds,
17408 	    (struct scsi_extended_sense *)sensep,
17409 	    un->un_additional_codes, NULL);
17410 }
17411 
17412 /*
17413  *    Function: sd_sense_key_no_sense
17414  *
17415  * Description: Recovery action when sense data was not received.
17416  *
17417  *     Context: May be called from interrupt context
17418  */
17419 
17420 static void
17421 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17422 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17423 {
17424 	struct sd_sense_info	si;
17425 
17426 	ASSERT(un != NULL);
17427 	ASSERT(mutex_owned(SD_MUTEX(un)));
17428 	ASSERT(bp != NULL);
17429 	ASSERT(xp != NULL);
17430 	ASSERT(pktp != NULL);
17431 
17432 	si.ssi_severity = SCSI_ERR_FATAL;
17433 	si.ssi_pfa_flag = FALSE;
17434 
17435 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17436 
17437 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17438 		&si, EIO, (clock_t)0, NULL);
17439 }
17440 
17441 
17442 /*
17443  *    Function: sd_sense_key_recoverable_error
17444  *
17445  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17446  *
17447  *     Context: May be called from interrupt context
17448  */
17449 
17450 static void
17451 sd_sense_key_recoverable_error(struct sd_lun *un,
17452 	uint8_t *sense_datap,
17453 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17454 {
17455 	struct sd_sense_info	si;
17456 	uint8_t asc = scsi_sense_asc(sense_datap);
17457 
17458 	ASSERT(un != NULL);
17459 	ASSERT(mutex_owned(SD_MUTEX(un)));
17460 	ASSERT(bp != NULL);
17461 	ASSERT(xp != NULL);
17462 	ASSERT(pktp != NULL);
17463 
17464 	/*
17465 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17466 	 */
17467 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17468 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17469 		si.ssi_severity = SCSI_ERR_INFO;
17470 		si.ssi_pfa_flag = TRUE;
17471 	} else {
17472 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17473 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17474 		si.ssi_severity = SCSI_ERR_RECOVERED;
17475 		si.ssi_pfa_flag = FALSE;
17476 	}
17477 
17478 	if (pktp->pkt_resid == 0) {
17479 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17480 		sd_return_command(un, bp);
17481 		return;
17482 	}
17483 
17484 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17485 	    &si, EIO, (clock_t)0, NULL);
17486 }
17487 
17488 
17489 
17490 
17491 /*
17492  *    Function: sd_sense_key_not_ready
17493  *
17494  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17495  *
17496  *     Context: May be called from interrupt context
17497  */
17498 
17499 static void
17500 sd_sense_key_not_ready(struct sd_lun *un,
17501 	uint8_t *sense_datap,
17502 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17503 {
17504 	struct sd_sense_info	si;
17505 	uint8_t asc = scsi_sense_asc(sense_datap);
17506 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17507 
17508 	ASSERT(un != NULL);
17509 	ASSERT(mutex_owned(SD_MUTEX(un)));
17510 	ASSERT(bp != NULL);
17511 	ASSERT(xp != NULL);
17512 	ASSERT(pktp != NULL);
17513 
17514 	si.ssi_severity = SCSI_ERR_FATAL;
17515 	si.ssi_pfa_flag = FALSE;
17516 
17517 	/*
17518 	 * Update error stats after first NOT READY error. Disks may have
17519 	 * been powered down and may need to be restarted.  For CDROMs,
17520 	 * report NOT READY errors only if media is present.
17521 	 */
17522 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17523 	    (xp->xb_retry_count > 0)) {
17524 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17525 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17526 	}
17527 
17528 	/*
17529 	 * Just fail if the "not ready" retry limit has been reached.
17530 	 */
17531 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17532 		/* Special check for error message printing for removables. */
17533 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17534 		    (ascq >= 0x04)) {
17535 			si.ssi_severity = SCSI_ERR_ALL;
17536 		}
17537 		goto fail_command;
17538 	}
17539 
17540 	/*
17541 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17542 	 * what to do.
17543 	 */
17544 	switch (asc) {
17545 	case 0x04:	/* LOGICAL UNIT NOT READY */
17546 		/*
17547 		 * Disk drives that don't spin up result in a very long delay
17548 		 * in format(1M) without warning messages. We will log a message
17549 		 * if the error level is set to verbose.
17550 		 */
17551 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17552 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17553 			    "logical unit not ready, resetting disk\n");
17554 		}
17555 
17556 		/*
17557 		 * There are different requirements for CDROMs and disks for
17558 		 * the number of retries.  If a CD-ROM is giving this, it is
17559 		 * probably reading TOC and is in the process of getting
17560 		 * ready, so we should keep on trying for a long time to make
17561 		 * sure that all types of media are taken in account (for
17562 		 * sure that all types of media are taken into account (for
17563 		 * disks we do not want to retry this too many times as this
17564 		 * can cause a long hang in format when the drive refuses to
17565 		 * spin up (a very common failure).
17566 		 */
17567 		switch (ascq) {
17568 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17569 			/*
17570 			 * Disk drives frequently refuse to spin up which
17571 			 * results in a very long hang in format without
17572 			 * warning messages.
17573 			 *
17574 			 * Note: This code preserves the legacy behavior of
17575 			 * comparing xb_retry_count against zero for fibre
17576 			 * channel targets instead of comparing against the
17577 			 * un_reset_retry_count value.  The reason for this
17578 			 * discrepancy has been so utterly lost beneath the
17579 			 * Sands of Time that even Indiana Jones could not
17580 			 * find it.
17581 			 */
17582 			if (un->un_f_is_fibre == TRUE) {
17583 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17584 				    (xp->xb_retry_count > 0)) &&
17585 				    (un->un_startstop_timeid == NULL)) {
17586 					scsi_log(SD_DEVINFO(un), sd_label,
17587 					    CE_WARN, "logical unit not ready, "
17588 					    "resetting disk\n");
17589 					sd_reset_target(un, pktp);
17590 				}
17591 			} else {
17592 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17593 				    (xp->xb_retry_count >
17594 				    un->un_reset_retry_count)) &&
17595 				    (un->un_startstop_timeid == NULL)) {
17596 					scsi_log(SD_DEVINFO(un), sd_label,
17597 					    CE_WARN, "logical unit not ready, "
17598 					    "resetting disk\n");
17599 					sd_reset_target(un, pktp);
17600 				}
17601 			}
17602 			break;
17603 
17604 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17605 			/*
17606 			 * If the target is in the process of becoming
17607 			 * ready, just proceed with the retry. This can
17608 			 * happen with CD-ROMs that take a long time to
17609 			 * read TOC after a power cycle or reset.
17610 			 */
17611 			goto do_retry;
17612 
17613 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17614 			break;
17615 
17616 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17617 			/*
17618 			 * Retries cannot help here so just fail right away.
17619 			 */
17620 			goto fail_command;
17621 
17622 		case 0x88:
17623 			/*
17624 			 * Vendor-unique code for T3/T4: it indicates a
17625 			 * path problem in a multipathed config, but as far as
17626 			 * the target driver is concerned it equates to a fatal
17627 			 * error, so we should just fail the command right away
17628 			 * (without printing anything to the console). If this
17629 			 * is not a T3/T4, fall thru to the default recovery
17630 			 * action.
17631 			 * T3/T4 is FC only, so there is no need to check is_fibre
17632 			 */
17633 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17634 				sd_return_failed_command(un, bp, EIO);
17635 				return;
17636 			}
17637 			/* FALLTHRU */
17638 
17639 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17640 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17641 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17642 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17643 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17644 		default:    /* Possible future codes in SCSI spec? */
17645 			/*
17646 			 * For removable-media devices, do not retry if
17647 			 * ASCQ > 2 as these result mostly from USCSI commands
17648 			 * on MMC devices issued to check status of an
17649 			 * operation initiated in immediate mode.  Also for
17650 			 * ASCQ >= 4 do not print console messages as these
17651 			 * mainly represent a user-initiated operation
17652 			 * instead of a system failure.
17653 			 */
17654 			if (un->un_f_has_removable_media) {
17655 				si.ssi_severity = SCSI_ERR_ALL;
17656 				goto fail_command;
17657 			}
17658 			break;
17659 		}
17660 
17661 		/*
17662 		 * As part of our recovery attempt for the NOT READY
17663 		 * condition, we issue a START STOP UNIT command. However
17664 		 * we want to wait for a short delay before attempting this
17665 		 * as there may still be more commands coming back from the
17666 		 * target with the check condition. To do this we use
17667 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17668 		 * the delay interval expires. (sd_start_stop_unit_callback()
17669 		 * dispatches sd_start_stop_unit_task(), which will issue
17670 		 * the actual START STOP UNIT command. The delay interval
17671 		 * the actual START STOP UNIT command.) The delay interval
17672 		 * command that generated the NOT READY condition.
17673 		 *
17674 		 * Note that we could just dispatch sd_start_stop_unit_task()
17675 		 * from here and allow it to sleep for the delay interval,
17676 		 * but then we would be tying up the taskq thread
17677 		 * uncesessarily for the duration of the delay.
17678 		 * unnecessarily for the duration of the delay.
17679 		 * Do not issue the START STOP UNIT if the current command
17680 		 * is already a START STOP UNIT.
17681 		 */
17682 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17683 			break;
17684 		}
17685 
17686 		/*
17687 		 * Do not schedule the timeout if one is already pending.
17688 		 */
17689 		if (un->un_startstop_timeid != NULL) {
17690 			SD_INFO(SD_LOG_ERROR, un,
17691 			    "sd_sense_key_not_ready: restart already issued to"
17692 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17693 			    ddi_get_instance(SD_DEVINFO(un)));
17694 			break;
17695 		}
17696 
17697 		/*
17698 		 * Schedule the START STOP UNIT command, then queue the command
17699 		 * for a retry.
17700 		 *
17701 		 * Note: A timeout is not scheduled for this retry because we
17702 		 * want the retry to be serial with the START_STOP_UNIT. The
17703 		 * retry will be started when the START_STOP_UNIT is completed
17704 		 * in sd_start_stop_unit_task.
17705 		 */
17706 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17707 		    un, SD_BSY_TIMEOUT / 2);
17708 		xp->xb_retry_count++;
17709 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17710 		return;
17711 
17712 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17713 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17714 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17715 			    "unit does not respond to selection\n");
17716 		}
17717 		break;
17718 
17719 	case 0x3A:	/* MEDIUM NOT PRESENT */
17720 		if (sd_error_level >= SCSI_ERR_FATAL) {
17721 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17722 			    "Caddy not inserted in drive\n");
17723 		}
17724 
17725 		sr_ejected(un);
17726 		un->un_mediastate = DKIO_EJECTED;
17727 		/* The state has changed, inform the media watch routines */
17728 		cv_broadcast(&un->un_state_cv);
17729 		/* Just fail if no media is present in the drive. */
17730 		goto fail_command;
17731 
17732 	default:
17733 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17734 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17735 			    "Unit not Ready. Additional sense code 0x%x\n",
17736 			    asc);
17737 		}
17738 		break;
17739 	}
17740 
17741 do_retry:
17742 
17743 	/*
17744 	 * Retry the command, as some targets may report NOT READY for
17745 	 * several seconds after being reset.
17746 	 */
17747 	xp->xb_retry_count++;
17748 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17749 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17750 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17751 
17752 	return;
17753 
17754 fail_command:
17755 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17756 	sd_return_failed_command(un, bp, EIO);
17757 }
17758 
17759 
17760 
17761 /*
17762  *    Function: sd_sense_key_medium_or_hardware_error
17763  *
17764  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17765  *		sense key.
17766  *
17767  *     Context: May be called from interrupt context
17768  */
17769 
17770 static void
17771 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17772 	uint8_t *sense_datap,
17773 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17774 {
17775 	struct sd_sense_info	si;
17776 	uint8_t sense_key = scsi_sense_key(sense_datap);
17777 	uint8_t asc = scsi_sense_asc(sense_datap);
17778 
17779 	ASSERT(un != NULL);
17780 	ASSERT(mutex_owned(SD_MUTEX(un)));
17781 	ASSERT(bp != NULL);
17782 	ASSERT(xp != NULL);
17783 	ASSERT(pktp != NULL);
17784 
17785 	si.ssi_severity = SCSI_ERR_FATAL;
17786 	si.ssi_pfa_flag = FALSE;
17787 
17788 	if (sense_key == KEY_MEDIUM_ERROR) {
17789 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17790 	}
17791 
17792 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17793 
17794 	if ((un->un_reset_retry_count != 0) &&
17795 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17796 		mutex_exit(SD_MUTEX(un));
17797 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17798 		if (un->un_f_allow_bus_device_reset == TRUE) {
17799 
17800 			boolean_t try_resetting_target = B_TRUE;
17801 
17802 			/*
17803 			 * We need to be able to handle specific ASC when we are
17804 			 * handling a KEY_HARDWARE_ERROR. In particular
17805 			 * taking the default action of resetting the target may
17806 			 * not be the appropriate way to attempt recovery.
17807 			 * Resetting a target because of a single LUN failure
17808 			 * victimizes all LUNs on that target.
17809 			 *
17810 			 * This is true for the LSI arrays, if an LSI
17811 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17812 			 * should trust it.
17813 			 */
17814 
17815 			if (sense_key == KEY_HARDWARE_ERROR) {
17816 				switch (asc) {
17817 				case 0x84:
17818 					if (SD_IS_LSI(un)) {
17819 						try_resetting_target = B_FALSE;
17820 					}
17821 					break;
17822 				default:
17823 					break;
17824 				}
17825 			}
17826 
17827 			if (try_resetting_target == B_TRUE) {
17828 				int reset_retval = 0;
17829 				if (un->un_f_lun_reset_enabled == TRUE) {
17830 					SD_TRACE(SD_LOG_IO_CORE, un,
17831 					    "sd_sense_key_medium_or_hardware_"
17832 					    "error: issuing RESET_LUN\n");
17833 					reset_retval =
17834 					    scsi_reset(SD_ADDRESS(un),
17835 					    RESET_LUN);
17836 				}
17837 				if (reset_retval == 0) {
17838 					SD_TRACE(SD_LOG_IO_CORE, un,
17839 					    "sd_sense_key_medium_or_hardware_"
17840 					    "error: issuing RESET_TARGET\n");
17841 					(void) scsi_reset(SD_ADDRESS(un),
17842 					    RESET_TARGET);
17843 				}
17844 			}
17845 		}
17846 		mutex_enter(SD_MUTEX(un));
17847 	}
17848 
17849 	/*
17850 	 * This really ought to be a fatal error, but we will retry anyway
17851 	 * as some drives report this as a spurious error.
17852 	 */
17853 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17854 	    &si, EIO, (clock_t)0, NULL);
17855 }
17856 
17857 
17858 
17859 /*
17860  *    Function: sd_sense_key_illegal_request
17861  *
17862  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17863  *
17864  *     Context: May be called from interrupt context
17865  */
17866 
17867 static void
17868 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17869 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17870 {
17871 	struct sd_sense_info	si;
17872 
17873 	ASSERT(un != NULL);
17874 	ASSERT(mutex_owned(SD_MUTEX(un)));
17875 	ASSERT(bp != NULL);
17876 	ASSERT(xp != NULL);
17877 	ASSERT(pktp != NULL);
17878 
17879 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17880 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17881 
17882 	si.ssi_severity = SCSI_ERR_INFO;
17883 	si.ssi_pfa_flag = FALSE;
17884 
17885 	/* Pointless to retry if the target thinks it's an illegal request */
17886 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17887 	sd_return_failed_command(un, bp, EIO);
17888 }
17889 
17890 
17891 
17892 
17893 /*
17894  *    Function: sd_sense_key_unit_attention
17895  *
17896  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17897  *
17898  *     Context: May be called from interrupt context
17899  */
17900 
17901 static void
17902 sd_sense_key_unit_attention(struct sd_lun *un,
17903 	uint8_t *sense_datap,
17904 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17905 {
17906 	/*
17907 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17908 	 * like Sonoma can return UNIT ATTENTION close to a minute
17909 	 * under certain conditions.
17910 	 */
17911 	int	retry_check_flag = SD_RETRIES_UA;
17912 	boolean_t	kstat_updated = B_FALSE;
17913 	struct	sd_sense_info		si;
17914 	uint8_t asc = scsi_sense_asc(sense_datap);
17915 
17916 	ASSERT(un != NULL);
17917 	ASSERT(mutex_owned(SD_MUTEX(un)));
17918 	ASSERT(bp != NULL);
17919 	ASSERT(xp != NULL);
17920 	ASSERT(pktp != NULL);
17921 
17922 	si.ssi_severity = SCSI_ERR_INFO;
17923 	si.ssi_pfa_flag = FALSE;
17924 
17925 
17926 	switch (asc) {
17927 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17928 		if (sd_report_pfa != 0) {
17929 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17930 			si.ssi_pfa_flag = TRUE;
17931 			retry_check_flag = SD_RETRIES_STANDARD;
17932 			goto do_retry;
17933 		}
17934 
17935 		break;
17936 
17937 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17938 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17939 			un->un_resvd_status |=
17940 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17941 		}
17942 #ifdef _LP64
17943 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
17944 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
17945 			    un, KM_NOSLEEP) == 0) {
17946 				/*
17947 				 * If we can't dispatch the task we'll just
17948 				 * live without descriptor sense.  We can
17949 				 * try again on the next "unit attention".
17950 				 */
17951 				SD_ERROR(SD_LOG_ERROR, un,
17952 				    "sd_sense_key_unit_attention: "
17953 				    "Could not dispatch "
17954 				    "sd_reenable_dsense_task\n");
17955 			}
17956 		}
17957 #endif /* _LP64 */
17958 		/* FALLTHRU */
17959 
17960 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17961 		if (!un->un_f_has_removable_media) {
17962 			break;
17963 		}
17964 
17965 		/*
17966 		 * When we get a unit attention from a removable-media device,
17967 		 * it may be in a state that will take a long time to recover
17968 		 * (e.g., from a reset).  Since we are executing in interrupt
17969 		 * context here, we cannot wait around for the device to come
17970 		 * back. So hand this command off to sd_media_change_task()
17971 		 * for deferred processing under taskq thread context. (Note
17972 		 * that the command still may be failed if a problem is
17973 		 * encountered at a later time.)
17974 		 */
17975 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17976 		    KM_NOSLEEP) == 0) {
17977 			/*
17978 			 * Cannot dispatch the request so fail the command.
17979 			 */
17980 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17981 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17982 			si.ssi_severity = SCSI_ERR_FATAL;
17983 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17984 			sd_return_failed_command(un, bp, EIO);
17985 		}
17986 
17987 		/*
17988 		 * If we failed to dispatch sd_media_change_task(), the kstats
17989 		 * were already updated above. If the dispatch succeeded, the
17990 		 * kstats will be updated later if an error is encountered.
17991 		 * Either way, set the kstat_updated flag here.
17992 		 */
17993 		kstat_updated = B_TRUE;
17994 
17995 		/*
17996 		 * Either the command has been successfully dispatched to a
17997 		 * task Q for retrying, or the dispatch failed. In either case
17998 		 * do NOT retry again by calling sd_retry_command. This sets up
17999 		 * two retries of the same command and when one completes and
18000 		 * frees the resources the other will access freed memory,
18001 		 * a bad thing.
18002 		 */
18003 		return;
18004 
18005 	default:
18006 		break;
18007 	}
18008 
18009 	/*
18010 	 * Update kstat if we haven't done that.
18011 	 */
18012 	if (!kstat_updated) {
18013 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18014 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18015 	}
18016 
18017 do_retry:
18018 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18019 	    EIO, SD_UA_RETRY_DELAY, NULL);
18020 }
18021 
18022 
18023 
18024 /*
18025  *    Function: sd_sense_key_fail_command
18026  *
 * Description: Used to fail a command when we don't like the sense key
 *		that was returned.
18029  *
18030  *     Context: May be called from interrupt context
18031  */
18032 
18033 static void
18034 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18035 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18036 {
18037 	struct sd_sense_info	si;
18038 
18039 	ASSERT(un != NULL);
18040 	ASSERT(mutex_owned(SD_MUTEX(un)));
18041 	ASSERT(bp != NULL);
18042 	ASSERT(xp != NULL);
18043 	ASSERT(pktp != NULL);
18044 
18045 	si.ssi_severity = SCSI_ERR_FATAL;
18046 	si.ssi_pfa_flag = FALSE;
18047 
18048 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18049 	sd_return_failed_command(un, bp, EIO);
18050 }
18051 
18052 
18053 
18054 /*
18055  *    Function: sd_sense_key_blank_check
18056  *
18057  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18058  *		Has no monetary connotation.
18059  *
18060  *     Context: May be called from interrupt context
18061  */
18062 
18063 static void
18064 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18065 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18066 {
18067 	struct sd_sense_info	si;
18068 
18069 	ASSERT(un != NULL);
18070 	ASSERT(mutex_owned(SD_MUTEX(un)));
18071 	ASSERT(bp != NULL);
18072 	ASSERT(xp != NULL);
18073 	ASSERT(pktp != NULL);
18074 
18075 	/*
18076 	 * Blank check is not fatal for removable devices, therefore
18077 	 * it does not require a console message.
18078 	 */
18079 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18080 	    SCSI_ERR_FATAL;
18081 	si.ssi_pfa_flag = FALSE;
18082 
18083 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18084 	sd_return_failed_command(un, bp, EIO);
18085 }
18086 
18087 
18088 
18089 
18090 /*
18091  *    Function: sd_sense_key_aborted_command
18092  *
18093  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18094  *
18095  *     Context: May be called from interrupt context
18096  */
18097 
18098 static void
18099 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18100 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18101 {
18102 	struct sd_sense_info	si;
18103 
18104 	ASSERT(un != NULL);
18105 	ASSERT(mutex_owned(SD_MUTEX(un)));
18106 	ASSERT(bp != NULL);
18107 	ASSERT(xp != NULL);
18108 	ASSERT(pktp != NULL);
18109 
18110 	si.ssi_severity = SCSI_ERR_FATAL;
18111 	si.ssi_pfa_flag = FALSE;
18112 
18113 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18114 
18115 	/*
18116 	 * This really ought to be a fatal error, but we will retry anyway
18117 	 * as some drives report this as a spurious error.
18118 	 */
18119 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18120 	    &si, EIO, (clock_t)0, NULL);
18121 }
18122 
18123 
18124 
18125 /*
18126  *    Function: sd_sense_key_default
18127  *
18128  * Description: Default recovery action for several SCSI sense keys (basically
18129  *		attempts a retry).
18130  *
18131  *     Context: May be called from interrupt context
18132  */
18133 
18134 static void
18135 sd_sense_key_default(struct sd_lun *un,
18136 	uint8_t *sense_datap,
18137 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18138 {
18139 	struct sd_sense_info	si;
18140 	uint8_t sense_key = scsi_sense_key(sense_datap);
18141 
18142 	ASSERT(un != NULL);
18143 	ASSERT(mutex_owned(SD_MUTEX(un)));
18144 	ASSERT(bp != NULL);
18145 	ASSERT(xp != NULL);
18146 	ASSERT(pktp != NULL);
18147 
18148 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18149 
18150 	/*
18151 	 * Undecoded sense key.	Attempt retries and hope that will fix
18152 	 * the problem.  Otherwise, we're dead.
18153 	 */
18154 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18155 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18156 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18157 	}
18158 
18159 	si.ssi_severity = SCSI_ERR_FATAL;
18160 	si.ssi_pfa_flag = FALSE;
18161 
18162 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18163 	    &si, EIO, (clock_t)0, NULL);
18164 }
18165 
18166 
18167 
18168 /*
18169  *    Function: sd_print_retry_msg
18170  *
18171  * Description: Print a message indicating the retry action being taken.
18172  *
18173  *   Arguments: un - ptr to associated softstate
18174  *		bp - ptr to buf(9S) for the command
18175  *		arg - not used.
18176  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18177  *			or SD_NO_RETRY_ISSUED
18178  *
18179  *     Context: May be called from interrupt context
18180  */
18181 /* ARGSUSED */
18182 static void
18183 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18184 {
18185 	struct sd_xbuf	*xp;
18186 	struct scsi_pkt *pktp;
18187 	char *reasonp;
18188 	char *msgp;
18189 
18190 	ASSERT(un != NULL);
18191 	ASSERT(mutex_owned(SD_MUTEX(un)));
18192 	ASSERT(bp != NULL);
18193 	pktp = SD_GET_PKTP(bp);
18194 	ASSERT(pktp != NULL);
18195 	xp = SD_GET_XBUF(bp);
18196 	ASSERT(xp != NULL);
18197 
18198 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18199 	mutex_enter(&un->un_pm_mutex);
18200 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18201 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18202 	    (pktp->pkt_flags & FLAG_SILENT)) {
18203 		mutex_exit(&un->un_pm_mutex);
18204 		goto update_pkt_reason;
18205 	}
18206 	mutex_exit(&un->un_pm_mutex);
18207 
18208 	/*
18209 	 * Suppress messages if they are all the same pkt_reason; with
18210 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18211 	 * If we are in panic, then suppress the retry messages.
18212 	 */
18213 	switch (flag) {
18214 	case SD_NO_RETRY_ISSUED:
18215 		msgp = "giving up";
18216 		break;
18217 	case SD_IMMEDIATE_RETRY_ISSUED:
18218 	case SD_DELAYED_RETRY_ISSUED:
18219 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18220 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18221 		    (sd_error_level != SCSI_ERR_ALL))) {
18222 			return;
18223 		}
18224 		msgp = "retrying command";
18225 		break;
18226 	default:
18227 		goto update_pkt_reason;
18228 	}
18229 
18230 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18231 	    scsi_rname(pktp->pkt_reason));
18232 
18233 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18234 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18235 
18236 update_pkt_reason:
18237 	/*
18238 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18239 	 * This is to prevent multiple console messages for the same failure
18240 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18241 	 * when the command is retried successfully because there still may be
18242 	 * more commands coming back with the same value of pktp->pkt_reason.
18243 	 */
18244 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18245 		un->un_last_pkt_reason = pktp->pkt_reason;
18246 	}
18247 }
18248 
18249 
18250 /*
18251  *    Function: sd_print_cmd_incomplete_msg
18252  *
18253  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18254  *
18255  *   Arguments: un - ptr to associated softstate
18256  *		bp - ptr to buf(9S) for the command
18257  *		arg - passed to sd_print_retry_msg()
18258  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18259  *			or SD_NO_RETRY_ISSUED
18260  *
18261  *     Context: May be called from interrupt context
18262  */
18263 
18264 static void
18265 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18266 	int code)
18267 {
18268 	dev_info_t	*dip;
18269 
18270 	ASSERT(un != NULL);
18271 	ASSERT(mutex_owned(SD_MUTEX(un)));
18272 	ASSERT(bp != NULL);
18273 
18274 	switch (code) {
18275 	case SD_NO_RETRY_ISSUED:
18276 		/* Command was failed. Someone turned off this target? */
18277 		if (un->un_state != SD_STATE_OFFLINE) {
18278 			/*
18279 			 * Suppress message if we are detaching and
18280 			 * device has been disconnected
18281 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18282 			 * private interface and not part of the DDI
18283 			 */
18284 			dip = un->un_sd->sd_dev;
18285 			if (!(DEVI_IS_DETACHING(dip) &&
18286 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18287 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18288 				"disk not responding to selection\n");
18289 			}
18290 			New_state(un, SD_STATE_OFFLINE);
18291 		}
18292 		break;
18293 
18294 	case SD_DELAYED_RETRY_ISSUED:
18295 	case SD_IMMEDIATE_RETRY_ISSUED:
18296 	default:
18297 		/* Command was successfully queued for retry */
18298 		sd_print_retry_msg(un, bp, arg, code);
18299 		break;
18300 	}
18301 }
18302 
18303 
18304 /*
18305  *    Function: sd_pkt_reason_cmd_incomplete
18306  *
18307  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18308  *
18309  *     Context: May be called from interrupt context
18310  */
18311 
18312 static void
18313 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18314 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18315 {
18316 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18317 
18318 	ASSERT(un != NULL);
18319 	ASSERT(mutex_owned(SD_MUTEX(un)));
18320 	ASSERT(bp != NULL);
18321 	ASSERT(xp != NULL);
18322 	ASSERT(pktp != NULL);
18323 
18324 	/* Do not do a reset if selection did not complete */
18325 	/* Note: Should this not just check the bit? */
18326 	if (pktp->pkt_state != STATE_GOT_BUS) {
18327 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18328 		sd_reset_target(un, pktp);
18329 	}
18330 
18331 	/*
18332 	 * If the target was not successfully selected, then set
18333 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18334 	 * with the target, and further retries and/or commands are
18335 	 * likely to take a long time.
18336 	 */
18337 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18338 		flag |= SD_RETRIES_FAILFAST;
18339 	}
18340 
18341 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18342 
18343 	sd_retry_command(un, bp, flag,
18344 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18345 }
18346 
18347 
18348 
18349 /*
18350  *    Function: sd_pkt_reason_cmd_tran_err
18351  *
18352  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18353  *
18354  *     Context: May be called from interrupt context
18355  */
18356 
18357 static void
18358 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18359 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18360 {
18361 	ASSERT(un != NULL);
18362 	ASSERT(mutex_owned(SD_MUTEX(un)));
18363 	ASSERT(bp != NULL);
18364 	ASSERT(xp != NULL);
18365 	ASSERT(pktp != NULL);
18366 
18367 	/*
18368 	 * Do not reset if we got a parity error, or if
18369 	 * selection did not complete.
18370 	 */
18371 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18372 	/* Note: Should this not just check the bit for pkt_state? */
18373 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18374 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18375 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18376 		sd_reset_target(un, pktp);
18377 	}
18378 
18379 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18380 
18381 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18382 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18383 }
18384 
18385 
18386 
18387 /*
18388  *    Function: sd_pkt_reason_cmd_reset
18389  *
18390  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18391  *
18392  *     Context: May be called from interrupt context
18393  */
18394 
18395 static void
18396 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18397 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18398 {
18399 	ASSERT(un != NULL);
18400 	ASSERT(mutex_owned(SD_MUTEX(un)));
18401 	ASSERT(bp != NULL);
18402 	ASSERT(xp != NULL);
18403 	ASSERT(pktp != NULL);
18404 
18405 	/* The target may still be running the command, so try to reset. */
18406 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18407 	sd_reset_target(un, pktp);
18408 
18409 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18410 
18411 	/*
18412 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18413 	 * reset because another target on this bus caused it. The target
18414 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18415 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18416 	 */
18417 
18418 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18419 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18420 }
18421 
18422 
18423 
18424 
18425 /*
18426  *    Function: sd_pkt_reason_cmd_aborted
18427  *
18428  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18429  *
18430  *     Context: May be called from interrupt context
18431  */
18432 
18433 static void
18434 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18435 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18436 {
18437 	ASSERT(un != NULL);
18438 	ASSERT(mutex_owned(SD_MUTEX(un)));
18439 	ASSERT(bp != NULL);
18440 	ASSERT(xp != NULL);
18441 	ASSERT(pktp != NULL);
18442 
18443 	/* The target may still be running the command, so try to reset. */
18444 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18445 	sd_reset_target(un, pktp);
18446 
18447 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18448 
18449 	/*
18450 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18451 	 * aborted because another target on this bus caused it. The target
18452 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18453 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18454 	 */
18455 
18456 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18457 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18458 }
18459 
18460 
18461 
18462 /*
18463  *    Function: sd_pkt_reason_cmd_timeout
18464  *
18465  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18466  *
18467  *     Context: May be called from interrupt context
18468  */
18469 
18470 static void
18471 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18472 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18473 {
18474 	ASSERT(un != NULL);
18475 	ASSERT(mutex_owned(SD_MUTEX(un)));
18476 	ASSERT(bp != NULL);
18477 	ASSERT(xp != NULL);
18478 	ASSERT(pktp != NULL);
18479 
18480 
18481 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18482 	sd_reset_target(un, pktp);
18483 
18484 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18485 
18486 	/*
18487 	 * A command timeout indicates that we could not establish
18488 	 * communication with the target, so set SD_RETRIES_FAILFAST
18489 	 * as further retries/commands are likely to take a long time.
18490 	 */
18491 	sd_retry_command(un, bp,
18492 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18493 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18494 }
18495 
18496 
18497 
18498 /*
18499  *    Function: sd_pkt_reason_cmd_unx_bus_free
18500  *
18501  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18502  *
18503  *     Context: May be called from interrupt context
18504  */
18505 
18506 static void
18507 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18508 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18509 {
18510 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18511 
18512 	ASSERT(un != NULL);
18513 	ASSERT(mutex_owned(SD_MUTEX(un)));
18514 	ASSERT(bp != NULL);
18515 	ASSERT(xp != NULL);
18516 	ASSERT(pktp != NULL);
18517 
18518 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18519 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18520 
18521 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18522 	    sd_print_retry_msg : NULL;
18523 
18524 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18525 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18526 }
18527 
18528 
18529 /*
18530  *    Function: sd_pkt_reason_cmd_tag_reject
18531  *
18532  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18533  *
18534  *     Context: May be called from interrupt context
18535  */
18536 
18537 static void
18538 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18539 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18540 {
18541 	ASSERT(un != NULL);
18542 	ASSERT(mutex_owned(SD_MUTEX(un)));
18543 	ASSERT(bp != NULL);
18544 	ASSERT(xp != NULL);
18545 	ASSERT(pktp != NULL);
18546 
18547 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18548 	pktp->pkt_flags = 0;
18549 	un->un_tagflags = 0;
18550 	if (un->un_f_opt_queueing == TRUE) {
18551 		un->un_throttle = min(un->un_throttle, 3);
18552 	} else {
18553 		un->un_throttle = 1;
18554 	}
18555 	mutex_exit(SD_MUTEX(un));
18556 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18557 	mutex_enter(SD_MUTEX(un));
18558 
18559 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18560 
18561 	/* Legacy behavior not to check retry counts here. */
18562 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18563 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18564 }
18565 
18566 
18567 /*
18568  *    Function: sd_pkt_reason_default
18569  *
18570  * Description: Default recovery actions for SCSA pkt_reason values that
18571  *		do not have more explicit recovery actions.
18572  *
18573  *     Context: May be called from interrupt context
18574  */
18575 
18576 static void
18577 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18578 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18579 {
18580 	ASSERT(un != NULL);
18581 	ASSERT(mutex_owned(SD_MUTEX(un)));
18582 	ASSERT(bp != NULL);
18583 	ASSERT(xp != NULL);
18584 	ASSERT(pktp != NULL);
18585 
18586 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18587 	sd_reset_target(un, pktp);
18588 
18589 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18590 
18591 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18592 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18593 }
18594 
18595 
18596 
18597 /*
18598  *    Function: sd_pkt_status_check_condition
18599  *
18600  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18601  *
18602  *     Context: May be called from interrupt context
18603  */
18604 
18605 static void
18606 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18607 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18608 {
18609 	ASSERT(un != NULL);
18610 	ASSERT(mutex_owned(SD_MUTEX(un)));
18611 	ASSERT(bp != NULL);
18612 	ASSERT(xp != NULL);
18613 	ASSERT(pktp != NULL);
18614 
18615 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18616 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18617 
18618 	/*
18619 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18620 	 * command will be retried after the request sense). Otherwise, retry
18621 	 * the command. Note: we are issuing the request sense even though the
18622 	 * retry limit may have been reached for the failed command.
18623 	 */
18624 	if (un->un_f_arq_enabled == FALSE) {
18625 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18626 		    "no ARQ, sending request sense command\n");
18627 		sd_send_request_sense_command(un, bp, pktp);
18628 	} else {
18629 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18630 		    "ARQ,retrying request sense command\n");
18631 #if defined(__i386) || defined(__amd64)
18632 		/*
18633 		 * The SD_RETRY_DELAY value need to be adjusted here
18634 		 * when SD_RETRY_DELAY change in sddef.h
18635 		 */
18636 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18637 			un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
18638 			NULL);
18639 #else
18640 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18641 		    EIO, SD_RETRY_DELAY, NULL);
18642 #endif
18643 	}
18644 
18645 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18646 }
18647 
18648 
18649 /*
18650  *    Function: sd_pkt_status_busy
18651  *
18652  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18653  *
18654  *     Context: May be called from interrupt context
18655  */
18656 
18657 static void
18658 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18659 	struct scsi_pkt *pktp)
18660 {
18661 	ASSERT(un != NULL);
18662 	ASSERT(mutex_owned(SD_MUTEX(un)));
18663 	ASSERT(bp != NULL);
18664 	ASSERT(xp != NULL);
18665 	ASSERT(pktp != NULL);
18666 
18667 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18668 	    "sd_pkt_status_busy: entry\n");
18669 
18670 	/* If retries are exhausted, just fail the command. */
18671 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18672 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18673 		    "device busy too long\n");
18674 		sd_return_failed_command(un, bp, EIO);
18675 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18676 		    "sd_pkt_status_busy: exit\n");
18677 		return;
18678 	}
18679 	xp->xb_retry_count++;
18680 
18681 	/*
18682 	 * Try to reset the target. However, we do not want to perform
18683 	 * more than one reset if the device continues to fail. The reset
18684 	 * will be performed when the retry count reaches the reset
18685 	 * threshold.  This threshold should be set such that at least
18686 	 * one retry is issued before the reset is performed.
18687 	 */
18688 	if (xp->xb_retry_count ==
18689 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18690 		int rval = 0;
18691 		mutex_exit(SD_MUTEX(un));
18692 		if (un->un_f_allow_bus_device_reset == TRUE) {
18693 			/*
18694 			 * First try to reset the LUN; if we cannot then
18695 			 * try to reset the target.
18696 			 */
18697 			if (un->un_f_lun_reset_enabled == TRUE) {
18698 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18699 				    "sd_pkt_status_busy: RESET_LUN\n");
18700 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18701 			}
18702 			if (rval == 0) {
18703 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18704 				    "sd_pkt_status_busy: RESET_TARGET\n");
18705 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18706 			}
18707 		}
18708 		if (rval == 0) {
18709 			/*
18710 			 * If the RESET_LUN and/or RESET_TARGET failed,
18711 			 * try RESET_ALL
18712 			 */
18713 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18714 			    "sd_pkt_status_busy: RESET_ALL\n");
18715 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18716 		}
18717 		mutex_enter(SD_MUTEX(un));
18718 		if (rval == 0) {
18719 			/*
18720 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18721 			 * At this point we give up & fail the command.
18722 			 */
18723 			sd_return_failed_command(un, bp, EIO);
18724 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18725 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18726 			return;
18727 		}
18728 	}
18729 
18730 	/*
18731 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18732 	 * we have already checked the retry counts above.
18733 	 */
18734 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18735 	    EIO, SD_BSY_TIMEOUT, NULL);
18736 
18737 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18738 	    "sd_pkt_status_busy: exit\n");
18739 }
18740 
18741 
18742 /*
18743  *    Function: sd_pkt_status_reservation_conflict
18744  *
18745  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18746  *		command status.
18747  *
18748  *     Context: May be called from interrupt context
18749  */
18750 
18751 static void
18752 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18753 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18754 {
18755 	ASSERT(un != NULL);
18756 	ASSERT(mutex_owned(SD_MUTEX(un)));
18757 	ASSERT(bp != NULL);
18758 	ASSERT(xp != NULL);
18759 	ASSERT(pktp != NULL);
18760 
18761 	/*
18762 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
18763 	 * conflict could be due to various reasons like incorrect keys, not
18764 	 * registered or not reserved etc. So, we return EACCES to the caller.
18765 	 */
18766 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18767 		int cmd = SD_GET_PKT_OPCODE(pktp);
18768 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18769 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18770 			sd_return_failed_command(un, bp, EACCES);
18771 			return;
18772 		}
18773 	}
18774 
18775 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18776 
18777 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18778 		if (sd_failfast_enable != 0) {
18779 			/* By definition, we must panic here.... */
18780 			sd_panic_for_res_conflict(un);
18781 			/*NOTREACHED*/
18782 		}
18783 		SD_ERROR(SD_LOG_IO, un,
18784 		    "sd_handle_resv_conflict: Disk Reserved\n");
18785 		sd_return_failed_command(un, bp, EACCES);
18786 		return;
18787 	}
18788 
18789 	/*
18790 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18791 	 * property is set (default is 1). Retries will not succeed
18792 	 * on a disk reserved by another initiator. HA systems
18793 	 * may reset this via sd.conf to avoid these retries.
18794 	 *
18795 	 * Note: The legacy return code for this failure is EIO, however EACCES
18796 	 * seems more appropriate for a reservation conflict.
18797 	 */
18798 	if (sd_retry_on_reservation_conflict == 0) {
18799 		SD_ERROR(SD_LOG_IO, un,
18800 		    "sd_handle_resv_conflict: Device Reserved\n");
18801 		sd_return_failed_command(un, bp, EIO);
18802 		return;
18803 	}
18804 
18805 	/*
18806 	 * Retry the command if we can.
18807 	 *
18808 	 * Note: The legacy return code for this failure is EIO, however EACCES
18809 	 * seems more appropriate for a reservation conflict.
18810 	 */
18811 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18812 	    (clock_t)2, NULL);
18813 }
18814 
18815 
18816 
18817 /*
18818  *    Function: sd_pkt_status_qfull
18819  *
18820  * Description: Handle a QUEUE FULL condition from the target.  This can
18821  *		occur if the HBA does not handle the queue full condition.
18822  *		(Basically this means third-party HBAs as Sun HBAs will
18823  *		handle the queue full condition.)  Note that if there are
18824  *		some commands already in the transport, then the queue full
18825  *		has occurred because the queue for this nexus is actually
18826  *		full. If there are no commands in the transport, then the
18827  *		queue full is resulting from some other initiator or lun
18828  *		consuming all the resources at the target.
18829  *
18830  *     Context: May be called from interrupt context
18831  */
18832 
18833 static void
18834 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18835 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18836 {
18837 	ASSERT(un != NULL);
18838 	ASSERT(mutex_owned(SD_MUTEX(un)));
18839 	ASSERT(bp != NULL);
18840 	ASSERT(xp != NULL);
18841 	ASSERT(pktp != NULL);
18842 
18843 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18844 	    "sd_pkt_status_qfull: entry\n");
18845 
18846 	/*
18847 	 * Just lower the QFULL throttle and retry the command.  Note that
18848 	 * we do not limit the number of retries here.
18849 	 */
18850 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18851 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18852 	    SD_RESTART_TIMEOUT, NULL);
18853 
18854 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18855 	    "sd_pkt_status_qfull: exit\n");
18856 }
18857 
18858 
18859 /*
18860  *    Function: sd_reset_target
18861  *
18862  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18863  *		RESET_TARGET, or RESET_ALL.
18864  *
18865  *     Context: May be called under interrupt context.
18866  */
18867 
18868 static void
18869 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18870 {
18871 	int rval = 0;
18872 
18873 	ASSERT(un != NULL);
18874 	ASSERT(mutex_owned(SD_MUTEX(un)));
18875 	ASSERT(pktp != NULL);
18876 
18877 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18878 
18879 	/*
18880 	 * No need to reset if the transport layer has already done so.
18881 	 */
18882 	if ((pktp->pkt_statistics &
18883 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18884 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18885 		    "sd_reset_target: no reset\n");
18886 		return;
18887 	}
18888 
18889 	mutex_exit(SD_MUTEX(un));
18890 
18891 	if (un->un_f_allow_bus_device_reset == TRUE) {
18892 		if (un->un_f_lun_reset_enabled == TRUE) {
18893 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18894 			    "sd_reset_target: RESET_LUN\n");
18895 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18896 		}
18897 		if (rval == 0) {
18898 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18899 			    "sd_reset_target: RESET_TARGET\n");
18900 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18901 		}
18902 	}
18903 
18904 	if (rval == 0) {
18905 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18906 		    "sd_reset_target: RESET_ALL\n");
18907 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18908 	}
18909 
18910 	mutex_enter(SD_MUTEX(un));
18911 
18912 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18913 }
18914 
18915 
18916 /*
18917  *    Function: sd_media_change_task
18918  *
18919  * Description: Recovery action for CDROM to become available.
18920  *
18921  *     Context: Executes in a taskq() thread context
18922  */
18923 
18924 static void
18925 sd_media_change_task(void *arg)
18926 {
18927 	struct	scsi_pkt	*pktp = arg;
18928 	struct	sd_lun		*un;
18929 	struct	buf		*bp;
18930 	struct	sd_xbuf		*xp;
18931 	int	err		= 0;
18932 	int	retry_count	= 0;
18933 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18934 	struct	sd_sense_info	si;
18935 
18936 	ASSERT(pktp != NULL);
18937 	bp = (struct buf *)pktp->pkt_private;
18938 	ASSERT(bp != NULL);
18939 	xp = SD_GET_XBUF(bp);
18940 	ASSERT(xp != NULL);
18941 	un = SD_GET_UN(bp);
18942 	ASSERT(un != NULL);
18943 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18944 	ASSERT(un->un_f_monitor_media_state);
18945 
18946 	si.ssi_severity = SCSI_ERR_INFO;
18947 	si.ssi_pfa_flag = FALSE;
18948 
18949 	/*
18950 	 * When a reset is issued on a CDROM, it takes a long time to
18951 	 * recover. First few attempts to read capacity and other things
18952 	 * related to handling unit attention fail (with a ASC 0x4 and
18953 	 * ASCQ 0x1). In that case we want to do enough retries and we want
18954 	 * to limit the retries in other cases of genuine failures like
18955 	 * no media in drive.
18956 	 */
18957 	while (retry_count++ < retry_limit) {
18958 		if ((err = sd_handle_mchange(un)) == 0) {
18959 			break;
18960 		}
18961 		if (err == EAGAIN) {
18962 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18963 		}
18964 		/* Sleep for 0.5 sec. & try again */
18965 		delay(drv_usectohz(500000));
18966 	}
18967 
18968 	/*
18969 	 * Dispatch (retry or fail) the original command here,
18970 	 * along with appropriate console messages....
18971 	 *
18972 	 * Must grab the mutex before calling sd_retry_command,
18973 	 * sd_print_sense_msg and sd_return_failed_command.
18974 	 */
18975 	mutex_enter(SD_MUTEX(un));
18976 	if (err != SD_CMD_SUCCESS) {
18977 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18978 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18979 		si.ssi_severity = SCSI_ERR_FATAL;
18980 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18981 		sd_return_failed_command(un, bp, EIO);
18982 	} else {
18983 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18984 		    &si, EIO, (clock_t)0, NULL);
18985 	}
18986 	mutex_exit(SD_MUTEX(un));
18987 }
18988 
18989 
18990 
18991 /*
18992  *    Function: sd_handle_mchange
18993  *
18994  * Description: Perform geometry validation & other recovery when CDROM
18995  *		has been removed from drive.
18996  *
18997  * Return Code: 0 for success
18998  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18999  *		sd_send_scsi_READ_CAPACITY()
19000  *
19001  *     Context: Executes in a taskq() thread context
19002  */
19003 
19004 static int
19005 sd_handle_mchange(struct sd_lun *un)
19006 {
19007 	uint64_t	capacity;
19008 	uint32_t	lbasize;
19009 	int		rval;
19010 
19011 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19012 	ASSERT(un->un_f_monitor_media_state);
19013 
19014 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19015 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19016 		return (rval);
19017 	}
19018 
19019 	mutex_enter(SD_MUTEX(un));
19020 	sd_update_block_info(un, lbasize, capacity);
19021 
19022 	if (un->un_errstats != NULL) {
19023 		struct	sd_errstats *stp =
19024 		    (struct sd_errstats *)un->un_errstats->ks_data;
19025 		stp->sd_capacity.value.ui64 = (uint64_t)
19026 		    ((uint64_t)un->un_blockcount *
19027 		    (uint64_t)un->un_tgt_blocksize);
19028 	}
19029 
19030 	/*
19031 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19032 	 * valid geometry.
19033 	 */
19034 	un->un_f_geometry_is_valid = FALSE;
19035 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19036 	if (un->un_f_geometry_is_valid == FALSE) {
19037 		mutex_exit(SD_MUTEX(un));
19038 		return (EIO);
19039 	}
19040 
19041 	mutex_exit(SD_MUTEX(un));
19042 
19043 	/*
19044 	 * Try to lock the door
19045 	 */
19046 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19047 	    SD_PATH_DIRECT_PRIORITY));
19048 }
19049 
19050 
19051 /*
19052  *    Function: sd_send_scsi_DOORLOCK
19053  *
19054  * Description: Issue the scsi DOOR LOCK command
19055  *
19056  *   Arguments: un    - pointer to driver soft state (unit) structure for
19057  *			this target.
19058  *		flag  - SD_REMOVAL_ALLOW
19059  *			SD_REMOVAL_PREVENT
19060  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19061  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19062  *			to use the USCSI "direct" chain and bypass the normal
19063  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19064  *			command is issued as part of an error recovery action.
19065  *
19066  * Return Code: 0   - Success
19067  *		errno return code from sd_send_scsi_cmd()
19068  *
19069  *     Context: Can sleep.
19070  */
19071 
19072 static int
19073 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19074 {
19075 	union scsi_cdb		cdb;
19076 	struct uscsi_cmd	ucmd_buf;
19077 	struct scsi_extended_sense	sense_buf;
19078 	int			status;
19079 
19080 	ASSERT(un != NULL);
19081 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19082 
19083 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19084 
19085 	/* already determined doorlock is not supported, fake success */
19086 	if (un->un_f_doorlock_supported == FALSE) {
19087 		return (0);
19088 	}
19089 
19090 	bzero(&cdb, sizeof (cdb));
19091 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19092 
19093 	cdb.scc_cmd = SCMD_DOORLOCK;
19094 	cdb.cdb_opaque[4] = (uchar_t)flag;
19095 
19096 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19097 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19098 	ucmd_buf.uscsi_bufaddr	= NULL;
19099 	ucmd_buf.uscsi_buflen	= 0;
19100 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19101 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19102 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19103 	ucmd_buf.uscsi_timeout	= 15;
19104 
19105 	SD_TRACE(SD_LOG_IO, un,
19106 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19107 
19108 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19109 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19110 
19111 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19112 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19113 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19114 		/* fake success and skip subsequent doorlock commands */
19115 		un->un_f_doorlock_supported = FALSE;
19116 		return (0);
19117 	}
19118 
19119 	return (status);
19120 }
19121 
19122 /*
19123  *    Function: sd_send_scsi_READ_CAPACITY
19124  *
19125  * Description: This routine uses the scsi READ CAPACITY command to determine
19126  *		the device capacity in number of blocks and the device native
19127  *		block size. If this function returns a failure, then the
19128  *		values in *capp and *lbap are undefined.  If the capacity
19129  *		returned is 0xffffffff then the lun is too large for a
19130  *		normal READ CAPACITY command and the results of a
19131  *		READ CAPACITY 16 will be used instead.
19132  *
19133  *   Arguments: un   - ptr to soft state struct for the target
19134  *		capp - ptr to unsigned 64-bit variable to receive the
19135  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19137  *			block size value from the command
19138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19140  *			to use the USCSI "direct" chain and bypass the normal
19141  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19142  *			command is issued as part of an error recovery action.
19143  *
19144  * Return Code: 0   - Success
19145  *		EIO - IO error
19146  *		EACCES - Reservation conflict detected
19147  *		EAGAIN - Device is becoming ready
19148  *		errno return code from sd_send_scsi_cmd()
19149  *
19150  *     Context: Can sleep.  Blocks until command completes.
19151  */
19152 
19153 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19154 
19155 static int
19156 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19157 	int path_flag)
19158 {
19159 	struct	scsi_extended_sense	sense_buf;
19160 	struct	uscsi_cmd	ucmd_buf;
19161 	union	scsi_cdb	cdb;
19162 	uint32_t		*capacity_buf;
19163 	uint64_t		capacity;
19164 	uint32_t		lbasize;
19165 	int			status;
19166 
19167 	ASSERT(un != NULL);
19168 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19169 	ASSERT(capp != NULL);
19170 	ASSERT(lbap != NULL);
19171 
19172 	SD_TRACE(SD_LOG_IO, un,
19173 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19174 
19175 	/*
19176 	 * First send a READ_CAPACITY command to the target.
19177 	 * (This command is mandatory under SCSI-2.)
19178 	 *
19179 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19180 	 * Medium Indicator bit is cleared.  The address field must be
19181 	 * zero if the PMI bit is zero.
19182 	 */
19183 	bzero(&cdb, sizeof (cdb));
19184 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19185 
19186 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19187 
19188 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19189 
19190 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19191 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19192 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19193 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19194 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19195 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19196 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19197 	ucmd_buf.uscsi_timeout	= 60;
19198 
19199 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19200 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19201 
19202 	switch (status) {
19203 	case 0:
19204 		/* Return failure if we did not get valid capacity data. */
19205 		if (ucmd_buf.uscsi_resid != 0) {
19206 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19207 			return (EIO);
19208 		}
19209 
19210 		/*
19211 		 * Read capacity and block size from the READ CAPACITY 10 data.
19212 		 * This data may be adjusted later due to device specific
19213 		 * issues.
19214 		 *
19215 		 * According to the SCSI spec, the READ CAPACITY 10
19216 		 * command returns the following:
19217 		 *
19218 		 *  bytes 0-3: Maximum logical block address available.
19219 		 *		(MSB in byte:0 & LSB in byte:3)
19220 		 *
19221 		 *  bytes 4-7: Block length in bytes
19222 		 *		(MSB in byte:4 & LSB in byte:7)
19223 		 *
19224 		 */
19225 		capacity = BE_32(capacity_buf[0]);
19226 		lbasize = BE_32(capacity_buf[1]);
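
		/*
		 * Worked example (hypothetical numbers): a 1 GB disk with
		 * 512-byte blocks returns a maximum LBA of 0x001FFFFF in
		 * bytes 0-3 and a block length of 0x00000200 in bytes 4-7,
		 * so capacity is 2097151 and lbasize is 512 at this point;
		 * the +1 adjustment to get the block count happens below.
		 */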
19227 
19228 		/*
19229 		 * Done with capacity_buf
19230 		 */
19231 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19232 
19233 		/*
19234 		 * if the reported capacity is set to all 0xf's, then
19235 		 * this disk is too large and requires SBC-2 commands.
19236 		 * Reissue the request using READ CAPACITY 16.
19237 		 */
19238 		if (capacity == 0xffffffff) {
19239 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19240 			    &lbasize, path_flag);
19241 			if (status != 0) {
19242 				return (status);
19243 			}
19244 		}
19245 		break;	/* Success! */
19246 	case EIO:
19247 		switch (ucmd_buf.uscsi_status) {
19248 		case STATUS_RESERVATION_CONFLICT:
19249 			status = EACCES;
19250 			break;
19251 		case STATUS_CHECK:
19252 			/*
19253 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19254 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19255 			 */
19256 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19257 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19258 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19259 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19260 				return (EAGAIN);
19261 			}
19262 			break;
19263 		default:
19264 			break;
19265 		}
19266 		/* FALLTHRU */
19267 	default:
19268 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19269 		return (status);
19270 	}
19271 
19272 	/*
19273 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19274 	 * (2352 and 0 are common) so for these devices always force the value
19275 	 * to 2048 as required by the ATAPI specs.
19276 	 */
19277 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19278 		lbasize = 2048;
19279 	}
19280 
19281 	/*
19282 	 * Get the maximum LBA value from the READ CAPACITY data.
19283 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19284 	 * was cleared when issuing the command. This means that the LBA
19285 	 * returned from the device is the LBA of the last logical block
19286 	 * on the logical unit.  The actual logical block count will be
19287 	 * this value plus one.
19288 	 *
19289 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19290 	 * so scale the capacity value to reflect this.
19291 	 */
19292 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
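
	/*
	 * Worked example (hypothetical numbers): with a maximum LBA of
	 * 2097151, an lbasize of 2048, and un_sys_blocksize of 512, the
	 * line above yields (2097151 + 1) * (2048 / 512) == 8388608
	 * system blocks, i.e. the same 4 GB expressed in 512-byte units.
	 */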
19293 
19294 #if defined(__i386) || defined(__amd64)
19295 	/*
19296 	 * Refer to comments related to off-by-1 at the
19297 	 * header of this file.
19298 	 * Treat 1TB disk as (1T - 512)B.
19299 	 */
	if (un->un_f_capacity_adjusted == 1) {
		capacity = DK_MAX_BLOCKS;
	}
19302 #endif
19303 
19304 	/*
19305 	 * Copy the values from the READ CAPACITY command into the space
19306 	 * provided by the caller.
19307 	 */
19308 	*capp = capacity;
19309 	*lbap = lbasize;
19310 
19311 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19312 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19313 
19314 	/*
19315 	 * Both the lbasize and capacity from the device must be nonzero,
19316 	 * otherwise we assume that the values are not valid and return
19317 	 * failure to the caller. (4203735)
19318 	 */
19319 	if ((capacity == 0) || (lbasize == 0)) {
19320 		return (EIO);
19321 	}
19322 
19323 	return (0);
19324 }
19325 
19326 /*
19327  *    Function: sd_send_scsi_READ_CAPACITY_16
19328  *
19329  * Description: This routine uses the scsi READ CAPACITY 16 command to
19330  *		determine the device capacity in number of blocks and the
19331  *		device native block size.  If this function returns a failure,
19332  *		then the values in *capp and *lbap are undefined.
 *		This routine should always be called by
 *		sd_send_scsi_READ_CAPACITY, which will apply any
 *		device-specific adjustments to capacity and lbasize.
19336  *
19337  *   Arguments: un   - ptr to soft state struct for the target
19338  *		capp - ptr to unsigned 64-bit variable to receive the
19339  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19341  *			block size value from the command
19342  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19343  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19344  *			to use the USCSI "direct" chain and bypass the normal
19345  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19346  *			this command is issued as part of an error recovery
19347  *			action.
19348  *
19349  * Return Code: 0   - Success
19350  *		EIO - IO error
19351  *		EACCES - Reservation conflict detected
19352  *		EAGAIN - Device is becoming ready
19353  *		errno return code from sd_send_scsi_cmd()
19354  *
19355  *     Context: Can sleep.  Blocks until command completes.
19356  */
19357 
19358 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19359 
19360 static int
19361 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19362 	uint32_t *lbap, int path_flag)
19363 {
19364 	struct	scsi_extended_sense	sense_buf;
19365 	struct	uscsi_cmd	ucmd_buf;
19366 	union	scsi_cdb	cdb;
19367 	uint64_t		*capacity16_buf;
19368 	uint64_t		capacity;
19369 	uint32_t		lbasize;
19370 	int			status;
19371 
19372 	ASSERT(un != NULL);
19373 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19374 	ASSERT(capp != NULL);
19375 	ASSERT(lbap != NULL);
19376 
19377 	SD_TRACE(SD_LOG_IO, un,
19378 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19379 
19380 	/*
19381 	 * First send a READ_CAPACITY_16 command to the target.
19382 	 *
19383 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19384 	 * Medium Indicator bit is cleared.  The address field must be
19385 	 * zero if the PMI bit is zero.
19386 	 */
19387 	bzero(&cdb, sizeof (cdb));
19388 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19389 
19390 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19391 
19392 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19393 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19394 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19395 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19396 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19397 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19398 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19399 	ucmd_buf.uscsi_timeout	= 60;
19400 
19401 	/*
19402 	 * Read Capacity (16) is a Service Action In command.  One
19403 	 * command byte (0x9E) is overloaded for multiple operations,
19404 	 * with the second CDB byte specifying the desired operation
19405 	 */
19406 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19407 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19408 
19409 	/*
19410 	 * Fill in allocation length field
19411 	 */
19412 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
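
	/*
	 * Sketch of the resulting CDB (values as set above): byte 0 is
	 * SCMD_SVC_ACTION_IN_G4 (0x9E), byte 1 is
	 * SSVC_ACTION_READ_CAPACITY_G4 (0x10), and FORMG4COUNT() stores
	 * the allocation length (sizeof (struct scsi_capacity_16)) in
	 * the big-endian length field of the group 4 CDB.
	 */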
19413 
19414 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19415 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19416 
19417 	switch (status) {
19418 	case 0:
		/*
		 * Return failure if we did not get valid capacity data.
		 * The capacity_16 data is 32 bytes, so a resid greater
		 * than 20 means the first 12 bytes (the 8-byte capacity
		 * plus the 4-byte block length) were not fully returned.
		 */
		if (ucmd_buf.uscsi_resid > 20) {
19421 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19422 			return (EIO);
19423 		}
19424 
19425 		/*
19426 		 * Read capacity and block size from the READ CAPACITY 10 data.
19427 		 * This data may be adjusted later due to device specific
19428 		 * issues.
19429 		 *
19430 		 * According to the SCSI spec, the READ CAPACITY 10
19431 		 * command returns the following:
19432 		 *
19433 		 *  bytes 0-7: Maximum logical block address available.
19434 		 *		(MSB in byte:0 & LSB in byte:7)
19435 		 *
19436 		 *  bytes 8-11: Block length in bytes
19437 		 *		(MSB in byte:8 & LSB in byte:11)
19438 		 *
19439 		 */
19440 		capacity = BE_64(capacity16_buf[0]);
19441 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
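
		/*
		 * Note on the cast above: capacity16_buf is an array of
		 * uint64_t, so &capacity16_buf[1] addresses byte offset 8
		 * of the returned data, which is exactly the 4-byte
		 * big-endian block length field (bytes 8-11) described
		 * above.
		 */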
19442 
19443 		/*
19444 		 * Done with capacity16_buf
19445 		 */
19446 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19447 
19448 		/*
19449 		 * if the reported capacity is set to all 0xf's, then
19450 		 * this disk is too large.  This could only happen with
19451 		 * a device that supports LBAs larger than 64 bits which
19452 		 * are not defined by any current T10 standards.
19453 		 */
19454 		if (capacity == 0xffffffffffffffff) {
19455 			return (EIO);
19456 		}
19457 		break;	/* Success! */
19458 	case EIO:
19459 		switch (ucmd_buf.uscsi_status) {
19460 		case STATUS_RESERVATION_CONFLICT:
19461 			status = EACCES;
19462 			break;
19463 		case STATUS_CHECK:
19464 			/*
19465 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19466 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19467 			 */
19468 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19469 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19470 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19471 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19472 				return (EAGAIN);
19473 			}
19474 			break;
19475 		default:
19476 			break;
19477 		}
19478 		/* FALLTHRU */
19479 	default:
19480 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19481 		return (status);
19482 	}
19483 
19484 	*capp = capacity;
19485 	*lbap = lbasize;
19486 
19487 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19488 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19489 
19490 	return (0);
19491 }
19492 
19493 
19494 /*
19495  *    Function: sd_send_scsi_START_STOP_UNIT
19496  *
19497  * Description: Issue a scsi START STOP UNIT command to the target.
19498  *
19499  *   Arguments: un    - pointer to driver soft state (unit) structure for
19500  *			this target.
19501  *		flag  - SD_TARGET_START
19502  *			SD_TARGET_STOP
19503  *			SD_TARGET_EJECT
19504  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19505  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19506  *			to use the USCSI "direct" chain and bypass the normal
19507  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19508  *			command is issued as part of an error recovery action.
19509  *
19510  * Return Code: 0   - Success
19511  *		EIO - IO error
19512  *		EACCES - Reservation conflict detected
19513  *		ENXIO  - Not Ready, medium not present
19514  *		errno return code from sd_send_scsi_cmd()
19515  *
19516  *     Context: Can sleep.
19517  */
19518 
19519 static int
19520 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19521 {
19522 	struct	scsi_extended_sense	sense_buf;
19523 	union scsi_cdb		cdb;
19524 	struct uscsi_cmd	ucmd_buf;
19525 	int			status;
19526 
19527 	ASSERT(un != NULL);
19528 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19529 
19530 	SD_TRACE(SD_LOG_IO, un,
19531 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19532 
19533 	if (un->un_f_check_start_stop &&
19534 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19535 	    (un->un_f_start_stop_supported != TRUE)) {
19536 		return (0);
19537 	}
19538 
19539 	bzero(&cdb, sizeof (cdb));
19540 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19541 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19542 
19543 	cdb.scc_cmd = SCMD_START_STOP;
19544 	cdb.cdb_opaque[4] = (uchar_t)flag;
19545 
19546 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19547 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19548 	ucmd_buf.uscsi_bufaddr	= NULL;
19549 	ucmd_buf.uscsi_buflen	= 0;
19550 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19551 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19552 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19553 	ucmd_buf.uscsi_timeout	= 200;
19554 
19555 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19556 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19557 
19558 	switch (status) {
19559 	case 0:
19560 		break;	/* Success! */
19561 	case EIO:
19562 		switch (ucmd_buf.uscsi_status) {
19563 		case STATUS_RESERVATION_CONFLICT:
19564 			status = EACCES;
19565 			break;
19566 		case STATUS_CHECK:
19567 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (scsi_sense_key(
				    (uint8_t *)&sense_buf)) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf) == 0x3A) {
						status = ENXIO;
					}
19579 					break;
19580 				default:
19581 					break;
19582 				}
19583 			}
19584 			break;
19585 		default:
19586 			break;
19587 		}
19588 		break;
19589 	default:
19590 		break;
19591 	}
19592 
19593 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19594 
19595 	return (status);
19596 }
19597 
19598 
19599 /*
19600  *    Function: sd_start_stop_unit_callback
19601  *
19602  * Description: timeout(9F) callback to begin recovery process for a
19603  *		device that has spun down.
19604  *
19605  *   Arguments: arg - pointer to associated softstate struct.
19606  *
19607  *     Context: Executes in a timeout(9F) thread context
19608  */
19609 
19610 static void
19611 sd_start_stop_unit_callback(void *arg)
19612 {
19613 	struct sd_lun	*un = arg;
19614 	ASSERT(un != NULL);
19615 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19616 
19617 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19618 
19619 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19620 }
19621 
19622 
19623 /*
19624  *    Function: sd_start_stop_unit_task
19625  *
19626  * Description: Recovery procedure when a drive is spun down.
19627  *
19628  *   Arguments: arg - pointer to associated softstate struct.
19629  *
19630  *     Context: Executes in a taskq() thread context
19631  */
19632 
19633 static void
19634 sd_start_stop_unit_task(void *arg)
19635 {
19636 	struct sd_lun	*un = arg;
19637 
19638 	ASSERT(un != NULL);
19639 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19640 
19641 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19642 
19643 	/*
19644 	 * Some unformatted drives report not ready error, no need to
19645 	 * restart if format has been initiated.
19646 	 */
19647 	mutex_enter(SD_MUTEX(un));
19648 	if (un->un_f_format_in_progress == TRUE) {
19649 		mutex_exit(SD_MUTEX(un));
19650 		return;
19651 	}
19652 	mutex_exit(SD_MUTEX(un));
19653 
19654 	/*
19655 	 * When a START STOP command is issued from here, it is part of a
19656 	 * failure recovery operation and must be issued before any other
19657 	 * commands, including any pending retries. Thus it must be sent
19658 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19659 	 * succeeds or not, we will start I/O after the attempt.
19660 	 */
19661 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19662 	    SD_PATH_DIRECT_PRIORITY);
19663 
19664 	/*
19665 	 * The above call blocks until the START_STOP_UNIT command completes.
19666 	 * Now that it has completed, we must re-try the original IO that
19667 	 * received the NOT READY condition in the first place. There are
19668 	 * three possible conditions here:
19669 	 *
19670 	 *  (1) The original IO is on un_retry_bp.
19671 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19672 	 *	is NULL.
19673 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19674 	 *	points to some other, unrelated bp.
19675 	 *
19676 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19677 	 * as the argument. If un_retry_bp is NULL, this will initiate
19678 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19679 	 * then this will process the bp on un_retry_bp. That may or may not
19680 	 * be the original IO, but that does not matter: the important thing
19681 	 * is to keep the IO processing going at this point.
19682 	 *
19683 	 * Note: This is a very specific error recovery sequence associated
19684 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19685 	 * serialize the I/O with completion of the spin-up.
19686 	 */
19687 	mutex_enter(SD_MUTEX(un));
19688 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19689 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19690 	    un, un->un_retry_bp);
19691 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19692 	sd_start_cmds(un, un->un_retry_bp);
19693 	mutex_exit(SD_MUTEX(un));
19694 
19695 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19696 }
19697 
19698 
19699 /*
19700  *    Function: sd_send_scsi_INQUIRY
19701  *
19702  * Description: Issue the scsi INQUIRY command.
19703  *
19704  *   Arguments: un
19705  *		bufaddr
19706  *		buflen
19707  *		evpd
19708  *		page_code
19709  *		page_length
19710  *
19711  * Return Code: 0   - Success
19712  *		errno return code from sd_send_scsi_cmd()
19713  *
19714  *     Context: Can sleep. Does not return until command is completed.
19715  */
19716 
19717 static int
19718 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19719 	uchar_t evpd, uchar_t page_code, size_t *residp)
19720 {
19721 	union scsi_cdb		cdb;
19722 	struct uscsi_cmd	ucmd_buf;
19723 	int			status;
19724 
19725 	ASSERT(un != NULL);
19726 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19727 	ASSERT(bufaddr != NULL);
19728 
19729 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19730 
19731 	bzero(&cdb, sizeof (cdb));
19732 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19733 	bzero(bufaddr, buflen);
19734 
19735 	cdb.scc_cmd = SCMD_INQUIRY;
19736 	cdb.cdb_opaque[1] = evpd;
19737 	cdb.cdb_opaque[2] = page_code;
19738 	FORMG0COUNT(&cdb, buflen);
19739 
19740 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19741 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19742 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19743 	ucmd_buf.uscsi_buflen	= buflen;
19744 	ucmd_buf.uscsi_rqbuf	= NULL;
19745 	ucmd_buf.uscsi_rqlen	= 0;
19746 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19747 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19748 
19749 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19750 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19751 
19752 	if ((status == 0) && (residp != NULL)) {
19753 		*residp = ucmd_buf.uscsi_resid;
19754 	}
19755 
19756 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19757 
19758 	return (status);
19759 }
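
/*
 * Usage sketch (illustrative only; the buffer size and the VPD page
 * requested below are assumptions, not taken from this driver):
 *
 *	uchar_t	inqbuf[0xff];
 *	size_t	resid;
 *
 *	Fetch the Device Identification VPD page (EVPD = 0x01 in cdb[1],
 *	page code 0x83 in cdb[2]):
 *
 *	if (sd_send_scsi_INQUIRY(un, inqbuf, sizeof (inqbuf), 0x01, 0x83,
 *	    &resid) == 0) {
 *		(sizeof (inqbuf) - resid) bytes of page data were returned
 *	}
 */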
19760 
19761 
19762 /*
19763  *    Function: sd_send_scsi_TEST_UNIT_READY
19764  *
19765  * Description: Issue the scsi TEST UNIT READY command.
19766  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19767  *		prevent retrying failed commands. Use this when the intent
19768  *		is either to check for device readiness, to clear a Unit
19769  *		Attention, or to clear any outstanding sense data.
19770  *		However, under specific conditions the expected behavior
19771  *		is for retries to bring a device ready, so use the flag
19772  *		with caution.
19773  *
19774  *   Arguments: un
19775  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19776  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19777  *			0: don't check for media present; do retries on cmd.
19778  *
19779  * Return Code: 0   - Success
19780  *		EIO - IO error
19781  *		EACCES - Reservation conflict detected
19782  *		ENXIO  - Not Ready, medium not present
19783  *		errno return code from sd_send_scsi_cmd()
19784  *
19785  *     Context: Can sleep. Does not return until command is completed.
19786  */
19787 
19788 static int
19789 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19790 {
19791 	struct	scsi_extended_sense	sense_buf;
19792 	union scsi_cdb		cdb;
19793 	struct uscsi_cmd	ucmd_buf;
19794 	int			status;
19795 
19796 	ASSERT(un != NULL);
19797 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19798 
19799 	SD_TRACE(SD_LOG_IO, un,
19800 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19801 
19802 	/*
19803 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19804 	 * timeouts when they receive a TUR and the queue is not empty. Check
19805 	 * the configuration flag set during attach (indicating the drive has
19806 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19807 	 * TUR. If there are
19808 	 * pending commands, return success; this is a bit arbitrary but
19809 	 * is OK for non-removables (i.e. the elite1 disks) and
19810 	 * non-clustering configurations.
19811 	 */
19812 	if (un->un_f_cfg_tur_check == TRUE) {
19813 		mutex_enter(SD_MUTEX(un));
19814 		if (un->un_ncmds_in_transport != 0) {
19815 			mutex_exit(SD_MUTEX(un));
19816 			return (0);
19817 		}
19818 		mutex_exit(SD_MUTEX(un));
19819 	}
19820 
19821 	bzero(&cdb, sizeof (cdb));
19822 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19823 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19824 
19825 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19826 
19827 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19828 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19829 	ucmd_buf.uscsi_bufaddr	= NULL;
19830 	ucmd_buf.uscsi_buflen	= 0;
19831 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19832 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19833 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19834 
19835 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19836 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19837 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19838 	}
19839 	ucmd_buf.uscsi_timeout	= 60;
19840 
19841 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19842 	    UIO_SYSSPACE, UIO_SYSSPACE,
19843 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19844 
19845 	switch (status) {
19846 	case 0:
19847 		break;	/* Success! */
19848 	case EIO:
19849 		switch (ucmd_buf.uscsi_status) {
19850 		case STATUS_RESERVATION_CONFLICT:
19851 			status = EACCES;
19852 			break;
19853 		case STATUS_CHECK:
19854 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19855 				break;
19856 			}
19857 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19858 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19859 				KEY_NOT_READY) &&
19860 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
19861 				status = ENXIO;
19862 			}
19863 			break;
19864 		default:
19865 			break;
19866 		}
19867 		break;
19868 	default:
19869 		break;
19870 	}
19871 
19872 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19873 
19874 	return (status);
19875 }
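
/*
 * Usage sketch (illustrative; the calling context is an assumption):
 *
 *	Poll for media without letting a failed TUR be retried:
 *
 *	rval = sd_send_scsi_TEST_UNIT_READY(un,
 *	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR);
 *
 *	rval == 0 means the device is ready, ENXIO means no medium is
 *	present, and EACCES means another initiator holds a reservation.
 */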
19876 
19877 
19878 /*
19879  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19880  *
19881  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19882  *
19883  *   Arguments: un - pointer to the softstate struct for the target
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV
 *		data_len - size of the data buffer, in bytes
 *		data_bufp - buffer for the returned data; if NULL, a default
 *			buffer is allocated internally and freed on return
19884  *
19885  * Return Code: 0   - Success
19886  *		EACCES
19887  *		ENOTSUP
19888  *		errno return code from sd_send_scsi_cmd()
19889  *
19890  *     Context: Can sleep. Does not return until command is completed.
19891  */
19892 
19893 static int
19894 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19895 	uint16_t data_len, uchar_t *data_bufp)
19896 {
19897 	struct scsi_extended_sense	sense_buf;
19898 	union scsi_cdb		cdb;
19899 	struct uscsi_cmd	ucmd_buf;
19900 	int			status;
19901 	int			no_caller_buf = FALSE;
19902 
19903 	ASSERT(un != NULL);
19904 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19905 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19906 
19907 	SD_TRACE(SD_LOG_IO, un,
19908 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19909 
19910 	bzero(&cdb, sizeof (cdb));
19911 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19912 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19913 	if (data_bufp == NULL) {
19914 		/* Allocate a default buf if the caller did not give one */
19915 		ASSERT(data_len == 0);
19916 		data_len  = MHIOC_RESV_KEY_SIZE;
19917 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19918 		no_caller_buf = TRUE;
19919 	}
19920 
19921 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19922 	cdb.cdb_opaque[1] = usr_cmd;
19923 	FORMG1COUNT(&cdb, data_len);
19924 
19925 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19926 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19927 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19928 	ucmd_buf.uscsi_buflen	= data_len;
19929 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19930 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19931 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19932 	ucmd_buf.uscsi_timeout	= 60;
19933 
19934 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19935 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19936 
19937 	switch (status) {
19938 	case 0:
19939 		break;	/* Success! */
19940 	case EIO:
19941 		switch (ucmd_buf.uscsi_status) {
19942 		case STATUS_RESERVATION_CONFLICT:
19943 			status = EACCES;
19944 			break;
19945 		case STATUS_CHECK:
19946 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19947 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19948 				KEY_ILLEGAL_REQUEST)) {
19949 				status = ENOTSUP;
19950 			}
19951 			break;
19952 		default:
19953 			break;
19954 		}
19955 		break;
19956 	default:
19957 		break;
19958 	}
19959 
19960 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19961 
19962 	if (no_caller_buf == TRUE) {
19963 		kmem_free(data_bufp, data_len);
19964 	}
19965 
19966 	return (status);
19967 }
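
/*
 * Usage sketch (illustrative; the buffer length is an assumption):
 *
 *	uint16_t	len = MHIOC_RESV_KEY_SIZE * 4;
 *	uchar_t		*keybuf = kmem_zalloc(len, KM_SLEEP);
 *
 *	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    len, keybuf);
 *	...
 *	kmem_free(keybuf, len);
 */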
19968 
19969 
19970 /*
19971  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19972  *
19973  * Description: This routine is the driver entry point for handling the
19974  *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
19975  *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT and
19976  *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
19977  *		commands to the device.
19978  *
19979  *   Arguments: un  -   Pointer to soft state struct for the target.
19980  *		usr_cmd SCSI-3 reservation facility command (one of
19981  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
19982  *			SD_SCSI3_PREEMPTANDABORT,
19983  *			SD_SCSI3_REGISTERANDIGNOREKEY)
19984  *		usr_bufp - user-provided pointer to a register, reserve
 *			descriptor, preempt-and-abort or register-and-ignore-key
 *			structure (mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t, mhioc_registerandignorekey_t)
19985  *
19986  * Return Code: 0   - Success
19987  *		EACCES
19988  *		ENOTSUP
19989  *		errno return code from sd_send_scsi_cmd()
19990  *
19991  *     Context: Can sleep. Does not return until command is completed.
19992  */
19993 
19994 static int
19995 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
19996 	uchar_t	*usr_bufp)
19997 {
19998 	struct scsi_extended_sense	sense_buf;
19999 	union scsi_cdb		cdb;
20000 	struct uscsi_cmd	ucmd_buf;
20001 	int			status;
20002 	uchar_t			data_len = sizeof (sd_prout_t);
20003 	sd_prout_t		*prp;
20004 
20005 	ASSERT(un != NULL);
20006 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20007 	ASSERT(data_len == 24);	/* required by scsi spec */
20008 
20009 	SD_TRACE(SD_LOG_IO, un,
20010 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20011 
20012 	if (usr_bufp == NULL) {
20013 		return (EINVAL);
20014 	}
20015 
20016 	bzero(&cdb, sizeof (cdb));
20017 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20018 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20019 	prp = kmem_zalloc(data_len, KM_SLEEP);
20020 
20021 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20022 	cdb.cdb_opaque[1] = usr_cmd;
20023 	FORMG1COUNT(&cdb, data_len);
20024 
20025 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20026 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20027 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20028 	ucmd_buf.uscsi_buflen	= data_len;
20029 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20030 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20031 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20032 	ucmd_buf.uscsi_timeout	= 60;
20033 
20034 	switch (usr_cmd) {
20035 	case SD_SCSI3_REGISTER: {
20036 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20037 
20038 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20039 		bcopy(ptr->newkey.key, prp->service_key,
20040 		    MHIOC_RESV_KEY_SIZE);
20041 		prp->aptpl = ptr->aptpl;
20042 		break;
20043 	}
20044 	case SD_SCSI3_RESERVE:
20045 	case SD_SCSI3_RELEASE: {
20046 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20047 
20048 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20049 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20050 		cdb.cdb_opaque[2] = ptr->type;
20051 		break;
20052 	}
20053 	case SD_SCSI3_PREEMPTANDABORT: {
20054 		mhioc_preemptandabort_t *ptr =
20055 		    (mhioc_preemptandabort_t *)usr_bufp;
20056 
20057 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20058 		bcopy(ptr->victim_key.key, prp->service_key,
20059 		    MHIOC_RESV_KEY_SIZE);
20060 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20061 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20062 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20063 		break;
20064 	}
20065 	case SD_SCSI3_REGISTERANDIGNOREKEY:
20066 	{
20067 		mhioc_registerandignorekey_t *ptr;
20068 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20069 		bcopy(ptr->newkey.key,
20070 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20071 		prp->aptpl = ptr->aptpl;
20072 		break;
20073 	}
20074 	default:
20075 		ASSERT(FALSE);
20076 		break;
20077 	}
20078 
20079 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20080 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20081 
20082 	switch (status) {
20083 	case 0:
20084 		break;	/* Success! */
20085 	case EIO:
20086 		switch (ucmd_buf.uscsi_status) {
20087 		case STATUS_RESERVATION_CONFLICT:
20088 			status = EACCES;
20089 			break;
20090 		case STATUS_CHECK:
20091 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20092 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20093 				KEY_ILLEGAL_REQUEST)) {
20094 				status = ENOTSUP;
20095 			}
20096 			break;
20097 		default:
20098 			break;
20099 		}
20100 		break;
20101 	default:
20102 		break;
20103 	}
20104 
20105 	kmem_free(prp, data_len);
20106 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20107 	return (status);
20108 }
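
/*
 * Usage sketch (illustrative; mykey is a hypothetical 8-byte
 * reservation key, as in the MHIOCGRP_REGISTER ioctl path below):
 *
 *	mhioc_register_t	reg;
 *
 *	bzero(&reg, sizeof (reg));
 *	bcopy(mykey, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	rval = sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg);
 */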
20109 
20110 
20111 /*
20112  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20113  *
20114  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20115  *
20116  *   Arguments: un - pointer to the target's soft state struct
 *		dkc - pointer to the callback structure; if NULL the request
 *			is issued synchronously, otherwise it is issued
 *			asynchronously and dkc_callback is invoked with the
 *			final status when the command completes
20117  *
20118  * Return Code: 0 - success
20119  *		errno-type error code
20120  *
20121  *     Context: kernel thread context only.
20122  */
20123 
20124 static int
20125 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20126 {
20127 	struct sd_uscsi_info	*uip;
20128 	struct uscsi_cmd	*uscmd;
20129 	union scsi_cdb		*cdb;
20130 	struct buf		*bp;
20131 	int			rval = 0;
20132 
20133 	SD_TRACE(SD_LOG_IO, un,
20134 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20135 
20136 	ASSERT(un != NULL);
20137 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20138 
20139 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20140 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20141 
20142 	/*
20143 	 * Get some memory for the uscsi_cmd struct and initialize it
20144 	 * for the SYNCHRONIZE_CACHE cmd (the cdb was allocated above).
20145 	 */
20146 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20147 	uscmd->uscsi_cdblen = CDB_GROUP1;
20148 	uscmd->uscsi_cdb = (caddr_t)cdb;
20149 	uscmd->uscsi_bufaddr = NULL;
20150 	uscmd->uscsi_buflen = 0;
20151 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20152 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20153 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20154 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20155 	uscmd->uscsi_timeout = sd_io_time;
20156 
20157 	/*
20158 	 * Allocate an sd_uscsi_info struct and fill it with the info
20159 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20160 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20161 	 * since we allocate the buf here in this function, we do not
20162 	 * need to preserve the prior contents of b_private.
20163 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20164 	 */
20165 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20166 	uip->ui_flags = SD_PATH_DIRECT;
20167 	uip->ui_cmdp  = uscmd;
20168 
20169 	bp = getrbuf(KM_SLEEP);
20170 	bp->b_private = uip;
20171 
20172 	/*
20173 	 * Setup buffer to carry uscsi request.
20174 	 */
20175 	bp->b_flags  = B_BUSY;
20176 	bp->b_bcount = 0;
20177 	bp->b_blkno  = 0;
20178 
20179 	if (dkc != NULL) {
20180 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20181 		uip->ui_dkc = *dkc;
20182 	}
20183 
20184 	bp->b_edev = SD_GET_DEV(un);
20185 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20186 
20187 	(void) sd_uscsi_strategy(bp);
20188 
20189 	/*
20190 	 * If this is a synchronous request, wait for completion.
20191 	 * If it is asynchronous, just return and let the b_iodone
20192 	 * callback clean up.
20193 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20194 	 * but it was also incremented in sd_uscsi_strategy(), so
20195 	 * we should be ok.
20196 	 */
20197 	if (dkc == NULL) {
20198 		(void) biowait(bp);
20199 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20200 	}
20201 
20202 	return (rval);
20203 }
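
/*
 * Usage sketch (illustrative; my_flush_done and my_cookie are
 * hypothetical):
 *
 *	A synchronous flush blocks until the command completes:
 *
 *	rval = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *
 *	An asynchronous flush returns at once; the callback runs from
 *	the biodone path with the final status:
 *
 *	struct dk_callback	dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = my_cookie;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */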
20204 
20205 
20206 static int
20207 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20208 {
20209 	struct sd_uscsi_info *uip;
20210 	struct uscsi_cmd *uscmd;
20211 	uint8_t *sense_buf;
20212 	struct sd_lun *un;
20213 	int status;
20214 
20215 	uip = (struct sd_uscsi_info *)(bp->b_private);
20216 	ASSERT(uip != NULL);
20217 
20218 	uscmd = uip->ui_cmdp;
20219 	ASSERT(uscmd != NULL);
20220 
20221 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20222 	ASSERT(sense_buf != NULL);
20223 
20224 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20225 	ASSERT(un != NULL);
20226 
20227 	status = geterror(bp);
20228 	switch (status) {
20229 	case 0:
20230 		break;	/* Success! */
20231 	case EIO:
20232 		switch (uscmd->uscsi_status) {
20233 		case STATUS_RESERVATION_CONFLICT:
20234 			/* Ignore reservation conflict */
20235 			status = 0;
20236 			goto done;
20237 
20238 		case STATUS_CHECK:
20239 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20240 			    (scsi_sense_key(sense_buf) ==
20241 				KEY_ILLEGAL_REQUEST)) {
20242 				/* Ignore Illegal Request error */
20243 				mutex_enter(SD_MUTEX(un));
20244 				un->un_f_sync_cache_supported = FALSE;
20245 				mutex_exit(SD_MUTEX(un));
20246 				status = ENOTSUP;
20247 				goto done;
20248 			}
20249 			break;
20250 		default:
20251 			break;
20252 		}
20253 		/* FALLTHRU */
20254 	default:
20255 		/* Ignore error if the media is not present */
20256 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20257 			status = 0;
20258 			goto done;
20259 		}
20260 		/* If we reach this, we had an error */
20261 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20262 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20263 		break;
20264 	}
20265 
20266 done:
20267 	if (uip->ui_dkc.dkc_callback != NULL) {
20268 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20269 	}
20270 
20271 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20272 	freerbuf(bp);
20273 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20274 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20275 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20276 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20277 
20278 	return (status);
20279 }
20280 
20281 
20282 /*
20283  *    Function: sd_send_scsi_GET_CONFIGURATION
20284  *
20285  * Description: Issues the get configuration command to the device.
20286  *		Called from sd_check_for_writable_cd & sd_get_media_info;
20287  *		the caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
20288  *   Arguments: un - pointer to the softstate struct for the target
20289  *		ucmdbuf - uscsi command structure to be initialized and sent
20290  *		rqbuf - buffer for request sense data
20291  *		rqbuflen - size of rqbuf, in bytes
20292  *		bufaddr - buffer to receive the configuration header
20293  *		buflen - size of bufaddr, in bytes
20294  *
20295  * Return Code: 0   - Success
20296  *		errno return code from sd_send_scsi_cmd()
20297  *
20298  *     Context: Can sleep. Does not return until command is completed.
20299  *
20300  */
20301 
20302 static int
20303 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20304 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20305 {
20306 	char	cdb[CDB_GROUP1];
20307 	int	status;
20308 
20309 	ASSERT(un != NULL);
20310 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20311 	ASSERT(bufaddr != NULL);
20312 	ASSERT(ucmdbuf != NULL);
20313 	ASSERT(rqbuf != NULL);
20314 
20315 	SD_TRACE(SD_LOG_IO, un,
20316 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20317 
20318 	bzero(cdb, sizeof (cdb));
20319 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20320 	bzero(rqbuf, rqbuflen);
20321 	bzero(bufaddr, buflen);
20322 
20323 	/*
20324 	 * Set up cdb field for the get configuration command.
20325 	 */
20326 	cdb[0] = SCMD_GET_CONFIGURATION;
20327 	cdb[1] = 0x02;  /* Requested Type */
20328 	cdb[8] = SD_PROFILE_HEADER_LEN;
20329 	ucmdbuf->uscsi_cdb = cdb;
20330 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20331 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20332 	ucmdbuf->uscsi_buflen = buflen;
20333 	ucmdbuf->uscsi_timeout = sd_io_time;
20334 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20335 	ucmdbuf->uscsi_rqlen = rqbuflen;
20336 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20337 
20338 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20339 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20340 
20341 	switch (status) {
20342 	case 0:
20343 		break;  /* Success! */
20344 	case EIO:
20345 		switch (ucmdbuf->uscsi_status) {
20346 		case STATUS_RESERVATION_CONFLICT:
20347 			status = EACCES;
20348 			break;
20349 		default:
20350 			break;
20351 		}
20352 		break;
20353 	default:
20354 		break;
20355 	}
20356 
20357 	if (status == 0) {
20358 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20359 		    "sd_send_scsi_GET_CONFIGURATION: data",
20360 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20361 	}
20362 
20363 	SD_TRACE(SD_LOG_IO, un,
20364 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20365 
20366 	return (status);
20367 }
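
/*
 * Usage sketch (illustrative; stack buffers are used here for brevity):
 *
 *	struct uscsi_cmd	com;
 *	uchar_t			rqbuf[SENSE_LENGTH];
 *	uchar_t			hdr[SD_PROFILE_HEADER_LEN];
 *
 *	rval = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, hdr, SD_PROFILE_HEADER_LEN);
 */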
20368 
20369 /*
20370  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20371  *
20372  * Description: Issues the get configuration command to the device to
20373  *              retrieve a specific feature. Called from
20374  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20375  *   Arguments: un - pointer to the softstate struct for the target
20376  *              ucmdbuf - uscsi command structure to be initialized and sent
20377  *              rqbuf - buffer for request sense data
20378  *              rqbuflen - size of rqbuf, in bytes
20379  *              bufaddr - buffer to receive the feature data
20380  *              buflen - size of bufaddr, in bytes
20381  *		feature - feature code of the feature to retrieve (cdb[3])
20382  *
20383  * Return Code: 0   - Success
20384  *              errno return code from sd_send_scsi_cmd()
20385  *
20386  *     Context: Can sleep. Does not return until command is completed.
20387  *
20388  */
20389 static int
20390 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20391 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20392 	uchar_t *bufaddr, uint_t buflen, char feature)
20393 {
20394 	char    cdb[CDB_GROUP1];
20395 	int	status;
20396 
20397 	ASSERT(un != NULL);
20398 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20399 	ASSERT(bufaddr != NULL);
20400 	ASSERT(ucmdbuf != NULL);
20401 	ASSERT(rqbuf != NULL);
20402 
20403 	SD_TRACE(SD_LOG_IO, un,
20404 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20405 
20406 	bzero(cdb, sizeof (cdb));
20407 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20408 	bzero(rqbuf, rqbuflen);
20409 	bzero(bufaddr, buflen);
20410 
20411 	/*
20412 	 * Set up cdb field for the get configuration command.
20413 	 */
20414 	cdb[0] = SCMD_GET_CONFIGURATION;
20415 	cdb[1] = 0x02;  /* Requested Type */
20416 	cdb[3] = feature;
20417 	cdb[8] = buflen;
20418 	ucmdbuf->uscsi_cdb = cdb;
20419 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20420 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20421 	ucmdbuf->uscsi_buflen = buflen;
20422 	ucmdbuf->uscsi_timeout = sd_io_time;
20423 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20424 	ucmdbuf->uscsi_rqlen = rqbuflen;
20425 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20426 
20427 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20428 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20429 
20430 	switch (status) {
20431 	case 0:
20432 		break;  /* Success! */
20433 	case EIO:
20434 		switch (ucmdbuf->uscsi_status) {
20435 		case STATUS_RESERVATION_CONFLICT:
20436 			status = EACCES;
20437 			break;
20438 		default:
20439 			break;
20440 		}
20441 		break;
20442 	default:
20443 		break;
20444 	}
20445 
20446 	if (status == 0) {
20447 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20448 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20449 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20450 	}
20451 
20452 	SD_TRACE(SD_LOG_IO, un,
20453 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20454 
20455 	return (status);
20456 }
20457 
20458 
20459 /*
20460  *    Function: sd_send_scsi_MODE_SENSE
20461  *
20462  * Description: Utility function for issuing a scsi MODE SENSE command.
20463  *		Note: This routine uses a consistent implementation for Group0,
20464  *		Group1, and Group2 commands across all platforms. ATAPI devices
20465  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20466  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20467  *
20468  *   Arguments: un - pointer to the softstate struct for the target.
20469  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
20470  *			  CDB_GROUP[1|2] (10 byte)).
20471  *		buflen - size of page to be retrieved.
20472  *		page_code - page code of data to be retrieved from the target.
20473  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20474  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20475  *			to use the USCSI "direct" chain and bypass the normal
20476  *			command waitq.
20477  *
20478  * Return Code: 0   - Success
20479  *		errno return code from sd_send_scsi_cmd()
20480  *
20481  *     Context: Can sleep. Does not return until command is completed.
20482  */
20483 
20484 static int
20485 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20486 	size_t buflen,  uchar_t page_code, int path_flag)
20487 {
20488 	struct	scsi_extended_sense	sense_buf;
20489 	union scsi_cdb		cdb;
20490 	struct uscsi_cmd	ucmd_buf;
20491 	int			status;
20492 	int			headlen;
20493 
20494 	ASSERT(un != NULL);
20495 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20496 	ASSERT(bufaddr != NULL);
20497 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20498 	    (cdbsize == CDB_GROUP2));
20499 
20500 	SD_TRACE(SD_LOG_IO, un,
20501 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20502 
20503 	bzero(&cdb, sizeof (cdb));
20504 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20505 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20506 	bzero(bufaddr, buflen);
20507 
20508 	if (cdbsize == CDB_GROUP0) {
20509 		cdb.scc_cmd = SCMD_MODE_SENSE;
20510 		cdb.cdb_opaque[2] = page_code;
20511 		FORMG0COUNT(&cdb, buflen);
20512 		headlen = MODE_HEADER_LENGTH;
20513 	} else {
20514 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20515 		cdb.cdb_opaque[2] = page_code;
20516 		FORMG1COUNT(&cdb, buflen);
20517 		headlen = MODE_HEADER_LENGTH_GRP2;
20518 	}
20519 
20520 	ASSERT(headlen <= buflen);
20521 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20522 
20523 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20524 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20525 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20526 	ucmd_buf.uscsi_buflen	= buflen;
20527 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20528 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20529 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20530 	ucmd_buf.uscsi_timeout	= 60;
20531 
20532 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20533 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20534 
20535 	switch (status) {
20536 	case 0:
20537 		/*
20538 		 * sr_check_wp() uses the 0x3f page code and checks the mode
20539 		 * page header to determine if the target device is
20540 		 * write-protected. But some USB devices return 0 bytes for
20541 		 * the 0x3f page code. For this case, make sure that at
20542 		 * least the mode page header is returned.
20543 		 */
20544 		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20545 			status = EIO;
20546 		break;	/* Success! */
20547 	case EIO:
20548 		switch (ucmd_buf.uscsi_status) {
20549 		case STATUS_RESERVATION_CONFLICT:
20550 			status = EACCES;
20551 			break;
20552 		default:
20553 			break;
20554 		}
20555 		break;
20556 	default:
20557 		break;
20558 	}
20559 
20560 	if (status == 0) {
20561 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20562 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20563 	}
20564 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20565 
20566 	return (status);
20567 }
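
/*
 * Usage sketch (illustrative; the page code and payload size are
 * assumptions):
 *
 *	Fetch mode page 0x01 (read/write error recovery) with a
 *	6-byte CDB:
 *
 *	uchar_t	page[MODE_HEADER_LENGTH + 0x20];
 *
 *	rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, page,
 *	    sizeof (page), 0x01, SD_PATH_DIRECT);
 */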
20568 
20569 
20570 /*
20571  *    Function: sd_send_scsi_MODE_SELECT
20572  *
20573  * Description: Utility function for issuing a scsi MODE SELECT command.
20574  *		Note: This routine uses a consistent implementation for Group0,
20575  *		Group1, and Group2 commands across all platforms. ATAPI devices
20576  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20577  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20578  *
20579  *   Arguments: un - pointer to the softstate struct for the target.
20580  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
20581  *			  CDB_GROUP[1|2] (10 byte)).
20582  *		bufaddr - buffer for page data retrieved from the target.
20583  *		buflen - size of page to be retrieved.
20584  *		save_page - boolean to determine if SP bit should be set.
20585  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20586  *			to use the USCSI "direct" chain and bypass the normal
20587  *			command waitq.
20588  *
20589  * Return Code: 0   - Success
20590  *		errno return code from sd_send_scsi_cmd()
20591  *
20592  *     Context: Can sleep. Does not return until command is completed.
20593  */
20594 
20595 static int
20596 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20597 	size_t buflen,  uchar_t save_page, int path_flag)
20598 {
20599 	struct	scsi_extended_sense	sense_buf;
20600 	union scsi_cdb		cdb;
20601 	struct uscsi_cmd	ucmd_buf;
20602 	int			status;
20603 
20604 	ASSERT(un != NULL);
20605 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20606 	ASSERT(bufaddr != NULL);
20607 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20608 	    (cdbsize == CDB_GROUP2));
20609 
20610 	SD_TRACE(SD_LOG_IO, un,
20611 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20612 
20613 	bzero(&cdb, sizeof (cdb));
20614 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20615 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20616 
20617 	/* Set the PF bit for many third party drives */
20618 	cdb.cdb_opaque[1] = 0x10;
20619 
20620 	/* Set the savepage(SP) bit if given */
20621 	/* Set the save page (SP) bit if requested */
20622 		cdb.cdb_opaque[1] |= 0x01;
20623 	}
20624 
20625 	if (cdbsize == CDB_GROUP0) {
20626 		cdb.scc_cmd = SCMD_MODE_SELECT;
20627 		FORMG0COUNT(&cdb, buflen);
20628 	} else {
20629 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20630 		FORMG1COUNT(&cdb, buflen);
20631 	}
20632 
20633 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20634 
20635 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20636 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20637 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20638 	ucmd_buf.uscsi_buflen	= buflen;
20639 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20640 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20641 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20642 	ucmd_buf.uscsi_timeout	= 60;
20643 
20644 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20645 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20646 
20647 	switch (status) {
20648 	case 0:
20649 		break;	/* Success! */
20650 	case EIO:
20651 		switch (ucmd_buf.uscsi_status) {
20652 		case STATUS_RESERVATION_CONFLICT:
20653 			status = EACCES;
20654 			break;
20655 		default:
20656 			break;
20657 		}
20658 		break;
20659 	default:
20660 		break;
20661 	}
20662 
20663 	if (status == 0) {
20664 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20665 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20666 	}
20667 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20668 
20669 	return (status);
20670 }
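
/*
 * Usage sketch (illustrative): after modifying page data previously
 * fetched with sd_send_scsi_MODE_SENSE(), write it back and ask the
 * target to save it across power cycles:
 *
 *	rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, page,
 *	    sizeof (page), SD_SAVE_PAGE, SD_PATH_DIRECT);
 */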
20671 
20672 
20673 /*
20674  *    Function: sd_send_scsi_RDWR
20675  *
20676  * Description: Issue a scsi READ or WRITE command with the given parameters.
20677  *
20678  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20679  *		cmd:	 SCMD_READ or SCMD_WRITE
20680  *		bufaddr: Address of caller's buffer to receive the RDWR data
20681  *		buflen:  Length of caller's buffer to receive the RDWR data.
20682  *		start_block: Block number for the start of the RDWR operation.
20683  *			 (Assumes target-native block size.)
20686  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20687  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20688  *			to use the USCSI "direct" chain and bypass the normal
20689  *			command waitq.
20690  *
20691  * Return Code: 0   - Success
20692  *		errno return code from sd_send_scsi_cmd()
20693  *
20694  *     Context: Can sleep. Does not return until command is completed.
20695  */
20696 
20697 static int
20698 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20699 	size_t buflen, daddr_t start_block, int path_flag)
20700 {
20701 	struct	scsi_extended_sense	sense_buf;
20702 	union scsi_cdb		cdb;
20703 	struct uscsi_cmd	ucmd_buf;
20704 	uint32_t		block_count;
20705 	int			status;
20706 	int			cdbsize;
20707 	uchar_t			flag;
20708 
20709 	ASSERT(un != NULL);
20710 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20711 	ASSERT(bufaddr != NULL);
20712 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20713 
20714 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20715 
20716 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20717 		return (EINVAL);
20718 	}
20719 
20720 	mutex_enter(SD_MUTEX(un));
20721 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20722 	mutex_exit(SD_MUTEX(un));
20723 
20724 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20725 
20726 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20727 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20728 	    bufaddr, buflen, start_block, block_count);
20729 
20730 	bzero(&cdb, sizeof (cdb));
20731 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20732 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20733 
20734 	/* Compute CDB size to use */
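	/*
	 * A Group 0 (6-byte) CDB carries only a 21-bit logical block
	 * address and an 8-bit transfer count, so a start block with any
	 * bit set at or above bit 21 (the 0xFFE00000 test below) requires
	 * at least a Group 1 (10-byte) CDB, as do ATAPI devices, which do
	 * not accept 6-byte CDBs.  Start blocks beyond 32 bits require a
	 * Group 4 (16-byte) CDB.
	 */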
20735 	if (start_block > 0xffffffff)
20736 		cdbsize = CDB_GROUP4;
20737 	else if ((start_block & 0xFFE00000) ||
20738 	    (un->un_f_cfg_is_atapi == TRUE))
20739 		cdbsize = CDB_GROUP1;
20740 	else
20741 		cdbsize = CDB_GROUP0;
20742 
20743 	switch (cdbsize) {
20744 	case CDB_GROUP0:	/* 6-byte CDBs */
20745 		cdb.scc_cmd = cmd;
20746 		FORMG0ADDR(&cdb, start_block);
20747 		FORMG0COUNT(&cdb, block_count);
20748 		break;
20749 	case CDB_GROUP1:	/* 10-byte CDBs */
20750 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20751 		FORMG1ADDR(&cdb, start_block);
20752 		FORMG1COUNT(&cdb, block_count);
20753 		break;
20754 	case CDB_GROUP4:	/* 16-byte CDBs */
20755 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20756 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20757 		FORMG4COUNT(&cdb, block_count);
20758 		break;
20759 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20760 	default:
20761 		/* All others reserved */
20762 		return (EINVAL);
20763 	}
20764 
20765 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20766 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20767 
20768 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20769 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20770 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20771 	ucmd_buf.uscsi_buflen	= buflen;
20772 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20773 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20774 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20775 	ucmd_buf.uscsi_timeout	= 60;
20776 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20777 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20778 	switch (status) {
20779 	case 0:
20780 		break;	/* Success! */
20781 	case EIO:
20782 		switch (ucmd_buf.uscsi_status) {
20783 		case STATUS_RESERVATION_CONFLICT:
20784 			status = EACCES;
20785 			break;
20786 		default:
20787 			break;
20788 		}
20789 		break;
20790 	default:
20791 		break;
20792 	}
20793 
20794 	if (status == 0) {
20795 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20796 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20797 	}
20798 
20799 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20800 
20801 	return (status);
20802 }
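
/*
 * Usage sketch (illustrative; assumes the target block size has been
 * validated, as the routine itself requires):
 *
 *	Read the first target block of the device:
 *
 *	uchar_t	*blk = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
 *
 *	rval = sd_send_scsi_RDWR(un, SCMD_READ, blk,
 *	    un->un_tgt_blocksize, (daddr_t)0, SD_PATH_DIRECT);
 */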
20803 
20804 
20805 /*
20806  *    Function: sd_send_scsi_LOG_SENSE
20807  *
20808  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20809  *
20810  *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Length of bufaddr, in bytes.
 *		page_code: Log page to be retrieved.
 *		page_control: Page control bits for cdb[2] (bits 6-7).
 *		param_ptr: Parameter pointer for cdb[5] and cdb[6].
 *		path_flag: Command chain selection, as for the routines above.
20811  *
20812  * Return Code: 0   - Success
20813  *		errno return code from sd_send_scsi_cmd()
20814  *
20815  *     Context: Can sleep. Does not return until command is completed.
20816  */
20817 
20818 static int
20819 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20820 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20821 	int path_flag)
20822 
20823 {
20824 	struct	scsi_extended_sense	sense_buf;
20825 	union scsi_cdb		cdb;
20826 	struct uscsi_cmd	ucmd_buf;
20827 	int			status;
20828 
20829 	ASSERT(un != NULL);
20830 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20831 
20832 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20833 
20834 	bzero(&cdb, sizeof (cdb));
20835 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20836 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20837 
20838 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20839 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20840 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20841 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20842 	FORMG1COUNT(&cdb, buflen);
20843 
20844 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20845 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20846 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20847 	ucmd_buf.uscsi_buflen	= buflen;
20848 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20849 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20850 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20851 	ucmd_buf.uscsi_timeout	= 60;
20852 
20853 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20854 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20855 
20856 	switch (status) {
20857 	case 0:
20858 		break;
20859 	case EIO:
20860 		switch (ucmd_buf.uscsi_status) {
20861 		case STATUS_RESERVATION_CONFLICT:
20862 			status = EACCES;
20863 			break;
20864 		case STATUS_CHECK:
20865 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20866 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20867 				KEY_ILLEGAL_REQUEST) &&
20868 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
20869 				/*
20870 				 * ASC 0x24: INVALID FIELD IN CDB
20871 				 */
20872 				switch (page_code) {
20873 				case START_STOP_CYCLE_PAGE:
20874 					/*
20875 					 * The start stop cycle counter is
20876 					 * implemented as page 0x31 in earlier
20877 					 * generation disks. In new generation
20878 					 * disks the start stop cycle counter is
20879 					 * implemented as page 0xE. To properly
20880 					 * handle this case, if an attempt for
20881 					 * log page 0xE is made and fails, we
20882 					 * will try again using page 0x31.
20883 					 *
20884 					 * Network storage BU committed to
20885 					 * maintain the page 0x31 for this
20886 					 * purpose and will not have any other
20887 					 * page implemented with page code 0x31
20888 					 * until all disks transition to the
20889 					 * standard page.
20890 					 */
20891 					mutex_enter(SD_MUTEX(un));
20892 					un->un_start_stop_cycle_page =
20893 					    START_STOP_CYCLE_VU_PAGE;
20894 					cdb.cdb_opaque[2] =
20895 					    (char)(page_control << 6) |
20896 					    un->un_start_stop_cycle_page;
20897 					mutex_exit(SD_MUTEX(un));
20898 					status = sd_send_scsi_cmd(
20899 					    SD_GET_DEV(un), &ucmd_buf,
20900 					    UIO_SYSSPACE, UIO_SYSSPACE,
20901 					    UIO_SYSSPACE, path_flag);
20902 
20903 					break;
20904 				case TEMPERATURE_PAGE:
20905 					status = ENOTTY;
20906 					break;
20907 				default:
20908 					break;
20909 				}
20910 			}
20911 			break;
20912 		default:
20913 			break;
20914 		}
20915 		break;
20916 	default:
20917 		break;
20918 	}
20919 
20920 	if (status == 0) {
20921 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20922 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20923 	}
20924 
20925 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20926 
20927 	return (status);
20928 }
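
/*
 * Usage sketch (illustrative; the buffer size and the page control
 * value of 0x01 for current cumulative values are assumptions):
 *
 *	uchar_t	logbuf[0xff];
 *
 *	rval = sd_send_scsi_LOG_SENSE(un, logbuf, sizeof (logbuf),
 *	    TEMPERATURE_PAGE, 0x01, 0, SD_PATH_DIRECT);
 */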
20929 
20930 
20931 /*
20932  *    Function: sdioctl
20933  *
20934  * Description: Driver's ioctl(9e) entry point function.
20935  *
20936  *   Arguments: dev     - device number
20937  *		cmd     - ioctl operation to be performed
20938  *		arg     - user argument, contains data to be set or reference
20939  *			  parameter for get
20940  *		flag    - bit flag, indicating open settings, 32/64 bit type
20941  *		cred_p  - user credential pointer
20942  *		rval_p  - calling process return value (OPT)
20943  *
20944  * Return Code: EINVAL
20945  *		ENOTTY
20946  *		ENXIO
20947  *		EIO
20948  *		EFAULT
20949  *		ENOTSUP
20950  *		EPERM
20951  *
20952  *     Context: Called from the device switch at normal priority.
20953  */
20954 
20955 static int
20956 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20957 {
20958 	struct sd_lun	*un = NULL;
20959 	int		geom_validated = FALSE;
20960 	int		err = 0;
20961 	int		i = 0;
20962 	cred_t		*cr;
20963 
20964 	/*
20965 	 * All device accesses go through sdstrategy, where we check the
20966 	 * suspend status.
20967 	 */
20968 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20969 		return (ENXIO);
20970 	}
20971 
20972 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20973 
20974 	/*
20975 	 * Moved this wait from sd_uscsi_strategy to here for
20976 	 * reasons of deadlock prevention. Internal driver commands,
20977 	 * specifically those to change a device's power level, result
20978 	 * in a call to sd_uscsi_strategy.
20979 	 */
20980 	mutex_enter(SD_MUTEX(un));
20981 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20982 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20983 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20984 	}
20985 	/*
20986 	 * Twiddling the counter here protects commands from now
20987 	 * through to the top of sd_uscsi_strategy. Without the
20988 	 * counter increment, a power down, for example, could get in
20989 	 * after the above check for state is made and before
20990 	 * execution gets to the top of sd_uscsi_strategy.
20991 	 * That would cause problems.
20992 	 */
20993 	un->un_ncmds_in_driver++;
20994 
20995 	if ((un->un_f_geometry_is_valid == FALSE) &&
20996 	    (flag & (FNDELAY | FNONBLOCK))) {
20997 		switch (cmd) {
20998 		case CDROMPAUSE:
20999 		case CDROMRESUME:
21000 		case CDROMPLAYMSF:
21001 		case CDROMPLAYTRKIND:
21002 		case CDROMREADTOCHDR:
21003 		case CDROMREADTOCENTRY:
21004 		case CDROMSTOP:
21005 		case CDROMSTART:
21006 		case CDROMVOLCTRL:
21007 		case CDROMSUBCHNL:
21008 		case CDROMREADMODE2:
21009 		case CDROMREADMODE1:
21010 		case CDROMREADOFFSET:
21011 		case CDROMSBLKMODE:
21012 		case CDROMGBLKMODE:
21013 		case CDROMGDRVSPEED:
21014 		case CDROMSDRVSPEED:
21015 		case CDROMCDDA:
21016 		case CDROMCDXA:
21017 		case CDROMSUBCODE:
21018 			if (!ISCD(un)) {
21019 				un->un_ncmds_in_driver--;
21020 				ASSERT(un->un_ncmds_in_driver >= 0);
21021 				mutex_exit(SD_MUTEX(un));
21022 				return (ENOTTY);
21023 			}
21024 			break;
21025 		case FDEJECT:
21026 		case DKIOCEJECT:
21027 		case CDROMEJECT:
21028 			if (!un->un_f_eject_media_supported) {
21029 				un->un_ncmds_in_driver--;
21030 				ASSERT(un->un_ncmds_in_driver >= 0);
21031 				mutex_exit(SD_MUTEX(un));
21032 				return (ENOTTY);
21033 			}
21034 			break;
21035 		case DKIOCSVTOC:
21036 		case DKIOCSETEFI:
21037 		case DKIOCSMBOOT:
21038 		case DKIOCFLUSHWRITECACHE:
21039 			mutex_exit(SD_MUTEX(un));
21040 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21041 			if (err != 0) {
21042 				mutex_enter(SD_MUTEX(un));
21043 				un->un_ncmds_in_driver--;
21044 				ASSERT(un->un_ncmds_in_driver >= 0);
21045 				mutex_exit(SD_MUTEX(un));
21046 				return (EIO);
21047 			}
21048 			mutex_enter(SD_MUTEX(un));
21049 			/* FALLTHROUGH */
21050 		case DKIOCREMOVABLE:
21051 		case DKIOCHOTPLUGGABLE:
21052 		case DKIOCINFO:
21053 		case DKIOCGMEDIAINFO:
21054 		case MHIOCENFAILFAST:
21055 		case MHIOCSTATUS:
21056 		case MHIOCTKOWN:
21057 		case MHIOCRELEASE:
21058 		case MHIOCGRP_INKEYS:
21059 		case MHIOCGRP_INRESV:
21060 		case MHIOCGRP_REGISTER:
21061 		case MHIOCGRP_RESERVE:
21062 		case MHIOCGRP_PREEMPTANDABORT:
21063 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21064 		case CDROMCLOSETRAY:
21065 		case USCSICMD:
21066 			goto skip_ready_valid;
21067 		default:
21068 			break;
21069 		}
21070 
21071 		mutex_exit(SD_MUTEX(un));
21072 		err = sd_ready_and_valid(un);
21073 		mutex_enter(SD_MUTEX(un));
21074 		if (err == SD_READY_NOT_VALID) {
21075 			switch (cmd) {
21076 			case DKIOCGAPART:
21077 			case DKIOCGGEOM:
21078 			case DKIOCSGEOM:
21079 			case DKIOCGVTOC:
21080 			case DKIOCSVTOC:
21081 			case DKIOCSAPART:
21082 			case DKIOCG_PHYGEOM:
21083 			case DKIOCG_VIRTGEOM:
21084 				err = ENOTSUP;
21085 				un->un_ncmds_in_driver--;
21086 				ASSERT(un->un_ncmds_in_driver >= 0);
21087 				mutex_exit(SD_MUTEX(un));
21088 				return (err);
21089 			}
21090 		}
21091 		if (err != SD_READY_VALID) {
21092 			switch (cmd) {
21093 			case DKIOCSTATE:
21094 			case CDROMGDRVSPEED:
21095 			case CDROMSDRVSPEED:
21096 			case FDEJECT:	/* for eject command */
21097 			case DKIOCEJECT:
21098 			case CDROMEJECT:
21099 			case DKIOCGETEFI:
21100 			case DKIOCSGEOM:
21101 			case DKIOCREMOVABLE:
21102 			case DKIOCHOTPLUGGABLE:
21103 			case DKIOCSAPART:
21104 			case DKIOCSETEFI:
21105 				break;
21106 			default:
21107 				if (un->un_f_has_removable_media) {
21108 					err = ENXIO;
21109 				} else {
21110 					/* Do not map EACCES to EIO */
21111 					if (err != EACCES)
21112 						err = EIO;
21113 				}
21114 				un->un_ncmds_in_driver--;
21115 				ASSERT(un->un_ncmds_in_driver >= 0);
21116 				mutex_exit(SD_MUTEX(un));
21117 				return (err);
21118 			}
21119 		}
21120 		geom_validated = TRUE;
21121 	}
21122 	if ((un->un_f_geometry_is_valid == TRUE) &&
21123 	    (un->un_solaris_size > 0)) {
21124 		/*
21125 		 * the "geometry_is_valid" flag could be true if we
21126 		 * have an fdisk table but no Solaris partition
21127 		 */
21128 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21129 			/* it is EFI, so return ENOTSUP for these */
21130 			switch (cmd) {
21131 			case DKIOCGAPART:
21132 			case DKIOCGGEOM:
21133 			case DKIOCGVTOC:
21134 			case DKIOCSVTOC:
21135 			case DKIOCSAPART:
21136 				err = ENOTSUP;
21137 				un->un_ncmds_in_driver--;
21138 				ASSERT(un->un_ncmds_in_driver >= 0);
21139 				mutex_exit(SD_MUTEX(un));
21140 				return (err);
21141 			}
21142 		}
21143 	}
21144 
21145 skip_ready_valid:
21146 	mutex_exit(SD_MUTEX(un));
21147 
21148 	switch (cmd) {
21149 	case DKIOCINFO:
21150 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21151 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21152 		break;
21153 
21154 	case DKIOCGMEDIAINFO:
21155 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21156 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21157 		break;
21158 
21159 	case DKIOCGGEOM:
21160 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21161 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21162 		    geom_validated);
21163 		break;
21164 
21165 	case DKIOCSGEOM:
21166 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21167 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21168 		break;
21169 
21170 	case DKIOCGAPART:
21171 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21172 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21173 		    geom_validated);
21174 		break;
21175 
21176 	case DKIOCSAPART:
21177 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21178 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21179 		break;
21180 
21181 	case DKIOCGVTOC:
21182 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21183 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21184 		    geom_validated);
21185 		break;
21186 
21187 	case DKIOCGETEFI:
21188 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21189 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21190 		break;
21191 
21192 	case DKIOCPARTITION:
21193 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21194 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21195 		break;
21196 
21197 	case DKIOCSVTOC:
21198 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21199 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21200 		break;
21201 
21202 	case DKIOCSETEFI:
21203 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21204 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21205 		break;
21206 
21207 	case DKIOCGMBOOT:
21208 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21209 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21210 		break;
21211 
21212 	case DKIOCSMBOOT:
21213 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21214 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21215 		break;
21216 
21217 	case DKIOCLOCK:
21218 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21219 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21220 		    SD_PATH_STANDARD);
21221 		break;
21222 
21223 	case DKIOCUNLOCK:
21224 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21225 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21226 		    SD_PATH_STANDARD);
21227 		break;
21228 
21229 	case DKIOCSTATE: {
21230 		enum dkio_state		state;
21231 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21232 
21233 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21234 			err = EFAULT;
21235 		} else {
21236 			err = sd_check_media(dev, state);
21237 			if (err == 0) {
21238 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21239 				    sizeof (int), flag) != 0)
21240 					err = EFAULT;
21241 			}
21242 		}
21243 		break;
21244 	}
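
	/*
	 * From userland, DKIOCSTATE is typically used to wait for a media
	 * state change (an illustrative sketch; fd setup and error
	 * handling are omitted and are not part of this driver):
	 *
	 *	enum dkio_state state = DKIO_NONE;
	 *
	 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
	 *		if (state == DKIO_INSERTED)
	 *			break;		(media has arrived)
	 *	}
	 */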
21245 
21246 	case DKIOCREMOVABLE:
21247 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21248 		/*
21249 		 * At present, vold only does automount for removable-media
21250 		 * devices. In order not to break current applications, we
21251 		 * still let hotpluggable devices pretend to be removable media
21252 		 * devices for vold. In the near future, once vold is EOL'ed,
21253 		 * we should remove this workaround.
21254 		 */
21255 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21256 			i = 1;
21257 		} else {
21258 			i = 0;
21259 		}
21260 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21261 			err = EFAULT;
21262 		} else {
21263 			err = 0;
21264 		}
21265 		break;
21266 
21267 	case DKIOCHOTPLUGGABLE:
21268 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21269 		if (un->un_f_is_hotpluggable) {
21270 			i = 1;
21271 		} else {
21272 			i = 0;
21273 		}
21274 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21275 			err = EFAULT;
21276 		} else {
21277 			err = 0;
21278 		}
21279 		break;
21280 
21281 	case DKIOCGTEMPERATURE:
21282 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21283 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21284 		break;
21285 
21286 	case MHIOCENFAILFAST:
21287 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21288 		if ((err = drv_priv(cred_p)) == 0) {
21289 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21290 		}
21291 		break;
21292 
21293 	case MHIOCTKOWN:
21294 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21295 		if ((err = drv_priv(cred_p)) == 0) {
21296 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21297 		}
21298 		break;
21299 
21300 	case MHIOCRELEASE:
21301 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21302 		if ((err = drv_priv(cred_p)) == 0) {
21303 			err = sd_mhdioc_release(dev);
21304 		}
21305 		break;
21306 
21307 	case MHIOCSTATUS:
21308 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21309 		if ((err = drv_priv(cred_p)) == 0) {
21310 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21311 			case 0:
21312 				err = 0;
21313 				break;
21314 			case EACCES:
21315 				*rval_p = 1;
21316 				err = 0;
21317 				break;
21318 			default:
21319 				err = EIO;
21320 				break;
21321 			}
21322 		}
21323 		break;
21324 
21325 	case MHIOCQRESERVE:
21326 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21327 		if ((err = drv_priv(cred_p)) == 0) {
21328 			err = sd_reserve_release(dev, SD_RESERVE);
21329 		}
21330 		break;
21331 
21332 	case MHIOCREREGISTERDEVID:
21333 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21334 		if (drv_priv(cred_p) == EPERM) {
21335 			err = EPERM;
21336 		} else if (!un->un_f_devid_supported) {
21337 			err = ENOTTY;
21338 		} else {
21339 			err = sd_mhdioc_register_devid(dev);
21340 		}
21341 		break;
21342 
21343 	case MHIOCGRP_INKEYS:
21344 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21345 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21346 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21347 				err = ENOTSUP;
21348 			} else {
21349 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21350 				    flag);
21351 			}
21352 		}
21353 		break;
21354 
21355 	case MHIOCGRP_INRESV:
21356 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21357 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21358 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21359 				err = ENOTSUP;
21360 			} else {
21361 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21362 			}
21363 		}
21364 		break;
21365 
21366 	case MHIOCGRP_REGISTER:
21367 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21368 		if ((err = drv_priv(cred_p)) != EPERM) {
21369 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21370 				err = ENOTSUP;
21371 			} else if (arg != NULL) {
21372 				mhioc_register_t reg;
21373 				if (ddi_copyin((void *)arg, &reg,
21374 				    sizeof (mhioc_register_t), flag) != 0) {
21375 					err = EFAULT;
21376 				} else {
21377 					err =
21378 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21379 					    un, SD_SCSI3_REGISTER,
21380 					    (uchar_t *)&reg);
21381 				}
21382 			}
21383 		}
21384 		break;
21385 
21386 	case MHIOCGRP_RESERVE:
21387 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21388 		if ((err = drv_priv(cred_p)) != EPERM) {
21389 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21390 				err = ENOTSUP;
21391 			} else if (arg != NULL) {
21392 				mhioc_resv_desc_t resv_desc;
21393 				if (ddi_copyin((void *)arg, &resv_desc,
21394 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21395 					err = EFAULT;
21396 				} else {
21397 					err =
21398 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21399 					    un, SD_SCSI3_RESERVE,
21400 					    (uchar_t *)&resv_desc);
21401 				}
21402 			}
21403 		}
21404 		break;
21405 
21406 	case MHIOCGRP_PREEMPTANDABORT:
21407 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21408 		if ((err = drv_priv(cred_p)) != EPERM) {
21409 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21410 				err = ENOTSUP;
21411 			} else if (arg != NULL) {
21412 				mhioc_preemptandabort_t preempt_abort;
21413 				if (ddi_copyin((void *)arg, &preempt_abort,
21414 				    sizeof (mhioc_preemptandabort_t),
21415 				    flag) != 0) {
21416 					err = EFAULT;
21417 				} else {
21418 					err =
21419 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21420 					    un, SD_SCSI3_PREEMPTANDABORT,
21421 					    (uchar_t *)&preempt_abort);
21422 				}
21423 			}
21424 		}
21425 		break;
21426 
21427 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21428 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21429 		if ((err = drv_priv(cred_p)) != EPERM) {
21430 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21431 				err = ENOTSUP;
21432 			} else if (arg != NULL) {
21433 				mhioc_registerandignorekey_t r_and_i;
21434 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21435 				    sizeof (mhioc_registerandignorekey_t),
21436 				    flag) != 0) {
21437 					err = EFAULT;
21438 				} else {
21439 					err =
21440 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21441 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21442 					    (uchar_t *)&r_and_i);
21443 				}
21444 			}
21445 		}
21446 		break;
21447 
21448 	case USCSICMD:
21449 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21450 		cr = ddi_get_cred();
21451 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21452 			err = EPERM;
21453 		} else {
21454 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21455 		}
21456 		break;
21457 
21458 	case CDROMPAUSE:
21459 	case CDROMRESUME:
21460 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21461 		if (!ISCD(un)) {
21462 			err = ENOTTY;
21463 		} else {
21464 			err = sr_pause_resume(dev, cmd);
21465 		}
21466 		break;
21467 
21468 	case CDROMPLAYMSF:
21469 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21470 		if (!ISCD(un)) {
21471 			err = ENOTTY;
21472 		} else {
21473 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21474 		}
21475 		break;
21476 
21477 	case CDROMPLAYTRKIND:
21478 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21479 #if defined(__i386) || defined(__amd64)
21480 		/*
21481 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21482 		 */
21483 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21484 #else
21485 		if (!ISCD(un)) {
21486 #endif
21487 			err = ENOTTY;
21488 		} else {
21489 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21490 		}
21491 		break;
21492 
21493 	case CDROMREADTOCHDR:
21494 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21495 		if (!ISCD(un)) {
21496 			err = ENOTTY;
21497 		} else {
21498 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21499 		}
21500 		break;
21501 
21502 	case CDROMREADTOCENTRY:
21503 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21504 		if (!ISCD(un)) {
21505 			err = ENOTTY;
21506 		} else {
21507 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21508 		}
21509 		break;
21510 
21511 	case CDROMSTOP:
21512 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21513 		if (!ISCD(un)) {
21514 			err = ENOTTY;
21515 		} else {
21516 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21517 			    SD_PATH_STANDARD);
21518 		}
21519 		break;
21520 
21521 	case CDROMSTART:
21522 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21523 		if (!ISCD(un)) {
21524 			err = ENOTTY;
21525 		} else {
21526 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21527 			    SD_PATH_STANDARD);
21528 		}
21529 		break;
21530 
21531 	case CDROMCLOSETRAY:
21532 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21533 		if (!ISCD(un)) {
21534 			err = ENOTTY;
21535 		} else {
21536 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21537 			    SD_PATH_STANDARD);
21538 		}
21539 		break;
21540 
21541 	case FDEJECT:	/* for eject command */
21542 	case DKIOCEJECT:
21543 	case CDROMEJECT:
21544 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21545 		if (!un->un_f_eject_media_supported) {
21546 			err = ENOTTY;
21547 		} else {
21548 			err = sr_eject(dev);
21549 		}
21550 		break;
21551 
21552 	case CDROMVOLCTRL:
21553 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21554 		if (!ISCD(un)) {
21555 			err = ENOTTY;
21556 		} else {
21557 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21558 		}
21559 		break;
21560 
21561 	case CDROMSUBCHNL:
21562 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21563 		if (!ISCD(un)) {
21564 			err = ENOTTY;
21565 		} else {
21566 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21567 		}
21568 		break;
21569 
21570 	case CDROMREADMODE2:
21571 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21572 		if (!ISCD(un)) {
21573 			err = ENOTTY;
21574 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21575 			/*
21576 			 * If the drive supports READ CD, use that instead of
21577 			 * switching the LBA size via a MODE SELECT
21578 			 * Block Descriptor
21579 			 */
21580 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21581 		} else {
21582 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21583 		}
21584 		break;
21585 
21586 	case CDROMREADMODE1:
21587 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21588 		if (!ISCD(un)) {
21589 			err = ENOTTY;
21590 		} else {
21591 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21592 		}
21593 		break;
21594 
21595 	case CDROMREADOFFSET:
21596 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21597 		if (!ISCD(un)) {
21598 			err = ENOTTY;
21599 		} else {
21600 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21601 			    flag);
21602 		}
21603 		break;
21604 
21605 	case CDROMSBLKMODE:
21606 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21607 		/*
21608 		 * There is no means of changing the block size on ATAPI
21609 		 * drives, so return ENOTTY if the drive is ATAPI
21610 		 */
21611 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21612 			err = ENOTTY;
21613 		} else if (un->un_f_mmc_cap == TRUE) {
21614 
21615 			/*
21616 			 * MMC Devices do not support changing the
21617 			 * logical block size
21618 			 *
21619 			 * Note: EINVAL is being returned instead of ENOTTY to
21620 			 * maintain consistency with the original mmc
21621 			 * driver update.
21622 			 */
21623 			err = EINVAL;
21624 		} else {
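			/*
			 * Changing the logical block size is only allowed
			 * when this partition is exclusively opened and
			 * no commands are currently in transport.
			 */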
21625 			mutex_enter(SD_MUTEX(un));
21626 			if ((!(un->un_exclopen & (1 << SDPART(dev)))) ||
21627 			    (un->un_ncmds_in_transport > 0)) {
21628 				mutex_exit(SD_MUTEX(un));
21629 				err = EINVAL;
21630 			} else {
21631 				mutex_exit(SD_MUTEX(un));
21632 				err = sr_change_blkmode(dev, cmd, arg, flag);
21633 			}
21634 		}
21635 		break;
21636 
21637 	case CDROMGBLKMODE:
21638 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21639 		if (!ISCD(un)) {
21640 			err = ENOTTY;
21641 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21642 		    (un->un_f_blockcount_is_valid != FALSE)) {
21643 			/*
21644 			 * Drive is an ATAPI drive so return target block
21645 			 * size for ATAPI drives since we cannot change the
21646 			 * blocksize on ATAPI drives. Used primarily to detect
21647 			 * if an ATAPI cdrom is present.
21648 			 */
21649 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21650 			    sizeof (int), flag) != 0) {
21651 				err = EFAULT;
21652 			} else {
21653 				err = 0;
21654 			}
21655 
21656 		} else {
21657 			/*
21658 			 * Drive supports changing block sizes via a Mode
21659 			 * Select.
21660 			 */
21661 			err = sr_change_blkmode(dev, cmd, arg, flag);
21662 		}
21663 		break;
21664 
21665 	case CDROMGDRVSPEED:
21666 	case CDROMSDRVSPEED:
21667 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21668 		if (!ISCD(un)) {
21669 			err = ENOTTY;
21670 		} else if (un->un_f_mmc_cap == TRUE) {
21671 			/*
21672 			 * Note: In the future the driver implementation
21673 			 * for getting and setting cd speed should
21674 			 * entail:
21675 			 * 1) If non-mmc try the Toshiba mode page
21676 			 *    (sr_change_speed)
21677 			 * 2) If mmc but no support for Real Time Streaming
21678 			 *    try the SET CD SPEED (0xBB) command
21679 			 *    (sr_atapi_change_speed)
21680 			 * 3) If mmc and support for Real Time Streaming
21681 			 *    try the GET PERFORMANCE and SET STREAMING
21682 			 *    commands (not yet implemented, 4380808)
21683 			 */
21684 			/*
21685 			 * As per the recent MMC spec, CD-ROM speed is
21686 			 * variable and changes with LBA. Since there is no
21687 			 * longer any such thing as a single drive speed,
21688 			 * fail this ioctl.
21689 			 *
21690 			 * Note: EINVAL is returned for consistency with the
21691 			 * original implementation, which supported getting
21692 			 * (but not setting) the drive speed of mmc devices,
21693 			 * so a set request for an mmc device returned
21694 			 * EINVAL. We no longer support get or set speed
21695 			 * for mmc, but need to remain consistent with
21696 			 * regard to the error code returned.
21697 			 */
21698 			err = EINVAL;
21699 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21700 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21701 		} else {
21702 			err = sr_change_speed(dev, cmd, arg, flag);
21703 		}
21704 		break;
21705 
21706 	case CDROMCDDA:
21707 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21708 		if (!ISCD(un)) {
21709 			err = ENOTTY;
21710 		} else {
21711 			err = sr_read_cdda(dev, (void *)arg, flag);
21712 		}
21713 		break;
21714 
21715 	case CDROMCDXA:
21716 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21717 		if (!ISCD(un)) {
21718 			err = ENOTTY;
21719 		} else {
21720 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21721 		}
21722 		break;
21723 
21724 	case CDROMSUBCODE:
21725 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21726 		if (!ISCD(un)) {
21727 			err = ENOTTY;
21728 		} else {
21729 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21730 		}
21731 		break;
21732 
21733 	case DKIOCPARTINFO: {
21734 		/*
21735 		 * Return parameters describing the selected disk slice.
21736 		 * Note: this ioctl is for the intel platform only
21737 		 */
21738 #if defined(__i386) || defined(__amd64)
21739 		int part;
21740 
21741 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21742 		part = SDPART(dev);
21743 
21744 		/* don't check un_solaris_size for pN */
21745 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21746 			err = EIO;
21747 		} else {
21748 			struct part_info p;
21749 
21750 			p.p_start = (daddr_t)un->un_offset[part];
21751 			p.p_length = (int)un->un_map[part].dkl_nblk;
21752 #ifdef _MULTI_DATAMODEL
21753 			switch (ddi_model_convert_from(flag & FMODELS)) {
21754 			case DDI_MODEL_ILP32:
21755 			{
21756 				struct part_info32 p32;
21757 
21758 				p32.p_start = (daddr32_t)p.p_start;
21759 				p32.p_length = p.p_length;
21760 				if (ddi_copyout(&p32, (void *)arg,
21761 				    sizeof (p32), flag))
21762 					err = EFAULT;
21763 				break;
21764 			}
21765 
21766 			case DDI_MODEL_NONE:
21767 			{
21768 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21769 				    flag))
21770 					err = EFAULT;
21771 				break;
21772 			}
21773 			}
21774 #else /* ! _MULTI_DATAMODEL */
21775 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21776 				err = EFAULT;
21777 #endif /* _MULTI_DATAMODEL */
21778 		}
21779 #else
21780 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21781 		err = ENOTTY;
21782 #endif
21783 		break;
21784 	}
21785 
21786 	case DKIOCG_PHYGEOM: {
21787 		/* Return the driver's notion of the media physical geometry */
21788 #if defined(__i386) || defined(__amd64)
21789 		uint64_t	capacity;
21790 		struct dk_geom	disk_geom;
21791 		struct dk_geom	*dkgp = &disk_geom;
21792 
21793 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21794 		mutex_enter(SD_MUTEX(un));
21795 
21796 		if (un->un_g.dkg_nhead != 0 &&
21797 		    un->un_g.dkg_nsect != 0) {
21798 			/*
21799 			 * We succeeded in getting a geometry, but
21800 			 * right now it is being reported as just the
21801 			 * Solaris fdisk partition, just like for
21802 			 * DKIOCGGEOM. We need to change that to be
21803 			 * correct for the entire disk now.
21804 			 */
21805 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21806 			dkgp->dkg_acyl = 0;
21807 			dkgp->dkg_ncyl = un->un_blockcount /
21808 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21809 		} else {
21810 			bzero(dkgp, sizeof (struct dk_geom));
21811 			/*
21812 			 * This disk does not have a Solaris VTOC
21813 			 * so we must present a physical geometry
21814 			 * that will remain consistent regardless
21815 			 * of how the disk is used. This will ensure
21816 			 * that the geometry does not change regardless
21817 			 * of the fdisk partition type (ie. EFI, FAT32,
21818 			 * Solaris, etc).
21819 			 */
21820 			if (ISCD(un)) {
21821 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21822 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21823 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21824 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21825 			} else {
21826 				/*
21827 				 * Invalid un_blockcount can generate invalid
21828 				 * dk_geom and may result in division by zero
21829 				 * system failure. Should make sure blockcount
21830 				 * is valid before using it here.
21831 				 */
21832 				if (un->un_f_blockcount_is_valid == FALSE) {
21833 					mutex_exit(SD_MUTEX(un));
21834 					err = EIO;
21835 
21836 					break;
21837 				}
21838 
21839 				/*
21840 				 * Refer to comments related to off-by-1 at the
21841 				 * header of this file
21842 				 */
21843 				if (!un->un_f_capacity_adjusted &&
21844 				    !un->un_f_has_removable_media &&
21845 				    !un->un_f_is_hotpluggable &&
21846 				    (un->un_tgt_blocksize ==
21847 				    un->un_sys_blocksize))
21848 					capacity = un->un_blockcount - 1;
21849 				else
21850 					capacity = un->un_blockcount;
21851 
21852 				sd_convert_geometry(capacity, dkgp);
21853 				dkgp->dkg_acyl = 0;
21854 				dkgp->dkg_ncyl = capacity /
21855 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21856 			}
21857 		}
21858 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21859 
21860 		if (ddi_copyout(dkgp, (void *)arg,
21861 		    sizeof (struct dk_geom), flag)) {
21862 			mutex_exit(SD_MUTEX(un));
21863 			err = EFAULT;
21864 		} else {
21865 			mutex_exit(SD_MUTEX(un));
21866 			err = 0;
21867 		}
21868 #else
21869 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21870 		err = ENOTTY;
21871 #endif
21872 		break;
21873 	}
21874 
21875 	case DKIOCG_VIRTGEOM: {
21876 		/* Return the driver's notion of the media's logical geometry */
21877 #if defined(__i386) || defined(__amd64)
21878 		struct dk_geom	disk_geom;
21879 		struct dk_geom	*dkgp = &disk_geom;
21880 
21881 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21882 		mutex_enter(SD_MUTEX(un));
21883 		/*
21884 		 * If there is no HBA geometry available, or
21885 		 * if the HBA returned us something that doesn't
21886 		 * really fit into an Int 13/function 8 geometry
21887 		 * result, just fail the ioctl.  See PSARC 1998/313.
21888 		 */
21889 		if (un->un_lgeom.g_nhead == 0 ||
21890 		    un->un_lgeom.g_nsect == 0 ||
21891 		    un->un_lgeom.g_ncyl > 1024) {
21892 			mutex_exit(SD_MUTEX(un));
21893 			err = EINVAL;
21894 		} else {
21895 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21896 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21897 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21898 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21899 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21900 
21901 			if (ddi_copyout(dkgp, (void *)arg,
21902 			    sizeof (struct dk_geom), flag)) {
21903 				mutex_exit(SD_MUTEX(un));
21904 				err = EFAULT;
21905 			} else {
21906 				mutex_exit(SD_MUTEX(un));
21907 				err = 0;
21908 			}
21909 		}
21910 #else
21911 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21912 		err = ENOTTY;
21913 #endif
21914 		break;
21915 	}
21916 #ifdef SDDEBUG
21917 /* RESET/ABORTS testing ioctls */
21918 	case DKIOCRESET: {
21919 		int	reset_level;
21920 
21921 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21922 			err = EFAULT;
21923 		} else {
21924 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21925 			    "reset_level = 0x%x\n", reset_level);
21926 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21927 				err = 0;
21928 			} else {
21929 				err = EIO;
21930 			}
21931 		}
21932 		break;
21933 	}
21934 
21935 	case DKIOCABORT:
21936 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21937 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21938 			err = 0;
21939 		} else {
21940 			err = EIO;
21941 		}
21942 		break;
21943 #endif
21944 
21945 #ifdef SD_FAULT_INJECTION
21946 /* SDIOC FaultInjection testing ioctls */
21947 	case SDIOCSTART:
21948 	case SDIOCSTOP:
21949 	case SDIOCINSERTPKT:
21950 	case SDIOCINSERTXB:
21951 	case SDIOCINSERTUN:
21952 	case SDIOCINSERTARQ:
21953 	case SDIOCPUSH:
21954 	case SDIOCRETRIEVE:
21955 	case SDIOCRUN:
21956 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
21957 		    "SDIOC detected cmd:0x%X:\n", cmd);
21958 		/* call error generator */
21959 		sd_faultinjection_ioctl(cmd, arg, un);
21960 		err = 0;
21961 		break;
21962 
21963 #endif /* SD_FAULT_INJECTION */
21964 
21965 	case DKIOCFLUSHWRITECACHE:
21966 		{
21967 			struct dk_callback *dkc = (struct dk_callback *)arg;
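			/*
			 * Note: the dk_callback structure is only
			 * dereferenced for in-kernel callers (FKIOCTL),
			 * since arg is a kernel address in that case.
			 */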
21968 
21969 			mutex_enter(SD_MUTEX(un));
21970 			if (!un->un_f_sync_cache_supported ||
21971 			    !un->un_f_write_cache_enabled) {
21972 				err = un->un_f_sync_cache_supported ?
21973 				    0 : ENOTSUP;
21974 				mutex_exit(SD_MUTEX(un));
21975 				if ((flag & FKIOCTL) && dkc != NULL &&
21976 				    dkc->dkc_callback != NULL) {
21977 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21978 					    err);
21979 					/*
21980 					 * Did callback and reported error.
21981 					 * Since we did a callback, ioctl
21982 					 * should return 0.
21983 					 */
21984 					err = 0;
21985 				}
21986 				break;
21987 			}
21988 			mutex_exit(SD_MUTEX(un));
21989 
21990 			if ((flag & FKIOCTL) && dkc != NULL &&
21991 			    dkc->dkc_callback != NULL) {
21992 				/* async SYNC CACHE request */
21993 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
21994 			} else {
21995 				/* synchronous SYNC CACHE request */
21996 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21997 			}
21998 		}
21999 		break;
22000 
22001 	case DKIOCGETWCE: {
22002 
22003 		int wce;
22004 
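		/* Retrieve the current write cache enable (WCE) state. */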
22005 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22006 			break;
22007 		}
22008 
22009 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22010 			err = EFAULT;
22011 		}
22012 		break;
22013 	}
22014 
22015 	case DKIOCSETWCE: {
22016 
22017 		int wce, sync_supported;
22018 
22019 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22020 			err = EFAULT;
22021 			break;
22022 		}
22023 
22024 		/*
22025 		 * Synchronize multiple threads trying to enable
22026 		 * or disable the cache via the un_f_wcc_cv
22027 		 * condition variable.
22028 		 */
22029 		mutex_enter(SD_MUTEX(un));
22030 
22031 		/*
22032 		 * Don't allow the cache to be enabled if the
22033 		 * config file has it disabled.
22034 		 */
22035 		if (un->un_f_opt_disable_cache && wce) {
22036 			mutex_exit(SD_MUTEX(un));
22037 			err = EINVAL;
22038 			break;
22039 		}
22040 
22041 		/*
22042 		 * Wait for write cache change in progress
22043 		 * bit to be clear before proceeding.
22044 		 */
22045 		while (un->un_f_wcc_inprog)
22046 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22047 
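		/*
		 * Claim ownership of the write cache change; concurrent
		 * DKIOCSETWCE callers block above until un_f_wcc_inprog
		 * is cleared and un_wcc_cv is broadcast below.
		 */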
22048 		un->un_f_wcc_inprog = 1;
22049 
22050 		if (un->un_f_write_cache_enabled && wce == 0) {
22051 			/*
22052 			 * Disable the write cache.  Don't clear
22053 			 * un_f_write_cache_enabled until after
22054 			 * the mode select and flush are complete.
22055 			 */
22056 			sync_supported = un->un_f_sync_cache_supported;
22057 			mutex_exit(SD_MUTEX(un));
22058 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22059 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22060 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22061 			}
22062 
22063 			mutex_enter(SD_MUTEX(un));
22064 			if (err == 0) {
22065 				un->un_f_write_cache_enabled = 0;
22066 			}
22067 
22068 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22069 			/*
22070 			 * Set un_f_write_cache_enabled first, so there is
22071 			 * no window where the cache is enabled, but the
22072 			 * bit says it isn't.
22073 			 */
22074 			un->un_f_write_cache_enabled = 1;
22075 			mutex_exit(SD_MUTEX(un));
22076 
22077 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22078 			    SD_CACHE_ENABLE);
22079 
22080 			mutex_enter(SD_MUTEX(un));
22081 
22082 			if (err) {
22083 				un->un_f_write_cache_enabled = 0;
22084 			}
22085 		}
22086 
22087 		un->un_f_wcc_inprog = 0;
22088 		cv_broadcast(&un->un_wcc_cv);
22089 		mutex_exit(SD_MUTEX(un));
22090 		break;
22091 	}
22092 
22093 	default:
22094 		err = ENOTTY;
22095 		break;
22096 	}
22097 	mutex_enter(SD_MUTEX(un));
22098 	un->un_ncmds_in_driver--;
22099 	ASSERT(un->un_ncmds_in_driver >= 0);
22100 	mutex_exit(SD_MUTEX(un));
22101 
22102 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22103 	return (err);
22104 }
22105 
22106 
22107 /*
22108  *    Function: sd_uscsi_ioctl
22109  *
22110  * Description: This routine is the driver entry point for handling USCSI ioctl
22111  *		requests (USCSICMD).
22112  *
22113  *   Arguments: dev	- the device number
22114  *		arg	- user provided scsi command
22115  *		flag	- this argument is a pass through to ddi_copyxxx()
22116  *			  directly from the mode argument of ioctl().
22117  *
22118  * Return Code: code returned by sd_send_scsi_cmd
22119  *		ENXIO
22120  *		EFAULT
22121  *		EAGAIN
22122  */
22123 
22124 static int
22125 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22126 {
22127 #ifdef _MULTI_DATAMODEL
22128 	/*
22129 	 * For use when a 32 bit app makes a call into a
22130 	 * 64 bit ioctl
22131 	 */
22132 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22133 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22134 	model_t			model;
22135 #endif /* _MULTI_DATAMODEL */
22136 	struct uscsi_cmd	*scmd = NULL;
22137 	struct sd_lun		*un = NULL;
22138 	enum uio_seg		uioseg;
22139 	char			cdb[CDB_GROUP0];
22140 	int			rval = 0;
22141 
22142 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22143 		return (ENXIO);
22144 	}
22145 
22146 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22147 
22148 	scmd = (struct uscsi_cmd *)
22149 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22150 
22151 #ifdef _MULTI_DATAMODEL
22152 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22153 	case DDI_MODEL_ILP32:
22154 	{
22155 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22156 			rval = EFAULT;
22157 			goto done;
22158 		}
22159 		/*
22160 		 * Convert the ILP32 uscsi data from the
22161 		 * application to LP64 for internal use.
22162 		 */
22163 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22164 		break;
22165 	}
22166 	case DDI_MODEL_NONE:
22167 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22168 			rval = EFAULT;
22169 			goto done;
22170 		}
22171 		break;
22172 	}
22173 #else /* ! _MULTI_DATAMODEL */
22174 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22175 		rval = EFAULT;
22176 		goto done;
22177 	}
22178 #endif /* _MULTI_DATAMODEL */
22179 
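	/*
	 * Never allow user requests to be non-interruptible, and pick
	 * the uio segment based on the caller: FKIOCTL means the data
	 * buffers are kernel addresses.
	 */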
22180 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22181 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22182 	if (un->un_f_format_in_progress == TRUE) {
22183 		rval = EAGAIN;
22184 		goto done;
22185 	}
22186 
22187 	/*
22188 	 * We must do the ddi_copyin() on the uscsi_cdb here so that
22189 	 * we have a valid cdb[0] to test.
22190 	 */
22191 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22192 	    (cdb[0] == SCMD_FORMAT)) {
22193 		SD_TRACE(SD_LOG_IOCTL, un,
22194 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22195 		mutex_enter(SD_MUTEX(un));
22196 		un->un_f_format_in_progress = TRUE;
22197 		mutex_exit(SD_MUTEX(un));
22198 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22199 		    SD_PATH_STANDARD);
22200 		mutex_enter(SD_MUTEX(un));
22201 		un->un_f_format_in_progress = FALSE;
22202 		mutex_exit(SD_MUTEX(un));
22203 	} else {
22204 		SD_TRACE(SD_LOG_IOCTL, un,
22205 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22206 		/*
22207 		 * It's OK to fall into here even if the ddi_copyin()
22208 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22209 		 * does this same copyin and will return the EFAULT
22210 		 * if it fails.
22211 		 */
22212 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22213 		    SD_PATH_STANDARD);
22214 	}
22215 #ifdef _MULTI_DATAMODEL
22216 	switch (model) {
22217 	case DDI_MODEL_ILP32:
22218 		/*
22219 		 * Convert back to ILP32 before copyout to the
22220 		 * application
22221 		 */
22222 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22223 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22224 			if (rval != 0) {
22225 				rval = EFAULT;
22226 			}
22227 		}
22228 		break;
22229 	case DDI_MODEL_NONE:
22230 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22231 			if (rval != 0) {
22232 				rval = EFAULT;
22233 			}
22234 		}
22235 		break;
22236 	}
22237 #else /* ! _MULTI_DATAMODEL */
22238 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22239 		if (rval != 0) {
22240 			rval = EFAULT;
22241 		}
22242 	}
22243 #endif /* _MULTI_DATAMODEL */
22244 done:
22245 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22246 
22247 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22248 
22249 	return (rval);
22250 }
22251 
22252 
22253 /*
22254  *    Function: sd_dkio_ctrl_info
22255  *
22256  * Description: This routine is the driver entry point for handling controller
22257  *		information ioctl requests (DKIOCINFO).
22258  *
22259  *   Arguments: dev  - the device number
22260  *		arg  - pointer to user provided dk_cinfo structure
22261  *		       specifying the controller type and attributes.
22262  *		flag - this argument is a pass through to ddi_copyxxx()
22263  *		       directly from the mode argument of ioctl().
22264  *
22265  * Return Code: 0
22266  *		EFAULT
22267  *		ENXIO
22268  */
22269 
22270 static int
22271 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22272 {
22273 	struct sd_lun	*un = NULL;
22274 	struct dk_cinfo	*info;
22275 	dev_info_t	*pdip;
22276 	int		lun, tgt;
22277 
22278 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22279 		return (ENXIO);
22280 	}
22281 
22282 	info = (struct dk_cinfo *)
22283 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22284 
22285 	switch (un->un_ctype) {
22286 	case CTYPE_CDROM:
22287 		info->dki_ctype = DKC_CDROM;
22288 		break;
22289 	default:
22290 		info->dki_ctype = DKC_SCSI_CCS;
22291 		break;
22292 	}
22293 	pdip = ddi_get_parent(SD_DEVINFO(un));
22294 	info->dki_cnum = ddi_get_instance(pdip);
22295 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22296 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22297 	} else {
22298 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22299 		    DK_DEVLEN - 1);
22300 	}
22301 
22302 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22303 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22304 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22305 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22306 
22307 	/* Unit Information */
22308 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
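	/* Legacy "slave" encoding: target in the high bits, lun in the low 3 */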
22309 	info->dki_slave = ((tgt << 3) | lun);
22310 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22311 	    DK_DEVLEN - 1);
22312 	info->dki_flags = DKI_FMTVOL;
22313 	info->dki_partition = SDPART(dev);
22314 
22315 	/* Max Transfer size of this device in blocks */
22316 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22317 	info->dki_addr = 0;
22318 	info->dki_space = 0;
22319 	info->dki_prio = 0;
22320 	info->dki_vec = 0;
22321 
22322 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22323 		kmem_free(info, sizeof (struct dk_cinfo));
22324 		return (EFAULT);
22325 	} else {
22326 		kmem_free(info, sizeof (struct dk_cinfo));
22327 		return (0);
22328 	}
22329 }
22330 
22331 
22332 /*
22333  *    Function: sd_get_media_info
22334  *
22335  * Description: This routine is the driver entry point for handling ioctl
22336  *		requests for the media type or command set profile used by the
22337  *		drive to operate on the media (DKIOCGMEDIAINFO).
22338  *
22339  *   Arguments: dev	- the device number
22340  *		arg	- pointer to user provided dk_minfo structure
22341  *			  specifying the media type, logical block size and
22342  *			  drive capacity.
22343  *		flag	- this argument is a pass through to ddi_copyxxx()
22344  *			  directly from the mode argument of ioctl().
22345  *
22346  * Return Code: 0
22347  *		EACCES
22348  *		EFAULT
22349  *		ENXIO
22350  *		EIO
22351  */
22352 
22353 static int
22354 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22355 {
22356 	struct sd_lun		*un = NULL;
22357 	struct uscsi_cmd	com;
22358 	struct scsi_inquiry	*sinq;
22359 	struct dk_minfo		media_info;
22360 	u_longlong_t		media_capacity;
22361 	uint64_t		capacity;
22362 	uint_t			lbasize;
22363 	uchar_t			*out_data;
22364 	uchar_t			*rqbuf;
22365 	int			rval = 0;
22366 	int			rtn;
22367 
22368 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22369 	    (un->un_state == SD_STATE_OFFLINE)) {
22370 		return (ENXIO);
22371 	}
22372 
22373 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22374 
22375 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22376 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22377 
22378 	/* Issue a TUR to determine if the drive is ready with media present */
22379 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22380 	if (rval == ENXIO) {
22381 		goto done;
22382 	}
22383 
22384 	/* Now get configuration data */
22385 	if (ISCD(un)) {
22386 		media_info.dki_media_type = DK_CDROM;
22387 
22388 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22389 		if (un->un_f_mmc_cap == TRUE) {
22390 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22391 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22392 
22393 			if (rtn) {
22394 				/*
22395 				 * Fail if the command failed for a reason
22396 				 * other than an illegal or unsupported request
22397 				 */
22398 				if ((com.uscsi_status == STATUS_CHECK) &&
22399 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22400 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22401 					    (rqbuf[12] != 0x20)) {
22402 						rval = EIO;
22403 						goto done;
22404 					}
22405 				}
22406 			} else {
22407 				/*
22408 				 * The GET CONFIGURATION command succeeded
22409 				 * so set the media type from the current
22410 				 * profile (bytes 6-7 of the feature header)
22411 				 */
22412 				media_info.dki_media_type = out_data[6];
22413 				media_info.dki_media_type <<= 8;
22414 				media_info.dki_media_type |= out_data[7];
22415 			}
22416 		}
22417 	} else {
22418 		/*
22419 		 * The profile list is not available, so we attempt to identify
22420 		 * the media type based on the inquiry data
22421 		 */
22422 		sinq = un->un_sd->sd_inq;
22423 		if (sinq->inq_qual == 0) {
22424 			/* This is a direct access device */
22425 			media_info.dki_media_type = DK_FIXED_DISK;
22426 
22427 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22428 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22429 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22430 					media_info.dki_media_type = DK_ZIP;
22431 				} else if (
22432 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22433 					media_info.dki_media_type = DK_JAZ;
22434 				}
22435 			}
22436 		} else {
22437 			/* Not a CD or direct access so return unknown media */
22438 			media_info.dki_media_type = DK_UNKNOWN;
22439 		}
22440 	}
22441 
22442 	/* Now read the capacity so we can provide the lbasize and capacity */
22443 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22444 	    SD_PATH_DIRECT)) {
22445 	case 0:
22446 		break;
22447 	case EACCES:
22448 		rval = EACCES;
22449 		goto done;
22450 	default:
22451 		rval = EIO;
22452 		goto done;
22453 	}
22454 
22455 	media_info.dki_lbsize = lbasize;
22456 	media_capacity = capacity;
22457 
22458 	/*
22459 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22460 	 * un->un_sys_blocksize chunks. So we need to convert it into
22461 	 * lbasize (media block size) chunks.
22462 	 */
22463 	media_capacity *= un->un_sys_blocksize;
22464 	media_capacity /= lbasize;
22465 	media_info.dki_capacity = media_capacity;
22466 
22467 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22468 		rval = EFAULT;
22469 		/* Keep the goto so any code added below is skipped on error */
22470 		goto done;
22471 	}
22472 done:
22473 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22474 	kmem_free(rqbuf, SENSE_LENGTH);
22475 	return (rval);
22476 }
22477 
22478 
22479 /*
22480  *    Function: sd_dkio_get_geometry
22481  *
22482  * Description: This routine is the driver entry point for handling user
22483  *		requests to get the device geometry (DKIOCGGEOM).
22484  *
22485  *   Arguments: dev  - the device number
22486  *		arg  - pointer to user provided dk_geom structure specifying
22487  *			the controller's notion of the current geometry.
22488  *		flag - this argument is a pass through to ddi_copyxxx()
22489  *		       directly from the mode argument of ioctl().
22490  *		geom_validated - flag indicating if the device geometry has been
22491  *				 previously validated in the sdioctl routine.
22492  *
22493  * Return Code: 0
22494  *		EFAULT
22495  *		ENXIO
22496  *		EIO
22497  */
22498 
22499 static int
22500 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22501 {
22502 	struct sd_lun	*un = NULL;
22503 	struct dk_geom	*tmp_geom = NULL;
22504 	int		rval = 0;
22505 
22506 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22507 		return (ENXIO);
22508 	}
22509 
22510 	if (geom_validated == FALSE) {
22511 		/*
22512 		 * sd_validate_geometry does not spin a disk up
22513 		 * if it was spun down. We need to make sure it
22514 		 * is ready.
22515 		 */
22516 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22517 			return (rval);
22518 		}
22519 		mutex_enter(SD_MUTEX(un));
22520 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22521 		mutex_exit(SD_MUTEX(un));
22522 	}
22523 	if (rval)
22524 		return (rval);
22525 
22526 	/*
22527 	 * It is possible that un_solaris_size is 0 (uninitialized)
22528 	 * after sd_unit_attach; a reservation conflict can cause
22529 	 * this. Thus, the zero check of un_solaris_size must occur
22530 	 * after the sd_validate_geometry() call.
22531 	 */
22532 #if defined(__i386) || defined(__amd64)
22533 	if (un->un_solaris_size == 0) {
22534 		return (EIO);
22535 	}
22536 #endif
22537 
22538 	/*
22539 	 * Make a local copy of the soft state geometry to avoid some potential
22540 	 * race conditions associated with holding the mutex and updating the
22541 	 * write_reinstruct value
22542 	 */
22543 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22544 	mutex_enter(SD_MUTEX(un));
22545 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22546 	mutex_exit(SD_MUTEX(un));
22547 
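	/*
	 * If the label did not supply a write_reinstruct value,
	 * estimate one: dkg_nsect * dkg_rpm is sectors per minute,
	 * dividing by 60000 gives sectors per millisecond, and
	 * sd_rot_delay is the assumed rotational delay in ms.
	 */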
22548 	if (tmp_geom->dkg_write_reinstruct == 0) {
22549 		tmp_geom->dkg_write_reinstruct =
22550 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22551 		    sd_rot_delay) / (int)60000);
22552 	}
22553 
22554 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22555 	    flag);
22556 	if (rval != 0) {
22557 		rval = EFAULT;
22558 	}
22559 
22560 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22561 	return (rval);
22562 
22563 }
22564 
22565 
22566 /*
22567  *    Function: sd_dkio_set_geometry
22568  *
22569  * Description: This routine is the driver entry point for handling user
22570  *		requests to set the device geometry (DKIOCSGEOM). The actual
22571  *		device geometry is not updated, just the driver "notion" of it.
22572  *
22573  *   Arguments: dev  - the device number
22574  *		arg  - pointer to user provided dk_geom structure used to set
22575  *			the controller's notion of the current geometry.
22576  *		flag - this argument is a pass through to ddi_copyxxx()
22577  *		       directly from the mode argument of ioctl().
22578  *
22579  * Return Code: 0
22580  *		EFAULT
22581  *		ENXIO
22582  *		EIO
22583  */
22584 
22585 static int
22586 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22587 {
22588 	struct sd_lun	*un = NULL;
22589 	struct dk_geom	*tmp_geom;
22590 	struct dk_map	*lp;
22591 	int		rval = 0;
22592 	int		i;
22593 
22594 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22595 		return (ENXIO);
22596 	}
22597 
22598 	/*
22599 	 * Make sure there is no reservation conflict on the lun.
22600 	 */
22601 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22602 		return (EACCES);
22603 	}
22604 
22605 #if defined(__i386) || defined(__amd64)
22606 	if (un->un_solaris_size == 0) {
22607 		return (EIO);
22608 	}
22609 #endif
22610 
22611 	/*
22612 	 * We need to copy the user specified geometry into local
22613 	 * storage and then update the softstate. We don't want to hold
22614 	 * the mutex and copyin directly from the user to the soft state
22615 	 */
22616 	tmp_geom = (struct dk_geom *)
22617 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22618 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22619 	if (rval != 0) {
22620 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22621 		return (EFAULT);
22622 	}
22623 
22624 	mutex_enter(SD_MUTEX(un));
22625 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22626 	for (i = 0; i < NDKMAP; i++) {
22627 		lp  = &un->un_map[i];
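		/*
		 * The partition offset (in blocks) is its starting
		 * cylinder times the blocks per cylinder
		 * (nhead * nsect).
		 */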
22628 		un->un_offset[i] =
22629 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22630 #if defined(__i386) || defined(__amd64)
22631 		un->un_offset[i] += un->un_solaris_offset;
22632 #endif
22633 	}
22634 	un->un_f_geometry_is_valid = FALSE;
22635 	mutex_exit(SD_MUTEX(un));
22636 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22637 
22638 	return (rval);
22639 }
22640 
22641 
22642 /*
22643  *    Function: sd_dkio_get_partition
22644  *
22645  * Description: This routine is the driver entry point for handling user
22646  *		requests to get the partition table (DKIOCGAPART).
22647  *
22648  *   Arguments: dev  - the device number
22649  *		arg  - pointer to user provided dk_allmap structure specifying
22650  *			the controller's notion of the current partition table.
22651  *		flag - this argument is a pass through to ddi_copyxxx()
22652  *		       directly from the mode argument of ioctl().
22653  *		geom_validated - flag indicating if the device geometry has been
22654  *				 previously validated in the sdioctl routine.
22655  *
22656  * Return Code: 0
22657  *		EFAULT
22658  *		ENXIO
22659  *		EIO
22660  */
22661 
22662 static int
22663 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22664 {
22665 	struct sd_lun	*un = NULL;
22666 	int		rval = 0;
22667 	int		size;
22668 
22669 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22670 		return (ENXIO);
22671 	}
22672 
22673 	/*
22674 	 * Make sure the geometry is valid before getting the partition
22675 	 * information.
22676 	 */
22677 	mutex_enter(SD_MUTEX(un));
22678 	if (geom_validated == FALSE) {
22679 		/*
22680 		 * sd_validate_geometry does not spin a disk up
22681 		 * if it was spun down. We need to make sure it
22682 		 * is ready before validating the geometry.
22683 		 */
22684 		mutex_exit(SD_MUTEX(un));
22685 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22686 			return (rval);
22687 		}
22688 		mutex_enter(SD_MUTEX(un));
22689 
22690 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22691 			mutex_exit(SD_MUTEX(un));
22692 			return (rval);
22693 		}
22694 	}
22695 	mutex_exit(SD_MUTEX(un));
22696 
22697 	/*
22698 	 * It is possible that un_solaris_size is 0 (uninitialized)
22699 	 * after sd_unit_attach; a reservation conflict can cause
22700 	 * this. Thus, the zero check of un_solaris_size must occur
22701 	 * after the sd_validate_geometry() call.
22702 	 */
22703 #if defined(__i386) || defined(__amd64)
22704 	if (un->un_solaris_size == 0) {
22705 		return (EIO);
22706 	}
22707 #endif
22708 
22709 #ifdef _MULTI_DATAMODEL
22710 	switch (ddi_model_convert_from(flag & FMODELS)) {
22711 	case DDI_MODEL_ILP32: {
22712 		struct dk_map32 dk_map32[NDKMAP];
22713 		int		i;
22714 
22715 		for (i = 0; i < NDKMAP; i++) {
22716 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22717 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22718 		}
22719 		size = NDKMAP * sizeof (struct dk_map32);
22720 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22721 		if (rval != 0) {
22722 			rval = EFAULT;
22723 		}
22724 		break;
22725 	}
22726 	case DDI_MODEL_NONE:
22727 		size = NDKMAP * sizeof (struct dk_map);
22728 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22729 		if (rval != 0) {
22730 			rval = EFAULT;
22731 		}
22732 		break;
22733 	}
22734 #else /* ! _MULTI_DATAMODEL */
22735 	size = NDKMAP * sizeof (struct dk_map);
22736 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22737 	if (rval != 0) {
22738 		rval = EFAULT;
22739 	}
22740 #endif /* _MULTI_DATAMODEL */
22741 	return (rval);
22742 }
22743 
22744 
22745 /*
22746  *    Function: sd_dkio_set_partition
22747  *
22748  * Description: This routine is the driver entry point for handling user
22749  *		requests to set the partition table (DKIOCSAPART). The actual
22750  *		device partition is not updated.
22751  *
22752  *   Arguments: dev  - the device number
22753  *		arg  - pointer to user provided dk_allmap structure used to set
22754  *			the controller's notion of the partition table.
22755  *		flag - this argument is a pass through to ddi_copyxxx()
22756  *		       directly from the mode argument of ioctl().
22757  *
22758  * Return Code: 0
22759  *		EINVAL
22760  *		EFAULT
22761  *		ENXIO
22762  *		EIO
22763  */
22764 
22765 static int
22766 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22767 {
22768 	struct sd_lun	*un = NULL;
22769 	struct dk_map	dk_map[NDKMAP];
22770 	struct dk_map	*lp;
22771 	int		rval = 0;
22772 	int		size;
22773 	int		i;
22774 #if defined(_SUNOS_VTOC_16)
22775 	struct dkl_partition	*vp;
22776 #endif
22777 
22778 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22779 		return (ENXIO);
22780 	}
22781 
22782 	/*
22783 	 * Set the map for all logical partitions.  We lock
22784 	 * the priority just to make sure an interrupt doesn't
22785 	 * come in while the map is half updated.
22786 	 */
22787 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22788 	mutex_enter(SD_MUTEX(un));
22789 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22790 		mutex_exit(SD_MUTEX(un));
22791 		return (ENOTSUP);
22792 	}
22793 	mutex_exit(SD_MUTEX(un));
22794 
22795 	/*
22796 	 * Make sure there is no reservation conflict on the lun.
22797 	 */
22798 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22799 		return (EACCES);
22800 	}
22801 
22802 #if defined(__i386) || defined(__amd64)
22803 	if (un->un_solaris_size == 0) {
22804 		return (EIO);
22805 	}
22806 #endif
22807 
22808 #ifdef _MULTI_DATAMODEL
22809 	switch (ddi_model_convert_from(flag & FMODELS)) {
22810 	case DDI_MODEL_ILP32: {
22811 		struct dk_map32 dk_map32[NDKMAP];
22812 
22813 		size = NDKMAP * sizeof (struct dk_map32);
22814 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22815 		if (rval != 0) {
22816 			return (EFAULT);
22817 		}
22818 		for (i = 0; i < NDKMAP; i++) {
22819 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22820 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22821 		}
22822 		break;
22823 	}
22824 	case DDI_MODEL_NONE:
22825 		size = NDKMAP * sizeof (struct dk_map);
22826 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22827 		if (rval != 0) {
22828 			return (EFAULT);
22829 		}
22830 		break;
22831 	}
22832 #else /* ! _MULTI_DATAMODEL */
22833 	size = NDKMAP * sizeof (struct dk_map);
22834 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22835 	if (rval != 0) {
22836 		return (EFAULT);
22837 	}
22838 #endif /* _MULTI_DATAMODEL */
22839 
22840 	mutex_enter(SD_MUTEX(un));
22841 	/* Note: The size used in this bcopy is set based upon the data model */
22842 	bcopy(dk_map, un->un_map, size);
22843 #if defined(_SUNOS_VTOC_16)
22844 	vp = (struct dkl_partition *)&(un->un_vtoc);
22845 #endif	/* defined(_SUNOS_VTOC_16) */
22846 	for (i = 0; i < NDKMAP; i++) {
22847 		lp  = &un->un_map[i];
22848 		un->un_offset[i] =
22849 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22850 #if defined(_SUNOS_VTOC_16)
22851 		vp->p_start = un->un_offset[i];
22852 		vp->p_size = lp->dkl_nblk;
22853 		vp++;
22854 #endif	/* defined(_SUNOS_VTOC_16) */
22855 #if defined(__i386) || defined(__amd64)
22856 		un->un_offset[i] += un->un_solaris_offset;
22857 #endif
22858 	}
22859 	mutex_exit(SD_MUTEX(un));
22860 	return (rval);
22861 }
22862 
22863 
22864 /*
22865  *    Function: sd_dkio_get_vtoc
22866  *
22867  * Description: This routine is the driver entry point for handling user
22868  *		requests to get the current volume table of contents
22869  *		(DKIOCGVTOC).
22870  *
22871  *   Arguments: dev  - the device number
22872  *		arg  - pointer to user provided vtoc structure specifying
22873  *			the current vtoc.
22874  *		flag - this argument is a pass through to ddi_copyxxx()
22875  *		       directly from the mode argument of ioctl().
22876  *		geom_validated - flag indicating if the device geometry has been
22877  *				 previously validated in the sdioctl routine.
22878  *
22879  * Return Code: 0
22880  *		EFAULT
22881  *		ENXIO
22882  *		EIO
22883  */
22884 
22885 static int
22886 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22887 {
22888 	struct sd_lun	*un = NULL;
22889 #if defined(_SUNOS_VTOC_8)
22890 	struct vtoc	user_vtoc;
22891 #endif	/* defined(_SUNOS_VTOC_8) */
22892 	int		rval = 0;
22893 
22894 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22895 		return (ENXIO);
22896 	}
22897 
22898 	mutex_enter(SD_MUTEX(un));
22899 	if (geom_validated == FALSE) {
22900 		/*
22901 		 * sd_validate_geometry does not spin a disk up
22902 		 * if it was spun down. We need to make sure it
22903 		 * is ready.
22904 		 */
22905 		mutex_exit(SD_MUTEX(un));
22906 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22907 			return (rval);
22908 		}
22909 		mutex_enter(SD_MUTEX(un));
22910 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22911 			mutex_exit(SD_MUTEX(un));
22912 			return (rval);
22913 		}
22914 	}
22915 
22916 #if defined(_SUNOS_VTOC_8)
22917 	sd_build_user_vtoc(un, &user_vtoc);
22918 	mutex_exit(SD_MUTEX(un));
22919 
22920 #ifdef _MULTI_DATAMODEL
22921 	switch (ddi_model_convert_from(flag & FMODELS)) {
22922 	case DDI_MODEL_ILP32: {
22923 		struct vtoc32 user_vtoc32;
22924 
22925 		vtoctovtoc32(user_vtoc, user_vtoc32);
22926 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22927 		    sizeof (struct vtoc32), flag)) {
22928 			return (EFAULT);
22929 		}
22930 		break;
22931 	}
22932 
22933 	case DDI_MODEL_NONE:
22934 		if (ddi_copyout(&user_vtoc, (void *)arg,
22935 		    sizeof (struct vtoc), flag)) {
22936 			return (EFAULT);
22937 		}
22938 		break;
22939 	}
22940 #else /* ! _MULTI_DATAMODEL */
22941 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22942 		return (EFAULT);
22943 	}
22944 #endif /* _MULTI_DATAMODEL */
22945 
22946 #elif defined(_SUNOS_VTOC_16)
22947 	mutex_exit(SD_MUTEX(un));
22948 
22949 #ifdef _MULTI_DATAMODEL
22950 	/*
22951 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22952 	 * 32-bit to maintain compatibility with existing on-disk
22953 	 * structures.  Thus, we need to convert the structure when copying
22954 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22955 	 * program.  If the target is a 32-bit program, then no conversion
22956 	 * is necessary.
22957 	 */
22958 	/* LINTED: logical expression always true: op "||" */
22959 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22960 	switch (ddi_model_convert_from(flag & FMODELS)) {
22961 	case DDI_MODEL_ILP32:
22962 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22963 		    sizeof (un->un_vtoc), flag)) {
22964 			return (EFAULT);
22965 		}
22966 		break;
22967 
22968 	case DDI_MODEL_NONE: {
22969 		struct vtoc user_vtoc;
22970 
22971 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22972 		if (ddi_copyout(&user_vtoc, (void *)arg,
22973 		    sizeof (struct vtoc), flag)) {
22974 			return (EFAULT);
22975 		}
22976 		break;
22977 	}
22978 	}
22979 #else /* ! _MULTI_DATAMODEL */
22980 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
22981 	    flag)) {
22982 		return (EFAULT);
22983 	}
22984 #endif /* _MULTI_DATAMODEL */
22985 #else
22986 #error "No VTOC format defined."
22987 #endif
22988 
22989 	return (rval);
22990 }
22991 
22992 static int
22993 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22994 {
22995 	struct sd_lun	*un = NULL;
22996 	dk_efi_t	user_efi;
22997 	int		rval = 0;
22998 	void		*buffer;
22999 
23000 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23001 		return (ENXIO);
23002 
23003 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23004 		return (EFAULT);
23005 
23006 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
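	/* dki_data_64 carries the user buffer pointer for all data models */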
23007 
23008 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23009 	    (user_efi.dki_length > un->un_max_xfer_size))
23010 		return (EINVAL);
23011 
23012 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23013 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23014 	    user_efi.dki_lba, SD_PATH_DIRECT);
23015 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23016 	    user_efi.dki_length, flag) != 0)
23017 		rval = EFAULT;
23018 
23019 	kmem_free(buffer, user_efi.dki_length);
23020 	return (rval);
23021 }
23022 
23023 /*
23024  *    Function: sd_build_user_vtoc
23025  *
23026  * Description: This routine populates a pass by reference variable with the
23027  *		current volume table of contents.
23028  *
23029  *   Arguments: un - driver soft state (unit) structure
23030  *		user_vtoc - pointer to vtoc structure to be populated
23031  */
23032 
23033 static void
23034 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23035 {
23036 	struct dk_map2		*lpart;
23037 	struct dk_map		*lmap;
23038 	struct partition	*vpart;
23039 	int			nblks;
23040 	int			i;
23041 
23042 	ASSERT(mutex_owned(SD_MUTEX(un)));
23043 
23044 	/*
23045 	 * Return vtoc structure fields in the provided VTOC area, addressed
23046 	 * by *vtoc.
23047 	 */
23048 	bzero(user_vtoc, sizeof (struct vtoc));
23049 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23050 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23051 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23052 	user_vtoc->v_sanity	= VTOC_SANE;
23053 	user_vtoc->v_version	= un->un_vtoc.v_version;
23054 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23055 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23056 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23057 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23058 	    sizeof (un->un_vtoc.v_reserved));
23059 	/*
23060 	 * Convert partitioning information.
23061 	 *
23062 	 * Note the conversion from starting cylinder number
23063 	 * to starting sector number.
23064 	 */
23065 	lmap = un->un_map;
23066 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23067 	vpart = user_vtoc->v_part;
23068 
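	/* Blocks (sectors) per cylinder */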
23069 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23070 
23071 	for (i = 0; i < V_NUMPAR; i++) {
23072 		vpart->p_tag	= lpart->p_tag;
23073 		vpart->p_flag	= lpart->p_flag;
23074 		vpart->p_start	= lmap->dkl_cylno * nblks;
23075 		vpart->p_size	= lmap->dkl_nblk;
23076 		lmap++;
23077 		lpart++;
23078 		vpart++;
23079 
23080 		/* Propagate the per-partition timestamps (4364927) */
23081 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23082 	}
23083 
23084 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23085 }
23086 
23087 static int
23088 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
23089 {
23090 	struct sd_lun		*un = NULL;
23091 	struct partition64	p64;
23092 	int			rval = 0;
23093 	uint_t			nparts;
23094 	efi_gpe_t		*partitions;
23095 	efi_gpt_t		*buffer;
23096 	diskaddr_t		gpe_lba;
23097 
23098 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23099 		return (ENXIO);
23100 	}
23101 
23102 	if (ddi_copyin((const void *)arg, &p64,
23103 	    sizeof (struct partition64), flag)) {
23104 		return (EFAULT);
23105 	}
23106 
23107 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
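	/* Read the primary GPT header at LBA 1 to locate the entry array */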
23108 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
23109 	    1, SD_PATH_DIRECT);
23110 	if (rval != 0)
23111 		goto done_error;
23112 
23113 	sd_swap_efi_gpt(buffer);
23114 
23115 	if ((rval = sd_validate_efi(buffer)) != 0)
23116 		goto done_error;
23117 
23118 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23119 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23120 	if (p64.p_partno >= nparts) {
23121 		/* couldn't find it */
23122 		rval = ESRCH;
23123 		goto done_error;
23124 	}
23125 	/*
23126 	 * If the requested partition entry lies beyond the normal
23127 	 * 16K (EFI_MIN_ARRAY_SIZE) block, adjust the LBA accordingly
23128 	 */
23129 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23130 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
23131 	    gpe_lba, SD_PATH_DIRECT);
23132 	if (rval) {
23133 		goto done_error;
23134 	}
23135 	partitions = (efi_gpe_t *)buffer;
23136 
23137 	sd_swap_efi_gpe(nparts, partitions);
23138 
23139 	partitions += p64.p_partno;
23140 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23141 	    sizeof (struct uuid));
23142 	p64.p_start = partitions->efi_gpe_StartingLBA;
23143 	p64.p_size = partitions->efi_gpe_EndingLBA -
23144 	    p64.p_start + 1;
23145 
23146 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23147 		rval = EFAULT;
23148 
23149 done_error:
23150 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23151 	return (rval);
23152 }
23153 
23154 
23155 /*
23156  *    Function: sd_dkio_set_vtoc
23157  *
23158  * Description: This routine is the driver entry point for handling user
23159  *		requests to set the current volume table of contents
23160  *		(DKIOCSVTOC).
23161  *
23162  *   Arguments: dev  - the device number
23163  *		arg  - pointer to user provided vtoc structure used to set the
23164  *			current vtoc.
23165  *		flag - this argument is a pass through to ddi_copyxxx()
23166  *		       directly from the mode argument of ioctl().
23167  *
23168  * Return Code: 0
23169  *		EFAULT
23170  *		ENXIO
23171  *		EINVAL
23172  *		ENOTSUP
23173  */
23174 
23175 static int
23176 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23177 {
23178 	struct sd_lun	*un = NULL;
23179 	struct vtoc	user_vtoc;
23180 	int		rval = 0;
23181 
23182 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23183 		return (ENXIO);
23184 	}
23185 
23186 #if defined(__i386) || defined(__amd64)
23187 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23188 		return (EINVAL);
23189 	}
23190 #endif
23191 
23192 #ifdef _MULTI_DATAMODEL
23193 	switch (ddi_model_convert_from(flag & FMODELS)) {
23194 	case DDI_MODEL_ILP32: {
23195 		struct vtoc32 user_vtoc32;
23196 
23197 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23198 		    sizeof (struct vtoc32), flag)) {
23199 			return (EFAULT);
23200 		}
23201 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23202 		break;
23203 	}
23204 
23205 	case DDI_MODEL_NONE:
23206 		if (ddi_copyin((const void *)arg, &user_vtoc,
23207 		    sizeof (struct vtoc), flag)) {
23208 			return (EFAULT);
23209 		}
23210 		break;
23211 	}
23212 #else /* ! _MULTI_DATAMODEL */
23213 	if (ddi_copyin((const void *)arg, &user_vtoc,
23214 	    sizeof (struct vtoc), flag)) {
23215 		return (EFAULT);
23216 	}
23217 #endif /* _MULTI_DATAMODEL */
23218 
23219 	mutex_enter(SD_MUTEX(un));
23220 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23221 		mutex_exit(SD_MUTEX(un));
23222 		return (ENOTSUP);
23223 	}
23224 	if (un->un_g.dkg_ncyl == 0) {
23225 		mutex_exit(SD_MUTEX(un));
23226 		return (EINVAL);
23227 	}
23228 
23229 	mutex_exit(SD_MUTEX(un));
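	/*
	 * Writing a VTOC label invalidates any existing EFI label, so
	 * clear it and replace the EFI "wd" minor nodes with the
	 * VTOC-style "h" nodes.
	 */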
23230 	sd_clear_efi(un);
23231 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23232 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23233 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23234 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23235 	    un->un_node_type, NULL);
23236 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23237 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23238 	    un->un_node_type, NULL);
23239 	mutex_enter(SD_MUTEX(un));
23240 
23241 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23242 		if ((rval = sd_write_label(dev)) == 0) {
23243 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23244 			    != 0) {
23245 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23246 				    "sd_dkio_set_vtoc: "
23247 				    "Failed validate geometry\n");
23248 			}
23249 		}
23250 	}
23251 
23252 	/*
23253 	 * If sd_build_label_vtoc, or sd_write_label failed above write the
23254 	 * devid anyway, what can it hurt? Also preserve the device id by
23255 	 * writing to the disk acyl for the case where a devid has been
23256 	 * fabricated.
23257 	 */
23258 	if (un->un_f_devid_supported &&
23259 	    (un->un_f_opt_fab_devid == TRUE)) {
23260 		if (un->un_devid == NULL) {
23261 			sd_register_devid(un, SD_DEVINFO(un),
23262 			    SD_TARGET_IS_UNRESERVED);
23263 		} else {
23264 			/*
23265 			 * The device id for this disk has been
23266 			 * fabricated. Fabricated device id's are
23267 			 * managed by storing them in the last 2
23268 			 * available sectors on the drive. The device
23269 			 * id must be preserved by writing it back out
23270 			 * to this location.
23271 			 */
23272 			if (sd_write_deviceid(un) != 0) {
23273 				ddi_devid_free(un->un_devid);
23274 				un->un_devid = NULL;
23275 			}
23276 		}
23277 	}
23278 	mutex_exit(SD_MUTEX(un));
23279 	return (rval);
23280 }
23281 
23282 
23283 /*
23284  *    Function: sd_build_label_vtoc
23285  *
23286  * Description: This routine updates the driver soft state current volume table
23287  *		of contents based on a user specified vtoc.
23288  *
23289  *   Arguments: un - driver soft state (unit) structure
23290  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23291  *			    to update the driver soft state.
23292  *
23293  * Return Code: 0
23294  *		EINVAL
23295  */
23296 
23297 static int
23298 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23299 {
23300 	struct dk_map		*lmap;
23301 	struct partition	*vpart;
23302 	int			nblks;
23303 #if defined(_SUNOS_VTOC_8)
23304 	int			ncyl;
23305 	struct dk_map2		*lpart;
23306 #endif	/* defined(_SUNOS_VTOC_8) */
23307 	int			i;
23308 
23309 	ASSERT(mutex_owned(SD_MUTEX(un)));
23310 
23311 	/* Sanity-check the vtoc */
23312 	if (user_vtoc->v_sanity != VTOC_SANE ||
23313 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23314 	    user_vtoc->v_nparts != V_NUMPAR) {
23315 		return (EINVAL);
23316 	}
23317 
23318 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23319 	if (nblks == 0) {
23320 		return (EINVAL);
23321 	}
23322 
23323 #if defined(_SUNOS_VTOC_8)
23324 	vpart = user_vtoc->v_part;
23325 	for (i = 0; i < V_NUMPAR; i++) {
23326 		if ((vpart->p_start % nblks) != 0) {
23327 			return (EINVAL);
23328 		}
23329 		ncyl = vpart->p_start / nblks;
23330 		ncyl += vpart->p_size / nblks;
23331 		if ((vpart->p_size % nblks) != 0) {
23332 			ncyl++;
23333 		}
23334 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23335 			return (EINVAL);
23336 		}
23337 		vpart++;
23338 	}
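	/*
	 * Illustrative sketch of the bound check above (hypothetical
	 * numbers): with dkg_nhead = 16 and dkg_nsect = 63, nblks = 1008
	 * blocks per cylinder.  A slice with p_start = 2016 and
	 * p_size = 1500 starts on cylinder 2 and, since 1500 % 1008 != 0,
	 * is rounded up to end on cylinder boundary 4 (ncyl = 2 + 1 + 1),
	 * so it is rejected unless dkg_ncyl >= 4.
	 */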
23339 #endif	/* defined(_SUNOS_VTOC_8) */
23340 
23341 	/* Put appropriate vtoc structure fields into the disk label */
23342 #if defined(_SUNOS_VTOC_16)
23343 	/*
23344 	 * The vtoc is always a 32bit data structure to maintain the
23345 	 * The vtoc is always a 32-bit data structure to maintain the
23346 	 */
23347 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23348 
23349 	/*
23350 	 * in the 16-slice vtoc, starting sectors are expressed in
23351 	 * In the 16-slice vtoc, starting sectors are expressed as
23352 	 */
23353 	lmap = un->un_map;
23354 	vpart = user_vtoc->v_part;
23355 
23356 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23357 		lmap->dkl_cylno = vpart->p_start / nblks;
23358 		lmap->dkl_nblk = vpart->p_size;
23359 	}
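	/*
	 * Sketch of the conversion above (hypothetical numbers): with
	 * nblks = 1008, a slice at p_start = 4032 with p_size = 2048
	 * becomes dkl_cylno = 4, dkl_nblk = 2048; the start remains
	 * relative to the Solaris fdisk partition, not to the absolute
	 * start of the disk.
	 */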
23360 
23361 #elif defined(_SUNOS_VTOC_8)
23362 
23363 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23364 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23365 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23366 
23367 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23368 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23369 
23370 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23371 
23372 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23373 
23374 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23375 	    sizeof (un->un_vtoc.v_reserved));
23376 
23377 	/*
23378 	 * Note the conversion from starting sector number
23379 	 * to starting cylinder number.
23380 	 * Return error if division results in a remainder.
23381 	 */
23382 	lmap = un->un_map;
23383 	lpart = un->un_vtoc.v_part;
23384 	vpart = user_vtoc->v_part;
23385 
23386 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23387 		lpart->p_tag  = vpart->p_tag;
23388 		lpart->p_flag = vpart->p_flag;
23389 		lmap->dkl_cylno = vpart->p_start / nblks;
23390 		lmap->dkl_nblk = vpart->p_size;
23391 
23392 		lmap++;
23393 		lpart++;
23394 		vpart++;
23395 
23396 		/* (4387723) */
23397 #ifdef _LP64
23398 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23399 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23400 		} else {
23401 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23402 		}
23403 #else
23404 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23405 #endif
23406 	}
23407 
23408 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23409 #else
23410 #error "No VTOC format defined."
23411 #endif
23412 	return (0);
23413 }
23414 
23415 /*
23416  *    Function: sd_clear_efi
23417  *
23418  * Description: This routine clears all EFI labels.
23419  *
23420  *   Arguments: un - driver soft state (unit) structure
23421  *
23422  * Return Code: void
23423  */
23424 
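/*
 * On-disk layout assumed below (a sketch; the LBAs match the reads and
 * writes issued by this routine): the primary GPT header lives at LBA 1,
 * the standard backup GPT header at the last LBA (capacity - 1), and a
 * legacy off-by-1 backup may instead be found at capacity - 2:
 *
 *	LBA 0		protective MBR (not touched here)
 *	LBA 1		primary GPT header	<- cleared first
 *	LBA cap-1	backup GPT header	<- cleared if valid
 *	LBA cap-2	legacy backup header	<- checked if cap-1 invalid
 */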
23425 static void
23426 sd_clear_efi(struct sd_lun *un)
23427 {
23428 	efi_gpt_t	*gpt;
23429 	uint_t		lbasize;
23430 	uint64_t	cap;
23431 	int rval;
23432 
23433 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23434 
23435 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23436 
23437 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23438 		goto done;
23439 	}
23440 
23441 	sd_swap_efi_gpt(gpt);
23442 	rval = sd_validate_efi(gpt);
23443 	if (rval == 0) {
23444 		/* clear primary */
23445 		bzero(gpt, sizeof (efi_gpt_t));
23446 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23447 			SD_PATH_DIRECT))) {
23448 			SD_INFO(SD_LOG_IO_PARTITION, un,
23449 				"sd_clear_efi: clear primary label failed\n");
23450 		}
23451 	}
23452 	/* the backup */
23453 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23454 	    SD_PATH_DIRECT);
23455 	if (rval) {
23456 		goto done;
23457 	}
23458 	/*
23459 	 * The MMC standard allows READ CAPACITY to be
23460 	 * inaccurate by a bounded amount (in the interest of
23461 	 * response latency).  As a result, failed READs are
23462 	 * commonplace (due to the reading of metadata and not
23463 	 * data). Depending on the per-Vendor/drive Sense data,
23464 	 * the failed READ can cause many (unnecessary) retries.
23465 	 */
23466 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23467 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23468 		SD_PATH_DIRECT)) != 0) {
23469 		goto done;
23470 	}
23471 	sd_swap_efi_gpt(gpt);
23472 	rval = sd_validate_efi(gpt);
23473 	if (rval == 0) {
23474 		/* clear backup */
23475 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23476 			cap-1);
23477 		bzero(gpt, sizeof (efi_gpt_t));
23478 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23479 		    cap-1, SD_PATH_DIRECT))) {
23480 			SD_INFO(SD_LOG_IO_PARTITION, un,
23481 				"sd_clear_efi: clear backup label failed\n");
23482 		}
23483 	} else {
23484 		/*
23485 		 * Refer to comments related to off-by-1 at the
23486 		 * header of this file
23487 		 */
23488 		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23489 		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23490 			SD_PATH_DIRECT)) != 0) {
23491 			goto done;
23492 		}
23493 		sd_swap_efi_gpt(gpt);
23494 		rval = sd_validate_efi(gpt);
23495 		if (rval == 0) {
23496 			/* clear legacy backup EFI label */
23497 			SD_TRACE(SD_LOG_IOCTL, un,
23498 			    "sd_clear_efi clear backup@%lu\n", cap-2);
23499 			bzero(gpt, sizeof (efi_gpt_t));
23500 			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23501 			    cap-2, SD_PATH_DIRECT))) {
23502 				SD_INFO(SD_LOG_IO_PARTITION,
23503 				    un, "sd_clear_efi: "
23504 				    " clear legacy backup label failed\n");
23505 			}
23506 		}
23507 	}
23508 
23509 done:
23510 	kmem_free(gpt, sizeof (efi_gpt_t));
23511 }
23512 
23513 /*
23514  *    Function: sd_set_vtoc
23515  *
23516  * Description: This routine writes the disk label (primary and backup
23517  *		copies) to the appropriate locations on the device.
23518  *
23519  *   Arguments: un  - driver soft state (unit) structure
23520  *		dkl - the label data to be written
23521  * Return Code: 0, or an errno-type value from the transport routines.
23522  */
23523 
23524 static int
23525 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23526 {
23527 	void			*shadow_buf;
23528 	uint_t			label_addr;
23529 	int			sec;
23530 	int			blk;
23531 	int			head;
23532 	int			cyl;
23533 	int			rval;
23534 
23535 #if defined(__i386) || defined(__amd64)
23536 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23537 #else
23538 	/* Write the primary label at block 0 of the solaris partition. */
23539 	label_addr = 0;
23540 #endif
23541 
23542 	if (NOT_DEVBSIZE(un)) {
23543 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23544 		/*
23545 		 * Read the target's first block.
23546 		 */
23547 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23548 		    un->un_tgt_blocksize, label_addr,
23549 		    SD_PATH_STANDARD)) != 0) {
23550 			goto exit;
23551 		}
23552 		/*
23553 		 * Copy the contents of the label into the shadow buffer
23554 		 * which is of the size of target block size.
23555 		 */
23556 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23557 	}
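	/*
	 * Sketch of the read-modify-write above, assuming a hypothetical
	 * 2048-byte target block and a 512-byte dk_label: the full
	 * 2048-byte block is read into shadow_buf, its first 512 bytes are
	 * overlaid with the label, and the whole block is written back so
	 * the remaining 1536 bytes are preserved.
	 */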
23558 
23559 	/* Write the primary label */
23560 	if (NOT_DEVBSIZE(un)) {
23561 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23562 		    label_addr, SD_PATH_STANDARD);
23563 	} else {
23564 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23565 		    label_addr, SD_PATH_STANDARD);
23566 	}
23567 	if (rval != 0) {
23568 		goto exit;	/* free the shadow buffer (if any) on failure */
23569 	}
23570 
23571 	/*
23572 	 * Calculate where the backup labels go.  They are always on
23573 	 * the last alternate cylinder, but some older drives put them
23574 	 * on head 2 instead of the last head.	They are always on the
23575 	 * first 5 odd sectors of the appropriate track.
23576 	 *
23577 	 * We have no choice at this point, but to believe that the
23578 	 * disk label is valid.	 Use the geometry of the disk
23579 	 * as described in the label.
23580 	 */
23581 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23582 	head = dkl->dkl_nhead - 1;
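	/*
	 * Illustrative sketch (hypothetical numbers): with dkl_ncyl = 1021,
	 * dkl_acyl = 2, dkl_nhead = 16, dkl_nsect = 63 and dkl_apc = 0,
	 * the backup labels land on the last alternate cylinder
	 * (1021 + 2 - 1 = 1022), last head (15), odd sectors 1 through 9:
	 *
	 *	blk = (1022 * (16 * 63)) + (15 * 63) + sec
	 *	    = 1030176 + 945 + {1, 3, 5, 7, 9}
	 */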
23583 
23584 	/*
23585 	 * Write and verify the backup labels. Make sure we don't try to
23586 	 * write past the last sector of the track (dkl_nsect).
23587 	 */
23588 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23589 		blk = (daddr_t)(
23590 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23591 		    (head * dkl->dkl_nsect) + sec);
23592 #if defined(__i386) || defined(__amd64)
23593 		blk += un->un_solaris_offset;
23594 #endif
23595 		if (NOT_DEVBSIZE(un)) {
23596 			uint64_t	tblk;
23597 			/*
23598 			 * Need to read the block first for read modify write.
23599 			 */
23600 			tblk = (uint64_t)blk;
23601 			blk = (int)((tblk * un->un_sys_blocksize) /
23602 			    un->un_tgt_blocksize);
23603 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23604 			    un->un_tgt_blocksize, blk,
23605 			    SD_PATH_STANDARD)) != 0) {
23606 				goto exit;
23607 			}
23608 			/*
23609 			 * Modify the shadow buffer with the label.
23610 			 */
23611 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23612 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23613 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23614 		} else {
23615 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23616 			    blk, SD_PATH_STANDARD);
23617 			SD_INFO(SD_LOG_IO_PARTITION, un,
23618 			    "sd_set_vtoc: wrote backup label %d\n", blk);
23619 		}
23620 		if (rval != 0) {
23621 			goto exit;
23622 		}
23623 	}
23624 exit:
23625 	if (NOT_DEVBSIZE(un)) {
23626 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23627 	}
23628 	return (rval);
23629 }
23630 
23631 /*
23632  *    Function: sd_clear_vtoc
23633  *
23634  * Description: This routine clears out the VTOC labels.
23635  *
23636  *   Arguments: un - driver soft state (unit) structure
23637  *
23638  * Return: void
23639  */
23640 
23641 static void
23642 sd_clear_vtoc(struct sd_lun *un)
23643 {
23644 	struct dk_label		*dkl;
23645 
23646 	mutex_exit(SD_MUTEX(un));
23647 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23648 	mutex_enter(SD_MUTEX(un));
23649 	/*
23650 	 * sd_set_vtoc uses these fields in order to figure out
23651 	 * where to overwrite the backup labels
23652 	 */
23653 	dkl->dkl_apc    = un->un_g.dkg_apc;
23654 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23655 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23656 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23657 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23658 	mutex_exit(SD_MUTEX(un));
23659 	(void) sd_set_vtoc(un, dkl);
23660 	kmem_free(dkl, sizeof (struct dk_label));
23661 
23662 	mutex_enter(SD_MUTEX(un));
23663 }
23664 
23665 /*
23666  *    Function: sd_write_label
23667  *
23668  * Description: This routine will validate and write the driver soft state vtoc
23669  *		contents to the device.
23670  *
23671  *   Arguments: dev - the device number
23672  *
23673  * Return Code: the code returned by sd_send_scsi_cmd()
23674  *		0
23675  *		EINVAL
23676  *		ENXIO
23677  *		ENOMEM
23678  */
23679 
23680 static int
23681 sd_write_label(dev_t dev)
23682 {
23683 	struct sd_lun		*un;
23684 	struct dk_label		*dkl;
23685 	short			sum;
23686 	short			*sp;
23687 	int			i;
23688 	int			rval;
23689 
23690 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23691 	    (un->un_state == SD_STATE_OFFLINE)) {
23692 		return (ENXIO);
23693 	}
23694 	ASSERT(mutex_owned(SD_MUTEX(un)));
23695 	mutex_exit(SD_MUTEX(un));
23696 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23697 	mutex_enter(SD_MUTEX(un));
23698 
23699 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23700 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23701 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23702 	dkl->dkl_apc	= un->un_g.dkg_apc;
23703 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23704 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23705 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23706 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23707 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23708 
23709 #if defined(_SUNOS_VTOC_8)
23710 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23711 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23712 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23713 	for (i = 0; i < NDKMAP; i++) {
23714 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23715 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23716 	}
23717 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23718 #elif defined(_SUNOS_VTOC_16)
23719 	dkl->dkl_skew	= un->un_dkg_skew;
23720 #else
23721 #error "No VTOC format defined."
23722 #endif
23723 
23724 	dkl->dkl_magic			= DKL_MAGIC;
23725 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23726 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23727 
23728 	/* Construct checksum for the new disk label */
23729 	sum = 0;
23730 	sp = (short *)dkl;
23731 	i = sizeof (struct dk_label) / sizeof (short);
23732 	while (i--) {
23733 		sum ^= *sp++;
23734 	}
23735 	dkl->dkl_cksum = sum;
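	/*
	 * With dkl_cksum chosen this way, XOR-ing every short in the label
	 * (including dkl_cksum itself) yields zero, which is how readers
	 * verify a label.  A minimal verification sketch:
	 *
	 *	short c = 0, *p = (short *)dkl;
	 *	int n = sizeof (struct dk_label) / sizeof (short);
	 *	while (n--)
	 *		c ^= *p++;
	 *	ASSERT(c == 0);
	 */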
23736 
23737 	mutex_exit(SD_MUTEX(un));
23738 
23739 	rval = sd_set_vtoc(un, dkl);
23741 	kmem_free(dkl, sizeof (struct dk_label));
23742 	mutex_enter(SD_MUTEX(un));
23743 	return (rval);
23744 }
23745 
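/*
 *    Function: sd_dkio_set_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to set the device EFI label (DKIOCSETEFI). Any
 *		existing VTOC label (on disk and in the soft state) is
 *		cleared first, and the "h"/"h,raw" minor nodes are
 *		replaced with "wd"/"wd,raw" nodes.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the EFI data to be written.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 */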
23746 static int
23747 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23748 {
23749 	struct sd_lun	*un = NULL;
23750 	dk_efi_t	user_efi;
23751 	int		rval = 0;
23752 	void		*buffer;
23753 
23754 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23755 		return (ENXIO);
23756 
23757 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23758 		return (EFAULT);
23759 
23760 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23761 
23762 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23763 	    (user_efi.dki_length > un->un_max_xfer_size))
23764 		return (EINVAL);
23765 
23766 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23767 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23768 		rval = EFAULT;
23769 	} else {
23770 		/*
23771 		 * If a valid VTOC is present, clear the on-disk VTOC labels
23772 		 * and the soft state vtoc before writing the EFI label.
23773 		 */
23774 		mutex_enter(SD_MUTEX(un));
23775 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23776 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23777 				"sd_dkio_set_efi: CLEAR VTOC\n");
23778 			sd_clear_vtoc(un);
23779 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23780 			mutex_exit(SD_MUTEX(un));
23781 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23782 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23783 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23784 			    S_IFBLK,
23785 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23786 			    un->un_node_type, NULL);
23787 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23788 			    S_IFCHR,
23789 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23790 			    un->un_node_type, NULL);
23791 		} else
23792 			mutex_exit(SD_MUTEX(un));
23793 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23794 		    user_efi.dki_lba, SD_PATH_DIRECT);
23795 		if (rval == 0) {
23796 			mutex_enter(SD_MUTEX(un));
23797 			un->un_f_geometry_is_valid = FALSE;
23798 			mutex_exit(SD_MUTEX(un));
23799 		}
23800 	}
23801 	kmem_free(buffer, user_efi.dki_length);
23802 	return (rval);
23803 }
23804 
23805 /*
23806  *    Function: sd_dkio_get_mboot
23807  *
23808  * Description: This routine is the driver entry point for handling user
23809  *		requests to get the current device mboot (DKIOCGMBOOT)
23810  *
23811  *   Arguments: dev  - the device number
23812  *		arg  - pointer to user provided mboot structure specifying
23813  *			the current mboot.
23814  *		flag - this argument is a pass through to ddi_copyxxx()
23815  *		       directly from the mode argument of ioctl().
23816  *
23817  * Return Code: 0
23818  *		EINVAL
23819  *		EFAULT
23820  *		ENXIO
23821  */
23822 
23823 static int
23824 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23825 {
23826 	struct sd_lun	*un;
23827 	struct mboot	*mboot;
23828 	int		rval;
23829 	size_t		buffer_size;
23830 
23831 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23832 	    (un->un_state == SD_STATE_OFFLINE)) {
23833 		return (ENXIO);
23834 	}
23835 
23836 	if (!un->un_f_mboot_supported || arg == NULL) {
23837 		return (EINVAL);
23838 	}
23839 
23840 	/*
23841 	 * Read the mboot block, located at absolute block 0 on the target.
23842 	 */
23843 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23844 
23845 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23846 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23847 
23848 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23849 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23850 	    SD_PATH_STANDARD)) == 0) {
23851 		if (ddi_copyout(mboot, (void *)arg,
23852 		    sizeof (struct mboot), flag) != 0) {
23853 			rval = EFAULT;
23854 		}
23855 	}
23856 	kmem_free(mboot, buffer_size);
23857 	return (rval);
23858 }
23859 
23860 
23861 /*
23862  *    Function: sd_dkio_set_mboot
23863  *
23864  * Description: This routine is the driver entry point for handling user
23865  *		requests to validate and set the device master boot
23866  *		(DKIOCSMBOOT).
23867  *
23868  *   Arguments: dev  - the device number
23869  *		arg  - pointer to user provided mboot structure used to set the
23870  *			master boot.
23871  *		flag - this argument is a pass through to ddi_copyxxx()
23872  *		       directly from the mode argument of ioctl().
23873  *
23874  * Return Code: 0
23875  *		EINVAL
23876  *		EFAULT
23877  *		ENXIO
23878  */
23879 
23880 static int
23881 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23882 {
23883 	struct sd_lun	*un = NULL;
23884 	struct mboot	*mboot = NULL;
23885 	int		rval;
23886 	ushort_t	magic;
23887 
23888 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23889 		return (ENXIO);
23890 	}
23891 
23892 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23893 
23894 	if (!un->un_f_mboot_supported) {
23895 		return (EINVAL);
23896 	}
23897 
23898 	if (arg == NULL) {
23899 		return (EINVAL);
23900 	}
23901 
23902 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23903 
23904 	if (ddi_copyin((const void *)arg, mboot,
23905 	    sizeof (struct mboot), flag) != 0) {
23906 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23907 		return (EFAULT);
23908 	}
23909 
23910 	/* Is this really a master boot record? */
23911 	magic = LE_16(mboot->signature);
23912 	if (magic != MBB_MAGIC) {
23913 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23914 		return (EINVAL);
23915 	}
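	/*
	 * Note: MBB_MAGIC is the standard 0xAA55 boot-record signature,
	 * stored little-endian on disk; the LE_16() conversion above makes
	 * the check endian-neutral.  Sketch of the tail of the sector:
	 *
	 *	offset 510:  0x55
	 *	offset 511:  0xAA
	 */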
23916 
23917 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23918 	    SD_PATH_STANDARD);
23919 
23920 	mutex_enter(SD_MUTEX(un));
23921 #if defined(__i386) || defined(__amd64)
23922 	if (rval == 0) {
23923 		/*
23924 		 * mboot has been written successfully.
23925 		 * update the fdisk and vtoc tables in memory
23926 		 */
23927 		rval = sd_update_fdisk_and_vtoc(un);
23928 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23929 			mutex_exit(SD_MUTEX(un));
23930 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23931 			return (rval);
23932 		}
23933 	}
23934 
23935 	/*
23936 	 * Even if the mboot write fails, write the devid anyway; what can it
23937 	 * hurt? Also preserve the device id by writing it to the disk acyl
23938 	 * for the case where a devid has been fabricated.
23939 	 */
23940 	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
23941 		if (un->un_devid == NULL) {
23942 			sd_register_devid(un, SD_DEVINFO(un),
23943 			    SD_TARGET_IS_UNRESERVED);
23944 		} else {
23945 			/*
23946 			 * The device id for this disk has been
23947 			 * fabricated. Fabricated device id's are
23948 			 * managed by storing them in the last 2
23949 			 * available sectors on the drive. The device
23950 			 * id must be preserved by writing it back out
23951 			 * to this location.
23952 			 */
23953 			if (sd_write_deviceid(un) != 0) {
23954 				ddi_devid_free(un->un_devid);
23955 				un->un_devid = NULL;
23956 			}
23957 		}
23958 	}
23959 
23960 #ifdef __lock_lint
23961 	sd_setup_default_geometry(un);
23962 #endif
23963 
23964 #else
23965 	if (rval == 0) {
23966 		/*
23967 		 * mboot has been written successfully.
23968 		 * set up the default geometry and VTOC
23969 		 */
23970 		if (un->un_blockcount <= DK_MAX_BLOCKS)
23971 			sd_setup_default_geometry(un);
23972 	}
23973 #endif
23974 	mutex_exit(SD_MUTEX(un));
23975 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23976 	return (rval);
23977 }
23978 
23979 
23980 /*
23981  *    Function: sd_setup_default_geometry
23982  *
23983  * Description: This local utility routine sets the default geometry as part of
23984  *		setting the device mboot.
23985  *
23986  *   Arguments: un - driver soft state (unit) structure
23987  *
23988  * Note: This may be redundant with sd_build_default_label.
23989  */
23990 
23991 static void
23992 sd_setup_default_geometry(struct sd_lun *un)
23993 {
23994 	/* zero out the soft state geometry and partition table. */
23995 	bzero(&un->un_g, sizeof (struct dk_geom));
23996 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23997 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
23998 	un->un_asciilabel[0] = '\0';
23999 
24000 	/*
24001 	 * For the rpm, we use the minimum for the disk.  For the heads,
24002 	 * cylinders, and sectors per track: if the capacity is <= 1GB,
24003 	 * then nhead = 64 and nsect = 32; else nhead = 255 and nsect = 63.
24004 	 * The capacity should equal the product C*H*S, so deriving ncyl
24005 	 * from the capacity causes some truncation of the size due to
24006 	 * round-off error.  For CD-ROMs this truncation can have adverse
24007 	 * side effects, so ncyl and nhead are returned as 1; note that
24008 	 * nsect, being a ushort, will then overflow for most CD-ROMs.
24011 	 */
24012 	if (ISCD(un)) {
24013 		un->un_g.dkg_ncyl = 1;
24014 		un->un_g.dkg_nhead = 1;
24015 		un->un_g.dkg_nsect = un->un_blockcount;
24016 	} else {
24017 		if (un->un_blockcount <= 0x1000) {
24018 			/* Needed for unlabeled SCSI floppies. */
24019 			un->un_g.dkg_nhead = 2;
24020 			un->un_g.dkg_ncyl = 80;
24021 			un->un_g.dkg_pcyl = 80;
24022 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24023 		} else if (un->un_blockcount <= 0x200000) {
24024 			un->un_g.dkg_nhead = 64;
24025 			un->un_g.dkg_nsect = 32;
24026 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24027 		} else {
24028 			un->un_g.dkg_nhead = 255;
24029 			un->un_g.dkg_nsect = 63;
24030 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24031 		}
24032 		un->un_blockcount = un->un_g.dkg_ncyl *
24033 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
24034 	}
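	/*
	 * Worked example (hypothetical capacity): for an 8GB disk,
	 * un_blockcount = 16777216 blocks > 0x200000, so nhead = 255 and
	 * nsect = 63; ncyl = 16777216 / (255 * 63) = 1044, and the
	 * capacity is truncated to 1044 * 255 * 63 = 16771860 blocks.
	 */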
24035 	un->un_g.dkg_acyl = 0;
24036 	un->un_g.dkg_bcyl = 0;
24037 	un->un_g.dkg_intrlv = 1;
24038 	un->un_g.dkg_rpm = 200;
24039 	un->un_g.dkg_read_reinstruct = 0;
24040 	un->un_g.dkg_write_reinstruct = 0;
24041 	if (un->un_g.dkg_pcyl == 0) {
24042 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24043 	}
24044 
24045 	un->un_map['a'-'a'].dkl_cylno = 0;
24046 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24047 	un->un_map['c'-'a'].dkl_cylno = 0;
24048 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24049 	un->un_f_geometry_is_valid = FALSE;
24050 }
24051 
24052 
24053 #if defined(__i386) || defined(__amd64)
24054 /*
24055  *    Function: sd_update_fdisk_and_vtoc
24056  *
24057  * Description: This local utility routine updates the device fdisk and vtoc
24058  *		as part of setting the device mboot.
24059  *
24060  *   Arguments: un - driver soft state (unit) structure
24061  *
24062  * Return Code: 0 for success or errno-type return code.
24063  *
24064  *    Note: x86: This looks like a duplicate of sd_validate_geometry(),
24065  *		but these did exist separately in the x86 sd.c!
24066  */
24067 
24068 static int
24069 sd_update_fdisk_and_vtoc(struct sd_lun *un)
24070 {
24071 	static char	labelstring[128];
24072 	static char	buf[256];
24073 	char		*label = 0;
24074 	int		count;
24075 	int		label_rc = 0;
24076 	int		gvalid = un->un_f_geometry_is_valid;
24077 	int		fdisk_rval;
24078 	int		lbasize;
24079 	int		capacity;
24080 
24081 	ASSERT(mutex_owned(SD_MUTEX(un)));
24082 
24083 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24084 		return (EINVAL);
24085 	}
24086 
24087 	if (un->un_f_blockcount_is_valid == FALSE) {
24088 		return (EINVAL);
24089 	}
24090 
24091 #if defined(_SUNOS_VTOC_16)
24092 	/*
24093 	 * Set up the "whole disk" fdisk partition; this should always
24094 	 * exist, regardless of whether the disk contains an fdisk table
24095 	 * or vtoc.
24096 	 */
24097 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24098 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24099 #endif	/* defined(_SUNOS_VTOC_16) */
24100 
24101 	/*
24102 	 * copy the lbasize and capacity so that if they're
24103 	 * reset while we're not holding the SD_MUTEX(un), we will
24104 	 * continue to use valid values after the SD_MUTEX(un) is
24105 	 * reacquired.
24106 	 */
24107 	lbasize  = un->un_tgt_blocksize;
24108 	capacity = un->un_blockcount;
24109 
24110 	/*
24111 	 * refresh the logical and physical geometry caches.
24112 	 * (data from mode sense format/rigid disk geometry pages,
24113 	 * and scsi_ifgetcap("geometry").
24114 	 */
24115 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24116 
24117 	/*
24118 	 * Only DIRECT ACCESS devices will have Sun labels.
24119 	 * CD's supposedly have a Sun label, too
24120 	 * CDs supposedly have a Sun label, too.
24121 	if (un->un_f_vtoc_label_supported) {
24122 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24123 		    SD_PATH_DIRECT);
24124 		if (fdisk_rval == SD_CMD_FAILURE) {
24125 			ASSERT(mutex_owned(SD_MUTEX(un)));
24126 			return (EIO);
24127 		}
24128 
24129 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24130 			ASSERT(mutex_owned(SD_MUTEX(un)));
24131 			return (EACCES);
24132 		}
24133 
24134 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24135 			/*
24136 			 * Found fdisk table but no Solaris partition entry,
24137 			 * so don't call sd_uselabel() and don't create
24138 			 * a default label.
24139 			 */
24140 			label_rc = 0;
24141 			un->un_f_geometry_is_valid = TRUE;
24142 			goto no_solaris_partition;
24143 		}
24144 
24145 #if defined(_SUNOS_VTOC_8)
24146 		label = (char *)un->un_asciilabel;
24147 #elif defined(_SUNOS_VTOC_16)
24148 		label = (char *)un->un_vtoc.v_asciilabel;
24149 #else
24150 #error "No VTOC format defined."
24151 #endif
24152 	} else if (capacity < 0) {
24153 		ASSERT(mutex_owned(SD_MUTEX(un)));
24154 		return (EINVAL);
24155 	}
24156 
24157 	/*
24158 	 * For removable media, we reach here only if a Solaris partition
24159 	 * has been found.  If un_f_geometry_is_valid is FALSE, the Solaris
24160 	 * partition has changed from the previous one, so we set up a
24161 	 * default VTOC in this case.
24163 	 */
24164 	if (un->un_f_geometry_is_valid == FALSE) {
24165 		sd_build_default_label(un);
24166 		label_rc = 0;
24167 	}
24168 
24169 no_solaris_partition:
24170 	if ((!un->un_f_has_removable_media ||
24171 	    (un->un_f_has_removable_media &&
24172 	    un->un_mediastate == DKIO_EJECTED)) &&
24173 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
24174 		/*
24175 		 * Print out a message indicating who and what we are.
24176 		 * We do this only when we actually validate the geometry;
24177 		 * sd_validate_geometry() may be called at other times, e.g.
24178 		 * for ioctl()s like Get VTOC, in which case we don't want
24179 		 * to print the label.
24180 		 * If the geometry is valid, print the label string; else
24181 		 * print the vendor and product info, if available.
24182 		 */
24183 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24184 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24185 		} else {
24186 			mutex_enter(&sd_label_mutex);
24187 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24188 			    labelstring);
24189 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24190 			    &labelstring[64]);
24191 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24192 			    labelstring, &labelstring[64]);
24193 			if (un->un_f_blockcount_is_valid == TRUE) {
24194 				(void) sprintf(&buf[strlen(buf)],
24195 				    ", %" PRIu64 " %u byte blocks\n",
24196 				    un->un_blockcount,
24197 				    un->un_tgt_blocksize);
24198 			} else {
24199 				(void) sprintf(&buf[strlen(buf)],
24200 				    ", (unknown capacity)\n");
24201 			}
24202 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24203 			mutex_exit(&sd_label_mutex);
24204 		}
24205 	}
24206 
24207 #if defined(_SUNOS_VTOC_16)
24208 	/*
24209 	 * If we have valid geometry, set up the remaining fdisk partitions.
24210 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24211 	 * we set it to an entirely bogus value.
24212 	 */
24213 	for (count = 0; count < FD_NUMPART; count++) {
24214 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24215 		un->un_map[FDISK_P1 + count].dkl_nblk =
24216 		    un->un_fmap[count].fmap_nblk;
24217 		un->un_offset[FDISK_P1 + count] =
24218 		    un->un_fmap[count].fmap_start;
24219 	}
24220 #endif
24221 
24222 	for (count = 0; count < NDKMAP; count++) {
24223 #if defined(_SUNOS_VTOC_8)
24224 		struct dk_map *lp  = &un->un_map[count];
24225 		un->un_offset[count] =
24226 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24227 #elif defined(_SUNOS_VTOC_16)
24228 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24229 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24230 #else
24231 #error "No VTOC format defined."
24232 #endif
24233 	}
24234 
24235 	ASSERT(mutex_owned(SD_MUTEX(un)));
24236 	return (label_rc);
24237 }
24238 #endif
24239 
24240 
24241 /*
24242  *    Function: sd_check_media
24243  *
24244  * Description: This utility routine implements the functionality for the
24245  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24246  *		driver state changes from that specified by the user
24247  *		(inserted or ejected). For example, if the user specifies
24248  *		DKIO_EJECTED and the current media state is inserted this
24249  *		routine will immediately return DKIO_INSERTED. However, if the
24250  *		current media state is not inserted the user thread will be
24251  *		blocked until the drive state changes. If DKIO_NONE is specified
24252  *		the user thread will block until a drive state change occurs.
24253  *
24254  *   Arguments: dev  - the device number
24255  *		state  - user pointer to a dkio_state, updated with the current
24256  *			drive state at return.
24257  *
24258  * Return Code: ENXIO
24259  *		EIO
24260  *		EAGAIN
24261  *		EINTR
24262  */
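/*
 * A minimal user-level usage sketch (illustrative only; fd is assumed to
 * be an open file descriptor on the raw device):
 *
 *	enum dkio_state state = DKIO_NONE;
 *	for (;;) {
 *		if (ioctl(fd, DKIOCSTATE, &state) < 0)
 *			break;
 *		(void) printf("media state is now %d\n", state);
 *	}
 */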
24263 
24264 static int
24265 sd_check_media(dev_t dev, enum dkio_state state)
24266 {
24267 	struct sd_lun		*un = NULL;
24268 	enum dkio_state		prev_state;
24269 	opaque_t		token = NULL;
24270 	int			rval = 0;
24271 
24272 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24273 		return (ENXIO);
24274 	}
24275 
24276 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24277 
24278 	mutex_enter(SD_MUTEX(un));
24279 
24280 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24281 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24282 
24283 	prev_state = un->un_mediastate;
24284 
24285 	/* is there anything to do? */
24286 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24287 		/*
24288 		 * submit the request to the scsi_watch service;
24289 		 * scsi_media_watch_cb() does the real work
24290 		 */
24291 		mutex_exit(SD_MUTEX(un));
24292 
24293 		/*
24294 		 * This change handles the case where a scsi watch request is
24295 		 * added to a device that is powered down. To accomplish this
24296 		 * we power up the device before adding the scsi watch request,
24297 		 * since the scsi watch sends a TUR directly to the device
24298 		 * which the device cannot handle if it is powered down.
24299 		 */
24300 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24301 			mutex_enter(SD_MUTEX(un));
24302 			goto done;
24303 		}
24304 
24305 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24306 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24307 		    (caddr_t)dev);
24308 
24309 		sd_pm_exit(un);
24310 
24311 		mutex_enter(SD_MUTEX(un));
24312 		if (token == NULL) {
24313 			rval = EAGAIN;
24314 			goto done;
24315 		}
24316 
24317 		/*
24318 		 * This is a special case IOCTL that doesn't return
24319 		 * until the media state changes. Routine sdpower
24320 		 * knows about and handles this so don't count it
24321 		 * as an active cmd in the driver, which would
24322 		 * keep the device busy to the pm framework.
24323 		 * If the count isn't decremented the device can't
24324 		 * be powered down.
24325 		 */
24326 		un->un_ncmds_in_driver--;
24327 		ASSERT(un->un_ncmds_in_driver >= 0);
24328 
24329 		/*
24330 		 * if a prior request had been made, this will be the same
24331 		 * token, as scsi_watch was designed that way.
24332 		 */
24333 		un->un_swr_token = token;
24334 		un->un_specified_mediastate = state;
24335 
24336 		/*
24337 		 * Now wait for the media state to change.  We will not be
24338 		 * signalled unless mediastate == state, but it is still
24339 		 * better to test for this condition, since there is a
24340 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED.
24341 		 */
24342 		SD_TRACE(SD_LOG_COMMON, un,
24343 		    "sd_check_media: waiting for media state change\n");
24344 		while (un->un_mediastate == state) {
24345 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24346 				SD_TRACE(SD_LOG_COMMON, un,
24347 				    "sd_check_media: waiting for media state "
24348 				    "was interrupted\n");
24349 				un->un_ncmds_in_driver++;
24350 				rval = EINTR;
24351 				goto done;
24352 			}
24353 			SD_TRACE(SD_LOG_COMMON, un,
24354 			    "sd_check_media: received signal, state=%x\n",
24355 			    un->un_mediastate);
24356 		}
24357 		/*
24358 		 * Inc the counter to indicate the device once again
24359 		 * has an active outstanding cmd.
24360 		 */
24361 		un->un_ncmds_in_driver++;
24362 	}
24363 
24364 	/* invalidate geometry */
24365 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24366 		sr_ejected(un);
24367 	}
24368 
24369 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24370 		uint64_t	capacity;
24371 		uint_t		lbasize;
24372 
24373 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24374 		mutex_exit(SD_MUTEX(un));
24375 		/*
24376 		 * Since the following routines use SD_PATH_DIRECT, we must
24377 		 * call PM directly before the upcoming disk accesses. This
24378 		 * may cause the disk to be power/spin up.
24379 		 */
24380 
24381 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24382 			rval = sd_send_scsi_READ_CAPACITY(un,
24383 			    &capacity,
24384 			    &lbasize, SD_PATH_DIRECT);
24385 			if (rval != 0) {
24386 				sd_pm_exit(un);
24387 				mutex_enter(SD_MUTEX(un));
24388 				goto done;
24389 			}
24390 		} else {
24391 			rval = EIO;
24392 			mutex_enter(SD_MUTEX(un));
24393 			goto done;
24394 		}
24395 		mutex_enter(SD_MUTEX(un));
24396 
24397 		sd_update_block_info(un, lbasize, capacity);
24398 
24399 		un->un_f_geometry_is_valid	= FALSE;
24400 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24401 
24402 		mutex_exit(SD_MUTEX(un));
24403 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24404 		    SD_PATH_DIRECT);
24405 		sd_pm_exit(un);
24406 
24407 		mutex_enter(SD_MUTEX(un));
24408 	}
24409 done:
24410 	un->un_f_watcht_stopped = FALSE;
24411 	if (un->un_swr_token) {
24412 		/*
24413 		 * Use of this local token and the mutex ensures that we avoid
24414 		 * some race conditions associated with terminating the
24415 		 * scsi watch.
24416 		 */
24417 		token = un->un_swr_token;
24418 		un->un_swr_token = (opaque_t)NULL;
24419 		mutex_exit(SD_MUTEX(un));
24420 		(void) scsi_watch_request_terminate(token,
24421 		    SCSI_WATCH_TERMINATE_WAIT);
24422 		mutex_enter(SD_MUTEX(un));
24423 	}
24424 
24425 	/*
24426 	 * Update the capacity kstat value, if no media previously
24427 	 * (capacity kstat is 0) and a media has been inserted
24428 	 * (un_f_blockcount_is_valid == TRUE)
24429 	 */
24430 	if (un->un_errstats) {
24431 		struct sd_errstats	*stp = NULL;
24432 
24433 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24434 		if ((stp->sd_capacity.value.ui64 == 0) &&
24435 		    (un->un_f_blockcount_is_valid == TRUE)) {
24436 			stp->sd_capacity.value.ui64 =
24437 			    (uint64_t)((uint64_t)un->un_blockcount *
24438 			    un->un_sys_blocksize);
24439 		}
24440 	}
24441 	mutex_exit(SD_MUTEX(un));
24442 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24443 	return (rval);
24444 }
24445 
24446 
24447 /*
24448  *    Function: sd_delayed_cv_broadcast
24449  *
24450  * Description: Delayed cv_broadcast to allow for target to recover from media
24451  *		insertion.
24452  *
24453  *   Arguments: arg - driver soft state (unit) structure
24454  */
24455 
24456 static void
24457 sd_delayed_cv_broadcast(void *arg)
24458 {
24459 	struct sd_lun *un = arg;
24460 
24461 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24462 
24463 	mutex_enter(SD_MUTEX(un));
24464 	un->un_dcvb_timeid = NULL;
24465 	cv_broadcast(&un->un_state_cv);
24466 	mutex_exit(SD_MUTEX(un));
24467 }
24468 
24469 
24470 /*
24471  *    Function: sd_media_watch_cb
24472  *
24473  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24474  *		routine processes the TUR sense data and updates the driver
24475  *		state if a transition has occurred. The user thread
24476  *		(sd_check_media) is then signalled.
24477  *
24478  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24479  *			among multiple watches that share this callback function
24480  *		resultp - scsi watch facility result packet containing scsi
24481  *			  packet, status byte and sense data
24482  *
24483  * Return Code: 0 for success, -1 for failure
24484  */
24485 
24486 static int
24487 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24488 {
24489 	struct sd_lun			*un;
24490 	struct scsi_status		*statusp = resultp->statusp;
24491 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24492 	enum dkio_state			state = DKIO_NONE;
24493 	dev_t				dev = (dev_t)arg;
24494 	uchar_t				actual_sense_length;
24495 	uint8_t				skey, asc, ascq;
24496 
24497 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24498 		return (-1);
24499 	}
24500 	actual_sense_length = resultp->actual_sense_length;
24501 
24502 	mutex_enter(SD_MUTEX(un));
24503 	SD_TRACE(SD_LOG_COMMON, un,
24504 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24505 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24506 
24507 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24508 		un->un_mediastate = DKIO_DEV_GONE;
24509 		cv_broadcast(&un->un_state_cv);
24510 		mutex_exit(SD_MUTEX(un));
24511 
24512 		return (0);
24513 	}
24514 
24515 	/*
24516 	 * If there was a check condition then sensep points to valid sense data
24517 	 * If status was not a check condition but a reservation or busy status
24518 	 * then the new state is DKIO_NONE
24519 	 */
24520 	if (sensep != NULL) {
24521 		skey = scsi_sense_key(sensep);
24522 		asc = scsi_sense_asc(sensep);
24523 		ascq = scsi_sense_ascq(sensep);
24524 
24525 		SD_INFO(SD_LOG_COMMON, un,
24526 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24527 		    skey, asc, ascq);
24528 		/* This routine only uses up to 13 bytes of sense data. */
24529 		if (actual_sense_length >= 13) {
24530 			if (skey == KEY_UNIT_ATTENTION) {
24531 				if (asc == 0x28) {
24532 					state = DKIO_INSERTED;
24533 				}
24534 			} else {
24535 				/*
24536 				 * Sense data of 02/04/02 means the host
24537 				 * should send a START command; the media
24538 				 * is inserted but the host has stopped
24539 				 * the device for PM reasons.  Explicitly
24540 				 * leave the media state as is (inserted);
24541 				 * the next real read/write to the media
24542 				 * will bring the device back to a state
24543 				 * good for media access.
24545 				 */
24546 				if ((skey == KEY_NOT_READY) &&
24547 				    (asc == 0x3a)) {
24548 					state = DKIO_EJECTED;
24549 				}
24550 
24551 				/*
24552 				 * If the drive is busy with an operation
24553 				 * or long write, keep the media in an
24554 				 * inserted state.
24555 				 */
24556 
24557 				if ((skey == KEY_NOT_READY) &&
24558 				    (asc == 0x04) &&
24559 				    ((ascq == 0x02) ||
24560 				    (ascq == 0x07) ||
24561 				    (ascq == 0x08))) {
24562 					state = DKIO_INSERTED;
24563 				}
24564 			}
24565 		}
24566 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24567 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24568 		state = DKIO_INSERTED;
24569 	}
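	/*
	 * Summary of the decode above (a sketch covering only the cases
	 * this routine distinguishes):
	 *
	 *	KEY_UNIT_ATTENTION / ASC 0x28	-> DKIO_INSERTED
	 *	KEY_NOT_READY / ASC 0x3a	-> DKIO_EJECTED
	 *	KEY_NOT_READY / ASC 0x04,
	 *	    ASCQ 0x02, 0x07 or 0x08	-> DKIO_INSERTED
	 *	GOOD status, CMD_CMPLT		-> DKIO_INSERTED
	 *	anything else			-> DKIO_NONE
	 */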
24570 
24571 	SD_TRACE(SD_LOG_COMMON, un,
24572 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24573 	    state, un->un_specified_mediastate);
24574 
24575 	/*
24576 	 * now signal the waiting thread if this is *not* the specified state;
24577 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24578 	 * to recover
24579 	 */
24580 	if (state != un->un_specified_mediastate) {
24581 		un->un_mediastate = state;
24582 		if (state == DKIO_INSERTED) {
24583 			/*
24584 			 * delay the signal to give the drive a chance
24585 			 * to do what it apparently needs to do
24586 			 */
24587 			SD_TRACE(SD_LOG_COMMON, un,
24588 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24589 			if (un->un_dcvb_timeid == NULL) {
24590 				un->un_dcvb_timeid =
24591 				    timeout(sd_delayed_cv_broadcast, un,
24592 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24593 			}
24594 		} else {
24595 			SD_TRACE(SD_LOG_COMMON, un,
24596 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24597 			cv_broadcast(&un->un_state_cv);
24598 		}
24599 	}
24600 	mutex_exit(SD_MUTEX(un));
24601 	return (0);
24602 }
24603 
24604 
24605 /*
24606  *    Function: sd_dkio_get_temp
24607  *
24608  * Description: This routine is the driver entry point for handling ioctl
24609  *		requests to get the disk temperature.
24610  *
24611  *   Arguments: dev  - the device number
24612  *		arg  - pointer to user provided dk_temperature structure.
24613  *		flag - this argument is a pass through to ddi_copyxxx()
24614  *		       directly from the mode argument of ioctl().
24615  *
24616  * Return Code: 0
24617  *		EFAULT
24618  *		ENXIO
24619  *		EAGAIN
24620  */
24621 
24622 static int
24623 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24624 {
24625 	struct sd_lun		*un = NULL;
24626 	struct dk_temperature	*dktemp = NULL;
24627 	uchar_t			*temperature_page;
24628 	int			rval = 0;
24629 	int			path_flag = SD_PATH_STANDARD;
24630 
24631 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24632 		return (ENXIO);
24633 	}
24634 
24635 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24636 
24637 	/* copyin the disk temp argument to get the user flags */
24638 	if (ddi_copyin((void *)arg, dktemp,
24639 	    sizeof (struct dk_temperature), flag) != 0) {
24640 		rval = EFAULT;
24641 		goto done;
24642 	}
24643 
24644 	/* Initialize the temperature to invalid. */
24645 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24646 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24647 
24648 	/*
24649 	 * Note: Investigate removing the "bypass pm" semantic.
24650 	 * Can we just bypass PM always?
24651 	 */
24652 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24653 		path_flag = SD_PATH_DIRECT;
24654 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24655 		mutex_enter(&un->un_pm_mutex);
24656 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24657 			/*
24658 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24659 			 * in low power mode, we cannot wake it up; we need to
24660 			 * return EAGAIN.
24661 			 */
24662 			mutex_exit(&un->un_pm_mutex);
24663 			rval = EAGAIN;
24664 			goto done;
24665 		} else {
24666 			/*
24667 			 * Indicate to PM the device is busy. This is required
24668 			 * to avoid a race - i.e. the ioctl is issuing a
24669 			 * command and the pm framework brings down the device
24670 			 * to low power mode (possible power cut-off on some
24671 			 * platforms).
24672 			 */
24673 			mutex_exit(&un->un_pm_mutex);
24674 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24675 				rval = EAGAIN;
24676 				goto done;
24677 			}
24678 		}
24679 	}
24680 
24681 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24682 
24683 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24684 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24685 		goto done2;
24686 	}
24687 
24688 	/*
24689 	 * For the current temperature verify that the parameter length is 0x02
24690 	 * and the parameter code is 0x00
24691 	 */
24692 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24693 	    (temperature_page[5] == 0x00)) {
24694 		if (temperature_page[9] == 0xFF) {
24695 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24696 		} else {
24697 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24698 		}
24699 	}
24700 
24701 	/*
24702 	 * For the reference temperature verify that the parameter
24703 	 * length is 0x02 and the parameter code is 0x01
24704 	 */
24705 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24706 	    (temperature_page[11] == 0x01)) {
24707 		if (temperature_page[15] == 0xFF) {
24708 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24709 		} else {
24710 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24711 		}
24712 	}
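	/*
	 * Layout assumed by the checks above (a sketch of the LOG SENSE
	 * temperature page; byte offsets into temperature_page):
	 *
	 *	[4-5]	parameter code 0x0000 (current temperature)
	 *	[7]	parameter length 0x02
	 *	[9]	current temperature, 0xFF if invalid
	 *	[10-11]	parameter code 0x0001 (reference temperature)
	 *	[13]	parameter length 0x02
	 *	[15]	reference temperature, 0xFF if invalid
	 */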
24713 
24714 	/* Do the copyout regardless of the temperature commands status. */
24715 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24716 	    flag) != 0) {
24717 		rval = EFAULT;
24718 	}
24719 
24720 done2:
24721 	if (path_flag == SD_PATH_DIRECT) {
24722 		sd_pm_exit(un);
24723 	}
24724 
24725 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24726 done:
24727 	if (dktemp != NULL) {
24728 		kmem_free(dktemp, sizeof (struct dk_temperature));
24729 	}
24730 
24731 	return (rval);
24732 }
24733 
24734 
24735 /*
24736  *    Function: sd_log_page_supported
24737  *
24738  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24739  *		supported log pages.
24740  *
24741  *   Arguments: un - driver soft state (unit) structure
24742  *		log_page - the log page code to search for
24743  *
24744  * Return Code: -1 - on error (log sense is optional and may not be supported).
24745  *		0  - log page not found.
24746  *		1  - log page found.
24747  */
24748 
24749 static int
24750 sd_log_page_supported(struct sd_lun *un, int log_page)
24751 {
24752 	uchar_t *log_page_data;
24753 	int	i;
24754 	int	match = 0;
24755 	int	log_size;
24756 
24757 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24758 
24759 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24760 	    SD_PATH_DIRECT) != 0) {
24761 		SD_ERROR(SD_LOG_COMMON, un,
24762 		    "sd_log_page_supported: failed log page retrieval\n");
24763 		kmem_free(log_page_data, 0xFF);
24764 		return (-1);
24765 	}
24766 	log_size = log_page_data[3];
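	/*
	 * Sketch of the page-0 response parsed here (hypothetical data):
	 *
	 *	[0]	page code 0x00
	 *	[3]	page length, e.g. 0x04
	 *	[4..]	supported page codes, e.g. 0x00 0x0d 0x2f 0x30
	 *
	 * With that data, sd_log_page_supported(un, 0x0d) would return 1.
	 */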
24767 
24768 	/*
24769 	 * The list of supported log pages starts at the fourth byte.  Check
24770 	 * until we run out of log pages or a match is found.
24771 	 */
24772 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24773 		if (log_page_data[i] == log_page) {
24774 			match++;
24775 		}
24776 	}
24777 	kmem_free(log_page_data, 0xFF);
24778 	return (match);
24779 }
24780 
24781 
24782 /*
24783  *    Function: sd_mhdioc_failfast
24784  *
24785  * Description: This routine is the driver entry point for handling ioctl
24786  *		requests to enable/disable the multihost failfast option.
24787  *		(MHIOCENFAILFAST)
24788  *
24789  *   Arguments: dev	- the device number
24790  *		arg	- user specified probing interval.
24791  *		flag	- this argument is a pass through to ddi_copyxxx()
24792  *			  directly from the mode argument of ioctl().
24793  *
24794  * Return Code: 0
24795  *		EFAULT
24796  *		ENXIO
24797  */
24798 
24799 static int
24800 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24801 {
24802 	struct sd_lun	*un = NULL;
24803 	int		mh_time;
24804 	int		rval = 0;
24805 
24806 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24807 		return (ENXIO);
24808 	}
24809 
24810 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24811 		return (EFAULT);
24812 
24813 	if (mh_time) {
24814 		mutex_enter(SD_MUTEX(un));
24815 		un->un_resvd_status |= SD_FAILFAST;
24816 		mutex_exit(SD_MUTEX(un));
24817 		/*
24818 		 * If mh_time is INT_MAX, then this ioctl is being used for
24819 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24820 		 */
24821 		if (mh_time != INT_MAX) {
24822 			rval = sd_check_mhd(dev, mh_time);
24823 		}
24824 	} else {
24825 		(void) sd_check_mhd(dev, 0);
24826 		mutex_enter(SD_MUTEX(un));
24827 		un->un_resvd_status &= ~SD_FAILFAST;
24828 		mutex_exit(SD_MUTEX(un));
24829 	}
24830 	return (rval);
24831 }
24832 
24833 
24834 /*
24835  *    Function: sd_mhdioc_takeown
24836  *
24837  * Description: This routine is the driver entry point for handling ioctl
24838  *		requests to forcefully acquire exclusive access rights to the
24839  *		multihost disk (MHIOCTKOWN).
24840  *
24841  *   Arguments: dev	- the device number
24842  *		arg	- user provided structure specifying the delay
24843  *			  parameters in milliseconds
24844  *		flag	- this argument is a pass through to ddi_copyxxx()
24845  *			  directly from the mode argument of ioctl().
24846  *
24847  * Return Code: 0
24848  *		EFAULT
24849  *		ENXIO
24850  */
24851 
24852 static int
24853 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24854 {
24855 	struct sd_lun		*un = NULL;
24856 	struct mhioctkown	*tkown = NULL;
24857 	int			rval = 0;
24858 
24859 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24860 		return (ENXIO);
24861 	}
24862 
24863 	if (arg != NULL) {
24864 		tkown = (struct mhioctkown *)
24865 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24866 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24867 		if (rval != 0) {
24868 			rval = EFAULT;
24869 			goto error;
24870 		}
24871 	}
24872 
24873 	rval = sd_take_ownership(dev, tkown);
24874 	mutex_enter(SD_MUTEX(un));
24875 	if (rval == 0) {
24876 		un->un_resvd_status |= SD_RESERVE;
24877 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24878 			sd_reinstate_resv_delay =
24879 			    tkown->reinstate_resv_delay * 1000;
24880 		} else {
24881 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24882 		}
24883 		/*
24884 		 * Give the scsi_watch routine interval set by
24885 		 * the MHIOCENFAILFAST ioctl precedence here.
24886 		 */
24887 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24888 			mutex_exit(SD_MUTEX(un));
24889 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24890 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24891 			    "sd_mhdioc_takeown : %d\n",
24892 			    sd_reinstate_resv_delay);
24893 		} else {
24894 			mutex_exit(SD_MUTEX(un));
24895 		}
24896 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24897 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24898 	} else {
24899 		un->un_resvd_status &= ~SD_RESERVE;
24900 		mutex_exit(SD_MUTEX(un));
24901 	}
24902 
24903 error:
24904 	if (tkown != NULL) {
24905 		kmem_free(tkown, sizeof (struct mhioctkown));
24906 	}
24907 	return (rval);
24908 }
24909 
24910 
24911 /*
24912  *    Function: sd_mhdioc_release
24913  *
24914  * Description: This routine is the driver entry point for handling ioctl
24915  *		requests to release exclusive access rights to the multihost
24916  *		disk (MHIOCRELEASE).
24917  *
24918  *   Arguments: dev	- the device number
24919  *
24920  * Return Code: 0
24921  *		ENXIO
24922  */
24923 
24924 static int
24925 sd_mhdioc_release(dev_t dev)
24926 {
24927 	struct sd_lun		*un = NULL;
24928 	timeout_id_t		resvd_timeid_save;
24929 	int			resvd_status_save;
24930 	int			rval = 0;
24931 
24932 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24933 		return (ENXIO);
24934 	}
24935 
24936 	mutex_enter(SD_MUTEX(un));
24937 	resvd_status_save = un->un_resvd_status;
24938 	un->un_resvd_status &=
24939 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24940 	if (un->un_resvd_timeid) {
24941 		resvd_timeid_save = un->un_resvd_timeid;
24942 		un->un_resvd_timeid = NULL;
24943 		mutex_exit(SD_MUTEX(un));
24944 		(void) untimeout(resvd_timeid_save);
24945 	} else {
24946 		mutex_exit(SD_MUTEX(un));
24947 	}
24948 
24949 	/*
24950 	 * destroy any pending timeout thread that may be attempting to
24951 	 * reinstate reservation on this device.
24952 	 */
24953 	sd_rmv_resv_reclaim_req(dev);
24954 
24955 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24956 		mutex_enter(SD_MUTEX(un));
24957 		if ((un->un_mhd_token) &&
24958 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24959 			mutex_exit(SD_MUTEX(un));
24960 			(void) sd_check_mhd(dev, 0);
24961 		} else {
24962 			mutex_exit(SD_MUTEX(un));
24963 		}
24964 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24965 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24966 	} else {
24967 		/*
24968 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24969 		 */
24970 		mutex_enter(SD_MUTEX(un));
24971 		un->un_resvd_status = resvd_status_save;
24972 		mutex_exit(SD_MUTEX(un));
24973 	}
24974 	return (rval);
24975 }
24976 
24977 
24978 /*
24979  *    Function: sd_mhdioc_register_devid
24980  *
24981  * Description: This routine is the driver entry point for handling ioctl
24982  *		requests to register the device id (MHIOCREREGISTERDEVID).
24983  *
24984  *		Note: The implementation for this ioctl has been updated to
24985  *		be consistent with the original PSARC case (1999/357)
24986  *		(4375899, 4241671, 4220005)
24987  *
24988  *   Arguments: dev	- the device number
24989  *
24990  * Return Code: 0
24991  *		ENXIO
24992  */
24993 
24994 static int
24995 sd_mhdioc_register_devid(dev_t dev)
24996 {
24997 	struct sd_lun	*un = NULL;
24998 	int		rval = 0;
24999 
25000 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25001 		return (ENXIO);
25002 	}
25003 
25004 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25005 
25006 	mutex_enter(SD_MUTEX(un));
25007 
25008 	/* If a devid already exists, de-register it */
25009 	if (un->un_devid != NULL) {
25010 		ddi_devid_unregister(SD_DEVINFO(un));
25011 		/*
25012 		 * After unregistering the devid, free the devid memory.
25013 		 */
25014 		ddi_devid_free(un->un_devid);
25015 		un->un_devid = NULL;
25016 	}
25017 
25018 	/* Check for reservation conflict */
25019 	mutex_exit(SD_MUTEX(un));
25020 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25021 	mutex_enter(SD_MUTEX(un));
25022 
25023 	switch (rval) {
25024 	case 0:
25025 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25026 		break;
25027 	case EACCES:
25028 		break;
25029 	default:
25030 		rval = EIO;
25031 	}
25032 
25033 	mutex_exit(SD_MUTEX(un));
25034 	return (rval);
25035 }
25036 
25037 
25038 /*
25039  *    Function: sd_mhdioc_inkeys
25040  *
25041  * Description: This routine is the driver entry point for handling ioctl
25042  *		requests to issue the SCSI-3 Persistent In Read Keys command
25043  *		to the device (MHIOCGRP_INKEYS).
25044  *
25045  *   Arguments: dev	- the device number
25046  *		arg	- user provided in_keys structure
25047  *		flag	- this argument is a pass through to ddi_copyxxx()
25048  *			  directly from the mode argument of ioctl().
25049  *
25050  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25051  *		ENXIO
25052  *		EFAULT
25053  */
25054 
25055 static int
25056 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25057 {
25058 	struct sd_lun		*un;
25059 	mhioc_inkeys_t		inkeys;
25060 	int			rval = 0;
25061 
25062 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25063 		return (ENXIO);
25064 	}
25065 
25066 #ifdef _MULTI_DATAMODEL
25067 	switch (ddi_model_convert_from(flag & FMODELS)) {
25068 	case DDI_MODEL_ILP32: {
25069 		struct mhioc_inkeys32	inkeys32;
25070 
25071 		if (ddi_copyin(arg, &inkeys32,
25072 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25073 			return (EFAULT);
25074 		}
25075 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25076 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25077 		    &inkeys, flag)) != 0) {
25078 			return (rval);
25079 		}
25080 		inkeys32.generation = inkeys.generation;
25081 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25082 		    flag) != 0) {
25083 			return (EFAULT);
25084 		}
25085 		break;
25086 	}
25087 	case DDI_MODEL_NONE:
25088 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25089 		    flag) != 0) {
25090 			return (EFAULT);
25091 		}
25092 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25093 		    &inkeys, flag)) != 0) {
25094 			return (rval);
25095 		}
25096 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25097 		    flag) != 0) {
25098 			return (EFAULT);
25099 		}
25100 		break;
25101 	}
25102 
25103 #else /* ! _MULTI_DATAMODEL */
25104 
25105 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25106 		return (EFAULT);
25107 	}
25108 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25109 	if (rval != 0) {
25110 		return (rval);
25111 	}
25112 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25113 		return (EFAULT);
25114 	}
25115 
25116 #endif /* _MULTI_DATAMODEL */
25117 
25118 	return (rval);
25119 }
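
/*
 * Illustrative userland sketch (not part of the driver) of invoking the
 * MHIOCGRP_INKEYS ioctl handled above.  It assumes the mhioc_inkeys_t and
 * mhioc_key_list_t layouts from <sys/mhd.h>; the descriptor, the 16-entry
 * key buffer, and the error handling are hypothetical.  The caller supplies
 * listsize/list; the driver fills in listlen and generation.
 *
 *	#include <sys/types.h>
 *	#include <sys/mhd.h>
 *	#include <unistd.h>
 *
 *	int
 *	read_registered_keys(int fd)
 *	{
 *		mhioc_resv_key_t	keys[16];
 *		mhioc_key_list_t	klist;
 *		mhioc_inkeys_t		inkeys;
 *
 *		klist.listsize = 16;
 *		klist.listlen = 0;
 *		klist.list = keys;
 *		inkeys.li = &klist;
 *		if (ioctl(fd, MHIOCGRP_INKEYS, &inkeys) != 0)
 *			return (-1);
 *		return ((int)klist.listlen);
 *	}
 */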
25120 
25121 
25122 /*
25123  *    Function: sd_mhdioc_inresv
25124  *
25125  * Description: This routine is the driver entry point for handling ioctl
25126  *		requests to issue the SCSI-3 Persistent In Read Reservations
25127  *		command to the device (MHIOCGRP_INKEYS).
25128  *		command to the device (MHIOCGRP_INRESV).
25129  *   Arguments: dev	- the device number
25130  *		arg	- user provided in_resv structure
25131  *		flag	- this argument is a pass through to ddi_copyxxx()
25132  *			  directly from the mode argument of ioctl().
25133  *
25134  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25135  *		ENXIO
25136  *		EFAULT
25137  */
25138 
25139 static int
25140 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25141 {
25142 	struct sd_lun		*un;
25143 	mhioc_inresvs_t		inresvs;
25144 	int			rval = 0;
25145 
25146 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25147 		return (ENXIO);
25148 	}
25149 
25150 #ifdef _MULTI_DATAMODEL
25151 
25152 	switch (ddi_model_convert_from(flag & FMODELS)) {
25153 	case DDI_MODEL_ILP32: {
25154 		struct mhioc_inresvs32	inresvs32;
25155 
25156 		if (ddi_copyin(arg, &inresvs32,
25157 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25158 			return (EFAULT);
25159 		}
25160 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25161 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25162 		    &inresvs, flag)) != 0) {
25163 			return (rval);
25164 		}
25165 		inresvs32.generation = inresvs.generation;
25166 		if (ddi_copyout(&inresvs32, arg,
25167 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25168 			return (EFAULT);
25169 		}
25170 		break;
25171 	}
25172 	case DDI_MODEL_NONE:
25173 		if (ddi_copyin(arg, &inresvs,
25174 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25175 			return (EFAULT);
25176 		}
25177 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25178 		    &inresvs, flag)) != 0) {
25179 			return (rval);
25180 		}
25181 		if (ddi_copyout(&inresvs, arg,
25182 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25183 			return (EFAULT);
25184 		}
25185 		break;
25186 	}
25187 
25188 #else /* ! _MULTI_DATAMODEL */
25189 
25190 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25191 		return (EFAULT);
25192 	}
25193 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25194 	if (rval != 0) {
25195 		return (rval);
25196 	}
25197 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag) != 0) {
25198 		return (EFAULT);
25199 	}
25200 
25201 #endif /* _MULTI_DATAMODEL */
25202 
25203 	return (rval);
25204 }
25205 
25206 
25207 /*
25208  * The following routines support the clustering functionality described below
25209  * and implement lost reservation reclaim functionality.
25210  *
25211  * Clustering
25212  * ----------
25213  * The clustering code uses two different, independent forms of SCSI
25214  * reservation: traditional SCSI-2 Reserve/Release and the newer SCSI-3
25215  * Persistent Group Reservations. For any particular disk, it will use either
25216  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25217  *
25218  * SCSI-2
25219  * The cluster software takes ownership of a multi-hosted disk by issuing the
25220  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25221  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a cluster,
25222  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, then issues
25223  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
25224  * meaning of failfast is that if the driver (on this host) ever encounters the
25225  * scsi error return code RESERVATION_CONFLICT from the device, it should
25226  * immediately panic the host. The motivation for this ioctl is that if this
25227  * host does encounter reservation conflict, the underlying cause is that some
25228  * other host of the cluster has decided that this host is no longer in the
25229  * cluster and has seized control of the disks for itself. Since this host is no
25230  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25231  * does two things:
25232  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25233  *      error to panic the host
25234  *      (b) it sets up a periodic timer to test whether this host still has
25235  *      "access" (in that no other host has reserved the device):  if the
25236  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25237  *      purpose of that periodic timer is to handle scenarios where the host is
25238  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25239  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25240  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25241  * the device itself.
25242  *
25243  * SCSI-3 PGR
25244  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25245  * facility is supported through the shared multihost disk ioctls
25246  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25247  * MHIOCGRP_PREEMPTANDABORT)
25248  *
25249  * Reservation Reclaim:
25250  * --------------------
25251  * To support the lost reservation reclaim operations this driver creates a
25252  * single thread to handle reinstating reservations on all devices that have
25253  * lost reservations.  sd_resv_reclaim_requests are logged for all devices that
25254  * have LOST RESERVATIONS when the scsi watch facility calls back sd_mhd_watch_cb,
25255  * and the reservation reclaim thread loops through the requests to regain the
25256  * lost reservations.
25257  */
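
/*
 * Illustrative userland sketch (not part of the driver) of the SCSI-2
 * sequence described above: take ownership, enable failfast with a polling
 * interval (in milliseconds), and later disable failfast and release the
 * disk.  The descriptor, the one-second interval, and the error handling
 * are hypothetical.
 *
 *	#include <sys/mhd.h>
 *	#include <unistd.h>
 *
 *	int
 *	own_disk(int fd)
 *	{
 *		int	ff_interval = 1000;
 *
 *		if (ioctl(fd, MHIOCTKOWN, NULL) != 0)
 *			return (-1);
 *		return (ioctl(fd, MHIOCENFAILFAST, &ff_interval));
 *	}
 *
 *	void
 *	release_disk(int fd)
 *	{
 *		int	off = 0;
 *
 *		(void) ioctl(fd, MHIOCENFAILFAST, &off);
 *		(void) ioctl(fd, MHIOCRELEASE, NULL);
 *	}
 */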
25258 
25259 /*
25260  *    Function: sd_check_mhd()
25261  *
25262  * Description: This function sets up and submits a scsi watch request or
25263  *		terminates an existing watch request. This routine is used in
25264  *		support of reservation reclaim.
25265  *
25266  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25267  *			 among multiple watches that share the callback function
25268  *		interval - the number of milliseconds specifying the watch
25269  *			   interval for issuing TEST UNIT READY commands. If
25270  *			   set to 0 the watch should be terminated. If the
25271  *			   interval is set to 0 and if the device is required
25272  *			   to hold reservation while disabling failfast, the
25273  *			   watch is restarted with an interval of
25274  *			   reinstate_resv_delay.
25275  *
25276  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25277  *		ENXIO      - Indicates an invalid device was specified
25278  *		EAGAIN     - Unable to submit the scsi watch request
25279  */
25280 
25281 static int
25282 sd_check_mhd(dev_t dev, int interval)
25283 {
25284 	struct sd_lun	*un;
25285 	opaque_t	token;
25286 
25287 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25288 		return (ENXIO);
25289 	}
25290 
25291 	/* is this a watch termination request? */
25292 	if (interval == 0) {
25293 		mutex_enter(SD_MUTEX(un));
25294 		/* if there is an existing watch task then terminate it */
25295 		if (un->un_mhd_token) {
25296 			token = un->un_mhd_token;
25297 			un->un_mhd_token = NULL;
25298 			mutex_exit(SD_MUTEX(un));
25299 			(void) scsi_watch_request_terminate(token,
25300 			    SCSI_WATCH_TERMINATE_WAIT);
25301 			mutex_enter(SD_MUTEX(un));
25302 		} else {
25303 			mutex_exit(SD_MUTEX(un));
25304 			/*
25305 			 * Note: If we return here we don't check for the
25306 			 * failfast case. This is the original legacy
25307 			 * implementation but perhaps we should be checking
25308 			 * the failfast case.
25309 			 */
25310 			return (0);
25311 		}
25312 		/*
25313 		 * If the device is required to hold reservation while
25314 		 * disabling failfast, we need to restart the scsi_watch
25315 		 * routine with an interval of reinstate_resv_delay.
25316 		 */
25317 		if (un->un_resvd_status & SD_RESERVE) {
25318 			interval = sd_reinstate_resv_delay/1000;
25319 		} else {
25320 			/* no failfast so bail */
25321 			mutex_exit(SD_MUTEX(un));
25322 			return (0);
25323 		}
25324 		mutex_exit(SD_MUTEX(un));
25325 	}
25326 
25327 	/*
25328 	 * adjust minimum time interval to 1 second,
25329 	 * and convert from msecs to usecs
25330 	 */
25331 	if (interval > 0 && interval < 1000) {
25332 		interval = 1000;
25333 	}
25334 	interval *= 1000;
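
	/*
	 * For example (hypothetical values): a requested interval of
	 * 2000 msec passes through unchanged and becomes 2,000,000 usec,
	 * while a requested interval of 100 msec is first clamped to
	 * 1000 msec and becomes 1,000,000 usec.
	 */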
25335 
25336 	/*
25337 	 * submit the request to the scsi_watch service
25338 	 */
25339 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25340 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25341 	if (token == NULL) {
25342 		return (EAGAIN);
25343 	}
25344 
25345 	/*
25346 	 * save token for termination later on
25347 	 */
25348 	mutex_enter(SD_MUTEX(un));
25349 	un->un_mhd_token = token;
25350 	mutex_exit(SD_MUTEX(un));
25351 	return (0);
25352 }
25353 
25354 
25355 /*
25356  *    Function: sd_mhd_watch_cb()
25357  *
25358  * Description: This function is the call back function used by the scsi watch
25359  *		facility. The scsi watch facility sends the "Test Unit Ready"
25360  *		and processes the status. If applicable (i.e. a "Unit Attention"
25361  *		status and automatic "Request Sense" not used) the scsi watch
25362  *		facility will send a "Request Sense" and retrieve the sense data
25363  *		to be passed to this callback function. In either case the
25364  *		to be passed to this callback function. In either case, whether
25365  *		the "Request Sense" was automatic or submitted by the facility,
25366  *		this callback is passed the status and sense data.
25367  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25368  *			among multiple watches that share this callback function
25369  *		resultp - scsi watch facility result packet containing scsi
25370  *			  packet, status byte and sense data
25371  *
25372  * Return Code: 0 - continue the watch task
25373  *		non-zero - terminate the watch task
25374  */
25375 
25376 static int
25377 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25378 {
25379 	struct sd_lun			*un;
25380 	struct scsi_status		*statusp;
25381 	uint8_t				*sensep;
25382 	struct scsi_pkt			*pkt;
25383 	uchar_t				actual_sense_length;
25384 	dev_t  				dev = (dev_t)arg;
25385 
25386 	ASSERT(resultp != NULL);
25387 	statusp			= resultp->statusp;
25388 	sensep			= (uint8_t *)resultp->sensep;
25389 	pkt			= resultp->pkt;
25390 	actual_sense_length	= resultp->actual_sense_length;
25391 
25392 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25393 		return (ENXIO);
25394 	}
25395 
25396 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25397 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25398 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25399 
25400 	/* Begin processing of the status and/or sense data */
25401 	if (pkt->pkt_reason != CMD_CMPLT) {
25402 		/* Handle the incomplete packet */
25403 		sd_mhd_watch_incomplete(un, pkt);
25404 		return (0);
25405 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25406 		if (*((unsigned char *)statusp)
25407 		    == STATUS_RESERVATION_CONFLICT) {
25408 			/*
25409 			 * Handle a reservation conflict by panicking if
25410 			 * configured for failfast or by logging the conflict
25411 			 * and updating the reservation status
25412 			 */
25413 			mutex_enter(SD_MUTEX(un));
25414 			if ((un->un_resvd_status & SD_FAILFAST) &&
25415 			    (sd_failfast_enable)) {
25416 				sd_panic_for_res_conflict(un);
25417 				/*NOTREACHED*/
25418 			}
25419 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25420 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25421 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25422 			mutex_exit(SD_MUTEX(un));
25423 		}
25424 	}
25425 
25426 	if (sensep != NULL) {
25427 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25428 			mutex_enter(SD_MUTEX(un));
25429 			if ((scsi_sense_asc(sensep) ==
25430 			    SD_SCSI_RESET_SENSE_CODE) &&
25431 			    (un->un_resvd_status & SD_RESERVE)) {
25432 				/*
25433 				 * The additional sense code indicates a power
25434 				 * on or bus device reset has occurred; update
25435 				 * the reservation status.
25436 				 */
25437 				un->un_resvd_status |=
25438 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25439 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25440 				    "sd_mhd_watch_cb: Lost Reservation\n");
25441 			}
25442 		} else {
25443 			return (0);
25444 		}
25445 	} else {
25446 		mutex_enter(SD_MUTEX(un));
25447 	}
25448 
25449 	if ((un->un_resvd_status & SD_RESERVE) &&
25450 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25451 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25452 			/*
25453 			 * A reset occurred in between the last probe and this
25454 			 * one so if a timeout is pending cancel it.
25455 			 */
25456 			if (un->un_resvd_timeid) {
25457 				timeout_id_t temp_id = un->un_resvd_timeid;
25458 				un->un_resvd_timeid = NULL;
25459 				mutex_exit(SD_MUTEX(un));
25460 				(void) untimeout(temp_id);
25461 				mutex_enter(SD_MUTEX(un));
25462 			}
25463 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25464 		}
25465 		if (un->un_resvd_timeid == NULL) {
25466 			/* Schedule a timeout to handle the lost reservation */
25467 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25468 			    (void *)dev,
25469 			    drv_usectohz(sd_reinstate_resv_delay));
25470 		}
25471 	}
25472 	mutex_exit(SD_MUTEX(un));
25473 	return (0);
25474 }
25475 
25476 
25477 /*
25478  *    Function: sd_mhd_watch_incomplete()
25479  *
25480  * Description: This function is used to find out why a scsi pkt sent by the
25481  *		scsi watch facility was not completed. Under some scenarios this
25482  *		scsi watch facility was not completed. In some scenarios this
25483  *		routine simply returns; otherwise it issues a lun, target, or
25484  *		bus reset to see if the drive is still online.
25485  *   Arguments: un  - driver soft state (unit) structure
25486  *		pkt - incomplete scsi pkt
25487  */
25488 
25489 static void
25490 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25491 {
25492 	int	be_chatty;
25493 	int	perr;
25494 
25495 	ASSERT(pkt != NULL);
25496 	ASSERT(un != NULL);
25497 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25498 	perr		= (pkt->pkt_statistics & STAT_PERR);
25499 
25500 	mutex_enter(SD_MUTEX(un));
25501 	if (un->un_state == SD_STATE_DUMPING) {
25502 		mutex_exit(SD_MUTEX(un));
25503 		return;
25504 	}
25505 
25506 	switch (pkt->pkt_reason) {
25507 	case CMD_UNX_BUS_FREE:
25508 		/*
25509 		 * If we had a parity error that caused the target to drop BSY*,
25510 		 * don't be chatty about it.
25511 		 */
25512 		if (perr && be_chatty) {
25513 			be_chatty = 0;
25514 		}
25515 		break;
25516 	case CMD_TAG_REJECT:
25517 		/*
25518 		 * The SCSI-2 spec states that a tag reject will be sent by the
25519 		 * target if tagged queuing is not supported. A tag reject may
25520 		 * also be sent during certain initialization periods or to
25521 		 * control internal resources. For the latter case the target
25522 		 * may also return Queue Full.
25523 		 *
25524 		 * If this driver receives a tag reject from a target that is
25525 		 * going through an init period or controlling internal
25526 		 * resources tagged queuing will be disabled. This is a less
25527 		 * than optimal behavior but the driver is unable to determine
25528 		 * the target state and assumes tagged queuing is not supported.
25529 		 */
25530 		pkt->pkt_flags = 0;
25531 		un->un_tagflags = 0;
25532 
25533 		if (un->un_f_opt_queueing == TRUE) {
25534 			un->un_throttle = min(un->un_throttle, 3);
25535 		} else {
25536 			un->un_throttle = 1;
25537 		}
25538 		mutex_exit(SD_MUTEX(un));
25539 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25540 		mutex_enter(SD_MUTEX(un));
25541 		break;
25542 	case CMD_INCOMPLETE:
25543 		/*
25544 		 * The transport stopped with an abnormal state; fall through and
25545 		 * reset the target and/or bus unless selection did not complete
25546 		 * (indicated by STATE_GOT_BUS), in which case we don't want to
25547 		 * go through a target/bus reset.
25548 		 */
25549 		if (pkt->pkt_state == STATE_GOT_BUS) {
25550 			break;
25551 		}
25552 		/*FALLTHROUGH*/
25553 
25554 	case CMD_TIMEOUT:
25555 	default:
25556 		/*
25557 		 * The lun may still be running the command, so a lun reset
25558 		 * should be attempted. If the lun reset fails or cannot be
25559 		 * issued, then try a target reset. Lastly try a bus reset.
25560 		 */
25561 		if ((pkt->pkt_statistics &
25562 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25563 			int reset_retval = 0;
25564 			mutex_exit(SD_MUTEX(un));
25565 			if (un->un_f_allow_bus_device_reset == TRUE) {
25566 				if (un->un_f_lun_reset_enabled == TRUE) {
25567 					reset_retval =
25568 					    scsi_reset(SD_ADDRESS(un),
25569 					    RESET_LUN);
25570 				}
25571 				if (reset_retval == 0) {
25572 					reset_retval =
25573 					    scsi_reset(SD_ADDRESS(un),
25574 					    RESET_TARGET);
25575 				}
25576 			}
25577 			if (reset_retval == 0) {
25578 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25579 			}
25580 			mutex_enter(SD_MUTEX(un));
25581 		}
25582 		break;
25583 	}
25584 
25585 	/* A device/bus reset has occurred; update the reservation status. */
25586 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25587 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25588 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25589 			un->un_resvd_status |=
25590 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25591 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25592 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25593 		}
25594 	}
25595 
25596 	/*
25597 	 * The disk has been turned off; update the device state.
25598 	 *
25599 	 * Note: Should we be offlining the disk here?
25600 	 */
25601 	if (pkt->pkt_state == STATE_GOT_BUS) {
25602 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25603 		    "Disk not responding to selection\n");
25604 		if (un->un_state != SD_STATE_OFFLINE) {
25605 			New_state(un, SD_STATE_OFFLINE);
25606 		}
25607 	} else if (be_chatty) {
25608 		/*
25609 		 * suppress messages if they are all the same pkt reason;
25610 		 * with TQ, many (up to 256) are returned with the same
25611 		 * pkt_reason
25612 		 */
25613 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25614 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25615 			    "sd_mhd_watch_incomplete: "
25616 			    "SCSI transport failed: reason '%s'\n",
25617 			    scsi_rname(pkt->pkt_reason));
25618 		}
25619 	}
25620 	un->un_last_pkt_reason = pkt->pkt_reason;
25621 	mutex_exit(SD_MUTEX(un));
25622 }
25623 
25624 
25625 /*
25626  *    Function: sd_sname()
25627  *
25628  * Description: This is a simple little routine to return a string containing
25629  *		a printable description of the command status byte for use in
25630  *		logging.
25631  *
25632  *   Arguments: status - the status byte
25633  *
25634  * Return Code: char * - string containing status description.
25635  */
25636 
25637 static char *
25638 sd_sname(uchar_t status)
25639 {
25640 	switch (status & STATUS_MASK) {
25641 	case STATUS_GOOD:
25642 		return ("good status");
25643 	case STATUS_CHECK:
25644 		return ("check condition");
25645 	case STATUS_MET:
25646 		return ("condition met");
25647 	case STATUS_BUSY:
25648 		return ("busy");
25649 	case STATUS_INTERMEDIATE:
25650 		return ("intermediate");
25651 	case STATUS_INTERMEDIATE_MET:
25652 		return ("intermediate - condition met");
25653 	case STATUS_RESERVATION_CONFLICT:
25654 		return ("reservation_conflict");
25655 	case STATUS_TERMINATED:
25656 		return ("command terminated");
25657 	case STATUS_QFULL:
25658 		return ("queue full");
25659 	default:
25660 		return ("<unknown status>");
25661 	}
25662 }
25663 
25664 
25665 /*
25666  *    Function: sd_mhd_resvd_recover()
25667  *
25668  * Description: This function adds a reservation entry to the
25669  *		sd_resv_reclaim_request list and signals the reservation
25670  *		reclaim thread that there is work pending. If the reservation
25671  *		reclaim thread has not been previously created this function
25672  *		will kick it off.
25673  *
25674  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25675  *			among multiple watches that share this callback function
25676  *
25677  *     Context: This routine is called by timeout() and is run in interrupt
25678  *		context. It must not sleep or call other functions which may
25679  *		sleep.
25680  */
25681 
25682 static void
25683 sd_mhd_resvd_recover(void *arg)
25684 {
25685 	dev_t			dev = (dev_t)arg;
25686 	struct sd_lun		*un;
25687 	struct sd_thr_request	*sd_treq = NULL;
25688 	struct sd_thr_request	*sd_cur = NULL;
25689 	struct sd_thr_request	*sd_prev = NULL;
25690 	int			already_there = 0;
25691 
25692 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25693 		return;
25694 	}
25695 
25696 	mutex_enter(SD_MUTEX(un));
25697 	un->un_resvd_timeid = NULL;
25698 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25699 		/*
25700 		 * There was a reset so don't issue the reserve, allow the
25701 		 * sd_mhd_watch_cb callback function to notice this and
25702 		 * reschedule the timeout for reservation.
25703 		 */
25704 		mutex_exit(SD_MUTEX(un));
25705 		return;
25706 	}
25707 	mutex_exit(SD_MUTEX(un));
25708 
25709 	/*
25710 	 * Add this device to the sd_resv_reclaim_request list and the
25711 	 * sd_resv_reclaim_thread should take care of the rest.
25712 	 *
25713 	 * Note: We can't sleep in this context so if the memory allocation
25714 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25715 	 * reschedule the timeout for reservation.  (4378460)
25716 	 */
25717 	sd_treq = (struct sd_thr_request *)
25718 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25719 	if (sd_treq == NULL) {
25720 		return;
25721 	}
25722 
25723 	sd_treq->sd_thr_req_next = NULL;
25724 	sd_treq->dev = dev;
25725 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25726 	if (sd_tr.srq_thr_req_head == NULL) {
25727 		sd_tr.srq_thr_req_head = sd_treq;
25728 	} else {
25729 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25730 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25731 			if (sd_cur->dev == dev) {
25732 				/*
25733 				 * already in Queue so don't log
25734 				 * another request for the device
25735 				 */
25736 				already_there = 1;
25737 				break;
25738 			}
25739 			sd_prev = sd_cur;
25740 		}
25741 		if (!already_there) {
25742 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25743 			    "logging request for %lx\n", dev);
25744 			sd_prev->sd_thr_req_next = sd_treq;
25745 		} else {
25746 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25747 		}
25748 	}
25749 
25750 	/*
25751 	 * Create a kernel thread to do the reservation reclaim and free up this
25752 	 * thread, since we cannot block this (timeout) thread while we go away
25753 	 * to do the reservation reclaim.
25754 	 */
25755 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25756 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25757 		    sd_resv_reclaim_thread, NULL,
25758 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25759 
25760 	/* Tell the reservation reclaim thread that it has work to do */
25761 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25762 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25763 }
25764 
25765 /*
25766  *    Function: sd_resv_reclaim_thread()
25767  *
25768  * Description: This function implements the reservation reclaim operations.
25769  *
25770  *   Arguments: None.  The thread walks the global sd_tr request queue to
25771  *		      find devices with pending reservation reclaim requests.
25772  */
25773 
25774 static void
25775 sd_resv_reclaim_thread()
25776 {
25777 	struct sd_lun		*un;
25778 	struct sd_thr_request	*sd_mhreq;
25779 
25780 	/* Wait for work */
25781 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25782 	if (sd_tr.srq_thr_req_head == NULL) {
25783 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25784 		    &sd_tr.srq_resv_reclaim_mutex);
25785 	}
25786 
25787 	/* Loop while we have work */
25788 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25789 		un = ddi_get_soft_state(sd_state,
25790 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25791 		if (un == NULL) {
25792 			/*
25793 			 * softstate structure is NULL so just
25794 			 * dequeue the request and continue
25795 			 */
25796 			sd_tr.srq_thr_req_head =
25797 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25798 			kmem_free(sd_tr.srq_thr_cur_req,
25799 			    sizeof (struct sd_thr_request));
25800 			continue;
25801 		}
25802 
25803 		/* dequeue the request */
25804 		sd_mhreq = sd_tr.srq_thr_cur_req;
25805 		sd_tr.srq_thr_req_head =
25806 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25807 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25808 
25809 		/*
25810 		 * Reclaim reservation only if SD_RESERVE is still set. There
25811 		 * may have been a call to MHIOCRELEASE before we got here.
25812 		 */
25813 		mutex_enter(SD_MUTEX(un));
25814 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25815 			/*
25816 			 * Note: The SD_LOST_RESERVE flag is cleared before
25817 			 * reclaiming the reservation. If this is done after the
25818 			 * call to sd_reserve_release a reservation loss in the
25819 			 * window between pkt completion of reserve cmd and
25820 			 * mutex_enter below may not be recognized.
25821 			 */
25822 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25823 			mutex_exit(SD_MUTEX(un));
25824 
25825 			if (sd_reserve_release(sd_mhreq->dev,
25826 			    SD_RESERVE) == 0) {
25827 				mutex_enter(SD_MUTEX(un));
25828 				un->un_resvd_status |= SD_RESERVE;
25829 				mutex_exit(SD_MUTEX(un));
25830 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25831 				    "sd_resv_reclaim_thread: "
25832 				    "Reservation Recovered\n");
25833 			} else {
25834 				mutex_enter(SD_MUTEX(un));
25835 				un->un_resvd_status |= SD_LOST_RESERVE;
25836 				mutex_exit(SD_MUTEX(un));
25837 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25838 				    "sd_resv_reclaim_thread: Failed "
25839 				    "Reservation Recovery\n");
25840 			}
25841 		} else {
25842 			mutex_exit(SD_MUTEX(un));
25843 		}
25844 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25845 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25846 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25847 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25848 		/*
25849 		 * wakeup the destroy thread if anyone is waiting on
25850 		 * us to complete.
25851 		 */
25852 		cv_signal(&sd_tr.srq_inprocess_cv);
25853 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25854 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25855 	}
25856 
25857 	/*
25858 	 * clean up the sd_tr structure now that this thread will not exist
25859 	 */
25860 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25861 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25862 	sd_tr.srq_resv_reclaim_thread = NULL;
25863 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25864 	thread_exit();
25865 }
25866 
25867 
25868 /*
25869  *    Function: sd_rmv_resv_reclaim_req()
25870  *
25871  * Description: This function removes any pending reservation reclaim requests
25872  *		for the specified device.
25873  *
25874  *   Arguments: dev - the device 'dev_t'
25875  */
25876 
25877 static void
25878 sd_rmv_resv_reclaim_req(dev_t dev)
25879 {
25880 	struct sd_thr_request *sd_mhreq;
25881 	struct sd_thr_request *sd_prev;
25882 
25883 	/* Remove a reservation reclaim request from the list */
25884 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25885 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25886 		/*
25887 		 * We are attempting to reinstate reservation for
25888 		 * this device. We wait for sd_reserve_release()
25889 		 * to return before we return.
25890 		 */
25891 		cv_wait(&sd_tr.srq_inprocess_cv,
25892 		    &sd_tr.srq_resv_reclaim_mutex);
25893 	} else {
25894 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25895 		if (sd_mhreq && sd_mhreq->dev == dev) {
25896 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25897 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25898 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25899 			return;
25900 		}
25901 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25902 			if (sd_mhreq && sd_mhreq->dev == dev) {
25903 				break;
25904 			}
25905 			sd_prev = sd_mhreq;
25906 		}
25907 		if (sd_mhreq != NULL) {
25908 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25909 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25910 		}
25911 	}
25912 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25913 }
25914 
25915 
25916 /*
25917  *    Function: sd_mhd_reset_notify_cb()
25918  *
25919  * Description: This is a call back function for scsi_reset_notify. This
25920  *		function updates the softstate reserved status and logs the
25921  *		reset. The driver scsi watch facility callback function
25922  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25923  *		will reclaim the reservation.
25924  *
25925  *   Arguments: arg  - driver soft state (unit) structure
25926  */
25927 
25928 static void
25929 sd_mhd_reset_notify_cb(caddr_t arg)
25930 {
25931 	struct sd_lun *un = (struct sd_lun *)arg;
25932 
25933 	mutex_enter(SD_MUTEX(un));
25934 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25935 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25936 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25937 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25938 	}
25939 	mutex_exit(SD_MUTEX(un));
25940 }
25941 
25942 
25943 /*
25944  *    Function: sd_take_ownership()
25945  *
25946  * Description: This routine implements an algorithm to achieve a stable
25947  *		reservation on disks which don't implement priority reserve,
25948  *		and makes sure that other host lose re-reservation attempts.
25949  *		and makes sure that other hosts lose their re-reservation attempts.
25950  *		This algorithm consists of a loop that keeps issuing the RESERVE
25951  *		for some period of time (min_ownership_delay, default 6 seconds).
25952  *		During that loop, it looks to see if there has been a bus device
25953  *		reset or bus reset (both of which cause an existing reservation
25954  *		to be lost). If the reservation is lost, RESERVE is reissued until a
25955  *		until max_ownership_delay has expired. This loop ensures that
25956  *		the host really did manage to reserve the device, in spite of
25957  *		resets. The looping for min_ownership_delay (default six
25958  *		seconds) is important to early generation clustering products,
25959  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25960  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25961  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25962  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25963  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25964  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25965  *		no longer "owns" the disk and will have panicked itself.  Thus,
25966  *		the host issuing the MHIOCTKOWN is assured (with timing
25967  *		dependencies) that by the time it actually starts to use the
25968  *		disk for real work, the old owner is no longer accessing it.
25969  *
25970  *		min_ownership_delay is the minimum amount of time for which the
25971  *		disk must be reserved continuously devoid of resets before the
25972  *		MHIOCTKOWN ioctl will return success.
25973  *
25974  *		max_ownership_delay indicates the amount of time within which the
25975  *		take ownership should succeed or time out with an error.
25976  *
25977  *   Arguments: dev - the device 'dev_t'
25978  *		*p  - struct containing timing info.
25979  *
25980  * Return Code: 0 for success or error code
25981  */
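
/*
 * Worked example with the defaults above (hypothetical timeline): the loop
 * below polls every 500 msec, so stable ownership requires at least four
 * consecutive successful RESERVEs (reservation_count >= 4) with no reset
 * seen for min_ownership_delay (6 seconds).  Any reset or reservation loss
 * restarts the 6-second window, and if no window completes within
 * max_ownership_delay (30 seconds) the ioctl fails with EACCES.
 */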
25982 
25983 static int
25984 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25985 {
25986 	struct sd_lun	*un;
25987 	int		rval;
25988 	int		err;
25989 	int		reservation_count   = 0;
25990 	int		min_ownership_delay =  6000000; /* in usec */
25991 	int		max_ownership_delay = 30000000; /* in usec */
25992 	clock_t		start_time;	/* starting time of this algorithm */
25993 	clock_t		end_time;	/* time limit for giving up */
25994 	clock_t		ownership_time;	/* time limit for stable ownership */
25995 	clock_t		current_time;
25996 	clock_t		previous_current_time;
25997 
25998 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25999 		return (ENXIO);
26000 	}
26001 
26002 	/*
26003 	 * Attempt a device reservation. A priority reservation is requested.
26004 	 */
26005 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
26006 	    != SD_SUCCESS) {
26007 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26008 		    "sd_take_ownership: return(1)=%d\n", rval);
26009 		return (rval);
26010 	}
26011 
26012 	/* Update the softstate reserved status to indicate the reservation */
26013 	mutex_enter(SD_MUTEX(un));
26014 	un->un_resvd_status |= SD_RESERVE;
26015 	un->un_resvd_status &=
26016 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
26017 	mutex_exit(SD_MUTEX(un));
26018 
26019 	if (p != NULL) {
26020 		if (p->min_ownership_delay != 0) {
26021 			min_ownership_delay = p->min_ownership_delay * 1000;
26022 		}
26023 		if (p->max_ownership_delay != 0) {
26024 			max_ownership_delay = p->max_ownership_delay * 1000;
26025 		}
26026 	}
26027 	SD_INFO(SD_LOG_IOCTL_MHD, un,
26028 	    "sd_take_ownership: min, max delays: %d, %d\n",
26029 	    min_ownership_delay, max_ownership_delay);
26030 
26031 	start_time = ddi_get_lbolt();
26032 	current_time	= start_time;
26033 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
26034 	end_time	= start_time + drv_usectohz(max_ownership_delay);
26035 
26036 	while (current_time - end_time < 0) {
26037 		delay(drv_usectohz(500000));
26038 
26039 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
26040 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
26041 				mutex_enter(SD_MUTEX(un));
26042 				rval = (un->un_resvd_status &
26043 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
26044 				mutex_exit(SD_MUTEX(un));
26045 				break;
26046 			}
26047 		}
26048 		previous_current_time = current_time;
26049 		current_time = ddi_get_lbolt();
26050 		mutex_enter(SD_MUTEX(un));
26051 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26052 			ownership_time = ddi_get_lbolt() +
26053 			    drv_usectohz(min_ownership_delay);
26054 			reservation_count = 0;
26055 		} else {
26056 			reservation_count++;
26057 		}
26058 		un->un_resvd_status |= SD_RESERVE;
26059 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26060 		mutex_exit(SD_MUTEX(un));
26061 
26062 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26063 		    "sd_take_ownership: ticks for loop iteration=%ld, "
26064 		    "reservation=%s\n", (current_time - previous_current_time),
26065 		    reservation_count ? "ok" : "reclaimed");
26066 
26067 		if (current_time - ownership_time >= 0 &&
26068 		    reservation_count >= 4) {
26069 			rval = 0; /* Achieved a stable ownership */
26070 			break;
26071 		}
26072 		if (current_time - end_time >= 0) {
26073 			rval = EACCES; /* No ownership in max possible time */
26074 			break;
26075 		}
26076 	}
26077 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26078 	    "sd_take_ownership: return(2)=%d\n", rval);
26079 	return (rval);
26080 }
26081 
26082 
26083 /*
26084  *    Function: sd_reserve_release()
26085  *
26086  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26087  *		PRIORITY RESERVE commands based on a user specified command type
26088  *
26089  *   Arguments: dev - the device 'dev_t'
26090  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26091  *		      SD_RESERVE, SD_RELEASE
26092  *
26093  * Return Code: 0 or Error Code
26094  */
26095 
26096 static int
26097 sd_reserve_release(dev_t dev, int cmd)
26098 {
26099 	struct uscsi_cmd	*com = NULL;
26100 	struct sd_lun		*un = NULL;
26101 	char			cdb[CDB_GROUP0];
26102 	int			rval;
26103 
26104 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26105 	    (cmd == SD_PRIORITY_RESERVE));
26106 
26107 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26108 		return (ENXIO);
26109 	}
26110 
26111 	/* instantiate and initialize the command and cdb */
26112 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26113 	bzero(cdb, CDB_GROUP0);
26114 	com->uscsi_flags   = USCSI_SILENT;
26115 	com->uscsi_timeout = un->un_reserve_release_time;
26116 	com->uscsi_cdblen  = CDB_GROUP0;
26117 	com->uscsi_cdb	   = cdb;
26118 	if (cmd == SD_RELEASE) {
26119 		cdb[0] = SCMD_RELEASE;
26120 	} else {
26121 		cdb[0] = SCMD_RESERVE;
26122 	}
26123 
26124 	/* Send the command. */
26125 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26126 	    UIO_SYSSPACE, SD_PATH_STANDARD);
26127 
26128 	/*
26129 	 * "break" a reservation that is held by another host, by issuing a
26130 	 * reset if priority reserve is desired, and we could not get the
26131 	 * device.
26132 	 */
26133 	if ((cmd == SD_PRIORITY_RESERVE) &&
26134 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26135 		/*
26136 		 * First try to reset the LUN. If we cannot, then try a target
26137 		 * reset, followed by a bus reset if the target reset fails.
26138 		 */
26139 		int reset_retval = 0;
26140 		if (un->un_f_lun_reset_enabled == TRUE) {
26141 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26142 		}
26143 		if (reset_retval == 0) {
26144 			/* The LUN reset either failed or was not issued */
26145 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26146 		}
26147 		if ((reset_retval == 0) &&
26148 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26149 			rval = EIO;
26150 			kmem_free(com, sizeof (*com));
26151 			return (rval);
26152 		}
26153 
26154 		bzero(com, sizeof (struct uscsi_cmd));
26155 		com->uscsi_flags   = USCSI_SILENT;
26156 		com->uscsi_cdb	   = cdb;
26157 		com->uscsi_cdblen  = CDB_GROUP0;
26158 		com->uscsi_timeout = 5;
26159 
26160 		/*
26161 		 * Reissue the last reserve command, this time without request
26162 		 * sense.  Assume that it is just a regular reserve command.
26163 		 */
26164 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26165 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26166 	}
26167 
26168 	/* Return an error if still getting a reservation conflict. */
26169 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26170 		rval = EACCES;
26171 	}
26172 
26173 	kmem_free(com, sizeof (*com));
26174 	return (rval);
26175 }
26176 
26177 
26178 #define	SD_NDUMP_RETRIES	12
26179 /*
26180  *	System Crash Dump routine
26181  */
26182 
26183 static int
26184 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26185 {
26186 	int		instance;
26187 	int		partition;
26188 	int		i;
26189 	int		err;
26190 	struct sd_lun	*un;
26191 	struct dk_map	*lp;
26192 	struct scsi_pkt *wr_pktp;
26193 	struct buf	*wr_bp;
26194 	struct buf	wr_buf;
26195 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26196 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26197 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26198 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26199 	size_t		io_start_offset;
26200 	int		doing_rmw = FALSE;
26201 	int		rval;
26202 #if defined(__i386) || defined(__amd64)
26203 	ssize_t dma_resid;
26204 	daddr_t oblkno;
26205 #endif
26206 
26207 	instance = SDUNIT(dev);
26208 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26209 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26210 		return (ENXIO);
26211 	}
26212 
26213 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26214 
26215 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26216 
26217 	partition = SDPART(dev);
26218 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26219 
26220 	/* Validate the blocks to dump against the partition size. */
26221 	lp = &un->un_map[partition];
26222 	if ((blkno + nblk) > lp->dkl_nblk) {
26223 		SD_TRACE(SD_LOG_DUMP, un,
26224 		    "sddump: dump range larger than partition: "
26225 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26226 		    blkno, nblk, lp->dkl_nblk);
26227 		return (EINVAL);
26228 	}
26229 
26230 	mutex_enter(&un->un_pm_mutex);
26231 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26232 		struct scsi_pkt *start_pktp;
26233 
26234 		mutex_exit(&un->un_pm_mutex);
26235 
26236 		/*
26237 		 * use pm framework to power on HBA 1st
26238 		 */
26239 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26240 
26241 		/*
26242 		 * Dump no longer uses sdpower to power on a device; it's
26243 		 * done in-line here so it can be done in polled mode.
26244 		 */
26245 
26246 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26247 
26248 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26249 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26250 
26251 		if (start_pktp == NULL) {
26252 			/* We were not given a SCSI packet, fail. */
26253 			return (EIO);
26254 		}
26255 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26256 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26257 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26258 		start_pktp->pkt_flags = FLAG_NOINTR;
26259 
26260 		mutex_enter(SD_MUTEX(un));
26261 		SD_FILL_SCSI1_LUN(un, start_pktp);
26262 		mutex_exit(SD_MUTEX(un));
26263 		/*
26264 		 * Scsi_poll returns 0 (success) if the command completes and
26265 		 * the status block is STATUS_GOOD.
26266 		 */
26267 		if (sd_scsi_poll(un, start_pktp) != 0) {
26268 			scsi_destroy_pkt(start_pktp);
26269 			return (EIO);
26270 		}
26271 		scsi_destroy_pkt(start_pktp);
26272 		(void) sd_ddi_pm_resume(un);
26273 	} else {
26274 		mutex_exit(&un->un_pm_mutex);
26275 	}
26276 
26277 	mutex_enter(SD_MUTEX(un));
26278 	un->un_throttle = 0;
26279 
26280 	/*
26281 	 * The first time through, reset the specific target device.
26282 	 * However, when cpr calls sddump we know that sd is in
26283 	 * a good state so no bus reset is required.
26284 	 * Clear sense data via Request Sense cmd.
26285 	 * In sddump we don't care about allow_bus_device_reset anymore.
26286 	 */
26287 
26288 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26289 	    (un->un_state != SD_STATE_DUMPING)) {
26290 
26291 		New_state(un, SD_STATE_DUMPING);
26292 
26293 		if (un->un_f_is_fibre == FALSE) {
26294 			mutex_exit(SD_MUTEX(un));
26295 			/*
26296 			 * Attempt a bus reset for parallel scsi.
26297 			 *
26298 			 * Note: A bus reset is required because on some host
26299 			 * systems (i.e. E420R) a bus device reset is
26300 			 * insufficient to reset the state of the target.
26301 			 *
26302 			 * Note: Don't issue the reset for fibre-channel,
26303 			 * because this tends to hang the bus (loop) for
26304 			 * too long while everyone is logging out and in
26305 			 * and the deadman timer for dumping will fire
26306 			 * before the dump is complete.
26307 			 */
26308 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26309 				mutex_enter(SD_MUTEX(un));
26310 				Restore_state(un);
26311 				mutex_exit(SD_MUTEX(un));
26312 				return (EIO);
26313 			}
26314 
26315 			/* Delay to give the device some recovery time. */
26316 			drv_usecwait(10000);
26317 
26318 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26319 				SD_INFO(SD_LOG_DUMP, un,
26320 				    "sddump: sd_send_polled_RQS failed\n");
26321 			}
26322 			mutex_enter(SD_MUTEX(un));
26323 		}
26324 	}
26325 
26326 	/*
26327 	 * Convert the partition-relative block number to a
26328 	 * disk physical block number.
26329 	 */
26330 	blkno += un->un_offset[partition];
26331 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26332 
26333 
26334 	/*
26335 	 * Check if the device has a non-512 block size.
26336 	 */
26337 	wr_bp = NULL;
26338 	if (NOT_DEVBSIZE(un)) {
26339 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26340 		tgt_byte_count = nblk * un->un_sys_blocksize;
26341 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26342 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26343 			doing_rmw = TRUE;
26344 			/*
26345 			 * Calculate the block number and number of blocks
26346 			 * in terms of the media block size.
26347 			 */
26348 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26349 			tgt_nblk =
26350 			    ((tgt_byte_offset + tgt_byte_count +
26351 				(un->un_tgt_blocksize - 1)) /
26352 				un->un_tgt_blocksize) - tgt_blkno;
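
			/*
			 * Hypothetical example: with 512-byte system blocks
			 * and a 2048-byte target block size, blkno = 3 and
			 * nblk = 2 give tgt_byte_offset = 1536 and
			 * tgt_byte_count = 1024.  Neither is 2048-aligned,
			 * so tgt_blkno = 0 and
			 * tgt_nblk = ((1536 + 1024 + 2047) / 2048) - 0 = 2;
			 * the read-modify-write spans two target blocks.
			 */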
26353 
26354 			/*
26355 			 * Invoke the routine which is going to do read part
26356 			 * of read-modify-write.
26357 			 * Note that this routine returns a pointer to
26358 			 * a valid bp in wr_bp.
26359 			 */
26360 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26361 			    &wr_bp);
26362 			if (err) {
26363 				mutex_exit(SD_MUTEX(un));
26364 				return (err);
26365 			}
26366 			/*
26367 			 * The offset is calculated as:
26368 			 * (original block # * system block size) -
26369 			 * (target block # * target block size)
26370 			 */
26371 			io_start_offset =
26372 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26373 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
26374 
26375 			ASSERT((io_start_offset >= 0) &&
26376 			    (io_start_offset < un->un_tgt_blocksize));
26377 			/*
26378 			 * Do the modify portion of read modify write.
26379 			 */
26380 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26381 			    (size_t)nblk * un->un_sys_blocksize);
26382 		} else {
26383 			doing_rmw = FALSE;
26384 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26385 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26386 		}
26387 
26388 		/* Convert blkno and nblk to target blocks */
26389 		blkno = tgt_blkno;
26390 		nblk = tgt_nblk;
26391 	} else {
26392 		wr_bp = &wr_buf;
26393 		bzero(wr_bp, sizeof (struct buf));
26394 		wr_bp->b_flags		= B_BUSY;
26395 		wr_bp->b_un.b_addr	= addr;
26396 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26397 		wr_bp->b_resid		= 0;
26398 	}
26399 
26400 	mutex_exit(SD_MUTEX(un));
26401 
26402 	/*
26403 	 * Obtain a SCSI packet for the write command.
26404 	 * It should be safe to call the allocator here without
26405 	 * worrying about being locked for DVMA mapping because
26406 	 * the address we're passed is already a DVMA mapping
26407 	 *
26408 	 * We are also not going to worry about semaphore ownership
26409 	 * in the dump buffer. Dumping is single threaded at present.
26410 	 */
26411 
26412 	wr_pktp = NULL;
26413 
26414 #if defined(__i386) || defined(__amd64)
26415 	dma_resid = wr_bp->b_bcount;
26416 	oblkno = blkno;
26417 	while (dma_resid != 0) {
26418 #endif
26419 
26420 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26421 		wr_bp->b_flags &= ~B_ERROR;
26422 
26423 #if defined(__i386) || defined(__amd64)
26424 		blkno = oblkno +
26425 			((wr_bp->b_bcount - dma_resid) /
26426 			    un->un_tgt_blocksize);
26427 		nblk = dma_resid / un->un_tgt_blocksize;
26428 
26429 		if (wr_pktp) {
26430 			/* Partial DMA transfers after initial transfer */
26431 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26432 			    blkno, nblk);
26433 		} else {
26434 			/* Initial transfer */
26435 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26436 			    un->un_pkt_flags, NULL_FUNC, NULL,
26437 			    blkno, nblk);
26438 		}
26439 #else
26440 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26441 		    0, NULL_FUNC, NULL, blkno, nblk);
26442 #endif
26443 
26444 		if (rval == 0) {
26445 			/* We were given a SCSI packet, continue. */
26446 			break;
26447 		}
26448 
26449 		if (i == 0) {
26450 			if (wr_bp->b_flags & B_ERROR) {
26451 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26452 				    "no resources for dumping; "
26453 				    "error code: 0x%x, retrying",
26454 				    geterror(wr_bp));
26455 			} else {
26456 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26457 				    "no resources for dumping; retrying");
26458 			}
26459 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26460 			if (wr_bp->b_flags & B_ERROR) {
26461 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26462 				    "no resources for dumping; error code: "
26463 				    "0x%x, retrying\n", geterror(wr_bp));
26464 			}
26465 		} else {
26466 			if (wr_bp->b_flags & B_ERROR) {
26467 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26468 				    "no resources for dumping; "
26469 				    "error code: 0x%x, retries failed, "
26470 				    "giving up.\n", geterror(wr_bp));
26471 			} else {
26472 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26473 				    "no resources for dumping; "
26474 				    "retries failed, giving up.\n");
26475 			}
26476 			mutex_enter(SD_MUTEX(un));
26477 			Restore_state(un);
26478 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26479 				mutex_exit(SD_MUTEX(un));
26480 				scsi_free_consistent_buf(wr_bp);
26481 			} else {
26482 				mutex_exit(SD_MUTEX(un));
26483 			}
26484 			return (EIO);
26485 		}
26486 		drv_usecwait(10000);
26487 	}
26488 
26489 #if defined(__i386) || defined(__amd64)
26490 	/*
26491 	 * save the resid from PARTIAL_DMA
26492 	 */
26493 	dma_resid = wr_pktp->pkt_resid;
26494 	if (dma_resid != 0)
26495 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26496 	wr_pktp->pkt_resid = 0;
26497 #endif
26498 
26499 	/* SunBug 1222170 */
26500 	wr_pktp->pkt_flags = FLAG_NOINTR;
26501 
26502 	err = EIO;
26503 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26504 
26505 		/*
26506 		 * Scsi_poll returns 0 (success) if the command completes and
26507 		 * the status block is STATUS_GOOD.  We should only check
26508 		 * errors if this condition is not true.  Even then we should
26509 		 * send our own request sense packet only if we have a check
26510 		 * condition and auto request sense has not been performed by
26511 		 * the hba.
26512 		 */
26513 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26514 
26515 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26516 		    (wr_pktp->pkt_resid == 0)) {
26517 			err = SD_SUCCESS;
26518 			break;
26519 		}
26520 
26521 		/*
26522 		 * Check CMD_DEV_GONE first; give up if the device is gone.
26523 		 */
26524 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26525 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26526 			    "Device is gone\n");
26527 			break;
26528 		}
26529 
26530 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26531 			SD_INFO(SD_LOG_DUMP, un,
26532 			    "sddump: write failed with CHECK, try # %d\n", i);
26533 			if ((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0) {
26534 				(void) sd_send_polled_RQS(un);
26535 			}
26536 
26537 			continue;
26538 		}
26539 
26540 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26541 			int reset_retval = 0;
26542 
26543 			SD_INFO(SD_LOG_DUMP, un,
26544 			    "sddump: write failed with BUSY, try # %d\n", i);
26545 
26546 			if (un->un_f_lun_reset_enabled == TRUE) {
26547 				reset_retval = scsi_reset(SD_ADDRESS(un),
26548 				    RESET_LUN);
26549 			}
26550 			if (reset_retval == 0) {
26551 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26552 			}
26553 			(void) sd_send_polled_RQS(un);
26554 
26555 		} else {
26556 			SD_INFO(SD_LOG_DUMP, un,
26557 			    "sddump: write failed with 0x%x, try # %d\n",
26558 			    SD_GET_PKT_STATUS(wr_pktp), i);
26559 			mutex_enter(SD_MUTEX(un));
26560 			sd_reset_target(un, wr_pktp);
26561 			mutex_exit(SD_MUTEX(un));
26562 		}
26563 
26564 		/*
26565 		 * If we are not getting anywhere with lun/target resets,
26566 		 * let's reset the bus.
26567 		 */
26568 		if (i == SD_NDUMP_RETRIES/2) {
26569 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26570 			(void) sd_send_polled_RQS(un);
26571 		}
26572 
26573 	}
26574 #if defined(__i386) || defined(__amd64)
26575 	}	/* dma_resid */
26576 #endif
26577 
26578 	scsi_destroy_pkt(wr_pktp);
26579 	mutex_enter(SD_MUTEX(un));
26580 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26581 		mutex_exit(SD_MUTEX(un));
26582 		scsi_free_consistent_buf(wr_bp);
26583 	} else {
26584 		mutex_exit(SD_MUTEX(un));
26585 	}
26586 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26587 	return (err);
26588 }
26589 
26590 /*
26591  *    Function: sd_scsi_poll()
26592  *
26593  * Description: This is a wrapper for the scsi_poll call.
26594  *
26595  *   Arguments: sd_lun - The unit structure
26596  *              scsi_pkt - The scsi packet being sent to the device.
26597  *
26598  * Return Code: 0 - Command completed successfully with good status
26599  *             -1 - Command failed.  This could indicate a check condition
26600  *                  or other status value requiring recovery action.
26601  *
26602  */
26603 
26604 static int
26605 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26606 {
26607 	int status;
26608 
26609 	ASSERT(un != NULL);
26610 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26611 	ASSERT(pktp != NULL);
26612 
26613 	status = SD_SUCCESS;
26614 
26615 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26616 		pktp->pkt_flags |= un->un_tagflags;
26617 		pktp->pkt_flags &= ~FLAG_NODISCON;
26618 	}
26619 
26620 	status = sd_ddi_scsi_poll(pktp);
26621 	/*
26622 	 * Scsi_poll returns 0 (success) if the command completes and the
26623 	 * status block is STATUS_GOOD.  We should only check errors if this
26624 	 * condition is not true.  Even then we should send our own request
26625 	 * sense packet only if we have a check condition and auto
26626 	 * request sense has not been performed by the hba.
26627 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26628 	 */
26629 	if ((status != SD_SUCCESS) &&
26630 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26631 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26632 	    (pktp->pkt_reason != CMD_DEV_GONE))
26633 		(void) sd_send_polled_RQS(un);
26634 
26635 	return (status);
26636 }
26637 
26638 /*
26639  *    Function: sd_send_polled_RQS()
26640  *
26641  * Description: This sends the request sense command to a device.
26642  *
26643  *   Arguments: sd_lun - The unit structure
26644  *
26645  * Return Code: 0 - Command completed successfully with good status
26646  *             -1 - Command failed.
26647  *
26648  */
26649 
26650 static int
26651 sd_send_polled_RQS(struct sd_lun *un)
26652 {
26653 	int	ret_val;
26654 	struct	scsi_pkt	*rqs_pktp;
26655 	struct	buf		*rqs_bp;
26656 
26657 	ASSERT(un != NULL);
26658 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26659 
26660 	ret_val = SD_SUCCESS;
26661 
26662 	rqs_pktp = un->un_rqs_pktp;
26663 	rqs_bp	 = un->un_rqs_bp;
26664 
26665 	mutex_enter(SD_MUTEX(un));
26666 
26667 	if (un->un_sense_isbusy) {
26668 		ret_val = SD_FAILURE;
26669 		mutex_exit(SD_MUTEX(un));
26670 		return (ret_val);
26671 	}
26672 
26673 	/*
26674 	 * If the request sense buffer (and packet) is not in use,
26675 	 * let's set the un_sense_isbusy and send our packet
26676 	 */
26677 	un->un_sense_isbusy 	= 1;
26678 	rqs_pktp->pkt_resid  	= 0;
26679 	rqs_pktp->pkt_reason 	= 0;
26680 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26681 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26682 
26683 	mutex_exit(SD_MUTEX(un));
26684 
26685 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26686 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26687 
26688 	/*
26689 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26690 	 * axle - it has a call into us!
26691 	 */
26692 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26693 		SD_INFO(SD_LOG_COMMON, un,
26694 		    "sd_send_polled_RQS: RQS failed\n");
26695 	}
26696 
26697 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26698 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26699 
26700 	mutex_enter(SD_MUTEX(un));
26701 	un->un_sense_isbusy = 0;
26702 	mutex_exit(SD_MUTEX(un));
26703 
26704 	return (ret_val);
26705 }
26706 
26707 /*
26708  * Defines needed for localized version of the scsi_poll routine.
26709  */
26710 #define	SD_CSEC		10000			/* usecs */
26711 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
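
/*
 * With these values each polling iteration is one 10,000-usec (10 msec)
 * tick and SD_SEC_TO_CSEC is 100, so a pkt_time of 60 seconds yields
 * 60 * 100 = 6000 iterations of the retry loop in sd_ddi_scsi_poll().
 */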
26712 
26713 
26714 /*
26715  *    Function: sd_ddi_scsi_poll()
26716  *
26717  * Description: Localized version of the scsi_poll routine.  The purpose is to
26718  *		send a scsi_pkt to a device as a polled command.  This version
26719  *		is to ensure more robust handling of transport errors.
26720  *		Specifically, this routine handles the not-ready to ready
26721  *		transition for power up and reset of Sonoma devices.  This can
26722  *		take up to 45 seconds for power-on and 20 seconds for reset of
26723  *		a Sonoma lun.
26724  *
26725  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26726  *
26727  * Return Code: 0 - Command completed successfully with good status
26728  *             -1 - Command failed.
26729  *
26730  */
26731 
26732 static int
26733 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26734 {
26735 	int busy_count;
26736 	int timeout;
26737 	int rval = SD_FAILURE;
26738 	int savef;
26739 	uint8_t *sensep;
26740 	long savet;
26741 	void (*savec)();
26742 	/*
26743 	 * The following is defined in machdep.c and is used in determining if
26744 	 * the scsi transport system will do polled I/O instead of interrupt
26745 	 * I/O when called from xx_dump().
26746 	 */
26747 	extern int do_polled_io;
26748 
26749 	/*
26750 	 * save old flags in pkt, to restore at end
26751 	 */
26752 	savef = pkt->pkt_flags;
26753 	savec = pkt->pkt_comp;
26754 	savet = pkt->pkt_time;
26755 
26756 	pkt->pkt_flags |= FLAG_NOINTR;
26757 
26758 	/*
26759 	 * XXX there is nothing in the SCSA spec that states that we should not
26760 	 * do a callback for polled cmds; however, removing this will break sd
26761 	 * and probably other target drivers
26762 	 */
26763 	pkt->pkt_comp = NULL;
26764 
26765 	/*
26766 	 * we don't like a polled command without timeout.
26767 	 * 60 seconds seems long enough.
26768 	 */
26769 	if (pkt->pkt_time == 0) {
26770 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26771 	}
26772 
26773 	/*
26774 	 * Send polled cmd.
26775 	 *
26776 	 * We do some error recovery for various errors.  Tran_busy,
26777 	 * queue full, and non-dispatched commands are retried every 10 msec
26778 	 * as they are typically transient failures.  Busy status and Not
26779 	 * Ready are retried every second as these statuses take a while to
26780 	 * change.  Unit Attention is retried up to pkt_time (60) times
26781 	 * with no delay.
26782 	 */
26783 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26784 
26785 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26786 		int rc;
26787 		int poll_delay;
26788 
26789 		/*
26790 		 * Initialize pkt status variables.
26791 		 */
26792 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26793 
26794 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26795 			if (rc != TRAN_BUSY) {
26796 				/* Transport failed - give up. */
26797 				break;
26798 			} else {
26799 				/* Transport busy - try again. */
26800 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26801 			}
26802 		} else {
26803 			/*
26804 			 * Transport accepted - check pkt status.
26805 			 */
26806 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26807 			if (pkt->pkt_reason == CMD_CMPLT &&
26808 			    rc == STATUS_CHECK &&
26809 			    pkt->pkt_state & STATE_ARQ_DONE) {
26810 				struct scsi_arq_status *arqstat =
26811 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26812 
26813 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26814 			} else {
26815 				sensep = NULL;
26816 			}
26817 
26818 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26819 			    (rc == STATUS_GOOD)) {
26820 				/* No error - we're done */
26821 				rval = SD_SUCCESS;
26822 				break;
26823 
26824 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26825 				/* Lost connection - give up */
26826 				break;
26827 
26828 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26829 			    (pkt->pkt_state == 0)) {
26830 				/* Pkt not dispatched - try again. */
26831 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26832 
26833 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26834 			    (rc == STATUS_QFULL)) {
26835 				/* Queue full - try again. */
26836 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26837 
26838 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26839 			    (rc == STATUS_BUSY)) {
26840 				/* Busy - try again. */
26841 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26842 				busy_count += (SD_SEC_TO_CSEC - 1);
26843 
26844 			} else if ((sensep != NULL) &&
26845 			    (scsi_sense_key(sensep) ==
26846 				KEY_UNIT_ATTENTION)) {
26847 				/* Unit Attention - try again */
26848 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec. */
26849 				continue;
26850 
26851 			} else if ((sensep != NULL) &&
26852 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26853 			    (scsi_sense_asc(sensep) == 0x04) &&
26854 			    (scsi_sense_ascq(sensep) == 0x01)) {
26855 				/* Not ready -> ready - try again. */
26856 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26857 				busy_count += (SD_SEC_TO_CSEC - 1);
26858 
26859 			} else {
26860 				/* BAD status - give up. */
26861 				break;
26862 			}
26863 		}
26864 
26865 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26866 		    !do_polled_io) {
26867 			delay(drv_usectohz(poll_delay));
26868 		} else {
26869 			/* we busy wait during cpr_dump or interrupt threads */
26870 			drv_usecwait(poll_delay);
26871 		}
26872 	}
26873 
26874 	pkt->pkt_flags = savef;
26875 	pkt->pkt_comp = savec;
26876 	pkt->pkt_time = savet;
26877 	return (rval);
26878 }
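/*
 * Example (editorial sketch of the retry budget above, not driver code):
 * with the default 60 second timeout, timeout = 60 * SD_SEC_TO_CSEC =
 * 6000 ticks. A transport-busy or non-dispatched retry consumes 1 tick
 * and delays 10 msec; a Busy or not-ready retry consumes SD_SEC_TO_CSEC
 * ticks and delays 1 sec; a Unit Attention retry consumes SD_SEC_TO_CSEC
 * ticks with no delay, so at most pkt_time (60) Unit Attention retries
 * fit in the budget.
 */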
26879 
26880 
26881 /*
26882  *    Function: sd_persistent_reservation_in_read_keys
26883  *
26884  * Description: This routine is the driver entry point for handling CD-ROM
26885  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26886  *		by sending the SCSI-3 PRIN commands to the device.
26887  *		Processes the read keys command response by copying the
26888  *		reservation key information into the user provided buffer.
26889  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26890  *
26891  *   Arguments: un   -  Pointer to soft state struct for the target.
26892  *		usrp -	user provided pointer to multihost Persistent In Read
26893  *			Keys structure (mhioc_inkeys_t)
26894  *		flag -	this argument is a pass through to ddi_copyxxx()
26895  *			directly from the mode argument of ioctl().
26896  *
26897  * Return Code: 0   - Success
26898  *		EACCES
26899  *		ENOTSUP
26900  *		errno return code from sd_send_scsi_cmd()
26901  *
26902  *     Context: Can sleep. Does not return until command is completed.
26903  */
26904 
26905 static int
26906 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26907     mhioc_inkeys_t *usrp, int flag)
26908 {
26909 #ifdef _MULTI_DATAMODEL
26910 	struct mhioc_key_list32	li32;
26911 #endif
26912 	sd_prin_readkeys_t	*in;
26913 	mhioc_inkeys_t		*ptr;
26914 	mhioc_key_list_t	li;
26915 	uchar_t			*data_bufp;
26916 	int 			data_len;
26917 	int			rval;
26918 	size_t			copysz;
26919 
26920 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26921 		return (EINVAL);
26922 	}
26923 	bzero(&li, sizeof (mhioc_key_list_t));
26924 
26925 	/*
26926 	 * Get the listsize from user
26927 	 */
26928 #ifdef _MULTI_DATAMODEL
26929 
26930 	switch (ddi_model_convert_from(flag & FMODELS)) {
26931 	case DDI_MODEL_ILP32:
26932 		copysz = sizeof (struct mhioc_key_list32);
26933 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26934 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26935 			    "sd_persistent_reservation_in_read_keys: "
26936 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26937 			return (EFAULT);
26939 		}
26940 		li.listsize = li32.listsize;
26941 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26942 		break;
26943 
26944 	case DDI_MODEL_NONE:
26945 		copysz = sizeof (mhioc_key_list_t);
26946 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26947 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26948 			    "sd_persistent_reservation_in_read_keys: "
26949 			    "failed ddi_copyin: mhioc_key_list_t\n");
26950 			return (EFAULT);
26952 		}
26953 		break;
26954 	}
26955 
26956 #else /* ! _MULTI_DATAMODEL */
26957 	copysz = sizeof (mhioc_key_list_t);
26958 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26959 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26960 		    "sd_persistent_reservation_in_read_keys: "
26961 		    "failed ddi_copyin: mhioc_key_list_t\n");
26962 		return (EFAULT);
26964 	}
26965 #endif /* _MULTI_DATAMODEL */
26966 
26967 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26968 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26969 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26970 
26971 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26972 	    data_len, data_bufp)) != 0) {
26973 		goto done;
26974 	}
26975 	in = (sd_prin_readkeys_t *)data_bufp;
26976 	ptr->generation = BE_32(in->generation);
26977 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26978 
26979 	/*
26980 	 * Return the min(listsize, listlen) keys
26981 	 */
26982 #ifdef _MULTI_DATAMODEL
26983 
26984 	switch (ddi_model_convert_from(flag & FMODELS)) {
26985 	case DDI_MODEL_ILP32:
26986 		li32.listlen = li.listlen;
26987 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26988 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26989 			    "sd_persistent_reservation_in_read_keys: "
26990 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26991 			rval = EFAULT;
26992 			goto done;
26993 		}
26994 		break;
26995 
26996 	case DDI_MODEL_NONE:
26997 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26998 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26999 			    "sd_persistent_reservation_in_read_keys: "
27000 			    "failed ddi_copyout: mhioc_key_list_t\n");
27001 			rval = EFAULT;
27002 			goto done;
27003 		}
27004 		break;
27005 	}
27006 
27007 #else /* ! _MULTI_DATAMODEL */
27008 
27009 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27010 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27011 		    "sd_persistent_reservation_in_read_keys: "
27012 		    "failed ddi_copyout: mhioc_key_list_t\n");
27013 		rval = EFAULT;
27014 		goto done;
27015 	}
27016 
27017 #endif /* _MULTI_DATAMODEL */
27018 
27019 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27020 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27021 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27022 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27023 		    "sd_persistent_reservation_in_read_keys: "
27024 		    "failed ddi_copyout: keylist\n");
27025 		rval = EFAULT;
27026 	}
27027 done:
27028 	kmem_free(data_bufp, data_len);
27029 	return (rval);
27030 }
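/*
 * Example (illustrative userland sketch, not part of the driver): a
 * common two-pass MHIOCGRP_INKEYS pattern probes for the key count and
 * then sizes the key buffer. The structure and ioctl names are from
 * <sys/mhd.h>; the device path is hypothetical and error handling is
 * omitted.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <stropts.h>
 *
 *	mhioc_key_list_t kl = { 0 };
 *	mhioc_inkeys_t ik = { 0 };
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	ik.li = &kl;
 *	(void) ioctl(fd, MHIOCGRP_INKEYS, &ik);		// probe: sets listlen
 *	kl.listsize = kl.listlen;
 *	kl.list = calloc(kl.listsize, sizeof (mhioc_resv_key_t));
 *	(void) ioctl(fd, MHIOCGRP_INKEYS, &ik);		// keys copied out
 */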
27031 
27032 
27033 /*
27034  *    Function: sd_persistent_reservation_in_read_resv
27035  *
27036  * Description: This routine is the driver entry point for handling CD-ROM
27037  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27038  *		by sending the SCSI-3 PRIN commands to the device.
27039  *		Process the read persistent reservations command response by
27040  *		copying the reservation information into the user provided
27041  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27042  *
27043  *   Arguments: un   -  Pointer to soft state struct for the target.
27044  *		usrp -	user provided pointer to multihost Persistent In Read
27045  *			Reservations structure (mhioc_inresvs_t)
27046  *		flag -	this argument is a pass through to ddi_copyxxx()
27047  *			directly from the mode argument of ioctl().
27048  *
27049  * Return Code: 0   - Success
27050  *		EACCES
27051  *		ENOTSUP
27052  *		errno return code from sd_send_scsi_cmd()
27053  *
27054  *     Context: Can sleep. Does not return until command is completed.
27055  */
27056 
27057 static int
27058 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27059     mhioc_inresvs_t *usrp, int flag)
27060 {
27061 #ifdef _MULTI_DATAMODEL
27062 	struct mhioc_resv_desc_list32 resvlist32;
27063 #endif
27064 	sd_prin_readresv_t	*in;
27065 	mhioc_inresvs_t		*ptr;
27066 	sd_readresv_desc_t	*readresv_ptr;
27067 	mhioc_resv_desc_list_t	resvlist;
27068 	mhioc_resv_desc_t 	resvdesc;
27069 	uchar_t			*data_bufp;
27070 	int 			data_len;
27071 	int			rval;
27072 	int			i;
27073 	size_t			copysz;
27074 	mhioc_resv_desc_t	*bufp;
27075 
27076 	if ((ptr = usrp) == NULL) {
27077 		return (EINVAL);
27078 	}
27079 
27080 	/*
27081 	 * Get the listsize from user
27082 	 */
27083 #ifdef _MULTI_DATAMODEL
27084 	switch (ddi_model_convert_from(flag & FMODELS)) {
27085 	case DDI_MODEL_ILP32:
27086 		copysz = sizeof (struct mhioc_resv_desc_list32);
27087 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27088 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27089 			    "sd_persistent_reservation_in_read_resv: "
27090 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27091 			return (EFAULT);
27093 		}
27094 		resvlist.listsize = resvlist32.listsize;
27095 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27096 		break;
27097 
27098 	case DDI_MODEL_NONE:
27099 		copysz = sizeof (mhioc_resv_desc_list_t);
27100 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27101 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27102 			    "sd_persistent_reservation_in_read_resv: "
27103 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27104 			return (EFAULT);
27106 		}
27107 		break;
27108 	}
27109 #else /* ! _MULTI_DATAMODEL */
27110 	copysz = sizeof (mhioc_resv_desc_list_t);
27111 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27112 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27113 		    "sd_persistent_reservation_in_read_resv: "
27114 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27115 		return (EFAULT);
27117 	}
27118 #endif /* ! _MULTI_DATAMODEL */
27119 
27120 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27121 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27122 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27123 
27124 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27125 	    data_len, data_bufp)) != 0) {
27126 		goto done;
27127 	}
27128 	in = (sd_prin_readresv_t *)data_bufp;
27129 	ptr->generation = BE_32(in->generation);
27130 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27131 
27132 	/*
27133 	 * Return the min(listsize, listlen) reservation descriptors
27134 	 */
27135 #ifdef _MULTI_DATAMODEL
27136 
27137 	switch (ddi_model_convert_from(flag & FMODELS)) {
27138 	case DDI_MODEL_ILP32:
27139 		resvlist32.listlen = resvlist.listlen;
27140 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27141 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27142 			    "sd_persistent_reservation_in_read_resv: "
27143 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27144 			rval = EFAULT;
27145 			goto done;
27146 		}
27147 		break;
27148 
27149 	case DDI_MODEL_NONE:
27150 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27151 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27152 			    "sd_persistent_reservation_in_read_resv: "
27153 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27154 			rval = EFAULT;
27155 			goto done;
27156 		}
27157 		break;
27158 	}
27159 
27160 #else /* ! _MULTI_DATAMODEL */
27161 
27162 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27163 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27164 		    "sd_persistent_reservation_in_read_resv: "
27165 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27166 		rval = EFAULT;
27167 		goto done;
27168 	}
27169 
27170 #endif /* ! _MULTI_DATAMODEL */
27171 
27172 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27173 	bufp = resvlist.list;
27174 	copysz = sizeof (mhioc_resv_desc_t);
27175 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27176 	    i++, readresv_ptr++, bufp++) {
27177 
27178 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27179 		    MHIOC_RESV_KEY_SIZE);
27180 		resvdesc.type  = readresv_ptr->type;
27181 		resvdesc.scope = readresv_ptr->scope;
27182 		resvdesc.scope_specific_addr =
27183 		    BE_32(readresv_ptr->scope_specific_addr);
27184 
27185 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27186 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27187 			    "sd_persistent_reservation_in_read_resv: "
27188 			    "failed ddi_copyout: resvlist\n");
27189 			rval = EFAULT;
27190 			goto done;
27191 		}
27192 	}
27193 done:
27194 	kmem_free(data_bufp, data_len);
27195 	return (rval);
27196 }
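/*
 * Example (illustrative userland sketch, not part of the driver):
 * MHIOCGRP_INRESV follows the same two-pass pattern as MHIOCGRP_INKEYS
 * above, using mhioc_inresvs_t/mhioc_resv_desc_list_t from <sys/mhd.h>.
 * Error handling is omitted.
 *
 *	mhioc_resv_desc_list_t rl = { 0 };
 *	mhioc_inresvs_t ir = { 0 };
 *
 *	ir.li = &rl;
 *	(void) ioctl(fd, MHIOCGRP_INRESV, &ir);		// probe: sets listlen
 *	rl.listsize = rl.listlen;
 *	rl.list = calloc(rl.listsize, sizeof (mhioc_resv_desc_t));
 *	(void) ioctl(fd, MHIOCGRP_INRESV, &ir);		// descriptors copied out
 */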
27197 
27198 
27199 /*
27200  *    Function: sr_change_blkmode()
27201  *
27202  * Description: This routine is the driver entry point for handling CD-ROM
27203  *		block mode ioctl requests. Support for returning and changing
27204  *		the current block size in use by the device is implemented. The
27205  *		LBA size is changed via a MODE SELECT Block Descriptor.
27206  *
27207  *		This routine issues a mode sense with an allocation length of
27208  *		12 bytes for the mode page header and a single block descriptor.
27209  *
27210  *   Arguments: dev - the device 'dev_t'
27211  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27212  *		      CDROMSBLKMODE (set)
27213  *		data - current block size or requested block size
27214  *		flag - this argument is a pass through to ddi_copyxxx() directly
27215  *		       from the mode argument of ioctl().
27216  *
27217  * Return Code: the code returned by sd_send_scsi_cmd()
27218  *		EINVAL if invalid arguments are provided
27219  *		EFAULT if ddi_copyxxx() fails
27220  *		ENXIO if fail ddi_get_soft_state
27221  *		EIO if invalid mode sense block descriptor length
27222  *
27223  */
27224 
27225 static int
27226 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27227 {
27228 	struct sd_lun			*un = NULL;
27229 	struct mode_header		*sense_mhp, *select_mhp;
27230 	struct block_descriptor		*sense_desc, *select_desc;
27231 	int				current_bsize;
27232 	int				rval = EINVAL;
27233 	uchar_t				*sense = NULL;
27234 	uchar_t				*select = NULL;
27235 
27236 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27237 
27238 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27239 		return (ENXIO);
27240 	}
27241 
27242 	/*
27243 	 * The block length is changed via the Mode Select block descriptor, the
27244 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27245 	 * required as part of this routine. Therefore the mode sense allocation
27246 	 * length is specified to be the length of a mode page header and a
27247 	 * block descriptor.
27248 	 */
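	/*
	 * Illustrative buffer layout (editorial sketch; sizes follow the
	 * definitions in <sys/scsi/generic/mode.h>):
	 *
	 *	bytes 0..3	struct mode_header	(MODE_HEADER_LENGTH)
	 *	bytes 4..11	struct block_descriptor	(MODE_BLK_DESC_LENGTH)
	 *
	 * The current block size is read from, and the new block size is
	 * written to, the blksize_hi/mid/lo fields of the block descriptor.
	 */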
27249 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27250 
27251 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27252 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27253 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27254 		    "sr_change_blkmode: Mode Sense Failed\n");
27255 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27256 		return (rval);
27257 	}
27258 
27259 	/* Check the block descriptor len to handle only 1 block descriptor */
27260 	sense_mhp = (struct mode_header *)sense;
27261 	if ((sense_mhp->bdesc_length == 0) ||
27262 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27263 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27264 		    "sr_change_blkmode: Mode Sense returned invalid block"
27265 		    " descriptor length\n");
27266 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27267 		return (EIO);
27268 	}
27269 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27270 	current_bsize = ((sense_desc->blksize_hi << 16) |
27271 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27272 
27273 	/* Process command */
27274 	switch (cmd) {
27275 	case CDROMGBLKMODE:
27276 		/* Return the block size obtained during the mode sense */
27277 		if (ddi_copyout(&current_bsize, (void *)data,
27278 		    sizeof (int), flag) != 0)
27279 			rval = EFAULT;
27280 		break;
27281 	case CDROMSBLKMODE:
27282 		/* Validate the requested block size */
27283 		switch (data) {
27284 		case CDROM_BLK_512:
27285 		case CDROM_BLK_1024:
27286 		case CDROM_BLK_2048:
27287 		case CDROM_BLK_2056:
27288 		case CDROM_BLK_2336:
27289 		case CDROM_BLK_2340:
27290 		case CDROM_BLK_2352:
27291 		case CDROM_BLK_2368:
27292 		case CDROM_BLK_2448:
27293 		case CDROM_BLK_2646:
27294 		case CDROM_BLK_2647:
27295 			break;
27296 		default:
27297 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27298 			    "sr_change_blkmode: "
27299 			    "Block Size '%ld' Not Supported\n", data);
27300 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27301 			return (EINVAL);
27302 		}
27303 
27304 		/*
27305 		 * The current block size matches the requested block size so
27306 		 * there is no need to send the mode select to change the size
27307 		 */
27308 		if (current_bsize == data) {
27309 			break;
27310 		}
27311 
27312 		/* Build the select data for the requested block size */
27313 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27314 		select_mhp = (struct mode_header *)select;
27315 		select_desc =
27316 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27317 		/*
27318 		 * The LBA size is changed via the block descriptor, so the
27319 		 * descriptor is built according to the user data
27320 		 */
27321 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27322 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27323 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27324 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27325 
27326 		/* Send the mode select for the requested block size */
27327 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27328 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27329 		    SD_PATH_STANDARD)) != 0) {
27330 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27331 			    "sr_change_blkmode: Mode Select Failed\n");
27332 			/*
27333 			 * The mode select failed for the requested block size,
27334 			 * so reset the data for the original block size and
27335 			 * send it to the target. The error is indicated by the
27336 			 * return value for the failed mode select.
27337 			 */
27338 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27339 			select_desc->blksize_mid = sense_desc->blksize_mid;
27340 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27341 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27342 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27343 			    SD_PATH_STANDARD);
27344 		} else {
27345 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27346 			mutex_enter(SD_MUTEX(un));
27347 			sd_update_block_info(un, (uint32_t)data, 0);
27348 
27349 			mutex_exit(SD_MUTEX(un));
27350 		}
27351 		break;
27352 	default:
27353 		/* should not reach here, but check anyway */
27354 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27355 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27356 		rval = EINVAL;
27357 		break;
27358 	}
27359 
27360 	if (select) {
27361 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27362 	}
27363 	if (sense) {
27364 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27365 	}
27366 	return (rval);
27367 }
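/*
 * Example (illustrative userland sketch, not part of the driver): per the
 * ddi_copyout() and switch (data) handling above, the get form returns an
 * int through the pointer argument while the set form passes the new
 * block size by value. Error handling is omitted.
 *
 *	#include <sys/cdio.h>
 *
 *	int bsize;
 *	(void) ioctl(fd, CDROMGBLKMODE, &bsize);
 *	(void) ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048);
 */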
27368 
27369 
27370 /*
27371  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27372  * implement driver support for getting and setting the CD speed. The command
27373  * set used will be based on the device type. If the device has not been
27374  * identified as MMC the Toshiba vendor specific mode page will be used. If
27375  * the device is MMC but does not support the Real Time Streaming feature
27376  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27377  * be used to read the speed.
27378  */
27379 
27380 /*
27381  *    Function: sr_change_speed()
27382  *
27383  * Description: This routine is the driver entry point for handling CD-ROM
27384  *		drive speed ioctl requests for devices supporting the Toshiba
27385  *		vendor specific drive speed mode page. Support for returning
27386  *		and changing the current drive speed in use by the device is
27387  *		implemented.
27388  *
27389  *   Arguments: dev - the device 'dev_t'
27390  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27391  *		      CDROMSDRVSPEED (set)
27392  *		data - current drive speed or requested drive speed
27393  *		flag - this argument is a pass through to ddi_copyxxx() directly
27394  *		       from the mode argument of ioctl().
27395  *
27396  * Return Code: the code returned by sd_send_scsi_cmd()
27397  *		EINVAL if invalid arguments are provided
27398  *		EFAULT if ddi_copyxxx() fails
27399  *		ENXIO if fail ddi_get_soft_state
27400  *		EIO if invalid mode sense block descriptor length
27401  */
27402 
27403 static int
27404 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27405 {
27406 	struct sd_lun			*un = NULL;
27407 	struct mode_header		*sense_mhp, *select_mhp;
27408 	struct mode_speed		*sense_page, *select_page;
27409 	int				current_speed;
27410 	int				rval = EINVAL;
27411 	int				bd_len;
27412 	uchar_t				*sense = NULL;
27413 	uchar_t				*select = NULL;
27414 
27415 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27416 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27417 		return (ENXIO);
27418 	}
27419 
27420 	/*
27421 	 * Note: The drive speed is being modified here according to a Toshiba
27422 	 * vendor specific mode page (0x31).
27423 	 */
27424 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27425 
27426 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27427 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27428 	    SD_PATH_STANDARD)) != 0) {
27429 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27430 		    "sr_change_speed: Mode Sense Failed\n");
27431 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27432 		return (rval);
27433 	}
27434 	sense_mhp  = (struct mode_header *)sense;
27435 
27436 	/* Check the block descriptor len to handle only 1 block descriptor */
27437 	bd_len = sense_mhp->bdesc_length;
27438 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27439 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27440 		    "sr_change_speed: Mode Sense returned invalid block "
27441 		    "descriptor length\n");
27442 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27443 		return (EIO);
27444 	}
27445 
27446 	sense_page = (struct mode_speed *)
27447 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27448 	current_speed = sense_page->speed;
27449 
27450 	/* Process command */
27451 	switch (cmd) {
27452 	case CDROMGDRVSPEED:
27453 		/* Return the drive speed obtained during the mode sense */
27454 		if (current_speed == 0x2) {
27455 			current_speed = CDROM_TWELVE_SPEED;
27456 		}
27457 		if (ddi_copyout(&current_speed, (void *)data,
27458 		    sizeof (int), flag) != 0) {
27459 			rval = EFAULT;
27460 		}
27461 		break;
27462 	case CDROMSDRVSPEED:
27463 		/* Validate the requested drive speed */
27464 		switch ((uchar_t)data) {
27465 		case CDROM_TWELVE_SPEED:
27466 			data = 0x2;
27467 			/*FALLTHROUGH*/
27468 		case CDROM_NORMAL_SPEED:
27469 		case CDROM_DOUBLE_SPEED:
27470 		case CDROM_QUAD_SPEED:
27471 		case CDROM_MAXIMUM_SPEED:
27472 			break;
27473 		default:
27474 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27475 			    "sr_change_speed: "
27476 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27477 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27478 			return (EINVAL);
27479 		}
27480 
27481 		/*
27482 		 * The current drive speed matches the requested drive speed so
27483 		 * there is no need to send the mode select to change the speed
27484 		 */
27485 		if (current_speed == data) {
27486 			break;
27487 		}
27488 
27489 		/* Build the select data for the requested drive speed */
27490 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27491 		select_mhp = (struct mode_header *)select;
27492 		select_mhp->bdesc_length = 0;
27493 		select_page =
27494 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27497 		select_page->mode_page.code = CDROM_MODE_SPEED;
27498 		select_page->mode_page.length = 2;
27499 		select_page->speed = (uchar_t)data;
27500 
27501 		/* Send the mode select for the requested block size */
27502 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27503 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27504 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27505 			/*
27506 			 * The mode select failed for the requested drive speed,
27507 			 * so reset the data for the original drive speed and
27508 			 * send it to the target. The error is indicated by the
27509 			 * return value for the failed mode select.
27510 			 */
27511 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27512 			    "sr_change_speed: Mode Select Failed\n");
27513 			select_page->speed = sense_page->speed;
27514 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27515 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27516 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27517 		}
27518 		break;
27519 	default:
27520 		/* should not reach here, but check anyway */
27521 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27522 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27523 		rval = EINVAL;
27524 		break;
27525 	}
27526 
27527 	if (select) {
27528 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27529 	}
27530 	if (sense) {
27531 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27532 	}
27533 
27534 	return (rval);
27535 }
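/*
 * Example (illustrative userland sketch, not part of the driver): as with
 * the block-mode ioctls, CDROMGDRVSPEED returns an int through the
 * pointer argument and CDROMSDRVSPEED takes the speed code by value:
 *
 *	int speed;
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_DOUBLE_SPEED);
 */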
27536 
27537 
27538 /*
27539  *    Function: sr_atapi_change_speed()
27540  *
27541  * Description: This routine is the driver entry point for handling CD-ROM
27542  *		drive speed ioctl requests for MMC devices that do not support
27543  *		the Real Time Streaming feature (0x107).
27544  *
27545  *		Note: This routine will use the SET SPEED command which may not
27546  *		be supported by all devices.
27547  *
27548  *   Arguments: dev- the device 'dev_t'
27549  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27550  *		     CDROMSDRVSPEED (set)
27551  *		data- current drive speed or requested drive speed
27552  *		flag- this argument is a pass through to ddi_copyxxx() directly
27553  *		      from the mode argument of ioctl().
27554  *
27555  * Return Code: the code returned by sd_send_scsi_cmd()
27556  *		EINVAL if invalid arguments are provided
27557  *		EFAULT if ddi_copyxxx() fails
27558  *		ENXIO if fail ddi_get_soft_state
27559  *		EIO if invalid mode sense block descriptor length
27560  */
27561 
27562 static int
27563 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27564 {
27565 	struct sd_lun			*un;
27566 	struct uscsi_cmd		*com = NULL;
27567 	struct mode_header_grp2		*sense_mhp;
27568 	uchar_t				*sense_page;
27569 	uchar_t				*sense = NULL;
27570 	char				cdb[CDB_GROUP5];
27571 	int				bd_len;
27572 	int				current_speed = 0;
27573 	int				max_speed = 0;
27574 	int				rval;
27575 
27576 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27577 
27578 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27579 		return (ENXIO);
27580 	}
27581 
27582 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27583 
27584 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27585 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27586 	    SD_PATH_STANDARD)) != 0) {
27587 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27588 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27589 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27590 		return (rval);
27591 	}
27592 
27593 	/* Check the block descriptor len to handle only 1 block descriptor */
27594 	sense_mhp = (struct mode_header_grp2 *)sense;
27595 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27596 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27597 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27598 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27599 		    "block descriptor length\n");
27600 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27601 		return (EIO);
27602 	}
27603 
27604 	/* Calculate the current and maximum drive speeds */
27605 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27606 	current_speed = (sense_page[14] << 8) | sense_page[15];
27607 	max_speed = (sense_page[8] << 8) | sense_page[9];
27608 
27609 	/* Process the command */
27610 	switch (cmd) {
27611 	case CDROMGDRVSPEED:
27612 		current_speed /= SD_SPEED_1X;
27613 		if (ddi_copyout(&current_speed, (void *)data,
27614 		    sizeof (int), flag) != 0)
27615 			rval = EFAULT;
27616 		break;
27617 	case CDROMSDRVSPEED:
27618 		/* Convert the speed code to KB/sec */
27619 		switch ((uchar_t)data) {
27620 		case CDROM_NORMAL_SPEED:
27621 			current_speed = SD_SPEED_1X;
27622 			break;
27623 		case CDROM_DOUBLE_SPEED:
27624 			current_speed = 2 * SD_SPEED_1X;
27625 			break;
27626 		case CDROM_QUAD_SPEED:
27627 			current_speed = 4 * SD_SPEED_1X;
27628 			break;
27629 		case CDROM_TWELVE_SPEED:
27630 			current_speed = 12 * SD_SPEED_1X;
27631 			break;
27632 		case CDROM_MAXIMUM_SPEED:
27633 			current_speed = 0xffff;
27634 			break;
27635 		default:
27636 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27637 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27638 			    (uchar_t)data);
27639 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27640 			return (EINVAL);
27641 		}
27642 
27643 		/* Check the request against the drive's max speed. */
27644 		if (current_speed != 0xffff) {
27645 			if (current_speed > max_speed) {
27646 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27647 				return (EINVAL);
27648 			}
27649 		}
27650 
27651 		/*
27652 		 * Build and send the SET SPEED command
27653 		 *
27654 		 * Note: The SET SPEED (0xBB) command used in this routine is
27655 		 * obsolete per the SCSI MMC spec but still supported in the
27656 		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
27657 		 * therefore the command is still implemented in this routine.
27658 		 */
27659 		bzero(cdb, sizeof (cdb));
27660 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27661 		cdb[2] = (uchar_t)(current_speed >> 8);
27662 		cdb[3] = (uchar_t)current_speed;
27663 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27664 		com->uscsi_cdb	   = (caddr_t)cdb;
27665 		com->uscsi_cdblen  = CDB_GROUP5;
27666 		com->uscsi_bufaddr = NULL;
27667 		com->uscsi_buflen  = 0;
27668 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27669 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27670 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27671 		break;
27672 	default:
27673 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27674 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27675 		rval = EINVAL;
27676 	}
27677 
27678 	if (sense) {
27679 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27680 	}
27681 	if (com) {
27682 		kmem_free(com, sizeof (*com));
27683 	}
27684 	return (rval);
27685 }
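/*
 * Editorial note (illustrative arithmetic): the MMC speed fields above are
 * in KB/sec. Assuming SD_SPEED_1X is defined as 176 (the nominal 1x
 * CD-ROM data rate), a CDROM_TWELVE_SPEED request becomes 12 * 176 =
 * 2112 KB/sec, split big-endian across cdb[2] (2112 >> 8 == 0x08) and
 * cdb[3] (2112 & 0xff == 0x40) of the SET SPEED CDB.
 */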
27686 
27687 
27688 /*
27689  *    Function: sr_pause_resume()
27690  *
27691  * Description: This routine is the driver entry point for handling CD-ROM
27692  *		pause/resume ioctl requests. This only affects the audio play
27693  *		operation.
27694  *
27695  *   Arguments: dev - the device 'dev_t'
27696  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27697  *		      for setting the resume bit of the cdb.
27698  *
27699  * Return Code: the code returned by sd_send_scsi_cmd()
27700  *		EINVAL if invalid mode specified
27701  *
27702  */
27703 
27704 static int
27705 sr_pause_resume(dev_t dev, int cmd)
27706 {
27707 	struct sd_lun		*un;
27708 	struct uscsi_cmd	*com;
27709 	char			cdb[CDB_GROUP1];
27710 	int			rval;
27711 
27712 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27713 		return (ENXIO);
27714 	}
27715 
27716 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27717 	bzero(cdb, CDB_GROUP1);
27718 	cdb[0] = SCMD_PAUSE_RESUME;
27719 	switch (cmd) {
27720 	case CDROMRESUME:
27721 		cdb[8] = 1;
27722 		break;
27723 	case CDROMPAUSE:
27724 		cdb[8] = 0;
27725 		break;
27726 	default:
27727 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27728 		    " Command '%x' Not Supported\n", cmd);
27729 		rval = EINVAL;
27730 		goto done;
27731 	}
27732 
27733 	com->uscsi_cdb    = cdb;
27734 	com->uscsi_cdblen = CDB_GROUP1;
27735 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27736 
27737 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27738 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27739 
27740 done:
27741 	kmem_free(com, sizeof (*com));
27742 	return (rval);
27743 }
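/*
 * Example (illustrative userland sketch, not part of the driver): the
 * pause/resume ioctls take no argument; cdb[8] (the resume bit) is set
 * from the ioctl command itself:
 *
 *	(void) ioctl(fd, CDROMPAUSE);		// resume bit = 0
 *	(void) ioctl(fd, CDROMRESUME);		// resume bit = 1
 */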
27744 
27745 
27746 /*
27747  *    Function: sr_play_msf()
27748  *
27749  * Description: This routine is the driver entry point for handling CD-ROM
27750  *		ioctl requests to output the audio signals at the specified
27751  *		starting address and continue the audio play until the specified
27752  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27753  *		Frame (MSF) format.
27754  *
27755  *   Arguments: dev	- the device 'dev_t'
27756  *		data	- pointer to user provided audio msf structure,
27757  *		          specifying start/end addresses.
27758  *		flag	- this argument is a pass through to ddi_copyxxx()
27759  *		          directly from the mode argument of ioctl().
27760  *
27761  * Return Code: the code returned by sd_send_scsi_cmd()
27762  *		EFAULT if ddi_copyxxx() fails
27763  *		ENXIO if fail ddi_get_soft_state
27764  *		EINVAL if data pointer is NULL
27765  */
27766 
27767 static int
27768 sr_play_msf(dev_t dev, caddr_t data, int flag)
27769 {
27770 	struct sd_lun		*un;
27771 	struct uscsi_cmd	*com;
27772 	struct cdrom_msf	msf_struct;
27773 	struct cdrom_msf	*msf = &msf_struct;
27774 	char			cdb[CDB_GROUP1];
27775 	int			rval;
27776 
27777 	if (data == NULL) {
27778 		return (EINVAL);
27779 	}
27780 
27781 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27782 		return (ENXIO);
27783 	}
27784 
27785 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27786 		return (EFAULT);
27787 	}
27788 
27789 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27790 	bzero(cdb, CDB_GROUP1);
27791 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27792 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27793 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27794 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27795 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27796 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27797 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27798 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27799 	} else {
27800 		cdb[3] = msf->cdmsf_min0;
27801 		cdb[4] = msf->cdmsf_sec0;
27802 		cdb[5] = msf->cdmsf_frame0;
27803 		cdb[6] = msf->cdmsf_min1;
27804 		cdb[7] = msf->cdmsf_sec1;
27805 		cdb[8] = msf->cdmsf_frame1;
27806 	}
27807 	com->uscsi_cdb    = cdb;
27808 	com->uscsi_cdblen = CDB_GROUP1;
27809 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27810 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27811 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27812 	kmem_free(com, sizeof (*com));
27813 	return (rval);
27814 }
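/*
 * Example (illustrative userland sketch, not part of the driver): play
 * audio from 0:02.00 through 5:30.00 using the MSF form; field names are
 * from struct cdrom_msf in <sys/cdio.h>, error handling is omitted.
 *
 *	struct cdrom_msf msf = { 0 };
 *
 *	msf.cdmsf_min0 = 0;   msf.cdmsf_sec0 = 2;   msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;   msf.cdmsf_sec1 = 30;  msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */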
27815 
27816 
27817 /*
27818  *    Function: sr_play_trkind()
27819  *
27820  * Description: This routine is the driver entry point for handling CD-ROM
27821  *		ioctl requests to output the audio signals at the specified
27822  *		starting address and continue the audio play until the specified
27823  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27824  *		format.
27825  *
27826  *   Arguments: dev	- the device 'dev_t'
27827  *		data	- pointer to user provided audio track/index structure,
27828  *		          specifying start/end addresses.
27829  *		flag	- this argument is a pass through to ddi_copyxxx()
27830  *		          directly from the mode argument of ioctl().
27831  *
27832  * Return Code: the code returned by sd_send_scsi_cmd()
27833  *		EFAULT if ddi_copyxxx() fails
27834  *		ENXIO if fail ddi_get_soft_state
27835  *		EINVAL if data pointer is NULL
27836  */
27837 
27838 static int
27839 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27840 {
27841 	struct cdrom_ti		ti_struct;
27842 	struct cdrom_ti		*ti = &ti_struct;
27843 	struct uscsi_cmd	*com = NULL;
27844 	char			cdb[CDB_GROUP1];
27845 	int			rval;
27846 
27847 	if (data == NULL) {
27848 		return (EINVAL);
27849 	}
27850 
27851 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27852 		return (EFAULT);
27853 	}
27854 
27855 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27856 	bzero(cdb, CDB_GROUP1);
27857 	cdb[0] = SCMD_PLAYAUDIO_TI;
27858 	cdb[4] = ti->cdti_trk0;
27859 	cdb[5] = ti->cdti_ind0;
27860 	cdb[7] = ti->cdti_trk1;
27861 	cdb[8] = ti->cdti_ind1;
27862 	com->uscsi_cdb    = cdb;
27863 	com->uscsi_cdblen = CDB_GROUP1;
27864 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27865 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27866 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27867 	kmem_free(com, sizeof (*com));
27868 	return (rval);
27869 }
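/*
 * Example (illustrative userland sketch, not part of the driver): the
 * track/index form of the same request, playing track 1 index 1 through
 * track 3 index 1 (struct cdrom_ti is from <sys/cdio.h>):
 *
 *	struct cdrom_ti ti = { 0 };
 *
 *	ti.cdti_trk0 = 1;	ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 3;	ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */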
27870 
27871 
27872 /*
27873  *    Function: sr_read_all_subcodes()
27874  *
27875  * Description: This routine is the driver entry point for handling CD-ROM
27876  *		ioctl requests to return raw subcode data while the target is
27877  *		playing audio (CDROMSUBCODE).
27878  *
27879  *   Arguments: dev	- the device 'dev_t'
27880  *		data	- pointer to user provided cdrom subcode structure,
27881  *		          specifying the transfer length and address.
27882  *		flag	- this argument is a pass through to ddi_copyxxx()
27883  *		          directly from the mode argument of ioctl().
27884  *
27885  * Return Code: the code returned by sd_send_scsi_cmd()
27886  *		EFAULT if ddi_copyxxx() fails
27887  *		ENXIO if fail ddi_get_soft_state
27888  *		EINVAL if data pointer is NULL
27889  */
27890 
27891 static int
27892 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27893 {
27894 	struct sd_lun		*un = NULL;
27895 	struct uscsi_cmd	*com = NULL;
27896 	struct cdrom_subcode	*subcode = NULL;
27897 	int			rval;
27898 	size_t			buflen;
27899 	char			cdb[CDB_GROUP5];
27900 
27901 #ifdef _MULTI_DATAMODEL
27902 	/* To support ILP32 applications in an LP64 world */
27903 	struct cdrom_subcode32		cdrom_subcode32;
27904 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27905 #endif
27906 	if (data == NULL) {
27907 		return (EINVAL);
27908 	}
27909 
27910 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27911 		return (ENXIO);
27912 	}
27913 
27914 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27915 
27916 #ifdef _MULTI_DATAMODEL
27917 	switch (ddi_model_convert_from(flag & FMODELS)) {
27918 	case DDI_MODEL_ILP32:
27919 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27920 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27921 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27922 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27923 			return (EFAULT);
27924 		}
27925 		/* Convert the ILP32 uscsi data from the application to LP64 */
27926 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27927 		break;
27928 	case DDI_MODEL_NONE:
27929 		if (ddi_copyin(data, subcode,
27930 		    sizeof (struct cdrom_subcode), flag)) {
27931 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27932 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27933 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27934 			return (EFAULT);
27935 		}
27936 		break;
27937 	}
27938 #else /* ! _MULTI_DATAMODEL */
27939 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27940 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27941 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27942 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27943 		return (EFAULT);
27944 	}
27945 #endif /* _MULTI_DATAMODEL */
27946 
27947 	/*
27948 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
27949 	 * reject a length input that does not fit in 3 bytes.
27950 	 */
27951 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27952 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27953 		    "sr_read_all_subcodes: "
27954 		    "cdrom transfer length too large: %d (limit %d)\n",
27955 		    subcode->cdsc_length, 0xFFFFFF);
27956 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27957 		return (EINVAL);
27958 	}
27959 
27960 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27961 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27962 	bzero(cdb, CDB_GROUP5);
27963 
27964 	if (un->un_f_mmc_cap == TRUE) {
27965 		cdb[0] = (char)SCMD_READ_CD;
27966 		cdb[2] = (char)0xff;
27967 		cdb[3] = (char)0xff;
27968 		cdb[4] = (char)0xff;
27969 		cdb[5] = (char)0xff;
27970 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27971 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27972 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27973 		cdb[10] = 1;
27974 	} else {
27975 		/*
27976 		 * Note: A vendor specific command (0xDF) is being used here to
27977 		 * request a read of all subcodes.
27978 		 */
27979 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27980 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27981 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27982 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27983 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27984 	}
27985 	com->uscsi_cdb	   = cdb;
27986 	com->uscsi_cdblen  = CDB_GROUP5;
27987 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27988 	com->uscsi_buflen  = buflen;
27989 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27990 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
27991 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27992 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27993 	kmem_free(com, sizeof (*com));
27994 	return (rval);
27995 }
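/*
 * Editorial note (illustrative arithmetic): cdsc_length is a count of
 * subcode blocks, so the transfer size above is cdsc_length *
 * CDROM_BLK_SUBCODE (nominally 96 bytes of raw subcode per block). The
 * 0xFF000000 mask enforces the 3-byte transfer length field of the MMC
 * READ CD CDB.
 */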
27996 
27997 
27998 /*
27999  *    Function: sr_read_subchannel()
28000  *
28001  * Description: This routine is the driver entry point for handling CD-ROM
28002  *		ioctl requests to return the Q sub-channel data of the CD
28003  *		current position block. (CDROMSUBCHNL) The data includes the
28004  *		current position block (CDROMSUBCHNL). The data includes the
28005  *		track number, index number, absolute CD-ROM address (LBA or MSF
28006  *		format per the user), track relative CD-ROM address (LBA or MSF
28007  *
28008  *   Arguments: dev	- the device 'dev_t'
28009  *		data	- pointer to user provided cdrom sub-channel structure
28010  *		flag	- this argument is a pass through to ddi_copyxxx()
28011  *		          directly from the mode argument of ioctl().
28012  *
28013  * Return Code: the code returned by sd_send_scsi_cmd()
28014  *		EFAULT if ddi_copyxxx() fails
28015  *		ENXIO if fail ddi_get_soft_state
28016  *		EINVAL if data pointer is NULL
28017  */
28018 
28019 static int
28020 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28021 {
28022 	struct sd_lun		*un;
28023 	struct uscsi_cmd	*com;
28024 	struct cdrom_subchnl	subchannel;
28025 	struct cdrom_subchnl	*subchnl = &subchannel;
28026 	char			cdb[CDB_GROUP1];
28027 	caddr_t			buffer;
28028 	int			rval;
28029 
28030 	if (data == NULL) {
28031 		return (EINVAL);
28032 	}
28033 
28034 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28035 	    (un->un_state == SD_STATE_OFFLINE)) {
28036 		return (ENXIO);
28037 	}
28038 
28039 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28040 		return (EFAULT);
28041 	}
28042 
28043 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28044 	bzero(cdb, CDB_GROUP1);
28045 	cdb[0] = SCMD_READ_SUBCHANNEL;
28046 	/* Set the MSF bit based on the user requested address format */
28047 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28048 	/*
28049 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
28050 	 * returned
28051 	 */
28052 	cdb[2] = 0x40;
28053 	/*
28054 	 * Set byte 3 to specify the return data format. A value of 0x01
28055 	 * indicates that the CD-ROM current position should be returned.
28056 	 */
28057 	cdb[3] = 0x01;
28058 	cdb[8] = 0x10;
28059 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28060 	com->uscsi_cdb	   = cdb;
28061 	com->uscsi_cdblen  = CDB_GROUP1;
28062 	com->uscsi_bufaddr = buffer;
28063 	com->uscsi_buflen  = 16;
28064 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28065 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28066 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28067 	if (rval != 0) {
28068 		kmem_free(buffer, 16);
28069 		kmem_free(com, sizeof (*com));
28070 		return (rval);
28071 	}
28072 
28073 	/* Process the returned Q sub-channel data */
28074 	subchnl->cdsc_audiostatus = buffer[1];
28075 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
28076 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28077 	subchnl->cdsc_trk	= buffer[6];
28078 	subchnl->cdsc_ind	= buffer[7];
28079 	if (subchnl->cdsc_format & CDROM_LBA) {
28080 		subchnl->cdsc_absaddr.lba =
28081 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28082 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28083 		subchnl->cdsc_reladdr.lba =
28084 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28085 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28086 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28087 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28088 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28089 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28090 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28091 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28092 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28093 	} else {
28094 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28095 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28096 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28097 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28098 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28099 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28100 	}
28101 	kmem_free(buffer, 16);
28102 	kmem_free(com, sizeof (*com));
28103 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28104 	    != 0) {
28105 		return (EFAULT);
28106 	}
28107 	return (rval);
28108 }
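/*
 * Example (illustrative userland sketch, not part of the driver): poll
 * the audio play position in MSF form; struct cdrom_subchnl is from
 * <sys/cdio.h>, error handling is omitted.
 *
 *	struct cdrom_subchnl sc = { 0 };
 *
 *	sc.cdsc_format = CDROM_MSF;
 *	(void) ioctl(fd, CDROMSUBCHNL, &sc);
 *	// sc.cdsc_audiostatus, sc.cdsc_trk and sc.cdsc_absaddr.msf now
 *	// reflect the current position.
 */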
28109 
28110 
28111 /*
28112  *    Function: sr_read_tocentry()
28113  *
28114  * Description: This routine is the driver entry point for handling CD-ROM
28115  *		ioctl requests to read from the Table of Contents (TOC)
28116  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28117  *		fields, the starting address (LBA or MSF format per the user)
28118  *		and the data mode if the user specified track is a data track.
28119  *
28120  *		Note: The READ HEADER (0x44) command used in this routine is
28121  *		obsolete per the SCSI MMC spec but still supported in the
28122  *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
28123  *		therefore the command is still implemented in this routine.
28124  *
28125  *   Arguments: dev	- the device 'dev_t'
28126  *		data	- pointer to user provided toc entry structure,
28127  *			  specifying the track # and the address format
28128  *			  (LBA or MSF).
28129  *		flag	- this argument is a pass through to ddi_copyxxx()
28130  *		          directly from the mode argument of ioctl().
28131  *
28132  * Return Code: the code returned by sd_send_scsi_cmd()
28133  *		EFAULT if ddi_copyxxx() fails
28134  *		ENXIO if fail ddi_get_soft_state
28135  *		EINVAL if data pointer is NULL
28136  */
28137 
28138 static int
28139 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28140 {
28141 	struct sd_lun		*un = NULL;
28142 	struct uscsi_cmd	*com;
28143 	struct cdrom_tocentry	toc_entry;
28144 	struct cdrom_tocentry	*entry = &toc_entry;
28145 	caddr_t			buffer;
28146 	int			rval;
28147 	char			cdb[CDB_GROUP1];
28148 
28149 	if (data == NULL) {
28150 		return (EINVAL);
28151 	}
28152 
28153 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28154 	    (un->un_state == SD_STATE_OFFLINE)) {
28155 		return (ENXIO);
28156 	}
28157 
28158 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28159 		return (EFAULT);
28160 	}
28161 
28162 	/* Validate the requested track and address format */
28163 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28164 		return (EINVAL);
28165 	}
28166 
28167 	if (entry->cdte_track == 0) {
28168 		return (EINVAL);
28169 	}
28170 
28171 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28172 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28173 	bzero(cdb, CDB_GROUP1);
28174 
28175 	cdb[0] = SCMD_READ_TOC;
28176 	/* Set the MSF bit based on the user requested address format  */
28177 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28178 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28179 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28180 	} else {
28181 		cdb[6] = entry->cdte_track;
28182 	}
28183 
28184 	/*
28185 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28186 	 * (4 byte TOC response header + 8 byte track descriptor)
28187 	 */
28188 	cdb[8] = 12;
28189 	com->uscsi_cdb	   = cdb;
28190 	com->uscsi_cdblen  = CDB_GROUP1;
28191 	com->uscsi_bufaddr = buffer;
28192 	com->uscsi_buflen  = 0x0C;
28193 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28194 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28195 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28196 	if (rval != 0) {
28197 		kmem_free(buffer, 12);
28198 		kmem_free(com, sizeof (*com));
28199 		return (rval);
28200 	}
28201 
28202 	/* Process the toc entry */
28203 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28204 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28205 	if (entry->cdte_format & CDROM_LBA) {
28206 		entry->cdte_addr.lba =
28207 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28208 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28209 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28210 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28211 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28212 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28213 		/*
28214 		 * Send a READ TOC command using the LBA address format to get
28215 		 * the LBA for the track requested so it can be used in the
28216 		 * READ HEADER request
28217 		 *
28218 		 * Note: The MSF bit of the READ HEADER command specifies the
28219 		 * output format. The block address specified in that command
28220 		 * must be in LBA format.
28221 		 */
28222 		cdb[1] = 0;
28223 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28224 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28225 		if (rval != 0) {
28226 			kmem_free(buffer, 12);
28227 			kmem_free(com, sizeof (*com));
28228 			return (rval);
28229 		}
28230 	} else {
28231 		entry->cdte_addr.msf.minute	= buffer[9];
28232 		entry->cdte_addr.msf.second	= buffer[10];
28233 		entry->cdte_addr.msf.frame	= buffer[11];
28234 		/*
28235 		 * Send a READ TOC command using the LBA address format to get
28236 		 * the LBA for the track requested so it can be used in the
28237 		 * READ HEADER request
28238 		 *
28239 		 * Note: The MSF bit of the READ HEADER command specifies the
28240 		 * output format. The block address specified in that command
28241 		 * must be in LBA format.
28242 		 */
28243 		cdb[1] = 0;
28244 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28245 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28246 		if (rval != 0) {
28247 			kmem_free(buffer, 12);
28248 			kmem_free(com, sizeof (*com));
28249 			return (rval);
28250 		}
28251 	}
28252 
28253 	/*
28254 	 * Build and send the READ HEADER command to determine the data mode of
28255 	 * the user specified track.
28256 	 */
28257 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28258 	    (entry->cdte_track != CDROM_LEADOUT)) {
28259 		bzero(cdb, CDB_GROUP1);
28260 		cdb[0] = SCMD_READ_HEADER;
28261 		cdb[2] = buffer[8];
28262 		cdb[3] = buffer[9];
28263 		cdb[4] = buffer[10];
28264 		cdb[5] = buffer[11];
28265 		cdb[8] = 0x08;
28266 		com->uscsi_buflen = 0x08;
28267 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28268 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28269 		if (rval == 0) {
28270 			entry->cdte_datamode = buffer[0];
28271 		} else {
28272 			/*
28273 			 * The READ HEADER command failed. Since the command
28274 			 * is obsoleted in one spec, it is better to return
28275 			 * -1 for an invalid track so that we can still
28276 			 * receive the rest of the TOC data.
28277 			 */
28278 			entry->cdte_datamode = (uchar_t)-1;
28279 		}
28280 	} else {
28281 		entry->cdte_datamode = (uchar_t)-1;
28282 	}
28283 
28284 	kmem_free(buffer, 12);
28285 	kmem_free(com, sizeof (*com));
28286 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28287 		return (EFAULT);
28288 
28289 	return (rval);
28290 }
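/*
 * Example (illustrative userland sketch, not part of the driver): fetch
 * the starting address and data mode of track 1 in LBA form (struct
 * cdrom_tocentry is from <sys/cdio.h>):
 *
 *	struct cdrom_tocentry te = { 0 };
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_LBA;
 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 *	// te.cdte_addr.lba holds the track start; te.cdte_datamode is
 *	// (uchar_t)-1 for audio tracks, per the fallback above.
 */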
28291 
28292 
28293 /*
28294  *    Function: sr_read_tochdr()
28295  *
28296  * Description: This routine is the driver entry point for handling CD-ROM
28297  * 		ioctl requests to read the Table of Contents (TOC) header
28298  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28299  *		and ending track numbers.
28300  *
28301  *   Arguments: dev	- the device 'dev_t'
28302  *		data	- pointer to user provided toc header structure,
28303  *			  specifying the starting and ending track numbers.
28304  *		flag	- this argument is a pass through to ddi_copyxxx()
28305  *			  directly from the mode argument of ioctl().
28306  *
28307  * Return Code: the code returned by sd_send_scsi_cmd()
28308  *		EFAULT if ddi_copyxxx() fails
28309  *		ENXIO if fail ddi_get_soft_state
28310  *		EINVAL if data pointer is NULL
28311  */
28312 
28313 static int
28314 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28315 {
28316 	struct sd_lun		*un;
28317 	struct uscsi_cmd	*com;
28318 	struct cdrom_tochdr	toc_header;
28319 	struct cdrom_tochdr	*hdr = &toc_header;
28320 	char			cdb[CDB_GROUP1];
28321 	int			rval;
28322 	caddr_t			buffer;
28323 
28324 	if (data == NULL) {
28325 		return (EINVAL);
28326 	}
28327 
28328 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28329 	    (un->un_state == SD_STATE_OFFLINE)) {
28330 		return (ENXIO);
28331 	}
28332 
28333 	buffer = kmem_zalloc(4, KM_SLEEP);
28334 	bzero(cdb, CDB_GROUP1);
28335 	cdb[0] = SCMD_READ_TOC;
28336 	/*
28337 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28338 	 * that the TOC header should be returned
28339 	 */
28340 	cdb[6] = 0x00;
28341 	/*
28342 	 * Bytes 7 & 8 hold the allocation length: 4 bytes for the TOC header.
28343 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28344 	 */
28345 	cdb[8] = 0x04;
28346 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28347 	com->uscsi_cdb	   = cdb;
28348 	com->uscsi_cdblen  = CDB_GROUP1;
28349 	com->uscsi_bufaddr = buffer;
28350 	com->uscsi_buflen  = 0x04;
28351 	com->uscsi_timeout = 300;
28352 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28353 
28354 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28355 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28356 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28357 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28358 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28359 	} else {
28360 		hdr->cdth_trk0 = buffer[2];
28361 		hdr->cdth_trk1 = buffer[3];
28362 	}
28363 	kmem_free(buffer, 4);
28364 	kmem_free(com, sizeof (*com));
28365 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28366 		return (EFAULT);
28367 	}
28368 	return (rval);
28369 }
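
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMREADTOCHDR and CDROMREADTOCENTRY ioctls serviced above. The
 * device path is a placeholder and error handling is abbreviated.
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	list_tracks(void)
 *	{
 *		struct cdrom_tochdr	hdr;
 *		struct cdrom_tocentry	te;
 *		int			fd, trk;
 *
 *		if ((fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY)) < 0)
 *			return (-1);
 *		if (ioctl(fd, CDROMREADTOCHDR, &hdr) < 0) {
 *			(void) close(fd);
 *			return (-1);
 *		}
 *		for (trk = hdr.cdth_trk0; trk <= hdr.cdth_trk1; trk++) {
 *			te.cdte_track  = trk;
 *			te.cdte_format = CDROM_MSF;
 *			if (ioctl(fd, CDROMREADTOCENTRY, &te) < 0)
 *				continue;
 *			(void) printf("track %d: %02d:%02d.%02d mode 0x%x\n",
 *			    trk, te.cdte_addr.msf.minute,
 *			    te.cdte_addr.msf.second, te.cdte_addr.msf.frame,
 *			    te.cdte_datamode);
 *		}
 *		(void) close(fd);
 *		return (0);
 *	}
 */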
28370 
28371 
28372 /*
28373  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28374  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28375  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28376  * digital audio and extended architecture digital audio. These modes are
28377  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28378  * MMC specs.
28379  *
28380  * In addition to support for the various data formats these routines also
28381  * include support for devices that implement only the direct access READ
28382  * commands (0x08, 0x28), devices that implement the READ_CD commands
28383  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28384  * READ CDXA commands (0xD8, 0xDB)
28385  */
28386 
28387 /*
28388  *    Function: sr_read_mode1()
28389  *
28390  * Description: This routine is the driver entry point for handling CD-ROM
28391  *		ioctl read mode1 requests (CDROMREADMODE1).
28392  *
28393  *   Arguments: dev	- the device 'dev_t'
28394  *		data	- pointer to user provided cd read structure specifying
28395  *			  the lba buffer address and length.
28396  *		flag	- this argument is a pass through to ddi_copyxxx()
28397  *			  directly from the mode argument of ioctl().
28398  *
28399  * Return Code: the code returned by sd_send_scsi_cmd()
28400  *		EFAULT if ddi_copyxxx() fails
28401  *		ENXIO if fail ddi_get_soft_state
28402  *		EINVAL if data pointer is NULL
28403  */
28404 
28405 static int
28406 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28407 {
28408 	struct sd_lun		*un;
28409 	struct cdrom_read	mode1_struct;
28410 	struct cdrom_read	*mode1 = &mode1_struct;
28411 	int			rval;
28412 #ifdef _MULTI_DATAMODEL
28413 	/* To support ILP32 applications in an LP64 world */
28414 	struct cdrom_read32	cdrom_read32;
28415 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28416 #endif /* _MULTI_DATAMODEL */
28417 
28418 	if (data == NULL) {
28419 		return (EINVAL);
28420 	}
28421 
28422 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28423 	    (un->un_state == SD_STATE_OFFLINE)) {
28424 		return (ENXIO);
28425 	}
28426 
28427 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28428 	    "sd_read_mode1: entry: un:0x%p\n", un);
28429 
28430 #ifdef _MULTI_DATAMODEL
28431 	switch (ddi_model_convert_from(flag & FMODELS)) {
28432 	case DDI_MODEL_ILP32:
28433 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28434 			return (EFAULT);
28435 		}
28436 		/* Convert the ILP32 uscsi data from the application to LP64 */
28437 		cdrom_read32tocdrom_read(cdrd32, mode1);
28438 		break;
28439 	case DDI_MODEL_NONE:
28440 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28441 			return (EFAULT);
28442 		}
28443 	}
28444 #else /* ! _MULTI_DATAMODEL */
28445 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28446 		return (EFAULT);
28447 	}
28448 #endif /* _MULTI_DATAMODEL */
28449 
28450 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28451 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28452 
28453 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28454 	    "sd_read_mode1: exit: un:0x%p\n", un);
28455 
28456 	return (rval);
28457 }
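
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMREADMODE1 ioctl serviced above; fd is an open descriptor for the
 * raw CD-ROM device and buf must hold CDROM_BLK_2048 bytes.
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *
 *	int
 *	read_mode1_block(int fd, int lba, char *buf)
 *	{
 *		struct cdrom_read	cr;
 *
 *		cr.cdread_lba	  = lba;
 *		cr.cdread_bufaddr = buf;
 *		cr.cdread_buflen  = CDROM_BLK_2048;
 *		return (ioctl(fd, CDROMREADMODE1, &cr));
 *	}
 */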
28458 
28459 
28460 /*
28461  *    Function: sr_read_cd_mode2()
28462  *
28463  * Description: This routine is the driver entry point for handling CD-ROM
28464  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28465  *		support the READ CD (0xBE) command or the 1st generation
28466  *		READ CD (0xD4) command.
28467  *
28468  *   Arguments: dev	- the device 'dev_t'
28469  *		data	- pointer to user provided cd read structure specifying
28470  *			  the lba buffer address and length.
28471  *		flag	- this argument is a pass through to ddi_copyxxx()
28472  *			  directly from the mode argument of ioctl().
28473  *
28474  * Return Code: the code returned by sd_send_scsi_cmd()
28475  *		EFAULT if ddi_copyxxx() fails
28476  *		ENXIO if fail ddi_get_soft_state
28477  *		EINVAL if data pointer is NULL
28478  */
28479 
28480 static int
28481 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28482 {
28483 	struct sd_lun		*un;
28484 	struct uscsi_cmd	*com;
28485 	struct cdrom_read	mode2_struct;
28486 	struct cdrom_read	*mode2 = &mode2_struct;
28487 	uchar_t			cdb[CDB_GROUP5];
28488 	int			nblocks;
28489 	int			rval;
28490 #ifdef _MULTI_DATAMODEL
28491 	/*  To support ILP32 applications in an LP64 world */
28492 	struct cdrom_read32	cdrom_read32;
28493 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28494 #endif /* _MULTI_DATAMODEL */
28495 
28496 	if (data == NULL) {
28497 		return (EINVAL);
28498 	}
28499 
28500 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28501 	    (un->un_state == SD_STATE_OFFLINE)) {
28502 		return (ENXIO);
28503 	}
28504 
28505 #ifdef _MULTI_DATAMODEL
28506 	switch (ddi_model_convert_from(flag & FMODELS)) {
28507 	case DDI_MODEL_ILP32:
28508 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28509 			return (EFAULT);
28510 		}
28511 		/* Convert the ILP32 uscsi data from the application to LP64 */
28512 		cdrom_read32tocdrom_read(cdrd32, mode2);
28513 		break;
28514 	case DDI_MODEL_NONE:
28515 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28516 			return (EFAULT);
28517 		}
28518 		break;
28519 	}
28520 
28521 #else /* ! _MULTI_DATAMODEL */
28522 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28523 		return (EFAULT);
28524 	}
28525 #endif /* _MULTI_DATAMODEL */
28526 
28527 	bzero(cdb, sizeof (cdb));
28528 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28529 		/* Read command supported by 1st generation atapi drives */
28530 		cdb[0] = SCMD_READ_CDD4;
28531 	} else {
28532 		/* Universal CD Access Command */
28533 		cdb[0] = SCMD_READ_CD;
28534 	}
28535 
28536 	/*
28537 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28538 	 */
28539 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28540 
28541 	/* set the start address */
28542 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0xFF);
28543 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0xFF);
28544 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28545 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28546 
28547 	/* set the transfer length */
28548 	nblocks = mode2->cdread_buflen / 2336;
28549 	cdb[6] = (uchar_t)(nblocks >> 16);
28550 	cdb[7] = (uchar_t)(nblocks >> 8);
28551 	cdb[8] = (uchar_t)nblocks;
28552 
28553 	/* set the filter bits */
28554 	cdb[9] = CDROM_READ_CD_USERDATA;
28555 
28556 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28557 	com->uscsi_cdb = (caddr_t)cdb;
28558 	com->uscsi_cdblen = sizeof (cdb);
28559 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28560 	com->uscsi_buflen = mode2->cdread_buflen;
28561 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28562 
28563 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28564 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28565 	kmem_free(com, sizeof (*com));
28566 	return (rval);
28567 }
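
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMREADMODE2 ioctl serviced above. Since the transfer length is
 * computed as cdread_buflen / 2336, the buffer length should be a
 * multiple of the 2336 byte mode 2 block size.
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *
 *	int
 *	read_mode2_blocks(int fd, int lba, char *buf, int nblks)
 *	{
 *		struct cdrom_read	cr;
 *
 *		cr.cdread_lba	  = lba;
 *		cr.cdread_bufaddr = buf;
 *		cr.cdread_buflen  = nblks * 2336;
 *		return (ioctl(fd, CDROMREADMODE2, &cr));
 *	}
 */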
28568 
28569 
28570 /*
28571  *    Function: sr_read_mode2()
28572  *
28573  * Description: This routine is the driver entry point for handling CD-ROM
28574  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28575  *		do not support the READ CD (0xBE) command.
28576  *
28577  *   Arguments: dev	- the device 'dev_t'
28578  *		data	- pointer to user provided cd read structure specifying
28579  *			  the lba buffer address and length.
28580  *		flag	- this argument is a pass through to ddi_copyxxx()
28581  *			  directly from the mode argument of ioctl().
28582  *
28583  * Return Code: the code returned by sd_send_scsi_cmd()
28584  *		EFAULT if ddi_copyxxx() fails
28585  *		ENXIO if fail ddi_get_soft_state
28586  *		EINVAL if data pointer is NULL
28587  *		EIO if fail to reset block size
28588  *		EAGAIN if commands are in progress in the driver
28589  */
28590 
28591 static int
28592 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28593 {
28594 	struct sd_lun		*un;
28595 	struct cdrom_read	mode2_struct;
28596 	struct cdrom_read	*mode2 = &mode2_struct;
28597 	int			rval;
28598 	uint32_t		restore_blksize;
28599 	struct uscsi_cmd	*com;
28600 	uchar_t			cdb[CDB_GROUP0];
28601 	int			nblocks;
28602 
28603 #ifdef _MULTI_DATAMODEL
28604 	/* To support ILP32 applications in an LP64 world */
28605 	struct cdrom_read32	cdrom_read32;
28606 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28607 #endif /* _MULTI_DATAMODEL */
28608 
28609 	if (data == NULL) {
28610 		return (EINVAL);
28611 	}
28612 
28613 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28614 	    (un->un_state == SD_STATE_OFFLINE)) {
28615 		return (ENXIO);
28616 	}
28617 
28618 	/*
28619 	 * Because this routine will update the device and driver block size
28620 	 * being used, we want to make sure there are no commands in progress.
28621 	 * If commands are in progress, the user will have to try again.
28622 	 *
28623 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28624 	 * in sdioctl to protect commands from sdioctl through to the top of
28625 	 * sd_uscsi_strategy. See sdioctl for details.
28626 	 */
28627 	mutex_enter(SD_MUTEX(un));
28628 	if (un->un_ncmds_in_driver != 1) {
28629 		mutex_exit(SD_MUTEX(un));
28630 		return (EAGAIN);
28631 	}
28632 	mutex_exit(SD_MUTEX(un));
28633 
28634 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28635 	    "sd_read_mode2: entry: un:0x%p\n", un);
28636 
28637 #ifdef _MULTI_DATAMODEL
28638 	switch (ddi_model_convert_from(flag & FMODELS)) {
28639 	case DDI_MODEL_ILP32:
28640 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28641 			return (EFAULT);
28642 		}
28643 		/* Convert the ILP32 uscsi data from the application to LP64 */
28644 		cdrom_read32tocdrom_read(cdrd32, mode2);
28645 		break;
28646 	case DDI_MODEL_NONE:
28647 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28648 			return (EFAULT);
28649 		}
28650 		break;
28651 	}
28652 #else /* ! _MULTI_DATAMODEL */
28653 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28654 		return (EFAULT);
28655 	}
28656 #endif /* _MULTI_DATAMODEL */
28657 
28658 	/* Store the current target block size for restoration later */
28659 	restore_blksize = un->un_tgt_blocksize;
28660 
28661 	/* Change the device and soft state target block size to 2336 */
28662 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28663 		rval = EIO;
28664 		goto done;
28665 	}
28666 
28667 
28668 	bzero(cdb, sizeof (cdb));
28669 
28670 	/* set READ operation */
28671 	cdb[0] = SCMD_READ;
28672 
28673 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28674 	mode2->cdread_lba >>= 2;
28675 
28676 	/* set the start address */
28677 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0x1F);
28678 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28679 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28680 
28681 	/* set the transfer length */
28682 	nblocks = mode2->cdread_buflen / 2336;
28683 	cdb[4] = (uchar_t)nblocks & 0xFF;
28684 
28685 	/* build command */
28686 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28687 	com->uscsi_cdb = (caddr_t)cdb;
28688 	com->uscsi_cdblen = sizeof (cdb);
28689 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28690 	com->uscsi_buflen = mode2->cdread_buflen;
28691 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28692 
28693 	/*
28694 	 * Issue SCSI command with user space address for read buffer.
28695 	 *
28696 	 * This sends the command through the main channel in the driver.
28697 	 *
28698 	 * Since this is accessed via an IOCTL call, we go through the
28699 	 * standard path, so that if the device was powered down, then
28700 	 * it would be 'awakened' to handle the command.
28701 	 */
28702 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28703 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28704 
28705 	kmem_free(com, sizeof (*com));
28706 
28707 	/* Restore the device and soft state target block size */
28708 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28709 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28710 		    "can't do switch back to mode 1\n");
28711 		/*
28712 		 * If sd_send_scsi_READ succeeded we still need to report
28713 		 * an error because we failed to reset the block size
28714 		 */
28715 		if (rval == 0) {
28716 			rval = EIO;
28717 		}
28718 	}
28719 
28720 done:
28721 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28722 	    "sd_read_mode2: exit: un:0x%p\n", un);
28723 
28724 	return (rval);
28725 }
28726 
28727 
28728 /*
28729  *    Function: sr_sector_mode()
28730  *
28731  * Description: This utility function is used by sr_read_mode2 to set the target
28732  *		block size based on the user specified size. This is a legacy
28733  *		implementation based upon a vendor specific mode page.
28734  *
28735  *   Arguments: dev	- the device 'dev_t'
28736  *		blksize	- the block size to set: either 2336 or 512.
28738  *
28739  * Return Code: the code returned by sd_send_scsi_MODE_SENSE/MODE_SELECT()
28740  *		ENXIO if fail ddi_get_soft_state
28743  */
28744 
28745 static int
28746 sr_sector_mode(dev_t dev, uint32_t blksize)
28747 {
28748 	struct sd_lun	*un;
28749 	uchar_t		*sense;
28750 	uchar_t		*select;
28751 	int		rval;
28752 
28753 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28754 	    (un->un_state == SD_STATE_OFFLINE)) {
28755 		return (ENXIO);
28756 	}
28757 
28758 	sense = kmem_zalloc(20, KM_SLEEP);
28759 
28760 	/* Note: This is a vendor specific mode page (0x81) */
28761 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28762 	    SD_PATH_STANDARD)) != 0) {
28763 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28764 		    "sr_sector_mode: Mode Sense failed\n");
28765 		kmem_free(sense, 20);
28766 		return (rval);
28767 	}
28768 	select = kmem_zalloc(20, KM_SLEEP);
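	/*
	 * The 20 byte select image built below consists of a mode header
	 * (bytes 0-3; byte 3 = 0x08 indicates that one 8 byte block
	 * descriptor follows), a block descriptor (bytes 4-11; bytes 10-11
	 * carry the new block length), and the page data (bytes 12-19;
	 * bit 0 of byte 14 selects the 2336 byte sector mode).
	 */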
28769 	select[3] = 0x08;
28770 	select[10] = ((blksize >> 8) & 0xff);
28771 	select[11] = (blksize & 0xff);
28772 	select[12] = 0x01;
28773 	select[13] = 0x06;
28774 	select[14] = sense[14];
28775 	select[15] = sense[15];
28776 	if (blksize == SD_MODE2_BLKSIZE) {
28777 		select[14] |= 0x01;
28778 	}
28779 
28780 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28781 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28782 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28783 		    "sr_sector_mode: Mode Select failed\n");
28784 	} else {
28785 		/*
28786 		 * Only update the softstate block size if we successfully
28787 		 * changed the device block mode.
28788 		 */
28789 		mutex_enter(SD_MUTEX(un));
28790 		sd_update_block_info(un, blksize, 0);
28791 		mutex_exit(SD_MUTEX(un));
28792 	}
28793 	kmem_free(sense, 20);
28794 	kmem_free(select, 20);
28795 	return (rval);
28796 }
28797 
28798 
28799 /*
28800  *    Function: sr_read_cdda()
28801  *
28802  * Description: This routine is the driver entry point for handling CD-ROM
28803  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28804  *		the target supports CDDA these requests are handled via a vendor
28805  *		specific command (0xD8). If the target does not support CDDA
28806  *		these requests are handled via the READ CD command (0xBE).
28807  *
28808  *   Arguments: dev	- the device 'dev_t'
28809  *		data	- pointer to user provided CD-DA structure specifying
28810  *			  the track starting address, transfer length, and
28811  *			  subcode options.
28812  *		flag	- this argument is a pass through to ddi_copyxxx()
28813  *			  directly from the mode argument of ioctl().
28814  *
28815  * Return Code: the code returned by sd_send_scsi_cmd()
28816  *		EFAULT if ddi_copyxxx() fails
28817  *		ENXIO if fail ddi_get_soft_state
28818  *		EINVAL if invalid arguments are provided
28819  *		ENOTTY
28820  */
28821 
28822 static int
28823 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28824 {
28825 	struct sd_lun			*un;
28826 	struct uscsi_cmd		*com;
28827 	struct cdrom_cdda		*cdda;
28828 	int				rval;
28829 	size_t				buflen;
28830 	char				cdb[CDB_GROUP5];
28831 
28832 #ifdef _MULTI_DATAMODEL
28833 	/* To support ILP32 applications in an LP64 world */
28834 	struct cdrom_cdda32	cdrom_cdda32;
28835 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28836 #endif /* _MULTI_DATAMODEL */
28837 
28838 	if (data == NULL) {
28839 		return (EINVAL);
28840 	}
28841 
28842 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28843 		return (ENXIO);
28844 	}
28845 
28846 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28847 
28848 #ifdef _MULTI_DATAMODEL
28849 	switch (ddi_model_convert_from(flag & FMODELS)) {
28850 	case DDI_MODEL_ILP32:
28851 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28852 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28853 			    "sr_read_cdda: ddi_copyin Failed\n");
28854 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28855 			return (EFAULT);
28856 		}
28857 		/* Convert the ILP32 uscsi data from the application to LP64 */
28858 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28859 		break;
28860 	case DDI_MODEL_NONE:
28861 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28862 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28863 			    "sr_read_cdda: ddi_copyin Failed\n");
28864 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28865 			return (EFAULT);
28866 		}
28867 		break;
28868 	}
28869 #else /* ! _MULTI_DATAMODEL */
28870 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28871 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28872 		    "sr_read_cdda: ddi_copyin Failed\n");
28873 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28874 		return (EFAULT);
28875 	}
28876 #endif /* _MULTI_DATAMODEL */
28877 
28878 	/*
28879 	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
28880 	 * requests whose length does not fit in 3 bytes.
28881 	 */
28882 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28883 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28884 		    "cdrom transfer length too large: %d (limit %d)\n",
28885 		    cdda->cdda_length, 0xFFFFFF);
28886 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28887 		return (EINVAL);
28888 	}
28889 
28890 	switch (cdda->cdda_subcode) {
28891 	case CDROM_DA_NO_SUBCODE:
28892 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28893 		break;
28894 	case CDROM_DA_SUBQ:
28895 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28896 		break;
28897 	case CDROM_DA_ALL_SUBCODE:
28898 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28899 		break;
28900 	case CDROM_DA_SUBCODE_ONLY:
28901 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28902 		break;
28903 	default:
28904 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28905 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28906 		    cdda->cdda_subcode);
28907 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28908 		return (EINVAL);
28909 	}
28910 
28911 	/* Build and send the command */
28912 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28913 	bzero(cdb, CDB_GROUP5);
28914 
28915 	if (un->un_f_cfg_cdda == TRUE) {
28916 		cdb[0] = (char)SCMD_READ_CD;
28917 		cdb[1] = 0x04;
28918 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28919 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28920 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28921 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28922 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28923 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28924 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28925 		cdb[9] = 0x10;
28926 		switch (cdda->cdda_subcode) {
28927 		case CDROM_DA_NO_SUBCODE :
28928 			cdb[10] = 0x0;
28929 			break;
28930 		case CDROM_DA_SUBQ :
28931 			cdb[10] = 0x2;
28932 			break;
28933 		case CDROM_DA_ALL_SUBCODE :
28934 			cdb[10] = 0x1;
28935 			break;
28936 		case CDROM_DA_SUBCODE_ONLY :
28937 			/* FALLTHROUGH */
28938 		default :
28939 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28940 			kmem_free(com, sizeof (*com));
28941 			return (ENOTTY);
28942 		}
28943 	} else {
28944 		cdb[0] = (char)SCMD_READ_CDDA;
28945 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28946 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28947 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28948 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28949 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28950 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28951 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28952 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28953 		cdb[10] = cdda->cdda_subcode;
28954 	}
28955 
28956 	com->uscsi_cdb = cdb;
28957 	com->uscsi_cdblen = CDB_GROUP5;
28958 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28959 	com->uscsi_buflen = buflen;
28960 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28961 
28962 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28963 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28964 
28965 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28966 	kmem_free(com, sizeof (*com));
28967 	return (rval);
28968 }
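
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMCDDA ioctl serviced above. With CDROM_DA_NO_SUBCODE the buffer
 * must hold cdda_length * CDROM_BLK_2352 bytes, per the mapping above.
 *
 *	#include <sys/types.h>
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *
 *	int
 *	read_audio(int fd, unsigned int lba, unsigned int nblks, caddr_t buf)
 *	{
 *		struct cdrom_cdda	cdda;
 *
 *		cdda.cdda_addr	  = lba;
 *		cdda.cdda_length  = nblks;
 *		cdda.cdda_data	  = buf;
 *		cdda.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *		return (ioctl(fd, CDROMCDDA, &cdda));
 *	}
 */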
28969 
28970 
28971 /*
28972  *    Function: sr_read_cdxa()
28973  *
28974  * Description: This routine is the driver entry point for handling CD-ROM
28975  *		ioctl requests to return CD-XA (Extended Architecture) data.
28976  *		(CDROMCDXA).
28977  *
28978  *   Arguments: dev	- the device 'dev_t'
28979  *		data	- pointer to user provided CD-XA structure specifying
28980  *			  the data starting address, transfer length, and format
28981  *		flag	- this argument is a pass through to ddi_copyxxx()
28982  *			  directly from the mode argument of ioctl().
28983  *
28984  * Return Code: the code returned by sd_send_scsi_cmd()
28985  *		EFAULT if ddi_copyxxx() fails
28986  *		ENXIO if fail ddi_get_soft_state
28987  *		EINVAL if data pointer is NULL
28988  */
28989 
28990 static int
28991 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28992 {
28993 	struct sd_lun		*un;
28994 	struct uscsi_cmd	*com;
28995 	struct cdrom_cdxa	*cdxa;
28996 	int			rval;
28997 	size_t			buflen;
28998 	char			cdb[CDB_GROUP5];
28999 	uchar_t			read_flags;
29000 
29001 #ifdef _MULTI_DATAMODEL
29002 	/* To support ILP32 applications in an LP64 world */
29003 	struct cdrom_cdxa32		cdrom_cdxa32;
29004 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29005 #endif /* _MULTI_DATAMODEL */
29006 
29007 	if (data == NULL) {
29008 		return (EINVAL);
29009 	}
29010 
29011 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29012 		return (ENXIO);
29013 	}
29014 
29015 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29016 
29017 #ifdef _MULTI_DATAMODEL
29018 	switch (ddi_model_convert_from(flag & FMODELS)) {
29019 	case DDI_MODEL_ILP32:
29020 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29021 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29022 			return (EFAULT);
29023 		}
29024 		/*
29025 		 * Convert the ILP32 uscsi data from the
29026 		 * application to LP64 for internal use.
29027 		 */
29028 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29029 		break;
29030 	case DDI_MODEL_NONE:
29031 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29032 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29033 			return (EFAULT);
29034 		}
29035 		break;
29036 	}
29037 #else /* ! _MULTI_DATAMODEL */
29038 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29039 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29040 		return (EFAULT);
29041 	}
29042 #endif /* _MULTI_DATAMODEL */
29043 
29044 	/*
29045 	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
29046 	 * requests whose length does not fit in 3 bytes.
29047 	 */
29048 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29049 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29050 		    "cdrom transfer length too large: %d (limit %d)\n",
29051 		    cdxa->cdxa_length, 0xFFFFFF);
29052 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29053 		return (EINVAL);
29054 	}
29055 
29056 	switch (cdxa->cdxa_format) {
29057 	case CDROM_XA_DATA:
29058 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29059 		read_flags = 0x10;
29060 		break;
29061 	case CDROM_XA_SECTOR_DATA:
29062 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29063 		read_flags = 0xf8;
29064 		break;
29065 	case CDROM_XA_DATA_W_ERROR:
29066 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29067 		read_flags = 0xfc;
29068 		break;
29069 	default:
29070 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29071 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29072 		    cdxa->cdxa_format);
29073 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29074 		return (EINVAL);
29075 	}
29076 
29077 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29078 	bzero(cdb, CDB_GROUP5);
29079 	if (un->un_f_mmc_cap == TRUE) {
29080 		cdb[0] = (char)SCMD_READ_CD;
29081 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29082 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29083 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29084 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29085 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29086 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29087 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29088 		cdb[9] = (char)read_flags;
29089 	} else {
29090 		/*
29091 		 * Note: A vendor specific command (0xDB) is being used here to
29092 		 * request a read of all subcodes.
29093 		 */
29094 		cdb[0] = (char)SCMD_READ_CDXA;
29095 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29096 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29097 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29098 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29099 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29100 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29101 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29102 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29103 		cdb[10] = cdxa->cdxa_format;
29104 	}
29105 	com->uscsi_cdb	   = cdb;
29106 	com->uscsi_cdblen  = CDB_GROUP5;
29107 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29108 	com->uscsi_buflen  = buflen;
29109 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29110 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29111 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29112 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29113 	kmem_free(com, sizeof (*com));
29114 	return (rval);
29115 }
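
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMCDXA ioctl serviced above. For CDROM_XA_DATA the buffer must
 * hold cdxa_length * CDROM_BLK_2048 bytes, per the mapping above.
 *
 *	#include <sys/types.h>
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *
 *	int
 *	read_xa(int fd, unsigned int lba, unsigned int nblks, caddr_t buf)
 *	{
 *		struct cdrom_cdxa	cdxa;
 *
 *		cdxa.cdxa_addr	 = lba;
 *		cdxa.cdxa_length = nblks;
 *		cdxa.cdxa_data	 = buf;
 *		cdxa.cdxa_format = CDROM_XA_DATA;
 *		return (ioctl(fd, CDROMCDXA, &cdxa));
 *	}
 */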
29116 
29117 
29118 /*
29119  *    Function: sr_eject()
29120  *
29121  * Description: This routine is the driver entry point for handling CD-ROM
29122  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29123  *
29124  *   Arguments: dev	- the device 'dev_t'
29125  *
29126  * Return Code: the code returned by sd_send_scsi_cmd()
29127  */
29128 
29129 static int
29130 sr_eject(dev_t dev)
29131 {
29132 	struct sd_lun	*un;
29133 	int		rval;
29134 
29135 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29136 	    (un->un_state == SD_STATE_OFFLINE)) {
29137 		return (ENXIO);
29138 	}
29139 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29140 	    SD_PATH_STANDARD)) != 0) {
29141 		return (rval);
29142 	}
29143 
29144 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29145 	    SD_PATH_STANDARD);
29146 
29147 	if (rval == 0) {
29148 		mutex_enter(SD_MUTEX(un));
29149 		sr_ejected(un);
29150 		un->un_mediastate = DKIO_EJECTED;
29151 		cv_broadcast(&un->un_state_cv);
29152 		mutex_exit(SD_MUTEX(un));
29153 	}
29154 	return (rval);
29155 }
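
/*
 * Example: a minimal user-level sketch (not part of the driver); fd is
 * an open descriptor for the raw CD-ROM device and the eject ioctl
 * takes no argument:
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *	#include <stdio.h>
 *
 *	if (ioctl(fd, CDROMEJECT, 0) < 0)
 *		perror("CDROMEJECT");
 */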
29156 
29157 
29158 /*
29159  *    Function: sr_ejected()
29160  *
29161  * Description: This routine updates the soft state structure to invalidate the
29162  *		geometry information after the media has been ejected or a
29163  *		media eject has been detected.
29164  *
29165  *   Arguments: un - driver soft state (unit) structure
29166  */
29167 
29168 static void
29169 sr_ejected(struct sd_lun *un)
29170 {
29171 	struct sd_errstats *stp;
29172 
29173 	ASSERT(un != NULL);
29174 	ASSERT(mutex_owned(SD_MUTEX(un)));
29175 
29176 	un->un_f_blockcount_is_valid	= FALSE;
29177 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29178 	un->un_f_geometry_is_valid	= FALSE;
29179 
29180 	if (un->un_errstats != NULL) {
29181 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29182 		stp->sd_capacity.value.ui64 = 0;
29183 	}
29184 }
29185 
29186 
29187 /*
29188  *    Function: sr_check_wp()
29189  *
29190  * Description: This routine checks the write protection of a removable
29191  *      media disk and hotpluggable devices via the write protect bit of
29192  *      the Mode Page Header device specific field. Some devices choke
29193  *      on unsupported mode pages. To work around this issue, this
29194  *      routine uses the 0x3f mode page (request all pages) for all
29195  *      device types.
29196  *
29197  *   Arguments: dev		- the device 'dev_t'
29198  *
29199  * Return Code: int indicating if the device is write protected (1) or not (0)
29200  *
29201  *     Context: Kernel thread.
29202  *
29203  */
29204 
29205 static int
29206 sr_check_wp(dev_t dev)
29207 {
29208 	struct sd_lun	*un;
29209 	uchar_t		device_specific;
29210 	uchar_t		*sense;
29211 	int		hdrlen;
29212 	int		rval = FALSE;
29213 
29214 	/*
29215 	 * Note: The return codes for this routine should be reworked to
29216 	 * properly handle the case of a NULL softstate.
29217 	 */
29218 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29219 		return (FALSE);
29220 	}
29221 
29222 	if (un->un_f_cfg_is_atapi == TRUE) {
29223 		/*
29224 		 * The mode page contents are not required; set the allocation
29225 		 * length for the mode page header only
29226 		 */
29227 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29228 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29229 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29230 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29231 			goto err_exit;
29232 		device_specific =
29233 		    ((struct mode_header_grp2 *)sense)->device_specific;
29234 	} else {
29235 		hdrlen = MODE_HEADER_LENGTH;
29236 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29237 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29238 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29239 			goto err_exit;
29240 		device_specific =
29241 		    ((struct mode_header *)sense)->device_specific;
29242 	}
29243 
29244 	/*
29245 	 * Check the write protect bit of the device specific field.
29246 	 * (If the mode sense failed above, FALSE is returned, treating
29247 	 * the device as writable; not all disks understand this query.)
29248 	 */
29249 	if (device_specific & WRITE_PROTECT) {
29250 		rval = TRUE;
29251 	}
29252 
29253 err_exit:
29254 	kmem_free(sense, hdrlen);
29255 	return (rval);
29256 }
29257 
29258 /*
29259  *    Function: sr_volume_ctrl()
29260  *
29261  * Description: This routine is the driver entry point for handling CD-ROM
29262  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29263  *
29264  *   Arguments: dev	- the device 'dev_t'
29265  *		data	- pointer to user audio volume control structure
29266  *		flag	- this argument is a pass through to ddi_copyxxx()
29267  *			  directly from the mode argument of ioctl().
29268  *
29269  * Return Code: the code returned by sd_send_scsi_cmd()
29270  *		EFAULT if ddi_copyxxx() fails
29271  *		ENXIO if fail ddi_get_soft_state
29272  *		EINVAL if data pointer is NULL
29273  *
29274  */
29275 
29276 static int
29277 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29278 {
29279 	struct sd_lun		*un;
29280 	struct cdrom_volctrl    volume;
29281 	struct cdrom_volctrl    *vol = &volume;
29282 	uchar_t			*sense_page;
29283 	uchar_t			*select_page;
29284 	uchar_t			*sense;
29285 	uchar_t			*select;
29286 	int			sense_buflen;
29287 	int			select_buflen;
29288 	int			rval;
29289 
29290 	if (data == NULL) {
29291 		return (EINVAL);
29292 	}
29293 
29294 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29295 	    (un->un_state == SD_STATE_OFFLINE)) {
29296 		return (ENXIO);
29297 	}
29298 
29299 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29300 		return (EFAULT);
29301 	}
29302 
29303 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29304 		struct mode_header_grp2		*sense_mhp;
29305 		struct mode_header_grp2		*select_mhp;
29306 		int				bd_len;
29307 
29308 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29309 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29310 		    MODEPAGE_AUDIO_CTRL_LEN;
29311 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29312 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29313 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29314 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29315 		    SD_PATH_STANDARD)) != 0) {
29316 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29317 			    "sr_volume_ctrl: Mode Sense Failed\n");
29318 			kmem_free(sense, sense_buflen);
29319 			kmem_free(select, select_buflen);
29320 			return (rval);
29321 		}
29322 		sense_mhp = (struct mode_header_grp2 *)sense;
29323 		select_mhp = (struct mode_header_grp2 *)select;
29324 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29325 		    sense_mhp->bdesc_length_lo;
29326 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29327 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29328 			    "sr_volume_ctrl: Mode Sense returned invalid "
29329 			    "block descriptor length\n");
29330 			kmem_free(sense, sense_buflen);
29331 			kmem_free(select, select_buflen);
29332 			return (EIO);
29333 		}
29334 		sense_page = (uchar_t *)
29335 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29336 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29337 		select_mhp->length_msb = 0;
29338 		select_mhp->length_lsb = 0;
29339 		select_mhp->bdesc_length_hi = 0;
29340 		select_mhp->bdesc_length_lo = 0;
29341 	} else {
29342 		struct mode_header		*sense_mhp, *select_mhp;
29343 
29344 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29345 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29346 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29347 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29348 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29349 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29350 		    SD_PATH_STANDARD)) != 0) {
29351 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29352 			    "sr_volume_ctrl: Mode Sense Failed\n");
29353 			kmem_free(sense, sense_buflen);
29354 			kmem_free(select, select_buflen);
29355 			return (rval);
29356 		}
29357 		sense_mhp  = (struct mode_header *)sense;
29358 		select_mhp = (struct mode_header *)select;
29359 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29360 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29361 			    "sr_volume_ctrl: Mode Sense returned invalid "
29362 			    "block descriptor length\n");
29363 			kmem_free(sense, sense_buflen);
29364 			kmem_free(select, select_buflen);
29365 			return (EIO);
29366 		}
29367 		sense_page = (uchar_t *)
29368 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29369 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29370 		select_mhp->length = 0;
29371 		select_mhp->bdesc_length = 0;
29372 	}
29373 	/*
29374 	 * Note: An audio control data structure could be created and overlaid
29375 	 * on the following in place of the array indexing method implemented.
29376 	 */
29377 
29378 	/* Build the select data for the user volume data */
29379 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29380 	select_page[1] = 0xE;
29381 	/* Set the immediate bit */
29382 	select_page[2] = 0x04;
29383 	/* Zero out reserved fields */
29384 	select_page[3] = 0x00;
29385 	select_page[4] = 0x00;
29386 	/* Return sense data for fields not to be modified */
29387 	select_page[5] = sense_page[5];
29388 	select_page[6] = sense_page[6];
29389 	select_page[7] = sense_page[7];
29390 	/* Set the user specified volume levels for channel 0 and 1 */
29391 	select_page[8] = 0x01;
29392 	select_page[9] = vol->channel0;
29393 	select_page[10] = 0x02;
29394 	select_page[11] = vol->channel1;
29395 	/* Channels 2 and 3 are currently unsupported; return the sense data */
29396 	select_page[12] = sense_page[12];
29397 	select_page[13] = sense_page[13];
29398 	select_page[14] = sense_page[14];
29399 	select_page[15] = sense_page[15];
29400 
29401 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29402 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29403 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29404 	} else {
29405 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29406 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29407 	}
29408 
29409 	kmem_free(sense, sense_buflen);
29410 	kmem_free(select, select_buflen);
29411 	return (rval);
29412 }
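
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMVOLCTRL ioctl serviced above. Each channel level is one byte;
 * channels 2 and 3 are ignored by this driver, as noted above.
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *
 *	int
 *	set_volume(int fd, unsigned char level)
 *	{
 *		struct cdrom_volctrl	vol;
 *
 *		vol.channel0 = level;
 *		vol.channel1 = level;
 *		vol.channel2 = 0;
 *		vol.channel3 = 0;
 *		return (ioctl(fd, CDROMVOLCTRL, &vol));
 *	}
 */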
29413 
29414 
29415 /*
29416  *    Function: sr_read_sony_session_offset()
29417  *
29418  * Description: This routine is the driver entry point for handling CD-ROM
29419  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29420  *		The address of the first track in the last session of a
29421  *		multi-session CD-ROM is returned
29422  *
29423  *		Note: This routine uses a vendor specific key value in the
29424  *		command control field without implementing any vendor check here
29425  *		or in the ioctl routine.
29426  *
29427  *   Arguments: dev	- the device 'dev_t'
29428  *		data	- pointer to an int to hold the requested address
29429  *		flag	- this argument is a pass through to ddi_copyxxx()
29430  *			  directly from the mode argument of ioctl().
29431  *
29432  * Return Code: the code returned by sd_send_scsi_cmd()
29433  *		EFAULT if ddi_copyxxx() fails
29434  *		ENXIO if fail ddi_get_soft_state
29435  *		EINVAL if data pointer is NULL
29436  */
29437 
29438 static int
29439 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29440 {
29441 	struct sd_lun		*un;
29442 	struct uscsi_cmd	*com;
29443 	caddr_t			buffer;
29444 	char			cdb[CDB_GROUP1];
29445 	int			session_offset = 0;
29446 	int			rval;
29447 
29448 	if (data == NULL) {
29449 		return (EINVAL);
29450 	}
29451 
29452 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29453 	    (un->un_state == SD_STATE_OFFLINE)) {
29454 		return (ENXIO);
29455 	}
29456 
29457 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29458 	bzero(cdb, CDB_GROUP1);
29459 	cdb[0] = SCMD_READ_TOC;
29460 	/*
29461 	 * Bytes 7 & 8 hold the allocation length: 12 bytes for a single entry.
29462 	 * (4 byte TOC response header + 8 byte response data)
29463 	 */
29464 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29465 	/* Byte 9 is the control byte. A vendor specific value is used */
29466 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29467 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29468 	com->uscsi_cdb = cdb;
29469 	com->uscsi_cdblen = CDB_GROUP1;
29470 	com->uscsi_bufaddr = buffer;
29471 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29472 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29473 
29474 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29475 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29476 	if (rval != 0) {
29477 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29478 		kmem_free(com, sizeof (*com));
29479 		return (rval);
29480 	}
29481 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29482 		session_offset =
29483 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29484 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29485 		/*
29486 		 * The offset is returned in current lbasize blocks. Convert
29487 		 * it to 2k blocks before returning it to the user.
29488 		 */
29489 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29490 			session_offset >>= 2;
29491 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29492 			session_offset >>= 1;
29493 		}
29494 	}
29495 
29496 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29497 		rval = EFAULT;
29498 	}
29499 
29500 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29501 	kmem_free(com, sizeof (*com));
29502 	return (rval);
29503 }
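
/*
 * Example: a minimal user-level sketch (not part of the driver) of the
 * CDROMREADOFFSET ioctl serviced above; the offset comes back as an
 * int in units of 2k blocks:
 *
 *	#include <sys/cdio.h>
 *	#include <sys/ioctl.h>
 *	#include <stdio.h>
 *
 *	int	offset;
 *
 *	if (ioctl(fd, CDROMREADOFFSET, &offset) == 0)
 *		(void) printf("last session starts at 2k block %d\n", offset);
 */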
29504 
29505 
29506 /*
29507  *    Function: sd_wm_cache_constructor()
29508  *
29509  * Description: Cache Constructor for the wmap cache for the read/modify/write
29510  * 		devices.
29511  *
29512  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29513  *		un	- sd_lun structure for the device.
29514  *		flag	- the km flags passed to constructor
29515  *
29516  * Return Code: 0 on success.
29517  *		-1 on failure.
29518  */
29519 
29520 /*ARGSUSED*/
29521 static int
29522 sd_wm_cache_constructor(void *wm, void *un, int flags)
29523 {
29524 	bzero(wm, sizeof (struct sd_w_map));
29525 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29526 	return (0);
29527 }
29528 
29529 
29530 /*
29531  *    Function: sd_wm_cache_destructor()
29532  *
29533  * Description: Cache destructor for the wmap cache for the read/modify/write
29534  * 		devices.
29535  *
29536  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29537  *		un	- sd_lun structure for the device.
29538  */
29539 /*ARGSUSED*/
29540 static void
29541 sd_wm_cache_destructor(void *wm, void *un)
29542 {
29543 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29544 }
29545 
29546 
29547 /*
29548  *    Function: sd_range_lock()
29549  *
29550  * Description: Lock the range of blocks specified as parameter to ensure
29551  *		that read, modify write is atomic and no other i/o writes
29552  *		to the same location. The range is specified in terms
29553  *		of start and end blocks. Block numbers are the actual
29554  *		media block numbers and not system.
29555  *
29556  *   Arguments: un	- sd_lun structure for the device.
29557  *		startb - The starting block number
29558  *		endb - The end block number
29559  *		typ - type of i/o - simple/read_modify_write
29560  *
29561  * Return Code: wm  - pointer to the wmap structure.
29562  *
29563  *     Context: This routine can sleep.
29564  */
29565 
29566 static struct sd_w_map *
29567 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29568 {
29569 	struct sd_w_map *wmp = NULL;
29570 	struct sd_w_map *sl_wmp = NULL;
29571 	struct sd_w_map *tmp_wmp;
29572 	wm_state state = SD_WM_CHK_LIST;
29573 
29574 
29575 	ASSERT(un != NULL);
29576 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29577 
29578 	mutex_enter(SD_MUTEX(un));
29579 
29580 	while (state != SD_WM_DONE) {
29581 
29582 		switch (state) {
29583 		case SD_WM_CHK_LIST:
29584 			/*
29585 			 * This is the starting state. Check the wmap list
29586 			 * to see if the range is currently available.
29587 			 */
29588 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29589 				/*
29590 				 * If this is a simple write and no rmw
29591 				 * i/o is pending then try to lock the
29592 				 * range as the range should be available.
29593 				 */
29594 				state = SD_WM_LOCK_RANGE;
29595 			} else {
29596 				tmp_wmp = sd_get_range(un, startb, endb);
29597 				if (tmp_wmp != NULL) {
29598 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29599 						/*
29600 						 * Don't keep onlist wmps
29601 						 * while waiting; this macro
29602 						 * also sets wmp = NULL.
29603 						 */
29604 						FREE_ONLIST_WMAP(un, wmp);
29605 					}
29606 					/*
29607 					 * sl_wmp is the wmap on which the
29608 					 * wait is done. Since tmp_wmp points
29609 					 * to the in-use wmap, set sl_wmp to
29610 					 * tmp_wmp and change the state to wait.
29611 					 */
29612 					sl_wmp = tmp_wmp;
29613 					state = SD_WM_WAIT_MAP;
29614 				} else {
29615 					state = SD_WM_LOCK_RANGE;
29616 				}
29617 
29618 			}
29619 			break;
29620 
29621 		case SD_WM_LOCK_RANGE:
29622 			ASSERT(un->un_wm_cache);
29623 			/*
29624 			 * The range needs to be locked; try to get a wmap.
29625 			 * First attempt it with KM_NOSLEEP to avoid sleeping
29626 			 * if possible, since we would have to release the sd
29627 			 * mutex if we had to sleep.
29628 			 */
29629 			if (wmp == NULL)
29630 				wmp = kmem_cache_alloc(un->un_wm_cache,
29631 				    KM_NOSLEEP);
29632 			if (wmp == NULL) {
29633 				mutex_exit(SD_MUTEX(un));
29634 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29635 				    (sd_lun::un_wm_cache))
29636 				wmp = kmem_cache_alloc(un->un_wm_cache,
29637 				    KM_SLEEP);
29638 				mutex_enter(SD_MUTEX(un));
29639 				/*
29640 				 * We released the mutex, so recheck by going
29641 				 * back to the check-list state.
29642 				 */
29643 				state = SD_WM_CHK_LIST;
29644 			} else {
29645 				/*
29646 				 * We can exit the state machine since we
29647 				 * have the wmap. Do the housekeeping first:
29648 				 * place the wmap on the wmap list if it is not
29649 				 * on it already, then set the state to done.
29650 				 */
29651 				wmp->wm_start = startb;
29652 				wmp->wm_end = endb;
29653 				wmp->wm_flags = typ | SD_WM_BUSY;
29654 				if (typ & SD_WTYPE_RMW) {
29655 					un->un_rmw_count++;
29656 				}
29657 				/*
29658 				 * If not already on the list then link
29659 				 */
29660 				if (!ONLIST(un, wmp)) {
29661 					wmp->wm_next = un->un_wm;
29662 					wmp->wm_prev = NULL;
29663 					if (wmp->wm_next)
29664 						wmp->wm_next->wm_prev = wmp;
29665 					un->un_wm = wmp;
29666 				}
29667 				state = SD_WM_DONE;
29668 			}
29669 			break;
29670 
29671 		case SD_WM_WAIT_MAP:
29672 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29673 			/*
29674 			 * Wait is done on sl_wmp, which is set in the
29675 			 * check_list state.
29676 			 */
29677 			sl_wmp->wm_wanted_count++;
29678 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29679 			sl_wmp->wm_wanted_count--;
29680 			/*
29681 			 * We can reuse the memory from the completed sl_wmp
29682 			 * lock range for our new lock, but only if no one is
29683 			 * waiting for it.
29684 			 */
29685 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29686 			if (sl_wmp->wm_wanted_count == 0) {
29687 				if (wmp != NULL)
29688 					CHK_N_FREEWMP(un, wmp);
29689 				wmp = sl_wmp;
29690 			}
29691 			sl_wmp = NULL;
29692 			/*
29693 			 * After waking up, need to recheck for availability of
29694 			 * range.
29695 			 */
29696 			state = SD_WM_CHK_LIST;
29697 			break;
29698 
29699 		default:
29700 			panic("sd_range_lock: "
29701 			    "Unknown state %d in sd_range_lock", state);
29702 			/*NOTREACHED*/
29703 		} /* switch(state) */
29704 
29705 	} /* while(state != SD_WM_DONE) */
29706 
29707 	mutex_exit(SD_MUTEX(un));
29708 
29709 	ASSERT(wmp != NULL);
29710 
29711 	return (wmp);
29712 }
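
/*
 * A minimal sketch of the intended pairing with sd_range_unlock() below
 * (the real callers are the read-modify-write paths; the block numbers
 * here are hypothetical):
 *
 *	struct sd_w_map	*wm;
 *
 *	wm = sd_range_lock(un, start_blk, end_blk, SD_WTYPE_RMW);
 *	(read the enclosing target blocks, merge the new data, write)
 *	sd_range_unlock(un, wm);
 */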
29713 
29714 
29715 /*
29716  *    Function: sd_get_range()
29717  *
29718  * Description: Find whether there is any I/O overlapping this one.
29719  *		Returns the write map of the first such I/O, NULL otherwise.
29720  *
29721  *   Arguments: un	- sd_lun structure for the device.
29722  *		startb - The starting block number
29723  *		endb - The end block number
29724  *
29725  * Return Code: wm  - pointer to the wmap structure.
29726  */
29727 
29728 static struct sd_w_map *
29729 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29730 {
29731 	struct sd_w_map *wmp;
29732 
29733 	ASSERT(un != NULL);
29734 
29735 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29736 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29737 			continue;
29738 		}
29739 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29740 			break;
29741 		}
29742 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29743 			break;
29744 		}
		/*
		 * Also catch the case where the new range entirely contains
		 * the busy one; the two checks above miss it since neither
		 * startb nor endb then falls inside [wm_start, wm_end].
		 */
		if ((startb < wmp->wm_start) && (endb > wmp->wm_end)) {
			break;
		}
29745 	}
29746 
29747 	return (wmp);
29748 }
29749 
29750 
29751 /*
29752  *    Function: sd_free_inlist_wmap()
29753  *
29754  * Description: Unlink and free a write map struct.
29755  *
29756  *   Arguments: un      - sd_lun structure for the device.
29757  *		wmp	- sd_w_map which needs to be unlinked.
29758  */
29759 
29760 static void
29761 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29762 {
29763 	ASSERT(un != NULL);
29764 
29765 	if (un->un_wm == wmp) {
29766 		un->un_wm = wmp->wm_next;
29767 	} else {
29768 		wmp->wm_prev->wm_next = wmp->wm_next;
29769 	}
29770 
29771 	if (wmp->wm_next) {
29772 		wmp->wm_next->wm_prev = wmp->wm_prev;
29773 	}
29774 
29775 	wmp->wm_next = wmp->wm_prev = NULL;
29776 
29777 	kmem_cache_free(un->un_wm_cache, wmp);
29778 }
29779 
29780 
29781 /*
29782  *    Function: sd_range_unlock()
29783  *
29784  * Description: Unlock the range locked by wm.
29785  *		Free write map if nobody else is waiting on it.
29786  *
29787  *   Arguments: un      - sd_lun structure for the device.
29788  *              wmp     - sd_w_map which needs to be unlinked.
29789  */
29790 
29791 static void
29792 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29793 {
29794 	ASSERT(un != NULL);
29795 	ASSERT(wm != NULL);
29796 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29797 
29798 	mutex_enter(SD_MUTEX(un));
29799 
29800 	if (wm->wm_flags & SD_WTYPE_RMW) {
29801 		un->un_rmw_count--;
29802 	}
29803 
29804 	if (wm->wm_wanted_count) {
29805 		wm->wm_flags = 0;
29806 		/*
29807 		 * Broadcast that the wmap is available now.
29808 		 */
29809 		cv_broadcast(&wm->wm_avail);
29810 	} else {
29811 		/*
29812 		 * If no one is waiting on the map, it should be freed.
29813 		 */
29814 		sd_free_inlist_wmap(un, wm);
29815 	}
29816 
29817 	mutex_exit(SD_MUTEX(un));
29818 }
29819 
29820 
29821 /*
29822  *    Function: sd_read_modify_write_task
29823  *
29824  * Description: Called from a taskq thread to initiate the write phase of
29825  *		a read-modify-write request.  This is used for targets where
29826  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29827  *
29828  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29829  *
29830  *     Context: Called under taskq thread context.
29831  */
29832 
29833 static void
29834 sd_read_modify_write_task(void *arg)
29835 {
29836 	struct sd_mapblocksize_info	*bsp;
29837 	struct buf	*bp;
29838 	struct sd_xbuf	*xp;
29839 	struct sd_lun	*un;
29840 
29841 	bp = arg;	/* The bp is given in arg */
29842 	ASSERT(bp != NULL);
29843 
29844 	/* Get the pointer to the layer-private data struct */
29845 	xp = SD_GET_XBUF(bp);
29846 	ASSERT(xp != NULL);
29847 	bsp = xp->xb_private;
29848 	ASSERT(bsp != NULL);
29849 
29850 	un = SD_GET_UN(bp);
29851 	ASSERT(un != NULL);
29852 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29853 
29854 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29855 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29856 
29857 	/*
29858 	 * This is the write phase of a read-modify-write request, called
29859 	 * under the context of a taskq thread after the read portion of the
29860 	 * rmw request has completed under interrupt context. The write
29861 	 * request must be sent from here down the iostart
29862 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29863 	 * we use the layer index saved in the layer-private data area.
29864 	 */
29865 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29866 
29867 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29868 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29869 }
29870 
29871 
29872 /*
29873  *    Function: sddump_do_read_of_rmw()
29874  *
29875  * Description: This routine is called from sddump. If sddump is called
29876  *		with an I/O that is not aligned on a device blocksize
29877  *		boundary, then the write has to be converted to a
29878  *		read-modify-write. Do the read part here in order to keep
29879  *		sddump simple. Note that the sd_mutex is held across the
29880  *		call to this routine.
29881  *
29882  *   Arguments: un	- sd_lun
29883  *		blkno	- block number in terms of media block size.
29884  *		nblk	- number of blocks.
29885  *		bpp	- pointer to pointer to the buf structure. On return
29886  *			from this function, *bpp points to the valid buffer
29887  *			to which the write has to be done.
29888  *
29889  * Return Code: 0 for success or errno-type return code
29890  */
29891 
29892 static int
29893 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29894 	struct buf **bpp)
29895 {
29896 	int err;
29897 	int i;
29898 	int rval;
29899 	struct buf *bp;
29900 	struct scsi_pkt *pkt = NULL;
29901 	uint32_t target_blocksize;
29902 
29903 	ASSERT(un != NULL);
29904 	ASSERT(mutex_owned(SD_MUTEX(un)));
29905 
29906 	target_blocksize = un->un_tgt_blocksize;
29907 
29908 	mutex_exit(SD_MUTEX(un));
29909 
29910 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29911 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29912 	if (bp == NULL) {
29913 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29914 		    "no resources for dumping; giving up");
29915 		err = ENOMEM;
29916 		goto done;
29917 	}
29918 
29919 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29920 	    blkno, nblk);
29921 	if (rval != 0) {
29922 		scsi_free_consistent_buf(bp);
29923 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29924 		    "no resources for dumping; giving up");
29925 		err = ENOMEM;
29926 		goto done;
29927 	}
29928 
29929 	pkt->pkt_flags |= FLAG_NOINTR;
29930 
29931 	err = EIO;
29932 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29933 
29934 		/*
29935 		 * Scsi_poll returns 0 (success) if the command completes and
29936 		 * the status block is STATUS_GOOD.  We should only check
29937 		 * errors if this condition is not true.  Even then we should
29938 		 * send our own request sense packet only if we have a check
29939 		 * condition and auto request sense has not been performed by
29940 		 * the hba.
29941 		 */
29942 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29943 
29944 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29945 			err = 0;
29946 			break;
29947 		}
29948 
29949 		/*
29950 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29951 		 * no need to read RQS data.
29952 		 */
29953 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29954 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29955 			    "Device is gone\n");
29956 			break;
29957 		}
29958 
29959 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29960 			SD_INFO(SD_LOG_DUMP, un,
29961 			    "sddump: read failed with CHECK, try # %d\n", i);
29962 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29963 				(void) sd_send_polled_RQS(un);
29964 			}
29965 
29966 			continue;
29967 		}
29968 
29969 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29970 			int reset_retval = 0;
29971 
29972 			SD_INFO(SD_LOG_DUMP, un,
29973 			    "sddump: read failed with BUSY, try # %d\n", i);
29974 
29975 			if (un->un_f_lun_reset_enabled == TRUE) {
29976 				reset_retval = scsi_reset(SD_ADDRESS(un),
29977 				    RESET_LUN);
29978 			}
29979 			if (reset_retval == 0) {
29980 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29981 			}
29982 			(void) sd_send_polled_RQS(un);
29983 
29984 		} else {
29985 			SD_INFO(SD_LOG_DUMP, un,
29986 			    "sddump: read failed with 0x%x, try # %d\n",
29987 			    SD_GET_PKT_STATUS(pkt), i);
29988 			mutex_enter(SD_MUTEX(un));
29989 			sd_reset_target(un, pkt);
29990 			mutex_exit(SD_MUTEX(un));
29991 		}
29992 
29993 		/*
29994 		 * If we are not getting anywhere with lun/target resets,
29995 		 * let's reset the bus.
29996 		 */
29997 		if (i > SD_NDUMP_RETRIES/2) {
29998 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29999 			(void) sd_send_polled_RQS(un);
30000 		}
30001 
30002 	}
30003 	scsi_destroy_pkt(pkt);
30004 
30005 	if (err != 0) {
30006 		scsi_free_consistent_buf(bp);
30007 		*bpp = NULL;
30008 	} else {
30009 		*bpp = bp;
30010 	}
30011 
30012 done:
30013 	mutex_enter(SD_MUTEX(un));
30014 	return (err);
30015 }
30016 
30017 
30018 /*
30019  *    Function: sd_failfast_flushq
30020  *
30021  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30022  *		in b_flags and move them onto the failfast queue, then kick
30023  *		off a thread to return all bp's on the failfast queue to
30024  *		their owners with an error set.
30025  *
30026  *   Arguments: un - pointer to the soft state struct for the instance.
30027  *
30028  *     Context: may execute in interrupt context.
30029  */
30030 
30031 static void
30032 sd_failfast_flushq(struct sd_lun *un)
30033 {
30034 	struct buf *bp;
30035 	struct buf *next_waitq_bp;
30036 	struct buf *prev_waitq_bp = NULL;
30037 
30038 	ASSERT(un != NULL);
30039 	ASSERT(mutex_owned(SD_MUTEX(un)));
30040 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30041 	ASSERT(un->un_failfast_bp == NULL);
30042 
30043 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30044 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30045 
30046 	/*
30047 	 * Check if we should flush all bufs when entering failfast state, or
30048 	 * just those with B_FAILFAST set.
30049 	 */
30050 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30051 		/*
30052 		 * Move *all* bp's on the wait queue to the failfast flush
30053 		 * queue, including those that do NOT have B_FAILFAST set.
30054 		 */
30055 		if (un->un_failfast_headp == NULL) {
30056 			ASSERT(un->un_failfast_tailp == NULL);
30057 			un->un_failfast_headp = un->un_waitq_headp;
30058 		} else {
30059 			ASSERT(un->un_failfast_tailp != NULL);
30060 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30061 		}
30062 
30063 		un->un_failfast_tailp = un->un_waitq_tailp;
30064 
30065 		/* update kstat for each bp moved out of the waitq */
30066 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30067 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30068 		}
30069 
30070 		/* empty the waitq */
30071 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30072 
30073 	} else {
30074 		/*
30075 		 * Go thru the wait queue, pick off all entries with
30076 		 * B_FAILFAST set, and move these onto the failfast queue.
30077 		 */
30078 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30079 			/*
30080 			 * Save the pointer to the next bp on the wait queue,
30081 			 * so we get to it on the next iteration of this loop.
30082 			 */
30083 			next_waitq_bp = bp->av_forw;
30084 
30085 			/*
30086 			 * If this bp from the wait queue does NOT have
30087 			 * B_FAILFAST set, just move on to the next element
30088 			 * in the wait queue. Note, this is the only place
30089 			 * where it is correct to set prev_waitq_bp.
30090 			 */
30091 			if ((bp->b_flags & B_FAILFAST) == 0) {
30092 				prev_waitq_bp = bp;
30093 				continue;
30094 			}
30095 
30096 			/*
30097 			 * Remove the bp from the wait queue.
30098 			 */
30099 			if (bp == un->un_waitq_headp) {
30100 				/* The bp is the first element of the waitq. */
30101 				un->un_waitq_headp = next_waitq_bp;
30102 				if (un->un_waitq_headp == NULL) {
30103 					/* The wait queue is now empty */
30104 					un->un_waitq_tailp = NULL;
30105 				}
30106 			} else {
30107 				/*
30108 				 * The bp is either somewhere in the middle
30109 				 * or at the end of the wait queue.
30110 				 */
30111 				ASSERT(un->un_waitq_headp != NULL);
30112 				ASSERT(prev_waitq_bp != NULL);
30113 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30114 				    == 0);
30115 				if (bp == un->un_waitq_tailp) {
30116 					/* bp is the last entry on the waitq. */
30117 					ASSERT(next_waitq_bp == NULL);
30118 					un->un_waitq_tailp = prev_waitq_bp;
30119 				}
30120 				prev_waitq_bp->av_forw = next_waitq_bp;
30121 			}
30122 			bp->av_forw = NULL;
30123 
30124 			/*
30125 			 * Update the kstat since the bp has moved out of
30126 			 * the waitq.
30127 			 */
30128 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30129 
30130 			/*
30131 			 * Now put the bp onto the failfast queue.
30132 			 */
30133 			if (un->un_failfast_headp == NULL) {
30134 				/* failfast queue is currently empty */
30135 				ASSERT(un->un_failfast_tailp == NULL);
30136 				un->un_failfast_headp =
30137 				    un->un_failfast_tailp = bp;
30138 			} else {
30139 				/* Add the bp to the end of the failfast q */
30140 				ASSERT(un->un_failfast_tailp != NULL);
30141 				ASSERT(un->un_failfast_tailp->b_flags &
30142 				    B_FAILFAST);
30143 				un->un_failfast_tailp->av_forw = bp;
30144 				un->un_failfast_tailp = bp;
30145 			}
30146 		}
30147 	}
30148 
30149 	/*
30150 	 * Now return all bp's on the failfast queue to their owners.
30151 	 */
30152 	while ((bp = un->un_failfast_headp) != NULL) {
30153 
30154 		un->un_failfast_headp = bp->av_forw;
30155 		if (un->un_failfast_headp == NULL) {
30156 			un->un_failfast_tailp = NULL;
30157 		}
30158 
30159 		/*
30160 		 * We want to return the bp with a failure error code, but
30161 		 * we do not want a call to sd_start_cmds() to occur here,
30162 		 * so use sd_return_failed_command_no_restart() instead of
30163 		 * sd_return_failed_command().
30164 		 */
30165 		sd_return_failed_command_no_restart(un, bp, EIO);
30166 	}
30167 
30168 	/* Flush the xbuf queues if required. */
30169 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30170 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30171 	}
30172 
30173 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30174 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30175 }
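
/*
 * Illustration only: a minimal, self-contained sketch of the same
 * "unlink marked nodes from a singly linked queue with head and tail
 * pointers" bookkeeping used by sd_failfast_flushq() above. The node
 * type and all names are invented for the example, and the block is
 * compiled out.
 */
#if 0
struct node {
	struct node	*next;
	int		marked;		/* analogous to B_FAILFAST */
};

static void
unlink_marked(struct node **headp, struct node **tailp,
    struct node **collectp)
{
	struct node *np, *next, *prev = NULL;

	for (np = *headp; np != NULL; np = next) {
		next = np->next;
		if (!np->marked) {
			prev = np;	/* only advance prev when keeping */
			continue;
		}
		if (np == *headp) {
			*headp = next;
			if (*headp == NULL)
				*tailp = NULL;	/* queue is now empty */
		} else {
			if (np == *tailp)
				*tailp = prev;	/* np was the last entry */
			prev->next = next;
		}
		np->next = *collectp;	/* hand back (LIFO) to the caller */
		*collectp = np;
	}
}
#endif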
30176 
30177 
30178 /*
30179  *    Function: sd_failfast_flushq_callback
30180  *
30181  * Description: Return TRUE if the given bp meets the criteria for failfast
30182  *		flushing. Used with ddi_xbuf_flushq(9F).
30183  *
30184  *   Arguments: bp - ptr to buf struct to be examined.
30185  *
30186  *     Context: Any
30187  */
30188 
30189 static int
30190 sd_failfast_flushq_callback(struct buf *bp)
30191 {
30192 	/*
30193 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30194 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30195 	 */
30196 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30197 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30198 }
30199 
30200 
30201 
30202 #if defined(__i386) || defined(__amd64)
30203 /*
30204  * Function: sd_setup_next_xfer
30205  *
30206  * Description: Prepare the next I/O operation using DMA_PARTIAL.
30207  *
30208  */
30209 
30210 static int
30211 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30212     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30213 {
30214 	ssize_t	num_blks_not_xfered;
30215 	daddr_t	strt_blk_num;
30216 	ssize_t	bytes_not_xfered;
30217 	int	rval;
30218 
30219 	ASSERT(pkt->pkt_resid == 0);
30220 
30221 	/*
30222 	 * Calculate next block number and amount to be transferred.
30223 	 *
30224 	 * How much data has NOT been transferred to the HBA yet.
30225 	 */
30226 	bytes_not_xfered = xp->xb_dma_resid;
30227 
30228 	/*
30229 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
30230 	 */
30231 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30232 
30233 	/*
30234 	 * Set the starting block number to the end of what WAS transferred.
30235 	 */
30236 	strt_blk_num = xp->xb_blkno +
30237 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
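
	/*
	 * Worked example (assuming a 512-byte target block size): if
	 * b_bcount is 1048576 bytes and the first transfer moved 262144
	 * bytes, then xb_dma_resid is 786432, num_blks_not_xfered is
	 * 1536, and strt_blk_num is xb_blkno + 512.
	 */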
30238 
30239 	/*
30240 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30241 	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
30242 	 * the disk mutex here.
30243 	 */
30244 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30245 	    strt_blk_num, num_blks_not_xfered);
30246 
30247 	if (rval == 0) {
30248 
30249 		/*
30250 		 * Success.
30251 		 *
30252 		 * Adjust things if there are still more blocks to be
30253 		 * transfered.
30254 		 */
30255 		xp->xb_dma_resid = pkt->pkt_resid;
30256 		pkt->pkt_resid = 0;
30257 
30258 		return (1);
30259 	}
30260 
30261 	/*
30262 	 * There's really only one possible error return from
30263 	 * sd_setup_next_rw_pkt; it occurs when scsi_init_pkt
30264 	 * returns NULL.
30265 	 */
30266 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30267 
30268 	bp->b_resid = bp->b_bcount;
30269 	bp->b_flags |= B_ERROR;
30270 
30271 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30272 	    "Error setting up next portion of DMA transfer\n");
30273 
30274 	return (0);
30275 }
30276 #endif
30277 
30278 /*
30279  *    Function: sd_panic_for_res_conflict
30280  *
30281  * Description: Call panic with a string formatted with "Reservation Conflict"
30282  *		and a human readable identifier indicating the SD instance
30283  *		that experienced the reservation conflict.
30284  *
30285  *   Arguments: un - pointer to the soft state struct for the instance.
30286  *
30287  *     Context: may execute in interrupt context.
30288  */
30289 
30290 #define	SD_RESV_CONFLICT_FMT_LEN 40
30291 void
30292 sd_panic_for_res_conflict(struct sd_lun *un)
30293 {
30294 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30295 	char path_str[MAXPATHLEN];
30296 
30297 	(void) snprintf(panic_str, sizeof (panic_str),
30298 	    "Reservation Conflict\nDisk: %s",
30299 	    ddi_pathname(SD_DEVINFO(un), path_str));
30300 
30301 	panic(panic_str);
30302 }
30303 
30304 /*
30305  * Note: The following sd_faultinjection_ioctl( ) routines implement
30306  * driver support for fault injection, allowing error analysis by
30307  * injecting faults into multiple layers of the driver.
30308  *
30309  */
30310 
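/*
 * Illustration only: a user-level sketch of one fault-injection session,
 * assuming a test harness that can see the SDIOC* ioctl commands, the
 * struct sd_fi_pkt definition, and SD_FI_MAX_BUF (all from sddef.h). The
 * device path and the injected values are placeholders, and the block is
 * compiled out.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
inject_one_fault(void)
{
	struct sd_fi_pkt fi;
	char log[SD_FI_MAX_BUF];
	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);

	if (fd < 0)
		return (-1);
	(void) ioctl(fd, SDIOCSTART, NULL);	/* reset session state */
	(void) memset(&fi, 0, sizeof (fi));	/* fill in fields as desired */
	(void) ioctl(fd, SDIOCINSERTPKT, &fi);	/* stage a pkt fault */
	(void) ioctl(fd, SDIOCPUSH, NULL);	/* push the staged entry */
	(void) ioctl(fd, SDIOCRUN, NULL);	/* arm injection */
	/* ... run I/O against the device, then collect the session log ... */
	(void) ioctl(fd, SDIOCRETRIEVE, log);
	(void) close(fd);
	return (0);
}
#endif
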
30311 #ifdef SD_FAULT_INJECTION
30312 static uint_t   sd_fault_injection_on = 0;
30313 
30314 /*
30315  *    Function: sd_faultinjection_ioctl()
30316  *
30317  * Description: This routine is the driver entry point for handling
30318  *              fault injection ioctls, used to inject errors into the
30319  *              layered driver model.
30320  *
30321  *   Arguments: cmd	- the ioctl cmd received
30322  *		arg	- the arguments from the user; also used for returns
30323  */
30324 
30325 static void
30326 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30327 {
30328 	uint_t i;
30329 	uint_t rval;
30330 
30331 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30332 
30333 	mutex_enter(SD_MUTEX(un));
30334 
30335 	switch (cmd) {
30336 	case SDIOCRUN:
30337 		/* Allow pushed faults to be injected */
30338 		SD_INFO(SD_LOG_SDTEST, un,
30339 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30340 
30341 		sd_fault_injection_on = 1;
30342 
30343 		SD_INFO(SD_LOG_IOERR, un,
30344 		    "sd_faultinjection_ioctl: run finished\n");
30345 		break;
30346 
30347 	case SDIOCSTART:
30348 		/* Start Injection Session */
30349 		SD_INFO(SD_LOG_SDTEST, un,
30350 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30351 
30352 		sd_fault_injection_on = 0;
30353 		un->sd_injection_mask = 0xFFFFFFFF;
30354 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30355 			un->sd_fi_fifo_pkt[i] = NULL;
30356 			un->sd_fi_fifo_xb[i] = NULL;
30357 			un->sd_fi_fifo_un[i] = NULL;
30358 			un->sd_fi_fifo_arq[i] = NULL;
30359 		}
30360 		un->sd_fi_fifo_start = 0;
30361 		un->sd_fi_fifo_end = 0;
30362 
30363 		mutex_enter(&(un->un_fi_mutex));
30364 		un->sd_fi_log[0] = '\0';
30365 		un->sd_fi_buf_len = 0;
30366 		mutex_exit(&(un->un_fi_mutex));
30367 
30368 		SD_INFO(SD_LOG_IOERR, un,
30369 		    "sd_faultinjection_ioctl: start finished\n");
30370 		break;
30371 
30372 	case SDIOCSTOP:
30373 		/* Stop Injection Session */
30374 		SD_INFO(SD_LOG_SDTEST, un,
30375 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30376 		sd_fault_injection_on = 0;
30377 		un->sd_injection_mask = 0x0;
30378 
30379 		/* Empty stray or unused structs from the fifo */
30380 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30381 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30382 				kmem_free(un->sd_fi_fifo_pkt[i],
30383 				    sizeof (struct sd_fi_pkt));
30384 			}
30385 			if (un->sd_fi_fifo_xb[i] != NULL) {
30386 				kmem_free(un->sd_fi_fifo_xb[i],
30387 				    sizeof (struct sd_fi_xb));
30388 			}
30389 			if (un->sd_fi_fifo_un[i] != NULL) {
30390 				kmem_free(un->sd_fi_fifo_un[i],
30391 				    sizeof (struct sd_fi_un));
30392 			}
30393 			if (un->sd_fi_fifo_arq[i] != NULL) {
30394 				kmem_free(un->sd_fi_fifo_arq[i],
30395 				    sizeof (struct sd_fi_arq));
30396 			}
30397 			un->sd_fi_fifo_pkt[i] = NULL;
30398 			un->sd_fi_fifo_un[i] = NULL;
30399 			un->sd_fi_fifo_xb[i] = NULL;
30400 			un->sd_fi_fifo_arq[i] = NULL;
30401 		}
30402 		un->sd_fi_fifo_start = 0;
30403 		un->sd_fi_fifo_end = 0;
30404 
30405 		SD_INFO(SD_LOG_IOERR, un,
30406 		    "sd_faultinjection_ioctl: stop finished\n");
30407 		break;
30408 
30409 	case SDIOCINSERTPKT:
30410 		/* Store a packet struct to be pushed onto fifo */
30411 		SD_INFO(SD_LOG_SDTEST, un,
30412 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30413 
30414 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30415 
30416 		sd_fault_injection_on = 0;
30417 
30418 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30419 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30420 			kmem_free(un->sd_fi_fifo_pkt[i],
30421 			    sizeof (struct sd_fi_pkt));
30422 		}
30423 		if (arg != NULL) {
30424 			un->sd_fi_fifo_pkt[i] =
30425 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30426 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30427 				/* Alloc failed; don't store anything */
30428 				break;
30429 			}
30430 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30431 			    sizeof (struct sd_fi_pkt), 0);
30432 			if (rval == -1) {
30433 				kmem_free(un->sd_fi_fifo_pkt[i],
30434 				    sizeof (struct sd_fi_pkt));
30435 				un->sd_fi_fifo_pkt[i] = NULL;
30436 			}
30437 		} else {
30438 			SD_INFO(SD_LOG_IOERR, un,
30439 			    "sd_faultinjection_ioctl: pkt null\n");
30440 		}
30441 		break;
30442 
30443 	case SDIOCINSERTXB:
30444 		/* Store an xb struct to be pushed onto the fifo */
30445 		SD_INFO(SD_LOG_SDTEST, un,
30446 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30447 
30448 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30449 
30450 		sd_fault_injection_on = 0;
30451 
30452 		if (un->sd_fi_fifo_xb[i] != NULL) {
30453 			kmem_free(un->sd_fi_fifo_xb[i],
30454 			    sizeof (struct sd_fi_xb));
30455 			un->sd_fi_fifo_xb[i] = NULL;
30456 		}
30457 		if (arg != NULL) {
30458 			un->sd_fi_fifo_xb[i] =
30459 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30460 			if (un->sd_fi_fifo_xb[i] == NULL) {
30461 				/* Alloc failed; don't store anything */
30462 				break;
30463 			}
30464 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30465 			    sizeof (struct sd_fi_xb), 0);
30466 
30467 			if (rval == -1) {
30468 				kmem_free(un->sd_fi_fifo_xb[i],
30469 				    sizeof (struct sd_fi_xb));
30470 				un->sd_fi_fifo_xb[i] = NULL;
30471 			}
30472 		} else {
30473 			SD_INFO(SD_LOG_IOERR, un,
30474 			    "sd_faultinjection_ioctl: xb null\n");
30475 		}
30476 		break;
30477 
30478 	case SDIOCINSERTUN:
30479 		/* Store a un struct to be pushed onto fifo */
30480 		SD_INFO(SD_LOG_SDTEST, un,
30481 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30482 
30483 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30484 
30485 		sd_fault_injection_on = 0;
30486 
30487 		if (un->sd_fi_fifo_un[i] != NULL) {
30488 			kmem_free(un->sd_fi_fifo_un[i],
30489 			    sizeof (struct sd_fi_un));
30490 			un->sd_fi_fifo_un[i] = NULL;
30491 		}
30492 		if (arg != NULL) {
30493 			un->sd_fi_fifo_un[i] =
30494 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30495 			if (un->sd_fi_fifo_un[i] == NULL) {
30496 				/* Alloc failed; don't store anything */
30497 				break;
30498 			}
30499 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30500 			    sizeof (struct sd_fi_un), 0);
30501 			if (rval == -1) {
30502 				kmem_free(un->sd_fi_fifo_un[i],
30503 				    sizeof (struct sd_fi_un));
30504 				un->sd_fi_fifo_un[i] = NULL;
30505 			}
30506 
30507 		} else {
30508 			SD_INFO(SD_LOG_IOERR, un,
30509 			    "sd_faultinjection_ioctl: un null\n");
30510 		}
30511 
30512 		break;
30513 
30514 	case SDIOCINSERTARQ:
30515 		/* Store an arq struct to be pushed onto the fifo */
30516 		SD_INFO(SD_LOG_SDTEST, un,
30517 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30518 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30519 
30520 		sd_fault_injection_on = 0;
30521 
30522 		if (un->sd_fi_fifo_arq[i] != NULL) {
30523 			kmem_free(un->sd_fi_fifo_arq[i],
30524 			    sizeof (struct sd_fi_arq));
30525 			un->sd_fi_fifo_arq[i] = NULL;
30526 		}
30527 		if (arg != NULL) {
30528 			un->sd_fi_fifo_arq[i] =
30529 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30530 			if (un->sd_fi_fifo_arq[i] == NULL) {
30531 				/* Alloc failed; don't store anything */
30532 				break;
30533 			}
30534 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30535 			    sizeof (struct sd_fi_arq), 0);
30536 			if (rval == -1) {
30537 				kmem_free(un->sd_fi_fifo_arq[i],
30538 				    sizeof (struct sd_fi_arq));
30539 				un->sd_fi_fifo_arq[i] = NULL;
30540 			}
30541 
30542 		} else {
30543 			SD_INFO(SD_LOG_IOERR, un,
30544 			    "sd_faultinjection_ioctl: arq null\n");
30545 		}
30546 
30547 		break;
30548 
30549 	case SDIOCPUSH:
30550 		/* Push stored xb, pkt, un, and arq onto fifo */
30551 		sd_fault_injection_on = 0;
30552 
30553 		if (arg != NULL) {
30554 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30555 			if (rval != -1 &&
30556 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30557 				un->sd_fi_fifo_end += i;
30558 			}
30559 		} else {
30560 			SD_INFO(SD_LOG_IOERR, un,
30561 			    "sd_faultinjection_ioctl: push arg null\n");
30562 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30563 				un->sd_fi_fifo_end++;
30564 			}
30565 		}
30566 		SD_INFO(SD_LOG_IOERR, un,
30567 		    "sd_faultinjection_ioctl: push to end=%d\n",
30568 		    un->sd_fi_fifo_end);
30569 		break;
30570 
30571 	case SDIOCRETRIEVE:
30572 		/* Return buffer of log from Injection session */
30573 		SD_INFO(SD_LOG_SDTEST, un,
30574 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30575 
30576 		sd_fault_injection_on = 0;
30577 
30578 		mutex_enter(&(un->un_fi_mutex));
30579 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30580 		    un->sd_fi_buf_len+1, 0);
30581 		mutex_exit(&(un->un_fi_mutex));
30582 
30583 		if (rval == -1) {
30584 			/*
30585 			 * arg is possibly invalid; set
30586 			 * it to NULL for the return.
30587 			 */
30588 			arg = NULL;
30589 		}
30590 		break;
30591 	}
30592 
30593 	mutex_exit(SD_MUTEX(un));
30594 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30596 }
30597 
30598 
30599 /*
30600  *    Function: sd_injection_log()
30601  *
30602  * Description: This routine adds buf to the already existing injection
30603  *              log, for retrieval via sd_faultinjection_ioctl() for use
30604  *              in fault detection and recovery.
30605  *
30606  *   Arguments: buf - the string to add to the log
30607  */
30608 
30609 static void
30610 sd_injection_log(char *buf, struct sd_lun *un)
30611 {
30612 	uint_t len;
30613 
30614 	ASSERT(un != NULL);
30615 	ASSERT(buf != NULL);
30616 
30617 	mutex_enter(&(un->un_fi_mutex));
30618 
30619 	len = min(strlen(buf), 255);
30620 	/* Add logged value to Injection log to be returned later */
30621 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30622 		uint_t	offset = strlen((char *)un->sd_fi_log);
30623 		char *destp = (char *)un->sd_fi_log + offset;
30624 		int i;
30625 		for (i = 0; i < len; i++) {
30626 			*destp++ = *buf++;
30627 		}
30628 		un->sd_fi_buf_len += len;
30629 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30630 	}
30631 
30632 	mutex_exit(&(un->un_fi_mutex));
30633 }
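
/*
 * Illustration only: a call such as
 *
 *	sd_injection_log("sd_faultinjection: fault armed\n", un);
 *
 * appends its text to un->sd_fi_log, the buffer that the SDIOCRETRIEVE
 * ioctl above copies out to the user. (The message text here is invented
 * for the example.)
 */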
30634 
30635 
30636 /*
30637  *    Function: sd_faultinjection()
30638  *
30639  * Description: This routine takes the pkt and changes its
30640  *		content based on the error injection scenario.
30641  *
30642  *   Arguments: pktp	- packet to be changed
30643  */
30644 
30645 static void
30646 sd_faultinjection(struct scsi_pkt *pktp)
30647 {
30648 	uint_t i;
30649 	struct sd_fi_pkt *fi_pkt;
30650 	struct sd_fi_xb *fi_xb;
30651 	struct sd_fi_un *fi_un;
30652 	struct sd_fi_arq *fi_arq;
30653 	struct buf *bp;
30654 	struct sd_xbuf *xb;
30655 	struct sd_lun *un;
30656 
30657 	ASSERT(pktp != NULL);
30658 
30659 	/* pull bp, xb, and un from pktp */
30660 	bp = (struct buf *)pktp->pkt_private;
30661 	xb = SD_GET_XBUF(bp);
30662 	un = SD_GET_UN(bp);
30663 
30664 	ASSERT(un != NULL);
30665 
30666 	mutex_enter(SD_MUTEX(un));
30667 
30668 	SD_TRACE(SD_LOG_SDTEST, un,
30669 	    "sd_faultinjection: entry Injection from sdintr\n");
30670 
30671 	/* if injection is off, return */
30672 	if (sd_fault_injection_on == 0 ||
30673 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30674 		mutex_exit(SD_MUTEX(un));
30675 		return;
30676 	}
30677 
30678 
30679 	/* take next set off fifo */
30680 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30681 
30682 	fi_pkt = un->sd_fi_fifo_pkt[i];
30683 	fi_xb = un->sd_fi_fifo_xb[i];
30684 	fi_un = un->sd_fi_fifo_un[i];
30685 	fi_arq = un->sd_fi_fifo_arq[i];
30686 
30687 
30688 	/* set variables accordingly */
30689 	/* set pkt if it was on fifo */
30690 	if (fi_pkt != NULL) {
30691 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30692 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30693 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30694 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30695 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30696 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30697 
30698 	}
30699 
30700 	/* set xb if it was on fifo */
30701 	if (fi_xb != NULL) {
30702 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30703 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30704 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30705 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30706 		    "xb_victim_retry_count");
30707 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30708 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30709 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30710 
30711 		/* copy in block data from sense */
30712 		if (fi_xb->xb_sense_data[0] != -1) {
30713 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30714 			    SENSE_LENGTH);
30715 		}
30716 
30717 		/* copy in extended sense codes */
30718 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30719 		    "es_code");
30720 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30721 		    "es_key");
30722 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30723 		    "es_add_code");
30724 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30725 		    es_qual_code, "es_qual_code");
30726 	}
30727 
30728 	/* set un if it was on fifo */
30729 	if (fi_un != NULL) {
30730 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30731 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30732 		SD_CONDSET(un, un, un_reset_retry_count,
30733 		    "un_reset_retry_count");
30734 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30735 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30736 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30737 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30738 		    "un_f_geometry_is_valid");
30739 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30740 		    "un_f_allow_bus_device_reset");
30741 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30742 
30743 	}
30744 
30745 	/* copy in auto request sense if it was on fifo */
30746 	if (fi_arq != NULL) {
30747 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30748 	}
30749 
30750 	/* free structs */
30751 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30752 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30753 	}
30754 	if (un->sd_fi_fifo_xb[i] != NULL) {
30755 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30756 	}
30757 	if (un->sd_fi_fifo_un[i] != NULL) {
30758 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30759 	}
30760 	if (un->sd_fi_fifo_arq[i] != NULL) {
30761 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30762 	}
30763 
30764 	/*
30765 	 * kmem_free does not guarantee that freed pointers are set to
30766 	 * NULL. Since we use these pointers to determine whether values
30767 	 * were set, make sure they are always NULL after the free.
30769 	 */
30770 	un->sd_fi_fifo_pkt[i] = NULL;
30771 	un->sd_fi_fifo_un[i] = NULL;
30772 	un->sd_fi_fifo_xb[i] = NULL;
30773 	un->sd_fi_fifo_arq[i] = NULL;
30774 
30775 	un->sd_fi_fifo_start++;
30776 
30777 	mutex_exit(SD_MUTEX(un));
30778 
30779 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30780 }
30781 
30782 #endif /* SD_FAULT_INJECTION */
30783 
30784 /*
30785  * This routine is invoked in sd_unit_attach(). Before calling it, the
30786  * properties in the conf file should already have been processed,
30787  * including the "hotpluggable" property.
30788  *
30789  * The sd driver distinguishes 3 different types of devices: removable media,
30790  * non-removable media, and hotpluggable. The differences are defined below:
30791  *
30792  * 1. Device ID
30793  *
30794  *     The device ID of a device is used to identify this device. Refer to
30795  *     ddi_devid_register(9F).
30796  *
30797  *     For a non-removable media disk device which can provide 0x80 or 0x83
30798  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30799  *     device ID is created to identify this device. For other non-removable
30800  *     media devices, a default device ID is created only if this device has
30801  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30802  *
30803  *     -------------------------------------------------------
30804  *     removable media   hotpluggable  | Can Have Device ID
30805  *     -------------------------------------------------------
30806  *         false             false     |     Yes
30807  *         false             true      |     Yes
30808  *         true                x       |     No
30809  *     ------------------------------------------------------
30810  *
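 *     For example, a devid can be fabricated and registered through the
 *     generic 9F interfaces (a sketch only; DEVID_FAB is one documented
 *     ddi_devid_init(9F) type, and the surrounding context is invented):
 *
 *		ddi_devid_t devid;
 *
 *		if (ddi_devid_init(dip, DEVID_FAB, 0, NULL, &devid) ==
 *		    DDI_SUCCESS)
 *			(void) ddi_devid_register(dip, devid);
 *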
30811  *
30812  * 2. SCSI group 4 commands
30813  *
30814  *     In the SCSI specs, only some commands in the group 4 command set can
30815  *     use 8-byte addresses, which are needed to access >2TB storage spaces.
30816  *     Other commands have no such capability. Without group 4 support, it
30817  *     is impossible to make full use of the storage space of a disk whose
30818  *     capacity is larger than 2TB (see the arithmetic note after the table).
30819  *
30820  *     -----------------------------------------------
30821  *     removable media   hotpluggable   LP64  |  Group
30822  *     -----------------------------------------------
30823  *           false          false       false |   1
30824  *           false          false       true  |   4
30825  *           false          true        false |   1
30826  *           false          true        true  |   4
30827  *           true             x           x   |   5
30828  *     -----------------------------------------------
30829  *
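 *     (Arithmetic note on the 2TB limit: the 10-byte group 1 READ/WRITE
 *     CDBs carry a 32-bit LBA, so with 512-byte blocks they address at
 *     most 2^32 * 512 bytes = 2 TB; the 16-byte group 4 CDBs carry a
 *     64-bit LBA.)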
30830  *
30831  * 3. Check for VTOC Label
30832  *
30833  *     If a direct-access disk has no EFI label, sd will check if it has a
30834  *     valid VTOC label. Now, sd also does that check for removable media
30835  *     and hotpluggable devices.
30836  *
30837  *     --------------------------------------------------------------
30838  *     Direct-Access   removable media    hotpluggable |  Check Label
30839  *     -------------------------------------------------------------
30840  *         false          false           false        |   No
30841  *         false          false           true         |   No
30842  *         false          true            false        |   Yes
30843  *         false          true            true         |   Yes
30844  *         true            x                x          |   Yes
30845  *     --------------------------------------------------------------
30846  *
30847  *
30848  * 4. Building default VTOC label
30849  *
30850  *     As section 3 says, sd checks whether certain kinds of devices have a
30851  *     VTOC label. If those devices have no valid VTOC label, sd(7d) will
30852  *     attempt to create a default VTOC label for them. Currently sd creates
30853  *     a default VTOC label for all devices on the x86 platform (VTOC_16),
30854  *     but only for removable media devices on SPARC (VTOC_8).
30855  *
30856  *     -----------------------------------------------------------
30857  *       removable media hotpluggable platform   |   Default Label
30858  *     -----------------------------------------------------------
30859  *             false          false    sparc     |     No
 *             false          false     x86      |     Yes
30860  *             false          true      x86      |     Yes
30861  *             false          true     sparc     |     Yes
30862  *             true             x        x       |     Yes
30863  *     ----------------------------------------------------------
30864  *
30865  *
30866  * 5. Supported blocksizes of target devices
30867  *
30868  *     Sd supports non-512-byte blocksize for removable media devices only.
30869  *     For other devices, only 512-byte blocksize is supported. This may be
30870  *     changed in the near future because some RAID devices require a
30871  *     non-512-byte blocksize.
30872  *
30873  *     -----------------------------------------------------------
30874  *     removable media    hotpluggable    | non-512-byte blocksize
30875  *     -----------------------------------------------------------
30876  *           false          false         |   No
30877  *           false          true          |   No
30878  *           true             x           |   Yes
30879  *     -----------------------------------------------------------
30880  *
30881  *
30882  * 6. Automatic mount & unmount (i.e. vold)
30883  *
30884  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used
30885  *     to query whether a device is a removable media device. It returns 1
30886  *     for removable media devices, and 0 for others (a usage sketch
30887  *     follows the table below).
30888  *
30889  *     Vold treats a device as removable only if DKIOCREMOVABLE returns 1,
30890  *     and it automounts only removable media devices. To preserve users'
30891  *     experience and let vold continue automounting USB disk devices, the
30892  *     DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk devices.
30893  *
30894  *      ------------------------------------------------------
30895  *       removable media    hotpluggable   |  automatic mount
30896  *      ------------------------------------------------------
30897  *             false          false        |   No
30898  *             false          true         |   Yes
30899  *             true             x          |   Yes
30900  *      ------------------------------------------------------
30901  *
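 *      For example, a user program can query this as follows (a minimal
 *      sketch: DKIOCREMOVABLE and <sys/dkio.h> are standard, but the
 *      device path is just a placeholder):
 *
 *		int removable = 0;
 *		int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *
 *		if (fd >= 0 && ioctl(fd, DKIOCREMOVABLE, &removable) == 0)
 *			(void) printf("removable: %d\n", removable);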
30902  *
30903  * 7. fdisk partition management
30904  *
30905  *     Fdisk is the traditional partitioning method on the x86 platform.
30906  *     The sd(7d) driver supports fdisk partitions only on x86; on the SPARC
30907  *     platform, sd doesn't support fdisk partitions at all. Note: pcfs(7fs)
30908  *     can recognize fdisk partitions on both the x86 and SPARC platforms.
30909  *
30910  *     -----------------------------------------------------------
30911  *       platform   removable media  USB/1394  |  fdisk supported
30912  *     -----------------------------------------------------------
30913  *        x86         X               X        |       true
30914  *     ------------------------------------------------------------
30915  *        sparc       X               X        |       false
30916  *     ------------------------------------------------------------
30917  *
30918  *
30919  * 8. MBOOT/MBR
30920  *
30921  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
30922  *     support reading/writing the mboot for removable media devices on SPARC.
30923  *
30924  *     -----------------------------------------------------------
30925  *       platform   removable media  USB/1394  |  mboot supported
30926  *     -----------------------------------------------------------
30927  *        x86         X               X        |       true
30928  *     ------------------------------------------------------------
30929  *        sparc      false           false     |       false
30930  *        sparc      false           true      |       true
30931  *        sparc      true            false     |       true
30932  *        sparc      true            true      |       true
30933  *     ------------------------------------------------------------
30934  *
30935  *
30936  * 9.  error handling during opening device
30937  *
30938  *     If opening a disk device fails, an errno is returned. For some kinds
30939  *     of errors, a different errno is returned depending on whether the
30940  *     device is a removable media device. This brings USB/1394 hard disks
30941  *     in line with expected hard disk behavior. It is not expected that
30942  *     this breaks any application.
30943  *
30944  *     ------------------------------------------------------
30945  *       removable media    hotpluggable   |  errno
30946  *     ------------------------------------------------------
30947  *             false          false        |   EIO
30948  *             false          true         |   EIO
30949  *             true             x          |   ENXIO
30950  *     ------------------------------------------------------
30951  *
30952  *
30953  * 10. ioctls: DKIOCEJECT, CDROMEJECT
30954  *
30955  *     These IOCTLs are applicable only to removable media devices.
30956  *
30957  *     -----------------------------------------------------------
30958  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30959  *     -----------------------------------------------------------
30960  *             false          false        |     No
30961  *             false          true         |     No
30962  *             true            x           |     Yes
30963  *     -----------------------------------------------------------
30964  *
30965  *
30966  * 11. Kstats for partitions
30967  *
30968  *     sd creates partition kstats for non-removable media devices. USB and
30969  *     Firewire hard disks now have partition kstats as well.
30970  *
30971  *      ------------------------------------------------------
30972  *       removable media    hotpluggable   |   kstat
30973  *      ------------------------------------------------------
30974  *             false          false        |    Yes
30975  *             false          true         |    Yes
30976  *             true             x          |    No
30977  *       ------------------------------------------------------
30978  *
30979  *
30980  * 12. Removable media & hotpluggable properties
30981  *
30982  *     Sd driver creates a "removable-media" property for removable media
30983  *     devices. Parent nexus drivers create a "hotpluggable" property if
30984  *     they support hotplugging.
30985  *
30986  *     ---------------------------------------------------------------------
30987  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
30988  *     ---------------------------------------------------------------------
30989  *       false            false       |    No                   No
30990  *       false            true        |    No                   Yes
30991  *       true             false       |    Yes                  No
30992  *       true             true        |    Yes                  Yes
30993  *     ---------------------------------------------------------------------
30994  *
30995  *
30996  * 13. Power Management
30997  *
30998  *     sd only power manages removable media devices or devices that support
30999  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
31000  *
31001  *     A parent nexus that supports hotplugging can also set "pm-capable"
31002  *     if the disk can be power managed.
31003  *
31004  *     ------------------------------------------------------------
31005  *       removable media hotpluggable pm-capable  |   power manage
31006  *     ------------------------------------------------------------
31007  *             false          false     false     |     No
31008  *             false          false     true      |     Yes
31009  *             false          true      false     |     No
31010  *             false          true      true      |     Yes
31011  *             true             x        x        |     Yes
31012  *     ------------------------------------------------------------
31013  *
31014  *      USB and firewire hard disks can now be power managed independently
31015  *      of the framebuffer.
31016  *
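 *      For example, a parent nexus might export this property with a call
 *      like the following (a sketch only: ddi_prop_update_int(9F) is the
 *      standard interface, but the surrounding names are invented):
 *
 *		if (disk_supports_power_management(child_dip))
 *			(void) ddi_prop_update_int(DDI_DEV_T_NONE,
 *			    child_dip, "pm-capable", 1);
 *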
31017  *
31018  * 14. Support for USB disks with capacity larger than 1TB
31019  *
31020  *     Currently, sd doesn't permit a fixed disk device with capacity
31021  *     larger than 1TB to be used in a 32-bit operating system environment.
31022  *     However, sd doesn't do that for removable media devices. Instead, it
31023  *     assumes that removable media devices cannot have a capacity larger
31024  *     than 1TB. Therefore, using those devices on a 32-bit system is only
31025  *     partially supported, which can cause some unexpected results.
31026  *
31027  *     ---------------------------------------------------------------------
31028  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31029  *     ---------------------------------------------------------------------
31030  *             false          false  |   true         |     no
31031  *             false          true   |   true         |     no
31032  *             true           false  |   true         |     Yes
31033  *             true           true   |   true         |     Yes
31034  *     ---------------------------------------------------------------------
31035  *
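 *     (A plausible derivation of the 1TB figure, assuming 512-byte blocks
 *     and a signed 32-bit daddr_t on a 32-bit kernel: 2^31 addressable
 *     blocks * 512 bytes/block = 1 TB.)
 *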
31036  *
31037  * 15. Check write-protection at open time
31038  *
31039  *     When a removable media device is opened for writing without the
31040  *     NDELAY flag, sd will check whether the device is writable. If a
31041  *     write-protected device is opened this way, the open will fail.
31042  *
31043  *     ------------------------------------------------------------
31044  *       removable media    USB/1394   |   WP Check
31045  *     ------------------------------------------------------------
31046  *             false          false    |     No
31047  *             false          true     |     No
31048  *             true           false    |     Yes
31049  *             true           true     |     Yes
31050  *     ------------------------------------------------------------
31051  *
31052  *
31053  * 16. syslog when corrupted VTOC is encountered
31054  *
31055  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
31056  *      message only for fixed SCSI disks.
31057  *     ------------------------------------------------------------
31058  *       removable media    USB/1394   |   print syslog
31059  *     ------------------------------------------------------------
31060  *             false          false    |     Yes
31061  *             false          true     |     No
31062  *             true           false    |     No
31063  *             true           true     |     No
31064  *     ------------------------------------------------------------
31065  */
31066 static void
31067 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31068 {
31069 	int	pm_capable_prop;
31070 
31071 	ASSERT(un->un_sd);
31072 	ASSERT(un->un_sd->sd_inq);
31073 
31074 #if defined(_SUNOS_VTOC_16)
31075 	/*
31076 	 * For VTOC_16 devices, the default label will be created for all
31077 	 * devices. (see sd_build_default_label)
31078 	 */
31079 	un->un_f_default_vtoc_supported = TRUE;
31080 #endif
31081 
31082 	if (un->un_sd->sd_inq->inq_rmb) {
31083 		/*
31084 		 * The media of this device is removable, and for this kind
31085 		 * of device it is possible to change the medium after opening
31086 		 * it. Thus we should support this operation.
31087 		 */
31088 		un->un_f_has_removable_media = TRUE;
31089 
31090 #if defined(_SUNOS_VTOC_8)
31091 		/*
31092 		 * Note: currently, for VTOC_8 devices, default label is
31093 		 * created for removable and hotpluggable devices only.
31094 		 */
31095 		un->un_f_default_vtoc_supported = TRUE;
31096 #endif
31097 		/*
31098 		 * Support the non-512-byte blocksizes of removable media devices.
31099 		 */
31100 		un->un_f_non_devbsize_supported = TRUE;
31101 
31102 		/*
31103 		 * Assume that all removable media devices support DOOR_LOCK
31104 		 */
31105 		un->un_f_doorlock_supported = TRUE;
31106 
31107 		/*
31108 		 * A removable media device can be opened with the NDELAY flag
31109 		 * when there is no media in the drive; in that case we don't
31110 		 * care whether the device is writable. But without the NDELAY
31111 		 * flag, we need to check if the media is write-protected.
31112 		 */
31113 		un->un_f_chk_wp_open = TRUE;
31114 
31115 		/*
31116 		 * Need to start a SCSI watch thread to monitor the media state;
31117 		 * when media is inserted or ejected, notify syseventd.
31118 		 */
31119 		un->un_f_monitor_media_state = TRUE;
31120 
31121 		/*
31122 		 * Some devices don't support START_STOP_UNIT command.
31123 		 * Therefore, we'd better check if a device supports it
31124 		 * before sending it.
31125 		 */
31126 		un->un_f_check_start_stop = TRUE;
31127 
31128 		/*
31129 		 * support eject media ioctl:
31130 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31131 		 */
31132 		un->un_f_eject_media_supported = TRUE;
31133 
31134 		/*
31135 		 * Because many removable-media devices don't support
31136 		 * LOG_SENSE, we cannot use this command to check whether
31137 		 * a removable media device supports power management.
31138 		 * We assume that they support power-management via
31139 		 * START_STOP_UNIT command and can be spun up and down
31140 		 * without limitations.
31141 		 */
31142 		un->un_f_pm_supported = TRUE;
31143 
31144 		/*
31145 		 * Need to create a zero length (Boolean) property
31146 		 * "removable-media" for the removable media devices.
31147 		 * Note that the return value of the property creation is not
31148 		 * checked: if the property cannot be created, we do not want
31149 		 * the attach to fail altogether. This is consistent with
31150 		 * other property creation in attach.
31151 		 */
31152 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31153 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31154 
31155 	} else {
31156 		/*
31157 		 * create device ID for device
31158 		 */
31159 		un->un_f_devid_supported = TRUE;
31160 
31161 		/*
31162 		 * Spin up non-removable-media devices once they are attached.
31163 		 */
31164 		un->un_f_attach_spinup = TRUE;
31165 
31166 		/*
31167 		 * According to the SCSI specification, sense data has two kinds
31168 		 * of format: fixed format and descriptor format. At present, we
31169 		 * don't support descriptor format sense data for removable
31170 		 * media.
31171 		 */
31172 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31173 			un->un_f_descr_format_supported = TRUE;
31174 		}
31175 
31176 		/*
31177 		 * kstats are created only for non-removable media devices.
31178 		 *
31179 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31180 		 * default is 1, so they are enabled by default.
31181 		 */
31182 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31183 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31184 			"enable-partition-kstats", 1));
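
		/*
		 * For example, an sd.conf entry of the form
		 *
		 *	enable-partition-kstats=0;
		 *
		 * would disable them (a sketch: the property name comes from
		 * the lookup above; the stanza form is the usual
		 * driver.conf(4) convention).
		 */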
31185 
31186 		/*
31187 		 * Check if HBA has set the "pm-capable" property.
31188 		 * If "pm-capable" exists and is non-zero then we can
31189 		 * power manage the device without checking the start/stop
31190 		 * cycle count log sense page.
31191 		 *
31192 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31193 		 * then we should not power manage the device.
31194 		 *
31195 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31196 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31197 		 * sd will check the start/stop cycle count log sense page
31198 		 * and power manage the device if the cycle count limit has
31199 		 * not been exceeded.
31200 		 */
31201 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31202 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31203 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31204 			un->un_f_log_sense_supported = TRUE;
31205 		} else {
31206 			/*
31207 			 * pm-capable property exists.
31208 			 *
31209 			 * Convert "TRUE" values for pm_capable_prop to
31210 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31211 			 * later. "TRUE" values are any values except
31212 			 * SD_PM_CAPABLE_FALSE (0) and
31213 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31214 			 */
31215 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31216 				un->un_f_log_sense_supported = FALSE;
31217 			} else {
31218 				un->un_f_pm_supported = TRUE;
31219 			}
31220 
31221 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31222 			    "sd_unit_attach: un:0x%p pm-capable "
31223 			    "property set to %d.\n", un, un->un_f_pm_supported);
31224 		}
31225 	}
31226 
31227 	if (un->un_f_is_hotpluggable) {
31228 #if defined(_SUNOS_VTOC_8)
31229 		/*
31230 		 * Note: currently, for VTOC_8 devices, default label is
31231 		 * created for removable and hotpluggable devices only.
31232 		 */
31233 		un->un_f_default_vtoc_supported = TRUE;
31234 #endif
31235 
31236 		/*
31237 		 * Temporarily, let hotpluggable devices pretend to be
31238 		 * removable-media devices for vold.
31239 		 */
31240 		un->un_f_monitor_media_state = TRUE;
31241 
31242 		un->un_f_check_start_stop = TRUE;
31243 
31244 	}
31245 
31246 	/*
31247 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31248 	 * labels.
31249 	 */
31250 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31251 	    (un->un_sd->sd_inq->inq_rmb)) {
31252 		/*
31253 		 * Direct access devices have disk label
31254 		 */
31255 		un->un_f_vtoc_label_supported = TRUE;
31256 	}
31257 
31258 	/*
31259 	 * Fdisk partitions are supported for all direct access devices on the
31260 	 * x86 platform, and only for removable media and hotpluggable devices
31261 	 * on the SPARC platform. Below, we will set the following flag to
31262 	 * FALSE if the current device is neither removable media nor a
31263 	 * hotpluggable device and sd is running on the SPARC platform.
31264 	 */
31265 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31266 		un->un_f_mboot_supported = TRUE;
31267 	}
31268 
31269 	if (!un->un_f_is_hotpluggable &&
31270 	    !un->un_sd->sd_inq->inq_rmb) {
31271 
31272 #if defined(_SUNOS_VTOC_8)
31273 		/*
31274 		 * Don't support fdisk on fixed disk
31275 		 */
31276 		un->un_f_mboot_supported = FALSE;
31277 #endif
31278 
31279 		/*
31280 		 * Fixed disks support SYNC CACHE.
31281 		 */
31282 		un->un_f_sync_cache_supported = TRUE;
31283 
31284 		/*
31285 		 * For a fixed disk, if its VTOC is not valid, we will write
31286 		 * an error message to the system log.
31287 		 */
31288 		if (un->un_f_vtoc_label_supported)
31289 			un->un_f_vtoc_errlog_supported = TRUE;
31290 	}
31291 }
31292