xref: /titanic_44/usr/src/uts/common/io/scsi/targets/sd.c (revision af6a7a2cfb786fe2584df2dac456bedbb878fe82)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/vtrace.h>
44 #include <sys/note.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/efi_partition.h>
48 #include <sys/var.h>
49 #include <sys/aio_req.h>
50 
51 #ifdef __lock_lint
52 #define	_LP64
53 #define	__amd64
54 #endif
55 
56 #if (defined(__fibre))
57 /* Note: is there a leadville version of the following? */
58 #include <sys/fc4/fcal_linkapp.h>
59 #endif
60 #include <sys/taskq.h>
61 #include <sys/uuid.h>
62 #include <sys/byteorder.h>
63 #include <sys/sdt.h>
64 
65 #include "sd_xbuf.h"
66 
67 #include <sys/scsi/targets/sddef.h>
68 
69 
70 /*
71  * Loadable module info.
72  */
73 #if (defined(__fibre))
74 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
75 char _depends_on[]	= "misc/scsi drv/fcp";
76 #else
77 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
78 char _depends_on[]	= "misc/scsi";
79 #endif
80 
81 /*
82  * Define the interconnect type, to allow the driver to distinguish
83  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
84  *
85  * This is really for backward compatibility. In the future, the driver
86  * should actually check the "interconnect-type" property as reported by
87  * the HBA; however, at present this property is not defined by all HBAs,
88  * so we will use this #define (1) to permit the driver to run in
89  * backward-compatibility mode; and (2) to print a notification message
90  * if an FC HBA does not support the "interconnect-type" property.  The
91  * behavior of the driver will be to assume parallel SCSI behaviors unless
92  * the "interconnect-type" property is defined by the HBA **AND** has a
93  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
94  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
95  * Channel behaviors (as per the old ssd).  (Note that the
96  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
97  * will result in the driver assuming parallel SCSI behaviors.)
98  *
99  * (see common/sys/scsi/impl/services.h)
100  *
101  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
102  * since some FC HBAs may already support that, and there is some code in
103  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
104  * default would confuse that code; besides, things should work fine
105  * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
106  * "interconnect-type" property.
107  *
108  * Notes for off-by-1 workaround:
109  * -----------------------------
110  *
111  *    The SCSI READ_CAPACITY command returns the LBA of the last
112  *    logical block, but sd once treated this number as the disk's
113  *    capacity on the x86 platform. Since LBAs are addressed from
114  *    zero, the last block was lost on x86.
115  *
116  *    That treatment has now been removed. So that the present sd
117  *    driver can work with disks that were labeled/partitioned by
118  *    the previous sd, the following workarounds are applied:
119  *
120  *    1) Locate the backup EFI label: sd searches the next-to-last
121  *       block for the backup EFI label if it cannot find one on
122  *       the last block;
123  *    2) Calculate geometry: refer to sd_convert_geometry(). If
124  *       increasing the capacity by 1 causes the disk's capacity
125  *       to cross one of the limits in the CHS_values table, the
126  *       geometry info will change. This raises an issue: if the
127  *       primary VTOC label is destroyed, format can restore it
128  *       from the backup VTOC labels, and format locates those
129  *       labels using the geometry reported by sd. Changing the
130  *       geometry would therefore prevent format from finding the
131  *       backup VTOC labels. To avoid this side effect, sd uses
132  *       (capacity - 1) to calculate the geometry;
133  *    3) 1TB disks: the VTOC uses a 32-bit signed int, so sd does
134  *       not support a VTOC on a disk with more than DK_MAX_BLOCKS
135  *       LBAs. However, an exactly 1TB disk was treated as
136  *       (1T - 512)B in the past and could have a VTOC. To overcome
137  *       this, an exactly 1TB disk that has a Solaris fdisk
138  *       partition is still allowed to work with sd.
139  */
140 #if (defined(__fibre))
141 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
142 #else
143 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
144 #endif
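/*
 * A minimal sketch (not part of the driver proper) of the
 * interconnect-type selection described above, assuming the HBA
 * exposes the property through scsi_ifgetcap(9F). The function name
 * and the SD_EXAMPLE_SKETCHES guard are hypothetical; note that the
 * SD_INTERCONNECT_* values used here are defined later in this file.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_interconnect_type(struct scsi_address *ap)
{
	/* Ask the HBA for the "interconnect-type" capability. */
	switch (scsi_ifgetcap(ap, "interconnect-type", 1)) {
	case INTERCONNECT_SSA:
		return (SD_INTERCONNECT_SSA);		/* FC behaviors */
	case INTERCONNECT_FIBRE:
		return (SD_INTERCONNECT_FIBRE);		/* FC behaviors */
	case INTERCONNECT_FABRIC:
		return (SD_INTERCONNECT_FABRIC);	/* FC behaviors */
	default:
		/*
		 * Property absent (-1) or an unsupported type such as
		 * INTERCONNECT_1394 or INTERCONNECT_USB: fall back to
		 * the backward-compatibility default defined above.
		 */
		return (SD_DEFAULT_INTERCONNECT_TYPE);
	}
}
#endif	/* SD_EXAMPLE_SKETCHES */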
145 
146 /*
147  * The name of the driver, established from the module name in _init.
148  */
149 static	char *sd_label			= NULL;
150 
151 /*
152  * The driver name is unfortunately prefixed to some driver.conf properties.
153  */
154 #if (defined(__fibre))
155 #define	sd_max_xfer_size		ssd_max_xfer_size
156 #define	sd_config_list			ssd_config_list
157 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
158 static	char *sd_config_list		= "ssd-config-list";
159 #else
160 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
161 static	char *sd_config_list		= "sd-config-list";
162 #endif
163 
164 /*
165  * Driver global variables
166  */
167 
168 #if (defined(__fibre))
169 /*
170  * These #defines are to avoid namespace collisions that occur because this
171  * code is currently used to compile two separate driver modules: sd and ssd.
172  * All global variables need to be treated this way (even if declared static)
173  * in order to allow the debugger to resolve the names properly.
174  * It is anticipated that in the near future the ssd module will be obsoleted,
175  * at which time this namespace issue should go away.
176  */
177 #define	sd_state			ssd_state
178 #define	sd_io_time			ssd_io_time
179 #define	sd_failfast_enable		ssd_failfast_enable
180 #define	sd_ua_retry_count		ssd_ua_retry_count
181 #define	sd_report_pfa			ssd_report_pfa
182 #define	sd_max_throttle			ssd_max_throttle
183 #define	sd_min_throttle			ssd_min_throttle
184 #define	sd_rot_delay			ssd_rot_delay
185 
186 #define	sd_retry_on_reservation_conflict	\
187 					ssd_retry_on_reservation_conflict
188 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
189 #define	sd_resv_conflict_name		ssd_resv_conflict_name
190 
191 #define	sd_component_mask		ssd_component_mask
192 #define	sd_level_mask			ssd_level_mask
193 #define	sd_debug_un			ssd_debug_un
194 #define	sd_error_level			ssd_error_level
195 
196 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
197 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
198 
199 #define	sd_tr				ssd_tr
200 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
201 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
202 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
203 #define	sd_check_media_time		ssd_check_media_time
204 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
205 #define	sd_label_mutex			ssd_label_mutex
206 #define	sd_detach_mutex			ssd_detach_mutex
207 #define	sd_log_buf			ssd_log_buf
208 #define	sd_log_mutex			ssd_log_mutex
209 
210 #define	sd_disk_table			ssd_disk_table
211 #define	sd_disk_table_size		ssd_disk_table_size
212 #define	sd_sense_mutex			ssd_sense_mutex
213 #define	sd_cdbtab			ssd_cdbtab
214 
215 #define	sd_cb_ops			ssd_cb_ops
216 #define	sd_ops				ssd_ops
217 #define	sd_additional_codes		ssd_additional_codes
218 
219 #define	sd_minor_data			ssd_minor_data
220 #define	sd_minor_data_efi		ssd_minor_data_efi
221 
222 #define	sd_tq				ssd_tq
223 #define	sd_wmr_tq			ssd_wmr_tq
224 #define	sd_taskq_name			ssd_taskq_name
225 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
226 #define	sd_taskq_minalloc		ssd_taskq_minalloc
227 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
228 
229 #define	sd_dump_format_string		ssd_dump_format_string
230 
231 #define	sd_iostart_chain		ssd_iostart_chain
232 #define	sd_iodone_chain			ssd_iodone_chain
233 
234 #define	sd_pm_idletime			ssd_pm_idletime
235 
236 #define	sd_force_pm_supported		ssd_force_pm_supported
237 
238 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
239 
240 #endif
241 
242 
243 #ifdef	SDDEBUG
244 int	sd_force_pm_supported		= 0;
245 #endif	/* SDDEBUG */
246 
247 void *sd_state				= NULL;
248 int sd_io_time				= SD_IO_TIME;
249 int sd_failfast_enable			= 1;
250 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
251 int sd_report_pfa			= 1;
252 int sd_max_throttle			= SD_MAX_THROTTLE;
253 int sd_min_throttle			= SD_MIN_THROTTLE;
254 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
255 int sd_qfull_throttle_enable		= TRUE;
256 
257 int sd_retry_on_reservation_conflict	= 1;
258 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
259 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
260 
261 static int sd_dtype_optical_bind	= -1;
262 
263 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
264 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
265 
266 /*
267  * Global data for debug logging. To enable debug printing, sd_component_mask
268  * and sd_level_mask should be set to the desired bit patterns as outlined in
269  * sddef.h.
270  */
271 uint_t	sd_component_mask		= 0x0;
272 uint_t	sd_level_mask			= 0x0;
273 struct	sd_lun *sd_debug_un		= NULL;
274 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
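/*
 * For example (illustrative only), the masks can be set at boot time
 * from /etc/system with lines such as:
 *	set sd:sd_component_mask=0xffffffff
 *	set sd:sd_level_mask=0x7
 * (Use the "ssd:" prefix instead for the fibre channel module.)
 */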
275 
276 /* Note: these may go away in the future... */
277 static uint32_t	sd_xbuf_active_limit	= 512;
278 static uint32_t sd_xbuf_reserve_limit	= 16;
279 
280 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
281 
282 /*
283  * Timer value used to reset the throttle after it has been reduced
284  * (typically in response to TRAN_BUSY or STATUS_QFULL)
285  */
286 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
287 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
288 
289 /*
290  * Interval value associated with the media change scsi watch.
291  */
292 static int sd_check_media_time		= 3000000;
293 
294 /*
295  * Wait value used for in-progress operations during a DDI_SUSPEND
296  */
297 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
298 
299 /*
300  * sd_label_mutex protects a static buffer used in the disk label
301  * component of the driver
302  */
303 static kmutex_t sd_label_mutex;
304 
305 /*
306  * sd_detach_mutex protects un_layer_count, un_detach_count, and
307  * un_opens_in_progress in the sd_lun structure.
308  */
309 static kmutex_t sd_detach_mutex;
310 
311 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
312 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
313 
314 /*
315  * Global buffer and mutex for debug logging
316  */
317 static char	sd_log_buf[1024];
318 static kmutex_t	sd_log_mutex;
319 
320 
321 /*
322  * "Smart" Probe Caching structs, globals, #defines, etc.
323  * For parallel SCSI and non-self-identifying devices only.
324  */
325 
326 /*
327  * The following resources and routines are implemented to support
328  * "smart" probing, which caches the scsi_probe() results in an array,
329  * in order to help avoid long probe times.
330  */
331 struct sd_scsi_probe_cache {
332 	struct	sd_scsi_probe_cache	*next;
333 	dev_info_t	*pdip;
334 	int		cache[NTARGETS_WIDE];
335 };
336 
337 static kmutex_t	sd_scsi_probe_cache_mutex;
338 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
339 
340 /*
341  * Really we only need protection on the head of the linked list, but
342  * better safe than sorry.
343  */
344 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
345     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
346 
347 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
348     sd_scsi_probe_cache_head))
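/*
 * A minimal sketch of the cached-probe fast path implemented by
 * sd_scsi_probe_with_cache() (simplified: the real routine also
 * allocates the per-HBA cache entries and bypasses the cache for
 * transports that do not need it). The sd_example_* name and the
 * SD_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_probe_with_cache(struct sd_scsi_probe_cache *cp, int tgt,
    struct scsi_device *devp, int (*waitfn)(void))
{
	/*
	 * If the last probe of this target got no response, assume it
	 * is still absent and skip the (slow) scsi_probe() call.
	 */
	if (cp->cache[tgt] == SCSIPROBE_NORESP)
		return (SCSIPROBE_NORESP);

	/* Otherwise probe for real and remember the result. */
	cp->cache[tgt] = scsi_probe(devp, waitfn);
	return (cp->cache[tgt]);
}
#endif	/* SD_EXAMPLE_SKETCHES */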
349 
350 
351 /*
352  * Vendor specific data name property declarations
353  */
354 
355 #if defined(__fibre) || defined(__i386) || defined(__amd64)
356 
357 static sd_tunables seagate_properties = {
358 	SEAGATE_THROTTLE_VALUE,
359 	0,
360 	0,
361 	0,
362 	0,
363 	0,
364 	0,
365 	0,
366 	0
367 };
368 
369 
370 static sd_tunables fujitsu_properties = {
371 	FUJITSU_THROTTLE_VALUE,
372 	0,
373 	0,
374 	0,
375 	0,
376 	0,
377 	0,
378 	0,
379 	0
380 };
381 
382 static sd_tunables ibm_properties = {
383 	IBM_THROTTLE_VALUE,
384 	0,
385 	0,
386 	0,
387 	0,
388 	0,
389 	0,
390 	0,
391 	0
392 };
393 
394 static sd_tunables purple_properties = {
395 	PURPLE_THROTTLE_VALUE,
396 	0,
397 	0,
398 	PURPLE_BUSY_RETRIES,
399 	PURPLE_RESET_RETRY_COUNT,
400 	PURPLE_RESERVE_RELEASE_TIME,
401 	0,
402 	0,
403 	0
404 };
405 
406 static sd_tunables sve_properties = {
407 	SVE_THROTTLE_VALUE,
408 	0,
409 	0,
410 	SVE_BUSY_RETRIES,
411 	SVE_RESET_RETRY_COUNT,
412 	SVE_RESERVE_RELEASE_TIME,
413 	SVE_MIN_THROTTLE_VALUE,
414 	SVE_DISKSORT_DISABLED_FLAG,
415 	0
416 };
417 
418 static sd_tunables maserati_properties = {
419 	0,
420 	0,
421 	0,
422 	0,
423 	0,
424 	0,
425 	0,
426 	MASERATI_DISKSORT_DISABLED_FLAG,
427 	MASERATI_LUN_RESET_ENABLED_FLAG
428 };
429 
430 static sd_tunables pirus_properties = {
431 	PIRUS_THROTTLE_VALUE,
432 	0,
433 	PIRUS_NRR_COUNT,
434 	PIRUS_BUSY_RETRIES,
435 	PIRUS_RESET_RETRY_COUNT,
436 	0,
437 	PIRUS_MIN_THROTTLE_VALUE,
438 	PIRUS_DISKSORT_DISABLED_FLAG,
439 	PIRUS_LUN_RESET_ENABLED_FLAG
440 };
441 
442 #endif
443 
444 #if (defined(__sparc) && !defined(__fibre)) || \
445 	(defined(__i386) || defined(__amd64))
446 
447 
448 static sd_tunables elite_properties = {
449 	ELITE_THROTTLE_VALUE,
450 	0,
451 	0,
452 	0,
453 	0,
454 	0,
455 	0,
456 	0,
457 	0
458 };
459 
460 static sd_tunables st31200n_properties = {
461 	ST31200N_THROTTLE_VALUE,
462 	0,
463 	0,
464 	0,
465 	0,
466 	0,
467 	0,
468 	0,
469 	0
470 };
471 
472 #endif /* Fibre or not */
473 
474 static sd_tunables lsi_properties_scsi = {
475 	LSI_THROTTLE_VALUE,
476 	0,
477 	LSI_NOTREADY_RETRIES,
478 	0,
479 	0,
480 	0,
481 	0,
482 	0,
483 	0
484 };
485 
486 static sd_tunables symbios_properties = {
487 	SYMBIOS_THROTTLE_VALUE,
488 	0,
489 	SYMBIOS_NOTREADY_RETRIES,
490 	0,
491 	0,
492 	0,
493 	0,
494 	0,
495 	0
496 };
497 
498 static sd_tunables lsi_properties = {
499 	0,
500 	0,
501 	LSI_NOTREADY_RETRIES,
502 	0,
503 	0,
504 	0,
505 	0,
506 	0,
507 	0
508 };
509 
510 static sd_tunables lsi_oem_properties = {
511 	0,
512 	0,
513 	LSI_OEM_NOTREADY_RETRIES,
514 	0,
515 	0,
516 	0,
517 	0,
518 	0,
519 	0
520 };
521 
522 
523 
524 #if (defined(SD_PROP_TST))
525 
526 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
527 #define	SD_TST_THROTTLE_VAL	16
528 #define	SD_TST_NOTREADY_VAL	12
529 #define	SD_TST_BUSY_VAL		60
530 #define	SD_TST_RST_RETRY_VAL	36
531 #define	SD_TST_RSV_REL_TIME	60
532 
533 static sd_tunables tst_properties = {
534 	SD_TST_THROTTLE_VAL,
535 	SD_TST_CTYPE_VAL,
536 	SD_TST_NOTREADY_VAL,
537 	SD_TST_BUSY_VAL,
538 	SD_TST_RST_RETRY_VAL,
539 	SD_TST_RSV_REL_TIME,
540 	0,
541 	0,
542 	0
543 };
544 #endif
545 
546 /* This is similar to the ANSI toupper implementation */
547 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
548 
549 /*
550  * Static Driver Configuration Table
551  *
552  * This is the table of disks for which the throttle (or, at a
553  * future time, other values selected by the flags) needs
554  * adjustment.  device_id is a string consisting of concatenated
555  * vid (vendor), pid (product/model) and revision strings as
556  * defined in the scsi_inquiry structure.  Offsets of the parts
557  * of the string are as defined by the sizes in the scsi_inquiry
558  * structure.  The device type is matched as far as the device_id
559  * string is defined.  Flags define which values are to be set in
560  * the driver from the properties list.
561  *
562  * Entries below which begin and end with a "*" are a special case:
563  * they have no specific vendor, and the string which follows can
564  * appear anywhere in the 16 byte PID portion of the inquiry data.
565  *
566  * Entries below which begin and end with a " " (blank) are also a
567  * special case: the comparison function treats multiple consecutive
568  * blanks as a single blank, so a sd_disk_table entry of
569  * " NEC CDROM " matches a device id string of "NEC       CDROM".
570  * (A simplified sketch of this comparison appears after the table.)
571  *
572  * Note: The MD21 controller type has been obsoleted.
573  *	 ST318202F is a Legacy device.
574  *	 MAM3182FC, MAM3364FC and MAM3738FC do not appear to have ever
575  *	 been made with an FC connection; the entries here are a legacy.
576 static sd_disk_config_t sd_disk_table[] = {
577 #if defined(__fibre) || defined(__i386) || defined(__amd64)
578 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
579 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
580 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
581 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
582 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
583 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
584 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
585 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
586 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
587 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
588 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
589 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
590 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
591 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
592 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
593 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
594 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
595 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
596 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
597 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
598 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
599 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
600 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
601 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
602 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
603 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
604 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
605 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
606 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
607 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
608 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
609 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
610 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
611 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
612 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
613 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
614 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
615 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
616 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
617 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
618 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
619 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
620 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
621 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
622 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
623 			SD_CONF_BSET_BSY_RETRY_COUNT|
624 			SD_CONF_BSET_RST_RETRIES|
625 			SD_CONF_BSET_RSV_REL_TIME,
626 		&purple_properties },
627 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
628 		SD_CONF_BSET_BSY_RETRY_COUNT|
629 		SD_CONF_BSET_RST_RETRIES|
630 		SD_CONF_BSET_RSV_REL_TIME|
631 		SD_CONF_BSET_MIN_THROTTLE|
632 		SD_CONF_BSET_DISKSORT_DISABLED,
633 		&sve_properties },
634 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
635 			SD_CONF_BSET_BSY_RETRY_COUNT|
636 			SD_CONF_BSET_RST_RETRIES|
637 			SD_CONF_BSET_RSV_REL_TIME,
638 		&purple_properties },
639 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
640 		SD_CONF_BSET_LUN_RESET_ENABLED,
641 		&maserati_properties },
642 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
643 		SD_CONF_BSET_NRR_COUNT|
644 		SD_CONF_BSET_BSY_RETRY_COUNT|
645 		SD_CONF_BSET_RST_RETRIES|
646 		SD_CONF_BSET_MIN_THROTTLE|
647 		SD_CONF_BSET_DISKSORT_DISABLED|
648 		SD_CONF_BSET_LUN_RESET_ENABLED,
649 		&pirus_properties },
650 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
651 		SD_CONF_BSET_NRR_COUNT|
652 		SD_CONF_BSET_BSY_RETRY_COUNT|
653 		SD_CONF_BSET_RST_RETRIES|
654 		SD_CONF_BSET_MIN_THROTTLE|
655 		SD_CONF_BSET_DISKSORT_DISABLED|
656 		SD_CONF_BSET_LUN_RESET_ENABLED,
657 		&pirus_properties },
658 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
659 		SD_CONF_BSET_NRR_COUNT|
660 		SD_CONF_BSET_BSY_RETRY_COUNT|
661 		SD_CONF_BSET_RST_RETRIES|
662 		SD_CONF_BSET_MIN_THROTTLE|
663 		SD_CONF_BSET_DISKSORT_DISABLED|
664 		SD_CONF_BSET_LUN_RESET_ENABLED,
665 		&pirus_properties },
666 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
667 		SD_CONF_BSET_NRR_COUNT|
668 		SD_CONF_BSET_BSY_RETRY_COUNT|
669 		SD_CONF_BSET_RST_RETRIES|
670 		SD_CONF_BSET_MIN_THROTTLE|
671 		SD_CONF_BSET_DISKSORT_DISABLED|
672 		SD_CONF_BSET_LUN_RESET_ENABLED,
673 		&pirus_properties },
674 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
675 		SD_CONF_BSET_NRR_COUNT|
676 		SD_CONF_BSET_BSY_RETRY_COUNT|
677 		SD_CONF_BSET_RST_RETRIES|
678 		SD_CONF_BSET_MIN_THROTTLE|
679 		SD_CONF_BSET_DISKSORT_DISABLED|
680 		SD_CONF_BSET_LUN_RESET_ENABLED,
681 		&pirus_properties },
682 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
683 		SD_CONF_BSET_NRR_COUNT|
684 		SD_CONF_BSET_BSY_RETRY_COUNT|
685 		SD_CONF_BSET_RST_RETRIES|
686 		SD_CONF_BSET_MIN_THROTTLE|
687 		SD_CONF_BSET_DISKSORT_DISABLED|
688 		SD_CONF_BSET_LUN_RESET_ENABLED,
689 		&pirus_properties },
690 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
691 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
692 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
693 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
694 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
695 #endif /* fibre or NON-sparc platforms */
696 #if ((defined(__sparc) && !defined(__fibre)) || \
697 	(defined(__i386) || defined(__amd64)))
698 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
699 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
700 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
701 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
702 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
703 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
704 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
705 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
706 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
707 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
708 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
709 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
710 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
711 	    &symbios_properties },
712 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
713 	    &lsi_properties_scsi },
714 #if defined(__i386) || defined(__amd64)
715 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
716 				    | SD_CONF_BSET_READSUB_BCD
717 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
718 				    | SD_CONF_BSET_NO_READ_HEADER
719 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
720 
721 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
722 				    | SD_CONF_BSET_READSUB_BCD
723 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
724 				    | SD_CONF_BSET_NO_READ_HEADER
725 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
726 #endif /* __i386 || __amd64 */
727 #endif /* sparc NON-fibre or NON-sparc platforms */
728 
729 #if (defined(SD_PROP_TST))
730 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
731 				| SD_CONF_BSET_CTYPE
732 				| SD_CONF_BSET_NRR_COUNT
733 				| SD_CONF_BSET_FAB_DEVID
734 				| SD_CONF_BSET_NOCACHE
735 				| SD_CONF_BSET_BSY_RETRY_COUNT
736 				| SD_CONF_BSET_PLAYMSF_BCD
737 				| SD_CONF_BSET_READSUB_BCD
738 				| SD_CONF_BSET_READ_TOC_TRK_BCD
739 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
740 				| SD_CONF_BSET_NO_READ_HEADER
741 				| SD_CONF_BSET_READ_CD_XD4
742 				| SD_CONF_BSET_RST_RETRIES
743 				| SD_CONF_BSET_RSV_REL_TIME
744 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
745 #endif
746 };
747 
748 static const int sd_disk_table_size =
749 	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
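/*
 * A minimal sketch of the blank-collapsing comparison described
 * above the table. The driver's actual routine is sd_blank_cmp(),
 * which additionally bounds the inquiry data by its length; the
 * sd_example_* name and the use of SD_TOUPPER() here are
 * illustrative assumptions.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_blank_cmp(const char *tbl, const char *id)
{
	for (;;) {
		/* Collapse any run of blanks on both sides. */
		while (*tbl == ' ')
			tbl++;
		while (*id == ' ')
			id++;
		if (*tbl == '\0' || *id == '\0')
			break;
		if (SD_TOUPPER(*tbl) != SD_TOUPPER(*id))
			return (0);	/* mismatch */
		tbl++;
		id++;
	}
	/* Match only if both strings were consumed completely. */
	return ((*tbl == '\0') && (*id == '\0'));
}
#endif	/* SD_EXAMPLE_SKETCHES */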
750 
751 
752 /*
753  * Return codes of sd_uselabel().
754  */
755 #define	SD_LABEL_IS_VALID		0
756 #define	SD_LABEL_IS_INVALID		1
757 
758 #define	SD_INTERCONNECT_PARALLEL	0
759 #define	SD_INTERCONNECT_FABRIC		1
760 #define	SD_INTERCONNECT_FIBRE		2
761 #define	SD_INTERCONNECT_SSA		3
762 #define	SD_INTERCONNECT_SATA		4
763 #define	SD_IS_PARALLEL_SCSI(un)		\
764 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
765 #define	SD_IS_SERIAL(un)		\
766 	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)
767 
768 /*
769  * Definitions used by device id registration routines
770  */
771 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
772 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
773 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
774 #define	WD_NODE			7	/* the whole disk minor */
775 
776 static kmutex_t sd_sense_mutex = {0};
777 
778 /*
779  * Macros for updates of the driver state
780  */
781 #define	New_state(un, s)        \
782 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
783 #define	Restore_state(un)	\
784 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
785 
786 static struct sd_cdbinfo sd_cdbtab[] = {
787 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    }, /* 6-byte: 21-bit LBA, 8-bit count */
788 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    }, /* 10-byte: 32-bit LBA, 16-bit count */
789 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  }, /* 12-byte: 32-bit LBA, 32-bit count */
790 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, }, /* 16-byte: 64-bit LBA, 32-bit count */
791 };
792 
793 /*
794  * Specifies the number of seconds that must elapse after the last
795  * command completes before a device is declared idle to the PM framework.
796  */
797 static int sd_pm_idletime = 1;
798 
799 /*
800  * Internal function prototypes
801  */
802 
803 #if (defined(__fibre))
804 /*
805  * These #defines are to avoid namespace collisions that occur because this
806  * code is currently used to compile two separate driver modules: sd and ssd.
807  * All function names need to be treated this way (even if declared static)
808  * in order to allow the debugger to resolve the names properly.
809  * It is anticipated that in the near future the ssd module will be obsoleted,
810  * at which time this ugliness should go away.
811  */
812 #define	sd_log_trace			ssd_log_trace
813 #define	sd_log_info			ssd_log_info
814 #define	sd_log_err			ssd_log_err
815 #define	sdprobe				ssdprobe
816 #define	sdinfo				ssdinfo
817 #define	sd_prop_op			ssd_prop_op
818 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
819 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
820 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
821 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
822 #define	sd_spin_up_unit			ssd_spin_up_unit
823 #define	sd_enable_descr_sense		ssd_enable_descr_sense
824 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
825 #define	sd_set_mmc_caps			ssd_set_mmc_caps
826 #define	sd_read_unit_properties		ssd_read_unit_properties
827 #define	sd_process_sdconf_file		ssd_process_sdconf_file
828 #define	sd_process_sdconf_table		ssd_process_sdconf_table
829 #define	sd_sdconf_id_match		ssd_sdconf_id_match
830 #define	sd_blank_cmp			ssd_blank_cmp
831 #define	sd_chk_vers1_data		ssd_chk_vers1_data
832 #define	sd_set_vers1_properties		ssd_set_vers1_properties
833 #define	sd_validate_geometry		ssd_validate_geometry
834 
835 #if defined(_SUNOS_VTOC_16)
836 #define	sd_convert_geometry		ssd_convert_geometry
837 #endif
838 
839 #define	sd_resync_geom_caches		ssd_resync_geom_caches
840 #define	sd_read_fdisk			ssd_read_fdisk
841 #define	sd_get_physical_geometry	ssd_get_physical_geometry
842 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
843 #define	sd_update_block_info		ssd_update_block_info
844 #define	sd_swap_efi_gpt			ssd_swap_efi_gpt
845 #define	sd_swap_efi_gpe			ssd_swap_efi_gpe
846 #define	sd_validate_efi			ssd_validate_efi
847 #define	sd_use_efi			ssd_use_efi
848 #define	sd_uselabel			ssd_uselabel
849 #define	sd_build_default_label		ssd_build_default_label
850 #define	sd_has_max_chs_vals		ssd_has_max_chs_vals
851 #define	sd_inq_fill			ssd_inq_fill
852 #define	sd_register_devid		ssd_register_devid
853 #define	sd_get_devid_block		ssd_get_devid_block
854 #define	sd_get_devid			ssd_get_devid
855 #define	sd_create_devid			ssd_create_devid
856 #define	sd_write_deviceid		ssd_write_deviceid
857 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
858 #define	sd_setup_pm			ssd_setup_pm
859 #define	sd_create_pm_components		ssd_create_pm_components
860 #define	sd_ddi_suspend			ssd_ddi_suspend
861 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
862 #define	sd_ddi_resume			ssd_ddi_resume
863 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
864 #define	sdpower				ssdpower
865 #define	sdattach			ssdattach
866 #define	sddetach			ssddetach
867 #define	sd_unit_attach			ssd_unit_attach
868 #define	sd_unit_detach			ssd_unit_detach
869 #define	sd_set_unit_attributes		ssd_set_unit_attributes
870 #define	sd_create_minor_nodes		ssd_create_minor_nodes
871 #define	sd_create_errstats		ssd_create_errstats
872 #define	sd_set_errstats			ssd_set_errstats
873 #define	sd_set_pstats			ssd_set_pstats
874 #define	sddump				ssddump
875 #define	sd_scsi_poll			ssd_scsi_poll
876 #define	sd_send_polled_RQS		ssd_send_polled_RQS
877 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
878 #define	sd_init_event_callbacks		ssd_init_event_callbacks
879 #define	sd_event_callback		ssd_event_callback
880 #define	sd_cache_control		ssd_cache_control
881 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
882 #define	sd_make_device			ssd_make_device
883 #define	sdopen				ssdopen
884 #define	sdclose				ssdclose
885 #define	sd_ready_and_valid		ssd_ready_and_valid
886 #define	sdmin				ssdmin
887 #define	sdread				ssdread
888 #define	sdwrite				ssdwrite
889 #define	sdaread				ssdaread
890 #define	sdawrite			ssdawrite
891 #define	sdstrategy			ssdstrategy
892 #define	sdioctl				ssdioctl
893 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
894 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
895 #define	sd_checksum_iostart		ssd_checksum_iostart
896 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
897 #define	sd_pm_iostart			ssd_pm_iostart
898 #define	sd_core_iostart			ssd_core_iostart
899 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
900 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
901 #define	sd_checksum_iodone		ssd_checksum_iodone
902 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
903 #define	sd_pm_iodone			ssd_pm_iodone
904 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
905 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
906 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
907 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
908 #define	sd_buf_iodone			ssd_buf_iodone
909 #define	sd_uscsi_strategy		ssd_uscsi_strategy
910 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
911 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
912 #define	sd_uscsi_iodone			ssd_uscsi_iodone
913 #define	sd_xbuf_strategy		ssd_xbuf_strategy
914 #define	sd_xbuf_init			ssd_xbuf_init
915 #define	sd_pm_entry			ssd_pm_entry
916 #define	sd_pm_exit			ssd_pm_exit
917 
918 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
919 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
920 
921 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
922 #define	sdintr				ssdintr
923 #define	sd_start_cmds			ssd_start_cmds
924 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
925 #define	sd_bioclone_alloc		ssd_bioclone_alloc
926 #define	sd_bioclone_free		ssd_bioclone_free
927 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
928 #define	sd_shadow_buf_free		ssd_shadow_buf_free
929 #define	sd_print_transport_rejected_message	\
930 					ssd_print_transport_rejected_message
931 #define	sd_retry_command		ssd_retry_command
932 #define	sd_set_retry_bp			ssd_set_retry_bp
933 #define	sd_send_request_sense_command	ssd_send_request_sense_command
934 #define	sd_start_retry_command		ssd_start_retry_command
935 #define	sd_start_direct_priority_command	\
936 					ssd_start_direct_priority_command
937 #define	sd_return_failed_command	ssd_return_failed_command
938 #define	sd_return_failed_command_no_restart	\
939 					ssd_return_failed_command_no_restart
940 #define	sd_return_command		ssd_return_command
941 #define	sd_sync_with_callback		ssd_sync_with_callback
942 #define	sdrunout			ssdrunout
943 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
944 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
945 #define	sd_reduce_throttle		ssd_reduce_throttle
946 #define	sd_restore_throttle		ssd_restore_throttle
947 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
948 #define	sd_init_cdb_limits		ssd_init_cdb_limits
949 #define	sd_pkt_status_good		ssd_pkt_status_good
950 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
951 #define	sd_pkt_status_busy		ssd_pkt_status_busy
952 #define	sd_pkt_status_reservation_conflict	\
953 					ssd_pkt_status_reservation_conflict
954 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
955 #define	sd_handle_request_sense		ssd_handle_request_sense
956 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
957 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
958 #define	sd_validate_sense_data		ssd_validate_sense_data
959 #define	sd_decode_sense			ssd_decode_sense
960 #define	sd_print_sense_msg		ssd_print_sense_msg
961 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
962 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
963 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
964 #define	sd_sense_key_medium_or_hardware_error	\
965 					ssd_sense_key_medium_or_hardware_error
966 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
967 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
968 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
969 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
970 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
971 #define	sd_sense_key_default		ssd_sense_key_default
972 #define	sd_print_retry_msg		ssd_print_retry_msg
973 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
974 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
975 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
976 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
977 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
978 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
979 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
980 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
981 #define	sd_pkt_reason_default		ssd_pkt_reason_default
982 #define	sd_reset_target			ssd_reset_target
983 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
984 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
985 #define	sd_taskq_create			ssd_taskq_create
986 #define	sd_taskq_delete			ssd_taskq_delete
987 #define	sd_media_change_task		ssd_media_change_task
988 #define	sd_handle_mchange		ssd_handle_mchange
989 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
990 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
991 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
992 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
993 #define	sd_send_scsi_feature_GET_CONFIGURATION	\
994 					ssd_send_scsi_feature_GET_CONFIGURATION
995 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
996 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
997 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
998 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
999 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1000 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1001 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1002 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1003 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1004 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1005 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1006 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1007 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1008 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1009 #define	sd_alloc_rqs			ssd_alloc_rqs
1010 #define	sd_free_rqs			ssd_free_rqs
1011 #define	sd_dump_memory			ssd_dump_memory
1012 #define	sd_uscsi_ioctl			ssd_uscsi_ioctl
1013 #define	sd_get_media_info		ssd_get_media_info
1014 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1015 #define	sd_dkio_get_geometry		ssd_dkio_get_geometry
1016 #define	sd_dkio_set_geometry		ssd_dkio_set_geometry
1017 #define	sd_dkio_get_partition		ssd_dkio_get_partition
1018 #define	sd_dkio_set_partition		ssd_dkio_set_partition
1019 #define	sd_dkio_partition		ssd_dkio_partition
1020 #define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
1021 #define	sd_dkio_get_efi			ssd_dkio_get_efi
1022 #define	sd_build_user_vtoc		ssd_build_user_vtoc
1023 #define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
1024 #define	sd_dkio_set_efi			ssd_dkio_set_efi
1025 #define	sd_build_label_vtoc		ssd_build_label_vtoc
1026 #define	sd_write_label			ssd_write_label
1027 #define	sd_clear_vtoc			ssd_clear_vtoc
1028 #define	sd_clear_efi			ssd_clear_efi
1029 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1030 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1031 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1032 #define	sd_dkio_get_mboot		ssd_dkio_get_mboot
1033 #define	sd_dkio_set_mboot		ssd_dkio_set_mboot
1034 #define	sd_setup_default_geometry	ssd_setup_default_geometry
1035 #define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
1036 #define	sd_check_mhd			ssd_check_mhd
1037 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1038 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1039 #define	sd_sname			ssd_sname
1040 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1041 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1042 #define	sd_take_ownership		ssd_take_ownership
1043 #define	sd_reserve_release		ssd_reserve_release
1044 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1045 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1046 #define	sd_persistent_reservation_in_read_keys	\
1047 					ssd_persistent_reservation_in_read_keys
1048 #define	sd_persistent_reservation_in_read_resv	\
1049 					ssd_persistent_reservation_in_read_resv
1050 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1051 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1052 #define	sd_mhdioc_release		ssd_mhdioc_release
1053 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1054 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1055 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1056 #define	sr_change_blkmode		ssr_change_blkmode
1057 #define	sr_change_speed			ssr_change_speed
1058 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1059 #define	sr_pause_resume			ssr_pause_resume
1060 #define	sr_play_msf			ssr_play_msf
1061 #define	sr_play_trkind			ssr_play_trkind
1062 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1063 #define	sr_read_subchannel		ssr_read_subchannel
1064 #define	sr_read_tocentry		ssr_read_tocentry
1065 #define	sr_read_tochdr			ssr_read_tochdr
1066 #define	sr_read_cdda			ssr_read_cdda
1067 #define	sr_read_cdxa			ssr_read_cdxa
1068 #define	sr_read_mode1			ssr_read_mode1
1069 #define	sr_read_mode2			ssr_read_mode2
1070 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1071 #define	sr_sector_mode			ssr_sector_mode
1072 #define	sr_eject			ssr_eject
1073 #define	sr_ejected			ssr_ejected
1074 #define	sr_check_wp			ssr_check_wp
1075 #define	sd_check_media			ssd_check_media
1076 #define	sd_media_watch_cb		ssd_media_watch_cb
1077 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1078 #define	sr_volume_ctrl			ssr_volume_ctrl
1079 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1080 #define	sd_log_page_supported		ssd_log_page_supported
1081 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1082 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1083 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1084 #define	sd_range_lock			ssd_range_lock
1085 #define	sd_get_range			ssd_get_range
1086 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1087 #define	sd_range_unlock			ssd_range_unlock
1088 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1089 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1090 
1091 #define	sd_iostart_chain		ssd_iostart_chain
1092 #define	sd_iodone_chain			ssd_iodone_chain
1093 #define	sd_initpkt_map			ssd_initpkt_map
1094 #define	sd_destroypkt_map		ssd_destroypkt_map
1095 #define	sd_chain_type_map		ssd_chain_type_map
1096 #define	sd_chain_index_map		ssd_chain_index_map
1097 
1098 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1099 #define	sd_failfast_flushq		ssd_failfast_flushq
1100 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1101 
1102 #define	sd_is_lsi			ssd_is_lsi
1103 
1104 #endif	/* #if (defined(__fibre)) */
1105 
1106 
1107 int _init(void);
1108 int _fini(void);
1109 int _info(struct modinfo *modinfop);
1110 
1111 /*PRINTFLIKE3*/
1112 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1113 /*PRINTFLIKE3*/
1114 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1115 /*PRINTFLIKE3*/
1116 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
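/*
 * Illustrative usage (the SD_LOG_* component flags are defined in
 * sddef.h; the message text here is made up):
 *	sd_log_err(SD_LOG_ATTACH_DETACH, un,
 *	    "sd_unit_attach: failed for instance %d\n", instance);
 * Output is produced only when the debug masks described earlier
 * enable the given component and level.
 */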
1117 
1118 static int sdprobe(dev_info_t *devi);
1119 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1120     void **result);
1121 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1122     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1123 
1124 /*
1125  * Smart probe for parallel SCSI
1126  */
1127 static void sd_scsi_probe_cache_init(void);
1128 static void sd_scsi_probe_cache_fini(void);
1129 static void sd_scsi_clear_probe_cache(void);
1130 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1131 
1132 static int	sd_spin_up_unit(struct sd_lun *un);
1133 #ifdef _LP64
1134 static void	sd_enable_descr_sense(struct sd_lun *un);
1135 static void	sd_reenable_dsense_task(void *arg);
1136 #endif /* _LP64 */
1137 
1138 static void	sd_set_mmc_caps(struct sd_lun *un);
1139 
1140 static void sd_read_unit_properties(struct sd_lun *un);
1141 static int  sd_process_sdconf_file(struct sd_lun *un);
1142 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1143     int *data_list, sd_tunables *values);
1144 static void sd_process_sdconf_table(struct sd_lun *un);
1145 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1146 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1147 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1148 	int list_len, char *dataname_ptr);
1149 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1150     sd_tunables *prop_list);
1151 static int  sd_validate_geometry(struct sd_lun *un, int path_flag);
1152 
1153 #if defined(_SUNOS_VTOC_16)
1154 static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
1155 #endif
1156 
1157 static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
1158 	int path_flag);
1159 static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
1160 	int path_flag);
1161 static void sd_get_physical_geometry(struct sd_lun *un,
1162 	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
1163 static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
1164 	int lbasize);
1165 static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
1166 static void sd_swap_efi_gpt(efi_gpt_t *);
1167 static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
1168 static int sd_validate_efi(efi_gpt_t *);
1169 static int sd_use_efi(struct sd_lun *, int);
1170 static void sd_build_default_label(struct sd_lun *un);
1171 
1172 #if defined(_FIRMWARE_NEEDS_FDISK)
1173 static int  sd_has_max_chs_vals(struct ipart *fdp);
1174 #endif
1175 static void sd_inq_fill(char *p, int l, char *s);
1176 
1177 
1178 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1179     int reservation_flag);
1180 static daddr_t  sd_get_devid_block(struct sd_lun *un);
1181 static int  sd_get_devid(struct sd_lun *un);
1182 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1183 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1184 static int  sd_write_deviceid(struct sd_lun *un);
1185 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1186 static int  sd_check_vpd_page_support(struct sd_lun *un);
1187 
1188 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1189 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1190 
1191 static int  sd_ddi_suspend(dev_info_t *devi);
1192 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1193 static int  sd_ddi_resume(dev_info_t *devi);
1194 static int  sd_ddi_pm_resume(struct sd_lun *un);
1195 static int  sdpower(dev_info_t *devi, int component, int level);
1196 
1197 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1198 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1199 static int  sd_unit_attach(dev_info_t *devi);
1200 static int  sd_unit_detach(dev_info_t *devi);
1201 
1202 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1203 static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
1204 static void sd_create_errstats(struct sd_lun *un, int instance);
1205 static void sd_set_errstats(struct sd_lun *un);
1206 static void sd_set_pstats(struct sd_lun *un);
1207 
1208 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1209 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1210 static int  sd_send_polled_RQS(struct sd_lun *un);
1211 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1212 
1213 #if (defined(__fibre))
1214 /*
1215  * Event callbacks (photon)
1216  */
1217 static void sd_init_event_callbacks(struct sd_lun *un);
1218 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1219 #endif
1220 
1221 /*
1222  * Defines for sd_cache_control
1223  */
1224 
1225 #define	SD_CACHE_ENABLE		1
1226 #define	SD_CACHE_DISABLE	0
1227 #define	SD_CACHE_NOCHANGE	-1
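/*
 * For example (illustrative only): enabling the write cache while
 * leaving the read cache setting untouched would be requested as
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 * where rcd_flag controls the read cache and wce_flag the write cache.
 */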
1228 
1229 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1230 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1231 static dev_t sd_make_device(dev_info_t *devi);
1232 
1233 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1234 	uint64_t capacity);
1235 
1236 /*
1237  * Driver entry point functions.
1238  */
1239 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1240 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1241 static int  sd_ready_and_valid(struct sd_lun *un);
1242 
1243 static void sdmin(struct buf *bp);
1244 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1245 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1246 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1247 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1248 
1249 static int sdstrategy(struct buf *bp);
1250 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1251 
1252 /*
1253  * Function prototypes for layering functions in the iostart chain.
1254  */
1255 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1256 	struct buf *bp);
1257 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1258 	struct buf *bp);
1259 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1260 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1261 	struct buf *bp);
1262 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1263 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1264 
1265 /*
1266  * Function prototypes for layering functions in the iodone chain.
1267  */
1268 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1269 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1270 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1271 	struct buf *bp);
1272 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1273 	struct buf *bp);
1274 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1275 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1276 	struct buf *bp);
1277 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1278 
1279 /*
1280  * Prototypes for functions to support buf(9S) based IO.
1281  */
1282 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1283 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1284 static void sd_destroypkt_for_buf(struct buf *);
1285 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1286 	struct buf *bp, int flags,
1287 	int (*callback)(caddr_t), caddr_t callback_arg,
1288 	diskaddr_t lba, uint32_t blockcount);
1289 #if defined(__i386) || defined(__amd64)
1290 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1291 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1292 #endif /* defined(__i386) || defined(__amd64) */
1293 
1294 /*
1295  * Prototypes for functions to support USCSI IO.
1296  */
1297 static int sd_uscsi_strategy(struct buf *bp);
1298 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1299 static void sd_destroypkt_for_uscsi(struct buf *);
1300 
1301 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1302 	uchar_t chain_type, void *pktinfop);
1303 
1304 static int  sd_pm_entry(struct sd_lun *un);
1305 static void sd_pm_exit(struct sd_lun *un);
1306 
1307 static void sd_pm_idletimeout_handler(void *arg);
1308 
1309 /*
1310  * sd_core internal functions (used at the sd_core_io layer).
1311  */
1312 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1313 static void sdintr(struct scsi_pkt *pktp);
1314 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1315 
1316 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
1317 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
1318 	int path_flag);
1319 
1320 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1321 	daddr_t blkno, int (*func)(struct buf *));
1322 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1323 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1324 static void sd_bioclone_free(struct buf *bp);
1325 static void sd_shadow_buf_free(struct buf *bp);
1326 
1327 static void sd_print_transport_rejected_message(struct sd_lun *un,
1328 	struct sd_xbuf *xp, int code);
1329 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1330     void *arg, int code);
1331 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1332     void *arg, int code);
1333 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1334     void *arg, int code);
1335 
1336 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1337 	int retry_check_flag,
1338 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1339 		int c),
1340 	void *user_arg, int failure_code,  clock_t retry_delay,
1341 	void (*statp)(kstat_io_t *));
1342 
1343 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1344 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1345 
1346 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1347 	struct scsi_pkt *pktp);
1348 static void sd_start_retry_command(void *arg);
1349 static void sd_start_direct_priority_command(void *arg);
1350 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1351 	int errcode);
1352 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1353 	struct buf *bp, int errcode);
1354 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1355 static void sd_sync_with_callback(struct sd_lun *un);
1356 static int sdrunout(caddr_t arg);
1357 
1358 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1359 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1360 
1361 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1362 static void sd_restore_throttle(void *arg);
1363 
1364 static void sd_init_cdb_limits(struct sd_lun *un);
1365 
1366 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 
1369 /*
1370  * Error handling functions
1371  */
1372 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1373 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1374 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1375 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1377 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1378 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1379 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1382 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1383 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1384 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1385 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1386 	struct sd_xbuf *xp);
1387 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1388 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1389 
1390 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1391 	void *arg, int code);
1392 
1393 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1394 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1395 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1396 	uint8_t *sense_datap,
1397 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1398 static void sd_sense_key_not_ready(struct sd_lun *un,
1399 	uint8_t *sense_datap,
1400 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1401 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1402 	uint8_t *sense_datap,
1403 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1404 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1405 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1406 static void sd_sense_key_unit_attention(struct sd_lun *un,
1407 	uint8_t *sense_datap,
1408 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1409 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1410 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1411 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1412 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1413 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1414 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1415 static void sd_sense_key_default(struct sd_lun *un,
1416 	uint8_t *sense_datap,
1417 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1418 
1419 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1420 	void *arg, int flag);
1421 
1422 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1423 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1424 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1425 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1426 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1427 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1428 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1429 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1430 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1431 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1432 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1433 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1434 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1435 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1436 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1437 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1438 
1439 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1440 
1441 static void sd_start_stop_unit_callback(void *arg);
1442 static void sd_start_stop_unit_task(void *arg);
1443 
1444 static void sd_taskq_create(void);
1445 static void sd_taskq_delete(void);
1446 static void sd_media_change_task(void *arg);
1447 
1448 static int sd_handle_mchange(struct sd_lun *un);
1449 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1450 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1451 	uint32_t *lbap, int path_flag);
1452 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1453 	uint32_t *lbap, int path_flag);
1454 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1455 	int path_flag);
1456 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1457 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1458 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1459 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1460 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1461 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1462 	uchar_t usr_cmd, uchar_t *usr_bufp);
1463 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1464 	struct dk_callback *dkc);
1465 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1466 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1467 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1468 	uchar_t *bufaddr, uint_t buflen);
1469 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1470 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1471 	uchar_t *bufaddr, uint_t buflen, char feature);
1472 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1473 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1474 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1475 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1476 static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1477 	size_t buflen, daddr_t start_block, int path_flag);
1478 #define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1479 	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1480 	path_flag)
1481 #define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1482 	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1483 	path_flag)
1484 
1485 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1486 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1487 	uint16_t param_ptr, int path_flag);
1488 
1489 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1490 static void sd_free_rqs(struct sd_lun *un);
1491 
1492 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1493 	uchar_t *data, int len, int fmt);
1494 static void sd_panic_for_res_conflict(struct sd_lun *un);
1495 
1496 /*
1497  * Disk Ioctl Function Prototypes
1498  */
1499 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1500 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1501 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1502 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1503 	int geom_validated);
1504 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1505 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1506 	int geom_validated);
1507 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1508 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1509 	int geom_validated);
1510 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1511 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1512 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1513 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1514 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1515 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1516 static int sd_write_label(dev_t dev);
1517 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1518 static void sd_clear_vtoc(struct sd_lun *un);
1519 static void sd_clear_efi(struct sd_lun *un);
1520 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1521 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1522 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1523 static void sd_setup_default_geometry(struct sd_lun *un);
1524 #if defined(__i386) || defined(__amd64)
1525 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1526 #endif
1527 
1528 /*
1529  * Multi-host Ioctl Prototypes
1530  */
1531 static int sd_check_mhd(dev_t dev, int interval);
1532 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1533 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1534 static char *sd_sname(uchar_t status);
1535 static void sd_mhd_resvd_recover(void *arg);
1536 static void sd_resv_reclaim_thread(void);
1537 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1538 static int sd_reserve_release(dev_t dev, int cmd);
1539 static void sd_rmv_resv_reclaim_req(dev_t dev);
1540 static void sd_mhd_reset_notify_cb(caddr_t arg);
1541 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1542 	mhioc_inkeys_t *usrp, int flag);
1543 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1544 	mhioc_inresvs_t *usrp, int flag);
1545 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1546 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1547 static int sd_mhdioc_release(dev_t dev);
1548 static int sd_mhdioc_register_devid(dev_t dev);
1549 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1550 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1551 
1552 /*
1553  * SCSI removable prototypes
1554  */
1555 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1556 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1557 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1558 static int sr_pause_resume(dev_t dev, int mode);
1559 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1560 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1561 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1562 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1563 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1564 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1565 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1566 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1567 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1568 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1569 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1570 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1571 static int sr_eject(dev_t dev);
1572 static void sr_ejected(register struct sd_lun *un);
1573 static int sr_check_wp(dev_t dev);
1574 static int sd_check_media(dev_t dev, enum dkio_state state);
1575 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576 static void sd_delayed_cv_broadcast(void *arg);
1577 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1578 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1579 
1580 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1581 
1582 /*
1583  * Function prototypes for the non-512 byte block size support (DVDRAM, MO,
1584  * etc.) functions.
1584  */
1585 static void sd_check_for_writable_cd(struct sd_lun *un);
1586 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1587 static void sd_wm_cache_destructor(void *wm, void *un);
1588 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1589 	daddr_t endb, ushort_t typ);
1590 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1591 	daddr_t endb);
1592 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1593 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1594 static void sd_read_modify_write_task(void *arg);
1595 static int
1596 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1597 	struct buf **bpp);
1598 
1599 
1600 /*
1601  * Function prototypes for failfast support.
1602  */
1603 static void sd_failfast_flushq(struct sd_lun *un);
1604 static int sd_failfast_flushq_callback(struct buf *bp);
1605 
1606 /*
1607  * Function prototypes to check for LSI devices
1608  */
1609 static void sd_is_lsi(struct sd_lun *un);
1610 
1611 /*
1612  * Function prototypes for x86 support
1613  */
1614 #if defined(__i386) || defined(__amd64)
1615 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1616 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1617 #endif
1618 
1619 /*
1620  * Constants for failfast support:
1621  *
1622  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1623  * failfast processing being performed.
1624  *
1625  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1626  * failfast processing on all bufs with B_FAILFAST set.
1627  */
1628 
1629 #define	SD_FAILFAST_INACTIVE		0
1630 #define	SD_FAILFAST_ACTIVE		1
1631 
1632 /*
1633  * Bitmask to control behavior of buf(9S) flushes when a transition to
1634  * the failfast state occurs. Optional bits include:
1635  *
1636  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1637  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1638  * be flushed.
1639  *
1640  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1641  * driver, in addition to the regular wait queue. This includes the xbuf
1642  * queues. When clear, only the driver's wait queue will be flushed.
1643  */
1644 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1645 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1646 
1647 /*
1648  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1649  * to flush all queues within the driver.
1650  */
1651 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
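
/*
 * Editorial sketch (not part of the driver, compiled out): how the flush
 * control bits above are intended to be interpreted. The helper name
 * sd_failfast_should_flush() and the SD_FAILFAST_EXAMPLE guard are
 * hypothetical; the real policy is applied by sd_failfast_flushq() and
 * sd_failfast_flushq_callback(), declared earlier in this file.
 */
#ifdef	SD_FAILFAST_EXAMPLE
static int
sd_failfast_should_flush(struct buf *bp)
{
	/* When FLUSH_ALL_BUFS is set, every queued buf is flushed. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		return (1);
	}
	/* Otherwise only bufs marked B_FAILFAST are flushed. */
	return ((bp->b_flags & B_FAILFAST) ? 1 : 0);
}
#endif	/* SD_FAILFAST_EXAMPLE */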
1652 
1653 
1654 /*
1655  * SD Testing Fault Injection
1656  */
1657 #ifdef SD_FAULT_INJECTION
1658 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1659 static void sd_faultinjection(struct scsi_pkt *pktp);
1660 static void sd_injection_log(char *buf, struct sd_lun *un);
1661 #endif
1662 
1663 /*
1664  * Device driver ops vector
1665  */
1666 static struct cb_ops sd_cb_ops = {
1667 	sdopen,			/* open */
1668 	sdclose,		/* close */
1669 	sdstrategy,		/* strategy */
1670 	nodev,			/* print */
1671 	sddump,			/* dump */
1672 	sdread,			/* read */
1673 	sdwrite,		/* write */
1674 	sdioctl,		/* ioctl */
1675 	nodev,			/* devmap */
1676 	nodev,			/* mmap */
1677 	nodev,			/* segmap */
1678 	nochpoll,		/* poll */
1679 	sd_prop_op,		/* cb_prop_op */
1680 	0,			/* streamtab  */
1681 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1682 	CB_REV,			/* cb_rev */
1683 	sdaread, 		/* async I/O read entry point */
1684 	sdawrite		/* async I/O write entry point */
1685 };
1686 
1687 static struct dev_ops sd_ops = {
1688 	DEVO_REV,		/* devo_rev, */
1689 	0,			/* refcnt  */
1690 	sdinfo,			/* info */
1691 	nulldev,		/* identify */
1692 	sdprobe,		/* probe */
1693 	sdattach,		/* attach */
1694 	sddetach,		/* detach */
1695 	nodev,			/* reset */
1696 	&sd_cb_ops,		/* driver operations */
1697 	NULL,			/* bus operations */
1698 	sdpower			/* power */
1699 };
1700 
1701 
1702 /*
1703  * This is the loadable module wrapper.
1704  */
1705 #include <sys/modctl.h>
1706 
1707 static struct modldrv modldrv = {
1708 	&mod_driverops,		/* Type of module. This one is a driver */
1709 	SD_MODULE_NAME,		/* Module name. */
1710 	&sd_ops			/* driver ops */
1711 };
1712 
1713 
1714 static struct modlinkage modlinkage = {
1715 	MODREV_1,
1716 	&modldrv,
1717 	NULL
1718 };
1719 
1720 
1721 static struct scsi_asq_key_strings sd_additional_codes[] = {
1722 	0x81, 0, "Logical Unit is Reserved",
1723 	0x85, 0, "Audio Address Not Valid",
1724 	0xb6, 0, "Media Load Mechanism Failed",
1725 	0xb9, 0, "Audio Play Operation Aborted",
1726 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1727 	0x53, 2, "Medium removal prevented",
1728 	0x6f, 0, "Authentication failed during key exchange",
1729 	0x6f, 1, "Key not present",
1730 	0x6f, 2, "Key not established",
1731 	0x6f, 3, "Read without proper authentication",
1732 	0x6f, 4, "Mismatched region to this logical unit",
1733 	0x6f, 5, "Region reset count error",
1734 	0xffff, 0x0, NULL
1735 };
1736 
1737 
1738 /*
1739  * Struct for passing printing information for sense data messages
1740  */
1741 struct sd_sense_info {
1742 	int	ssi_severity;
1743 	int	ssi_pfa_flag;
1744 };
1745 
1746 /*
1747  * Table of function pointers for iostart-side routines. Separate "chains"
1748  * of layered function calls are formed by placing the function pointers
1749  * sequentially in the desired order. Functions are called according to an
1750  * incrementing table index ordering. The last function in each chain must
1751  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1752  * in the sd_iodone_chain[] array.
1753  *
1754  * Note: It may seem more natural to organize both the iostart and iodone
1755  * functions together, into an array of structures (or some similar
1756  * organization) with a common index, rather than two separate arrays which
1757  * must be maintained in synchronization. The purpose of this division is
1758  * to achieve improved performance: individual arrays allow for more
1759  * effective cache line utilization on certain platforms.
1760  */
1761 
1762 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1763 
1764 
1765 static sd_chain_t sd_iostart_chain[] = {
1766 
1767 	/* Chain for buf IO for disk drive targets (PM enabled) */
1768 	sd_mapblockaddr_iostart,	/* Index: 0 */
1769 	sd_pm_iostart,			/* Index: 1 */
1770 	sd_core_iostart,		/* Index: 2 */
1771 
1772 	/* Chain for buf IO for disk drive targets (PM disabled) */
1773 	sd_mapblockaddr_iostart,	/* Index: 3 */
1774 	sd_core_iostart,		/* Index: 4 */
1775 
1776 	/* Chain for buf IO for removable-media targets (PM enabled) */
1777 	sd_mapblockaddr_iostart,	/* Index: 5 */
1778 	sd_mapblocksize_iostart,	/* Index: 6 */
1779 	sd_pm_iostart,			/* Index: 7 */
1780 	sd_core_iostart,		/* Index: 8 */
1781 
1782 	/* Chain for buf IO for removable-media targets (PM disabled) */
1783 	sd_mapblockaddr_iostart,	/* Index: 9 */
1784 	sd_mapblocksize_iostart,	/* Index: 10 */
1785 	sd_core_iostart,		/* Index: 11 */
1786 
1787 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1788 	sd_mapblockaddr_iostart,	/* Index: 12 */
1789 	sd_checksum_iostart,		/* Index: 13 */
1790 	sd_pm_iostart,			/* Index: 14 */
1791 	sd_core_iostart,		/* Index: 15 */
1792 
1793 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1794 	sd_mapblockaddr_iostart,	/* Index: 16 */
1795 	sd_checksum_iostart,		/* Index: 17 */
1796 	sd_core_iostart,		/* Index: 18 */
1797 
1798 	/* Chain for USCSI commands (all targets) */
1799 	sd_pm_iostart,			/* Index: 19 */
1800 	sd_core_iostart,		/* Index: 20 */
1801 
1802 	/* Chain for checksumming USCSI commands (all targets) */
1803 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1804 	sd_pm_iostart,			/* Index: 22 */
1805 	sd_core_iostart,		/* Index: 23 */
1806 
1807 	/* Chain for "direct" USCSI commands (all targets) */
1808 	sd_core_iostart,		/* Index: 24 */
1809 
1810 	/* Chain for "direct priority" USCSI commands (all targets) */
1811 	sd_core_iostart,		/* Index: 25 */
1812 };
1813 
1814 /*
1815  * Macros to locate the first function of each iostart chain in the
1816  * sd_iostart_chain[] array. These are located by the index in the array.
1817  */
1818 #define	SD_CHAIN_DISK_IOSTART			0
1819 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1820 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1821 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1822 #define	SD_CHAIN_CHKSUM_IOSTART			12
1823 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1824 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1825 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1826 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1827 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1828 
1829 
1830 /*
1831  * Table of function pointers for the iodone-side routines for the driver-
1832  * internal layering mechanism.  The calling sequence for iodone routines
1833  * uses a decrementing table index, so the last routine called in a chain
1834  * must be at the lowest array index location for that chain.  The last
1835  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1836  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1837  * of the functions in an iodone side chain must correspond to the ordering
1838  * of the iostart routines for that chain.  Note that there is no iodone
1839  * side routine that corresponds to sd_core_iostart(), so there is no
1840  * entry in the table for this.
1841  */
1842 
1843 static sd_chain_t sd_iodone_chain[] = {
1844 
1845 	/* Chain for buf IO for disk drive targets (PM enabled) */
1846 	sd_buf_iodone,			/* Index: 0 */
1847 	sd_mapblockaddr_iodone,		/* Index: 1 */
1848 	sd_pm_iodone,			/* Index: 2 */
1849 
1850 	/* Chain for buf IO for disk drive targets (PM disabled) */
1851 	sd_buf_iodone,			/* Index: 3 */
1852 	sd_mapblockaddr_iodone,		/* Index: 4 */
1853 
1854 	/* Chain for buf IO for removable-media targets (PM enabled) */
1855 	sd_buf_iodone,			/* Index: 5 */
1856 	sd_mapblockaddr_iodone,		/* Index: 6 */
1857 	sd_mapblocksize_iodone,		/* Index: 7 */
1858 	sd_pm_iodone,			/* Index: 8 */
1859 
1860 	/* Chain for buf IO for removable-media targets (PM disabled) */
1861 	sd_buf_iodone,			/* Index: 9 */
1862 	sd_mapblockaddr_iodone,		/* Index: 10 */
1863 	sd_mapblocksize_iodone,		/* Index: 11 */
1864 
1865 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1866 	sd_buf_iodone,			/* Index: 12 */
1867 	sd_mapblockaddr_iodone,		/* Index: 13 */
1868 	sd_checksum_iodone,		/* Index: 14 */
1869 	sd_pm_iodone,			/* Index: 15 */
1870 
1871 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1872 	sd_buf_iodone,			/* Index: 16 */
1873 	sd_mapblockaddr_iodone,		/* Index: 17 */
1874 	sd_checksum_iodone,		/* Index: 18 */
1875 
1876 	/* Chain for USCSI commands (non-checksum targets) */
1877 	sd_uscsi_iodone,		/* Index: 19 */
1878 	sd_pm_iodone,			/* Index: 20 */
1879 
1880 	/* Chain for USCSI commands (checksum targets) */
1881 	sd_uscsi_iodone,		/* Index: 21 */
1882 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1883 	sd_pm_iodone,			/* Index: 22 */
1884 	sd_pm_iodone,			/* Index: 23 */
1885 	/* Chain for "direct" USCSI commands (all targets) */
1886 	sd_uscsi_iodone,		/* Index: 24 */
1887 
1888 	/* Chain for "direct priority" USCSI commands (all targets) */
1889 	sd_uscsi_iodone,		/* Index: 25 */
1890 };
1891 
1892 
1893 /*
1894  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1895  * each iodone-side chain. These are located by the array index, but as the
1896  * iodone side functions are called in a decrementing-index order, the
1897  * highest index number in each chain must be specified (as these correspond
1898  * to the first function in the iodone chain that will be called by the core
1899  * at IO completion time).
1900  */
1901 
1902 #define	SD_CHAIN_DISK_IODONE			2
1903 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1904 #define	SD_CHAIN_RMMEDIA_IODONE			8
1905 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1906 #define	SD_CHAIN_CHKSUM_IODONE			15
1907 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1908 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1909 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1910 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1911 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1912 
1913 
1914 
1915 
1916 /*
1917  * Array to map a layering chain index to the appropriate initpkt routine.
1918  * The redundant entries are present so that the index used for accessing
1919  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1920  * with this table as well.
1921  */
1922 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1923 
1924 static sd_initpkt_t	sd_initpkt_map[] = {
1925 
1926 	/* Chain for buf IO for disk drive targets (PM enabled) */
1927 	sd_initpkt_for_buf,		/* Index: 0 */
1928 	sd_initpkt_for_buf,		/* Index: 1 */
1929 	sd_initpkt_for_buf,		/* Index: 2 */
1930 
1931 	/* Chain for buf IO for disk drive targets (PM disabled) */
1932 	sd_initpkt_for_buf,		/* Index: 3 */
1933 	sd_initpkt_for_buf,		/* Index: 4 */
1934 
1935 	/* Chain for buf IO for removable-media targets (PM enabled) */
1936 	sd_initpkt_for_buf,		/* Index: 5 */
1937 	sd_initpkt_for_buf,		/* Index: 6 */
1938 	sd_initpkt_for_buf,		/* Index: 7 */
1939 	sd_initpkt_for_buf,		/* Index: 8 */
1940 
1941 	/* Chain for buf IO for removable-media targets (PM disabled) */
1942 	sd_initpkt_for_buf,		/* Index: 9 */
1943 	sd_initpkt_for_buf,		/* Index: 10 */
1944 	sd_initpkt_for_buf,		/* Index: 11 */
1945 
1946 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1947 	sd_initpkt_for_buf,		/* Index: 12 */
1948 	sd_initpkt_for_buf,		/* Index: 13 */
1949 	sd_initpkt_for_buf,		/* Index: 14 */
1950 	sd_initpkt_for_buf,		/* Index: 15 */
1951 
1952 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1953 	sd_initpkt_for_buf,		/* Index: 16 */
1954 	sd_initpkt_for_buf,		/* Index: 17 */
1955 	sd_initpkt_for_buf,		/* Index: 18 */
1956 
1957 	/* Chain for USCSI commands (non-checksum targets) */
1958 	sd_initpkt_for_uscsi,		/* Index: 19 */
1959 	sd_initpkt_for_uscsi,		/* Index: 20 */
1960 
1961 	/* Chain for USCSI commands (checksum targets) */
1962 	sd_initpkt_for_uscsi,		/* Index: 21 */
1963 	sd_initpkt_for_uscsi,		/* Index: 22 */
1964 	sd_initpkt_for_uscsi,		/* Index: 23 */
1965 
1966 	/* Chain for "direct" USCSI commands (all targets) */
1967 	sd_initpkt_for_uscsi,		/* Index: 24 */
1968 
1969 	/* Chain for "direct priority" USCSI commands (all targets) */
1970 	sd_initpkt_for_uscsi,		/* Index: 25 */
1971 
1972 };
1973 
1974 
1975 /*
1976  * Array to map a layering chain index to the appropriate destroypkt routine.
1977  * The redundant entries are present so that the index used for accessing
1978  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1979  * with this table as well.
1980  */
1981 typedef void (*sd_destroypkt_t)(struct buf *);
1982 
1983 static sd_destroypkt_t	sd_destroypkt_map[] = {
1984 
1985 	/* Chain for buf IO for disk drive targets (PM enabled) */
1986 	sd_destroypkt_for_buf,		/* Index: 0 */
1987 	sd_destroypkt_for_buf,		/* Index: 1 */
1988 	sd_destroypkt_for_buf,		/* Index: 2 */
1989 
1990 	/* Chain for buf IO for disk drive targets (PM disabled) */
1991 	sd_destroypkt_for_buf,		/* Index: 3 */
1992 	sd_destroypkt_for_buf,		/* Index: 4 */
1993 
1994 	/* Chain for buf IO for removable-media targets (PM enabled) */
1995 	sd_destroypkt_for_buf,		/* Index: 5 */
1996 	sd_destroypkt_for_buf,		/* Index: 6 */
1997 	sd_destroypkt_for_buf,		/* Index: 7 */
1998 	sd_destroypkt_for_buf,		/* Index: 8 */
1999 
2000 	/* Chain for buf IO for removable-media targets (PM disabled) */
2001 	sd_destroypkt_for_buf,		/* Index: 9 */
2002 	sd_destroypkt_for_buf,		/* Index: 10 */
2003 	sd_destroypkt_for_buf,		/* Index: 11 */
2004 
2005 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2006 	sd_destroypkt_for_buf,		/* Index: 12 */
2007 	sd_destroypkt_for_buf,		/* Index: 13 */
2008 	sd_destroypkt_for_buf,		/* Index: 14 */
2009 	sd_destroypkt_for_buf,		/* Index: 15 */
2010 
2011 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2012 	sd_destroypkt_for_buf,		/* Index: 16 */
2013 	sd_destroypkt_for_buf,		/* Index: 17 */
2014 	sd_destroypkt_for_buf,		/* Index: 18 */
2015 
2016 	/* Chain for USCSI commands (non-checksum targets) */
2017 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2018 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2019 
2020 	/* Chain for USCSI commands (checksum targets) */
2021 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2022 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2023 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2024 
2025 	/* Chain for "direct" USCSI commands (all targets) */
2026 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2027 
2028 	/* Chain for "direct priority" USCSI commands (all targets) */
2029 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2030 
2031 };
2032 
2033 
2034 
2035 /*
2036  * Array to map a layering chain index to the appropriate chain "type".
2037  * The chain type indicates a specific property/usage of the chain.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 
2043 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2044 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2045 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2046 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2047 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2048 						/* (for error recovery) */
2049 
2050 static int sd_chain_type_map[] = {
2051 
2052 	/* Chain for buf IO for disk drive targets (PM enabled) */
2053 	SD_CHAIN_BUFIO,			/* Index: 0 */
2054 	SD_CHAIN_BUFIO,			/* Index: 1 */
2055 	SD_CHAIN_BUFIO,			/* Index: 2 */
2056 
2057 	/* Chain for buf IO for disk drive targets (PM disabled) */
2058 	SD_CHAIN_BUFIO,			/* Index: 3 */
2059 	SD_CHAIN_BUFIO,			/* Index: 4 */
2060 
2061 	/* Chain for buf IO for removable-media targets (PM enabled) */
2062 	SD_CHAIN_BUFIO,			/* Index: 5 */
2063 	SD_CHAIN_BUFIO,			/* Index: 6 */
2064 	SD_CHAIN_BUFIO,			/* Index: 7 */
2065 	SD_CHAIN_BUFIO,			/* Index: 8 */
2066 
2067 	/* Chain for buf IO for removable-media targets (PM disabled) */
2068 	SD_CHAIN_BUFIO,			/* Index: 9 */
2069 	SD_CHAIN_BUFIO,			/* Index: 10 */
2070 	SD_CHAIN_BUFIO,			/* Index: 11 */
2071 
2072 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2073 	SD_CHAIN_BUFIO,			/* Index: 12 */
2074 	SD_CHAIN_BUFIO,			/* Index: 13 */
2075 	SD_CHAIN_BUFIO,			/* Index: 14 */
2076 	SD_CHAIN_BUFIO,			/* Index: 15 */
2077 
2078 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2079 	SD_CHAIN_BUFIO,			/* Index: 16 */
2080 	SD_CHAIN_BUFIO,			/* Index: 17 */
2081 	SD_CHAIN_BUFIO,			/* Index: 18 */
2082 
2083 	/* Chain for USCSI commands (non-checksum targets) */
2084 	SD_CHAIN_USCSI,			/* Index: 19 */
2085 	SD_CHAIN_USCSI,			/* Index: 20 */
2086 
2087 	/* Chain for USCSI commands (checksum targets) */
2088 	SD_CHAIN_USCSI,			/* Index: 21 */
2089 	SD_CHAIN_USCSI,			/* Index: 22 */
2090 	SD_CHAIN_USCSI,			/* Index: 23 */
2091 
2092 	/* Chain for "direct" USCSI commands (all targets) */
2093 	SD_CHAIN_DIRECT,		/* Index: 24 */
2094 
2095 	/* Chain for "direct priority" USCSI commands (all targets) */
2096 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2097 };
2098 
2099 
2100 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2101 #define	SD_IS_BUFIO(xp)			\
2102 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2103 
2104 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2105 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2106 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2107 
2108 
2109 
2110 /*
2111  * Struct, array, and macros to map a specific chain to the appropriate
2112  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2113  *
2114  * The sd_chain_index_map[] array is used at attach time to set the various
2115  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2116  * chain to be used with the instance. This allows different instances to use
2117  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2118  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2119  * values at sd_xbuf init time, (1) layering chains may be changed
2120  * dynamically and without the use of locking; and (2) a layer may update the
2121  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2122  * allowing deferred processing of an IO within the same chain from a
2123  * different execution context.
2124  */
2125 
2126 struct sd_chain_index {
2127 	int	sci_iostart_index;
2128 	int	sci_iodone_index;
2129 };
2130 
2131 static struct sd_chain_index	sd_chain_index_map[] = {
2132 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2133 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2134 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2135 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2136 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2137 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2138 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2139 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2140 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2141 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2142 };
2143 
2144 
2145 /*
2146  * The following are indexes into the sd_chain_index_map[] array.
2147  */
2148 
2149 /* un->un_buf_chain_type must be set to one of these */
2150 #define	SD_CHAIN_INFO_DISK		0
2151 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2152 #define	SD_CHAIN_INFO_RMMEDIA		2
2153 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2154 #define	SD_CHAIN_INFO_CHKSUM		4
2155 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2156 
2157 /* un->un_uscsi_chain_type must be set to one of these */
2158 #define	SD_CHAIN_INFO_USCSI_CMD		6
2159 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2160 /* USCSI with PM disabled is the same as DIRECT */
2161 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2162 
2163 /* un->un_direct_chain_type must be set to one of these */
2164 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2165 
2166 /* un->un_priority_chain_type must be set to one of these */
2167 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
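
/*
 * Editorial sketch (compiled out): how the indexes above are consumed.
 * At attach time one of the SD_CHAIN_INFO_* values is stored in the
 * corresponding un_*_chain_type member, and each sd_xbuf is then seeded
 * at sd_xbuf init time with the iostart/iodone indexes from
 * sd_chain_index_map[]. The helper name and the SD_CHAIN_EXAMPLE guard
 * are hypothetical.
 */
#ifdef	SD_CHAIN_EXAMPLE
static void
sd_example_seed_xbuf(struct sd_xbuf *xp, int chain_type)
{
	xp->xb_chain_iostart =
	    sd_chain_index_map[chain_type].sci_iostart_index;
	xp->xb_chain_iodone =
	    sd_chain_index_map[chain_type].sci_iodone_index;
}
#endif	/* SD_CHAIN_EXAMPLE */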
2168 
2169 /* size for devid inquiries */
2170 #define	MAX_INQUIRY_SIZE		0xF0
2171 
2172 /*
2173  * Macros used by functions to pass a given buf(9S) struct along to the
2174  * next function in the layering chain for further processing.
2175  *
2176  * In the following macros, passing more than three arguments to the called
2177  * routines causes the optimizer for the SPARC compiler to stop doing tail
2178  * call elimination which results in significant performance degradation.
2179  */
2180 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2181 	((*(sd_iostart_chain[index]))(index, un, bp))
2182 
2183 #define	SD_BEGIN_IODONE(index, un, bp)	\
2184 	((*(sd_iodone_chain[index]))(index, un, bp))
2185 
2186 #define	SD_NEXT_IOSTART(index, un, bp)				\
2187 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2188 
2189 #define	SD_NEXT_IODONE(index, un, bp)				\
2190 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
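
/*
 * Editorial sketch (compiled out): the general shape of a layered
 * iostart/iodone pair using the macros above. A layer does its work on
 * the way "down" and hands the buf to the next iostart function via an
 * incrementing index; its iodone twin undoes that work on the way "up"
 * via a decrementing index. The function names and the SD_CHAIN_EXAMPLE
 * guard are hypothetical.
 */
#ifdef	SD_CHAIN_EXAMPLE
static void
sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	/* ... per-layer processing of bp on the way down ... */
	SD_NEXT_IOSTART(index, un, bp);
}

static void
sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* ... per-layer completion processing on the way up ... */
	SD_NEXT_IODONE(index, un, bp);
}
#endif	/* SD_CHAIN_EXAMPLE */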
2191 
2192 /*
2193  *    Function: _init
2194  *
2195  * Description: This is the driver _init(9E) entry point.
2196  *
2197  * Return Code: Returns the value from mod_install(9F) or
2198  *		ddi_soft_state_init(9F) as appropriate.
2199  *
2200  *     Context: Called when driver module loaded.
2201  */
2202 
2203 int
2204 _init(void)
2205 {
2206 	int	err;
2207 
2208 	/* establish driver name from module name */
2209 	sd_label = mod_modname(&modlinkage);
2210 
2211 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2212 		SD_MAXUNIT);
2213 
2214 	if (err != 0) {
2215 		return (err);
2216 	}
2217 
2218 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2219 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2220 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2221 
2222 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2223 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2224 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2225 
2226 	/*
2227 	 * it's ok to init here even for fibre devices
2228 	 */
2229 	sd_scsi_probe_cache_init();
2230 
2231 	/*
2232 	 * Creating taskq before mod_install ensures that all callers (threads)
2233 	 * that enter the module after a successful mod_install encounter
2234 	 * a valid taskq.
2235 	 */
2236 	sd_taskq_create();
2237 
2238 	err = mod_install(&modlinkage);
2239 	if (err != 0) {
2240 		/* delete taskq if install fails */
2241 		sd_taskq_delete();
2242 
2243 		mutex_destroy(&sd_detach_mutex);
2244 		mutex_destroy(&sd_log_mutex);
2245 		mutex_destroy(&sd_label_mutex);
2246 
2247 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2249 		cv_destroy(&sd_tr.srq_inprocess_cv);
2250 
2251 		sd_scsi_probe_cache_fini();
2252 
2253 		ddi_soft_state_fini(&sd_state);
2254 		return (err);
2255 	}
2256 
2257 	return (err);
2258 }
2259 
2260 
2261 /*
2262  *    Function: _fini
2263  *
2264  * Description: This is the driver _fini(9E) entry point.
2265  *
2266  * Return Code: Returns the value from mod_remove(9F)
2267  *
2268  *     Context: Called when driver module is unloaded.
2269  */
2270 
2271 int
2272 _fini(void)
2273 {
2274 	int err;
2275 
2276 	if ((err = mod_remove(&modlinkage)) != 0) {
2277 		return (err);
2278 	}
2279 
2280 	sd_taskq_delete();
2281 
2282 	mutex_destroy(&sd_detach_mutex);
2283 	mutex_destroy(&sd_log_mutex);
2284 	mutex_destroy(&sd_label_mutex);
2285 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2286 
2287 	sd_scsi_probe_cache_fini();
2288 
2289 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2290 	cv_destroy(&sd_tr.srq_inprocess_cv);
2291 
2292 	ddi_soft_state_fini(&sd_state);
2293 
2294 	return (err);
2295 }
2296 
2297 
2298 /*
2299  *    Function: _info
2300  *
2301  * Description: This is the driver _info(9E) entry point.
2302  *
2303  *   Arguments: modinfop - pointer to the driver modinfo structure
2304  *
2305  * Return Code: Returns the value from mod_info(9F).
2306  *
2307  *     Context: Kernel thread context
2308  */
2309 
2310 int
2311 _info(struct modinfo *modinfop)
2312 {
2313 	return (mod_info(&modlinkage, modinfop));
2314 }
2315 
2316 
2317 /*
2318  * The following routines implement the driver message logging facility.
2319  * They provide component- and level-based debug output filtering.
2320  * Output may also be restricted to messages for a single instance by
2321  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2322  * to NULL, then messages for all instances are printed.
2323  *
2324  * These routines have been cloned from each other due to the language
2325  * constraints of macros and variable argument list processing.
2326  */
2327 
2328 
2329 /*
2330  *    Function: sd_log_err
2331  *
2332  * Description: This routine is called by the SD_ERROR macro for debug
2333  *		logging of error conditions.
2334  *
2335  *   Arguments: comp - driver component being logged
2336  *		un   - pointer to driver soft state (unit) structure
2337  *		fmt  - error string and format to be logged
2338  */
2339 
2340 static void
2341 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2342 {
2343 	va_list		ap;
2344 	dev_info_t	*dev;
2345 
2346 	ASSERT(un != NULL);
2347 	dev = SD_DEVINFO(un);
2348 	ASSERT(dev != NULL);
2349 
2350 	/*
2351 	 * Filter messages based on the global component and level masks.
2352 	 * Also print if un matches the value of sd_debug_un, or if
2353 	 * sd_debug_un is set to NULL.
2354 	 */
2355 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2356 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2357 		mutex_enter(&sd_log_mutex);
2358 		va_start(ap, fmt);
2359 		(void) vsprintf(sd_log_buf, fmt, ap);
2360 		va_end(ap);
2361 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2362 		mutex_exit(&sd_log_mutex);
2363 	}
2364 #ifdef SD_FAULT_INJECTION
2365 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2366 	if (un->sd_injection_mask & comp) {
2367 		mutex_enter(&sd_log_mutex);
2368 		va_start(ap, fmt);
2369 		(void) vsprintf(sd_log_buf, fmt, ap);
2370 		va_end(ap);
2371 		sd_injection_log(sd_log_buf, un);
2372 		mutex_exit(&sd_log_mutex);
2373 	}
2374 #endif
2375 }
2376 
2377 
2378 /*
2379  *    Function: sd_log_info
2380  *
2381  * Description: This routine is called by the SD_INFO macro for debug
2382  *		logging of general purpose informational conditions.
2383  *
2384  *   Arguments: component - driver component being logged
2385  *		un   - pointer to driver soft state (unit) structure
2386  *		fmt  - info string and format to be logged
2387  */
2388 
2389 static void
2390 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2391 {
2392 	va_list		ap;
2393 	dev_info_t	*dev;
2394 
2395 	ASSERT(un != NULL);
2396 	dev = SD_DEVINFO(un);
2397 	ASSERT(dev != NULL);
2398 
2399 	/*
2400 	 * Filter messages based on the global component and level masks.
2401 	 * Also print if un matches the value of sd_debug_un, or if
2402 	 * sd_debug_un is set to NULL.
2403 	 */
2404 	if ((sd_component_mask & component) &&
2405 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2406 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2407 		mutex_enter(&sd_log_mutex);
2408 		va_start(ap, fmt);
2409 		(void) vsprintf(sd_log_buf, fmt, ap);
2410 		va_end(ap);
2411 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2412 		mutex_exit(&sd_log_mutex);
2413 	}
2414 #ifdef SD_FAULT_INJECTION
2415 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2416 	if (un->sd_injection_mask & component) {
2417 		mutex_enter(&sd_log_mutex);
2418 		va_start(ap, fmt);
2419 		(void) vsprintf(sd_log_buf, fmt, ap);
2420 		va_end(ap);
2421 		sd_injection_log(sd_log_buf, un);
2422 		mutex_exit(&sd_log_mutex);
2423 	}
2424 #endif
2425 }
2426 
2427 
2428 /*
2429  *    Function: sd_log_trace
2430  *
2431  * Description: This routine is called by the SD_TRACE macro for debug
2432  *		logging of trace conditions (i.e. function entry/exit).
2433  *
2434  *   Arguments: component - driver component being logged
2435  *		un   - pointer to driver soft state (unit) structure
2436  *		fmt  - trace string and format to be logged
2437  */
2438 
2439 static void
2440 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2441 {
2442 	va_list		ap;
2443 	dev_info_t	*dev;
2444 
2445 	ASSERT(un != NULL);
2446 	dev = SD_DEVINFO(un);
2447 	ASSERT(dev != NULL);
2448 
2449 	/*
2450 	 * Filter messages based on the global component and level masks.
2451 	 * Also print if un matches the value of sd_debug_un, or if
2452 	 * sd_debug_un is set to NULL.
2453 	 */
2454 	if ((sd_component_mask & component) &&
2455 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2456 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2457 		mutex_enter(&sd_log_mutex);
2458 		va_start(ap, fmt);
2459 		(void) vsprintf(sd_log_buf, fmt, ap);
2460 		va_end(ap);
2461 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2462 		mutex_exit(&sd_log_mutex);
2463 	}
2464 #ifdef SD_FAULT_INJECTION
2465 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2466 	if (un->sd_injection_mask & component) {
2467 		mutex_enter(&sd_log_mutex);
2468 		va_start(ap, fmt);
2469 		(void) vsprintf(sd_log_buf, fmt, ap);
2470 		va_end(ap);
2471 		sd_injection_log(sd_log_buf, un);
2472 		mutex_exit(&sd_log_mutex);
2473 	}
2474 #endif
2475 }
2476 
2477 
2478 /*
2479  *    Function: sdprobe
2480  *
2481  * Description: This is the driver probe(9e) entry point function.
2482  *
2483  *   Arguments: devi - opaque device info handle
2484  *
2485  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2486  *              DDI_PROBE_FAILURE: If the probe failed.
2487  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2488  *				   but may be present in the future.
2489  */
2490 
2491 static int
2492 sdprobe(dev_info_t *devi)
2493 {
2494 	struct scsi_device	*devp;
2495 	int			rval;
2496 	int			instance;
2497 
2498 	/*
2499 	 * if it wasn't for pln, sdprobe could actually be nulldev
2500 	 * in the "__fibre" case.
2501 	 */
2502 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2503 		return (DDI_PROBE_DONTCARE);
2504 	}
2505 
2506 	devp = ddi_get_driver_private(devi);
2507 
2508 	if (devp == NULL) {
2509 		/* Ooops... nexus driver is mis-configured... */
2510 		return (DDI_PROBE_FAILURE);
2511 	}
2512 
2513 	instance = ddi_get_instance(devi);
2514 
2515 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2516 		return (DDI_PROBE_PARTIAL);
2517 	}
2518 
2519 	/*
2520 	 * Call the SCSA utility probe routine to see if we actually
2521 	 * have a target at this SCSI nexus.
2522 	 */
2523 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2524 	case SCSIPROBE_EXISTS:
2525 		switch (devp->sd_inq->inq_dtype) {
2526 		case DTYPE_DIRECT:
2527 			rval = DDI_PROBE_SUCCESS;
2528 			break;
2529 		case DTYPE_RODIRECT:
2530 			/* CDs etc. Can be removable media */
2531 			rval = DDI_PROBE_SUCCESS;
2532 			break;
2533 		case DTYPE_OPTICAL:
2534 			/*
2535 			 * Rewritable optical drive HP115AA
2536 			 * Can also be removable media
2537 			 */
2538 
2539 			/*
2540 			 * Do not attempt to bind to DTYPE_OPTICAL if
2541 			 * pre solaris 9 sparc sd behavior is required
2542 			 *
2543 			 * If first time through and sd_dtype_optical_bind
2544 			 * has not been set in /etc/system check properties
2545 			 */
2546 
2547 			if (sd_dtype_optical_bind < 0) {
2548 				sd_dtype_optical_bind = ddi_prop_get_int(
2549 				    DDI_DEV_T_ANY, devi, 0,
2550 				    "optical-device-bind", 1);
2551 			}
2552 
2553 			if (sd_dtype_optical_bind == 0) {
2554 				rval = DDI_PROBE_FAILURE;
2555 			} else {
2556 				rval = DDI_PROBE_SUCCESS;
2557 			}
2558 			break;
2559 
2560 		case DTYPE_NOTPRESENT:
2561 		default:
2562 			rval = DDI_PROBE_FAILURE;
2563 			break;
2564 		}
2565 		break;
2566 	default:
2567 		rval = DDI_PROBE_PARTIAL;
2568 		break;
2569 	}
2570 
2571 	/*
2572 	 * This routine checks for resource allocation prior to freeing,
2573 	 * so it will take care of the "smart probing" case where a
2574 	 * scsi_probe() may or may not have been issued and will *not*
2575 	 * free previously-freed resources.
2576 	 */
2577 	scsi_unprobe(devp);
2578 	return (rval);
2579 }
2580 
2581 
2582 /*
2583  *    Function: sdinfo
2584  *
2585  * Description: This is the driver getinfo(9e) entry point function.
2586  * 		Given the device number, return the devinfo pointer from
2587  *		the scsi_device structure or the instance number
2588  *		associated with the dev_t.
2589  *
2590  *   Arguments: dip     - pointer to device info structure
2591  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2592  *			  DDI_INFO_DEVT2INSTANCE)
2593  *		arg     - driver dev_t
2594  *		resultp - user buffer for request response
2595  *
2596  * Return Code: DDI_SUCCESS
2597  *              DDI_FAILURE
2598  */
2599 /* ARGSUSED */
2600 static int
2601 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2602 {
2603 	struct sd_lun	*un;
2604 	dev_t		dev;
2605 	int		instance;
2606 	int		error;
2607 
2608 	switch (infocmd) {
2609 	case DDI_INFO_DEVT2DEVINFO:
2610 		dev = (dev_t)arg;
2611 		instance = SDUNIT(dev);
2612 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2613 			return (DDI_FAILURE);
2614 		}
2615 		*result = (void *) SD_DEVINFO(un);
2616 		error = DDI_SUCCESS;
2617 		break;
2618 	case DDI_INFO_DEVT2INSTANCE:
2619 		dev = (dev_t)arg;
2620 		instance = SDUNIT(dev);
2621 		*result = (void *)(uintptr_t)instance;
2622 		error = DDI_SUCCESS;
2623 		break;
2624 	default:
2625 		error = DDI_FAILURE;
2626 	}
2627 	return (error);
2628 }
2629 
2630 /*
2631  *    Function: sd_prop_op
2632  *
2633  * Description: This is the driver prop_op(9e) entry point function.
2634  *		Return the number of blocks for the partition in question
2635  *		or forward the request to the property facilities.
2636  *
2637  *   Arguments: dev       - device number
2638  *		dip       - pointer to device info structure
2639  *		prop_op   - property operator
2640  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2641  *		name      - pointer to property name
2642  *		valuep    - pointer or address of the user buffer
2643  *		lengthp   - property length
2644  *
2645  * Return Code: DDI_PROP_SUCCESS
2646  *              DDI_PROP_NOT_FOUND
2647  *              DDI_PROP_UNDEFINED
2648  *              DDI_PROP_NO_MEMORY
2649  *              DDI_PROP_BUF_TOO_SMALL
2650  */
2651 
2652 static int
2653 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2654 	char *name, caddr_t valuep, int *lengthp)
2655 {
2656 	int		instance = ddi_get_instance(dip);
2657 	struct sd_lun	*un;
2658 	uint64_t	nblocks64;
2659 
2660 	/*
2661 	 * Our dynamic properties are all device specific and size oriented.
2662 	 * Requests issued under conditions where size is valid are passed
2663 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2664 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2665 	 */
2666 	un = ddi_get_soft_state(sd_state, instance);
2667 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2668 	    (un->un_f_geometry_is_valid == FALSE)) {
2669 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2670 		    name, valuep, lengthp));
2671 	} else {
2672 		/* get nblocks value */
2673 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2674 		mutex_enter(SD_MUTEX(un));
2675 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2676 		mutex_exit(SD_MUTEX(un));
2677 
2678 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2679 		    name, valuep, lengthp, nblocks64));
2680 	}
2681 }
2682 
2683 /*
2684  * The following functions are for smart probing:
2685  * sd_scsi_probe_cache_init()
2686  * sd_scsi_probe_cache_fini()
2687  * sd_scsi_clear_probe_cache()
2688  * sd_scsi_probe_with_cache()
2689  */
2690 
2691 /*
2692  *    Function: sd_scsi_probe_cache_init
2693  *
2694  * Description: Initializes the probe response cache mutex and head pointer.
2695  *
2696  *     Context: Kernel thread context
2697  */
2698 
2699 static void
2700 sd_scsi_probe_cache_init(void)
2701 {
2702 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2703 	sd_scsi_probe_cache_head = NULL;
2704 }
2705 
2706 
2707 /*
2708  *    Function: sd_scsi_probe_cache_fini
2709  *
2710  * Description: Frees all resources associated with the probe response cache.
2711  *
2712  *     Context: Kernel thread context
2713  */
2714 
2715 static void
2716 sd_scsi_probe_cache_fini(void)
2717 {
2718 	struct sd_scsi_probe_cache *cp;
2719 	struct sd_scsi_probe_cache *ncp;
2720 
2721 	/* Clean up our smart probing linked list */
2722 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2723 		ncp = cp->next;
2724 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2725 	}
2726 	sd_scsi_probe_cache_head = NULL;
2727 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_clear_probe_cache
2733  *
2734  * Description: This routine clears the probe response cache. This is
2735  *		done when open() returns ENXIO so that when deferred
2736  *		attach is attempted (possibly after a device has been
2737  *		turned on) we will retry the probe. Since we don't know
2738  *		which target we failed to open, we just clear the
2739  *		entire cache.
2740  *
2741  *     Context: Kernel thread context
2742  */
2743 
2744 static void
2745 sd_scsi_clear_probe_cache(void)
2746 {
2747 	struct sd_scsi_probe_cache	*cp;
2748 	int				i;
2749 
2750 	mutex_enter(&sd_scsi_probe_cache_mutex);
2751 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2752 		/*
2753 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2754 		 * force probing to be performed the next time
2755 		 * sd_scsi_probe_with_cache is called.
2756 		 */
2757 		for (i = 0; i < NTARGETS_WIDE; i++) {
2758 			cp->cache[i] = SCSIPROBE_EXISTS;
2759 		}
2760 	}
2761 	mutex_exit(&sd_scsi_probe_cache_mutex);
2762 }
2763 
2764 
2765 /*
2766  *    Function: sd_scsi_probe_with_cache
2767  *
2768  * Description: This routine implements support for a scsi device probe
2769  *		with cache. The driver maintains a cache of the target
2770  *		responses to scsi probes. If we get no response from a
2771  *		target during a probe inquiry, we remember that, and we
2772  *		avoid additional calls to scsi_probe on non-zero LUNs
2773  *		on the same target until the cache is cleared. By doing
2774  *		so we avoid the 1/4 sec selection timeout for nonzero
2775  *		LUNs. lun0 of a target is always probed.
2776  *
2777  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2778  *              waitfunc - indicates what the allocator routines should
2779  *			   do when resources are not available. This value
2780  *			   is passed on to scsi_probe() when that routine
2781  *			   is called.
2782  *
2783  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2784  * Return Code: SCSIPROBE_NORESP if a NORESP is cached for this target;
2785  *
2786  *     Context: Kernel thread context
2787  */
2788 
2789 static int
2790 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2791 {
2792 	struct sd_scsi_probe_cache	*cp;
2793 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2794 	int		lun, tgt;
2795 
2796 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2797 	    SCSI_ADDR_PROP_LUN, 0);
2798 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2799 	    SCSI_ADDR_PROP_TARGET, -1);
2800 
2801 	/* Make sure caching enabled and target in range */
2802 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2803 		/* do it the old way (no cache) */
2804 		return (scsi_probe(devp, waitfn));
2805 	}
2806 
2807 	mutex_enter(&sd_scsi_probe_cache_mutex);
2808 
2809 	/* Find the cache for this scsi bus instance */
2810 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2811 		if (cp->pdip == pdip) {
2812 			break;
2813 		}
2814 	}
2815 
2816 	/* If we can't find a cache for this pdip, create one */
2817 	if (cp == NULL) {
2818 		int i;
2819 
2820 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2821 		    KM_SLEEP);
2822 		cp->pdip = pdip;
2823 		cp->next = sd_scsi_probe_cache_head;
2824 		sd_scsi_probe_cache_head = cp;
2825 		for (i = 0; i < NTARGETS_WIDE; i++) {
2826 			cp->cache[i] = SCSIPROBE_EXISTS;
2827 		}
2828 	}
2829 
2830 	mutex_exit(&sd_scsi_probe_cache_mutex);
2831 
2832 	/* Recompute the cache for this target if LUN zero */
2833 	if (lun == 0) {
2834 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2835 	}
2836 
2837 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2838 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2839 		return (SCSIPROBE_NORESP);
2840 	}
2841 
2842 	/* Do the actual probe; save & return the result */
2843 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2844 }
2845 
2846 
2847 /*
2848  *    Function: sd_spin_up_unit
2849  *
2850  * Description: Issues the following commands to spin-up the device:
2851  *		START STOP UNIT, and INQUIRY.
2852  *
2853  *   Arguments: un - driver soft state (unit) structure
2854  *
2855  * Return Code: 0 - success
2856  *		EIO - failure
2857  *		EACCES - reservation conflict
2858  *
2859  *     Context: Kernel thread context
2860  */
2861 
2862 static int
2863 sd_spin_up_unit(struct sd_lun *un)
2864 {
2865 	size_t	resid		= 0;
2866 	int	has_conflict	= FALSE;
2867 	uchar_t *bufaddr;
2868 
2869 	ASSERT(un != NULL);
2870 
2871 	/*
2872 	 * Send a throwaway START UNIT command.
2873 	 *
2874 	 * If we fail on this, we don't care presently what precisely
2875 	 * is wrong.  EMC's arrays will also fail this with a check
2876 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2877 	 * we don't want to fail the attach because it may become
2878 	 * "active" later.
2879 	 */
2880 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2881 	    == EACCES)
2882 		has_conflict = TRUE;
2883 
2884 	/*
2885 	 * Send another INQUIRY command to the target. This is necessary for
2886 	 * non-removable media direct access devices because their INQUIRY data
2887 	 * may not be fully qualified until they are spun up (perhaps via the
2888 	 * START command above).  Note: This seems to be needed for some
2889 	 * START command above).  (Note: This seems to be needed for some
2890 	 * legacy devices only.) The INQUIRY command should succeed even if a
2891 	 */
2892 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2893 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2894 		kmem_free(bufaddr, SUN_INQSIZE);
2895 		return (EIO);
2896 	}
2897 
2898 	/*
2899 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2900 	 * Note that this routine does not return a failure here even if the
2901 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2902 	 */
2903 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2904 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2905 	}
2906 
2907 	kmem_free(bufaddr, SUN_INQSIZE);
2908 
2909 	/* If we hit a reservation conflict above, tell the caller. */
2910 	if (has_conflict == TRUE) {
2911 		return (EACCES);
2912 	}
2913 
2914 	return (0);
2915 }
2916 
2917 #ifdef _LP64
2918 /*
2919  *    Function: sd_enable_descr_sense
2920  *
2921  * Description: This routine attempts to select descriptor sense format
2922  *		using the Control mode page.  Devices that support 64 bit
2923  *		LBAs (for >2TB luns) should also implement descriptor
2924  *		sense data so we will call this function whenever we see
2925  *		a lun larger than 2TB.  If for some reason the device
2926  *		supports 64 bit LBAs but doesn't support descriptor sense
2927  *		presumably the mode select will fail.  Everything will
2928  *		continue to work normally except that we will not get
2929  *		complete sense data for commands that fail with an LBA
2930  *		larger than 32 bits.
2931  *
2932  *   Arguments: un - driver soft state (unit) structure
2933  *
2934  *     Context: Kernel thread context only
2935  */
2936 
2937 static void
2938 sd_enable_descr_sense(struct sd_lun *un)
2939 {
2940 	uchar_t			*header;
2941 	struct mode_control_scsi3 *ctrl_bufp;
2942 	size_t			buflen;
2943 	size_t			bd_len;
2944 
2945 	/*
2946 	 * Read MODE SENSE page 0xA, Control Mode Page
2947 	 */
2948 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2949 	    sizeof (struct mode_control_scsi3);
2950 	header = kmem_zalloc(buflen, KM_SLEEP);
2951 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2952 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2953 		SD_ERROR(SD_LOG_COMMON, un,
2954 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2955 		goto eds_exit;
2956 	}
2957 
2958 	/*
2959 	 * Determine size of Block Descriptors in order to locate
2960 	 * the mode page data. ATAPI devices return 0, SCSI devices
2961 	 * should return MODE_BLK_DESC_LENGTH.
2962 	 */
2963 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2964 
2965 	ctrl_bufp = (struct mode_control_scsi3 *)
2966 	    (header + MODE_HEADER_LENGTH + bd_len);
2967 
2968 	/*
2969 	 * Clear PS bit for MODE SELECT
2970 	 */
2971 	ctrl_bufp->mode_page.ps = 0;
2972 
2973 	/*
2974 	 * Set D_SENSE to enable descriptor sense format.
2975 	 */
2976 	ctrl_bufp->d_sense = 1;
2977 
2978 	/*
2979 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2980 	 */
2981 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2982 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2983 		SD_INFO(SD_LOG_COMMON, un,
2984 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2985 		goto eds_exit;
2986 	}
2987 
2988 eds_exit:
2989 	kmem_free(header, buflen);
2990 }
2991 
2992 /*
2993  *    Function: sd_reenable_dsense_task
2994  *
2995  * Description: Re-enable descriptor sense after device or bus reset
2996  *
2997  *     Context: Executes in a taskq() thread context
2998  */
2999 static void
3000 sd_reenable_dsense_task(void *arg)
3001 {
3002 	struct	sd_lun	*un = arg;
3003 
3004 	ASSERT(un != NULL);
3005 	sd_enable_descr_sense(un);
3006 }
3007 #endif /* _LP64 */
3008 
3009 /*
3010  *    Function: sd_set_mmc_caps
3011  *
3012  * Description: This routine determines if the device is MMC compliant and if
3013  *		the device supports CDDA via a mode sense of the CDVD
3014  *		capabilities mode page. Also checks if the device is a
3015  *		dvdram writable device.
3016  *
3017  *   Arguments: un - driver soft state (unit) structure
3018  *
3019  *     Context: Kernel thread context only
3020  */
3021 
3022 static void
3023 sd_set_mmc_caps(struct sd_lun *un)
3024 {
3025 	struct mode_header_grp2		*sense_mhp;
3026 	uchar_t				*sense_page;
3027 	caddr_t				buf;
3028 	int				bd_len;
3029 	int				status;
3030 	struct uscsi_cmd		com;
3031 	int				rtn;
3032 	uchar_t				*out_data_rw, *out_data_hd;
3033 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3034 
3035 	ASSERT(un != NULL);
3036 
3037 	/*
3038 	 * The flags set in this function are: MMC compliant, DVD-RAM
3039 	 * writable device, and CDDA support.  Initialize them to FALSE;
3040 	 * if a capability is detected, the corresponding flag is set to TRUE.
3041 	 */
3042 	un->un_f_mmc_cap = FALSE;
3043 	un->un_f_dvdram_writable_device = FALSE;
3044 	un->un_f_cfg_cdda = FALSE;
3045 
3046 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3047 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3048 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3049 
3050 	if (status != 0) {
3051 		/* command failed; just return */
3052 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3053 		return;
3054 	}
3055 	/*
3056 	 * If the mode sense request for the CDROM CAPABILITIES
3057 	 * page (0x2A) succeeds, the device is assumed to be MMC.
3058 	 */
3059 	un->un_f_mmc_cap = TRUE;
3060 
3061 	/* Get to the page data */
3062 	sense_mhp = (struct mode_header_grp2 *)buf;
3063 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3064 	    sense_mhp->bdesc_length_lo;
3065 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3066 		/*
3067 		 * We did not get back the expected block descriptor
3068 		 * length so we cannot determine if the device supports
3069 		 * CDDA. However, we still indicate the device is MMC
3070 		 * according to the successful response to the page
3071 		 * 0x2A mode sense request.
3072 		 */
3073 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3074 		    "sd_set_mmc_caps: Mode Sense returned "
3075 		    "invalid block descriptor length\n");
3076 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3077 		return;
3078 	}
3079 
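	/*
	 * The checks below assume the MMC CD/DVD Capabilities mode page
	 * (0x2A) layout, in which (per MMC; a sketch, not a normative
	 * map) bit 5 of byte 3 reports DVD-RAM write support and bit 0
	 * of byte 5 reports support for the CD-DA read commands.
	 */
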
3080 	/* See if read CDDA is supported */
3081 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3082 	    bd_len);
3083 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3084 
3085 	/* See if writing DVD RAM is supported. */
3086 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3087 	if (un->un_f_dvdram_writable_device == TRUE) {
3088 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3089 		return;
3090 	}
3091 
3092 	/*
3093 	 * If the device presents DVD or CD capabilities in the mode
3094 	 * page, we can return here since a RRD will not have
3095 	 * these capabilities.
3096 	 */
3097 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3098 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3099 		return;
3100 	}
3101 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3102 
3103 	/*
3104 	 * If un->un_f_dvdram_writable_device is still FALSE,
3105 	 * check for a Removable Rigid Disk (RRD).  A RRD
3106 	 * device is identified by the features RANDOM_WRITABLE and
3107 	 * HARDWARE_DEFECT_MANAGEMENT.
3108 	 */
3109 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3110 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3111 
3112 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3113 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3114 	    RANDOM_WRITABLE);
3115 	if (rtn != 0) {
3116 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3117 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3118 		return;
3119 	}
3120 
3121 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3122 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3123 
3124 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3125 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3126 	    HARDWARE_DEFECT_MANAGEMENT);
3127 	if (rtn == 0) {
3128 		/*
3129 		 * We have good information, check for random writable
3130 		 * and hardware defect features.
3131 		 */
3132 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3133 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3134 			un->un_f_dvdram_writable_device = TRUE;
3135 		}
3136 	}
3137 
3138 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3139 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3140 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3141 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3142 }
3143 
3144 /*
3145  *    Function: sd_check_for_writable_cd
3146  *
3147  * Description: This routine determines if the media in the device is
3148  *		writable or not. It uses the GET CONFIGURATION command (0x46)
3149  *		to determine if the media is writable.
3150  *
3151  *   Arguments: un - driver soft state (unit) structure
3152  *
3153  *     Context: Never called at interrupt context.
3154  */
3155 
3156 static void
3157 sd_check_for_writable_cd(struct sd_lun *un)
3158 {
3159 	struct uscsi_cmd		com;
3160 	uchar_t				*out_data;
3161 	uchar_t				*rqbuf;
3162 	int				rtn;
3163 	uchar_t				*out_data_rw, *out_data_hd;
3164 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3165 	struct mode_header_grp2		*sense_mhp;
3166 	uchar_t				*sense_page;
3167 	caddr_t				buf;
3168 	int				bd_len;
3169 	int				status;
3170 
3171 	ASSERT(un != NULL);
3172 	ASSERT(mutex_owned(SD_MUTEX(un)));
3173 
3174 	/*
3175 	 * Initialize the writable media flag to FALSE; it is set to TRUE
3176 	 * only if the configuration info tells us the media is writable.
3177 	 */
3178 	un->un_f_mmc_writable_media = FALSE;
3179 	mutex_exit(SD_MUTEX(un));
3180 
3181 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3182 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3183 
3184 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3185 	    out_data, SD_PROFILE_HEADER_LEN);
3186 
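	/*
	 * Per MMC (a sketch, not a normative layout), bytes 6-7 of the
	 * GET CONFIGURATION header hold the current profile; 0x0012 is
	 * the DVD-RAM profile, which is what the test below looks for.
	 */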
3187 	mutex_enter(SD_MUTEX(un));
3188 	if (rtn == 0) {
3189 		/*
3190 		 * We have good information, check for writable DVD.
3191 		 */
3192 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3193 			un->un_f_mmc_writable_media = TRUE;
3194 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3195 			kmem_free(rqbuf, SENSE_LENGTH);
3196 			return;
3197 		}
3198 	}
3199 
3200 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3201 	kmem_free(rqbuf, SENSE_LENGTH);
3202 
3203 	/*
3204 	 * Determine if this is a RRD type device.
3205 	 */
3206 	mutex_exit(SD_MUTEX(un));
3207 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3208 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3209 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3210 	mutex_enter(SD_MUTEX(un));
3211 	if (status != 0) {
3212 		/* command failed; just return */
3213 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3214 		return;
3215 	}
3216 
3217 	/* Get to the page data */
3218 	sense_mhp = (struct mode_header_grp2 *)buf;
3219 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3220 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3221 		/*
3222 		 * We did not get back the expected block descriptor length so
3223 		 * we cannot check the mode page.
3224 		 */
3225 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3226 		    "sd_check_for_writable_cd: Mode Sense returned "
3227 		    "invalid block descriptor length\n");
3228 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3229 		return;
3230 	}
3231 
3232 	/*
3233 	 * If the device presents DVD or CD capabilities in the mode
3234 	 * page, we can return here since a RRD device will not have
3235 	 * these capabilities.
3236 	 */
3237 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3238 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3239 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3240 		return;
3241 	}
3242 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3243 
3244 	/*
3245 	 * If un->un_f_mmc_writable_media is still FALSE,
3246 	 * check for RRD type media.  A RRD device is identified
3247 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3248 	 */
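	/*
	 * In the feature checks below (per MMC, sketched from the GET
	 * CONFIGURATION response layout): bytes 8-9 hold the feature code
	 * of the descriptor that follows the 8-byte header, and bit 0 of
	 * byte 10 is the "current" bit indicating the feature applies to
	 * the mounted media.
	 */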
3249 	mutex_exit(SD_MUTEX(un));
3250 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3251 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3252 
3253 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3254 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3255 	    RANDOM_WRITABLE);
3256 	if (rtn != 0) {
3257 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3258 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3259 		mutex_enter(SD_MUTEX(un));
3260 		return;
3261 	}
3262 
3263 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3264 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3265 
3266 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3267 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3268 	    HARDWARE_DEFECT_MANAGEMENT);
3269 	mutex_enter(SD_MUTEX(un));
3270 	if (rtn == 0) {
3271 		/*
3272 		 * We have good information, check for random writable
3273 		 * and hardware defect features as current.
3274 		 */
3275 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3276 		    (out_data_rw[10] & 0x1) &&
3277 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3278 		    (out_data_hd[10] & 0x1)) {
3279 			un->un_f_mmc_writable_media = TRUE;
3280 		}
3281 	}
3282 
3283 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3284 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3285 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3286 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3287 }
3288 
3289 /*
3290  *    Function: sd_read_unit_properties
3291  *
3292  * Description: The following implements a property lookup mechanism.
3293  *		Properties for particular disks (keyed on vendor, model
3294  *		and rev numbers) are sought in the sd.conf file via
3295  *		sd_process_sdconf_file(), and if not found there, are
3296  *		looked for in a list hardcoded in this driver via
3297  *		sd_process_sdconf_table().  Once located, the properties
3298  *		are used to update the driver unit structure.
3299  *
3300  *   Arguments: un - driver soft state (unit) structure
3301  */
3302 
3303 static void
3304 sd_read_unit_properties(struct sd_lun *un)
3305 {
3306 	/*
3307 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3308 	 * the "sd-config-list" property (from the sd.conf file) or if
3309 	 * there was not a match for the inquiry vid/pid. If this event
3310 	 * occurs the static driver configuration table is searched for
3311 	 * a match.
3312 	 */
3313 	ASSERT(un != NULL);
3314 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3315 		sd_process_sdconf_table(un);
3316 	}
3317 
3318 	/* check for LSI device */
3319 	sd_is_lsi(un);
3322 }
3323 
3324 
3325 /*
3326  *    Function: sd_process_sdconf_file
3327  *
3328  * Description: Use ddi_getlongprop to obtain the properties from the
3329  *		driver's config file (ie, sd.conf) and update the driver
3330  *		soft state structure accordingly.
3331  *
3332  *   Arguments: un - driver soft state (unit) structure
3333  *
3334  * Return Code: SD_SUCCESS - The properties were successfully set according
3335  *			     to the driver configuration file.
3336  *		SD_FAILURE - The driver config list was not obtained or
3337  *			     there was no vid/pid match. This indicates that
3338  *			     the static config table should be used.
3339  *
3340  * The config file has a property, "sd-config-list", which consists of
3341  * one or more duplets as follows:
3342  *
3343  *  sd-config-list=
3344  *	<duplet>,
3345  *	[<duplet>,]
3346  *	[<duplet>];
3347  *
3348  * The structure of each duplet is as follows:
3349  *
3350  *  <duplet>:= <vid+pid>,<data-property-name_list>
3351  *
3352  * The first entry of the duplet is the device ID string (the concatenated
3353  * vid & pid; not to be confused with a device_id).  This is defined in
3354  * the same way as in the sd_disk_table.
3355  *
3356  * The second part of the duplet is a string that identifies a
3357  * data-property-name-list. The data-property-name-list is defined as
3358  * follows:
3359  *
3360  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3361  *
3362  * The syntax of <data-property-name> depends on the <version> field.
3363  *
3364  * If version = SD_CONF_VERSION_1 we have the following syntax:
3365  *
3366  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3367  *
3368  * where the prop0 value will be used to set prop0 if bit0 is set in the
3369  * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1
3370  *
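 * As an illustration only (a hypothetical entry, not one shipped in any
 * sd.conf), a version 1 duplet might look like:
 *
 *  sd-config-list= "SEAGATE ST39103", "example-tunables";
 *  example-tunables= 1,0x1,32;
 *
 * Here <version> is 1, <flags> is 0x1 (bit0, SD_CONF_BSET_THROTTLE), and
 * prop0 sets the throttle to 32.
 *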
3371  */
3372 
3373 static int
3374 sd_process_sdconf_file(struct sd_lun *un)
3375 {
3376 	char	*config_list = NULL;
3377 	int	config_list_len;
3378 	int	len;
3379 	int	dupletlen = 0;
3380 	char	*vidptr;
3381 	int	vidlen;
3382 	char	*dnlist_ptr;
3383 	char	*dataname_ptr;
3384 	int	dnlist_len;
3385 	int	dataname_len;
3386 	int	*data_list;
3387 	int	data_list_len;
3388 	int	rval = SD_FAILURE;
3389 	int	i;
3390 
3391 	ASSERT(un != NULL);
3392 
3393 	/* Obtain the configuration list associated with the .conf file */
3394 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3395 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3396 	    != DDI_PROP_SUCCESS) {
3397 		return (SD_FAILURE);
3398 	}
3399 
3400 	/*
3401 	 * Compare vids in each duplet to the inquiry vid - if a match is
3402 	 * made, get the data value and update the soft state structure
3403 	 * accordingly.
3404 	 *
3405 	 * Note: This algorithm is complex and difficult to maintain. It should
3406 	 * be replaced with a more robust implementation.
3407 	 */
3408 	for (len = config_list_len, vidptr = config_list; len > 0;
3409 	    vidptr += dupletlen, len -= dupletlen) {
3410 		/*
3411 		 * Note: The assumption here is that each vid entry is a
3412 		 * separate string from its associated duplet data.
3413 		 */
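		/*
		 * Illustrative sketch (hypothetical values): the property
		 * value arrives from ddi_getlongprop() as a run of
		 * NUL-separated strings, e.g.
		 *
		 *   "SEAGATE ST39103\0example-tunables\0"
		 *
		 * so strlen(vidptr) measures just the vid/pid entry and the
		 * data-property-name list begins one byte past its NUL.
		 */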
3414 		vidlen = dupletlen = (int)strlen(vidptr);
3415 		if ((vidlen == 0) ||
3416 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3417 			dupletlen++;
3418 			continue;
3419 		}
3420 
3421 		/*
3422 		 * dnlist contains 1 or more blank separated
3423 		 * data-property-name entries
3424 		 */
3425 		dnlist_ptr = vidptr + vidlen + 1;
3426 		dnlist_len = (int)strlen(dnlist_ptr);
3427 		dupletlen += dnlist_len + 2;
3428 
3429 		/*
3430 		 * Set a pointer for the first data-property-name
3431 		 * entry in the list
3432 		 */
3433 		dataname_ptr = dnlist_ptr;
3434 		dataname_len = 0;
3435 
3436 		/*
3437 		 * Loop through all data-property-name entries in the
3438 		 * data-property-name-list setting the properties for each.
3439 		 */
3440 		while (dataname_len < dnlist_len) {
3441 			int version;
3442 
3443 			/*
3444 			 * Determine the length of the current
3445 			 * data-property-name entry by indexing until a
3446 			 * blank or NUL is encountered. When a space is
3447 			 * encountered, replace it with a NUL so the entry is
3448 			 * a proper string for ddi_getlongprop().
3449 			 */
3450 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3451 			    (dataname_ptr[i] != '\0')); i++) {
3452 				;
3453 			}
3454 
3455 			dataname_len += i;
3456 			/* If not NUL-terminated, make it so */
3457 			if (dataname_ptr[i] == ' ') {
3458 				dataname_ptr[i] = '\0';
3459 			}
3460 			dataname_len++;
3461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3462 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3463 			    vidptr, dataname_ptr);
3464 
3465 			/* Get the data list */
3466 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3467 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3468 			    != DDI_PROP_SUCCESS) {
3469 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3470 				    "sd_process_sdconf_file: data property (%s)"
3471 				    " has no value\n", dataname_ptr);
3472 				dataname_ptr = dnlist_ptr + dataname_len;
3473 				continue;
3474 			}
3475 
3476 			version = data_list[0];
3477 
3478 			if (version == SD_CONF_VERSION_1) {
3479 				sd_tunables values;
3480 
3481 				/* Set the properties */
3482 				if (sd_chk_vers1_data(un, data_list[1],
3483 				    &data_list[2], data_list_len, dataname_ptr)
3484 				    == SD_SUCCESS) {
3485 					sd_get_tunables_from_conf(un,
3486 					    data_list[1], &data_list[2],
3487 					    &values);
3488 					sd_set_vers1_properties(un,
3489 					    data_list[1], &values);
3490 					rval = SD_SUCCESS;
3491 				} else {
3492 					rval = SD_FAILURE;
3493 				}
3494 			} else {
3495 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3496 				    "data property %s version 0x%x is invalid.",
3497 				    dataname_ptr, version);
3498 				rval = SD_FAILURE;
3499 			}
3500 			kmem_free(data_list, data_list_len);
3501 			dataname_ptr = dnlist_ptr + dataname_len;
3502 		}
3503 	}
3504 
3505 	/* free up the memory allocated by ddi_getlongprop */
3506 	if (config_list) {
3507 		kmem_free(config_list, config_list_len);
3508 	}
3509 
3510 	return (rval);
3511 }
3512 
3513 /*
3514  *    Function: sd_get_tunables_from_conf()
3515  *
3516  *
3517  *    This function reads the data list from the sd.conf file and pulls
3518  *    out the values that are numeric, placing each one in the
3519  *    appropriate sd_tunables member.
3520  *    Since the order of the data list members varies across platforms,
3521  *    this function reads them from the data list in a platform-specific
3522  *    order and places them into the correct sd_tunables member, which
3523  *    is consistent across all platforms.
3524  */
3525 static void
3526 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3527     sd_tunables *values)
3528 {
3529 	int i;
3530 	int mask;
3531 
3532 	bzero(values, sizeof (sd_tunables));
3533 
3534 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3535 
3536 		mask = 1 << i;
3537 		if (mask > flags) {
3538 			break;
3539 		}
3540 
3541 		switch (mask & flags) {
3542 		case 0:	/* This mask bit not set in flags */
3543 			continue;
3544 		case SD_CONF_BSET_THROTTLE:
3545 			values->sdt_throttle = data_list[i];
3546 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3547 			    "sd_get_tunables_from_conf: throttle = %d\n",
3548 			    values->sdt_throttle);
3549 			break;
3550 		case SD_CONF_BSET_CTYPE:
3551 			values->sdt_ctype = data_list[i];
3552 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3553 			    "sd_get_tunables_from_conf: ctype = %d\n",
3554 			    values->sdt_ctype);
3555 			break;
3556 		case SD_CONF_BSET_NRR_COUNT:
3557 			values->sdt_not_rdy_retries = data_list[i];
3558 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3559 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3560 			    values->sdt_not_rdy_retries);
3561 			break;
3562 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3563 			values->sdt_busy_retries = data_list[i];
3564 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3565 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3566 			    values->sdt_busy_retries);
3567 			break;
3568 		case SD_CONF_BSET_RST_RETRIES:
3569 			values->sdt_reset_retries = data_list[i];
3570 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3571 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3572 			    values->sdt_reset_retries);
3573 			break;
3574 		case SD_CONF_BSET_RSV_REL_TIME:
3575 			values->sdt_reserv_rel_time = data_list[i];
3576 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3577 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3578 			    values->sdt_reserv_rel_time);
3579 			break;
3580 		case SD_CONF_BSET_MIN_THROTTLE:
3581 			values->sdt_min_throttle = data_list[i];
3582 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3583 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3584 			    values->sdt_min_throttle);
3585 			break;
3586 		case SD_CONF_BSET_DISKSORT_DISABLED:
3587 			values->sdt_disk_sort_dis = data_list[i];
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3590 			    values->sdt_disk_sort_dis);
3591 			break;
3592 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3593 			values->sdt_lun_reset_enable = data_list[i];
3594 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3595 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3596 			    "\n", values->sdt_lun_reset_enable);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 /*
3603  *    Function: sd_process_sdconf_table
3604  *
3605  * Description: Search the static configuration table for a match on the
3606  *		inquiry vid/pid and update the driver soft state structure
3607  *		according to the table property values for the device.
3608  *
3609  *		The form of a configuration table entry is:
3610  *		  <vid+pid>,<flags>,<property-data>
3611  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3612  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3613  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3614  *
3615  *   Arguments: un - driver soft state (unit) structure
3616  */
3617 
3618 static void
3619 sd_process_sdconf_table(struct sd_lun *un)
3620 {
3621 	char	*id = NULL;
3622 	int	table_index;
3623 	int	idlen;
3624 
3625 	ASSERT(un != NULL);
3626 	for (table_index = 0; table_index < sd_disk_table_size;
3627 	    table_index++) {
3628 		id = sd_disk_table[table_index].device_id;
3629 		idlen = strlen(id);
3630 		if (idlen == 0) {
3631 			continue;
3632 		}
3633 
3634 		/*
3635 		 * The static configuration table currently does not
3636 		 * implement version 10 properties. Additionally,
3637 		 * multiple data-property-name entries are not
3638 		 * implemented in the static configuration table.
3639 		 */
3640 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3641 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3642 			    "sd_process_sdconf_table: disk %s\n", id);
3643 			sd_set_vers1_properties(un,
3644 			    sd_disk_table[table_index].flags,
3645 			    sd_disk_table[table_index].properties);
3646 			break;
3647 		}
3648 	}
3649 }
3650 
3651 
3652 /*
3653  *    Function: sd_sdconf_id_match
3654  *
3655  * Description: This local function implements a case-insensitive vid/pid
3656  *		comparison as well as the boundary cases of wild card and
3657  *		multiple blanks.
3658  *
3659  *		Note: An implicit assumption made here is that the scsi
3660  *		inquiry structure will always keep the vid, pid and
3661  *		revision strings in consecutive sequence, so they can be
3662  *		read as a single string. If this assumption is not the
3663  *		case, a separate string, to be used for the check, needs
3664  *		to be built with these strings concatenated.
3665  *
3666  *   Arguments: un - driver soft state (unit) structure
3667  *		id - table or config file vid/pid
3668  *		idlen  - length of the vid/pid (bytes)
3669  *
3670  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3671  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3672  */
3673 
3674 static int
3675 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3676 {
3677 	struct scsi_inquiry	*sd_inq;
3678 	int 			rval = SD_SUCCESS;
3679 
3680 	ASSERT(un != NULL);
3681 	sd_inq = un->un_sd->sd_inq;
3682 	ASSERT(id != NULL);
3683 
3684 	/*
3685 	 * We use the inq_vid as a pointer to a buffer containing the
3686 	 * vid and pid and use the entire vid/pid length of the table
3687 	 * entry for the comparison. This works because the inq_pid
3688 	 * data member follows inq_vid in the scsi_inquiry structure.
3689 	 */
3690 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3691 		/*
3692 		 * The user id string is compared to the inquiry vid/pid
3693 		 * using a case insensitive comparison and ignoring
3694 		 * multiple spaces.
3695 		 */
3696 		rval = sd_blank_cmp(un, id, idlen);
3697 		if (rval != SD_SUCCESS) {
3698 			/*
3699 			 * User id strings that start and end with a "*"
3700 			 * are a special case. These do not have a
3701 			 * specific vendor, and the product string can
3702 			 * appear anywhere in the 16 byte PID portion of
3703 			 * the inquiry data. This is a simple strstr()
3704 			 * type search for the user id in the inquiry data.
3705 			 */
3706 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3707 				char	*pidptr = &id[1];
3708 				int	i;
3709 				int	j;
3710 				int	pidstrlen = idlen - 2;
3711 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3712 				    pidstrlen;
3713 
3714 				if (j < 0) {
3715 					return (SD_FAILURE);
3716 				}
3717 				for (i = 0; i < j; i++) {
3718 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3719 					    pidptr, pidstrlen) == 0) {
3720 						rval = SD_SUCCESS;
3721 						break;
3722 					}
3723 				}
3724 			}
3725 		}
3726 	}
3727 	return (rval);
3728 }
3729 
3730 
3731 /*
3732  *    Function: sd_blank_cmp
3733  *
3734  * Description: If the id string starts and ends with a space, treat
3735  *		multiple consecutive spaces as equivalent to a single
3736  *		space. For example, this causes a sd_disk_table entry
3737  *		of " NEC CDROM " to match a device's id string of
3738  *		"NEC       CDROM".
3739  *
3740  *		Note: The success exit condition for this routine is if
3741  *		the pointer to the table entry is '\0' and the cnt of
3742  *		the inquiry length is zero. This will happen if the inquiry
3743  *		string returned by the device is padded with spaces to be
3744  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3745  *		SCSI spec states that the inquiry string is to be padded with
3746  *		spaces.
3747  *
3748  *   Arguments: un - driver soft state (unit) structure
3749  *		id - table or config file vid/pid
3750  *		idlen  - length of the vid/pid (bytes)
3751  *
3752  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3753  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3754  */
3755 
3756 static int
3757 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3758 {
3759 	char		*p1;
3760 	char		*p2;
3761 	int		cnt;
3762 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3763 	    sizeof (SD_INQUIRY(un)->inq_pid);
3764 
3765 	ASSERT(un != NULL);
3766 	p2 = un->un_sd->sd_inq->inq_vid;
3767 	ASSERT(id != NULL);
3768 	p1 = id;
3769 
3770 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3771 		/*
3772 		 * Note: string p1 is terminated by a NUL but string p2
3773 		 * isn't.  The end of p2 is determined by cnt.
3774 		 */
3775 		for (;;) {
3776 			/* skip over any extra blanks in both strings */
3777 			while ((*p1 != '\0') && (*p1 == ' ')) {
3778 				p1++;
3779 			}
3780 			while ((cnt != 0) && (*p2 == ' ')) {
3781 				p2++;
3782 				cnt--;
3783 			}
3784 
3785 			/* compare the two strings */
3786 			if ((cnt == 0) ||
3787 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3788 				break;
3789 			}
3790 			while ((cnt > 0) &&
3791 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3792 				p1++;
3793 				p2++;
3794 				cnt--;
3795 			}
3796 		}
3797 	}
3798 
3799 	/* return SD_SUCCESS if both strings match */
3800 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3801 }
3802 
3803 
3804 /*
3805  *    Function: sd_chk_vers1_data
3806  *
3807  * Description: Verify the version 1 device properties provided by the
3808  *		user via the configuration file
3809  *
3810  *   Arguments: un	     - driver soft state (unit) structure
3811  *		flags	     - integer mask indicating properties to be set
3812  *		prop_list    - integer list of property values
3813  *		list_len     - length of user provided data
3814  *
3815  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3816  *		SD_FAILURE - Indicates the user provided data is invalid
3817  */
3818 
3819 static int
3820 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3821     int list_len, char *dataname_ptr)
3822 {
3823 	int i;
3824 	int mask = 1;
3825 	int index = 0;
3826 
3827 	ASSERT(un != NULL);
3828 
3829 	/* Check for a NULL property name and list */
3830 	if (dataname_ptr == NULL) {
3831 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3832 		    "sd_chk_vers1_data: NULL data property name.");
3833 		return (SD_FAILURE);
3834 	}
3835 	if (prop_list == NULL) {
3836 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3837 		    "sd_chk_vers1_data: %s NULL data property list.",
3838 		    dataname_ptr);
3839 		return (SD_FAILURE);
3840 	}
3841 
3842 	/* Display a warning if undefined bits are set in the flags */
3843 	if (flags & ~SD_CONF_BIT_MASK) {
3844 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3845 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3846 		    "Properties not set.",
3847 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3848 		return (SD_FAILURE);
3849 	}
3850 
3851 	/*
3852 	 * Verify the length of the list by identifying the highest bit set
3853 	 * in the flags and validating that the property list has a length
3854 	 * up to the index of this bit.
3855 	 */
3856 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3857 		mask = 1 << i;
3858 		if (flags & mask) {
3859 			index++;
3860 		}
3861 	}
3862 	if ((list_len / sizeof (int)) < (index + 2)) {
3863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3864 		    "sd_chk_vers1_data: "
3865 		    "Data property list %s size is incorrect. "
3866 		    "Properties not set.", dataname_ptr);
3867 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3868 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3869 		return (SD_FAILURE);
3870 	}
3871 	return (SD_SUCCESS);
3872 }
3873 
3874 
3875 /*
3876  *    Function: sd_set_vers1_properties
3877  *
3878  * Description: Set version 1 device properties based on a property list
3879  *		retrieved from the driver configuration file or static
3880  *		configuration table. Version 1 properties have the format:
3881  *
3882  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3883  *
3884  *		where the prop0 value will be used to set prop0 if bit0
3885  *		is set in the flags
3886  *
3887  *   Arguments: un	     - driver soft state (unit) structure
3888  *		flags	     - integer mask indicating properties to be set
3889  *		prop_list    - integer list of property values
3890  */
3891 
3892 static void
3893 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3894 {
3895 	ASSERT(un != NULL);
3896 
3897 	/*
3898 	 * Set the flag to indicate cache is to be disabled. An attempt
3899 	 * to disable the cache via sd_cache_control() will be made
3900 	 * later during attach once the basic initialization is complete.
3901 	 */
3902 	if (flags & SD_CONF_BSET_NOCACHE) {
3903 		un->un_f_opt_disable_cache = TRUE;
3904 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3905 		    "sd_set_vers1_properties: caching disabled flag set\n");
3906 	}
3907 
3908 	/* CD-specific configuration parameters */
3909 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3910 		un->un_f_cfg_playmsf_bcd = TRUE;
3911 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3912 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3913 	}
3914 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3915 		un->un_f_cfg_readsub_bcd = TRUE;
3916 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3917 		    "sd_set_vers1_properties: readsub_bcd set\n");
3918 	}
3919 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3920 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3921 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3922 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3923 	}
3924 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3925 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3926 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3928 	}
3929 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3930 		un->un_f_cfg_no_read_header = TRUE;
3931 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3932 		    "sd_set_vers1_properties: no_read_header set\n");
3933 	}
3934 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3935 		un->un_f_cfg_read_cd_xd4 = TRUE;
3936 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3937 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3938 	}
3939 
3940 	/* Support for devices which do not have valid/unique serial numbers */
3941 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3942 		un->un_f_opt_fab_devid = TRUE;
3943 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3944 		    "sd_set_vers1_properties: fab_devid bit set\n");
3945 	}
3946 
3947 	/* Support for user throttle configuration */
3948 	if (flags & SD_CONF_BSET_THROTTLE) {
3949 		ASSERT(prop_list != NULL);
3950 		un->un_saved_throttle = un->un_throttle =
3951 		    prop_list->sdt_throttle;
3952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3953 		    "sd_set_vers1_properties: throttle set to %d\n",
3954 		    prop_list->sdt_throttle);
3955 	}
3956 
3957 	/* Set the per disk retry count according to the conf file or table. */
3958 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3959 		ASSERT(prop_list != NULL);
3960 		if (prop_list->sdt_not_rdy_retries) {
3961 			un->un_notready_retry_count =
3962 				prop_list->sdt_not_rdy_retries;
3963 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 			    "sd_set_vers1_properties: not ready retry count"
3965 			    " set to %d\n", un->un_notready_retry_count);
3966 		}
3967 	}
3968 
3969 	/* The controller type is reported for generic disk driver ioctls */
3970 	if (flags & SD_CONF_BSET_CTYPE) {
3971 		ASSERT(prop_list != NULL);
3972 		switch (prop_list->sdt_ctype) {
3973 		case CTYPE_CDROM:
3974 			un->un_ctype = prop_list->sdt_ctype;
3975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3976 			    "sd_set_vers1_properties: ctype set to "
3977 			    "CTYPE_CDROM\n");
3978 			break;
3979 		case CTYPE_CCS:
3980 			un->un_ctype = prop_list->sdt_ctype;
3981 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3982 			    "sd_set_vers1_properties: ctype set to "
3983 			    "CTYPE_CCS\n");
3984 			break;
3985 		case CTYPE_ROD:		/* RW optical */
3986 			un->un_ctype = prop_list->sdt_ctype;
3987 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3988 			    "sd_set_vers1_properties: ctype set to "
3989 			    "CTYPE_ROD\n");
3990 			break;
3991 		default:
3992 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3993 			    "sd_set_vers1_properties: Could not set "
3994 			    "invalid ctype value (%d)",
3995 			    prop_list->sdt_ctype);
3996 		}
3997 	}
3998 
3999 	/* Purple failover timeout */
4000 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4001 		ASSERT(prop_list != NULL);
4002 		un->un_busy_retry_count =
4003 			prop_list->sdt_busy_retries;
4004 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4005 		    "sd_set_vers1_properties: "
4006 		    "busy retry count set to %d\n",
4007 		    un->un_busy_retry_count);
4008 	}
4009 
4010 	/* Purple reset retry count */
4011 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4012 		ASSERT(prop_list != NULL);
4013 		un->un_reset_retry_count =
4014 			prop_list->sdt_reset_retries;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: "
4017 		    "reset retry count set to %d\n",
4018 		    un->un_reset_retry_count);
4019 	}
4020 
4021 	/* Purple reservation release timeout */
4022 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4023 		ASSERT(prop_list != NULL);
4024 		un->un_reserve_release_time =
4025 			prop_list->sdt_reserv_rel_time;
4026 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4027 		    "sd_set_vers1_properties: "
4028 		    "reservation release timeout set to %d\n",
4029 		    un->un_reserve_release_time);
4030 	}
4031 
4032 	/*
4033 	 * Driver flag telling the driver to verify that no commands are pending
4034 	 * for a device before issuing a Test Unit Ready. This is a workaround
4035 	 * for a firmware bug in some Seagate eliteI drives.
4036 	 */
4037 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4038 		un->un_f_cfg_tur_check = TRUE;
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4040 		    "sd_set_vers1_properties: tur queue check set\n");
4041 	}
4042 
4043 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4044 		un->un_min_throttle = prop_list->sdt_min_throttle;
4045 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4046 		    "sd_set_vers1_properties: min throttle set to %d\n",
4047 		    un->un_min_throttle);
4048 	}
4049 
4050 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4051 		un->un_f_disksort_disabled =
4052 		    (prop_list->sdt_disk_sort_dis != 0) ?
4053 		    TRUE : FALSE;
4054 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4055 		    "sd_set_vers1_properties: disksort disabled "
4056 		    "flag set to %d\n",
4057 		    prop_list->sdt_disk_sort_dis);
4058 	}
4059 
4060 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4061 		un->un_f_lun_reset_enabled =
4062 		    (prop_list->sdt_lun_reset_enable != 0) ?
4063 		    TRUE : FALSE;
4064 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4065 		    "sd_set_vers1_properties: lun reset enabled "
4066 		    "flag set to %d\n",
4067 		    prop_list->sdt_lun_reset_enable);
4068 	}
4069 
4070 	/*
4071 	 * Validate the throttle values.
4072 	 * If any of the numbers are invalid, set everything to defaults.
4073 	 */
4074 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4075 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4076 	    (un->un_min_throttle > un->un_throttle)) {
4077 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4078 		un->un_min_throttle = sd_min_throttle;
4079 	}
4080 }
4081 
4082 /*
4083  *   Function: sd_is_lsi()
4084  *
4085  *   Description: Check for LSI devices by stepping through the static
4086  *	device table to match vid/pid.
4087  *
4088  *   Args: un - ptr to sd_lun
4089  *
4090  *   Notes:  When creating a new LSI property, the new property must be
4091  *		added to this function.
4092  */
4093 static void
4094 sd_is_lsi(struct sd_lun *un)
4095 {
4096 	char	*id = NULL;
4097 	int	table_index;
4098 	int	idlen;
4099 	void	*prop;
4100 
4101 	ASSERT(un != NULL);
4102 	for (table_index = 0; table_index < sd_disk_table_size;
4103 	    table_index++) {
4104 		id = sd_disk_table[table_index].device_id;
4105 		idlen = strlen(id);
4106 		if (idlen == 0) {
4107 			continue;
4108 		}
4109 
4110 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4111 			prop = sd_disk_table[table_index].properties;
4112 			if (prop == &lsi_properties ||
4113 			    prop == &lsi_oem_properties ||
4114 			    prop == &lsi_properties_scsi ||
4115 			    prop == &symbios_properties) {
4116 				un->un_f_cfg_is_lsi = TRUE;
4117 			}
4118 			break;
4119 		}
4120 	}
4121 }
4122 
4123 
4124 /*
4125  * The following routines support reading and interpretation of disk labels,
4126  * including Solaris BE (8-slice) VTOCs, Solaris LE (16-slice) VTOCs, and
4127  * fdisk tables.
4128  */
4129 
4130 /*
4131  *    Function: sd_validate_geometry
4132  *
4133  * Description: Read the label from the disk (if present). Update the unit's
4134  *		geometry and vtoc information from the data in the label.
4135  *		Verify that the label is valid.
4136  *
4137  *   Arguments: un - driver soft state (unit) structure
4138  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4139  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4140  *			to use the USCSI "direct" chain and bypass the normal
4141  *			command waitq.
4142  *
4143  * Return Code: 0 - Successful completion
4144  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4145  *			  un->un_blockcount; or label on disk is corrupted
4146  *			  or unreadable.
4147  *		EACCES  - Reservation conflict at the device.
4148  *		ENOMEM  - Resource allocation error
4149  *		ENOTSUP - geometry not applicable
4150  *
4151  *     Context: Kernel thread only (can sleep).
4152  */
4153 
4154 static int
4155 sd_validate_geometry(struct sd_lun *un, int path_flag)
4156 {
4157 	static	char		labelstring[128];
4158 	static	char		buf[256];
4159 	char	*label		= NULL;
4160 	int	label_error = 0;
4161 	int	gvalid		= un->un_f_geometry_is_valid;
4162 	int	lbasize;
4163 	uint_t	capacity;
4164 	int	count;
4165 
4166 	ASSERT(un != NULL);
4167 	ASSERT(mutex_owned(SD_MUTEX(un)));
4168 
4169 	/*
4170 	 * If the required values are not valid, then try getting them
4171 	 * once via read capacity. If that fails, then fail this call.
4172 	 * This is necessary with the new mpxio failover behavior in
4173 	 * the T300 where we can get an attach for the inactive path
4174 	 * before the active path. The inactive path fails commands with
4175 	 * sense data of 02,04,88 which happens to the read capacity
4176 	 * sense data of 02,04,88; this happens to the read capacity issued
4177 	 * before mpxio has sufficient knowledge to know whether it should
4178 	 * force a failover or not (which it won't do at attach anyhow).
4179 	 * un_blockcount won't be valid.
4180 	 */
4181 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4182 	    (un->un_f_blockcount_is_valid != TRUE)) {
4183 		uint64_t	cap;
4184 		uint32_t	lbasz;
4185 		int		rval;
4186 
4187 		mutex_exit(SD_MUTEX(un));
4188 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4189 		    &lbasz, SD_PATH_DIRECT);
4190 		mutex_enter(SD_MUTEX(un));
4191 		if (rval == 0) {
4192 			/*
4193 			 * The following relies on
4194 			 * sd_send_scsi_READ_CAPACITY never
4195 			 * returning 0 for capacity and/or lbasize.
4196 			 */
4197 			sd_update_block_info(un, lbasz, cap);
4198 		}
4199 
4200 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4201 		    (un->un_f_blockcount_is_valid != TRUE)) {
4202 			return (EINVAL);
4203 		}
4204 	}
4205 
4206 	/*
4207 	 * Copy the lbasize and capacity so that if they're reset while we're
4208 	 * not holding the SD_MUTEX, we will continue to use valid values
4209 	 * after the SD_MUTEX is reacquired. (4119659)
4210 	 */
4211 	lbasize  = un->un_tgt_blocksize;
4212 	capacity = un->un_blockcount;
4213 
4214 #if defined(_SUNOS_VTOC_16)
4215 	/*
4216 	 * Set up the "whole disk" fdisk partition; this should always
4217 	 * exist, regardless of whether the disk contains an fdisk table
4218 	 * or vtoc.
4219 	 */
4220 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4221 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4222 #endif
4223 
4224 	/*
4225 	 * Refresh the logical and physical geometry caches.
4226 	 * (data from MODE SENSE format/rigid disk geometry pages,
4227 	 * and scsi_ifgetcap("geometry")).
4228 	 */
4229 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4230 
4231 	label_error = sd_use_efi(un, path_flag);
4232 	if (label_error == 0) {
4233 		/* found a valid EFI label */
4234 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4235 			"sd_validate_geometry: found EFI label\n");
4236 		un->un_solaris_offset = 0;
4237 		un->un_solaris_size = capacity;
4238 		return (ENOTSUP);
4239 	}
4240 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4241 		if (label_error == ESRCH) {
4242 			/*
4243 			 * they've configured a LUN over 1TB, but used
4244 			 * format.dat to restrict format's view of the
4245 			 * capacity to be under 1TB
4246 			 */
4247 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4248 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4249 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4250 "size to be < 1TB or relabel the disk with an EFI label");
4251 		} else {
4252 			/* unlabeled disk over 1TB */
4253 #if defined(__i386) || defined(__amd64)
4254 			/*
4255 			 * Refer to comments on off-by-1 at the head of the
4256 			 * file.  A 1TB disk was treated as (1T - 512)B in
4257 			 * the past; thus, it might have a valid Solaris
4258 			 * partition.  We will return ENOTSUP later only if
4259 			 * this disk has no valid Solaris partition.
4260 			 */
4261 			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
4262 			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
4263 			    un->un_f_has_removable_media ||
4264 			    un->un_f_is_hotpluggable)
4265 #endif
4266 				return (ENOTSUP);
4267 		}
4268 	}
4269 	label_error = 0;
4270 
4271 	/*
4272 	 * at this point it is either labeled with a VTOC or it is
4273 	 * under 1TB (<= 1TB actually for off-by-1)
4274 	 */
4275 	if (un->un_f_vtoc_label_supported) {
4276 		struct	dk_label *dkl;
4277 		offset_t dkl1;
4278 		offset_t label_addr, real_addr;
4279 		int	rval;
4280 		size_t	buffer_size;
4281 
4282 		/*
4283 		 * Note: This will set up un->un_solaris_size and
4284 		 * un->un_solaris_offset.
4285 		 */
4286 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4287 		case SD_CMD_RESERVATION_CONFLICT:
4288 			ASSERT(mutex_owned(SD_MUTEX(un)));
4289 			return (EACCES);
4290 		case SD_CMD_FAILURE:
4291 			ASSERT(mutex_owned(SD_MUTEX(un)));
4292 			return (ENOMEM);
4293 		}
4294 
4295 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4296 
4297 #if defined(__i386) || defined(__amd64)
4298 			/*
4299 			 * Refer to comments on off-by-1 at the head of the
4300 			 * file.  This is for a 1TB disk only.  Since there
4301 			 * are no Solaris partitions, return ENOTSUP as we
4302 			 * do for a >1TB disk.
4303 			 */
4304 			if (un->un_blockcount > DK_MAX_BLOCKS)
4305 				return (ENOTSUP);
4306 #endif
4307 			/*
4308 			 * Found fdisk table but no Solaris partition entry,
4309 			 * so don't call sd_uselabel() and don't create
4310 			 * a default label.
4311 			 */
4312 			label_error = 0;
4313 			un->un_f_geometry_is_valid = TRUE;
4314 			goto no_solaris_partition;
4315 		}
4316 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4317 
4318 #if defined(__i386) || defined(__amd64)
4319 		/*
4320 		 * Refer to comments on off-by-1 at the head of the file.
4321 		 * This 1TB disk has a valid Solaris partition.  It must
4322 		 * have been created by a previous sd driver, so we have
4323 		 * to treat it as (1T-512)B.
4324 		 */
4325 		if (un->un_blockcount > DK_MAX_BLOCKS) {
4326 			un->un_f_capacity_adjusted = 1;
4327 			un->un_blockcount = DK_MAX_BLOCKS;
4328 			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;
4329 
4330 			/*
4331 			 * Refer to sd_read_fdisk: when there is no
4332 			 * fdisk partition table, un_solaris_size is
4333 			 * set to the disk's capacity. In this case,
4334 			 * we need to adjust it.
4335 			 */
4336 			if (un->un_solaris_size > DK_MAX_BLOCKS)
4337 				un->un_solaris_size = DK_MAX_BLOCKS;
4338 			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
4339 			    lbasize, path_flag);
4340 		}
4341 #endif
4342 
4343 		/*
4344 		 * If sys_blocksize != tgt_blocksize, we need to re-adjust
4345 		 * the blkno and save the offset to the beginning of dk_label.
4346 		 */
4347 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4348 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4349 		    sizeof (struct dk_label));
4350 
4351 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4352 		    "label_addr: 0x%x allocation size: 0x%x\n",
4353 		    label_addr, buffer_size);
4354 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4355 		if (dkl == NULL) {
4356 			return (ENOMEM);
4357 		}
4358 
4359 		mutex_exit(SD_MUTEX(un));
4360 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4361 		    path_flag);
4362 		mutex_enter(SD_MUTEX(un));
4363 
4364 		switch (rval) {
4365 		case 0:
4366 			/*
4367 			 * sd_uselabel will establish that the geometry
4368 			 * is valid.
4369 			 * For sys_blocksize != tgt_blocksize, need
4370 			 * to index into the beginning of dk_label
4371 			 */
4372 			dkl1 = (daddr_t)dkl
4373 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4374 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4375 			    path_flag) != SD_LABEL_IS_VALID) {
4376 				label_error = EINVAL;
4377 			}
4378 			break;
4379 		case EACCES:
4380 			label_error = EACCES;
4381 			break;
4382 		default:
4383 			label_error = EINVAL;
4384 			break;
4385 		}
4386 
4387 		kmem_free(dkl, buffer_size);
4388 
4389 #if defined(_SUNOS_VTOC_8)
4390 		label = (char *)un->un_asciilabel;
4391 #elif defined(_SUNOS_VTOC_16)
4392 		label = (char *)un->un_vtoc.v_asciilabel;
4393 #else
4394 #error "No VTOC format defined."
4395 #endif
4396 	}
4397 
4398 	/*
4399 	 * If a valid label was not found, AND if no reservation conflict
4400 	 * was detected, then go ahead and create a default label (4069506).
4401 	 */
4402 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4403 		if (un->un_f_geometry_is_valid == FALSE) {
4404 			sd_build_default_label(un);
4405 		}
4406 		label_error = 0;
4407 	}
4408 
4409 no_solaris_partition:
4410 	if ((!un->un_f_has_removable_media ||
4411 	    (un->un_f_has_removable_media &&
4412 		un->un_mediastate == DKIO_EJECTED)) &&
4413 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
4414 		/*
4415 		 * Print out a message indicating who and what we are.
4416 		 * We do this only when we happen to really validate the
4417 		 * geometry. We may call sd_validate_geometry() at other
4418 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4419 		 * don't want to print the label.
4420 		 * If the geometry is valid, print the label string,
4421 		 * else print vendor and product info, if available
4422 		 */
4423 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4424 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4425 		} else {
4426 			mutex_enter(&sd_label_mutex);
4427 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4428 			    labelstring);
4429 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4430 			    &labelstring[64]);
4431 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4432 			    labelstring, &labelstring[64]);
4433 			if (un->un_f_blockcount_is_valid == TRUE) {
4434 				(void) sprintf(&buf[strlen(buf)],
4435 				    ", %llu %u byte blocks\n",
4436 				    (longlong_t)un->un_blockcount,
4437 				    un->un_tgt_blocksize);
4438 			} else {
4439 				(void) sprintf(&buf[strlen(buf)],
4440 				    ", (unknown capacity)\n");
4441 			}
4442 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4443 			mutex_exit(&sd_label_mutex);
4444 		}
4445 	}
4446 
4447 #if defined(_SUNOS_VTOC_16)
4448 	/*
4449 	 * If we have valid geometry, set up the remaining fdisk partitions.
4450 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4451 	 * we set it to an entirely bogus value.
4452 	 */
4453 	for (count = 0; count < FD_NUMPART; count++) {
4454 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4455 		un->un_map[FDISK_P1 + count].dkl_nblk =
4456 		    un->un_fmap[count].fmap_nblk;
4457 
4458 		un->un_offset[FDISK_P1 + count] =
4459 		    un->un_fmap[count].fmap_start;
4460 	}
4461 #endif
4462 
4463 	for (count = 0; count < NDKMAP; count++) {
4464 #if defined(_SUNOS_VTOC_8)
4465 		struct dk_map *lp  = &un->un_map[count];
4466 		un->un_offset[count] =
4467 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4468 #elif defined(_SUNOS_VTOC_16)
4469 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4470 
4471 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4472 #else
4473 #error "No VTOC format defined."
4474 #endif
4475 	}
4476 
4477 	return (label_error);
4478 }
4479 
4480 
4481 #if defined(_SUNOS_VTOC_16)
4482 /*
4483  * Macro: MAX_BLKS
4484  *
4485  *	This macro is used for table entries where we need to have the largest
4486  *	possible sector value for that head & SPT (sectors per track)
4487  *	combination.  Other entries for some smaller disk sizes are set by
4488  *	convention to match those used by X86 BIOS usage.
4489  */
4490 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
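
/*
 * For example, MAX_BLKS(255, 63) expands to the initializer values
 * "65535 * 255 * 63, 255, 63", i.e. a max_cap of 1,052,819,775 blocks,
 * which at 512 bytes per block is the 502.02GB figure cited in the
 * table below.
 */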
4491 
4492 /*
4493  *    Function: sd_convert_geometry
4494  *
4495  * Description: Convert physical geometry into a dk_geom structure. In
4496  *		other words, make sure we don't wrap 16-bit values.
4497  *		e.g. converting from geom_cache to dk_geom
4498  *
4499  *     Context: Kernel thread only
4500  */
4501 static void
4502 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4503 {
4504 	int i;
4505 	static const struct chs_values {
4506 		uint_t max_cap;		/* Max Capacity for this HS. */
4507 		uint_t nhead;		/* Heads to use. */
4508 		uint_t nsect;		/* SPT to use. */
4509 	} CHS_values[] = {
4510 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4511 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4512 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4513 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4514 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4515 	};
4516 
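	/*
	 * Worked example (hypothetical disk): 41,943,040 blocks (20GB at
	 * 512 bytes/block) exceeds the 8GB entry but is under
	 * MAX_BLKS(255, 63), so the search below selects nhead = 255 and
	 * nsect = 63; cylinders are then derived from these as
	 * capacity / (nhead * nsect) = 41943040 / 16065, about 2610.
	 */
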
4517 	/* Unlabeled SCSI floppy device */
4518 	if (capacity <= 0x1000) {
4519 		un_g->dkg_nhead = 2;
4520 		un_g->dkg_ncyl = 80;
4521 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4522 		return;
4523 	}
4524 
4525 	/*
4526 	 * For all devices we calculate cylinders using the
4527 	 * heads and sectors we assign based on capacity of the
4528 	 * device.  The table is designed to be compatible with the
4529 	 * way other operating systems lay out fdisk tables for X86
4530 	 * and to ensure that the cylinders never exceed 65535 to
4531 	 * prevent problems with X86 ioctls that report geometry.
4532 	 * We use SPT values that are multiples of 63: other OSes,
4533 	 * which are not limited to 16 bits for cylinders, stop at
4534 	 * 63 SPT, and we make do by using multiples of 63 SPT.
4535 	 *
4536 	 * Note that capacities greater than or equal to 1TB will simply
4537 	 * get the largest geometry from the table. This should be okay
4538 	 * since disks this large shouldn't be using CHS values anyway.
4539 	 */
4540 	for (i = 0; CHS_values[i].max_cap < capacity &&
4541 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4542 		;
4543 
4544 	un_g->dkg_nhead = CHS_values[i].nhead;
4545 	un_g->dkg_nsect = CHS_values[i].nsect;
4546 }
4547 #endif
4548 
4549 
4550 /*
4551  *    Function: sd_resync_geom_caches
4552  *
4553  * Description: (Re)initialize both geometry caches: the virtual geometry
4554  *		information is extracted from the HBA (the "geometry"
4555  *		capability), and the physical geometry cache data is
4556  *		generated by issuing MODE SENSE commands.
4557  *
4558  *   Arguments: un - driver soft state (unit) structure
4559  *		capacity - disk capacity in #blocks
4560  *		lbasize - disk block size in bytes
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  *     Context: Kernel thread only (can sleep).
4567  */
4568 
4569 static void
4570 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4571 	int path_flag)
4572 {
4573 	struct 	geom_cache 	pgeom;
4574 	struct 	geom_cache	*pgeom_p = &pgeom;
4575 	int 	spc;
4576 	unsigned short nhead;
4577 	unsigned short nsect;
4578 
4579 	ASSERT(un != NULL);
4580 	ASSERT(mutex_owned(SD_MUTEX(un)));
4581 
4582 	/*
4583 	 * Ask the controller for its logical geometry.
4584 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4585 	 * then the lgeom cache will be invalid.
4586 	 */
4587 	sd_get_virtual_geometry(un, capacity, lbasize);
4588 
4589 	/*
4590 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4591 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4592 	 */
4593 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4594 		/*
4595 		 * Note: Perhaps this needs to be more adaptive? The rationale
4596 		 * is that, if there's no HBA geometry from the HBA driver, any
4597 		 * guess is good, since this is the physical geometry. If MODE
4598 		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4599 		 */
4600 		nhead = 255;
4601 		nsect = 63;
4602 	} else {
4603 		nhead = un->un_lgeom.g_nhead;
4604 		nsect = un->un_lgeom.g_nsect;
4605 	}
4606 
4607 	if (ISCD(un)) {
4608 		pgeom_p->g_nhead = 1;
4609 		pgeom_p->g_nsect = nsect * nhead;
4610 	} else {
4611 		pgeom_p->g_nhead = nhead;
4612 		pgeom_p->g_nsect = nsect;
4613 	}
4614 
4615 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4616 	pgeom_p->g_capacity = capacity;
4617 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4618 	pgeom_p->g_acyl = 0;
4619 
4620 	/*
4621 	 * Retrieve fresh geometry data from the hardware, stash it
4622 	 * here temporarily before we rebuild the incore label.
4623 	 *
4624 	 * We want to use the MODE SENSE commands to derive the
4625 	 * physical geometry of the device, but if either command
4626 	 * fails, the logical geometry is used as the fallback for
4627 	 * disk label geometry.
4628 	 */
4629 	mutex_exit(SD_MUTEX(un));
4630 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4631 	mutex_enter(SD_MUTEX(un));
4632 
4633 	/*
4634 	 * Now update the real copy while holding the mutex. This
4635 	 * way the global copy is never in an inconsistent state.
4636 	 */
4637 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4638 
4639 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4640 	    "(cached from lgeom)\n");
4641 	SD_INFO(SD_LOG_COMMON, un,
4642 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4643 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4644 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4645 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4646 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4647 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4648 	    un->un_pgeom.g_rpm);
4649 }
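
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * fallback chain sd_resync_geom_caches() uses above to seed the
 * physical geometry cache before MODE SENSE data is available.  The
 * struct and names are hypothetical stand-ins for geom_cache.
 */
#if 0
struct pgeom_seed {
	unsigned short	nhead;
	unsigned short	nsect;
	unsigned long	ncyl;
};

static void
pgeom_seed(struct pgeom_seed *pg, unsigned short lg_nhead,
    unsigned short lg_nsect, unsigned long capacity, int is_cd)
{
	/*
	 * Prefer the HBA's logical geometry; if it is invalid, guess
	 * 255 heads and 63 sectors, which caps the cylinder count for
	 * non-LBA access.
	 */
	unsigned short nhead = (lg_nhead == 0 || lg_nsect == 0) ?
	    255 : lg_nhead;
	unsigned short nsect = (lg_nhead == 0 || lg_nsect == 0) ?
	    63 : lg_nsect;

	if (is_cd) {
		/* CDs are flattened to a single "head". */
		pg->nhead = 1;
		pg->nsect = nsect * nhead;
	} else {
		pg->nhead = nhead;
		pg->nsect = nsect;
	}
	pg->ncyl = capacity / (pg->nhead * pg->nsect);
}
#endif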
4650 
4651 
4652 /*
4653  *    Function: sd_read_fdisk
4654  *
4655  * Description: Utility routine to read the fdisk table.
4656  *
4657  *   Arguments: un - driver soft state (unit) structure
4658  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4659  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4660  *			to use the USCSI "direct" chain and bypass the normal
4661  *			command waitq.
4662  *
4663  * Return Code: SD_CMD_SUCCESS
4664  *		SD_CMD_FAILURE
4665  *
4666  *     Context: Kernel thread only (can sleep).
4667  */
4668 /* ARGSUSED */
4669 static int
4670 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4671 {
4672 #if defined(_NO_FDISK_PRESENT)
4673 
4674 	un->un_solaris_offset = 0;
4675 	un->un_solaris_size = capacity;
4676 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4677 	return (SD_CMD_SUCCESS);
4678 
4679 #elif defined(_FIRMWARE_NEEDS_FDISK)
4680 
4681 	struct ipart	*fdp;
4682 	struct mboot	*mbp;
4683 	struct ipart	fdisk[FD_NUMPART];
4684 	int		i;
4685 	char		sigbuf[2];
4686 	caddr_t		bufp;
4687 	int		uidx;
4688 	int		rval;
4689 	int		lba = 0;
4690 	uint_t		solaris_offset;	/* offset to solaris part. */
4691 	daddr_t		solaris_size;	/* size of solaris partition */
4692 	uint32_t	blocksize;
4693 
4694 	ASSERT(un != NULL);
4695 	ASSERT(mutex_owned(SD_MUTEX(un)));
4696 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4697 
4698 	blocksize = un->un_tgt_blocksize;
4699 
4700 	/*
4701 	 * Start off assuming no fdisk table
4702 	 */
4703 	solaris_offset = 0;
4704 	solaris_size   = capacity;
4705 
4706 	mutex_exit(SD_MUTEX(un));
4707 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4708 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4709 	mutex_enter(SD_MUTEX(un));
4710 
4711 	if (rval != 0) {
4712 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4713 		    "sd_read_fdisk: fdisk read err\n");
4714 		kmem_free(bufp, blocksize);
4715 		return (SD_CMD_FAILURE);
4716 	}
4717 
4718 	mbp = (struct mboot *)bufp;
4719 
4720 	/*
4721 	 * The fdisk table does not begin on a 4-byte boundary within the
4722 	 * master boot record, so we copy it to an aligned structure to avoid
4723 	 * alignment exceptions on some processors.
4724 	 */
4725 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4726 
4727 	/*
4728 	 * Check for lba support before verifying sig; sig might not be
4729 	 * there, say on a blank disk, but the max_chs mark may still
4730 	 * be present.
4731 	 *
4732 	 * Note: LBA support and BEFs are an x86-only concept but this
4733 	 * code should work OK on SPARC as well.
4734 	 */
4735 
4736 	/*
4737 	 * First, check for lba-access-ok on the root node (or prom root
4738 	 * node); if present there, we don't need to search the fdisk table.
4739 	 */
4740 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4741 	    "lba-access-ok", 0) != 0) {
4742 		/* All drives do LBA; don't search fdisk table */
4743 		lba = 1;
4744 	} else {
4745 		/* Okay, look for mark in fdisk table */
4746 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4747 			/* accumulate "lba" value from all partitions */
4748 			lba = (lba || sd_has_max_chs_vals(fdp));
4749 		}
4750 	}
4751 
4752 	if (lba != 0) {
4753 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4754 
4755 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4756 		    "lba-access-ok", 0) == 0) {
4757 			/* not found; create it */
4758 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4759 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4760 			    DDI_PROP_SUCCESS) {
4761 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4762 				    "sd_read_fdisk: Can't create lba property "
4763 				    "for instance %d\n",
4764 				    ddi_get_instance(SD_DEVINFO(un)));
4765 			}
4766 		}
4767 	}
4768 
4769 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4770 
4771 	/*
4772 	 * Endian-independent signature check
4773 	 */
4774 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4775 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4776 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4777 		    "sd_read_fdisk: no fdisk\n");
4778 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4779 		rval = SD_CMD_SUCCESS;
4780 		goto done;
4781 	}
4782 
4783 #ifdef SDDEBUG
4784 	if (sd_level_mask & SD_LOGMASK_INFO) {
4785 		fdp = fdisk;
4786 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4787 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4788 		    "numsect         sysid       bootid\n");
4789 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4790 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4791 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4792 			    i, fdp->relsect, fdp->numsect,
4793 			    fdp->systid, fdp->bootid);
4794 		}
4795 	}
4796 #endif
4797 
4798 	/*
4799 	 * Try to find the unix partition
4800 	 */
4801 	uidx = -1;
4802 	solaris_offset = 0;
4803 	solaris_size   = 0;
4804 
4805 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4806 		int	relsect;
4807 		int	numsect;
4808 
4809 		if (fdp->numsect == 0) {
4810 			un->un_fmap[i].fmap_start = 0;
4811 			un->un_fmap[i].fmap_nblk  = 0;
4812 			continue;
4813 		}
4814 
4815 		/*
4816 		 * Data in the fdisk table is little-endian.
4817 		 */
4818 		relsect = LE_32(fdp->relsect);
4819 		numsect = LE_32(fdp->numsect);
4820 
4821 		un->un_fmap[i].fmap_start = relsect;
4822 		un->un_fmap[i].fmap_nblk  = numsect;
4823 
4824 		if (fdp->systid != SUNIXOS &&
4825 		    fdp->systid != SUNIXOS2 &&
4826 		    fdp->systid != EFI_PMBR) {
4827 			continue;
4828 		}
4829 
4830 		/*
4831 		 * Use the last active Solaris partition found
4832 		 * (there should be only one active partition).
4833 		 *
4834 		 * If there is no active Solaris partition,
4835 		 * use the first inactive Solaris partition instead.
4836 		 */
4837 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4838 			uidx = i;
4839 			solaris_offset = relsect;
4840 			solaris_size   = numsect;
4841 		}
4842 	}
4843 
4844 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4845 	    solaris_offset, solaris_size);
4846 
4847 	rval = SD_CMD_SUCCESS;
4848 
4849 done:
4850 
4851 	/*
4852 	 * Clear the VTOC info only if the Solaris partition entry
4853 	 * has moved, changed size, been deleted, or if the size of
4854 	 * the partition is too small to even fit the label sector.
4855 	 */
4856 	if ((un->un_solaris_offset != solaris_offset) ||
4857 	    (un->un_solaris_size != solaris_size) ||
4858 	    solaris_size <= DK_LABEL_LOC) {
4859 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4860 			solaris_offset, solaris_size);
4861 		bzero(&un->un_g, sizeof (struct dk_geom));
4862 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4863 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4864 		un->un_f_geometry_is_valid = FALSE;
4865 	}
4866 	un->un_solaris_offset = solaris_offset;
4867 	un->un_solaris_size = solaris_size;
4868 	kmem_free(bufp, blocksize);
4869 	return (rval);
4870 
4871 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4872 #error "fdisk table presence undetermined for this platform."
4873 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4874 }
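
/*
 * Illustrative sketch (not part of the driver, and not compiled):
 * parsing an MBR sector the way sd_read_fdisk() does above -- check
 * the 0x55AA signature one byte at a time so the test is
 * endian-independent, copy the unaligned 16-byte partition entries
 * before use, and decode the little-endian relsect/numsect fields
 * explicitly.  The offsets are the standard PC MBR layout; the
 * function names are local to this sketch.
 */
#if 0
#include <stdint.h>
#include <string.h>

static uint32_t
le32(const uint8_t *p)			/* decode little-endian 32-bit */
{
	return ((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	    ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
}

/* Fill in start/len of MBR partition 'n' (0..3); returns 0 on success. */
static int
mbr_get_part(const uint8_t sect[512], int n, uint32_t *start, uint32_t *len)
{
	uint8_t entry[16];

	if (n < 0 || n > 3)
		return (-1);

	/* Endian-independent signature check (MBB_MAGIC is 0xAA55). */
	if (sect[510] != 0x55 || sect[511] != 0xAA)
		return (-1);

	/*
	 * The partition table at offset 446 is not 4-byte aligned;
	 * copy the entry out first, as the driver does with bcopy().
	 */
	memcpy(entry, &sect[446 + 16 * n], sizeof (entry));
	*start = le32(&entry[8]);	/* relsect: starting LBA */
	*len = le32(&entry[12]);	/* numsect: length in sectors */
	return (*len == 0 ? -1 : 0);
}
#endif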
4875 
4876 
4877 /*
4878  *    Function: sd_get_physical_geometry
4879  *
4880  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4881  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4882  *		target, and use this information to initialize the physical
4883  *		geometry cache specified by pgeom_p.
4884  *
4885  *		MODE SENSE is an optional command, so failure in this case
4886  *		does not necessarily denote an error. We want to use the
4887  *		MODE SENSE commands to derive the physical geometry of the
4888  *		device, but if either command fails, the logical geometry is
4889  *		used as the fallback for disk label geometry.
4890  *
4891  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4892  *		have already been initialized for the current target and
4893  *		that the current values be passed as args so that we don't
4894  *		end up ever trying to use -1 as a valid value. This could
4895  *		happen if either value is reset while we're not holding
4896  *		the mutex.
4897  *
4898  *   Arguments: un - driver soft state (unit) structure
4899  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4900  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4901  *			to use the USCSI "direct" chain and bypass the normal
4902  *			command waitq.
4903  *
4904  *     Context: Kernel thread only (can sleep).
4905  */
4906 
4907 static void
4908 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4909 	int capacity, int lbasize, int path_flag)
4910 {
4911 	struct	mode_format	*page3p;
4912 	struct	mode_geometry	*page4p;
4913 	struct	mode_header	*headerp;
4914 	int	sector_size;
4915 	int	nsect;
4916 	int	nhead;
4917 	int	ncyl;
4918 	int	intrlv;
4919 	int	spc;
4920 	int	modesense_capacity;
4921 	int	rpm;
4922 	int	bd_len;
4923 	int	mode_header_length;
4924 	uchar_t	*p3bufp;
4925 	uchar_t	*p4bufp;
4926 	int	cdbsize;
4927 
4928 	ASSERT(un != NULL);
4929 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4930 
4931 	if (un->un_f_blockcount_is_valid != TRUE) {
4932 		return;
4933 	}
4934 
4935 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4936 		return;
4937 	}
4938 
4939 	if (lbasize == 0) {
4940 		if (ISCD(un)) {
4941 			lbasize = 2048;
4942 		} else {
4943 			lbasize = un->un_sys_blocksize;
4944 		}
4945 	}
4946 	pgeom_p->g_secsize = (unsigned short)lbasize;
4947 
4948 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4949 
4950 	/*
4951 	 * Retrieve MODE SENSE page 3 - Format Device Page
4952 	 */
4953 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4954 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4955 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4956 	    != 0) {
4957 		SD_ERROR(SD_LOG_COMMON, un,
4958 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4959 		goto page3_exit;
4960 	}
4961 
4962 	/*
4963 	 * Determine size of Block Descriptors in order to locate the mode
4964 	 * page data.  ATAPI devices return 0, SCSI devices should return
4965 	 * MODE_BLK_DESC_LENGTH.
4966 	 */
4967 	headerp = (struct mode_header *)p3bufp;
4968 	if (un->un_f_cfg_is_atapi == TRUE) {
4969 		struct mode_header_grp2 *mhp =
4970 		    (struct mode_header_grp2 *)headerp;
4971 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4972 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4973 	} else {
4974 		mode_header_length = MODE_HEADER_LENGTH;
4975 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4976 	}
4977 
4978 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4979 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4980 		    "received unexpected bd_len of %d, page3\n", bd_len);
4981 		goto page3_exit;
4982 	}
4983 
4984 	page3p = (struct mode_format *)
4985 	    ((caddr_t)headerp + mode_header_length + bd_len);
4986 
4987 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4988 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4989 		    "mode sense pg3 code mismatch %d\n",
4990 		    page3p->mode_page.code);
4991 		goto page3_exit;
4992 	}
4993 
4994 	/*
4995 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4996 	 * complete successfully; otherwise, revert to the logical geometry.
4997 	 * So, we need to save everything in temporary variables.
4998 	 */
4999 	sector_size = BE_16(page3p->data_bytes_sect);
5000 
5001 	/*
5002 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5003 	 */
5004 	if (sector_size == 0) {
5005 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
5006 	} else {
5007 		sector_size &= ~(un->un_sys_blocksize - 1);
5008 	}
5009 
5010 	nsect  = BE_16(page3p->sect_track);
5011 	intrlv = BE_16(page3p->interleave);
5012 
5013 	SD_INFO(SD_LOG_COMMON, un,
5014 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5015 	SD_INFO(SD_LOG_COMMON, un,
5016 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5017 	    page3p->mode_page.code, nsect, sector_size);
5018 	SD_INFO(SD_LOG_COMMON, un,
5019 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5020 	    BE_16(page3p->track_skew),
5021 	    BE_16(page3p->cylinder_skew));
5022 
5023 
5024 	/*
5025 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5026 	 */
5027 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5028 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
5029 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
5030 	    != 0) {
5031 		SD_ERROR(SD_LOG_COMMON, un,
5032 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5033 		goto page4_exit;
5034 	}
5035 
5036 	/*
5037 	 * Determine size of Block Descriptors in order to locate the mode
5038 	 * page data.  ATAPI devices return 0, SCSI devices should return
5039 	 * MODE_BLK_DESC_LENGTH.
5040 	 */
5041 	headerp = (struct mode_header *)p4bufp;
5042 	if (un->un_f_cfg_is_atapi == TRUE) {
5043 		struct mode_header_grp2 *mhp =
5044 		    (struct mode_header_grp2 *)headerp;
5045 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5046 	} else {
5047 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5048 	}
5049 
5050 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5051 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5052 		    "received unexpected bd_len of %d, page4\n", bd_len);
5053 		goto page4_exit;
5054 	}
5055 
5056 	page4p = (struct mode_geometry *)
5057 	    ((caddr_t)headerp + mode_header_length + bd_len);
5058 
5059 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5060 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5061 		    "mode sense pg4 code mismatch %d\n",
5062 		    page4p->mode_page.code);
5063 		goto page4_exit;
5064 	}
5065 
5066 	/*
5067 	 * Stash the data now, after we know that both commands completed.
5068 	 */
5069 
5070 	mutex_enter(SD_MUTEX(un));
5071 
5072 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5073 	spc   = nhead * nsect;
5074 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5075 	rpm   = BE_16(page4p->rpm);
5076 
5077 	modesense_capacity = spc * ncyl;
5078 
5079 	SD_INFO(SD_LOG_COMMON, un,
5080 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5081 	SD_INFO(SD_LOG_COMMON, un,
5082 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5083 	SD_INFO(SD_LOG_COMMON, un,
5084 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5085 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5086 	    (void *)pgeom_p, capacity);
5087 
5088 	/*
5089 	 * Compensate if the drive's geometry is not rectangular, i.e.,
5090 	 * the product of C * H * S returned by MODE SENSE >= that returned
5091 	 * by read capacity. This is an idiosyncrasy of the original x86
5092 	 * disk subsystem.
5093 	 */
5094 	if (modesense_capacity >= capacity) {
5095 		SD_INFO(SD_LOG_COMMON, un,
5096 		    "sd_get_physical_geometry: adjusting acyl; "
5097 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5098 		    (modesense_capacity - capacity + spc - 1) / spc);
5099 		if (sector_size != 0) {
5100 			/* 1243403: NEC D38x7 drives don't support sec size */
5101 			pgeom_p->g_secsize = (unsigned short)sector_size;
5102 		}
5103 		pgeom_p->g_nsect    = (unsigned short)nsect;
5104 		pgeom_p->g_nhead    = (unsigned short)nhead;
5105 		pgeom_p->g_capacity = capacity;
5106 		pgeom_p->g_acyl	    =
5107 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5108 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5109 	}
5110 
5111 	pgeom_p->g_rpm    = (unsigned short)rpm;
5112 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5113 
5114 	SD_INFO(SD_LOG_COMMON, un,
5115 	    "sd_get_physical_geometry: mode sense geometry:\n");
5116 	SD_INFO(SD_LOG_COMMON, un,
5117 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5118 	    nsect, sector_size, intrlv);
5119 	SD_INFO(SD_LOG_COMMON, un,
5120 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5121 	    nhead, ncyl, rpm, modesense_capacity);
5122 	SD_INFO(SD_LOG_COMMON, un,
5123 	    "sd_get_physical_geometry: (cached)\n");
5124 	SD_INFO(SD_LOG_COMMON, un,
5125 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5126 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5127 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5128 	SD_INFO(SD_LOG_COMMON, un,
5129 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5130 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5131 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5132 
5133 	mutex_exit(SD_MUTEX(un));
5134 
5135 page4_exit:
5136 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5137 page3_exit:
5138 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5139 }
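
/*
 * Illustrative sketch (not part of the driver, and not compiled):
 * locating a mode page within MODE SENSE data, as done twice above.
 * The page data follows the mode header plus any block descriptors,
 * and the header format differs for ATAPI (group 2 CDB) devices.
 * The sizes below are the standard SCSI-2 values; the names are
 * local to this sketch.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

#define	MH_LEN		4	/* mode header for 6-byte CDBs */
#define	MH_GRP2_LEN	8	/* mode header for 10-byte CDBs */
#define	MAX_BD_LEN	8	/* one block descriptor (MODE_BLK_DESC_LENGTH) */

/* Return the offset of the mode page data, or -1 on a bad descriptor. */
static ptrdiff_t
mode_page_offset(const uint8_t *buf, int is_atapi)
{
	size_t hdr_len;
	unsigned bd_len;

	if (is_atapi) {
		hdr_len = MH_GRP2_LEN;
		/* 16-bit big-endian block descriptor length, bytes 6-7 */
		bd_len = ((unsigned)buf[6] << 8) | buf[7];
	} else {
		hdr_len = MH_LEN;
		bd_len = buf[3];	/* one-byte field at offset 3 */
	}

	/*
	 * ATAPI devices return 0; SCSI devices should return one
	 * 8-byte block descriptor.  Anything larger is rejected,
	 * mirroring the bd_len checks above.
	 */
	if (bd_len > MAX_BD_LEN)
		return (-1);

	return ((ptrdiff_t)(hdr_len + bd_len));
}
#endif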
5140 
5141 
5142 /*
5143  *    Function: sd_get_virtual_geometry
5144  *
5145  * Description: Ask the controller to tell us about the target device.
5146  *
5147  *   Arguments: un - pointer to softstate
5148  *		capacity - disk capacity in #blocks
5149  *		lbasize - disk block size in bytes
5150  *
5151  *     Context: Kernel thread only
5152  */
5153 
5154 static void
5155 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5156 {
5157 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5158 	uint_t	geombuf;
5159 	int	spc;
5160 
5161 	ASSERT(un != NULL);
5162 	ASSERT(mutex_owned(SD_MUTEX(un)));
5163 
5164 	mutex_exit(SD_MUTEX(un));
5165 
5166 	/* Set sector size, and total number of sectors */
5167 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5168 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5169 
5170 	/* Let the HBA tell us its geometry */
5171 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5172 
5173 	mutex_enter(SD_MUTEX(un));
5174 
5175 	/* A value of -1 indicates an undefined "geometry" property */
5176 	if (geombuf == (-1)) {
5177 		return;
5178 	}
5179 
5180 	/* Initialize the logical geometry cache. */
5181 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5182 	lgeom_p->g_nsect   = geombuf & 0xffff;
5183 	lgeom_p->g_secsize = un->un_sys_blocksize;
5184 
5185 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5186 
5187 	/*
5188 	 * Note: The driver originally converted the capacity value from
5189 	 * target blocks to system blocks. However, the capacity value passed
5190 	 * to this routine is already in terms of system blocks (this scaling
5191 	 * is done when the READ CAPACITY command is issued and processed).
5192 	 * This 'error' may have gone undetected because the usage of g_ncyl
5193 	 * (which is based upon g_capacity) is very limited within the driver.
5194 	 */
5195 	lgeom_p->g_capacity = capacity;
5196 
5197 	/*
5198 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value.
5199 	 * The HBA may return zero values if the device has been removed.
5200 	 */
5201 	if (spc == 0) {
5202 		lgeom_p->g_ncyl = 0;
5203 	} else {
5204 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5205 	}
5206 	lgeom_p->g_acyl = 0;
5207 
5208 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5209 	SD_INFO(SD_LOG_COMMON, un,
5210 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5211 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5212 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5213 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5214 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5215 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5216 }
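
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * packing of the HBA "geometry" capability word decoded above --
 * heads in the upper 16 bits, sectors per track in the lower 16.
 * The helper names are local to this sketch.
 */
#if 0
#include <stdint.h>

static uint32_t
geom_pack(uint16_t nhead, uint16_t nsect)
{
	return (((uint32_t)nhead << 16) | nsect);
}

static void
geom_unpack(uint32_t geombuf, uint16_t *nhead, uint16_t *nsect)
{
	*nhead = (geombuf >> 16) & 0xffff;	/* as in the code above */
	*nsect = geombuf & 0xffff;
}
#endif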
5217 
5218 
5219 /*
5220  *    Function: sd_update_block_info
5221  *
5222  * Description: Record the new target sector size and capacity in the
5223  *		soft state and mark each value as valid.
5224  *
5225  *   Arguments: un: unit struct.
5226  *		lbasize: new target sector size
5227  *		capacity: new target capacity, i.e. block count
5228  *
5229  *     Context: Kernel thread context
5230  */
5231 
5232 static void
5233 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5234 {
5235 	if (lbasize != 0) {
5236 		un->un_tgt_blocksize = lbasize;
5237 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5238 	}
5239 
5240 	if (capacity != 0) {
5241 		un->un_blockcount		= capacity;
5242 		un->un_f_blockcount_is_valid	= TRUE;
5243 	}
5244 }
5245 
5246 
5247 static void
5248 sd_swap_efi_gpt(efi_gpt_t *e)
5249 {
5250 	_NOTE(ASSUMING_PROTECTED(*e))
5251 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5252 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5253 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5254 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5255 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5256 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5257 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5258 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5259 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5260 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5261 	e->efi_gpt_NumberOfPartitionEntries =
5262 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5263 	e->efi_gpt_SizeOfPartitionEntry =
5264 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5265 	e->efi_gpt_PartitionEntryArrayCRC32 =
5266 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5267 }
5268 
5269 static void
5270 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5271 {
5272 	int i;
5273 
5274 	_NOTE(ASSUMING_PROTECTED(*p))
5275 	for (i = 0; i < nparts; i++) {
5276 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5277 		    p[i].efi_gpe_PartitionTypeGUID);
5278 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5279 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5280 		/* PartitionAttrs */
5281 	}
5282 }
5283 
5284 static int
5285 sd_validate_efi(efi_gpt_t *labp)
5286 {
5287 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5288 		return (EINVAL);
5289 	/* at least 96 bytes in this version of the spec. */
5290 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5291 	    labp->efi_gpt_HeaderSize)
5292 		return (EINVAL);
5293 	/* this should be 128 bytes */
5294 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5295 		return (EINVAL);
5296 	return (0);
5297 }
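
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * GPT header sanity checks of sd_validate_efi() above, restated
 * against the on-disk values.  EFI_SIGNATURE is the ASCII string
 * "EFI PART" stored as a little-endian 64-bit integer; the 96-byte
 * minimum mirrors the "at least 96 bytes" check above, and 128 bytes
 * is sizeof (efi_gpe_t).  The struct and names are local to this
 * sketch and assume fields already swapped to host order, as done by
 * sd_swap_efi_gpt().
 */
#if 0
#include <stdint.h>

#define	GPT_SIG		0x5452415020494645ULL	/* "EFI PART", little-endian */
#define	GPT_MIN_HDR	96
#define	GPT_ENTRY_SZ	128

struct gpt_hdr {
	uint64_t	signature;
	uint32_t	header_size;
	uint32_t	entry_size;
};

static int
gpt_hdr_ok(const struct gpt_hdr *h)
{
	if (h->signature != GPT_SIG)
		return (0);
	if (h->header_size < GPT_MIN_HDR)
		return (0);
	return (h->entry_size == GPT_ENTRY_SZ);
}
#endif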
5298 
5299 static int
5300 sd_use_efi(struct sd_lun *un, int path_flag)
5301 {
5302 	int		i;
5303 	int		rval = 0;
5304 	efi_gpe_t	*partitions;
5305 	uchar_t		*buf;
5306 	uint_t		lbasize;
5307 	uint64_t	cap = 0;
5308 	uint_t		nparts;
5309 	diskaddr_t	gpe_lba;
5310 	struct uuid	uuid_type_reserved = EFI_RESERVED;
5311 
5312 	ASSERT(mutex_owned(SD_MUTEX(un)));
5313 	lbasize = un->un_tgt_blocksize;
5314 	un->un_reserved = -1;
5315 
5316 	mutex_exit(SD_MUTEX(un));
5317 
5318 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5319 
5320 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5321 		rval = EINVAL;
5322 		goto done_err;
5323 	}
5324 
5325 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5326 	if (rval) {
5327 		goto done_err;
5328 	}
5329 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5330 		/* not ours */
5331 		rval = ESRCH;
5332 		goto done_err;
5333 	}
5334 
5335 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5336 	if (rval) {
5337 		goto done_err;
5338 	}
5339 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5340 
5341 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5342 		/*
5343 		 * Couldn't read the primary; try the backup.  Our
5344 		 * capacity at this point could be based on CHS, so
5345 		 * check what the device reports.
5346 		 */
5347 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5348 		    path_flag);
5349 		if (rval) {
5350 			goto done_err;
5351 		}
5352 
5353 		/*
5354 		 * The MMC standard allows READ CAPACITY to be
5355 		 * inaccurate by a bounded amount (in the interest of
5356 		 * response latency).  As a result, failed READs are
5357 		 * commonplace (due to the reading of metadata and not
5358 		 * data). Depending on the per-Vendor/drive Sense data,
5359 		 * the failed READ can cause many (unnecessary) retries.
5360 		 */
5361 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5362 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5363 			path_flag)) != 0) {
5364 				goto done_err;
5365 		}
5366 
5367 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5368 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5369 
5370 			/*
5371 			 * Refer to comments related to off-by-1 at the
5372 			 * header of this file. Search the next to last
5373 			 * block for backup EFI label.
5374 			 */
5375 			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5376 			    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5377 				path_flag)) != 0) {
5378 					goto done_err;
5379 			}
5380 			sd_swap_efi_gpt((efi_gpt_t *)buf);
5381 			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5382 				goto done_err;
5383 		}
5384 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5385 		    "primary label corrupt; using backup\n");
5386 	}
5387 
5388 	if (cap == 0)
5389 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5390 		    path_flag);
5391 
5392 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5393 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5394 
5395 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5396 	    path_flag);
5397 	if (rval) {
5398 		goto done_err;
5399 	}
5400 	partitions = (efi_gpe_t *)buf;
5401 
5402 	if (nparts > MAXPART) {
5403 		nparts = MAXPART;
5404 	}
5405 	sd_swap_efi_gpe(nparts, partitions);
5406 
5407 	mutex_enter(SD_MUTEX(un));
5408 
5409 	/* Fill in partition table. */
5410 	for (i = 0; i < nparts; i++) {
5411 		if (partitions->efi_gpe_StartingLBA != 0 ||
5412 		    partitions->efi_gpe_EndingLBA != 0) {
5413 			un->un_map[i].dkl_cylno =
5414 			    partitions->efi_gpe_StartingLBA;
5415 			un->un_map[i].dkl_nblk =
5416 			    partitions->efi_gpe_EndingLBA -
5417 			    partitions->efi_gpe_StartingLBA + 1;
5418 			un->un_offset[i] =
5419 			    partitions->efi_gpe_StartingLBA;
5420 		}
5421 		if (un->un_reserved == -1) {
5422 			if (bcmp(&partitions->efi_gpe_PartitionTypeGUID,
5423 			    &uuid_type_reserved, sizeof (struct uuid)) == 0) {
5424 				un->un_reserved = i;
5425 			}
5426 		}
5427 		if (i == WD_NODE) {
5428 			/*
5429 			 * minor number 7 corresponds to the whole disk
5430 			 */
5431 			un->un_map[i].dkl_cylno = 0;
5432 			un->un_map[i].dkl_nblk = un->un_blockcount;
5433 			un->un_offset[i] = 0;
5434 		}
5435 		partitions++;
5436 	}
5437 	un->un_solaris_offset = 0;
5438 	un->un_solaris_size = cap;
5439 	un->un_f_geometry_is_valid = TRUE;
5440 
5441 	/* clear the vtoc label */
5442 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5443 
5444 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5445 	return (0);
5446 
5447 done_err:
5448 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5449 	mutex_enter(SD_MUTEX(un));
5450 	/*
5451 	 * If we didn't find something that could look like a VTOC
5452 	 * and the disk is over 1TB, we know there isn't a valid label.
5453 	 * Otherwise let sd_uselabel decide what to do.  We only
5454 	 * want to invalidate this if we're certain the label isn't
5455 	 * valid because sd_prop_op will now fail, which in turn
5456 	 * causes things like opens and stats on the partition to fail.
5457 	 */
5458 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5459 		un->un_f_geometry_is_valid = FALSE;
5460 	}
5461 	return (rval);
5462 }
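
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * order in which sd_use_efi() above probes for a GPT header --
 * primary at LBA 1, then the backup at the last LBA (capacity - 1),
 * then one block earlier (capacity - 2) to cover the off-by-1
 * capacity adjustment discussed in the header of this file.  The
 * function name is local to this sketch.
 */
#if 0
#include <stdint.h>

/* Fill 'lba' with up to 3 candidate header locations; returns count. */
static int
gpt_probe_order(uint64_t capacity, uint64_t lba[3])
{
	int n = 0;

	lba[n++] = 1;				/* primary GPT header */
	if (capacity >= 2)
		lba[n++] = capacity - 1;	/* backup at last block */
	if (capacity >= 3)
		lba[n++] = capacity - 2;	/* off-by-1 fallback */
	return (n);
}
#endif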
5463 
5464 
5465 /*
5466  *    Function: sd_uselabel
5467  *
5468  * Description: Validate the disk label and update the relevant data (geometry,
5469  *		partition, vtoc, and capacity data) in the sd_lun struct.
5470  *		Marks the geometry of the unit as being valid.
5471  *
5472  *   Arguments: un: unit struct.
5473  *		dk_label: disk label
5474  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5475  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5476  *			to use the USCSI "direct" chain and bypass the normal
5477  *			command waitq.
5478  *
5479  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5480  *		partition, vtoc, and capacity data are good.
5481  *
5482  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5483  *		label; or computed capacity does not jibe with capacity
5484  *		reported from the READ CAPACITY command.
5485  *
5486  *     Context: Kernel thread only (can sleep).
5487  */
5488 
5489 static int
5490 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5491 {
5492 	short	*sp;
5493 	short	sum;
5494 	short	count;
5495 	int	label_error = SD_LABEL_IS_VALID;
5496 	int	i;
5497 	int	capacity;
5498 	int	part_end;
5499 	int	track_capacity;
5500 	int	err;
5501 #if defined(_SUNOS_VTOC_16)
5502 	struct	dkl_partition	*vpartp;
5503 #endif
5504 	ASSERT(un != NULL);
5505 	ASSERT(mutex_owned(SD_MUTEX(un)));
5506 
5507 	/* Validate the magic number of the label. */
5508 	if (labp->dkl_magic != DKL_MAGIC) {
5509 #if defined(__sparc)
5510 		if ((un->un_state == SD_STATE_NORMAL) &&
5511 			un->un_f_vtoc_errlog_supported) {
5512 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5513 			    "Corrupt label; wrong magic number\n");
5514 		}
5515 #endif
5516 		return (SD_LABEL_IS_INVALID);
5517 	}
5518 
5519 	/* Validate the checksum of the label. */
5520 	sp  = (short *)labp;
5521 	sum = 0;
5522 	count = sizeof (struct dk_label) / sizeof (short);
5523 	while (count--)	 {
5524 		sum ^= *sp++;
5525 	}
5526 
5527 	if (sum != 0) {
5528 #if	defined(_SUNOS_VTOC_16)
5529 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5530 #elif defined(_SUNOS_VTOC_8)
5531 		if ((un->un_state == SD_STATE_NORMAL) &&
5532 		    un->un_f_vtoc_errlog_supported) {
5533 #endif
5534 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5535 			    "Corrupt label - label checksum failed\n");
5536 		}
5537 		return (SD_LABEL_IS_INVALID);
5538 	}
5539 
5540 
5541 	/*
5542 	 * Fill in geometry structure with data from label.
5543 	 */
5544 	bzero(&un->un_g, sizeof (struct dk_geom));
5545 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5546 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5547 	un->un_g.dkg_bcyl   = 0;
5548 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5549 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5550 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5551 
5552 #if defined(_SUNOS_VTOC_8)
5553 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5554 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5555 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5556 #endif
5557 #if defined(_SUNOS_VTOC_16)
5558 	un->un_dkg_skew = labp->dkl_skew;
5559 #endif
5560 
5561 #if defined(__i386) || defined(__amd64)
5562 	un->un_g.dkg_apc = labp->dkl_apc;
5563 #endif
5564 
5565 	/*
5566 	 * Currently we rely on the values in the label being accurate. If
5567 	 * dkl_rpm or dkl_pcyl are zero in the label, use a default value.
5568 	 *
5569 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5570 	 * although this command is optional in SCSI-2.
5571 	 */
5572 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5573 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5574 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5575 
5576 	/*
5577 	 * The Read and Write reinstruct values may not be valid
5578 	 * for older disks.
5579 	 */
5580 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5581 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5582 
5583 	/* Fill in partition table. */
5584 #if defined(_SUNOS_VTOC_8)
5585 	for (i = 0; i < NDKMAP; i++) {
5586 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5587 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5588 	}
5589 #endif
5590 #if  defined(_SUNOS_VTOC_16)
5591 	vpartp		= labp->dkl_vtoc.v_part;
5592 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5593 
5594 	/* Prevent divide by zero */
5595 	if (track_capacity == 0) {
5596 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5597 		    "Corrupt label - zero nhead or nsect value\n");
5598 
5599 		return (SD_LABEL_IS_INVALID);
5600 	}
5601 
5602 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5603 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5604 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5605 	}
5606 #endif
5607 
5608 	/* Fill in VTOC Structure. */
5609 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5610 #if defined(_SUNOS_VTOC_8)
5611 	/*
5612 	 * The 8-slice vtoc does not include the ascii label; save it into
5613 	 * the device's soft state structure here.
5614 	 */
5615 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5616 #endif
5617 
5618 	/* Now look for a valid capacity. */
5619 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5620 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5621 
5622 	if (un->un_g.dkg_acyl) {
5623 #if defined(__i386) || defined(__amd64)
5624 		/* we may have > 1 alts cylinder */
5625 		capacity += (track_capacity * un->un_g.dkg_acyl);
5626 #else
5627 		capacity += track_capacity;
5628 #endif
5629 	}
5630 
5631 	/*
5632 	 * Force a check here to ensure the computed capacity is valid.
5633 	 * A capacity of zero indicates an invalid label, in which case
5634 	 * we abort the update of the relevant data.
5635 	 */
5636 	if (capacity == 0) {
5637 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5638 		    "Corrupt label - no valid capacity could be retrieved\n");
5639 
5640 		return (SD_LABEL_IS_INVALID);
5641 	}
5642 
5643 	/* Mark the geometry as valid. */
5644 	un->un_f_geometry_is_valid = TRUE;
5645 
5646 	/*
5647 	 * At this point, un->un_blockcount should contain valid data from
5648 	 * the READ CAPACITY command.
5649 	 */
5650 	if (un->un_f_blockcount_is_valid != TRUE) {
5651 		/*
5652 		 * We have a situation where the target didn't give us a good
5653 		 * READ CAPACITY value, yet there appears to be a valid label.
5654 		 * In this case, we'll fake the capacity.
5655 		 */
5656 		un->un_blockcount = capacity;
5657 		un->un_f_blockcount_is_valid = TRUE;
5658 		goto done;
5659 	}
5660 
5661 
5662 	if ((capacity <= un->un_blockcount) ||
5663 	    (un->un_state != SD_STATE_NORMAL)) {
5664 #if defined(_SUNOS_VTOC_8)
5665 		/*
5666 		 * We can't let this happen on drives that are subdivided
5667 		 * into logical disks (i.e., that have an fdisk table).
5668 		 * The un_blockcount field should always hold the full media
5669 		 * size in sectors, period.  This code would overwrite
5670 		 * un_blockcount with the size of the Solaris fdisk partition.
5671 		 */
5672 		SD_ERROR(SD_LOG_COMMON, un,
5673 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5674 		    capacity, un->un_blockcount);
5675 		un->un_blockcount = capacity;
5676 		un->un_f_blockcount_is_valid = TRUE;
5677 #endif	/* defined(_SUNOS_VTOC_8) */
5678 		goto done;
5679 	}
5680 
5681 	if (ISCD(un)) {
5682 		/* For CDROMs, we trust that the data in the label is OK. */
5683 #if defined(_SUNOS_VTOC_8)
5684 		for (i = 0; i < NDKMAP; i++) {
5685 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5686 			    labp->dkl_map[i].dkl_cylno +
5687 			    labp->dkl_map[i].dkl_nblk  - 1;
5688 
5689 			if ((labp->dkl_map[i].dkl_nblk) &&
5690 			    (part_end > un->un_blockcount)) {
5691 				un->un_f_geometry_is_valid = FALSE;
5692 				break;
5693 			}
5694 		}
5695 #endif
5696 #if defined(_SUNOS_VTOC_16)
5697 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5698 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5699 			part_end = vpartp->p_start + vpartp->p_size;
5700 			if ((vpartp->p_size > 0) &&
5701 			    (part_end > un->un_blockcount)) {
5702 				un->un_f_geometry_is_valid = FALSE;
5703 				break;
5704 			}
5705 		}
5706 #endif
5707 	} else {
5708 		uint64_t t_capacity;
5709 		uint32_t t_lbasize;
5710 
5711 		mutex_exit(SD_MUTEX(un));
5712 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5713 		    path_flag);
5714 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5715 		mutex_enter(SD_MUTEX(un));
5716 
5717 		if (err == 0) {
5718 			sd_update_block_info(un, t_lbasize, t_capacity);
5719 		}
5720 
5721 		if (capacity > un->un_blockcount) {
5722 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5723 			    "Corrupt label - bad geometry\n");
5724 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5725 			    "Label says %u blocks; Drive says %llu blocks\n",
5726 			    capacity, (unsigned long long)un->un_blockcount);
5727 			un->un_f_geometry_is_valid = FALSE;
5728 			label_error = SD_LABEL_IS_INVALID;
5729 		}
5730 	}
5731 
5732 done:
5733 
5734 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5735 	SD_INFO(SD_LOG_COMMON, un,
5736 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5737 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5738 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5739 	SD_INFO(SD_LOG_COMMON, un,
5740 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5741 	    un->un_tgt_blocksize, un->un_blockcount,
5742 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5743 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5744 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5745 
5746 	ASSERT(mutex_owned(SD_MUTEX(un)));
5747 
5748 	return (label_error);
5749 }
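
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * label checksum verified in sd_uselabel() above.  A SMI label
 * checksums to zero when the entire dk_label, including the stored
 * checksum word, is XORed together as 16-bit shorts.  This assumes a
 * 512-byte label, which is sizeof (struct dk_label).
 */
#if 0
#include <stddef.h>
#include <stdint.h>

/* Returns nonzero if the 512-byte label's XOR checksum is bad. */
static int
dk_label_cksum_bad(const void *labp)
{
	const uint16_t *sp = labp;
	uint16_t sum = 0;
	size_t count = 512 / sizeof (uint16_t);

	while (count--)
		sum ^= *sp++;
	return (sum != 0);
}
#endif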
5750 
5751 
5752 /*
5753  *    Function: sd_build_default_label
5754  *
5755  * Description: Generate a default label for those devices that do not have
5756  *		one, e.g., new media, removable cartridges, etc.
5757  *
5758  *     Context: Kernel thread only
5759  */
5760 
5761 static void
5762 sd_build_default_label(struct sd_lun *un)
5763 {
5764 #if defined(_SUNOS_VTOC_16)
5765 	uint_t	phys_spc;
5766 	uint_t	disksize;
5767 	struct	dk_geom un_g;
5768 	uint64_t capacity;
5769 #endif
5770 
5771 	ASSERT(un != NULL);
5772 	ASSERT(mutex_owned(SD_MUTEX(un)));
5773 
5774 #if defined(_SUNOS_VTOC_8)
5775 	/*
5776 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5777 	 * only. This may be a valid check for VTOC_16 as well.
5778 	 * Once we understand why there is this difference between SPARC and
5779 	 * x86 platforms, we could remove this legacy check.
5780 	 */
5781 	ASSERT(un->un_f_default_vtoc_supported);
5782 #endif
5783 
5784 	bzero(&un->un_g, sizeof (struct dk_geom));
5785 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5786 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5787 
5788 #if defined(_SUNOS_VTOC_8)
5789 
5790 	/*
5791 	 * It's REMOVABLE media, so there is no label (on sparc, anyway).
5792 	 * But it is still necessary to set up various geometry information,
5793 	 * and we are doing this here.
5794 	 */
5795 
5796 	/*
5797 	 * For the rpm, we use the minimum for the disk.  For the heads,
5798 	 * cylinders, and sectors per track: if the capacity is <= 1GB,
5799 	 * head = 64 and sect = 32; else head = 255 and sect = 63.  Note:
5800 	 * the capacity should equal the C*H*S product, so this causes
5801 	 * some truncation of size due to round-off errors.  For CD-ROMs
5802 	 * this truncation can have adverse side effects, so we return
5803 	 * ncyl and nhead as 1; nsect would overflow for most CD-ROMs, as
5804 	 * nsect is of type ushort. (4190569)
5804 	 */
5805 	if (ISCD(un)) {
5806 		/*
5807 		 * Preserve the old behavior for non-writable
5808 		 * media. Since dkg_nsect is a ushort, it
5809 		 * will lose bits, as CD-ROMs have more than
5810 		 * 65536 sectors; if we recalculated
5811 		 * capacity, it would become much shorter.
5812 		 * The dkg_* information is not used for
5813 		 * CD-ROMs, so that is OK. But for
5814 		 * writable CDs we need this information
5815 		 * to be valid (for newfs, say). So we
5816 		 * make nsect and nhead > 1; that way
5817 		 * nsect can still stay within the ushort
5818 		 * limit without losing any bits.
5819 		 */
5820 		if (un->un_f_mmc_writable_media == TRUE) {
5821 			un->un_g.dkg_nhead = 64;
5822 			un->un_g.dkg_nsect = 32;
5823 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5824 			un->un_blockcount = un->un_g.dkg_ncyl *
5825 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5826 		} else {
5827 			un->un_g.dkg_ncyl  = 1;
5828 			un->un_g.dkg_nhead = 1;
5829 			un->un_g.dkg_nsect = un->un_blockcount;
5830 		}
5831 	} else {
5832 		if (un->un_blockcount <= 0x1000) {
5833 			/* unlabeled SCSI floppy device */
5834 			un->un_g.dkg_nhead = 2;
5835 			un->un_g.dkg_ncyl = 80;
5836 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5837 		} else if (un->un_blockcount <= 0x200000) {
5838 			un->un_g.dkg_nhead = 64;
5839 			un->un_g.dkg_nsect = 32;
5840 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5841 		} else {
5842 			un->un_g.dkg_nhead = 255;
5843 			un->un_g.dkg_nsect = 63;
5844 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5845 		}
5846 		un->un_blockcount =
5847 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5848 	}
5849 
5850 	un->un_g.dkg_acyl	= 0;
5851 	un->un_g.dkg_bcyl	= 0;
5852 	un->un_g.dkg_rpm	= 200;
5853 	un->un_asciilabel[0]	= '\0';
5854 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5855 
5856 	un->un_map[0].dkl_cylno = 0;
5857 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5858 	un->un_map[2].dkl_cylno = 0;
5859 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5860 
5861 #elif defined(_SUNOS_VTOC_16)
5862 
5863 	if (un->un_solaris_size == 0) {
5864 		/*
5865 		 * Got an fdisk table but no Solaris entry; therefore,
5866 		 * don't create a default label.
5867 		 */
5868 		un->un_f_geometry_is_valid = TRUE;
5869 		return;
5870 	}
5871 
5872 	/*
5873 	 * For CDs we continue to use the physical geometry to calculate
5874 	 * number of cylinders. All other devices must convert the
5875 	 * physical geometry (geom_cache) to values that will fit
5876 	 * in a dk_geom structure.
5877 	 */
5878 	if (ISCD(un)) {
5879 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5880 	} else {
5881 		/* Convert physical geometry to disk geometry */
5882 		bzero(&un_g, sizeof (struct dk_geom));
5883 
5884 		/*
5885 		 * Refer to comments related to off-by-1 at the
5886 		 * header of this file.
5887 		 * Before calculating geometry, the capacity should be
5888 		 * decreased by 1. If un_f_capacity_adjusted is TRUE,
5889 		 * we are treating a 1TB disk as (1T - 512)B, and the
5890 		 * capacity of such disks has already been decreased
5891 		 * by 1.
5892 		 */
5893 		if (!un->un_f_capacity_adjusted &&
5894 		    !un->un_f_has_removable_media &&
5895 		    !un->un_f_is_hotpluggable &&
5896 			un->un_tgt_blocksize == un->un_sys_blocksize)
5897 			capacity = un->un_blockcount - 1;
5898 		else
5899 			capacity = un->un_blockcount;
5900 
5901 		sd_convert_geometry(capacity, &un_g);
5902 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5903 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5904 	}
5905 
5906 	ASSERT(phys_spc != 0);
5907 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5908 	un->un_g.dkg_acyl = DK_ACYL;
5909 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5910 	disksize = un->un_g.dkg_ncyl * phys_spc;
5911 
5912 	if (ISCD(un)) {
5913 		/*
5914 		 * CDs don't use the "heads * sectors * cyls" type of
5915 		 * geometry, but instead use the entire capacity of the media.
5916 		 */
5917 		disksize = un->un_solaris_size;
5918 		un->un_g.dkg_nhead = 1;
5919 		un->un_g.dkg_nsect = 1;
5920 		un->un_g.dkg_rpm =
5921 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5922 
5923 		un->un_vtoc.v_part[0].p_start = 0;
5924 		un->un_vtoc.v_part[0].p_size  = disksize;
5925 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5926 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5927 
5928 		un->un_map[0].dkl_cylno = 0;
5929 		un->un_map[0].dkl_nblk  = disksize;
5930 		un->un_offset[0] = 0;
5931 
5932 	} else {
5933 		/*
5934 		 * Hard disks and removable media cartridges
5935 		 */
5936 		un->un_g.dkg_rpm =
5937 		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
5938 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5939 
5940 		/* Add boot slice */
5941 		un->un_vtoc.v_part[8].p_start = 0;
5942 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5943 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5944 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5945 
5946 		un->un_map[8].dkl_cylno = 0;
5947 		un->un_map[8].dkl_nblk  = phys_spc;
5948 		un->un_offset[8] = 0;
5949 	}
5950 
5951 	un->un_g.dkg_apc = 0;
5952 	un->un_vtoc.v_nparts = V_NUMPAR;
5953 	un->un_vtoc.v_version = V_VERSION;
5954 
5955 	/* Add backup slice */
5956 	un->un_vtoc.v_part[2].p_start = 0;
5957 	un->un_vtoc.v_part[2].p_size  = disksize;
5958 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5959 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5960 
5961 	un->un_map[2].dkl_cylno = 0;
5962 	un->un_map[2].dkl_nblk  = disksize;
5963 	un->un_offset[2] = 0;
5964 
5965 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5966 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5967 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5968 
5969 #else
5970 #error "No VTOC format defined."
5971 #endif
5972 
5973 	un->un_g.dkg_read_reinstruct  = 0;
5974 	un->un_g.dkg_write_reinstruct = 0;
5975 
5976 	un->un_g.dkg_intrlv = 1;
5977 
5978 	un->un_vtoc.v_sanity  = VTOC_SANE;
5979 
5980 	un->un_f_geometry_is_valid = TRUE;
5981 
5982 	SD_INFO(SD_LOG_COMMON, un,
5983 	    "sd_build_default_label: Default label created: "
5984 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5985 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5986 	    un->un_g.dkg_nsect, un->un_blockcount);
5987 }
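
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * capacity tiers used by the VTOC_8 branch of sd_build_default_label()
 * above when inventing a geometry -- floppy-sized devices get 2 heads
 * and 80 cylinders, devices up to 1GB get 64/32, and everything larger
 * gets 255/63.  The struct and names are local to this sketch.
 */
#if 0
#include <stdint.h>

struct def_geom {
	uint32_t	nhead;
	uint32_t	nsect;
	uint32_t	ncyl;
};

static void
default_geometry(uint64_t blockcount, struct def_geom *g)
{
	if (blockcount <= 0x1000) {		/* unlabeled SCSI floppy */
		g->nhead = 2;
		g->ncyl = 80;
		g->nsect = (uint32_t)(blockcount / (2 * 80));
	} else if (blockcount <= 0x200000) {	/* <= 1GB */
		g->nhead = 64;
		g->nsect = 32;
		g->ncyl = (uint32_t)(blockcount / (64 * 32));
	} else {
		g->nhead = 255;
		g->nsect = 63;
		g->ncyl = (uint32_t)(blockcount / (255 * 63));
	}
	/* The capacity is then truncated to the C*H*S product, as above. */
}
#endif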
5988 
5989 
5990 #if defined(_FIRMWARE_NEEDS_FDISK)
5991 /*
5992  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5993  */
5994 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5995 #define	LBA_MAX_CYL	(1022 & 0xFF)
5996 #define	LBA_MAX_HEAD	(254)
5997 
5998 
5999 /*
6000  *    Function: sd_has_max_chs_vals
6001  *
6002  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
6003  *
6004  *   Arguments: fdp - ptr to CHS info
6005  *
6006  * Return Code: True or false
6007  *
6008  *     Context: Any.
6009  */
6010 
6011 static int
6012 sd_has_max_chs_vals(struct ipart *fdp)
6013 {
6014 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6015 	    (fdp->beghead == LBA_MAX_HEAD)	&&
6016 	    (fdp->begsect == LBA_MAX_SECT)	&&
6017 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6018 	    (fdp->endhead == LBA_MAX_HEAD)	&&
6019 	    (fdp->endsect == LBA_MAX_SECT));
6020 }
6021 #endif
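
/*
 * Illustrative sketch (not part of the driver, and not compiled): the
 * layout of the packed CHS bytes tested by sd_has_max_chs_vals()
 * above.  In an MBR entry the sector byte carries the sector number
 * in its low 6 bits and bits 8-9 of the cylinder in its top 2 bits,
 * which is why LBA_MAX_SECT is (63 | ((1022 & 0x300) >> 2)).  The
 * function name is local to this sketch.
 */
#if 0
#include <stdint.h>

static void
chs_decode(uint8_t head, uint8_t sect, uint8_t cyl,
    unsigned *c, unsigned *h, unsigned *s)
{
	*h = head;				/* head: full byte */
	*s = sect & 0x3F;			/* sector: low 6 bits */
	*c = ((unsigned)(sect & 0xC0) << 2) | cyl; /* cylinder: 10 bits */
}
#endif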
6022 
6023 
6024 /*
6025  *    Function: sd_inq_fill
6026  *
6027  * Description: Print a piece of inquiry data, cleaned up for non-printable
6028  *		characters and stopping at the first space character after
6029  *		the beginning of the passed string.
6030  *
6031  *   Arguments: p - source string
6032  *		l - maximum length to copy
6033  *		s - destination string
6034  *
6035  *     Context: Any.
6036  */
6037 
6038 static void
6039 sd_inq_fill(char *p, int l, char *s)
6040 {
6041 	unsigned i = 0;
6042 	char c;
6043 
6044 	while (i++ < l) {
6045 		if ((c = *p++) < ' ' || c >= 0x7F) {
6046 			c = '*';
6047 		} else if (i != 1 && c == ' ') {
6048 			break;
6049 		}
6050 		*s++ = c;
6051 	}
6052 	*s++ = 0;
6053 }
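
/*
 * Illustrative usage (not compiled): sanitizing an INQUIRY vendor
 * field with sd_inq_fill() above.  The destination must hold l + 1
 * bytes for the terminating NUL; non-printable bytes become '*' and
 * the copy stops at the first space after the first character, so
 * "SEAGATE " yields "SEAGATE" and "AC\001ME  " yields "AC*ME".
 */
#if 0
	char vid[9];				/* 8 bytes + NUL */

	sd_inq_fill(SD_INQUIRY(un)->inq_vid, sizeof (vid) - 1, vid);
#endif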
6054 
6055 
6056 /*
6057  *    Function: sd_register_devid
6058  *
6059  * Description: This routine will obtain the device id information from the
6060  *		target, obtain the serial number, and register the device
6061  *		id with the ddi framework.
6062  *
6063  *   Arguments: devi - the system's dev_info_t for the device.
6064  *		un - driver soft state (unit) structure
6065  *		reservation_flag - indicates if a reservation conflict
6066  *		occurred during attach
6067  *
6068  *     Context: Kernel Thread
6069  */
6070 static void
6071 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
6072 {
6073 	int		rval		= 0;
6074 	uchar_t		*inq80		= NULL;
6075 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
6076 	size_t		inq80_resid	= 0;
6077 	uchar_t		*inq83		= NULL;
6078 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
6079 	size_t		inq83_resid	= 0;
6080 
6081 	ASSERT(un != NULL);
6082 	ASSERT(mutex_owned(SD_MUTEX(un)));
6083 	ASSERT((SD_DEVINFO(un)) == devi);
6084 
6085 	/*
6086 	 * This is the case of antiquated Sun disk drives that have the
6087 	 * FAB_DEVID property set in the disk_table.  These drives
6088 	 * manage their devids by storing them in the last 2 available
6089 	 * sectors on the drive and have them fabricated by the ddi layer
6090 	 * by calling ddi_devid_init and passing the DEVID_FAB flag.
6091 	 */
6092 	if (un->un_f_opt_fab_devid == TRUE) {
6093 		/*
6094 		 * Depending on EINVAL isn't reliable, since a reserved disk
6095 		 * may result in invalid geometry, so check to make sure a
6096 		 * reservation conflict did not occur during attach.
6097 		 */
6098 		if ((sd_get_devid(un) == EINVAL) &&
6099 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
6100 			/*
6101 			 * The devid is invalid AND there is no reservation
6102 			 * conflict.  Fabricate a new devid.
6103 			 */
6104 			(void) sd_create_devid(un);
6105 		}
6106 
6107 		/* Register the devid if it exists */
6108 		if (un->un_devid != NULL) {
6109 			(void) ddi_devid_register(SD_DEVINFO(un),
6110 			    un->un_devid);
6111 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6112 			    "sd_register_devid: Devid Fabricated\n");
6113 		}
6114 		return;
6115 	}
6116 
6117 	/*
6118 	 * We check the availability of the World Wide Name (0x83) and Unit
6119 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
6120 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
6121 	 * 0x83 is available, that is the best choice.  Our next choice is
6122 	 * 0x80.  If neither is available, we munge the devid from the device
6123 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
6124 	 * to fabricate a devid for non-Sun qualified disks.
6125 	 */
6126 	if (sd_check_vpd_page_support(un) == 0) {
6127 		/* collect page 80 data if available */
6128 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
6129 
6130 			mutex_exit(SD_MUTEX(un));
6131 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
6132 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
6133 			    0x01, 0x80, &inq80_resid);
6134 
6135 			if (rval != 0) {
6136 				kmem_free(inq80, inq80_len);
6137 				inq80 = NULL;
6138 				inq80_len = 0;
6139 			}
6140 			mutex_enter(SD_MUTEX(un));
6141 		}
6142 
6143 		/* collect page 83 data if available */
6144 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6145 			mutex_exit(SD_MUTEX(un));
6146 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6147 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6148 			    0x01, 0x83, &inq83_resid);
6149 
6150 			if (rval != 0) {
6151 				kmem_free(inq83, inq83_len);
6152 				inq83 = NULL;
6153 				inq83_len = 0;
6154 			}
6155 			mutex_enter(SD_MUTEX(un));
6156 		}
6157 	}
6158 
6159 	/* encode best devid possible based on data available */
6160 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6161 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6162 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6163 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6164 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6165 
6166 		/* devid successfully encoded, register devid */
6167 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6168 
6169 	} else {
6170 		/*
6171 		 * Unable to encode a devid based on available data.
6172 		 * This is not a Sun qualified disk.  Older Sun disk
6173 		 * drives that have the SD_FAB_DEVID property
6174 		 * set in the disk_table and non-Sun qualified
6175 		 * disks are treated in the same manner.  These
6176 		 * drives manage their devids by storing them in
6177 		 * the last 2 available sectors on the drive and
6178 		 * have them fabricated by the ddi layer by
6179 		 * calling ddi_devid_init and passing the
6180 		 * DEVID_FAB flag.
6181 		 * Create a fabricated devid only if one does
6182 		 * not already exist.
6183 		 */
6184 		if (sd_get_devid(un) == EINVAL) {
6185 			(void) sd_create_devid(un);
6186 		}
6187 		un->un_f_opt_fab_devid = TRUE;
6188 
6189 		/* Register the devid if it exists */
6190 		if (un->un_devid != NULL) {
6191 			(void) ddi_devid_register(SD_DEVINFO(un),
6192 			    un->un_devid);
6193 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6194 			    "sd_register_devid: devid fabricated using "
6195 			    "ddi framework\n");
6196 		}
6197 	}
6198 
6199 	/* clean up resources */
6200 	if (inq80 != NULL) {
6201 		kmem_free(inq80, inq80_len);
6202 	}
6203 	if (inq83 != NULL) {
6204 		kmem_free(inq83, inq83_len);
6205 	}
6206 }
6207 
6208 static daddr_t
6209 sd_get_devid_block(struct sd_lun *un)
6210 {
6211 	daddr_t			spc, blk, head, cyl;
6212 
6213 	if ((un->un_f_geometry_is_valid == FALSE) ||
6214 	    (un->un_solaris_size < DK_LABEL_LOC))
6215 		return (-1);
6216 
6217 	if (un->un_vtoc.v_sanity != VTOC_SANE) {
6218 		/* EFI labeled */
6219 		if (un->un_reserved != -1) {
6220 			blk = un->un_map[un->un_reserved].dkl_cylno;
6221 		} else {
6222 			return (-1);
6223 		}
6224 	} else {
6225 		/* SMI labeled */
6226 		/* this geometry doesn't allow us to write a devid */
6227 		if (un->un_g.dkg_acyl < 2) {
6228 			return (-1);
6229 		}
6230 
6231 		/*
6232 		 * Subtracting 2 guarantees that the next-to-last
6233 		 * cylinder is used
6234 		 */
6235 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6236 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6237 		head = un->un_g.dkg_nhead - 1;
6238 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6239 		    (head * un->un_g.dkg_nsect) + 1;
6240 	}
6241 	return (blk);
6242 }
6243 
6244 /*
6245  *    Function: sd_get_devid
6246  *
6247  * Description: This routine will return 0 if a valid device id has been
6248  *		obtained from the target and stored in the soft state. If a
6249  *		valid device id has not been previously read and stored, a
6250  *		read attempt will be made.
6251  *
6252  *   Arguments: un - driver soft state (unit) structure
6253  *
6254  * Return Code: 0 if we successfully get the device id
6255  *
6256  *     Context: Kernel Thread
6257  */
6258 
6259 static int
6260 sd_get_devid(struct sd_lun *un)
6261 {
6262 	struct dk_devid		*dkdevid;
6263 	ddi_devid_t		tmpid;
6264 	uint_t			*ip;
6265 	size_t			sz;
6266 	daddr_t			blk;
6267 	int			status;
6268 	int			chksum;
6269 	int			i;
6270 	size_t			buffer_size;
6271 
6272 	ASSERT(un != NULL);
6273 	ASSERT(mutex_owned(SD_MUTEX(un)));
6274 
6275 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6276 	    un);
6277 
6278 	if (un->un_devid != NULL) {
6279 		return (0);
6280 	}
6281 
6282 	blk = sd_get_devid_block(un);
6283 	if (blk < 0)
6284 		return (EINVAL);
6285 
6286 	/*
6287 	 * Read and verify device id, stored in the reserved cylinders at the
6288 	 * end of the disk. The backup label is on the odd sectors of the last
6289 	 * track of the last cylinder. The device id will be on a track of the
6290 	 * next-to-last cylinder.
6291 	 */
6292 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6293 	mutex_exit(SD_MUTEX(un));
6294 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6295 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6296 	    SD_PATH_DIRECT);
6297 	if (status != 0) {
6298 		goto error;
6299 	}
6300 
6301 	/* Validate the revision */
6302 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6303 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6304 		status = EINVAL;
6305 		goto error;
6306 	}
6307 
6308 	/* Calculate the checksum */
6309 	chksum = 0;
6310 	ip = (uint_t *)dkdevid;
6311 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6312 	    i++) {
6313 		chksum ^= ip[i];
6314 	}
6315 
6316 	/* Compare the checksums */
6317 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6318 		status = EINVAL;
6319 		goto error;
6320 	}
6321 
6322 	/* Validate the device id */
6323 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6324 		status = EINVAL;
6325 		goto error;
6326 	}
6327 
6328 	/*
6329 	 * Store the device id in the driver soft state
6330 	 */
6331 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6332 	tmpid = kmem_alloc(sz, KM_SLEEP);
6333 
6334 	mutex_enter(SD_MUTEX(un));
6335 
6336 	un->un_devid = tmpid;
6337 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6338 
6339 	kmem_free(dkdevid, buffer_size);
6340 
6341 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6342 
6343 	return (status);
6344 error:
6345 	mutex_enter(SD_MUTEX(un));
6346 	kmem_free(dkdevid, buffer_size);
6347 	return (status);
6348 }
6349 
6350 
6351 /*
6352  *    Function: sd_create_devid
6353  *
6354  * Description: This routine will fabricate the device id and write it
6355  *		to the disk.
6356  *
6357  *   Arguments: un - driver soft state (unit) structure
6358  *
6359  * Return Code: the fabricated device id, or NULL on failure
6360  *
6361  *     Context: Kernel Thread
6362  */
6363 
6364 static ddi_devid_t
6365 sd_create_devid(struct sd_lun *un)
6366 {
6367 	ASSERT(un != NULL);
6368 
6369 	/* Fabricate the devid */
6370 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6371 	    == DDI_FAILURE) {
6372 		return (NULL);
6373 	}
6374 
6375 	/* Write the devid to disk */
6376 	if (sd_write_deviceid(un) != 0) {
6377 		ddi_devid_free(un->un_devid);
6378 		un->un_devid = NULL;
6379 	}
6380 
6381 	return (un->un_devid);
6382 }
6383 
6384 
6385 /*
6386  *    Function: sd_write_deviceid
6387  *
6388  * Description: This routine will write the device id to the disk
6389  *		reserved sector.
6390  *
6391  *   Arguments: un - driver soft state (unit) structure
6392  *
6393  * Return Code: -1 if the devid block cannot be determined
6394  *		value returned by sd_send_scsi_WRITE
6395  *
6396  *     Context: Kernel Thread
6397  */
6398 
6399 static int
6400 sd_write_deviceid(struct sd_lun *un)
6401 {
6402 	struct dk_devid		*dkdevid;
6403 	daddr_t			blk;
6404 	uint_t			*ip, chksum;
6405 	int			status;
6406 	int			i;
6407 
6408 	ASSERT(mutex_owned(SD_MUTEX(un)));
6409 
6410 	blk = sd_get_devid_block(un);
6411 	if (blk < 0)
6412 		return (-1);
6413 	mutex_exit(SD_MUTEX(un));
6414 
6415 	/* Allocate the buffer */
6416 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6417 
6418 	/* Fill in the revision */
6419 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6420 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6421 
6422 	/* Copy in the device id */
6423 	mutex_enter(SD_MUTEX(un));
6424 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6425 	    ddi_devid_sizeof(un->un_devid));
6426 	mutex_exit(SD_MUTEX(un));
6427 
6428 	/* Calculate the checksum */
6429 	chksum = 0;
6430 	ip = (uint_t *)dkdevid;
6431 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6432 	    i++) {
6433 		chksum ^= ip[i];
6434 	}
6435 
6436 	/* Fill-in checksum */
6437 	DKD_FORMCHKSUM(chksum, dkdevid);
6438 
6439 	/* Write the reserved sector */
6440 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6441 	    SD_PATH_DIRECT);
6442 
6443 	kmem_free(dkdevid, un->un_sys_blocksize);
6444 
6445 	mutex_enter(SD_MUTEX(un));
6446 	return (status);
6447 }
6448 
6449 
6450 /*
6451  *    Function: sd_check_vpd_page_support
6452  *
6453  * Description: This routine sends an inquiry command with the EVPD bit set and
6454  *		a page code of 0x00 to the device. It is used to determine which
6455  *		vital product data pages are available to find the devid. We
6456  *		are looking for pages 0x83 or 0x80.  If we return -1, the
6457  *		device does not support that command.
6458  *
6459  *   Arguments: un  - driver soft state (unit) structure
6460  *
6461  * Return Code: 0 - success
6462  *		-1 - the device does not support VPD pages
6463  *
6464  *     Context: This routine can sleep.
6465  */
6466 
6467 static int
6468 sd_check_vpd_page_support(struct sd_lun *un)
6469 {
6470 	uchar_t	*page_list	= NULL;
6471 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6472 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6473 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6474 	int    	rval		= 0;
6475 	int	counter;
6476 
6477 	ASSERT(un != NULL);
6478 	ASSERT(mutex_owned(SD_MUTEX(un)));
6479 
6480 	mutex_exit(SD_MUTEX(un));
6481 
6482 	/*
6483 	 * We'll set the page length to the maximum to save figuring it out
6484 	 * with an additional call.
6485 	 */
6486 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6487 
6488 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6489 	    page_code, NULL);
6490 
6491 	mutex_enter(SD_MUTEX(un));
6492 
6493 	/*
6494 	 * Now we must validate that the device accepted the command, as some
6495 	 * drives do not support it.  If the drive does support it, we will
6496 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6497 	 * not, we return -1.
6498 	 */
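	/*
	 * Per the SCSI spec, the Supported VPD Pages page (0x00) is laid
	 * out as: byte 0 = peripheral qualifier/device type, byte 1 = page
	 * code, byte 3 = page length, and bytes 4 onward = the list of
	 * supported page codes in ascending order.
	 */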
6499 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6500 		/* Loop to find one of the 2 pages we need */
6501 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6502 
6503 		/*
6504 		 * Pages are returned in ascending order, and 0x83 is what we
6505 		 * are hoping for.
6506 		 */
6507 		while ((page_list[counter] <= 0x83) &&
6508 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6509 		    VPD_HEAD_OFFSET))) {
6510 			/*
6511 			 * Add VPD_HEAD_OFFSET (3) because page_list[3] counts
6512 			 * the page codes that follow the 4-byte header.
6513 			 */
6514 
6515 			switch (page_list[counter]) {
6516 			case 0x00:
6517 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6518 				break;
6519 			case 0x80:
6520 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6521 				break;
6522 			case 0x81:
6523 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6524 				break;
6525 			case 0x82:
6526 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6527 				break;
6528 			case 0x83:
6529 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6530 				break;
6531 			}
6532 			counter++;
6533 		}
6534 
6535 	} else {
6536 		rval = -1;
6537 
6538 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6539 		    "sd_check_vpd_page_support: This drive does not implement "
6540 		    "VPD pages.\n");
6541 	}
6542 
6543 	kmem_free(page_list, page_length);
6544 
6545 	return (rval);
6546 }
6547 
6548 
6549 /*
6550  *    Function: sd_setup_pm
6551  *
6552  * Description: Initialize Power Management on the device
6553  *
6554  *     Context: Kernel Thread
6555  */
6556 
6557 static void
6558 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6559 {
6560 	uint_t	log_page_size;
6561 	uchar_t	*log_page_data;
6562 	int	rval;
6563 
6564 	/*
6565 	 * Since we are called from attach, holding a mutex for
6566 	 * un is unnecessary. Because some of the routines called
6567 	 * from here require SD_MUTEX to not be held, assert this
6568 	 * right up front.
6569 	 */
6570 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6571 	/*
6572 	 * Since the sd device does not have the 'reg' property,
6573 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6574 	 * The following code is to tell cpr that this device
6575 	 * DOES need to be suspended and resumed.
6576 	 */
6577 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6578 	    "pm-hardware-state", "needs-suspend-resume");
6579 
6580 	/*
6581 	 * This complies with the new power management framework
6582 	 * for certain desktop machines. Create the pm_components
6583 	 * property as a string array property.
6584 	 */
6585 	if (un->un_f_pm_supported) {
6586 		/*
6587 		 * Not all devices have a motor, so try it first.
6588 		 * Some devices may return ILLEGAL REQUEST, and some
6589 		 * may hang.
6590 		 * The following START_STOP_UNIT is used to check whether
6591 		 * the target device has a motor.
6592 		 */
6593 		un->un_f_start_stop_supported = TRUE;
6594 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6595 		    SD_PATH_DIRECT) != 0) {
6596 			un->un_f_start_stop_supported = FALSE;
6597 		}
6598 
6599 		/*
6600 		 * Create the pm properties anyway; otherwise the parent
6601 		 * can't go to sleep.
6602 		 */
6603 		(void) sd_create_pm_components(devi, un);
6604 		un->un_f_pm_is_enabled = TRUE;
6605 		return;
6606 	}
6607 
6608 	if (!un->un_f_log_sense_supported) {
6609 		un->un_power_level = SD_SPINDLE_ON;
6610 		un->un_f_pm_is_enabled = FALSE;
6611 		return;
6612 	}
6613 
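	/*
	 * sd_log_page_supported() returns -1 on error, 0 if the page is
	 * not listed in the supported pages, and 1 if it is; each case
	 * is handled separately below.
	 */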
6614 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6615 
6616 #ifdef	SDDEBUG
6617 	if (sd_force_pm_supported) {
6618 		/* Force a successful result */
6619 		rval = 1;
6620 	}
6621 #endif
6622 
6623 	/*
6624 	 * If the start-stop cycle counter log page is not supported
6625 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6626 	 * then we should not create the pm_components property.
6627 	 */
6628 	if (rval == -1) {
6629 		/*
6630 		 * Error.
6631 		 * Reading log sense failed, most likely this is
6632 		 * an older drive that does not support log sense.
6633 		 * If this fails auto-pm is not supported.
6634 		 */
6635 		un->un_power_level = SD_SPINDLE_ON;
6636 		un->un_f_pm_is_enabled = FALSE;
6637 
6638 	} else if (rval == 0) {
6639 		/*
6640 		 * Page not found.
6641 		 * The start stop cycle counter is implemented as page
6642 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6643 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6644 		 */
6645 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6646 			/*
6647 			 * Page found, use this one.
6648 			 */
6649 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6650 			un->un_f_pm_is_enabled = TRUE;
6651 		} else {
6652 			/*
6653 			 * Error or page not found.
6654 			 * auto-pm is not supported for this device.
6655 			 */
6656 			un->un_power_level = SD_SPINDLE_ON;
6657 			un->un_f_pm_is_enabled = FALSE;
6658 		}
6659 	} else {
6660 		/*
6661 		 * Page found, use it.
6662 		 */
6663 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6664 		un->un_f_pm_is_enabled = TRUE;
6665 	}
6666 
6667 
6668 	if (un->un_f_pm_is_enabled == TRUE) {
6669 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6670 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6671 
6672 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6673 		    log_page_size, un->un_start_stop_cycle_page,
6674 		    0x01, 0, SD_PATH_DIRECT);
6675 #ifdef	SDDEBUG
6676 		if (sd_force_pm_supported) {
6677 			/* Force a successful result */
6678 			rval = 0;
6679 		}
6680 #endif
6681 
6682 		/*
6683 		 * If the LOG SENSE for the start/stop cycle counter page
6684 		 * succeeds, then power management is supported and we can
6685 		 * enable auto-pm.
6686 		 */
6687 		if (rval == 0)  {
6688 			(void) sd_create_pm_components(devi, un);
6689 		} else {
6690 			un->un_power_level = SD_SPINDLE_ON;
6691 			un->un_f_pm_is_enabled = FALSE;
6692 		}
6693 
6694 		kmem_free(log_page_data, log_page_size);
6695 	}
6696 }
6697 
6698 
6699 /*
6700  *    Function: sd_create_pm_components
6701  *
6702  * Description: Initialize PM property.
6703  *
6704  *     Context: Kernel thread context
6705  */
6706 
6707 static void
6708 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6709 {
6710 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6711 
6712 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6713 
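	/*
	 * This registers a single power component, the spindle motor, with
	 * two power levels: 0 (off, SD_SPINDLE_OFF) and 1 (on,
	 * SD_SPINDLE_ON).
	 */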
6714 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6715 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6716 		/*
6717 		 * When components are initially created they are idle,
6718 		 * power up any non-removables.
6719 		 * Note: the return value of pm_raise_power can't be used
6720 		 * for determining if PM should be enabled for this device.
6721 		 * Even if you check the return values and remove this
6722 		 * property created above, the PM framework will not honor the
6723 		 * change after the first call to pm_raise_power. Hence,
6724 		 * removal of that property does not help if pm_raise_power
6725 		 * fails. In the case of removable media, the start/stop
6726 		 * will fail if the media is not present.
6727 		 */
6728 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6729 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6730 			mutex_enter(SD_MUTEX(un));
6731 			un->un_power_level = SD_SPINDLE_ON;
6732 			mutex_enter(&un->un_pm_mutex);
6733 			/* Set to on and not busy. */
6734 			un->un_pm_count = 0;
6735 		} else {
6736 			mutex_enter(SD_MUTEX(un));
6737 			un->un_power_level = SD_SPINDLE_OFF;
6738 			mutex_enter(&un->un_pm_mutex);
6739 			/* Set to off. */
6740 			un->un_pm_count = -1;
6741 		}
6742 		mutex_exit(&un->un_pm_mutex);
6743 		mutex_exit(SD_MUTEX(un));
6744 	} else {
6745 		un->un_power_level = SD_SPINDLE_ON;
6746 		un->un_f_pm_is_enabled = FALSE;
6747 	}
6748 }
6749 
6750 
6751 /*
6752  *    Function: sd_ddi_suspend
6753  *
6754  * Description: Performs system power-down operations. This includes
6755  *		setting the drive state to indicate its suspended so
6756  *		setting the drive state to indicate it is suspended, so
6757  *		all commands that are in transport or queued to a timer
6758  *		for retry to complete. All timeout threads are cancelled.
6759  *
6760  * Return Code: DDI_FAILURE or DDI_SUCCESS
6761  *
6762  *     Context: Kernel thread context
6763  */
6764 
6765 static int
6766 sd_ddi_suspend(dev_info_t *devi)
6767 {
6768 	struct	sd_lun	*un;
6769 	clock_t		wait_cmds_complete;
6770 
6771 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6772 	if (un == NULL) {
6773 		return (DDI_FAILURE);
6774 	}
6775 
6776 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6777 
6778 	mutex_enter(SD_MUTEX(un));
6779 
6780 	/* Return success if the device is already suspended. */
6781 	if (un->un_state == SD_STATE_SUSPENDED) {
6782 		mutex_exit(SD_MUTEX(un));
6783 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6784 		    "device already suspended, exiting\n");
6785 		return (DDI_SUCCESS);
6786 	}
6787 
6788 	/* Return failure if the device is being used by HA */
6789 	if (un->un_resvd_status &
6790 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6791 		mutex_exit(SD_MUTEX(un));
6792 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6793 		    "device in use by HA, exiting\n");
6794 		return (DDI_FAILURE);
6795 	}
6796 
6797 	/*
6798 	 * Return failure if the device is in a resource wait
6799 	 * or power changing state.
6800 	 */
6801 	if ((un->un_state == SD_STATE_RWAIT) ||
6802 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6803 		mutex_exit(SD_MUTEX(un));
6804 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6805 		    "device in resource wait state, exiting\n");
6806 		return (DDI_FAILURE);
6807 	}
6808 
6809 
6810 	un->un_save_state = un->un_last_state;
6811 	New_state(un, SD_STATE_SUSPENDED);
6812 
6813 	/*
6814 	 * Wait for all commands that are in transport or queued to a timer
6815 	 * for retry to complete.
6816 	 *
6817 	 * While waiting, no new commands will be accepted or sent because of
6818 	 * the new state we set above.
6819 	 *
6820 	 * Wait till current operation has completed. If we are in the resource
6821 	 * wait state (with an intr outstanding) then we need to wait till the
6822 	 * intr completes and starts the next cmd. We want to wait for
6823 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6824 	 */
6825 	wait_cmds_complete = ddi_get_lbolt() +
6826 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6827 
6828 	while (un->un_ncmds_in_transport != 0) {
6829 		/*
6830 		 * Fail if commands do not finish in the specified time.
6831 		 */
6832 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6833 		    wait_cmds_complete) == -1) {
6834 			/*
6835 			 * Undo the state changes made above. Everything
6836 			 * must go back to its original value.
6837 			 */
6838 			Restore_state(un);
6839 			un->un_last_state = un->un_save_state;
6840 			/* Wake up any threads that might be waiting. */
6841 			cv_broadcast(&un->un_suspend_cv);
6842 			mutex_exit(SD_MUTEX(un));
6843 			SD_ERROR(SD_LOG_IO_PM, un,
6844 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6845 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6846 			return (DDI_FAILURE);
6847 		}
6848 	}
6849 
6850 	/*
6851 	 * Cancel SCSI watch thread and timeouts, if any are active
6852 	 */
6853 
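	/*
	 * For each timeout below, the id is captured and cleared while
	 * SD_MUTEX is held, and the mutex is then dropped around the
	 * untimeout(9F) call. untimeout() may wait for the timeout
	 * handler to complete, and the handler may need SD_MUTEX, so
	 * calling it with the mutex held could deadlock.
	 */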
6854 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6855 		opaque_t temp_token = un->un_swr_token;
6856 		mutex_exit(SD_MUTEX(un));
6857 		scsi_watch_suspend(temp_token);
6858 		mutex_enter(SD_MUTEX(un));
6859 	}
6860 
6861 	if (un->un_reset_throttle_timeid != NULL) {
6862 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6863 		un->un_reset_throttle_timeid = NULL;
6864 		mutex_exit(SD_MUTEX(un));
6865 		(void) untimeout(temp_id);
6866 		mutex_enter(SD_MUTEX(un));
6867 	}
6868 
6869 	if (un->un_dcvb_timeid != NULL) {
6870 		timeout_id_t temp_id = un->un_dcvb_timeid;
6871 		un->un_dcvb_timeid = NULL;
6872 		mutex_exit(SD_MUTEX(un));
6873 		(void) untimeout(temp_id);
6874 		mutex_enter(SD_MUTEX(un));
6875 	}
6876 
6877 	mutex_enter(&un->un_pm_mutex);
6878 	if (un->un_pm_timeid != NULL) {
6879 		timeout_id_t temp_id = un->un_pm_timeid;
6880 		un->un_pm_timeid = NULL;
6881 		mutex_exit(&un->un_pm_mutex);
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) untimeout(temp_id);
6884 		mutex_enter(SD_MUTEX(un));
6885 	} else {
6886 		mutex_exit(&un->un_pm_mutex);
6887 	}
6888 
6889 	if (un->un_retry_timeid != NULL) {
6890 		timeout_id_t temp_id = un->un_retry_timeid;
6891 		un->un_retry_timeid = NULL;
6892 		mutex_exit(SD_MUTEX(un));
6893 		(void) untimeout(temp_id);
6894 		mutex_enter(SD_MUTEX(un));
6895 	}
6896 
6897 	if (un->un_direct_priority_timeid != NULL) {
6898 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6899 		un->un_direct_priority_timeid = NULL;
6900 		mutex_exit(SD_MUTEX(un));
6901 		(void) untimeout(temp_id);
6902 		mutex_enter(SD_MUTEX(un));
6903 	}
6904 
6905 	if (un->un_f_is_fibre == TRUE) {
6906 		/*
6907 		 * Remove callbacks for insert and remove events
6908 		 */
6909 		if (un->un_insert_event != NULL) {
6910 			mutex_exit(SD_MUTEX(un));
6911 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6912 			mutex_enter(SD_MUTEX(un));
6913 			un->un_insert_event = NULL;
6914 		}
6915 
6916 		if (un->un_remove_event != NULL) {
6917 			mutex_exit(SD_MUTEX(un));
6918 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6919 			mutex_enter(SD_MUTEX(un));
6920 			un->un_remove_event = NULL;
6921 		}
6922 	}
6923 
6924 	mutex_exit(SD_MUTEX(un));
6925 
6926 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6927 
6928 	return (DDI_SUCCESS);
6929 }
6930 
6931 
6932 /*
6933  *    Function: sd_ddi_pm_suspend
6934  *
6935  * Description: Set the drive state to low power.
6936  *		Someone else is required to actually change the drive
6937  *		power level.
6938  *
6939  *   Arguments: un - driver soft state (unit) structure
6940  *
6941  * Return Code: DDI_FAILURE or DDI_SUCCESS
6942  *
6943  *     Context: Kernel thread context
6944  */
6945 
6946 static int
6947 sd_ddi_pm_suspend(struct sd_lun *un)
6948 {
6949 	ASSERT(un != NULL);
6950 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6951 
6952 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6953 	mutex_enter(SD_MUTEX(un));
6954 
6955 	/*
6956 	 * Exit if power management is not enabled for this device, or if
6957 	 * the device is being used by HA.
6958 	 */
6959 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6960 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6961 		mutex_exit(SD_MUTEX(un));
6962 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6963 		return (DDI_SUCCESS);
6964 	}
6965 
6966 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6967 	    un->un_ncmds_in_driver);
6968 
6969 	/*
6970 	 * See if the device is not busy, i.e.:
6971 	 *    - we have no commands in the driver for this device
6972 	 *    - not waiting for resources
6973 	 */
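	/*
	 * Note: un_pm_count == -1 is the same "off and idle" sentinel used
	 * by sd_create_pm_components() when a device starts out powered
	 * down.
	 */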
6974 	if ((un->un_ncmds_in_driver == 0) &&
6975 	    (un->un_state != SD_STATE_RWAIT)) {
6976 		/*
6977 		 * The device is not busy, so it is OK to go to low power state.
6978 		 * Indicate low power, but rely on someone else to actually
6979 		 * change it.
6980 		 */
6981 		mutex_enter(&un->un_pm_mutex);
6982 		un->un_pm_count = -1;
6983 		mutex_exit(&un->un_pm_mutex);
6984 		un->un_power_level = SD_SPINDLE_OFF;
6985 	}
6986 
6987 	mutex_exit(SD_MUTEX(un));
6988 
6989 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6990 
6991 	return (DDI_SUCCESS);
6992 }
6993 
6994 
6995 /*
6996  *    Function: sd_ddi_resume
6997  *
6998  * Description: Performs system power-up operations.
6999  *
7000  * Return Code: DDI_SUCCESS
7001  *		DDI_FAILURE
7002  *
7003  *     Context: Kernel thread context
7004  */
7005 
7006 static int
7007 sd_ddi_resume(dev_info_t *devi)
7008 {
7009 	struct	sd_lun	*un;
7010 
7011 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
7012 	if (un == NULL) {
7013 		return (DDI_FAILURE);
7014 	}
7015 
7016 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
7017 
7018 	mutex_enter(SD_MUTEX(un));
7019 	Restore_state(un);
7020 
7021 	/*
7022 	 * Restore the state which was saved, to give
7023 	 * the right state in un_last_state.
7024 	 */
7025 	un->un_last_state = un->un_save_state;
7026 	/*
7027 	 * Note: throttle comes back at full.
7028 	 * Also note: this MUST be done before calling pm_raise_power
7029 	 * otherwise the system can get hung in biowait. The scenario where
7030 	 * this'll happen is under cpr suspend. Writing of the system
7031 	 * state goes through sddump, which writes 0 to un_throttle. If
7032 	 * writing the system state then fails, example if the partition is
7033 	 * too small, then cpr attempts a resume. If throttle isn't restored
7034 	 * from the saved value until after calling pm_raise_power then
7035 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
7036 	 * in biowait.
7037 	 */
7038 	un->un_throttle = un->un_saved_throttle;
7039 
7040 	/*
7041 	 * The chance of failure is very rare, as the only command issued in
7042 	 * the power entry point is START, when transitioning from 0->1 or
7043 	 * unknown->1. Put the device in the SPINDLE_ON state irrespective of
7044 	 * the state at which suspend was done. Ignore the return value, as
7045 	 * the resume should not be failed. In the case of removable media,
7046 	 * the media need not be inserted and hence there is a chance that
7047 	 * raise power will fail with media not present.
7048 	 */
7049 	if (un->un_f_attach_spinup) {
7050 		mutex_exit(SD_MUTEX(un));
7051 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7052 		mutex_enter(SD_MUTEX(un));
7053 	}
7054 
7055 	/*
7056 	 * Don't broadcast to the suspend cv and therefore possibly
7057 	 * start I/O until after power has been restored.
7058 	 */
7059 	cv_broadcast(&un->un_suspend_cv);
7060 	cv_broadcast(&un->un_state_cv);
7061 
7062 	/* restart thread */
7063 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7064 		scsi_watch_resume(un->un_swr_token);
7065 	}
7066 
7067 #if (defined(__fibre))
7068 	if (un->un_f_is_fibre == TRUE) {
7069 		/*
7070 		 * Add callbacks for insert and remove events
7071 		 */
7072 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7073 			sd_init_event_callbacks(un);
7074 		}
7075 	}
7076 #endif
7077 
7078 	/*
7079 	 * Transport any pending commands to the target.
7080 	 *
7081 	 * If this is a low-activity device, commands in the queue will have
7082 	 * to wait until new commands come in, which may take a while. Also, we
7083 	 * specifically don't check un_ncmds_in_transport because we know that
7084 	 * there really are no commands in progress after the unit was
7085 	 * suspended and we could have reached the throttle level, been
7086 	 * suspended, and have no new commands coming in for a while. Highly
7087 	 * unlikely, but so is the low-activity disk scenario.
7088 	 */
7089 	ddi_xbuf_dispatch(un->un_xbuf_attr);
7090 
7091 	sd_start_cmds(un, NULL);
7092 	mutex_exit(SD_MUTEX(un));
7093 
7094 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7095 
7096 	return (DDI_SUCCESS);
7097 }
7098 
7099 
7100 /*
7101  *    Function: sd_ddi_pm_resume
7102  *
7103  * Description: Set the drive state to powered on.
7104  *		Someone else is required to actually change the drive
7105  *		power level.
7106  *
7107  *   Arguments: un - driver soft state (unit) structure
7108  *
7109  * Return Code: DDI_SUCCESS
7110  *
7111  *     Context: Kernel thread context
7112  */
7113 
7114 static int
7115 sd_ddi_pm_resume(struct sd_lun *un)
7116 {
7117 	ASSERT(un != NULL);
7118 
7119 	ASSERT(!mutex_owned(SD_MUTEX(un)));
7120 	mutex_enter(SD_MUTEX(un));
7121 	un->un_power_level = SD_SPINDLE_ON;
7122 
7123 	ASSERT(!mutex_owned(&un->un_pm_mutex));
7124 	mutex_enter(&un->un_pm_mutex);
7125 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7126 		un->un_pm_count++;
7127 		ASSERT(un->un_pm_count == 0);
7128 		/*
7129 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7130 		 * un_suspend_cv is for a system resume, not a power management
7131 		 * device resume. (4297749)
7132 		 *	 cv_broadcast(&un->un_suspend_cv);
7133 		 */
7134 	}
7135 	mutex_exit(&un->un_pm_mutex);
7136 	mutex_exit(SD_MUTEX(un));
7137 
7138 	return (DDI_SUCCESS);
7139 }
7140 
7141 
7142 /*
7143  *    Function: sd_pm_idletimeout_handler
7144  *
7145  * Description: A timer routine that's active only while a device is busy.
7146  *		The purpose is to extend slightly the pm framework's busy
7147  *		view of the device to prevent busy/idle thrashing for
7148  *		back-to-back commands. Do this by comparing the current time
7149  *		to the time at which the last command completed and when the
7150  *		difference is greater than sd_pm_idletime, call
7151  *		pm_idle_component. In addition to indicating idle to the pm
7152  *		framework, update the chain type to again use the internal pm
7153  *		layers of the driver.
7154  *
7155  *   Arguments: arg - driver soft state (unit) structure
7156  *
7157  *     Context: Executes in a timeout(9F) thread context
7158  */
7159 
7160 static void
7161 sd_pm_idletimeout_handler(void *arg)
7162 {
7163 	struct sd_lun *un = arg;
7164 
7165 	time_t	now;
7166 
7167 	mutex_enter(&sd_detach_mutex);
7168 	if (un->un_detach_count != 0) {
7169 		/* Abort if the instance is detaching */
7170 		mutex_exit(&sd_detach_mutex);
7171 		return;
7172 	}
7173 	mutex_exit(&sd_detach_mutex);
7174 
7175 	now = ddi_get_time();
7176 	/*
7177 	 * Grab both mutexes, in the proper order, since we're accessing
7178 	 * both PM and softstate variables.
7179 	 */
7180 	mutex_enter(SD_MUTEX(un));
7181 	mutex_enter(&un->un_pm_mutex);
7182 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7183 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7184 		/*
7185 		 * Update the chain types.
7186 		 * This takes effect on the next new command received.
7187 		 */
7188 		if (un->un_f_non_devbsize_supported) {
7189 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7190 		} else {
7191 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7192 		}
7193 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7194 
7195 		SD_TRACE(SD_LOG_IO_PM, un,
7196 		    "sd_pm_idletimeout_handler: idling device\n");
7197 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7198 		un->un_pm_idle_timeid = NULL;
7199 	} else {
7200 		un->un_pm_idle_timeid =
7201 		    timeout(sd_pm_idletimeout_handler, un,
7202 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7203 	}
7204 	mutex_exit(&un->un_pm_mutex);
7205 	mutex_exit(SD_MUTEX(un));
7206 }
7207 
7208 
7209 /*
7210  *    Function: sd_pm_timeout_handler
7211  *
7212  * Description: Callback to tell framework we are idle.
7213  *
7214  *     Context: timeout(9F) thread context.
7215  */
7216 
7217 static void
7218 sd_pm_timeout_handler(void *arg)
7219 {
7220 	struct sd_lun *un = arg;
7221 
7222 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7223 	mutex_enter(&un->un_pm_mutex);
7224 	un->un_pm_timeid = NULL;
7225 	mutex_exit(&un->un_pm_mutex);
7226 }
7227 
7228 
7229 /*
7230  *    Function: sdpower
7231  *
7232  * Description: PM entry point.
7233  *
7234  * Return Code: DDI_SUCCESS
7235  *		DDI_FAILURE
7236  *
7237  *     Context: Kernel thread context
7238  */
7239 
7240 static int
7241 sdpower(dev_info_t *devi, int component, int level)
7242 {
7243 	struct sd_lun	*un;
7244 	int		instance;
7245 	int		rval = DDI_SUCCESS;
7246 	uint_t		i, log_page_size, maxcycles, ncycles;
7247 	uchar_t		*log_page_data;
7248 	int		log_sense_page;
7249 	int		medium_present;
7250 	time_t		intvlp;
7251 	dev_t		dev;
7252 	struct pm_trans_data	sd_pm_tran_data;
7253 	uchar_t		save_state;
7254 	int		sval;
7255 	uchar_t		state_before_pm;
7256 	int		got_semaphore_here;
7257 
7258 	instance = ddi_get_instance(devi);
7259 
7260 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7261 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7262 	    component != 0) {
7263 		return (DDI_FAILURE);
7264 	}
7265 
7266 	dev = sd_make_device(SD_DEVINFO(un));
7267 
7268 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7269 
7270 	/*
7271 	 * Must synchronize power down with close.
7272 	 * Attempt to decrement/acquire the open/close semaphore,
7273 	 * but do NOT wait on it. If it's not greater than zero,
7274 	 * ie. it can't be decremented without waiting, then
7275 	 * someone else, either open or close, already has it
7276 	 * and the try returns 0. Use that knowledge here to determine
7277 	 * if it's OK to change the device power level.
7278 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7279 	 * here.
7280 	 */
7281 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7282 
7283 	mutex_enter(SD_MUTEX(un));
7284 
7285 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7286 	    un->un_ncmds_in_driver);
7287 
7288 	/*
7289 	 * A non-zero un_ncmds_in_driver indicates that commands are
7290 	 * already being processed in the driver; if the semaphore was
7291 	 * not acquired here, an open or close is being processed.
7292 	 * In either case the device cannot go to low power right now,
7293 	 * so return failure.
7294 	 */
7295 	if ((level == SD_SPINDLE_OFF) &&
7296 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7297 		mutex_exit(SD_MUTEX(un));
7298 
7299 		if (got_semaphore_here != 0) {
7300 			sema_v(&un->un_semoclose);
7301 		}
7302 		SD_TRACE(SD_LOG_IO_PM, un,
7303 		    "sdpower: exit, device has queued cmds.\n");
7304 		return (DDI_FAILURE);
7305 	}
7306 
7307 	/*
7308 	 * If the device is OFFLINE, the disk is effectively dead. Changing
7309 	 * its power level requires sending commands to the device, which
7310 	 * would fail anyway, so return here instead.
7311 	 *
7312 	 * Power changes to a device that's OFFLINE or SUSPENDED
7313 	 * are not allowed.
7314 	 */
7315 	if ((un->un_state == SD_STATE_OFFLINE) ||
7316 	    (un->un_state == SD_STATE_SUSPENDED)) {
7317 		mutex_exit(SD_MUTEX(un));
7318 
7319 		if (got_semaphore_here != 0) {
7320 			sema_v(&un->un_semoclose);
7321 		}
7322 		SD_TRACE(SD_LOG_IO_PM, un,
7323 		    "sdpower: exit, device is off-line.\n");
7324 		return (DDI_FAILURE);
7325 	}
7326 
7327 	/*
7328 	 * Change the device's state to indicate its power level
7329 	 * is being changed. Do this to prevent a power off in the
7330 	 * middle of commands, which is especially bad on devices
7331 	 * that are really powered off instead of just spun down.
7332 	 */
7333 	state_before_pm = un->un_state;
7334 	un->un_state = SD_STATE_PM_CHANGING;
7335 
7336 	mutex_exit(SD_MUTEX(un));
7337 
7338 	/*
7339 	 * If the "pm-capable" property was set to TRUE by the HBA driver,
7340 	 * bypass the following check; otherwise, consult the log
7341 	 * sense information for this device.
7342 	 */
7343 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7344 		/*
7345 		 * Get the log sense information to understand whether
7346 		 * the power-cycle counts have gone beyond the threshold.
7347 		 */
7348 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7349 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7350 
7351 		mutex_enter(SD_MUTEX(un));
7352 		log_sense_page = un->un_start_stop_cycle_page;
7353 		mutex_exit(SD_MUTEX(un));
7354 
7355 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7356 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7357 #ifdef	SDDEBUG
7358 		if (sd_force_pm_supported) {
7359 			/* Force a successful result */
7360 			rval = 0;
7361 		}
7362 #endif
7363 		if (rval != 0) {
7364 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7365 			    "Log Sense Failed\n");
7366 			kmem_free(log_page_data, log_page_size);
7367 			/* Cannot support power management on those drives */
7368 
7369 			if (got_semaphore_here != 0) {
7370 				sema_v(&un->un_semoclose);
7371 			}
7372 			/*
7373 			 * On exit put the state back to its original value
7374 			 * and broadcast to anyone waiting for the power
7375 			 * change completion.
7376 			 */
7377 			mutex_enter(SD_MUTEX(un));
7378 			un->un_state = state_before_pm;
7379 			cv_broadcast(&un->un_suspend_cv);
7380 			mutex_exit(SD_MUTEX(un));
7381 			SD_TRACE(SD_LOG_IO_PM, un,
7382 			    "sdpower: exit, Log Sense Failed.\n");
7383 			return (DDI_FAILURE);
7384 		}
7385 
7386 		/*
7387 		 * From the page data - Convert the essential information to
7388 		 * pm_trans_data
7389 		 */
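		/*
		 * The cycle counts are recorded big-endian in the log page;
		 * the shifts below assemble the four parameter bytes of
		 * each counter into a host-order 32-bit value.
		 */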
7390 		maxcycles =
7391 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7392 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7393 
7394 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7395 
7396 		ncycles =
7397 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7398 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7399 
7400 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7401 
7402 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7403 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7404 			    log_page_data[8+i];
7405 		}
7406 
7407 		kmem_free(log_page_data, log_page_size);
7408 
7409 		/*
7410 		 * Call pm_trans_check routine to get the Ok from
7411 		 * the global policy
7412 		 */
7413 
7414 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7415 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7416 
7417 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7418 #ifdef	SDDEBUG
7419 		if (sd_force_pm_supported) {
7420 			/* Force a successful result */
7421 			rval = 1;
7422 		}
7423 #endif
7424 		switch (rval) {
7425 		case 0:
7426 			/*
7427 			 * It is not OK to power cycle, or there was an error
7428 			 * in the parameters passed. intvlp now holds the
7429 			 * advised time before a power cycle should next be
7430 			 * considered; until then we are supposed to pretend
7431 			 * we are busy so that the pm framework will never
7432 			 * call our power entry point. To do that, install a
7433 			 * timeout handler and wait for the recommended time
7434 			 * to elapse so power management can be effective again.
7435 			 *
7436 			 * To effect this behavior, call pm_busy_component to
7437 			 * indicate to the framework this device is busy.
7438 			 * By not adjusting un_pm_count the rest of PM in
7439 			 * the driver will function normally and independently
7440 			 * of this; but because the framework is told the device
7441 			 * is busy, it won't attempt powering down until it gets
7442 			 * a matching idle, which the timeout handler sends.
7443 			 * Note: sd_pm_entry can't be called here to do this
7444 			 * because sdpower may have been called as a result
7445 			 * of a call to pm_raise_power from within sd_pm_entry.
7446 			 *
7447 			 * If a timeout handler is already active then
7448 			 * don't install another.
7449 			 */
7450 			mutex_enter(&un->un_pm_mutex);
7451 			if (un->un_pm_timeid == NULL) {
7452 				un->un_pm_timeid =
7453 				    timeout(sd_pm_timeout_handler,
7454 				    un, intvlp * drv_usectohz(1000000));
7455 				mutex_exit(&un->un_pm_mutex);
7456 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7457 			} else {
7458 				mutex_exit(&un->un_pm_mutex);
7459 			}
7460 			if (got_semaphore_here != 0) {
7461 				sema_v(&un->un_semoclose);
7462 			}
7463 			/*
7464 			 * On exit put the state back to its original value
7465 			 * and broadcast to anyone waiting for the power
7466 			 * change completion.
7467 			 */
7468 			mutex_enter(SD_MUTEX(un));
7469 			un->un_state = state_before_pm;
7470 			cv_broadcast(&un->un_suspend_cv);
7471 			mutex_exit(SD_MUTEX(un));
7472 
7473 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7474 			    "trans check Failed, not ok to power cycle.\n");
7475 			return (DDI_FAILURE);
7476 
7477 		case -1:
7478 			if (got_semaphore_here != 0) {
7479 				sema_v(&un->un_semoclose);
7480 			}
7481 			/*
7482 			 * On exit put the state back to its original value
7483 			 * and broadcast to anyone waiting for the power
7484 			 * change completion.
7485 			 */
7486 			mutex_enter(SD_MUTEX(un));
7487 			un->un_state = state_before_pm;
7488 			cv_broadcast(&un->un_suspend_cv);
7489 			mutex_exit(SD_MUTEX(un));
7490 			SD_TRACE(SD_LOG_IO_PM, un,
7491 			    "sdpower: exit, trans check command Failed.\n");
7492 			return (DDI_FAILURE);
7493 		}
7494 	}
7495 
7496 	if (level == SD_SPINDLE_OFF) {
7497 		/*
7498 		 * Save the last state... if the STOP fails we need it
7499 		 * for restoring.
7500 		 */
7501 		mutex_enter(SD_MUTEX(un));
7502 		save_state = un->un_last_state;
7503 		/*
7504 		 * There must not be any commands being processed
7505 		 * in the driver when we get here. Power to the
7506 		 * device is potentially going off.
7507 		 */
7508 		ASSERT(un->un_ncmds_in_driver == 0);
7509 		mutex_exit(SD_MUTEX(un));
7510 
7511 		/*
7512 		 * For now suspend the device completely before spindle is
7513 		 * turned off
7514 		 */
7515 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7516 			if (got_semaphore_here != 0) {
7517 				sema_v(&un->un_semoclose);
7518 			}
7519 			/*
7520 			 * On exit put the state back to its original value
7521 			 * and broadcast to anyone waiting for the power
7522 			 * change completion.
7523 			 */
7524 			mutex_enter(SD_MUTEX(un));
7525 			un->un_state = state_before_pm;
7526 			cv_broadcast(&un->un_suspend_cv);
7527 			mutex_exit(SD_MUTEX(un));
7528 			SD_TRACE(SD_LOG_IO_PM, un,
7529 			    "sdpower: exit, PM suspend Failed.\n");
7530 			return (DDI_FAILURE);
7531 		}
7532 	}
7533 
7534 	/*
7535 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7536 	 * close, or strategy. Dump no longer uses this routine; it uses its
7537 	 * own code so it can be done in polled mode.
7538 	 */
7539 
7540 	medium_present = TRUE;
7541 
7542 	/*
7543 	 * When powering up, issue a TUR in case the device is at unit
7544 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7545 	 * a deadlock on un_pm_busy_cv will occur.
7546 	 */
7547 	if (level == SD_SPINDLE_ON) {
7548 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7549 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7550 	}
7551 
7552 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7553 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7554 
7555 	sval = sd_send_scsi_START_STOP_UNIT(un,
7556 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7557 	    SD_PATH_DIRECT);
7558 	/* Command failed, check for media present. */
7559 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7560 		medium_present = FALSE;
7561 	}
7562 
7563 	/*
7564 	 * The conditions of interest here are:
7565 	 *   if a spindle off with media present fails,
7566 	 *	then restore the state and return an error.
7567 	 *   else if a spindle on fails,
7568 	 *	then return an error (there's no state to restore).
7569 	 * In all other cases we setup for the new state
7570 	 * and return success.
7571 	 */
7572 	switch (level) {
7573 	case SD_SPINDLE_OFF:
7574 		if ((medium_present == TRUE) && (sval != 0)) {
7575 			/* The stop command from above failed */
7576 			rval = DDI_FAILURE;
7577 			/*
7578 			 * The stop command failed, and we have media
7579 			 * present. Put the level back by calling
7580 			 * sd_ddi_pm_resume() and set the state back to
7581 			 * its previous value.
7582 			 */
7583 			(void) sd_ddi_pm_resume(un);
7584 			mutex_enter(SD_MUTEX(un));
7585 			un->un_last_state = save_state;
7586 			mutex_exit(SD_MUTEX(un));
7587 			break;
7588 		}
7589 		/*
7590 		 * The stop command from above succeeded.
7591 		 */
7592 		if (un->un_f_monitor_media_state) {
7593 			/*
7594 			 * Terminate watch thread in case of removable media
7595 			 * devices going into low power state. This is as per
7596 			 * the requirements of pm framework, otherwise commands
7597 			 * will be generated for the device (through watch
7598 			 * thread), even when the device is in low power state.
7599 			 */
7600 			mutex_enter(SD_MUTEX(un));
7601 			un->un_f_watcht_stopped = FALSE;
7602 			if (un->un_swr_token != NULL) {
7603 				opaque_t temp_token = un->un_swr_token;
7604 				un->un_f_watcht_stopped = TRUE;
7605 				un->un_swr_token = NULL;
7606 				mutex_exit(SD_MUTEX(un));
7607 				(void) scsi_watch_request_terminate(temp_token,
7608 				    SCSI_WATCH_TERMINATE_WAIT);
7609 			} else {
7610 				mutex_exit(SD_MUTEX(un));
7611 			}
7612 		}
7613 		break;
7614 
7615 	default:	/* The level requested is spindle on... */
7616 		/*
7617 		 * Legacy behavior: return success on a failed spinup
7618 		 * if there is no media in the drive.
7619 		 * Do this by looking at medium_present here.
7620 		 */
7621 		if ((sval != 0) && medium_present) {
7622 			/* The start command from above failed */
7623 			rval = DDI_FAILURE;
7624 			break;
7625 		}
7626 		/*
7627 		 * The start command from above succeeded.
7628 		 * Resume the devices now that we have
7629 		 * started the disks.
7630 		 */
7631 		(void) sd_ddi_pm_resume(un);
7632 
7633 		/*
7634 		 * Resume the watch thread since it was suspended
7635 		 * when the device went into low power mode.
7636 		 */
7637 		if (un->un_f_monitor_media_state) {
7638 			mutex_enter(SD_MUTEX(un));
7639 			if (un->un_f_watcht_stopped == TRUE) {
7640 				opaque_t temp_token;
7641 
7642 				un->un_f_watcht_stopped = FALSE;
7643 				mutex_exit(SD_MUTEX(un));
7644 				temp_token = scsi_watch_request_submit(
7645 				    SD_SCSI_DEVP(un),
7646 				    sd_check_media_time,
7647 				    SENSE_LENGTH, sd_media_watch_cb,
7648 				    (caddr_t)dev);
7649 				mutex_enter(SD_MUTEX(un));
7650 				un->un_swr_token = temp_token;
7651 			}
7652 			mutex_exit(SD_MUTEX(un));
7653 		}
7654 	}
7655 	if (got_semaphore_here != 0) {
7656 		sema_v(&un->un_semoclose);
7657 	}
7658 	/*
7659 	 * On exit put the state back to its original value
7660 	 * and broadcast to anyone waiting for the power
7661 	 * change completion.
7662 	 */
7663 	mutex_enter(SD_MUTEX(un));
7664 	un->un_state = state_before_pm;
7665 	cv_broadcast(&un->un_suspend_cv);
7666 	mutex_exit(SD_MUTEX(un));
7667 
7668 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7669 
7670 	return (rval);
7671 }
7672 
7673 
7674 
7675 /*
7676  *    Function: sdattach
7677  *
7678  * Description: Driver's attach(9e) entry point function.
7679  *
7680  *   Arguments: devi - opaque device info handle
7681  *		cmd  - attach  type
7682  *
7683  * Return Code: DDI_SUCCESS
7684  *		DDI_FAILURE
7685  *
7686  *     Context: Kernel thread context
7687  */
7688 
7689 static int
7690 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7691 {
7692 	switch (cmd) {
7693 	case DDI_ATTACH:
7694 		return (sd_unit_attach(devi));
7695 	case DDI_RESUME:
7696 		return (sd_ddi_resume(devi));
7697 	default:
7698 		break;
7699 	}
7700 	return (DDI_FAILURE);
7701 }
7702 
7703 
7704 /*
7705  *    Function: sddetach
7706  *
7707  * Description: Driver's detach(9E) entry point function.
7708  *
7709  *   Arguments: devi - opaque device info handle
7710  *		cmd  - detach  type
7711  *
7712  * Return Code: DDI_SUCCESS
7713  *		DDI_FAILURE
7714  *
7715  *     Context: Kernel thread context
7716  */
7717 
7718 static int
7719 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7720 {
7721 	switch (cmd) {
7722 	case DDI_DETACH:
7723 		return (sd_unit_detach(devi));
7724 	case DDI_SUSPEND:
7725 		return (sd_ddi_suspend(devi));
7726 	default:
7727 		break;
7728 	}
7729 	return (DDI_FAILURE);
7730 }
7731 
7732 
7733 /*
7734  *     Function: sd_sync_with_callback
7735  *
7736  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7737  *		 state while the callback routine is active.
7738  *
7739  *    Arguments: un: softstate structure for the instance
7740  *
7741  *	Context: Kernel thread context
7742  */
7743 
7744 static void
7745 sd_sync_with_callback(struct sd_lun *un)
7746 {
7747 	ASSERT(un != NULL);
7748 
7749 	mutex_enter(SD_MUTEX(un));
7750 
7751 	ASSERT(un->un_in_callback >= 0);
7752 
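	/*
	 * Poll until the callback count drains to zero, dropping the mutex
	 * and delaying two clock ticks between checks so the callback
	 * routine can make progress.
	 */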
7753 	while (un->un_in_callback > 0) {
7754 		mutex_exit(SD_MUTEX(un));
7755 		delay(2);
7756 		mutex_enter(SD_MUTEX(un));
7757 	}
7758 
7759 	mutex_exit(SD_MUTEX(un));
7760 }
7761 
7762 /*
7763  *    Function: sd_unit_attach
7764  *
7765  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7766  *		the soft state structure for the device and performs
7767  *		all necessary structure and device initializations.
7768  *
7769  *   Arguments: devi: the system's dev_info_t for the device.
7770  *
7771  * Return Code: DDI_SUCCESS if attach is successful.
7772  *		DDI_FAILURE if any part of the attach fails.
7773  *
7774  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7775  *		Kernel thread context only.  Can sleep.
7776  */
7777 
7778 static int
7779 sd_unit_attach(dev_info_t *devi)
7780 {
7781 	struct	scsi_device	*devp;
7782 	struct	sd_lun		*un;
7783 	char			*variantp;
7784 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7785 	int	instance;
7786 	int	rval;
7787 	int	wc_enabled;
7788 	uint64_t	capacity;
7789 	uint_t		lbasize;
7790 
7791 	/*
7792 	 * Retrieve the target driver's private data area. This was set
7793 	 * up by the HBA.
7794 	 */
7795 	devp = ddi_get_driver_private(devi);
7796 
7797 	/*
7798 	 * Since we have no idea what state things were left in by the last
7799 	 * user of the device, set up some 'default' settings, ie. turn 'em
7800 	 * user of the device, set up some 'default' settings, i.e. turn them
7801 	 * Do this before the scsi_probe, which sends an inquiry.
7802 	 * This is a fix for bug (4430280).
7803 	 * Of special importance is wide-xfer. The drive could have been left
7804 	 * in wide transfer mode by the last driver to communicate with it,
7805 	 * this includes us. If that's the case, and if the following is not
7806 	 * setup properly or we don't re-negotiate with the drive prior to
7807 	 * transferring data to/from the drive, it causes bus parity errors,
7808 	 * data overruns, and unexpected interrupts. This first occurred when
7809 	 * the fix for bug (4378686) was made.
7810 	 */
7811 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7812 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7813 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7814 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
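	/*
	 * Each scsi_ifsetcap(9F) call above sets the named capability to 0
	 * (off); the final argument of 1 applies the setting to this
	 * particular target rather than to all targets on the bus.
	 */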
7815 
7816 	/*
7817 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7818 	 * This call will allocate and fill in the scsi_inquiry structure
7819 	 * and point the sd_inq member of the scsi_device structure to it.
7820 	 * If the attach succeeds, then this memory will not be de-allocated
7821 	 * (via scsi_unprobe()) until the instance is detached.
7822 	 */
7823 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7824 		goto probe_failed;
7825 	}
7826 
7827 	/*
7828 	 * Check the device type as specified in the inquiry data and
7829 	 * claim it if it is of a type that we support.
7830 	 */
7831 	switch (devp->sd_inq->inq_dtype) {
7832 	case DTYPE_DIRECT:
7833 		break;
7834 	case DTYPE_RODIRECT:
7835 		break;
7836 	case DTYPE_OPTICAL:
7837 		break;
7838 	case DTYPE_NOTPRESENT:
7839 	default:
7840 		/* Unsupported device type; fail the attach. */
7841 		goto probe_failed;
7842 	}
7843 
7844 	/*
7845 	 * Allocate the soft state structure for this unit.
7846 	 *
7847 	 * We rely upon this memory being set to all zeroes by
7848 	 * ddi_soft_state_zalloc().  We assume that any member of the
7849 	 * soft state structure that is not explicitly initialized by
7850 	 * this routine will have a value of zero.
7851 	 */
7852 	instance = ddi_get_instance(devp->sd_dev);
7853 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7854 		goto probe_failed;
7855 	}
7856 
7857 	/*
7858 	 * Retrieve a pointer to the newly-allocated soft state.
7859 	 *
7860 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7861 	 * was successful, unless something has gone horribly wrong and the
7862 	 * ddi's soft state internals are corrupt (in which case it is
7863 	 * probably better to halt here than just fail the attach....)
7864 	 */
7865 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7866 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7867 		    instance);
7868 		/*NOTREACHED*/
7869 	}
7870 
7871 	/*
7872 	 * Link the back ptr of the driver soft state to the scsi_device
7873 	 * struct for this lun.
7874 	 * Save a pointer to the softstate in the driver-private area of
7875 	 * the scsi_device struct.
7876 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7877 	 * we first set un->un_sd below.
7878 	 */
7879 	un->un_sd = devp;
7880 	devp->sd_private = (opaque_t)un;
7881 
7882 	/*
7883 	 * The following must be after devp is stored in the soft state struct.
7884 	 */
7885 #ifdef SDDEBUG
7886 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7887 	    "%s_unit_attach: un:0x%p instance:%d\n",
7888 	    ddi_driver_name(devi), un, instance);
7889 #endif
7890 
7891 	/*
7892 	 * Set up the device type and node type (for the minor nodes).
7893 	 * By default we assume that the device can at least support the
7894 	 * Common Command Set. Call it a CD-ROM if it reports itself
7895 	 * as a RODIRECT device.
7896 	 */
7897 	switch (devp->sd_inq->inq_dtype) {
7898 	case DTYPE_RODIRECT:
7899 		un->un_node_type = DDI_NT_CD_CHAN;
7900 		un->un_ctype	 = CTYPE_CDROM;
7901 		break;
7902 	case DTYPE_OPTICAL:
7903 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7904 		un->un_ctype	 = CTYPE_ROD;
7905 		break;
7906 	default:
7907 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7908 		un->un_ctype	 = CTYPE_CCS;
7909 		break;
7910 	}
7911 
7912 	/*
7913 	 * Try to read the interconnect type from the HBA.
7914 	 *
7915 	 * Note: This driver is currently compiled as two binaries, a parallel
7916 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7917 	 * differences are determined at compile time. In the future a single
7918 	 * binary will be provided and the interconnect type will be used to
7919 	 * differentiate between fibre and parallel scsi behaviors. At that time
7920 	 * it will be necessary for all fibre channel HBAs to support this
7921 	 * property.
7922 	 *
7923 	 * Set un_f_is_fibre to TRUE (default fibre).
7924 	 */
7925 	un->un_f_is_fibre = TRUE;
7926 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7927 	case INTERCONNECT_SSA:
7928 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7929 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7930 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7931 		break;
7932 	case INTERCONNECT_PARALLEL:
7933 		un->un_f_is_fibre = FALSE;
7934 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7935 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7936 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7937 		break;
7938 	case INTERCONNECT_SATA:
7939 		un->un_f_is_fibre = FALSE;
7940 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7941 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7942 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7943 		break;
7944 	case INTERCONNECT_FIBRE:
7945 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7946 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7947 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7948 		break;
7949 	case INTERCONNECT_FABRIC:
7950 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7951 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7952 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7953 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7954 		break;
7955 	default:
7956 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7957 		/*
7958 		 * The HBA does not support the "interconnect-type" property
7959 		 * (or did not provide a recognized type).
7960 		 *
7961 		 * Note: This will be obsoleted when a single fibre channel
7962 		 * interconnect type will be set to the platform default. If that
7963 		 * interconnect type will be set to the platform default.If that
7964 		 * type is not parallel SCSI, it means that we should be
7965 		 * assuming "ssd" semantics. However, here this also means that
7966 		 * the FC HBA is not supporting the "interconnect-type" property
7967 		 * like we expect it to, so log this occurrence.
7968 		 */
7969 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7970 		if (!SD_IS_PARALLEL_SCSI(un)) {
7971 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7972 			    "sd_unit_attach: un:0x%p Assuming "
7973 			    "INTERCONNECT_FIBRE\n", un);
7974 		} else {
7975 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7976 			    "sd_unit_attach: un:0x%p Assuming "
7977 			    "INTERCONNECT_PARALLEL\n", un);
7978 			un->un_f_is_fibre = FALSE;
7979 		}
7980 #else
7981 		/*
7982 		 * Note: This source will be implemented when a single fibre
7983 		 * channel and parallel scsi driver is delivered. The default
7984 		 * will be to assume that if a device does not support the
7985 		 * "interconnect-type" property it is a parallel SCSI HBA and
7986 		 * we will set the interconnect type for parallel scsi.
7987 		 */
7988 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7989 		un->un_f_is_fibre = FALSE;
7990 #endif
7991 		break;
7992 	}
7993 
7994 	if (un->un_f_is_fibre == TRUE) {
7995 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7996 		    SCSI_VERSION_3) {
7997 			switch (un->un_interconnect_type) {
7998 			case SD_INTERCONNECT_FIBRE:
7999 			case SD_INTERCONNECT_SSA:
8000 				un->un_node_type = DDI_NT_BLOCK_WWN;
8001 				break;
8002 			default:
8003 				break;
8004 			}
8005 		}
8006 	}
8007 
8008 	/*
8009 	 * Initialize the Request Sense command for the target
8010 	 */
8011 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
8012 		goto alloc_rqs_failed;
8013 	}
8014 
8015 	/*
8016 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC,
8017 	 * which has separate binaries for sd and ssd.
8018 	 *
8019 	 * x86 has one binary, and un_retry_count is set based on the
8020 	 * connection type. These hardcoded values will go away when
8021 	 * SPARC uses one binary for sd and ssd, and they need to match
8022 	 * SD_RETRY_COUNT in sddef.h.
8023 	 * The value used is based on interconnect type:
8024 	 * fibre = 3, parallel = 5.
8025 	 */
8026 #if defined(__i386) || defined(__amd64)
8027 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8028 #else
8029 	un->un_retry_count = SD_RETRY_COUNT;
8030 #endif
8031 
8032 	/*
8033 	 * Set the per disk retry count to the default number of retries
8034 	 * for disks and CDROMs. This value can be overridden by the
8035 	 * disk property list or an entry in sd.conf.
8036 	 */
8037 	un->un_notready_retry_count =
8038 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
8039 			: DISK_NOT_READY_RETRY_COUNT(un);
8040 
8041 	/*
8042 	 * Set the busy retry count to the default value of un_retry_count.
8043 	 * This can be overridden by entries in sd.conf or the device
8044 	 * config table.
8045 	 */
8046 	un->un_busy_retry_count = un->un_retry_count;
8047 
8048 	/*
8049 	 * Init the reset threshold for retries.  This number determines
8050 	 * how many retries must be performed before a reset can be issued
8051 	 * (for certain error conditions). This can be overridden by entries
8052 	 * in sd.conf or the device config table.
8053 	 */
8054 	un->un_reset_retry_count = (un->un_retry_count / 2);
8055 
8056 	/*
8057 	 * Set the victim_retry_count to the default un_retry_count
8058 	 */
8059 	un->un_victim_retry_count = (2 * un->un_retry_count);
8060 
8061 	/*
8062 	 * Set the reservation release timeout to the default value of
8063 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8064 	 * device config table.
8065 	 */
8066 	un->un_reserve_release_time = 5;
8067 
8068 	/*
8069 	 * Set up the default maximum transfer size. Note that this may
8070 	 * get updated later in the attach, when setting up default wide
8071 	 * operations for disks.
8072 	 */
8073 #if defined(__i386) || defined(__amd64)
8074 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8075 #else
8076 	un->un_max_xfer_size = (uint_t)maxphys;
8077 #endif
8078 
8079 	/*
8080 	 * Get "allow bus device reset" property (defaults to "enabled" if
8081 	 * the property was not defined). This is to disable bus resets for
8082 	 * certain kinds of error recovery. Note: In the future when a run-time
8083 	 * fibre check is available the soft state flag should default to
8084 	 * enabled.
8085 	 */
8086 	if (un->un_f_is_fibre == TRUE) {
8087 		un->un_f_allow_bus_device_reset = TRUE;
8088 	} else {
8089 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8090 		    "allow-bus-device-reset", 1) != 0) {
8091 			un->un_f_allow_bus_device_reset = TRUE;
8092 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8093 			    "sd_unit_attach: un:0x%p Bus device reset "
8094 			    "enabled\n", un);
8095 		} else {
8096 			un->un_f_allow_bus_device_reset = FALSE;
8097 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8098 			    "sd_unit_attach: un:0x%p Bus device reset "
8099 			    "disabled\n", un);
8100 		}
8100 		}
8101 	}
8102 
8103 	/*
8104 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8105 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8106 	 *
8107 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8108 	 * property. The new "variant" property with a value of "atapi" has been
8109 	 * introduced so that future 'variants' of standard SCSI behavior (like
8110 	 * atapi) could be specified by the underlying HBA drivers by supplying
8111 	 * a new value for the "variant" property, instead of having to define a
8112 	 * new property.
8113 	 */
8114 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8115 		un->un_f_cfg_is_atapi = TRUE;
8116 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8117 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8118 	}
8119 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8120 	    &variantp) == DDI_PROP_SUCCESS) {
8121 		if (strcmp(variantp, "atapi") == 0) {
8122 			un->un_f_cfg_is_atapi = TRUE;
8123 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8124 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8125 		}
8126 		ddi_prop_free(variantp);
8127 	}
8128 
8129 	un->un_cmd_timeout	= SD_IO_TIME;
8130 
8131 	/* Info on current states, statuses, etc. (Updated frequently) */
8132 	un->un_state		= SD_STATE_NORMAL;
8133 	un->un_last_state	= SD_STATE_NORMAL;
8134 
8135 	/* Control & status info for command throttling */
8136 	un->un_throttle		= sd_max_throttle;
8137 	un->un_saved_throttle	= sd_max_throttle;
8138 	un->un_min_throttle	= sd_min_throttle;
8139 
8140 	if (un->un_f_is_fibre == TRUE) {
8141 		un->un_f_use_adaptive_throttle = TRUE;
8142 	} else {
8143 		un->un_f_use_adaptive_throttle = FALSE;
8144 	}
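	/*
	 * Note: With adaptive throttling, the effective throttle is
	 * lowered in response to QFULL/busy indications from the target
	 * and later restored; see sd_reduce_throttle() and
	 * sd_restore_throttle(). Non-fibre targets use a fixed throttle.
	 */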
8145 
8146 	/* Removable media support. */
8147 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8148 	un->un_mediastate		= DKIO_NONE;
8149 	un->un_specified_mediastate	= DKIO_NONE;
8150 
8151 	/* CVs for suspend/resume (PM or DR) */
8152 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8153 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8154 
8155 	/* Power management support. */
8156 	un->un_power_level = SD_SPINDLE_UNINIT;
8157 
8158 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8159 	un->un_f_wcc_inprog = 0;
8160 
8161 	/*
8162 	 * The open/close semaphore is used to serialize threads executing
8163 	 * in the driver's open & close entry point routines for a given
8164 	 * instance.
8165 	 */
8166 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8167 
8168 	/*
8169 	 * The conf file entry and the softstate variable act as a forceful
	 * override, meaning a non-zero value must be entered to change the
	 * default.
8171 	 */
8172 	un->un_f_disksort_disabled = FALSE;
8173 
8174 	/*
8175 	 * Retrieve the properties from the static driver table or the driver
8176 	 * configuration file (.conf) for this unit and update the soft state
8177 	 * for the device as needed for the indicated properties.
8178 	 * Note: the property configuration needs to occur here as some of the
8179 	 * following routines may have dependencies on soft state flags set
8180 	 * as part of the driver property configuration.
8181 	 */
8182 	sd_read_unit_properties(un);
8183 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8184 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8185 
8186 	/*
8187 	 * A device is treated as hotpluggable only if it has the
8188 	 * "hotpluggable" property; otherwise it is regarded as
8189 	 * non-hotpluggable.
8190 	 */
8191 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8192 	    -1) != -1) {
8193 		un->un_f_is_hotpluggable = TRUE;
8194 	}
8195 
8196 	/*
8197 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8198 	 * the RMB bit in the INQUIRY data.
8199 	 */
8200 	sd_set_unit_attributes(un, devi);
8201 
8202 	/*
8203 	 * By default, we mark the capacity, lbasize, and geometry
8204 	 * as invalid. Only if we successfully read a valid capacity
8205 	 * will we update the un_blockcount and un_tgt_blocksize with the
8206 	 * valid values (the geometry will be validated later).
8207 	 */
8208 	un->un_f_blockcount_is_valid	= FALSE;
8209 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8210 	un->un_f_geometry_is_valid	= FALSE;
8211 
8212 	/*
8213 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8214 	 * otherwise.
8215 	 */
8216 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8217 	un->un_blockcount = 0;
8218 
8219 	/*
8220 	 * Set up the per-instance info needed to determine the correct
8221 	 * CDBs and other info for issuing commands to the target.
8222 	 */
8223 	sd_init_cdb_limits(un);
8224 
8225 	/*
8226 	 * Set up the IO chains to use, based upon the target type.
8227 	 */
8228 	if (un->un_f_non_devbsize_supported) {
8229 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8230 	} else {
8231 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8232 	}
8233 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8234 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8235 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8236 
8237 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8238 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8239 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8240 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
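	/*
	 * Note: The xbuf attribute created above provides the per-I/O
	 * extended buffer (sd_xbuf) allocation and queueing for this
	 * instance; sd_xbuf_active_limit and sd_xbuf_reserve_limit
	 * bound how many xbufs may be outstanding at once.
	 */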
8241 
8242 
8243 	if (ISCD(un)) {
8244 		un->un_additional_codes = sd_additional_codes;
8245 	} else {
8246 		un->un_additional_codes = NULL;
8247 	}
8248 
8249 	/*
8250 	 * Create the kstats here so they can be available for attach-time
8251 	 * routines that send commands to the unit (either polled or via
8252 	 * sd_send_scsi_cmd).
8253 	 *
8254 	 * Note: This is a critical sequence that needs to be maintained:
8255 	 *	1) Instantiate the kstats here, before any routines using the
8256 	 *	   iopath (i.e. sd_send_scsi_cmd).
8257 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8258 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8259 	 *	   sd_register_devid(), and sd_cache_control().
8260 	 */
8261 
8262 	un->un_stats = kstat_create(sd_label, instance,
8263 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8264 	if (un->un_stats != NULL) {
8265 		un->un_stats->ks_lock = SD_MUTEX(un);
8266 		kstat_install(un->un_stats);
8267 	}
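	/*
	 * Note: KSTAT_FLAG_PERSISTENT requests that these statistics be
	 * preserved across a detach/re-attach of the instance (see
	 * kstat_create(9F)).
	 */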
8268 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8269 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8270 
8271 	sd_create_errstats(un, instance);
8272 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8273 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8274 
8275 	/*
8276 	 * The following if/else code was relocated here from below as part
8277 	 * of the fix for bug (4430280). However with the default setup added
8278 	 * on entry to this routine, it's no longer absolutely necessary for
8279 	 * this to be before the call to sd_spin_up_unit.
8280 	 */
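	/*
	 * In summary: if the target supports SCSI-2 tagged queueing and
	 * the HBA accepts the "tagged-qing" capability, run with
	 * FLAG_STAG and the full throttle; if only untagged HBA
	 * queueing is available, cap the throttle at 3; otherwise
	 * serialize commands with a throttle of 1.
	 */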
8281 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8282 		/*
8283 		 * If SCSI-2 tagged queueing is supported by the target
8284 		 * and by the host adapter then we will enable it.
8285 		 */
8286 		un->un_tagflags = 0;
8287 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8288 		    (devp->sd_inq->inq_cmdque) &&
8289 		    (un->un_f_arq_enabled == TRUE)) {
8290 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8291 			    1, 1) == 1) {
8292 				un->un_tagflags = FLAG_STAG;
8293 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8294 				    "sd_unit_attach: un:0x%p tag queueing "
8295 				    "enabled\n", un);
8296 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8297 			    "untagged-qing", 0) == 1) {
8298 				un->un_f_opt_queueing = TRUE;
8299 				un->un_saved_throttle = un->un_throttle =
8300 				    min(un->un_throttle, 3);
8301 			} else {
8302 				un->un_f_opt_queueing = FALSE;
8303 				un->un_saved_throttle = un->un_throttle = 1;
8304 			}
8305 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8306 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8307 			/* The Host Adapter supports internal queueing. */
8308 			un->un_f_opt_queueing = TRUE;
8309 			un->un_saved_throttle = un->un_throttle =
8310 			    min(un->un_throttle, 3);
8311 		} else {
8312 			un->un_f_opt_queueing = FALSE;
8313 			un->un_saved_throttle = un->un_throttle = 1;
8314 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8315 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8316 		}
8317 
8318 		/*
8319 		 * Enable large transfers for SATA/SAS drives
8320 		 */
8321 		if (SD_IS_SERIAL(un)) {
8322 			un->un_max_xfer_size =
8323 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8324 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8325 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8326 			    "sd_unit_attach: un:0x%p max transfer "
8327 			    "size=0x%x\n", un, un->un_max_xfer_size);
8328 
8329 		}
8330 
8331 		/* Setup or tear down default wide operations for disks */
8332 
8333 		/*
8334 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8335 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8336 		 * system and be set to different values. In the future this
8337 		 * code may need to be updated when the ssd module is
8338 		 * obsoleted and removed from the system. (4299588)
8339 		 */
8340 		if (SD_IS_PARALLEL_SCSI(un) &&
8341 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8342 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8343 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8344 			    1, 1) == 1) {
8345 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8346 				    "sd_unit_attach: un:0x%p Wide Transfer "
8347 				    "enabled\n", un);
8348 			}
8349 
8350 			/*
8351 			 * If tagged queueing has also been enabled (throttle
8352 			 * not reduced above), then enable large transfers.
8353 			 */
8354 			if (un->un_saved_throttle == sd_max_throttle) {
8355 				un->un_max_xfer_size =
8356 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8357 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8358 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8359 				    "sd_unit_attach: un:0x%p max transfer "
8360 				    "size=0x%x\n", un, un->un_max_xfer_size);
8361 			}
8362 		} else {
8363 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8364 			    0, 1) == 1) {
8365 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8366 				    "sd_unit_attach: un:0x%p "
8367 				    "Wide Transfer disabled\n", un);
8368 			}
8369 		}
8370 	} else {
8371 		un->un_tagflags = FLAG_STAG;
8372 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8373 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8374 	}
8375 
8376 	/*
8377 	 * If this target supports LUN reset, try to enable it.
8378 	 */
8379 	if (un->un_f_lun_reset_enabled) {
8380 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8381 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8382 			    "un:0x%p lun_reset capability set\n", un);
8383 		} else {
8384 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8385 			    "un:0x%p lun-reset capability not set\n", un);
8386 		}
8387 	}
8388 
8389 	/*
8390 	 * At this point in the attach, we have enough info in the
8391 	 * soft state to be able to issue commands to the target.
8392 	 *
8393 	 * All command paths used below MUST issue their commands as
8394 	 * SD_PATH_DIRECT. This is important as intermediate layers
8395 	 * are not all initialized yet (such as PM).
8396 	 */
8397 
8398 	/*
8399 	 * Send a TEST UNIT READY command to the device. This should clear
8400 	 * any outstanding UNIT ATTENTION that may be present.
8401 	 *
8402 	 * Note: Don't check for success, just track if there is a reservation,
8403 	 * this is a throw-away command to clear any unit attentions.
8404 	 *
8405 	 * Note: This MUST be the first command issued to the target during
8406 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8407 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8408 	 * with attempts at spinning up a device with no media.
8409 	 */
8410 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8411 		reservation_flag = SD_TARGET_IS_RESERVED;
8412 	}
8413 
8414 	/*
8415 	 * If the device is NOT a removable media device, attempt to spin
8416 	 * it up (using the START_STOP_UNIT command) and read its capacity
8417 	 * (using the READ CAPACITY command).  Note, however, that either
8418 	 * of these could fail and in some cases we would continue with
8419 	 * the attach despite the failure (see below).
8420 	 */
8421 	if (un->un_f_descr_format_supported) {
8422 		switch (sd_spin_up_unit(un)) {
8423 		case 0:
8424 			/*
8425 			 * Spin-up was successful; now try to read the
8426 			 * capacity.  If successful then save the results
8427 			 * and mark the capacity & lbasize as valid.
8428 			 */
8429 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8430 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8431 
8432 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8433 			    &lbasize, SD_PATH_DIRECT)) {
8434 			case 0: {
8435 				if (capacity > DK_MAX_BLOCKS) {
8436 #ifdef _LP64
8437 					if (capacity + 1 >
8438 					    SD_GROUP1_MAX_ADDRESS) {
8439 						/*
8440 						 * Enable descriptor format
8441 						 * sense data so that we can
8442 						 * get 64 bit sense data
8443 						 * fields.
8444 						 */
8445 						sd_enable_descr_sense(un);
8446 					}
8447 #else
8448 					/* 32-bit kernels can't handle this */
8449 					scsi_log(SD_DEVINFO(un),
8450 					    sd_label, CE_WARN,
8451 					    "disk has %llu blocks, which "
8452 					    "is too large for a 32-bit "
8453 					    "kernel", capacity);
8454 
8455 #if defined(__i386) || defined(__amd64)
8456 					/*
8457 					 * Refer to comments related to off-by-1
8458 					 * at the header of this file.
8459 					 * 1TB disk was treated as (1T - 512)B
8460 					 * in the past, so it might have a
8461 					 * valid VTOC and Solaris partitions;
8462 					 * we have to allow it to continue to
8463 					 * work.
8464 					 */
8465 					if (capacity - 1 > DK_MAX_BLOCKS)
8466 #endif
8467 					goto spinup_failed;
8468 #endif
8469 				}
8470 
8471 				/*
8472 				 * It is not necessary to check here whether
8473 				 * the device capacity exceeds what the
8474 				 * largest HBA-supported CDB can address:
8475 				 * sd_send_scsi_READ_CAPACITY retrieves the
8476 				 * capacity via a USCSI command, which is
8477 				 * itself constrained by the max HBA CDB, and
8478 				 * it returns EINVAL when a larger CDB than
8479 				 * the HBA supports would be required. That
8480 				 * situation is handled below in
8481 				 * "case EINVAL".
8482 				 */
8483 
8484 				/*
8485 				 * The following relies on
8486 				 * sd_send_scsi_READ_CAPACITY never
8487 				 * returning 0 for capacity and/or lbasize.
8488 				 */
8489 				sd_update_block_info(un, lbasize, capacity);
8490 
8491 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8492 				    "sd_unit_attach: un:0x%p capacity = %ld "
8493 				    "blocks; lbasize= %ld.\n", un,
8494 				    un->un_blockcount, un->un_tgt_blocksize);
8495 
8496 				break;
8497 			}
8498 			case EINVAL:
8499 				/*
8500 				 * In the case where the max-cdb-length property
8501 				 * is smaller than the required CDB length for
8502 				 * a SCSI device, a target driver can fail to
8503 				 * attach to that device.
8504 				 */
8505 				scsi_log(SD_DEVINFO(un),
8506 				    sd_label, CE_WARN,
8507 				    "disk capacity is too large "
8508 				    "for current cdb length");
8509 				goto spinup_failed;
8510 			case EACCES:
8511 				/*
8512 				 * Should never get here if the spin-up
8513 				 * succeeded, but code it in anyway.
8514 				 * From here, just continue with the attach...
8515 				 */
8516 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8517 				    "sd_unit_attach: un:0x%p "
8518 				    "sd_send_scsi_READ_CAPACITY "
8519 				    "returned reservation conflict\n", un);
8520 				reservation_flag = SD_TARGET_IS_RESERVED;
8521 				break;
8522 			default:
8523 				/*
8524 				 * Likewise, should never get here if the
8525 				 * spin-up succeeded. Just continue with
8526 				 * the attach...
8527 				 */
8528 				break;
8529 			}
8530 			break;
8531 		case EACCES:
8532 			/*
8533 			 * Device is reserved by another host.  In this case
8534 			 * we could not spin it up or read the capacity, but
8535 			 * we continue with the attach anyway.
8536 			 */
8537 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8538 			    "sd_unit_attach: un:0x%p spin-up reservation "
8539 			    "conflict.\n", un);
8540 			reservation_flag = SD_TARGET_IS_RESERVED;
8541 			break;
8542 		default:
8543 			/* Fail the attach if the spin-up failed. */
8544 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8545 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8546 			goto spinup_failed;
8547 		}
8548 	}
8549 
8550 	/*
8551 	 * Check to see if this is a MMC drive
8552 	 */
8553 	if (ISCD(un)) {
8554 		sd_set_mmc_caps(un);
8555 	}
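	/*
	 * Note: sd_set_mmc_caps() interrogates the drive and records
	 * its MMC-related capabilities in the soft state for later use
	 * by the CD/DVD code paths.
	 */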
8556 
8557 	/*
8558 	 * Create the minor nodes for the device.
8559 	 * Note: If we want to support fdisk on both sparc and intel, this will
8560 	 * have to separate out the notion that VTOC8 is always sparc, and
8561 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8562 	 * type will have to be determined at run-time, and the fdisk
8563 	 * partitioning will have to have been read and set up before we
8564 	 * create the minor nodes. (Are there any other inits, such as kstats,
8565 	 * that also ought to be done before creating the minor nodes?
8566 	 * Doesn't setting up the minor nodes imply that we're ready to
8567 	 * handle an open from userland?)
8568 	 */
8569 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8570 		goto create_minor_nodes_failed;
8571 	}
8572 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8573 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8574 
8575 	/*
8576 	 * Add a zero-length attribute to tell the world we support
8577 	 * kernel ioctls (for layered drivers)
8578 	 */
8579 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8580 	    DDI_KERNEL_IOCTL, NULL, 0);
8581 
8582 	/*
8583 	 * Add a boolean property to tell the world we support
8584 	 * the B_FAILFAST flag (for layered drivers)
8585 	 */
8586 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8587 	    "ddi-failfast-supported", NULL, 0);
8588 
8589 	/*
8590 	 * Initialize power management
8591 	 */
8592 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8593 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8594 	sd_setup_pm(un, devi);
8595 	if (un->un_f_pm_is_enabled == FALSE) {
8596 		/*
8597 		 * For performance, point to a jump table that does
8598 		 * not include pm.
8599 		 * The direct and priority chains don't change with PM.
8600 		 *
8601 		 * Note: this is currently done based on individual device
8602 		 * capabilities. When an interface for determining system
8603 		 * power enabled state becomes available, or when additional
8604 		 * layers are added to the command chain, these values will
8605 		 * have to be re-evaluated for correctness.
8606 		 */
8607 		if (un->un_f_non_devbsize_supported) {
8608 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8609 		} else {
8610 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8611 		}
8612 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8613 	}
8614 
8615 	/*
8616 	 * This property is set to 0 by HA software to avoid retries
8617 	 * on a reserved disk. (The preferred property name is
8618 	 * "retry-on-reservation-conflict") (1189689)
8619 	 *
8620 	 * Note: The use of a global here can have unintended consequences. A
8621 	 * per-instance variable is preferable to match the capabilities of
8622 	 * different underlying HBAs. (4402600)
8623 	 */
8624 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8625 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8626 	    sd_retry_on_reservation_conflict);
8627 	if (sd_retry_on_reservation_conflict != 0) {
8628 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8629 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8630 		    sd_retry_on_reservation_conflict);
8631 	}
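	/*
	 * Note: The legacy property name in sd_resv_conflict_name is
	 * consulted only when the preferred name leaves the value
	 * non-zero, so either property can be used to disable the
	 * retries.
	 */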
8632 
8633 	/* Set up options for QFULL handling. */
8634 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8635 	    "qfull-retries", -1)) != -1) {
8636 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8637 		    rval, 1);
8638 	}
8639 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8640 	    "qfull-retry-interval", -1)) != -1) {
8641 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8642 		    rval, 1);
8643 	}
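	/*
	 * Note: "qfull-retries" and "qfull-retry-interval" come from
	 * the driver's .conf file and are passed through to the HBA via
	 * scsi_ifsetcap(9F) to tune its handling of QFULL status.
	 */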
8644 
8645 	/*
8646 	 * This just prints a message that announces the existence of the
8647 	 * device. The message is always printed in the system logfile, but
8648 	 * only appears on the console if the system is booted with the
8649 	 * -v (verbose) argument.
8650 	 */
8651 	ddi_report_dev(devi);
8652 
8653 	/*
8654 	 * The framework calls driver attach routines single-threaded
8655 	 * for a given instance.  However, we still acquire SD_MUTEX here
8656 	 * because it is required for calling the sd_validate_geometry()
8657 	 * and sd_register_devid() functions.
8658 	 */
8659 	mutex_enter(SD_MUTEX(un));
8660 	un->un_f_geometry_is_valid = FALSE;
8661 	un->un_mediastate = DKIO_NONE;
8662 	un->un_reserved = -1;
8663 
8664 	/*
8665 	 * Read and validate the device's geometry (ie, disk label)
8666 	 * A new unformatted drive will not have a valid geometry, but
8667 	 * the driver needs to successfully attach to this device so
8668 	 * the drive can be formatted via ioctls.
8669 	 */
8670 	if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8671 	    ENOTSUP)) &&
8672 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8673 		/*
8674 		 * We found a small disk with an EFI label on it;
8675 		 * we need to fix up the minor nodes accordingly.
8676 		 */
8677 		ddi_remove_minor_node(devi, "h");
8678 		ddi_remove_minor_node(devi, "h,raw");
8679 		(void) ddi_create_minor_node(devi, "wd",
8680 		    S_IFBLK,
8681 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8682 		    un->un_node_type, NULL);
8683 		(void) ddi_create_minor_node(devi, "wd,raw",
8684 		    S_IFCHR,
8685 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8686 		    un->un_node_type, NULL);
8687 	}
8688 #if defined(__i386) || defined(__amd64)
8689 	else if (un->un_f_capacity_adjusted == 1) {
8690 		/*
8691 		 * Refer to comments related to off-by-1 at the
8692 		 * header of this file.
8693 		 * Adjust minor node for 1TB disk.
8694 		 */
8695 		ddi_remove_minor_node(devi, "wd");
8696 		ddi_remove_minor_node(devi, "wd,raw");
8697 		(void) ddi_create_minor_node(devi, "h",
8698 		    S_IFBLK,
8699 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8700 		    un->un_node_type, NULL);
8701 		(void) ddi_create_minor_node(devi, "h,raw",
8702 		    S_IFCHR,
8703 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8704 		    un->un_node_type, NULL);
8705 	}
8706 #endif
8707 	/*
8708 	 * Read and initialize the devid for the unit.
8709 	 */
8710 	ASSERT(un->un_errstats != NULL);
8711 	if (un->un_f_devid_supported) {
8712 		sd_register_devid(un, devi, reservation_flag);
8713 	}
8714 	mutex_exit(SD_MUTEX(un));
8715 
8716 #if (defined(__fibre))
8717 	/*
8718 	 * Register callbacks for fibre only.  You can't do this solely
8719 	 * on the basis of the devid_type because this is HBA-specific.
8720 	 * We need to query our hba capabilities to find out whether to
8721 	 * register or not.
8722 	 */
8723 	if (un->un_f_is_fibre) {
8724 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN) != 0) {
8725 			sd_init_event_callbacks(un);
8726 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8727 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8728 		}
8729 	}
8730 #endif
8731 
8732 	if (un->un_f_opt_disable_cache == TRUE) {
8733 		/*
8734 		 * Disable both read cache and write cache.  This is
8735 		 * the historic behavior of the keywords in the config file.
8736 		 */
8737 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8738 		    0) {
8739 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8740 			    "sd_unit_attach: un:0x%p Could not disable "
8741 			    "caching", un);
8742 			goto devid_failed;
8743 		}
8744 	}
8745 
8746 	/*
8747 	 * Check the value of the WCE bit now and
8748 	 * set un_f_write_cache_enabled accordingly.
8749 	 */
8750 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8751 	mutex_enter(SD_MUTEX(un));
8752 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8753 	mutex_exit(SD_MUTEX(un));
8754 
8755 	/*
8756 	 * Set the pstat and error stat values here, so data obtained during the
8757 	 * previous attach-time routines is available.
8758 	 *
8759 	 * Note: This is a critical sequence that needs to be maintained:
8760 	 *	1) Instantiate the kstats before any routines using the iopath
8761 	 *	   (i.e. sd_send_scsi_cmd).
8762 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8763 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8764 	 *	   sd_register_devid(), and sd_cache_control().
8765 	 */
8766 	if (un->un_f_pkstats_enabled) {
8767 		sd_set_pstats(un);
8768 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8769 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8770 	}
8771 
8772 	sd_set_errstats(un);
8773 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8774 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8775 
8776 	/*
8777 	 * Find out what type of reservation this disk supports.
8778 	 */
8779 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8780 	case 0:
8781 		/*
8782 		 * SCSI-3 reservations are supported.
8783 		 */
8784 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8785 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8786 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8787 		break;
8788 	case ENOTSUP:
8789 		/*
8790 		 * The PERSISTENT RESERVE IN command would not be recognized by
8791 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8792 		 */
8793 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8794 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8795 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8796 		break;
8797 	default:
8798 		/*
8799 		 * default to SCSI-3 reservations
8800 		 */
8801 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8802 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8803 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8804 		break;
8805 	}
8806 
8807 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8808 	    "sd_unit_attach: un:0x%p exit success\n", un);
8809 
8810 	return (DDI_SUCCESS);
8811 
8812 	/*
8813 	 * An error occurred during the attach; clean up & return failure.
8814 	 */
8815 
8816 devid_failed:
8817 
8818 setup_pm_failed:
8819 	ddi_remove_minor_node(devi, NULL);
8820 
8821 create_minor_nodes_failed:
8822 	/*
8823 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8824 	 */
8825 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8826 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8827 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8828 
8829 	if (un->un_f_is_fibre == FALSE) {
8830 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8831 	}
8832 
8833 spinup_failed:
8834 
8835 	mutex_enter(SD_MUTEX(un));
8836 
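	/*
	 * Note: Each timeout below is cancelled with SD_MUTEX dropped,
	 * because untimeout(9F) waits for an already-running handler to
	 * complete and the handlers themselves may acquire SD_MUTEX;
	 * cancelling while holding the mutex could deadlock.
	 */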
8837 	/* Cancel any pending SD_PATH_DIRECT_PRIORITY cmd. restart callback */
8838 	if (un->un_direct_priority_timeid != NULL) {
8839 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8840 		un->un_direct_priority_timeid = NULL;
8841 		mutex_exit(SD_MUTEX(un));
8842 		(void) untimeout(temp_id);
8843 		mutex_enter(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Cancel any pending start/stop timeouts */
8847 	if (un->un_startstop_timeid != NULL) {
8848 		timeout_id_t temp_id = un->un_startstop_timeid;
8849 		un->un_startstop_timeid = NULL;
8850 		mutex_exit(SD_MUTEX(un));
8851 		(void) untimeout(temp_id);
8852 		mutex_enter(SD_MUTEX(un));
8853 	}
8854 
8855 	/* Cancel any pending reset-throttle timeouts */
8856 	if (un->un_reset_throttle_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8858 		un->un_reset_throttle_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	/* Cancel any pending retry timeouts */
8865 	if (un->un_retry_timeid != NULL) {
8866 		timeout_id_t temp_id = un->un_retry_timeid;
8867 		un->un_retry_timeid = NULL;
8868 		mutex_exit(SD_MUTEX(un));
8869 		(void) untimeout(temp_id);
8870 		mutex_enter(SD_MUTEX(un));
8871 	}
8872 
8873 	/* Cancel any pending delayed cv broadcast timeouts */
8874 	if (un->un_dcvb_timeid != NULL) {
8875 		timeout_id_t temp_id = un->un_dcvb_timeid;
8876 		un->un_dcvb_timeid = NULL;
8877 		mutex_exit(SD_MUTEX(un));
8878 		(void) untimeout(temp_id);
8879 		mutex_enter(SD_MUTEX(un));
8880 	}
8881 
8882 	mutex_exit(SD_MUTEX(un));
8883 
8884 	/* There should be no in-progress I/O, so ASSERT the counts here */
8885 	ASSERT(un->un_ncmds_in_transport == 0);
8886 	ASSERT(un->un_ncmds_in_driver == 0);
8887 
8888 	/* Do not free the softstate if the callback routine is active */
8889 	sd_sync_with_callback(un);
8890 
8891 	/*
8892 	 * Partition stats apparently are not used with removables. These would
8893 	 * not have been created during attach, so no need to clean them up...
8894 	 */
8895 	if (un->un_stats != NULL) {
8896 		kstat_delete(un->un_stats);
8897 		un->un_stats = NULL;
8898 	}
8899 	if (un->un_errstats != NULL) {
8900 		kstat_delete(un->un_errstats);
8901 		un->un_errstats = NULL;
8902 	}
8903 
8904 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8905 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8906 
8907 	ddi_prop_remove_all(devi);
8908 	sema_destroy(&un->un_semoclose);
8909 	cv_destroy(&un->un_state_cv);
8910 
8911 getrbuf_failed:
8912 
8913 	sd_free_rqs(un);
8914 
8915 alloc_rqs_failed:
8916 
8917 	devp->sd_private = NULL;
8918 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8919 
8920 get_softstate_failed:
8921 	/*
8922 	 * Note: the man pages are unclear as to whether or not doing a
8923 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8924 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8925 	 * ddi_get_soft_state() fails.  The implication seems to be
8926 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8927 	 */
8928 	ddi_soft_state_free(sd_state, instance);
8929 
8930 probe_failed:
8931 	scsi_unprobe(devp);
8932 #ifdef SDDEBUG
8933 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8934 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8935 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8936 		    (void *)un);
8937 	}
8938 #endif
8939 	return (DDI_FAILURE);
8940 }
8941 
8942 
8943 /*
8944  *    Function: sd_unit_detach
8945  *
8946  * Description: Performs DDI_DETACH processing for sddetach().
8947  *
8948  * Return Code: DDI_SUCCESS
8949  *		DDI_FAILURE
8950  *
8951  *     Context: Kernel thread context
8952  */
8953 
8954 static int
8955 sd_unit_detach(dev_info_t *devi)
8956 {
8957 	struct scsi_device	*devp;
8958 	struct sd_lun		*un;
8959 	int			i;
8960 	dev_t			dev;
8961 	int			instance = ddi_get_instance(devi);
8962 
8963 	mutex_enter(&sd_detach_mutex);
8964 
8965 	/*
8966 	 * Fail the detach for any of the following:
8967 	 *  - Unable to get the sd_lun struct for the instance
8968 	 *  - A layered driver has an outstanding open on the instance
8969 	 *  - Another thread is already detaching this instance
8970 	 *  - Another thread is currently performing an open
8971 	 */
8972 	devp = ddi_get_driver_private(devi);
8973 	if ((devp == NULL) ||
8974 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8975 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8976 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8977 		mutex_exit(&sd_detach_mutex);
8978 		return (DDI_FAILURE);
8979 	}
8980 
8981 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8982 
8983 	/*
8984 	 * Mark this instance as currently in a detach, to inhibit any
8985 	 * opens from a layered driver.
8986 	 */
8987 	un->un_detach_count++;
8988 	mutex_exit(&sd_detach_mutex);
8989 
8990 	dev = sd_make_device(SD_DEVINFO(un));
8991 
8992 	_NOTE(COMPETING_THREADS_NOW);
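	/*
	 * Note: The _NOTE(COMPETING_THREADS_NOW) annotation (and the
	 * NO_COMPETING_THREADS_NOW counterpart below) is consumed by
	 * the warlock/lock_lint static analysis tools and has no effect
	 * at run time.
	 */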
8993 
8994 	mutex_enter(SD_MUTEX(un));
8995 
8996 	/*
8997 	 * Fail the detach if there are any outstanding layered
8998 	 * opens on this device.
8999 	 */
9000 	for (i = 0; i < NDKMAP; i++) {
9001 		if (un->un_ocmap.lyropen[i] != 0) {
9002 			goto err_notclosed;
9003 		}
9004 	}
9005 
9006 	/*
9007 	 * Verify there are NO outstanding commands issued to this device.
9008 	 * i.e., un_ncmds_in_transport == 0.
9009 	 * It's possible to have outstanding commands through the physio
9010 	 * code path, even though everything's closed.
9011 	 */
9012 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
9013 	    (un->un_direct_priority_timeid != NULL) ||
9014 	    (un->un_state == SD_STATE_RWAIT)) {
9015 		mutex_exit(SD_MUTEX(un));
9016 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9017 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9018 		goto err_stillbusy;
9019 	}
9020 
9021 	/*
9022 	 * If we have the device reserved, release the reservation.
9023 	 */
9024 	if ((un->un_resvd_status & SD_RESERVE) &&
9025 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9026 		mutex_exit(SD_MUTEX(un));
9027 		/*
9028 		 * Note: sd_reserve_release sends a command to the device
9029 		 * via the sd_ioctlcmd() path, and can sleep.
9030 		 */
9031 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9032 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9033 			    "sd_dr_detach: Cannot release reservation \n");
9034 		}
9035 	} else {
9036 		mutex_exit(SD_MUTEX(un));
9037 	}
9038 
9039 	/*
9040 	 * Untimeout any reserve recover, throttle reset, restart unit
9041 	 * and delayed broadcast timeout threads. Protect the timeout pointers
9042 	 * from getting nulled by their callback functions.
9043 	 */
9044 	mutex_enter(SD_MUTEX(un));
9045 	if (un->un_resvd_timeid != NULL) {
9046 		timeout_id_t temp_id = un->un_resvd_timeid;
9047 		un->un_resvd_timeid = NULL;
9048 		mutex_exit(SD_MUTEX(un));
9049 		(void) untimeout(temp_id);
9050 		mutex_enter(SD_MUTEX(un));
9051 	}
9052 
9053 	if (un->un_reset_throttle_timeid != NULL) {
9054 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9055 		un->un_reset_throttle_timeid = NULL;
9056 		mutex_exit(SD_MUTEX(un));
9057 		(void) untimeout(temp_id);
9058 		mutex_enter(SD_MUTEX(un));
9059 	}
9060 
9061 	if (un->un_startstop_timeid != NULL) {
9062 		timeout_id_t temp_id = un->un_startstop_timeid;
9063 		un->un_startstop_timeid = NULL;
9064 		mutex_exit(SD_MUTEX(un));
9065 		(void) untimeout(temp_id);
9066 		mutex_enter(SD_MUTEX(un));
9067 	}
9068 
9069 	if (un->un_dcvb_timeid != NULL) {
9070 		timeout_id_t temp_id = un->un_dcvb_timeid;
9071 		un->un_dcvb_timeid = NULL;
9072 		mutex_exit(SD_MUTEX(un));
9073 		(void) untimeout(temp_id);
9074 	} else {
9075 		mutex_exit(SD_MUTEX(un));
9076 	}
9077 
9078 	/* Remove any pending reservation reclaim requests for this device */
9079 	sd_rmv_resv_reclaim_req(dev);
9080 
9081 	mutex_enter(SD_MUTEX(un));
9082 
9083 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9084 	if (un->un_direct_priority_timeid != NULL) {
9085 		timeout_id_t temp_id = un->un_direct_priority_timeid;
9086 		un->un_direct_priority_timeid = NULL;
9087 		mutex_exit(SD_MUTEX(un));
9088 		(void) untimeout(temp_id);
9089 		mutex_enter(SD_MUTEX(un));
9090 	}
9091 
9092 	/* Cancel any active multi-host disk watch thread requests */
9093 	if (un->un_mhd_token != NULL) {
9094 		mutex_exit(SD_MUTEX(un));
9095 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9096 		if (scsi_watch_request_terminate(un->un_mhd_token,
9097 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9098 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9099 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9100 			/*
9101 			 * Note: We are returning here after having removed
9102 			 * some driver timeouts above. This is consistent with
9103 			 * the legacy implementation but perhaps the watch
9104 			 * terminate call should be made with the wait flag set.
9105 			 */
9106 			goto err_stillbusy;
9107 		}
9108 		mutex_enter(SD_MUTEX(un));
9109 		un->un_mhd_token = NULL;
9110 	}
9111 
9112 	if (un->un_swr_token != NULL) {
9113 		mutex_exit(SD_MUTEX(un));
9114 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9115 		if (scsi_watch_request_terminate(un->un_swr_token,
9116 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9117 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9118 			    "sd_dr_detach: Cannot cancel swr watch request\n");
9119 			/*
9120 			 * Note: We are returning here after having removed
9121 			 * some driver timeouts above. This is consistent with
9122 			 * the legacy implementation but perhaps the watch
9123 			 * terminate call should be made with the wait flag set.
9124 			 */
9125 			goto err_stillbusy;
9126 		}
9127 		mutex_enter(SD_MUTEX(un));
9128 		un->un_swr_token = NULL;
9129 	}
9130 
9131 	mutex_exit(SD_MUTEX(un));
9132 
9133 	/*
9134 	 * Clear any scsi_reset_notifies. We clear the reset notifies
9135 	 * even if we have not registered one.
9136 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9137 	 */
9138 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9139 	    sd_mhd_reset_notify_cb, (caddr_t)un);
9140 
9141 	/*
9142 	 * Protect the timeout pointers from getting nulled by
9143 	 * their callback functions during the cancellation process.
9144 	 * In such a scenario untimeout can be invoked with a null value.
9145 	 */
9146 	_NOTE(NO_COMPETING_THREADS_NOW);
9147 
9148 	mutex_enter(&un->un_pm_mutex);
9149 	if (un->un_pm_idle_timeid != NULL) {
9150 		timeout_id_t temp_id = un->un_pm_idle_timeid;
9151 		un->un_pm_idle_timeid = NULL;
9152 		mutex_exit(&un->un_pm_mutex);
9153 
9154 		/*
9155 		 * Timeout is active; cancel it.
9156 		 * Note that it'll never be active on a device
9157 		 * that does not support PM therefore we don't
9158 		 * have to check before calling pm_idle_component.
9159 		 */
9160 		(void) untimeout(temp_id);
9161 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9162 		mutex_enter(&un->un_pm_mutex);
9163 	}
9164 
9165 	/*
9166 	 * Check whether there is already a timeout scheduled for power
9167 	 * management. If so, don't lower the power here; that's
9168 	 * the timeout handler's job.
9169 	 */
9170 	if (un->un_pm_timeid != NULL) {
9171 		timeout_id_t temp_id = un->un_pm_timeid;
9172 		un->un_pm_timeid = NULL;
9173 		mutex_exit(&un->un_pm_mutex);
9174 		/*
9175 		 * Timeout is active; cancel it.
9176 		 * Note that it'll never be active on a device
9177 		 * that does not support PM therefore we don't
9178 		 * have to check before calling pm_idle_component.
9179 		 */
9180 		(void) untimeout(temp_id);
9181 		(void) pm_idle_component(SD_DEVINFO(un), 0);
9182 
9183 	} else {
9184 		mutex_exit(&un->un_pm_mutex);
9185 		if ((un->un_f_pm_is_enabled == TRUE) &&
9186 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9187 		    DDI_SUCCESS)) {
9188 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9189 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
9190 			/*
9191 			 * Fix for bug: 4297749, item # 13
9192 			 * The above test now includes a check to see if PM is
9193 			 * supported by this device before calling
9194 			 * pm_lower_power().
9195 			 * Note, the following is not dead code. The call to
9196 			 * pm_lower_power above will generate a call back into
9197 			 * our sdpower routine which might result in a timeout
9198 			 * handler getting activated. Therefore the following
9199 			 * code is valid and necessary.
9200 			 */
9201 			mutex_enter(&un->un_pm_mutex);
9202 			if (un->un_pm_timeid != NULL) {
9203 				timeout_id_t temp_id = un->un_pm_timeid;
9204 				un->un_pm_timeid = NULL;
9205 				mutex_exit(&un->un_pm_mutex);
9206 				(void) untimeout(temp_id);
9207 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9208 			} else {
9209 				mutex_exit(&un->un_pm_mutex);
9210 			}
9211 		}
9212 	}
9213 
9214 	/*
9215 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9216 	 * Relocated here from above to be after the call to
9217 	 * pm_lower_power, which was getting errors.
9218 	 */
9219 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9220 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9221 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9222 
9223 	if (un->un_f_is_fibre == FALSE) {
9224 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9225 	}
9226 
9227 	/*
9228 	 * Remove any event callbacks, fibre only
9229 	 */
9230 	if (un->un_f_is_fibre == TRUE) {
9231 		if ((un->un_insert_event != NULL) &&
9232 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
9233 		    DDI_SUCCESS)) {
9234 			/*
9235 			 * Note: We are returning here after having done
9236 			 * substantial cleanup above. This is consistent
9237 			 * with the legacy implementation but this may not
9238 			 * be the right thing to do.
9239 			 */
9240 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9241 			    "sd_dr_detach: Cannot cancel insert event\n");
9242 			goto err_remove_event;
9243 		}
9244 		un->un_insert_event = NULL;
9245 
9246 		if ((un->un_remove_event != NULL) &&
9247 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
9248 		    DDI_SUCCESS)) {
9249 			/*
9250 			 * Note: We are returning here after having done
9251 			 * substantial cleanup above. This is consistent
9252 			 * with the legacy implementation but this may not
9253 			 * be the right thing to do.
9254 			 */
9255 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9256 			    "sd_dr_detach: Cannot cancel remove event\n");
9257 			goto err_remove_event;
9258 		}
9259 		un->un_remove_event = NULL;
9260 	}
9261 
9262 	/* Do not free the softstate if the callback routine is active */
9263 	sd_sync_with_callback(un);
9264 
9265 	/*
9266 	 * Hold the detach mutex here, to make sure that no other threads ever
9267 	 * can access a (partially) freed soft state structure.
9268 	 */
9269 	mutex_enter(&sd_detach_mutex);
9270 
9271 	/*
9272 	 * Clean up the soft state struct.
9273 	 * Cleanup is done in reverse order of allocs/inits.
9274 	 * At this point there should be no competing threads anymore.
9275 	 */
9276 
9277 	/* Unregister and free device id. */
9278 	ddi_devid_unregister(devi);
9279 	if (un->un_devid) {
9280 		ddi_devid_free(un->un_devid);
9281 		un->un_devid = NULL;
9282 	}
9283 
9284 	/*
9285 	 * Destroy wmap cache if it exists.
9286 	 */
9287 	if (un->un_wm_cache != NULL) {
9288 		kmem_cache_destroy(un->un_wm_cache);
9289 		un->un_wm_cache = NULL;
9290 	}
9291 
9292 	/* Remove minor nodes */
9293 	ddi_remove_minor_node(devi, NULL);
9294 
9295 	/*
9296 	 * kstat cleanup is done in detach for all device types (4363169).
9297 	 * We do not want to fail detach if the device kstats are not deleted
9298 	 * since there is confusion about the devo_refcnt for the device.
9299 	 * We just delete the kstats and let detach complete successfully.
9300 	 */
9301 	if (un->un_stats != NULL) {
9302 		kstat_delete(un->un_stats);
9303 		un->un_stats = NULL;
9304 	}
9305 	if (un->un_errstats != NULL) {
9306 		kstat_delete(un->un_errstats);
9307 		un->un_errstats = NULL;
9308 	}
9309 
9310 	/* Remove partition stats */
9311 	if (un->un_f_pkstats_enabled) {
9312 		for (i = 0; i < NSDMAP; i++) {
9313 			if (un->un_pstats[i] != NULL) {
9314 				kstat_delete(un->un_pstats[i]);
9315 				un->un_pstats[i] = NULL;
9316 			}
9317 		}
9318 	}
9319 
9320 	/* Remove xbuf registration */
9321 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9322 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9323 
9324 	/* Remove driver properties */
9325 	ddi_prop_remove_all(devi);
9326 
9327 	mutex_destroy(&un->un_pm_mutex);
9328 	cv_destroy(&un->un_pm_busy_cv);
9329 
9330 	cv_destroy(&un->un_wcc_cv);
9331 
9332 	/* Open/close semaphore */
9333 	sema_destroy(&un->un_semoclose);
9334 
9335 	/* Removable media condvar. */
9336 	cv_destroy(&un->un_state_cv);
9337 
9338 	/* Suspend/resume condvar. */
9339 	cv_destroy(&un->un_suspend_cv);
9340 	cv_destroy(&un->un_disk_busy_cv);
9341 
9342 	sd_free_rqs(un);
9343 
9344 	/* Free up soft state */
9345 	devp->sd_private = NULL;
9346 	bzero(un, sizeof (struct sd_lun));
9347 	ddi_soft_state_free(sd_state, instance);
9348 
9349 	mutex_exit(&sd_detach_mutex);
9350 
9351 	/* This frees up the INQUIRY data associated with the device. */
9352 	scsi_unprobe(devp);
9353 
9354 	return (DDI_SUCCESS);
9355 
9356 err_notclosed:
9357 	mutex_exit(SD_MUTEX(un));
9358 
9359 err_stillbusy:
9360 	_NOTE(NO_COMPETING_THREADS_NOW);
9361 
9362 err_remove_event:
9363 	mutex_enter(&sd_detach_mutex);
9364 	un->un_detach_count--;
9365 	mutex_exit(&sd_detach_mutex);
9366 
9367 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9368 	return (DDI_FAILURE);
9369 }
9370 
9371 
9372 /*
9373  * Driver minor node structure and data table
9374  */
9375 struct driver_minor_data {
9376 	char	*name;
9377 	minor_t	minor;
9378 	int	type;
9379 };
9380 
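/*
 * Minor nodes "a".."h" ("a".."p" with VTOC16) are the block-device
 * slices, each with a matching ",raw" character-device entry; minors
 * 16-20 ("q".."u") cover the fdisk partitions on platforms where the
 * firmware needs fdisk. The node's minor number is combined with the
 * instance number in sd_create_minor_nodes() below.
 */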
9381 static struct driver_minor_data sd_minor_data[] = {
9382 	{"a", 0, S_IFBLK},
9383 	{"b", 1, S_IFBLK},
9384 	{"c", 2, S_IFBLK},
9385 	{"d", 3, S_IFBLK},
9386 	{"e", 4, S_IFBLK},
9387 	{"f", 5, S_IFBLK},
9388 	{"g", 6, S_IFBLK},
9389 	{"h", 7, S_IFBLK},
9390 #if defined(_SUNOS_VTOC_16)
9391 	{"i", 8, S_IFBLK},
9392 	{"j", 9, S_IFBLK},
9393 	{"k", 10, S_IFBLK},
9394 	{"l", 11, S_IFBLK},
9395 	{"m", 12, S_IFBLK},
9396 	{"n", 13, S_IFBLK},
9397 	{"o", 14, S_IFBLK},
9398 	{"p", 15, S_IFBLK},
9399 #endif			/* defined(_SUNOS_VTOC_16) */
9400 #if defined(_FIRMWARE_NEEDS_FDISK)
9401 	{"q", 16, S_IFBLK},
9402 	{"r", 17, S_IFBLK},
9403 	{"s", 18, S_IFBLK},
9404 	{"t", 19, S_IFBLK},
9405 	{"u", 20, S_IFBLK},
9406 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9407 	{"a,raw", 0, S_IFCHR},
9408 	{"b,raw", 1, S_IFCHR},
9409 	{"c,raw", 2, S_IFCHR},
9410 	{"d,raw", 3, S_IFCHR},
9411 	{"e,raw", 4, S_IFCHR},
9412 	{"f,raw", 5, S_IFCHR},
9413 	{"g,raw", 6, S_IFCHR},
9414 	{"h,raw", 7, S_IFCHR},
9415 #if defined(_SUNOS_VTOC_16)
9416 	{"i,raw", 8, S_IFCHR},
9417 	{"j,raw", 9, S_IFCHR},
9418 	{"k,raw", 10, S_IFCHR},
9419 	{"l,raw", 11, S_IFCHR},
9420 	{"m,raw", 12, S_IFCHR},
9421 	{"n,raw", 13, S_IFCHR},
9422 	{"o,raw", 14, S_IFCHR},
9423 	{"p,raw", 15, S_IFCHR},
9424 #endif			/* defined(_SUNOS_VTOC_16) */
9425 #if defined(_FIRMWARE_NEEDS_FDISK)
9426 	{"q,raw", 16, S_IFCHR},
9427 	{"r,raw", 17, S_IFCHR},
9428 	{"s,raw", 18, S_IFCHR},
9429 	{"t,raw", 19, S_IFCHR},
9430 	{"u,raw", 20, S_IFCHR},
9431 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9432 	{0}
9433 };
9434 
9435 static struct driver_minor_data sd_minor_data_efi[] = {
9436 	{"a", 0, S_IFBLK},
9437 	{"b", 1, S_IFBLK},
9438 	{"c", 2, S_IFBLK},
9439 	{"d", 3, S_IFBLK},
9440 	{"e", 4, S_IFBLK},
9441 	{"f", 5, S_IFBLK},
9442 	{"g", 6, S_IFBLK},
9443 	{"wd", 7, S_IFBLK},
9444 #if defined(_FIRMWARE_NEEDS_FDISK)
9445 	{"q", 16, S_IFBLK},
9446 	{"r", 17, S_IFBLK},
9447 	{"s", 18, S_IFBLK},
9448 	{"t", 19, S_IFBLK},
9449 	{"u", 20, S_IFBLK},
9450 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9451 	{"a,raw", 0, S_IFCHR},
9452 	{"b,raw", 1, S_IFCHR},
9453 	{"c,raw", 2, S_IFCHR},
9454 	{"d,raw", 3, S_IFCHR},
9455 	{"e,raw", 4, S_IFCHR},
9456 	{"f,raw", 5, S_IFCHR},
9457 	{"g,raw", 6, S_IFCHR},
9458 	{"wd,raw", 7, S_IFCHR},
9459 #if defined(_FIRMWARE_NEEDS_FDISK)
9460 	{"q,raw", 16, S_IFCHR},
9461 	{"r,raw", 17, S_IFCHR},
9462 	{"s,raw", 18, S_IFCHR},
9463 	{"t,raw", 19, S_IFCHR},
9464 	{"u,raw", 20, S_IFCHR},
9465 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9466 	{0}
9467 };
9468 
9469 
9470 /*
9471  *    Function: sd_create_minor_nodes
9472  *
9473  * Description: Create the minor device nodes for the instance.
9474  *
9475  *   Arguments: un - driver soft state (unit) structure
9476  *		devi - pointer to device info structure
9477  *
9478  * Return Code: DDI_SUCCESS
9479  *		DDI_FAILURE
9480  *
9481  *     Context: Kernel thread context
9482  */
9483 
9484 static int
9485 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9486 {
9487 	struct driver_minor_data	*dmdp;
9488 	struct scsi_device		*devp;
9489 	int				instance;
9490 	char				name[48];
9491 
9492 	ASSERT(un != NULL);
9493 	devp = ddi_get_driver_private(devi);
9494 	instance = ddi_get_instance(devp->sd_dev);
9495 
9496 	/*
9497 	 * Create all the minor nodes for this target.
9498 	 */
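	/*
	 * Note: A disk with more than DK_MAX_BLOCKS blocks cannot carry
	 * a VTOC label, so it gets the EFI node set (which includes the
	 * "wd" nodes for the EFI reserved partition) instead of the
	 * full VTOC set.
	 */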
9499 	if (un->un_blockcount > DK_MAX_BLOCKS)
9500 		dmdp = sd_minor_data_efi;
9501 	else
9502 		dmdp = sd_minor_data;
9503 	while (dmdp->name != NULL) {
9504 
9505 		(void) strcpy(name, dmdp->name);
9506 
9507 		if (ddi_create_minor_node(devi, name, dmdp->type,
9508 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9509 		    un->un_node_type, NULL) == DDI_FAILURE) {
9510 			/*
9511 			 * Clean up any nodes that may have been created, in
9512 			 * case this fails in the middle of the loop.
9513 			 */
9514 			ddi_remove_minor_node(devi, NULL);
9515 			return (DDI_FAILURE);
9516 		}
9517 		dmdp++;
9518 	}
9519 
9520 	return (DDI_SUCCESS);
9521 }
9522 
9523 
9524 /*
9525  *    Function: sd_create_errstats
9526  *
9527  * Description: This routine instantiates the device error stats.
9528  *
9529  *		Note: During attach the stats are instantiated first so they are
9530  *		available for attach-time routines that utilize the driver
9531  *		iopath to send commands to the device. The stats are initialized
9532  *		separately so data obtained during some attach-time routines is
9533  *		available. (4362483)
9534  *
9535  *   Arguments: un - driver soft state (unit) structure
9536  *		instance - driver instance
9537  *
9538  *     Context: Kernel thread context
9539  */
9540 
9541 static void
9542 sd_create_errstats(struct sd_lun *un, int instance)
9543 {
9544 	struct	sd_errstats	*stp;
9545 	char	kstatmodule_err[KSTAT_STRLEN];
9546 	char	kstatname[KSTAT_STRLEN];
9547 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9548 
9549 	ASSERT(un != NULL);
9550 
9551 	if (un->un_errstats != NULL) {
9552 		return;
9553 	}
9554 
9555 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9556 	    "%serr", sd_label);
9557 	(void) snprintf(kstatname, sizeof (kstatname),
9558 	    "%s%d,err", sd_label, instance);
9559 
9560 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9561 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9562 
9563 	if (un->un_errstats == NULL) {
9564 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9565 		    "sd_create_errstats: Failed kstat_create\n");
9566 		return;
9567 	}
9568 
9569 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9570 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9571 	    KSTAT_DATA_UINT32);
9572 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9573 	    KSTAT_DATA_UINT32);
9574 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9575 	    KSTAT_DATA_UINT32);
9576 	kstat_named_init(&stp->sd_vid,		"Vendor",
9577 	    KSTAT_DATA_CHAR);
9578 	kstat_named_init(&stp->sd_pid,		"Product",
9579 	    KSTAT_DATA_CHAR);
9580 	kstat_named_init(&stp->sd_revision,	"Revision",
9581 	    KSTAT_DATA_CHAR);
9582 	kstat_named_init(&stp->sd_serial,	"Serial No",
9583 	    KSTAT_DATA_CHAR);
9584 	kstat_named_init(&stp->sd_capacity,	"Size",
9585 	    KSTAT_DATA_ULONGLONG);
9586 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9587 	    KSTAT_DATA_UINT32);
9588 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9589 	    KSTAT_DATA_UINT32);
9590 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9591 	    KSTAT_DATA_UINT32);
9592 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9593 	    KSTAT_DATA_UINT32);
9594 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9595 	    KSTAT_DATA_UINT32);
9596 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9597 	    KSTAT_DATA_UINT32);
9598 
9599 	un->un_errstats->ks_private = un;
9600 	un->un_errstats->ks_update  = nulldev;
9601 
9602 	kstat_install(un->un_errstats);
9603 }
9604 
9605 
9606 /*
9607  *    Function: sd_set_errstats
9608  *
9609  * Description: This routine sets the value of the vendor id, product id,
9610  *		revision, serial number, and capacity device error stats.
9611  *
9612  *		Note: During attach the stats are instantiated first so they are
9613  *		available for attach-time routines that utilize the driver
9614  *		iopath to send commands to the device. The stats are initialized
9615  *		separately so data obtained during some attach-time routines is
9616  *		available. (4362483)
9617  *
9618  *   Arguments: un - driver soft state (unit) structure
9619  *
9620  *     Context: Kernel thread context
9621  */
9622 
9623 static void
9624 sd_set_errstats(struct sd_lun *un)
9625 {
9626 	struct	sd_errstats	*stp;
9627 
9628 	ASSERT(un != NULL);
9629 	ASSERT(un->un_errstats != NULL);
9630 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9631 	ASSERT(stp != NULL);
9632 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9633 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9634 	(void) strncpy(stp->sd_revision.value.c,
9635 	    un->un_sd->sd_inq->inq_revision, 4);
9636 
9637 	/*
9638 	 * All the errstats are persistent across detach/attach,
9639 	 * so reset them all here in case a disk drive has been
9640 	 * hot-replaced, except when the drive is an unchanged
9641 	 * Sun-qualified drive.
9642 	 */
9643 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9644 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9645 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9646 		stp->sd_softerrs.value.ui32 = 0;
9647 		stp->sd_harderrs.value.ui32 = 0;
9648 		stp->sd_transerrs.value.ui32 = 0;
9649 		stp->sd_rq_media_err.value.ui32 = 0;
9650 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9651 		stp->sd_rq_nodev_err.value.ui32 = 0;
9652 		stp->sd_rq_recov_err.value.ui32 = 0;
9653 		stp->sd_rq_illrq_err.value.ui32 = 0;
9654 		stp->sd_rq_pfa_err.value.ui32 = 0;
9655 	}
9656 
9657 	/*
9658 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9659 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9660 	 * (4376302))
9661 	 */
9662 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9663 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9664 		    sizeof (SD_INQUIRY(un)->inq_serial));
9665 	}
9666 
9667 	if (un->un_f_blockcount_is_valid != TRUE) {
9668 		/*
9669 		 * Set capacity error stat to 0 for no media. This ensures
9670 		 * a valid capacity is displayed in response to 'iostat -E'
9671 		 * when no media is present in the device.
9672 		 */
9673 		stp->sd_capacity.value.ui64 = 0;
9674 	} else {
9675 		/*
9676 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9677 		 * capacity.
9678 		 *
9679 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9680 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9681 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9682 		 */
9683 		stp->sd_capacity.value.ui64 = (uint64_t)
9684 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9685 	}
9686 }
9687 
9688 
9689 /*
9690  *    Function: sd_set_pstats
9691  *
9692  * Description: This routine instantiates and initializes the partition
9693  *              stats for each partition with more than zero blocks.
9694  *		(4363169)
9695  *
9696  *   Arguments: un - driver soft state (unit) structure
9697  *
9698  *     Context: Kernel thread context
9699  */
9700 
9701 static void
9702 sd_set_pstats(struct sd_lun *un)
9703 {
9704 	char	kstatname[KSTAT_STRLEN];
9705 	int	instance;
9706 	int	i;
9707 
9708 	ASSERT(un != NULL);
9709 
9710 	instance = ddi_get_instance(SD_DEVINFO(un));
9711 
9712 	/* Note: x86: is this a VTOC8/VTOC16 difference? */
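	/*
	 * Partition kstats are named "<label><instance>,<slice>",
	 * e.g. "sd0,a" for slice a of instance 0, and are created only
	 * for partitions with a non-zero block count.
	 */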
9713 	for (i = 0; i < NSDMAP; i++) {
9714 		if ((un->un_pstats[i] == NULL) &&
9715 		    (un->un_map[i].dkl_nblk != 0)) {
9716 			(void) snprintf(kstatname, sizeof (kstatname),
9717 			    "%s%d,%s", sd_label, instance,
9718 			    sd_minor_data[i].name);
9719 			un->un_pstats[i] = kstat_create(sd_label,
9720 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9721 			    1, KSTAT_FLAG_PERSISTENT);
9722 			if (un->un_pstats[i] != NULL) {
9723 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9724 				kstat_install(un->un_pstats[i]);
9725 			}
9726 		}
9727 	}
9728 }
9729 
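/*
 * For example (sketch): with sd_label "sd", instance 1, and a partition
 * minor name of "a" (the label and minor name here are illustrative
 * assumptions), the snprintf above produces the kstat name "sd1,a" in
 * class "partition", which is the name tools such as iostat(1M) report
 * for that slice.
 */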
9730 
9731 #if (defined(__fibre))
9732 /*
9733  *    Function: sd_init_event_callbacks
9734  *
9735  * Description: This routine initializes the insertion and removal event
9736  *		callbacks. (fibre only)
9737  *
9738  *   Arguments: un - driver soft state (unit) structure
9739  *
9740  *     Context: Kernel thread context
9741  */
9742 
9743 static void
9744 sd_init_event_callbacks(struct sd_lun *un)
9745 {
9746 	ASSERT(un != NULL);
9747 
9748 	if ((un->un_insert_event == NULL) &&
9749 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9750 	    &un->un_insert_event) == DDI_SUCCESS)) {
9751 		/*
9752 		 * Add the callback for an insertion event
9753 		 */
9754 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9755 		    un->un_insert_event, sd_event_callback, (void *)un,
9756 		    &(un->un_insert_cb_id));
9757 	}
9758 
9759 	if ((un->un_remove_event == NULL) &&
9760 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9761 	    &un->un_remove_event) == DDI_SUCCESS)) {
9762 		/*
9763 		 * Add the callback for a removal event
9764 		 */
9765 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9766 		    un->un_remove_event, sd_event_callback, (void *)un,
9767 		    &(un->un_remove_cb_id));
9768 	}
9769 }
9770 
9771 
9772 /*
9773  *    Function: sd_event_callback
9774  *
9775  * Description: This routine handles insert/remove events (photon). The
9776  *		state is changed to OFFLINE, which can be used to suppress
9777  *		error messages. (fibre only)
9778  *
9779  *   Arguments: un - driver soft state (unit) structure
9780  *
9781  *     Context: Callout thread context
9782  */
9783 /* ARGSUSED */
9784 static void
9785 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9786     void *bus_impldata)
9787 {
9788 	struct sd_lun *un = (struct sd_lun *)arg;
9789 
9790 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9791 	if (event == un->un_insert_event) {
9792 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9793 		mutex_enter(SD_MUTEX(un));
9794 		if (un->un_state == SD_STATE_OFFLINE) {
9795 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9796 				un->un_state = un->un_last_state;
9797 			} else {
9798 				/*
9799 				 * We have gone through SUSPEND/RESUME while
9800 				 * we were offline. Restore the last state
9801 				 */
9802 				un->un_state = un->un_save_state;
9803 			}
9804 		}
9805 		mutex_exit(SD_MUTEX(un));
9806 
9807 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9808 	} else if (event == un->un_remove_event) {
9809 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9810 		mutex_enter(SD_MUTEX(un));
9811 		/*
9812 		 * We need to handle an event callback that occurs during
9813 		 * the suspend operation, since we don't prevent it.
9814 		 */
9815 		if (un->un_state != SD_STATE_OFFLINE) {
9816 			if (un->un_state != SD_STATE_SUSPENDED) {
9817 				New_state(un, SD_STATE_OFFLINE);
9818 			} else {
9819 				un->un_last_state = SD_STATE_OFFLINE;
9820 			}
9821 		}
9822 		mutex_exit(SD_MUTEX(un));
9823 	} else {
9824 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9825 		    "!Unknown event\n");
9826 	}
9827 
9828 }
9829 #endif
9830 
9831 /*
9832  *    Function: sd_cache_control()
9833  *
9834  * Description: This routine is the driver entry point for setting
9835  *		read and write caching by modifying the WCE (write cache
9836  *		enable) and RCD (read cache disable) bits of mode
9837  *		page 8 (MODEPAGE_CACHING).
9838  *
9839  *   Arguments: un - driver soft state (unit) structure
9840  *		rcd_flag - flag for controlling the read cache
9841  *		wce_flag - flag for controlling the write cache
9842  *
9843  * Return Code: EIO
9844  *		code returned by sd_send_scsi_MODE_SENSE and
9845  *		sd_send_scsi_MODE_SELECT
9846  *
9847  *     Context: Kernel Thread
9848  */
9849 
9850 static int
9851 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9852 {
9853 	struct mode_caching	*mode_caching_page;
9854 	uchar_t			*header;
9855 	size_t			buflen;
9856 	int			hdrlen;
9857 	int			bd_len;
9858 	int			rval = 0;
9859 	struct mode_header_grp2	*mhp;
9860 
9861 	ASSERT(un != NULL);
9862 
9863 	/*
9864 	 * Do a test unit ready; otherwise a mode sense may not work if this
9865 	 * is the first command sent to the device after boot.
9866 	 */
9867 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9868 
9869 	if (un->un_f_cfg_is_atapi == TRUE) {
9870 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9871 	} else {
9872 		hdrlen = MODE_HEADER_LENGTH;
9873 	}
9874 
9875 	/*
9876 	 * Allocate memory for the retrieved mode page and its headers.  Set
9877 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9878 	 * we get all of the mode sense data; otherwise, the mode select
9879 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9880 	 */
9881 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9882 	    sizeof (struct mode_cache_scsi3);
9883 
9884 	header = kmem_zalloc(buflen, KM_SLEEP);
9885 
9886 	/* Get the information from the device. */
9887 	if (un->un_f_cfg_is_atapi == TRUE) {
9888 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9889 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9890 	} else {
9891 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9892 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9893 	}
9894 	if (rval != 0) {
9895 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9896 		    "sd_cache_control: Mode Sense Failed\n");
9897 		kmem_free(header, buflen);
9898 		return (rval);
9899 	}
9900 
9901 	/*
9902 	 * Determine size of Block Descriptors in order to locate
9903 	 * the mode page data. ATAPI devices return 0, SCSI devices
9904 	 * should return MODE_BLK_DESC_LENGTH.
9905 	 */
9906 	if (un->un_f_cfg_is_atapi == TRUE) {
9907 		mhp	= (struct mode_header_grp2 *)header;
9908 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9909 	} else {
9910 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9911 	}
9912 
9913 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9914 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9915 		    "sd_cache_control: Mode Sense returned invalid "
9916 		    "block descriptor length\n");
9917 		kmem_free(header, buflen);
9918 		return (EIO);
9919 	}
9920 
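	/*
	 * Sketch of the buffer layout at this point, which is why the
	 * page pointer below is computed as header + hdrlen + bd_len:
	 *
	 *	header[0 .. hdrlen-1]			mode header
	 *	header[hdrlen .. hdrlen+bd_len-1]	block descriptor(s)
	 *	header[hdrlen+bd_len ..]		MODEPAGE_CACHING page
	 */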
9921 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9922 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9923 		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
9924 		    " caching page code mismatch %d\n",
9925 		    mode_caching_page->mode_page.code);
9926 		kmem_free(header, buflen);
9927 		return (EIO);
9928 	}
9929 
9930 	/* Check the relevant bits on successful mode sense. */
9931 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9932 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9933 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9934 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9935 
9936 		size_t sbuflen;
9937 		uchar_t save_pg;
9938 
9939 		/*
9940 		 * Construct select buffer length based on the
9941 		 * length of the sense data returned.
9942 		 */
9943 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9944 		    sizeof (struct mode_page) +
9945 		    (int)mode_caching_page->mode_page.length;
9946 
9947 		/*
9948 		 * Set the caching bits as requested.
9949 		 */
9950 		if (rcd_flag == SD_CACHE_ENABLE)
9951 			mode_caching_page->rcd = 0;
9952 		else if (rcd_flag == SD_CACHE_DISABLE)
9953 			mode_caching_page->rcd = 1;
9954 
9955 		if (wce_flag == SD_CACHE_ENABLE)
9956 			mode_caching_page->wce = 1;
9957 		else if (wce_flag == SD_CACHE_DISABLE)
9958 			mode_caching_page->wce = 0;
9959 
9960 		/*
9961 		 * Save the page if the mode sense says the
9962 		 * drive supports it.
9963 		 */
9964 		save_pg = mode_caching_page->mode_page.ps ?
9965 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9966 
9967 		/* Clear reserved bits before mode select. */
9968 		mode_caching_page->mode_page.ps = 0;
9969 
9970 		/*
9971 		 * Clear out mode header for mode select.
9972 		 * The rest of the retrieved page will be reused.
9973 		 */
9974 		bzero(header, hdrlen);
9975 
9976 		if (un->un_f_cfg_is_atapi == TRUE) {
9977 			mhp = (struct mode_header_grp2 *)header;
9978 			mhp->bdesc_length_hi = bd_len >> 8;
9979 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9980 		} else {
9981 			((struct mode_header *)header)->bdesc_length = bd_len;
9982 		}
9983 
9984 		/* Issue mode select to change the cache settings */
9985 		if (un->un_f_cfg_is_atapi == TRUE) {
9986 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9987 			    sbuflen, save_pg, SD_PATH_DIRECT);
9988 		} else {
9989 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9990 			    sbuflen, save_pg, SD_PATH_DIRECT);
9991 		}
9992 	}
9993 
9994 	kmem_free(header, buflen);
9995 	return (rval);
9996 }
9997 
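/*
 * Typical usage sketch for the routine above, using only the flag
 * values it documents: enable both the read and write caches and
 * ignore the result.
 *
 *	(void) sd_cache_control(un, SD_CACHE_ENABLE, SD_CACHE_ENABLE);
 *
 * A non-zero return indicates that the mode sense/mode select
 * sequence failed.
 */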
9998 
9999 /*
10000  *    Function: sd_get_write_cache_enabled()
10001  *
10002  * Description: This routine is the driver entry point for determining if
10003  *		write caching is enabled.  It examines the WCE (write cache
10004  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
10005  *
10006  *   Arguments: un - driver soft state (unit) structure
10007  *   		is_enabled - pointer to int where write cache enabled state
10008  *		is_enabled - pointer to int where write cache enabled state
10009  *			is returned (non-zero -> write cache enabled)
10010  *
10012  *		code returned by sd_send_scsi_MODE_SENSE
10013  *
10014  *     Context: Kernel Thread
10015  *
10016  * NOTE: If ioctl is added to disable write cache, this sequence should
10017  * be followed so that no locking is required for accesses to
10018  * un->un_f_write_cache_enabled:
10019  * 	do mode select to clear wce
10020  * 	do synchronize cache to flush cache
10021  * 	set un->un_f_write_cache_enabled = FALSE
10022  *
10023  * Conversely, an ioctl to enable the write cache should be done
10024  * in this order:
10025  * 	set un->un_f_write_cache_enabled = TRUE
10026  * 	do mode select to set wce
10027  */
10028 
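/*
 * Sketch of the disable ordering from the NOTE above; rcd_unchanged is
 * a hypothetical rcd_flag value (not defined in this file) meaning
 * "leave the read cache setting alone":
 *
 *	(void) sd_cache_control(un, rcd_unchanged, SD_CACHE_DISABLE);
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *	un->un_f_write_cache_enabled = FALSE;
 */
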
10029 static int
10030 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
10031 {
10032 	struct mode_caching	*mode_caching_page;
10033 	uchar_t			*header;
10034 	size_t			buflen;
10035 	int			hdrlen;
10036 	int			bd_len;
10037 	int			rval = 0;
10038 
10039 	ASSERT(un != NULL);
10040 	ASSERT(is_enabled != NULL);
10041 
10042 	/* in case of error, flag as enabled */
10043 	*is_enabled = TRUE;
10044 
10045 	/*
10046 	 * Do a test unit ready; otherwise a mode sense may not work if this
10047 	 * is the first command sent to the device after boot.
10048 	 */
10049 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10050 
10051 	if (un->un_f_cfg_is_atapi == TRUE) {
10052 		hdrlen = MODE_HEADER_LENGTH_GRP2;
10053 	} else {
10054 		hdrlen = MODE_HEADER_LENGTH;
10055 	}
10056 
10057 	/*
10058 	 * Allocate memory for the retrieved mode page and its headers.  Set
10059 	 * a pointer to the page itself.
10060 	 */
10061 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10062 	header = kmem_zalloc(buflen, KM_SLEEP);
10063 
10064 	/* Get the information from the device. */
10065 	if (un->un_f_cfg_is_atapi == TRUE) {
10066 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10067 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10068 	} else {
10069 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10070 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10071 	}
10072 	if (rval != 0) {
10073 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10074 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10075 		kmem_free(header, buflen);
10076 		return (rval);
10077 	}
10078 
10079 	/*
10080 	 * Determine size of Block Descriptors in order to locate
10081 	 * the mode page data. ATAPI devices return 0, SCSI devices
10082 	 * should return MODE_BLK_DESC_LENGTH.
10083 	 */
10084 	if (un->un_f_cfg_is_atapi == TRUE) {
10085 		struct mode_header_grp2	*mhp;
10086 		mhp	= (struct mode_header_grp2 *)header;
10087 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10088 	} else {
10089 		bd_len  = ((struct mode_header *)header)->bdesc_length;
10090 	}
10091 
10092 	if (bd_len > MODE_BLK_DESC_LENGTH) {
10093 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10094 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10095 		    "block descriptor length\n");
10096 		kmem_free(header, buflen);
10097 		return (EIO);
10098 	}
10099 
10100 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10101 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
10102 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled:"
10103 		    " Mode Sense caching page code mismatch %d\n",
10104 		    mode_caching_page->mode_page.code);
10105 		kmem_free(header, buflen);
10106 		return (EIO);
10107 	}
10108 	*is_enabled = mode_caching_page->wce;
10109 
10110 	kmem_free(header, buflen);
10111 	return (0);
10112 }
10113 
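/*
 * Usage sketch for the routine above: callers may treat a failure as
 * "enabled", because *is_enabled is pre-set to TRUE before the mode
 * sense is issued.
 *
 *	int	wce;
 *
 *	(void) sd_get_write_cache_enabled(un, &wce);
 *	if (wce) {
 *		... write cache is (or is assumed to be) enabled ...
 *	}
 */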
10114 
10115 /*
10116  *    Function: sd_make_device
10117  *
10118  * Description: Utility routine to return the Solaris device number from
10119  *		the data in the device's dev_info structure.
10120  *
10121  * Return Code: The Solaris device number
10122  *
10123  *     Context: Any
10124  */
10125 
10126 static dev_t
10127 sd_make_device(dev_info_t *devi)
10128 {
10129 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10130 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10131 }
10132 
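/*
 * For example (sketch): for instance 2 the routine above returns
 * makedevice(major, 2 << SDUNIT_SHIFT), so applying SDUNIT() to the
 * resulting dev_t recovers instance 2 and SDPART() recovers
 * partition 0.
 */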
10133 
10134 /*
10135  *    Function: sd_pm_entry
10136  *
10137  * Description: Called at the start of a new command to manage power
10138  *		and busy status of a device. This includes determining whether
10139  *		the current power state of the device is sufficient for
10140  *		performing the command or whether it must be changed.
10141  *		The PM framework is notified appropriately.
10142  *		Only with a return status of DDI_SUCCESS will the
10143  *		component be marked busy to the framework.
10144  *
10145  *		All callers of sd_pm_entry must check the return status
10146  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
10147  *		of DDI_FAILURE indicates the device failed to power up.
10148  *		In this case un_pm_count has been adjusted so the result
10149  *		on exit is still powered down, i.e. the count is less than 0.
10150  *		Calling sd_pm_exit with this count value hits an ASSERT.
10151  *
10152  * Return Code: DDI_SUCCESS or DDI_FAILURE
10153  *
10154  *     Context: Kernel thread context.
10155  */
10156 
10157 static int
10158 sd_pm_entry(struct sd_lun *un)
10159 {
10160 	int return_status = DDI_SUCCESS;
10161 
10162 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10163 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10164 
10165 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
10166 
10167 	if (un->un_f_pm_is_enabled == FALSE) {
10168 		SD_TRACE(SD_LOG_IO_PM, un,
10169 		    "sd_pm_entry: exiting, PM not enabled\n");
10170 		return (return_status);
10171 	}
10172 
10173 	/*
10174 	 * Just increment a counter if PM is enabled. On the transition from
10175 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
10176 	 * the count with each IO and mark the device as idle when the count
10177 	 * hits 0.
10178 	 *
10179 	 * If the count is less than 0 the device is powered down. If a powered
10180 	 * down device is successfully powered up then the count must be
10181 	 * incremented to reflect the power up. Note that it'll get incremented
10182 	 * a second time to become busy.
10183 	 *
10184 	 * Because the following has the potential to change the device state
10185 	 * and must release the un_pm_mutex to do so, only one thread can be
10186 	 * allowed through at a time.
10187 	 */
10188 
10189 	mutex_enter(&un->un_pm_mutex);
10190 	while (un->un_pm_busy == TRUE) {
10191 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10192 	}
10193 	un->un_pm_busy = TRUE;
10194 
10195 	if (un->un_pm_count < 1) {
10196 
10197 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10198 
10199 		/*
10200 		 * Indicate we are now busy so the framework won't attempt to
10201 		 * power down the device. This call will only fail if either
10202 		 * we passed a bad component number or the device has no
10203 		 * components. Neither of these should ever happen.
10204 		 */
10205 		mutex_exit(&un->un_pm_mutex);
10206 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10207 		ASSERT(return_status == DDI_SUCCESS);
10208 
10209 		mutex_enter(&un->un_pm_mutex);
10210 
10211 		if (un->un_pm_count < 0) {
10212 			mutex_exit(&un->un_pm_mutex);
10213 
10214 			SD_TRACE(SD_LOG_IO_PM, un,
10215 			    "sd_pm_entry: power up component\n");
10216 
10217 			/*
10218 			 * pm_raise_power will cause sdpower to be called
10219 			 * which brings the device power level to the
10220 			 * desired state, ON in this case. If successful,
10221 			 * un_pm_count and un_power_level will be updated
10222 			 * appropriately.
10223 			 */
10224 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10225 			    SD_SPINDLE_ON);
10226 
10227 			mutex_enter(&un->un_pm_mutex);
10228 
10229 			if (return_status != DDI_SUCCESS) {
10230 				/*
10231 				 * Power up failed.
10232 				 * Idle the device and adjust the count
10233 				 * so the result on exit is that we're
10234 				 * still powered down, i.e. the count is less than 0.
10235 				 */
10236 				SD_TRACE(SD_LOG_IO_PM, un,
10237 				    "sd_pm_entry: power up failed,"
10238 				    " idle the component\n");
10239 
10240 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10241 				un->un_pm_count--;
10242 			} else {
10243 				/*
10244 				 * Device is powered up, verify the
10245 				 * count is non-negative.
10246 				 * This is debug only.
10247 				 */
10248 				ASSERT(un->un_pm_count == 0);
10249 			}
10250 		}
10251 
10252 		if (return_status == DDI_SUCCESS) {
10253 			/*
10254 			 * For performance, now that the device has been tagged
10255 			 * as busy, and it's known to be powered up, update the
10256 			 * chain types to use jump tables that do not include
10257 			 * pm. This significantly lowers the overhead and
10258 			 * therefore improves performance.
10259 			 */
10260 
10261 			mutex_exit(&un->un_pm_mutex);
10262 			mutex_enter(SD_MUTEX(un));
10263 			SD_TRACE(SD_LOG_IO_PM, un,
10264 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10265 			    un->un_uscsi_chain_type);
10266 
10267 			if (un->un_f_non_devbsize_supported) {
10268 				un->un_buf_chain_type =
10269 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10270 			} else {
10271 				un->un_buf_chain_type =
10272 				    SD_CHAIN_INFO_DISK_NO_PM;
10273 			}
10274 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10275 
10276 			SD_TRACE(SD_LOG_IO_PM, un,
10277 			    "             changed  uscsi_chain_type to   %d\n",
10278 			    un->un_uscsi_chain_type);
10279 			mutex_exit(SD_MUTEX(un));
10280 			mutex_enter(&un->un_pm_mutex);
10281 
10282 			if (un->un_pm_idle_timeid == NULL) {
10283 				/* 300 ms. */
10284 				un->un_pm_idle_timeid =
10285 				    timeout(sd_pm_idletimeout_handler, un,
10286 				    (drv_usectohz((clock_t)300000)));
10287 				/*
10288 				 * Include an extra call to busy which keeps the
10289 				 * device busy with respect to the PM layer
10290 				 * until the timer fires, at which time it'll
10291 				 * get the extra idle call.
10292 				 */
10293 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10294 			}
10295 		}
10296 	}
10297 	un->un_pm_busy = FALSE;
10298 	/* Next... */
10299 	cv_signal(&un->un_pm_busy_cv);
10300 
10301 	un->un_pm_count++;
10302 
10303 	SD_TRACE(SD_LOG_IO_PM, un,
10304 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10305 
10306 	mutex_exit(&un->un_pm_mutex);
10307 
10308 	return (return_status);
10309 }
10310 
10311 
10312 /*
10313  *    Function: sd_pm_exit
10314  *
10315  * Description: Called at the completion of a command to manage busy
10316  *		status for the device. If the device becomes idle the
10317  *		PM framework is notified.
10318  *
10319  *     Context: Kernel thread context
10320  */
10321 
10322 static void
10323 sd_pm_exit(struct sd_lun *un)
10324 {
10325 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10326 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10327 
10328 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10329 
10330 	/*
10331 	 * After attach the following flag is only read, so don't
10332 	 * take the penalty of acquiring a mutex for it.
10333 	 */
10334 	if (un->un_f_pm_is_enabled == TRUE) {
10335 
10336 		mutex_enter(&un->un_pm_mutex);
10337 		un->un_pm_count--;
10338 
10339 		SD_TRACE(SD_LOG_IO_PM, un,
10340 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10341 
10342 		ASSERT(un->un_pm_count >= 0);
10343 		if (un->un_pm_count == 0) {
10344 			mutex_exit(&un->un_pm_mutex);
10345 
10346 			SD_TRACE(SD_LOG_IO_PM, un,
10347 			    "sd_pm_exit: idle component\n");
10348 
10349 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10350 
10351 		} else {
10352 			mutex_exit(&un->un_pm_mutex);
10353 		}
10354 	}
10355 
10356 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10357 }
10358 
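/*
 * Canonical pairing of sd_pm_entry/sd_pm_exit (sketch, mirroring the
 * pattern used by sdopen and sdclose later in this file):
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		... issue command(s) that require a powered-up device ...
 *		sd_pm_exit(un);
 *	} else {
 *		... power up failed; do NOT call sd_pm_exit ...
 *	}
 */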
10359 
10360 /*
10361  *    Function: sdopen
10362  *
10363  * Description: Driver's open(9e) entry point function.
10364  *
10365  *   Arguments: dev_p   - pointer to device number
10366  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10367  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10368  *		cred_p  - user credential pointer
10369  *
10370  * Return Code: EINVAL
10371  *		ENXIO
10372  *		EIO
10373  *		EROFS
10374  *		EBUSY
10375  *
10376  *     Context: Kernel thread context
10377  */
10378 /* ARGSUSED */
10379 static int
10380 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10381 {
10382 	struct sd_lun	*un;
10383 	int		nodelay;
10384 	int		part;
10385 	uint64_t	partmask;
10386 	int		instance;
10387 	dev_t		dev;
10388 	int		rval = EIO;
10389 
10390 	/* Validate the open type */
10391 	if (otyp >= OTYPCNT) {
10392 		return (EINVAL);
10393 	}
10394 
10395 	dev = *dev_p;
10396 	instance = SDUNIT(dev);
10397 	mutex_enter(&sd_detach_mutex);
10398 
10399 	/*
10400 	 * Fail the open if there is no softstate for the instance, or
10401 	 * if another thread somewhere is trying to detach the instance.
10402 	 */
10403 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10404 	    (un->un_detach_count != 0)) {
10405 		mutex_exit(&sd_detach_mutex);
10406 		/*
10407 		 * The probe cache only needs to be cleared when open (9e) fails
10408 		 * with ENXIO (4238046).
10409 		 */
10410 		/*
10411 		 * Unconditionally clearing the probe cache is OK with
10412 		 * separate sd/ssd binaries; on the x86 platform it can be
10413 		 * an issue, since both parallel and fibre are handled in
10414 		 * one binary.
10415 		 */
10416 		sd_scsi_clear_probe_cache();
10417 		return (ENXIO);
10418 	}
10419 
10420 	/*
10421 	 * The un_layer_count is to prevent another thread in specfs from
10422 	 * trying to detach the instance, which can happen when we are
10423 	 * called from a higher-layer driver instead of thru specfs.
10424 	 * This will not be needed when DDI provides a layered driver
10425 	 * interface that allows specfs to know that an instance is in
10426 	 * use by a layered driver & should not be detached.
10427 	 *
10428 	 * Note: the semantics for layered driver opens are exactly one
10429 	 * close for every open.
10430 	 */
10431 	if (otyp == OTYP_LYR) {
10432 		un->un_layer_count++;
10433 	}
10434 
10435 	/*
10436 	 * Keep a count of the current # of opens in progress. This is because
10437 	 * some layered drivers try to call us as a regular open. This can
10438 	 * cause problems that we cannot prevent; however, by keeping this count
10439 	 * we can at least keep our open and detach routines from racing against
10440 	 * each other under such conditions.
10441 	 */
10442 	un->un_opens_in_progress++;
10443 	mutex_exit(&sd_detach_mutex);
10444 
10445 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10446 	part	 = SDPART(dev);
10447 	partmask = 1 << part;
10448 
10449 	/*
10450 	 * We use a semaphore here in order to serialize
10451 	 * open and close requests on the device.
10452 	 */
10453 	sema_p(&un->un_semoclose);
10454 
10455 	mutex_enter(SD_MUTEX(un));
10456 
10457 	/*
10458 	 * All device accesses go thru sdstrategy() where we check
10459 	 * on suspend status but there could be a scsi_poll command,
10460 	 * which bypasses sdstrategy(), so we need to check pm
10461 	 * status.
10462 	 */
10463 
10464 	if (!nodelay) {
10465 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10466 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10467 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10468 		}
10469 
10470 		mutex_exit(SD_MUTEX(un));
10471 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10472 			rval = EIO;
10473 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10474 			    "sdopen: sd_pm_entry failed\n");
10475 			goto open_failed_with_pm;
10476 		}
10477 		mutex_enter(SD_MUTEX(un));
10478 	}
10479 
10480 	/* check for previous exclusive open */
10481 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10482 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10483 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10484 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10485 
10486 	if (un->un_exclopen & (partmask)) {
10487 		goto excl_open_fail;
10488 	}
10489 
10490 	if (flag & FEXCL) {
10491 		int i;
10492 		if (un->un_ocmap.lyropen[part]) {
10493 			goto excl_open_fail;
10494 		}
10495 		for (i = 0; i < (OTYPCNT - 1); i++) {
10496 			if (un->un_ocmap.regopen[i] & (partmask)) {
10497 				goto excl_open_fail;
10498 			}
10499 		}
10500 	}
10501 
10502 	/*
10503 	 * Check the write permission if this is a removable media device,
10504 	 * NDELAY has not been set, and writable permission is requested.
10505 	 *
10506 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10507 	 * attempt will fail with EIO as part of the I/O processing. This is a
10508 	 * more permissive implementation that allows the open to succeed and
10509 	 * WRITE attempts to fail when appropriate.
10510 	 */
10511 	if (un->un_f_chk_wp_open) {
10512 		if ((flag & FWRITE) && (!nodelay)) {
10513 			mutex_exit(SD_MUTEX(un));
10514 			/*
10515 			 * Defer the check for write permission on a writable
10516 			 * DVD drive until sdstrategy; do not fail the open
10517 			 * even if FWRITE is set, as the device can be writable
10518 			 * depending upon the media, and the media can change
10519 			 * after the call to open().
10520 			 */
10521 			if (un->un_f_dvdram_writable_device == FALSE) {
10522 				if (ISCD(un) || sr_check_wp(dev)) {
10523 					rval = EROFS;
10524 					mutex_enter(SD_MUTEX(un));
10525 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10526 					    "write to cd or write protected media\n");
10527 					goto open_fail;
10528 				}
10529 			}
10530 			mutex_enter(SD_MUTEX(un));
10531 		}
10532 	}
10533 
10534 	/*
10535 	 * If opening in NDELAY/NONBLOCK mode, just return.
10536 	 * Check if disk is ready and has a valid geometry later.
10537 	 */
10538 	if (!nodelay) {
10539 		mutex_exit(SD_MUTEX(un));
10540 		rval = sd_ready_and_valid(un);
10541 		mutex_enter(SD_MUTEX(un));
10542 		/*
10543 		 * Fail if device is not ready or if the number of disk
10544 		 * blocks is zero or negative for non CD devices.
10545 		 */
10546 		if ((rval != SD_READY_VALID) ||
10547 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10548 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10549 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10550 			    "device not ready or invalid disk block value\n");
10551 			goto open_fail;
10552 		}
10553 #if defined(__i386) || defined(__amd64)
10554 	} else {
10555 		uchar_t *cp;
10556 		/*
10557 		 * x86 requires special nodelay handling, so that p0 is
10558 		 * always defined and accessible.
10559 		 * Invalidate geometry only if device is not already open.
10560 		 */
10561 		cp = &un->un_ocmap.chkd[0];
10562 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10563 			if (*cp != (uchar_t)0) {
10564 				break;
10565 			}
10566 			cp++;
10567 		}
10568 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10569 			un->un_f_geometry_is_valid = FALSE;
10570 		}
10571 
10572 #endif
10573 	}
10574 
10575 	if (otyp == OTYP_LYR) {
10576 		un->un_ocmap.lyropen[part]++;
10577 	} else {
10578 		un->un_ocmap.regopen[otyp] |= partmask;
10579 	}
10580 
10581 	/* Set up open and exclusive open flags */
10582 	if (flag & FEXCL) {
10583 		un->un_exclopen |= (partmask);
10584 	}
10585 
10586 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10587 	    "open of part %d type %d\n", part, otyp);
10588 
10589 	mutex_exit(SD_MUTEX(un));
10590 	if (!nodelay) {
10591 		sd_pm_exit(un);
10592 	}
10593 
10594 	sema_v(&un->un_semoclose);
10595 
10596 	mutex_enter(&sd_detach_mutex);
10597 	un->un_opens_in_progress--;
10598 	mutex_exit(&sd_detach_mutex);
10599 
10600 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10601 	return (DDI_SUCCESS);
10602 
10603 excl_open_fail:
10604 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10605 	rval = EBUSY;
10606 
10607 open_fail:
10608 	mutex_exit(SD_MUTEX(un));
10609 
10610 	/*
10611 	 * On a failed open we must exit the pm management.
10612 	 */
10613 	if (!nodelay) {
10614 		sd_pm_exit(un);
10615 	}
10616 open_failed_with_pm:
10617 	sema_v(&un->un_semoclose);
10618 
10619 	mutex_enter(&sd_detach_mutex);
10620 	un->un_opens_in_progress--;
10621 	if (otyp == OTYP_LYR) {
10622 		un->un_layer_count--;
10623 	}
10624 	mutex_exit(&sd_detach_mutex);
10625 
10626 	return (rval);
10627 }
10628 
10629 
10630 /*
10631  *    Function: sdclose
10632  *
10633  * Description: Driver's close(9e) entry point function.
10634  *
10635  *   Arguments: dev    - device number
10636  *		flag   - file status flag, informational only
10637  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10638  *		cred_p - user credential pointer
10639  *
10640  * Return Code: 0, EIO, ENXIO
10641  *
10642  *     Context: Kernel thread context
10643  */
10644 /* ARGSUSED */
10645 static int
10646 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10647 {
10648 	struct sd_lun	*un;
10649 	uchar_t		*cp;
10650 	int		part;
10651 	int		nodelay;
10652 	int		rval = 0;
10653 
10654 	/* Validate the open type */
10655 	if (otyp >= OTYPCNT) {
10656 		return (ENXIO);
10657 	}
10658 
10659 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10660 		return (ENXIO);
10661 	}
10662 
10663 	part = SDPART(dev);
10664 	nodelay = flag & (FNDELAY | FNONBLOCK);
10665 
10666 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10667 	    "sdclose: close of part %d type %d\n", part, otyp);
10668 
10669 	/*
10670 	 * We use a semaphore here in order to serialize
10671 	 * open and close requests on the device.
10672 	 */
10673 	sema_p(&un->un_semoclose);
10674 
10675 	mutex_enter(SD_MUTEX(un));
10676 
10677 	/* Don't proceed if power is being changed. */
10678 	while (un->un_state == SD_STATE_PM_CHANGING) {
10679 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10680 	}
10681 
10682 	if (un->un_exclopen & (1 << part)) {
10683 		un->un_exclopen &= ~(1 << part);
10684 	}
10685 
10686 	/* Update the open partition map */
10687 	if (otyp == OTYP_LYR) {
10688 		un->un_ocmap.lyropen[part] -= 1;
10689 	} else {
10690 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10691 	}
10692 
10693 	cp = &un->un_ocmap.chkd[0];
10694 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10695 		if (*cp != (uchar_t)0) {
10696 			break;
10697 		}
10698 		cp++;
10699 	}
10700 
10701 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10702 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10703 
10704 		/*
10705 		 * We avoid persistence upon the last close, and set
10706 		 * the throttle back to the maximum.
10707 		 */
10708 		un->un_throttle = un->un_saved_throttle;
10709 
10710 		if (un->un_state == SD_STATE_OFFLINE) {
10711 			if (un->un_f_is_fibre == FALSE) {
10712 				scsi_log(SD_DEVINFO(un), sd_label,
10713 				    CE_WARN, "offline\n");
10714 			}
10715 			un->un_f_geometry_is_valid = FALSE;
10716 
10717 		} else {
10718 			/*
10719 			 * Flush any outstanding writes in NVRAM cache.
10720 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10721 			 * cmd, it may not work for non-Pluto devices.
10722 			 * SYNCHRONIZE CACHE is not required for removables,
10723 			 * except DVD-RAM drives.
10724 			 *
10725 			 * Also note: because SYNCHRONIZE CACHE is currently
10726 			 * the only command issued here that requires the
10727 			 * drive be powered up, only do the power up before
10728 			 * sending the Sync Cache command. If additional
10729 			 * commands are added which require a powered up
10730 			 * drive, the following sequence may have to change.
10731 			 *
10732 			 * And finally, note that parallel SCSI on SPARC
10733 			 * only issues a Sync Cache to DVD-RAM, a newly
10734 			 * supported device.
10735 			 */
10736 #if defined(__i386) || defined(__amd64)
10737 			if (un->un_f_sync_cache_supported ||
10738 			    un->un_f_dvdram_writable_device == TRUE) {
10739 #else
10740 			if (un->un_f_dvdram_writable_device == TRUE) {
10741 #endif
10742 				mutex_exit(SD_MUTEX(un));
10743 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10744 					rval =
10745 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10746 					    NULL);
10747 					/* ignore error if not supported */
10748 					if (rval == ENOTSUP) {
10749 						rval = 0;
10750 					} else if (rval != 0) {
10751 						rval = EIO;
10752 					}
10753 					sd_pm_exit(un);
10754 				} else {
10755 					rval = EIO;
10756 				}
10757 				mutex_enter(SD_MUTEX(un));
10758 			}
10759 
10760 			/*
10761 			 * For devices which support DOOR_LOCK, send an ALLOW
10762 			 * MEDIA REMOVAL command, but don't get upset if it
10763 			 * fails. We need to raise the power of the drive before
10764 			 * we can call sd_send_scsi_DOORLOCK().
10765 			 */
10766 			if (un->un_f_doorlock_supported) {
10767 				mutex_exit(SD_MUTEX(un));
10768 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10769 					rval = sd_send_scsi_DOORLOCK(un,
10770 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10771 
10772 					sd_pm_exit(un);
10773 					if (ISCD(un) && (rval != 0) &&
10774 					    (nodelay != 0)) {
10775 						rval = ENXIO;
10776 					}
10777 				} else {
10778 					rval = EIO;
10779 				}
10780 				mutex_enter(SD_MUTEX(un));
10781 			}
10782 
10783 			/*
10784 			 * If a device has removable media, invalidate all
10785 			 * parameters related to media, such as geometry,
10786 			 * blocksize, and blockcount.
10787 			 */
10788 			if (un->un_f_has_removable_media) {
10789 				sr_ejected(un);
10790 			}
10791 
10792 			/*
10793 			 * Destroy the cache (if it exists) which was
10794 			 * allocated for the write maps since this is
10795 			 * the last close for this media.
10796 			 */
10797 			if (un->un_wm_cache) {
10798 				/*
10799 				 * Check if there are pending commands;
10800 				 * if there are, give a warning and
10801 				 * do not destroy the cache.
10802 				 */
10803 				if (un->un_ncmds_in_driver > 0) {
10804 					scsi_log(SD_DEVINFO(un),
10805 					    sd_label, CE_WARN,
10806 					    "Unable to clean up memory "
10807 					    "because of pending I/O\n");
10808 				} else {
10809 					kmem_cache_destroy(
10810 					    un->un_wm_cache);
10811 					un->un_wm_cache = NULL;
10812 				}
10813 			}
10814 		}
10815 	}
10816 
10817 	mutex_exit(SD_MUTEX(un));
10818 	sema_v(&un->un_semoclose);
10819 
10820 	if (otyp == OTYP_LYR) {
10821 		mutex_enter(&sd_detach_mutex);
10822 		/*
10823 		 * The detach routine may run when the layer count
10824 		 * drops to zero.
10825 		 */
10826 		un->un_layer_count--;
10827 		mutex_exit(&sd_detach_mutex);
10828 	}
10829 
10830 	return (rval);
10831 }
10832 
10833 
10834 /*
10835  *    Function: sd_ready_and_valid
10836  *
10837  * Description: Test if device is ready and has a valid geometry.
10838  *
10839  *   Arguments: un - driver soft state (unit) structure
10841  *
10842  * Return Code: SD_READY_VALID		ready and valid label
10843  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10844  *		SD_NOT_READY_VALID	not ready, no label
10845  *		SD_RESERVED_BY_OTHERS	reservation conflict
10846  *
10847  *     Context: Never called at interrupt context.
10848  */
10849 
10850 static int
10851 sd_ready_and_valid(struct sd_lun *un)
10852 {
10853 	struct sd_errstats	*stp;
10854 	uint64_t		capacity;
10855 	uint_t			lbasize;
10856 	int			rval = SD_READY_VALID;
10857 	char			name_str[48];
10858 
10859 	ASSERT(un != NULL);
10860 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10861 
10862 	mutex_enter(SD_MUTEX(un));
10863 	/*
10864 	 * If a device has removable media, we must check if media is
10865 	 * ready when checking if this device is ready and valid.
10866 	 */
10867 	if (un->un_f_has_removable_media) {
10868 		mutex_exit(SD_MUTEX(un));
10869 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10870 			rval = SD_NOT_READY_VALID;
10871 			mutex_enter(SD_MUTEX(un));
10872 			goto done;
10873 		}
10874 
10875 		mutex_enter(SD_MUTEX(un));
10876 		if ((un->un_f_geometry_is_valid == FALSE) ||
10877 		    (un->un_f_blockcount_is_valid == FALSE) ||
10878 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10879 
10880 			/* capacity has to be read every open. */
10881 			mutex_exit(SD_MUTEX(un));
10882 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10883 			    &lbasize, SD_PATH_DIRECT) != 0) {
10884 				mutex_enter(SD_MUTEX(un));
10885 				un->un_f_geometry_is_valid = FALSE;
10886 				rval = SD_NOT_READY_VALID;
10887 				goto done;
10888 			} else {
10889 				mutex_enter(SD_MUTEX(un));
10890 				sd_update_block_info(un, lbasize, capacity);
10891 			}
10892 		}
10893 
10894 		/*
10895 		 * Check if the media in the device is writable or not.
10896 		 */
10897 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10898 			sd_check_for_writable_cd(un);
10899 		}
10900 
10901 	} else {
10902 		/*
10903 		 * Do a test unit ready to clear any unit attention from non-cd
10904 		 * devices.
10905 		 */
10906 		mutex_exit(SD_MUTEX(un));
10907 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10908 		mutex_enter(SD_MUTEX(un));
10909 	}
10910 
10911 
10912 	/*
10913 	 * If this is a non 512 block device, allocate space for
10914 	 * the wmap cache. This is being done here since every time
10915 	 * a media is changed this routine will be called and the
10916 	 * block size is a function of media rather than device.
10917 	 */
10918 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10919 		if (!(un->un_wm_cache)) {
10920 			(void) snprintf(name_str, sizeof (name_str),
10921 			    "%s%d_cache",
10922 			    ddi_driver_name(SD_DEVINFO(un)),
10923 			    ddi_get_instance(SD_DEVINFO(un)));
10924 			un->un_wm_cache = kmem_cache_create(
10925 			    name_str, sizeof (struct sd_w_map),
10926 			    8, sd_wm_cache_constructor,
10927 			    sd_wm_cache_destructor, NULL,
10928 			    (void *)un, NULL, 0);
10929 			if (!(un->un_wm_cache)) {
10930 				rval = ENOMEM;
10931 				goto done;
10932 			}
10933 		}
10934 	}
10935 
10936 	if (un->un_state == SD_STATE_NORMAL) {
10937 		/*
10938 		 * If the target is not yet ready here (defined by a TUR
10939 		 * failure), invalidate the geometry and print an 'offline'
10940 		 * message. This is a legacy message, as the state of the
10941 		 * target is not actually changed to SD_STATE_OFFLINE.
10942 		 *
10943 		 * If the TUR fails for EACCES (Reservation Conflict),
10944 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
10945 		 * reservation conflict. If the TUR fails for other
10946 		 * reasons, SD_NOT_READY_VALID will be returned.
10947 		 */
10948 		int err;
10949 
10950 		mutex_exit(SD_MUTEX(un));
10951 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10952 		mutex_enter(SD_MUTEX(un));
10953 
10954 		if (err != 0) {
10955 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10956 			    "offline or reservation conflict\n");
10957 			un->un_f_geometry_is_valid = FALSE;
10958 			if (err == EACCES) {
10959 				rval = SD_RESERVED_BY_OTHERS;
10960 			} else {
10961 				rval = SD_NOT_READY_VALID;
10962 			}
10963 			goto done;
10964 		}
10965 	}
10966 
10967 	if (un->un_f_format_in_progress == FALSE) {
10968 		/*
10969 		 * Note: sd_validate_geometry may return success (0), but that
10970 		 * does not necessarily mean un_f_geometry_is_valid == TRUE!
10971 		 */
10972 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10973 		if (rval == ENOTSUP) {
10974 			if (un->un_f_geometry_is_valid == TRUE) {
10975 				rval = 0;
10976 			} else {
10977 				rval = SD_READY_NOT_VALID;
10978 				goto done;
10979 			}
10980 		}
10981 		if (rval != 0) {
10982 			/*
10983 			 * We don't check the validity of geometry for
10984 			 * CDROMs. Also we assume we have a good label
10985 			 * even if sd_validate_geometry returned ENOMEM.
10986 			 */
10987 			if (!ISCD(un) && rval != ENOMEM) {
10988 				rval = SD_NOT_READY_VALID;
10989 				goto done;
10990 			}
10991 		}
10992 	}
10993 
10994 	/*
10995 	 * If this device supports the DOOR_LOCK command, try to send it
10996 	 * to PREVENT MEDIA REMOVAL, but don't get upset if it fails.
10997 	 * For a CD, however, failure is an error.
10998 	 */
10999 	if (un->un_f_doorlock_supported) {
11000 		mutex_exit(SD_MUTEX(un));
11001 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
11002 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
11003 			rval = SD_NOT_READY_VALID;
11004 			mutex_enter(SD_MUTEX(un));
11005 			goto done;
11006 		}
11007 		mutex_enter(SD_MUTEX(un));
11008 	}
11009 
11010 	/* The state has changed, inform the media watch routines */
11011 	un->un_mediastate = DKIO_INSERTED;
11012 	cv_broadcast(&un->un_state_cv);
11013 	rval = SD_READY_VALID;
11014 
11015 done:
11016 
11017 	/*
11018 	 * Initialize the capacity kstat value, if no media previously
11019 	 * (capacity kstat is 0) and a media has been inserted
11020 	 * (un_blockcount > 0).
11021 	 */
11022 	if (un->un_errstats != NULL) {
11023 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
11024 		if ((stp->sd_capacity.value.ui64 == 0) &&
11025 		    (un->un_f_blockcount_is_valid == TRUE)) {
11026 			stp->sd_capacity.value.ui64 =
11027 			    (uint64_t)((uint64_t)un->un_blockcount *
11028 			    un->un_sys_blocksize);
11029 		}
11030 	}
11031 
11032 	mutex_exit(SD_MUTEX(un));
11033 	return (rval);
11034 }
11035 
11036 
11037 /*
11038  *    Function: sdmin
11039  *
11040  * Description: Routine to limit the size of a data transfer. Used in
11041  *		conjunction with physio(9F).
11042  *
11043  *   Arguments: bp - pointer to the indicated buf(9S) struct.
11044  *
11045  *     Context: Kernel thread context.
11046  */
11047 
11048 static void
11049 sdmin(struct buf *bp)
11050 {
11051 	struct sd_lun	*un;
11052 	int		instance;
11053 
11054 	instance = SDUNIT(bp->b_edev);
11055 
11056 	un = ddi_get_soft_state(sd_state, instance);
11057 	ASSERT(un != NULL);
11058 
11059 	if (bp->b_bcount > un->un_max_xfer_size) {
11060 		bp->b_bcount = un->un_max_xfer_size;
11061 	}
11062 }
11063 
11064 
11065 /*
11066  *    Function: sdread
11067  *
11068  * Description: Driver's read(9e) entry point function.
11069  *
11070  *   Arguments: dev   - device number
11071  *		uio   - structure pointer describing where data is to be stored
11072  *			in user's space
11073  *		cred_p  - user credential pointer
11074  *
11075  * Return Code: ENXIO
11076  *		EIO
11077  *		EINVAL
11078  *		value returned by physio
11079  *
11080  *     Context: Kernel thread context.
11081  */
11082 /* ARGSUSED */
11083 static int
11084 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
11085 {
11086 	struct sd_lun	*un = NULL;
11087 	int		secmask;
11088 	int		err;
11089 
11090 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11091 		return (ENXIO);
11092 	}
11093 
11094 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11095 
11096 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11097 		mutex_enter(SD_MUTEX(un));
11098 		/*
11099 		 * Because the call to sd_ready_and_valid will issue I/O we
11100 		 * must wait here if either the device is suspended or
11101 		 * if its power level is changing.
11102 		 */
11103 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11104 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11105 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11106 		}
11107 		un->un_ncmds_in_driver++;
11108 		mutex_exit(SD_MUTEX(un));
11109 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11110 			mutex_enter(SD_MUTEX(un));
11111 			un->un_ncmds_in_driver--;
11112 			ASSERT(un->un_ncmds_in_driver >= 0);
11113 			mutex_exit(SD_MUTEX(un));
11114 			return (EIO);
11115 		}
11116 		mutex_enter(SD_MUTEX(un));
11117 		un->un_ncmds_in_driver--;
11118 		ASSERT(un->un_ncmds_in_driver >= 0);
11119 		mutex_exit(SD_MUTEX(un));
11120 	}
11121 
11122 	/*
11123 	 * Read requests are restricted to multiples of the system block size.
11124 	 */
11125 	secmask = un->un_sys_blocksize - 1;
11126 
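	/*
	 * For example, with a 512-byte system blocksize secmask is 0x1ff,
	 * so an offset of 1024 passes the checks below while an offset of
	 * 1000 (1000 & 0x1ff == 488) is rejected with EINVAL.
	 */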
11127 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11128 		SD_ERROR(SD_LOG_READ_WRITE, un,
11129 		    "sdread: file offset not modulo %d\n",
11130 		    un->un_sys_blocksize);
11131 		err = EINVAL;
11132 	} else if (uio->uio_iov->iov_len & (secmask)) {
11133 		SD_ERROR(SD_LOG_READ_WRITE, un,
11134 		    "sdread: transfer length not modulo %d\n",
11135 		    un->un_sys_blocksize);
11136 		err = EINVAL;
11137 	} else {
11138 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11139 	}
11140 	return (err);
11141 }
11142 
11143 
11144 /*
11145  *    Function: sdwrite
11146  *
11147  * Description: Driver's write(9e) entry point function.
11148  *
11149  *   Arguments: dev   - device number
11150  *		uio   - structure pointer describing where data is stored in
11151  *			user's space
11152  *		cred_p  - user credential pointer
11153  *
11154  * Return Code: ENXIO
11155  *		EIO
11156  *		EINVAL
11157  *		value returned by physio
11158  *
11159  *     Context: Kernel thread context.
11160  */
11161 /* ARGSUSED */
11162 static int
11163 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11164 {
11165 	struct sd_lun	*un = NULL;
11166 	int		secmask;
11167 	int		err;
11168 
11169 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11170 		return (ENXIO);
11171 	}
11172 
11173 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11174 
11175 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11176 		mutex_enter(SD_MUTEX(un));
11177 		/*
11178 		 * Because the call to sd_ready_and_valid will issue I/O we
11179 		 * must wait here if either the device is suspended or
11180 		 * if its power level is changing.
11181 		 */
11182 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11183 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11184 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11185 		}
11186 		un->un_ncmds_in_driver++;
11187 		mutex_exit(SD_MUTEX(un));
11188 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11189 			mutex_enter(SD_MUTEX(un));
11190 			un->un_ncmds_in_driver--;
11191 			ASSERT(un->un_ncmds_in_driver >= 0);
11192 			mutex_exit(SD_MUTEX(un));
11193 			return (EIO);
11194 		}
11195 		mutex_enter(SD_MUTEX(un));
11196 		un->un_ncmds_in_driver--;
11197 		ASSERT(un->un_ncmds_in_driver >= 0);
11198 		mutex_exit(SD_MUTEX(un));
11199 	}
11200 
11201 	/*
11202 	 * Write requests are restricted to multiples of the system block size.
11203 	 */
11204 	secmask = un->un_sys_blocksize - 1;
11205 
11206 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11207 		SD_ERROR(SD_LOG_READ_WRITE, un,
11208 		    "sdwrite: file offset not modulo %d\n",
11209 		    un->un_sys_blocksize);
11210 		err = EINVAL;
11211 	} else if (uio->uio_iov->iov_len & (secmask)) {
11212 		SD_ERROR(SD_LOG_READ_WRITE, un,
11213 		    "sdwrite: transfer length not modulo %d\n",
11214 		    un->un_sys_blocksize);
11215 		err = EINVAL;
11216 	} else {
11217 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11218 	}
11219 	return (err);
11220 }
11221 
11222 
11223 /*
11224  *    Function: sdaread
11225  *
11226  * Description: Driver's aread(9e) entry point function.
11227  *
11228  *   Arguments: dev   - device number
11229  *		aio   - structure pointer describing where data is to be stored
11230  *		cred_p  - user credential pointer
11231  *
11232  * Return Code: ENXIO
11233  *		EIO
11234  *		EINVAL
11235  *		value returned by aphysio
11236  *
11237  *     Context: Kernel thread context.
11238  */
11239 /* ARGSUSED */
11240 static int
11241 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11242 {
11243 	struct sd_lun	*un = NULL;
11244 	struct uio	*uio = aio->aio_uio;
11245 	int		secmask;
11246 	int		err;
11247 
11248 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11249 		return (ENXIO);
11250 	}
11251 
11252 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11253 
11254 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11255 		mutex_enter(SD_MUTEX(un));
11256 		/*
11257 		 * Because the call to sd_ready_and_valid will issue I/O we
11258 		 * must wait here if either the device is suspended or
11259 		 * if its power level is changing.
11260 		 */
11261 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11262 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11263 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11264 		}
11265 		un->un_ncmds_in_driver++;
11266 		mutex_exit(SD_MUTEX(un));
11267 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11268 			mutex_enter(SD_MUTEX(un));
11269 			un->un_ncmds_in_driver--;
11270 			ASSERT(un->un_ncmds_in_driver >= 0);
11271 			mutex_exit(SD_MUTEX(un));
11272 			return (EIO);
11273 		}
11274 		mutex_enter(SD_MUTEX(un));
11275 		un->un_ncmds_in_driver--;
11276 		ASSERT(un->un_ncmds_in_driver >= 0);
11277 		mutex_exit(SD_MUTEX(un));
11278 	}
11279 
11280 	/*
11281 	 * Read requests are restricted to multiples of the system block size.
11282 	 */
11283 	secmask = un->un_sys_blocksize - 1;
11284 
11285 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11286 		SD_ERROR(SD_LOG_READ_WRITE, un,
11287 		    "sdaread: file offset not modulo %d\n",
11288 		    un->un_sys_blocksize);
11289 		err = EINVAL;
11290 	} else if (uio->uio_iov->iov_len & (secmask)) {
11291 		SD_ERROR(SD_LOG_READ_WRITE, un,
11292 		    "sdaread: transfer length not modulo %d\n",
11293 		    un->un_sys_blocksize);
11294 		err = EINVAL;
11295 	} else {
11296 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11297 	}
11298 	return (err);
11299 }
11300 
11301 
11302 /*
11303  *    Function: sdawrite
11304  *
11305  * Description: Driver's awrite(9e) entry point function.
11306  *
11307  *   Arguments: dev   - device number
11308  *		aio   - structure pointer describing where data is stored
11309  *		cred_p  - user credential pointer
11310  *
11311  * Return Code: ENXIO
11312  *		EIO
11313  *		EINVAL
11314  *		value returned by aphysio
11315  *
11316  *     Context: Kernel thread context.
11317  */
11318 /* ARGSUSED */
11319 static int
11320 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11321 {
11322 	struct sd_lun	*un = NULL;
11323 	struct uio	*uio = aio->aio_uio;
11324 	int		secmask;
11325 	int		err;
11326 
11327 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11328 		return (ENXIO);
11329 	}
11330 
11331 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11332 
11333 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11334 		mutex_enter(SD_MUTEX(un));
11335 		/*
11336 		 * Because the call to sd_ready_and_valid will issue I/O we
11337 		 * must wait here if either the device is suspended or
11338 		 * if its power level is changing.
11339 		 */
11340 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11341 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11342 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11343 		}
11344 		un->un_ncmds_in_driver++;
11345 		mutex_exit(SD_MUTEX(un));
11346 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11347 			mutex_enter(SD_MUTEX(un));
11348 			un->un_ncmds_in_driver--;
11349 			ASSERT(un->un_ncmds_in_driver >= 0);
11350 			mutex_exit(SD_MUTEX(un));
11351 			return (EIO);
11352 		}
11353 		mutex_enter(SD_MUTEX(un));
11354 		un->un_ncmds_in_driver--;
11355 		ASSERT(un->un_ncmds_in_driver >= 0);
11356 		mutex_exit(SD_MUTEX(un));
11357 	}
11358 
11359 	/*
11360 	 * Write requests are restricted to multiples of the system block size.
11361 	 */
11362 	secmask = un->un_sys_blocksize - 1;
11363 
11364 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11365 		SD_ERROR(SD_LOG_READ_WRITE, un,
11366 		    "sdawrite: file offset not modulo %d\n",
11367 		    un->un_sys_blocksize);
11368 		err = EINVAL;
11369 	} else if (uio->uio_iov->iov_len & (secmask)) {
11370 		SD_ERROR(SD_LOG_READ_WRITE, un,
11371 		    "sdawrite: transfer length not modulo %d\n",
11372 		    un->un_sys_blocksize);
11373 		err = EINVAL;
11374 	} else {
11375 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11376 	}
11377 	return (err);
11378 }
11379 
11380 
11381 
11382 
11383 
11384 /*
11385  * Driver IO processing follows the following sequence:
11386  *
11387  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11388  *         |                |                     ^
11389  *         v                v                     |
11390  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11391  *         |                |                     |                   |
11392  *         v                |                     |                   |
11393  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11394  *         |                |                     ^                   ^
11395  *         v                v                     |                   |
11396  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11397  *         |                |                     |                   |
11398  *     +---+                |                     +------------+      +-------+
11399  *     |                    |                                  |              |
11400  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11401  *     |                    v                                  |              |
11402  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11403  *     |                    |                                  ^              |
11404  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11405  *     |                    v                                  |              |
11406  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11407  *     |                    |                                  ^              |
11408  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11409  *     |                    v                                  |              |
11410  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11411  *     |                    |                                  ^              |
11412  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11413  *     |                    v                                  |              |
11414  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11415  *     |                    |                                  ^              |
11416  *     |                    |                                  |              |
11417  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11418  *                          |                           ^
11419  *                          v                           |
11420  *                   sd_core_iostart()                  |
11421  *                          |                           |
11422  *                          |                           +------>(*destroypkt)()
11423  *                          +-> sd_start_cmds() <-+     |           |
11424  *                          |                     |     |           v
11425  *                          |                     |     |  scsi_destroy_pkt(9F)
11426  *                          |                     |     |
11427  *                          +->(*initpkt)()       +- sdintr()
11428  *                          |  |                        |  |
11429  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11430  *                          |  +-> scsi_setup_cdb(9F)   |
11431  *                          |                           |
11432  *                          +--> scsi_transport(9F)     |
11433  *                                     |                |
11434  *                                     +----> SCSA ---->+
11435  *
11436  *
11437  * This code is based upon the following presumptions:
11438  *
11439  *   - iostart and iodone functions operate on buf(9S) structures. These
11440  *     functions perform the necessary operations on the buf(9S) and pass
11441  *     them along to the next function in the chain by using the macros
11442  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11443  *     (for iodone side functions).
11444  *
11445  *   - The iostart side functions may sleep. The iodone side functions
11446  *     are called under interrupt context and may NOT sleep. Therefore
11447  *     iodone side functions also may not call iostart side functions.
11448  *     (NOTE: iostart side functions should NOT sleep for memory, as
11449  *     this could result in deadlock.)
11450  *
11451  *   - An iostart side function may call its corresponding iodone side
11452  *     function directly (if necessary).
11453  *
11454  *   - In the event of an error, an iostart side function can return a buf(9S)
11455  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11456  *     b_error in the usual way of course).
11457  *
11458  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11459  *     requests to the iostart side functions.  The iostart side functions in
11460  *     this case would be called under the context of a taskq thread, so it's
11461  *     OK for them to block/sleep/spin in this case.
11462  *
11463  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11464  *     pass them along to the next function in the chain.  The corresponding
11465  *     iodone side functions must coalesce the "shadow" bufs and return
11466  *     the "original" buf to the next higher layer.
11467  *
11468  *   - The b_private field of the buf(9S) struct holds a pointer to
11469  *     an sd_xbuf struct, which contains information needed to
11470  *     construct the scsi_pkt for the command.
11471  *
11472  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11473  *     layer must acquire & release the SD_MUTEX(un) as needed.
11474  */
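
/*
 * Illustrative sketch (not part of the driver): a minimal pass-through
 * layer pair following the conventions above.  The names
 * sd_example_iostart() and sd_example_iodone() are hypothetical, but the
 * stub layers later in this file (eg, sd_checksum_iostart()) have
 * exactly this shape.
 */
#if 0
static void
sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));	/* not held across layers */

	/* ...per-layer processing of the buf(9S) goes here... */

	/* Pass the buf to the next iostart function in the chain. */
	SD_NEXT_IOSTART(index, un, bp);
}

static void
sd_example_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* Interrupt context: may NOT sleep, may not call iostart side. */
	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	/* On error a layer would set b_error via bioerror(9F) first. */

	/* Pass the buf to the next iodone function in the chain. */
	SD_NEXT_IODONE(index, un, bp);
}
#endif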
11475 
11476 
11477 /*
11478  * Create taskq for all targets in the system. This is created at
11479  * _init(9E) and destroyed at _fini(9E).
11480  *
11481  * Note: here we set the minalloc to a reasonably high number to ensure that
11482  * we will have an adequate supply of task entries available at interrupt time.
11483  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11484  * sd_taskq_create().  Since we do not want to sleep for allocations at
11485  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11486  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11487  * requests at any one instant in time.
11488  */
11489 #define	SD_TASKQ_NUMTHREADS	8
11490 #define	SD_TASKQ_MINALLOC	256
11491 #define	SD_TASKQ_MAXALLOC	256
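
/*
 * Hypothetical sketch of why maxalloc == minalloc matters here: with
 * TASKQ_PREPOPULATE and no headroom to grow, a KM_NOSLEEP dispatch
 * simply fails (returns 0) once all preallocated entries are in use,
 * instead of sleeping at interrupt time.  sd_example_task() is a
 * made-up function name for illustration only.
 */
#if 0
	if (taskq_dispatch(sd_tq, sd_example_task, (void *)bp,
	    KM_NOSLEEP) == 0) {
		/* No taskq entry was available; fail the command. */
		bioerror(bp, EIO);
	}
#endif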
11492 
11493 static taskq_t	*sd_tq = NULL;
11494 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11495 
11496 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11497 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11498 
11499 /*
11500  * The following task queue is created for the write phase of
11501  * read-modify-write on non-512 block size devices.
11502  * Limit the number of threads to 1 for now. This number was chosen
11503  * because the queue currently applies only to DVD-RAM and MO drives,
11504  * for which performance is not the main criterion at this stage.
11505  * Note: whether a single taskq could serve both queues remains to be explored.
11506  */
11507 #define	SD_WMR_TASKQ_NUMTHREADS	1
11508 static taskq_t	*sd_wmr_tq = NULL;
11509 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11510 
11511 /*
11512  *    Function: sd_taskq_create
11513  *
11514  * Description: Create taskq thread(s) and preallocate task entries
11515  *
11516  * Return Code: Returns a pointer to the allocated taskq_t.
11517  *
11518  *     Context: Can sleep. Requires blockable context.
11519  *
11520  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11521  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11522  *		- taskq_create() will block for memory, also it will panic
11523  *		  if it cannot create the requested number of threads.
11524  *		- Currently taskq_create() creates threads that cannot be
11525  *		  swapped.
11526  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11527  *		  supply of taskq entries at interrupt time (ie, so that we
11528  *		  do not have to sleep for memory)
11529  */
11530 
11531 static void
11532 sd_taskq_create(void)
11533 {
11534 	char	taskq_name[TASKQ_NAMELEN];
11535 
11536 	ASSERT(sd_tq == NULL);
11537 	ASSERT(sd_wmr_tq == NULL);
11538 
11539 	(void) snprintf(taskq_name, sizeof (taskq_name),
11540 	    "%s_drv_taskq", sd_label);
11541 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11542 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11543 	    TASKQ_PREPOPULATE));
11544 
11545 	(void) snprintf(taskq_name, sizeof (taskq_name),
11546 	    "%s_rmw_taskq", sd_label);
11547 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11548 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11549 	    TASKQ_PREPOPULATE));
11550 }
11551 
11552 
11553 /*
11554  *    Function: sd_taskq_delete
11555  *
11556  * Description: Complementary cleanup routine for sd_taskq_create().
11557  *
11558  *     Context: Kernel thread context.
11559  */
11560 
11561 static void
11562 sd_taskq_delete(void)
11563 {
11564 	ASSERT(sd_tq != NULL);
11565 	ASSERT(sd_wmr_tq != NULL);
11566 	taskq_destroy(sd_tq);
11567 	taskq_destroy(sd_wmr_tq);
11568 	sd_tq = NULL;
11569 	sd_wmr_tq = NULL;
11570 }
11571 
11572 
11573 /*
11574  *    Function: sdstrategy
11575  *
11576  * Description: Driver's strategy (9E) entry point function.
11577  *
11578  *   Arguments: bp - pointer to buf(9S)
11579  *
11580  * Return Code: Always returns zero
11581  *
11582  *     Context: Kernel thread context.
11583  */
11584 
11585 static int
11586 sdstrategy(struct buf *bp)
11587 {
11588 	struct sd_lun *un;
11589 
11590 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11591 	if (un == NULL) {
11592 		bioerror(bp, EIO);
11593 		bp->b_resid = bp->b_bcount;
11594 		biodone(bp);
11595 		return (0);
11596 	}
11597 	/* As was done in the past, fail new cmds if state is dumping. */
11598 	if (un->un_state == SD_STATE_DUMPING) {
11599 		bioerror(bp, ENXIO);
11600 		bp->b_resid = bp->b_bcount;
11601 		biodone(bp);
11602 		return (0);
11603 	}
11604 
11605 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11606 
11607 	/*
11608 	 * Commands may sneak in while we released the mutex in
11609 	 * DDI_SUSPEND, we should block new commands. However, old
11610 	 * commands that are still in the driver at this point should
11611 	 * still be allowed to drain.
11612 	 */
11613 	mutex_enter(SD_MUTEX(un));
11614 	/*
11615 	 * Must wait here if either the device is suspended or
11616 	 * if its power level is changing.
11617 	 */
11618 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11619 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11620 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11621 	}
11622 
11623 	un->un_ncmds_in_driver++;
11624 
11625 	/*
11626 	 * atapi: Since we currently run the CD in PIO mode, we need to
11627 	 * call bp_mapin here to avoid having bp_mapin called in interrupt
11628 	 * context under the HBA's init_pkt routine.
11629 	 */
11630 	if (un->un_f_cfg_is_atapi == TRUE) {
11631 		mutex_exit(SD_MUTEX(un));
11632 		bp_mapin(bp);
11633 		mutex_enter(SD_MUTEX(un));
11634 	}
11635 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11636 	    un->un_ncmds_in_driver);
11637 
11638 	mutex_exit(SD_MUTEX(un));
11639 
11640 	/*
11641 	 * This will (eventually) allocate the sd_xbuf area and
11642 	 * call sd_xbuf_strategy().  We just want to return the
11643 	 * result of ddi_xbuf_qstrategy so that we have an
11644 	 * optimized tail call which saves us a stack frame.
11645 	 */
11646 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11647 }
11648 
11649 
11650 /*
11651  *    Function: sd_xbuf_strategy
11652  *
11653  * Description: Function for initiating IO operations via the
11654  *		ddi_xbuf_qstrategy() mechanism.
11655  *
11656  *     Context: Kernel thread context.
11657  */
11658 
11659 static void
11660 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11661 {
11662 	struct sd_lun *un = arg;
11663 
11664 	ASSERT(bp != NULL);
11665 	ASSERT(xp != NULL);
11666 	ASSERT(un != NULL);
11667 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11668 
11669 	/*
11670 	 * Initialize the fields in the xbuf and save a pointer to the
11671 	 * xbuf in bp->b_private.
11672 	 */
11673 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11674 
11675 	/* Send the buf down the iostart chain */
11676 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11677 }
11678 
11679 
11680 /*
11681  *    Function: sd_xbuf_init
11682  *
11683  * Description: Prepare the given sd_xbuf struct for use.
11684  *
11685  *   Arguments: un - ptr to softstate
11686  *		bp - ptr to associated buf(9S)
11687  *		xp - ptr to associated sd_xbuf
11688  *		chain_type - IO chain type to use:
11689  *			SD_CHAIN_NULL
11690  *			SD_CHAIN_BUFIO
11691  *			SD_CHAIN_USCSI
11692  *			SD_CHAIN_DIRECT
11693  *			SD_CHAIN_DIRECT_PRIORITY
11694  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11695  *			initialization; may be NULL if none.
11696  *
11697  *     Context: Kernel thread context
11698  */
11699 
11700 static void
11701 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11702 	uchar_t chain_type, void *pktinfop)
11703 {
11704 	int index;
11705 
11706 	ASSERT(un != NULL);
11707 	ASSERT(bp != NULL);
11708 	ASSERT(xp != NULL);
11709 
11710 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11711 	    bp, chain_type);
11712 
11713 	xp->xb_un	= un;
11714 	xp->xb_pktp	= NULL;
11715 	xp->xb_pktinfo	= pktinfop;
11716 	xp->xb_private	= bp->b_private;
11717 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11718 
11719 	/*
11720 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11721 	 * upon the specified chain type to use.
11722 	 */
11723 	switch (chain_type) {
11724 	case SD_CHAIN_NULL:
11725 		/*
11726 		 * Fall through to just use the values for the buf type, even
11727 		 * though for the NULL chain these values will never be used.
11728 		 */
11729 		/* FALLTHRU */
11730 	case SD_CHAIN_BUFIO:
11731 		index = un->un_buf_chain_type;
11732 		break;
11733 	case SD_CHAIN_USCSI:
11734 		index = un->un_uscsi_chain_type;
11735 		break;
11736 	case SD_CHAIN_DIRECT:
11737 		index = un->un_direct_chain_type;
11738 		break;
11739 	case SD_CHAIN_DIRECT_PRIORITY:
11740 		index = un->un_priority_chain_type;
11741 		break;
11742 	default:
11743 		/* We're really broken if we ever get here... */
11744 		panic("sd_xbuf_init: illegal chain type!");
11745 		/*NOTREACHED*/
11746 	}
11747 
11748 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11749 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11750 
11751 	/*
11752 	 * It might be a bit easier to simply bzero the entire xbuf above,
11753 	 * but it turns out that since we init a fair number of members anyway,
11754 	 * we save a fair number of cycles by doing explicit assignment of zero.
11755 	 */
11756 	xp->xb_pkt_flags	= 0;
11757 	xp->xb_dma_resid	= 0;
11758 	xp->xb_retry_count	= 0;
11759 	xp->xb_victim_retry_count = 0;
11760 	xp->xb_ua_retry_count	= 0;
11761 	xp->xb_sense_bp		= NULL;
11762 	xp->xb_sense_status	= 0;
11763 	xp->xb_sense_state	= 0;
11764 	xp->xb_sense_resid	= 0;
11765 
11766 	bp->b_private	= xp;
11767 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11768 	bp->b_resid	= 0;
11769 	bp->av_forw	= NULL;
11770 	bp->av_back	= NULL;
11771 	bioerror(bp, 0);
11772 
11773 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11774 }
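
/*
 * Usage sketch (illustrative only): the caller pattern that
 * sd_uscsi_strategy() below actually follows -- allocate an sd_xbuf,
 * initialize it, then kick off the iostart chain.
 */
#if 0
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, bp, xp, SD_CHAIN_USCSI, uip->ui_cmdp);
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
#endif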
11775 
11776 
11777 /*
11778  *    Function: sd_uscsi_strategy
11779  *
11780  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11781  *
11782  *   Arguments: bp - buf struct ptr
11783  *
11784  * Return Code: Always returns 0
11785  *
11786  *     Context: Kernel thread context
11787  */
11788 
11789 static int
11790 sd_uscsi_strategy(struct buf *bp)
11791 {
11792 	struct sd_lun		*un;
11793 	struct sd_uscsi_info	*uip;
11794 	struct sd_xbuf		*xp;
11795 	uchar_t			chain_type;
11796 
11797 	ASSERT(bp != NULL);
11798 
11799 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11800 	if (un == NULL) {
11801 		bioerror(bp, EIO);
11802 		bp->b_resid = bp->b_bcount;
11803 		biodone(bp);
11804 		return (0);
11805 	}
11806 
11807 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11808 
11809 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11810 
11811 	mutex_enter(SD_MUTEX(un));
11812 	/*
11813 	 * atapi: Since we currently run the CD in PIO mode, we need to
11814 	 * call bp_mapin here to avoid having bp_mapin called in interrupt
11815 	 * context under the HBA's init_pkt routine.
11816 	 */
11817 	if (un->un_f_cfg_is_atapi == TRUE) {
11818 		mutex_exit(SD_MUTEX(un));
11819 		bp_mapin(bp);
11820 		mutex_enter(SD_MUTEX(un));
11821 	}
11822 	un->un_ncmds_in_driver++;
11823 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11824 	    un->un_ncmds_in_driver);
11825 	mutex_exit(SD_MUTEX(un));
11826 
11827 	/*
11828 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11829 	 */
11830 	ASSERT(bp->b_private != NULL);
11831 	uip = (struct sd_uscsi_info *)bp->b_private;
11832 
11833 	switch (uip->ui_flags) {
11834 	case SD_PATH_DIRECT:
11835 		chain_type = SD_CHAIN_DIRECT;
11836 		break;
11837 	case SD_PATH_DIRECT_PRIORITY:
11838 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11839 		break;
11840 	default:
11841 		chain_type = SD_CHAIN_USCSI;
11842 		break;
11843 	}
11844 
11845 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11846 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11847 
11848 	/* Use the index obtained within xbuf_init */
11849 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11850 
11851 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11852 
11853 	return (0);
11854 }
11855 
11856 
11857 /*
11858  * These routines perform raw i/o operations.
11859  */
11860 /*ARGSUSED*/
11861 static void
11862 sduscsimin(struct buf *bp)
11863 {
11864 	/*
11865 	 * Do not break up the transfer, because the CDB count would then
11866 	 * be incorrect and data underruns would result (incomplete
11867 	 * read/writes which would be retried and then failed; see
11868 	 * sdintr()).
11869 	 */
11870 }
11871 
11872 
11873 
11874 /*
11875  *    Function: sd_send_scsi_cmd
11876  *
11877  * Description: Runs a USCSI command for user (when called thru sdioctl),
11878  *		or for the driver
11879  *
11880  *   Arguments: dev - the dev_t for the device
11881  *		incmd - ptr to a valid uscsi_cmd struct
11882  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11883  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11884  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11885  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11886  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11887  *			to use the USCSI "direct" chain and bypass the normal
11888  *			command waitq.
11889  *
11890  * Return Code: 0 -  successful completion of the given command
11891  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11892  *		ENXIO  - soft state not found for specified dev
11893  *		EINVAL
11894  *		EFAULT - copyin/copyout error
11895  *		return code of biowait(9F) or physio(9F):
11896  *			EIO - IO error, caller may check incmd->uscsi_status
11897  *			ENXIO
11898  *			EACCES - reservation conflict
11899  *
11900  *     Context: Waits for command to complete. Can sleep.
11901  */
11902 
11903 static int
11904 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11905 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11906 	int path_flag)
11907 {
11908 	struct sd_uscsi_info	*uip;
11909 	struct uscsi_cmd	*uscmd;
11910 	struct sd_lun	*un;
11911 	struct buf	*bp;
11912 	int	rval;
11913 	int	flags;
11914 
11915 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11916 	if (un == NULL) {
11917 		return (ENXIO);
11918 	}
11919 
11920 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11921 
11922 #ifdef SDDEBUG
11923 	switch (dataspace) {
11924 	case UIO_USERSPACE:
11925 		SD_TRACE(SD_LOG_IO, un,
11926 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11927 		break;
11928 	case UIO_SYSSPACE:
11929 		SD_TRACE(SD_LOG_IO, un,
11930 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11931 		break;
11932 	default:
11933 		SD_TRACE(SD_LOG_IO, un,
11934 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11935 		break;
11936 	}
11937 #endif
11938 
11939 	/*
11940 	 * Perform resets directly; no need to generate a command to do it.
11941 	 */
11942 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11943 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11944 		    RESET_ALL : RESET_TARGET;
11945 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11946 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11947 			/* Reset attempt was unsuccessful */
11948 			SD_TRACE(SD_LOG_IO, un,
11949 			    "sd_send_scsi_cmd: reset: failure\n");
11950 			return (EIO);
11951 		}
11952 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11953 		return (0);
11954 	}
11955 
11956 	/* Perfunctory sanity check... */
11957 	if (incmd->uscsi_cdblen <= 0) {
11958 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11959 		    "invalid uscsi_cdblen, returning EINVAL\n");
11960 		return (EINVAL);
11961 	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
11962 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11963 		    "unsupported uscsi_cdblen, returning EINVAL\n");
11964 		return (EINVAL);
11965 	}
11966 
11967 	/*
11968 	 * In order to not worry about where the uscsi structure came from
11969 	 * (or where the cdb it points to came from) we're going to make
11970 	 * kmem_alloc'd copies of them here. This will also allow reference
11971 	 * to the data they contain long after this process has gone to
11972 	 * sleep and its kernel stack has been unmapped, etc.
11973 	 *
11974 	 * First get some memory for the uscsi_cmd struct and copy the
11975 	 * contents of the given uscsi_cmd struct into it.
11976 	 */
11977 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11978 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11979 
11980 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11981 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11982 
11983 	/*
11984 	 * Now get some space for the CDB, and copy the given CDB into
11985 	 * it. Use ddi_copyin() in case the data is in user space.
11986 	 */
11987 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11988 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11989 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11990 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11991 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11992 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11993 		return (EFAULT);
11994 	}
11995 
11996 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11997 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11998 
11999 	bp = getrbuf(KM_SLEEP);
12000 
12001 	/*
12002 	 * Allocate an sd_uscsi_info struct and fill it with the info
12003 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
12004 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12005 	 * since we allocate the buf here in this function, we do not
12006 	 * need to preserve the prior contents of b_private.
12007 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12008 	 */
12009 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
12010 	uip->ui_flags = path_flag;
12011 	uip->ui_cmdp  = uscmd;
12012 	bp->b_private = uip;
12013 
12014 	/*
12015 	 * Initialize Request Sense buffering, if requested.
12016 	 */
12017 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12018 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12019 		/*
12020 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
12021 		 * buffer, but we replace this with a kernel buffer that
12022 		 * we allocate to use with the sense data. The sense data
12023 		 * (if present) gets copied into this new buffer before the
12024 		 * command is completed.  Then we copy the sense data from
12025 		 * our allocated buf into the caller's buffer below. Note
12026 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
12027 		 * below to perform the copy back to the caller's buf.
12028 		 */
12029 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
12030 		if (rqbufspace == UIO_USERSPACE) {
12031 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
12032 			uscmd->uscsi_rqresid = SENSE_LENGTH;
12033 		} else {
12034 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
12035 			uscmd->uscsi_rqlen   = rlen;
12036 			uscmd->uscsi_rqresid = rlen;
12037 		}
12038 	} else {
12039 		uscmd->uscsi_rqbuf = NULL;
12040 		uscmd->uscsi_rqlen   = 0;
12041 		uscmd->uscsi_rqresid = 0;
12042 	}
12043 
12044 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
12045 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
12046 
12047 	if (un->un_f_is_fibre == FALSE) {
12048 		/*
12049 		 * Force asynchronous mode, if necessary.  Doing this here
12050 		 * has the unfortunate effect of running other queued
12051 		 * commands async also, but since the main purpose of this
12052 		 * capability is downloading new drive firmware, we can
12053 		 * probably live with it.
12054 		 */
12055 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
12056 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12057 				== 1) {
12058 				if (scsi_ifsetcap(SD_ADDRESS(un),
12059 					    "synchronous", 0, 1) == 1) {
12060 					SD_TRACE(SD_LOG_IO, un,
12061 					"sd_send_scsi_cmd: forced async ok\n");
12062 				} else {
12063 					SD_TRACE(SD_LOG_IO, un,
12064 					    "sd_send_scsi_cmd: "
12065 					    "forced async failed\n");
12066 					rval = EINVAL;
12067 					goto done;
12068 				}
12069 			}
12070 		}
12071 
12072 		/*
12073 		 * Re-enable synchronous mode, if requested
12074 		 */
12075 		if (uscmd->uscsi_flags & USCSI_SYNC) {
12076 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12077 				== 0) {
12078 				int i = scsi_ifsetcap(SD_ADDRESS(un),
12079 						"synchronous", 1, 1);
12080 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12081 					"re-enabled sync %s\n",
12082 					(i == 1) ? "ok" : "failed");
12083 			}
12084 		}
12085 	}
12086 
12087 	/*
12088 	 * Commands sent with priority are intended for error recovery
12089 	 * situations, and do not have retries performed.
12090 	 */
12091 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12092 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12093 	}
12094 
12095 	/*
12096 	 * If we're going to do actual I/O, let physio do all the right things
12097 	 */
12098 	if (uscmd->uscsi_buflen != 0) {
12099 		struct iovec	aiov;
12100 		struct uio	auio;
12101 		struct uio	*uio = &auio;
12102 
12103 		bzero(&auio, sizeof (struct uio));
12104 		bzero(&aiov, sizeof (struct iovec));
12105 		aiov.iov_base = uscmd->uscsi_bufaddr;
12106 		aiov.iov_len  = uscmd->uscsi_buflen;
12107 		uio->uio_iov  = &aiov;
12108 
12109 		uio->uio_iovcnt  = 1;
12110 		uio->uio_resid   = uscmd->uscsi_buflen;
12111 		uio->uio_segflg  = dataspace;
12112 
12113 		/*
12114 		 * physio() will block here until the command completes....
12115 		 */
12116 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12117 
12118 		rval = physio(sd_uscsi_strategy, bp, dev,
12119 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12120 		    sduscsimin, uio);
12121 
12122 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12123 		    "returned from physio with 0x%x\n", rval);
12124 
12125 	} else {
12126 		/*
12127 		 * We have to mimic what physio would do here! Argh!
12128 		 */
12129 		bp->b_flags  = B_BUSY |
12130 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12131 		bp->b_edev   = dev;
12132 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12133 		bp->b_bcount = 0;
12134 		bp->b_blkno  = 0;
12135 
12136 		SD_TRACE(SD_LOG_IO, un,
12137 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12138 
12139 		(void) sd_uscsi_strategy(bp);
12140 
12141 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12142 
12143 		rval = biowait(bp);
12144 
12145 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12146 		    "returned from biowait with 0x%x\n", rval);
12147 	}
12148 
12149 done:
12150 
12151 #ifdef SDDEBUG
12152 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12153 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12154 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12155 	if (uscmd->uscsi_bufaddr != NULL) {
12156 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12157 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12158 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12159 		if (dataspace == UIO_SYSSPACE) {
12160 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12161 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12162 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12163 		}
12164 	}
12165 #endif
12166 
12167 	/*
12168 	 * Get the status and residual to return to the caller.
12169 	 */
12170 	incmd->uscsi_status = uscmd->uscsi_status;
12171 	incmd->uscsi_resid  = uscmd->uscsi_resid;
12172 
12173 	/*
12174 	 * If the caller wants sense data, copy back whatever sense data
12175 	 * we may have gotten, and update the relevant rqsense info.
12176 	 */
12177 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12178 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12179 
12180 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12181 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
12182 
12183 		/* Update the Request Sense status and resid */
12184 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12185 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12186 
12187 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12188 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12189 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12190 
12191 		/* Copy out the sense data for user processes */
12192 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12193 			int flags =
12194 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12195 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12196 			    rqlen, flags) != 0) {
12197 				rval = EFAULT;
12198 			}
12199 			/*
12200 			 * Note: Can't touch incmd->uscsi_rqbuf so use
12201 			 * uscmd->uscsi_rqbuf instead. They're the same.
12202 			 */
12203 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12204 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12205 			    incmd->uscsi_rqbuf, rqlen);
12206 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12207 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12208 		}
12209 	}
12210 
12211 	/*
12212 	 * Free allocated resources and return; mapout the buf in case it was
12213 	 * mapped in by a lower layer.
12214 	 */
12215 	bp_mapout(bp);
12216 	freerbuf(bp);
12217 	kmem_free(uip, sizeof (struct sd_uscsi_info));
12218 	if (uscmd->uscsi_rqbuf != NULL) {
12219 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12220 	}
12221 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12222 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12223 
12224 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12225 
12226 	return (rval);
12227 }
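
/*
 * Caller sketch (illustrative, kernel-internal): building a uscsi_cmd
 * for sd_send_scsi_cmd().  The choice of TEST UNIT READY and the local
 * variable names are assumptions for the example only.
 */
#if 0
	struct uscsi_cmd	ucmd;
	union scsi_cdb		cdb;
	int			rval;

	bzero(&ucmd, sizeof (ucmd));
	bzero(&cdb, sizeof (cdb));
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd.uscsi_cdb     = (caddr_t)&cdb;
	ucmd.uscsi_cdblen  = CDB_GROUP0;	/* 6-byte CDB */
	ucmd.uscsi_bufaddr = NULL;		/* no data phase */
	ucmd.uscsi_buflen  = 0;
	ucmd.uscsi_flags   = USCSI_SILENT;

	/* All pointers are kernel addresses, hence UIO_SYSSPACE. */
	rval = sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_DIRECT);
#endif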
12228 
12229 
12230 /*
12231  *    Function: sd_buf_iodone
12232  *
12233  * Description: Frees the sd_xbuf & returns the buf to its originator.
12234  *
12235  *     Context: May be called from interrupt context.
12236  */
12237 /* ARGSUSED */
12238 static void
12239 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12240 {
12241 	struct sd_xbuf *xp;
12242 
12243 	ASSERT(un != NULL);
12244 	ASSERT(bp != NULL);
12245 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12246 
12247 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12248 
12249 	xp = SD_GET_XBUF(bp);
12250 	ASSERT(xp != NULL);
12251 
12252 	mutex_enter(SD_MUTEX(un));
12253 
12254 	/*
12255 	 * Record the time at which the cmd completed.
12256 	 * This is used to determine whether the system has been
12257 	 * idle long enough to be declared idle to the PM framework.
12258 	 * This lowers the overhead, and therefore improves
12259 	 * performance per I/O operation.
12260 	 */
12261 	un->un_pm_idle_time = ddi_get_time();
12262 
12263 	un->un_ncmds_in_driver--;
12264 	ASSERT(un->un_ncmds_in_driver >= 0);
12265 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12266 	    un->un_ncmds_in_driver);
12267 
12268 	mutex_exit(SD_MUTEX(un));
12269 
12270 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12271 	biodone(bp);				/* bp is gone after this */
12272 
12273 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12274 }
12275 
12276 
12277 /*
12278  *    Function: sd_uscsi_iodone
12279  *
12280  * Description: Frees the sd_xbuf & returns the buf to its originator.
12281  *
12282  *     Context: May be called from interrupt context.
12283  */
12284 /* ARGSUSED */
12285 static void
12286 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12287 {
12288 	struct sd_xbuf *xp;
12289 
12290 	ASSERT(un != NULL);
12291 	ASSERT(bp != NULL);
12292 
12293 	xp = SD_GET_XBUF(bp);
12294 	ASSERT(xp != NULL);
12295 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12296 
12297 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12298 
12299 	bp->b_private = xp->xb_private;
12300 
12301 	mutex_enter(SD_MUTEX(un));
12302 
12303 	/*
12304 	 * Record the time at which the cmd completed.
12305 	 * This is used to determine whether the system has been
12306 	 * idle long enough to be declared idle to the PM framework.
12307 	 * This lowers the overhead, and therefore improves
12308 	 * performance per I/O operation.
12309 	 */
12310 	un->un_pm_idle_time = ddi_get_time();
12311 
12312 	un->un_ncmds_in_driver--;
12313 	ASSERT(un->un_ncmds_in_driver >= 0);
12314 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12315 	    un->un_ncmds_in_driver);
12316 
12317 	mutex_exit(SD_MUTEX(un));
12318 
12319 	kmem_free(xp, sizeof (struct sd_xbuf));
12320 	biodone(bp);
12321 
12322 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12323 }
12324 
12325 
12326 /*
12327  *    Function: sd_mapblockaddr_iostart
12328  *
12329  * Description: Verify the request lies within the partition limits for
12330  *		the indicated minor device.  Issue an "overrun" buf if the
12331  *		request would exceed the partition range.  Converts the
12332  *		partition-relative block address to an absolute one.
12333  *
12334  *     Context: Can sleep
12335  *
12336  *      Issues: This follows what the old code did, in terms of accessing
12337  *		some of the partition info in the unit struct without holding
12338  *		the mutex.  This is a general issue: if the partition info
12339  *		can be altered while IO is in progress, then as soon as we send
12340  *		a buf, its partitioning can be invalid before it gets to the
12341  *		device.  Probably the right fix is to move partitioning out
12342  *		of the driver entirely.
12343  */
12344 
12345 static void
12346 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12347 {
12348 	daddr_t	nblocks;	/* #blocks in the given partition */
12349 	daddr_t	blocknum;	/* Block number specified by the buf */
12350 	size_t	requested_nblocks;
12351 	size_t	available_nblocks;
12352 	int	partition;
12353 	diskaddr_t	partition_offset;
12354 	struct sd_xbuf *xp;
12355 
12356 
12357 	ASSERT(un != NULL);
12358 	ASSERT(bp != NULL);
12359 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12360 
12361 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12362 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12363 
12364 	xp = SD_GET_XBUF(bp);
12365 	ASSERT(xp != NULL);
12366 
12367 	/*
12368 	 * If the geometry is not indicated as valid, attempt to access
12369 	 * the unit & verify the geometry/label. This can be the case for
12370 	 * removable-media devices, or if the device was opened in
12371 	 * NDELAY/NONBLOCK mode.
12372 	 */
12373 	if ((un->un_f_geometry_is_valid != TRUE) &&
12374 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12375 		/*
12376 		 * For removable devices it is possible to start an I/O
12377 		 * without a media by opening the device in nodelay mode.
12378 		 * Also for writable CDs there can be many scenarios where
12379 		 * there is no geometry yet but volume manager is trying to
12380 		 * issue a read() just because it can see TOC on the CD. So
12381 		 * do not print a message for removables.
12382 		 */
12383 		if (!un->un_f_has_removable_media) {
12384 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12385 			    "i/o to invalid geometry\n");
12386 		}
12387 		bioerror(bp, EIO);
12388 		bp->b_resid = bp->b_bcount;
12389 		SD_BEGIN_IODONE(index, un, bp);
12390 		return;
12391 	}
12392 
12393 	partition = SDPART(bp->b_edev);
12394 
12395 	/* #blocks in partition */
12396 	nblocks = un->un_map[partition].dkl_nblk;
12397 
12398 	/* Use of a local variable potentially improves performance slightly */
12399 	partition_offset = un->un_offset[partition];
12400 
12401 	/*
12402 	 * blocknum is the starting block number of the request. At this
12403 	 * point it is still relative to the start of the minor device.
12404 	 */
12405 	blocknum = xp->xb_blkno;
12406 
12407 	/*
12408 	 * Legacy: If the starting block number is one past the last block
12409 	 * in the partition, do not set B_ERROR in the buf.
12410 	 */
12411 	if (blocknum == nblocks)  {
12412 		goto error_exit;
12413 	}
12414 
12415 	/*
12416 	 * Confirm that the first block of the request lies within the
12417 	 * partition limits. Also the requested number of bytes must be
12418 	 * a multiple of the system block size.
12419 	 */
12420 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12421 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12422 		bp->b_flags |= B_ERROR;
12423 		goto error_exit;
12424 	}
12425 
12426 	/*
12427 	 * If the requested # blocks exceeds the available # blocks, that
12428 	 * is an overrun of the partition.
12429 	 */
12430 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12431 	available_nblocks = (size_t)(nblocks - blocknum);
12432 	ASSERT(nblocks >= blocknum);
12433 
12434 	if (requested_nblocks > available_nblocks) {
12435 		/*
12436 		 * Allocate an "overrun" buf to allow the request to proceed
12437 		 * for the amount of space available in the partition. The
12438 		 * amount not transferred will be added into the b_resid
12439 		 * when the operation is complete. The overrun buf
12440 		 * replaces the original buf here, and the original buf
12441 		 * is saved inside the overrun buf, for later use.
12442 		 */
12443 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12444 		    (offset_t)(requested_nblocks - available_nblocks));
12445 		size_t count = bp->b_bcount - resid;
12446 		/*
12447 		 * Note: count is an unsigned entity and thus can NEVER
12448 		 * be less than 0, so ASSERT that the original values
12449 		 * are correct.
12450 		 */
12451 		ASSERT(bp->b_bcount >= resid);
12452 
12453 		bp = sd_bioclone_alloc(bp, count, blocknum,
12454 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12455 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12456 		ASSERT(xp != NULL);
12457 	}
12458 
12459 	/* At this point there should be no residual for this buf. */
12460 	ASSERT(bp->b_resid == 0);
12461 
12462 	/* Convert the block number to an absolute address. */
12463 	xp->xb_blkno += partition_offset;
12464 
12465 	SD_NEXT_IOSTART(index, un, bp);
12466 
12467 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12468 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12469 
12470 	return;
12471 
12472 error_exit:
12473 	bp->b_resid = bp->b_bcount;
12474 	SD_BEGIN_IODONE(index, un, bp);
12475 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12476 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12477 }
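
/*
 * Worked example (hypothetical numbers) for the overrun math above,
 * assuming a 512-byte system block size: a partition with nblocks = 100
 * and a request at blocknum = 95 for b_bcount = 5120 bytes gives
 * requested_nblocks = 10 but available_nblocks = 5.  An overrun buf of
 * count = 2560 bytes is cloned and sent down the chain; the remaining
 * resid = 2560 bytes is folded back into the original buf's b_resid by
 * sd_mapblockaddr_iodone() when the transfer completes.
 */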
12478 
12479 
12480 /*
12481  *    Function: sd_mapblockaddr_iodone
12482  *
12483  * Description: Completion-side processing for partition management.
12484  *
12485  *     Context: May be called under interrupt context
12486  */
12487 
12488 static void
12489 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12490 {
12491 	/* int	partition; */	/* Not used, see below. */
12492 	ASSERT(un != NULL);
12493 	ASSERT(bp != NULL);
12494 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12495 
12496 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12497 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12498 
12499 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12500 		/*
12501 		 * We have an "overrun" buf to deal with...
12502 		 */
12503 		struct sd_xbuf	*xp;
12504 		struct buf	*obp;	/* ptr to the original buf */
12505 
12506 		xp = SD_GET_XBUF(bp);
12507 		ASSERT(xp != NULL);
12508 
12509 		/* Retrieve the pointer to the original buf */
12510 		obp = (struct buf *)xp->xb_private;
12511 		ASSERT(obp != NULL);
12512 
12513 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12514 		bioerror(obp, bp->b_error);
12515 
12516 		sd_bioclone_free(bp);
12517 
12518 		/*
12519 		 * Get back the original buf.
12520 		 * Note that since the restoration of xb_blkno below
12521 		 * was removed, the sd_xbuf is not needed.
12522 		 */
12523 		bp = obp;
12524 		/*
12525 		 * xp = SD_GET_XBUF(bp);
12526 		 * ASSERT(xp != NULL);
12527 		 */
12528 	}
12529 
12530 	/*
12531 	 * Convert xp->xb_blkno back to a minor-device relative value.
12532 	 * Note: this has been commented out, as it is not needed in the
12533 	 * current implementation of the driver (ie, this function is at
12534 	 * the top of the layering chains, so the info would simply be
12535 	 * discarded) and it is in the "hot" IO path.
12536 	 *
12537 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12538 	 * xp->xb_blkno -= un->un_offset[partition];
12539 	 */
12540 
12541 	SD_NEXT_IODONE(index, un, bp);
12542 
12543 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12544 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12545 }
12546 
12547 
12548 /*
12549  *    Function: sd_mapblocksize_iostart
12550  *
12551  * Description: Convert between system block size (un->un_sys_blocksize)
12552  *		and target block size (un->un_tgt_blocksize).
12553  *
12554  *     Context: Can sleep to allocate resources.
12555  *
12556  * Assumptions: A higher layer has already performed any partition validation,
12557  *		and converted the xp->xb_blkno to an absolute value relative
12558  *		to the start of the device.
12559  *
12560  *		It is also assumed that the higher layer has implemented
12561  *		an "overrun" mechanism for the case where the request would
12562  *		read/write beyond the end of a partition.  In this case we
12563  *		assume (and ASSERT) that bp->b_resid == 0.
12564  *
12565  *		Note: The implementation for this routine assumes the target
12566  *		block size remains constant between allocation and transport.
12567  */
12568 
12569 static void
12570 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12571 {
12572 	struct sd_mapblocksize_info	*bsp;
12573 	struct sd_xbuf			*xp;
12574 	offset_t first_byte;
12575 	daddr_t	start_block, end_block;
12576 	daddr_t	request_bytes;
12577 	ushort_t is_aligned = FALSE;
12578 
12579 	ASSERT(un != NULL);
12580 	ASSERT(bp != NULL);
12581 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12582 	ASSERT(bp->b_resid == 0);
12583 
12584 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12585 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12586 
12587 	/*
12588 	 * For a non-writable CD, a write request is an error
12589 	 */
12590 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12591 	    (un->un_f_mmc_writable_media == FALSE)) {
12592 		bioerror(bp, EIO);
12593 		bp->b_resid = bp->b_bcount;
12594 		SD_BEGIN_IODONE(index, un, bp);
12595 		return;
12596 	}
12597 
12598 	/*
12599 	 * We do not need a shadow buf if the device is using
12600 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12601 	 * In this case there is no layer-private data block allocated.
12602 	 */
12603 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12604 	    (bp->b_bcount == 0)) {
12605 		goto done;
12606 	}
12607 
12608 #if defined(__i386) || defined(__amd64)
12609 	/* We do not support non-block-aligned transfers for ROD devices */
12610 	ASSERT(!ISROD(un));
12611 #endif
12612 
12613 	xp = SD_GET_XBUF(bp);
12614 	ASSERT(xp != NULL);
12615 
12616 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12617 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12618 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12619 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12620 	    "request start block:0x%x\n", xp->xb_blkno);
12621 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12622 	    "request len:0x%x\n", bp->b_bcount);
12623 
12624 	/*
12625 	 * Allocate the layer-private data area for the mapblocksize layer.
12626 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12627 	 * struct to store the pointer to their layer-private data block, but
12628 	 * each layer also has the responsibility of restoring the prior
12629 	 * contents of xb_private before returning the buf/xbuf to the
12630 	 * higher layer that sent it.
12631 	 *
12632 	 * Here we save the prior contents of xp->xb_private into the
12633 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12634 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12635 	 * the layer-private area and returning the buf/xbuf to the layer
12636 	 * that sent it.
12637 	 *
12638 	 * Note that here we use kmem_zalloc for the allocation as there are
12639 	 * parts of the mapblocksize code that expect certain fields to be
12640 	 * zero unless explicitly set to a required value.
12641 	 */
12642 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12643 	bsp->mbs_oprivate = xp->xb_private;
12644 	xp->xb_private = bsp;
12645 
12646 	/*
12647 	 * This treats the data on the disk (target) as an array of bytes.
12648 	 * first_byte is the byte offset, from the beginning of the device,
12649 	 * to the location of the request. This is converted from a
12650 	 * un->un_sys_blocksize block address to a byte offset, and then back
12651 	 * to a block address based upon a un->un_tgt_blocksize block size.
12652 	 *
12653 	 * xp->xb_blkno should be absolute upon entry into this function,
12654 	 * but it is based upon partitions that use the "system"
12655 	 * block size. It must be adjusted to reflect the block size of
12656 	 * the target.
12657 	 *
12658 	 * Note that end_block is actually the block that follows the last
12659 	 * block of the request, but that's what is needed for the computation.
12660 	 */
12661 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12662 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12663 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12664 	    un->un_tgt_blocksize;
12665 
12666 	/* request_bytes is rounded up to a multiple of the target block size */
12667 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
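
	/*
	 * Worked example (hypothetical numbers): with un_sys_blocksize
	 * == 512 and un_tgt_blocksize == 2048 (eg, DVD-RAM), a request
	 * at system block 3 for b_bcount == 1024 gives first_byte ==
	 * 1536, start_block == 0, end_block == 2, and request_bytes ==
	 * 4096.  Since first_byte % 2048 != 0 the request is unaligned,
	 * so a 4096-byte shadow READ of target blocks 0-1 is issued,
	 * with copy_offset == 1536 locating the user data within it.
	 */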
12668 
12669 	/*
12670 	 * See if the starting address of the request and the request
12671 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12672 	 * then we do not need to allocate a shadow buf to handle the request.
12673 	 */
12674 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12675 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12676 		is_aligned = TRUE;
12677 	}
12678 
12679 	if ((bp->b_flags & B_READ) == 0) {
12680 		/*
12681 		 * Lock the range for a write operation. An aligned request is
12682 		 * considered a simple write; otherwise the request must be a
12683 		 * read-modify-write.
12684 		 */
12685 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12686 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12687 	}
12688 
12689 	/*
12690 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12691 	 * where the READ command is generated for a read-modify-write. (The
12692 	 * write phase is deferred until after the read completes.)
12693 	 */
12694 	if (is_aligned == FALSE) {
12695 
12696 		struct sd_mapblocksize_info	*shadow_bsp;
12697 		struct sd_xbuf	*shadow_xp;
12698 		struct buf	*shadow_bp;
12699 
12700 		/*
12701 		 * Allocate the shadow buf and its associated xbuf. Note that
12702 		 * after this call the xb_blkno value in both the original
12703 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12704 		 * same: absolute (relative to the start of the device), and
12705 		 * adjusted for the target block size. The b_blkno in the
12706 		 * shadow buf will also be set to this value. We should never
12707 		 * change b_blkno in the original bp however.
12708 		 *
12709 		 * Note also that the shadow buf will always need to be a
12710 		 * READ command, regardless of whether the incoming command
12711 		 * is a READ or a WRITE.
12712 		 */
12713 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12714 		    xp->xb_blkno,
12715 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12716 
12717 		shadow_xp = SD_GET_XBUF(shadow_bp);
12718 
12719 		/*
12720 		 * Allocate the layer-private data for the shadow buf.
12721 		 * (No need to preserve xb_private in the shadow xbuf.)
12722 		 */
12723 		shadow_xp->xb_private = shadow_bsp =
12724 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12725 
12726 		/*
12727 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12728 		 * to figure out where the start of the user data is (based upon
12729 		 * the system block size) in the data returned by the READ
12730 		 * command (which will be based upon the target blocksize). Note
12731 		 * that this is only really used if the request is unaligned.
12732 		 */
12733 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12734 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12735 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12736 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12737 
12738 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12739 
12740 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12741 
12742 		/* Transfer the wmap (if any) to the shadow buf */
12743 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12744 		bsp->mbs_wmp = NULL;
12745 
12746 		/*
12747 		 * The shadow buf goes on from here in place of the
12748 		 * original buf.
12749 		 */
12750 		shadow_bsp->mbs_orig_bp = bp;
12751 		bp = shadow_bp;
12752 	}
12753 
12754 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12755 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12756 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12757 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12758 	    request_bytes);
12759 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12760 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12761 
12762 done:
12763 	SD_NEXT_IOSTART(index, un, bp);
12764 
12765 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12766 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12767 }
12768 
12769 
12770 /*
12771  *    Function: sd_mapblocksize_iodone
12772  *
12773  * Description: Completion side processing for block-size mapping.
12774  *
12775  *     Context: May be called under interrupt context
12776  */
12777 
12778 static void
12779 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12780 {
12781 	struct sd_mapblocksize_info	*bsp;
12782 	struct sd_xbuf	*xp;
12783 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12784 	struct buf	*orig_bp;	/* ptr to the original buf */
12785 	offset_t	shadow_end;
12786 	offset_t	request_end;
12787 	offset_t	shadow_start;
12788 	ssize_t		copy_offset;
12789 	size_t		copy_length;
12790 	size_t		shortfall;
12791 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12792 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12793 
12794 	ASSERT(un != NULL);
12795 	ASSERT(bp != NULL);
12796 
12797 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12798 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12799 
12800 	/*
12801 	 * There is no shadow buf or layer-private data if the target is
12802 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12803 	 */
12804 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12805 	    (bp->b_bcount == 0)) {
12806 		goto exit;
12807 	}
12808 
12809 	xp = SD_GET_XBUF(bp);
12810 	ASSERT(xp != NULL);
12811 
12812 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12813 	bsp = xp->xb_private;
12814 
12815 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12816 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12817 
12818 	if (is_write) {
12819 		/*
12820 		 * For a WRITE request we must free up the block range that
12821 		 * we have locked up.  This holds regardless of whether this is
12822 		 * an aligned write request or a read-modify-write request.
12823 		 */
12824 		sd_range_unlock(un, bsp->mbs_wmp);
12825 		bsp->mbs_wmp = NULL;
12826 	}
12827 
12828 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12829 		/*
12830 		 * An aligned read or write command will have no shadow buf;
12831 		 * there is not much else to do with it.
12832 		 */
12833 		goto done;
12834 	}
12835 
12836 	orig_bp = bsp->mbs_orig_bp;
12837 	ASSERT(orig_bp != NULL);
12838 	orig_xp = SD_GET_XBUF(orig_bp);
12839 	ASSERT(orig_xp != NULL);
12840 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12841 
12842 	if (!is_write && has_wmap) {
12843 		/*
12844 		 * A READ with a wmap means this is the READ phase of a
12845 		 * read-modify-write. If an error occurred on the READ then
12846 		 * we do not proceed with the WRITE phase or copy any data.
12847 		 * Just release the write maps and return with an error.
12848 		 */
12849 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12850 			orig_bp->b_resid = orig_bp->b_bcount;
12851 			bioerror(orig_bp, bp->b_error);
12852 			sd_range_unlock(un, bsp->mbs_wmp);
12853 			goto freebuf_done;
12854 		}
12855 	}
12856 
12857 	/*
12858 	 * Here is where we set up to copy the data from the shadow buf
12859 	 * into the space associated with the original buf.
12860 	 *
12861 	 * To deal with the conversion between block sizes, these
12862 	 * computations treat the data as an array of bytes, with the
12863 	 * first byte (byte 0) corresponding to the first byte in the
12864 	 * first block on the disk.
12865 	 */
12866 
12867 	/*
12868 	 * shadow_start and shadow_len indicate the location and size of
12869 	 * the data returned with the shadow IO request.
12870 	 */
12871 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12872 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12873 
12874 	/*
12875 	 * copy_offset gives the offset (in bytes) from the start of the first
12876 	 * block of the READ request to the beginning of the data.  We retrieve
12877 	 * this value from mbs_copy_offset in the layer-private data, as it
12878 	 * was saved there by sd_mapblocksize_iostart(). copy_length gives the
12879 	 * amount of data to be copied (in bytes).
12880 	 */
12881 	copy_offset  = bsp->mbs_copy_offset;
12882 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12883 	copy_length  = orig_bp->b_bcount;
12884 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12885 
12886 	/*
12887 	 * Set up the resid and error fields of orig_bp as appropriate.
12888 	 */
12889 	if (shadow_end >= request_end) {
12890 		/* We got all the requested data; set resid to zero */
12891 		orig_bp->b_resid = 0;
12892 	} else {
12893 		/*
12894 		 * We failed to get enough data to fully satisfy the original
12895 		 * request. Just copy back whatever data we got and set
12896 		 * up the residual and error code as required.
12897 		 *
12898 		 * 'shortfall' is the amount by which the data received with the
12899 		 * shadow buf has "fallen short" of the requested amount.
12900 		 */
12901 		shortfall = (size_t)(request_end - shadow_end);
12902 
12903 		if (shortfall > orig_bp->b_bcount) {
12904 			/*
12905 			 * We did not get enough data to even partially
12906 			 * fulfill the original request.  The residual is
12907 			 * equal to the amount requested.
12908 			 */
12909 			orig_bp->b_resid = orig_bp->b_bcount;
12910 		} else {
12911 			/*
12912 			 * We did not get all the data that we requested
12913 			 * from the device, but we will try to return what
12914 			 * portion we did get.
12915 			 */
12916 			orig_bp->b_resid = shortfall;
12917 		}
12918 		ASSERT(copy_length >= orig_bp->b_resid);
12919 		copy_length  -= orig_bp->b_resid;
12920 	}
12921 
12922 	/* Propagate the error code from the shadow buf to the original buf */
12923 	bioerror(orig_bp, bp->b_error);
12924 
12925 	if (is_write) {
12926 		goto freebuf_done;	/* No data copying for a WRITE */
12927 	}
12928 
12929 	if (has_wmap) {
12930 		/*
12931 		 * This is a READ command from the READ phase of a
12932 		 * read-modify-write request. We have to copy the data given
12933 		 * by the user OVER the data returned by the READ command,
12934 		 * then convert the command from a READ to a WRITE and send
12935 		 * it back to the target.
12936 		 */
12937 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12938 		    copy_length);
12939 
12940 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12941 
12942 		/*
12943 		 * Dispatch the WRITE command to the taskq thread, which
12944 		 * will in turn send the command to the target. When the
12945 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12946 		 * will get called again as part of the iodone chain
12947 		 * processing for it. Note that we will still be dealing
12948 		 * with the shadow buf at that point.
12949 		 */
12950 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12951 		    KM_NOSLEEP) != 0) {
12952 			/*
12953 			 * Dispatch was successful so we are done. Return
12954 			 * without going any higher up the iodone chain. Do
12955 			 * not free up any layer-private data until after the
12956 			 * WRITE completes.
12957 			 */
12958 			return;
12959 		}
12960 
12961 		/*
12962 		 * Dispatch of the WRITE command failed; set up the error
12963 		 * condition and send this IO back up the iodone chain.
12964 		 */
12965 		bioerror(orig_bp, EIO);
12966 		orig_bp->b_resid = orig_bp->b_bcount;
12967 
12968 	} else {
12969 		/*
12970 		 * This is a regular READ request (ie, not a RMW). Copy the
12971 		 * data from the shadow buf into the original buf. The
12972 		 * copy_offset compensates for any "misalignment" between the
12973 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12974 		 * original buf (with its un->un_sys_blocksize blocks).
12975 		 */
12976 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12977 		    copy_length);
12978 	}
12979 
12980 freebuf_done:
12981 
12982 	/*
12983 	 * At this point we still have both the shadow buf AND the original
12984 	 * buf to deal with, as well as the layer-private data area in each.
12985 	 * Local variables are as follows:
12986 	 *
12987 	 * bp -- points to shadow buf
12988 	 * xp -- points to xbuf of shadow buf
12989 	 * bsp -- points to layer-private data area of shadow buf
12990 	 * orig_bp -- points to original buf
12991 	 *
12992 	 * First free the shadow buf and its associated xbuf, then free the
12993 	 * layer-private data area from the shadow buf. There is no need to
12994 	 * restore xb_private in the shadow xbuf.
12995 	 */
12996 	sd_shadow_buf_free(bp);
12997 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12998 
12999 	/*
13000 	 * Now update the local variables to point to the original buf, xbuf,
13001 	 * and layer-private area.
13002 	 */
13003 	bp = orig_bp;
13004 	xp = SD_GET_XBUF(bp);
13005 	ASSERT(xp != NULL);
13006 	ASSERT(xp == orig_xp);
13007 	bsp = xp->xb_private;
13008 	ASSERT(bsp != NULL);
13009 
13010 done:
13011 	/*
13012 	 * Restore xb_private to whatever it was set to by the next higher
13013 	 * layer in the chain, then free the layer-private data area.
13014 	 */
13015 	xp->xb_private = bsp->mbs_oprivate;
13016 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13017 
13018 exit:
13019 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
13020 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
13021 
13022 	SD_NEXT_IODONE(index, un, bp);
13023 }
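
/*
 * Recap of the read-modify-write sequence implemented by the two
 * functions above: sd_mapblocksize_iostart() locks the affected block
 * range and issues a shadow READ of whole target blocks.  On completion,
 * sd_mapblocksize_iodone() copies the caller's data over the returned
 * data at mbs_copy_offset, converts the shadow buf to a WRITE, and
 * dispatches it to sd_wmr_tq.  When that WRITE completes,
 * sd_mapblocksize_iodone() runs again, unlocks the range, frees the
 * shadow buf, and sends the original buf back up the iodone chain.
 */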
13024 
13025 
13026 /*
13027  *    Function: sd_checksum_iostart
13028  *
13029  * Description: A stub function for a layer that's currently not used.
13030  *		For now just a placeholder.
13031  *
13032  *     Context: Kernel thread context
13033  */
13034 
13035 static void
13036 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13037 {
13038 	ASSERT(un != NULL);
13039 	ASSERT(bp != NULL);
13040 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13041 	SD_NEXT_IOSTART(index, un, bp);
13042 }
13043 
13044 
13045 /*
13046  *    Function: sd_checksum_iodone
13047  *
13048  * Description: A stub function for a layer that's currently not used.
13049  *		For now just a placeholder.
13050  *
13051  *     Context: May be called under interrupt context
13052  */
13053 
13054 static void
13055 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13056 {
13057 	ASSERT(un != NULL);
13058 	ASSERT(bp != NULL);
13059 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13060 	SD_NEXT_IODONE(index, un, bp);
13061 }
13062 
13063 
13064 /*
13065  *    Function: sd_checksum_uscsi_iostart
13066  *
13067  * Description: A stub function for a layer that's currently not used.
13068  *		For now just a placeholder.
13069  *
13070  *     Context: Kernel thread context
13071  */
13072 
13073 static void
13074 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13075 {
13076 	ASSERT(un != NULL);
13077 	ASSERT(bp != NULL);
13078 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13079 	SD_NEXT_IOSTART(index, un, bp);
13080 }
13081 
13082 
13083 /*
13084  *    Function: sd_checksum_uscsi_iodone
13085  *
13086  * Description: A stub function for a layer that's currently not used.
13087  *		For now just a placeholder.
13088  *
13089  *     Context: May be called under interrupt context
13090  */
13091 
13092 static void
13093 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13094 {
13095 	ASSERT(un != NULL);
13096 	ASSERT(bp != NULL);
13097 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13098 	SD_NEXT_IODONE(index, un, bp);
13099 }
13100 
13101 
13102 /*
13103  *    Function: sd_pm_iostart
13104  *
13105  * Description: iostart-side routine for power management.
13106  *
13107  *     Context: Kernel thread context
13108  */
13109 
13110 static void
13111 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13112 {
13113 	ASSERT(un != NULL);
13114 	ASSERT(bp != NULL);
13115 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13116 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13117 
13118 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13119 
13120 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13121 		/*
13122 		 * Set up to return the failed buf back up the 'iodone'
13123 		 * side of the calling chain.
13124 		 */
13125 		bioerror(bp, EIO);
13126 		bp->b_resid = bp->b_bcount;
13127 
13128 		SD_BEGIN_IODONE(index, un, bp);
13129 
13130 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13131 		return;
13132 	}
13133 
13134 	SD_NEXT_IOSTART(index, un, bp);
13135 
13136 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13137 }
13138 
13139 
13140 /*
13141  *    Function: sd_pm_iodone
13142  *
13143  * Description: iodone-side routine for power management.
13144  *
13145  *     Context: may be called from interrupt context
13146  */
13147 
13148 static void
13149 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13150 {
13151 	ASSERT(un != NULL);
13152 	ASSERT(bp != NULL);
13153 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13154 
13155 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13156 
13157 	/*
13158 	 * After attach the following flag is only read, so don't
13159 	 * take the penalty of acquiring a mutex for it.
13160 	 */
13161 	if (un->un_f_pm_is_enabled == TRUE) {
13162 		sd_pm_exit(un);
13163 	}
13164 
13165 	SD_NEXT_IODONE(index, un, bp);
13166 
13167 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13168 }
13169 
13170 
13171 /*
13172  *    Function: sd_core_iostart
13173  *
13174  * Description: Primary driver function for enqueuing buf(9S) structs from
13175  *		the system and initiating IO to the target device
13176  *
13177  *     Context: Kernel thread context. Can sleep.
13178  *
13179  * Assumptions:  - The given xp->xb_blkno is absolute
13180  *		   (ie, relative to the start of the device).
13181  *		 - The IO is to be done using the native blocksize of
13182  *		   the device, as specified in un->un_tgt_blocksize.
13183  */
13184 /* ARGSUSED */
13185 static void
13186 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13187 {
13188 	struct sd_xbuf *xp;
13189 
13190 	ASSERT(un != NULL);
13191 	ASSERT(bp != NULL);
13192 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13193 	ASSERT(bp->b_resid == 0);
13194 
13195 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13196 
13197 	xp = SD_GET_XBUF(bp);
13198 	ASSERT(xp != NULL);
13199 
13200 	mutex_enter(SD_MUTEX(un));
13201 
13202 	/*
13203 	 * If we are currently in the failfast state, fail any new IO
13204 	 * that has B_FAILFAST set, then return.
13205 	 */
13206 	if ((bp->b_flags & B_FAILFAST) &&
13207 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13208 		mutex_exit(SD_MUTEX(un));
13209 		bioerror(bp, EIO);
13210 		bp->b_resid = bp->b_bcount;
13211 		SD_BEGIN_IODONE(index, un, bp);
13212 		return;
13213 	}
13214 
13215 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13216 		/*
13217 		 * Priority command -- transport it immediately.
13218 		 *
13219 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13220 		 * because all direct priority commands should be associated
13221 		 * with error recovery actions which we don't want to retry.
13222 		 */
13223 		sd_start_cmds(un, bp);
13224 	} else {
13225 		/*
13226 		 * Normal command -- add it to the wait queue, then start
13227 		 * transporting commands from the wait queue.
13228 		 */
13229 		sd_add_buf_to_waitq(un, bp);
13230 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13231 		sd_start_cmds(un, NULL);
13232 	}
13233 
13234 	mutex_exit(SD_MUTEX(un));
13235 
13236 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13237 }
13238 
13239 
13240 /*
13241  *    Function: sd_init_cdb_limits
13242  *
13243  * Description: This is to handle scsi_pkt initialization differences
13244  *		between the driver platforms.
13245  *
13246  *		Legacy behaviors:
13247  *
13248  *		If the block number or the sector count exceeds the
13249  *		capabilities of a Group 0 command, shift over to a
13250  *		Group 1 command. We don't blindly use Group 1
13251  *		commands because a) some drives (CDC Wren IVs) get a
13252  *		bit confused, and b) there is probably a fair amount
13253  *		of speed difference for a target to receive and decode
13254  *		a 10 byte command instead of a 6 byte command.
13255  *
13256  *		The xfer time difference of 6 vs 10 byte CDBs is
13257  *		still significant, so this code is still worthwhile.
13258  *		10 byte CDBs are very inefficient with the fas HBA driver
13259  *		and older disks. Each CDB byte took 1 usec with some
13260  *		popular disks.
13261  *
13262  *     Context: Must be called at attach time
13263  */
13264 
13265 static void
13266 sd_init_cdb_limits(struct sd_lun *un)
13267 {
13268 	int hba_cdb_limit;
13269 
13270 	/*
13271 	 * Use CDB_GROUP1 commands for most devices except for
13272 	 * parallel SCSI fixed drives in which case we get better
13273 	 * performance using CDB_GROUP0 commands (where applicable).
13274 	 */
13275 	un->un_mincdb = SD_CDB_GROUP1;
13276 #if !defined(__fibre)
13277 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13278 	    !un->un_f_has_removable_media) {
13279 		un->un_mincdb = SD_CDB_GROUP0;
13280 	}
13281 #endif
13282 
13283 	/*
13284 	 * Try to read the max-cdb-length supported by HBA.
13285 	 */
13286 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13287 	if (0 >= un->un_max_hba_cdb) {
13288 		un->un_max_hba_cdb = CDB_GROUP4;
13289 		hba_cdb_limit = SD_CDB_GROUP4;
13290 	} else if (0 < un->un_max_hba_cdb &&
13291 	    un->un_max_hba_cdb < CDB_GROUP1) {
13292 		hba_cdb_limit = SD_CDB_GROUP0;
13293 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13294 	    un->un_max_hba_cdb < CDB_GROUP5) {
13295 		hba_cdb_limit = SD_CDB_GROUP1;
13296 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13297 	    un->un_max_hba_cdb < CDB_GROUP4) {
13298 		hba_cdb_limit = SD_CDB_GROUP5;
13299 	} else {
13300 		hba_cdb_limit = SD_CDB_GROUP4;
13301 	}
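	/*
	 * Illustrative mapping (assuming the standard CDB_GROUPn byte
	 * lengths: group 0 = 6, group 1 = 10, group 5 = 12, group 4 = 16):
	 *
	 *	max-cdb-length		hba_cdb_limit
	 *	--------------		-------------
	 *	<= 0 (cap unknown)	SD_CDB_GROUP4
	 *	1 - 9			SD_CDB_GROUP0
	 *	10 - 11			SD_CDB_GROUP1
	 *	12 - 15			SD_CDB_GROUP5
	 *	>= 16			SD_CDB_GROUP4
	 */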
13302 
13303 	/*
13304 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13305 	 * commands for fixed disks unless we are building for a 32 bit
13306 	 * kernel.
13307 	 */
13308 #ifdef _LP64
13309 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13310 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13311 #else
13312 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13313 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13314 #endif
13315 
13316 	/*
13317 	 * x86 systems require the PKT_DMA_PARTIAL flag
13318 	 */
13319 #if defined(__x86)
13320 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13321 #else
13322 	un->un_pkt_flags = 0;
13323 #endif
13324 
13325 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13326 	    ? sizeof (struct scsi_arq_status) : 1);
13327 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13328 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13329 }
13330 
13331 
13332 /*
13333  *    Function: sd_initpkt_for_buf
13334  *
13335  * Description: Allocate and initialize for transport a scsi_pkt struct,
13336  *		based upon the info specified in the given buf struct.
13337  *
13338  *		Assumes the xb_blkno in the request is absolute (ie,
13339  *		relative to the start of the device, NOT the partition).
13340  *		Also assumes that the request is using the native block
13341  *		size of the device (as returned by the READ CAPACITY
13342  *		command).
13343  *
13344  * Return Code: SD_PKT_ALLOC_SUCCESS
13345  *		SD_PKT_ALLOC_FAILURE
13346  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13347  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13348  *
13349  *     Context: Kernel thread and may be called from software interrupt context
13350  *		as part of a sdrunout callback. This function may not block or
13351  *		call routines that block
13352  */
13353 
13354 static int
13355 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13356 {
13357 	struct sd_xbuf	*xp;
13358 	struct scsi_pkt *pktp = NULL;
13359 	struct sd_lun	*un;
13360 	size_t		blockcount;
13361 	daddr_t		startblock;
13362 	int		rval;
13363 	int		cmd_flags;
13364 
13365 	ASSERT(bp != NULL);
13366 	ASSERT(pktpp != NULL);
13367 	xp = SD_GET_XBUF(bp);
13368 	ASSERT(xp != NULL);
13369 	un = SD_GET_UN(bp);
13370 	ASSERT(un != NULL);
13371 	ASSERT(mutex_owned(SD_MUTEX(un)));
13372 	ASSERT(bp->b_resid == 0);
13373 
13374 	SD_TRACE(SD_LOG_IO_CORE, un,
13375 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13376 
13377 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13378 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13379 		/*
13380 		 * Already have a scsi_pkt -- just need DMA resources.
13381 		 * We must recompute the CDB in case the mapping returns
13382 		 * a nonzero pkt_resid.
13383 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13384 		 * that is being retried, the unmap/remap of the DMA resources
13385 		 * will result in the entire transfer starting over again
13386 		 * from the very first block.
13387 		 */
13388 		ASSERT(xp->xb_pktp != NULL);
13389 		pktp = xp->xb_pktp;
13390 	} else {
13391 		pktp = NULL;
13392 	}
13393 #endif /* __i386 || __amd64 */
13394 
13395 	startblock = xp->xb_blkno;	/* Absolute block num. */
13396 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13397 
13398 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13399 
13400 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13401 
13402 #else
13403 
13404 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13405 
13406 #endif
13407 
13408 	/*
13409 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13410 	 * call scsi_init_pkt, and build the CDB.
13411 	 */
13412 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13413 	    cmd_flags, sdrunout, (caddr_t)un,
13414 	    startblock, blockcount);
13415 
13416 	if (rval == 0) {
13417 		/*
13418 		 * Success.
13419 		 *
13420 		 * If partial DMA is being used and required for this transfer,
13421 		 * set it up here.
13422 		 */
13423 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13424 		    (pktp->pkt_resid != 0)) {
13425 
13426 			/*
13427 			 * Save the pkt_resid so the next transfer can
13428 			 * resume where this one leaves off.
13429 			 */
13430 			xp->xb_dma_resid = pktp->pkt_resid;
13431 
13432 			/* rezero resid */
13433 			pktp->pkt_resid = 0;
13434 
13435 		} else {
13436 			xp->xb_dma_resid = 0;
13437 		}
13438 
13439 		pktp->pkt_flags = un->un_tagflags;
13440 		pktp->pkt_time  = un->un_cmd_timeout;
13441 		pktp->pkt_comp  = sdintr;
13442 
13443 		pktp->pkt_private = bp;
13444 		*pktpp = pktp;
13445 
13446 		SD_TRACE(SD_LOG_IO_CORE, un,
13447 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13448 
13449 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13450 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13451 #endif
13452 
13453 		return (SD_PKT_ALLOC_SUCCESS);
13454 
13455 	}
13456 
13457 	/*
13458 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13459 	 * from sd_setup_rw_pkt.
13460 	 */
13461 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13462 
13463 	if (rval == SD_PKT_ALLOC_FAILURE) {
13464 		*pktpp = NULL;
13465 		/*
13466 		 * Set the driver state to RWAIT to indicate the driver
13467 		 * is waiting on resource allocations. The driver will not
13468 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13469 		 */
13470 		New_state(un, SD_STATE_RWAIT);
13471 
13472 		SD_ERROR(SD_LOG_IO_CORE, un,
13473 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13474 
13475 		if ((bp->b_flags & B_ERROR) != 0) {
13476 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13477 		}
13478 		return (SD_PKT_ALLOC_FAILURE);
13479 	} else {
13480 		/*
13481 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13482 		 *
13483 		 * This should never happen.  Maybe someone messed with the
13484 		 * kernel's minphys?
13485 		 */
13486 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13487 		    "Request rejected: too large for CDB: "
13488 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13489 		SD_ERROR(SD_LOG_IO_CORE, un,
13490 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13491 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13492 
13493 	}
13494 }
13495 
13496 
13497 /*
13498  *    Function: sd_destroypkt_for_buf
13499  *
13500  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13501  *
13502  *     Context: Kernel thread or interrupt context
13503  */
13504 
13505 static void
13506 sd_destroypkt_for_buf(struct buf *bp)
13507 {
13508 	ASSERT(bp != NULL);
13509 	ASSERT(SD_GET_UN(bp) != NULL);
13510 
13511 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13512 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13513 
13514 	ASSERT(SD_GET_PKTP(bp) != NULL);
13515 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13516 
13517 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13518 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13519 }
13520 
13521 /*
13522  *    Function: sd_setup_rw_pkt
13523  *
13524  * Description: Determines appropriate CDB group for the requested LBA
13525  *		and transfer length, calls scsi_init_pkt, and builds
13526  *		the CDB.  Do not use for partial DMA transfers except
13527  *		for the initial transfer since the CDB size must
13528  *		remain constant.
13529  *
13530  *     Context: Kernel thread and may be called from software interrupt
13531  *		context as part of a sdrunout callback. This function may not
13532  *		block or call routines that block
13533  */
13534 
13535 
13536 int
13537 sd_setup_rw_pkt(struct sd_lun *un,
13538     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13539     int (*callback)(caddr_t), caddr_t callback_arg,
13540     diskaddr_t lba, uint32_t blockcount)
13541 {
13542 	struct scsi_pkt *return_pktp;
13543 	union scsi_cdb *cdbp;
13544 	struct sd_cdbinfo *cp = NULL;
13545 	int i;
13546 
13547 	/*
13548 	 * See which size CDB to use, based upon the request.
13549 	 */
13550 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13551 
13552 		/*
13553 		 * Check lba and block count against sd_cdbtab limits.
13554 		 * In the partial DMA case, we have to use the same size
13555 		 * CDB for all the transfers.  Check lba + blockcount
13556 		 * against the max LBA so we know that segment of the
13557 		 * transfer can use the CDB we select.
13558 		 */
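		/*
		 * For reference, the standard SCSI READ/WRITE addressing
		 * limits (which sd_cdbtab presumably encodes) are:
		 *
		 *	Group 0 (6-byte):   21-bit LBA, max 256 blocks
		 *	Group 1 (10-byte):  32-bit LBA, max 65535 blocks
		 *	Group 5 (12-byte):  32-bit LBA, 32-bit block count
		 *	Group 4 (16-byte):  64-bit LBA, 32-bit block count
		 *
		 * For example, a one-block request at lba 0x300000 cannot
		 * use a 6-byte CDB, because the LBA exceeds 21 bits.
		 */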
13559 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13560 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13561 
13562 			/*
13563 			 * The command will fit into the CDB type
13564 			 * specified by sd_cdbtab[i].
13565 			 */
13566 			cp = sd_cdbtab + i;
13567 
13568 			/*
13569 			 * Call scsi_init_pkt so we can fill in the
13570 			 * CDB.
13571 			 */
13572 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13573 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13574 			    flags, callback, callback_arg);
13575 
13576 			if (return_pktp != NULL) {
13577 
13578 				/*
13579 				 * Return new value of pkt
13580 				 */
13581 				*pktpp = return_pktp;
13582 
13583 				/*
13584 				 * To be safe, zero the CDB, ensuring there is
13585 				 * no leftover data from a previous command.
13586 				 */
13587 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13588 
13589 				/*
13590 				 * Handle partial DMA mapping
13591 				 */
13592 				if (return_pktp->pkt_resid != 0) {
13593 
13594 					/*
13595 					 * Not going to xfer as many blocks as
13596 					 * originally expected
13597 					 */
13598 					blockcount -=
13599 					    SD_BYTES2TGTBLOCKS(un,
13600 						return_pktp->pkt_resid);
13601 				}
13602 
13603 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13604 
13605 				/*
13606 				 * Set command byte based on the CDB
13607 				 * type we matched.
13608 				 */
13609 				cdbp->scc_cmd = cp->sc_grpmask |
13610 				    ((bp->b_flags & B_READ) ?
13611 					SCMD_READ : SCMD_WRITE);
13612 
13613 				SD_FILL_SCSI1_LUN(un, return_pktp);
13614 
13615 				/*
13616 				 * Fill in LBA and length
13617 				 */
13618 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13619 				    (cp->sc_grpcode == CDB_GROUP4) ||
13620 				    (cp->sc_grpcode == CDB_GROUP0) ||
13621 				    (cp->sc_grpcode == CDB_GROUP5));
13622 
13623 				if (cp->sc_grpcode == CDB_GROUP1) {
13624 					FORMG1ADDR(cdbp, lba);
13625 					FORMG1COUNT(cdbp, blockcount);
13626 					return (0);
13627 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13628 					FORMG4LONGADDR(cdbp, lba);
13629 					FORMG4COUNT(cdbp, blockcount);
13630 					return (0);
13631 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13632 					FORMG0ADDR(cdbp, lba);
13633 					FORMG0COUNT(cdbp, blockcount);
13634 					return (0);
13635 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13636 					FORMG5ADDR(cdbp, lba);
13637 					FORMG5COUNT(cdbp, blockcount);
13638 					return (0);
13639 				}
13640 
13641 				/*
13642 				 * It should be impossible to not match one
13643 				 * of the CDB types above, so we should never
13644 				 * reach this point.  Set the CDB command byte
13645 				 * to test-unit-ready to avoid writing
13646 				 * to somewhere we don't intend.
13647 				 */
13648 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13649 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13650 			} else {
13651 				/*
13652 				 * Couldn't get scsi_pkt
13653 				 */
13654 				return (SD_PKT_ALLOC_FAILURE);
13655 			}
13656 		}
13657 	}
13658 
13659 	/*
13660 	 * None of the available CDB types were suitable.  This really
13661 	 * should never happen:  on a 64 bit system we support
13662 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13663 	 * and on a 32 bit system we will refuse to bind to a device
13664 	 * larger than 2TB so addresses will never be larger than 32 bits.
13665 	 */
13666 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13667 }
13668 
13669 #if defined(__i386) || defined(__amd64)
13670 /*
13671  *    Function: sd_setup_next_rw_pkt
13672  *
13673  * Description: Setup packet for partial DMA transfers, except for the
13674  * 		initial transfer.  sd_setup_rw_pkt should be used for
13675  *		the initial transfer.
13676  *
13677  *     Context: Kernel thread and may be called from interrupt context.
13678  */
13679 
13680 int
13681 sd_setup_next_rw_pkt(struct sd_lun *un,
13682     struct scsi_pkt *pktp, struct buf *bp,
13683     diskaddr_t lba, uint32_t blockcount)
13684 {
13685 	uchar_t com;
13686 	union scsi_cdb *cdbp;
13687 	uchar_t cdb_group_id;
13688 
13689 	ASSERT(pktp != NULL);
13690 	ASSERT(pktp->pkt_cdbp != NULL);
13691 
13692 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13693 	com = cdbp->scc_cmd;
13694 	cdb_group_id = CDB_GROUPID(com);
13695 
13696 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13697 	    (cdb_group_id == CDB_GROUPID_1) ||
13698 	    (cdb_group_id == CDB_GROUPID_4) ||
13699 	    (cdb_group_id == CDB_GROUPID_5));
13700 
13701 	/*
13702 	 * Move pkt to the next portion of the xfer.
13703 	 * func is NULL_FUNC so we do not have to release
13704 	 * the disk mutex here.
13705 	 */
13706 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13707 	    NULL_FUNC, NULL) == pktp) {
13708 		/* Success.  Handle partial DMA */
13709 		if (pktp->pkt_resid != 0) {
13710 			blockcount -=
13711 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13712 		}
13713 
13714 		cdbp->scc_cmd = com;
13715 		SD_FILL_SCSI1_LUN(un, pktp);
13716 		if (cdb_group_id == CDB_GROUPID_1) {
13717 			FORMG1ADDR(cdbp, lba);
13718 			FORMG1COUNT(cdbp, blockcount);
13719 			return (0);
13720 		} else if (cdb_group_id == CDB_GROUPID_4) {
13721 			FORMG4LONGADDR(cdbp, lba);
13722 			FORMG4COUNT(cdbp, blockcount);
13723 			return (0);
13724 		} else if (cdb_group_id == CDB_GROUPID_0) {
13725 			FORMG0ADDR(cdbp, lba);
13726 			FORMG0COUNT(cdbp, blockcount);
13727 			return (0);
13728 		} else if (cdb_group_id == CDB_GROUPID_5) {
13729 			FORMG5ADDR(cdbp, lba);
13730 			FORMG5COUNT(cdbp, blockcount);
13731 			return (0);
13732 		}
13733 
13734 		/* Unreachable */
13735 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13736 	}
13737 
13738 	/*
13739 	 * Error setting up next portion of cmd transfer.
13740 	 * Something is definitely very wrong and this
13741 	 * should not happen.
13742 	 */
13743 	return (SD_PKT_ALLOC_FAILURE);
13744 }
13745 #endif /* defined(__i386) || defined(__amd64) */
13746 
13747 /*
13748  *    Function: sd_initpkt_for_uscsi
13749  *
13750  * Description: Allocate and initialize for transport a scsi_pkt struct,
13751  *		based upon the info specified in the given uscsi_cmd struct.
13752  *
13753  * Return Code: SD_PKT_ALLOC_SUCCESS
13754  *		SD_PKT_ALLOC_FAILURE
13755  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13756  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13757  *
13758  *     Context: Kernel thread and may be called from software interrupt context
13759  *		as part of a sdrunout callback. This function may not block or
13760  *		call routines that block
13761  */
13762 
13763 static int
13764 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13765 {
13766 	struct uscsi_cmd *uscmd;
13767 	struct sd_xbuf	*xp;
13768 	struct scsi_pkt	*pktp;
13769 	struct sd_lun	*un;
13770 	uint32_t	flags = 0;
13771 
13772 	ASSERT(bp != NULL);
13773 	ASSERT(pktpp != NULL);
13774 	xp = SD_GET_XBUF(bp);
13775 	ASSERT(xp != NULL);
13776 	un = SD_GET_UN(bp);
13777 	ASSERT(un != NULL);
13778 	ASSERT(mutex_owned(SD_MUTEX(un)));
13779 
13780 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13781 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13782 	ASSERT(uscmd != NULL);
13783 
13784 	SD_TRACE(SD_LOG_IO_CORE, un,
13785 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13786 
13787 	/*
13788 	 * Allocate the scsi_pkt for the command.
13789 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13790 	 *	 during scsi_init_pkt time and will continue to use the
13791 	 *	 same path as long as the same scsi_pkt is used without
13792 	 *	 intervening scsi_dmafree(). Since a uscsi command does
13793 	 *	 not call scsi_dmafree() before retrying a failed command,
13794 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13795 	 *	 NOT set, so that scsi_vhci can use another available path
13796 	 *	 for the retry. Besides, uscsi commands do not allow DMA
13797 	 *	 breakup, so there is no need to set PKT_DMA_PARTIAL.
13798 	 */
13799 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13800 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13801 	    sizeof (struct scsi_arq_status), 0,
13802 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13803 	    sdrunout, (caddr_t)un);
13804 
13805 	if (pktp == NULL) {
13806 		*pktpp = NULL;
13807 		/*
13808 		 * Set the driver state to RWAIT to indicate the driver
13809 		 * is waiting on resource allocations. The driver will not
13810 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13811 		 */
13812 		New_state(un, SD_STATE_RWAIT);
13813 
13814 		SD_ERROR(SD_LOG_IO_CORE, un,
13815 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13816 
13817 		if ((bp->b_flags & B_ERROR) != 0) {
13818 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13819 		}
13820 		return (SD_PKT_ALLOC_FAILURE);
13821 	}
13822 
13823 	/*
13824 	 * We do not do DMA breakup for USCSI commands, so return failure
13825 	 * here if all the needed DMA resources were not allocated.
13826 	 */
13827 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13828 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13829 		scsi_destroy_pkt(pktp);
13830 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13831 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13832 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13833 	}
13834 
13835 	/* Init the cdb from the given uscsi struct */
13836 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13837 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13838 
13839 	SD_FILL_SCSI1_LUN(un, pktp);
13840 
13841 	/*
13842 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
13843 	 * for listing of the supported flags.
13844 	 */
13845 
13846 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13847 		flags |= FLAG_SILENT;
13848 	}
13849 
13850 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13851 		flags |= FLAG_DIAGNOSE;
13852 	}
13853 
13854 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13855 		flags |= FLAG_ISOLATE;
13856 	}
13857 
13858 	if (un->un_f_is_fibre == FALSE) {
13859 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13860 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13861 		}
13862 	}
13863 
13864 	/*
13865 	 * Set the pkt flags here so we save time later.
13866 	 * Note: These flags are NOT in the uscsi man page!!!
13867 	 */
13868 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13869 		flags |= FLAG_HEAD;
13870 	}
13871 
13872 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13873 		flags |= FLAG_NOINTR;
13874 	}
13875 
13876 	/*
13877 	 * For tagged queueing, things get a bit complicated.
13878 	 * Check first for head of queue and last for ordered queue.
13879 	 * If neither head nor order, use the default driver tag flags.
13880 	 */
13881 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13882 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13883 			flags |= FLAG_HTAG;
13884 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13885 			flags |= FLAG_OTAG;
13886 		} else {
13887 			flags |= un->un_tagflags & FLAG_TAGMASK;
13888 		}
13889 	}
13890 
13891 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13892 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13893 	}
13894 
13895 	pktp->pkt_flags = flags;
13896 
13897 	/* Copy the caller's CDB into the pkt... */
13898 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13899 
13900 	if (uscmd->uscsi_timeout == 0) {
13901 		pktp->pkt_time = un->un_uscsi_timeout;
13902 	} else {
13903 		pktp->pkt_time = uscmd->uscsi_timeout;
13904 	}
13905 
13906 	/* need it later to identify USCSI request in sdintr */
13907 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13908 
13909 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13910 
13911 	pktp->pkt_private = bp;
13912 	pktp->pkt_comp = sdintr;
13913 	*pktpp = pktp;
13914 
13915 	SD_TRACE(SD_LOG_IO_CORE, un,
13916 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13917 
13918 	return (SD_PKT_ALLOC_SUCCESS);
13919 }
13920 
13921 
13922 /*
13923  *    Function: sd_destroypkt_for_uscsi
13924  *
13925  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13926  *		IOs. Also saves relevant info into the associated uscsi_cmd
13927  *		struct.
13928  *
13929  *     Context: May be called under interrupt context
13930  */
13931 
13932 static void
13933 sd_destroypkt_for_uscsi(struct buf *bp)
13934 {
13935 	struct uscsi_cmd *uscmd;
13936 	struct sd_xbuf	*xp;
13937 	struct scsi_pkt	*pktp;
13938 	struct sd_lun	*un;
13939 
13940 	ASSERT(bp != NULL);
13941 	xp = SD_GET_XBUF(bp);
13942 	ASSERT(xp != NULL);
13943 	un = SD_GET_UN(bp);
13944 	ASSERT(un != NULL);
13945 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13946 	pktp = SD_GET_PKTP(bp);
13947 	ASSERT(pktp != NULL);
13948 
13949 	SD_TRACE(SD_LOG_IO_CORE, un,
13950 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13951 
13952 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13953 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13954 	ASSERT(uscmd != NULL);
13955 
13956 	/* Save the status and the residual into the uscsi_cmd struct */
13957 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13958 	uscmd->uscsi_resid  = bp->b_resid;
13959 
13960 	/*
13961 	 * If enabled, copy any saved sense data into the area specified
13962 	 * by the uscsi command.
13963 	 */
13964 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13965 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13966 		/*
13967 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13968 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13969 		 */
13970 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13971 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13972 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13973 	}
13974 
13975 	/* We are done with the scsi_pkt; free it now */
13976 	ASSERT(SD_GET_PKTP(bp) != NULL);
13977 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13978 
13979 	SD_TRACE(SD_LOG_IO_CORE, un,
13980 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13981 }
13982 
13983 
13984 /*
13985  *    Function: sd_bioclone_alloc
13986  *
13987  * Description: Allocate a buf(9S) and init it as per the given buf
13988  *		and the various arguments.  The associated sd_xbuf
13989  *		struct is (nearly) duplicated.  The struct buf *bp
13990  *		argument is saved in new_xp->xb_private.
13991  *
13992  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13993  *		datalen - size of data area for the shadow bp
13994  *		blkno - starting LBA
13995  *		func - function pointer for b_iodone in the shadow buf. (May
13996  *			be NULL if none.)
13997  *
13998  * Return Code: Pointer to the allocated buf(9S) struct
13999  *
14000  *     Context: Can sleep.
14001  */
14002 
14003 static struct buf *
14004 sd_bioclone_alloc(struct buf *bp, size_t datalen,
14005 	daddr_t blkno, int (*func)(struct buf *))
14006 {
14007 	struct	sd_lun	*un;
14008 	struct	sd_xbuf	*xp;
14009 	struct	sd_xbuf	*new_xp;
14010 	struct	buf	*new_bp;
14011 
14012 	ASSERT(bp != NULL);
14013 	xp = SD_GET_XBUF(bp);
14014 	ASSERT(xp != NULL);
14015 	un = SD_GET_UN(bp);
14016 	ASSERT(un != NULL);
14017 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14018 
14019 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14020 	    NULL, KM_SLEEP);
14021 
14022 	new_bp->b_lblkno	= blkno;
14023 
14024 	/*
14025 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14026 	 * original xbuf into it.
14027 	 */
14028 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14029 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14030 
14031 	/*
14032 	 * The given bp is automatically saved in the xb_private member
14033 	 * of the new xbuf.  Callers are allowed to depend on this.
14034 	 */
14035 	new_xp->xb_private = bp;
14036 
14037 	new_bp->b_private  = new_xp;
14038 
14039 	return (new_bp);
14040 }
14041 
14042 /*
14043  *    Function: sd_shadow_buf_alloc
14044  *
14045  * Description: Allocate a buf(9S) and init it as per the given buf
14046  *		and the various arguments.  The associated sd_xbuf
14047  *		struct is (nearly) duplicated.  The struct buf *bp
14048  *		argument is saved in new_xp->xb_private.
14049  *
14050  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14051  *		datalen - size of data area for the shadow bp
14052  *		bflags - B_READ or B_WRITE (pseudo flag)
14053  *		blkno - starting LBA
14054  *		func - function pointer for b_iodone in the shadow buf. (May
14055  *			be NULL if none.)
14056  *
14057  * Return Code: Pointer to the allocated buf(9S) struct
14058  *
14059  *     Context: Can sleep.
14060  */
14061 
14062 static struct buf *
14063 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14064 	daddr_t blkno, int (*func)(struct buf *))
14065 {
14066 	struct	sd_lun	*un;
14067 	struct	sd_xbuf	*xp;
14068 	struct	sd_xbuf	*new_xp;
14069 	struct	buf	*new_bp;
14070 
14071 	ASSERT(bp != NULL);
14072 	xp = SD_GET_XBUF(bp);
14073 	ASSERT(xp != NULL);
14074 	un = SD_GET_UN(bp);
14075 	ASSERT(un != NULL);
14076 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14077 
14078 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14079 		bp_mapin(bp);
14080 	}
14081 
14082 	bflags &= (B_READ | B_WRITE);
14083 #if defined(__i386) || defined(__amd64)
14084 	new_bp = getrbuf(KM_SLEEP);
14085 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14086 	new_bp->b_bcount = datalen;
14087 	new_bp->b_flags = bflags |
14088 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14089 #else
14090 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14091 	    datalen, bflags, SLEEP_FUNC, NULL);
14092 #endif
14093 	new_bp->av_forw	= NULL;
14094 	new_bp->av_back	= NULL;
14095 	new_bp->b_dev	= bp->b_dev;
14096 	new_bp->b_blkno	= blkno;
14097 	new_bp->b_iodone = func;
14098 	new_bp->b_edev	= bp->b_edev;
14099 	new_bp->b_resid	= 0;
14100 
14101 	/* We need to preserve the B_FAILFAST flag */
14102 	if (bp->b_flags & B_FAILFAST) {
14103 		new_bp->b_flags |= B_FAILFAST;
14104 	}
14105 
14106 	/*
14107 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14108 	 * original xbuf into it.
14109 	 */
14110 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14111 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14112 
14113 	/* Needed later for copying data between the shadow buf & original buf */
14114 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14115 
14116 	/*
14117 	 * The given bp is automatically saved in the xb_private member
14118 	 * of the new xbuf.  Callers are allowed to depend on this.
14119 	 */
14120 	new_xp->xb_private = bp;
14121 
14122 	new_bp->b_private  = new_xp;
14123 
14124 	return (new_bp);
14125 }
14126 
14127 /*
14128  *    Function: sd_bioclone_free
14129  *
14130  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14131  *		in a larger-than-partition operation.
14132  *
14133  *     Context: May be called under interrupt context
14134  */
14135 
14136 static void
14137 sd_bioclone_free(struct buf *bp)
14138 {
14139 	struct sd_xbuf	*xp;
14140 
14141 	ASSERT(bp != NULL);
14142 	xp = SD_GET_XBUF(bp);
14143 	ASSERT(xp != NULL);
14144 
14145 	/*
14146 	 * Call bp_mapout() before freeing the buf, in case a lower
14147 	 * layer or HBA had done a bp_mapin().  We must do this here
14148 	 * as we are the "originator" of the shadow buf.
14149 	 */
14150 	bp_mapout(bp);
14151 
14152 	/*
14153 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14154 	 * never gets confused by a stale value in this field. (Just a little
14155 	 * extra defensiveness here.)
14156 	 */
14157 	bp->b_iodone = NULL;
14158 
14159 	freerbuf(bp);
14160 
14161 	kmem_free(xp, sizeof (struct sd_xbuf));
14162 }
14163 
14164 /*
14165  *    Function: sd_shadow_buf_free
14166  *
14167  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14168  *
14169  *     Context: May be called under interrupt context
14170  */
14171 
14172 static void
14173 sd_shadow_buf_free(struct buf *bp)
14174 {
14175 	struct sd_xbuf	*xp;
14176 
14177 	ASSERT(bp != NULL);
14178 	xp = SD_GET_XBUF(bp);
14179 	ASSERT(xp != NULL);
14180 
14181 #if defined(__sparc)
14182 	/*
14183 	 * Call bp_mapout() before freeing the buf, in case a lower
14184 	 * layer or HBA had done a bp_mapin().  We must do this here
14185 	 * as we are the "originator" of the shadow buf.
14186 	 */
14187 	bp_mapout(bp);
14188 #endif
14189 
14190 	/*
14191 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14192 	 * never gets confused by a stale value in this field. (Just a little
14193 	 * extra defensiveness here.)
14194 	 */
14195 	bp->b_iodone = NULL;
14196 
14197 #if defined(__i386) || defined(__amd64)
14198 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14199 	freerbuf(bp);
14200 #else
14201 	scsi_free_consistent_buf(bp);
14202 #endif
14203 
14204 	kmem_free(xp, sizeof (struct sd_xbuf));
14205 }
14206 
14207 
14208 /*
14209  *    Function: sd_print_transport_rejected_message
14210  *
14211  * Description: This implements the ludicrously complex rules for printing
14212  *		a "transport rejected" message.  This is to address the
14213  *		specific problem of having a flood of this error message
14214  *		produced when a failover occurs.
14215  *
14216  *     Context: Any.
14217  */
14218 
14219 static void
14220 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14221 	int code)
14222 {
14223 	ASSERT(un != NULL);
14224 	ASSERT(mutex_owned(SD_MUTEX(un)));
14225 	ASSERT(xp != NULL);
14226 
14227 	/*
14228 	 * Print the "transport rejected" message under the following
14229 	 * conditions:
14230 	 *
14231 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14232 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14233 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14234 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14235 	 *   scsi_transport(9F) (which indicates that the target might have
14236 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14237 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14238 	 * received, and reset to zero whenever a TRAN_ACCEPT is returned
14239 	 *   from scsi_transport().
14240 	 *
14241 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14242 	 * the preceding cases in order for the message to be printed.
14243 	 */
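	/*
	 * Equivalently, a sketch of the decision implemented below:
	 *
	 *	print = !FLAG_SILENT &&
	 *	    (SD_LOGMASK_DIAG || (code != TRAN_FATAL_ERROR) ||
	 *	    (un_tran_fatal_count == 1));
	 */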
14244 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14245 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14246 		    (code != TRAN_FATAL_ERROR) ||
14247 		    (un->un_tran_fatal_count == 1)) {
14248 			switch (code) {
14249 			case TRAN_BADPKT:
14250 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14251 				    "transport rejected bad packet\n");
14252 				break;
14253 			case TRAN_FATAL_ERROR:
14254 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14255 				    "transport rejected fatal error\n");
14256 				break;
14257 			default:
14258 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14259 				    "transport rejected (%d)\n", code);
14260 				break;
14261 			}
14262 		}
14263 	}
14264 }
14265 
14266 
14267 /*
14268  *    Function: sd_add_buf_to_waitq
14269  *
14270  * Description: Add the given buf(9S) struct to the wait queue for the
14271  *		instance.  If sorting is enabled, then the buf is added
14272  *		to the queue via an elevator sort algorithm (a la
14273  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14274  *		If sorting is not enabled, then the buf is just added
14275  *		to the end of the wait queue.
14276  *
14277  * Return Code: void
14278  *
14279  *     Context: Does not sleep/block, therefore technically can be called
14280  *		from any context.  However if sorting is enabled then the
14281  *		execution time is indeterminate and may become long if
14282  *		the wait queue grows large.
14283  */
14284 
14285 static void
14286 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14287 {
14288 	struct buf *ap;
14289 
14290 	ASSERT(bp != NULL);
14291 	ASSERT(un != NULL);
14292 	ASSERT(mutex_owned(SD_MUTEX(un)));
14293 
14294 	/* If the queue is empty, add the buf as the only entry & return. */
14295 	if (un->un_waitq_headp == NULL) {
14296 		ASSERT(un->un_waitq_tailp == NULL);
14297 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14298 		bp->av_forw = NULL;
14299 		return;
14300 	}
14301 
14302 	ASSERT(un->un_waitq_tailp != NULL);
14303 
14304 	/*
14305 	 * If sorting is disabled, just add the buf to the tail end of
14306 	 * the wait queue and return.
14307 	 */
14308 	if (un->un_f_disksort_disabled) {
14309 		un->un_waitq_tailp->av_forw = bp;
14310 		un->un_waitq_tailp = bp;
14311 		bp->av_forw = NULL;
14312 		return;
14313 	}
14314 
14315 	/*
14316 	 * Sort through the list of requests currently on the wait queue
14317 	 * and add the new buf request at the appropriate position.
14318 	 *
14319 	 * The un->un_waitq_headp is an activity chain pointer on which
14320 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14321 	 * first queue holds those requests which are positioned after
14322 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14323 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14324 	 * Thus we implement a one way scan, retracting after reaching
14325 	 * the end of the drive to the first request on the second
14326 	 * queue, at which time it becomes the first queue.
14327 	 * A one-way scan is natural because of the way UNIX read-ahead
14328 	 * blocks are allocated.
14329 	 *
14330 	 * If we lie after the first request, then we must locate the
14331 	 * second request list and add ourselves to it.
14332 	 */
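	/*
	 * Worked example (block numbers only): suppose the waitq holds
	 * 40 -> 70 -> 90 -> 10 -> 30.  The first list (40, 70, 90) lies
	 * ahead of the current position; the second list (10, 30) holds
	 * requests whose block numbers have already been passed.  A new
	 * request for block 50 sorts into the first list (40 -> 50 -> 70),
	 * while a request for block 20 sorts into the second list
	 * (10 -> 20 -> 30), because 20 < 40 means its block number has
	 * already been passed.
	 */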
14333 	ap = un->un_waitq_headp;
14334 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14335 		while (ap->av_forw != NULL) {
14336 			/*
14337 			 * Look for an "inversion" in the (normally
14338 			 * ascending) block numbers. This indicates
14339 			 * the start of the second request list.
14340 			 */
14341 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14342 				/*
14343 				 * Search the second request list for the
14344 				 * first request at a larger block number.
14345 				 * We go before that; however if there is
14346 				 * no such request, we go at the end.
14347 				 */
14348 				do {
14349 					if (SD_GET_BLKNO(bp) <
14350 					    SD_GET_BLKNO(ap->av_forw)) {
14351 						goto insert;
14352 					}
14353 					ap = ap->av_forw;
14354 				} while (ap->av_forw != NULL);
14355 				goto insert;		/* after last */
14356 			}
14357 			ap = ap->av_forw;
14358 		}
14359 
14360 		/*
14361 		 * No inversions... we will go after the last, and
14362 		 * be the first request in the second request list.
14363 		 */
14364 		goto insert;
14365 	}
14366 
14367 	/*
14368 	 * Request is at/after the current request...
14369 	 * sort in the first request list.
14370 	 */
14371 	while (ap->av_forw != NULL) {
14372 		/*
14373 		 * We want to go after the current request (1) if
14374 		 * there is an inversion after it (i.e. it is the end
14375 		 * of the first request list), or (2) if the next
14376 		 * request is a larger block no. than our request.
14377 		 */
14378 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14379 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14380 			goto insert;
14381 		}
14382 		ap = ap->av_forw;
14383 	}
14384 
14385 	/*
14386 	 * Neither a second list nor a larger request, therefore
14387 	 * we go at the end of the first list (which is the same
14388 	 * as the end of the whole shebang).
14389 	 */
14390 insert:
14391 	bp->av_forw = ap->av_forw;
14392 	ap->av_forw = bp;
14393 
14394 	/*
14395 	 * If we inserted onto the tail end of the waitq, make sure the
14396 	 * tail pointer is updated.
14397 	 */
14398 	if (ap == un->un_waitq_tailp) {
14399 		un->un_waitq_tailp = bp;
14400 	}
14401 }
14402 
14403 
14404 /*
14405  *    Function: sd_start_cmds
14406  *
14407  * Description: Remove and transport cmds from the driver queues.
14408  *
14409  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14410  *
14411  *		immed_bp - ptr to a buf to be transported immediately. Only
14412  *		the immed_bp is transported; bufs on the waitq are not
14413  *		processed and the un_retry_bp is not checked.  If immed_bp is
14414  *		NULL, then normal queue processing is performed.
14415  *
14416  *     Context: May be called from kernel thread context, interrupt context,
14417  *		or runout callback context. This function may not block or
14418  *		call routines that block.
14419  */
14420 
14421 static void
14422 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14423 {
14424 	struct	sd_xbuf	*xp;
14425 	struct	buf	*bp;
14426 	void	(*statp)(kstat_io_t *);
14427 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14428 	void	(*saved_statp)(kstat_io_t *);
14429 #endif
14430 	int	rval;
14431 
14432 	ASSERT(un != NULL);
14433 	ASSERT(mutex_owned(SD_MUTEX(un)));
14434 	ASSERT(un->un_ncmds_in_transport >= 0);
14435 	ASSERT(un->un_throttle >= 0);
14436 
14437 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14438 
14439 	do {
14440 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14441 		saved_statp = NULL;
14442 #endif
14443 
14444 		/*
14445 		 * If we are syncing or dumping, fail the command to
14446 		 * avoid recursively calling back into scsi_transport().
14447 		 * The dump I/O itself uses a separate code path so this
14448 		 * only prevents non-dump I/O from being sent while dumping.
14449 		 * File system sync takes place before dumping begins.
14450 		 * During panic, filesystem I/O is allowed provided
14451 		 * un_in_callback is <= 1.  This is to prevent recursion
14452 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14453 		 * sd_start_cmds and so on.  See panic.c for more information
14454 		 * about the states the system can be in during panic.
14455 		 */
14456 		if ((un->un_state == SD_STATE_DUMPING) ||
14457 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14458 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14459 			    "sd_start_cmds: panicking\n");
14460 			goto exit;
14461 		}
14462 
14463 		if ((bp = immed_bp) != NULL) {
14464 			/*
14465 			 * We have a bp that must be transported immediately.
14466 			 * It's OK to transport the immed_bp here without doing
14467 			 * the throttle limit check because the immed_bp is
14468 			 * always used in a retry/recovery case. This means
14469 			 * that we know we are not at the throttle limit by
14470 			 * virtue of the fact that to get here we must have
14471 			 * already gotten a command back via sdintr(). This also
14472 			 * relies on (1) the command on un_retry_bp preventing
14473 			 * further commands from the waitq from being issued;
14474 			 * and (2) the code in sd_retry_command checking the
14475 			 * throttle limit before issuing a delayed or immediate
14476 			 * retry. This holds even if the throttle limit is
14477 			 * currently ratcheted down from its maximum value.
14478 			 */
14479 			statp = kstat_runq_enter;
14480 			if (bp == un->un_retry_bp) {
14481 				ASSERT((un->un_retry_statp == NULL) ||
14482 				    (un->un_retry_statp == kstat_waitq_enter) ||
14483 				    (un->un_retry_statp ==
14484 				    kstat_runq_back_to_waitq));
14485 				/*
14486 				 * If the waitq kstat was incremented when
14487 				 * sd_set_retry_bp() queued this bp for a retry,
14488 				 * then we must set up statp so that the waitq
14489 				 * count will get decremented correctly below.
14490 				 * Also we must clear un->un_retry_statp to
14491 				 * ensure that we do not act on a stale value
14492 				 * in this field.
14493 				 */
14494 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14495 				    (un->un_retry_statp ==
14496 				    kstat_runq_back_to_waitq)) {
14497 					statp = kstat_waitq_to_runq;
14498 				}
14499 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14500 				saved_statp = un->un_retry_statp;
14501 #endif
14502 				un->un_retry_statp = NULL;
14503 
14504 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14505 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14506 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14507 				    un, un->un_retry_bp, un->un_throttle,
14508 				    un->un_ncmds_in_transport);
14509 			} else {
14510 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14511 				    "processing priority bp:0x%p\n", bp);
14512 			}
14513 
14514 		} else if ((bp = un->un_waitq_headp) != NULL) {
14515 			/*
14516 			 * A command on the waitq is ready to go, but do not
14517 			 * send it if:
14518 			 *
14519 			 * (1) the throttle limit has been reached, or
14520 			 * (2) a retry is pending, or
14521 			 * (3) a START_STOP_UNIT callback is pending, or
14522 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14523 			 *	command is pending.
14524 			 *
14525 			 * For all of these conditions, IO processing will
14526 			 * restart after the condition is cleared.
14527 			 */
14528 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14529 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14530 				    "sd_start_cmds: exiting, "
14531 				    "throttle limit reached!\n");
14532 				goto exit;
14533 			}
14534 			if (un->un_retry_bp != NULL) {
14535 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14536 				    "sd_start_cmds: exiting, retry pending!\n");
14537 				goto exit;
14538 			}
14539 			if (un->un_startstop_timeid != NULL) {
14540 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14541 				    "sd_start_cmds: exiting, "
14542 				    "START_STOP pending!\n");
14543 				goto exit;
14544 			}
14545 			if (un->un_direct_priority_timeid != NULL) {
14546 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14547 				    "sd_start_cmds: exiting, "
14548 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14549 				goto exit;
14550 			}
14551 
14552 			/* Dequeue the command */
14553 			un->un_waitq_headp = bp->av_forw;
14554 			if (un->un_waitq_headp == NULL) {
14555 				un->un_waitq_tailp = NULL;
14556 			}
14557 			bp->av_forw = NULL;
14558 			statp = kstat_waitq_to_runq;
14559 			SD_TRACE(SD_LOG_IO_CORE, un,
14560 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14561 
14562 		} else {
14563 			/* No work to do so bail out now */
14564 			SD_TRACE(SD_LOG_IO_CORE, un,
14565 			    "sd_start_cmds: no more work, exiting!\n");
14566 			goto exit;
14567 		}
14568 
14569 		/*
14570 		 * Reset the state to normal. This is the mechanism by which
14571 		 * the state transitions from either SD_STATE_RWAIT or
14572 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14573 		 * If state is SD_STATE_PM_CHANGING then this command is
14574 		 * part of the device power control and the state must
14575 		 * not be put back to normal. Doing so would allow new
14576 		 * commands to proceed when they shouldn't; the device
14577 		 * may be going off.
14578 		 */
14579 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14580 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14581 			New_state(un, SD_STATE_NORMAL);
14582 		}
14583 
14584 		xp = SD_GET_XBUF(bp);
14585 		ASSERT(xp != NULL);
14586 
14587 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14588 		/*
14589 		 * Allocate the scsi_pkt if we need one, or attach DMA
14590 		 * resources if we have a scsi_pkt that needs them. The
14591 		 * latter should only occur for commands that are being
14592 		 * retried.
14593 		 */
14594 		if ((xp->xb_pktp == NULL) ||
14595 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14596 #else
14597 		if (xp->xb_pktp == NULL) {
14598 #endif
14599 			/*
14600 			 * There is no scsi_pkt allocated for this buf. Call
14601 			 * the initpkt function to allocate & init one.
14602 			 *
14603 			 * The scsi_init_pkt runout callback functionality is
14604 			 * implemented as follows:
14605 			 *
14606 			 * 1) The initpkt function always calls
14607 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14608 			 *    callback routine.
14609 			 * 2) A successful packet allocation is initialized and
14610 			 *    the I/O is transported.
14611 			 * 3) The I/O associated with an allocation resource
14612 			 *    failure is left on its queue to be retried via
14613 			 *    runout or the next I/O.
14614 			 * 4) The I/O associated with a DMA error is removed
14615 			 *    from the queue and failed with EIO. Processing of
14616 			 *    the transport queues is also halted to be
14617 			 *    restarted via runout or the next I/O.
14618 			 * 5) The I/O associated with a CDB size or packet
14619 			 *    size error is removed from the queue and failed
14620 			 *    with EIO. Processing of the transport queues is
14621 			 *    continued.
14622 			 *
14623 			 * Note: there is no interface for canceling a runout
14624 			 * callback. To prevent the driver from detaching or
14625 			 * suspending while a runout is pending the driver
14626 			 * state is set to SD_STATE_RWAIT
14627 			 *
14628 			 * Note: using the scsi_init_pkt callback facility can
14629 			 * result in an I/O request persisting at the head of
14630 			 * the list which cannot be satisfied even after
14631 			 * multiple retries. In the future the driver may
14632 			 * implement some kind of maximum runout count before
14633 			 * failing an I/O.
14634 			 *
14635 			 * Note: the use of funcp below may seem superfluous,
14636 			 * but it helps warlock figure out the correct
14637 			 * initpkt function calls (see [s]sd.wlcmd).
14638 			 */
14639 			struct scsi_pkt	*pktp;
14640 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14641 
14642 			ASSERT(bp != un->un_rqs_bp);
14643 
14644 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14645 			switch ((*funcp)(bp, &pktp)) {
14646 			case  SD_PKT_ALLOC_SUCCESS:
14647 				xp->xb_pktp = pktp;
14648 				SD_TRACE(SD_LOG_IO_CORE, un,
14649 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14650 				    pktp);
14651 				goto got_pkt;
14652 
14653 			case SD_PKT_ALLOC_FAILURE:
14654 				/*
14655 				 * Temporary (hopefully) resource depletion.
14656 				 * Since retries and RQS commands always have a
14657 				 * scsi_pkt allocated, these cases should never
14658 				 * get here. So the only cases this needs to
14659 				 * handle is a bp from the waitq (which we put
14660 				 * back onto the waitq for sdrunout), or a bp
14661 				 * sent as an immed_bp (which we just fail).
14662 				 */
14663 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14664 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14665 
14666 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14667 
14668 				if (bp == immed_bp) {
14669 					/*
14670 					 * If SD_XB_DMA_FREED is clear, then
14671 					 * this is a failure to allocate a
14672 					 * scsi_pkt, and we must fail the
14673 					 * command.
14674 					 */
14675 					if ((xp->xb_pkt_flags &
14676 					    SD_XB_DMA_FREED) == 0) {
14677 						break;
14678 					}
14679 
14680 					/*
14681 					 * If this immediate command is NOT our
14682 					 * un_retry_bp, then we must fail it.
14683 					 */
14684 					if (bp != un->un_retry_bp) {
14685 						break;
14686 					}
14687 
14688 					/*
14689 					 * We get here if this cmd is our
14690 					 * un_retry_bp that was DMAFREED, but
14691 					 * scsi_init_pkt() failed to reallocate
14692 					 * DMA resources when we attempted to
14693 					 * retry it. This can happen when an
14694 					 * mpxio failover is in progress, but
14695 					 * we don't want to just fail the
14696 					 * command in this case.
14697 					 *
14698 					 * Use timeout(9F) to restart it after
14699 					 * a 100ms delay.  We don't want to
14700 					 * let sdrunout() restart it, because
14701 					 * sdrunout() is just supposed to start
14702 					 * commands that are sitting on the
14703 					 * wait queue.  The un_retry_bp stays
14704 					 * set until the command completes, but
14705 					 * sdrunout can be called many times
14706 					 * before that happens.  Since sdrunout
14707 					 * cannot tell if the un_retry_bp is
14708 					 * already in the transport, it could
14709 					 * end up calling scsi_transport() for
14710 					 * the un_retry_bp multiple times.
14711 					 *
14712 					 * Also: don't schedule the callback
14713 					 * if some other callback is already
14714 					 * pending.
14715 					 */
14716 					if (un->un_retry_statp == NULL) {
14717 						/*
14718 						 * restore the kstat pointer to
14719 						 * keep kstat counts coherent
14720 						 * when we do retry the command.
14721 						 */
14722 						un->un_retry_statp =
14723 						    saved_statp;
14724 					}
14725 
14726 					if ((un->un_startstop_timeid == NULL) &&
14727 					    (un->un_retry_timeid == NULL) &&
14728 					    (un->un_direct_priority_timeid ==
14729 					    NULL)) {
14730 
14731 						un->un_retry_timeid =
14732 						    timeout(
14733 						    sd_start_retry_command,
14734 						    un, SD_RESTART_TIMEOUT);
14735 					}
14736 					goto exit;
14737 				}
14738 
14739 #else
14740 				if (bp == immed_bp) {
14741 					break;	/* Just fail the command */
14742 				}
14743 #endif
14744 
14745 				/* Add the buf back to the head of the waitq */
14746 				bp->av_forw = un->un_waitq_headp;
14747 				un->un_waitq_headp = bp;
14748 				if (un->un_waitq_tailp == NULL) {
14749 					un->un_waitq_tailp = bp;
14750 				}
14751 				goto exit;
14752 
14753 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14754 				/*
14755 				 * HBA DMA resource failure. Fail the command
14756 				 * and continue processing of the queues.
14757 				 */
14758 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14759 				    "sd_start_cmds: "
14760 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14761 				break;
14762 
14763 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14764 				/*
14765 				 * Note:x86: Partial DMA mapping not supported
14766 				 * for USCSI commands, and not all of the needed
14767 				 * DMA resources could be allocated.
14768 				 */
14769 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14770 				    "sd_start_cmds: "
14771 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14772 				break;
14773 
14774 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14775 				/*
14776 				 * Note:x86: Request cannot fit into CDB based
14777 				 * on lba and len.
14778 				 */
14779 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14780 				    "sd_start_cmds: "
14781 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14782 				break;
14783 
14784 			default:
14785 				/* Should NEVER get here! */
14786 				panic("scsi_initpkt error");
14787 				/*NOTREACHED*/
14788 			}
14789 
14790 			/*
14791 			 * Fatal error in allocating a scsi_pkt for this buf.
14792 			 * Update kstats & return the buf with an error code.
14793 			 * We must use sd_return_failed_command_no_restart() to
14794 			 * avoid a recursive call back into sd_start_cmds().
14795 			 * However this also means that we must keep processing
14796 			 * the waitq here in order to avoid stalling.
14797 			 */
14798 			if (statp == kstat_waitq_to_runq) {
14799 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14800 			}
14801 			sd_return_failed_command_no_restart(un, bp, EIO);
14802 			if (bp == immed_bp) {
14803 				/* immed_bp is gone by now, so clear this */
14804 				immed_bp = NULL;
14805 			}
14806 			continue;
14807 		}
14808 got_pkt:
14809 		if (bp == immed_bp) {
14810 			/* Go to the head of the class: run this command first. */
14811 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14812 		}
14813 
14814 		un->un_ncmds_in_transport++;
14815 		SD_UPDATE_KSTATS(un, statp, bp);
14816 
14817 		/*
14818 		 * Call scsi_transport() to send the command to the target.
14819 		 * According to SCSA architecture, we must drop the mutex here
14820 		 * before calling scsi_transport() in order to avoid deadlock.
14821 		 * Note that the scsi_pkt's completion routine can be executed
14822 		 * (from interrupt context) even before the call to
14823 		 * scsi_transport() returns.
14824 		 */
14825 		SD_TRACE(SD_LOG_IO_CORE, un,
14826 		    "sd_start_cmds: calling scsi_transport()\n");
14827 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14828 
14829 		mutex_exit(SD_MUTEX(un));
14830 		rval = scsi_transport(xp->xb_pktp);
14831 		mutex_enter(SD_MUTEX(un));
14832 
14833 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14834 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14835 
14836 		switch (rval) {
14837 		case TRAN_ACCEPT:
14838 			/* Clear this with every pkt accepted by the HBA */
14839 			un->un_tran_fatal_count = 0;
14840 			break;	/* Success; try the next cmd (if any) */
14841 
14842 		case TRAN_BUSY:
14843 			un->un_ncmds_in_transport--;
14844 			ASSERT(un->un_ncmds_in_transport >= 0);
14845 
14846 			/*
14847 			 * Don't retry request sense, the sense data
14848 			 * is lost when another request is sent.
14849 			 * Free up the rqs buf and retry
14850 			 * the original failed cmd.  Update kstat.
14851 			 */
14852 			if (bp == un->un_rqs_bp) {
14853 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14854 				bp = sd_mark_rqs_idle(un, xp);
14855 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14856 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14857 					kstat_waitq_enter);
14858 				goto exit;
14859 			}
14860 
14861 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14862 			/*
14863 			 * Free the DMA resources for the scsi_pkt. This will
14864 			 * allow mpxio to select another path the next time
14865 			 * we call scsi_transport() with this scsi_pkt.
14866 			 * See sdintr() for the rationale behind this.
14867 			 */
14868 			if ((un->un_f_is_fibre == TRUE) &&
14869 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14870 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14871 				scsi_dmafree(xp->xb_pktp);
14872 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14873 			}
14874 #endif
14875 
14876 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14877 				/*
14878 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14879 				 * are for error recovery situations. These do
14880 				 * not use the normal command waitq, so if they
14881 				 * get a TRAN_BUSY we cannot put them back onto
14882 				 * the waitq for later retry. One possible
14883 				 * problem is that there could already be some
14884 				 * other command on un_retry_bp that is waiting
14885 				 * for this one to complete, so we would be
14886 				 * deadlocked if we put this command back onto
14887 				 * the waitq for later retry (since un_retry_bp
14888 				 * must complete before the driver gets back to
14889 				 * commands on the waitq).
14890 				 *
14891 				 * To avoid deadlock we must schedule a callback
14892 				 * that will restart this command after a set
14893 				 * interval.  This should keep retrying for as
14894 				 * long as the underlying transport keeps
14895 				 * returning TRAN_BUSY (just like for other
14896 				 * commands).  Use the same timeout interval as
14897 				 * for the ordinary TRAN_BUSY retry.
14898 				 */
14899 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14900 				    "sd_start_cmds: scsi_transport() returned "
14901 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14902 
14903 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14904 				un->un_direct_priority_timeid =
14905 				    timeout(sd_start_direct_priority_command,
14906 				    bp, SD_BSY_TIMEOUT / 500);
14907 
14908 				goto exit;
14909 			}
14910 
14911 			/*
14912 			 * For TRAN_BUSY, we want to reduce the throttle value,
14913 			 * unless we are retrying a command.
14914 			 */
14915 			if (bp != un->un_retry_bp) {
14916 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14917 			}
14918 
14919 			/*
14920 			 * Set up the bp to be tried again 10 ms later.
14921 			 * Note:x86: Is there a timeout value in the sd_lun
14922 			 * for this condition?
14923 			 */
14924 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14925 				kstat_runq_back_to_waitq);
14926 			goto exit;
14927 
14928 		case TRAN_FATAL_ERROR:
14929 			un->un_tran_fatal_count++;
14930 			/* FALLTHRU */
14931 
14932 		case TRAN_BADPKT:
14933 		default:
14934 			un->un_ncmds_in_transport--;
14935 			ASSERT(un->un_ncmds_in_transport >= 0);
14936 
14937 			/*
14938 			 * If this is our REQUEST SENSE command with a
14939 			 * transport error, we must get back the pointers
14940 			 * to the original buf, and mark the REQUEST
14941 			 * SENSE command as "available".
14942 			 */
14943 			if (bp == un->un_rqs_bp) {
14944 				bp = sd_mark_rqs_idle(un, xp);
14945 				xp = SD_GET_XBUF(bp);
14946 			} else {
14947 				/*
14948 				 * Legacy behavior: do not update transport
14949 				 * error count for request sense commands.
14950 				 */
14951 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14952 			}
14953 
14954 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14955 			sd_print_transport_rejected_message(un, xp, rval);
14956 
14957 			/*
14958 			 * We must use sd_return_failed_command_no_restart() to
14959 			 * avoid a recursive call back into sd_start_cmds().
14960 			 * However this also means that we must keep processing
14961 			 * the waitq here in order to avoid stalling.
14962 			 */
14963 			sd_return_failed_command_no_restart(un, bp, EIO);
14964 
14965 			/*
14966 			 * Notify any threads waiting in sd_ddi_suspend() that
14967 			 * a command completion has occurred.
14968 			 */
14969 			if (un->un_state == SD_STATE_SUSPENDED) {
14970 				cv_broadcast(&un->un_disk_busy_cv);
14971 			}
14972 
14973 			if (bp == immed_bp) {
14974 				/* immed_bp is gone by now, so clear this */
14975 				immed_bp = NULL;
14976 			}
14977 			break;
14978 		}
14979 
14980 	} while (immed_bp == NULL);
14981 
14982 exit:
14983 	ASSERT(mutex_owned(SD_MUTEX(un)));
14984 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14985 }
14986 
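/*
 * Usage sketch for sd_start_cmds() (matching the callers elsewhere in
 * this file): callers must hold SD_MUTEX across the call.  Passing a
 * NULL immed_bp simply drains the wait queue, e.g. from a timeout or
 * runout callback:
 *
 *	mutex_enter(SD_MUTEX(un));
 *	sd_start_cmds(un, NULL);
 *	mutex_exit(SD_MUTEX(un));
 *
 * Passing a non-NULL immed_bp transports that command with priority.
 */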
14987 
14988 /*
14989  *    Function: sd_return_command
14990  *
14991  * Description: Returns a command to its originator (with or without an
14992  *		error).  Also starts commands waiting to be transported
14993  *		to the target.
14994  *
14995  *     Context: May be called from interrupt, kernel, or timeout context
14996  */
14997 
14998 static void
14999 sd_return_command(struct sd_lun *un, struct buf *bp)
15000 {
15001 	struct sd_xbuf *xp;
15002 #if defined(__i386) || defined(__amd64)
15003 	struct scsi_pkt *pktp;
15004 #endif
15005 
15006 	ASSERT(bp != NULL);
15007 	ASSERT(un != NULL);
15008 	ASSERT(mutex_owned(SD_MUTEX(un)));
15009 	ASSERT(bp != un->un_rqs_bp);
15010 	xp = SD_GET_XBUF(bp);
15011 	ASSERT(xp != NULL);
15012 
15013 #if defined(__i386) || defined(__amd64)
15014 	pktp = SD_GET_PKTP(bp);
15015 #endif
15016 
15017 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
15018 
15019 #if defined(__i386) || defined(__amd64)
15020 	/*
15021 	 * Note:x86: check for the "sdrestart failed" case.
15022 	 */
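	/*
	 * A nonzero xb_dma_resid with a zero pkt_resid means the current
	 * DMA window completed cleanly but part of the original request
	 * remains unmapped; in that case set up and send the next portion.
	 */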
15023 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
15024 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
15025 		(xp->xb_pktp->pkt_resid == 0)) {
15026 
15027 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
15028 			/*
15029 			 * Successfully set up next portion of cmd
15030 			 * transfer, try sending it
15031 			 */
15032 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15033 			    NULL, NULL, 0, (clock_t)0, NULL);
15034 			sd_start_cmds(un, NULL);
15035 			return;	/* Note:x86: need a return here? */
15036 		}
15037 	}
15038 #endif
15039 
15040 	/*
15041 	 * If this is the failfast bp, clear it from un_failfast_bp. This
15042 	 * can happen if, upon being retried, the failfast bp either
15043 	 * succeeded or encountered another error (possibly even a different
15044 	 * error than the one that precipitated the failfast state, but in
15045 	 * that case it would have had to exhaust retries as well). Regardless,
15046 	 * this should never occur while the instance is in the active
15047 	 * failfast state.
15048 	 */
15049 	if (bp == un->un_failfast_bp) {
15050 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15051 		un->un_failfast_bp = NULL;
15052 	}
15053 
15054 	/*
15055 	 * Clear the failfast state upon successful completion of ANY cmd.
15056 	 */
15057 	if (bp->b_error == 0) {
15058 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15059 	}
15060 
15061 	/*
15062 	 * This is used if the command was retried one or more times. Show that
15063 	 * we are done with it, and allow processing of the waitq to resume.
15064 	 */
15065 	if (bp == un->un_retry_bp) {
15066 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15067 		    "sd_return_command: un:0x%p: "
15068 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15069 		un->un_retry_bp = NULL;
15070 		un->un_retry_statp = NULL;
15071 	}
15072 
15073 	SD_UPDATE_RDWR_STATS(un, bp);
15074 	SD_UPDATE_PARTITION_STATS(un, bp);
15075 
15076 	switch (un->un_state) {
15077 	case SD_STATE_SUSPENDED:
15078 		/*
15079 		 * Notify any threads waiting in sd_ddi_suspend() that
15080 		 * a command completion has occurred.
15081 		 */
15082 		cv_broadcast(&un->un_disk_busy_cv);
15083 		break;
15084 	default:
15085 		sd_start_cmds(un, NULL);
15086 		break;
15087 	}
15088 
15089 	/* Return this command up the iodone chain to its originator. */
15090 	mutex_exit(SD_MUTEX(un));
15091 
15092 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15093 	xp->xb_pktp = NULL;
15094 
15095 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15096 
15097 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15098 	mutex_enter(SD_MUTEX(un));
15099 
15100 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15101 }
15102 
15103 
15104 /*
15105  *    Function: sd_return_failed_command
15106  *
15107  * Description: Command completion when an error occurred.
15108  *
15109  *     Context: May be called from interrupt context
15110  */
15111 
15112 static void
15113 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15114 {
15115 	ASSERT(bp != NULL);
15116 	ASSERT(un != NULL);
15117 	ASSERT(mutex_owned(SD_MUTEX(un)));
15118 
15119 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15120 	    "sd_return_failed_command: entry\n");
15121 
15122 	/*
15123 	 * b_resid could already be nonzero due to a partial data
15124 	 * transfer, so do not change it here.
15125 	 */
15126 	SD_BIOERROR(bp, errcode);
15127 
15128 	sd_return_command(un, bp);
15129 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15130 	    "sd_return_failed_command: exit\n");
15131 }
15132 
15133 
15134 /*
15135  *    Function: sd_return_failed_command_no_restart
15136  *
15137  * Description: Same as sd_return_failed_command, but ensures that no
15138  *		call back into sd_start_cmds will be issued.
15139  *
15140  *     Context: May be called from interrupt context
15141  */
15142 
15143 static void
15144 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15145 	int errcode)
15146 {
15147 	struct sd_xbuf *xp;
15148 
15149 	ASSERT(bp != NULL);
15150 	ASSERT(un != NULL);
15151 	ASSERT(mutex_owned(SD_MUTEX(un)));
15152 	xp = SD_GET_XBUF(bp);
15153 	ASSERT(xp != NULL);
15154 	ASSERT(errcode != 0);
15155 
15156 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15157 	    "sd_return_failed_command_no_restart: entry\n");
15158 
15159 	/*
15160 	 * b_resid could already be nonzero due to a partial data
15161 	 * transfer, so do not change it here.
15162 	 */
15163 	SD_BIOERROR(bp, errcode);
15164 
15165 	/*
15166 	 * If this is the failfast bp, clear it. This can happen if the
15167 	 * failfast bp encountered a fatal error when we attempted to
15168 	 * re-try it (such as a scsi_transport(9F) failure).  However
15169 	 * we should NOT be in an active failfast state if the failfast
15170 	 * bp is not NULL.
15171 	 */
15172 	if (bp == un->un_failfast_bp) {
15173 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15174 		un->un_failfast_bp = NULL;
15175 	}
15176 
15177 	if (bp == un->un_retry_bp) {
15178 		/*
15179 		 * This command was retried one or more times. Show that we are
15180 		 * done with it, and allow processing of the waitq to resume.
15181 		 */
15182 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15183 		    "sd_return_failed_command_no_restart: "
15184 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15185 		un->un_retry_bp = NULL;
15186 		un->un_retry_statp = NULL;
15187 	}
15188 
15189 	SD_UPDATE_RDWR_STATS(un, bp);
15190 	SD_UPDATE_PARTITION_STATS(un, bp);
15191 
15192 	mutex_exit(SD_MUTEX(un));
15193 
15194 	if (xp->xb_pktp != NULL) {
15195 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15196 		xp->xb_pktp = NULL;
15197 	}
15198 
15199 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15200 
15201 	mutex_enter(SD_MUTEX(un));
15202 
15203 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15204 	    "sd_return_failed_command_no_restart: exit\n");
15205 }
15206 
15207 
15208 /*
15209  *    Function: sd_retry_command
15210  *
15211  * Description: queue up a command for retry, or (optionally) fail it
15212  *		if retry counts are exhausted.
15213  *
15214  *   Arguments: un - Pointer to the sd_lun struct for the target.
15215  *
15216  *		bp - Pointer to the buf for the command to be retried.
15217  *
15218  *		retry_check_flag - Flag to see which (if any) of the retry
15219  *		   counts should be decremented/checked. If the indicated
15220  *		   retry count is exhausted, then the command will not be
15221  *		   retried; it will be failed instead. This should use a
15222  *		   value equal to one of the following:
15223  *
15224  *			SD_RETRIES_NOCHECK
15225  *			SD_RETRIES_STANDARD
15226  *			SD_RETRIES_VICTIM
15227  *
15228  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15229  *		   if the check should be made to see if FLAG_ISOLATE is set
15230  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15231  *		   not retried, it is simply failed.
15232  *
15233  *		user_funcp - Ptr to function to call before dispatching the
15234  *		   command. May be NULL if no action needs to be performed.
15235  *		   (Primarily intended for printing messages.)
15236  *
15237  *		user_arg - Optional argument to be passed along to
15238  *		   the user_funcp call.
15239  *
15240  *		failure_code - errno return code to set in the bp if the
15241  *		   command is going to be failed.
15242  *
15243  *		retry_delay - Retry delay interval in (clock_t) units. May
15244  *		   be zero, which indicates that the retry should be
15245  *		   attempted immediately (ie, without an intervening delay).
15246  *
15247  *		statp - Ptr to kstat function to be updated if the command
15248  *		   is queued for a delayed retry. May be NULL if no kstat
15249  *		   update is desired.
15250  *
15251  *     Context: May be called from interrupt context.
15252  */
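
/*
 * Example call (mirroring an actual use earlier in this file): retry a
 * command that received TRAN_BUSY after a delay, moving it back onto
 * the wait queue for kstat accounting:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
 *	    SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
 */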
15253 
15254 static void
15255 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15256 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15257 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15258 	void (*statp)(kstat_io_t *))
15259 {
15260 	struct sd_xbuf	*xp;
15261 	struct scsi_pkt	*pktp;
15262 
15263 	ASSERT(un != NULL);
15264 	ASSERT(mutex_owned(SD_MUTEX(un)));
15265 	ASSERT(bp != NULL);
15266 	xp = SD_GET_XBUF(bp);
15267 	ASSERT(xp != NULL);
15268 	pktp = SD_GET_PKTP(bp);
15269 	ASSERT(pktp != NULL);
15270 
15271 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15272 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15273 
15274 	/*
15275 	 * If we are syncing or dumping, fail the command to avoid
15276 	 * recursively calling back into scsi_transport().
15277 	 */
15278 	if (ddi_in_panic()) {
15279 		goto fail_command_no_log;
15280 	}
15281 
15282 	/*
15283 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15284 	 * log an error and fail the command.
15285 	 */
15286 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15287 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15288 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15289 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15290 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15291 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15292 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15293 		goto fail_command;
15294 	}
15295 
15296 	/*
15297 	 * If we are suspended, then put the command onto the head of the
15298 	 * wait queue since we don't want to start more commands.
15299 	 */
15300 	switch (un->un_state) {
15301 	case SD_STATE_SUSPENDED:
15302 	case SD_STATE_DUMPING:
15303 		bp->av_forw = un->un_waitq_headp;
15304 		un->un_waitq_headp = bp;
15305 		if (un->un_waitq_tailp == NULL) {
15306 			un->un_waitq_tailp = bp;
15307 		}
15308 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15309 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15310 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15311 		return;
15312 	default:
15313 		break;
15314 	}
15315 
15316 	/*
15317 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15318 	 * is set; if it is then we do not want to retry the command.
15319 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15320 	 */
15321 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15322 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15323 			goto fail_command;
15324 		}
15325 	}
15326 
15327 
15328 	/*
15329 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15330 	 * command timeout or a selection timeout has occurred. This means
15331 	 * that we were unable to establish any kind of communication with
15332 	 * the target, and subsequent retries and/or commands are likely
15333 	 * to encounter similar results and take a long time to complete.
15334 	 *
15335 	 * If this is a failfast error condition, we need to update the
15336 	 * failfast state, even if this bp does not have B_FAILFAST set.
15337 	 */
15338 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15339 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15340 			ASSERT(un->un_failfast_bp == NULL);
15341 			/*
15342 			 * If we are already in the active failfast state, and
15343 			 * another failfast error condition has been detected,
15344 			 * then fail this command if it has B_FAILFAST set.
15345 			 * If B_FAILFAST is clear, then maintain the legacy
15346 			 * behavior of retrying heroically, even though this will
15347 			 * take a lot more time to fail the command.
15348 			 */
15349 			if (bp->b_flags & B_FAILFAST) {
15350 				goto fail_command;
15351 			}
15352 		} else {
15353 			/*
15354 			 * We're not in the active failfast state, but we
15355 			 * have a failfast error condition, so we must begin
15356 			 * transition to the next state. We do this regardless
15357 			 * of whether or not this bp has B_FAILFAST set.
15358 			 */
15359 			if (un->un_failfast_bp == NULL) {
15360 				/*
15361 				 * This is the first bp to meet a failfast
15362 				 * condition so save it on un_failfast_bp &
15363 				 * do normal retry processing. Do not enter
15364 				 * active failfast state yet. This marks
15365 				 * entry into the "failfast pending" state.
15366 				 */
15367 				un->un_failfast_bp = bp;
15368 
15369 			} else if (un->un_failfast_bp == bp) {
15370 				/*
15371 				 * This is the second time *this* bp has
15372 				 * encountered a failfast error condition,
15373 				 * so enter active failfast state & flush
15374 				 * queues as appropriate.
15375 				 */
15376 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15377 				un->un_failfast_bp = NULL;
15378 				sd_failfast_flushq(un);
15379 
15380 				/*
15381 				 * Fail this bp now if B_FAILFAST set;
15382 				 * otherwise continue with retries. (It would
15383 				 * be pretty ironic if this bp succeeded on a
15384 				 * subsequent retry after we just flushed all
15385 				 * the queues).
15386 				 */
15387 				if (bp->b_flags & B_FAILFAST) {
15388 					goto fail_command;
15389 				}
15390 
15391 #if !defined(lint) && !defined(__lint)
15392 			} else {
15393 				/*
15394 				 * If neither of the preceding conditionals
15395 				 * was true, it means that there is some
15396 				 * *other* bp that has met an initial failfast
15397 				 * condition and is currently either being
15398 				 * retried or is waiting to be retried. In
15399 				 * that case we should perform normal retry
15400 				 * processing on *this* bp, since there is a
15401 				 * chance that the current failfast condition
15402 				 * is transient and recoverable. If that does
15403 				 * not turn out to be the case, then retries
15404 				 * will be cleared when the wait queue is
15405 				 * flushed anyway.
15406 				 */
15407 #endif
15408 			}
15409 		}
15410 	} else {
15411 		/*
15412 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15413 		 * likely were able to at least establish some level of
15414 		 * communication with the target and subsequent commands
15415 		 * and/or retries are likely to get through to the target.
15416 		 * In this case we want to be aggressive about clearing
15417 		 * the failfast state. Note that this does not affect
15418 		 * the "failfast pending" condition.
15419 		 */
15420 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15421 	}
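
	/*
	 * Summary of the failfast transitions handled above:
	 *   INACTIVE -> pending:	the first bp to hit a failfast error
	 *				is recorded in un_failfast_bp and
	 *				retried normally.
	 *   pending -> ACTIVE:		the same bp hits a second failfast
	 *				error; the queues are flushed via
	 *				sd_failfast_flushq().
	 *   any -> INACTIVE:		a non-failfast error (or, in
	 *				sd_return_command, any successful
	 *				completion) clears the active state.
	 */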
15422 
15423 
15424 	/*
15425 	 * Check the specified retry count to see if we can still do
15426 	 * any retries with this pkt before we should fail it.
15427 	 */
15428 	switch (retry_check_flag & SD_RETRIES_MASK) {
15429 	case SD_RETRIES_VICTIM:
15430 		/*
15431 		 * Check the victim retry count. If exhausted, then fall
15432 		 * thru & check against the standard retry count.
15433 		 */
15434 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15435 			/* Increment count & proceed with the retry */
15436 			xp->xb_victim_retry_count++;
15437 			break;
15438 		}
15439 		/* Victim retries exhausted, fall back to std. retries... */
15440 		/* FALLTHRU */
15441 
15442 	case SD_RETRIES_STANDARD:
15443 		if (xp->xb_retry_count >= un->un_retry_count) {
15444 			/* Retries exhausted, fail the command */
15445 			SD_TRACE(SD_LOG_IO_CORE, un,
15446 			    "sd_retry_command: retries exhausted!\n");
15447 			/*
15448 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15449 			 * commands with nonzero pkt_resid.
15450 			 */
15451 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15452 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15453 			    (pktp->pkt_resid != 0)) {
15454 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15455 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15456 					SD_UPDATE_B_RESID(bp, pktp);
15457 				}
15458 			}
15459 			goto fail_command;
15460 		}
15461 		xp->xb_retry_count++;
15462 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15463 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15464 		break;
15465 
15466 	case SD_RETRIES_UA:
15467 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15468 			/* Retries exhausted, fail the command */
15469 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15470 			    "Unit Attention retries exhausted. "
15471 			    "Check the target.\n");
15472 			goto fail_command;
15473 		}
15474 		xp->xb_ua_retry_count++;
15475 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15476 		    "sd_retry_command: retry count:%d\n",
15477 			xp->xb_ua_retry_count);
15478 		break;
15479 
15480 	case SD_RETRIES_BUSY:
15481 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15482 			/* Retries exhausted, fail the command */
15483 			SD_TRACE(SD_LOG_IO_CORE, un,
15484 			    "sd_retry_command: retries exhausted!\n");
15485 			goto fail_command;
15486 		}
15487 		xp->xb_retry_count++;
15488 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15489 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15490 		break;
15491 
15492 	case SD_RETRIES_NOCHECK:
15493 	default:
15494 		/* No retry count to check. Just proceed with the retry */
15495 		break;
15496 	}
15497 
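	/*
	 * FLAG_HEAD (see scsi_pkt(9S)) asks the HBA to place this command
	 * at the head of its queue, so the retry is issued ahead of any
	 * commands queued behind it.
	 */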
15498 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15499 
15500 	/*
15501 	 * If we were given a zero timeout, we must attempt to retry the
15502 	 * command immediately (ie, without a delay).
15503 	 */
15504 	if (retry_delay == 0) {
15505 		/*
15506 		 * Check some limiting conditions to see if we can actually
15507 		 * do the immediate retry.  If we cannot, then we must
15508 		 * fall back to queueing up a delayed retry.
15509 		 */
15510 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15511 			/*
15512 			 * We are at the throttle limit for the target,
15513 			 * fall back to delayed retry.
15514 			 */
15515 			retry_delay = SD_BSY_TIMEOUT;
15516 			statp = kstat_waitq_enter;
15517 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15518 			    "sd_retry_command: immed. retry hit "
15519 			    "throttle!\n");
15520 		} else {
15521 			/*
15522 			 * We're clear to proceed with the immediate retry.
15523 			 * First call the user-provided function (if any)
15524 			 */
15525 			if (user_funcp != NULL) {
15526 				(*user_funcp)(un, bp, user_arg,
15527 				    SD_IMMEDIATE_RETRY_ISSUED);
15528 #ifdef __lock_lint
15529 				sd_print_incomplete_msg(un, bp, user_arg,
15530 				    SD_IMMEDIATE_RETRY_ISSUED);
15531 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15532 				    SD_IMMEDIATE_RETRY_ISSUED);
15533 				sd_print_sense_failed_msg(un, bp, user_arg,
15534 				    SD_IMMEDIATE_RETRY_ISSUED);
15535 #endif
15536 			}
15537 
15538 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15539 			    "sd_retry_command: issuing immediate retry\n");
15540 
15541 			/*
15542 			 * Call sd_start_cmds() to transport the command to
15543 			 * the target.
15544 			 */
15545 			sd_start_cmds(un, bp);
15546 
15547 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15548 			    "sd_retry_command exit\n");
15549 			return;
15550 		}
15551 	}
15552 
15553 	/*
15554 	 * Set up to retry the command after a delay.
15555 	 * First call the user-provided function (if any)
15556 	 */
15557 	if (user_funcp != NULL) {
15558 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15559 	}
15560 
15561 	sd_set_retry_bp(un, bp, retry_delay, statp);
15562 
15563 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15564 	return;
15565 
15566 fail_command:
15567 
15568 	if (user_funcp != NULL) {
15569 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15570 	}
15571 
15572 fail_command_no_log:
15573 
15574 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15575 	    "sd_retry_command: returning failed command\n");
15576 
15577 	sd_return_failed_command(un, bp, failure_code);
15578 
15579 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15580 }
15581 
15582 
15583 /*
15584  *    Function: sd_set_retry_bp
15585  *
15586  * Description: Set up the given bp for retry.
15587  *
15588  *   Arguments: un - ptr to associated softstate
15589  *		bp - ptr to buf(9S) for the command
15590  *		retry_delay - time interval before issuing retry (may be 0)
15591  *		statp - optional pointer to kstat function
15592  *
15593  *     Context: May be called under interrupt context
15594  */
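
/*
 * Overview of the cases handled below:
 *   1) No retry pending (un_retry_bp == NULL): this bp becomes
 *	un_retry_bp and, for a nonzero delay, a timeout(9F) callback is
 *	scheduled to restart it.
 *   2) This bp is already un_retry_bp: save statp and reschedule the
 *	timeout if appropriate.
 *   3) Some other bp is pending retry: queue this bp on the waitq,
 *	taking care not to get ahead of un_failfast_bp at the head.
 */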
15595 
15596 static void
15597 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15598 	void (*statp)(kstat_io_t *))
15599 {
15600 	ASSERT(un != NULL);
15601 	ASSERT(mutex_owned(SD_MUTEX(un)));
15602 	ASSERT(bp != NULL);
15603 
15604 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15605 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15606 
15607 	/*
15608 	 * Indicate that the command is being retried. This will not allow any
15609 	 * other commands on the wait queue to be transported to the target
15610 	 * until this command has been completed (success or failure). The
15611 	 * "retry command" is not transported to the target until the given
15612 	 * time delay expires, unless the user specified a 0 retry_delay.
15613 	 *
15614 	 * Note: the timeout(9F) callback routine is what actually calls
15615 	 * sd_start_cmds() to transport the command, with the exception of a
15616 	 * zero retry_delay. The only current implementor of a zero retry delay
15617 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15618 	 */
15619 	if (un->un_retry_bp == NULL) {
15620 		ASSERT(un->un_retry_statp == NULL);
15621 		un->un_retry_bp = bp;
15622 
15623 		/*
15624 		 * If the user has not specified a delay the command should
15625 		 * be queued and no timeout should be scheduled.
15626 		 */
15627 		if (retry_delay == 0) {
15628 			/*
15629 			 * Save the kstat pointer that will be used in the
15630 			 * call to SD_UPDATE_KSTATS() below, so that
15631 			 * sd_start_cmds() can correctly decrement the waitq
15632 			 * count when it is time to transport this command.
15633 			 */
15634 			un->un_retry_statp = statp;
15635 			goto done;
15636 		}
15637 	}
15638 
15639 	if (un->un_retry_bp == bp) {
15640 		/*
15641 		 * Save the kstat pointer that will be used in the call to
15642 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15643 		 * correctly decrement the waitq count when it is time to
15644 		 * transport this command.
15645 		 */
15646 		un->un_retry_statp = statp;
15647 
15648 		/*
15649 		 * Schedule a timeout if:
15650 		 *   1) The user has specified a delay.
15651 		 *   2) There is not a START_STOP_UNIT callback pending.
15652 		 *
15653 		 * If no delay has been specified, then it is up to the caller
15654 		 * to ensure that IO processing continues without stalling.
15655 		 * Effectively, this means that the caller will issue the
15656 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15657 		 * callback does this after the START STOP UNIT command has
15658 		 * completed. In either of these cases we should not schedule
15659 		 * a timeout callback here.  Also don't schedule the timeout if
15660 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15661 		 */
15662 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15663 		    (un->un_direct_priority_timeid == NULL)) {
15664 			un->un_retry_timeid =
15665 			    timeout(sd_start_retry_command, un, retry_delay);
15666 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15667 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15668 			    " bp:0x%p un_retry_timeid:0x%p\n",
15669 			    un, bp, un->un_retry_timeid);
15670 		}
15671 	} else {
15672 		/*
15673 		 * We only get in here if there is already another command
15674 		 * waiting to be retried.  In this case, we just put the
15675 		 * given command onto the wait queue, so it can be transported
15676 		 * after the current retry command has completed.
15677 		 *
15678 		 * Also we have to make sure that if the command at the head
15679 		 * of the wait queue is the un_failfast_bp, that we do not
15680 		 * put ahead of it any other commands that are to be retried.
15681 		 */
15682 		if ((un->un_failfast_bp != NULL) &&
15683 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15684 			/*
15685 			 * Enqueue this command AFTER the first command on
15686 			 * the wait queue (which is also un_failfast_bp).
15687 			 */
15688 			bp->av_forw = un->un_waitq_headp->av_forw;
15689 			un->un_waitq_headp->av_forw = bp;
15690 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15691 				un->un_waitq_tailp = bp;
15692 			}
15693 		} else {
15694 			/* Enqueue this command at the head of the waitq. */
15695 			bp->av_forw = un->un_waitq_headp;
15696 			un->un_waitq_headp = bp;
15697 			if (un->un_waitq_tailp == NULL) {
15698 				un->un_waitq_tailp = bp;
15699 			}
15700 		}
15701 
15702 		if (statp == NULL) {
15703 			statp = kstat_waitq_enter;
15704 		}
15705 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15706 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15707 	}
15708 
15709 done:
15710 	if (statp != NULL) {
15711 		SD_UPDATE_KSTATS(un, statp, bp);
15712 	}
15713 
15714 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15715 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15716 }
15717 
15718 
15719 /*
15720  *    Function: sd_start_retry_command
15721  *
15722  * Description: Start the command that has been waiting on the target's
15723  *		retry queue.  Called from timeout(9F) context after the
15724  *		retry delay interval has expired.
15725  *
15726  *   Arguments: arg - pointer to associated softstate for the device.
15727  *
15728  *     Context: timeout(9F) thread context.  May not sleep.
15729  */
15730 
15731 static void
15732 sd_start_retry_command(void *arg)
15733 {
15734 	struct sd_lun *un = arg;
15735 
15736 	ASSERT(un != NULL);
15737 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15738 
15739 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15740 	    "sd_start_retry_command: entry\n");
15741 
15742 	mutex_enter(SD_MUTEX(un));
15743 
15744 	un->un_retry_timeid = NULL;
15745 
15746 	if (un->un_retry_bp != NULL) {
15747 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15748 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15749 		    un, un->un_retry_bp);
15750 		sd_start_cmds(un, un->un_retry_bp);
15751 	}
15752 
15753 	mutex_exit(SD_MUTEX(un));
15754 
15755 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15756 	    "sd_start_retry_command: exit\n");
15757 }
15758 
15759 
15760 /*
15761  *    Function: sd_start_direct_priority_command
15762  *
15763  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15764  *		received TRAN_BUSY when we called scsi_transport() to send it
15765  *		to the underlying HBA. This function is called from timeout(9F)
15766  *		context after the delay interval has expired.
15767  *
15768  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15769  *
15770  *     Context: timeout(9F) thread context.  May not sleep.
15771  */
15772 
15773 static void
15774 sd_start_direct_priority_command(void *arg)
15775 {
15776 	struct buf	*priority_bp = arg;
15777 	struct sd_lun	*un;
15778 
15779 	ASSERT(priority_bp != NULL);
15780 	un = SD_GET_UN(priority_bp);
15781 	ASSERT(un != NULL);
15782 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15783 
15784 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15785 	    "sd_start_direct_priority_command: entry\n");
15786 
15787 	mutex_enter(SD_MUTEX(un));
15788 	un->un_direct_priority_timeid = NULL;
15789 	sd_start_cmds(un, priority_bp);
15790 	mutex_exit(SD_MUTEX(un));
15791 
15792 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15793 	    "sd_start_direct_priority_command: exit\n");
15794 }
15795 
15796 
15797 /*
15798  *    Function: sd_send_request_sense_command
15799  *
15800  * Description: Sends a REQUEST SENSE command to the target
15801  *
15802  *     Context: May be called from interrupt context.
15803  */
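
/*
 * Flow note: sd_mark_rqs_busy() ties the failed bp to the RQS resources
 * pre-allocated in sd_alloc_rqs() (un_rqs_bp/un_rqs_pktp), and the
 * REQUEST SENSE is then transported via sd_start_cmds().  The original
 * bp is recovered at interrupt time through sense_xp->xb_sense_bp.
 */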
15804 
15805 static void
15806 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15807 	struct scsi_pkt *pktp)
15808 {
15809 	ASSERT(bp != NULL);
15810 	ASSERT(un != NULL);
15811 	ASSERT(mutex_owned(SD_MUTEX(un)));
15812 
15813 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15814 	    "entry: buf:0x%p\n", bp);
15815 
15816 	/*
15817 	 * If we are syncing or dumping, then fail the command to avoid a
15818 	 * recursive callback into scsi_transport(). Also fail the command
15819 	 * if we are suspended (legacy behavior).
15820 	 */
15821 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15822 	    (un->un_state == SD_STATE_DUMPING)) {
15823 		sd_return_failed_command(un, bp, EIO);
15824 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15825 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15826 		return;
15827 	}
15828 
15829 	/*
15830 	 * Retry the failed command and don't issue the request sense if:
15831 	 *    1) the sense buf is busy
15832 	 *    2) we have 1 or more outstanding commands on the target
15833 	 *    (the sense data will be cleared or invalidated anyway)
15834 	 *
15835 	 * Note: There could be an issue with not checking a retry limit here,
15836 	 * the problem is determining which retry limit to check.
15837 	 */
15838 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15839 		/* Don't retry if the command is flagged as non-retryable */
15840 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15841 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15842 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15843 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15844 			    "sd_send_request_sense_command: "
15845 			    "at full throttle, retrying exit\n");
15846 		} else {
15847 			sd_return_failed_command(un, bp, EIO);
15848 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15849 			    "sd_send_request_sense_command: "
15850 			    "at full throttle, non-retryable exit\n");
15851 		}
15852 		return;
15853 	}
15854 
15855 	sd_mark_rqs_busy(un, bp);
15856 	sd_start_cmds(un, un->un_rqs_bp);
15857 
15858 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15859 	    "sd_send_request_sense_command: exit\n");
15860 }
15861 
15862 
15863 /*
15864  *    Function: sd_mark_rqs_busy
15865  *
15866  * Description: Indicate that the request sense bp for this instance is
15867  *		in use.
15868  *
15869  *     Context: May be called under interrupt context
15870  */
15871 
15872 static void
15873 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15874 {
15875 	struct sd_xbuf	*sense_xp;
15876 
15877 	ASSERT(un != NULL);
15878 	ASSERT(bp != NULL);
15879 	ASSERT(mutex_owned(SD_MUTEX(un)));
15880 	ASSERT(un->un_sense_isbusy == 0);
15881 
15882 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15883 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15884 
15885 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15886 	ASSERT(sense_xp != NULL);
15887 
15888 	SD_INFO(SD_LOG_IO, un,
15889 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15890 
15891 	ASSERT(sense_xp->xb_pktp != NULL);
15892 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15893 	    == (FLAG_SENSING | FLAG_HEAD));
15894 
15895 	un->un_sense_isbusy = 1;
15896 	un->un_rqs_bp->b_resid = 0;
15897 	sense_xp->xb_pktp->pkt_resid  = 0;
15898 	sense_xp->xb_pktp->pkt_reason = 0;
15899 
15900 	/* So we can get back the bp at interrupt time! */
15901 	sense_xp->xb_sense_bp = bp;
15902 
15903 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15904 
15905 	/*
15906 	 * Mark this buf as awaiting sense data. (This is already set in
15907 	 * the pkt_flags for the RQS packet.)
15908 	 */
15909 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15910 
15911 	sense_xp->xb_retry_count	= 0;
15912 	sense_xp->xb_victim_retry_count = 0;
15913 	sense_xp->xb_ua_retry_count	= 0;
15914 	sense_xp->xb_dma_resid  = 0;
15915 
15916 	/* Clean up the fields for auto-request sense */
15917 	sense_xp->xb_sense_status = 0;
15918 	sense_xp->xb_sense_state  = 0;
15919 	sense_xp->xb_sense_resid  = 0;
15920 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15921 
15922 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15923 }
15924 
15925 
15926 /*
15927  *    Function: sd_mark_rqs_idle
15928  *
15929  * Description: SD_MUTEX must be held continuously through this routine
15930  *		to prevent reuse of the rqs struct before the caller can
15931  *		complete its processing.
15932  *
15933  * Return Code: Pointer to the RQS buf
15934  *
15935  *     Context: May be called under interrupt context
15936  */
15937 
15938 static struct buf *
15939 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15940 {
15941 	struct buf *bp;
15942 	ASSERT(un != NULL);
15943 	ASSERT(sense_xp != NULL);
15944 	ASSERT(mutex_owned(SD_MUTEX(un)));
15945 	ASSERT(un->un_sense_isbusy != 0);
15946 
15947 	un->un_sense_isbusy = 0;
15948 	bp = sense_xp->xb_sense_bp;
15949 	sense_xp->xb_sense_bp = NULL;
15950 
15951 	/* This pkt is no longer interested in getting sense data */
15952 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15953 
15954 	return (bp);
15955 }
15956 
15957 
15958 
15959 /*
15960  *    Function: sd_alloc_rqs
15961  *
15962  * Description: Set up the unit to receive auto request sense data
15963  *
15964  * Return Code: DDI_SUCCESS or DDI_FAILURE
15965  *
15966  *     Context: Called under attach(9E) context
15967  */
15968 
15969 static int
15970 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15971 {
15972 	struct sd_xbuf *xp;
15973 
15974 	ASSERT(un != NULL);
15975 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15976 	ASSERT(un->un_rqs_bp == NULL);
15977 	ASSERT(un->un_rqs_pktp == NULL);
15978 
15979 	/*
15980 	 * First allocate the required buf and scsi_pkt structs, then set up
15981 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15982 	 */
15983 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15984 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15985 	if (un->un_rqs_bp == NULL) {
15986 		return (DDI_FAILURE);
15987 	}
15988 
15989 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15990 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15991 
15992 	if (un->un_rqs_pktp == NULL) {
15993 		sd_free_rqs(un);
15994 		return (DDI_FAILURE);
15995 	}
15996 
15997 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15998 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15999 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
16000 
16001 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
16002 
16003 	/* Set up the other needed members in the ARQ scsi_pkt. */
16004 	un->un_rqs_pktp->pkt_comp   = sdintr;
16005 	un->un_rqs_pktp->pkt_time   = sd_io_time;
16006 	un->un_rqs_pktp->pkt_flags |=
16007 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
16008 
16009 	/*
16010 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
16011 	 * provide any initpkt, destroypkt routines as we take care of
16012 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
16013 	 */
16014 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
16015 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
16016 	xp->xb_pktp = un->un_rqs_pktp;
16017 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
16018 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
16019 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
16020 
16021 	/*
16022 	 * Save the pointer to the request sense private bp so it can
16023 	 * be retrieved in sdintr.
16024 	 */
16025 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
16026 	ASSERT(un->un_rqs_bp->b_private == xp);
16027 
16028 	/*
16029 	 * See if the HBA supports auto-request sense for the specified
16030 	 * target/lun. If it does, then try to enable it (if not already
16031 	 * enabled).
16032 	 *
16033 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
16034 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
16035 	 * return success.  However, in both of these cases ARQ is always
16036 	 * enabled and scsi_ifgetcap will always return true. The best approach
16037 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16038 	 *
16039 	 * The 3rd case is an HBA (adp) that always returns enabled on
16040 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
16041 	 * there is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
16042 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
16043 	 */
16044 
16045 	if (un->un_f_is_fibre == TRUE) {
16046 		un->un_f_arq_enabled = TRUE;
16047 	} else {
16048 #if defined(__i386) || defined(__amd64)
16049 		/*
16050 		 * Circumvent the Adaptec bug, remove this code when
16051 		 * the bug is fixed
16052 		 */
16053 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16054 #endif
16055 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16056 		case 0:
16057 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16058 				"sd_alloc_rqs: HBA supports ARQ\n");
16059 			/*
16060 			 * ARQ is supported by this HBA but currently is not
16061 			 * enabled. Attempt to enable it and if successful then
16062 			 * mark this instance as ARQ enabled.
16063 			 */
16064 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16065 				== 1) {
16066 				/* Successfully enabled ARQ in the HBA */
16067 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16068 					"sd_alloc_rqs: ARQ enabled\n");
16069 				un->un_f_arq_enabled = TRUE;
16070 			} else {
16071 				/* Could not enable ARQ in the HBA */
16072 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16073 				"sd_alloc_rqs: failed ARQ enable\n");
16074 				un->un_f_arq_enabled = FALSE;
16075 			}
16076 			break;
16077 		case 1:
16078 			/*
16079 			 * ARQ is supported by this HBA and is already enabled.
16080 			 * Just mark ARQ as enabled for this instance.
16081 			 */
16082 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16083 				"sd_alloc_rqs: ARQ already enabled\n");
16084 			un->un_f_arq_enabled = TRUE;
16085 			break;
16086 		default:
16087 			/*
16088 			 * ARQ is not supported by this HBA; disable it for this
16089 			 * instance.
16090 			 */
16091 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16092 				"sd_alloc_rqs: HBA does not support ARQ\n");
16093 			un->un_f_arq_enabled = FALSE;
16094 			break;
16095 		}
16096 	}
16097 
16098 	return (DDI_SUCCESS);
16099 }
16100 
16101 
16102 /*
16103  *    Function: sd_free_rqs
16104  *
16105  * Description: Cleanup for the pre-instance RQS command.
16106  *
16107  *     Context: Kernel thread context
16108  */
16109 
16110 static void
16111 sd_free_rqs(struct sd_lun *un)
16112 {
16113 	ASSERT(un != NULL);
16114 
16115 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16116 
16117 	/*
16118 	 * If consistent memory is bound to a scsi_pkt, the pkt
16119 	 * has to be destroyed *before* freeing the consistent memory.
16120 	 * Don't change the sequence of this operations.
16121 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16122 	 * after it was freed in scsi_free_consistent_buf().
16123 	 */
16124 	if (un->un_rqs_pktp != NULL) {
16125 		scsi_destroy_pkt(un->un_rqs_pktp);
16126 		un->un_rqs_pktp = NULL;
16127 	}
16128 
16129 	if (un->un_rqs_bp != NULL) {
16130 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
16131 		scsi_free_consistent_buf(un->un_rqs_bp);
16132 		un->un_rqs_bp = NULL;
16133 	}
16134 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16135 }
16136 
16137 
16138 
16139 /*
16140  *    Function: sd_reduce_throttle
16141  *
16142  * Description: Reduces the maximum # of outstanding commands on a
16143  *		target to the current number of outstanding commands.
16144  *		Queues a timeout(9F) callback to restore the limit
16145  *		after a specified interval has elapsed.
16146  *		Typically used when we get a TRAN_BUSY return code
16147  *		back from scsi_transport().
16148  *
16149  *   Arguments: un - ptr to the sd_lun softstate struct
16150  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16151  *
16152  *     Context: May be called from interrupt context
16153  */
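
/*
 * Worked example (with adaptive throttling enabled): if un_throttle is
 * 32 and three commands are in transport, a SD_THROTTLE_TRAN_BUSY event
 * saves 32 in un_busy_throttle and drops un_throttle to 3; the timeout
 * scheduled below lets sd_restore_throttle() raise it again later.
 */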
16154 
16155 static void
16156 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16157 {
16158 	ASSERT(un != NULL);
16159 	ASSERT(mutex_owned(SD_MUTEX(un)));
16160 	ASSERT(un->un_ncmds_in_transport >= 0);
16161 
16162 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16163 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16164 	    un, un->un_throttle, un->un_ncmds_in_transport);
16165 
16166 	if (un->un_throttle > 1) {
16167 		if (un->un_f_use_adaptive_throttle == TRUE) {
16168 			switch (throttle_type) {
16169 			case SD_THROTTLE_TRAN_BUSY:
16170 				if (un->un_busy_throttle == 0) {
16171 					un->un_busy_throttle = un->un_throttle;
16172 				}
16173 				break;
16174 			case SD_THROTTLE_QFULL:
16175 				un->un_busy_throttle = 0;
16176 				break;
16177 			default:
16178 				ASSERT(FALSE);
16179 			}
16180 
16181 			if (un->un_ncmds_in_transport > 0) {
16182 				un->un_throttle = un->un_ncmds_in_transport;
16183 			}
16184 
16185 		} else {
16186 			if (un->un_ncmds_in_transport == 0) {
16187 				un->un_throttle = 1;
16188 			} else {
16189 				un->un_throttle = un->un_ncmds_in_transport;
16190 			}
16191 		}
16192 	}
16193 
16194 	/* Reschedule the timeout if none is currently active */
16195 	if (un->un_reset_throttle_timeid == NULL) {
16196 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16197 		    un, SD_THROTTLE_RESET_INTERVAL);
16198 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16199 		    "sd_reduce_throttle: timeout scheduled!\n");
16200 	}
16201 
16202 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16203 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16204 }
16205 
16206 
16207 
16208 /*
16209  *    Function: sd_restore_throttle
16210  *
16211  * Description: Callback function for timeout(9F).  Resets the current
16212  *		value of un->un_throttle to its default.
16213  *
16214  *   Arguments: arg - pointer to associated softstate for the device.
16215  *
16216  *     Context: May be called from interrupt context
16217  */
16218 
16219 static void
16220 sd_restore_throttle(void *arg)
16221 {
16222 	struct sd_lun	*un = arg;
16223 
16224 	ASSERT(un != NULL);
16225 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16226 
16227 	mutex_enter(SD_MUTEX(un));
16228 
16229 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16230 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16231 
16232 	un->un_reset_throttle_timeid = NULL;
16233 
16234 	if (un->un_f_use_adaptive_throttle == TRUE) {
16235 		/*
16236 		 * If un_busy_throttle is nonzero, then it contains the
16237 		 * value that un_throttle was when we got a TRAN_BUSY back
16238 		 * from scsi_transport(). We want to revert back to this
16239 		 * value.
16240 		 *
16241 		 * In the QFULL case, the throttle limit will incrementally
16242 		 * increase until it reaches max throttle.
16243 		 */
16244 		if (un->un_busy_throttle > 0) {
16245 			un->un_throttle = un->un_busy_throttle;
16246 			un->un_busy_throttle = 0;
16247 		} else {
16248 			/*
16249 			 * Increase the throttle by 10% to open the gate
16250 			 * slowly; schedule another restore callback if the
16251 			 * saved throttle has not yet been reached.
16252 			 */
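			/*
			 * For example: un_throttle == 20 steps to
			 * 20 + max(20 / 10, 1) == 22, while un_throttle == 5
			 * steps by max(5 / 10, 1) == 1 to 6, capped at
			 * un_saved_throttle.
			 */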
16253 			short throttle;
16254 			if (sd_qfull_throttle_enable) {
16255 				throttle = un->un_throttle +
16256 				    max((un->un_throttle / 10), 1);
16257 				un->un_throttle =
16258 				    (throttle < un->un_saved_throttle) ?
16259 				    throttle : un->un_saved_throttle;
16260 				if (un->un_throttle < un->un_saved_throttle) {
16261 					un->un_reset_throttle_timeid =
16262 					    timeout(sd_restore_throttle, un,
16263 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16264 				}
16265 			}
16266 		}
16267 
16268 		/*
16269 		 * If un_throttle has fallen below the low-water mark, we
16270 		 * restore the maximum value here (and allow it to ratchet
16271 		 * down again if necessary).
16272 		 */
16273 		if (un->un_throttle < un->un_min_throttle) {
16274 			un->un_throttle = un->un_saved_throttle;
16275 		}
16276 	} else {
16277 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16278 		    "restoring limit from 0x%x to 0x%x\n",
16279 		    un->un_throttle, un->un_saved_throttle);
16280 		un->un_throttle = un->un_saved_throttle;
16281 	}
16282 
16283 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16284 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16285 
16286 	sd_start_cmds(un, NULL);
16287 
16288 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16289 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16290 	    un, un->un_throttle);
16291 
16292 	mutex_exit(SD_MUTEX(un));
16293 
16294 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16295 }
16296 
16297 /*
16298  *    Function: sdrunout
16299  *
16300  * Description: Callback routine for scsi_init_pkt when a resource allocation
16301  *		fails.
16302  *
16303  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16304  *		soft state instance.
16305  *
16306  * Return Code: The scsi_init_pkt routine allows for the callback function to
16307  *		return a 0 indicating the callback should be rescheduled or a 1
16308  *		indicating not to reschedule. This routine always returns 1
16309  *		because the driver always provides a callback function to
16310  *		scsi_init_pkt. This results in a callback always being scheduled
16311  *		(via the scsi_init_pkt callback implementation) if a resource
16312  *		failure occurs.
16313  *
16314  *     Context: This callback function may not block or call routines that block
16315  *
16316  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16317  *		request persisting at the head of the list which cannot be
16318  *		satisfied even after multiple retries. In the future the driver
16319  *		may implement some kind of maximum runout count before failing
16320  *		an I/O.
16321  */
16322 
16323 static int
16324 sdrunout(caddr_t arg)
16325 {
16326 	struct sd_lun	*un = (struct sd_lun *)arg;
16327 
16328 	ASSERT(un != NULL);
16329 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16330 
16331 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16332 
16333 	mutex_enter(SD_MUTEX(un));
16334 	sd_start_cmds(un, NULL);
16335 	mutex_exit(SD_MUTEX(un));
16336 	/*
16337 	 * This callback routine always returns 1 (i.e. do not reschedule)
16338 	 * because we always specify sdrunout as the callback handler for
16339 	 * scsi_init_pkt inside the call to sd_start_cmds.
16340 	 */
16341 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16342 	return (1);
16343 }
16344 
16345 
16346 /*
16347  *    Function: sdintr
16348  *
16349  * Description: Completion callback routine for scsi_pkt(9S) structs
16350  *		sent to the HBA driver via scsi_transport(9F).
16351  *
16352  *     Context: Interrupt context
16353  */
16354 
16355 static void
16356 sdintr(struct scsi_pkt *pktp)
16357 {
16358 	struct buf	*bp;
16359 	struct sd_xbuf	*xp;
16360 	struct sd_lun	*un;
16361 
16362 	ASSERT(pktp != NULL);
16363 	bp = (struct buf *)pktp->pkt_private;
16364 	ASSERT(bp != NULL);
16365 	xp = SD_GET_XBUF(bp);
16366 	ASSERT(xp != NULL);
16367 	ASSERT(xp->xb_pktp != NULL);
16368 	un = SD_GET_UN(bp);
16369 	ASSERT(un != NULL);
16370 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16371 
16372 #ifdef SD_FAULT_INJECTION
16373 
16374 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16375 	/* SD FaultInjection */
16376 	sd_faultinjection(pktp);
16377 
16378 #endif /* SD_FAULT_INJECTION */
16379 
16380 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16381 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16382 
16383 	mutex_enter(SD_MUTEX(un));
16384 
16385 	/* Reduce the count of the #commands currently in transport */
16386 	un->un_ncmds_in_transport--;
16387 	ASSERT(un->un_ncmds_in_transport >= 0);
16388 
16389 	/* Increment counter to indicate that the callback routine is active */
16390 	un->un_in_callback++;
16391 
16392 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16393 
16394 #ifdef	SDDEBUG
16395 	if (bp == un->un_retry_bp) {
16396 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16397 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16398 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16399 	}
16400 #endif
16401 
16402 	/*
16403 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16404 	 */
16405 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16406 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16407 		    "Device is gone\n");
16408 		sd_return_failed_command(un, bp, EIO);
16409 		goto exit;
16410 	}
16411 
16412 	/*
16413 	 * First see if the pkt has auto-request sense data with it....
16414 	 * Look at the packet state first so we don't take a performance
16415 	 * hit looking at the arq enabled flag unless absolutely necessary.
16416 	 */
16417 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16418 	    (un->un_f_arq_enabled == TRUE)) {
16419 		/*
16420 		 * The HBA did an auto request sense for this command so check
16421 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16422 		 * driver command that should not be retried.
16423 		 */
16424 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16425 			/*
16426 			 * Save the relevant sense info into the xp for the
16427 			 * original cmd.
16428 			 */
16429 			struct scsi_arq_status *asp;
16430 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16431 			xp->xb_sense_status =
16432 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16433 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16434 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16435 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16436 			    min(sizeof (struct scsi_extended_sense),
16437 			    SENSE_LENGTH));
16438 
16439 			/* fail the command */
16440 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16441 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16442 			sd_return_failed_command(un, bp, EIO);
16443 			goto exit;
16444 		}
16445 
16446 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16447 		/*
16448 		 * We want to either retry or fail this command, so free
16449 		 * the DMA resources here.  If we retry the command then
16450 		 * the DMA resources will be reallocated in sd_start_cmds().
16451 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16452 		 * causes the *entire* transfer to start over again from the
16453 		 * beginning of the request, even for PARTIAL chunks that
16454 		 * have already transferred successfully.
16455 		 */
16456 		if ((un->un_f_is_fibre == TRUE) &&
16457 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16458 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16459 			scsi_dmafree(pktp);
16460 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16461 		}
16462 #endif
16463 
16464 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16465 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16466 
16467 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16468 		goto exit;
16469 	}
16470 
16471 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16472 	if (pktp->pkt_flags & FLAG_SENSING)  {
16473 		/* This pktp is from the unit's REQUEST_SENSE command */
16474 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16475 		    "sdintr: sd_handle_request_sense\n");
16476 		sd_handle_request_sense(un, bp, xp, pktp);
16477 		goto exit;
16478 	}
16479 
16480 	/*
16481 	 * Check to see if the command successfully completed as requested;
16482 	 * this is the most common case (and also the hot performance path).
16483 	 *
16484 	 * Requirements for successful completion are:
16485 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16486 	 * In addition:
16487 	 * - A residual of zero indicates successful completion no matter what
16488 	 *   the command is.
16489 	 * - If the residual is not zero and the command is not a read or
16490 	 *   write, then it's still defined as successful completion. In other
16491 	 *   words, if the command is a read or write the residual must be
16492 	 *   zero for successful completion.
16493 	 * - If the residual is not zero and the command is a read or
16494 	 *   write, and it's a USCSICMD, then it's still defined as
16495 	 *   successful completion.
16496 	 */
16497 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16498 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16499 
16500 		/*
16501 		 * Since this command is returned with a good status, we
16502 		 * can reset the count for Sonoma failover.
16503 		 */
16504 		un->un_sonoma_failure_count = 0;
16505 
16506 		/*
16507 		 * Return all USCSI commands on good status
16508 		 */
16509 		if (pktp->pkt_resid == 0) {
16510 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16511 			    "sdintr: returning command for resid == 0\n");
16512 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16513 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16514 			SD_UPDATE_B_RESID(bp, pktp);
16515 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16516 			    "sdintr: returning command for resid != 0\n");
16517 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16518 			SD_UPDATE_B_RESID(bp, pktp);
16519 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16520 			    "sdintr: returning uscsi command\n");
16521 		} else {
16522 			goto not_successful;
16523 		}
16524 		sd_return_command(un, bp);
16525 
16526 		/*
16527 		 * Decrement counter to indicate that the callback routine
16528 		 * is done.
16529 		 */
16530 		un->un_in_callback--;
16531 		ASSERT(un->un_in_callback >= 0);
16532 		mutex_exit(SD_MUTEX(un));
16533 
16534 		return;
16535 	}
16536 
16537 not_successful:
16538 
16539 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16540 	/*
16541 	 * The following is based upon knowledge of the underlying transport
16542 	 * and its use of DMA resources.  This code should be removed when
16543 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16544 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16545 	 * and sd_start_cmds().
16546 	 *
16547 	 * Free any DMA resources associated with this command if there
16548 	 * is a chance it could be retried or enqueued for later retry.
16549 	 * If we keep the DMA binding then mpxio cannot reissue the
16550 	 * command on another path whenever a path failure occurs.
16551 	 *
16552 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16553 	 * causes the *entire* transfer to start over again from the
16554 	 * beginning of the request, even for PARTIAL chunks that
16555 	 * have already transferred successfully.
16556 	 *
16557 	 * This is only done for non-uscsi commands (and also skipped for the
16558 	 * driver's internal RQS command). Also just do this for Fibre Channel
16559 	 * devices as these are the only ones that support mpxio.
16560 	 */
16561 	if ((un->un_f_is_fibre == TRUE) &&
16562 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16563 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16564 		scsi_dmafree(pktp);
16565 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16566 	}
16567 #endif
16568 
16569 	/*
16570 	 * The command did not successfully complete as requested so check
16571 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16572 	 * driver command that should not be retried so just return. If
16573 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16574 	 */
16575 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16576 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16577 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16578 		/*
16579 		 * Issue a request sense if a check condition caused the error
16580 		 * (we handle the auto request sense case above), otherwise
16581 		 * just fail the command.
16582 		 */
16583 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16584 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16585 			sd_send_request_sense_command(un, bp, pktp);
16586 		} else {
16587 			sd_return_failed_command(un, bp, EIO);
16588 		}
16589 		goto exit;
16590 	}
16591 
16592 	/*
16593 	 * The command did not successfully complete as requested so process
16594 	 * the error, retry, and/or attempt recovery.
16595 	 */
16596 	switch (pktp->pkt_reason) {
16597 	case CMD_CMPLT:
16598 		switch (SD_GET_PKT_STATUS(pktp)) {
16599 		case STATUS_GOOD:
16600 			/*
16601 			 * The command completed successfully with a non-zero
16602 			 * residual
16603 			 */
16604 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16605 			    "sdintr: STATUS_GOOD \n");
16606 			sd_pkt_status_good(un, bp, xp, pktp);
16607 			break;
16608 
16609 		case STATUS_CHECK:
16610 		case STATUS_TERMINATED:
16611 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16612 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16613 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16614 			break;
16615 
16616 		case STATUS_BUSY:
16617 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16618 			    "sdintr: STATUS_BUSY\n");
16619 			sd_pkt_status_busy(un, bp, xp, pktp);
16620 			break;
16621 
16622 		case STATUS_RESERVATION_CONFLICT:
16623 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16624 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16625 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16626 			break;
16627 
16628 		case STATUS_QFULL:
16629 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16630 			    "sdintr: STATUS_QFULL\n");
16631 			sd_pkt_status_qfull(un, bp, xp, pktp);
16632 			break;
16633 
16634 		case STATUS_MET:
16635 		case STATUS_INTERMEDIATE:
16636 		case STATUS_SCSI2:
16637 		case STATUS_INTERMEDIATE_MET:
16638 		case STATUS_ACA_ACTIVE:
16639 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16640 			    "Unexpected SCSI status received: 0x%x\n",
16641 			    SD_GET_PKT_STATUS(pktp));
16642 			sd_return_failed_command(un, bp, EIO);
16643 			break;
16644 
16645 		default:
16646 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16647 			    "Invalid SCSI status received: 0x%x\n",
16648 			    SD_GET_PKT_STATUS(pktp));
16649 			sd_return_failed_command(un, bp, EIO);
16650 			break;
16651 
16652 		}
16653 		break;
16654 
16655 	case CMD_INCOMPLETE:
16656 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16657 		    "sdintr:  CMD_INCOMPLETE\n");
16658 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16659 		break;
16660 	case CMD_TRAN_ERR:
16661 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16662 		    "sdintr: CMD_TRAN_ERR\n");
16663 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16664 		break;
16665 	case CMD_RESET:
16666 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16667 		    "sdintr: CMD_RESET \n");
16668 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16669 		break;
16670 	case CMD_ABORTED:
16671 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16672 		    "sdintr: CMD_ABORTED \n");
16673 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16674 		break;
16675 	case CMD_TIMEOUT:
16676 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16677 		    "sdintr: CMD_TIMEOUT\n");
16678 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16679 		break;
16680 	case CMD_UNX_BUS_FREE:
16681 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16682 		    "sdintr: CMD_UNX_BUS_FREE \n");
16683 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16684 		break;
16685 	case CMD_TAG_REJECT:
16686 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16687 		    "sdintr: CMD_TAG_REJECT\n");
16688 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16689 		break;
16690 	default:
16691 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16692 		    "sdintr: default\n");
16693 		sd_pkt_reason_default(un, bp, xp, pktp);
16694 		break;
16695 	}
16696 
16697 exit:
16698 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16699 
16700 	/* Decrement counter to indicate that the callback routine is done. */
16701 	un->un_in_callback--;
16702 	ASSERT(un->un_in_callback >= 0);
16703 
16704 	/*
16705 	 * At this point, the pkt has been dispatched, ie, it is either
16706 	 * being re-tried or has been returned to its caller and should
16707 	 * not be referenced.
16708 	 */
16709 
16710 	mutex_exit(SD_MUTEX(un));
16711 }
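
/*
 * Editorial summary of the paths above: sdintr() disposes of every
 * completion in one of three ways -- (1) the hot path, CMD_CMPLT plus
 * STATUS_GOOD with an acceptable residual, returns the buf directly
 * via sd_return_command(); (2) the sense paths unpack either the
 * auto-request-sense data or the driver's own REQUEST SENSE packet
 * and hand the result to sd_decode_sense(); (3) everything else
 * funnels into the pkt_reason/pkt_status switch, whose handlers
 * choose between a retry and sd_return_failed_command().
 */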
16712 
16713 
16714 /*
16715  *    Function: sd_print_incomplete_msg
16716  *
16717  * Description: Prints the error message for a CMD_INCOMPLETE error.
16718  *
16719  *   Arguments: un - ptr to associated softstate for the device.
16720  *		bp - ptr to the buf(9S) for the command.
16721  *		arg - message string ptr
16722  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16723  *			or SD_NO_RETRY_ISSUED.
16724  *
16725  *     Context: May be called under interrupt context
16726  */
16727 
16728 static void
16729 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16730 {
16731 	struct scsi_pkt	*pktp;
16732 	char	*msgp;
16733 	char	*cmdp = arg;
16734 
16735 	ASSERT(un != NULL);
16736 	ASSERT(mutex_owned(SD_MUTEX(un)));
16737 	ASSERT(bp != NULL);
16738 	ASSERT(arg != NULL);
16739 	pktp = SD_GET_PKTP(bp);
16740 	ASSERT(pktp != NULL);
16741 
16742 	switch (code) {
16743 	case SD_DELAYED_RETRY_ISSUED:
16744 	case SD_IMMEDIATE_RETRY_ISSUED:
16745 		msgp = "retrying";
16746 		break;
16747 	case SD_NO_RETRY_ISSUED:
16748 	default:
16749 		msgp = "giving up";
16750 		break;
16751 	}
16752 
16753 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16754 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16755 		    "incomplete %s- %s\n", cmdp, msgp);
16756 	}
16757 }
16758 
16759 
16760 
16761 /*
16762  *    Function: sd_pkt_status_good
16763  *
16764  * Description: Processing for a STATUS_GOOD code in pkt_status.
16765  *
16766  *     Context: May be called under interrupt context
16767  */
16768 
16769 static void
16770 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16771 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16772 {
16773 	char	*cmdp;
16774 
16775 	ASSERT(un != NULL);
16776 	ASSERT(mutex_owned(SD_MUTEX(un)));
16777 	ASSERT(bp != NULL);
16778 	ASSERT(xp != NULL);
16779 	ASSERT(pktp != NULL);
16780 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16781 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16782 	ASSERT(pktp->pkt_resid != 0);
16783 
16784 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16785 
16786 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16787 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16788 	case SCMD_READ:
16789 		cmdp = "read";
16790 		break;
16791 	case SCMD_WRITE:
16792 		cmdp = "write";
16793 		break;
16794 	default:
16795 		SD_UPDATE_B_RESID(bp, pktp);
16796 		sd_return_command(un, bp);
16797 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16798 		return;
16799 	}
16800 
16801 	/*
16802 	 * See if we can retry the read/write, preferably immediately.
16803 	 * If retries are exhausted, then sd_retry_command() will update
16804 	 * the b_resid count.
16805 	 */
16806 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16807 	    cmdp, EIO, (clock_t)0, NULL);
16808 
16809 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16810 }
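
/*
 * Editorial note: masking the CDB opcode with 0x1F strips the three
 * high-order SCSI group-code bits, so every variant of a command
 * compares equal to its group 0 opcode; e.g. READ(6) 0x08, READ(10)
 * 0x28 and READ(12) 0xA8 all yield SCMD_READ (0x08), and WRITE(10)
 * 0x2A yields SCMD_WRITE (0x0A).
 */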
16811 
16812 
16813 
16814 
16815 
16816 /*
16817  *    Function: sd_handle_request_sense
16818  *
16819  * Description: Processing for non-auto Request Sense command.
16820  *
16821  *   Arguments: un - ptr to associated softstate
16822  *		sense_bp - ptr to buf(9S) for the RQS command
16823  *		sense_xp - ptr to the sd_xbuf for the RQS command
16824  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16825  *
16826  *     Context: May be called under interrupt context
16827  */
16828 
16829 static void
16830 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16831 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16832 {
16833 	struct buf	*cmd_bp;	/* buf for the original command */
16834 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16835 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16836 
16837 	ASSERT(un != NULL);
16838 	ASSERT(mutex_owned(SD_MUTEX(un)));
16839 	ASSERT(sense_bp != NULL);
16840 	ASSERT(sense_xp != NULL);
16841 	ASSERT(sense_pktp != NULL);
16842 
16843 	/*
16844 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16845 	 * RQS command and not the original command.
16846 	 */
16847 	ASSERT(sense_pktp == un->un_rqs_pktp);
16848 	ASSERT(sense_bp   == un->un_rqs_bp);
16849 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16850 	    (FLAG_SENSING | FLAG_HEAD));
16851 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16852 	    FLAG_SENSING) == FLAG_SENSING);
16853 
16854 	/* These are the bp, xp, and pktp for the original command */
16855 	cmd_bp = sense_xp->xb_sense_bp;
16856 	cmd_xp = SD_GET_XBUF(cmd_bp);
16857 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16858 
16859 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16860 		/*
16861 		 * The REQUEST SENSE command failed.  Release the REQUEST
16862 		 * SENSE command for re-use, get back the bp for the original
16863 		 * command, and attempt to re-try the original command if
16864 		 * FLAG_DIAGNOSE is not set in the original packet.
16865 		 */
16866 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16867 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16868 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16869 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16870 			    NULL, NULL, EIO, (clock_t)0, NULL);
16871 			return;
16872 		}
16873 	}
16874 
16875 	/*
16876 	 * Save the relevant sense info into the xp for the original cmd.
16877 	 *
16878 	 * Note: if the request sense failed the state info will be zero
16879 	 * as set in sd_mark_rqs_busy()
16880 	 */
16881 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16882 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16883 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16884 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16885 
16886 	/*
16887 	 *  Free up the RQS command....
16888 	 *  NOTE:
16889 	 *	Must do this BEFORE calling sd_validate_sense_data!
16890 	 *	sd_validate_sense_data may return the original command in
16891 	 *	which case the pkt will be freed and the flags can no
16892 	 *	longer be touched.
16893 	 *	SD_MUTEX is held through this process until the command
16894 	 *	is dispatched based upon the sense data, so there are
16895 	 *	no race conditions.
16896 	 */
16897 	(void) sd_mark_rqs_idle(un, sense_xp);
16898 
16899 	/*
16900 	 * For a retryable command see if we have valid sense data, if so then
16901 	 * turn it over to sd_decode_sense() to figure out the right course of
16902 	 * action. Just fail a non-retryable command.
16903 	 */
16904 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16905 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16906 		    SD_SENSE_DATA_IS_VALID) {
16907 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16908 		}
16909 	} else {
16910 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16911 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16912 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16913 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16914 		sd_return_failed_command(un, cmd_bp, EIO);
16915 	}
16916 }
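
/*
 * Editorial sketch of the linkage used above while a manual REQUEST
 * SENSE is outstanding:
 *
 *	un->un_rqs_bp (sense_bp)           original command (cmd_bp)
 *	  SD_GET_XBUF()->xb_sense_bp  -->    SD_GET_XBUF()->xb_pktp
 *	                                     has FLAG_SENSING set
 *
 * sd_mark_rqs_idle() severs this link, making the single per-instance
 * RQS packet available for the next CHECK CONDITION.
 */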
16917 
16918 
16919 
16920 
16921 /*
16922  *    Function: sd_handle_auto_request_sense
16923  *
16924  * Description: Processing for auto-request sense information.
16925  *
16926  *   Arguments: un - ptr to associated softstate
16927  *		bp - ptr to buf(9S) for the command
16928  *		xp - ptr to the sd_xbuf for the command
16929  *		pktp - ptr to the scsi_pkt(9S) for the command
16930  *
16931  *     Context: May be called under interrupt context
16932  */
16933 
16934 static void
16935 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16936 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16937 {
16938 	struct scsi_arq_status *asp;
16939 
16940 	ASSERT(un != NULL);
16941 	ASSERT(mutex_owned(SD_MUTEX(un)));
16942 	ASSERT(bp != NULL);
16943 	ASSERT(xp != NULL);
16944 	ASSERT(pktp != NULL);
16945 	ASSERT(pktp != un->un_rqs_pktp);
16946 	ASSERT(bp   != un->un_rqs_bp);
16947 
16948 	/*
16949 	 * For auto-request sense, we get a scsi_arq_status back from
16950 	 * the HBA, with the sense data in the sts_sensedata member.
16951 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16952 	 */
16953 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16954 
16955 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16956 		/*
16957 		 * The auto REQUEST SENSE failed; see if we can re-try
16958 		 * the original command.
16959 		 */
16960 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16961 		    "auto request sense failed (reason=%s)\n",
16962 		    scsi_rname(asp->sts_rqpkt_reason));
16963 
16964 		sd_reset_target(un, pktp);
16965 
16966 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16967 		    NULL, NULL, EIO, (clock_t)0, NULL);
16968 		return;
16969 	}
16970 
16971 	/* Save the relevant sense info into the xp for the original cmd. */
16972 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16973 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16974 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16975 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16976 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16977 
16978 	/*
16979 	 * See if we have valid sense data, if so then turn it over to
16980 	 * sd_decode_sense() to figure out the right course of action.
16981 	 */
16982 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16983 		sd_decode_sense(un, bp, xp, pktp);
16984 	}
16985 }
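
/*
 * Editorial note (layout paraphrased from scsi/impl/status.h; see the
 * header for the authoritative definition): with ARQ enabled, pkt_scbp
 * points at a scsi_arq_status rather than a bare status byte, roughly:
 *
 *	struct scsi_arq_status {
 *		struct scsi_status	sts_status;	  (original cmd)
 *		struct scsi_status	sts_rqpkt_status; (internal RQS)
 *		uchar_t			sts_rqpkt_reason;
 *		uchar_t			sts_rqpkt_resid;
 *		uint_t			sts_rqpkt_state;
 *		uint_t			sts_rqpkt_statistics;
 *		struct scsi_extended_sense sts_sensedata;
 *	};
 */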
16986 
16987 
16988 /*
16989  *    Function: sd_print_sense_failed_msg
16990  *
16991  * Description: Print log message when RQS has failed.
16992  *
16993  *   Arguments: un - ptr to associated softstate
16994  *		bp - ptr to buf(9S) for the command
16995  *		arg - generic message string ptr
16996  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16997  *			or SD_NO_RETRY_ISSUED
16998  *
16999  *     Context: May be called from interrupt context
17000  */
17001 
17002 static void
17003 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
17004 	int code)
17005 {
17006 	char	*msgp = arg;
17007 
17008 	ASSERT(un != NULL);
17009 	ASSERT(mutex_owned(SD_MUTEX(un)));
17010 	ASSERT(bp != NULL);
17011 
17012 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17013 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
17014 	}
17015 }
17016 
17017 
17018 /*
17019  *    Function: sd_validate_sense_data
17020  *
17021  * Description: Check the given sense data for validity.
17022  *		If the sense data is not valid, the command will
17023  *		be either failed or retried!
17024  *
17025  * Return Code: SD_SENSE_DATA_IS_INVALID
17026  *		SD_SENSE_DATA_IS_VALID
17027  *
17028  *     Context: May be called from interrupt context
17029  */
17030 
17031 static int
17032 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
17033 {
17034 	struct scsi_extended_sense *esp;
17035 	struct	scsi_pkt *pktp;
17036 	size_t	actual_len;
17037 	char	*msgp = NULL;
17038 
17039 	ASSERT(un != NULL);
17040 	ASSERT(mutex_owned(SD_MUTEX(un)));
17041 	ASSERT(bp != NULL);
17042 	ASSERT(bp != un->un_rqs_bp);
17043 	ASSERT(xp != NULL);
17044 
17045 	pktp = SD_GET_PKTP(bp);
17046 	ASSERT(pktp != NULL);
17047 
17048 	/*
17049 	 * Check the status of the RQS command (auto or manual).
17050 	 */
17051 	switch (xp->xb_sense_status & STATUS_MASK) {
17052 	case STATUS_GOOD:
17053 		break;
17054 
17055 	case STATUS_RESERVATION_CONFLICT:
17056 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17057 		return (SD_SENSE_DATA_IS_INVALID);
17058 
17059 	case STATUS_BUSY:
17060 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17061 		    "Busy Status on REQUEST SENSE\n");
17062 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17063 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17064 		return (SD_SENSE_DATA_IS_INVALID);
17065 
17066 	case STATUS_QFULL:
17067 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17068 		    "QFULL Status on REQUEST SENSE\n");
17069 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17070 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17071 		return (SD_SENSE_DATA_IS_INVALID);
17072 
17073 	case STATUS_CHECK:
17074 	case STATUS_TERMINATED:
17075 		msgp = "Check Condition on REQUEST SENSE\n";
17076 		goto sense_failed;
17077 
17078 	default:
17079 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17080 		goto sense_failed;
17081 	}
17082 
17083 	/*
17084 	 * See if we got the minimum required amount of sense data.
17085 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17086 	 * or less.
17087 	 */
17088 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
17089 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17090 	    (actual_len == 0)) {
17091 		msgp = "Request Sense couldn't get sense data\n";
17092 		goto sense_failed;
17093 	}
17094 
17095 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17096 		msgp = "Not enough sense information\n";
17097 		goto sense_failed;
17098 	}
17099 
17100 	/*
17101 	 * We require the extended sense data
17102 	 */
17103 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17104 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17105 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17106 			static char tmp[8];
17107 			static char buf[148];
17108 			char *p = (char *)(xp->xb_sense_data);
17109 			int i;
17110 
17111 			mutex_enter(&sd_sense_mutex);
17112 			(void) strcpy(buf, "undecodable sense information:");
17113 			for (i = 0; i < actual_len; i++) {
17114 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
17115 				(void) strcpy(&buf[strlen(buf)], tmp);
17116 			}
17117 			i = strlen(buf);
17118 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17119 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
17120 			mutex_exit(&sd_sense_mutex);
17121 		}
17122 		/* Note: Legacy behavior, fail the command with no retry */
17123 		sd_return_failed_command(un, bp, EIO);
17124 		return (SD_SENSE_DATA_IS_INVALID);
17125 	}
17126 
17127 	/*
17128 	 * Check that es_code is valid (es_class concatenated with es_code
17129 	 * make up the "response code" field).  es_class will always be 7, so
17130 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
17131 	 * format.
17132 	 */
17133 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17134 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17135 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17136 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17137 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17138 		goto sense_failed;
17139 	}
17140 
17141 	return (SD_SENSE_DATA_IS_VALID);
17142 
17143 sense_failed:
17144 	/*
17145 	 * If the request sense failed (for whatever reason), attempt
17146 	 * to retry the original command.
17147 	 */
17148 #if defined(__i386) || defined(__amd64)
17149 	/*
17150 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17151 	 * sddef.h for the SPARC platform, while x86 uses one binary
17152 	 * for both SCSI and FC.
17153 	 * The SD_RETRY_DELAY value below needs to be adjusted here
17154 	 * whenever SD_RETRY_DELAY changes in sddef.h.
17155 	 */
17156 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17157 	    sd_print_sense_failed_msg, msgp, EIO,
17158 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17159 #else
17160 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17161 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17162 #endif
17163 
17164 	return (SD_SENSE_DATA_IS_INVALID);
17165 }
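
/*
 * Worked example (editorial): byte 0 of the sense data is the
 * "response code", packed as es_valid(1) | es_class(3) | es_code(4).
 * 0x70/0x71 are fixed-format current/deferred, 0x72/0x73 are
 * descriptor-format current/deferred, and 0x7F is vendor specific;
 * any byte whose class bits are not 7 is rejected above as
 * "undecodable sense information".
 */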
17166 
17167 
17168 
17169 /*
17170  *    Function: sd_decode_sense
17171  *
17172  * Description: Take recovery action(s) when SCSI Sense Data is received.
17173  *
17174  *     Context: Interrupt context.
17175  */
17176 
17177 static void
17178 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17179 	struct scsi_pkt *pktp)
17180 {
17181 	uint8_t sense_key;
17182 
17183 	ASSERT(un != NULL);
17184 	ASSERT(mutex_owned(SD_MUTEX(un)));
17185 	ASSERT(bp != NULL);
17186 	ASSERT(bp != un->un_rqs_bp);
17187 	ASSERT(xp != NULL);
17188 	ASSERT(pktp != NULL);
17189 
17190 	sense_key = scsi_sense_key(xp->xb_sense_data);
17191 
17192 	switch (sense_key) {
17193 	case KEY_NO_SENSE:
17194 		sd_sense_key_no_sense(un, bp, xp, pktp);
17195 		break;
17196 	case KEY_RECOVERABLE_ERROR:
17197 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17198 		    bp, xp, pktp);
17199 		break;
17200 	case KEY_NOT_READY:
17201 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17202 		    bp, xp, pktp);
17203 		break;
17204 	case KEY_MEDIUM_ERROR:
17205 	case KEY_HARDWARE_ERROR:
17206 		sd_sense_key_medium_or_hardware_error(un,
17207 		    xp->xb_sense_data, bp, xp, pktp);
17208 		break;
17209 	case KEY_ILLEGAL_REQUEST:
17210 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17211 		break;
17212 	case KEY_UNIT_ATTENTION:
17213 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17214 		    bp, xp, pktp);
17215 		break;
17216 	case KEY_WRITE_PROTECT:
17217 	case KEY_VOLUME_OVERFLOW:
17218 	case KEY_MISCOMPARE:
17219 		sd_sense_key_fail_command(un, bp, xp, pktp);
17220 		break;
17221 	case KEY_BLANK_CHECK:
17222 		sd_sense_key_blank_check(un, bp, xp, pktp);
17223 		break;
17224 	case KEY_ABORTED_COMMAND:
17225 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17226 		break;
17227 	case KEY_VENDOR_UNIQUE:
17228 	case KEY_COPY_ABORTED:
17229 	case KEY_EQUAL:
17230 	case KEY_RESERVED:
17231 	default:
17232 		sd_sense_key_default(un, xp->xb_sense_data,
17233 		    bp, xp, pktp);
17234 		break;
17235 	}
17236 }
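
/*
 * Editorial reference: the sense keys dispatched above are the
 * standard SCSI values -- 0x0 NO SENSE, 0x1 RECOVERED ERROR, 0x2 NOT
 * READY, 0x3 MEDIUM ERROR, 0x4 HARDWARE ERROR, 0x5 ILLEGAL REQUEST,
 * 0x6 UNIT ATTENTION, 0x7 DATA PROTECT (KEY_WRITE_PROTECT), 0x8 BLANK
 * CHECK, 0xB ABORTED COMMAND, 0xD VOLUME OVERFLOW and 0xE MISCOMPARE.
 */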
17237 
17238 
17239 /*
17240  *    Function: sd_dump_memory
17241  *
17242  * Description: Debug logging routine to print the contents of a user provided
17243  *		buffer. The output of the buffer is broken up into 256 byte
17244  *		segments due to a size constraint of the scsi_log
17245  *		implementation.
17246  *
17247  *   Arguments: un - ptr to softstate
17248  *		comp - component mask
17249  *		title - "title" string to precede data when printed
17250  *		data - ptr to data block to be printed
17251  *		len - size of data block to be printed
17252  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17253  *
17254  *     Context: May be called from interrupt context
17255  */
17256 
17257 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17258 
17259 static char *sd_dump_format_string[] = {
17260 		" 0x%02x",
17261 		" %c"
17262 };
17263 
17264 static void
17265 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17266     int len, int fmt)
17267 {
17268 	int	i, j;
17269 	int	avail_count;
17270 	int	start_offset;
17271 	int	end_offset;
17272 	size_t	entry_len;
17273 	char	*bufp;
17274 	char	*local_buf;
17275 	char	*format_string;
17276 
17277 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17278 
17279 	/*
17280 	 * In the debug version of the driver, this function is called from a
17281 	 * number of places which are NOPs in the release driver.
17282 	 * The debug driver therefore has additional methods of filtering
17283 	 * debug output.
17284 	 */
17285 #ifdef SDDEBUG
17286 	/*
17287 	 * In the debug version of the driver we can reduce the amount of debug
17288 	 * messages by setting sd_error_level to something other than
17289 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17290 	 * sd_component_mask.
17291 	 */
17292 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17293 	    (sd_error_level != SCSI_ERR_ALL)) {
17294 		return;
17295 	}
17296 	if (((sd_component_mask & comp) == 0) ||
17297 	    (sd_error_level != SCSI_ERR_ALL)) {
17298 		return;
17299 	}
17300 #else
17301 	if (sd_error_level != SCSI_ERR_ALL) {
17302 		return;
17303 	}
17304 #endif
17305 
17306 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17307 	bufp = local_buf;
17308 	/*
17309 	 * Available length is the length of local_buf[], minus the
17310 	 * length of the title string, minus one for the ":", minus
17311 	 * one for the newline, minus one for the NULL terminator.
17312 	 * This gives the #bytes available for holding the printed
17313 	 * values from the given data buffer.
17314 	 */
17315 	if (fmt == SD_LOG_HEX) {
17316 		format_string = sd_dump_format_string[0];
17317 	} else /* SD_LOG_CHAR */ {
17318 		format_string = sd_dump_format_string[1];
17319 	}
17320 	/*
17321 	 * Available count is the number of elements from the given
17322 	 * data buffer that we can fit into the available length.
17323 	 * This is based upon the size of the format string used.
17324 	 * Make one entry and find its size.
17325 	 */
17326 	(void) sprintf(bufp, format_string, data[0]);
17327 	entry_len = strlen(bufp);
17328 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17329 
17330 	j = 0;
17331 	while (j < len) {
17332 		bufp = local_buf;
17333 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17334 		start_offset = j;
17335 
17336 		end_offset = start_offset + avail_count;
17337 
17338 		(void) sprintf(bufp, "%s:", title);
17339 		bufp += strlen(bufp);
17340 		for (i = start_offset; ((i < end_offset) && (j < len));
17341 		    i++, j++) {
17342 			(void) sprintf(bufp, format_string, data[i]);
17343 			bufp += entry_len;
17344 		}
17345 		(void) sprintf(bufp, "\n");
17346 
17347 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17348 	}
17349 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17350 }
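
/*
 * Worked example (editorial): with fmt == SD_LOG_HEX each entry is
 * " 0x%02x", so entry_len == 5; for the title "Sense Data" this gives
 * avail_count = (256 - 10 - 3) / 5 = 48 bytes per scsi_log() line,
 * meaning a 20-byte sense buffer fits on a single output line.
 */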
17351 
17352 /*
17353  *    Function: sd_print_sense_msg
17354  *
17355  * Description: Log a message based upon the given sense data.
17356  *
17357  *   Arguments: un - ptr to associated softstate
17358  *		bp - ptr to buf(9S) for the command
17359  *		arg - ptr to associate sd_sense_info struct
17360  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17361  *			or SD_NO_RETRY_ISSUED
17362  *
17363  *     Context: May be called from interrupt context
17364  */
17365 
17366 static void
17367 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17368 {
17369 	struct sd_xbuf	*xp;
17370 	struct scsi_pkt	*pktp;
17371 	uint8_t *sensep;
17372 	daddr_t request_blkno;
17373 	diskaddr_t err_blkno;
17374 	int severity;
17375 	int pfa_flag;
17376 	extern struct scsi_key_strings scsi_cmds[];
17377 
17378 	ASSERT(un != NULL);
17379 	ASSERT(mutex_owned(SD_MUTEX(un)));
17380 	ASSERT(bp != NULL);
17381 	xp = SD_GET_XBUF(bp);
17382 	ASSERT(xp != NULL);
17383 	pktp = SD_GET_PKTP(bp);
17384 	ASSERT(pktp != NULL);
17385 	ASSERT(arg != NULL);
17386 
17387 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17388 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17389 
17390 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17391 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17392 		severity = SCSI_ERR_RETRYABLE;
17393 	}
17394 
17395 	/* Use absolute block number for the request block number */
17396 	request_blkno = xp->xb_blkno;
17397 
17398 	/*
17399 	 * Now try to get the error block number from the sense data
17400 	 */
17401 	sensep = xp->xb_sense_data;
17402 
17403 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17404 	    (uint64_t *)&err_blkno)) {
17405 		/*
17406 		 * We retrieved the error block number from the information
17407 		 * portion of the sense data.
17408 		 *
17409 		 * For USCSI commands we are better off using the error
17410 		 * block no. as the requested block no. (This is the best
17411 		 * we can estimate.)
17412 		 */
17413 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17414 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17415 			request_blkno = err_blkno;
17416 		}
17417 	} else {
17418 		/*
17419 		 * Without the es_valid bit set (for fixed format) or an
17420 		 * information descriptor (for descriptor format) we cannot
17421 		 * be certain of the error blkno, so just use the
17422 		 * request_blkno.
17423 		 */
17424 		err_blkno = (diskaddr_t)request_blkno;
17425 	}
17426 
17427 	/*
17428 	 * The following will log the buffer contents for the release driver
17429 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17430 	 * level is set to verbose.
17431 	 */
17432 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17433 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17434 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17435 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17436 
17437 	if (pfa_flag == FALSE) {
17438 		/* This is normally only set for USCSI */
17439 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17440 			return;
17441 		}
17442 
17443 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17444 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17445 		    (severity < sd_error_level))) {
17446 			return;
17447 		}
17448 	}
17449 
17450 	/*
17451 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
17452 	 */
17453 	if ((SD_IS_LSI(un)) &&
17454 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17455 	    (scsi_sense_asc(sensep) == 0x94) &&
17456 	    (scsi_sense_ascq(sensep) == 0x01)) {
17457 		un->un_sonoma_failure_count++;
17458 		if (un->un_sonoma_failure_count > 1) {
17459 			return;
17460 		}
17461 	}
17462 
17463 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17464 	    request_blkno, err_blkno, scsi_cmds,
17465 	    (struct scsi_extended_sense *)sensep,
17466 	    un->un_additional_codes, NULL);
17467 }
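
/*
 * Editorial note: scsi_sense_info_uint64() hides the difference
 * between the two sense formats -- for fixed format it extracts the
 * INFORMATION field (bytes 3-6, meaningful only when the valid bit of
 * byte 0 is set); for descriptor format it searches the descriptors
 * for an information descriptor.  Example with assumed fixed-format
 * bytes f0 00 03 00 00 12 34 ...: err_blkno comes back as 0x1234.
 */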
17468 
17469 /*
17470  *    Function: sd_sense_key_no_sense
17471  *
17472  * Description: Recovery action when sense data was not received.
17473  *
17474  *     Context: May be called from interrupt context
17475  */
17476 
17477 static void
17478 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17479 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17480 {
17481 	struct sd_sense_info	si;
17482 
17483 	ASSERT(un != NULL);
17484 	ASSERT(mutex_owned(SD_MUTEX(un)));
17485 	ASSERT(bp != NULL);
17486 	ASSERT(xp != NULL);
17487 	ASSERT(pktp != NULL);
17488 
17489 	si.ssi_severity = SCSI_ERR_FATAL;
17490 	si.ssi_pfa_flag = FALSE;
17491 
17492 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17493 
17494 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17495 	    &si, EIO, (clock_t)0, NULL);
17496 }
17497 
17498 
17499 /*
17500  *    Function: sd_sense_key_recoverable_error
17501  *
17502  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17503  *
17504  *     Context: May be called from interrupt context
17505  */
17506 
17507 static void
17508 sd_sense_key_recoverable_error(struct sd_lun *un,
17509 	uint8_t *sense_datap,
17510 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17511 {
17512 	struct sd_sense_info	si;
17513 	uint8_t asc = scsi_sense_asc(sense_datap);
17514 
17515 	ASSERT(un != NULL);
17516 	ASSERT(mutex_owned(SD_MUTEX(un)));
17517 	ASSERT(bp != NULL);
17518 	ASSERT(xp != NULL);
17519 	ASSERT(pktp != NULL);
17520 
17521 	/*
17522 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17523 	 */
17524 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17525 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17526 		si.ssi_severity = SCSI_ERR_INFO;
17527 		si.ssi_pfa_flag = TRUE;
17528 	} else {
17529 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17530 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17531 		si.ssi_severity = SCSI_ERR_RECOVERED;
17532 		si.ssi_pfa_flag = FALSE;
17533 	}
17534 
17535 	if (pktp->pkt_resid == 0) {
17536 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17537 		sd_return_command(un, bp);
17538 		return;
17539 	}
17540 
17541 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17542 	    &si, EIO, (clock_t)0, NULL);
17543 }
17544 
17545 
17546 
17547 
17548 /*
17549  *    Function: sd_sense_key_not_ready
17550  *
17551  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17552  *
17553  *     Context: May be called from interrupt context
17554  */
17555 
17556 static void
17557 sd_sense_key_not_ready(struct sd_lun *un,
17558 	uint8_t *sense_datap,
17559 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17560 {
17561 	struct sd_sense_info	si;
17562 	uint8_t asc = scsi_sense_asc(sense_datap);
17563 	uint8_t ascq = scsi_sense_ascq(sense_datap);
17564 
17565 	ASSERT(un != NULL);
17566 	ASSERT(mutex_owned(SD_MUTEX(un)));
17567 	ASSERT(bp != NULL);
17568 	ASSERT(xp != NULL);
17569 	ASSERT(pktp != NULL);
17570 
17571 	si.ssi_severity = SCSI_ERR_FATAL;
17572 	si.ssi_pfa_flag = FALSE;
17573 
17574 	/*
17575 	 * Update error stats after first NOT READY error. Disks may have
17576 	 * been powered down and may need to be restarted.  For CDROMs,
17577 	 * report NOT READY errors only if media is present.
17578 	 */
17579 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17580 	    (xp->xb_retry_count > 0)) {
17581 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17582 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17583 	}
17584 
17585 	/*
17586 	 * Just fail if the "not ready" retry limit has been reached.
17587 	 */
17588 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17589 		/* Special check for error message printing for removables. */
17590 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17591 		    (ascq >= 0x04)) {
17592 			si.ssi_severity = SCSI_ERR_ALL;
17593 		}
17594 		goto fail_command;
17595 	}
17596 
17597 	/*
17598 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17599 	 * what to do.
17600 	 */
17601 	switch (asc) {
17602 	case 0x04:	/* LOGICAL UNIT NOT READY */
17603 		/*
17604 		 * disk drives that don't spin up result in a very long delay
17605 		 * in format without warning messages. We will log a message
17606 		 * if the error level is set to verbose.
17607 		 */
17608 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17609 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17610 			    "logical unit not ready, resetting disk\n");
17611 		}
17612 
17613 		/*
17614 		 * There are different requirements for CDROMs and disks for
17615 		 * the number of retries.  If a CD-ROM is giving this, it is
17616 		 * probably reading TOC and is in the process of getting
17617 		 * ready, so we should keep on trying for a long time to make
17618 		 * sure that all types of media are taken into account (for
17619 		 * some media the drive takes a long time to read TOC).  For
17620 		 * disks we do not want to retry this too many times as this
17621 		 * can cause a long hang in format when the drive refuses to
17622 		 * spin up (a very common failure).
17623 		 */
17624 		switch (ascq) {
17625 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17626 			/*
17627 			 * Disk drives frequently refuse to spin up which
17628 			 * results in a very long hang in format without
17629 			 * warning messages.
17630 			 *
17631 			 * Note: This code preserves the legacy behavior of
17632 			 * comparing xb_retry_count against zero for fibre
17633 			 * channel targets instead of comparing against the
17634 			 * un_reset_retry_count value.  The reason for this
17635 			 * discrepancy has been so utterly lost beneath the
17636 			 * Sands of Time that even Indiana Jones could not
17637 			 * find it.
17638 			 */
17639 			if (un->un_f_is_fibre == TRUE) {
17640 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17641 				    (xp->xb_retry_count > 0)) &&
17642 				    (un->un_startstop_timeid == NULL)) {
17643 					scsi_log(SD_DEVINFO(un), sd_label,
17644 					    CE_WARN, "logical unit not ready, "
17645 					    "resetting disk\n");
17646 					sd_reset_target(un, pktp);
17647 				}
17648 			} else {
17649 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17650 				    (xp->xb_retry_count >
17651 				    un->un_reset_retry_count)) &&
17652 				    (un->un_startstop_timeid == NULL)) {
17653 					scsi_log(SD_DEVINFO(un), sd_label,
17654 					    CE_WARN, "logical unit not ready, "
17655 					    "resetting disk\n");
17656 					sd_reset_target(un, pktp);
17657 				}
17658 			}
17659 			break;
17660 
17661 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17662 			/*
17663 			 * If the target is in the process of becoming
17664 			 * ready, just proceed with the retry. This can
17665 			 * happen with CD-ROMs that take a long time to
17666 			 * read TOC after a power cycle or reset.
17667 			 */
17668 			goto do_retry;
17669 
17670 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17671 			break;
17672 
17673 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17674 			/*
17675 			 * Retries cannot help here so just fail right away.
17676 			 */
17677 			goto fail_command;
17678 
17679 		case 0x88:
17680 			/*
17681 			 * Vendor-unique code for T3/T4: it indicates a
17682 			 * path problem in a multipathed config, but as far as
17683 			 * the target driver is concerned it equates to a fatal
17684 			 * error, so we should just fail the command right away
17685 			 * (without printing anything to the console). If this
17686 			 * is not a T3/T4, fall thru to the default recovery
17687 			 * action.
17688 			 * T3/T4 is FC-only, so there is no need to check is_fibre.
17689 			 */
17690 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17691 				sd_return_failed_command(un, bp, EIO);
17692 				return;
17693 			}
17694 			/* FALLTHRU */
17695 
17696 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17697 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17698 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17699 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17700 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17701 		default:    /* Possible future codes in SCSI spec? */
17702 			/*
17703 			 * For removable-media devices, do not retry if
17704 			 * ASCQ > 2 as these result mostly from USCSI commands
17705 			 * on MMC devices issued to check status of an
17706 			 * operation initiated in immediate mode.  Also for
17707 			 * ASCQ >= 4 do not print console messages as these
17708 			 * mainly represent a user-initiated operation
17709 			 * instead of a system failure.
17710 			 */
17711 			if (un->un_f_has_removable_media) {
17712 				si.ssi_severity = SCSI_ERR_ALL;
17713 				goto fail_command;
17714 			}
17715 			break;
17716 		}
17717 
17718 		/*
17719 		 * As part of our recovery attempt for the NOT READY
17720 		 * condition, we issue a START STOP UNIT command. However
17721 		 * we want to wait for a short delay before attempting this
17722 		 * as there may still be more commands coming back from the
17723 		 * target with the check condition. To do this we use
17724 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17725 		 * the delay interval expires. (sd_start_stop_unit_callback()
17726 		 * dispatches sd_start_stop_unit_task(), which will issue
17727 		 * the actual START STOP UNIT command.) The delay interval
17728 		 * is one-half of the delay that we will use to retry the
17729 		 * command that generated the NOT READY condition.
17730 		 *
17731 		 * Note that we could just dispatch sd_start_stop_unit_task()
17732 		 * from here and allow it to sleep for the delay interval,
17733 		 * but then we would be tying up the taskq thread
17734 		 * unnecessarily for the duration of the delay.
17735 		 *
17736 		 * Do not issue the START STOP UNIT if the current command
17737 		 * is already a START STOP UNIT.
17738 		 */
17739 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17740 			break;
17741 		}
17742 
17743 		/*
17744 		 * Do not schedule the timeout if one is already pending.
17745 		 */
17746 		if (un->un_startstop_timeid != NULL) {
17747 			SD_INFO(SD_LOG_ERROR, un,
17748 			    "sd_sense_key_not_ready: restart already issued to"
17749 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17750 			    ddi_get_instance(SD_DEVINFO(un)));
17751 			break;
17752 		}
17753 
17754 		/*
17755 		 * Schedule the START STOP UNIT command, then queue the command
17756 		 * for a retry.
17757 		 *
17758 		 * Note: A timeout is not scheduled for this retry because we
17759 		 * want the retry to be serial with the START_STOP_UNIT. The
17760 		 * retry will be started when the START_STOP_UNIT is completed
17761 		 * in sd_start_stop_unit_task.
17762 		 */
17763 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17764 		    un, SD_BSY_TIMEOUT / 2);
17765 		xp->xb_retry_count++;
17766 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17767 		return;
17768 
17769 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17770 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17771 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17772 			    "unit does not respond to selection\n");
17773 		}
17774 		break;
17775 
17776 	case 0x3A:	/* MEDIUM NOT PRESENT */
17777 		if (sd_error_level >= SCSI_ERR_FATAL) {
17778 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17779 			    "Caddy not inserted in drive\n");
17780 		}
17781 
17782 		sr_ejected(un);
17783 		un->un_mediastate = DKIO_EJECTED;
17784 		/* The state has changed, inform the media watch routines */
17785 		cv_broadcast(&un->un_state_cv);
17786 		/* Just fail if no media is present in the drive. */
17787 		goto fail_command;
17788 
17789 	default:
17790 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17791 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17792 			    "Unit not Ready. Additional sense code 0x%x\n",
17793 			    asc);
17794 		}
17795 		break;
17796 	}
17797 
17798 do_retry:
17799 
17800 	/*
17801 	 * Retry the command, as some targets may report NOT READY for
17802 	 * several seconds after being reset.
17803 	 */
17804 	xp->xb_retry_count++;
17805 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17806 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17807 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17808 
17809 	return;
17810 
17811 fail_command:
17812 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17813 	sd_return_failed_command(un, bp, EIO);
17814 }
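
/*
 * Timeline sketch (editorial) for the LOGICAL UNIT NOT READY recovery
 * above:
 *
 *	t0:			NOT READY check condition arrives
 *	t0 + SD_BSY_TIMEOUT/2:	timeout fires
 *				sd_start_stop_unit_callback(), which
 *				dispatches sd_start_stop_unit_task() to
 *				issue START STOP UNIT
 *	START STOP completes:	the retry queued by sd_set_retry_bp()
 *				is started, serial with the START STOP
 */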
17815 
17816 
17817 
17818 /*
17819  *    Function: sd_sense_key_medium_or_hardware_error
17820  *
17821  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17822  *		sense key.
17823  *
17824  *     Context: May be called from interrupt context
17825  */
17826 
17827 static void
17828 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17829 	uint8_t *sense_datap,
17830 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17831 {
17832 	struct sd_sense_info	si;
17833 	uint8_t sense_key = scsi_sense_key(sense_datap);
17834 	uint8_t asc = scsi_sense_asc(sense_datap);
17835 
17836 	ASSERT(un != NULL);
17837 	ASSERT(mutex_owned(SD_MUTEX(un)));
17838 	ASSERT(bp != NULL);
17839 	ASSERT(xp != NULL);
17840 	ASSERT(pktp != NULL);
17841 
17842 	si.ssi_severity = SCSI_ERR_FATAL;
17843 	si.ssi_pfa_flag = FALSE;
17844 
17845 	if (sense_key == KEY_MEDIUM_ERROR) {
17846 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17847 	}
17848 
17849 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17850 
17851 	if ((un->un_reset_retry_count != 0) &&
17852 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17853 		mutex_exit(SD_MUTEX(un));
17854 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17855 		if (un->un_f_allow_bus_device_reset == TRUE) {
17856 
17857 			boolean_t try_resetting_target = B_TRUE;
17858 
17859 			/*
17860 			 * We need to be able to handle specific ASC when we are
17861 			 * handling a KEY_HARDWARE_ERROR. In particular
17862 			 * taking the default action of resetting the target may
17863 			 * not be the appropriate way to attempt recovery.
17864 			 * Resetting a target because of a single LUN failure
17865 			 * victimizes all LUNs on that target.
17866 			 *
17867 			 * This is true for the LSI arrays: if an LSI
17868 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17869 			 * should trust it.
17870 			 */
17871 
17872 			if (sense_key == KEY_HARDWARE_ERROR) {
17873 				switch (asc) {
17874 				case 0x84:
17875 					if (SD_IS_LSI(un)) {
17876 						try_resetting_target = B_FALSE;
17877 					}
17878 					break;
17879 				default:
17880 					break;
17881 				}
17882 			}
17883 
17884 			if (try_resetting_target == B_TRUE) {
17885 				int reset_retval = 0;
17886 				if (un->un_f_lun_reset_enabled == TRUE) {
17887 					SD_TRACE(SD_LOG_IO_CORE, un,
17888 					    "sd_sense_key_medium_or_hardware_"
17889 					    "error: issuing RESET_LUN\n");
17890 					reset_retval =
17891 					    scsi_reset(SD_ADDRESS(un),
17892 					    RESET_LUN);
17893 				}
17894 				if (reset_retval == 0) {
17895 					SD_TRACE(SD_LOG_IO_CORE, un,
17896 					    "sd_sense_key_medium_or_hardware_"
17897 					    "error: issuing RESET_TARGET\n");
17898 					(void) scsi_reset(SD_ADDRESS(un),
17899 					    RESET_TARGET);
17900 				}
17901 			}
17902 		}
17903 		mutex_enter(SD_MUTEX(un));
17904 	}
17905 
17906 	/*
17907 	 * This really ought to be a fatal error, but we will retry anyway
17908 	 * as some drives report this as a spurious error.
17909 	 */
17910 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17911 	    &si, EIO, (clock_t)0, NULL);
17912 }
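
/*
 * Editorial note: scsi_reset(9F) returns 1 on success and 0 on
 * failure, so the escalation above reads: attempt RESET_LUN when LUN
 * reset is enabled, and fall back to RESET_TARGET only if that fails
 * (or was never attempted, leaving reset_retval at 0).  SD_MUTEX is
 * dropped across the resets because the HBA's reset entry point is
 * called outside the driver's own locking.
 */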
17913 
17914 
17915 
17916 /*
17917  *    Function: sd_sense_key_illegal_request
17918  *
17919  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17920  *
17921  *     Context: May be called from interrupt context
17922  */
17923 
17924 static void
17925 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17926 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17927 {
17928 	struct sd_sense_info	si;
17929 
17930 	ASSERT(un != NULL);
17931 	ASSERT(mutex_owned(SD_MUTEX(un)));
17932 	ASSERT(bp != NULL);
17933 	ASSERT(xp != NULL);
17934 	ASSERT(pktp != NULL);
17935 
17936 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17937 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17938 
17939 	si.ssi_severity = SCSI_ERR_INFO;
17940 	si.ssi_pfa_flag = FALSE;
17941 
17942 	/* Pointless to retry if the target thinks it's an illegal request */
17943 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17944 	sd_return_failed_command(un, bp, EIO);
17945 }
17946 
17947 
17948 
17949 
17950 /*
17951  *    Function: sd_sense_key_unit_attention
17952  *
17953  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17954  *
17955  *     Context: May be called from interrupt context
17956  */
17957 
17958 static void
17959 sd_sense_key_unit_attention(struct sd_lun *un,
17960 	uint8_t *sense_datap,
17961 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17962 {
17963 	/*
17964 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17965 	 * like Sonoma can return UNIT ATTENTION for close to a minute
17966 	 * under certain conditions.
17967 	 */
17968 	int	retry_check_flag = SD_RETRIES_UA;
17969 	boolean_t	kstat_updated = B_FALSE;
17970 	struct	sd_sense_info		si;
17971 	uint8_t asc = scsi_sense_asc(sense_datap);
17972 
17973 	ASSERT(un != NULL);
17974 	ASSERT(mutex_owned(SD_MUTEX(un)));
17975 	ASSERT(bp != NULL);
17976 	ASSERT(xp != NULL);
17977 	ASSERT(pktp != NULL);
17978 
17979 	si.ssi_severity = SCSI_ERR_INFO;
17980 	si.ssi_pfa_flag = FALSE;
17981 
17982 
17983 	switch (asc) {
17984 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17985 		if (sd_report_pfa != 0) {
17986 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17987 			si.ssi_pfa_flag = TRUE;
17988 			retry_check_flag = SD_RETRIES_STANDARD;
17989 			goto do_retry;
17990 		}
17991 
17992 		break;
17993 
17994 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17995 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17996 			un->un_resvd_status |=
17997 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17998 		}
17999 #ifdef _LP64
18000 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
18001 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
18002 			    un, KM_NOSLEEP) == 0) {
18003 				/*
18004 				 * If we can't dispatch the task we'll just
18005 				 * live without descriptor sense.  We can
18006 				 * try again on the next "unit attention"
18007 				 */
18008 				SD_ERROR(SD_LOG_ERROR, un,
18009 				    "sd_sense_key_unit_attention: "
18010 				    "Could not dispatch "
18011 				    "sd_reenable_dsense_task\n");
18012 			}
18013 		}
18014 #endif /* _LP64 */
18015 		/* FALLTHRU */
18016 
18017 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
18018 		if (!un->un_f_has_removable_media) {
18019 			break;
18020 		}
18021 
18022 		/*
18023 		 * When we get a unit attention from a removable-media device,
18024 		 * it may be in a state that will take a long time to recover
18025 		 * (e.g., from a reset).  Since we are executing in interrupt
18026 		 * context here, we cannot wait around for the device to come
18027 		 * back. So hand this command off to sd_media_change_task()
18028 		 * for deferred processing under taskq thread context. (Note
18029 		 * that the command still may be failed if a problem is
18030 		 * encountered at a later time.)
18031 		 */
18032 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
18033 		    KM_NOSLEEP) == 0) {
18034 			/*
18035 			 * Cannot dispatch the request so fail the command.
18036 			 */
18037 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18038 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18039 			si.ssi_severity = SCSI_ERR_FATAL;
18040 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18041 			sd_return_failed_command(un, bp, EIO);
18042 		}
18043 
18044 		/*
18045 		 * If we failed to dispatch sd_media_change_task(), the kstat
18046 		 * was already updated above. If the dispatch succeeded, the
18047 		 * kstat will be updated later if the task encounters an error.
18048 		 * Either way, set the kstat_updated flag here.
18049 		 */
18050 		kstat_updated = B_TRUE;
18051 
18052 		/*
18053 		 * Either the command has been successfully dispatched to a
18054 		 * task Q for retrying, or the dispatch failed. In either case
18055 		 * do NOT retry again by calling sd_retry_command. Doing so
18056 		 * would set up two retries of the same command; when one
18057 		 * completes and frees the resources, the other would access
18058 		 * freed memory, a bad thing.
18059 		 */
18060 		return;
18061 
18062 	default:
18063 		break;
18064 	}
18065 
18066 	/*
18067 	 * Update kstat if we haven't done that.
18068 	 */
18069 	if (!kstat_updated) {
18070 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18071 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18072 	}
18073 
18074 do_retry:
18075 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18076 	    EIO, SD_UA_RETRY_DELAY, NULL);
18077 }
18078 
18079 
18080 
18081 /*
18082  *    Function: sd_sense_key_fail_command
18083  *
18084  * Description: Used to fail a command when we don't like the sense key that
18085  *		was returned.
18086  *
18087  *     Context: May be called from interrupt context
18088  */
18089 
18090 static void
18091 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18092 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18093 {
18094 	struct sd_sense_info	si;
18095 
18096 	ASSERT(un != NULL);
18097 	ASSERT(mutex_owned(SD_MUTEX(un)));
18098 	ASSERT(bp != NULL);
18099 	ASSERT(xp != NULL);
18100 	ASSERT(pktp != NULL);
18101 
18102 	si.ssi_severity = SCSI_ERR_FATAL;
18103 	si.ssi_pfa_flag = FALSE;
18104 
18105 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18106 	sd_return_failed_command(un, bp, EIO);
18107 }
18108 
18109 
18110 
18111 /*
18112  *    Function: sd_sense_key_blank_check
18113  *
18114  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18115  *		Has no monetary connotation.
18116  *
18117  *     Context: May be called from interrupt context
18118  */
18119 
18120 static void
18121 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18122 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18123 {
18124 	struct sd_sense_info	si;
18125 
18126 	ASSERT(un != NULL);
18127 	ASSERT(mutex_owned(SD_MUTEX(un)));
18128 	ASSERT(bp != NULL);
18129 	ASSERT(xp != NULL);
18130 	ASSERT(pktp != NULL);
18131 
18132 	/*
18133 	 * Blank check is not fatal for removable devices, therefore
18134 	 * it does not require a console message.
18135 	 */
18136 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18137 	    SCSI_ERR_FATAL;
18138 	si.ssi_pfa_flag = FALSE;
18139 
18140 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18141 	sd_return_failed_command(un, bp, EIO);
18142 }
18143 
18144 
18145 
18146 
18147 /*
18148  *    Function: sd_sense_key_aborted_command
18149  *
18150  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18151  *
18152  *     Context: May be called from interrupt context
18153  */
18154 
18155 static void
18156 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18157 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18158 {
18159 	struct sd_sense_info	si;
18160 
18161 	ASSERT(un != NULL);
18162 	ASSERT(mutex_owned(SD_MUTEX(un)));
18163 	ASSERT(bp != NULL);
18164 	ASSERT(xp != NULL);
18165 	ASSERT(pktp != NULL);
18166 
18167 	si.ssi_severity = SCSI_ERR_FATAL;
18168 	si.ssi_pfa_flag = FALSE;
18169 
18170 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18171 
18172 	/*
18173 	 * This really ought to be a fatal error, but we will retry anyway
18174 	 * as some drives report this as a spurious error.
18175 	 */
18176 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18177 	    &si, EIO, (clock_t)0, NULL);
18178 }
18179 
18180 
18181 
18182 /*
18183  *    Function: sd_sense_key_default
18184  *
18185  * Description: Default recovery action for several SCSI sense keys (basically
18186  *		attempts a retry).
18187  *
18188  *     Context: May be called from interrupt context
18189  */
18190 
18191 static void
18192 sd_sense_key_default(struct sd_lun *un,
18193 	uint8_t *sense_datap,
18194 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18195 {
18196 	struct sd_sense_info	si;
18197 	uint8_t sense_key = scsi_sense_key(sense_datap);
18198 
18199 	ASSERT(un != NULL);
18200 	ASSERT(mutex_owned(SD_MUTEX(un)));
18201 	ASSERT(bp != NULL);
18202 	ASSERT(xp != NULL);
18203 	ASSERT(pktp != NULL);
18204 
18205 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18206 
18207 	/*
18208 	 * Undecoded sense key.	Attempt retries and hope that will fix
18209 	 * the problem.  Otherwise, we're dead.
18210 	 */
18211 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18212 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18213 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18214 	}
18215 
18216 	si.ssi_severity = SCSI_ERR_FATAL;
18217 	si.ssi_pfa_flag = FALSE;
18218 
18219 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18220 	    &si, EIO, (clock_t)0, NULL);
18221 }
18222 
18223 
18224 
18225 /*
18226  *    Function: sd_print_retry_msg
18227  *
18228  * Description: Print a message indicating the retry action being taken.
18229  *
18230  *   Arguments: un - ptr to associated softstate
18231  *		bp - ptr to buf(9S) for the command
18232  *		arg - not used.
18233  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18234  *			or SD_NO_RETRY_ISSUED
18235  *
18236  *     Context: May be called from interrupt context
18237  */
18238 /* ARGSUSED */
18239 static void
18240 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18241 {
18242 	struct sd_xbuf	*xp;
18243 	struct scsi_pkt *pktp;
18244 	char *reasonp;
18245 	char *msgp;
18246 
18247 	ASSERT(un != NULL);
18248 	ASSERT(mutex_owned(SD_MUTEX(un)));
18249 	ASSERT(bp != NULL);
18250 	pktp = SD_GET_PKTP(bp);
18251 	ASSERT(pktp != NULL);
18252 	xp = SD_GET_XBUF(bp);
18253 	ASSERT(xp != NULL);
18254 
18255 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18256 	mutex_enter(&un->un_pm_mutex);
18257 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18258 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18259 	    (pktp->pkt_flags & FLAG_SILENT)) {
18260 		mutex_exit(&un->un_pm_mutex);
18261 		goto update_pkt_reason;
18262 	}
18263 	mutex_exit(&un->un_pm_mutex);
18264 
18265 	/*
18266 	 * Suppress messages if they are all the same pkt_reason; with
18267 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18268 	 * If we are in panic, then suppress the retry messages.
18269 	 */
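	/*
	 * For illustration (hypothetical scenario): if a cable pull fails
	 * a queue of 200 commands with CMD_TRAN_ERR, only the first retry
	 * logs a "SCSI transport failed" message; the rest are suppressed
	 * by the un_last_pkt_reason comparison below, unless sd_error_level
	 * is set to SCSI_ERR_ALL.
	 */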
18270 	switch (flag) {
18271 	case SD_NO_RETRY_ISSUED:
18272 		msgp = "giving up";
18273 		break;
18274 	case SD_IMMEDIATE_RETRY_ISSUED:
18275 	case SD_DELAYED_RETRY_ISSUED:
18276 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18277 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18278 		    (sd_error_level != SCSI_ERR_ALL))) {
18279 			return;
18280 		}
18281 		msgp = "retrying command";
18282 		break;
18283 	default:
18284 		goto update_pkt_reason;
18285 	}
18286 
18287 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18288 	    scsi_rname(pktp->pkt_reason));
18289 
18290 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18291 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18292 
18293 update_pkt_reason:
18294 	/*
18295 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18296 	 * This is to prevent multiple console messages for the same failure
18297 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18298 	 * when the command is retried successfully because there still may be
18299 	 * more commands coming back with the same value of pktp->pkt_reason.
18300 	 */
18301 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18302 		un->un_last_pkt_reason = pktp->pkt_reason;
18303 	}
18304 }
18305 
18306 
18307 /*
18308  *    Function: sd_print_cmd_incomplete_msg
18309  *
18310  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18311  *
18312  *   Arguments: un - ptr to associated softstate
18313  *		bp - ptr to buf(9S) for the command
18314  *		arg - passed to sd_print_retry_msg()
18315  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18316  *			or SD_NO_RETRY_ISSUED
18317  *
18318  *     Context: May be called from interrupt context
18319  */
18320 
18321 static void
18322 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18323 	int code)
18324 {
18325 	dev_info_t	*dip;
18326 
18327 	ASSERT(un != NULL);
18328 	ASSERT(mutex_owned(SD_MUTEX(un)));
18329 	ASSERT(bp != NULL);
18330 
18331 	switch (code) {
18332 	case SD_NO_RETRY_ISSUED:
18333 		/* Command was failed. Someone turned off this target? */
18334 		if (un->un_state != SD_STATE_OFFLINE) {
18335 			/*
18336 			 * Suppress message if we are detaching and
18337 			 * device has been disconnected
18338 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18339 			 * private interface and not part of the DDI
18340 			 */
18341 			dip = un->un_sd->sd_dev;
18342 			if (!(DEVI_IS_DETACHING(dip) &&
18343 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18344 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18345 				    "disk not responding to selection\n");
18346 			}
18347 			New_state(un, SD_STATE_OFFLINE);
18348 		}
18349 		break;
18350 
18351 	case SD_DELAYED_RETRY_ISSUED:
18352 	case SD_IMMEDIATE_RETRY_ISSUED:
18353 	default:
18354 		/* Command was successfully queued for retry */
18355 		sd_print_retry_msg(un, bp, arg, code);
18356 		break;
18357 	}
18358 }
18359 
18360 
18361 /*
18362  *    Function: sd_pkt_reason_cmd_incomplete
18363  *
18364  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18365  *
18366  *     Context: May be called from interrupt context
18367  */
18368 
18369 static void
18370 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18371 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18372 {
18373 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18374 
18375 	ASSERT(un != NULL);
18376 	ASSERT(mutex_owned(SD_MUTEX(un)));
18377 	ASSERT(bp != NULL);
18378 	ASSERT(xp != NULL);
18379 	ASSERT(pktp != NULL);
18380 
18381 	/* Do not do a reset if selection did not complete */
18382 	/* Note: Should this not just check the bit? */
18383 	if (pktp->pkt_state != STATE_GOT_BUS) {
18384 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18385 		sd_reset_target(un, pktp);
18386 	}
18387 
18388 	/*
18389 	 * If the target was not successfully selected, then set
18390 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18391 	 * with the target, and further retries and/or commands are
18392 	 * likely to take a long time.
18393 	 */
18394 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18395 		flag |= SD_RETRIES_FAILFAST;
18396 	}
18397 
18398 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18399 
18400 	sd_retry_command(un, bp, flag,
18401 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18402 }
18403 
18404 
18405 
18406 /*
18407  *    Function: sd_pkt_reason_cmd_tran_err
18408  *
18409  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18410  *
18411  *     Context: May be called from interrupt context
18412  */
18413 
18414 static void
18415 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18416 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18417 {
18418 	ASSERT(un != NULL);
18419 	ASSERT(mutex_owned(SD_MUTEX(un)));
18420 	ASSERT(bp != NULL);
18421 	ASSERT(xp != NULL);
18422 	ASSERT(pktp != NULL);
18423 
18424 	/*
18425 	 * Do not reset if we got a parity error, or if
18426 	 * selection did not complete.
18427 	 */
18428 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18429 	/* Note: Should this not just check the bit for pkt_state? */
18430 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18431 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18432 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18433 		sd_reset_target(un, pktp);
18434 	}
18435 
18436 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18437 
18438 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18439 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18440 }
18441 
18442 
18443 
18444 /*
18445  *    Function: sd_pkt_reason_cmd_reset
18446  *
18447  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18448  *
18449  *     Context: May be called from interrupt context
18450  */
18451 
18452 static void
18453 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18454 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18455 {
18456 	ASSERT(un != NULL);
18457 	ASSERT(mutex_owned(SD_MUTEX(un)));
18458 	ASSERT(bp != NULL);
18459 	ASSERT(xp != NULL);
18460 	ASSERT(pktp != NULL);
18461 
18462 	/* The target may still be running the command, so try to reset. */
18463 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18464 	sd_reset_target(un, pktp);
18465 
18466 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18467 
18468 	/*
18469 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18470 	 * reset because another target on this bus caused it. The target
18471 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18472 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18473 	 */
18474 
18475 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18476 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18477 }
18478 
18479 
18480 
18481 
18482 /*
18483  *    Function: sd_pkt_reason_cmd_aborted
18484  *
18485  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18486  *
18487  *     Context: May be called from interrupt context
18488  */
18489 
18490 static void
18491 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18492 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18493 {
18494 	ASSERT(un != NULL);
18495 	ASSERT(mutex_owned(SD_MUTEX(un)));
18496 	ASSERT(bp != NULL);
18497 	ASSERT(xp != NULL);
18498 	ASSERT(pktp != NULL);
18499 
18500 	/* The target may still be running the command, so try to reset. */
18501 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18502 	sd_reset_target(un, pktp);
18503 
18504 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18505 
18506 	/*
18507 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18508 	 * aborted because another target on this bus caused it. The target
18509 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18510 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18511 	 */
18512 
18513 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18514 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18515 }
18516 
18517 
18518 
18519 /*
18520  *    Function: sd_pkt_reason_cmd_timeout
18521  *
18522  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18523  *
18524  *     Context: May be called from interrupt context
18525  */
18526 
18527 static void
18528 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18529 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18530 {
18531 	ASSERT(un != NULL);
18532 	ASSERT(mutex_owned(SD_MUTEX(un)));
18533 	ASSERT(bp != NULL);
18534 	ASSERT(xp != NULL);
18535 	ASSERT(pktp != NULL);
18536 
18537 
18538 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18539 	sd_reset_target(un, pktp);
18540 
18541 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18542 
18543 	/*
18544 	 * A command timeout indicates that we could not establish
18545 	 * communication with the target, so set SD_RETRIES_FAILFAST
18546 	 * as further retries/commands are likely to take a long time.
18547 	 */
18548 	sd_retry_command(un, bp,
18549 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18550 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18551 }
18552 
18553 
18554 
18555 /*
18556  *    Function: sd_pkt_reason_cmd_unx_bus_free
18557  *
18558  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18559  *
18560  *     Context: May be called from interrupt context
18561  */
18562 
18563 static void
18564 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18565 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18566 {
18567 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18568 
18569 	ASSERT(un != NULL);
18570 	ASSERT(mutex_owned(SD_MUTEX(un)));
18571 	ASSERT(bp != NULL);
18572 	ASSERT(xp != NULL);
18573 	ASSERT(pktp != NULL);
18574 
18575 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18576 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18577 
18578 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18579 	    sd_print_retry_msg : NULL;
18580 
18581 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18582 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18583 }
18584 
18585 
18586 /*
18587  *    Function: sd_pkt_reason_cmd_tag_reject
18588  *
18589  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18590  *
18591  *     Context: May be called from interrupt context
18592  */
18593 
18594 static void
18595 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18596 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18597 {
18598 	ASSERT(un != NULL);
18599 	ASSERT(mutex_owned(SD_MUTEX(un)));
18600 	ASSERT(bp != NULL);
18601 	ASSERT(xp != NULL);
18602 	ASSERT(pktp != NULL);
18603 
18604 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18605 	pktp->pkt_flags = 0;
18606 	un->un_tagflags = 0;
18607 	if (un->un_f_opt_queueing == TRUE) {
18608 		un->un_throttle = min(un->un_throttle, 3);
18609 	} else {
18610 		un->un_throttle = 1;
18611 	}
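	/*
	 * At this point tagged queueing is being turned off for this
	 * target: un_tagflags is cleared and un_throttle is clamped (to at
	 * most 3 when un_f_opt_queueing is set, otherwise to 1) so that
	 * few commands remain outstanding. The mutex is dropped around the
	 * scsi_ifsetcap(9F) call below, which tells the HBA to disable
	 * "tagged-qing" for this device.
	 */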
18612 	mutex_exit(SD_MUTEX(un));
18613 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18614 	mutex_enter(SD_MUTEX(un));
18615 
18616 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18617 
18618 	/* Legacy behavior: do not check retry counts here. */
18619 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18620 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18621 }
18622 
18623 
18624 /*
18625  *    Function: sd_pkt_reason_default
18626  *
18627  * Description: Default recovery actions for SCSA pkt_reason values that
18628  *		do not have more explicit recovery actions.
18629  *
18630  *     Context: May be called from interrupt context
18631  */
18632 
18633 static void
18634 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18635 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18636 {
18637 	ASSERT(un != NULL);
18638 	ASSERT(mutex_owned(SD_MUTEX(un)));
18639 	ASSERT(bp != NULL);
18640 	ASSERT(xp != NULL);
18641 	ASSERT(pktp != NULL);
18642 
18643 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18644 	sd_reset_target(un, pktp);
18645 
18646 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18647 
18648 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18649 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18650 }
18651 
18652 
18653 
18654 /*
18655  *    Function: sd_pkt_status_check_condition
18656  *
18657  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18658  *
18659  *     Context: May be called from interrupt context
18660  */
18661 
18662 static void
18663 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18664 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18665 {
18666 	ASSERT(un != NULL);
18667 	ASSERT(mutex_owned(SD_MUTEX(un)));
18668 	ASSERT(bp != NULL);
18669 	ASSERT(xp != NULL);
18670 	ASSERT(pktp != NULL);
18671 
18672 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18673 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18674 
18675 	/*
18676 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18677 	 * command will be retried after the request sense). Otherwise, retry
18678 	 * the command. Note: we are issuing the request sense even though the
18679 	 * retry limit may have been reached for the failed command.
18680 	 */
18681 	if (un->un_f_arq_enabled == FALSE) {
18682 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18683 		    "no ARQ, sending request sense command\n");
18684 		sd_send_request_sense_command(un, bp, pktp);
18685 	} else {
18686 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18687 		    "ARQ, retrying request sense command\n");
18688 #if defined(__i386) || defined(__amd64)
18689 		/*
18690 		 * The delay value used here needs to be adjusted whenever
18691 		 * SD_RETRY_DELAY changes in sddef.h.
18692 		 */
18693 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18694 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18695 		    NULL);
18696 #else
18697 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18698 		    EIO, SD_RETRY_DELAY, NULL);
18699 #endif
18700 	}
18701 
18702 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18703 }
18704 
18705 
18706 /*
18707  *    Function: sd_pkt_status_busy
18708  *
18709  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18710  *
18711  *     Context: May be called from interrupt context
18712  */
18713 
18714 static void
18715 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18716 	struct scsi_pkt *pktp)
18717 {
18718 	ASSERT(un != NULL);
18719 	ASSERT(mutex_owned(SD_MUTEX(un)));
18720 	ASSERT(bp != NULL);
18721 	ASSERT(xp != NULL);
18722 	ASSERT(pktp != NULL);
18723 
18724 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18725 	    "sd_pkt_status_busy: entry\n");
18726 
18727 	/* If retries are exhausted, just fail the command. */
18728 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18729 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18730 		    "device busy too long\n");
18731 		sd_return_failed_command(un, bp, EIO);
18732 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18733 		    "sd_pkt_status_busy: exit\n");
18734 		return;
18735 	}
18736 	xp->xb_retry_count++;
18737 
18738 	/*
18739 	 * Try to reset the target. However, we do not want to perform
18740 	 * more than one reset if the device continues to fail. The reset
18741 	 * will be performed when the retry count reaches the reset
18742 	 * threshold.  This threshold should be set such that at least
18743 	 * one retry is issued before the reset is performed.
18744 	 */
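	/*
	 * For illustration (hypothetical value): with an
	 * un_reset_retry_count of 5, the first four BUSY retries are plain
	 * re-submissions, and the reset sequence below runs only when
	 * xb_retry_count reaches 5. The (un_reset_retry_count < 2) clamp
	 * guarantees at least one plain retry even if the count is tuned
	 * lower.
	 */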
18745 	if (xp->xb_retry_count ==
18746 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18747 		int rval = 0;
18748 		mutex_exit(SD_MUTEX(un));
18749 		if (un->un_f_allow_bus_device_reset == TRUE) {
18750 			/*
18751 			 * First try to reset the LUN; if we cannot then
18752 			 * try to reset the target.
18753 			 */
18754 			if (un->un_f_lun_reset_enabled == TRUE) {
18755 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18756 				    "sd_pkt_status_busy: RESET_LUN\n");
18757 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18758 			}
18759 			if (rval == 0) {
18760 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18761 				    "sd_pkt_status_busy: RESET_TARGET\n");
18762 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18763 			}
18764 		}
18765 		if (rval == 0) {
18766 			/*
18767 			 * If the RESET_LUN and/or RESET_TARGET failed,
18768 			 * try RESET_ALL
18769 			 */
18770 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18771 			    "sd_pkt_status_busy: RESET_ALL\n");
18772 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18773 		}
18774 		mutex_enter(SD_MUTEX(un));
18775 		if (rval == 0) {
18776 			/*
18777 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18778 			 * At this point we give up & fail the command.
18779 			 */
18780 			sd_return_failed_command(un, bp, EIO);
18781 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18782 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18783 			return;
18784 		}
18785 	}
18786 
18787 	/*
18788 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18789 	 * we have already checked the retry counts above.
18790 	 */
18791 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18792 	    EIO, SD_BSY_TIMEOUT, NULL);
18793 
18794 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18795 	    "sd_pkt_status_busy: exit\n");
18796 }
18797 
18798 
18799 /*
18800  *    Function: sd_pkt_status_reservation_conflict
18801  *
18802  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18803  *		command status.
18804  *
18805  *     Context: May be called from interrupt context
18806  */
18807 
18808 static void
18809 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18810 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18811 {
18812 	ASSERT(un != NULL);
18813 	ASSERT(mutex_owned(SD_MUTEX(un)));
18814 	ASSERT(bp != NULL);
18815 	ASSERT(xp != NULL);
18816 	ASSERT(pktp != NULL);
18817 
18818 	/*
18819 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the conflict
18820 	 * could be due to various causes: incorrect keys, an unregistered
18821 	 * initiator, no reservation held, etc. Return EACCES to the caller.
18822 	 */
18823 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18824 		int cmd = SD_GET_PKT_OPCODE(pktp);
18825 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18826 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18827 			sd_return_failed_command(un, bp, EACCES);
18828 			return;
18829 		}
18830 	}
18831 
18832 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18833 
18834 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18835 		if (sd_failfast_enable != 0) {
18836 			/* By definition, we must panic here.... */
18837 			sd_panic_for_res_conflict(un);
18838 			/*NOTREACHED*/
18839 		}
18840 		SD_ERROR(SD_LOG_IO, un,
18841 		    "sd_handle_resv_conflict: Disk Reserved\n");
18842 		sd_return_failed_command(un, bp, EACCES);
18843 		return;
18844 	}
18845 
18846 	/*
18847 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18848 	 * property is set (default is 1). Retries will not succeed
18849 	 * on a disk reserved by another initiator. HA systems
18850 	 * may reset this via sd.conf to avoid these retries.
18851 	 *
18852 	 * Note: The legacy return code for this failure is EIO, however EACCES
18853 	 * seems more appropriate for a reservation conflict.
18854 	 */
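	/*
	 * For example, an HA configuration that needs to fail fast on a
	 * reserved disk could disable these retries by setting the tunable
	 * to zero (e.g., a hypothetical /etc/system entry such as
	 * "set sd:sd_retry_on_reservation_conflict = 0", or the sd.conf
	 * mechanism mentioned above).
	 */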
18855 	if (sd_retry_on_reservation_conflict == 0) {
18856 		SD_ERROR(SD_LOG_IO, un,
18857 		    "sd_handle_resv_conflict: Device Reserved\n");
18858 		sd_return_failed_command(un, bp, EIO);
18859 		return;
18860 	}
18861 
18862 	/*
18863 	 * Retry the command if we can.
18864 	 *
18865 	 * Note: The legacy return code for this failure is EIO, however EACCES
18866 	 * seems more appropriate for a reservation conflict.
18867 	 */
18868 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18869 	    (clock_t)2, NULL);
18870 }
18871 
18872 
18873 
18874 /*
18875  *    Function: sd_pkt_status_qfull
18876  *
18877  * Description: Handle a QUEUE FULL condition from the target.  This can
18878  *		occur if the HBA does not handle the queue full condition.
18879  *		(Basically this means third-party HBAs as Sun HBAs will
18880  *		(Basically this means third-party HBAs, since Sun HBAs will
18881  *		some commands already in the transport, then the queue full
18882  *		has occurred because the queue for this nexus is actually
18883  *		full. If there are no commands in the transport, then the
18884  *		queue full results from some other initiator or lun
18885  *		consuming all the resources at the target.
18886  *
18887  *     Context: May be called from interrupt context
18888  */
18889 
18890 static void
18891 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18892 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18893 {
18894 	ASSERT(un != NULL);
18895 	ASSERT(mutex_owned(SD_MUTEX(un)));
18896 	ASSERT(bp != NULL);
18897 	ASSERT(xp != NULL);
18898 	ASSERT(pktp != NULL);
18899 
18900 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18901 	    "sd_pkt_status_qfull: entry\n");
18902 
18903 	/*
18904 	 * Just lower the QFULL throttle and retry the command.  Note that
18905 	 * we do not limit the number of retries here.
18906 	 */
18907 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18908 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18909 	    SD_RESTART_TIMEOUT, NULL);
18910 
18911 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18912 	    "sd_pkt_status_qfull: exit\n");
18913 }
18914 
18915 
18916 /*
18917  *    Function: sd_reset_target
18918  *
18919  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18920  *		RESET_TARGET, or RESET_ALL.
18921  *
18922  *     Context: May be called under interrupt context.
18923  */
18924 
18925 static void
18926 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18927 {
18928 	int rval = 0;
18929 
18930 	ASSERT(un != NULL);
18931 	ASSERT(mutex_owned(SD_MUTEX(un)));
18932 	ASSERT(pktp != NULL);
18933 
18934 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18935 
18936 	/*
18937 	 * No need to reset if the transport layer has already done so.
18938 	 */
18939 	if ((pktp->pkt_statistics &
18940 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18941 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18942 		    "sd_reset_target: no reset\n");
18943 		return;
18944 	}
18945 
18946 	mutex_exit(SD_MUTEX(un));
18947 
18948 	if (un->un_f_allow_bus_device_reset == TRUE) {
18949 		if (un->un_f_lun_reset_enabled == TRUE) {
18950 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18951 			    "sd_reset_target: RESET_LUN\n");
18952 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18953 		}
18954 		if (rval == 0) {
18955 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18956 			    "sd_reset_target: RESET_TARGET\n");
18957 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18958 		}
18959 	}
18960 
18961 	if (rval == 0) {
18962 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18963 		    "sd_reset_target: RESET_ALL\n");
18964 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18965 	}
18966 
18967 	mutex_enter(SD_MUTEX(un));
18968 
18969 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18970 }
18971 
18972 
18973 /*
18974  *    Function: sd_media_change_task
18975  *
18976  * Description: Recovery action for CDROM to become available.
18977  *
18978  *     Context: Executes in a taskq() thread context
18979  */
18980 
18981 static void
18982 sd_media_change_task(void *arg)
18983 {
18984 	struct	scsi_pkt	*pktp = arg;
18985 	struct	sd_lun		*un;
18986 	struct	buf		*bp;
18987 	struct	sd_xbuf		*xp;
18988 	int	err		= 0;
18989 	int	retry_count	= 0;
18990 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18991 	struct	sd_sense_info	si;
18992 
18993 	ASSERT(pktp != NULL);
18994 	bp = (struct buf *)pktp->pkt_private;
18995 	ASSERT(bp != NULL);
18996 	xp = SD_GET_XBUF(bp);
18997 	ASSERT(xp != NULL);
18998 	un = SD_GET_UN(bp);
18999 	ASSERT(un != NULL);
19000 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19001 	ASSERT(un->un_f_monitor_media_state);
19002 
19003 	si.ssi_severity = SCSI_ERR_INFO;
19004 	si.ssi_pfa_flag = FALSE;
19005 
19006 	/*
19007 	 * When a reset is issued on a CDROM, it takes a long time to
19008 	 * recover. The first few attempts to read the capacity and other
19009 	 * things related to handling the unit attention fail (with an ASC
19010 	 * of 0x04 and an ASCQ of 0x01). In that case we want to do enough
19011 	 * retries, while limiting the retries in other cases of genuine
19012 	 * failure, such as no media in the drive.
19013 	 */
19014 	while (retry_count++ < retry_limit) {
19015 		if ((err = sd_handle_mchange(un)) == 0) {
19016 			break;
19017 		}
19018 		if (err == EAGAIN) {
19019 			retry_limit = SD_UNIT_ATTENTION_RETRY;
19020 		}
19021 		/* Sleep for 0.5 sec. & try again */
19022 		delay(drv_usectohz(500000));
19023 	}
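	/*
	 * To illustrate the retry budget (assuming a hypothetical
	 * SD_UNIT_ATTENTION_RETRY of 40): the loop above starts with
	 * 40/10 = 4 attempts roughly 0.5 sec apart, and stretches the
	 * limit to the full 40 attempts (about 20 seconds) once
	 * sd_handle_mchange() returns EAGAIN, i.e. while the device is
	 * still becoming ready.
	 */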
19024 
19025 	/*
19026 	 * Dispatch (retry or fail) the original command here,
19027 	 * along with appropriate console messages....
19028 	 *
19029 	 * Must grab the mutex before calling sd_retry_command,
19030 	 * sd_print_sense_msg and sd_return_failed_command.
19031 	 */
19032 	mutex_enter(SD_MUTEX(un));
19033 	if (err != SD_CMD_SUCCESS) {
19034 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19035 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19036 		si.ssi_severity = SCSI_ERR_FATAL;
19037 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19038 		sd_return_failed_command(un, bp, EIO);
19039 	} else {
19040 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19041 		    &si, EIO, (clock_t)0, NULL);
19042 	}
19043 	mutex_exit(SD_MUTEX(un));
19044 }
19045 
19046 
19047 
19048 /*
19049  *    Function: sd_handle_mchange
19050  *
19051  * Description: Perform geometry validation & other recovery when CDROM
19052  *		has been removed from drive.
19053  *
19054  * Return Code: 0 for success
19055  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19056  *		sd_send_scsi_READ_CAPACITY()
19057  *
19058  *     Context: Executes in a taskq() thread context
19059  */
19060 
19061 static int
19062 sd_handle_mchange(struct sd_lun *un)
19063 {
19064 	uint64_t	capacity;
19065 	uint32_t	lbasize;
19066 	int		rval;
19067 
19068 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19069 	ASSERT(un->un_f_monitor_media_state);
19070 
19071 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19072 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19073 		return (rval);
19074 	}
19075 
19076 	mutex_enter(SD_MUTEX(un));
19077 	sd_update_block_info(un, lbasize, capacity);
19078 
19079 	if (un->un_errstats != NULL) {
19080 		struct	sd_errstats *stp =
19081 		    (struct sd_errstats *)un->un_errstats->ks_data;
19082 		stp->sd_capacity.value.ui64 = (uint64_t)
19083 		    ((uint64_t)un->un_blockcount *
19084 		    (uint64_t)un->un_tgt_blocksize);
19085 	}
19086 
19087 	/*
19088 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19089 	 * valid geometry.
19090 	 */
19091 	un->un_f_geometry_is_valid = FALSE;
19092 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19093 	if (un->un_f_geometry_is_valid == FALSE) {
19094 		mutex_exit(SD_MUTEX(un));
19095 		return (EIO);
19096 	}
19097 
19098 	mutex_exit(SD_MUTEX(un));
19099 
19100 	/*
19101 	 * Try to lock the door
19102 	 */
19103 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19104 	    SD_PATH_DIRECT_PRIORITY));
19105 }
19106 
19107 
19108 /*
19109  *    Function: sd_send_scsi_DOORLOCK
19110  *
19111  * Description: Issue the scsi DOOR LOCK command
19112  *
19113  *   Arguments: un    - pointer to driver soft state (unit) structure for
19114  *			this target.
19115  *		flag  - SD_REMOVAL_ALLOW
19116  *			SD_REMOVAL_PREVENT
19117  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19118  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19119  *			to use the USCSI "direct" chain and bypass the normal
19120  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19121  *			command is issued as part of an error recovery action.
19122  *
19123  * Return Code: 0   - Success
19124  *		errno return code from sd_send_scsi_cmd()
19125  *
19126  *     Context: Can sleep.
19127  */
19128 
19129 static int
19130 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19131 {
19132 	union scsi_cdb		cdb;
19133 	struct uscsi_cmd	ucmd_buf;
19134 	struct scsi_extended_sense	sense_buf;
19135 	int			status;
19136 
19137 	ASSERT(un != NULL);
19138 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19139 
19140 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19141 
19142 	/* already determined doorlock is not supported, fake success */
19143 	if (un->un_f_doorlock_supported == FALSE) {
19144 		return (0);
19145 	}
19146 
19147 	bzero(&cdb, sizeof (cdb));
19148 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19149 
19150 	cdb.scc_cmd = SCMD_DOORLOCK;
19151 	cdb.cdb_opaque[4] = (uchar_t)flag;
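	/*
	 * SCMD_DOORLOCK is the SCSI PREVENT/ALLOW MEDIUM REMOVAL command
	 * (opcode 0x1E); the Prevent field occupies the low-order bits of
	 * CDB byte 4, so SD_REMOVAL_PREVENT locks the door and
	 * SD_REMOVAL_ALLOW unlocks it.
	 */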
19152 
19153 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19154 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19155 	ucmd_buf.uscsi_bufaddr	= NULL;
19156 	ucmd_buf.uscsi_buflen	= 0;
19157 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19158 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19159 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19160 	ucmd_buf.uscsi_timeout	= 15;
19161 
19162 	SD_TRACE(SD_LOG_IO, un,
19163 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19164 
19165 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19166 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19167 
19168 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19169 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19170 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19171 		/* fake success and skip subsequent doorlock commands */
19172 		un->un_f_doorlock_supported = FALSE;
19173 		return (0);
19174 	}
19175 
19176 	return (status);
19177 }
19178 
19179 /*
19180  *    Function: sd_send_scsi_READ_CAPACITY
19181  *
19182  * Description: This routine uses the scsi READ CAPACITY command to determine
19183  *		the device capacity in number of blocks and the device native
19184  *		block size. If this function returns a failure, then the
19185  *		values in *capp and *lbap are undefined.  If the capacity
19186  *		returned is 0xffffffff then the lun is too large for a
19187  *		normal READ CAPACITY command and the results of a
19188  *		READ CAPACITY 16 will be used instead.
19189  *
19190  *   Arguments: un   - ptr to soft state struct for the target
19191  *		capp - ptr to unsigned 64-bit variable to receive the
19192  *			capacity value from the command.
19193  *		lbap - ptr to unsigned 32-bit variable to receive the
19194  *			block size value from the command
19195  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19196  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19197  *			to use the USCSI "direct" chain and bypass the normal
19198  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19199  *			command is issued as part of an error recovery action.
19200  *
19201  * Return Code: 0   - Success
19202  *		EIO - IO error
19203  *		EACCES - Reservation conflict detected
19204  *		EAGAIN - Device is becoming ready
19205  *		errno return code from sd_send_scsi_cmd()
19206  *
19207  *     Context: Can sleep.  Blocks until command completes.
19208  */
19209 
19210 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19211 
19212 static int
19213 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19214 	int path_flag)
19215 {
19216 	struct	scsi_extended_sense	sense_buf;
19217 	struct	uscsi_cmd	ucmd_buf;
19218 	union	scsi_cdb	cdb;
19219 	uint32_t		*capacity_buf;
19220 	uint64_t		capacity;
19221 	uint32_t		lbasize;
19222 	int			status;
19223 
19224 	ASSERT(un != NULL);
19225 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19226 	ASSERT(capp != NULL);
19227 	ASSERT(lbap != NULL);
19228 
19229 	SD_TRACE(SD_LOG_IO, un,
19230 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19231 
19232 	/*
19233 	 * First send a READ_CAPACITY command to the target.
19234 	 * (This command is mandatory under SCSI-2.)
19235 	 *
19236 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19237 	 * Medium Indicator bit is cleared.  The address field must be
19238 	 * zero if the PMI bit is zero.
19239 	 */
19240 	bzero(&cdb, sizeof (cdb));
19241 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19242 
19243 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19244 
19245 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19246 
19247 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19248 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19249 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19250 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19251 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19252 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19253 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19254 	ucmd_buf.uscsi_timeout	= 60;
19255 
19256 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19257 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19258 
19259 	switch (status) {
19260 	case 0:
19261 		/* Return failure if we did not get valid capacity data. */
19262 		if (ucmd_buf.uscsi_resid != 0) {
19263 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19264 			return (EIO);
19265 		}
19266 
19267 		/*
19268 		 * Read capacity and block size from the READ CAPACITY 10 data.
19269 		 * This data may be adjusted later due to device specific
19270 		 * issues.
19271 		 *
19272 		 * According to the SCSI spec, the READ CAPACITY 10
19273 		 * command returns the following:
19274 		 *
19275 		 *  bytes 0-3: Maximum logical block address available.
19276 		 *		(MSB in byte:0 & LSB in byte:3)
19277 		 *
19278 		 *  bytes 4-7: Block length in bytes
19279 		 *		(MSB in byte:4 & LSB in byte:7)
19280 		 *
19281 		 */
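		/*
		 * For illustration (hypothetical values): a target whose
		 * last LBA is 0x01FFFFFF with 512-byte blocks returns the
		 * eight data bytes 01 FF FF FF 00 00 02 00, so
		 * BE_32(capacity_buf[0]) below yields 0x01FFFFFF and
		 * BE_32(capacity_buf[1]) yields 0x200.
		 */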
19282 		capacity = BE_32(capacity_buf[0]);
19283 		lbasize = BE_32(capacity_buf[1]);
19284 
19285 		/*
19286 		 * Done with capacity_buf
19287 		 */
19288 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19289 
19290 		/*
19291 		 * if the reported capacity is set to all 0xf's, then
19292 		 * this disk is too large and requires SBC-2 commands.
19293 		 * Reissue the request using READ CAPACITY 16.
19294 		 */
19295 		if (capacity == 0xffffffff) {
19296 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19297 			    &lbasize, path_flag);
19298 			if (status != 0) {
19299 				return (status);
19300 			}
19301 		}
19302 		break;	/* Success! */
19303 	case EIO:
19304 		switch (ucmd_buf.uscsi_status) {
19305 		case STATUS_RESERVATION_CONFLICT:
19306 			status = EACCES;
19307 			break;
19308 		case STATUS_CHECK:
19309 			/*
19310 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19311 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19312 			 */
19313 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19314 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19315 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19316 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19317 				return (EAGAIN);
19318 			}
19319 			break;
19320 		default:
19321 			break;
19322 		}
19323 		/* FALLTHRU */
19324 	default:
19325 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19326 		return (status);
19327 	}
19328 
19329 	/*
19330 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19331 	 * (2352 and 0 are common) so for these devices always force the value
19332 	 * to 2048 as required by the ATAPI specs.
19333 	 */
19334 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19335 		lbasize = 2048;
19336 	}
19337 
19338 	/*
19339 	 * Get the maximum LBA value from the READ CAPACITY data.
19340 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19341 	 * was cleared when issuing the command. This means that the LBA
19342 	 * returned from the device is the LBA of the last logical block
19343 	 * on the logical unit.  The actual logical block count will be
19344 	 * this value plus one.
19345 	 *
19346 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19347 	 * so scale the capacity value to reflect this.
19348 	 */
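	/*
	 * For example (hypothetical values): a returned last-LBA of
	 * 0x1FFFFF with an lbasize of 2048 and a 512-byte
	 * un_sys_blocksize yields (0x1FFFFF + 1) * (2048 / 512) =
	 * 0x800000 system blocks.
	 */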
19349 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
19350 
19351 #if defined(__i386) || defined(__amd64)
19352 	/*
19353 	 * Refer to comments related to off-by-1 at the
19354 	 * header of this file.
19355 	 * Treat 1TB disk as (1T - 512)B.
19356 	 */
19357 	if (un->un_f_capacity_adjusted == 1)
19358 		capacity = DK_MAX_BLOCKS;
19359 #endif
19360 
19361 	/*
19362 	 * Copy the values from the READ CAPACITY command into the space
19363 	 * provided by the caller.
19364 	 */
19365 	*capp = capacity;
19366 	*lbap = lbasize;
19367 
19368 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19369 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19370 
19371 	/*
19372 	 * Both the lbasize and capacity from the device must be nonzero,
19373 	 * otherwise we assume that the values are not valid and return
19374 	 * failure to the caller. (4203735)
19375 	 */
19376 	if ((capacity == 0) || (lbasize == 0)) {
19377 		return (EIO);
19378 	}
19379 
19380 	return (0);
19381 }
19382 
19383 /*
19384  *    Function: sd_send_scsi_READ_CAPACITY_16
19385  *
19386  * Description: This routine uses the scsi READ CAPACITY 16 command to
19387  *		determine the device capacity in number of blocks and the
19388  *		device native block size.  If this function returns a failure,
19389  *		then the values in *capp and *lbap are undefined.
19390  *		This routine should always be called by
19391  *		sd_send_scsi_READ_CAPACITY which will apply any device
19392  *		specific adjustments to capacity and lbasize.
19393  *
19394  *   Arguments: un   - ptr to soft state struct for the target
19395  *		capp - ptr to unsigned 64-bit variable to receive the
19396  *			capacity value from the command.
19397  *		lbap - ptr to unsigned 32-bit variable to receive the
19398  *			block size value from the command
19399  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19400  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19401  *			to use the USCSI "direct" chain and bypass the normal
19402  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19403  *			this command is issued as part of an error recovery
19404  *			action.
19405  *
19406  * Return Code: 0   - Success
19407  *		EIO - IO error
19408  *		EACCES - Reservation conflict detected
19409  *		EAGAIN - Device is becoming ready
19410  *		errno return code from sd_send_scsi_cmd()
19411  *
19412  *     Context: Can sleep.  Blocks until command completes.
19413  */
19414 
19415 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19416 
19417 static int
19418 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19419 	uint32_t *lbap, int path_flag)
19420 {
19421 	struct	scsi_extended_sense	sense_buf;
19422 	struct	uscsi_cmd	ucmd_buf;
19423 	union	scsi_cdb	cdb;
19424 	uint64_t		*capacity16_buf;
19425 	uint64_t		capacity;
19426 	uint32_t		lbasize;
19427 	int			status;
19428 
19429 	ASSERT(un != NULL);
19430 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19431 	ASSERT(capp != NULL);
19432 	ASSERT(lbap != NULL);
19433 
19434 	SD_TRACE(SD_LOG_IO, un,
19435 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19436 
19437 	/*
19438 	 * First send a READ_CAPACITY_16 command to the target.
19439 	 *
19440 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19441 	 * Medium Indicator bit is cleared.  The address field must be
19442 	 * zero if the PMI bit is zero.
19443 	 */
19444 	bzero(&cdb, sizeof (cdb));
19445 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19446 
19447 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19448 
19449 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19450 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19451 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19452 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19453 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19454 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19455 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19456 	ucmd_buf.uscsi_timeout	= 60;
19457 
19458 	/*
19459 	 * Read Capacity (16) is a Service Action In command.  One
19460 	 * command byte (0x9E) is overloaded for multiple operations,
19461 	 * with the second CDB byte specifying the desired operation
19462 	 * with the second CDB byte specifying the desired operation.
19463 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19464 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19465 
19466 	/*
19467 	 * Fill in allocation length field
19468 	 */
19469 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19470 
19471 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19472 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19473 
19474 	switch (status) {
19475 	case 0:
19476 		/* Return failure if we did not get valid capacity data. */
19477 		if (ucmd_buf.uscsi_resid > 20) {
19478 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19479 			return (EIO);
19480 		}
19481 
19482 		/*
19483 		 * Read capacity and block size from the READ CAPACITY 16 data.
19484 		 * This data may be adjusted later due to device specific
19485 		 * issues.
19486 		 *
19487 		 * According to the SCSI spec, the READ CAPACITY 16
19488 		 * command returns the following:
19489 		 *
19490 		 *  bytes 0-7: Maximum logical block address available.
19491 		 *		(MSB in byte:0 & LSB in byte:7)
19492 		 *
19493 		 *  bytes 8-11: Block length in bytes
19494 		 *		(MSB in byte:8 & LSB in byte:11)
19495 		 *
19496 		 */
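		/*
		 * For illustration (hypothetical values): a last LBA of
		 * 0x00000002FFFFFFFF with 512-byte blocks arrives as the
		 * big-endian bytes 00 00 00 02 FF FF FF FF followed by
		 * 00 00 02 00, decoded by BE_64() and BE_32() below.
		 */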
19497 		capacity = BE_64(capacity16_buf[0]);
19498 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19499 
19500 		/*
19501 		 * Done with capacity16_buf
19502 		 */
19503 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19504 
19505 		/*
19506 		 * if the reported capacity is set to all 0xf's, then
19507 		 * this disk is too large.  This could only happen with
19508 		 * a device that supports LBAs larger than 64 bits which
19509 		 * are not defined by any current T10 standards.
19510 		 */
19511 		if (capacity == 0xffffffffffffffff) {
19512 			return (EIO);
19513 		}
19514 		break;	/* Success! */
19515 	case EIO:
19516 		switch (ucmd_buf.uscsi_status) {
19517 		case STATUS_RESERVATION_CONFLICT:
19518 			status = EACCES;
19519 			break;
19520 		case STATUS_CHECK:
19521 			/*
19522 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19523 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19524 			 */
19525 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19526 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19527 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19528 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19529 				return (EAGAIN);
19530 			}
19531 			break;
19532 		default:
19533 			break;
19534 		}
19535 		/* FALLTHRU */
19536 	default:
19537 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19538 		return (status);
19539 	}
19540 
19541 	*capp = capacity;
19542 	*lbap = lbasize;
19543 
19544 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19545 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19546 
19547 	return (0);
19548 }
19549 
19550 
19551 /*
19552  *    Function: sd_send_scsi_START_STOP_UNIT
19553  *
19554  * Description: Issue a scsi START STOP UNIT command to the target.
19555  *
19556  *   Arguments: un    - pointer to driver soft state (unit) structure for
19557  *			this target.
19558  *		flag  - SD_TARGET_START
19559  *			SD_TARGET_STOP
19560  *			SD_TARGET_EJECT
19561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19563  *			to use the USCSI "direct" chain and bypass the normal
19564  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19565  *			command is issued as part of an error recovery action.
19566  *
19567  * Return Code: 0   - Success
19568  *		EIO - IO error
19569  *		EACCES - Reservation conflict detected
19570  *		ENXIO  - Not Ready, medium not present
19571  *		errno return code from sd_send_scsi_cmd()
19572  *
19573  *     Context: Can sleep.
19574  */
19575 
19576 static int
19577 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19578 {
19579 	struct	scsi_extended_sense	sense_buf;
19580 	union scsi_cdb		cdb;
19581 	struct uscsi_cmd	ucmd_buf;
19582 	int			status;
19583 
19584 	ASSERT(un != NULL);
19585 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19586 
19587 	SD_TRACE(SD_LOG_IO, un,
19588 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19589 
19590 	if (un->un_f_check_start_stop &&
19591 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19592 	    (un->un_f_start_stop_supported != TRUE)) {
19593 		return (0);
19594 	}
19595 
19596 	bzero(&cdb, sizeof (cdb));
19597 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19598 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19599 
19600 	cdb.scc_cmd = SCMD_START_STOP;
19601 	cdb.cdb_opaque[4] = (uchar_t)flag;
19602 
19603 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19604 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19605 	ucmd_buf.uscsi_bufaddr	= NULL;
19606 	ucmd_buf.uscsi_buflen	= 0;
19607 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19608 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19609 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19610 	ucmd_buf.uscsi_timeout	= 200;
19611 
19612 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19613 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19614 
19615 	switch (status) {
19616 	case 0:
19617 		break;	/* Success! */
19618 	case EIO:
19619 		switch (ucmd_buf.uscsi_status) {
19620 		case STATUS_RESERVATION_CONFLICT:
19621 			status = EACCES;
19622 			break;
19623 		case STATUS_CHECK:
19624 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19625 				switch (scsi_sense_key(
19626 				    (uint8_t *)&sense_buf)) {
19627 				case KEY_ILLEGAL_REQUEST:
19628 					status = ENOTSUP;
19629 					break;
19630 				case KEY_NOT_READY:
19631 					if (scsi_sense_asc(
19632 					    (uint8_t *)&sense_buf)
19633 					    == 0x3A) {
19634 						status = ENXIO;
19635 					}
19636 					break;
19637 				default:
19638 					break;
19639 				}
19640 			}
19641 			break;
19642 		default:
19643 			break;
19644 		}
19645 		break;
19646 	default:
19647 		break;
19648 	}
19649 
19650 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19651 
19652 	return (status);
19653 }
19654 
19655 
19656 /*
19657  *    Function: sd_start_stop_unit_callback
19658  *
19659  * Description: timeout(9F) callback to begin recovery process for a
19660  *		device that has spun down.
19661  *
19662  *   Arguments: arg - pointer to associated softstate struct.
19663  *
19664  *     Context: Executes in a timeout(9F) thread context
19665  */
19666 
19667 static void
19668 sd_start_stop_unit_callback(void *arg)
19669 {
19670 	struct sd_lun	*un = arg;
19671 	ASSERT(un != NULL);
19672 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19673 
19674 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19675 
19676 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19677 }
19678 
19679 
19680 /*
19681  *    Function: sd_start_stop_unit_task
19682  *
19683  * Description: Recovery procedure when a drive is spun down.
19684  *
19685  *   Arguments: arg - pointer to associated softstate struct.
19686  *
19687  *     Context: Executes in a taskq() thread context
19688  */
19689 
19690 static void
19691 sd_start_stop_unit_task(void *arg)
19692 {
19693 	struct sd_lun	*un = arg;
19694 
19695 	ASSERT(un != NULL);
19696 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19697 
19698 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19699 
19700 	/*
19701 	 * Some unformatted drives report a not-ready error; there is no
19702 	 * need to restart if a format has been initiated.
19703 	 */
19704 	mutex_enter(SD_MUTEX(un));
19705 	if (un->un_f_format_in_progress == TRUE) {
19706 		mutex_exit(SD_MUTEX(un));
19707 		return;
19708 	}
19709 	mutex_exit(SD_MUTEX(un));
19710 
19711 	/*
19712 	 * When a START STOP command is issued from here, it is part of a
19713 	 * failure recovery operation and must be issued before any other
19714 	 * commands, including any pending retries. Thus it must be sent
19715 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19716 	 * succeeds or not, we will start I/O after the attempt.
19717 	 */
19718 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19719 	    SD_PATH_DIRECT_PRIORITY);
19720 
19721 	/*
19722 	 * The above call blocks until the START_STOP_UNIT command completes.
19723 	 * Now that it has completed, we must re-try the original IO that
19724 	 * received the NOT READY condition in the first place. There are
19725 	 * three possible conditions here:
19726 	 *
19727 	 *  (1) The original IO is on un_retry_bp.
19728 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19729 	 *	is NULL.
19730 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19731 	 *	points to some other, unrelated bp.
19732 	 *
19733 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19734 	 * as the argument. If un_retry_bp is NULL, this will initiate
19735 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19736 	 * then this will process the bp on un_retry_bp. That may or may not
19737 	 * be the original IO, but that does not matter: the important thing
19738 	 * is to keep the IO processing going at this point.
19739 	 *
19740 	 * Note: This is a very specific error recovery sequence associated
19741 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19742 	 * serialize the I/O with completion of the spin-up.
19743 	 */
19744 	mutex_enter(SD_MUTEX(un));
19745 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19746 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19747 	    un, un->un_retry_bp);
19748 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19749 	sd_start_cmds(un, un->un_retry_bp);
19750 	mutex_exit(SD_MUTEX(un));
19751 
19752 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19753 }
19754 
19755 
19756 /*
19757  *    Function: sd_send_scsi_INQUIRY
19758  *
19759  * Description: Issue the scsi INQUIRY command.
19760  *
 *   Arguments: un - pointer to softstate struct for the target.
 *		bufaddr - buffer for the returned inquiry data.
 *		buflen - size of the bufaddr buffer.
 *		evpd - EVPD bit, placed in byte 1 of the CDB.
 *		page_code - VPD page code, placed in byte 2 of the CDB.
 *		residp - pointer to receive the command residual (may be
 *			NULL if the residual is not needed).
19767  *
19768  * Return Code: 0   - Success
19769  *		errno return code from sd_send_scsi_cmd()
19770  *
19771  *     Context: Can sleep. Does not return until command is completed.
19772  */
19773 
19774 static int
19775 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19776 	uchar_t evpd, uchar_t page_code, size_t *residp)
19777 {
19778 	union scsi_cdb		cdb;
19779 	struct uscsi_cmd	ucmd_buf;
19780 	int			status;
19781 
19782 	ASSERT(un != NULL);
19783 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19784 	ASSERT(bufaddr != NULL);
19785 
19786 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19787 
19788 	bzero(&cdb, sizeof (cdb));
19789 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19790 	bzero(bufaddr, buflen);
19791 
19792 	cdb.scc_cmd = SCMD_INQUIRY;
19793 	cdb.cdb_opaque[1] = evpd;
19794 	cdb.cdb_opaque[2] = page_code;
19795 	FORMG0COUNT(&cdb, buflen);
19796 
19797 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19798 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19799 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19800 	ucmd_buf.uscsi_buflen	= buflen;
19801 	ucmd_buf.uscsi_rqbuf	= NULL;
19802 	ucmd_buf.uscsi_rqlen	= 0;
19803 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19804 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19805 
19806 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19807 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19808 
19809 	if ((status == 0) && (residp != NULL)) {
19810 		*residp = ucmd_buf.uscsi_resid;
19811 	}
19812 
19813 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19814 
19815 	return (status);
19816 }
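
/*
 * Usage sketch (illustrative, not taken from this driver): retrieving the
 * Unit Serial Number VPD page (0x80). Passing evpd as 0x01 sets the EVPD
 * bit in byte 1 of the CDB; with evpd and page_code both zero the routine
 * returns standard inquiry data. The buffer size is an arbitrary example:
 *
 *	uchar_t	vpd[0xff];
 *	size_t	resid = 0;
 *
 *	if (sd_send_scsi_INQUIRY(un, vpd, sizeof (vpd), 0x01, 0x80,
 *	    &resid) == 0) {
 *		(sizeof (vpd) - resid) valid bytes are now in vpd[]
 *	}
 */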
19817 
19818 
19819 /*
19820  *    Function: sd_send_scsi_TEST_UNIT_READY
19821  *
19822  * Description: Issue the scsi TEST UNIT READY command.
19823  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19824  *		prevent retrying failed commands. Use this when the intent
19825  *		is either to check for device readiness, to clear a Unit
19826  *		Attention, or to clear any outstanding sense data.
 *		However, under specific conditions the expected behavior
19828  *		is for retries to bring a device ready, so use the flag
19829  *		with caution.
19830  *
19831  *   Arguments: un
19832  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19833  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			0: don't check for media present; do retries on cmd.
19835  *
19836  * Return Code: 0   - Success
19837  *		EIO - IO error
19838  *		EACCES - Reservation conflict detected
19839  *		ENXIO  - Not Ready, medium not present
19840  *		errno return code from sd_send_scsi_cmd()
19841  *
19842  *     Context: Can sleep. Does not return until command is completed.
19843  */
19844 
19845 static int
19846 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19847 {
19848 	struct	scsi_extended_sense	sense_buf;
19849 	union scsi_cdb		cdb;
19850 	struct uscsi_cmd	ucmd_buf;
19851 	int			status;
19852 
19853 	ASSERT(un != NULL);
19854 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19855 
19856 	SD_TRACE(SD_LOG_IO, un,
19857 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19858 
	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are pending commands, return success; this is a bit
	 * arbitrary, but it is acceptable for non-removables (i.e. the
	 * elite1 disks) and non-clustering configurations.
	 */
19869 	if (un->un_f_cfg_tur_check == TRUE) {
19870 		mutex_enter(SD_MUTEX(un));
19871 		if (un->un_ncmds_in_transport != 0) {
19872 			mutex_exit(SD_MUTEX(un));
19873 			return (0);
19874 		}
19875 		mutex_exit(SD_MUTEX(un));
19876 	}
19877 
19878 	bzero(&cdb, sizeof (cdb));
19879 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19880 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19881 
19882 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19883 
19884 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19885 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19886 	ucmd_buf.uscsi_bufaddr	= NULL;
19887 	ucmd_buf.uscsi_buflen	= 0;
19888 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19889 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19890 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19891 
19892 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19893 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19894 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19895 	}
19896 	ucmd_buf.uscsi_timeout	= 60;
19897 
19898 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19899 	    UIO_SYSSPACE, UIO_SYSSPACE,
19900 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19901 
19902 	switch (status) {
19903 	case 0:
19904 		break;	/* Success! */
19905 	case EIO:
19906 		switch (ucmd_buf.uscsi_status) {
19907 		case STATUS_RESERVATION_CONFLICT:
19908 			status = EACCES;
19909 			break;
19910 		case STATUS_CHECK:
19911 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19912 				break;
19913 			}
19914 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19915 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
19916 				KEY_NOT_READY) &&
19917 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
19918 				status = ENXIO;
19919 			}
19920 			break;
19921 		default:
19922 			break;
19923 		}
19924 		break;
19925 	default:
19926 		break;
19927 	}
19928 
19929 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19930 
19931 	return (status);
19932 }
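
/*
 * Usage sketch (illustrative): distinguishing the two flag modes. With
 * SD_CHECK_FOR_MEDIA a "medium not present" check condition is mapped to
 * ENXIO; SD_DONT_RETRY_TUR adds USCSI_DIAGNOSE so a failed TUR is not
 * retried:
 *
 *	if (sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA) == ENXIO) {
 *		no media is present in the drive
 *	}
 *
 *	(void) sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR);
 */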
19933 
19934 
19935 /*
19936  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19937  *
19938  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19939  *
 *   Arguments: un - pointer to softstate struct for the target.
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV.
 *		data_len - length of the data buffer (zero if data_bufp
 *			is NULL).
 *		data_bufp - buffer for the returned parameter data (may be
 *			NULL, in which case a default buffer is allocated).
19941  *
19942  * Return Code: 0   - Success
19943  *		EACCES
19944  *		ENOTSUP
19945  *		errno return code from sd_send_scsi_cmd()
19946  *
19947  *     Context: Can sleep. Does not return until command is completed.
19948  */
19949 
19950 static int
19951 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19952 	uint16_t data_len, uchar_t *data_bufp)
19953 {
19954 	struct scsi_extended_sense	sense_buf;
19955 	union scsi_cdb		cdb;
19956 	struct uscsi_cmd	ucmd_buf;
19957 	int			status;
19958 	int			no_caller_buf = FALSE;
19959 
19960 	ASSERT(un != NULL);
19961 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19962 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19963 
19964 	SD_TRACE(SD_LOG_IO, un,
19965 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19966 
19967 	bzero(&cdb, sizeof (cdb));
19968 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19969 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19970 	if (data_bufp == NULL) {
19971 		/* Allocate a default buf if the caller did not give one */
19972 		ASSERT(data_len == 0);
19973 		data_len  = MHIOC_RESV_KEY_SIZE;
19974 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19975 		no_caller_buf = TRUE;
19976 	}
19977 
19978 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19979 	cdb.cdb_opaque[1] = usr_cmd;
19980 	FORMG1COUNT(&cdb, data_len);
19981 
19982 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19983 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19984 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19985 	ucmd_buf.uscsi_buflen	= data_len;
19986 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19987 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19988 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19989 	ucmd_buf.uscsi_timeout	= 60;
19990 
19991 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19992 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19993 
19994 	switch (status) {
19995 	case 0:
19996 		break;	/* Success! */
19997 	case EIO:
19998 		switch (ucmd_buf.uscsi_status) {
19999 		case STATUS_RESERVATION_CONFLICT:
20000 			status = EACCES;
20001 			break;
20002 		case STATUS_CHECK:
20003 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20004 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20005 				KEY_ILLEGAL_REQUEST)) {
20006 				status = ENOTSUP;
20007 			}
20008 			break;
20009 		default:
20010 			break;
20011 		}
20012 		break;
20013 	default:
20014 		break;
20015 	}
20016 
20017 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20018 
20019 	if (no_caller_buf == TRUE) {
20020 		kmem_free(data_bufp, data_len);
20021 	}
20022 
20023 	return (status);
20024 }
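
/*
 * Usage sketch (illustrative): reading the registered reservation keys.
 * The buffer length is an arbitrary example; the routine returns the raw
 * PERSISTENT RESERVE IN parameter data in the caller's buffer:
 *
 *	uchar_t	keys[MHIOC_RESV_KEY_SIZE * 8];
 *
 *	if (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    sizeof (keys), keys) == ENOTSUP) {
 *		target does not support SCSI-3 persistent reservations
 *	}
 */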
20025 
20026 
20027 /*
20028  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20029  *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
 *		commands to the device.
 *
 *   Arguments: un - pointer to soft state struct for the target.
 *		usr_cmd - SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY).
 *		usr_bufp - user-provided pointer to a register, reserve
 *			descriptor, or preempt and abort structure
 *			(mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t,
 *			mhioc_registerandignorekey_t).
20042  *
20043  * Return Code: 0   - Success
20044  *		EACCES
20045  *		ENOTSUP
20046  *		errno return code from sd_send_scsi_cmd()
20047  *
20048  *     Context: Can sleep. Does not return until command is completed.
20049  */
20050 
20051 static int
20052 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
20053 	uchar_t	*usr_bufp)
20054 {
20055 	struct scsi_extended_sense	sense_buf;
20056 	union scsi_cdb		cdb;
20057 	struct uscsi_cmd	ucmd_buf;
20058 	int			status;
20059 	uchar_t			data_len = sizeof (sd_prout_t);
20060 	sd_prout_t		*prp;
20061 
20062 	ASSERT(un != NULL);
20063 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20064 	ASSERT(data_len == 24);	/* required by scsi spec */
20065 
20066 	SD_TRACE(SD_LOG_IO, un,
20067 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20068 
20069 	if (usr_bufp == NULL) {
20070 		return (EINVAL);
20071 	}
20072 
20073 	bzero(&cdb, sizeof (cdb));
20074 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20075 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20076 	prp = kmem_zalloc(data_len, KM_SLEEP);
20077 
20078 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20079 	cdb.cdb_opaque[1] = usr_cmd;
20080 	FORMG1COUNT(&cdb, data_len);
20081 
20082 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20083 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20084 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20085 	ucmd_buf.uscsi_buflen	= data_len;
20086 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20087 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20088 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20089 	ucmd_buf.uscsi_timeout	= 60;
20090 
20091 	switch (usr_cmd) {
20092 	case SD_SCSI3_REGISTER: {
20093 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20094 
20095 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20096 		bcopy(ptr->newkey.key, prp->service_key,
20097 		    MHIOC_RESV_KEY_SIZE);
20098 		prp->aptpl = ptr->aptpl;
20099 		break;
20100 	}
20101 	case SD_SCSI3_RESERVE:
20102 	case SD_SCSI3_RELEASE: {
20103 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20104 
20105 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20106 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20107 		cdb.cdb_opaque[2] = ptr->type;
20108 		break;
20109 	}
20110 	case SD_SCSI3_PREEMPTANDABORT: {
20111 		mhioc_preemptandabort_t *ptr =
20112 		    (mhioc_preemptandabort_t *)usr_bufp;
20113 
20114 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20115 		bcopy(ptr->victim_key.key, prp->service_key,
20116 		    MHIOC_RESV_KEY_SIZE);
20117 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20118 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20119 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20120 		break;
20121 	}
	case SD_SCSI3_REGISTERANDIGNOREKEY: {
		mhioc_registerandignorekey_t *ptr =
		    (mhioc_registerandignorekey_t *)usr_bufp;

		bcopy(ptr->newkey.key, prp->service_key, MHIOC_RESV_KEY_SIZE);
20128 		prp->aptpl = ptr->aptpl;
20129 		break;
20130 	}
20131 	default:
20132 		ASSERT(FALSE);
20133 		break;
20134 	}
20135 
20136 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20137 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20138 
20139 	switch (status) {
20140 	case 0:
20141 		break;	/* Success! */
20142 	case EIO:
20143 		switch (ucmd_buf.uscsi_status) {
20144 		case STATUS_RESERVATION_CONFLICT:
20145 			status = EACCES;
20146 			break;
20147 		case STATUS_CHECK:
20148 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20149 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20150 				KEY_ILLEGAL_REQUEST)) {
20151 				status = ENOTSUP;
20152 			}
20153 			break;
20154 		default:
20155 			break;
20156 		}
20157 		break;
20158 	default:
20159 		break;
20160 	}
20161 
20162 	kmem_free(prp, data_len);
20163 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20164 	return (status);
20165 }
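
/*
 * Usage sketch (illustrative): registering a reservation key. The key
 * value is an arbitrary 8-byte example; an all-zero oldkey registers a
 * new key for this initiator:
 *
 *	mhioc_register_t reg;
 *
 *	bzero(&reg, sizeof (reg));
 *	bcopy("examplek", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	if (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg) == EACCES) {
 *		reservation conflict
 *	}
 */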
20166 
20167 
20168 /*
20169  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20170  *
20171  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20172  *
20173  *   Arguments: un - pointer to the target's soft state struct
20174  *
20175  * Return Code: 0 - success
20176  *		errno-type error code
20177  *
20178  *     Context: kernel thread context only.
20179  */
20180 
20181 static int
20182 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20183 {
20184 	struct sd_uscsi_info	*uip;
20185 	struct uscsi_cmd	*uscmd;
20186 	union scsi_cdb		*cdb;
20187 	struct buf		*bp;
20188 	int			rval = 0;
20189 
20190 	SD_TRACE(SD_LOG_IO, un,
20191 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20192 
20193 	ASSERT(un != NULL);
20194 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20195 
20196 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20197 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20198 
20199 	/*
20200 	 * First get some memory for the uscsi_cmd struct and cdb
20201 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20202 	 */
20203 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20204 	uscmd->uscsi_cdblen = CDB_GROUP1;
20205 	uscmd->uscsi_cdb = (caddr_t)cdb;
20206 	uscmd->uscsi_bufaddr = NULL;
20207 	uscmd->uscsi_buflen = 0;
20208 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20209 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20210 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20211 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20212 	uscmd->uscsi_timeout = sd_io_time;
20213 
20214 	/*
20215 	 * Allocate an sd_uscsi_info struct and fill it with the info
20216 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20217 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20218 	 * since we allocate the buf here in this function, we do not
20219 	 * need to preserve the prior contents of b_private.
20220 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20221 	 */
20222 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20223 	uip->ui_flags = SD_PATH_DIRECT;
20224 	uip->ui_cmdp  = uscmd;
20225 
20226 	bp = getrbuf(KM_SLEEP);
20227 	bp->b_private = uip;
20228 
20229 	/*
20230 	 * Setup buffer to carry uscsi request.
20231 	 */
20232 	bp->b_flags  = B_BUSY;
20233 	bp->b_bcount = 0;
20234 	bp->b_blkno  = 0;
20235 
20236 	if (dkc != NULL) {
20237 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20238 		uip->ui_dkc = *dkc;
20239 	}
20240 
20241 	bp->b_edev = SD_GET_DEV(un);
20242 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20243 
20244 	(void) sd_uscsi_strategy(bp);
20245 
	/*
	 * If this is a synchronous request, wait for completion.
	 * If it is an async request, just return and let the b_iodone
	 * callback clean up.
	 * NOTE: On return, un_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
20254 	if (dkc == NULL) {
20255 		(void) biowait(bp);
20256 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20257 	}
20258 
20259 	return (rval);
20260 }
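
/*
 * Usage sketch (illustrative): synchronous versus asynchronous flush.
 * With dkc == NULL the routine waits for the command to complete; with a
 * callback supplied it returns immediately and the dk_callback function
 * (my_flush_done and my_cookie are hypothetical names) is invoked from
 * the biodone path with the final status:
 *
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
 *
 *	struct dk_callback dkc;
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = my_cookie;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */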
20261 
20262 
20263 static int
20264 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20265 {
20266 	struct sd_uscsi_info *uip;
20267 	struct uscsi_cmd *uscmd;
20268 	uint8_t *sense_buf;
20269 	struct sd_lun *un;
20270 	int status;
20271 
20272 	uip = (struct sd_uscsi_info *)(bp->b_private);
20273 	ASSERT(uip != NULL);
20274 
20275 	uscmd = uip->ui_cmdp;
20276 	ASSERT(uscmd != NULL);
20277 
20278 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20279 	ASSERT(sense_buf != NULL);
20280 
20281 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20282 	ASSERT(un != NULL);
20283 
20284 	status = geterror(bp);
20285 	switch (status) {
20286 	case 0:
20287 		break;	/* Success! */
20288 	case EIO:
20289 		switch (uscmd->uscsi_status) {
20290 		case STATUS_RESERVATION_CONFLICT:
20291 			/* Ignore reservation conflict */
20292 			status = 0;
20293 			goto done;
20294 
20295 		case STATUS_CHECK:
20296 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20297 			    (scsi_sense_key(sense_buf) ==
20298 				KEY_ILLEGAL_REQUEST)) {
20299 				/* Ignore Illegal Request error */
20300 				mutex_enter(SD_MUTEX(un));
20301 				un->un_f_sync_cache_supported = FALSE;
20302 				mutex_exit(SD_MUTEX(un));
20303 				status = ENOTSUP;
20304 				goto done;
20305 			}
20306 			break;
20307 		default:
20308 			break;
20309 		}
20310 		/* FALLTHRU */
20311 	default:
20312 		/*
20313 		 * Don't log an error message if this device
20314 		 * has removable media.
20315 		 */
20316 		if (!un->un_f_has_removable_media) {
20317 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20318 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20319 		}
20320 		break;
20321 	}
20322 
20323 done:
20324 	if (uip->ui_dkc.dkc_callback != NULL) {
20325 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20326 	}
20327 
20328 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20329 	freerbuf(bp);
20330 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20331 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20332 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20333 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20334 
20335 	return (status);
20336 }
20337 
20338 
20339 /*
20340  *    Function: sd_send_scsi_GET_CONFIGURATION
20341  *
20342  * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info;
 *		the caller needs to ensure that buflen == SD_PROFILE_HEADER_LEN.
20345  *   Arguments: un
20346  *		ucmdbuf
20347  *		rqbuf
20348  *		rqbuflen
20349  *		bufaddr
20350  *		buflen
20351  *
20352  * Return Code: 0   - Success
20353  *		errno return code from sd_send_scsi_cmd()
20354  *
20355  *     Context: Can sleep. Does not return until command is completed.
20356  *
20357  */
20358 
20359 static int
20360 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20361 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20362 {
20363 	char	cdb[CDB_GROUP1];
20364 	int	status;
20365 
20366 	ASSERT(un != NULL);
20367 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20368 	ASSERT(bufaddr != NULL);
20369 	ASSERT(ucmdbuf != NULL);
20370 	ASSERT(rqbuf != NULL);
20371 
20372 	SD_TRACE(SD_LOG_IO, un,
20373 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20374 
20375 	bzero(cdb, sizeof (cdb));
20376 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20377 	bzero(rqbuf, rqbuflen);
20378 	bzero(bufaddr, buflen);
20379 
20380 	/*
20381 	 * Set up cdb field for the get configuration command.
20382 	 */
20383 	cdb[0] = SCMD_GET_CONFIGURATION;
20384 	cdb[1] = 0x02;  /* Requested Type */
20385 	cdb[8] = SD_PROFILE_HEADER_LEN;
20386 	ucmdbuf->uscsi_cdb = cdb;
20387 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20388 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20389 	ucmdbuf->uscsi_buflen = buflen;
20390 	ucmdbuf->uscsi_timeout = sd_io_time;
20391 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20392 	ucmdbuf->uscsi_rqlen = rqbuflen;
20393 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20394 
20395 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20396 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20397 
20398 	switch (status) {
20399 	case 0:
20400 		break;  /* Success! */
20401 	case EIO:
20402 		switch (ucmdbuf->uscsi_status) {
20403 		case STATUS_RESERVATION_CONFLICT:
20404 			status = EACCES;
20405 			break;
20406 		default:
20407 			break;
20408 		}
20409 		break;
20410 	default:
20411 		break;
20412 	}
20413 
20414 	if (status == 0) {
20415 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20416 		    "sd_send_scsi_GET_CONFIGURATION: data",
20417 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20418 	}
20419 
20420 	SD_TRACE(SD_LOG_IO, un,
20421 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20422 
20423 	return (status);
20424 }
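
/*
 * Usage sketch (illustrative): reading the feature header and extracting
 * the current profile, which sits in bytes 6-7 of the header per the MMC
 * GET CONFIGURATION layout:
 *
 *	uchar_t	hdr[SD_PROFILE_HEADER_LEN];
 *	uchar_t	rq[SENSE_LENGTH];
 *	struct uscsi_cmd com;
 *
 *	if (sd_send_scsi_GET_CONFIGURATION(un, &com, rq, sizeof (rq),
 *	    hdr, sizeof (hdr)) == 0) {
 *		uint16_t profile = (hdr[6] << 8) | hdr[7];
 *	}
 */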
20425 
20426 /*
20427  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20428  *
20429  * Description: Issues the get configuration command to the device to
 *              retrieve a specific feature. Called from
20431  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20432  *   Arguments: un
20433  *              ucmdbuf
20434  *              rqbuf
20435  *              rqbuflen
20436  *              bufaddr
20437  *              buflen
20438  *		feature
20439  *
20440  * Return Code: 0   - Success
20441  *              errno return code from sd_send_scsi_cmd()
20442  *
20443  *     Context: Can sleep. Does not return until command is completed.
20444  *
20445  */
20446 static int
20447 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20448 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20449 	uchar_t *bufaddr, uint_t buflen, char feature)
20450 {
20451 	char    cdb[CDB_GROUP1];
20452 	int	status;
20453 
20454 	ASSERT(un != NULL);
20455 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20456 	ASSERT(bufaddr != NULL);
20457 	ASSERT(ucmdbuf != NULL);
20458 	ASSERT(rqbuf != NULL);
20459 
20460 	SD_TRACE(SD_LOG_IO, un,
20461 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20462 
20463 	bzero(cdb, sizeof (cdb));
20464 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20465 	bzero(rqbuf, rqbuflen);
20466 	bzero(bufaddr, buflen);
20467 
20468 	/*
20469 	 * Set up cdb field for the get configuration command.
20470 	 */
20471 	cdb[0] = SCMD_GET_CONFIGURATION;
20472 	cdb[1] = 0x02;  /* Requested Type */
20473 	cdb[3] = feature;
20474 	cdb[8] = buflen;
20475 	ucmdbuf->uscsi_cdb = cdb;
20476 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20477 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20478 	ucmdbuf->uscsi_buflen = buflen;
20479 	ucmdbuf->uscsi_timeout = sd_io_time;
20480 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20481 	ucmdbuf->uscsi_rqlen = rqbuflen;
20482 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20483 
20484 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20485 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20486 
20487 	switch (status) {
20488 	case 0:
20489 		break;  /* Success! */
20490 	case EIO:
20491 		switch (ucmdbuf->uscsi_status) {
20492 		case STATUS_RESERVATION_CONFLICT:
20493 			status = EACCES;
20494 			break;
20495 		default:
20496 			break;
20497 		}
20498 		break;
20499 	default:
20500 		break;
20501 	}
20502 
20503 	if (status == 0) {
20504 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20505 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20506 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20507 	}
20508 
20509 	SD_TRACE(SD_LOG_IO, un,
20510 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20511 
20512 	return (status);
20513 }
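
/*
 * Usage sketch (illustrative): checking a single feature, e.g. the MMC
 * Random Writable feature (0x20). The buffer length is an arbitrary
 * example sized for the feature header plus one feature descriptor:
 *
 *	uchar_t	buf[SD_PROFILE_HEADER_LEN + 8];
 *	uchar_t	rq[SENSE_LENGTH];
 *	struct uscsi_cmd com;
 *
 *	if (sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rq,
 *	    sizeof (rq), buf, sizeof (buf), 0x20) == 0) {
 *		examine the returned feature descriptor
 *	}
 */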
20514 
20515 
20516 /*
20517  *    Function: sd_send_scsi_MODE_SENSE
20518  *
20519  * Description: Utility function for issuing a scsi MODE SENSE command.
20520  *		Note: This routine uses a consistent implementation for Group0,
20521  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
 *
 *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
20527  *		bufaddr - buffer for page data retrieved from the target.
20528  *		buflen - size of page to be retrieved.
20529  *		page_code - page code of data to be retrieved from the target.
20530  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20531  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20532  *			to use the USCSI "direct" chain and bypass the normal
20533  *			command waitq.
20534  *
20535  * Return Code: 0   - Success
20536  *		errno return code from sd_send_scsi_cmd()
20537  *
20538  *     Context: Can sleep. Does not return until command is completed.
20539  */
20540 
20541 static int
20542 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20543 	size_t buflen,  uchar_t page_code, int path_flag)
20544 {
20545 	struct	scsi_extended_sense	sense_buf;
20546 	union scsi_cdb		cdb;
20547 	struct uscsi_cmd	ucmd_buf;
20548 	int			status;
20549 	int			headlen;
20550 
20551 	ASSERT(un != NULL);
20552 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20553 	ASSERT(bufaddr != NULL);
20554 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20555 	    (cdbsize == CDB_GROUP2));
20556 
20557 	SD_TRACE(SD_LOG_IO, un,
20558 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20559 
20560 	bzero(&cdb, sizeof (cdb));
20561 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20562 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20563 	bzero(bufaddr, buflen);
20564 
20565 	if (cdbsize == CDB_GROUP0) {
20566 		cdb.scc_cmd = SCMD_MODE_SENSE;
20567 		cdb.cdb_opaque[2] = page_code;
20568 		FORMG0COUNT(&cdb, buflen);
20569 		headlen = MODE_HEADER_LENGTH;
20570 	} else {
20571 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20572 		cdb.cdb_opaque[2] = page_code;
20573 		FORMG1COUNT(&cdb, buflen);
20574 		headlen = MODE_HEADER_LENGTH_GRP2;
20575 	}
20576 
20577 	ASSERT(headlen <= buflen);
20578 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20579 
20580 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20581 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20582 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20583 	ucmd_buf.uscsi_buflen	= buflen;
20584 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20585 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20586 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20587 	ucmd_buf.uscsi_timeout	= 60;
20588 
20589 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20590 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20591 
20592 	switch (status) {
20593 	case 0:
		/*
		 * sr_check_wp() uses the 0x3f page code and checks the header
		 * of the mode page to determine if the target device is
		 * write-protected. But some USB devices return 0 bytes for
		 * the 0x3f page code. For this case, make sure that at least
		 * the mode page header is returned.
		 */
		if (buflen - ucmd_buf.uscsi_resid < headlen)
			status = EIO;
20603 		break;	/* Success! */
20604 	case EIO:
20605 		switch (ucmd_buf.uscsi_status) {
20606 		case STATUS_RESERVATION_CONFLICT:
20607 			status = EACCES;
20608 			break;
20609 		default:
20610 			break;
20611 		}
20612 		break;
20613 	default:
20614 		break;
20615 	}
20616 
20617 	if (status == 0) {
20618 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20619 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20620 	}
20621 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20622 
20623 	return (status);
20624 }
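
/*
 * Usage sketch (illustrative): fetching the Caching mode page (0x08)
 * with a Group0 CDB. The buffer must cover the mode header plus the
 * page; the size shown is an arbitrary example:
 *
 *	uchar_t	page[MODE_HEADER_LENGTH + 0x14];
 *
 *	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, page, sizeof (page),
 *	    0x08, SD_PATH_DIRECT) == 0) {
 *		parse the page data following the mode header
 *	}
 */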
20625 
20626 
20627 /*
20628  *    Function: sd_send_scsi_MODE_SELECT
20629  *
20630  * Description: Utility function for issuing a scsi MODE SELECT command.
20631  *		Note: This routine uses a consistent implementation for Group0,
20632  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
 *
 *   Arguments: un - pointer to the softstate struct for the target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
 *		bufaddr - buffer containing the page data to be sent to
 *			the target.
 *		buflen - size of the page to be sent.
 *		save_page - boolean to determine if the SP bit should be set.
20641  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20642  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20643  *			to use the USCSI "direct" chain and bypass the normal
20644  *			command waitq.
20645  *
20646  * Return Code: 0   - Success
20647  *		errno return code from sd_send_scsi_cmd()
20648  *
20649  *     Context: Can sleep. Does not return until command is completed.
20650  */
20651 
20652 static int
20653 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20654 	size_t buflen,  uchar_t save_page, int path_flag)
20655 {
20656 	struct	scsi_extended_sense	sense_buf;
20657 	union scsi_cdb		cdb;
20658 	struct uscsi_cmd	ucmd_buf;
20659 	int			status;
20660 
20661 	ASSERT(un != NULL);
20662 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20663 	ASSERT(bufaddr != NULL);
20664 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20665 	    (cdbsize == CDB_GROUP2));
20666 
20667 	SD_TRACE(SD_LOG_IO, un,
20668 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20669 
20670 	bzero(&cdb, sizeof (cdb));
20671 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20672 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20673 
20674 	/* Set the PF bit for many third party drives */
20675 	cdb.cdb_opaque[1] = 0x10;
20676 
20677 	/* Set the savepage(SP) bit if given */
20678 	if (save_page == SD_SAVE_PAGE) {
20679 		cdb.cdb_opaque[1] |= 0x01;
20680 	}
20681 
20682 	if (cdbsize == CDB_GROUP0) {
20683 		cdb.scc_cmd = SCMD_MODE_SELECT;
20684 		FORMG0COUNT(&cdb, buflen);
20685 	} else {
20686 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20687 		FORMG1COUNT(&cdb, buflen);
20688 	}
20689 
20690 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20691 
20692 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20693 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20694 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20695 	ucmd_buf.uscsi_buflen	= buflen;
20696 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20697 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20698 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20699 	ucmd_buf.uscsi_timeout	= 60;
20700 
20701 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20702 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20703 
20704 	switch (status) {
20705 	case 0:
20706 		break;	/* Success! */
20707 	case EIO:
20708 		switch (ucmd_buf.uscsi_status) {
20709 		case STATUS_RESERVATION_CONFLICT:
20710 			status = EACCES;
20711 			break;
20712 		default:
20713 			break;
20714 		}
20715 		break;
20716 	default:
20717 		break;
20718 	}
20719 
20720 	if (status == 0) {
20721 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20722 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20723 	}
20724 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20725 
20726 	return (status);
20727 }
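
/*
 * Usage sketch (illustrative): writing back a page previously fetched
 * with sd_send_scsi_MODE_SENSE(), persisting it across resets via the
 * SP bit. Per the SCSI spec the mode data length field is reserved for
 * MODE SELECT, so it is zeroed before sending:
 *
 *	page[0] = 0;
 *	(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, page,
 *	    sizeof (page), SD_SAVE_PAGE, SD_PATH_DIRECT);
 */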
20728 
20729 
20730 /*
20731  *    Function: sd_send_scsi_RDWR
20732  *
20733  * Description: Issue a scsi READ or WRITE command with the given parameters.
20734  *
20735  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20736  *		cmd:	 SCMD_READ or SCMD_WRITE
 *		bufaddr: Address of caller's buffer for the RDWR data.
 *		buflen:  Length of caller's buffer for the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
20743  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20744  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20745  *			to use the USCSI "direct" chain and bypass the normal
20746  *			command waitq.
20747  *
20748  * Return Code: 0   - Success
20749  *		errno return code from sd_send_scsi_cmd()
20750  *
20751  *     Context: Can sleep. Does not return until command is completed.
20752  */
20753 
20754 static int
20755 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20756 	size_t buflen, daddr_t start_block, int path_flag)
20757 {
20758 	struct	scsi_extended_sense	sense_buf;
20759 	union scsi_cdb		cdb;
20760 	struct uscsi_cmd	ucmd_buf;
20761 	uint32_t		block_count;
20762 	int			status;
20763 	int			cdbsize;
20764 	uchar_t			flag;
20765 
20766 	ASSERT(un != NULL);
20767 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20768 	ASSERT(bufaddr != NULL);
20769 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20770 
20771 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20772 
20773 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20774 		return (EINVAL);
20775 	}
20776 
20777 	mutex_enter(SD_MUTEX(un));
20778 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20779 	mutex_exit(SD_MUTEX(un));
20780 
20781 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20782 
20783 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20784 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20785 	    bufaddr, buflen, start_block, block_count);
20786 
20787 	bzero(&cdb, sizeof (cdb));
20788 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20789 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20790 
20791 	/* Compute CDB size to use */
20792 	if (start_block > 0xffffffff)
20793 		cdbsize = CDB_GROUP4;
20794 	else if ((start_block & 0xFFE00000) ||
20795 	    (un->un_f_cfg_is_atapi == TRUE))
20796 		cdbsize = CDB_GROUP1;
20797 	else
20798 		cdbsize = CDB_GROUP0;
20799 
20800 	switch (cdbsize) {
20801 	case CDB_GROUP0:	/* 6-byte CDBs */
20802 		cdb.scc_cmd = cmd;
20803 		FORMG0ADDR(&cdb, start_block);
20804 		FORMG0COUNT(&cdb, block_count);
20805 		break;
20806 	case CDB_GROUP1:	/* 10-byte CDBs */
20807 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20808 		FORMG1ADDR(&cdb, start_block);
20809 		FORMG1COUNT(&cdb, block_count);
20810 		break;
20811 	case CDB_GROUP4:	/* 16-byte CDBs */
20812 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20813 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20814 		FORMG4COUNT(&cdb, block_count);
20815 		break;
20816 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20817 	default:
20818 		/* All others reserved */
20819 		return (EINVAL);
20820 	}
20821 
20822 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20823 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20824 
20825 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20826 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20827 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20828 	ucmd_buf.uscsi_buflen	= buflen;
20829 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20830 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20831 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	switch (status) {
20836 	case 0:
20837 		break;	/* Success! */
20838 	case EIO:
20839 		switch (ucmd_buf.uscsi_status) {
20840 		case STATUS_RESERVATION_CONFLICT:
20841 			status = EACCES;
20842 			break;
20843 		default:
20844 			break;
20845 		}
20846 		break;
20847 	default:
20848 		break;
20849 	}
20850 
20851 	if (status == 0) {
20852 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20853 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20854 	}
20855 
20856 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20857 
20858 	return (status);
20859 }
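
/*
 * Usage sketch (illustrative, assuming un_tgt_blocksize holds the target
 * block size): reading one target block from LBA 0. The routine selects
 * the CDB group automatically: Group0 (6-byte) for small LBAs, Group1
 * (10-byte) once any of bits 21-31 of the LBA are set or on ATAPI, and
 * Group4 (16-byte) beyond 32 bits:
 *
 *	char	*buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
 *
 *	if (sd_send_scsi_RDWR(un, SCMD_READ, buf, un->un_tgt_blocksize,
 *	    (daddr_t)0, SD_PATH_DIRECT) == 0) {
 *		one block of data is now in buf
 *	}
 *	kmem_free(buf, un->un_tgt_blocksize);
 */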
20860 
20861 
20862 /*
20863  *    Function: sd_send_scsi_LOG_SENSE
20864  *
20865  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20866  *
20867  *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer for the returned log page data.
 *		buflen:  Length of the log page data buffer.
 *		page_code: Code of the log page to retrieve.
 *		page_control: Page control (PC) field for the CDB.
 *		param_ptr: Parameter pointer field for the CDB.
 *		path_flag: SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to bypass the normal command waitq.
20869  * Return Code: 0   - Success
20870  *		errno return code from sd_send_scsi_cmd()
20871  *
20872  *     Context: Can sleep. Does not return until command is completed.
20873  */
20874 
20875 static int
20876 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20877 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)
{
20881 	struct	scsi_extended_sense	sense_buf;
20882 	union scsi_cdb		cdb;
20883 	struct uscsi_cmd	ucmd_buf;
20884 	int			status;
20885 
20886 	ASSERT(un != NULL);
20887 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20888 
20889 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20890 
20891 	bzero(&cdb, sizeof (cdb));
20892 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20893 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20894 
20895 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20896 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20897 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20898 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20899 	FORMG1COUNT(&cdb, buflen);
20900 
20901 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20902 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20903 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20904 	ucmd_buf.uscsi_buflen	= buflen;
20905 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20906 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20907 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20908 	ucmd_buf.uscsi_timeout	= 60;
20909 
20910 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20911 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20912 
20913 	switch (status) {
	case 0:
		break;	/* Success! */
20916 	case EIO:
20917 		switch (ucmd_buf.uscsi_status) {
20918 		case STATUS_RESERVATION_CONFLICT:
20919 			status = EACCES;
20920 			break;
20921 		case STATUS_CHECK:
20922 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20923 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20924 				KEY_ILLEGAL_REQUEST) &&
20925 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
20926 				/*
20927 				 * ASC 0x24: INVALID FIELD IN CDB
20928 				 */
20929 				switch (page_code) {
20930 				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter
					 * is implemented as page 0xE. To
					 * properly handle this case, if an
					 * attempt for log page 0xE fails we
					 * will try again using page 0x31.
					 *
					 * The network storage BU committed to
					 * maintaining page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code
					 * 0x31 until all disks transition to
					 * the standard page.
					 */
20948 					mutex_enter(SD_MUTEX(un));
20949 					un->un_start_stop_cycle_page =
20950 					    START_STOP_CYCLE_VU_PAGE;
20951 					cdb.cdb_opaque[2] =
20952 					    (char)(page_control << 6) |
20953 					    un->un_start_stop_cycle_page;
20954 					mutex_exit(SD_MUTEX(un));
20955 					status = sd_send_scsi_cmd(
20956 					    SD_GET_DEV(un), &ucmd_buf,
20957 					    UIO_SYSSPACE, UIO_SYSSPACE,
20958 					    UIO_SYSSPACE, path_flag);
20959 
20960 					break;
20961 				case TEMPERATURE_PAGE:
20962 					status = ENOTTY;
20963 					break;
20964 				default:
20965 					break;
20966 				}
20967 			}
20968 			break;
20969 		default:
20970 			break;
20971 		}
20972 		break;
20973 	default:
20974 		break;
20975 	}
20976 
20977 	if (status == 0) {
20978 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20979 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20980 	}
20981 
20982 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20983 
20984 	return (status);
20985 }
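
/*
 * Usage sketch (illustrative): reading the temperature log page with
 * cumulative values (page_control 1). The buffer size is an arbitrary
 * example; returned parameters follow the standard 4-byte log page
 * header:
 *
 *	uchar_t	log[0xff];
 *
 *	if (sd_send_scsi_LOG_SENSE(un, log, sizeof (log), TEMPERATURE_PAGE,
 *	    1, 0, SD_PATH_DIRECT) == 0) {
 *		parse the log parameters following the header
 *	}
 */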
20986 
20987 
20988 /*
20989  *    Function: sdioctl
20990  *
20991  * Description: Driver's ioctl(9e) entry point function.
20992  *
20993  *   Arguments: dev     - device number
20994  *		cmd     - ioctl operation to be performed
20995  *		arg     - user argument, contains data to be set or reference
20996  *			  parameter for get
20997  *		flag    - bit flag, indicating open settings, 32/64 bit type
20998  *		cred_p  - user credential pointer
20999  *		rval_p  - calling process return value (OPT)
21000  *
21001  * Return Code: EINVAL
21002  *		ENOTTY
21003  *		ENXIO
21004  *		EIO
21005  *		EFAULT
21006  *		ENOTSUP
21007  *		EPERM
21008  *
21009  *     Context: Called from the device switch at normal priority.
21010  */
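
/*
 * Usage sketch (illustrative, from userland): a typical caller reaching
 * this entry point via ioctl(2). The device path is an example only:
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *	struct dk_cinfo info;
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCINFO, &info) == 0) {
 *		info.dki_ctype, info.dki_cname, etc. describe the
 *		controller for this disk
 *	}
 */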
21011 
21012 static int
21013 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21014 {
21015 	struct sd_lun	*un = NULL;
21016 	int		geom_validated = FALSE;
21017 	int		err = 0;
21018 	int		i = 0;
21019 	cred_t		*cr;
21020 
	/*
	 * All device accesses go through sdstrategy, where we check the
	 * suspend status.
	 */
21025 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21026 		return (ENXIO);
21027 	}
21028 
21029 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21030 
21031 	/*
21032 	 * Moved this wait from sd_uscsi_strategy to here for
21033 	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
21035 	 * in a call to sd_uscsi_strategy.
21036 	 */
21037 	mutex_enter(SD_MUTEX(un));
21038 	while ((un->un_state == SD_STATE_SUSPENDED) ||
21039 	    (un->un_state == SD_STATE_PM_CHANGING)) {
21040 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21041 	}
21042 	/*
21043 	 * Twiddling the counter here protects commands from now
21044 	 * through to the top of sd_uscsi_strategy. Without the
	 * counter increment, a power down, for example, could get in
21046 	 * after the above check for state is made and before
21047 	 * execution gets to the top of sd_uscsi_strategy.
21048 	 * That would cause problems.
21049 	 */
21050 	un->un_ncmds_in_driver++;
21051 
21052 	if ((un->un_f_geometry_is_valid == FALSE) &&
21053 	    (flag & (FNDELAY | FNONBLOCK))) {
21054 		switch (cmd) {
21055 		case CDROMPAUSE:
21056 		case CDROMRESUME:
21057 		case CDROMPLAYMSF:
21058 		case CDROMPLAYTRKIND:
21059 		case CDROMREADTOCHDR:
21060 		case CDROMREADTOCENTRY:
21061 		case CDROMSTOP:
21062 		case CDROMSTART:
21063 		case CDROMVOLCTRL:
21064 		case CDROMSUBCHNL:
21065 		case CDROMREADMODE2:
21066 		case CDROMREADMODE1:
21067 		case CDROMREADOFFSET:
21068 		case CDROMSBLKMODE:
21069 		case CDROMGBLKMODE:
21070 		case CDROMGDRVSPEED:
21071 		case CDROMSDRVSPEED:
21072 		case CDROMCDDA:
21073 		case CDROMCDXA:
21074 		case CDROMSUBCODE:
21075 			if (!ISCD(un)) {
21076 				un->un_ncmds_in_driver--;
21077 				ASSERT(un->un_ncmds_in_driver >= 0);
21078 				mutex_exit(SD_MUTEX(un));
21079 				return (ENOTTY);
21080 			}
21081 			break;
21082 		case FDEJECT:
21083 		case DKIOCEJECT:
21084 		case CDROMEJECT:
21085 			if (!un->un_f_eject_media_supported) {
21086 				un->un_ncmds_in_driver--;
21087 				ASSERT(un->un_ncmds_in_driver >= 0);
21088 				mutex_exit(SD_MUTEX(un));
21089 				return (ENOTTY);
21090 			}
21091 			break;
21092 		case DKIOCSVTOC:
21093 		case DKIOCSETEFI:
21094 		case DKIOCSMBOOT:
21095 		case DKIOCFLUSHWRITECACHE:
21096 			mutex_exit(SD_MUTEX(un));
21097 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21098 			if (err != 0) {
21099 				mutex_enter(SD_MUTEX(un));
21100 				un->un_ncmds_in_driver--;
21101 				ASSERT(un->un_ncmds_in_driver >= 0);
21102 				mutex_exit(SD_MUTEX(un));
21103 				return (EIO);
21104 			}
21105 			mutex_enter(SD_MUTEX(un));
21106 			/* FALLTHROUGH */
21107 		case DKIOCREMOVABLE:
21108 		case DKIOCHOTPLUGGABLE:
21109 		case DKIOCINFO:
21110 		case DKIOCGMEDIAINFO:
21111 		case MHIOCENFAILFAST:
21112 		case MHIOCSTATUS:
21113 		case MHIOCTKOWN:
21114 		case MHIOCRELEASE:
21115 		case MHIOCGRP_INKEYS:
21116 		case MHIOCGRP_INRESV:
21117 		case MHIOCGRP_REGISTER:
21118 		case MHIOCGRP_RESERVE:
21119 		case MHIOCGRP_PREEMPTANDABORT:
21120 		case MHIOCGRP_REGISTERANDIGNOREKEY:
21121 		case CDROMCLOSETRAY:
21122 		case USCSICMD:
21123 			goto skip_ready_valid;
21124 		default:
21125 			break;
21126 		}
21127 
21128 		mutex_exit(SD_MUTEX(un));
21129 		err = sd_ready_and_valid(un);
21130 		mutex_enter(SD_MUTEX(un));
21131 		if (err == SD_READY_NOT_VALID) {
21132 			switch (cmd) {
21133 			case DKIOCGAPART:
21134 			case DKIOCGGEOM:
21135 			case DKIOCSGEOM:
21136 			case DKIOCGVTOC:
21137 			case DKIOCSVTOC:
21138 			case DKIOCSAPART:
21139 			case DKIOCG_PHYGEOM:
21140 			case DKIOCG_VIRTGEOM:
21141 				err = ENOTSUP;
21142 				un->un_ncmds_in_driver--;
21143 				ASSERT(un->un_ncmds_in_driver >= 0);
21144 				mutex_exit(SD_MUTEX(un));
21145 				return (err);
21146 			}
21147 		}
21148 		if (err != SD_READY_VALID) {
21149 			switch (cmd) {
21150 			case DKIOCSTATE:
21151 			case CDROMGDRVSPEED:
21152 			case CDROMSDRVSPEED:
21153 			case FDEJECT:	/* for eject command */
21154 			case DKIOCEJECT:
21155 			case CDROMEJECT:
21156 			case DKIOCGETEFI:
21157 			case DKIOCSGEOM:
21158 			case DKIOCREMOVABLE:
21159 			case DKIOCHOTPLUGGABLE:
21160 			case DKIOCSAPART:
21161 			case DKIOCSETEFI:
21162 				break;
21163 			default:
21164 				if (un->un_f_has_removable_media) {
21165 					err = ENXIO;
21166 				} else {
				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21168 					if (err == SD_RESERVED_BY_OTHERS) {
21169 						err = EACCES;
21170 					} else {
21171 						err = EIO;
21172 					}
21173 				}
21174 				un->un_ncmds_in_driver--;
21175 				ASSERT(un->un_ncmds_in_driver >= 0);
21176 				mutex_exit(SD_MUTEX(un));
21177 				return (err);
21178 			}
21179 		}
21180 		geom_validated = TRUE;
21181 	}
21182 	if ((un->un_f_geometry_is_valid == TRUE) &&
21183 	    (un->un_solaris_size > 0)) {
21184 		/*
21185 		 * the "geometry_is_valid" flag could be true if we
21186 		 * have an fdisk table but no Solaris partition
21187 		 */
21188 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21189 			/* it is EFI, so return ENOTSUP for these */
21190 			switch (cmd) {
21191 			case DKIOCGAPART:
21192 			case DKIOCGGEOM:
21193 			case DKIOCGVTOC:
21194 			case DKIOCSVTOC:
21195 			case DKIOCSAPART:
21196 				err = ENOTSUP;
21197 				un->un_ncmds_in_driver--;
21198 				ASSERT(un->un_ncmds_in_driver >= 0);
21199 				mutex_exit(SD_MUTEX(un));
21200 				return (err);
21201 			}
21202 		}
21203 	}
21204 
21205 skip_ready_valid:
21206 	mutex_exit(SD_MUTEX(un));
21207 
21208 	switch (cmd) {
21209 	case DKIOCINFO:
21210 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21211 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21212 		break;
21213 
21214 	case DKIOCGMEDIAINFO:
21215 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21216 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21217 		break;
21218 
21219 	case DKIOCGGEOM:
21220 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21221 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21222 		    geom_validated);
21223 		break;
21224 
21225 	case DKIOCSGEOM:
21226 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21227 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21228 		break;
21229 
21230 	case DKIOCGAPART:
21231 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21232 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21233 		    geom_validated);
21234 		break;
21235 
21236 	case DKIOCSAPART:
21237 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21238 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21239 		break;
21240 
21241 	case DKIOCGVTOC:
21242 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21243 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21244 		    geom_validated);
21245 		break;
21246 
21247 	case DKIOCGETEFI:
21248 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21249 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21250 		break;
21251 
21252 	case DKIOCPARTITION:
21253 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21254 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21255 		break;
21256 
21257 	case DKIOCSVTOC:
21258 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21259 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21260 		break;
21261 
21262 	case DKIOCSETEFI:
21263 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21264 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21265 		break;
21266 
21267 	case DKIOCGMBOOT:
21268 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21269 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21270 		break;
21271 
21272 	case DKIOCSMBOOT:
21273 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21274 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21275 		break;
21276 
21277 	case DKIOCLOCK:
21278 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21279 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21280 		    SD_PATH_STANDARD);
21281 		break;
21282 
21283 	case DKIOCUNLOCK:
21284 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21285 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21286 		    SD_PATH_STANDARD);
21287 		break;
21288 
21289 	case DKIOCSTATE: {
21290 		enum dkio_state		state;
21291 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21292 
21293 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21294 			err = EFAULT;
21295 		} else {
21296 			err = sd_check_media(dev, state);
21297 			if (err == 0) {
21298 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21299 				    sizeof (int), flag) != 0)
21300 					err = EFAULT;
21301 			}
21302 		}
21303 		break;
21304 	}
21305 
21306 	case DKIOCREMOVABLE:
21307 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
		/*
		 * At present, vold only does automount for removable-media
		 * devices. In order not to break current applications, we
		 * still let hotpluggable devices pretend to be removable-
		 * media devices for vold. Once vold is EOL'ed, this
		 * workaround should be removed.
		 */
21315 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21316 			i = 1;
21317 		} else {
21318 			i = 0;
21319 		}
21320 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21321 			err = EFAULT;
21322 		} else {
21323 			err = 0;
21324 		}
21325 		break;
21326 
21327 	case DKIOCHOTPLUGGABLE:
21328 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21329 		if (un->un_f_is_hotpluggable) {
21330 			i = 1;
21331 		} else {
21332 			i = 0;
21333 		}
21334 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21335 			err = EFAULT;
21336 		} else {
21337 			err = 0;
21338 		}
21339 		break;
21340 
21341 	case DKIOCGTEMPERATURE:
21342 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21343 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21344 		break;
21345 
21346 	case MHIOCENFAILFAST:
21347 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21348 		if ((err = drv_priv(cred_p)) == 0) {
21349 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21350 		}
21351 		break;
21352 
21353 	case MHIOCTKOWN:
21354 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21355 		if ((err = drv_priv(cred_p)) == 0) {
21356 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21357 		}
21358 		break;
21359 
21360 	case MHIOCRELEASE:
21361 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21362 		if ((err = drv_priv(cred_p)) == 0) {
21363 			err = sd_mhdioc_release(dev);
21364 		}
21365 		break;
21366 
21367 	case MHIOCSTATUS:
21368 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21369 		if ((err = drv_priv(cred_p)) == 0) {
21370 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21371 			case 0:
21372 				err = 0;
21373 				break;
21374 			case EACCES:
21375 				*rval_p = 1;
21376 				err = 0;
21377 				break;
21378 			default:
21379 				err = EIO;
21380 				break;
21381 			}
21382 		}
21383 		break;
21384 
21385 	case MHIOCQRESERVE:
21386 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21387 		if ((err = drv_priv(cred_p)) == 0) {
21388 			err = sd_reserve_release(dev, SD_RESERVE);
21389 		}
21390 		break;
21391 
21392 	case MHIOCREREGISTERDEVID:
21393 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21394 		if (drv_priv(cred_p) == EPERM) {
21395 			err = EPERM;
21396 		} else if (!un->un_f_devid_supported) {
21397 			err = ENOTTY;
21398 		} else {
21399 			err = sd_mhdioc_register_devid(dev);
21400 		}
21401 		break;
21402 
21403 	case MHIOCGRP_INKEYS:
21404 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21405 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21406 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21407 				err = ENOTSUP;
21408 			} else {
21409 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21410 				    flag);
21411 			}
21412 		}
21413 		break;
21414 
21415 	case MHIOCGRP_INRESV:
21416 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21417 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21418 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21419 				err = ENOTSUP;
21420 			} else {
21421 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21422 			}
21423 		}
21424 		break;
21425 
21426 	case MHIOCGRP_REGISTER:
21427 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21428 		if ((err = drv_priv(cred_p)) != EPERM) {
21429 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21430 				err = ENOTSUP;
21431 			} else if (arg != NULL) {
21432 				mhioc_register_t reg;
21433 				if (ddi_copyin((void *)arg, &reg,
21434 				    sizeof (mhioc_register_t), flag) != 0) {
21435 					err = EFAULT;
21436 				} else {
21437 					err =
21438 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21439 					    un, SD_SCSI3_REGISTER,
21440 					    (uchar_t *)&reg);
21441 				}
21442 			}
21443 		}
21444 		break;
21445 
21446 	case MHIOCGRP_RESERVE:
21447 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21448 		if ((err = drv_priv(cred_p)) != EPERM) {
21449 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21450 				err = ENOTSUP;
21451 			} else if (arg != NULL) {
21452 				mhioc_resv_desc_t resv_desc;
21453 				if (ddi_copyin((void *)arg, &resv_desc,
21454 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21455 					err = EFAULT;
21456 				} else {
21457 					err =
21458 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21459 					    un, SD_SCSI3_RESERVE,
21460 					    (uchar_t *)&resv_desc);
21461 				}
21462 			}
21463 		}
21464 		break;
21465 
21466 	case MHIOCGRP_PREEMPTANDABORT:
21467 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21468 		if ((err = drv_priv(cred_p)) != EPERM) {
21469 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21470 				err = ENOTSUP;
21471 			} else if (arg != NULL) {
21472 				mhioc_preemptandabort_t preempt_abort;
21473 				if (ddi_copyin((void *)arg, &preempt_abort,
21474 				    sizeof (mhioc_preemptandabort_t),
21475 				    flag) != 0) {
21476 					err = EFAULT;
21477 				} else {
21478 					err =
21479 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21480 					    un, SD_SCSI3_PREEMPTANDABORT,
21481 					    (uchar_t *)&preempt_abort);
21482 				}
21483 			}
21484 		}
21485 		break;
21486 
21487 	case MHIOCGRP_REGISTERANDIGNOREKEY:
		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21489 		if ((err = drv_priv(cred_p)) != EPERM) {
21490 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21491 				err = ENOTSUP;
21492 			} else if (arg != NULL) {
21493 				mhioc_registerandignorekey_t r_and_i;
21494 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21495 				    sizeof (mhioc_registerandignorekey_t),
21496 				    flag) != 0) {
21497 					err = EFAULT;
21498 				} else {
21499 					err =
21500 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21501 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21502 					    (uchar_t *)&r_and_i);
21503 				}
21504 			}
21505 		}
21506 		break;
21507 
21508 	case USCSICMD:
21509 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21510 		cr = ddi_get_cred();
21511 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21512 			err = EPERM;
21513 		} else {
21514 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21515 		}
21516 		break;
21517 
21518 	case CDROMPAUSE:
21519 	case CDROMRESUME:
21520 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21521 		if (!ISCD(un)) {
21522 			err = ENOTTY;
21523 		} else {
21524 			err = sr_pause_resume(dev, cmd);
21525 		}
21526 		break;
21527 
21528 	case CDROMPLAYMSF:
21529 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21530 		if (!ISCD(un)) {
21531 			err = ENOTTY;
21532 		} else {
21533 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21534 		}
21535 		break;
21536 
21537 	case CDROMPLAYTRKIND:
21538 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21539 #if defined(__i386) || defined(__amd64)
21540 		/*
21541 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21542 		 */
21543 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21544 #else
21545 		if (!ISCD(un)) {
21546 #endif
21547 			err = ENOTTY;
21548 		} else {
21549 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21550 		}
21551 		break;
21552 
21553 	case CDROMREADTOCHDR:
21554 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21555 		if (!ISCD(un)) {
21556 			err = ENOTTY;
21557 		} else {
21558 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21559 		}
21560 		break;
21561 
21562 	case CDROMREADTOCENTRY:
21563 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21564 		if (!ISCD(un)) {
21565 			err = ENOTTY;
21566 		} else {
21567 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21568 		}
21569 		break;
21570 
21571 	case CDROMSTOP:
21572 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21573 		if (!ISCD(un)) {
21574 			err = ENOTTY;
21575 		} else {
21576 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21577 			    SD_PATH_STANDARD);
21578 		}
21579 		break;
21580 
21581 	case CDROMSTART:
21582 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21583 		if (!ISCD(un)) {
21584 			err = ENOTTY;
21585 		} else {
21586 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21587 			    SD_PATH_STANDARD);
21588 		}
21589 		break;
21590 
21591 	case CDROMCLOSETRAY:
21592 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21593 		if (!ISCD(un)) {
21594 			err = ENOTTY;
21595 		} else {
21596 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21597 			    SD_PATH_STANDARD);
21598 		}
21599 		break;
21600 
21601 	case FDEJECT:	/* for eject command */
21602 	case DKIOCEJECT:
21603 	case CDROMEJECT:
21604 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21605 		if (!un->un_f_eject_media_supported) {
21606 			err = ENOTTY;
21607 		} else {
21608 			err = sr_eject(dev);
21609 		}
21610 		break;
21611 
21612 	case CDROMVOLCTRL:
21613 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21614 		if (!ISCD(un)) {
21615 			err = ENOTTY;
21616 		} else {
21617 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21618 		}
21619 		break;
21620 
21621 	case CDROMSUBCHNL:
21622 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21623 		if (!ISCD(un)) {
21624 			err = ENOTTY;
21625 		} else {
21626 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21627 		}
21628 		break;
21629 
21630 	case CDROMREADMODE2:
21631 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21632 		if (!ISCD(un)) {
21633 			err = ENOTTY;
21634 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21635 			/*
21636 			 * If the drive supports READ CD, use that instead of
21637 			 * switching the LBA size via a MODE SELECT
21638 			 * Block Descriptor
21639 			 */
21640 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21641 		} else {
21642 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21643 		}
21644 		break;
21645 
21646 	case CDROMREADMODE1:
21647 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21648 		if (!ISCD(un)) {
21649 			err = ENOTTY;
21650 		} else {
21651 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21652 		}
21653 		break;
21654 
21655 	case CDROMREADOFFSET:
21656 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21657 		if (!ISCD(un)) {
21658 			err = ENOTTY;
21659 		} else {
21660 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21661 			    flag);
21662 		}
21663 		break;
21664 
21665 	case CDROMSBLKMODE:
21666 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21667 		/*
21668 		 * There is no means of changing the block size on ATAPI
21669 		 * drives, so return ENOTTY if the drive type is ATAPI.
21670 		 */
21671 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21672 			err = ENOTTY;
21673 		} else if (un->un_f_mmc_cap == TRUE) {
21674 
21675 			/*
21676 			 * MMC Devices do not support changing the
21677 			 * logical block size
21678 			 *
21679 			 * Note: EINVAL is being returned instead of ENOTTY to
21680 			 * maintain consistency with the original mmc
21681 			 * driver update.
21682 			 */
21683 			err = EINVAL;
21684 		} else {
21685 			mutex_enter(SD_MUTEX(un));
21686 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21687 			    (un->un_ncmds_in_transport > 0)) {
21688 				mutex_exit(SD_MUTEX(un));
21689 				err = EINVAL;
21690 			} else {
21691 				mutex_exit(SD_MUTEX(un));
21692 				err = sr_change_blkmode(dev, cmd, arg, flag);
21693 			}
21694 		}
21695 		break;
21696 
21697 	case CDROMGBLKMODE:
21698 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21699 		if (!ISCD(un)) {
21700 			err = ENOTTY;
21701 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21702 		    (un->un_f_blockcount_is_valid != FALSE)) {
21703 			/*
21704 			 * Drive is an ATAPI drive so return target block
21705 			 * size for ATAPI drives since we cannot change the
21706 			 * blocksize on ATAPI drives. Used primarily to detect
21707 			 * if an ATAPI cdrom is present.
21708 			 */
21709 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21710 			    sizeof (int), flag) != 0) {
21711 				err = EFAULT;
21712 			} else {
21713 				err = 0;
21714 			}
21715 
21716 		} else {
21717 			/*
21718 			 * Drive supports changing block sizes via a Mode
21719 			 * Select.
21720 			 */
21721 			err = sr_change_blkmode(dev, cmd, arg, flag);
21722 		}
21723 		break;
21724 
21725 	case CDROMGDRVSPEED:
21726 	case CDROMSDRVSPEED:
21727 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21728 		if (!ISCD(un)) {
21729 			err = ENOTTY;
21730 		} else if (un->un_f_mmc_cap == TRUE) {
21731 			 * Note: In the future the driver implementation
21732 			 * for getting and setting CD speed should entail
21733 			 * the following:
21734 			 * setting cd speed should entail:
21735 			 * 1) If non-mmc try the Toshiba mode page
21736 			 *    (sr_change_speed)
21737 			 * 2) If mmc but no support for Real Time Streaming try
21738 			 *    the SET CD SPEED (0xBB) command
21739 			 *    (sr_atapi_change_speed)
21740 			 * 3) If mmc and support for Real Time Streaming
21741 			 *    try the GET PERFORMANCE and SET STREAMING
21742 			 *    commands (not yet implemented, 4380808)
21743 			 */
21744 			/*
21745 			 * As per the recent MMC spec, CD-ROM speed is variable
21746 			 * and changes with LBA. Since there is no such
21747 			 * thing as a single drive speed now, fail this ioctl.
21748 			 *
21749 			 * Note: EINVAL is returned for consistency with the
21750 			 * original implementation, which supported getting
21751 			 * the drive speed of mmc devices but not setting
21752 			 * it. Thus EINVAL would be returned if a set request
21753 			 * was made for an mmc device.
21754 			 * We no longer support get or set speed for
21755 			 * mmc but need to remain consistent with regard
21756 			 * to the error code returned.
21757 			 */
21758 			err = EINVAL;
21759 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21760 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21761 		} else {
21762 			err = sr_change_speed(dev, cmd, arg, flag);
21763 		}
21764 		break;
21765 
21766 	case CDROMCDDA:
21767 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21768 		if (!ISCD(un)) {
21769 			err = ENOTTY;
21770 		} else {
21771 			err = sr_read_cdda(dev, (void *)arg, flag);
21772 		}
21773 		break;
21774 
21775 	case CDROMCDXA:
21776 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21777 		if (!ISCD(un)) {
21778 			err = ENOTTY;
21779 		} else {
21780 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21781 		}
21782 		break;
21783 
21784 	case CDROMSUBCODE:
21785 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21786 		if (!ISCD(un)) {
21787 			err = ENOTTY;
21788 		} else {
21789 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21790 		}
21791 		break;
21792 
21793 	case DKIOCPARTINFO: {
21794 		/*
21795 		 * Return parameters describing the selected disk slice.
21796 		 * Note: this ioctl is for the intel platform only
21797 		 */
21798 #if defined(__i386) || defined(__amd64)
21799 		int part;
21800 
21801 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21802 		part = SDPART(dev);
21803 
21804 		/* don't check un_solaris_size for pN */
21805 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21806 			err = EIO;
21807 		} else {
21808 			struct part_info p;
21809 
21810 			p.p_start = (daddr_t)un->un_offset[part];
21811 			p.p_length = (int)un->un_map[part].dkl_nblk;
21812 #ifdef _MULTI_DATAMODEL
21813 			switch (ddi_model_convert_from(flag & FMODELS)) {
21814 			case DDI_MODEL_ILP32:
21815 			{
21816 				struct part_info32 p32;
21817 
21818 				p32.p_start = (daddr32_t)p.p_start;
21819 				p32.p_length = p.p_length;
21820 				if (ddi_copyout(&p32, (void *)arg,
21821 				    sizeof (p32), flag))
21822 					err = EFAULT;
21823 				break;
21824 			}
21825 
21826 			case DDI_MODEL_NONE:
21827 			{
21828 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21829 				    flag))
21830 					err = EFAULT;
21831 				break;
21832 			}
21833 			}
21834 #else /* ! _MULTI_DATAMODEL */
21835 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21836 				err = EFAULT;
21837 #endif /* _MULTI_DATAMODEL */
21838 		}
21839 #else
21840 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21841 		err = ENOTTY;
21842 #endif
21843 		break;
21844 	}
21845 
21846 	case DKIOCG_PHYGEOM: {
21847 		/* Return the driver's notion of the media physical geometry */
21848 #if defined(__i386) || defined(__amd64)
21849 		uint64_t	capacity;
21850 		struct dk_geom	disk_geom;
21851 		struct dk_geom	*dkgp = &disk_geom;
21852 
21853 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21854 		mutex_enter(SD_MUTEX(un));
21855 
21856 		if (un->un_g.dkg_nhead != 0 &&
21857 		    un->un_g.dkg_nsect != 0) {
21858 			/*
21859 			 * We succeeded in getting a geometry, but
21860 			 * right now it is being reported as just the
21861 			 * Solaris fdisk partition, just like for
21862 			 * DKIOCGGEOM. We need to change that to be
21863 			 * correct for the entire disk now.
21864 			 */
21865 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21866 			dkgp->dkg_acyl = 0;
21867 			dkgp->dkg_ncyl = un->un_blockcount /
21868 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21869 		} else {
21870 			bzero(dkgp, sizeof (struct dk_geom));
21871 			/*
21872 			 * This disk does not have a Solaris VTOC
21873 			 * so we must present a physical geometry
21874 			 * that will remain consistent regardless
21875 			 * of how the disk is used. This will ensure
21876 			 * that the geometry does not change regardless
21877 			 * of the fdisk partition type (ie. EFI, FAT32,
21878 			 * of the fdisk partition type (i.e. EFI, FAT32,
21879 			 */
21880 			if (ISCD(un)) {
21881 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21882 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21883 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21884 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21885 			} else {
21886 				/*
21887 				 * Invalid un_blockcount can generate invalid
21888 				 * dk_geom and may result in division by zero
21889 				 * system failure. Should make sure blockcount
21890 				 * is valid before using it here.
21891 				 */
21892 				if (un->un_f_blockcount_is_valid == FALSE) {
21893 					mutex_exit(SD_MUTEX(un));
21894 					err = EIO;
21895 
21896 					break;
21897 				}
21898 
21899 				/*
21900 				 * Refer to comments related to off-by-1 at the
21901 				 * header of this file
21902 				 */
21903 				if (!un->un_f_capacity_adjusted &&
21904 				    !un->un_f_has_removable_media &&
21905 				    !un->un_f_is_hotpluggable &&
21906 				    (un->un_tgt_blocksize ==
21907 				    un->un_sys_blocksize))
21908 					capacity = un->un_blockcount - 1;
21909 				else
21910 					capacity = un->un_blockcount;
21911 
21912 				sd_convert_geometry(capacity, dkgp);
21913 				dkgp->dkg_acyl = 0;
21914 				dkgp->dkg_ncyl = capacity /
21915 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21916 			}
21917 		}
21918 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21919 
21920 		if (ddi_copyout(dkgp, (void *)arg,
21921 		    sizeof (struct dk_geom), flag)) {
21922 			mutex_exit(SD_MUTEX(un));
21923 			err = EFAULT;
21924 		} else {
21925 			mutex_exit(SD_MUTEX(un));
21926 			err = 0;
21927 		}
21928 #else
21929 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21930 		err = ENOTTY;
21931 #endif
21932 		break;
21933 	}
21934 
21935 	case DKIOCG_VIRTGEOM: {
21936 		/* Return the driver's notion of the media's logical geometry */
21937 #if defined(__i386) || defined(__amd64)
21938 		struct dk_geom	disk_geom;
21939 		struct dk_geom	*dkgp = &disk_geom;
21940 
21941 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21942 		mutex_enter(SD_MUTEX(un));
21943 		/*
21944 		 * If there is no HBA geometry available, or
21945 		 * if the HBA returned us something that doesn't
21946 		 * really fit into an Int 13/function 8 geometry
21947 		 * result, just fail the ioctl.  See PSARC 1998/313.
21948 		 */
21949 		if (un->un_lgeom.g_nhead == 0 ||
21950 		    un->un_lgeom.g_nsect == 0 ||
21951 		    un->un_lgeom.g_ncyl > 1024) {
21952 			mutex_exit(SD_MUTEX(un));
21953 			err = EINVAL;
21954 		} else {
21955 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21956 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21957 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21958 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21959 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21960 
21961 			if (ddi_copyout(dkgp, (void *)arg,
21962 			    sizeof (struct dk_geom), flag)) {
21963 				mutex_exit(SD_MUTEX(un));
21964 				err = EFAULT;
21965 			} else {
21966 				mutex_exit(SD_MUTEX(un));
21967 				err = 0;
21968 			}
21969 		}
21970 #else
21971 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21972 		err = ENOTTY;
21973 #endif
21974 		break;
21975 	}
21976 #ifdef SDDEBUG
21977 /* RESET/ABORTS testing ioctls */
21978 	case DKIOCRESET: {
21979 		int	reset_level;
21980 
21981 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21982 			err = EFAULT;
21983 		} else {
21984 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21985 			    "reset_level = 0x%x\n", reset_level);
21986 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21987 				err = 0;
21988 			} else {
21989 				err = EIO;
21990 			}
21991 		}
21992 		break;
21993 	}
21994 
21995 	case DKIOCABORT:
21996 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21997 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21998 			err = 0;
21999 		} else {
22000 			err = EIO;
22001 		}
22002 		break;
22003 #endif
22004 
22005 #ifdef SD_FAULT_INJECTION
22006 /* SDIOC FaultInjection testing ioctls */
22007 	case SDIOCSTART:
22008 	case SDIOCSTOP:
22009 	case SDIOCINSERTPKT:
22010 	case SDIOCINSERTXB:
22011 	case SDIOCINSERTUN:
22012 	case SDIOCINSERTARQ:
22013 	case SDIOCPUSH:
22014 	case SDIOCRETRIEVE:
22015 	case SDIOCRUN:
22016 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
22017 		    "SDIOC detected cmd:0x%X:\n", cmd);
22018 		/* call error generator */
22019 		sd_faultinjection_ioctl(cmd, arg, un);
22020 		err = 0;
22021 		break;
22022 
22023 #endif /* SD_FAULT_INJECTION */
22024 
22025 	case DKIOCFLUSHWRITECACHE:
22026 		{
22027 			struct dk_callback *dkc = (struct dk_callback *)arg;
22028 
22029 			mutex_enter(SD_MUTEX(un));
22030 			if (!un->un_f_sync_cache_supported ||
22031 			    !un->un_f_write_cache_enabled) {
22032 				err = un->un_f_sync_cache_supported ?
22033 				    0 : ENOTSUP;
22034 				mutex_exit(SD_MUTEX(un));
22035 				if ((flag & FKIOCTL) && dkc != NULL &&
22036 				    dkc->dkc_callback != NULL) {
22037 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22038 					    err);
22039 					/*
22040 					 * Did callback and reported error.
22041 					 * Since we did a callback, ioctl
22042 					 * should return 0.
22043 					 */
22044 					err = 0;
22045 				}
22046 				break;
22047 			}
22048 			mutex_exit(SD_MUTEX(un));
22049 
22050 			if ((flag & FKIOCTL) && dkc != NULL &&
22051 			    dkc->dkc_callback != NULL) {
22052 				/* async SYNC CACHE request */
22053 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22054 			} else {
22055 				/* synchronous SYNC CACHE request */
22056 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22057 			}
22058 		}
22059 		break;
22060 
22061 	case DKIOCGETWCE: {
22062 
22063 		int wce;
22064 
22065 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22066 			break;
22067 		}
22068 
22069 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22070 			err = EFAULT;
22071 		}
22072 		break;
22073 	}
22074 
22075 	case DKIOCSETWCE: {
22076 
22077 		int wce, sync_supported;
22078 
22079 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22080 			err = EFAULT;
22081 			break;
22082 		}
22083 
22084 		/*
22085 		 * Synchronize multiple threads trying to enable
22086 		 * or disable the cache via the un_f_wcc_cv
22087 		 * condition variable.
22088 		 */
22089 		mutex_enter(SD_MUTEX(un));
22090 
22091 		/*
22092 		 * Don't allow the cache to be enabled if the
22093 		 * config file has it disabled.
22094 		 */
22095 		if (un->un_f_opt_disable_cache && wce) {
22096 			mutex_exit(SD_MUTEX(un));
22097 			err = EINVAL;
22098 			break;
22099 		}
22100 
22101 		/*
22102 		 * Wait for write cache change in progress
22103 		 * bit to be clear before proceeding.
22104 		 */
22105 		while (un->un_f_wcc_inprog)
22106 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22107 
22108 		un->un_f_wcc_inprog = 1;
22109 
22110 		if (un->un_f_write_cache_enabled && wce == 0) {
22111 			/*
22112 			 * Disable the write cache.  Don't clear
22113 			 * un_f_write_cache_enabled until after
22114 			 * the mode select and flush are complete.
22115 			 */
22116 			sync_supported = un->un_f_sync_cache_supported;
22117 			mutex_exit(SD_MUTEX(un));
22118 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22119 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22120 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22121 			}
22122 
22123 			mutex_enter(SD_MUTEX(un));
22124 			if (err == 0) {
22125 				un->un_f_write_cache_enabled = 0;
22126 			}
22127 
22128 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22129 			/*
22130 			 * Set un_f_write_cache_enabled first, so there is
22131 			 * no window where the cache is enabled, but the
22132 			 * bit says it isn't.
22133 			 */
22134 			un->un_f_write_cache_enabled = 1;
22135 			mutex_exit(SD_MUTEX(un));
22136 
22137 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22138 			    SD_CACHE_ENABLE);
22139 
22140 			mutex_enter(SD_MUTEX(un));
22141 
22142 			if (err) {
22143 				un->un_f_write_cache_enabled = 0;
22144 			}
22145 		}
22146 
22147 		un->un_f_wcc_inprog = 0;
22148 		cv_broadcast(&un->un_wcc_cv);
22149 		mutex_exit(SD_MUTEX(un));
22150 		break;
22151 	}
22152 
22153 	default:
22154 		err = ENOTTY;
22155 		break;
22156 	}
22157 	mutex_enter(SD_MUTEX(un));
22158 	un->un_ncmds_in_driver--;
22159 	ASSERT(un->un_ncmds_in_driver >= 0);
22160 	mutex_exit(SD_MUTEX(un));
22161 
22162 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22163 	return (err);
22164 }
22165 
22166 
22167 /*
22168  *    Function: sd_uscsi_ioctl
22169  *
22170  * Description: This routine is the driver entry point for handling USCSI ioctl
22171  *		requests (USCSICMD).
22172  *
22173  *   Arguments: dev	- the device number
22174  *		arg	- user provided scsi command
22175  *		flag	- this argument is a pass through to ddi_copyxxx()
22176  *			  directly from the mode argument of ioctl().
22177  *
22178  * Return Code: code returned by sd_send_scsi_cmd
22179  *		ENXIO
22180  *		EFAULT
22181  *		EAGAIN
22182  */
22183 
22184 static int
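/*
 * Usage sketch (illustrative only, not part of this driver): a
 * privileged user-space program reaches this routine through the
 * USCSICMD ioctl on a raw device node. The descriptor "fd" and its
 * open()/error handling are assumptions of the sketch.
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *
 *	struct uscsi_cmd	ucmd;
 *	char			cdb[CDB_GROUP0] = { SCMD_TEST_UNIT_READY };
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	ucmd.uscsi_cdb = cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = 30;
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		perror("USCSICMD");
 */
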
22185 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22186 {
22187 #ifdef _MULTI_DATAMODEL
22188 	/*
22189 	 * For use when a 32 bit app makes a call into a
22190 	 * 64 bit ioctl
22191 	 */
22192 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22193 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22194 	model_t			model;
22195 #endif /* _MULTI_DATAMODEL */
22196 	struct uscsi_cmd	*scmd = NULL;
22197 	struct sd_lun		*un = NULL;
22198 	enum uio_seg		uioseg;
22199 	char			cdb[CDB_GROUP0];
22200 	int			rval = 0;
22201 
22202 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22203 		return (ENXIO);
22204 	}
22205 
22206 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22207 
22208 	scmd = (struct uscsi_cmd *)
22209 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22210 
22211 #ifdef _MULTI_DATAMODEL
22212 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22213 	case DDI_MODEL_ILP32:
22214 	{
22215 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22216 			rval = EFAULT;
22217 			goto done;
22218 		}
22219 		/*
22220 		 * Convert the ILP32 uscsi data from the
22221 		 * application to LP64 for internal use.
22222 		 */
22223 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22224 		break;
22225 	}
22226 	case DDI_MODEL_NONE:
22227 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22228 			rval = EFAULT;
22229 			goto done;
22230 		}
22231 		break;
22232 	}
22233 #else /* ! _MULTI_DATAMODEL */
22234 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22235 		rval = EFAULT;
22236 		goto done;
22237 	}
22238 #endif /* _MULTI_DATAMODEL */
22239 
22240 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22241 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22242 	if (un->un_f_format_in_progress == TRUE) {
22243 		rval = EAGAIN;
22244 		goto done;
22245 	}
22246 
22247 	/*
22248 	 * We must do the ddi_copyin() on the uscsi_cdb here so that
22249 	 * we have a valid cdb[0] to test.
22250 	 */
22251 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22252 	    (cdb[0] == SCMD_FORMAT)) {
22253 		SD_TRACE(SD_LOG_IOCTL, un,
22254 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22255 		mutex_enter(SD_MUTEX(un));
22256 		un->un_f_format_in_progress = TRUE;
22257 		mutex_exit(SD_MUTEX(un));
22258 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22259 		    SD_PATH_STANDARD);
22260 		mutex_enter(SD_MUTEX(un));
22261 		un->un_f_format_in_progress = FALSE;
22262 		mutex_exit(SD_MUTEX(un));
22263 	} else {
22264 		SD_TRACE(SD_LOG_IOCTL, un,
22265 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22266 		/*
22267 		 * It's OK to fall into here even if the ddi_copyin()
22268 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22269 		 * does this same copyin and will return the EFAULT
22270 		 * if it fails.
22271 		 */
22272 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22273 		    SD_PATH_STANDARD);
22274 	}
22275 #ifdef _MULTI_DATAMODEL
22276 	switch (model) {
22277 	case DDI_MODEL_ILP32:
22278 		/*
22279 		 * Convert back to ILP32 before copyout to the
22280 		 * application
22281 		 */
22282 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22283 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22284 			if (rval != 0) {
22285 				rval = EFAULT;
22286 			}
22287 		}
22288 		break;
22289 	case DDI_MODEL_NONE:
22290 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22291 			if (rval != 0) {
22292 				rval = EFAULT;
22293 			}
22294 		}
22295 		break;
22296 	}
22297 #else /* ! _MULTI_DATAMODEL */
22298 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22299 		if (rval != 0) {
22300 			rval = EFAULT;
22301 		}
22302 	}
22303 #endif /* _MULTI_DATAMODEL */
22304 done:
22305 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22306 
22307 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22308 
22309 	return (rval);
22310 }
22311 
22312 
22313 /*
22314  *    Function: sd_dkio_ctrl_info
22315  *
22316  * Description: This routine is the driver entry point for handling controller
22317  *		information ioctl requests (DKIOCINFO).
22318  *
22319  *   Arguments: dev  - the device number
22320  *		arg  - pointer to user provided dk_cinfo structure
22321  *		       specifying the controller type and attributes.
22322  *		flag - this argument is a pass through to ddi_copyxxx()
22323  *		       directly from the mode argument of ioctl().
22324  *
22325  * Return Code: 0
22326  *		EFAULT
22327  *		ENXIO
22328  */
22329 
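/*
 * Usage sketch (illustrative only, not part of this driver): the
 * controller information assembled below is retrieved from user space
 * roughly as follows; "fd" is assumed to be open on a raw device node.
 *
 *	#include <sys/dkio.h>
 *
 *	struct dk_cinfo	cinfo;
 *
 *	if (ioctl(fd, DKIOCINFO, &cinfo) == 0)
 *		(void) printf("ctlr %s%d, unit %d, maxxfer %u blocks\n",
 *		    cinfo.dki_cname, cinfo.dki_cnum, cinfo.dki_unit,
 *		    cinfo.dki_maxtransfer);
 */
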
22330 static int
22331 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22332 {
22333 	struct sd_lun	*un = NULL;
22334 	struct dk_cinfo	*info;
22335 	dev_info_t	*pdip;
22336 	int		lun, tgt;
22337 
22338 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22339 		return (ENXIO);
22340 	}
22341 
22342 	info = (struct dk_cinfo *)
22343 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22344 
22345 	switch (un->un_ctype) {
22346 	case CTYPE_CDROM:
22347 		info->dki_ctype = DKC_CDROM;
22348 		break;
22349 	default:
22350 		info->dki_ctype = DKC_SCSI_CCS;
22351 		break;
22352 	}
22353 	pdip = ddi_get_parent(SD_DEVINFO(un));
22354 	info->dki_cnum = ddi_get_instance(pdip);
22355 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22356 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22357 	} else {
22358 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22359 		    DK_DEVLEN - 1);
22360 	}
22361 
22362 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22363 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22364 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22365 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22366 
22367 	/* Unit Information */
22368 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22369 	info->dki_slave = ((tgt << 3) | lun);
22370 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22371 	    DK_DEVLEN - 1);
22372 	info->dki_flags = DKI_FMTVOL;
22373 	info->dki_partition = SDPART(dev);
22374 
22375 	/* Max Transfer size of this device in blocks */
22376 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22377 	info->dki_addr = 0;
22378 	info->dki_space = 0;
22379 	info->dki_prio = 0;
22380 	info->dki_vec = 0;
22381 
22382 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22383 		kmem_free(info, sizeof (struct dk_cinfo));
22384 		return (EFAULT);
22385 	} else {
22386 		kmem_free(info, sizeof (struct dk_cinfo));
22387 		return (0);
22388 	}
22389 }
22390 
22391 
22392 /*
22393  *    Function: sd_get_media_info
22394  *
22395  * Description: This routine is the driver entry point for handling ioctl
22396  *		requests for the media type or command set profile used by the
22397  *		drive to operate on the media (DKIOCGMEDIAINFO).
22398  *
22399  *   Arguments: dev	- the device number
22400  *		arg	- pointer to user provided dk_minfo structure
22401  *			  specifying the media type, logical block size and
22402  *			  drive capacity.
22403  *		flag	- this argument is a pass through to ddi_copyxxx()
22404  *			  directly from the mode argument of ioctl().
22405  *
22406  * Return Code: 0
22407  *		EACCES
22408  *		EFAULT
22409  *		ENXIO
22410  *		EIO
22411  */
22412 
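/*
 * Usage sketch (illustrative only, not part of this driver): from
 * user space the media information is read as shown; "fd" is assumed
 * to be open on a raw device node.
 *
 *	#include <sys/dkio.h>
 *
 *	struct dk_minfo	minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0)
 *		(void) printf("type 0x%x, lbsize %u, capacity %llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (u_longlong_t)minfo.dki_capacity);
 */
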
22413 static int
22414 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22415 {
22416 	struct sd_lun		*un = NULL;
22417 	struct uscsi_cmd	com;
22418 	struct scsi_inquiry	*sinq;
22419 	struct dk_minfo		media_info;
22420 	u_longlong_t		media_capacity;
22421 	uint64_t		capacity;
22422 	uint_t			lbasize;
22423 	uchar_t			*out_data;
22424 	uchar_t			*rqbuf;
22425 	int			rval = 0;
22426 	int			rtn;
22427 
22428 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22429 	    (un->un_state == SD_STATE_OFFLINE)) {
22430 		return (ENXIO);
22431 	}
22432 
22433 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22434 
22435 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22436 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22437 
22438 	/* Issue a TUR to determine if the drive is ready with media present */
22439 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22440 	if (rval == ENXIO) {
22441 		goto done;
22442 	}
22443 
22444 	/* Now get configuration data */
22445 	if (ISCD(un)) {
22446 		media_info.dki_media_type = DK_CDROM;
22447 
22448 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22449 		if (un->un_f_mmc_cap == TRUE) {
22450 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22451 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22452 
22453 			if (rtn) {
22454 				/*
22455 				 * Failed for other than an illegal request
22456 				 * or command not supported
22457 				 */
22458 				if ((com.uscsi_status == STATUS_CHECK) &&
22459 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22460 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22461 					    (rqbuf[12] != 0x20)) {
22462 						rval = EIO;
22463 						goto done;
22464 					}
22465 				}
22466 			} else {
22467 				/*
22468 				 * The GET CONFIGURATION command succeeded
22469 				 * so set the media type according to the
22470 				 * returned data
22471 				 */
22472 				media_info.dki_media_type = out_data[6];
22473 				media_info.dki_media_type <<= 8;
22474 				media_info.dki_media_type |= out_data[7];
22475 			}
22476 		}
22477 	} else {
22478 		/*
22479 		 * The profile list is not available, so we attempt to identify
22480 		 * the media type based on the inquiry data
22481 		 */
22482 		sinq = un->un_sd->sd_inq;
22483 		if (sinq->inq_qual == 0) {
22484 			/* This is a direct access device */
22485 			media_info.dki_media_type = DK_FIXED_DISK;
22486 
22487 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22488 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22489 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22490 					media_info.dki_media_type = DK_ZIP;
22491 				} else if (
22492 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22493 					media_info.dki_media_type = DK_JAZ;
22494 				}
22495 			}
22496 		} else {
22497 			/* Not a CD or direct access so return unknown media */
22498 			media_info.dki_media_type = DK_UNKNOWN;
22499 		}
22500 	}
22501 
22502 	/* Now read the capacity so we can provide the lbasize and capacity */
22503 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22504 	    SD_PATH_DIRECT)) {
22505 	case 0:
22506 		break;
22507 	case EACCES:
22508 		rval = EACCES;
22509 		goto done;
22510 	default:
22511 		rval = EIO;
22512 		goto done;
22513 	}
22514 
22515 	media_info.dki_lbsize = lbasize;
22516 	media_capacity = capacity;
22517 
22518 	/*
22519 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22520 	 * un->un_sys_blocksize chunks. So we need to convert it into
22521 	 * lbasize chunks.
22522 	 */
22523 	media_capacity *= un->un_sys_blocksize;
22524 	media_capacity /= lbasize;
22525 	media_info.dki_capacity = media_capacity;
22526 
22527 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22528 		rval = EFAULT;
22529 		/* Keep the goto; cleanup code may be added below in the future */
22530 		goto done;
22531 	}
22532 done:
22533 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22534 	kmem_free(rqbuf, SENSE_LENGTH);
22535 	return (rval);
22536 }
22537 
22538 
22539 /*
22540  *    Function: sd_dkio_get_geometry
22541  *
22542  * Description: This routine is the driver entry point for handling user
22543  *		requests to get the device geometry (DKIOCGGEOM).
22544  *
22545  *   Arguments: dev  - the device number
22546  *		arg  - pointer to user provided dk_geom structure specifying
22547  *			the controller's notion of the current geometry.
22548  *		flag - this argument is a pass through to ddi_copyxxx()
22549  *		       directly from the mode argument of ioctl().
22550  *		geom_validated - flag indicating if the device geometry has been
22551  *				 previously validated in the sdioctl routine.
22552  *
22553  * Return Code: 0
22554  *		EFAULT
22555  *		ENXIO
22556  *		EIO
22557  */
22558 
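/*
 * Usage sketch (illustrative only, not part of this driver): a
 * user-space caller obtains the geometry as follows; "fd" is assumed
 * to be open on a raw device node.
 *
 *	#include <sys/dkio.h>
 *
 *	struct dk_geom	geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0)
 *		(void) printf("%u cyl, %u head, %u sect\n",
 *		    geom.dkg_ncyl, geom.dkg_nhead, geom.dkg_nsect);
 */
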
22559 static int
22560 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22561 {
22562 	struct sd_lun	*un = NULL;
22563 	struct dk_geom	*tmp_geom = NULL;
22564 	int		rval = 0;
22565 
22566 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22567 		return (ENXIO);
22568 	}
22569 
22570 	if (geom_validated == FALSE) {
22571 		/*
22572 		 * sd_validate_geometry does not spin a disk up
22573 		 * if it was spun down. We need to make sure it
22574 		 * is ready.
22575 		 */
22576 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22577 			return (rval);
22578 		}
22579 		mutex_enter(SD_MUTEX(un));
22580 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22581 		mutex_exit(SD_MUTEX(un));
22582 	}
22583 	if (rval)
22584 		return (rval);
22585 
22586 	/*
22587 	 * It is possible that un_solaris_size is 0 (uninitialized)
22588 	 * after sd_unit_attach. Reservation conflict may cause the
22589 	 * above situation. Thus, the zero check of un_solaris_size
22590 	 * should occur after the sd_validate_geometry() call.
22591 	 */
22592 #if defined(__i386) || defined(__amd64)
22593 	if (un->un_solaris_size == 0) {
22594 		return (EIO);
22595 	}
22596 #endif
22597 
22598 	/*
22599 	 * Make a local copy of the soft state geometry to avoid some potential
22600 	 * race conditions associated with holding the mutex and updating the
22601 	 * write_reinstruct value
22602 	 */
22603 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22604 	mutex_enter(SD_MUTEX(un));
22605 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22606 	mutex_exit(SD_MUTEX(un));
22607 
22608 	if (tmp_geom->dkg_write_reinstruct == 0) {
22609 		tmp_geom->dkg_write_reinstruct =
22610 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22611 		    sd_rot_delay) / (int)60000);
22612 	}
22613 
22614 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22615 	    flag);
22616 	if (rval != 0) {
22617 		rval = EFAULT;
22618 	}
22619 
22620 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22621 	return (rval);
22623 }
22624 
22625 
22626 /*
22627  *    Function: sd_dkio_set_geometry
22628  *
22629  * Description: This routine is the driver entry point for handling user
22630  *		requests to set the device geometry (DKIOCSGEOM). The actual
22631  *		device geometry is not updated, just the driver "notion" of it.
22632  *
22633  *   Arguments: dev  - the device number
22634  *		arg  - pointer to user provided dk_geom structure used to set
22635  *			the controller's notion of the current geometry.
22636  *		flag - this argument is a pass through to ddi_copyxxx()
22637  *		       directly from the mode argument of ioctl().
22638  *
22639  * Return Code: 0
22640  *		EFAULT
22641  *		ENXIO
22642  *		EIO
22643  */
22644 
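/*
 * Usage sketch (illustrative only, not part of this driver):
 * DKIOCSGEOM is normally used read-modify-write style, starting from
 * a geometry obtained with DKIOCGGEOM; "fd" and the specific field
 * adjustments are assumptions of the sketch.
 *
 *	struct dk_geom	geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0) {
 *		geom.dkg_apc = 0;	... adjust fields as needed ...
 *		if (ioctl(fd, DKIOCSGEOM, &geom) != 0)
 *			perror("DKIOCSGEOM");
 *	}
 */
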
22645 static int
22646 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22647 {
22648 	struct sd_lun	*un = NULL;
22649 	struct dk_geom	*tmp_geom;
22650 	struct dk_map	*lp;
22651 	int		rval = 0;
22652 	int		i;
22653 
22654 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22655 		return (ENXIO);
22656 	}
22657 
22658 	/*
22659 	 * Make sure there is no reservation conflict on the lun.
22660 	 */
22661 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22662 		return (EACCES);
22663 	}
22664 
22665 #if defined(__i386) || defined(__amd64)
22666 	if (un->un_solaris_size == 0) {
22667 		return (EIO);
22668 	}
22669 #endif
22670 
22671 	/*
22672 	 * We need to copy the user specified geometry into local
22673 	 * storage and then update the softstate. We don't want to hold
22674 	 * the mutex and copyin directly from the user to the soft state
22675 	 */
22676 	tmp_geom = (struct dk_geom *)
22677 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22678 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22679 	if (rval != 0) {
22680 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22681 		return (EFAULT);
22682 	}
22683 
22684 	mutex_enter(SD_MUTEX(un));
22685 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22686 	for (i = 0; i < NDKMAP; i++) {
22687 		lp  = &un->un_map[i];
22688 		un->un_offset[i] =
22689 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22690 #if defined(__i386) || defined(__amd64)
22691 		un->un_offset[i] += un->un_solaris_offset;
22692 #endif
22693 	}
22694 	un->un_f_geometry_is_valid = FALSE;
22695 	mutex_exit(SD_MUTEX(un));
22696 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22697 
22698 	return (rval);
22699 }
22700 
22701 
22702 /*
22703  *    Function: sd_dkio_get_partition
22704  *
22705  * Description: This routine is the driver entry point for handling user
22706  *		requests to get the partition table (DKIOCGAPART).
22707  *
22708  *   Arguments: dev  - the device number
22709  *		arg  - pointer to user provided dk_allmap structure specifying
22710  *			the controller's notion of the current partition table.
22711  *		flag - this argument is a pass through to ddi_copyxxx()
22712  *		       directly from the mode argument of ioctl().
22713  *		geom_validated - flag indicating if the device geometry has been
22714  *				 previously validated in the sdioctl routine.
22715  *
22716  * Return Code: 0
22717  *		EFAULT
22718  *		ENXIO
22719  *		EIO
22720  */
22721 
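/*
 * Usage sketch (illustrative only, not part of this driver): user
 * space retrieves all NDKMAP slices in one call; "fd" is assumed to
 * be open on a raw device node.
 *
 *	#include <sys/dkio.h>
 *
 *	struct dk_allmap	map;
 *	int			i;
 *
 *	if (ioctl(fd, DKIOCGAPART, &map) == 0)
 *		for (i = 0; i < NDKMAP; i++)
 *			(void) printf("%d: cyl %ld, %ld blocks\n", i,
 *			    (long)map.dka_map[i].dkl_cylno,
 *			    (long)map.dka_map[i].dkl_nblk);
 */
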
22722 static int
22723 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22724 {
22725 	struct sd_lun	*un = NULL;
22726 	int		rval = 0;
22727 	int		size;
22728 
22729 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22730 		return (ENXIO);
22731 	}
22732 
22733 	/*
22734 	 * Make sure the geometry is valid before getting the partition
22735 	 * information.
22736 	 */
22737 	mutex_enter(SD_MUTEX(un));
22738 	if (geom_validated == FALSE) {
22739 		/*
22740 		 * sd_validate_geometry does not spin a disk up
22741 		 * if it was spun down. We need to make sure it
22742 		 * is ready before validating the geometry.
22743 		 */
22744 		mutex_exit(SD_MUTEX(un));
22745 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22746 			return (rval);
22747 		}
22748 		mutex_enter(SD_MUTEX(un));
22749 
22750 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22751 			mutex_exit(SD_MUTEX(un));
22752 			return (rval);
22753 		}
22754 	}
22755 	mutex_exit(SD_MUTEX(un));
22756 
22757 	/*
22758 	 * It is possible that un_solaris_size is 0 (uninitialized)
22759 	 * after sd_unit_attach. Reservation conflict may cause the
22760 	 * above situation. Thus, the zero check of un_solaris_size
22761 	 * should occur after the sd_validate_geometry() call.
22762 	 */
22763 #if defined(__i386) || defined(__amd64)
22764 	if (un->un_solaris_size == 0) {
22765 		return (EIO);
22766 	}
22767 #endif
22768 
22769 #ifdef _MULTI_DATAMODEL
22770 	switch (ddi_model_convert_from(flag & FMODELS)) {
22771 	case DDI_MODEL_ILP32: {
22772 		struct dk_map32 dk_map32[NDKMAP];
22773 		int		i;
22774 
22775 		for (i = 0; i < NDKMAP; i++) {
22776 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22777 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22778 		}
22779 		size = NDKMAP * sizeof (struct dk_map32);
22780 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22781 		if (rval != 0) {
22782 			rval = EFAULT;
22783 		}
22784 		break;
22785 	}
22786 	case DDI_MODEL_NONE:
22787 		size = NDKMAP * sizeof (struct dk_map);
22788 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22789 		if (rval != 0) {
22790 			rval = EFAULT;
22791 		}
22792 		break;
22793 	}
22794 #else /* ! _MULTI_DATAMODEL */
22795 	size = NDKMAP * sizeof (struct dk_map);
22796 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22797 	if (rval != 0) {
22798 		rval = EFAULT;
22799 	}
22800 #endif /* _MULTI_DATAMODEL */
22801 	return (rval);
22802 }
22803 
22804 
22805 /*
22806  *    Function: sd_dkio_set_partition
22807  *
22808  * Description: This routine is the driver entry point for handling user
22809  *		requests to set the partition table (DKIOCSAPART). The actual
22810  *		device partition is not updated.
22811  *
22812  *   Arguments: dev  - the device number
22813  *		arg  - pointer to user provided dk_allmap structure used to set
22814  *			the controller's notion of the partition table.
22815  *		flag - this argument is a pass through to ddi_copyxxx()
22816  *		       directly from the mode argument of ioctl().
22817  *
22818  * Return Code: 0
22819  *		EINVAL
22820  *		EFAULT
22821  *		ENXIO
22822  *		EIO
22823  */
22824 
22825 static int
22826 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22827 {
22828 	struct sd_lun	*un = NULL;
22829 	struct dk_map	dk_map[NDKMAP];
22830 	struct dk_map	*lp;
22831 	int		rval = 0;
22832 	int		size;
22833 	int		i;
22834 #if defined(_SUNOS_VTOC_16)
22835 	struct dkl_partition	*vp;
22836 #endif
22837 
22838 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22839 		return (ENXIO);
22840 	}
22841 
22842 	/*
22843 	 * Set the map for all logical partitions.  We hold
22844 	 * the mutex just to make sure another thread doesn't
22845 	 * look at the map while it is half updated.
22846 	 */
22847 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22848 	mutex_enter(SD_MUTEX(un));
22849 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22850 		mutex_exit(SD_MUTEX(un));
22851 		return (ENOTSUP);
22852 	}
22853 	mutex_exit(SD_MUTEX(un));
22854 
22855 	/*
22856 	 * Make sure there is no reservation conflict on the lun.
22857 	 */
22858 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22859 		return (EACCES);
22860 	}
22861 
22862 #if defined(__i386) || defined(__amd64)
22863 	if (un->un_solaris_size == 0) {
22864 		return (EIO);
22865 	}
22866 #endif
22867 
22868 #ifdef _MULTI_DATAMODEL
22869 	switch (ddi_model_convert_from(flag & FMODELS)) {
22870 	case DDI_MODEL_ILP32: {
22871 		struct dk_map32 dk_map32[NDKMAP];
22872 
22873 		size = NDKMAP * sizeof (struct dk_map32);
22874 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22875 		if (rval != 0) {
22876 			return (EFAULT);
22877 		}
22878 		for (i = 0; i < NDKMAP; i++) {
22879 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22880 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22881 		}
22882 		break;
22883 	}
22884 	case DDI_MODEL_NONE:
22885 		size = NDKMAP * sizeof (struct dk_map);
22886 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22887 		if (rval != 0) {
22888 			return (EFAULT);
22889 		}
22890 		break;
22891 	}
22892 #else /* ! _MULTI_DATAMODEL */
22893 	size = NDKMAP * sizeof (struct dk_map);
22894 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22895 	if (rval != 0) {
22896 		return (EFAULT);
22897 	}
22898 #endif /* _MULTI_DATAMODEL */
22899 
22900 	mutex_enter(SD_MUTEX(un));
22901 	/* Note: The size used in this bcopy is set based upon the data model */
22902 	bcopy(dk_map, un->un_map, size);
22903 #if defined(_SUNOS_VTOC_16)
22904 	vp = (struct dkl_partition *)&(un->un_vtoc);
22905 #endif	/* defined(_SUNOS_VTOC_16) */
22906 	for (i = 0; i < NDKMAP; i++) {
22907 		lp  = &un->un_map[i];
22908 		un->un_offset[i] =
22909 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22910 #if defined(_SUNOS_VTOC_16)
22911 		vp->p_start = un->un_offset[i];
22912 		vp->p_size = lp->dkl_nblk;
22913 		vp++;
22914 #endif	/* defined(_SUNOS_VTOC_16) */
22915 #if defined(__i386) || defined(__amd64)
22916 		un->un_offset[i] += un->un_solaris_offset;
22917 #endif
22918 	}
22919 	mutex_exit(SD_MUTEX(un));
22920 	return (rval);
22921 }
22922 
22923 
22924 /*
22925  *    Function: sd_dkio_get_vtoc
22926  *
22927  * Description: This routine is the driver entry point for handling user
22928  *		requests to get the current volume table of contents
22929  *		(DKIOCGVTOC).
22930  *
22931  *   Arguments: dev  - the device number
22932  *		arg  - pointer to user provided vtoc structure specifying
22933  *			the current vtoc.
22934  *		flag - this argument is a pass through to ddi_copyxxx()
22935  *		       directly from the mode argument of ioctl().
22936  *		geom_validated - flag indicating if the device geometry has been
22937  *				 previously validated in the sdioctl routine.
22938  *
22939  * Return Code: 0
22940  *		EFAULT
22941  *		ENXIO
22942  *		EIO
22943  */
22944 
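/*
 * Usage sketch (illustrative only, not part of this driver): the vtoc
 * is read from user space as follows; "fd" is assumed to be open on a
 * raw device node.
 *
 *	#include <sys/vtoc.h>
 *	#include <sys/dkio.h>
 *
 *	struct vtoc	vtoc;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vtoc) == 0 &&
 *	    vtoc.v_sanity == VTOC_SANE)
 *		(void) printf("slice 0 starts at sector %ld\n",
 *		    (long)vtoc.v_part[0].p_start);
 */
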
22945 static int
22946 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22947 {
22948 	struct sd_lun	*un = NULL;
22949 #if defined(_SUNOS_VTOC_8)
22950 	struct vtoc	user_vtoc;
22951 #endif	/* defined(_SUNOS_VTOC_8) */
22952 	int		rval = 0;
22953 
22954 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22955 		return (ENXIO);
22956 	}
22957 
22958 	mutex_enter(SD_MUTEX(un));
22959 	if (geom_validated == FALSE) {
22960 		/*
22961 		 * sd_validate_geometry does not spin a disk up
22962 		 * if it was spun down. We need to make sure it
22963 		 * is ready.
22964 		 */
22965 		mutex_exit(SD_MUTEX(un));
22966 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22967 			return (rval);
22968 		}
22969 		mutex_enter(SD_MUTEX(un));
22970 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22971 			mutex_exit(SD_MUTEX(un));
22972 			return (rval);
22973 		}
22974 	}
22975 
22976 #if defined(_SUNOS_VTOC_8)
22977 	sd_build_user_vtoc(un, &user_vtoc);
22978 	mutex_exit(SD_MUTEX(un));
22979 
22980 #ifdef _MULTI_DATAMODEL
22981 	switch (ddi_model_convert_from(flag & FMODELS)) {
22982 	case DDI_MODEL_ILP32: {
22983 		struct vtoc32 user_vtoc32;
22984 
22985 		vtoctovtoc32(user_vtoc, user_vtoc32);
22986 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22987 		    sizeof (struct vtoc32), flag)) {
22988 			return (EFAULT);
22989 		}
22990 		break;
22991 	}
22992 
22993 	case DDI_MODEL_NONE:
22994 		if (ddi_copyout(&user_vtoc, (void *)arg,
22995 		    sizeof (struct vtoc), flag)) {
22996 			return (EFAULT);
22997 		}
22998 		break;
22999 	}
23000 #else /* ! _MULTI_DATAMODEL */
23001 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
23002 		return (EFAULT);
23003 	}
23004 #endif /* _MULTI_DATAMODEL */
23005 
23006 #elif defined(_SUNOS_VTOC_16)
23007 	mutex_exit(SD_MUTEX(un));
23008 
23009 #ifdef _MULTI_DATAMODEL
23010 	/*
23011 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
23012 	 * 32-bit to maintain compatibility with existing on-disk
23013 	 * structures.  Thus, we need to convert the structure when copying
23014 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
23015 	 * program.  If the target is a 32-bit program, then no conversion
23016 	 * is necessary.
23017 	 */
23018 	/* LINTED: logical expression always true: op "||" */
23019 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
23020 	switch (ddi_model_convert_from(flag & FMODELS)) {
23021 	case DDI_MODEL_ILP32:
23022 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
23023 		    sizeof (un->un_vtoc), flag)) {
23024 			return (EFAULT);
23025 		}
23026 		break;
23027 
23028 	case DDI_MODEL_NONE: {
23029 		struct vtoc user_vtoc;
23030 
23031 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
23032 		if (ddi_copyout(&user_vtoc, (void *)arg,
23033 		    sizeof (struct vtoc), flag)) {
23034 			return (EFAULT);
23035 		}
23036 		break;
23037 	}
23038 	}
23039 #else /* ! _MULTI_DATAMODEL */
23040 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
23041 	    flag)) {
23042 		return (EFAULT);
23043 	}
23044 #endif /* _MULTI_DATAMODEL */
23045 #else
23046 #error "No VTOC format defined."
23047 #endif
23048 
23049 	return (rval);
23050 }
23051 
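/*
 *    Function: sd_dkio_get_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to get a portion of the EFI label data from the
 *		media (DKIOCGETEFI).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the length and starting LBA of the requested data.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		EINVAL
 *		EIO
 */
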
23052 static int
23053 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
23054 {
23055 	struct sd_lun	*un = NULL;
23056 	dk_efi_t	user_efi;
23057 	int		rval = 0;
23058 	void		*buffer;
23059 
23060 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23061 		return (ENXIO);
23062 
23063 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23064 		return (EFAULT);
23065 
23066 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23067 
23068 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23069 	    (user_efi.dki_length > un->un_max_xfer_size))
23070 		return (EINVAL);
23071 
23072 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23073 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23074 	    user_efi.dki_lba, SD_PATH_DIRECT);
23075 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23076 	    user_efi.dki_length, flag) != 0)
23077 		rval = EFAULT;
23078 
23079 	kmem_free(buffer, user_efi.dki_length);
23080 	return (rval);
23081 }
23082 
23083 /*
23084  *    Function: sd_build_user_vtoc
23085  *
23086  * Description: This routine populates a pass by reference variable with the
23087  *		current volume table of contents.
23088  *
23089  *   Arguments: un - driver soft state (unit) structure
23090  *		user_vtoc - pointer to vtoc structure to be populated
23091  */
23092 
23093 static void
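/*
 * Worked example (hypothetical numbers): with dkg_nhead = 16 and
 * dkg_nsect = 63, nblks = 16 * 63 = 1008 sectors per cylinder, so a
 * slice whose dkl_cylno is 2 is reported with p_start = 2 * 1008 =
 * 2016 sectors and p_size = dkl_nblk.
 */
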
23094 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23095 {
23096 	struct dk_map2		*lpart;
23097 	struct dk_map		*lmap;
23098 	struct partition	*vpart;
23099 	int			nblks;
23100 	int			i;
23101 
23102 	ASSERT(mutex_owned(SD_MUTEX(un)));
23103 
23104 	/*
23105 	 * Return vtoc structure fields in the provided VTOC area, addressed
23106 	 * by *vtoc.
23107 	 */
23108 	bzero(user_vtoc, sizeof (struct vtoc));
23109 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23110 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23111 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23112 	user_vtoc->v_sanity	= VTOC_SANE;
23113 	user_vtoc->v_version	= un->un_vtoc.v_version;
23114 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23115 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23116 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23117 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23118 	    sizeof (un->un_vtoc.v_reserved));
23119 	/*
23120 	 * Convert partitioning information.
23121 	 *
23122 	 * Note the conversion from starting cylinder number
23123 	 * to starting sector number.
23124 	 */
23125 	lmap = un->un_map;
23126 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23127 	vpart = user_vtoc->v_part;
23128 
23129 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23130 
23131 	for (i = 0; i < V_NUMPAR; i++) {
23132 		vpart->p_tag	= lpart->p_tag;
23133 		vpart->p_flag	= lpart->p_flag;
23134 		vpart->p_start	= lmap->dkl_cylno * nblks;
23135 		vpart->p_size	= lmap->dkl_nblk;
23136 		lmap++;
23137 		lpart++;
23138 		vpart++;
23139 
23140 		/* (4364927) */
23141 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23142 	}
23143 
23144 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23145 }
23146 
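/*
 *    Function: sd_dkio_partition
 *
 * Description: This routine is the driver entry point for handling user
 *		requests for EFI partition information (DKIOCPARTITION).
 *		It reads the GPT header and the requested partition entry
 *		from the media and returns the entry's type GUID, starting
 *		LBA and size.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided partition64 structure
 *			specifying the partition number of interest.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		ESRCH
 *		EIO
 */
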
23147 static int
23148 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
23149 {
23150 	struct sd_lun		*un = NULL;
23151 	struct partition64	p64;
23152 	int			rval = 0;
23153 	uint_t			nparts;
23154 	efi_gpe_t		*partitions;
23155 	efi_gpt_t		*buffer;
23156 	diskaddr_t		gpe_lba;
23157 
23158 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23159 		return (ENXIO);
23160 	}
23161 
23162 	if (ddi_copyin((const void *)arg, &p64,
23163 	    sizeof (struct partition64), flag)) {
23164 		return (EFAULT);
23165 	}
23166 
23167 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
23168 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
23169 	    1, SD_PATH_DIRECT);
23170 	if (rval != 0)
23171 		goto done_error;
23172 
23173 	sd_swap_efi_gpt(buffer);
23174 
23175 	if ((rval = sd_validate_efi(buffer)) != 0)
23176 		goto done_error;
23177 
23178 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23179 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23180 	if (p64.p_partno > nparts) {
23181 		/* couldn't find it */
23182 		rval = ESRCH;
23183 		goto done_error;
23184 	}
23185 	/*
23186 	 * If we're dealing with a partition entry that lies outside the
23187 	 * normal 16K block, adjust the read LBA accordingly.
23188 	 */
23189 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23190 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
23191 	    gpe_lba, SD_PATH_DIRECT);
23192 	if (rval) {
23193 		goto done_error;
23194 	}
23195 	partitions = (efi_gpe_t *)buffer;
23196 
23197 	sd_swap_efi_gpe(nparts, partitions);
23198 
23199 	partitions += p64.p_partno;
23200 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23201 	    sizeof (struct uuid));
23202 	p64.p_start = partitions->efi_gpe_StartingLBA;
23203 	p64.p_size = partitions->efi_gpe_EndingLBA -
23204 	    p64.p_start + 1;
23205 
23206 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23207 		rval = EFAULT;
23208 
23209 done_error:
23210 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23211 	return (rval);
23212 }
23213 
23214 
23215 /*
23216  *    Function: sd_dkio_set_vtoc
23217  *
23218  * Description: This routine is the driver entry point for handling user
23219  *		requests to set the current volume table of contents
23220  *		(DKIOCSVTOC).
23221  *
23222  *   Arguments: dev  - the device number
23223  *		arg  - pointer to user provided vtoc structure used to set the
23224  *			current vtoc.
23225  *		flag - this argument is a pass through to ddi_copyxxx()
23226  *		       directly from the mode argument of ioctl().
23227  *
23228  * Return Code: 0
23229  *		EFAULT
23230  *		ENXIO
23231  *		EINVAL
23232  *		ENOTSUP
23233  */
23234 
23235 static int
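/*
 * Usage sketch (illustrative only, not part of this driver):
 * DKIOCSVTOC is normally used read-modify-write style with DKIOCGVTOC;
 * "fd" and the particular slice edits are assumptions of the sketch.
 *
 *	struct vtoc	vtoc;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vtoc) == 0) {
 *		vtoc.v_part[1].p_tag = V_USR;	... adjust slices ...
 *		if (ioctl(fd, DKIOCSVTOC, &vtoc) != 0)
 *			perror("DKIOCSVTOC");
 *	}
 */
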
23236 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23237 {
23238 	struct sd_lun	*un = NULL;
23239 	struct vtoc	user_vtoc;
23240 	int		rval = 0;
23241 
23242 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23243 		return (ENXIO);
23244 	}
23245 
23246 #if defined(__i386) || defined(__amd64)
23247 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23248 		return (EINVAL);
23249 	}
23250 #endif
23251 
23252 #ifdef _MULTI_DATAMODEL
23253 	switch (ddi_model_convert_from(flag & FMODELS)) {
23254 	case DDI_MODEL_ILP32: {
23255 		struct vtoc32 user_vtoc32;
23256 
23257 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23258 		    sizeof (struct vtoc32), flag)) {
23259 			return (EFAULT);
23260 		}
23261 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23262 		break;
23263 	}
23264 
23265 	case DDI_MODEL_NONE:
23266 		if (ddi_copyin((const void *)arg, &user_vtoc,
23267 		    sizeof (struct vtoc), flag)) {
23268 			return (EFAULT);
23269 		}
23270 		break;
23271 	}
23272 #else /* ! _MULTI_DATAMODEL */
23273 	if (ddi_copyin((const void *)arg, &user_vtoc,
23274 	    sizeof (struct vtoc), flag)) {
23275 		return (EFAULT);
23276 	}
23277 #endif /* _MULTI_DATAMODEL */
23278 
23279 	mutex_enter(SD_MUTEX(un));
23280 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23281 		mutex_exit(SD_MUTEX(un));
23282 		return (ENOTSUP);
23283 	}
23284 	if (un->un_g.dkg_ncyl == 0) {
23285 		mutex_exit(SD_MUTEX(un));
23286 		return (EINVAL);
23287 	}
23288 
23289 	mutex_exit(SD_MUTEX(un));
23290 	sd_clear_efi(un);
23291 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23292 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23293 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23294 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23295 	    un->un_node_type, NULL);
23296 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23297 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23298 	    un->un_node_type, NULL);
23299 	mutex_enter(SD_MUTEX(un));
23300 
23301 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23302 		if ((rval = sd_write_label(dev)) == 0) {
23303 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23304 			    != 0) {
23305 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23306 				    "sd_dkio_set_vtoc: "
23307 				    "Failed validate geometry\n");
23308 			}
23309 		}
23310 	}
23311 
23312 	/*
23313 	 * Even if sd_build_label_vtoc or sd_write_label failed above,
23314 	 * write the devid anyway; it cannot hurt. Also preserve the devid by
23315 	 * writing to the disk acyl for the case where a devid has been
23316 	 * fabricated.
23317 	 */
23318 	if (un->un_f_devid_supported &&
23319 	    (un->un_f_opt_fab_devid == TRUE)) {
23320 		if (un->un_devid == NULL) {
23321 			sd_register_devid(un, SD_DEVINFO(un),
23322 			    SD_TARGET_IS_UNRESERVED);
23323 		} else {
23324 			/*
23325 			 * The device id for this disk has been
23326 			 * fabricated. Fabricated device id's are
23327 			 * managed by storing them in the last 2
23328 			 * available sectors on the drive. The device
23329 			 * id must be preserved by writing it back out
23330 			 * to this location.
23331 			 */
23332 			if (sd_write_deviceid(un) != 0) {
23333 				ddi_devid_free(un->un_devid);
23334 				un->un_devid = NULL;
23335 			}
23336 		}
23337 	}
23338 	mutex_exit(SD_MUTEX(un));
23339 	return (rval);
23340 }
23341 
23342 
23343 /*
23344  *    Function: sd_build_label_vtoc
23345  *
23346  * Description: This routine updates the driver soft state current volume table
23347  *		of contents based on a user specified vtoc.
23348  *
23349  *   Arguments: un - driver soft state (unit) structure
23350  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23351  *			    to update the driver soft state.
23352  *
23353  * Return Code: 0
23354  *		EINVAL
23355  */
23356 
23357 static int
23358 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23359 {
23360 	struct dk_map		*lmap;
23361 	struct partition	*vpart;
23362 	int			nblks;
23363 #if defined(_SUNOS_VTOC_8)
23364 	int			ncyl;
23365 	struct dk_map2		*lpart;
23366 #endif	/* defined(_SUNOS_VTOC_8) */
23367 	int			i;
23368 
23369 	ASSERT(mutex_owned(SD_MUTEX(un)));
23370 
23371 	/* Sanity-check the vtoc */
23372 	if (user_vtoc->v_sanity != VTOC_SANE ||
23373 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23374 	    user_vtoc->v_nparts != V_NUMPAR) {
23375 		return (EINVAL);
23376 	}
23377 
23378 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23379 	if (nblks == 0) {
23380 		return (EINVAL);
23381 	}
23382 
23383 #if defined(_SUNOS_VTOC_8)
23384 	vpart = user_vtoc->v_part;
23385 	for (i = 0; i < V_NUMPAR; i++) {
23386 		if ((vpart->p_start % nblks) != 0) {
23387 			return (EINVAL);
23388 		}
23389 		ncyl = vpart->p_start / nblks;
23390 		ncyl += vpart->p_size / nblks;
23391 		if ((vpart->p_size % nblks) != 0) {
23392 			ncyl++;
23393 		}
23394 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23395 			return (EINVAL);
23396 		}
23397 		vpart++;
23398 	}
23399 #endif	/* defined(_SUNOS_VTOC_8) */
23400 
23401 	/* Put appropriate vtoc structure fields into the disk label */
23402 #if defined(_SUNOS_VTOC_16)
23403 	/*
23404 	 * The vtoc is always a 32bit data structure to maintain the
23405 	 * The vtoc is always a 32-bit data structure to maintain the
23406 	 */
23407 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23408 
23409 	/*
23410 	 * in the 16-slice vtoc, starting sectors are expressed in
23411 	 * numbers *relative* to the start of the Solaris fdisk partition.
23412 	 */
23413 	lmap = un->un_map;
23414 	vpart = user_vtoc->v_part;
23415 
23416 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23417 		lmap->dkl_cylno = vpart->p_start / nblks;
23418 		lmap->dkl_nblk = vpart->p_size;
23419 	}
23420 
23421 #elif defined(_SUNOS_VTOC_8)
23422 
23423 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23424 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23425 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23426 
23427 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23428 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23429 
23430 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23431 
23432 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23433 
23434 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23435 	    sizeof (un->un_vtoc.v_reserved));
23436 
23437 	/*
23438 	 * Note the conversion from starting sector number
23439 	 * to starting cylinder number.
23440 	 * Return error if division results in a remainder.
23441 	 */
23442 	lmap = un->un_map;
23443 	lpart = un->un_vtoc.v_part;
23444 	vpart = user_vtoc->v_part;
23445 
23446 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23447 		lpart->p_tag  = vpart->p_tag;
23448 		lpart->p_flag = vpart->p_flag;
23449 		lmap->dkl_cylno = vpart->p_start / nblks;
23450 		lmap->dkl_nblk = vpart->p_size;
23451 
23452 		lmap++;
23453 		lpart++;
23454 		vpart++;
23455 
23456 		/* (4387723) */
23457 #ifdef _LP64
23458 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23459 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23460 		} else {
23461 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23462 		}
23463 #else
23464 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23465 #endif
23466 	}
23467 
23468 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23469 #else
23470 #error "No VTOC format defined."
23471 #endif
23472 	return (0);
23473 }
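
/*
 * Illustrative sketch, not compiled into the driver: the _SUNOS_VTOC_8
 * sanity loop above requires each partition to start on a cylinder
 * boundary and to fit within dkg_ncyl cylinders, where one cylinder
 * holds dkg_nsect * dkg_nhead blocks. The helper below mirrors that
 * arithmetic for a single partition; the function name and parameters
 * are assumptions for illustration only.
 */
#if 0
static int
sd_example_part_fits(uint_t p_start, uint_t p_size, int nblks, int max_ncyl)
{
	int	ncyl;

	if ((p_start % nblks) != 0) {
		return (0);		/* not cylinder aligned */
	}
	ncyl = (p_start / nblks) + (p_size / nblks);
	if ((p_size % nblks) != 0) {
		ncyl++;			/* a partial cylinder rounds up */
	}
	return (ncyl <= max_ncyl);	/* must fit within the data cyls */
}
#endif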
23474 
23475 /*
23476  *    Function: sd_clear_efi
23477  *
23478  * Description: This routine clears all EFI labels.
23479  *
23480  *   Arguments: un - driver soft state (unit) structure
23481  *
23482  * Return Code: void
23483  */
23484 
23485 static void
23486 sd_clear_efi(struct sd_lun *un)
23487 {
23488 	efi_gpt_t	*gpt;
23489 	uint_t		lbasize;
23490 	uint64_t	cap;
23491 	int rval;
23492 
23493 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23494 
23495 	mutex_enter(SD_MUTEX(un));
23496 	un->un_reserved = -1;
23497 	mutex_exit(SD_MUTEX(un));
23498 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23499 
23500 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23501 		goto done;
23502 	}
23503 
23504 	sd_swap_efi_gpt(gpt);
23505 	rval = sd_validate_efi(gpt);
23506 	if (rval == 0) {
23507 		/* clear primary */
23508 		bzero(gpt, sizeof (efi_gpt_t));
23509 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23510 			SD_PATH_DIRECT))) {
23511 			SD_INFO(SD_LOG_IO_PARTITION, un,
23512 				"sd_clear_efi: clear primary label failed\n");
23513 		}
23514 	}
23515 	/* the backup */
23516 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23517 	    SD_PATH_DIRECT);
23518 	if (rval) {
23519 		goto done;
23520 	}
23521 	/*
23522 	 * The MMC standard allows READ CAPACITY to be
23523 	 * inaccurate by a bounded amount (in the interest of
23524 	 * response latency).  As a result, failed READs are
23525 	 * commonplace (due to the reading of metadata and not
23526 	 * data). Depending on the per-Vendor/drive Sense data,
23527 	 * the failed READ can cause many (unnecessary) retries.
23528 	 */
23529 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23530 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23531 		SD_PATH_DIRECT)) != 0) {
23532 		goto done;
23533 	}
23534 	sd_swap_efi_gpt(gpt);
23535 	rval = sd_validate_efi(gpt);
23536 	if (rval == 0) {
23537 		/* clear backup */
23538 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23539 			cap-1);
23540 		bzero(gpt, sizeof (efi_gpt_t));
23541 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23542 		    cap-1, SD_PATH_DIRECT))) {
23543 			SD_INFO(SD_LOG_IO_PARTITION, un,
23544 				"sd_clear_efi: clear backup label failed\n");
23545 		}
23546 	} else {
23547 		/*
23548 		 * Refer to comments related to off-by-1 at the
23549 		 * header of this file
23550 		 */
23551 		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23552 		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23553 			SD_PATH_DIRECT)) != 0) {
23554 			goto done;
23555 		}
23556 		sd_swap_efi_gpt(gpt);
23557 		rval = sd_validate_efi(gpt);
23558 		if (rval == 0) {
23559 			/* clear legacy backup EFI label */
23560 			SD_TRACE(SD_LOG_IOCTL, un,
23561 			    "sd_clear_efi clear backup@%lu\n", cap-2);
23562 			bzero(gpt, sizeof (efi_gpt_t));
23563 			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23564 			    cap-2, SD_PATH_DIRECT))) {
23565 				SD_INFO(SD_LOG_IO_PARTITION,
23566 				    un, "sd_clear_efi: "
23567 				    "clear legacy backup label failed\n");
23568 			}
23569 		}
23570 	}
23571 
23572 done:
23573 	kmem_free(gpt, sizeof (efi_gpt_t));
23574 }
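
/*
 * Illustrative sketch, not compiled into the driver: sd_clear_efi()
 * probes GPT headers at three fixed locations. The primary header is
 * at LBA 1 (just after the protective MBR at LBA 0), the standard
 * backup header is at the last LBA (capacity - 1), and the legacy
 * Solaris backup is at capacity - 2 (the off-by-1 case referenced
 * above). The helper name is an assumption for illustration only.
 */
#if 0
static void
sd_example_gpt_locations(uint64_t cap, uint64_t lba[3])
{
	lba[0] = 1;		/* primary GPT header, after the PMBR */
	lba[1] = cap - 1;	/* standard backup header, last LBA */
	lba[2] = cap - 2;	/* legacy Solaris backup (off-by-1) */
}
#endif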
23575 
23576 /*
23577  *    Function: sd_set_vtoc
23578  *
23579  * Description: This routine writes the disk label to its primary and backup locations on the device
23580  *
23581  *   Arguments: un - driver soft state (unit) structure
23582  *              dkl  - the data to be written
23583  *
23584  * Return Code: 0, or the error code from sd_send_scsi_READ/WRITE
23585  */
23586 
23587 static int
23588 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23589 {
23590 	void			*shadow_buf;
23591 	uint_t			label_addr;
23592 	int			sec;
23593 	int			blk;
23594 	int			head;
23595 	int			cyl;
23596 	int			rval;
23597 
23598 #if defined(__i386) || defined(__amd64)
23599 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23600 #else
23601 	/* Write the primary label at block 0 of the solaris partition. */
23602 	label_addr = 0;
23603 #endif
23604 
23605 	if (NOT_DEVBSIZE(un)) {
23606 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23607 		/*
23608 		 * Read the target's first block.
23609 		 */
23610 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23611 		    un->un_tgt_blocksize, label_addr,
23612 		    SD_PATH_STANDARD)) != 0) {
23613 			goto exit;
23614 		}
23615 		/*
23616 		 * Copy the contents of the label into the shadow buffer
23617 		 * which is of the size of target block size.
23618 		 */
23619 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23620 	}
23621 
23622 	/* Write the primary label */
23623 	if (NOT_DEVBSIZE(un)) {
23624 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23625 		    label_addr, SD_PATH_STANDARD);
23626 	} else {
23627 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23628 		    label_addr, SD_PATH_STANDARD);
23629 	}
23630 	if (rval != 0) {
23631 		return (rval);
23632 	}
23633 
23634 	/*
23635 	 * Calculate where the backup labels go.  They are always on
23636 	 * the last alternate cylinder, but some older drives put them
23637 	 * on head 2 instead of the last head.	They are always on the
23638 	 * first 5 odd sectors of the appropriate track.
23639 	 *
23640 	 * We have no choice at this point but to believe that the
23641 	 * disk label is valid.	 Use the geometry of the disk
23642 	 * as described in the label.
23643 	 */
23644 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23645 	head = dkl->dkl_nhead - 1;
23646 
23647 	/*
23648 	 * Write and verify the backup labels. Make sure we don't try to
23649 	 * write past the last cylinder.
23650 	 */
23651 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23652 		blk = (daddr_t)(
23653 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23654 		    (head * dkl->dkl_nsect) + sec);
23655 #if defined(__i386) || defined(__amd64)
23656 		blk += un->un_solaris_offset;
23657 #endif
23658 		if (NOT_DEVBSIZE(un)) {
23659 			uint64_t	tblk;
23660 			/*
23661 			 * Need to read the block first for read modify write.
23662 			 */
23663 			tblk = (uint64_t)blk;
23664 			blk = (int)((tblk * un->un_sys_blocksize) /
23665 			    un->un_tgt_blocksize);
23666 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23667 			    un->un_tgt_blocksize, blk,
23668 			    SD_PATH_STANDARD)) != 0) {
23669 				goto exit;
23670 			}
23671 			/*
23672 			 * Modify the shadow buffer with the label.
23673 			 */
23674 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23675 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23676 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23677 		} else {
23678 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23679 			    blk, SD_PATH_STANDARD);
23680 			SD_INFO(SD_LOG_IO_PARTITION, un,
23681 			"sd_set_vtoc: wrote backup label %d\n", blk);
23682 		}
23683 		if (rval != 0) {
23684 			goto exit;
23685 		}
23686 	}
23687 exit:
23688 	if (NOT_DEVBSIZE(un)) {
23689 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23690 	}
23691 	return (rval);
23692 }
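
/*
 * Illustrative sketch, not compiled into the driver: a worked example of
 * the backup-label addressing above, with an assumed geometry of
 * ncyl = 1000, acyl = 2, nhead = 16, nsect = 63 and apc = 0. The five
 * backup copies land on the last alternate cylinder, last head, odd
 * sectors 1, 3, 5, 7 and 9.
 */
#if 0
static void
sd_example_backup_label_blocks(void)
{
	int	ncyl = 1000, acyl = 2, nhead = 16, nsect = 63, apc = 0;
	int	cyl  = ncyl + acyl - 1;	/* last alternate cylinder: 1001 */
	int	head = nhead - 1;	/* last head: 15 */
	int	sec, blk;

	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < nsect)); sec += 2) {
		blk = (cyl * ((nhead * nsect) - apc)) +
		    (head * nsect) + sec;
		/* blk = (1001 * 1008) + (15 * 63) + sec = 1009953 + sec */
	}
}
#endif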
23693 
23694 /*
23695  *    Function: sd_clear_vtoc
23696  *
23697  * Description: This routine clears out the VTOC labels.
23698  *
23699  *   Arguments: un - driver soft state (unit) structure
23700  *
23701  * Return: void
23702  */
23703 
23704 static void
23705 sd_clear_vtoc(struct sd_lun *un)
23706 {
23707 	struct dk_label		*dkl;
23708 
23709 	mutex_exit(SD_MUTEX(un));
23710 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23711 	mutex_enter(SD_MUTEX(un));
23712 	/*
23713 	 * sd_set_vtoc uses these fields in order to figure out
23714 	 * where to overwrite the backup labels
23715 	 */
23716 	dkl->dkl_apc    = un->un_g.dkg_apc;
23717 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23718 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23719 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23720 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23721 	mutex_exit(SD_MUTEX(un));
23722 	(void) sd_set_vtoc(un, dkl);
23723 	kmem_free(dkl, sizeof (struct dk_label));
23724 
23725 	mutex_enter(SD_MUTEX(un));
23726 }
23727 
23728 /*
23729  *    Function: sd_write_label
23730  *
23731  * Description: This routine will validate and write the driver soft state vtoc
23732  *		contents to the device.
23733  *
23734  *   Arguments: dev - the device number
23735  *
23736  * Return Code: the code returned by sd_send_scsi_cmd()
23737  *		0
23738  *		EINVAL
23739  *		ENXIO
23740  *		ENOMEM
23741  */
23742 
23743 static int
23744 sd_write_label(dev_t dev)
23745 {
23746 	struct sd_lun		*un;
23747 	struct dk_label		*dkl;
23748 	short			sum;
23749 	short			*sp;
23750 	int			i;
23751 	int			rval;
23752 
23753 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23754 	    (un->un_state == SD_STATE_OFFLINE)) {
23755 		return (ENXIO);
23756 	}
23757 	ASSERT(mutex_owned(SD_MUTEX(un)));
23758 	mutex_exit(SD_MUTEX(un));
23759 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23760 	mutex_enter(SD_MUTEX(un));
23761 
23762 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23763 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23764 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23765 	dkl->dkl_apc	= un->un_g.dkg_apc;
23766 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23767 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23768 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23769 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23770 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23771 
23772 #if defined(_SUNOS_VTOC_8)
23773 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23774 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23775 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23776 	for (i = 0; i < NDKMAP; i++) {
23777 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23778 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23779 	}
23780 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23781 #elif defined(_SUNOS_VTOC_16)
23782 	dkl->dkl_skew	= un->un_dkg_skew;
23783 #else
23784 #error "No VTOC format defined."
23785 #endif
23786 
23787 	dkl->dkl_magic			= DKL_MAGIC;
23788 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23789 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23790 
23791 	/* Construct checksum for the new disk label */
23792 	sum = 0;
23793 	sp = (short *)dkl;
23794 	i = sizeof (struct dk_label) / sizeof (short);
23795 	while (i--) {
23796 		sum ^= *sp++;
23797 	}
23798 	dkl->dkl_cksum = sum;
23799 
23800 	mutex_exit(SD_MUTEX(un));
23801 
23802 	rval = sd_set_vtoc(un, dkl);
23803 exit:
23804 	kmem_free(dkl, sizeof (struct dk_label));
23805 	mutex_enter(SD_MUTEX(un));
23806 	return (rval);
23807 }
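
/*
 * Illustrative sketch, not compiled into the driver: dkl_cksum is chosen
 * above so that the XOR of every 16-bit word of the label, dkl_cksum
 * included, comes out to zero. A reader validates a label the same way;
 * the function name is an assumption for illustration only.
 */
#if 0
static int
sd_example_label_cksum_ok(struct dk_label *dkl)
{
	short	sum = 0;
	short	*sp = (short *)dkl;
	int	i   = sizeof (struct dk_label) / sizeof (short);

	while (i--) {
		sum ^= *sp++;
	}
	return (sum == 0);	/* zero XOR over the whole label is valid */
}
#endif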
23808 
23809 static int
23810 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23811 {
23812 	struct sd_lun	*un = NULL;
23813 	dk_efi_t	user_efi;
23814 	int		rval = 0;
23815 	void		*buffer;
23816 	int		valid_efi;
23817 
23818 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23819 		return (ENXIO);
23820 
23821 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23822 		return (EFAULT);
23823 
23824 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23825 
23826 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23827 	    (user_efi.dki_length > un->un_max_xfer_size))
23828 		return (EINVAL);
23829 
23830 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23831 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23832 		rval = EFAULT;
23833 	} else {
23834 		/*
23835 		 * Clear the on-disk vtoc labels and the soft state
23836 		 * vtoc.
23837 		 */
23838 		mutex_enter(SD_MUTEX(un));
23839 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23840 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23841 				"sd_dkio_set_efi: CLEAR VTOC\n");
23842 			sd_clear_vtoc(un);
23843 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23844 			mutex_exit(SD_MUTEX(un));
23845 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23846 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23847 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23848 			    S_IFBLK,
23849 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23850 			    un->un_node_type, NULL);
23851 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23852 			    S_IFCHR,
23853 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23854 			    un->un_node_type, NULL);
23855 		} else
23856 			mutex_exit(SD_MUTEX(un));
23857 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23858 		    user_efi.dki_lba, SD_PATH_DIRECT);
23859 		if (rval == 0) {
23860 			mutex_enter(SD_MUTEX(un));
23861 
23862 			/*
23863 			 * Set un_reserved for a valid EFI label.
23864 			 * The clear_efi function in fdisk and efi_write in
23865 			 * libefi both change the EFI label on disk in 3 steps:
23866 			 * 1. Change the primary gpt and gpe
23867 			 * 2. Change the backup gpe
23868 			 * 3. Change the backup gpt, which is one block
23869 			 * We only re-read the EFI label after the 3rd step;
23870 			 * otherwise there is a "primary label corrupt" warning.
23871 			 */
23872 			if (user_efi.dki_length == un->un_tgt_blocksize) {
23873 				un->un_f_geometry_is_valid = FALSE;
23874 				valid_efi = sd_use_efi(un, SD_PATH_DIRECT);
23875 				if ((valid_efi == 0) &&
23876 				    un->un_f_devid_supported &&
23877 				    (un->un_f_opt_fab_devid == TRUE)) {
23878 					if (un->un_devid == NULL) {
23879 						sd_register_devid(un,
23880 						    SD_DEVINFO(un),
23881 						    SD_TARGET_IS_UNRESERVED);
23882 					} else {
23883 						/*
23884 						 * The device id for this disk
23885 						 * has been fabricated. The
23886 						 * device id must be preserved
23887 						 * by writing it back out to
23888 						 * disk.
23889 						 */
23890 						if (sd_write_deviceid(un)
23891 						    != 0) {
23892 							ddi_devid_free(
23893 							    un->un_devid);
23894 							un->un_devid = NULL;
23895 						}
23896 					}
23897 				}
23898 			}
23899 
23900 			mutex_exit(SD_MUTEX(un));
23901 		}
23902 	}
23903 	kmem_free(buffer, user_efi.dki_length);
23904 	return (rval);
23905 }
23906 
23907 /*
23908  *    Function: sd_dkio_get_mboot
23909  *
23910  * Description: This routine is the driver entry point for handling user
23911  *		requests to get the current device mboot (DKIOCGMBOOT)
23912  *
23913  *   Arguments: dev  - the device number
23914  *		arg  - pointer to user provided mboot structure specifying
23915  *			the current mboot.
23916  *		flag - this argument is a pass through to ddi_copyxxx()
23917  *		       directly from the mode argument of ioctl().
23918  *
23919  * Return Code: 0
23920  *		EINVAL
23921  *		EFAULT
23922  *		ENXIO
23923  */
23924 
23925 static int
23926 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23927 {
23928 	struct sd_lun	*un;
23929 	struct mboot	*mboot;
23930 	int		rval;
23931 	size_t		buffer_size;
23932 
23933 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23934 	    (un->un_state == SD_STATE_OFFLINE)) {
23935 		return (ENXIO);
23936 	}
23937 
23938 	if (!un->un_f_mboot_supported || arg == NULL) {
23939 		return (EINVAL);
23940 	}
23941 
23942 	/*
23943 	 * Read the mboot block, located at absolute block 0 on the target.
23944 	 */
23945 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23946 
23947 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23948 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23949 
23950 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23951 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23952 	    SD_PATH_STANDARD)) == 0) {
23953 		if (ddi_copyout(mboot, (void *)arg,
23954 		    sizeof (struct mboot), flag) != 0) {
23955 			rval = EFAULT;
23956 		}
23957 	}
23958 	kmem_free(mboot, buffer_size);
23959 	return (rval);
23960 }
23961 
23962 
23963 /*
23964  *    Function: sd_dkio_set_mboot
23965  *
23966  * Description: This routine is the driver entry point for handling user
23967  *		requests to validate and set the device master boot
23968  *		(DKIOCSMBOOT).
23969  *
23970  *   Arguments: dev  - the device number
23971  *		arg  - pointer to user provided mboot structure used to set the
23972  *			master boot.
23973  *		flag - this argument is a pass through to ddi_copyxxx()
23974  *		       directly from the mode argument of ioctl().
23975  *
23976  * Return Code: 0
23977  *		EINVAL
23978  *		EFAULT
23979  *		ENXIO
23980  */
23981 
23982 static int
23983 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23984 {
23985 	struct sd_lun	*un = NULL;
23986 	struct mboot	*mboot = NULL;
23987 	int		rval;
23988 	ushort_t	magic;
23989 
23990 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23991 		return (ENXIO);
23992 	}
23993 
23994 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23995 
23996 	if (!un->un_f_mboot_supported) {
23997 		return (EINVAL);
23998 	}
23999 
24000 	if (arg == NULL) {
24001 		return (EINVAL);
24002 	}
24003 
24004 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
24005 
24006 	if (ddi_copyin((const void *)arg, mboot,
24007 	    sizeof (struct mboot), flag) != 0) {
24008 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24009 		return (EFAULT);
24010 	}
24011 
24012 	/* Is this really a master boot record? */
24013 	magic = LE_16(mboot->signature);
24014 	if (magic != MBB_MAGIC) {
24015 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24016 		return (EINVAL);
24017 	}
24018 
24019 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
24020 	    SD_PATH_STANDARD);
24021 
24022 	mutex_enter(SD_MUTEX(un));
24023 #if defined(__i386) || defined(__amd64)
24024 	if (rval == 0) {
24025 		/*
24026 		 * mboot has been written successfully.
24027 		 * update the fdisk and vtoc tables in memory
24028 		 */
24029 		rval = sd_update_fdisk_and_vtoc(un);
24030 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
24031 			mutex_exit(SD_MUTEX(un));
24032 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24033 			return (rval);
24034 		}
24035 	}
24036 
24037 #ifdef __lock_lint
24038 	sd_setup_default_geometry(un);
24039 #endif
24040 
24041 #else
24042 	if (rval == 0) {
24043 		/*
24044 		 * mboot has been written successfully.
24045 		 * set up the default geometry and VTOC
24046 		 */
24047 		if (un->un_blockcount <= DK_MAX_BLOCKS)
24048 			sd_setup_default_geometry(un);
24049 	}
24050 #endif
24051 	mutex_exit(SD_MUTEX(un));
24052 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24053 	return (rval);
24054 }
24055 
24056 
24057 /*
24058  *    Function: sd_setup_default_geometry
24059  *
24060  * Description: This local utility routine sets the default geometry as part of
24061  *		setting the device mboot.
24062  *
24063  *   Arguments: un - driver soft state (unit) structure
24064  *
24065  * Note: This may be redundant with sd_build_default_label.
24066  */
24067 
24068 static void
24069 sd_setup_default_geometry(struct sd_lun *un)
24070 {
24071 	/* zero out the soft state geometry and partition table. */
24072 	bzero(&un->un_g, sizeof (struct dk_geom));
24073 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24074 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
24075 	un->un_asciilabel[0] = '\0';
24076 
24077 	/*
24078 	 * For the rpm, we use the minimum for the disk.
24079 	 * For the heads, cylinders, and sectors per track:
24080 	 * if the capacity is <= 1GB, head = 64, sect = 32;
24081 	 * else head = 255, sect = 63.
24082 	 * Note: the capacity should equal the C*H*S product.
24083 	 * This will cause some truncation of size due to
24084 	 * round-off errors. For CD-ROMs, this truncation can
24085 	 * have adverse side effects, so we return ncyl and
24086 	 * nhead as 1. nsect would overflow for most
24087 	 * CD-ROMs, as nsect is of type ushort.
24088 	 */
24089 	if (ISCD(un)) {
24090 		un->un_g.dkg_ncyl = 1;
24091 		un->un_g.dkg_nhead = 1;
24092 		un->un_g.dkg_nsect = un->un_blockcount;
24093 	} else {
24094 		if (un->un_blockcount <= 0x1000) {
24095 			/* Needed for unlabeled SCSI floppies. */
24096 			un->un_g.dkg_nhead = 2;
24097 			un->un_g.dkg_ncyl = 80;
24098 			un->un_g.dkg_pcyl = 80;
24099 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24100 		} else if (un->un_blockcount <= 0x200000) {
24101 			un->un_g.dkg_nhead = 64;
24102 			un->un_g.dkg_nsect = 32;
24103 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24104 		} else {
24105 			un->un_g.dkg_nhead = 255;
24106 			un->un_g.dkg_nsect = 63;
24107 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24108 		}
24109 		un->un_blockcount = un->un_g.dkg_ncyl *
24110 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
24111 	}
24112 	un->un_g.dkg_acyl = 0;
24113 	un->un_g.dkg_bcyl = 0;
24114 	un->un_g.dkg_intrlv = 1;
24115 	un->un_g.dkg_rpm = 200;
24116 	un->un_g.dkg_read_reinstruct = 0;
24117 	un->un_g.dkg_write_reinstruct = 0;
24118 	if (un->un_g.dkg_pcyl == 0) {
24119 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24120 	}
24121 
24122 	un->un_map['a'-'a'].dkl_cylno = 0;
24123 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24124 	un->un_map['c'-'a'].dkl_cylno = 0;
24125 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24126 	un->un_f_geometry_is_valid = FALSE;
24127 }
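
/*
 * Illustrative sketch, not compiled into the driver: a worked example of
 * the C*H*S truncation noted above. An assumed capacity of 2,500,000
 * blocks exceeds the 0x200000-block threshold, so the 255-head,
 * 63-sector geometry applies.
 */
#if 0
static void
sd_example_default_geometry(void)
{
	uint64_t	blockcount = 2500000;	/* assumed capacity */
	int		nhead = 255, nsect = 63;
	int		ncyl;

	ncyl = blockcount / (nhead * nsect);	/* 2500000 / 16065 = 155 */

	/*
	 * Truncated capacity: 155 * 255 * 63 = 2490075 blocks, so
	 * 9925 blocks are lost to the cylinder round-off.
	 */
	blockcount = (uint64_t)ncyl * nhead * nsect;
}
#endif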
24128 
24129 
24130 #if defined(__i386) || defined(__amd64)
24131 /*
24132  *    Function: sd_update_fdisk_and_vtoc
24133  *
24134  * Description: This local utility routine updates the device fdisk and vtoc
24135  *		as part of setting the device mboot.
24136  *
24137  *   Arguments: un - driver soft state (unit) structure
24138  *
24139  * Return Code: 0 for success or errno-type return code.
24140  *
24141  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
24142  *		these did exist seperately in x86 sd.c!!!
24143  *		these did exist separately in the x86 sd.c.
24144 
24145 static int
24146 sd_update_fdisk_and_vtoc(struct sd_lun *un)
24147 {
24148 	static char	labelstring[128];
24149 	static char	buf[256];
24150 	char		*label = 0;
24151 	int		count;
24152 	int		label_rc = 0;
24153 	int		gvalid = un->un_f_geometry_is_valid;
24154 	int		fdisk_rval;
24155 	int		lbasize;
24156 	int		capacity;
24157 
24158 	ASSERT(mutex_owned(SD_MUTEX(un)));
24159 
24160 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24161 		return (EINVAL);
24162 	}
24163 
24164 	if (un->un_f_blockcount_is_valid == FALSE) {
24165 		return (EINVAL);
24166 	}
24167 
24168 #if defined(_SUNOS_VTOC_16)
24169 	/*
24170 	 * Set up the "whole disk" fdisk partition; this should always
24171 	 * exist, regardless of whether the disk contains an fdisk table
24172 	 * or vtoc.
24173 	 */
24174 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24175 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24176 #endif	/* defined(_SUNOS_VTOC_16) */
24177 
24178 	/*
24179 	 * copy the lbasize and capacity so that if they're
24180 	 * reset while we're not holding the SD_MUTEX(un), we will
24181 	 * continue to use valid values after the SD_MUTEX(un) is
24182 	 * reacquired.
24183 	 */
24184 	lbasize  = un->un_tgt_blocksize;
24185 	capacity = un->un_blockcount;
24186 
24187 	/*
24188 	 * refresh the logical and physical geometry caches.
24189 	 * (data from mode sense format/rigid disk geometry pages,
24190 	 * and scsi_ifgetcap("geometry").
24191 	 */
24192 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24193 
24194 	/*
24195 	 * Only DIRECT ACCESS devices will have Sun labels.
24196 	 * CD's supposedly have a Sun label, too
24197 	 * CDs supposedly have a Sun label, too.
24198 	if (un->un_f_vtoc_label_supported) {
24199 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24200 		    SD_PATH_DIRECT);
24201 		if (fdisk_rval == SD_CMD_FAILURE) {
24202 			ASSERT(mutex_owned(SD_MUTEX(un)));
24203 			return (EIO);
24204 		}
24205 
24206 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24207 			ASSERT(mutex_owned(SD_MUTEX(un)));
24208 			return (EACCES);
24209 		}
24210 
24211 		if (un->un_solaris_size <= DK_LABEL_LOC) {
24212 			/*
24213 			 * Found fdisk table but no Solaris partition entry,
24214 			 * so don't call sd_uselabel() and don't create
24215 			 * a default label.
24216 			 */
24217 			label_rc = 0;
24218 			un->un_f_geometry_is_valid = TRUE;
24219 			goto no_solaris_partition;
24220 		}
24221 
24222 #if defined(_SUNOS_VTOC_8)
24223 		label = (char *)un->un_asciilabel;
24224 #elif defined(_SUNOS_VTOC_16)
24225 		label = (char *)un->un_vtoc.v_asciilabel;
24226 #else
24227 #error "No VTOC format defined."
24228 #endif
24229 	} else if (capacity < 0) {
24230 		ASSERT(mutex_owned(SD_MUTEX(un)));
24231 		return (EINVAL);
24232 	}
24233 
24234 	/*
24235 	 * For removable media, we reach here only if we have found a
24236 	 * Solaris partition.
24237 	 * If un_f_geometry_is_valid is FALSE, it indicates that the Solaris
24238 	 * partition has changed from the previous one, so we set up a
24239 	 * default VTOC in this case.
24240 	 */
24241 	if (un->un_f_geometry_is_valid == FALSE) {
24242 		sd_build_default_label(un);
24243 		label_rc = 0;
24244 	}
24245 
24246 no_solaris_partition:
24247 	if ((!un->un_f_has_removable_media ||
24248 	    (un->un_f_has_removable_media &&
24249 	    un->un_mediastate == DKIO_EJECTED)) &&
24250 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
24251 		/*
24252 		 * Print out a message indicating who and what we are.
24253 		 * We do this only when we happen to really validate the
24254 		 * geometry. We may call sd_validate_geometry() at other
24255 		 * times, e.g. for ioctl()s like Get VTOC, in which case we
24256 		 * don't want to print the label.
24257 		 * If the geometry is valid, print the label string,
24258 		 * else print vendor and product info, if available
24259 		 */
24260 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24261 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24262 		} else {
24263 			mutex_enter(&sd_label_mutex);
24264 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24265 			    labelstring);
24266 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24267 			    &labelstring[64]);
24268 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24269 			    labelstring, &labelstring[64]);
24270 			if (un->un_f_blockcount_is_valid == TRUE) {
24271 				(void) sprintf(&buf[strlen(buf)],
24272 				    ", %" PRIu64 " %u byte blocks\n",
24273 				    un->un_blockcount,
24274 				    un->un_tgt_blocksize);
24275 			} else {
24276 				(void) sprintf(&buf[strlen(buf)],
24277 				    ", (unknown capacity)\n");
24278 			}
24279 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24280 			mutex_exit(&sd_label_mutex);
24281 		}
24282 	}
24283 
24284 #if defined(_SUNOS_VTOC_16)
24285 	/*
24286 	 * If we have valid geometry, set up the remaining fdisk partitions.
24287 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24288 	 * we set it to an entirely bogus value.
24289 	 */
24290 	for (count = 0; count < FD_NUMPART; count++) {
24291 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24292 		un->un_map[FDISK_P1 + count].dkl_nblk =
24293 		    un->un_fmap[count].fmap_nblk;
24294 		un->un_offset[FDISK_P1 + count] =
24295 		    un->un_fmap[count].fmap_start;
24296 	}
24297 #endif
24298 
24299 	for (count = 0; count < NDKMAP; count++) {
24300 #if defined(_SUNOS_VTOC_8)
24301 		struct dk_map *lp  = &un->un_map[count];
24302 		un->un_offset[count] =
24303 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24304 #elif defined(_SUNOS_VTOC_16)
24305 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24306 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24307 #else
24308 #error "No VTOC format defined."
24309 #endif
24310 	}
24311 
24312 	ASSERT(mutex_owned(SD_MUTEX(un)));
24313 	return (label_rc);
24314 }
24315 #endif
24316 
24317 
24318 /*
24319  *    Function: sd_check_media
24320  *
24321  * Description: This utility routine implements the functionality for the
24322  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24323  *		driver state changes from that specified by the user
24324  *		(inserted or ejected). For example, if the user specifies
24325  *		DKIO_EJECTED and the current media state is inserted this
24326  *		routine will immediately return DKIO_INSERTED. However, if the
24327  *		current media state is not inserted the user thread will be
24328  *		blocked until the drive state changes. If DKIO_NONE is specified
24329  *		the user thread will block until a drive state change occurs.
24330  *
24331  *   Arguments: dev  - the device number
24332  *		state  - user pointer to a dkio_state, updated with the current
24333  *		state  - the user-specified media state to wait on; the
24334  *			call returns when the drive state differs from it.
24335  * Return Code: ENXIO
24336  *		EIO
24337  *		EAGAIN
24338  *		EINTR
24339  */
24340 
24341 static int
24342 sd_check_media(dev_t dev, enum dkio_state state)
24343 {
24344 	struct sd_lun		*un = NULL;
24345 	enum dkio_state		prev_state;
24346 	opaque_t		token = NULL;
24347 	int			rval = 0;
24348 
24349 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24350 		return (ENXIO);
24351 	}
24352 
24353 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24354 
24355 	mutex_enter(SD_MUTEX(un));
24356 
24357 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24358 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24359 
24360 	prev_state = un->un_mediastate;
24361 
24362 	/* is there anything to do? */
24363 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24364 		/*
24365 		 * submit the request to the scsi_watch service;
24366 		 * scsi_media_watch_cb() does the real work
24367 		 */
24368 		mutex_exit(SD_MUTEX(un));
24369 
24370 		/*
24371 		 * This change handles the case where a scsi watch request is
24372 		 * added to a device that is powered down. To accomplish this
24373 		 * we power up the device before adding the scsi watch request,
24374 		 * since the scsi watch sends a TUR directly to the device
24375 		 * which the device cannot handle if it is powered down.
24376 		 */
24377 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24378 			mutex_enter(SD_MUTEX(un));
24379 			goto done;
24380 		}
24381 
24382 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24383 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24384 		    (caddr_t)dev);
24385 
24386 		sd_pm_exit(un);
24387 
24388 		mutex_enter(SD_MUTEX(un));
24389 		if (token == NULL) {
24390 			rval = EAGAIN;
24391 			goto done;
24392 		}
24393 
24394 		/*
24395 		 * This is a special case IOCTL that doesn't return
24396 		 * until the media state changes. Routine sdpower
24397 		 * knows about and handles this so don't count it
24398 		 * as an active cmd in the driver, which would
24399 		 * keep the device busy to the pm framework.
24400 		 * keep the device busy from the pm framework's view.
24401 		 * be powered down.
24402 		 */
24403 		un->un_ncmds_in_driver--;
24404 		ASSERT(un->un_ncmds_in_driver >= 0);
24405 
24406 		/*
24407 		 * if a prior request had been made, this will be the same
24408 		 * token, as scsi_watch was designed that way.
24409 		 */
24410 		un->un_swr_token = token;
24411 		un->un_specified_mediastate = state;
24412 
24413 		/*
24414 		 * now wait for media change
24415 		 * we will not be signalled unless mediastate == state but it is
24416 		 * still better to test for this condition, since there is a
24417 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24418 		 */
24419 		SD_TRACE(SD_LOG_COMMON, un,
24420 		    "sd_check_media: waiting for media state change\n");
24421 		while (un->un_mediastate == state) {
24422 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24423 				SD_TRACE(SD_LOG_COMMON, un,
24424 				    "sd_check_media: waiting for media state "
24425 				    "was interrupted\n");
24426 				un->un_ncmds_in_driver++;
24427 				rval = EINTR;
24428 				goto done;
24429 			}
24430 			SD_TRACE(SD_LOG_COMMON, un,
24431 			    "sd_check_media: received signal, state=%x\n",
24432 			    un->un_mediastate);
24433 		}
24434 		/*
24435 		 * Inc the counter to indicate the device once again
24436 		 * has an active outstanding cmd.
24437 		 */
24438 		un->un_ncmds_in_driver++;
24439 	}
24440 
24441 	/* invalidate geometry */
24442 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24443 		sr_ejected(un);
24444 	}
24445 
24446 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24447 		uint64_t	capacity;
24448 		uint_t		lbasize;
24449 
24450 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24451 		mutex_exit(SD_MUTEX(un));
24452 		/*
24453 		 * Since the following routines use SD_PATH_DIRECT, we must
24454 		 * call PM directly before the upcoming disk accesses. This
24455 		 * may cause the disk to be power/spin up.
24456 		 */
24457 
24458 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24459 			rval = sd_send_scsi_READ_CAPACITY(un,
24460 			    &capacity,
24461 			    &lbasize, SD_PATH_DIRECT);
24462 			if (rval != 0) {
24463 				sd_pm_exit(un);
24464 				mutex_enter(SD_MUTEX(un));
24465 				goto done;
24466 			}
24467 		} else {
24468 			rval = EIO;
24469 			mutex_enter(SD_MUTEX(un));
24470 			goto done;
24471 		}
24472 		mutex_enter(SD_MUTEX(un));
24473 
24474 		sd_update_block_info(un, lbasize, capacity);
24475 
24476 		un->un_f_geometry_is_valid	= FALSE;
24477 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24478 
24479 		mutex_exit(SD_MUTEX(un));
24480 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24481 		    SD_PATH_DIRECT);
24482 		sd_pm_exit(un);
24483 
24484 		mutex_enter(SD_MUTEX(un));
24485 	}
24486 done:
24487 	un->un_f_watcht_stopped = FALSE;
24488 	if (un->un_swr_token) {
24489 		/*
24490 		 * Use of this local token and the mutex ensures that we avoid
24491 		 * some race conditions associated with terminating the
24492 		 * scsi watch.
24493 		 */
24494 		token = un->un_swr_token;
24495 		un->un_swr_token = (opaque_t)NULL;
24496 		mutex_exit(SD_MUTEX(un));
24497 		(void) scsi_watch_request_terminate(token,
24498 		    SCSI_WATCH_TERMINATE_WAIT);
24499 		mutex_enter(SD_MUTEX(un));
24500 	}
24501 
24502 	/*
24503 	 * Update the capacity kstat value, if no media previously
24504 	 * (capacity kstat is 0) and a media has been inserted
24505 	 * (un_f_blockcount_is_valid == TRUE)
24506 	 */
24507 	if (un->un_errstats) {
24508 		struct sd_errstats	*stp = NULL;
24509 
24510 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24511 		if ((stp->sd_capacity.value.ui64 == 0) &&
24512 		    (un->un_f_blockcount_is_valid == TRUE)) {
24513 			stp->sd_capacity.value.ui64 =
24514 			    (uint64_t)((uint64_t)un->un_blockcount *
24515 			    un->un_sys_blocksize);
24516 		}
24517 	}
24518 	mutex_exit(SD_MUTEX(un));
24519 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24520 	return (rval);
24521 }
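
/*
 * Illustrative user-level sketch, not part of the driver: how an
 * application typically consumes DKIOCSTATE. The caller passes in the
 * state it last saw and the ioctl blocks until the drive state differs,
 * per the blocking behavior implemented above. The device path is an
 * assumption; <fcntl.h> and <sys/dkio.h> are needed.
 */
#if 0
static void
sd_example_wait_for_media(void)
{
	int			fd;
	enum dkio_state		state = DKIO_NONE;

	fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
		if (state == DKIO_INSERTED) {
			/* media has arrived; (re)read the label */
		} else if (state == DKIO_EJECTED) {
			/* media has been removed */
		}
	}
}
#endif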
24522 
24523 
24524 /*
24525  *    Function: sd_delayed_cv_broadcast
24526  *
24527  * Description: Delayed cv_broadcast to allow for target to recover from media
24528  *		insertion.
24529  *
24530  *   Arguments: arg - driver soft state (unit) structure
24531  */
24532 
24533 static void
24534 sd_delayed_cv_broadcast(void *arg)
24535 {
24536 	struct sd_lun *un = arg;
24537 
24538 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24539 
24540 	mutex_enter(SD_MUTEX(un));
24541 	un->un_dcvb_timeid = NULL;
24542 	cv_broadcast(&un->un_state_cv);
24543 	mutex_exit(SD_MUTEX(un));
24544 }
24545 
24546 
24547 /*
24548  *    Function: sd_media_watch_cb
24549  *
24550  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24551  *		routine processes the TUR sense data and updates the driver
24552  *		state if a transition has occurred. The user thread
24553  *		(sd_check_media) is then signalled.
24554  *
24555  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24556  *			among multiple watches that share this callback function
24557  *		resultp - scsi watch facility result packet containing scsi
24558  *			  packet, status byte and sense data
24559  *
24560  * Return Code: 0 for success, -1 for failure
24561  */
24562 
24563 static int
24564 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24565 {
24566 	struct sd_lun			*un;
24567 	struct scsi_status		*statusp = resultp->statusp;
24568 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24569 	enum dkio_state			state = DKIO_NONE;
24570 	dev_t				dev = (dev_t)arg;
24571 	uchar_t				actual_sense_length;
24572 	uint8_t				skey, asc, ascq;
24573 
24574 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24575 		return (-1);
24576 	}
24577 	actual_sense_length = resultp->actual_sense_length;
24578 
24579 	mutex_enter(SD_MUTEX(un));
24580 	SD_TRACE(SD_LOG_COMMON, un,
24581 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24582 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24583 
24584 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24585 		un->un_mediastate = DKIO_DEV_GONE;
24586 		cv_broadcast(&un->un_state_cv);
24587 		mutex_exit(SD_MUTEX(un));
24588 
24589 		return (0);
24590 	}
24591 
24592 	/*
24593 	 * If there was a check condition, then sensep points to valid sense
24594 	 * data; if the status was not a check condition but a reservation or
24595 	 * busy status, then the new state is DKIO_NONE.
24596 	 */
24597 	if (sensep != NULL) {
24598 		skey = scsi_sense_key(sensep);
24599 		asc = scsi_sense_asc(sensep);
24600 		ascq = scsi_sense_ascq(sensep);
24601 
24602 		SD_INFO(SD_LOG_COMMON, un,
24603 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24604 		    skey, asc, ascq);
24605 		/* This routine only uses up to 13 bytes of sense data. */
24606 		if (actual_sense_length >= 13) {
24607 			if (skey == KEY_UNIT_ATTENTION) {
24608 				if (asc == 0x28) {
24609 					state = DKIO_INSERTED;
24610 				}
24611 			} else {
24612 				/*
24613 				 * Sense data 02/04/02 means that the
24614 				 * host should send a START command.
24615 				 * Explicitly leave the media state as
24616 				 * is (inserted), since the media is
24617 				 * present and the host has stopped the
24618 				 * device for PM reasons. The next real
24619 				 * read/write to this media will bring
24620 				 * the device back to the proper state
24621 				 * for media access.
24622 				 */
24623 				if ((skey == KEY_NOT_READY) &&
24624 				    (asc == 0x3a)) {
24625 					state = DKIO_EJECTED;
24626 				}
24627 
24628 				/*
24629 				 * If the drive is busy with an operation
24630 				 * or long write, keep the media in an
24631 				 * inserted state.
24632 				 */
24633 
24634 				if ((skey == KEY_NOT_READY) &&
24635 				    (asc == 0x04) &&
24636 				    ((ascq == 0x02) ||
24637 				    (ascq == 0x07) ||
24638 				    (ascq == 0x08))) {
24639 					state = DKIO_INSERTED;
24640 				}
24641 			}
24642 		}
24643 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24644 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24645 		state = DKIO_INSERTED;
24646 	}
24647 
24648 	SD_TRACE(SD_LOG_COMMON, un,
24649 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24650 	    state, un->un_specified_mediastate);
24651 
24652 	/*
24653 	 * now signal the waiting thread if this is *not* the specified state;
24654 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24655 	 * to recover
24656 	 */
24657 	if (state != un->un_specified_mediastate) {
24658 		un->un_mediastate = state;
24659 		if (state == DKIO_INSERTED) {
24660 			/*
24661 			 * delay the signal to give the drive a chance
24662 			 * to do what it apparently needs to do
24663 			 */
24664 			SD_TRACE(SD_LOG_COMMON, un,
24665 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24666 			if (un->un_dcvb_timeid == NULL) {
24667 				un->un_dcvb_timeid =
24668 				    timeout(sd_delayed_cv_broadcast, un,
24669 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24670 			}
24671 		} else {
24672 			SD_TRACE(SD_LOG_COMMON, un,
24673 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24674 			cv_broadcast(&un->un_state_cv);
24675 		}
24676 	}
24677 	mutex_exit(SD_MUTEX(un));
24678 	return (0);
24679 }
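
/*
 * Illustrative sketch, not compiled into the driver: the sense decode
 * performed above, collected in one place. The key/ASC/ASCQ values are
 * the ones tested by sd_media_watch_cb(); the function name is an
 * assumption for illustration only.
 */
#if 0
static enum dkio_state
sd_example_decode_media_sense(uint8_t skey, uint8_t asc, uint8_t ascq)
{
	if ((skey == KEY_UNIT_ATTENTION) && (asc == 0x28)) {
		return (DKIO_INSERTED);	/* medium may have changed */
	}
	if ((skey == KEY_NOT_READY) && (asc == 0x3a)) {
		return (DKIO_EJECTED);	/* medium not present */
	}
	if ((skey == KEY_NOT_READY) && (asc == 0x04) &&
	    ((ascq == 0x02) || (ascq == 0x07) || (ascq == 0x08))) {
		return (DKIO_INSERTED);	/* present, device busy */
	}
	return (DKIO_NONE);		/* reservation/busy status, etc. */
}
#endif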
24680 
24681 
24682 /*
24683  *    Function: sd_dkio_get_temp
24684  *
24685  * Description: This routine is the driver entry point for handling ioctl
24686  *		requests to get the disk temperature.
24687  *
24688  *   Arguments: dev  - the device number
24689  *		arg  - pointer to user provided dk_temperature structure.
24690  *		flag - this argument is a pass through to ddi_copyxxx()
24691  *		       directly from the mode argument of ioctl().
24692  *
24693  * Return Code: 0
24694  *		EFAULT
24695  *		ENXIO
24696  *		EAGAIN
24697  */
24698 
24699 static int
24700 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24701 {
24702 	struct sd_lun		*un = NULL;
24703 	struct dk_temperature	*dktemp = NULL;
24704 	uchar_t			*temperature_page;
24705 	int			rval = 0;
24706 	int			path_flag = SD_PATH_STANDARD;
24707 
24708 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24709 		return (ENXIO);
24710 	}
24711 
24712 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24713 
24714 	/* copyin the disk temp argument to get the user flags */
24715 	if (ddi_copyin((void *)arg, dktemp,
24716 	    sizeof (struct dk_temperature), flag) != 0) {
24717 		rval = EFAULT;
24718 		goto done;
24719 	}
24720 
24721 	/* Initialize the temperature to invalid. */
24722 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24723 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24724 
24725 	/*
24726 	 * Note: Investigate removing the "bypass pm" semantic.
24727 	 * Can we just bypass PM always?
24728 	 */
24729 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24730 		path_flag = SD_PATH_DIRECT;
24731 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24732 		mutex_enter(&un->un_pm_mutex);
24733 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24734 			/*
24735 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24736 			 * in low power mode, we cannot wake it up; we need
24737 			 * to return EAGAIN.
24738 			 */
24739 			mutex_exit(&un->un_pm_mutex);
24740 			rval = EAGAIN;
24741 			goto done;
24742 		} else {
24743 			/*
24744 			 * Indicate to PM the device is busy. This is required
24745 			 * to avoid a race - i.e. the ioctl is issuing a
24746 			 * command and the pm framework brings down the device
24747 			 * to low power mode (possible power cut-off on some
24748 			 * platforms).
24749 			 */
24750 			mutex_exit(&un->un_pm_mutex);
24751 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24752 				rval = EAGAIN;
24753 				goto done;
24754 			}
24755 		}
24756 	}
24757 
24758 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24759 
24760 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24761 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24762 		goto done2;
24763 	}
24764 
24765 	/*
24766 	 * For the current temperature verify that the parameter length is 0x02
24767 	 * and the parameter code is 0x00
24768 	 */
24769 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24770 	    (temperature_page[5] == 0x00)) {
24771 		if (temperature_page[9] == 0xFF) {
24772 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24773 		} else {
24774 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24775 		}
24776 	}
24777 
24778 	/*
24779 	 * For the reference temperature verify that the parameter
24780 	 * length is 0x02 and the parameter code is 0x01
24781 	 */
24782 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24783 	    (temperature_page[11] == 0x01)) {
24784 		if (temperature_page[15] == 0xFF) {
24785 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24786 		} else {
24787 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24788 		}
24789 	}
24790 
24791 	/* Do the copyout regardless of the temperature commands status. */
24792 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24793 	    flag) != 0) {
24794 		rval = EFAULT;
24795 	}
24796 
24797 done2:
24798 	if (path_flag == SD_PATH_DIRECT) {
24799 		sd_pm_exit(un);
24800 	}
24801 
24802 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24803 done:
24804 	if (dktemp != NULL) {
24805 		kmem_free(dktemp, sizeof (struct dk_temperature));
24806 	}
24807 
24808 	return (rval);
24809 }
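
/*
 * Illustrative sketch, not compiled into the driver: the temperature
 * log page layout assumed by the parsing above. Each log parameter is a
 * 4-byte header (2-byte parameter code, a flags byte, a length byte)
 * followed by its value; a value of 0xFF means the drive cannot report
 * that temperature. The function name is an assumption for illustration.
 */
#if 0
static void
sd_example_parse_temp_page(uchar_t *page, short *cur, short *ref)
{
	/* parameter code 0x0000, length 0x02: current temp at byte 9 */
	if ((page[4] == 0x00) && (page[5] == 0x00) && (page[7] == 0x02)) {
		*cur = (page[9] == 0xFF) ? DKT_INVALID_TEMP : page[9];
	}
	/* parameter code 0x0001, length 0x02: reference temp at byte 15 */
	if ((page[10] == 0x00) && (page[11] == 0x01) && (page[13] == 0x02)) {
		*ref = (page[15] == 0xFF) ? DKT_INVALID_TEMP : page[15];
	}
}
#endif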
24810 
24811 
24812 /*
24813  *    Function: sd_log_page_supported
24814  *
24815  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24816  *		supported log pages.
24817  *
24818  *   Arguments: un -
24819  *   Arguments: un - driver soft state (unit) structure
24820  *		log_page - the log page code to look for
24821  * Return Code: -1 - on error (log sense is optional and may not be supported).
24822  *		0  - log page not found.
24823  *  		1  - log page found.
24824  *		1  - log page found.
24825 
24826 static int
24827 sd_log_page_supported(struct sd_lun *un, int log_page)
24828 {
24829 	uchar_t *log_page_data;
24830 	int	i;
24831 	int	match = 0;
24832 	int	log_size;
24833 
24834 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24835 
24836 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24837 	    SD_PATH_DIRECT) != 0) {
24838 		SD_ERROR(SD_LOG_COMMON, un,
24839 		    "sd_log_page_supported: failed log page retrieval\n");
24840 		kmem_free(log_page_data, 0xFF);
24841 		return (-1);
24842 	}
24843 	log_size = log_page_data[3];
24844 
24845 	/*
24846 	 * The list of supported log pages starts at the fourth byte. Check
24847 	 * until we run out of log pages or a match is found.
24848 	 */
24849 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24850 		if (log_page_data[i] == log_page) {
24851 			match++;
24852 		}
24853 	}
24854 	kmem_free(log_page_data, 0xFF);
24855 	return (match);
24856 }
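
/*
 * Illustrative sketch, not compiled into the driver: the layout of the
 * LOG SENSE "supported pages" page (page code 0) scanned above. Byte 3
 * holds the number of page codes that follow, and the codes themselves
 * start at byte 4. The function name is an assumption for illustration.
 */
#if 0
static int
sd_example_page_listed(uchar_t *data, int page)
{
	int	len = data[3];	/* number of page codes that follow */
	int	i;

	for (i = 4; i < (len + 4); i++) {
		if (data[i] == page) {
			return (1);	/* page code found */
		}
	}
	return (0);			/* not in the supported list */
}
#endif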
24857 
24858 
24859 /*
24860  *    Function: sd_mhdioc_failfast
24861  *
24862  * Description: This routine is the driver entry point for handling ioctl
24863  *		requests to enable/disable the multihost failfast option.
24864  *		(MHIOCENFAILFAST)
24865  *
24866  *   Arguments: dev	- the device number
24867  *		arg	- user specified probing interval.
24868  *		flag	- this argument is a pass through to ddi_copyxxx()
24869  *			  directly from the mode argument of ioctl().
24870  *
24871  * Return Code: 0
24872  *		EFAULT
24873  *		ENXIO
24874  */
24875 
24876 static int
24877 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24878 {
24879 	struct sd_lun	*un = NULL;
24880 	int		mh_time;
24881 	int		rval = 0;
24882 
24883 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24884 		return (ENXIO);
24885 	}
24886 
24887 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24888 		return (EFAULT);
24889 
24890 	if (mh_time) {
24891 		mutex_enter(SD_MUTEX(un));
24892 		un->un_resvd_status |= SD_FAILFAST;
24893 		mutex_exit(SD_MUTEX(un));
24894 		/*
24895 		 * If mh_time is INT_MAX, then this ioctl is being used for
24896 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24897 		 */
24898 		if (mh_time != INT_MAX) {
24899 			rval = sd_check_mhd(dev, mh_time);
24900 		}
24901 	} else {
24902 		(void) sd_check_mhd(dev, 0);
24903 		mutex_enter(SD_MUTEX(un));
24904 		un->un_resvd_status &= ~SD_FAILFAST;
24905 		mutex_exit(SD_MUTEX(un));
24906 	}
24907 	return (rval);
24908 }
24909 
24910 
24911 /*
24912  *    Function: sd_mhdioc_takeown
24913  *
24914  * Description: This routine is the driver entry point for handling ioctl
24915  *		requests to forcefully acquire exclusive access rights to the
24916  *		multihost disk (MHIOCTKOWN).
24917  *
24918  *   Arguments: dev	- the device number
24919  *		arg	- user provided structure specifying the delay
24920  *			  parameters in milliseconds
24921  *		flag	- this argument is a pass through to ddi_copyxxx()
24922  *			  directly from the mode argument of ioctl().
24923  *
24924  * Return Code: 0
24925  *		EFAULT
24926  *		ENXIO
24927  */
24928 
24929 static int
24930 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24931 {
24932 	struct sd_lun		*un = NULL;
24933 	struct mhioctkown	*tkown = NULL;
24934 	int			rval = 0;
24935 
24936 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24937 		return (ENXIO);
24938 	}
24939 
24940 	if (arg != NULL) {
24941 		tkown = (struct mhioctkown *)
24942 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24943 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24944 		if (rval != 0) {
24945 			rval = EFAULT;
24946 			goto error;
24947 		}
24948 	}
24949 
24950 	rval = sd_take_ownership(dev, tkown);
24951 	mutex_enter(SD_MUTEX(un));
24952 	if (rval == 0) {
24953 		un->un_resvd_status |= SD_RESERVE;
24954 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24955 			sd_reinstate_resv_delay =
24956 			    tkown->reinstate_resv_delay * 1000;
24957 		} else {
24958 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24959 		}
24960 		/*
24961 		 * Give the scsi_watch routine interval set by
24962 		 * the MHIOCENFAILFAST ioctl precedence here.
24963 		 */
24964 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24965 			mutex_exit(SD_MUTEX(un));
24966 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24967 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24968 			    "sd_mhdioc_takeown : %d\n",
24969 			    sd_reinstate_resv_delay);
24970 		} else {
24971 			mutex_exit(SD_MUTEX(un));
24972 		}
24973 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24974 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24975 	} else {
24976 		un->un_resvd_status &= ~SD_RESERVE;
24977 		mutex_exit(SD_MUTEX(un));
24978 	}
24979 
24980 error:
24981 	if (tkown != NULL) {
24982 		kmem_free(tkown, sizeof (struct mhioctkown));
24983 	}
24984 	return (rval);
24985 }
24986 
24987 
24988 /*
24989  *    Function: sd_mhdioc_release
24990  *
24991  * Description: This routine is the driver entry point for handling ioctl
24992  *		requests to release exclusive access rights to the multihost
24993  *		disk (MHIOCRELEASE).
24994  *
24995  *   Arguments: dev	- the device number
24996  *
24997  * Return Code: 0
24998  *		ENXIO
24999  */
25000 
25001 static int
25002 sd_mhdioc_release(dev_t dev)
25003 {
25004 	struct sd_lun		*un = NULL;
25005 	timeout_id_t		resvd_timeid_save;
25006 	int			resvd_status_save;
25007 	int			rval = 0;
25008 
25009 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25010 		return (ENXIO);
25011 	}
25012 
25013 	mutex_enter(SD_MUTEX(un));
25014 	resvd_status_save = un->un_resvd_status;
25015 	un->un_resvd_status &=
25016 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
25017 	if (un->un_resvd_timeid) {
25018 		resvd_timeid_save = un->un_resvd_timeid;
25019 		un->un_resvd_timeid = NULL;
25020 		mutex_exit(SD_MUTEX(un));
25021 		(void) untimeout(resvd_timeid_save);
25022 	} else {
25023 		mutex_exit(SD_MUTEX(un));
25024 	}
25025 
25026 	/*
25027 	 * destroy any pending timeout thread that may be attempting to
25028 	 * reinstate reservation on this device.
25029 	 */
25030 	sd_rmv_resv_reclaim_req(dev);
25031 
25032 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
25033 		mutex_enter(SD_MUTEX(un));
25034 		if ((un->un_mhd_token) &&
25035 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
25036 			mutex_exit(SD_MUTEX(un));
25037 			(void) sd_check_mhd(dev, 0);
25038 		} else {
25039 			mutex_exit(SD_MUTEX(un));
25040 		}
25041 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
25042 		    sd_mhd_reset_notify_cb, (caddr_t)un);
25043 	} else {
25044 		/*
25045 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
25046 		 */
25047 		mutex_enter(SD_MUTEX(un));
25048 		un->un_resvd_status = resvd_status_save;
25049 		mutex_exit(SD_MUTEX(un));
25050 	}
25051 	return (rval);
25052 }
25053 
25054 
25055 /*
25056  *    Function: sd_mhdioc_register_devid
25057  *
25058  * Description: This routine is the driver entry point for handling ioctl
25059  *		requests to register the device id (MHIOCREREGISTERDEVID).
25060  *
25061  *		Note: The implementation for this ioctl has been updated to
25062  *		be consistent with the original PSARC case (1999/357)
25063  *		(4375899, 4241671, 4220005)
25064  *
25065  *   Arguments: dev	- the device number
25066  *
25067  * Return Code: 0
25068  *		ENXIO
25069  */
25070 
25071 static int
25072 sd_mhdioc_register_devid(dev_t dev)
25073 {
25074 	struct sd_lun	*un = NULL;
25075 	int		rval = 0;
25076 
25077 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25078 		return (ENXIO);
25079 	}
25080 
25081 	ASSERT(!mutex_owned(SD_MUTEX(un)));
25082 
25083 	mutex_enter(SD_MUTEX(un));
25084 
25085 	/* If a devid already exists, de-register it */
25086 	if (un->un_devid != NULL) {
25087 		ddi_devid_unregister(SD_DEVINFO(un));
25088 		/*
25089 		 * After unregistering the devid, free the devid memory.
25090 		 */
25091 		ddi_devid_free(un->un_devid);
25092 		un->un_devid = NULL;
25093 	}
25094 
25095 	/* Check for reservation conflict */
25096 	mutex_exit(SD_MUTEX(un));
25097 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25098 	mutex_enter(SD_MUTEX(un));
25099 
25100 	switch (rval) {
25101 	case 0:
25102 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25103 		break;
25104 	case EACCES:
25105 		break;
25106 	default:
25107 		rval = EIO;
25108 	}
25109 
25110 	mutex_exit(SD_MUTEX(un));
25111 	return (rval);
25112 }
25113 
25114 
25115 /*
25116  *    Function: sd_mhdioc_inkeys
25117  *
25118  * Description: This routine is the driver entry point for handling ioctl
25119  *		requests to issue the SCSI-3 Persistent In Read Keys command
25120  *		to the device (MHIOCGRP_INKEYS).
25121  *
25122  *   Arguments: dev	- the device number
25123  *		arg	- user provided in_keys structure
25124  *		flag	- this argument is a pass through to ddi_copyxxx()
25125  *			  directly from the mode argument of ioctl().
25126  *
25127  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25128  *		ENXIO
25129  *		EFAULT
25130  */
25131 
25132 static int
25133 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25134 {
25135 	struct sd_lun		*un;
25136 	mhioc_inkeys_t		inkeys;
25137 	int			rval = 0;
25138 
25139 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25140 		return (ENXIO);
25141 	}
25142 
25143 #ifdef _MULTI_DATAMODEL
25144 	switch (ddi_model_convert_from(flag & FMODELS)) {
25145 	case DDI_MODEL_ILP32: {
25146 		struct mhioc_inkeys32	inkeys32;
25147 
25148 		if (ddi_copyin(arg, &inkeys32,
25149 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25150 			return (EFAULT);
25151 		}
25152 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25153 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25154 		    &inkeys, flag)) != 0) {
25155 			return (rval);
25156 		}
25157 		inkeys32.generation = inkeys.generation;
25158 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25159 		    flag) != 0) {
25160 			return (EFAULT);
25161 		}
25162 		break;
25163 	}
25164 	case DDI_MODEL_NONE:
25165 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25166 		    flag) != 0) {
25167 			return (EFAULT);
25168 		}
25169 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25170 		    &inkeys, flag)) != 0) {
25171 			return (rval);
25172 		}
25173 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25174 		    flag) != 0) {
25175 			return (EFAULT);
25176 		}
25177 		break;
25178 	}
25179 
25180 #else /* ! _MULTI_DATAMODEL */
25181 
25182 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25183 		return (EFAULT);
25184 	}
25185 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25186 	if (rval != 0) {
25187 		return (rval);
25188 	}
25189 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25190 		return (EFAULT);
25191 	}
25192 
25193 #endif /* _MULTI_DATAMODEL */
25194 
25195 	return (rval);
25196 }
25197 
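/*
 * For reference, a userland caller of MHIOCGRP_INKEYS typically sizes a
 * key list, points the mhioc_inkeys_t at it, and reads listlen back to
 * learn how many keys the device holds.  A minimal sketch, assuming an
 * already-open fd and an arbitrary list size of 16 (structure layouts as
 * declared in <sys/mhd.h>):
 *
 *	mhioc_resv_key_t	keys[16];
 *	mhioc_key_list_t	klist;
 *	mhioc_inkeys_t		ik;
 *
 *	klist.listsize = 16;
 *	klist.list = keys;
 *	ik.li = &klist;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &ik) == 0) {
 *		... ik.generation and klist.listlen are now valid; if
 *		... listlen exceeds listsize, enlarge the array and retry.
 *	}
 *
 * MHIOCGRP_INRESV (see sd_mhdioc_inresv below) follows the same pattern
 * with an mhioc_inresvs_t and an mhioc_resv_desc_list_t.
 */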
25198 
25199 /*
25200  *    Function: sd_mhdioc_inresv
25201  *
25202  * Description: This routine is the driver entry point for handling ioctl
25203  *		requests to issue the SCSI-3 Persistent In Read Reservations
25204  *		command to the device (MHIOCGRP_INRESV).
25205  *
25206  *   Arguments: dev	- the device number
25207  *		arg	- user provided in_resv structure
25208  *		flag	- this argument is a pass through to ddi_copyxxx()
25209  *			  directly from the mode argument of ioctl().
25210  *
25211  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25212  *		ENXIO
25213  *		EFAULT
25214  */
25215 
25216 static int
25217 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25218 {
25219 	struct sd_lun		*un;
25220 	mhioc_inresvs_t		inresvs;
25221 	int			rval = 0;
25222 
25223 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25224 		return (ENXIO);
25225 	}
25226 
25227 #ifdef _MULTI_DATAMODEL
25228 
25229 	switch (ddi_model_convert_from(flag & FMODELS)) {
25230 	case DDI_MODEL_ILP32: {
25231 		struct mhioc_inresvs32	inresvs32;
25232 
25233 		if (ddi_copyin(arg, &inresvs32,
25234 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25235 			return (EFAULT);
25236 		}
25237 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25238 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25239 		    &inresvs, flag)) != 0) {
25240 			return (rval);
25241 		}
25242 		inresvs32.generation = inresvs.generation;
25243 		if (ddi_copyout(&inresvs32, arg,
25244 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25245 			return (EFAULT);
25246 		}
25247 		break;
25248 	}
25249 	case DDI_MODEL_NONE:
25250 		if (ddi_copyin(arg, &inresvs,
25251 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25252 			return (EFAULT);
25253 		}
25254 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25255 		    &inresvs, flag)) != 0) {
25256 			return (rval);
25257 		}
25258 		if (ddi_copyout(&inresvs, arg,
25259 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25260 			return (EFAULT);
25261 		}
25262 		break;
25263 	}
25264 
25265 #else /* ! _MULTI_DATAMODEL */
25266 
25267 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25268 		return (EFAULT);
25269 	}
25270 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25271 	if (rval != 0) {
25272 		return (rval);
25273 	}
25274 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25275 		return (EFAULT);
25276 	}
25277 
25278 #endif /* ! _MULTI_DATAMODEL */
25279 
25280 	return (rval);
25281 }
25282 
25283 
25284 /*
25285  * The following routines support the clustering functionality described below
25286  * and implement lost reservation reclaim functionality.
25287  *
25288  * Clustering
25289  * ----------
25290  * The clustering code uses two different, independent forms of SCSI
25291  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25292  * Persistent Group Reservations. For any particular disk, it will use either
25293  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25294  *
25295  * SCSI-2
25296  * The cluster software takes ownership of a multi-hosted disk by issuing the
25297  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25298  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl: a
25299  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl,
25300  * then issues MHIOCENFAILFAST, which "enables failfast" in the driver. The
25301  * meaning of failfast is that if the driver (on this host) ever encounters the
25302  * scsi error return code RESERVATION_CONFLICT from the device, it should
25303  * immediately panic the host. The motivation for this ioctl is that if this
25304  * host does encounter reservation conflict, the underlying cause is that some
25305  * other host of the cluster has decided that this host is no longer in the
25306  * cluster and has seized control of the disks for itself. Since this host is no
25307  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25308  * does two things:
25309  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25310  *      error to panic the host
25311  *      (b) it sets up a periodic timer to test whether this host still has
25312  *      "access" (in that no other host has reserved the device):  if the
25313  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25314  *      purpose of that periodic timer is to handle scenarios where the host is
25315  *      otherwise quiescent, temporarily doing no real i/o.
25316  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25317  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25318  * the device itself.
25319  *
25320  * SCSI-3 PGR
25321  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25322  * facility is supported through the shared multihost disk ioctls
25323  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25324  * MHIOCGRP_PREEMPTANDABORT)
25325  *
25326  * Reservation Reclaim:
25327  * --------------------
25328  * To support the lost reservation reclaim operations this driver creates a
25329  * single thread to handle reinstating reservations on all devices that have
25330  * lost reservations.  sd_resv_reclaim_requests are logged for all devices
25331  * that have lost reservations when the scsi watch facility calls back
25332  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
25333  * requests to regain the lost reservations.
25334  */
25335 
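/*
 * As a rough illustration of the SCSI-2 sequence described above, a
 * userland cluster agent might drive these ioctls as sketched below.
 * This is not code from any cluster product; the device path, the delay
 * values, and the error handling are assumptions for illustration only.
 *
 *	int fd = open("/dev/rdsk/c1t0d0s2", O_RDWR | O_NDELAY);
 *	struct mhioctkown tk;
 *	int mh_time = 2000;	(assumed failfast poll interval, in msecs)
 *
 *	bzero(&tk, sizeof (struct mhioctkown));
 *	tk.min_ownership_delay = 6000;	(msecs; 0 selects the default)
 *	tk.max_ownership_delay = 30000;	(msecs; 0 selects the default)
 *	if (ioctl(fd, MHIOCTKOWN, &tk) != 0)
 *		... could not take ownership (e.g. EACCES)
 *	if (ioctl(fd, MHIOCENFAILFAST, &mh_time) != 0)
 *		... failfast could not be enabled
 *	... use the disk ...
 *	(void) ioctl(fd, MHIOCRELEASE, 0);
 */
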
25336 /*
25337  *    Function: sd_check_mhd()
25338  *
25339  * Description: This function sets up and submits a scsi watch request or
25340  *		terminates an existing watch request. This routine is used in
25341  *		support of reservation reclaim.
25342  *
25343  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25344  *			 among multiple watches that share the callback function
25345  *		interval - the number of milliseconds specifying the watch
25346  *			   interval for issuing TEST UNIT READY commands. If
25347  *			   set to 0 the watch should be terminated. If the
25348  *			   interval is set to 0 and if the device is required
25349  *			   to hold reservation while disabling failfast, the
25350  *			   watch is restarted with an interval of
25351  *			   reinstate_resv_delay.
25352  *
25353  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25354  *		ENXIO      - Indicates an invalid device was specified
25355  *		EAGAIN     - Unable to submit the scsi watch request
25356  */
25357 
25358 static int
25359 sd_check_mhd(dev_t dev, int interval)
25360 {
25361 	struct sd_lun	*un;
25362 	opaque_t	token;
25363 
25364 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25365 		return (ENXIO);
25366 	}
25367 
25368 	/* is this a watch termination request? */
25369 	if (interval == 0) {
25370 		mutex_enter(SD_MUTEX(un));
25371 		/* if there is an existing watch task then terminate it */
25372 		if (un->un_mhd_token) {
25373 			token = un->un_mhd_token;
25374 			un->un_mhd_token = NULL;
25375 			mutex_exit(SD_MUTEX(un));
25376 			(void) scsi_watch_request_terminate(token,
25377 			    SCSI_WATCH_TERMINATE_WAIT);
25378 			mutex_enter(SD_MUTEX(un));
25379 		} else {
25380 			mutex_exit(SD_MUTEX(un));
25381 			/*
25382 			 * Note: If we return here we don't check for the
25383 			 * failfast case. This is the original legacy
25384 			 * implementation but perhaps we should be checking
25385 			 * the failfast case.
25386 			 */
25387 			return (0);
25388 		}
25389 		/*
25390 		 * If the device is required to hold reservation while
25391 		 * disabling failfast, we need to restart the scsi_watch
25392 		 * routine with an interval of reinstate_resv_delay.
25393 		 */
25394 		if (un->un_resvd_status & SD_RESERVE) {
25395 			interval = sd_reinstate_resv_delay/1000;
25396 		} else {
25397 			/* no failfast so bail */
25398 			mutex_exit(SD_MUTEX(un));
25399 			return (0);
25400 		}
25401 		mutex_exit(SD_MUTEX(un));
25402 	}
25403 
25404 	/*
25405 	 * adjust minimum time interval to 1 second,
25406 	 * and convert from msecs to usecs
25407 	 */
25408 	if (interval > 0 && interval < 1000) {
25409 		interval = 1000;
25410 	}
25411 	interval *= 1000;
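	/*
	 * Concretely: the restart path above turns a sd_reinstate_resv_delay
	 * of 6000000 usec into an interval of 6000 msec, which becomes
	 * 6000000 usec again here, i.e. one TEST UNIT READY every six
	 * seconds.
	 */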
25412 
25413 	/*
25414 	 * submit the request to the scsi_watch service
25415 	 */
25416 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25417 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25418 	if (token == NULL) {
25419 		return (EAGAIN);
25420 	}
25421 
25422 	/*
25423 	 * save token for termination later on
25424 	 */
25425 	mutex_enter(SD_MUTEX(un));
25426 	un->un_mhd_token = token;
25427 	mutex_exit(SD_MUTEX(un));
25428 	return (0);
25429 }
25430 
25431 
25432 /*
25433  *    Function: sd_mhd_watch_cb()
25434  *
25435  * Description: This function is the call back function used by the scsi watch
25436  *		facility. The scsi watch facility sends the "Test Unit Ready"
25437  *		and processes the status. If applicable (i.e. a "Unit Attention"
25438  *		status and automatic "Request Sense" not used) the scsi watch
25439  *		facility will send a "Request Sense" and retrieve the sense data
25440  *		to be passed to this callback function. In either case --
25441  *		automatic "Request Sense" or the facility submitting one --
25442  *		this callback is passed the status and sense data.
25443  *
25444  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25445  *			among multiple watches that share this callback function
25446  *		resultp - scsi watch facility result packet containing scsi
25447  *			  packet, status byte and sense data
25448  *
25449  * Return Code: 0 - continue the watch task
25450  *		non-zero - terminate the watch task
25451  */
25452 
25453 static int
25454 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25455 {
25456 	struct sd_lun			*un;
25457 	struct scsi_status		*statusp;
25458 	uint8_t				*sensep;
25459 	struct scsi_pkt			*pkt;
25460 	uchar_t				actual_sense_length;
25461 	dev_t  				dev = (dev_t)arg;
25462 
25463 	ASSERT(resultp != NULL);
25464 	statusp			= resultp->statusp;
25465 	sensep			= (uint8_t *)resultp->sensep;
25466 	pkt			= resultp->pkt;
25467 	actual_sense_length	= resultp->actual_sense_length;
25468 
25469 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25470 		return (ENXIO);
25471 	}
25472 
25473 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25474 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25475 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25476 
25477 	/* Begin processing of the status and/or sense data */
25478 	if (pkt->pkt_reason != CMD_CMPLT) {
25479 		/* Handle the incomplete packet */
25480 		sd_mhd_watch_incomplete(un, pkt);
25481 		return (0);
25482 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25483 		if (*((unsigned char *)statusp)
25484 		    == STATUS_RESERVATION_CONFLICT) {
25485 			/*
25486 			 * Handle a reservation conflict by panicking if
25487 			 * configured for failfast or by logging the conflict
25488 			 * and updating the reservation status
25489 			 */
25490 			mutex_enter(SD_MUTEX(un));
25491 			if ((un->un_resvd_status & SD_FAILFAST) &&
25492 			    (sd_failfast_enable)) {
25493 				sd_panic_for_res_conflict(un);
25494 				/*NOTREACHED*/
25495 			}
25496 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25497 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25498 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25499 			mutex_exit(SD_MUTEX(un));
25500 		}
25501 	}
25502 
25503 	if (sensep != NULL) {
25504 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25505 			mutex_enter(SD_MUTEX(un));
25506 			if ((scsi_sense_asc(sensep) ==
25507 			    SD_SCSI_RESET_SENSE_CODE) &&
25508 			    (un->un_resvd_status & SD_RESERVE)) {
25509 				/*
25510 				 * The additional sense code indicates a power
25511 				 * on or bus device reset has occurred; update
25512 				 * the reservation status.
25513 				 */
25514 				un->un_resvd_status |=
25515 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25516 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25517 				    "sd_mhd_watch_cb: Lost Reservation\n");
25518 			}
25519 		} else {
25520 			return (0);
25521 		}
25522 	} else {
25523 		mutex_enter(SD_MUTEX(un));
25524 	}
25525 
25526 	if ((un->un_resvd_status & SD_RESERVE) &&
25527 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25528 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25529 			/*
25530 			 * A reset occurred in between the last probe and this
25531 			 * one so if a timeout is pending cancel it.
25532 			 */
25533 			if (un->un_resvd_timeid) {
25534 				timeout_id_t temp_id = un->un_resvd_timeid;
25535 				un->un_resvd_timeid = NULL;
25536 				mutex_exit(SD_MUTEX(un));
25537 				(void) untimeout(temp_id);
25538 				mutex_enter(SD_MUTEX(un));
25539 			}
25540 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25541 		}
25542 		if (un->un_resvd_timeid == 0) {
25543 			/* Schedule a timeout to handle the lost reservation */
25544 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25545 			    (void *)dev,
25546 			    drv_usectohz(sd_reinstate_resv_delay));
25547 		}
25548 	}
25549 	mutex_exit(SD_MUTEX(un));
25550 	return (0);
25551 }
25552 
25553 
25554 /*
25555  *    Function: sd_mhd_watch_incomplete()
25556  *
25557  * Description: This function is used to find out why a scsi pkt sent by the
25558  *		scsi watch facility was not completed. In some scenarios this
25559  *		routine simply returns; otherwise it issues a lun, target, or
25560  *		bus reset to see if the drive is still online.
25561  *
25562  *   Arguments: un  - driver soft state (unit) structure
25563  *		pkt - incomplete scsi pkt
25564  */
25565 
25566 static void
25567 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25568 {
25569 	int	be_chatty;
25570 	int	perr;
25571 
25572 	ASSERT(pkt != NULL);
25573 	ASSERT(un != NULL);
25574 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25575 	perr		= (pkt->pkt_statistics & STAT_PERR);
25576 
25577 	mutex_enter(SD_MUTEX(un));
25578 	if (un->un_state == SD_STATE_DUMPING) {
25579 		mutex_exit(SD_MUTEX(un));
25580 		return;
25581 	}
25582 
25583 	switch (pkt->pkt_reason) {
25584 	case CMD_UNX_BUS_FREE:
25585 		/*
25586 		 * If we had a parity error that caused the target to drop BSY*,
25587 		 * don't be chatty about it.
25588 		 */
25589 		if (perr && be_chatty) {
25590 			be_chatty = 0;
25591 		}
25592 		break;
25593 	case CMD_TAG_REJECT:
25594 		/*
25595 		 * The SCSI-2 spec states that a tag reject will be sent by the
25596 		 * target if tagged queuing is not supported. A tag reject may
25597 		 * also be sent during certain initialization periods or to
25598 		 * control internal resources. For the latter case the target
25599 		 * may also return Queue Full.
25600 		 *
25601 		 * If this driver receives a tag reject from a target that is
25602 		 * going through an init period or controlling internal
25603 		 * resources tagged queuing will be disabled. This is a less
25604 		 * than optimal behavior but the driver is unable to determine
25605 		 * the target state and assumes tagged queueing is not supported
25606 		 */
25607 		pkt->pkt_flags = 0;
25608 		un->un_tagflags = 0;
25609 
25610 		if (un->un_f_opt_queueing == TRUE) {
25611 			un->un_throttle = min(un->un_throttle, 3);
25612 		} else {
25613 			un->un_throttle = 1;
25614 		}
25615 		mutex_exit(SD_MUTEX(un));
25616 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25617 		mutex_enter(SD_MUTEX(un));
25618 		break;
25619 	case CMD_INCOMPLETE:
25620 		/*
25621 		 * The transport stopped with an abnormal state, fallthrough and
25622 		 * reset the target and/or bus unless selection did not complete
25623 		 * (indicated by STATE_GOT_BUS) in which case we don't want to
25624 		 * go through a target/bus reset
25625 		 */
25626 		if (pkt->pkt_state == STATE_GOT_BUS) {
25627 			break;
25628 		}
25629 		/*FALLTHROUGH*/
25630 
25631 	case CMD_TIMEOUT:
25632 	default:
25633 		/*
25634 		 * The lun may still be running the command, so a lun reset
25635 		 * should be attempted. If the lun reset fails or cannot be
25636 		 * issued, then try a target reset. Lastly try a bus reset.
25637 		 */
25638 		if ((pkt->pkt_statistics &
25639 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25640 			int reset_retval = 0;
25641 			mutex_exit(SD_MUTEX(un));
25642 			if (un->un_f_allow_bus_device_reset == TRUE) {
25643 				if (un->un_f_lun_reset_enabled == TRUE) {
25644 					reset_retval =
25645 					    scsi_reset(SD_ADDRESS(un),
25646 					    RESET_LUN);
25647 				}
25648 				if (reset_retval == 0) {
25649 					reset_retval =
25650 					    scsi_reset(SD_ADDRESS(un),
25651 					    RESET_TARGET);
25652 				}
25653 			}
25654 			if (reset_retval == 0) {
25655 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25656 			}
25657 			mutex_enter(SD_MUTEX(un));
25658 		}
25659 		break;
25660 	}
25661 
25662 	/* A device/bus reset has occurred; update the reservation status. */
25663 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25664 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25665 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25666 			un->un_resvd_status |=
25667 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25668 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25669 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25670 		}
25671 	}
25672 
25673 	/*
25674 	 * The disk has been turned off; Update the device state.
25675 	 *
25676 	 * Note: Should we be offlining the disk here?
25677 	 */
25678 	if (pkt->pkt_state == STATE_GOT_BUS) {
25679 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25680 		    "Disk not responding to selection\n");
25681 		if (un->un_state != SD_STATE_OFFLINE) {
25682 			New_state(un, SD_STATE_OFFLINE);
25683 		}
25684 	} else if (be_chatty) {
25685 		/*
25686 		 * suppress messages if they are all the same pkt reason;
25687 		 * with TQ, many (up to 256) are returned with the same
25688 		 * pkt_reason
25689 		 */
25690 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25691 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25692 			    "sd_mhd_watch_incomplete: "
25693 			    "SCSI transport failed: reason '%s'\n",
25694 			    scsi_rname(pkt->pkt_reason));
25695 		}
25696 	}
25697 	un->un_last_pkt_reason = pkt->pkt_reason;
25698 	mutex_exit(SD_MUTEX(un));
25699 }
25700 
25701 
25702 /*
25703  *    Function: sd_sname()
25704  *
25705  * Description: This is a simple little routine to return a string containing
25706  *		a printable description of command status byte for use in
25707  *		logging.
25708  *
25709  *   Arguments: status - pointer to a status byte
25710  *
25711  * Return Code: char * - string containing status description.
25712  */
25713 
25714 static char *
25715 sd_sname(uchar_t status)
25716 {
25717 	switch (status & STATUS_MASK) {
25718 	case STATUS_GOOD:
25719 		return ("good status");
25720 	case STATUS_CHECK:
25721 		return ("check condition");
25722 	case STATUS_MET:
25723 		return ("condition met");
25724 	case STATUS_BUSY:
25725 		return ("busy");
25726 	case STATUS_INTERMEDIATE:
25727 		return ("intermediate");
25728 	case STATUS_INTERMEDIATE_MET:
25729 		return ("intermediate - condition met");
25730 	case STATUS_RESERVATION_CONFLICT:
25731 		return ("reservation_conflict");
25732 	case STATUS_TERMINATED:
25733 		return ("command terminated");
25734 	case STATUS_QFULL:
25735 		return ("queue full");
25736 	default:
25737 		return ("<unknown status>");
25738 	}
25739 }
25740 
25741 
25742 /*
25743  *    Function: sd_mhd_resvd_recover()
25744  *
25745  * Description: This function adds a reservation entry to the
25746  *		sd_resv_reclaim_request list and signals the reservation
25747  *		reclaim thread that there is work pending. If the reservation
25748  *		reclaim thread has not been previously created this function
25749  *		will kick it off.
25750  *
25751  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25752  *			among multiple watches that share this callback function
25753  *
25754  *     Context: This routine is called by timeout() and is run in interrupt
25755  *		context. It must not sleep or call other functions which may
25756  *		sleep.
25757  */
25758 
25759 static void
25760 sd_mhd_resvd_recover(void *arg)
25761 {
25762 	dev_t			dev = (dev_t)arg;
25763 	struct sd_lun		*un;
25764 	struct sd_thr_request	*sd_treq = NULL;
25765 	struct sd_thr_request	*sd_cur = NULL;
25766 	struct sd_thr_request	*sd_prev = NULL;
25767 	int			already_there = 0;
25768 
25769 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25770 		return;
25771 	}
25772 
25773 	mutex_enter(SD_MUTEX(un));
25774 	un->un_resvd_timeid = NULL;
25775 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25776 		/*
25777 		 * There was a reset so don't issue the reserve, allow the
25778 		 * sd_mhd_watch_cb callback function to notice this and
25779 		 * reschedule the timeout for reservation.
25780 		 */
25781 		mutex_exit(SD_MUTEX(un));
25782 		return;
25783 	}
25784 	mutex_exit(SD_MUTEX(un));
25785 
25786 	/*
25787 	 * Add this device to the sd_resv_reclaim_request list and the
25788 	 * sd_resv_reclaim_thread should take care of the rest.
25789 	 *
25790 	 * Note: We can't sleep in this context so if the memory allocation
25791 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25792 	 * reschedule the timeout for reservation.  (4378460)
25793 	 */
25794 	sd_treq = (struct sd_thr_request *)
25795 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25796 	if (sd_treq == NULL) {
25797 		return;
25798 	}
25799 
25800 	sd_treq->sd_thr_req_next = NULL;
25801 	sd_treq->dev = dev;
25802 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25803 	if (sd_tr.srq_thr_req_head == NULL) {
25804 		sd_tr.srq_thr_req_head = sd_treq;
25805 	} else {
25806 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25807 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25808 			if (sd_cur->dev == dev) {
25809 				/*
25810 				 * already in Queue so don't log
25811 				 * another request for the device
25812 				 */
25813 				already_there = 1;
25814 				break;
25815 			}
25816 			sd_prev = sd_cur;
25817 		}
25818 		if (!already_there) {
25819 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25820 			    "logging request for %lx\n", dev);
25821 			sd_prev->sd_thr_req_next = sd_treq;
25822 		} else {
25823 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25824 		}
25825 	}
25826 
25827 	/*
25828 	 * Create a kernel thread to do the reservation reclaim and free up this
25829 	 * thread. We cannot block this thread while we go away to do the
25830 	 * reservation reclaim
25831 	 */
25832 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25833 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25834 		    sd_resv_reclaim_thread, NULL,
25835 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25836 
25837 	/* Tell the reservation reclaim thread that it has work to do */
25838 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25839 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25840 }
25841 
25842 /*
25843  *    Function: sd_resv_reclaim_thread()
25844  *
25845  * Description: This function implements the reservation reclaim operations
25846  *
25847  *   Arguments: none; pending reclaim requests are taken from the global
25848  *		sd_tr.srq_thr_req_head queue under srq_resv_reclaim_mutex
25849  */
25850 
25851 static void
25852 sd_resv_reclaim_thread()
25853 {
25854 	struct sd_lun		*un;
25855 	struct sd_thr_request	*sd_mhreq;
25856 
25857 	/* Wait for work */
25858 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25859 	if (sd_tr.srq_thr_req_head == NULL) {
25860 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25861 		    &sd_tr.srq_resv_reclaim_mutex);
25862 	}
25863 
25864 	/* Loop while we have work */
25865 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25866 		un = ddi_get_soft_state(sd_state,
25867 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25868 		if (un == NULL) {
25869 			/*
25870 			 * softstate structure is NULL so just
25871 			 * dequeue the request and continue
25872 			 */
25873 			sd_tr.srq_thr_req_head =
25874 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25875 			kmem_free(sd_tr.srq_thr_cur_req,
25876 			    sizeof (struct sd_thr_request));
25877 			continue;
25878 		}
25879 
25880 		/* dequeue the request */
25881 		sd_mhreq = sd_tr.srq_thr_cur_req;
25882 		sd_tr.srq_thr_req_head =
25883 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25884 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25885 
25886 		/*
25887 		 * Reclaim reservation only if SD_RESERVE is still set. There
25888 		 * may have been a call to MHIOCRELEASE before we got here.
25889 		 */
25890 		mutex_enter(SD_MUTEX(un));
25891 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25892 			/*
25893 			 * Note: The SD_LOST_RESERVE flag is cleared before
25894 			 * reclaiming the reservation. If this is done after the
25895 			 * call to sd_reserve_release a reservation loss in the
25896 			 * window between pkt completion of reserve cmd and
25897 			 * mutex_enter below may not be recognized
25898 			 */
25899 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25900 			mutex_exit(SD_MUTEX(un));
25901 
25902 			if (sd_reserve_release(sd_mhreq->dev,
25903 			    SD_RESERVE) == 0) {
25904 				mutex_enter(SD_MUTEX(un));
25905 				un->un_resvd_status |= SD_RESERVE;
25906 				mutex_exit(SD_MUTEX(un));
25907 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25908 				    "sd_resv_reclaim_thread: "
25909 				    "Reservation Recovered\n");
25910 			} else {
25911 				mutex_enter(SD_MUTEX(un));
25912 				un->un_resvd_status |= SD_LOST_RESERVE;
25913 				mutex_exit(SD_MUTEX(un));
25914 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25915 				    "sd_resv_reclaim_thread: Failed "
25916 				    "Reservation Recovery\n");
25917 			}
25918 		} else {
25919 			mutex_exit(SD_MUTEX(un));
25920 		}
25921 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25922 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25923 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25924 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25925 		/*
25926 		 * wakeup the destroy thread if anyone is waiting on
25927 		 * us to complete.
25928 		 */
25929 		cv_signal(&sd_tr.srq_inprocess_cv);
25930 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25931 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25932 	}
25933 
25934 	/*
25935 	 * cleanup the sd_tr structure now that this thread will not exist
25936 	 */
25937 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25938 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25939 	sd_tr.srq_resv_reclaim_thread = NULL;
25940 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25941 	thread_exit();
25942 }
25943 
25944 
25945 /*
25946  *    Function: sd_rmv_resv_reclaim_req()
25947  *
25948  * Description: This function removes any pending reservation reclaim requests
25949  *		for the specified device.
25950  *
25951  *   Arguments: dev - the device 'dev_t'
25952  */
25953 
25954 static void
25955 sd_rmv_resv_reclaim_req(dev_t dev)
25956 {
25957 	struct sd_thr_request *sd_mhreq;
25958 	struct sd_thr_request *sd_prev;
25959 
25960 	/* Remove a reservation reclaim request from the list */
25961 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25962 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25963 		/*
25964 		 * We are attempting to reinstate reservation for
25965 		 * this device. We wait for sd_reserve_release()
25966 		 * to return before we return.
25967 		 */
25968 		cv_wait(&sd_tr.srq_inprocess_cv,
25969 		    &sd_tr.srq_resv_reclaim_mutex);
25970 	} else {
25971 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25972 		if (sd_mhreq && sd_mhreq->dev == dev) {
25973 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25974 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25975 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25976 			return;
25977 		}
25978 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25979 			if (sd_mhreq && sd_mhreq->dev == dev) {
25980 				break;
25981 			}
25982 			sd_prev = sd_mhreq;
25983 		}
25984 		if (sd_mhreq != NULL) {
25985 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25986 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25987 		}
25988 	}
25989 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25990 }
25991 
25992 
25993 /*
25994  *    Function: sd_mhd_reset_notify_cb()
25995  *
25996  * Description: This is a call back function for scsi_reset_notify. This
25997  *		function updates the softstate reserved status and logs the
25998  *		reset. The driver scsi watch facility callback function
25999  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
26000  *		will reclaim the reservation.
26001  *
26002  *   Arguments: arg  - driver soft state (unit) structure
26003  */
26004 
26005 static void
26006 sd_mhd_reset_notify_cb(caddr_t arg)
26007 {
26008 	struct sd_lun *un = (struct sd_lun *)arg;
26009 
26010 	mutex_enter(SD_MUTEX(un));
26011 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
26012 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
26013 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26014 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
26015 	}
26016 	mutex_exit(SD_MUTEX(un));
26017 }
26018 
26019 
26020 /*
26021  *    Function: sd_take_ownership()
26022  *
26023  * Description: This routine implements an algorithm to achieve a stable
26024  *		reservation on disks which don't implement priority reserve,
26025  *		and makes sure that other hosts lose re-reservation attempts.
26026  *		This algorithm consists of a loop that keeps issuing the RESERVE
26027  *		for some period of time (min_ownership_delay, default 6 seconds).
26028  *		During that loop, it looks to see if there has been a bus device
26029  *		reset or bus reset (both of which cause an existing reservation
26030  *		to be lost). If the reservation is lost issue RESERVE until a
26031  *		period of min_ownership_delay with no resets has gone by, or
26032  *		until max_ownership_delay has expired. This loop ensures that
26033  *		the host really did manage to reserve the device, in spite of
26034  *		resets. The looping for min_ownership_delay (default six
26035  *		seconds) is important to early generation clustering products,
26036  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
26037  *		MHIOCENFAILFAST periodic timer of two seconds. By having
26038  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
26039  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
26040  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
26041  *		have already noticed, via the MHIOCENFAILFAST polling, that it
26042  *		no longer "owns" the disk and will have panicked itself.  Thus,
26043  *		the host issuing the MHIOCTKOWN is assured (with timing
26044  *		dependencies) that by the time it actually starts to use the
26045  *		disk for real work, the old owner is no longer accessing it.
26046  *
26047  *		min_ownership_delay is the minimum amount of time for which the
26048  *		disk must be reserved continuously devoid of resets before the
26049  *		MHIOCTKOWN ioctl will return success.
26050  *
26051  *		max_ownership_delay indicates the amount of time by which the
26052  *		take ownership should either succeed or time out with an error.
26053  *
26054  *   Arguments: dev - the device 'dev_t'
26055  *		*p  - struct containing timing info.
26056  *
26057  * Return Code: 0 for success or error code
26058  */
26059 
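/*
 * Worked example with the defaults below: the loop delays 500000 usec per
 * iteration, so reservation_count >= 4 represents roughly two seconds of
 * consecutive successful RESERVEs.  Ownership is declared stable only when
 * that holds AND at least min_ownership_delay (6 sec) has elapsed since the
 * last observed reset; if max_ownership_delay (30 sec) expires first, the
 * ioctl fails with EACCES.
 */
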
26060 static int
26061 sd_take_ownership(dev_t dev, struct mhioctkown *p)
26062 {
26063 	struct sd_lun	*un;
26064 	int		rval;
26065 	int		err;
26066 	int		reservation_count   = 0;
26067 	int		min_ownership_delay =  6000000; /* in usec */
26068 	int		max_ownership_delay = 30000000; /* in usec */
26069 	clock_t		start_time;	/* starting time of this algorithm */
26070 	clock_t		end_time;	/* time limit for giving up */
26071 	clock_t		ownership_time;	/* time limit for stable ownership */
26072 	clock_t		current_time;
26073 	clock_t		previous_current_time;
26074 
26075 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26076 		return (ENXIO);
26077 	}
26078 
26079 	/*
26080 	 * Attempt a device reservation. A priority reservation is requested.
26081 	 */
26082 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
26083 	    != SD_SUCCESS) {
26084 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26085 		    "sd_take_ownership: return(1)=%d\n", rval);
26086 		return (rval);
26087 	}
26088 
26089 	/* Update the softstate reserved status to indicate the reservation */
26090 	mutex_enter(SD_MUTEX(un));
26091 	un->un_resvd_status |= SD_RESERVE;
26092 	un->un_resvd_status &=
26093 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
26094 	mutex_exit(SD_MUTEX(un));
26095 
26096 	if (p != NULL) {
26097 		if (p->min_ownership_delay != 0) {
26098 			min_ownership_delay = p->min_ownership_delay * 1000;
26099 		}
26100 		if (p->max_ownership_delay != 0) {
26101 			max_ownership_delay = p->max_ownership_delay * 1000;
26102 		}
26103 	}
26104 	SD_INFO(SD_LOG_IOCTL_MHD, un,
26105 	    "sd_take_ownership: min, max delays: %d, %d\n",
26106 	    min_ownership_delay, max_ownership_delay);
26107 
26108 	start_time = ddi_get_lbolt();
26109 	current_time	= start_time;
26110 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
26111 	end_time	= start_time + drv_usectohz(max_ownership_delay);
26112 
26113 	while (current_time - end_time < 0) {
26114 		delay(drv_usectohz(500000));
26115 
26116 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
26117 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
26118 				mutex_enter(SD_MUTEX(un));
26119 				rval = (un->un_resvd_status &
26120 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
26121 				mutex_exit(SD_MUTEX(un));
26122 				break;
26123 			}
26124 		}
26125 		previous_current_time = current_time;
26126 		current_time = ddi_get_lbolt();
26127 		mutex_enter(SD_MUTEX(un));
26128 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26129 			ownership_time = ddi_get_lbolt() +
26130 			    drv_usectohz(min_ownership_delay);
26131 			reservation_count = 0;
26132 		} else {
26133 			reservation_count++;
26134 		}
26135 		un->un_resvd_status |= SD_RESERVE;
26136 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26137 		mutex_exit(SD_MUTEX(un));
26138 
26139 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26140 		    "sd_take_ownership: ticks for loop iteration=%ld, "
26141 		    "reservation=%s\n", (current_time - previous_current_time),
26142 		    reservation_count ? "ok" : "reclaimed");
26143 
26144 		if (current_time - ownership_time >= 0 &&
26145 		    reservation_count >= 4) {
26146 			rval = 0; /* Achieved a stable ownership */
26147 			break;
26148 		}
26149 		if (current_time - end_time >= 0) {
26150 			rval = EACCES; /* No ownership in max possible time */
26151 			break;
26152 		}
26153 	}
26154 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26155 	    "sd_take_ownership: return(2)=%d\n", rval);
26156 	return (rval);
26157 }
26158 
26159 
26160 /*
26161  *    Function: sd_reserve_release()
26162  *
26163  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26164  *		PRIORITY RESERVE commands based on a user specified command type
26165  *
26166  *   Arguments: dev - the device 'dev_t'
26167  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26168  *		      SD_RESERVE, SD_RELEASE
26169  *
26170  * Return Code: 0 or Error Code
26171  */
26172 
26173 static int
26174 sd_reserve_release(dev_t dev, int cmd)
26175 {
26176 	struct uscsi_cmd	*com = NULL;
26177 	struct sd_lun		*un = NULL;
26178 	char			cdb[CDB_GROUP0];
26179 	int			rval;
26180 
26181 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26182 	    (cmd == SD_PRIORITY_RESERVE));
26183 
26184 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26185 		return (ENXIO);
26186 	}
26187 
26188 	/* instantiate and initialize the command and cdb */
26189 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26190 	bzero(cdb, CDB_GROUP0);
26191 	com->uscsi_flags   = USCSI_SILENT;
26192 	com->uscsi_timeout = un->un_reserve_release_time;
26193 	com->uscsi_cdblen  = CDB_GROUP0;
26194 	com->uscsi_cdb	   = cdb;
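	/*
	 * RESERVE(6) and RELEASE(6) are plain 6-byte CDBs (CDB_GROUP0);
	 * only the opcode byte (SCMD_RESERVE 0x16, SCMD_RELEASE 0x17)
	 * needs to be filled in.
	 */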
26195 	if (cmd == SD_RELEASE) {
26196 		cdb[0] = SCMD_RELEASE;
26197 	} else {
26198 		cdb[0] = SCMD_RESERVE;
26199 	}
26200 
26201 	/* Send the command. */
26202 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26203 	    UIO_SYSSPACE, SD_PATH_STANDARD);
26204 
26205 	/*
26206 	 * "break" a reservation that is held by another host, by issuing a
26207 	 * reset if priority reserve is desired, and we could not get the
26208 	 * device.
26209 	 */
26210 	if ((cmd == SD_PRIORITY_RESERVE) &&
26211 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26212 		/*
26213 		 * First try to reset the LUN. If we cannot, then try a target
26214 		 * reset, followed by a bus reset if the target reset fails.
26215 		 */
26216 		int reset_retval = 0;
26217 		if (un->un_f_lun_reset_enabled == TRUE) {
26218 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26219 		}
26220 		if (reset_retval == 0) {
26221 			/* The LUN reset either failed or was not issued */
26222 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26223 		}
26224 		if ((reset_retval == 0) &&
26225 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26226 			rval = EIO;
26227 			kmem_free(com, sizeof (*com));
26228 			return (rval);
26229 		}
26230 
26231 		bzero(com, sizeof (struct uscsi_cmd));
26232 		com->uscsi_flags   = USCSI_SILENT;
26233 		com->uscsi_cdb	   = cdb;
26234 		com->uscsi_cdblen  = CDB_GROUP0;
26235 		com->uscsi_timeout = 5;
26236 
26237 		/*
26238 		 * Reissue the last reserve command, this time without request
26239 		 * sense.  Assume that it is just a regular reserve command.
26240 		 */
26241 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26242 		    UIO_SYSSPACE, SD_PATH_STANDARD);
26243 	}
26244 
26245 	/* Return an error if still getting a reservation conflict. */
26246 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26247 		rval = EACCES;
26248 	}
26249 
26250 	kmem_free(com, sizeof (*com));
26251 	return (rval);
26252 }
26253 
26254 
26255 #define	SD_NDUMP_RETRIES	12
26256 /*
26257  *	System Crash Dump routine
26258  */
26259 
26260 static int
26261 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26262 {
26263 	int		instance;
26264 	int		partition;
26265 	int		i;
26266 	int		err;
26267 	struct sd_lun	*un;
26268 	struct dk_map	*lp;
26269 	struct scsi_pkt *wr_pktp;
26270 	struct buf	*wr_bp;
26271 	struct buf	wr_buf;
26272 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26273 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26274 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26275 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26276 	size_t		io_start_offset;
26277 	int		doing_rmw = FALSE;
26278 	int		rval;
26279 #if defined(__i386) || defined(__amd64)
26280 	ssize_t dma_resid;
26281 	daddr_t oblkno;
26282 #endif
26283 
26284 	instance = SDUNIT(dev);
26285 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26286 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26287 		return (ENXIO);
26288 	}
26289 
26290 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26291 
26292 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26293 
26294 	partition = SDPART(dev);
26295 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26296 
26297 	/* Validate the blocks to dump against the partition size. */
26298 	lp = &un->un_map[partition];
26299 	if ((blkno + nblk) > lp->dkl_nblk) {
26300 		SD_TRACE(SD_LOG_DUMP, un,
26301 		    "sddump: dump range larger than partition: "
26302 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26303 		    blkno, nblk, lp->dkl_nblk);
26304 		return (EINVAL);
26305 	}
26306 
26307 	mutex_enter(&un->un_pm_mutex);
26308 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26309 		struct scsi_pkt *start_pktp;
26310 
26311 		mutex_exit(&un->un_pm_mutex);
26312 
26313 		/*
26314 		 * use pm framework to power on HBA 1st
26315 		 */
26316 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26317 
26318 		/*
26319 		 * Dump no longer uses sdpower to power on a device; it is done
26320 		 * in-line here so it can be done in polled mode.
26321 		 */
26322 
26323 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26324 
26325 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26326 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26327 
26328 		if (start_pktp == NULL) {
26329 			/* We were not given a SCSI packet, fail. */
26330 			return (EIO);
26331 		}
26332 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26333 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26334 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26335 		start_pktp->pkt_flags = FLAG_NOINTR;
26336 
26337 		mutex_enter(SD_MUTEX(un));
26338 		SD_FILL_SCSI1_LUN(un, start_pktp);
26339 		mutex_exit(SD_MUTEX(un));
26340 		/*
26341 		 * Scsi_poll returns 0 (success) if the command completes and
26342 		 * the status block is STATUS_GOOD.
26343 		 */
26344 		if (sd_scsi_poll(un, start_pktp) != 0) {
26345 			scsi_destroy_pkt(start_pktp);
26346 			return (EIO);
26347 		}
26348 		scsi_destroy_pkt(start_pktp);
26349 		(void) sd_ddi_pm_resume(un);
26350 	} else {
26351 		mutex_exit(&un->un_pm_mutex);
26352 	}
26353 
26354 	mutex_enter(SD_MUTEX(un));
26355 	un->un_throttle = 0;
26356 
26357 	/*
26358 	 * The first time through, reset the specific target device.
26359 	 * However, when cpr calls sddump we know that sd is in
26360 	 * a good state so no bus reset is required.
26361 	 * Clear sense data via Request Sense cmd.
26362 	 * In sddump we don't care about allow_bus_device_reset anymore
26363 	 */
26364 
26365 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26366 	    (un->un_state != SD_STATE_DUMPING)) {
26367 
26368 		New_state(un, SD_STATE_DUMPING);
26369 
26370 		if (un->un_f_is_fibre == FALSE) {
26371 			mutex_exit(SD_MUTEX(un));
26372 			/*
26373 			 * Attempt a bus reset for parallel scsi.
26374 			 *
26375 			 * Note: A bus reset is required because on some host
26376 			 * systems (i.e. E420R) a bus device reset is
26377 			 * insufficient to reset the state of the target.
26378 			 *
26379 			 * Note: Don't issue the reset for fibre-channel,
26380 			 * because this tends to hang the bus (loop) for
26381 			 * too long while everyone is logging out and in
26382 			 * and the deadman timer for dumping will fire
26383 			 * before the dump is complete.
26384 			 */
26385 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26386 				mutex_enter(SD_MUTEX(un));
26387 				Restore_state(un);
26388 				mutex_exit(SD_MUTEX(un));
26389 				return (EIO);
26390 			}
26391 
26392 			/* Delay to give the device some recovery time. */
26393 			drv_usecwait(10000);
26394 
26395 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26396 				SD_INFO(SD_LOG_DUMP, un,
26397 					"sddump: sd_send_polled_RQS failed\n");
26398 			}
26399 			mutex_enter(SD_MUTEX(un));
26400 		}
26401 	}
26402 
26403 	/*
26404 	 * Convert the partition-relative block number to a
26405 	 * disk physical block number.
26406 	 */
26407 	blkno += un->un_offset[partition];
26408 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26409 
26410 
26411 	/*
26412 	 * Check if the device has a non-512 block size.
26413 	 */
26414 	wr_bp = NULL;
26415 	if (NOT_DEVBSIZE(un)) {
26416 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26417 		tgt_byte_count = nblk * un->un_sys_blocksize;
26418 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26419 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26420 			doing_rmw = TRUE;
26421 			/*
26422 			 * Calculate the block number and number of blocks
26423 			 * in terms of the media block size.
26424 			 */
26425 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26426 			tgt_nblk =
26427 			    ((tgt_byte_offset + tgt_byte_count +
26428 				(un->un_tgt_blocksize - 1)) /
26429 				un->un_tgt_blocksize) - tgt_blkno;
26430 
26431 			/*
26432 			 * Invoke the routine which is going to do read part
26433 			 * of read-modify-write.
26434 			 * Note that this routine returns a pointer to
26435 			 * a valid bp in wr_bp.
26436 			 */
26437 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26438 			    &wr_bp);
26439 			if (err) {
26440 				mutex_exit(SD_MUTEX(un));
26441 				return (err);
26442 			}
26443 			/*
26444 			 * The offset is calculated as:
26445 			 *   (original block # * system block size) -
26446 			 *   (new block # * target block size)
26447 			 */
26448 			io_start_offset =
26449 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26450 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
26451 
26452 			ASSERT((io_start_offset >= 0) &&
26453 			    (io_start_offset < un->un_tgt_blocksize));
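			/*
			 * Worked example: with a 512-byte system block size
			 * and a 2048-byte target block size, blkno 9 gives
			 * tgt_byte_offset 4608, tgt_blkno 2, and
			 * io_start_offset 4608 - 4096 = 512.
			 */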
26454 			/*
26455 			 * Do the modify portion of read modify write.
26456 			 */
26457 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26458 			    (size_t)nblk * un->un_sys_blocksize);
26459 		} else {
26460 			doing_rmw = FALSE;
26461 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26462 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26463 		}
26464 
26465 		/* Convert blkno and nblk to target blocks */
26466 		blkno = tgt_blkno;
26467 		nblk = tgt_nblk;
26468 	} else {
26469 		wr_bp = &wr_buf;
26470 		bzero(wr_bp, sizeof (struct buf));
26471 		wr_bp->b_flags		= B_BUSY;
26472 		wr_bp->b_un.b_addr	= addr;
26473 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26474 		wr_bp->b_resid		= 0;
26475 	}
26476 
26477 	mutex_exit(SD_MUTEX(un));
26478 
26479 	/*
26480 	 * Obtain a SCSI packet for the write command.
26481 	 * It should be safe to call the allocator here without
26482 	 * worrying about being locked for DVMA mapping because
26483 	 * the address we're passed is already a DVMA mapping
26484 	 *
26485 	 * We are also not going to worry about semaphore ownership
26486 	 * in the dump buffer. Dumping is single threaded at present.
26487 	 */
26488 
26489 	wr_pktp = NULL;
26490 
26491 #if defined(__i386) || defined(__amd64)
26492 	dma_resid = wr_bp->b_bcount;
26493 	oblkno = blkno;
26494 	while (dma_resid != 0) {
26495 #endif
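	/*
	 * Note: on x86 the while loop above wraps the packet setup and the
	 * polled write below; its closing brace is in the matching
	 * "dma_resid" #if block near the end of this function.
	 */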
26496 
26497 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26498 		wr_bp->b_flags &= ~B_ERROR;
26499 
26500 #if defined(__i386) || defined(__amd64)
26501 		blkno = oblkno +
26502 			((wr_bp->b_bcount - dma_resid) /
26503 			    un->un_tgt_blocksize);
26504 		nblk = dma_resid / un->un_tgt_blocksize;
26505 
26506 		if (wr_pktp) {
26507 			/* Partial DMA transfers after initial transfer */
26508 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26509 			    blkno, nblk);
26510 		} else {
26511 			/* Initial transfer */
26512 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26513 			    un->un_pkt_flags, NULL_FUNC, NULL,
26514 			    blkno, nblk);
26515 		}
26516 #else
26517 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26518 		    0, NULL_FUNC, NULL, blkno, nblk);
26519 #endif
26520 
26521 		if (rval == 0) {
26522 			/* We were given a SCSI packet, continue. */
26523 			break;
26524 		}
26525 
26526 		if (i == 0) {
26527 			if (wr_bp->b_flags & B_ERROR) {
26528 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26529 				    "no resources for dumping; "
26530 				    "error code: 0x%x, retrying",
26531 				    geterror(wr_bp));
26532 			} else {
26533 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26534 				    "no resources for dumping; retrying");
26535 			}
26536 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26537 			if (wr_bp->b_flags & B_ERROR) {
26538 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26539 				    "no resources for dumping; error code: "
26540 				    "0x%x, retrying\n", geterror(wr_bp));
26541 			}
26542 		} else {
26543 			if (wr_bp->b_flags & B_ERROR) {
26544 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26545 				    "no resources for dumping; "
26546 				    "error code: 0x%x, retries failed, "
26547 				    "giving up.\n", geterror(wr_bp));
26548 			} else {
26549 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26550 				    "no resources for dumping; "
26551 				    "retries failed, giving up.\n");
26552 			}
26553 			mutex_enter(SD_MUTEX(un));
26554 			Restore_state(un);
26555 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26556 				mutex_exit(SD_MUTEX(un));
26557 				scsi_free_consistent_buf(wr_bp);
26558 			} else {
26559 				mutex_exit(SD_MUTEX(un));
26560 			}
26561 			return (EIO);
26562 		}
26563 		drv_usecwait(10000);
26564 	}
26565 
26566 #if defined(__i386) || defined(__amd64)
26567 	/*
26568 	 * save the resid from PARTIAL_DMA
26569 	 */
26570 	dma_resid = wr_pktp->pkt_resid;
26571 	if (dma_resid != 0)
26572 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26573 	wr_pktp->pkt_resid = 0;
26574 #endif
26575 
26576 	/* SunBug 1222170 */
26577 	wr_pktp->pkt_flags = FLAG_NOINTR;
26578 
26579 	err = EIO;
26580 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26581 
26582 		/*
26583 		 * Scsi_poll returns 0 (success) if the command completes and
26584 		 * the status block is STATUS_GOOD.  We should only check
26585 		 * errors if this condition is not true.  Even then we should
26586 		 * send our own request sense packet only if we have a check
26587 		 * condition and auto request sense has not been performed by
26588 		 * the hba.
26589 		 */
26590 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26591 
26592 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26593 		    (wr_pktp->pkt_resid == 0)) {
26594 			err = SD_SUCCESS;
26595 			break;
26596 		}
26597 
26598 		/*
26599 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26600 		 */
26601 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26602 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26603 			    "Device is gone\n");
26604 			break;
26605 		}
26606 
26607 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26608 			SD_INFO(SD_LOG_DUMP, un,
26609 			    "sddump: write failed with CHECK, try # %d\n", i);
26610 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26611 				(void) sd_send_polled_RQS(un);
26612 			}
26613 
26614 			continue;
26615 		}
26616 
26617 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26618 			int reset_retval = 0;
26619 
26620 			SD_INFO(SD_LOG_DUMP, un,
26621 			    "sddump: write failed with BUSY, try # %d\n", i);
26622 
26623 			if (un->un_f_lun_reset_enabled == TRUE) {
26624 				reset_retval = scsi_reset(SD_ADDRESS(un),
26625 				    RESET_LUN);
26626 			}
26627 			if (reset_retval == 0) {
26628 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26629 			}
26630 			(void) sd_send_polled_RQS(un);
26631 
26632 		} else {
26633 			SD_INFO(SD_LOG_DUMP, un,
26634 			    "sddump: write failed with 0x%x, try # %d\n",
26635 			    SD_GET_PKT_STATUS(wr_pktp), i);
26636 			mutex_enter(SD_MUTEX(un));
26637 			sd_reset_target(un, wr_pktp);
26638 			mutex_exit(SD_MUTEX(un));
26639 		}
26640 
26641 		/*
26642 		 * If we are not getting anywhere with lun/target resets,
26643 		 * let's reset the bus.
26644 		 */
26645 		if (i == SD_NDUMP_RETRIES/2) {
26646 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26647 			(void) sd_send_polled_RQS(un);
26648 		}
26649 
26650 	}
26651 #if defined(__i386) || defined(__amd64)
26652 	}	/* dma_resid */
26653 #endif
26654 
26655 	scsi_destroy_pkt(wr_pktp);
26656 	mutex_enter(SD_MUTEX(un));
26657 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26658 		mutex_exit(SD_MUTEX(un));
26659 		scsi_free_consistent_buf(wr_bp);
26660 	} else {
26661 		mutex_exit(SD_MUTEX(un));
26662 	}
26663 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26664 	return (err);
26665 }
26666 
26667 /*
26668  *    Function: sd_scsi_poll()
26669  *
26670  * Description: This is a wrapper for the scsi_poll call.
26671  *
26672  *   Arguments: sd_lun - The unit structure
26673  *              scsi_pkt - The scsi packet being sent to the device.
26674  *
26675  * Return Code: 0 - Command completed successfully with good status
26676  *             -1 - Command failed.  This could indicate a check condition
26677  *                  or other status value requiring recovery action.
26678  *
26679  */
26680 
26681 static int
26682 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26683 {
26684 	int status;
26685 
26686 	ASSERT(un != NULL);
26687 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26688 	ASSERT(pktp != NULL);
26689 
26690 	status = SD_SUCCESS;
26691 
26692 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26693 		pktp->pkt_flags |= un->un_tagflags;
26694 		pktp->pkt_flags &= ~FLAG_NODISCON;
26695 	}
26696 
26697 	status = sd_ddi_scsi_poll(pktp);
26698 	/*
26699 	 * Scsi_poll returns 0 (success) if the command completes and the
26700 	 * status block is STATUS_GOOD.  We should only check errors if this
26701 	 * condition is not true.  Even then we should send our own request
26702 	 * sense packet only if we have a check condition and auto
26703 	 * request sense has not been performed by the hba.
26704 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26705 	 */
26706 	if ((status != SD_SUCCESS) &&
26707 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26708 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26709 	    (pktp->pkt_reason != CMD_DEV_GONE))
26710 		(void) sd_send_polled_RQS(un);
26711 
26712 	return (status);
26713 }
26714 
26715 /*
26716  *    Function: sd_send_polled_RQS()
26717  *
26718  * Description: This sends the request sense command to a device.
26719  *
26720  *   Arguments: sd_lun - The unit structure
26721  *
26722  * Return Code: 0 - Command completed successfully with good status
26723  *             -1 - Command failed.
26724  *
26725  */
26726 
26727 static int
26728 sd_send_polled_RQS(struct sd_lun *un)
26729 {
26730 	int	ret_val;
26731 	struct	scsi_pkt	*rqs_pktp;
26732 	struct	buf		*rqs_bp;
26733 
26734 	ASSERT(un != NULL);
26735 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26736 
26737 	ret_val = SD_SUCCESS;
26738 
26739 	rqs_pktp = un->un_rqs_pktp;
26740 	rqs_bp	 = un->un_rqs_bp;
26741 
26742 	mutex_enter(SD_MUTEX(un));
26743 
26744 	if (un->un_sense_isbusy) {
26745 		ret_val = SD_FAILURE;
26746 		mutex_exit(SD_MUTEX(un));
26747 		return (ret_val);
26748 	}
26749 
26750 	/*
26751 	 * If the request sense buffer (and packet) is not in use,
26752 	 * let's set the un_sense_isbusy and send our packet
26753 	 */
26754 	un->un_sense_isbusy 	= 1;
26755 	rqs_pktp->pkt_resid  	= 0;
26756 	rqs_pktp->pkt_reason 	= 0;
26757 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26758 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26759 
26760 	mutex_exit(SD_MUTEX(un));
26761 
26762 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26763 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26764 
26765 	/*
26766 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26767 	 * axle - it has a call into us!
26768 	 */
26769 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26770 		SD_INFO(SD_LOG_COMMON, un,
26771 		    "sd_send_polled_RQS: RQS failed\n");
26772 	}
26773 
26774 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26775 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26776 
26777 	mutex_enter(SD_MUTEX(un));
26778 	un->un_sense_isbusy = 0;
26779 	mutex_exit(SD_MUTEX(un));
26780 
26781 	return (ret_val);
26782 }
26783 
26784 /*
26785  * Defines needed for localized version of the scsi_poll routine.
26786  */
26787 #define	SD_CSEC		10000			/* usecs */
26788 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
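
/*
 * SD_CSEC is the polling interval in microseconds (10000 usec = 10 msec,
 * i.e. one centisecond), so SD_SEC_TO_CSEC (1000000/10000 = 100) is the
 * number of polling intervals per second.  sd_ddi_scsi_poll() uses
 * pkt_time (in seconds) * SD_SEC_TO_CSEC as its retry-loop bound, so each
 * loop iteration accounts for one 10 msec tick of the command timeout.
 */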
26789 
26790 
26791 /*
26792  *    Function: sd_ddi_scsi_poll()
26793  *
26794  * Description: Localized version of the scsi_poll routine.  The purpose is to
26795  *		send a scsi_pkt to a device as a polled command.  This version
26796  *		is to ensure more robust handling of transport errors.
26797  *		Specifically this routine cures not ready, coming ready
26798  *		transition for power up and reset of sonoma's.  This can take
26799  *		up to 45 seconds for power-on and 20 seconds for reset of a
26800  * 		sonoma lun.
26801  *
26802  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26803  *
26804  * Return Code: 0 - Command completed successfully with good status
26805  *             -1 - Command failed.
26806  *
26807  */
26808 
26809 static int
26810 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26811 {
26812 	int busy_count;
26813 	int timeout;
26814 	int rval = SD_FAILURE;
26815 	int savef;
26816 	uint8_t *sensep;
26817 	long savet;
26818 	void (*savec)();
26819 	/*
26820 	 * The following is defined in machdep.c and is used in determining if
26821 	 * the scsi transport system will do polled I/O instead of interrupt
26822 	 * I/O when called from xx_dump().
26823 	 */
26824 	extern int do_polled_io;
26825 
26826 	/*
26827 	 * save old flags in pkt, to restore at end
26828 	 */
26829 	savef = pkt->pkt_flags;
26830 	savec = pkt->pkt_comp;
26831 	savet = pkt->pkt_time;
26832 
26833 	pkt->pkt_flags |= FLAG_NOINTR;
26834 
26835 	/*
26836 	 * XXX there is nothing in the SCSA spec that states that we should not
26837 	 * do a callback for polled cmds; however, removing this will break sd
26838 	 * and probably other target drivers
26839 	 */
26840 	pkt->pkt_comp = NULL;
26841 
26842 	/*
26843 	 * we don't like a polled command without timeout.
26844 	 * 60 seconds seems long enough.
26845 	 */
26846 	if (pkt->pkt_time == 0) {
26847 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26848 	}
26849 
26850 	/*
26851 	 * Send polled cmd.
26852 	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec,
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second, as this status takes a while to
	 * change.  Unit attention is retried pkt_time (60) times with no
	 * delay between retries.
26859 	 */
26860 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26861 
26862 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26863 		int rc;
26864 		int poll_delay;
26865 
26866 		/*
26867 		 * Initialize pkt status variables.
26868 		 */
26869 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26870 
26871 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26872 			if (rc != TRAN_BUSY) {
26873 				/* Transport failed - give up. */
26874 				break;
26875 			} else {
26876 				/* Transport busy - try again. */
26877 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26878 			}
26879 		} else {
26880 			/*
26881 			 * Transport accepted - check pkt status.
26882 			 */
26883 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
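			/*
			 * If the command took a check condition and the HBA
			 * completed auto request sense, the status block is
			 * actually a scsi_arq_status; point sensep at the
			 * embedded sense data so the retry decisions below
			 * can examine the sense key.
			 */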
26884 			if (pkt->pkt_reason == CMD_CMPLT &&
26885 			    rc == STATUS_CHECK &&
26886 			    pkt->pkt_state & STATE_ARQ_DONE) {
26887 				struct scsi_arq_status *arqstat =
26888 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26889 
26890 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26891 			} else {
26892 				sensep = NULL;
26893 			}
26894 
26895 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26896 			    (rc == STATUS_GOOD)) {
26897 				/* No error - we're done */
26898 				rval = SD_SUCCESS;
26899 				break;
26900 
26901 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26902 				/* Lost connection - give up */
26903 				break;
26904 
26905 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26906 			    (pkt->pkt_state == 0)) {
26907 				/* Pkt not dispatched - try again. */
26908 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26909 
26910 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26911 			    (rc == STATUS_QFULL)) {
26912 				/* Queue full - try again. */
26913 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26914 
26915 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26916 			    (rc == STATUS_BUSY)) {
26917 				/* Busy - try again. */
26918 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26919 				busy_count += (SD_SEC_TO_CSEC - 1);
26920 
26921 			} else if ((sensep != NULL) &&
26922 			    (scsi_sense_key(sensep) ==
26923 				KEY_UNIT_ATTENTION)) {
26924 				/* Unit Attention - try again */
				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 sec */
26926 				continue;
26927 
26928 			} else if ((sensep != NULL) &&
26929 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26930 			    (scsi_sense_asc(sensep) == 0x04) &&
26931 			    (scsi_sense_ascq(sensep) == 0x01)) {
26932 				/* Not ready -> ready - try again. */
26933 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26934 				busy_count += (SD_SEC_TO_CSEC - 1);
26935 
26936 			} else {
26937 				/* BAD status - give up. */
26938 				break;
26939 			}
26940 		}
26941 
26942 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26943 		    !do_polled_io) {
26944 			delay(drv_usectohz(poll_delay));
26945 		} else {
			/* busy-wait during cpr_dump or on interrupt threads */
26947 			drv_usecwait(poll_delay);
26948 		}
26949 	}
26950 
26951 	pkt->pkt_flags = savef;
26952 	pkt->pkt_comp = savec;
26953 	pkt->pkt_time = savet;
26954 	return (rval);
26955 }
26956 
26957 
26958 /*
26959  *    Function: sd_persistent_reservation_in_read_keys
26960  *
26961  * Description: This routine is the driver entry point for handling CD-ROM
26962  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26963  *		by sending the SCSI-3 PRIN commands to the device.
26964  *		Processes the read keys command response by copying the
26965  *		reservation key information into the user provided buffer.
26966  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26967  *
26968  *   Arguments: un   -  Pointer to soft state struct for the target.
26969  *		usrp -	user provided pointer to multihost Persistent In Read
26970  *			Keys structure (mhioc_inkeys_t)
26971  *		flag -	this argument is a pass through to ddi_copyxxx()
26972  *			directly from the mode argument of ioctl().
26973  *
26974  * Return Code: 0   - Success
26975  *		EACCES
26976  *		ENOTSUP
26977  *		errno return code from sd_send_scsi_cmd()
26978  *
26979  *     Context: Can sleep. Does not return until command is completed.
26980  */
26981 
26982 static int
26983 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26984     mhioc_inkeys_t *usrp, int flag)
26985 {
26986 #ifdef _MULTI_DATAMODEL
26987 	struct mhioc_key_list32	li32;
26988 #endif
26989 	sd_prin_readkeys_t	*in;
26990 	mhioc_inkeys_t		*ptr;
26991 	mhioc_key_list_t	li;
	uchar_t			*data_bufp = NULL;
	int 			data_len = 0;
	int			rval = 0;
26995 	size_t			copysz;
26996 
26997 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26998 		return (EINVAL);
26999 	}
27000 	bzero(&li, sizeof (mhioc_key_list_t));
27001 
27002 	/*
27003 	 * Get the listsize from user
27004 	 */
27005 #ifdef _MULTI_DATAMODEL
27006 
27007 	switch (ddi_model_convert_from(flag & FMODELS)) {
27008 	case DDI_MODEL_ILP32:
27009 		copysz = sizeof (struct mhioc_key_list32);
27010 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
27011 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27012 			    "sd_persistent_reservation_in_read_keys: "
27013 			    "failed ddi_copyin: mhioc_key_list32_t\n");
27014 			rval = EFAULT;
27015 			goto done;
27016 		}
27017 		li.listsize = li32.listsize;
27018 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
27019 		break;
27020 
27021 	case DDI_MODEL_NONE:
27022 		copysz = sizeof (mhioc_key_list_t);
27023 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27024 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27025 			    "sd_persistent_reservation_in_read_keys: "
27026 			    "failed ddi_copyin: mhioc_key_list_t\n");
27027 			rval = EFAULT;
27028 			goto done;
27029 		}
27030 		break;
27031 	}
27032 
27033 #else /* ! _MULTI_DATAMODEL */
27034 	copysz = sizeof (mhioc_key_list_t);
27035 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27036 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27037 		    "sd_persistent_reservation_in_read_keys: "
27038 		    "failed ddi_copyin: mhioc_key_list_t\n");
27039 		rval = EFAULT;
27040 		goto done;
27041 	}
27042 #endif
27043 
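	/*
	 * The allocation length is the space for the requested number of
	 * keys plus the fixed portion of the PRIN READ KEYS response
	 * header; the sizeof (caddr_t) subtraction assumes the keylist
	 * member of sd_prin_readkeys_t is declared as a caddr_t-sized
	 * placeholder at the end of the structure.
	 */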
27044 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
27045 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
27046 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27047 
27048 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
27049 	    data_len, data_bufp)) != 0) {
27050 		goto done;
27051 	}
27052 	in = (sd_prin_readkeys_t *)data_bufp;
27053 	ptr->generation = BE_32(in->generation);
27054 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
27055 
27056 	/*
27057 	 * Return the min(listsize, listlen) keys
27058 	 */
27059 #ifdef _MULTI_DATAMODEL
27060 
27061 	switch (ddi_model_convert_from(flag & FMODELS)) {
27062 	case DDI_MODEL_ILP32:
27063 		li32.listlen = li.listlen;
27064 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27065 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27066 			    "sd_persistent_reservation_in_read_keys: "
27067 			    "failed ddi_copyout: mhioc_key_list32_t\n");
27068 			rval = EFAULT;
27069 			goto done;
27070 		}
27071 		break;
27072 
27073 	case DDI_MODEL_NONE:
27074 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27075 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27076 			    "sd_persistent_reservation_in_read_keys: "
27077 			    "failed ddi_copyout: mhioc_key_list_t\n");
27078 			rval = EFAULT;
27079 			goto done;
27080 		}
27081 		break;
27082 	}
27083 
27084 #else /* ! _MULTI_DATAMODEL */
27085 
27086 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27087 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27088 		    "sd_persistent_reservation_in_read_keys: "
27089 		    "failed ddi_copyout: mhioc_key_list_t\n");
27090 		rval = EFAULT;
27091 		goto done;
27092 	}
27093 
27094 #endif /* _MULTI_DATAMODEL */
27095 
27096 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27097 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27098 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27099 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27100 		    "sd_persistent_reservation_in_read_keys: "
27101 		    "failed ddi_copyout: keylist\n");
27102 		rval = EFAULT;
27103 	}
done:
	if (data_bufp != NULL) {
		kmem_free(data_bufp, data_len);
	}
27106 	return (rval);
27107 }
27108 
27109 
27110 /*
27111  *    Function: sd_persistent_reservation_in_read_resv
27112  *
27113  * Description: This routine is the driver entry point for handling CD-ROM
27114  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27115  *		by sending the SCSI-3 PRIN commands to the device.
27116  *		Process the read persistent reservations command response by
27117  *		copying the reservation information into the user provided
27118  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27119  *
27120  *   Arguments: un   -  Pointer to soft state struct for the target.
27121  *		usrp -	user provided pointer to multihost Persistent In Read
27122  *			Keys structure (mhioc_inkeys_t)
27123  *		flag -	this argument is a pass through to ddi_copyxxx()
27124  *			directly from the mode argument of ioctl().
27125  *
27126  * Return Code: 0   - Success
27127  *		EACCES
27128  *		ENOTSUP
27129  *		errno return code from sd_send_scsi_cmd()
27130  *
27131  *     Context: Can sleep. Does not return until command is completed.
27132  */
27133 
27134 static int
27135 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27136     mhioc_inresvs_t *usrp, int flag)
27137 {
27138 #ifdef _MULTI_DATAMODEL
27139 	struct mhioc_resv_desc_list32 resvlist32;
27140 #endif
27141 	sd_prin_readresv_t	*in;
27142 	mhioc_inresvs_t		*ptr;
27143 	sd_readresv_desc_t	*readresv_ptr;
27144 	mhioc_resv_desc_list_t	resvlist;
27145 	mhioc_resv_desc_t 	resvdesc;
	uchar_t			*data_bufp = NULL;
	int 			data_len = 0;
	int			rval = 0;
27149 	int			i;
27150 	size_t			copysz;
27151 	mhioc_resv_desc_t	*bufp;
27152 
27153 	if ((ptr = usrp) == NULL) {
27154 		return (EINVAL);
27155 	}
27156 
27157 	/*
27158 	 * Get the listsize from user
27159 	 */
27160 #ifdef _MULTI_DATAMODEL
27161 	switch (ddi_model_convert_from(flag & FMODELS)) {
27162 	case DDI_MODEL_ILP32:
27163 		copysz = sizeof (struct mhioc_resv_desc_list32);
27164 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27165 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27166 			    "sd_persistent_reservation_in_read_resv: "
27167 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27168 			rval = EFAULT;
27169 			goto done;
27170 		}
27171 		resvlist.listsize = resvlist32.listsize;
27172 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27173 		break;
27174 
27175 	case DDI_MODEL_NONE:
27176 		copysz = sizeof (mhioc_resv_desc_list_t);
27177 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27178 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27179 			    "sd_persistent_reservation_in_read_resv: "
27180 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27181 			rval = EFAULT;
27182 			goto done;
27183 		}
27184 		break;
27185 	}
27186 #else /* ! _MULTI_DATAMODEL */
27187 	copysz = sizeof (mhioc_resv_desc_list_t);
27188 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27189 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27190 		    "sd_persistent_reservation_in_read_resv: "
27191 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27192 		rval = EFAULT;
27193 		goto done;
27194 	}
27195 #endif /* ! _MULTI_DATAMODEL */
27196 
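	/*
	 * As with the read keys case, the allocation length covers the
	 * requested number of reservation descriptors plus the fixed
	 * response header, assuming the trailing descriptor member of
	 * sd_prin_readresv_t is a caddr_t-sized placeholder.
	 */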
27197 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27198 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27199 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27200 
27201 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27202 	    data_len, data_bufp)) != 0) {
27203 		goto done;
27204 	}
27205 	in = (sd_prin_readresv_t *)data_bufp;
27206 	ptr->generation = BE_32(in->generation);
27207 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27208 
27209 	/*
	 * Return the min(listsize, listlen) reservation descriptors
27211 	 */
27212 #ifdef _MULTI_DATAMODEL
27213 
27214 	switch (ddi_model_convert_from(flag & FMODELS)) {
27215 	case DDI_MODEL_ILP32:
27216 		resvlist32.listlen = resvlist.listlen;
27217 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27218 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27219 			    "sd_persistent_reservation_in_read_resv: "
27220 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27221 			rval = EFAULT;
27222 			goto done;
27223 		}
27224 		break;
27225 
27226 	case DDI_MODEL_NONE:
27227 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27228 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27229 			    "sd_persistent_reservation_in_read_resv: "
27230 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27231 			rval = EFAULT;
27232 			goto done;
27233 		}
27234 		break;
27235 	}
27236 
27237 #else /* ! _MULTI_DATAMODEL */
27238 
27239 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27240 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27241 		    "sd_persistent_reservation_in_read_resv: "
27242 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27243 		rval = EFAULT;
27244 		goto done;
27245 	}
27246 
27247 #endif /* ! _MULTI_DATAMODEL */
27248 
27249 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27250 	bufp = resvlist.list;
27251 	copysz = sizeof (mhioc_resv_desc_t);
27252 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27253 	    i++, readresv_ptr++, bufp++) {
27254 
27255 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27256 		    MHIOC_RESV_KEY_SIZE);
27257 		resvdesc.type  = readresv_ptr->type;
27258 		resvdesc.scope = readresv_ptr->scope;
27259 		resvdesc.scope_specific_addr =
27260 		    BE_32(readresv_ptr->scope_specific_addr);
27261 
27262 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27263 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27264 			    "sd_persistent_reservation_in_read_resv: "
27265 			    "failed ddi_copyout: resvlist\n");
27266 			rval = EFAULT;
27267 			goto done;
27268 		}
27269 	}
done:
	if (data_bufp != NULL) {
		kmem_free(data_bufp, data_len);
	}
27272 	return (rval);
27273 }
27274 
27275 
27276 /*
27277  *    Function: sr_change_blkmode()
27278  *
27279  * Description: This routine is the driver entry point for handling CD-ROM
27280  *		block mode ioctl requests. Support for returning and changing
27281  *		the current block size in use by the device is implemented. The
27282  *		LBA size is changed via a MODE SELECT Block Descriptor.
27283  *
27284  *		This routine issues a mode sense with an allocation length of
27285  *		12 bytes for the mode page header and a single block descriptor.
27286  *
27287  *   Arguments: dev - the device 'dev_t'
27288  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27289  *		      CDROMSBLKMODE (set)
27290  *		data - current block size or requested block size
27291  *		flag - this argument is a pass through to ddi_copyxxx() directly
27292  *		       from the mode argument of ioctl().
27293  *
27294  * Return Code: the code returned by sd_send_scsi_cmd()
27295  *		EINVAL if invalid arguments are provided
27296  *		EFAULT if ddi_copyxxx() fails
27297  *		ENXIO if fail ddi_get_soft_state
27298  *		EIO if invalid mode sense block descriptor length
27299  *
27300  */
27301 
27302 static int
27303 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27304 {
27305 	struct sd_lun			*un = NULL;
27306 	struct mode_header		*sense_mhp, *select_mhp;
27307 	struct block_descriptor		*sense_desc, *select_desc;
27308 	int				current_bsize;
27309 	int				rval = EINVAL;
27310 	uchar_t				*sense = NULL;
27311 	uchar_t				*select = NULL;
27312 
27313 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27314 
27315 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27316 		return (ENXIO);
27317 	}
27318 
27319 	/*
27320 	 * The block length is changed via the Mode Select block descriptor, the
27321 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27322 	 * required as part of this routine. Therefore the mode sense allocation
27323 	 * length is specified to be the length of a mode page header and a
27324 	 * block descriptor.
27325 	 */
27326 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27327 
27328 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27329 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27330 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27331 		    "sr_change_blkmode: Mode Sense Failed\n");
27332 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27333 		return (rval);
27334 	}
27335 
27336 	/* Check the block descriptor len to handle only 1 block descriptor */
27337 	sense_mhp = (struct mode_header *)sense;
27338 	if ((sense_mhp->bdesc_length == 0) ||
27339 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27340 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27341 		    "sr_change_blkmode: Mode Sense returned invalid block"
27342 		    " descriptor length\n");
27343 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27344 		return (EIO);
27345 	}
27346 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27347 	current_bsize = ((sense_desc->blksize_hi << 16) |
27348 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27349 
27350 	/* Process command */
27351 	switch (cmd) {
27352 	case CDROMGBLKMODE:
27353 		/* Return the block size obtained during the mode sense */
27354 		if (ddi_copyout(&current_bsize, (void *)data,
27355 		    sizeof (int), flag) != 0)
27356 			rval = EFAULT;
27357 		break;
27358 	case CDROMSBLKMODE:
27359 		/* Validate the requested block size */
27360 		switch (data) {
27361 		case CDROM_BLK_512:
27362 		case CDROM_BLK_1024:
27363 		case CDROM_BLK_2048:
27364 		case CDROM_BLK_2056:
27365 		case CDROM_BLK_2336:
27366 		case CDROM_BLK_2340:
27367 		case CDROM_BLK_2352:
27368 		case CDROM_BLK_2368:
27369 		case CDROM_BLK_2448:
27370 		case CDROM_BLK_2646:
27371 		case CDROM_BLK_2647:
27372 			break;
27373 		default:
27374 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27375 			    "sr_change_blkmode: "
27376 			    "Block Size '%ld' Not Supported\n", data);
27377 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27378 			return (EINVAL);
27379 		}
27380 
27381 		/*
27382 		 * The current block size matches the requested block size so
27383 		 * there is no need to send the mode select to change the size
27384 		 */
27385 		if (current_bsize == data) {
27386 			break;
27387 		}
27388 
27389 		/* Build the select data for the requested block size */
27390 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27391 		select_mhp = (struct mode_header *)select;
27392 		select_desc =
27393 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27394 		/*
27395 		 * The LBA size is changed via the block descriptor, so the
27396 		 * descriptor is built according to the user data
27397 		 */
27398 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27399 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27400 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27401 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27402 
27403 		/* Send the mode select for the requested block size */
27404 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27405 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27406 		    SD_PATH_STANDARD)) != 0) {
27407 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27408 			    "sr_change_blkmode: Mode Select Failed\n");
27409 			/*
27410 			 * The mode select failed for the requested block size,
27411 			 * so reset the data for the original block size and
27412 			 * send it to the target. The error is indicated by the
27413 			 * return value for the failed mode select.
27414 			 */
27415 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27416 			select_desc->blksize_mid = sense_desc->blksize_mid;
27417 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27418 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27419 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27420 			    SD_PATH_STANDARD);
27421 		} else {
27422 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27423 			mutex_enter(SD_MUTEX(un));
27424 			sd_update_block_info(un, (uint32_t)data, 0);
27425 
27426 			mutex_exit(SD_MUTEX(un));
27427 		}
27428 		break;
27429 	default:
27430 		/* should not reach here, but check anyway */
27431 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27432 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27433 		rval = EINVAL;
27434 		break;
27435 	}
27436 
27437 	if (select) {
27438 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27439 	}
27440 	if (sense) {
27441 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27442 	}
27443 	return (rval);
27444 }
27445 
27446 
27447 /*
27448  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27449  * implement driver support for getting and setting the CD speed. The command
27450  * set used will be based on the device type. If the device has not been
27451  * identified as MMC the Toshiba vendor specific mode page will be used. If
27452  * the device is MMC but does not support the Real Time Streaming feature
27453  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27454  * be used to read the speed.
27455  */
27456 
27457 /*
27458  *    Function: sr_change_speed()
27459  *
27460  * Description: This routine is the driver entry point for handling CD-ROM
27461  *		drive speed ioctl requests for devices supporting the Toshiba
27462  *		vendor specific drive speed mode page. Support for returning
27463  *		and changing the current drive speed in use by the device is
27464  *		implemented.
27465  *
27466  *   Arguments: dev - the device 'dev_t'
27467  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27468  *		      CDROMSDRVSPEED (set)
27469  *		data - current drive speed or requested drive speed
27470  *		flag - this argument is a pass through to ddi_copyxxx() directly
27471  *		       from the mode argument of ioctl().
27472  *
27473  * Return Code: the code returned by sd_send_scsi_cmd()
27474  *		EINVAL if invalid arguments are provided
27475  *		EFAULT if ddi_copyxxx() fails
27476  *		ENXIO if fail ddi_get_soft_state
27477  *		EIO if invalid mode sense block descriptor length
27478  */
27479 
27480 static int
27481 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27482 {
27483 	struct sd_lun			*un = NULL;
27484 	struct mode_header		*sense_mhp, *select_mhp;
27485 	struct mode_speed		*sense_page, *select_page;
27486 	int				current_speed;
27487 	int				rval = EINVAL;
27488 	int				bd_len;
27489 	uchar_t				*sense = NULL;
27490 	uchar_t				*select = NULL;
27491 
27492 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27493 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27494 		return (ENXIO);
27495 	}
27496 
27497 	/*
27498 	 * Note: The drive speed is being modified here according to a Toshiba
27499 	 * vendor specific mode page (0x31).
27500 	 */
27501 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27502 
27503 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27504 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
	    SD_PATH_STANDARD)) != 0) {
27506 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27507 		    "sr_change_speed: Mode Sense Failed\n");
27508 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27509 		return (rval);
27510 	}
27511 	sense_mhp  = (struct mode_header *)sense;
27512 
27513 	/* Check the block descriptor len to handle only 1 block descriptor */
27514 	bd_len = sense_mhp->bdesc_length;
27515 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27516 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27517 		    "sr_change_speed: Mode Sense returned invalid block "
27518 		    "descriptor length\n");
27519 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27520 		return (EIO);
27521 	}
27522 
27523 	sense_page = (struct mode_speed *)
27524 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27525 	current_speed = sense_page->speed;
27526 
27527 	/* Process command */
27528 	switch (cmd) {
27529 	case CDROMGDRVSPEED:
27530 		/* Return the drive speed obtained during the mode sense */
27531 		if (current_speed == 0x2) {
27532 			current_speed = CDROM_TWELVE_SPEED;
27533 		}
27534 		if (ddi_copyout(&current_speed, (void *)data,
27535 		    sizeof (int), flag) != 0) {
27536 			rval = EFAULT;
27537 		}
27538 		break;
27539 	case CDROMSDRVSPEED:
27540 		/* Validate the requested drive speed */
27541 		switch ((uchar_t)data) {
27542 		case CDROM_TWELVE_SPEED:
27543 			data = 0x2;
27544 			/*FALLTHROUGH*/
27545 		case CDROM_NORMAL_SPEED:
27546 		case CDROM_DOUBLE_SPEED:
27547 		case CDROM_QUAD_SPEED:
27548 		case CDROM_MAXIMUM_SPEED:
27549 			break;
27550 		default:
27551 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27552 			    "sr_change_speed: "
27553 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27554 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27555 			return (EINVAL);
27556 		}
27557 
27558 		/*
27559 		 * The current drive speed matches the requested drive speed so
27560 		 * there is no need to send the mode select to change the speed
27561 		 */
27562 		if (current_speed == data) {
27563 			break;
27564 		}
27565 
27566 		/* Build the select data for the requested drive speed */
27567 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27568 		select_mhp = (struct mode_header *)select;
27569 		select_mhp->bdesc_length = 0;
		select_page =
		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27574 		select_page->mode_page.code = CDROM_MODE_SPEED;
27575 		select_page->mode_page.length = 2;
27576 		select_page->speed = (uchar_t)data;
27577 
		/* Send the mode select for the requested drive speed */
27579 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27580 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27581 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27582 			/*
27583 			 * The mode select failed for the requested drive speed,
27584 			 * so reset the data for the original drive speed and
27585 			 * send it to the target. The error is indicated by the
27586 			 * return value for the failed mode select.
27587 			 */
27588 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27589 			    "sr_drive_speed: Mode Select Failed\n");
27590 			select_page->speed = sense_page->speed;
27591 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27592 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27593 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27594 		}
27595 		break;
27596 	default:
27597 		/* should not reach here, but check anyway */
27598 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27599 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27600 		rval = EINVAL;
27601 		break;
27602 	}
27603 
27604 	if (select) {
27605 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27606 	}
27607 	if (sense) {
27608 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27609 	}
27610 
27611 	return (rval);
27612 }
27613 
27614 
27615 /*
27616  *    Function: sr_atapi_change_speed()
27617  *
27618  * Description: This routine is the driver entry point for handling CD-ROM
27619  *		drive speed ioctl requests for MMC devices that do not support
27620  *		the Real Time Streaming feature (0x107).
27621  *
27622  *		Note: This routine will use the SET SPEED command which may not
27623  *		be supported by all devices.
27624  *
27625  *   Arguments: dev- the device 'dev_t'
27626  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27627  *		     CDROMSDRVSPEED (set)
27628  *		data- current drive speed or requested drive speed
27629  *		flag- this argument is a pass through to ddi_copyxxx() directly
27630  *		      from the mode argument of ioctl().
27631  *
27632  * Return Code: the code returned by sd_send_scsi_cmd()
27633  *		EINVAL if invalid arguments are provided
27634  *		EFAULT if ddi_copyxxx() fails
27635  *		ENXIO if fail ddi_get_soft_state
27636  *		EIO if invalid mode sense block descriptor length
27637  */
27638 
27639 static int
27640 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27641 {
27642 	struct sd_lun			*un;
27643 	struct uscsi_cmd		*com = NULL;
27644 	struct mode_header_grp2		*sense_mhp;
27645 	uchar_t				*sense_page;
27646 	uchar_t				*sense = NULL;
27647 	char				cdb[CDB_GROUP5];
27648 	int				bd_len;
27649 	int				current_speed = 0;
27650 	int				max_speed = 0;
27651 	int				rval;
27652 
27653 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27654 
27655 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27656 		return (ENXIO);
27657 	}
27658 
27659 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27660 
27661 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27662 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27663 	    SD_PATH_STANDARD)) != 0) {
27664 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27665 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27666 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27667 		return (rval);
27668 	}
27669 
27670 	/* Check the block descriptor len to handle only 1 block descriptor */
27671 	sense_mhp = (struct mode_header_grp2 *)sense;
27672 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27673 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27674 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27675 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27676 		    "block descriptor length\n");
27677 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27678 		return (EIO);
27679 	}
27680 
27681 	/* Calculate the current and maximum drive speeds */
27682 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27683 	current_speed = (sense_page[14] << 8) | sense_page[15];
27684 	max_speed = (sense_page[8] << 8) | sense_page[9];
27685 
27686 	/* Process the command */
27687 	switch (cmd) {
27688 	case CDROMGDRVSPEED:
27689 		current_speed /= SD_SPEED_1X;
27690 		if (ddi_copyout(&current_speed, (void *)data,
27691 		    sizeof (int), flag) != 0)
27692 			rval = EFAULT;
27693 		break;
27694 	case CDROMSDRVSPEED:
27695 		/* Convert the speed code to KB/sec */
27696 		switch ((uchar_t)data) {
27697 		case CDROM_NORMAL_SPEED:
27698 			current_speed = SD_SPEED_1X;
27699 			break;
27700 		case CDROM_DOUBLE_SPEED:
27701 			current_speed = 2 * SD_SPEED_1X;
27702 			break;
27703 		case CDROM_QUAD_SPEED:
27704 			current_speed = 4 * SD_SPEED_1X;
27705 			break;
27706 		case CDROM_TWELVE_SPEED:
27707 			current_speed = 12 * SD_SPEED_1X;
27708 			break;
27709 		case CDROM_MAXIMUM_SPEED:
27710 			current_speed = 0xffff;
27711 			break;
27712 		default:
27713 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27714 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27715 			    (uchar_t)data);
27716 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27717 			return (EINVAL);
27718 		}
27719 
27720 		/* Check the request against the drive's max speed. */
27721 		if (current_speed != 0xffff) {
27722 			if (current_speed > max_speed) {
27723 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27724 				return (EINVAL);
27725 			}
27726 		}
27727 
27728 		/*
27729 		 * Build and send the SET SPEED command
27730 		 *
27731 		 * Note: The SET SPEED (0xBB) command used in this routine is
27732 		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI;
		 * therefore the command is still implemented in this routine.
27735 		 */
27736 		bzero(cdb, sizeof (cdb));
27737 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
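		/*
		 * CDB bytes 2-3 carry the requested read speed in KB/sec,
		 * big-endian; 0xffff requests the drive's maximum speed.
		 */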
27738 		cdb[2] = (uchar_t)(current_speed >> 8);
27739 		cdb[3] = (uchar_t)current_speed;
27740 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27741 		com->uscsi_cdb	   = (caddr_t)cdb;
27742 		com->uscsi_cdblen  = CDB_GROUP5;
27743 		com->uscsi_bufaddr = NULL;
27744 		com->uscsi_buflen  = 0;
27745 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27746 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27747 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27748 		break;
27749 	default:
27750 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27751 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27752 		rval = EINVAL;
27753 	}
27754 
27755 	if (sense) {
27756 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27757 	}
27758 	if (com) {
27759 		kmem_free(com, sizeof (*com));
27760 	}
27761 	return (rval);
27762 }
27763 
27764 
27765 /*
27766  *    Function: sr_pause_resume()
27767  *
27768  * Description: This routine is the driver entry point for handling CD-ROM
27769  *		pause/resume ioctl requests. This only affects the audio play
27770  *		operation.
27771  *
27772  *   Arguments: dev - the device 'dev_t'
27773  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27774  *		      for setting the resume bit of the cdb.
27775  *
27776  * Return Code: the code returned by sd_send_scsi_cmd()
27777  *		EINVAL if invalid mode specified
27778  *
27779  */
27780 
27781 static int
27782 sr_pause_resume(dev_t dev, int cmd)
27783 {
27784 	struct sd_lun		*un;
27785 	struct uscsi_cmd	*com;
27786 	char			cdb[CDB_GROUP1];
27787 	int			rval;
27788 
27789 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27790 		return (ENXIO);
27791 	}
27792 
27793 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27794 	bzero(cdb, CDB_GROUP1);
27795 	cdb[0] = SCMD_PAUSE_RESUME;
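	/* CDB byte 8 bit 0 is the resume bit: 1 = resume, 0 = pause. */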
27796 	switch (cmd) {
27797 	case CDROMRESUME:
27798 		cdb[8] = 1;
27799 		break;
27800 	case CDROMPAUSE:
27801 		cdb[8] = 0;
27802 		break;
27803 	default:
27804 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27805 		    " Command '%x' Not Supported\n", cmd);
27806 		rval = EINVAL;
27807 		goto done;
27808 	}
27809 
27810 	com->uscsi_cdb    = cdb;
27811 	com->uscsi_cdblen = CDB_GROUP1;
27812 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27813 
27814 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27815 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27816 
27817 done:
27818 	kmem_free(com, sizeof (*com));
27819 	return (rval);
27820 }
27821 
27822 
27823 /*
27824  *    Function: sr_play_msf()
27825  *
27826  * Description: This routine is the driver entry point for handling CD-ROM
27827  *		ioctl requests to output the audio signals at the specified
27828  *		starting address and continue the audio play until the specified
27829  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27830  *		Frame (MSF) format.
27831  *
27832  *   Arguments: dev	- the device 'dev_t'
27833  *		data	- pointer to user provided audio msf structure,
27834  *		          specifying start/end addresses.
27835  *		flag	- this argument is a pass through to ddi_copyxxx()
27836  *		          directly from the mode argument of ioctl().
27837  *
27838  * Return Code: the code returned by sd_send_scsi_cmd()
27839  *		EFAULT if ddi_copyxxx() fails
27840  *		ENXIO if fail ddi_get_soft_state
27841  *		EINVAL if data pointer is NULL
27842  */
27843 
27844 static int
27845 sr_play_msf(dev_t dev, caddr_t data, int flag)
27846 {
27847 	struct sd_lun		*un;
27848 	struct uscsi_cmd	*com;
27849 	struct cdrom_msf	msf_struct;
27850 	struct cdrom_msf	*msf = &msf_struct;
27851 	char			cdb[CDB_GROUP1];
27852 	int			rval;
27853 
27854 	if (data == NULL) {
27855 		return (EINVAL);
27856 	}
27857 
27858 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27859 		return (ENXIO);
27860 	}
27861 
27862 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27863 		return (EFAULT);
27864 	}
27865 
27866 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27867 	bzero(cdb, CDB_GROUP1);
27868 	cdb[0] = SCMD_PLAYAUDIO_MSF;
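	/*
	 * Some drives expect the MSF address fields in BCD; the per-device
	 * un_f_cfg_playmsf_bcd flag selects between BCD and plain binary
	 * encoding of the start and end addresses below.
	 */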
27869 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27870 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27871 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27872 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27873 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27874 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27875 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27876 	} else {
27877 		cdb[3] = msf->cdmsf_min0;
27878 		cdb[4] = msf->cdmsf_sec0;
27879 		cdb[5] = msf->cdmsf_frame0;
27880 		cdb[6] = msf->cdmsf_min1;
27881 		cdb[7] = msf->cdmsf_sec1;
27882 		cdb[8] = msf->cdmsf_frame1;
27883 	}
27884 	com->uscsi_cdb    = cdb;
27885 	com->uscsi_cdblen = CDB_GROUP1;
27886 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27887 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27888 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27889 	kmem_free(com, sizeof (*com));
27890 	return (rval);
27891 }
27892 
27893 
27894 /*
27895  *    Function: sr_play_trkind()
27896  *
27897  * Description: This routine is the driver entry point for handling CD-ROM
27898  *		ioctl requests to output the audio signals at the specified
27899  *		starting address and continue the audio play until the specified
27900  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27901  *		format.
27902  *
27903  *   Arguments: dev	- the device 'dev_t'
27904  *		data	- pointer to user provided audio track/index structure,
27905  *		          specifying start/end addresses.
27906  *		flag	- this argument is a pass through to ddi_copyxxx()
27907  *		          directly from the mode argument of ioctl().
27908  *
27909  * Return Code: the code returned by sd_send_scsi_cmd()
27910  *		EFAULT if ddi_copyxxx() fails
27911  *		ENXIO if fail ddi_get_soft_state
27912  *		EINVAL if data pointer is NULL
27913  */
27914 
27915 static int
27916 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27917 {
27918 	struct cdrom_ti		ti_struct;
27919 	struct cdrom_ti		*ti = &ti_struct;
27920 	struct uscsi_cmd	*com = NULL;
27921 	char			cdb[CDB_GROUP1];
27922 	int			rval;
27923 
27924 	if (data == NULL) {
27925 		return (EINVAL);
27926 	}
27927 
27928 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27929 		return (EFAULT);
27930 	}
27931 
27932 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27933 	bzero(cdb, CDB_GROUP1);
27934 	cdb[0] = SCMD_PLAYAUDIO_TI;
27935 	cdb[4] = ti->cdti_trk0;
27936 	cdb[5] = ti->cdti_ind0;
27937 	cdb[7] = ti->cdti_trk1;
27938 	cdb[8] = ti->cdti_ind1;
27939 	com->uscsi_cdb    = cdb;
27940 	com->uscsi_cdblen = CDB_GROUP1;
27941 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27942 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27943 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27944 	kmem_free(com, sizeof (*com));
27945 	return (rval);
27946 }
27947 
27948 
27949 /*
27950  *    Function: sr_read_all_subcodes()
27951  *
27952  * Description: This routine is the driver entry point for handling CD-ROM
27953  *		ioctl requests to return raw subcode data while the target is
27954  *		playing audio (CDROMSUBCODE).
27955  *
27956  *   Arguments: dev	- the device 'dev_t'
27957  *		data	- pointer to user provided cdrom subcode structure,
27958  *		          specifying the transfer length and address.
27959  *		flag	- this argument is a pass through to ddi_copyxxx()
27960  *		          directly from the mode argument of ioctl().
27961  *
27962  * Return Code: the code returned by sd_send_scsi_cmd()
27963  *		EFAULT if ddi_copyxxx() fails
27964  *		ENXIO if fail ddi_get_soft_state
27965  *		EINVAL if data pointer is NULL
27966  */
27967 
27968 static int
27969 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27970 {
27971 	struct sd_lun		*un = NULL;
27972 	struct uscsi_cmd	*com = NULL;
27973 	struct cdrom_subcode	*subcode = NULL;
27974 	int			rval;
27975 	size_t			buflen;
27976 	char			cdb[CDB_GROUP5];
27977 
27978 #ifdef _MULTI_DATAMODEL
27979 	/* To support ILP32 applications in an LP64 world */
27980 	struct cdrom_subcode32		cdrom_subcode32;
27981 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27982 #endif
27983 	if (data == NULL) {
27984 		return (EINVAL);
27985 	}
27986 
27987 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27988 		return (ENXIO);
27989 	}
27990 
27991 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27992 
27993 #ifdef _MULTI_DATAMODEL
27994 	switch (ddi_model_convert_from(flag & FMODELS)) {
27995 	case DDI_MODEL_ILP32:
27996 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27997 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27998 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27999 			kmem_free(subcode, sizeof (struct cdrom_subcode));
28000 			return (EFAULT);
28001 		}
28002 		/* Convert the ILP32 uscsi data from the application to LP64 */
28003 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
28004 		break;
28005 	case DDI_MODEL_NONE:
28006 		if (ddi_copyin(data, subcode,
28007 		    sizeof (struct cdrom_subcode), flag)) {
28008 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28009 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
28010 			kmem_free(subcode, sizeof (struct cdrom_subcode));
28011 			return (EFAULT);
28012 		}
28013 		break;
28014 	}
28015 #else /* ! _MULTI_DATAMODEL */
28016 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
28017 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28018 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
28019 		kmem_free(subcode, sizeof (struct cdrom_subcode));
28020 		return (EFAULT);
28021 	}
28022 #endif /* _MULTI_DATAMODEL */
28023 
28024 	/*
28025 	 * Since MMC-2 expects max 3 bytes for length, check if the
28026 	 * length input is greater than 3 bytes
28027 	 */
28028 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
28029 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28030 		    "sr_read_all_subcodes: "
28031 		    "cdrom transfer length too large: %d (limit %d)\n",
28032 		    subcode->cdsc_length, 0xFFFFFF);
28033 		kmem_free(subcode, sizeof (struct cdrom_subcode));
28034 		return (EINVAL);
28035 	}
28036 
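	/*
	 * Each CD block carries CDROM_BLK_SUBCODE (96) bytes of raw P-W
	 * subcode data, so the transfer length below is the requested
	 * block count times the per-block subcode size.
	 */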
28037 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
28038 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28039 	bzero(cdb, CDB_GROUP5);
28040 
28041 	if (un->un_f_mmc_cap == TRUE) {
28042 		cdb[0] = (char)SCMD_READ_CD;
28043 		cdb[2] = (char)0xff;
28044 		cdb[3] = (char)0xff;
28045 		cdb[4] = (char)0xff;
28046 		cdb[5] = (char)0xff;
28047 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28048 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28049 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
28050 		cdb[10] = 1;
28051 	} else {
28052 		/*
		 * Note: A vendor specific command (0xDF) is being used here to
28054 		 * request a read of all subcodes.
28055 		 */
28056 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
28057 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
28058 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28059 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28060 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
28061 	}
28062 	com->uscsi_cdb	   = cdb;
28063 	com->uscsi_cdblen  = CDB_GROUP5;
28064 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
28065 	com->uscsi_buflen  = buflen;
28066 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28067 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28068 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28069 	kmem_free(subcode, sizeof (struct cdrom_subcode));
28070 	kmem_free(com, sizeof (*com));
28071 	return (rval);
28072 }
28073 
28074 
28075 /*
28076  *    Function: sr_read_subchannel()
28077  *
28078  * Description: This routine is the driver entry point for handling CD-ROM
28079  *		ioctl requests to return the Q sub-channel data of the CD
28080  *		current position block. (CDROMSUBCHNL) The data includes the
28081  *		track number, index number, absolute CD-ROM address (LBA or MSF
28082  *		format per the user) , track relative CD-ROM address (LBA or MSF
28083  *		format per the user), control data and audio status.
28084  *
28085  *   Arguments: dev	- the device 'dev_t'
28086  *		data	- pointer to user provided cdrom sub-channel structure
28087  *		flag	- this argument is a pass through to ddi_copyxxx()
28088  *		          directly from the mode argument of ioctl().
28089  *
28090  * Return Code: the code returned by sd_send_scsi_cmd()
28091  *		EFAULT if ddi_copyxxx() fails
28092  *		ENXIO if fail ddi_get_soft_state
28093  *		EINVAL if data pointer is NULL
28094  */
28095 
28096 static int
28097 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28098 {
28099 	struct sd_lun		*un;
28100 	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchannel;
	struct cdrom_subchnl	*subchnl = &subchannel;
28103 	char			cdb[CDB_GROUP1];
28104 	caddr_t			buffer;
28105 	int			rval;
28106 
28107 	if (data == NULL) {
28108 		return (EINVAL);
28109 	}
28110 
28111 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28112 	    (un->un_state == SD_STATE_OFFLINE)) {
28113 		return (ENXIO);
28114 	}
28115 
28116 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28117 		return (EFAULT);
28118 	}
28119 
28120 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28121 	bzero(cdb, CDB_GROUP1);
28122 	cdb[0] = SCMD_READ_SUBCHANNEL;
28123 	/* Set the MSF bit based on the user requested address format */
28124 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28125 	/*
28126 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
28127 	 * returned
28128 	 */
28129 	cdb[2] = 0x40;
28130 	/*
28131 	 * Set byte 3 to specify the return data format. A value of 0x01
28132 	 * indicates that the CD-ROM current position should be returned.
28133 	 */
28134 	cdb[3] = 0x01;
28135 	cdb[8] = 0x10;
28136 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28137 	com->uscsi_cdb	   = cdb;
28138 	com->uscsi_cdblen  = CDB_GROUP1;
28139 	com->uscsi_bufaddr = buffer;
28140 	com->uscsi_buflen  = 16;
28141 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28142 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28143 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28144 	if (rval != 0) {
28145 		kmem_free(buffer, 16);
28146 		kmem_free(com, sizeof (*com));
28147 		return (rval);
28148 	}
28149 
28150 	/* Process the returned Q sub-channel data */
28151 	subchnl->cdsc_audiostatus = buffer[1];
28152 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
28153 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28154 	subchnl->cdsc_trk	= buffer[6];
28155 	subchnl->cdsc_ind	= buffer[7];
28156 	if (subchnl->cdsc_format & CDROM_LBA) {
28157 		subchnl->cdsc_absaddr.lba =
28158 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28159 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28160 		subchnl->cdsc_reladdr.lba =
28161 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28162 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28163 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28164 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28165 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28166 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28167 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28168 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28169 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28170 	} else {
28171 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28172 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28173 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28174 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28175 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28176 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28177 	}
28178 	kmem_free(buffer, 16);
28179 	kmem_free(com, sizeof (*com));
28180 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28181 	    != 0) {
28182 		return (EFAULT);
28183 	}
28184 	return (rval);
28185 }
28186 
28187 
28188 /*
28189  *    Function: sr_read_tocentry()
28190  *
28191  * Description: This routine is the driver entry point for handling CD-ROM
28192  *		ioctl requests to read from the Table of Contents (TOC)
28193  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28194  *		fields, the starting address (LBA or MSF format per the user)
28195  *		and the data mode if the user specified track is a data track.
28196  *
28197  *		Note: The READ HEADER (0x44) command used in this routine is
28198  *		obsolete per the SCSI MMC spec but still supported in the
28199  *		MT FUJI vendor spec. Most equipment is adhereing to MT FUJI
28200  *		therefore the command is still implemented in this routine.
28201  *
28202  *   Arguments: dev	- the device 'dev_t'
28203  *		data	- pointer to user provided toc entry structure,
28204  *			  specifying the track # and the address format
28205  *			  (LBA or MSF).
28206  *		flag	- this argument is a pass through to ddi_copyxxx()
28207  *		          directly from the mode argument of ioctl().
28208  *
28209  * Return Code: the code returned by sd_send_scsi_cmd()
28210  *		EFAULT if ddi_copyxxx() fails
28211  *		ENXIO if fail ddi_get_soft_state
28212  *		EINVAL if data pointer is NULL
28213  */
28214 
28215 static int
28216 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28217 {
28218 	struct sd_lun		*un = NULL;
28219 	struct uscsi_cmd	*com;
28220 	struct cdrom_tocentry	toc_entry;
28221 	struct cdrom_tocentry	*entry = &toc_entry;
28222 	caddr_t			buffer;
28223 	int			rval;
28224 	char			cdb[CDB_GROUP1];
28225 
28226 	if (data == NULL) {
28227 		return (EINVAL);
28228 	}
28229 
28230 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28231 	    (un->un_state == SD_STATE_OFFLINE)) {
28232 		return (ENXIO);
28233 	}
28234 
28235 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28236 		return (EFAULT);
28237 	}
28238 
28239 	/* Validate the requested track and address format */
28240 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28241 		return (EINVAL);
28242 	}
28243 
28244 	if (entry->cdte_track == 0) {
28245 		return (EINVAL);
28246 	}
28247 
28248 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28249 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28250 	bzero(cdb, CDB_GROUP1);
28251 
28252 	cdb[0] = SCMD_READ_TOC;
28253 	/* Set the MSF bit based on the user requested address format  */
28254 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28255 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28256 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28257 	} else {
28258 		cdb[6] = entry->cdte_track;
28259 	}
28260 
28261 	/*
28262 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28263 	 * (4 byte TOC response header + 8 byte track descriptor)
28264 	 */
28265 	cdb[8] = 12;
28266 	com->uscsi_cdb	   = cdb;
28267 	com->uscsi_cdblen  = CDB_GROUP1;
28268 	com->uscsi_bufaddr = buffer;
28269 	com->uscsi_buflen  = 0x0C;
28270 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28271 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28272 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28273 	if (rval != 0) {
28274 		kmem_free(buffer, 12);
28275 		kmem_free(com, sizeof (*com));
28276 		return (rval);
28277 	}
28278 
28279 	/* Process the toc entry */
28280 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28281 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28282 	if (entry->cdte_format & CDROM_LBA) {
28283 		entry->cdte_addr.lba =
28284 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28285 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28286 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28287 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28288 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28289 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28290 		/*
28291 		 * Send a READ TOC command using the LBA address format to get
28292 		 * the LBA for the track requested so it can be used in the
28293 		 * READ HEADER request
28294 		 *
28295 		 * Note: The MSF bit of the READ HEADER command specifies the
28296 		 * output format. The block address specified in that command
28297 		 * must be in LBA format.
28298 		 */
28299 		cdb[1] = 0;
28300 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28301 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28302 		if (rval != 0) {
28303 			kmem_free(buffer, 12);
28304 			kmem_free(com, sizeof (*com));
28305 			return (rval);
28306 		}
28307 	} else {
28308 		entry->cdte_addr.msf.minute	= buffer[9];
28309 		entry->cdte_addr.msf.second	= buffer[10];
28310 		entry->cdte_addr.msf.frame	= buffer[11];
28311 		/*
28312 		 * Send a READ TOC command using the LBA address format to get
28313 		 * the LBA for the track requested so it can be used in the
28314 		 * READ HEADER request
28315 		 *
28316 		 * Note: The MSF bit of the READ HEADER command specifies the
28317 		 * output format. The block address specified in that command
28318 		 * must be in LBA format.
28319 		 */
28320 		cdb[1] = 0;
28321 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28322 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28323 		if (rval != 0) {
28324 			kmem_free(buffer, 12);
28325 			kmem_free(com, sizeof (*com));
28326 			return (rval);
28327 		}
28328 	}
28329 
28330 	/*
28331 	 * Build and send the READ HEADER command to determine the data mode of
28332 	 * the user specified track.
28333 	 */
28334 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28335 	    (entry->cdte_track != CDROM_LEADOUT)) {
28336 		bzero(cdb, CDB_GROUP1);
28337 		cdb[0] = SCMD_READ_HEADER;
28338 		cdb[2] = buffer[8];
28339 		cdb[3] = buffer[9];
28340 		cdb[4] = buffer[10];
28341 		cdb[5] = buffer[11];
28342 		cdb[8] = 0x08;
28343 		com->uscsi_buflen = 0x08;
28344 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28345 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28346 		if (rval == 0) {
28347 			entry->cdte_datamode = buffer[0];
28348 		} else {
28349 			/*
28350 			 * READ HEADER command failed, since this is
28351 			 * obsoleted in one spec, its better to return
28352 			 * -1 for an invlid track so that we can still
28353 			 * recieve the rest of the TOC data.
28354 			 */
28355 			entry->cdte_datamode = (uchar_t)-1;
28356 		}
28357 	} else {
28358 		entry->cdte_datamode = (uchar_t)-1;
28359 	}
28360 
28361 	kmem_free(buffer, 12);
28362 	kmem_free(com, sizeof (*com));
28363 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28364 		return (EFAULT);
28365 
28366 	return (rval);
28367 }
28368 
28369 
28370 /*
28371  *    Function: sr_read_tochdr()
28372  *
28373  * Description: This routine is the driver entry point for handling CD-ROM
28374  * 		ioctl requests to read the Table of Contents (TOC) header
28375  *		(CDROMREADTOCHDR). The TOC header consists of the disk
28376  *		starting and ending track numbers.
28377  *
28378  *   Arguments: dev	- the device 'dev_t'
28379  *		data	- pointer to user provided toc header structure,
28380  *			  specifying the starting and ending track numbers.
28381  *		flag	- this argument is a pass through to ddi_copyxxx()
28382  *			  directly from the mode argument of ioctl().
28383  *
28384  * Return Code: the code returned by sd_send_scsi_cmd()
28385  *		EFAULT if ddi_copyxxx() fails
28386  *		ENXIO if fail ddi_get_soft_state
28387  *		EINVAL if data pointer is NULL
28388  */
28389 
28390 static int
28391 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28392 {
28393 	struct sd_lun		*un;
28394 	struct uscsi_cmd	*com;
28395 	struct cdrom_tochdr	toc_header;
28396 	struct cdrom_tochdr	*hdr = &toc_header;
28397 	char			cdb[CDB_GROUP1];
28398 	int			rval;
28399 	caddr_t			buffer;
28400 
28401 	if (data == NULL) {
28402 		return (EINVAL);
28403 	}
28404 
28405 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28406 	    (un->un_state == SD_STATE_OFFLINE)) {
28407 		return (ENXIO);
28408 	}
28409 
28410 	buffer = kmem_zalloc(4, KM_SLEEP);
28411 	bzero(cdb, CDB_GROUP1);
28412 	cdb[0] = SCMD_READ_TOC;
28413 	/*
28414 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28415 	 * that the TOC header should be returned
28416 	 */
28417 	cdb[6] = 0x00;
28418 	/*
28419 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28420 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28421 	 */
28422 	cdb[8] = 0x04;
28423 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28424 	com->uscsi_cdb	   = cdb;
28425 	com->uscsi_cdblen  = CDB_GROUP1;
28426 	com->uscsi_bufaddr = buffer;
28427 	com->uscsi_buflen  = 0x04;
28428 	com->uscsi_timeout = 300;
28429 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28430 
28431 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28432 	    UIO_SYSSPACE, SD_PATH_STANDARD);
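	/*
	 * Note: rval is not checked here. The buffer was zero-filled, so
	 * on a command failure the track numbers copied out below are
	 * simply zero, and the error in rval is still returned to the
	 * caller after the copyout.
	 */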
28433 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28434 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28435 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28436 	} else {
28437 		hdr->cdth_trk0 = buffer[2];
28438 		hdr->cdth_trk1 = buffer[3];
28439 	}
28440 	kmem_free(buffer, 4);
28441 	kmem_free(com, sizeof (*com));
28442 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28443 		return (EFAULT);
28444 	}
28445 	return (rval);
28446 }
28447 
28448 
28449 /*
28450  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28451  * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
28452  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28453  * digital audio and extended architecture digital audio. These modes are
28454  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28455  * MMC specs.
28456  *
28457  * In addition to support for the various data formats these routines also
28458  * include support for devices that implement only the direct access READ
28459  * commands (0x08, 0x28), devices that implement the READ_CD commands
28460  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28461  * READ CDXA commands (0xD8, 0xDB)
28462  */
28463 
28464 /*
28465  *    Function: sr_read_mode1()
28466  *
28467  * Description: This routine is the driver entry point for handling CD-ROM
28468  *		ioctl read mode1 requests (CDROMREADMODE1).
28469  *
28470  *   Arguments: dev	- the device 'dev_t'
28471  *		data	- pointer to user provided cd read structure specifying
28472  *			  the lba buffer address and length.
28473  *		flag	- this argument is a pass through to ddi_copyxxx()
28474  *			  directly from the mode argument of ioctl().
28475  *
28476  * Return Code: the code returned by sd_send_scsi_cmd()
28477  *		EFAULT if ddi_copyxxx() fails
28478  *		ENXIO if fail ddi_get_soft_state
28479  *		EINVAL if data pointer is NULL
28480  */
28481 
28482 static int
28483 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28484 {
28485 	struct sd_lun		*un;
28486 	struct cdrom_read	mode1_struct;
28487 	struct cdrom_read	*mode1 = &mode1_struct;
28488 	int			rval;
28489 #ifdef _MULTI_DATAMODEL
28490 	/* To support ILP32 applications in an LP64 world */
28491 	struct cdrom_read32	cdrom_read32;
28492 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28493 #endif /* _MULTI_DATAMODEL */
28494 
28495 	if (data == NULL) {
28496 		return (EINVAL);
28497 	}
28498 
28499 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28500 	    (un->un_state == SD_STATE_OFFLINE)) {
28501 		return (ENXIO);
28502 	}
28503 
28504 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28505 	    "sd_read_mode1: entry: un:0x%p\n", un);
28506 
28507 #ifdef _MULTI_DATAMODEL
28508 	switch (ddi_model_convert_from(flag & FMODELS)) {
28509 	case DDI_MODEL_ILP32:
28510 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28511 			return (EFAULT);
28512 		}
28513 		/* Convert the ILP32 uscsi data from the application to LP64 */
28514 		cdrom_read32tocdrom_read(cdrd32, mode1);
28515 		break;
28516 	case DDI_MODEL_NONE:
28517 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28518 			return (EFAULT);
28519 		}
28520 	}
28521 #else /* ! _MULTI_DATAMODEL */
28522 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28523 		return (EFAULT);
28524 	}
28525 #endif /* _MULTI_DATAMODEL */
28526 
28527 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28528 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28529 
28530 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28531 	    "sd_read_mode1: exit: un:0x%p\n", un);
28532 
28533 	return (rval);
28534 }
28535 
28536 
28537 /*
28538  *    Function: sr_read_cd_mode2()
28539  *
28540  * Description: This routine is the driver entry point for handling CD-ROM
28541  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28542  *		support the READ CD (0xBE) command or the 1st generation
28543  *		READ CD (0xD4) command.
28544  *
28545  *   Arguments: dev	- the device 'dev_t'
28546  *		data	- pointer to user provided cd read structure specifying
28547  *			  the lba buffer address and length.
28548  *		flag	- this argument is a pass through to ddi_copyxxx()
28549  *			  directly from the mode argument of ioctl().
28550  *
28551  * Return Code: the code returned by sd_send_scsi_cmd()
28552  *		EFAULT if ddi_copyxxx() fails
28553  *		ENXIO if fail ddi_get_soft_state
28554  *		EINVAL if data pointer is NULL
28555  */
28556 
28557 static int
28558 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28559 {
28560 	struct sd_lun		*un;
28561 	struct uscsi_cmd	*com;
28562 	struct cdrom_read	mode2_struct;
28563 	struct cdrom_read	*mode2 = &mode2_struct;
28564 	uchar_t			cdb[CDB_GROUP5];
28565 	int			nblocks;
28566 	int			rval;
28567 #ifdef _MULTI_DATAMODEL
28568 	/*  To support ILP32 applications in an LP64 world */
28569 	struct cdrom_read32	cdrom_read32;
28570 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28571 #endif /* _MULTI_DATAMODEL */
28572 
28573 	if (data == NULL) {
28574 		return (EINVAL);
28575 	}
28576 
28577 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28578 	    (un->un_state == SD_STATE_OFFLINE)) {
28579 		return (ENXIO);
28580 	}
28581 
28582 #ifdef _MULTI_DATAMODEL
28583 	switch (ddi_model_convert_from(flag & FMODELS)) {
28584 	case DDI_MODEL_ILP32:
28585 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28586 			return (EFAULT);
28587 		}
28588 		/* Convert the ILP32 uscsi data from the application to LP64 */
28589 		cdrom_read32tocdrom_read(cdrd32, mode2);
28590 		break;
28591 	case DDI_MODEL_NONE:
28592 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28593 			return (EFAULT);
28594 		}
28595 		break;
28596 	}
28597 
28598 #else /* ! _MULTI_DATAMODEL */
28599 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28600 		return (EFAULT);
28601 	}
28602 #endif /* _MULTI_DATAMODEL */
28603 
28604 	bzero(cdb, sizeof (cdb));
28605 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28606 		/* Read command supported by 1st generation atapi drives */
28607 		cdb[0] = SCMD_READ_CDD4;
28608 	} else {
28609 		/* Universal CD Access Command */
28610 		cdb[0] = SCMD_READ_CD;
28611 	}
28612 
28613 	/*
28614 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28615 	 */
28616 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28617 
28618 	/* set the start address */
28619 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28620 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28621 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28622 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28623 
28624 	/* set the transfer length */
28625 	nblocks = mode2->cdread_buflen / 2336;
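	/*
	 * Note: any residual bytes in the user buffer beyond a whole
	 * multiple of 2336 are not transferred.
	 */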
28626 	cdb[6] = (uchar_t)(nblocks >> 16);
28627 	cdb[7] = (uchar_t)(nblocks >> 8);
28628 	cdb[8] = (uchar_t)nblocks;
28629 
28630 	/* set the filter bits */
28631 	cdb[9] = CDROM_READ_CD_USERDATA;
28632 
28633 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28634 	com->uscsi_cdb = (caddr_t)cdb;
28635 	com->uscsi_cdblen = sizeof (cdb);
28636 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28637 	com->uscsi_buflen = mode2->cdread_buflen;
28638 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28639 
28640 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28641 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28642 	kmem_free(com, sizeof (*com));
28643 	return (rval);
28644 }
28645 
28646 
28647 /*
28648  *    Function: sr_read_mode2()
28649  *
28650  * Description: This routine is the driver entry point for handling CD-ROM
28651  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28652  *		do not support the READ CD (0xBE) command.
28653  *
28654  *   Arguments: dev	- the device 'dev_t'
28655  *		data	- pointer to user provided cd read structure specifying
28656  *			  the lba buffer address and length.
28657  *		flag	- this argument is a pass through to ddi_copyxxx()
28658  *			  directly from the mode argument of ioctl().
28659  *
28660  * Return Code: the code returned by sd_send_scsi_cmd()
28661  *		EFAULT if ddi_copyxxx() fails
28662  *		ENXIO if fail ddi_get_soft_state
28663  *		EINVAL if data pointer is NULL
28664  *		EIO if fail to reset block size
28665  *		EAGAIN if commands are in progress in the driver
28666  */
28667 
28668 static int
28669 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28670 {
28671 	struct sd_lun		*un;
28672 	struct cdrom_read	mode2_struct;
28673 	struct cdrom_read	*mode2 = &mode2_struct;
28674 	int			rval;
28675 	uint32_t		restore_blksize;
28676 	struct uscsi_cmd	*com;
28677 	uchar_t			cdb[CDB_GROUP0];
28678 	int			nblocks;
28679 
28680 #ifdef _MULTI_DATAMODEL
28681 	/* To support ILP32 applications in an LP64 world */
28682 	struct cdrom_read32	cdrom_read32;
28683 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28684 #endif /* _MULTI_DATAMODEL */
28685 
28686 	if (data == NULL) {
28687 		return (EINVAL);
28688 	}
28689 
28690 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28691 	    (un->un_state == SD_STATE_OFFLINE)) {
28692 		return (ENXIO);
28693 	}
28694 
28695 	/*
28696 	 * Because this routine will update the device and driver block size
28697 	 * being used, we want to make sure there are no commands in progress.
28698 	 * If commands are in progress, the user will have to try again.
28699 	 *
28700 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28701 	 * in sdioctl to protect commands from sdioctl through to the top of
28702 	 * sd_uscsi_strategy. See sdioctl for details.
28703 	 */
28704 	mutex_enter(SD_MUTEX(un));
28705 	if (un->un_ncmds_in_driver != 1) {
28706 		mutex_exit(SD_MUTEX(un));
28707 		return (EAGAIN);
28708 	}
28709 	mutex_exit(SD_MUTEX(un));
28710 
28711 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28712 	    "sd_read_mode2: entry: un:0x%p\n", un);
28713 
28714 #ifdef _MULTI_DATAMODEL
28715 	switch (ddi_model_convert_from(flag & FMODELS)) {
28716 	case DDI_MODEL_ILP32:
28717 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28718 			return (EFAULT);
28719 		}
28720 		/* Convert the ILP32 uscsi data from the application to LP64 */
28721 		cdrom_read32tocdrom_read(cdrd32, mode2);
28722 		break;
28723 	case DDI_MODEL_NONE:
28724 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28725 			return (EFAULT);
28726 		}
28727 		break;
28728 	}
28729 #else /* ! _MULTI_DATAMODEL */
28730 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28731 		return (EFAULT);
28732 	}
28733 #endif /* _MULTI_DATAMODEL */
28734 
28735 	/* Store the current target block size for restoration later */
28736 	restore_blksize = un->un_tgt_blocksize;
28737 
28738 	/* Change the device and soft state target block size to 2336 */
28739 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28740 		rval = EIO;
28741 		goto done;
28742 	}
28743 
28744 
28745 	bzero(cdb, sizeof (cdb));
28746 
28747 	/* set READ operation */
28748 	cdb[0] = SCMD_READ;
28749 
28750 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28751 	mode2->cdread_lba >>= 2;
28752 
28753 	/* set the start address */
28754 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28755 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28756 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28757 
28758 	/* set the transfer length */
28759 	nblocks = mode2->cdread_buflen / 2336;
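	/*
	 * Note: the Group 0 CDB carries only an 8-bit transfer length,
	 * so requests larger than 255 blocks are silently truncated to
	 * the low 8 bits below.
	 */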
28760 	cdb[4] = (uchar_t)nblocks & 0xFF;
28761 
28762 	/* build command */
28763 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28764 	com->uscsi_cdb = (caddr_t)cdb;
28765 	com->uscsi_cdblen = sizeof (cdb);
28766 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28767 	com->uscsi_buflen = mode2->cdread_buflen;
28768 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28769 
28770 	/*
28771 	 * Issue SCSI command with user space address for read buffer.
28772 	 *
28773 	 * This sends the command through main channel in the driver.
28774 	 *
28775 	 * Since this is accessed via an IOCTL call, we go through the
28776 	 * standard path, so that if the device was powered down, then
28777 	 * it would be 'awakened' to handle the command.
28778 	 */
28779 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28780 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28781 
28782 	kmem_free(com, sizeof (*com));
28783 
28784 	/* Restore the device and soft state target block size */
28785 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28786 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28787 		    "can't do switch back to mode 1\n");
28788 		/*
28789 		 * If sd_send_scsi_READ succeeded we still need to report
28790 		 * an error because we failed to reset the block size
28791 		 */
28792 		if (rval == 0) {
28793 			rval = EIO;
28794 		}
28795 	}
28796 
28797 done:
28798 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28799 	    "sd_read_mode2: exit: un:0x%p\n", un);
28800 
28801 	return (rval);
28802 }
28803 
28804 
28805 /*
28806  *    Function: sr_sector_mode()
28807  *
28808  * Description: This utility function is used by sr_read_mode2 to set the
28809  *		target block size based on the user specified size. This is
28810  *		a legacy implementation based upon a vendor specific mode page.
28811  *
28812  *   Arguments: dev	- the device 'dev_t'
28813  *		blksize	- the block size to be set, either 2336
28814  *			  (SD_MODE2_BLKSIZE) or 512.
28815  *
28816  * Return Code: 0 if the block size was set successfully
28817  *		the code returned by sd_send_scsi_MODE_SENSE() or
28818  *		sd_send_scsi_MODE_SELECT() on failure
28819  *		ENXIO if fail ddi_get_soft_state
28820  */
28821 
28822 static int
28823 sr_sector_mode(dev_t dev, uint32_t blksize)
28824 {
28825 	struct sd_lun	*un;
28826 	uchar_t		*sense;
28827 	uchar_t		*select;
28828 	int		rval;
28829 
28830 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28831 	    (un->un_state == SD_STATE_OFFLINE)) {
28832 		return (ENXIO);
28833 	}
28834 
28835 	sense = kmem_zalloc(20, KM_SLEEP);
28836 
28837 	/* Note: This is a vendor specific mode page (0x81) */
28838 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28839 	    SD_PATH_STANDARD)) != 0) {
28840 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28841 		    "sr_sector_mode: Mode Sense failed\n");
28842 		kmem_free(sense, 20);
28843 		return (rval);
28844 	}
28845 	select = kmem_zalloc(20, KM_SLEEP);
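	/*
	 * Build the mode select parameter list: a zeroed 4-byte header,
	 * an 8-byte block descriptor (select[3] = 0x08) carrying the new
	 * block size in bytes 10-11, and what appears to be the vendor
	 * page (page byte at select[12], length 0x06) with bytes 14-15
	 * seeded from the sense data.
	 */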
28846 	select[3] = 0x08;
28847 	select[10] = ((blksize >> 8) & 0xff);
28848 	select[11] = (blksize & 0xff);
28849 	select[12] = 0x01;
28850 	select[13] = 0x06;
28851 	select[14] = sense[14];
28852 	select[15] = sense[15];
28853 	if (blksize == SD_MODE2_BLKSIZE) {
28854 		select[14] |= 0x01;
28855 	}
28856 
28857 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28858 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28859 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28860 		    "sr_sector_mode: Mode Select failed\n");
28861 	} else {
28862 		/*
28863 		 * Only update the softstate block size if we successfully
28864 		 * changed the device block mode.
28865 		 */
28866 		mutex_enter(SD_MUTEX(un));
28867 		sd_update_block_info(un, blksize, 0);
28868 		mutex_exit(SD_MUTEX(un));
28869 	}
28870 	kmem_free(sense, 20);
28871 	kmem_free(select, 20);
28872 	return (rval);
28873 }
28874 
28875 
28876 /*
28877  *    Function: sr_read_cdda()
28878  *
28879  * Description: This routine is the driver entry point for handling CD-ROM
28880  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28881  *		the target supports CDDA these requests are handled via a vendor
28882  *		specific command (0xD8); if the target does not support CDDA
28883  *		these requests are handled via the READ CD command (0xBE).
28884  *
28885  *   Arguments: dev	- the device 'dev_t'
28886  *		data	- pointer to user provided CD-DA structure specifying
28887  *			  the track starting address, transfer length, and
28888  *			  subcode options.
28889  *		flag	- this argument is a pass through to ddi_copyxxx()
28890  *			  directly from the mode argument of ioctl().
28891  *
28892  * Return Code: the code returned by sd_send_scsi_cmd()
28893  *		EFAULT if ddi_copyxxx() fails
28894  *		ENXIO if fail ddi_get_soft_state
28895  *		EINVAL if invalid arguments are provided
28896  *		ENOTTY
28897  */
28898 
28899 static int
28900 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28901 {
28902 	struct sd_lun			*un;
28903 	struct uscsi_cmd		*com;
28904 	struct cdrom_cdda		*cdda;
28905 	int				rval;
28906 	size_t				buflen;
28907 	char				cdb[CDB_GROUP5];
28908 
28909 #ifdef _MULTI_DATAMODEL
28910 	/* To support ILP32 applications in an LP64 world */
28911 	struct cdrom_cdda32	cdrom_cdda32;
28912 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28913 #endif /* _MULTI_DATAMODEL */
28914 
28915 	if (data == NULL) {
28916 		return (EINVAL);
28917 	}
28918 
28919 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28920 		return (ENXIO);
28921 	}
28922 
28923 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28924 
28925 #ifdef _MULTI_DATAMODEL
28926 	switch (ddi_model_convert_from(flag & FMODELS)) {
28927 	case DDI_MODEL_ILP32:
28928 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28929 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28930 			    "sr_read_cdda: ddi_copyin Failed\n");
28931 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28932 			return (EFAULT);
28933 		}
28934 		/* Convert the ILP32 uscsi data from the application to LP64 */
28935 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28936 		break;
28937 	case DDI_MODEL_NONE:
28938 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28939 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28940 			    "sr_read_cdda: ddi_copyin Failed\n");
28941 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28942 			return (EFAULT);
28943 		}
28944 		break;
28945 	}
28946 #else /* ! _MULTI_DATAMODEL */
28947 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28948 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28949 		    "sr_read_cdda: ddi_copyin Failed\n");
28950 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28951 		return (EFAULT);
28952 	}
28953 #endif /* _MULTI_DATAMODEL */
28954 
28955 	/*
28956 	 * Since MMC-2 expects max 3 bytes for length, check if the
28957 	 * length input is greater than 3 bytes
28958 	 */
28959 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28960 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28961 		    "cdrom transfer length too large: %d (limit %d)\n",
28962 		    cdda->cdda_length, 0xFFFFFF);
28963 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28964 		return (EINVAL);
28965 	}
28966 
28967 	switch (cdda->cdda_subcode) {
28968 	case CDROM_DA_NO_SUBCODE:
28969 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28970 		break;
28971 	case CDROM_DA_SUBQ:
28972 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28973 		break;
28974 	case CDROM_DA_ALL_SUBCODE:
28975 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28976 		break;
28977 	case CDROM_DA_SUBCODE_ONLY:
28978 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28979 		break;
28980 	default:
28981 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28982 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28983 		    cdda->cdda_subcode);
28984 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28985 		return (EINVAL);
28986 	}
28987 
28988 	/* Build and send the command */
28989 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28990 	bzero(cdb, CDB_GROUP5);
28991 
28992 	if (un->un_f_cfg_cdda == TRUE) {
28993 		cdb[0] = (char)SCMD_READ_CD;
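		/*
		 * Expected sector type CD-DA: per MMC, this is bits 2-4 of
		 * byte 1 set to 001b, i.e. 0x04.
		 */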
28994 		cdb[1] = 0x04;
28995 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28996 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28997 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28998 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28999 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29000 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29001 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
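		/* Per MMC, byte 9 flag 0x10 requests user data only */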
29002 		cdb[9] = 0x10;
29003 		switch (cdda->cdda_subcode) {
29004 		case CDROM_DA_NO_SUBCODE :
29005 			cdb[10] = 0x0;
29006 			break;
29007 		case CDROM_DA_SUBQ :
29008 			cdb[10] = 0x2;
29009 			break;
29010 		case CDROM_DA_ALL_SUBCODE :
29011 			cdb[10] = 0x1;
29012 			break;
29013 		case CDROM_DA_SUBCODE_ONLY :
29014 			/* FALLTHROUGH */
29015 		default :
29016 			kmem_free(cdda, sizeof (struct cdrom_cdda));
29017 			kmem_free(com, sizeof (*com));
29018 			return (ENOTTY);
29019 		}
29020 	} else {
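		/*
		 * Vendor unique READ CDDA (0xD8): 4-byte starting LBA in
		 * bytes 2-5, 4-byte transfer length in bytes 6-9, and the
		 * subcode selection in byte 10.
		 */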
29021 		cdb[0] = (char)SCMD_READ_CDDA;
29022 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
29023 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
29024 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
29025 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
29026 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
29027 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29028 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29029 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
29030 		cdb[10] = cdda->cdda_subcode;
29031 	}
29032 
29033 	com->uscsi_cdb = cdb;
29034 	com->uscsi_cdblen = CDB_GROUP5;
29035 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
29036 	com->uscsi_buflen = buflen;
29037 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29038 
29039 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29040 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29041 
29042 	kmem_free(cdda, sizeof (struct cdrom_cdda));
29043 	kmem_free(com, sizeof (*com));
29044 	return (rval);
29045 }
29046 
29047 
29048 /*
29049  *    Function: sr_read_cdxa()
29050  *
29051  * Description: This routine is the driver entry point for handling CD-ROM
29052  *		ioctl requests to return CD-XA (Extended Architecture) data
29053  *		(CDROMCDXA).
29054  *
29055  *   Arguments: dev	- the device 'dev_t'
29056  *		data	- pointer to user provided CD-XA structure specifying
29057  *			  the data starting address, transfer length, and format
29058  *		flag	- this argument is a pass through to ddi_copyxxx()
29059  *			  directly from the mode argument of ioctl().
29060  *
29061  * Return Code: the code returned by sd_send_scsi_cmd()
29062  *		EFAULT if ddi_copyxxx() fails
29063  *		ENXIO if fail ddi_get_soft_state
29064  *		EINVAL if data pointer is NULL
29065  */
29066 
29067 static int
29068 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
29069 {
29070 	struct sd_lun		*un;
29071 	struct uscsi_cmd	*com;
29072 	struct cdrom_cdxa	*cdxa;
29073 	int			rval;
29074 	size_t			buflen;
29075 	char			cdb[CDB_GROUP5];
29076 	uchar_t			read_flags;
29077 
29078 #ifdef _MULTI_DATAMODEL
29079 	/* To support ILP32 applications in an LP64 world */
29080 	struct cdrom_cdxa32		cdrom_cdxa32;
29081 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29082 #endif /* _MULTI_DATAMODEL */
29083 
29084 	if (data == NULL) {
29085 		return (EINVAL);
29086 	}
29087 
29088 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29089 		return (ENXIO);
29090 	}
29091 
29092 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29093 
29094 #ifdef _MULTI_DATAMODEL
29095 	switch (ddi_model_convert_from(flag & FMODELS)) {
29096 	case DDI_MODEL_ILP32:
29097 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29098 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29099 			return (EFAULT);
29100 		}
29101 		/*
29102 		 * Convert the ILP32 uscsi data from the
29103 		 * application to LP64 for internal use.
29104 		 */
29105 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29106 		break;
29107 	case DDI_MODEL_NONE:
29108 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29109 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29110 			return (EFAULT);
29111 		}
29112 		break;
29113 	}
29114 #else /* ! _MULTI_DATAMODEL */
29115 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29116 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29117 		return (EFAULT);
29118 	}
29119 #endif /* _MULTI_DATAMODEL */
29120 
29121 	/*
29122 	 * Since MMC-2 expects max 3 bytes for length, check if the
29123 	 * length input is greater than 3 bytes
29124 	 */
29125 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29126 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29127 		    "cdrom transfer length too large: %d (limit %d)\n",
29128 		    cdxa->cdxa_length, 0xFFFFFF);
29129 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29130 		return (EINVAL);
29131 	}
29132 
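	/*
	 * Map the requested format to a per-block transfer size and, for
	 * the READ CD path below, to the byte 9 flags: 0x10 returns user
	 * data only, 0xf8 returns the full raw sector (sync, headers,
	 * user data, EDC/ECC), and 0xfc additionally returns C2 error bits.
	 */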
29133 	switch (cdxa->cdxa_format) {
29134 	case CDROM_XA_DATA:
29135 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29136 		read_flags = 0x10;
29137 		break;
29138 	case CDROM_XA_SECTOR_DATA:
29139 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29140 		read_flags = 0xf8;
29141 		break;
29142 	case CDROM_XA_DATA_W_ERROR:
29143 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29144 		read_flags = 0xfc;
29145 		break;
29146 	default:
29147 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29148 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29149 		    cdxa->cdxa_format);
29150 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29151 		return (EINVAL);
29152 	}
29153 
29154 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29155 	bzero(cdb, CDB_GROUP5);
29156 	if (un->un_f_mmc_cap == TRUE) {
29157 		cdb[0] = (char)SCMD_READ_CD;
29158 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29159 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29160 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29161 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29162 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29163 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29164 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29165 		cdb[9] = (char)read_flags;
29166 	} else {
29167 		/*
29168 		 * Note: A vendor specific command (0xDB) is being used here to
29169 		 * request a read of all subcodes.
29170 		 */
29171 		cdb[0] = (char)SCMD_READ_CDXA;
29172 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29173 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29174 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29175 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29176 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29177 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29178 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29179 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29180 		cdb[10] = cdxa->cdxa_format;
29181 	}
29182 	com->uscsi_cdb	   = cdb;
29183 	com->uscsi_cdblen  = CDB_GROUP5;
29184 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29185 	com->uscsi_buflen  = buflen;
29186 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29187 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29188 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29189 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29190 	kmem_free(com, sizeof (*com));
29191 	return (rval);
29192 }
29193 
29194 
29195 /*
29196  *    Function: sr_eject()
29197  *
29198  * Description: This routine is the driver entry point for handling CD-ROM
29199  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29200  *
29201  *   Arguments: dev	- the device 'dev_t'
29202  *
29203  * Return Code: ENXIO, or the code returned by the unlock/eject commands
29204  */
29205 
29206 static int
29207 sr_eject(dev_t dev)
29208 {
29209 	struct sd_lun	*un;
29210 	int		rval;
29211 
29212 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29213 	    (un->un_state == SD_STATE_OFFLINE)) {
29214 		return (ENXIO);
29215 	}
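	/*
	 * Unlock the door first; a START STOP UNIT eject fails while
	 * medium removal is prevented.
	 */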
29216 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29217 	    SD_PATH_STANDARD)) != 0) {
29218 		return (rval);
29219 	}
29220 
29221 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29222 	    SD_PATH_STANDARD);
29223 
29224 	if (rval == 0) {
29225 		mutex_enter(SD_MUTEX(un));
29226 		sr_ejected(un);
29227 		un->un_mediastate = DKIO_EJECTED;
29228 		cv_broadcast(&un->un_state_cv);
29229 		mutex_exit(SD_MUTEX(un));
29230 	}
29231 	return (rval);
29232 }
29233 
29234 
29235 /*
29236  *    Function: sr_ejected()
29237  *
29238  * Description: This routine updates the soft state structure to invalidate the
29239  *		geometry information after the media has been ejected or a
29240  *		media eject has been detected.
29241  *
29242  *   Arguments: un - driver soft state (unit) structure
29243  */
29244 
29245 static void
29246 sr_ejected(struct sd_lun *un)
29247 {
29248 	struct sd_errstats *stp;
29249 
29250 	ASSERT(un != NULL);
29251 	ASSERT(mutex_owned(SD_MUTEX(un)));
29252 
29253 	un->un_f_blockcount_is_valid	= FALSE;
29254 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29255 	un->un_f_geometry_is_valid	= FALSE;
29256 
29257 	if (un->un_errstats != NULL) {
29258 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29259 		stp->sd_capacity.value.ui64 = 0;
29260 	}
29261 }
29262 
29263 
29264 /*
29265  *    Function: sr_check_wp()
29266  *
29267  * Description: This routine checks the write protection of a removable
29268  *      media disk and hotpluggable devices via the write protect bit of
29269  *      the Mode Page Header device specific field. Some devices choke
29270  *      on an unsupported mode page. To work around this issue, this
29271  *      routine has been implemented to use the 0x3f mode page (request
29272  *      for all pages) for all device types.
29273  *
29274  *   Arguments: dev		- the device 'dev_t'
29275  *
29276  * Return Code: int indicating if the device is write protected (1) or not (0)
29277  *
29278  *     Context: Kernel thread.
29279  *
29280  */
29281 
29282 static int
29283 sr_check_wp(dev_t dev)
29284 {
29285 	struct sd_lun	*un;
29286 	uchar_t		device_specific;
29287 	uchar_t		*sense;
29288 	int		hdrlen;
29289 	int		rval = FALSE;
29290 
29291 	/*
29292 	 * Note: The return codes for this routine should be reworked to
29293 	 * properly handle the case of a NULL softstate.
29294 	 */
29295 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29296 		return (FALSE);
29297 	}
29298 
29299 	if (un->un_f_cfg_is_atapi == TRUE) {
29300 		/*
29301 		 * The mode page contents are not required; set the allocation
29302 		 * length for the mode page header only
29303 		 */
29304 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29305 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29306 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29307 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29308 			goto err_exit;
29309 		device_specific =
29310 		    ((struct mode_header_grp2 *)sense)->device_specific;
29311 	} else {
29312 		hdrlen = MODE_HEADER_LENGTH;
29313 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29314 		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29315 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29316 			goto err_exit;
29317 		device_specific =
29318 		    ((struct mode_header *)sense)->device_specific;
29319 	}
29320 
29321 	/*
29322 	 * Check the write protect bit. If the mode sense above failed
29323 	 * (not all disks understand the query), we jumped past this
29324 	 * check and rval stays FALSE: assume the device is writable.
29325 	 */
29326 	if (device_specific & WRITE_PROTECT) {
29327 		rval = TRUE;
29328 	}
29329 
29330 err_exit:
29331 	kmem_free(sense, hdrlen);
29332 	return (rval);
29333 }
29334 
29335 /*
29336  *    Function: sr_volume_ctrl()
29337  *
29338  * Description: This routine is the driver entry point for handling CD-ROM
29339  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29340  *
29341  *   Arguments: dev	- the device 'dev_t'
29342  *		data	- pointer to user audio volume control structure
29343  *		flag	- this argument is a pass through to ddi_copyxxx()
29344  *			  directly from the mode argument of ioctl().
29345  *
29346  * Return Code: the code returned by sd_send_scsi_cmd()
29347  *		EFAULT if ddi_copyxxx() fails
29348  *		ENXIO if fail ddi_get_soft_state
29349  *		EINVAL if data pointer is NULL
29350  *
29351  */
29352 
29353 static int
29354 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29355 {
29356 	struct sd_lun		*un;
29357 	struct cdrom_volctrl    volume;
29358 	struct cdrom_volctrl    *vol = &volume;
29359 	uchar_t			*sense_page;
29360 	uchar_t			*select_page;
29361 	uchar_t			*sense;
29362 	uchar_t			*select;
29363 	int			sense_buflen;
29364 	int			select_buflen;
29365 	int			rval;
29366 
29367 	if (data == NULL) {
29368 		return (EINVAL);
29369 	}
29370 
29371 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29372 	    (un->un_state == SD_STATE_OFFLINE)) {
29373 		return (ENXIO);
29374 	}
29375 
29376 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29377 		return (EFAULT);
29378 	}
29379 
29380 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29381 		struct mode_header_grp2		*sense_mhp;
29382 		struct mode_header_grp2		*select_mhp;
29383 		int				bd_len;
29384 
29385 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29386 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29387 		    MODEPAGE_AUDIO_CTRL_LEN;
29388 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29389 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29390 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29391 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29392 		    SD_PATH_STANDARD)) != 0) {
29393 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29394 			    "sr_volume_ctrl: Mode Sense Failed\n");
29395 			kmem_free(sense, sense_buflen);
29396 			kmem_free(select, select_buflen);
29397 			return (rval);
29398 		}
29399 		sense_mhp = (struct mode_header_grp2 *)sense;
29400 		select_mhp = (struct mode_header_grp2 *)select;
29401 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29402 		    sense_mhp->bdesc_length_lo;
29403 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29404 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29405 			    "sr_volume_ctrl: Mode Sense returned invalid "
29406 			    "block descriptor length\n");
29407 			kmem_free(sense, sense_buflen);
29408 			kmem_free(select, select_buflen);
29409 			return (EIO);
29410 		}
29411 		sense_page = (uchar_t *)
29412 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29413 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29414 		select_mhp->length_msb = 0;
29415 		select_mhp->length_lsb = 0;
29416 		select_mhp->bdesc_length_hi = 0;
29417 		select_mhp->bdesc_length_lo = 0;
29418 	} else {
29419 		struct mode_header		*sense_mhp, *select_mhp;
29420 
29421 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29422 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29423 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29424 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29425 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29426 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29427 		    SD_PATH_STANDARD)) != 0) {
29428 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29429 			    "sr_volume_ctrl: Mode Sense Failed\n");
29430 			kmem_free(sense, sense_buflen);
29431 			kmem_free(select, select_buflen);
29432 			return (rval);
29433 		}
29434 		sense_mhp  = (struct mode_header *)sense;
29435 		select_mhp = (struct mode_header *)select;
29436 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29437 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29438 			    "sr_volume_ctrl: Mode Sense returned invalid "
29439 			    "block descriptor length\n");
29440 			kmem_free(sense, sense_buflen);
29441 			kmem_free(select, select_buflen);
29442 			return (EIO);
29443 		}
29444 		sense_page = (uchar_t *)
29445 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29446 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29447 		select_mhp->length = 0;
29448 		select_mhp->bdesc_length = 0;
29449 	}
29450 	/*
29451 	 * Note: An audio control data structure could be created and overlaid
29452 	 * on the following in place of the array indexing method implemented.
29453 	 */
29454 
29455 	/* Build the select data for the user volume data */
29456 	select_page[0] = MODEPAGE_AUDIO_CTRL;
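	/* Page length: 0xE = 14 bytes of audio control data follow */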
29457 	select_page[1] = 0xE;
29458 	/* Set the immediate bit */
29459 	select_page[2] = 0x04;
29460 	/* Zero out reserved fields */
29461 	select_page[3] = 0x00;
29462 	select_page[4] = 0x00;
29463 	/* Return sense data for fields not to be modified */
29464 	select_page[5] = sense_page[5];
29465 	select_page[6] = sense_page[6];
29466 	select_page[7] = sense_page[7];
29467 	/* Set the user specified volume levels for channel 0 and 1 */
29468 	select_page[8] = 0x01;
29469 	select_page[9] = vol->channel0;
29470 	select_page[10] = 0x02;
29471 	select_page[11] = vol->channel1;
29472 	/* Channels 2 and 3 are currently unsupported; return the sense data */
29473 	select_page[12] = sense_page[12];
29474 	select_page[13] = sense_page[13];
29475 	select_page[14] = sense_page[14];
29476 	select_page[15] = sense_page[15];
29477 
29478 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29479 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29480 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29481 	} else {
29482 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29483 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29484 	}
29485 
29486 	kmem_free(sense, sense_buflen);
29487 	kmem_free(select, select_buflen);
29488 	return (rval);
29489 }
29490 
29491 
29492 /*
29493  *    Function: sr_read_sony_session_offset()
29494  *
29495  * Description: This routine is the driver entry point for handling CD-ROM
29496  *		ioctl requests for session offset information (CDROMREADOFFSET).
29497  *		The address of the first track in the last session of a
29498  *		multi-session CD-ROM is returned.
29499  *
29500  *		Note: This routine uses a vendor specific key value in the
29501  *		command control field without implementing any vendor check here
29502  *		or in the ioctl routine.
29503  *
29504  *   Arguments: dev	- the device 'dev_t'
29505  *		data	- pointer to an int to hold the requested address
29506  *		flag	- this argument is a pass through to ddi_copyxxx()
29507  *			  directly from the mode argument of ioctl().
29508  *
29509  * Return Code: the code returned by sd_send_scsi_cmd()
29510  *		EFAULT if ddi_copyxxx() fails
29511  *		ENXIO if fail ddi_get_soft_state
29512  *		EINVAL if data pointer is NULL
29513  */
29514 
29515 static int
29516 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29517 {
29518 	struct sd_lun		*un;
29519 	struct uscsi_cmd	*com;
29520 	caddr_t			buffer;
29521 	char			cdb[CDB_GROUP1];
29522 	int			session_offset = 0;
29523 	int			rval;
29524 
29525 	if (data == NULL) {
29526 		return (EINVAL);
29527 	}
29528 
29529 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29530 	    (un->un_state == SD_STATE_OFFLINE)) {
29531 		return (ENXIO);
29532 	}
29533 
29534 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29535 	bzero(cdb, CDB_GROUP1);
29536 	cdb[0] = SCMD_READ_TOC;
29537 	/*
29538 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29539 	 * (4 byte TOC response header + 8 byte response data)
29540 	 */
29541 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29542 	/* Byte 9 is the control byte. A vendor specific value is used */
29543 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29544 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29545 	com->uscsi_cdb = cdb;
29546 	com->uscsi_cdblen = CDB_GROUP1;
29547 	com->uscsi_bufaddr = buffer;
29548 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29549 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29550 
29551 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29552 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29553 	if (rval != 0) {
29554 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29555 		kmem_free(com, sizeof (*com));
29556 		return (rval);
29557 	}
29558 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29559 		session_offset =
29560 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29561 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29562 		/*
29563 		 * The returned offset is in current lbasize blocks. Convert
29564 		 * it to 2k blocks before returning it to the user.
29565 		 */
29566 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29567 			session_offset >>= 2;
29568 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29569 			session_offset >>= 1;
29570 		}
29571 	}
29572 
29573 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29574 		rval = EFAULT;
29575 	}
29576 
29577 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29578 	kmem_free(com, sizeof (*com));
29579 	return (rval);
29580 }
29581 
29582 
29583 /*
29584  *    Function: sd_wm_cache_constructor()
29585  *
29586  * Description: Cache Constructor for the wmap cache for the read/modify/write
29587  * 		devices.
29588  *
29589  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29590  *		un	- sd_lun structure for the device.
29591  *		flags	- the km flags passed to the constructor
29592  *
29593  * Return Code: 0 on success.
29594  *		-1 on failure.
29595  */
29596 
29597 /*ARGSUSED*/
29598 static int
29599 sd_wm_cache_constructor(void *wm, void *un, int flags)
29600 {
29601 	bzero(wm, sizeof (struct sd_w_map));
29602 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29603 	return (0);
29604 }
29605 
29606 
29607 /*
29608  *    Function: sd_wm_cache_destructor()
29609  *
29610  * Description: Cache destructor for the wmap cache for the read/modify/write
29611  * 		devices.
29612  *
29613  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29614  *		un	- sd_lun structure for the device.
29615  */
29616 /*ARGSUSED*/
29617 static void
29618 sd_wm_cache_destructor(void *wm, void *un)
29619 {
29620 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29621 }
29622 
29623 
29624 /*
29625  *    Function: sd_range_lock()
29626  *
29627  * Description: Lock the specified range of blocks to ensure that a
29628  *		read-modify-write is atomic and no other i/o writes
29629  *		to the same location. The range is specified in terms
29630  *		of start and end blocks. Block numbers are the actual
29631  *		media block numbers and not system block numbers.
29632  *
29633  *   Arguments: un	- sd_lun structure for the device.
29634  *		startb - The starting block number
29635  *		endb - The end block number
29636  *		typ - type of i/o - simple/read_modify_write
29637  *
29638  * Return Code: wm  - pointer to the wmap structure.
29639  *
29640  *     Context: This routine can sleep.
29641  */
29642 
29643 static struct sd_w_map *
29644 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29645 {
29646 	struct sd_w_map *wmp = NULL;
29647 	struct sd_w_map *sl_wmp = NULL;
29648 	struct sd_w_map *tmp_wmp;
29649 	wm_state state = SD_WM_CHK_LIST;
29650 
29651 
29652 	ASSERT(un != NULL);
29653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29654 
29655 	mutex_enter(SD_MUTEX(un));
29656 
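	/*
	 * Simple state machine: CHK_LIST looks for an overlapping busy
	 * range, LOCK_RANGE allocates and links a wmap covering the
	 * range, and WAIT_MAP blocks on the owning wmap's condition
	 * variable before rechecking the list.
	 */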
29657 	while (state != SD_WM_DONE) {
29658 
29659 		switch (state) {
29660 		case SD_WM_CHK_LIST:
29661 			/*
29662 			 * This is the starting state. Check the wmap list
29663 			 * to see if the range is currently available.
29664 			 */
29665 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29666 				/*
29667 				 * If this is a simple write and no rmw
29668 				 * i/o is pending then try to lock the
29669 				 * range as the range should be available.
29670 				 */
29671 				state = SD_WM_LOCK_RANGE;
29672 			} else {
29673 				tmp_wmp = sd_get_range(un, startb, endb);
29674 				if (tmp_wmp != NULL) {
29675 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29676 						/*
29677 						 * Should not keep onlist wmps
29678 						 * while waiting; this macro
29679 						 * also does wmp = NULL.
29680 						 */
29681 						FREE_ONLIST_WMAP(un, wmp);
29682 					}
29683 					/*
29684 					 * sl_wmp is the wmap on which the
29685 					 * wait is done. Since tmp_wmp points
29686 					 * to the in-use wmap, set sl_wmp to
29687 					 * tmp_wmp and change the state to wait.
29688 					 */
29689 					sl_wmp = tmp_wmp;
29690 					state = SD_WM_WAIT_MAP;
29691 				} else {
29692 					state = SD_WM_LOCK_RANGE;
29693 				}
29694 
29695 			}
29696 			break;
29697 
29698 		case SD_WM_LOCK_RANGE:
29699 			ASSERT(un->un_wm_cache);
29700 			/*
29701 			 * The range needs to be locked, so try to get a wmap.
29702 			 * First attempt it with KM_NOSLEEP; we want to avoid
29703 			 * sleeping if possible, as we would have to release
29704 			 * the sd mutex in order to sleep.
29705 			 */
29706 			if (wmp == NULL)
29707 				wmp = kmem_cache_alloc(un->un_wm_cache,
29708 				    KM_NOSLEEP);
29709 			if (wmp == NULL) {
29710 				mutex_exit(SD_MUTEX(un));
29711 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29712 				    (sd_lun::un_wm_cache))
29713 				wmp = kmem_cache_alloc(un->un_wm_cache,
29714 				    KM_SLEEP);
29715 				mutex_enter(SD_MUTEX(un));
29716 				/*
29717 				 * we released the mutex so recheck and go to
29718 				 * check list state.
29719 				 */
29720 				state = SD_WM_CHK_LIST;
29721 			} else {
29722 				/*
29723 				 * We exit the state machine since we have
29724 				 * the wmap. Do the housekeeping first:
29725 				 * place the wmap on the wmap list if it is
29726 				 * not on it already, then set the state to done.
29727 				 */
29728 				wmp->wm_start = startb;
29729 				wmp->wm_end = endb;
29730 				wmp->wm_flags = typ | SD_WM_BUSY;
29731 				if (typ & SD_WTYPE_RMW) {
29732 					un->un_rmw_count++;
29733 				}
29734 				/*
29735 				 * If not already on the list then link
29736 				 */
29737 				if (!ONLIST(un, wmp)) {
29738 					wmp->wm_next = un->un_wm;
29739 					wmp->wm_prev = NULL;
29740 					if (wmp->wm_next)
29741 						wmp->wm_next->wm_prev = wmp;
29742 					un->un_wm = wmp;
29743 				}
29744 				state = SD_WM_DONE;
29745 			}
29746 			break;
29747 
29748 		case SD_WM_WAIT_MAP:
29749 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29750 			/*
29751 			 * Wait is done on sl_wmp, which is set in the
29752 			 * check_list state.
29753 			 */
29754 			sl_wmp->wm_wanted_count++;
29755 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29756 			sl_wmp->wm_wanted_count--;
29757 			/*
29758 			 * We can reuse the memory from the completed sl_wmp
29759 			 * lock range for our new lock, but only if no one is
29760 			 * waiting for it.
29761 			 */
29762 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29763 			if (sl_wmp->wm_wanted_count == 0) {
29764 				if (wmp != NULL)
29765 					CHK_N_FREEWMP(un, wmp);
29766 				wmp = sl_wmp;
29767 			}
29768 			sl_wmp = NULL;
29769 			/*
29770 			 * After waking up, need to recheck for availability of
29771 			 * range.
29772 			 */
29773 			state = SD_WM_CHK_LIST;
29774 			break;
29775 
29776 		default:
29777 			panic("sd_range_lock: "
29778 			    "Unknown state %d in sd_range_lock", state);
29779 			/*NOTREACHED*/
29780 		} /* switch(state) */
29781 
29782 	} /* while(state != SD_WM_DONE) */
29783 
29784 	mutex_exit(SD_MUTEX(un));
29785 
29786 	ASSERT(wmp != NULL);
29787 
29788 	return (wmp);
29789 }
29790 
29791 
29792 /*
29793  *    Function: sd_get_range()
29794  *
29795  * Description: Find if there is any I/O overlapping this one.
29796  *		Returns the write-map of the 1st such I/O, NULL otherwise.
29797  *
29798  *   Arguments: un	- sd_lun structure for the device.
29799  *		startb - The starting block number
29800  *		endb - The end block number
29801  *
29802  * Return Code: wm  - pointer to the wmap structure.
29803  */
29804 
29805 static struct sd_w_map *
29806 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29807 {
29808 	struct sd_w_map *wmp;
29809 
29810 	ASSERT(un != NULL);
29811 
29812 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29813 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29814 			continue;
29815 		}
29816 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29817 			break;
29818 		}
29819 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29820 			break;
29821 		}
29822 	}
29823 
29824 	return (wmp);
29825 }
29826 
29827 
29828 /*
29829  *    Function: sd_free_inlist_wmap()
29830  *
29831  * Description: Unlink and free a write map struct.
29832  *
29833  *   Arguments: un      - sd_lun structure for the device.
29834  *		wmp	- sd_w_map which needs to be unlinked.
29835  */
29836 
29837 static void
29838 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29839 {
29840 	ASSERT(un != NULL);
29841 
29842 	if (un->un_wm == wmp) {
29843 		un->un_wm = wmp->wm_next;
29844 	} else {
29845 		wmp->wm_prev->wm_next = wmp->wm_next;
29846 	}
29847 
29848 	if (wmp->wm_next) {
29849 		wmp->wm_next->wm_prev = wmp->wm_prev;
29850 	}
29851 
29852 	wmp->wm_next = wmp->wm_prev = NULL;
29853 
29854 	kmem_cache_free(un->un_wm_cache, wmp);
29855 }
29856 
29857 
29858 /*
29859  *    Function: sd_range_unlock()
29860  *
29861  * Description: Unlock the range locked by wm.
29862  *		Free write map if nobody else is waiting on it.
29863  *
29864  *   Arguments: un      - sd_lun structure for the device.
29865  *              wm      - sd_w_map whose range needs to be unlocked.
29866  */
29867 
29868 static void
29869 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29870 {
29871 	ASSERT(un != NULL);
29872 	ASSERT(wm != NULL);
29873 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29874 
29875 	mutex_enter(SD_MUTEX(un));
29876 
29877 	if (wm->wm_flags & SD_WTYPE_RMW) {
29878 		un->un_rmw_count--;
29879 	}
29880 
29881 	if (wm->wm_wanted_count) {
29882 		wm->wm_flags = 0;
29883 		/*
29884 		 * Broadcast that the wmap is available now.
29885 		 */
29886 		cv_broadcast(&wm->wm_avail);
29887 	} else {
29888 		/*
29889 		 * If no one is waiting on the map, it should be freed.
29890 		 */
29891 		sd_free_inlist_wmap(un, wm);
29892 	}
29893 
29894 	mutex_exit(SD_MUTEX(un));
29895 }
29896 
29897 
29898 /*
29899  *    Function: sd_read_modify_write_task
29900  *
29901  * Description: Called from a taskq thread to initiate the write phase of
29902  *		a read-modify-write request.  This is used for targets where
29903  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29904  *
29905  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29906  *
29907  *     Context: Called under taskq thread context.
29908  */
29909 
29910 static void
29911 sd_read_modify_write_task(void *arg)
29912 {
29913 	struct sd_mapblocksize_info	*bsp;
29914 	struct buf	*bp;
29915 	struct sd_xbuf	*xp;
29916 	struct sd_lun	*un;
29917 
29918 	bp = arg;	/* The bp is given in arg */
29919 	ASSERT(bp != NULL);
29920 
29921 	/* Get the pointer to the layer-private data struct */
29922 	xp = SD_GET_XBUF(bp);
29923 	ASSERT(xp != NULL);
29924 	bsp = xp->xb_private;
29925 	ASSERT(bsp != NULL);
29926 
29927 	un = SD_GET_UN(bp);
29928 	ASSERT(un != NULL);
29929 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29930 
29931 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29932 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29933 
29934 	/*
29935 	 * This is the write phase of a read-modify-write request, called
29936 	 * under the context of a taskq thread in response to the read
29937 	 * portion of the rmw request completing under interrupt
29938 	 * context. The write request must be sent from here down the iostart
29939 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29940 	 * we use the layer index saved in the layer-private data area.
29941 	 */
29942 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29943 
29944 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29945 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29946 }
29947 
29948 
29949 /*
29950  *    Function: sddump_do_read_of_rmw()
29951  *
29952  * Description: This routine will be called from sddump. If sddump is called
29953  *		with an I/O which is not aligned on a device blocksize boundary
29954  *		then the write has to be converted to a read-modify-write.
29955  *		Do the read part here in order to keep sddump simple.
29956  *		Note that the sd_mutex is held across the call to this
29957  *		routine.
29958  *
29959  *   Arguments: un	- sd_lun
29960  *		blkno	- block number in terms of media block size.
29961  *		nblk	- number of blocks.
29962  *		bpp	- pointer to pointer to the buf structure. On return
29963  *			from this function, *bpp points to the valid buffer
29964  *			to which the write has to be done.
29965  *
29966  * Return Code: 0 for success or errno-type return code
29967  */
29968 
29969 static int
29970 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29971 	struct buf **bpp)
29972 {
29973 	int err;
29974 	int i;
29975 	int rval;
29976 	struct buf *bp;
29977 	struct scsi_pkt *pkt = NULL;
29978 	uint32_t target_blocksize;
29979 
29980 	ASSERT(un != NULL);
29981 	ASSERT(mutex_owned(SD_MUTEX(un)));
29982 
29983 	target_blocksize = un->un_tgt_blocksize;
29984 
29985 	mutex_exit(SD_MUTEX(un));
29986 
29987 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29988 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29989 	if (bp == NULL) {
29990 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29991 		    "no resources for dumping; giving up");
29992 		err = ENOMEM;
29993 		goto done;
29994 	}
29995 
29996 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29997 	    blkno, nblk);
29998 	if (rval != 0) {
29999 		scsi_free_consistent_buf(bp);
30000 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
30001 		    "no resources for dumping; giving up");
30002 		err = ENOMEM;
30003 		goto done;
30004 	}
30005 
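	/* Dump context: run the command polled, without interrupts */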
30006 	pkt->pkt_flags |= FLAG_NOINTR;
30007 
30008 	err = EIO;
30009 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
30010 
30011 		/*
30012 		 * sd_scsi_poll() returns 0 (success) if the command completes
30013 		 * and the status block is STATUS_GOOD.  We should check for
30014 		 * errors only if this condition is not true.  Even then we
30015 		 * should send our own request sense packet only if we have a
30016 		 * check condition and auto request sense has not been
30017 		 * performed by the HBA.
30018 		 */
30019 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
30020 
30021 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
30022 			err = 0;
30023 			break;
30024 		}
30025 
30026 		/*
30027 		 * Check CMD_DEV_GONE first; give up if the device is gone,
30028 		 * as there is no need to read RQS data.
30029 		 */
30030 		if (pkt->pkt_reason == CMD_DEV_GONE) {
30031 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
30032 			    "Device is gone\n");
30033 			break;
30034 		}
30035 
30036 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
30037 			SD_INFO(SD_LOG_DUMP, un,
30038 			    "sddump: read failed with CHECK, try # %d\n", i);
30039 			if ((pkt->pkt_state & STATE_ARQ_DONE) == 0) {
30040 				(void) sd_send_polled_RQS(un);
30041 			}
30042 
30043 			continue;
30044 		}
30045 
30046 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
30047 			int reset_retval = 0;
30048 
30049 			SD_INFO(SD_LOG_DUMP, un,
30050 			    "sddump: read failed with BUSY, try # %d\n", i);
30051 
30052 			if (un->un_f_lun_reset_enabled == TRUE) {
30053 				reset_retval = scsi_reset(SD_ADDRESS(un),
30054 				    RESET_LUN);
30055 			}
30056 			if (reset_retval == 0) {
30057 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
30058 			}
30059 			(void) sd_send_polled_RQS(un);
30060 
30061 		} else {
30062 			SD_INFO(SD_LOG_DUMP, un,
30063 			    "sddump: read failed with 0x%x, try # %d\n",
30064 			    SD_GET_PKT_STATUS(pkt), i);
30065 			mutex_enter(SD_MUTEX(un));
30066 			sd_reset_target(un, pkt);
30067 			mutex_exit(SD_MUTEX(un));
30068 		}
30069 
30070 		/*
30071 		 * If we are not getting anywhere with lun/target resets,
30072 		 * let's reset the bus.
30073 		 */
30074 		if (i > SD_NDUMP_RETRIES/2) {
30075 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
30076 			(void) sd_send_polled_RQS(un);
30077 		}
30078 
30079 	}
30080 	scsi_destroy_pkt(pkt);
30081 
30082 	if (err != 0) {
30083 		scsi_free_consistent_buf(bp);
30084 		*bpp = NULL;
30085 	} else {
30086 		*bpp = bp;
30087 	}
30088 
30089 done:
30090 	mutex_enter(SD_MUTEX(un));
30091 	return (err);
30092 }
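
/*
 * Illustrative sketch (not part of the driver): a dump-path caller of
 * sddump_do_read_of_rmw() handles an unaligned write by reading the
 * covering target blocks, overlaying the new data at the proper byte
 * offset, and then writing the merged buffer back out.  The local
 * names below (tgt_blkno, tgt_nblk, wr_bp, byte_offset, nbytes, addr)
 * are hypothetical:
 *
 *	if (sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk, &wr_bp) != 0)
 *		return (EIO);
 *	bcopy(addr, wr_bp->b_un.b_addr + byte_offset, nbytes);
 *	(then build and transport a write packet for wr_bp)
 */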
30093 
30094 
30095 /*
30096  *    Function: sd_failfast_flushq
30097  *
30098  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30099  *		in b_flags and move them onto the failfast queue, then kick
30100  *		off a thread to return all bp's on the failfast queue to
30101  *		their owners with an error set.
30102  *
30103  *   Arguments: un - pointer to the soft state struct for the instance.
30104  *
30105  *     Context: may execute in interrupt context.
30106  */
30107 
30108 static void
30109 sd_failfast_flushq(struct sd_lun *un)
30110 {
30111 	struct buf *bp;
30112 	struct buf *next_waitq_bp;
30113 	struct buf *prev_waitq_bp = NULL;
30114 
30115 	ASSERT(un != NULL);
30116 	ASSERT(mutex_owned(SD_MUTEX(un)));
30117 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30118 	ASSERT(un->un_failfast_bp == NULL);
30119 
30120 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30121 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30122 
30123 	/*
30124 	 * Check if we should flush all bufs when entering failfast state, or
30125 	 * just those with B_FAILFAST set.
30126 	 */
30127 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30128 		/*
30129 		 * Move *all* bp's on the wait queue to the failfast flush
30130 		 * queue, including those that do NOT have B_FAILFAST set.
30131 		 */
30132 		if (un->un_failfast_headp == NULL) {
30133 			ASSERT(un->un_failfast_tailp == NULL);
30134 			un->un_failfast_headp = un->un_waitq_headp;
30135 		} else {
30136 			ASSERT(un->un_failfast_tailp != NULL);
30137 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30138 		}
30139 
30140 		un->un_failfast_tailp = un->un_waitq_tailp;
30141 
30142 		/* update kstat for each bp moved out of the waitq */
30143 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30144 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30145 		}
30146 
30147 		/* empty the waitq */
30148 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30149 
30150 	} else {
30151 		/*
30152 		 * Go through the wait queue, pick off all entries with
30153 		 * B_FAILFAST set, and move these onto the failfast queue.
30154 		 */
30155 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30156 			/*
30157 			 * Save the pointer to the next bp on the wait queue,
30158 			 * so we get to it on the next iteration of this loop.
30159 			 */
30160 			next_waitq_bp = bp->av_forw;
30161 
30162 			/*
30163 			 * If this bp from the wait queue does NOT have
30164 			 * B_FAILFAST set, just move on to the next element
30165 			 * in the wait queue. Note, this is the only place
30166 			 * where it is correct to set prev_waitq_bp.
30167 			 */
30168 			if ((bp->b_flags & B_FAILFAST) == 0) {
30169 				prev_waitq_bp = bp;
30170 				continue;
30171 			}
30172 
30173 			/*
30174 			 * Remove the bp from the wait queue.
30175 			 */
30176 			if (bp == un->un_waitq_headp) {
30177 				/* The bp is the first element of the waitq. */
30178 				un->un_waitq_headp = next_waitq_bp;
30179 				if (un->un_waitq_headp == NULL) {
30180 					/* The wait queue is now empty */
30181 					un->un_waitq_tailp = NULL;
30182 				}
30183 			} else {
30184 				/*
30185 				 * The bp is either somewhere in the middle
30186 				 * or at the end of the wait queue.
30187 				 */
30188 				ASSERT(un->un_waitq_headp != NULL);
30189 				ASSERT(prev_waitq_bp != NULL);
30190 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30191 				    == 0);
30192 				if (bp == un->un_waitq_tailp) {
30193 					/* bp is the last entry on the waitq. */
30194 					ASSERT(next_waitq_bp == NULL);
30195 					un->un_waitq_tailp = prev_waitq_bp;
30196 				}
30197 				prev_waitq_bp->av_forw = next_waitq_bp;
30198 			}
30199 			bp->av_forw = NULL;
30200 
30201 			/*
30202 			 * update kstat since the bp is moved out of
30203 			 * the waitq
30204 			 */
30205 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30206 
30207 			/*
30208 			 * Now put the bp onto the failfast queue.
30209 			 */
30210 			if (un->un_failfast_headp == NULL) {
30211 				/* failfast queue is currently empty */
30212 				ASSERT(un->un_failfast_tailp == NULL);
30213 				un->un_failfast_headp =
30214 				    un->un_failfast_tailp = bp;
30215 			} else {
30216 				/* Add the bp to the end of the failfast q */
30217 				ASSERT(un->un_failfast_tailp != NULL);
30218 				ASSERT(un->un_failfast_tailp->b_flags &
30219 				    B_FAILFAST);
30220 				un->un_failfast_tailp->av_forw = bp;
30221 				un->un_failfast_tailp = bp;
30222 			}
30223 		}
30224 	}
30225 
30226 	/*
30227 	 * Now return all bp's on the failfast queue to their owners.
30228 	 */
30229 	while ((bp = un->un_failfast_headp) != NULL) {
30230 
30231 		un->un_failfast_headp = bp->av_forw;
30232 		if (un->un_failfast_headp == NULL) {
30233 			un->un_failfast_tailp = NULL;
30234 		}
30235 
30236 		/*
30237 		 * We want to return the bp with a failure error code, but
30238 		 * we do not want a call to sd_start_cmds() to occur here,
30239 		 * so use sd_return_failed_command_no_restart() instead of
30240 		 * sd_return_failed_command().
30241 		 */
30242 		sd_return_failed_command_no_restart(un, bp, EIO);
30243 	}
30244 
30245 	/* Flush the xbuf queues if required. */
30246 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30247 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30248 	}
30249 
30250 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30251 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30252 }
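
/*
 * Illustrative note (not part of the driver): the flush policy above is
 * selected by the global sd_failfast_flushctl.  Assuming the flag values
 * are reachable as a module global, a test setup might force the
 * flush-everything behavior from /etc/system along these lines (the
 * value shown is hypothetical and stands for OR-ing
 * SD_FAILFAST_FLUSH_ALL_BUFS with SD_FAILFAST_FLUSH_ALL_QUEUES):
 *
 *	set sd:sd_failfast_flushctl = 0x3
 */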
30253 
30254 
30255 /*
30256  *    Function: sd_failfast_flushq_callback
30257  *
30258  * Description: Return TRUE if the given bp meets the criteria for failfast
30259  *		flushing. Used with ddi_xbuf_flushq(9F).
30260  *
30261  *   Arguments: bp - ptr to buf struct to be examined.
30262  *
30263  *     Context: Any
30264  */
30265 
30266 static int
30267 sd_failfast_flushq_callback(struct buf *bp)
30268 {
30269 	/*
30270 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30271 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30272 	 */
30273 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30274 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30275 }
30276 
30277 
30278 
30279 #if defined(__i386) || defined(__amd64)
30280 /*
30281  * Function: sd_setup_next_xfer
30282  *
30283  * Description: Prepare the next I/O operation using DMA_PARTIAL.
30284  *
30285  */
30286 
30287 static int
30288 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30289     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30290 {
30291 	ssize_t	num_blks_not_xfered;
30292 	daddr_t	strt_blk_num;
30293 	ssize_t	bytes_not_xfered;
30294 	int	rval;
30295 
30296 	ASSERT(pkt->pkt_resid == 0);
30297 
30298 	/*
30299 	 * Calculate next block number and amount to be transferred.
30300 	 *
30301 	 * How much data has NOT been transferred to the HBA yet.
30302 	 */
30303 	bytes_not_xfered = xp->xb_dma_resid;
30304 
30305 	/*
30306 	 * Figure out how many blocks have NOT been transferred to the HBA yet.
30307 	 */
30308 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30309 
30310 	/*
30311 	 * Set the starting block number to the end of what WAS transferred.
30312 	 */
30313 	strt_blk_num = xp->xb_blkno +
30314 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30315 
30316 	/*
30317 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30318 	 * will call scsi_init_pkt() with NULL_FUNC so we do not have to release
30319 	 * the disk mutex here.
30320 	 */
30321 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30322 	    strt_blk_num, num_blks_not_xfered);
30323 
30324 	if (rval == 0) {
30325 
30326 		/*
30327 		 * Success.
30328 		 *
30329 		 * Adjust things if there are still more blocks to be
30330 		 * transferred.
30331 		 */
30332 		xp->xb_dma_resid = pkt->pkt_resid;
30333 		pkt->pkt_resid = 0;
30334 
30335 		return (1);
30336 	}
30337 
30338 	/*
30339 	 * There's really only one possible error return from
30340 	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
30341 	 * returns NULL.
30342 	 */
30343 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30344 
30345 	bp->b_resid = bp->b_bcount;
30346 	bp->b_flags |= B_ERROR;
30347 
30348 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30349 	    "Error setting up next portion of DMA transfer\n");
30350 
30351 	return (0);
30352 }
30353 #endif
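
/*
 * A minimal illustrative sketch (not part of the driver): a command
 * completion handler resumes a DMA_PARTIAL transfer by checking for
 * untransferred data and re-transporting the same packet once
 * sd_setup_next_xfer() has moved it to the next DMA window.  Error
 * handling around scsi_transport(9F) is omitted here:
 *
 *	if ((xp->xb_dma_resid != 0) &&
 *	    (sd_setup_next_xfer(un, bp, pktp, xp) != 0)) {
 *		(void) scsi_transport(pktp);
 *		return;
 *	}
 */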
30354 
30355 /*
30356  *    Function: sd_panic_for_res_conflict
30357  *
30358  * Description: Call panic with a string formatted with "Reservation Conflict"
30359  *		and a human readable identifier indicating the SD instance
30360  *		that experienced the reservation conflict.
30361  *
30362  *   Arguments: un - pointer to the soft state struct for the instance.
30363  *
30364  *     Context: may execute in interrupt context.
30365  */
30366 
30367 #define	SD_RESV_CONFLICT_FMT_LEN 40
30368 void
30369 sd_panic_for_res_conflict(struct sd_lun *un)
30370 {
30371 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30372 	char path_str[MAXPATHLEN];
30373 
30374 	(void) snprintf(panic_str, sizeof (panic_str),
30375 	    "Reservation Conflict\nDisk: %s",
30376 	    ddi_pathname(SD_DEVINFO(un), path_str));
30377 
30378 	panic(panic_str);
30379 }
30380 
30381 /*
30382  * Note: The following sd_faultinjection_ioctl() routines implement
30383  * driver support for fault injection, used for error analysis by
30384  * causing faults in multiple layers of the driver.
30385  *
30386  */
30387 
30388 #ifdef SD_FAULT_INJECTION
30389 static uint_t   sd_fault_injection_on = 0;
30390 
30391 /*
30392  *    Function: sd_faultinjection_ioctl()
30393  *
30394  * Description: This routine is the driver entry point for handling
30395  *              faultinjection ioctls to inject errors into the
30396  *              layer model
30397  *              layer model.
30398  *
30399  *   Arguments: cmd	- the ioctl cmd received
30400  *		arg	- the argument from the user; also used for returns
30401 
30402 static void
30403 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30404 {
30405 	uint_t i;
30406 	uint_t rval;
30407 
30408 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30409 
30410 	mutex_enter(SD_MUTEX(un));
30411 
30412 	switch (cmd) {
30413 	case SDIOCRUN:
30414 		/* Allow pushed faults to be injected */
30415 		SD_INFO(SD_LOG_SDTEST, un,
30416 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30417 
30418 		sd_fault_injection_on = 1;
30419 
30420 		SD_INFO(SD_LOG_IOERR, un,
30421 		    "sd_faultinjection_ioctl: run finished\n");
30422 		break;
30423 
30424 	case SDIOCSTART:
30425 		/* Start Injection Session */
30426 		SD_INFO(SD_LOG_SDTEST, un,
30427 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30428 
30429 		sd_fault_injection_on = 0;
30430 		un->sd_injection_mask = 0xFFFFFFFF;
30431 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30432 			un->sd_fi_fifo_pkt[i] = NULL;
30433 			un->sd_fi_fifo_xb[i] = NULL;
30434 			un->sd_fi_fifo_un[i] = NULL;
30435 			un->sd_fi_fifo_arq[i] = NULL;
30436 		}
30437 		un->sd_fi_fifo_start = 0;
30438 		un->sd_fi_fifo_end = 0;
30439 
30440 		mutex_enter(&(un->un_fi_mutex));
30441 		un->sd_fi_log[0] = '\0';
30442 		un->sd_fi_buf_len = 0;
30443 		mutex_exit(&(un->un_fi_mutex));
30444 
30445 		SD_INFO(SD_LOG_IOERR, un,
30446 		    "sd_faultinjection_ioctl: start finished\n");
30447 		break;
30448 
30449 	case SDIOCSTOP:
30450 		/* Stop Injection Session */
30451 		SD_INFO(SD_LOG_SDTEST, un,
30452 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30453 		sd_fault_injection_on = 0;
30454 		un->sd_injection_mask = 0x0;
30455 
30456 		/* Empty stray or unused structs from the fifo */
30457 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30458 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30459 				kmem_free(un->sd_fi_fifo_pkt[i],
30460 				    sizeof (struct sd_fi_pkt));
30461 			}
30462 			if (un->sd_fi_fifo_xb[i] != NULL) {
30463 				kmem_free(un->sd_fi_fifo_xb[i],
30464 				    sizeof (struct sd_fi_xb));
30465 			}
30466 			if (un->sd_fi_fifo_un[i] != NULL) {
30467 				kmem_free(un->sd_fi_fifo_un[i],
30468 				    sizeof (struct sd_fi_un));
30469 			}
30470 			if (un->sd_fi_fifo_arq[i] != NULL) {
30471 				kmem_free(un->sd_fi_fifo_arq[i],
30472 				    sizeof (struct sd_fi_arq));
30473 			}
30474 			un->sd_fi_fifo_pkt[i] = NULL;
30475 			un->sd_fi_fifo_un[i] = NULL;
30476 			un->sd_fi_fifo_xb[i] = NULL;
30477 			un->sd_fi_fifo_arq[i] = NULL;
30478 		}
30479 		un->sd_fi_fifo_start = 0;
30480 		un->sd_fi_fifo_end = 0;
30481 
30482 		SD_INFO(SD_LOG_IOERR, un,
30483 		    "sd_faultinjection_ioctl: stop finished\n");
30484 		break;
30485 
30486 	case SDIOCINSERTPKT:
30487 		/* Store a packet struct to be pushed onto fifo */
30488 		SD_INFO(SD_LOG_SDTEST, un,
30489 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30490 
30491 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30492 
30493 		sd_fault_injection_on = 0;
30494 
30495 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30496 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30497 			kmem_free(un->sd_fi_fifo_pkt[i],
30498 			    sizeof (struct sd_fi_pkt));
30499 		}
30500 		if (arg != NULL) {
30501 			un->sd_fi_fifo_pkt[i] =
30502 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30503 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30504 				/* Alloc failed; don't store anything */
30505 				break;
30506 			}
30507 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30508 			    sizeof (struct sd_fi_pkt), 0);
30509 			if (rval == -1) {
30510 				kmem_free(un->sd_fi_fifo_pkt[i],
30511 				    sizeof (struct sd_fi_pkt));
30512 				un->sd_fi_fifo_pkt[i] = NULL;
30513 			}
30514 		} else {
30515 			SD_INFO(SD_LOG_IOERR, un,
30516 			    "sd_faultinjection_ioctl: pkt null\n");
30517 		}
30518 		break;
30519 
30520 	case SDIOCINSERTXB:
30521 		/* Store an xb struct to be pushed onto the fifo */
30522 		SD_INFO(SD_LOG_SDTEST, un,
30523 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30524 
30525 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30526 
30527 		sd_fault_injection_on = 0;
30528 
30529 		if (un->sd_fi_fifo_xb[i] != NULL) {
30530 			kmem_free(un->sd_fi_fifo_xb[i],
30531 			    sizeof (struct sd_fi_xb));
30532 			un->sd_fi_fifo_xb[i] = NULL;
30533 		}
30534 		if (arg != NULL) {
30535 			un->sd_fi_fifo_xb[i] =
30536 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30537 			if (un->sd_fi_fifo_xb[i] == NULL) {
30538 				/* Alloc failed; don't store anything */
30539 				break;
30540 			}
30541 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30542 			    sizeof (struct sd_fi_xb), 0);
30543 
30544 			if (rval == -1) {
30545 				kmem_free(un->sd_fi_fifo_xb[i],
30546 				    sizeof (struct sd_fi_xb));
30547 				un->sd_fi_fifo_xb[i] = NULL;
30548 			}
30549 		} else {
30550 			SD_INFO(SD_LOG_IOERR, un,
30551 			    "sd_faultinjection_ioctl: xb null\n");
30552 		}
30553 		break;
30554 
30555 	case SDIOCINSERTUN:
30556 		/* Store a un struct to be pushed onto fifo */
30557 		SD_INFO(SD_LOG_SDTEST, un,
30558 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30559 
30560 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30561 
30562 		sd_fault_injection_on = 0;
30563 
30564 		if (un->sd_fi_fifo_un[i] != NULL) {
30565 			kmem_free(un->sd_fi_fifo_un[i],
30566 			    sizeof (struct sd_fi_un));
30567 			un->sd_fi_fifo_un[i] = NULL;
30568 		}
30569 		if (arg != NULL) {
30570 			un->sd_fi_fifo_un[i] =
30571 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30572 			if (un->sd_fi_fifo_un[i] == NULL) {
30573 				/* Alloc failed; don't store anything */
30574 				break;
30575 			}
30576 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30577 			    sizeof (struct sd_fi_un), 0);
30578 			if (rval == -1) {
30579 				kmem_free(un->sd_fi_fifo_un[i],
30580 				    sizeof (struct sd_fi_un));
30581 				un->sd_fi_fifo_un[i] = NULL;
30582 			}
30583 
30584 		} else {
30585 			SD_INFO(SD_LOG_IOERR, un,
30586 			    "sd_faultinjection_ioctl: un null\n");
30587 		}
30588 
30589 		break;
30590 
30591 	case SDIOCINSERTARQ:
30592 		/* Store an arq struct to be pushed onto the fifo */
30593 		SD_INFO(SD_LOG_SDTEST, un,
30594 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30595 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30596 
30597 		sd_fault_injection_on = 0;
30598 
30599 		if (un->sd_fi_fifo_arq[i] != NULL) {
30600 			kmem_free(un->sd_fi_fifo_arq[i],
30601 			    sizeof (struct sd_fi_arq));
30602 			un->sd_fi_fifo_arq[i] = NULL;
30603 		}
30604 		if (arg != NULL) {
30605 			un->sd_fi_fifo_arq[i] =
30606 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30607 			if (un->sd_fi_fifo_arq[i] == NULL) {
30608 				/* Alloc failed; don't store anything */
30609 				break;
30610 			}
30611 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30612 			    sizeof (struct sd_fi_arq), 0);
30613 			if (rval == -1) {
30614 				kmem_free(un->sd_fi_fifo_arq[i],
30615 				    sizeof (struct sd_fi_arq));
30616 				un->sd_fi_fifo_arq[i] = NULL;
30617 			}
30618 
30619 		} else {
30620 			SD_INFO(SD_LOG_IOERR, un,
30621 			    "sd_faultinjection_ioctl: arq null\n");
30622 		}
30623 
30624 		break;
30625 
30626 	case SDIOCPUSH:
30627 		/* Push stored xb, pkt, un, and arq onto fifo */
30628 		sd_fault_injection_on = 0;
30629 
30630 		if (arg != NULL) {
30631 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30632 			if (rval != -1 &&
30633 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30634 				un->sd_fi_fifo_end += i;
30635 			}
30636 		} else {
30637 			SD_INFO(SD_LOG_IOERR, un,
30638 			    "sd_faultinjection_ioctl: push arg null\n");
30639 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30640 				un->sd_fi_fifo_end++;
30641 			}
30642 		}
30643 		SD_INFO(SD_LOG_IOERR, un,
30644 		    "sd_faultinjection_ioctl: push to end=%d\n",
30645 		    un->sd_fi_fifo_end);
30646 		break;
30647 
30648 	case SDIOCRETRIEVE:
30649 		/* Return buffer of log from Injection session */
30650 		SD_INFO(SD_LOG_SDTEST, un,
30651 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30652 
30653 		sd_fault_injection_on = 0;
30654 
30655 		mutex_enter(&(un->un_fi_mutex));
30656 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30657 		    un->sd_fi_buf_len+1, 0);
30658 		mutex_exit(&(un->un_fi_mutex));
30659 
30660 		if (rval == -1) {
30661 			/*
30662 			 * arg is possibly invalid; set
30663 			 * it to NULL for the return
30664 			 */
30665 			arg = NULL;
30666 		}
30667 		break;
30668 	}
30669 
30670 	mutex_exit(SD_MUTEX(un));
30671 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30673 }
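
/*
 * Illustrative sketch (not part of the driver): a user-level test tool
 * with access to the SDIOC* definitions and sd_fi_* structures might
 * drive the ioctls above roughly as follows.  The variable names
 * (fd, fipkt, npush, logbuf) are hypothetical:
 *
 *	(void) ioctl(fd, SDIOCSTART, NULL);		start a session
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fipkt);	stage a pkt fault
 *	npush = 1;
 *	(void) ioctl(fd, SDIOCPUSH, &npush);		push staged faults
 *	(void) ioctl(fd, SDIOCRUN, NULL);		arm injection
 *	(perform I/O against the device to trigger the fault)
 *	(void) ioctl(fd, SDIOCRETRIEVE, logbuf);	fetch the log
 */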
30674 
30675 
30676 /*
30677  *    Function: sd_injection_log()
30678  *
30679  * Description: This routine adds buf to the existing injection log
30680  *              for retrieval via sd_faultinjection_ioctl() for use in
30681  *              fault detection and recovery.
30682  *
30683  *   Arguments: buf - the string to add to the log
30684  */
30685 
30686 static void
30687 sd_injection_log(char *buf, struct sd_lun *un)
30688 {
30689 	uint_t len;
30690 
30691 	ASSERT(un != NULL);
30692 	ASSERT(buf != NULL);
30693 
30694 	mutex_enter(&(un->un_fi_mutex));
30695 
30696 	len = min(strlen(buf), 255);
30697 	/* Add logged value to Injection log to be returned later */
30698 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30699 		uint_t	offset = strlen((char *)un->sd_fi_log);
30700 		char *destp = (char *)un->sd_fi_log + offset;
30701 		int i;
30702 		for (i = 0; i < len; i++) {
30703 			*destp++ = *buf++;
30704 		}
30705 		un->sd_fi_buf_len += len;
30706 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30707 	}
30708 
30709 	mutex_exit(&(un->un_fi_mutex));
30710 }
30711 
30712 
30713 /*
30714  *    Function: sd_faultinjection()
30715  *
30716  * Description: This routine takes the pkt and changes its
30717  *		content based on the error injection scenario.
30718  *
30719  *   Arguments: pktp	- packet to be changed
30720  */
30721 
30722 static void
30723 sd_faultinjection(struct scsi_pkt *pktp)
30724 {
30725 	uint_t i;
30726 	struct sd_fi_pkt *fi_pkt;
30727 	struct sd_fi_xb *fi_xb;
30728 	struct sd_fi_un *fi_un;
30729 	struct sd_fi_arq *fi_arq;
30730 	struct buf *bp;
30731 	struct sd_xbuf *xb;
30732 	struct sd_lun *un;
30733 
30734 	ASSERT(pktp != NULL);
30735 
30736 	/* pull bp, xb, and un from pktp */
30737 	bp = (struct buf *)pktp->pkt_private;
30738 	xb = SD_GET_XBUF(bp);
30739 	un = SD_GET_UN(bp);
30740 
30741 	ASSERT(un != NULL);
30742 
30743 	mutex_enter(SD_MUTEX(un));
30744 
30745 	SD_TRACE(SD_LOG_SDTEST, un,
30746 	    "sd_faultinjection: entry Injection from sdintr\n");
30747 
30748 	/* If injection is off, return */
30749 	if (sd_fault_injection_on == 0 ||
30750 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30751 		mutex_exit(SD_MUTEX(un));
30752 		return;
30753 	}
30754 
30755 
30756 	/* take the next set off the fifo */
30757 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30758 
30759 	fi_pkt = un->sd_fi_fifo_pkt[i];
30760 	fi_xb = un->sd_fi_fifo_xb[i];
30761 	fi_un = un->sd_fi_fifo_un[i];
30762 	fi_arq = un->sd_fi_fifo_arq[i];
30763 
30764 
30765 	/* set variables accordingly */
30766 	/* set pkt if it was on fifo */
30767 	if (fi_pkt != NULL) {
30768 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30769 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30770 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30771 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30772 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30773 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30774 
30775 	}
30776 
30777 	/* set xb if it was on fifo */
30778 	if (fi_xb != NULL) {
30779 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30780 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30781 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30782 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30783 		    "xb_victim_retry_count");
30784 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30785 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30786 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30787 
30788 		/* copy in block data from sense */
30789 		if (fi_xb->xb_sense_data[0] != -1) {
30790 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30791 			    SENSE_LENGTH);
30792 		}
30793 
30794 		/* copy in extended sense codes */
30795 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30796 		    "es_code");
30797 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30798 		    "es_key");
30799 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30800 		    "es_add_code");
30801 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30802 		    es_qual_code, "es_qual_code");
30803 	}
30804 
30805 	/* set un if it was on fifo */
30806 	if (fi_un != NULL) {
30807 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30808 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30809 		SD_CONDSET(un, un, un_reset_retry_count,
30810 		    "un_reset_retry_count");
30811 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30812 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30813 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30814 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30815 		    "un_f_geometry_is_valid");
30816 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30817 		    "un_f_allow_bus_device_reset");
30818 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30819 
30820 	}
30821 
30822 	/* copy in auto request sense if it was on fifo */
30823 	if (fi_arq != NULL) {
30824 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30825 	}
30826 
30827 	/* free structs */
30828 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30829 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30830 	}
30831 	if (un->sd_fi_fifo_xb[i] != NULL) {
30832 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30833 	}
30834 	if (un->sd_fi_fifo_un[i] != NULL) {
30835 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30836 	}
30837 	if (un->sd_fi_fifo_arq[i] != NULL) {
30838 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30839 	}
30840 
30841 	/*
30842 	 * kmem_free() does not guarantee that the pointer is set to
30843 	 * NULL.  Since we use these pointers to determine whether we
30844 	 * set values, make sure they are always NULL after the free.
30846 	 */
30847 	un->sd_fi_fifo_pkt[i] = NULL;
30848 	un->sd_fi_fifo_un[i] = NULL;
30849 	un->sd_fi_fifo_xb[i] = NULL;
30850 	un->sd_fi_fifo_arq[i] = NULL;
30851 
30852 	un->sd_fi_fifo_start++;
30853 
30854 	mutex_exit(SD_MUTEX(un));
30855 
30856 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30857 }
30858 
30859 #endif /* SD_FAULT_INJECTION */
30860 
30861 /*
30862  * This routine is invoked in sd_unit_attach(). Before calling it, the
30863  * This routine is invoked in sd_unit_attach(). Before it is called, the
30864  * properties in the conf file, including the "hotpluggable" property,
30865  * must already have been processed.
30866  *
30867  * The sd driver distinguishes three types of devices: removable media,
30868  * non-removable media, and hotpluggable. The differences are defined below:
30869  * 1. Device ID
30870  *
30871  *     The device ID of a device is used to identify this device. Refer to
30872  *     ddi_devid_register(9F).
30873  *
30874  *     For a non-removable media disk device which can provide 0x80 or 0x83
30875  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30876  *     device ID is created to identify this device. For other non-removable
30877  *     media devices, a default device ID is created only if this device has
30878  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30879  *
30880  *     -------------------------------------------------------
30881  *     removable media   hotpluggable  | Can Have Device ID
30882  *     -------------------------------------------------------
30883  *         false             false     |     Yes
30884  *         false             true      |     Yes
30885  *         true                x       |     No
30886  *     ------------------------------------------------------
30887  *
30888  *
30889  * 2. SCSI group 4 commands
30890  *
30891  *     In the SCSI specs, only some commands in the group 4 command set
30892  *     use 8-byte addresses, which are needed to access storage beyond
30893  *     2TB. Other commands have no such capability. Without group 4
30894  *     support, it is impossible to make full use of the storage space
30895  *     of a disk with capacity larger than 2TB.
30896  *
30897  *     -----------------------------------------------
30898  *     removable media   hotpluggable   LP64  |  Group
30899  *     -----------------------------------------------
30900  *           false          false       false |   1
30901  *           false          false       true  |   4
30902  *           false          true        false |   1
30903  *           false          true        true  |   4
30904  *           true             x           x   |   5
30905  *     -----------------------------------------------
30906  *
30907  *
30908  * 3. Check for VTOC Label
30909  *
30910  *     If a direct-access disk has no EFI label, sd will check if it has a
30911  *     valid VTOC label. Now, sd also does that check for removable media
30912  *     and hotpluggable devices.
30913  *
30914  *     --------------------------------------------------------------
30915  *     Direct-Access   removable media    hotpluggable |  Check Label
30916  *     -------------------------------------------------------------
30917  *         false          false           false        |   No
30918  *         false          false           true         |   No
30919  *         false          true            false        |   Yes
30920  *         false          true            true         |   Yes
30921  *         true            x                x          |   Yes
30922  *     --------------------------------------------------------------
30923  *
30924  *
30925  * 4. Building default VTOC label
30926  *
30927  *     As section 3 says, sd checks whether some kinds of devices have a
30928  *     VTOC label. If such a device has no valid VTOC label, sd(7d) attempts
30929  *     to create a default VTOC for it. Currently sd creates a default VTOC
30930  *     for all devices on x86 platform (VTOC_16), but only for removable
30931  *     media devices on SPARC (VTOC_8).
30932  *
30933  *     -----------------------------------------------------------
30934  *       removable media hotpluggable platform   |   Default Label
30935  *     -----------------------------------------------------------
30936  *             false          false    sparc     |     No
30937  *             false          true      x86      |     Yes
30938  *             false          true     sparc     |     Yes
30939  *             true             x        x       |     Yes
30940  *     ----------------------------------------------------------
30941  *
30942  *
30943  * 5. Supported blocksizes of target devices
30944  *
30945  *     Sd supports non-512-byte blocksize for removable media devices only.
30946  *     For other devices, only 512-byte blocksize is supported. This may be
30947  *     changed in the near future because some RAID devices require a
30948  *     non-512-byte blocksize.
30949  *
30950  *     -----------------------------------------------------------
30951  *     removable media    hotpluggable    | non-512-byte blocksize
30952  *     -----------------------------------------------------------
30953  *           false          false         |   No
30954  *           false          true          |   No
30955  *           true             x           |   Yes
30956  *     -----------------------------------------------------------
30957  *
30958  *
30959  * 6. Automatic mount & unmount (i.e. vold)
30960  *
30961  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
30962  *     query whether a device is a removable media device. It returns 1 for
30963  *     removable media devices, and 0 for others.
30964  *
30965  *     Vold treats a device as removable only if DKIOCREMOVABLE returns 1,
30966  *     and it does automounting only for removable media devices. In order
30967  *     to preserve the user experience and let vold continue to automount
30968  *     USB disk devices, the DKIOCREMOVABLE ioctl still returns 1 for
30969  *     USB/1394 disk devices.
30970  *
30971  *      ------------------------------------------------------
30972  *       removable media    hotpluggable   |  automatic mount
30973  *      ------------------------------------------------------
30974  *             false          false        |   No
30975  *             false          true         |   Yes
30976  *             true             x          |   Yes
30977  *      ------------------------------------------------------
30978  *
30979  *
30980  * 7. fdisk partition management
30981  *
30982  *     Fdisk is the traditional partitioning method on the x86 platform.
30983  *     The sd(7d) driver supports fdisk partitions only on x86. On SPARC,
30984  *     sd doesn't support fdisk partitions at all. Note: pcfs(7fs) can
30985  *     recognize fdisk partitions on both the x86 and SPARC platforms.
30986  *
30987  *     -----------------------------------------------------------
30988  *       platform   removable media  USB/1394  |  fdisk supported
30989  *     -----------------------------------------------------------
30990  *        x86         X               X        |       true
30991  *     ------------------------------------------------------------
30992  *        sparc       X               X        |       false
30993  *     ------------------------------------------------------------
30994  *
30995  *
30996  * 8. MBOOT/MBR
30997  *
30998  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
30999  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
31000  *     support reading/writing the mboot for removable media devices on SPARC.
31001  *     -----------------------------------------------------------
31002  *       platform   removable media  USB/1394  |  mboot supported
31003  *     -----------------------------------------------------------
31004  *        x86         X               X        |       true
31005  *     ------------------------------------------------------------
31006  *        sparc      false           false     |       false
31007  *        sparc      false           true      |       true
31008  *        sparc      true            false     |       true
31009  *        sparc      true            true      |       true
31010  *     ------------------------------------------------------------
31011  *
31012  *
31013  * 9.  Error handling during device open
31014  *
31015  *     If a disk device fails to open, an errno is returned. For some kinds
31016  *     of errors, a different errno is returned depending on whether it is
31017  *     a removable media device. This brings USB/1394 hard disks in line with
31018  *     expected hard disk behavior. It is not expected that this breaks any
31019  *     application.
31020  *
31021  *     ------------------------------------------------------
31022  *       removable media    hotpluggable   |  errno
31023  *     ------------------------------------------------------
31024  *             false          false        |   EIO
31025  *             false          true         |   EIO
31026  *             true             x          |   ENXIO
31027  *     ------------------------------------------------------
31028  *
31029  *
31030  * 11. ioctls: DKIOCEJECT, CDROMEJECT
31031  *
31032  *     These IOCTLs are applicable only to removable media devices.
31033  *
31034  *     -----------------------------------------------------------
31035  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
31036  *     -----------------------------------------------------------
31037  *             false          false        |     No
31038  *             false          true         |     No
31039  *             true            x           |     Yes
31040  *     -----------------------------------------------------------
31041  *
31042  *
31043  * 12. Kstats for partitions
31044  *
31045  *     sd creates partition kstats for non-removable media devices. USB and
31046  *     Firewire hard disks now have partition kstats.
31047  *
31048  *      ------------------------------------------------------
31049  *       removable media    hotpluggable   |   kstat
31050  *      ------------------------------------------------------
31051  *             false          false        |    Yes
31052  *             false          true         |    Yes
31053  *             true             x          |    No
31054  *       ------------------------------------------------------
31055  *
31056  *
31057  * 13. Removable media & hotpluggable properties
31058  *
31059  *     The sd driver creates a "removable-media" property for removable
31060  *     media devices. Parent nexus drivers create a "hotpluggable" property
31061  *     if they support hotplugging.
31062  *
31063  *     ---------------------------------------------------------------------
31064  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
31065  *     ---------------------------------------------------------------------
31066  *       false            false       |    No                   No
31067  *       false            true        |    No                   Yes
31068  *       true             false       |    Yes                  No
31069  *       true             true        |    Yes                  Yes
31070  *     ---------------------------------------------------------------------
31071  *
31072  *
31073  * 14. Power Management
31074  *
31075  *     sd only power manages removable media devices or devices that support
31076  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
31077  *
31078  *     A parent nexus that supports hotplugging can also set "pm-capable"
31079  *     if the disk can be power managed.
31080  *
31081  *     ------------------------------------------------------------
31082  *       removable media hotpluggable pm-capable  |   power manage
31083  *     ------------------------------------------------------------
31084  *             false          false     false     |     No
31085  *             false          false     true      |     Yes
31086  *             false          true      false     |     No
31087  *             false          true      true      |     Yes
31088  *             true             x        x        |     Yes
31089  *     ------------------------------------------------------------
31090  *
31091  *      USB and firewire hard disks can now be power managed independently
31092  *      of the framebuffer.
31093  *
31094  *
31095  * 15. Support for USB disks with capacity larger than 1TB
31096  *
31097  *     Currently, sd doesn't permit a fixed disk device with capacity
31098  *     larger than 1TB to be used in a 32-bit operating system environment.
31099  *     However, sd doesn't apply that check to removable media devices.
31100  *     Instead, it assumes that removable media devices cannot have a
31101  *     capacity larger than 1TB. Therefore, using those devices on a 32-bit
31102  *     system is partially supported, which can cause some unexpected results.
31103  *
31104  *     ---------------------------------------------------------------------
31105  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31106  *     ---------------------------------------------------------------------
31107  *             false          false  |   true         |     no
31108  *             false          true   |   true         |     no
31109  *             true           false  |   true         |     Yes
31110  *             true           true   |   true         |     Yes
31111  *     ---------------------------------------------------------------------
31112  *
31113  *
31114  * 16. Check write-protection at open time
31115  *
31116  *     When a removable media device is opened for writing without the NDELAY
31117  *     flag, sd checks whether the device is writable. If a write-protected
31118  *     device is opened for writing without the NDELAY flag, the open aborts.
31119  *
31120  *     ------------------------------------------------------------
31121  *       removable media    USB/1394   |   WP Check
31122  *     ------------------------------------------------------------
31123  *             false          false    |     No
31124  *             false          true     |     No
31125  *             true           false    |     Yes
31126  *             true           true     |     Yes
31127  *     ------------------------------------------------------------
31128  *
31129  *
31130  * 17. syslog when corrupted VTOC is encountered
31131  *
31132  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
31133  *      message only for fixed SCSI disks.
31134  *     ------------------------------------------------------------
31135  *       removable media    USB/1394   |   print syslog
31136  *     ------------------------------------------------------------
31137  *             false          false    |     Yes
31138  *             false          true     |     No
31139  *             true           false    |     No
31140  *             true           true     |     No
31141  *     ------------------------------------------------------------
31142  */
31143 static void
31144 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31145 {
31146 	int	pm_capable_prop;
31147 
31148 	ASSERT(un->un_sd);
31149 	ASSERT(un->un_sd->sd_inq);
31150 
31151 #if defined(_SUNOS_VTOC_16)
31152 	/*
31153 	 * For VTOC_16 devices, the default label will be created for all
31154 	 * devices. (see sd_build_default_label)
31155 	 */
31156 	un->un_f_default_vtoc_supported = TRUE;
31157 #endif
31158 
31159 	if (un->un_sd->sd_inq->inq_rmb) {
31160 		/*
31161 		 * The media of this device is removable. And for this kind
31162 		 * The media of this device is removable, and for this kind
31163 		 * of device it is possible to change the medium after the
31164 		 * device is opened. Thus we should support this operation.
31165 		un->un_f_has_removable_media = TRUE;
31166 
31167 #if defined(_SUNOS_VTOC_8)
31168 		/*
31169 		 * Note: currently, for VTOC_8 devices, default label is
31170 		 * created for removable and hotpluggable devices only.
31171 		 */
31172 		un->un_f_default_vtoc_supported = TRUE;
31173 #endif
31174 		/*
31175 		 * support non-512-byte blocksize of removable media devices
31176 		 */
31177 		un->un_f_non_devbsize_supported = TRUE;
31178 
31179 		/*
31180 		 * Assume that all removable media devices support DOOR_LOCK
31181 		 */
31182 		un->un_f_doorlock_supported = TRUE;
31183 
31184 		/*
31185 		 * For a removable media device, it is possible to be opened
31186 		 * with NDELAY flag when there is no media in drive, in this
31187 		 * case we don't care if device is writable. But if without
31188 		 * NDELAY flag, we need to check if media is write-protected.
31189 		 */
31190 		un->un_f_chk_wp_open = TRUE;
31191 
31192 		/*
31193 		 * need to start a SCSI watch thread to monitor media state,
31194 		 * when media is being inserted or ejected, notify syseventd.
31195 		 */
31196 		un->un_f_monitor_media_state = TRUE;
31197 
31198 		/*
31199 		 * Some devices don't support START_STOP_UNIT command.
31200 		 * Therefore, we'd better check if a device supports it
31201 		 * before sending it.
31202 		 */
31203 		un->un_f_check_start_stop = TRUE;
31204 
31205 		/*
31206 		 * support eject media ioctl:
31207 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31208 		 */
31209 		un->un_f_eject_media_supported = TRUE;
31210 
31211 		/*
31212 		 * Because many removable-media devices don't support
31213 		 * LOG_SENSE, we can't use this command to check whether
31214 		 * a removable media device supports power management.
31215 		 * We assume that they support power management via the
31216 		 * START_STOP_UNIT command and can be spun up and down
31217 		 * without limitations.
31218 		 */
31219 		un->un_f_pm_supported = TRUE;
31220 
31221 		/*
31222 		 * Need to create a zero length (Boolean) property
31223 		 * removable-media for the removable media devices.
31224 		 * Note that the return value is not checked, since if the
31225 		 * property cannot be created we do not want the attach to
31226 		 * fail altogether. This is consistent with other property
31227 		 * creation in attach.
31228 		 */
31229 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31230 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31231 
31232 	} else {
31233 		/*
31234 		 * create device ID for device
31235 		 */
31236 		un->un_f_devid_supported = TRUE;
31237 
31238 		/*
31239 		 * Spin up non-removable-media devices once it is attached
31240 		 */
31241 		un->un_f_attach_spinup = TRUE;
31242 
31243 		/*
31244 		 * According to the SCSI specification, sense data has two
31245 		 * formats: fixed format and descriptor format. At present, we
31246 		 * don't support descriptor format sense data for removable
31247 		 * media.
31248 		 */
31249 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31250 			un->un_f_descr_format_supported = TRUE;
31251 		}
31252 
31253 		/*
31254 		 * kstats are created only for non-removable media devices.
31255 		 *
31256 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31257 		 * default is 1, so they are enabled by default.
31258 		 */
31259 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31260 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31261 		    "enable-partition-kstats", 1));
31262 
31263 		/*
31264 		 * Check if HBA has set the "pm-capable" property.
31265 		 * If "pm-capable" exists and is non-zero then we can
31266 		 * power manage the device without checking the start/stop
31267 		 * cycle count log sense page.
31268 		 *
31269 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31270 		 * then we should not power manage the device.
31271 		 *
31272 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31273 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31274 		 * sd will check the start/stop cycle count log sense page
31275 		 * and power manage the device if the cycle count limit has
31276 		 * not been exceeded.
31277 		 */
31278 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31279 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31280 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31281 			un->un_f_log_sense_supported = TRUE;
31282 		} else {
31283 			/*
31284 			 * pm-capable property exists.
31285 			 *
31286 			 * Convert "TRUE" values for pm_capable_prop to
31287 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31288 			 * later. "TRUE" values are any values except
31289 			 * SD_PM_CAPABLE_FALSE (0) and
31290 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31291 			 */
31292 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31293 				un->un_f_log_sense_supported = FALSE;
31294 			} else {
31295 				un->un_f_pm_supported = TRUE;
31296 			}
31297 
31298 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31299 			    "sd_unit_attach: un:0x%p pm-capable "
31300 			    "property set to %d.\n", un, un->un_f_pm_supported);
31301 		}
31302 	}
31303 
31304 	if (un->un_f_is_hotpluggable) {
31305 #if defined(_SUNOS_VTOC_8)
31306 		/*
31307 		 * Note: currently, for VTOC_8 devices, default label is
31308 		 * created for removable and hotpluggable devices only.
31309 		 */
31310 		un->un_f_default_vtoc_supported = TRUE;
31311 #endif
31312 
31313 		/*
31314 		 * Temporarily, let hotpluggable devices pretend to be
31315 		 * removable-media devices for vold.
31316 		 */
31317 		un->un_f_monitor_media_state = TRUE;
31318 
31319 		un->un_f_check_start_stop = TRUE;
31320 
31321 	}
31322 
31323 	/*
31324 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31325 	 * labels.
31326 	 */
31327 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31328 	    (un->un_sd->sd_inq->inq_rmb)) {
31329 		/*
31330 		 * Direct access devices have disk label
31331 		 */
31332 		un->un_f_vtoc_label_supported = TRUE;
31333 	}
31334 
31335 	/*
31336 	 * Fdisk partitions are supported for all direct access devices on the
31337 	 * x86 platform, and only for removable media and hotpluggable devices
31338 	 * on the SPARC platform. Below, we set this flag to FALSE if the
31339 	 * current device is not a removable media or hotpluggable device and
31340 	 * sd is running on the SPARC platform.
31341 	 */
31342 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31343 		un->un_f_mboot_supported = TRUE;
31344 	}
31345 
31346 	if (!un->un_f_is_hotpluggable &&
31347 	    !un->un_sd->sd_inq->inq_rmb) {
31348 
31349 #if defined(_SUNOS_VTOC_8)
31350 		/*
31351 		 * Don't support fdisk on fixed disk
31352 		 */
31353 		un->un_f_mboot_supported = FALSE;
31354 #endif
31355 
31356 		/*
31357 		 * Fixed disks support SYNC CACHE
31358 		 */
31359 		un->un_f_sync_cache_supported = TRUE;
31360 
31361 		/*
31362 		 * For a fixed disk, if its VTOC is not valid, we will write
31363 		 * an error log entry to the system log
31364 		 */
31365 		if (un->un_f_vtoc_label_supported)
31366 			un->un_f_vtoc_errlog_supported = TRUE;
31367 	}
31368 }
31369